├── CMakeLists.txt ├── LICENSE ├── README.md ├── docker ├── Dockerfile ├── build_docker_image.sh ├── jammy │ └── Dockerfile └── scripts │ ├── build.sh │ ├── build_install_llvm.sh │ ├── checkout.sh │ └── llvm_checksum │ ├── llvm_checksum.py │ └── project_tree.py ├── logo.png └── src ├── cicc.cpp ├── nvcc-llvm-ir.cpp ├── nvcc.cpp ├── pass1.cpp ├── pass2.cpp ├── test1.cu └── test2.cu /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.19 FATAL_ERROR) 2 | 3 | project(nvcc-llvm-ir LANGUAGES CXX CUDA) 4 | 5 | find_package(LLVM CONFIG PATHS "/usr/local/lib/cmake" NO_DEFAULT_PATH) 6 | 7 | if (LLVM_FOUND) 8 | 9 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 10 | message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") 11 | 12 | # Locate NVVM components in the CUDA toolkit directory tree: 13 | # https://github.com/nvidia-compiler-sdk/nvvmir-samples/blob/master/CMakeLists.txt 14 | find_package(CUDAToolkit REQUIRED) 15 | get_filename_component(CUDA_HOME "${CUDAToolkit_BIN_DIR}" DIRECTORY) 16 | find_file(LIBNVVM_HOME nvvm PATHS "${CUDA_HOME}") 17 | find_library(NVVM_LIB nvvm PATHS "${LIBNVVM_HOME}/lib64" "${LIBNVVM_HOME}/lib/x64") 18 | find_file(NVVM_H nvvm.h PATHS "${LIBNVVM_HOME}/include") 19 | get_filename_component(NVVM_INCLUDE_DIRS ${NVVM_H} DIRECTORY) 20 | 21 | add_library(cicc SHARED "src/cicc.cpp") 22 | target_include_directories(cicc PRIVATE ${LLVM_INCLUDE_DIRS}) 23 | target_include_directories(cicc PRIVATE ${NVVM_INCLUDE_DIRS}) 24 | target_compile_features(cicc PRIVATE cxx_std_17) 25 | if (NOT WIN32) 26 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") 27 | endif() 28 | target_compile_definitions(cicc PRIVATE ${LLVM_DEFINITIONS_LIST}) 29 | target_compile_options(cicc PRIVATE $<$<CXX_COMPILER_ID:MSVC>:/wd4624> $<$<CXX_COMPILER_ID:MSVC>:/wd4291> $<$<CXX_COMPILER_ID:MSVC>:/MT>) 30 | target_link_directories(cicc PRIVATE ${LLVM_LIBRARY_DIRS}) 31 | 32 | # With older LLVM releases, instead of just linking against ${LLVM_LIBRARIES}, 33 | # we have 
to use this idiotic procedure: 34 | # https://llvm.org/docs/CMake.html#embedding-llvm-in-your-project 35 | # Find the libraries that correspond to the LLVM components we wish to use 36 | llvm_map_components_to_libnames(llvm_libs core support ipo analysis target scalaropts transformutils instcombine) 37 | target_link_libraries(cicc PRIVATE ${llvm_libs}) 38 | 39 | add_library(pass1 SHARED "src/pass1.cpp") 40 | target_include_directories(pass1 PRIVATE ${LLVM_INCLUDE_DIRS}) 41 | target_include_directories(pass1 PRIVATE ${NVVM_INCLUDE_DIRS}) 42 | target_compile_features(pass1 PRIVATE cxx_std_17) 43 | target_compile_definitions(pass1 PRIVATE ${LLVM_DEFINITIONS_LIST}) 44 | target_compile_options(pass1 PRIVATE $<$<CXX_COMPILER_ID:MSVC>:/wd4624> $<$<CXX_COMPILER_ID:MSVC>:/wd4291> $<$<CXX_COMPILER_ID:MSVC>:/MT>) 45 | target_link_directories(pass1 PRIVATE ${LLVM_LIBRARY_DIRS}) 46 | target_link_libraries(pass1 PRIVATE ${llvm_libs}) 47 | 48 | add_library(pass2 SHARED "src/pass2.cpp") 49 | target_include_directories(pass2 PRIVATE ${LLVM_INCLUDE_DIRS}) 50 | target_include_directories(pass2 PRIVATE ${NVVM_INCLUDE_DIRS}) 51 | target_compile_features(pass2 PRIVATE cxx_std_17) 52 | target_compile_definitions(pass2 PRIVATE ${LLVM_DEFINITIONS_LIST}) 53 | target_compile_options(pass2 PRIVATE $<$<CXX_COMPILER_ID:MSVC>:/wd4624> $<$<CXX_COMPILER_ID:MSVC>:/wd4291> $<$<CXX_COMPILER_ID:MSVC>:/MT>) 54 | target_link_directories(pass2 PRIVATE ${LLVM_LIBRARY_DIRS}) 55 | target_link_libraries(pass2 PRIVATE ${llvm_libs}) 56 | 57 | add_library(nvcc SHARED "src/nvcc.cpp") 58 | target_link_libraries(nvcc PRIVATE ${CMAKE_DL_LIBS}) 59 | target_compile_definitions(nvcc PRIVATE LIBCICC="$<TARGET_FILE:cicc>") 60 | 61 | add_executable(${PROJECT_NAME} "src/${PROJECT_NAME}.cpp") 62 | target_compile_definitions(${PROJECT_NAME} PRIVATE LIBNVCC="$<TARGET_FILE:nvcc>") 63 | 64 | add_custom_target(test DEPENDS test1 test2) 65 | add_custom_target(test1 DEPENDS test1_unopt test1_opt) 66 | add_custom_target(test2 DEPENDS test2_unopt test2_opt) 67 | 68 | # Prepend CUDA compiler with a launcher, which shall perform preloading 69 | # of our shared library wrapper. 
70 | set(CMAKE_CUDA_COMPILER_LAUNCHER ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}) 71 | 72 | add_executable(test1_unopt "src/test1.cu") 73 | add_dependencies(test1_unopt cicc nvcc pass1 pass2) 74 | set_property(TARGET test1_unopt PROPERTY CUDA_SEPARABLE_COMPILATION ON) 75 | target_compile_options(test1_unopt PRIVATE -keep --${PROJECT_NAME}-unopt=$<TARGET_FILE:pass1>) 76 | 77 | add_executable(test1_opt "src/test1.cu") 78 | add_dependencies(test1_opt cicc nvcc pass1 pass2) 79 | set_property(TARGET test1_opt PROPERTY CUDA_SEPARABLE_COMPILATION ON) 80 | target_compile_options(test1_opt PRIVATE -keep --${PROJECT_NAME}-opt=$<TARGET_FILE:pass2>) 81 | 82 | add_executable(test2_unopt "src/test2.cu") 83 | add_dependencies(test2_unopt cicc nvcc pass1 pass2) 84 | set_property(TARGET test2_unopt PROPERTY CUDA_SEPARABLE_COMPILATION ON) 85 | target_compile_options(test2_unopt PRIVATE -keep --${PROJECT_NAME}-unopt=$<TARGET_FILE:pass1>) 86 | 87 | add_executable(test2_opt "src/test2.cu") 88 | add_dependencies(test2_opt cicc nvcc pass1 pass2) 89 | set_property(TARGET test2_opt PROPERTY CUDA_SEPARABLE_COMPILATION ON) 90 | target_compile_options(test2_opt PRIVATE -keep --${PROJECT_NAME}-opt=$<TARGET_FILE:pass2>) 91 | 92 | endif() 93 | 94 | add_custom_target(docker 95 | COMMAND docker build -f ${CMAKE_CURRENT_SOURCE_DIR}/docker/Dockerfile -t ${PROJECT_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/docker 96 | COMMAND docker run -it --rm --user "${UID}:${GID}" -v${CMAKE_CURRENT_SOURCE_DIR}:/project ${PROJECT_NAME} sh /project/docker/scripts/build.sh 97 | COMMENT "Building ${PROJECT_NAME} in a Docker container") 98 | 99 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2014,2015,2020,2023 Applied Parallel Computing LLC, http://parallel-computing.pro 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to 
deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Enabling on-the-fly manipulations with LLVM IR code of CUDA sources 2 | 3 | ![logo](logo.png) 4 | 5 | Largely thanks to [LLVM](http://llvm.org/), in recent years we've seen a significant increase of interest to domain-specific compilation tools research & development. With the release of PTX backends by NVIDIA (opensource [NVPTX](http://llvm.org/docs/NVPTXUsage.html) and proprietary [libNVVM](https://developer.nvidia.com/cuda-llvm-compiler)), construction of custom LLVM-driven compilers for generating GPU binaries also becomes possible. However, two questions are still remaining: 6 | 7 | 1. How to customize the CUDA source compilation? 8 | 2. What is the best set of GPU-specific LLVM optimizations and how to continue modifying IR after applying them? 9 | 10 | The first question is the result of opensource CUDA *frontend* unavailability. 
In fact the *EDG* frontend (by Edison Design Group Inc.) used by NVIDIA CUDA compiler is the only frontend that is able to translate CUDA source into LLVM IR. LLVM's clang has basic support for some CUDA constructs, but yet is too far from implementing the entire set of parallel extensions. EDG frontend is tightly bound to the rest of CUDA compiler (*cicc*), and there is no public API to use it just for LLVM IR generation. 11 | 12 | The second question is essential for generating efficient GPU code and its further customization. It's well-known that the standard LLVM NVPTX backend and NVIDIA's libNVVM may generate different code, because libNVVM applies specific passes in addition to standard `-O3` set. For instance, as of CUDA 6.0, libNVVM has the following optimization chain for the `sm_30` target: 13 | 14 | ``` 15 | opt -nv-cuda -nvvm-pretreat -generic-to-nvvm -nv-inline-must -R __CUDA_PREC_DIV=1 -R __CUDA_PREC_SQRT=1 -opt-arch=sm_30 -inline -globaldce -lower-struct-args -memory-space-opt=1 -disable-rsqrt-opt=1 -O3 16 | ``` 17 | 18 | Some of the passes mnemonics do not exist in standard LLVM 3.0, meaning they are likely NVIDIA's proprietary extensions. Thus, GPU code generation could not be fully reproduced by the opensource NVPTX backend. On the other hand, if libNVVM backend is used, then the LLVM IR input is translated directly into PTX code, without a possibility to review and modify the optimized IR before PTX generation. 19 | 20 | In order to remove these limitations, we have created a special dynamic library. Being attached to NVIDIA CUDA compiler, this library exposes unoptimized and optimized LLVM IR code to the user and allows its on-the-fly modification. As a result, domain-specific compiler developer receives flexibility e.g. to re-target CUDA-generated LLVM IR to different architectures, or to make additional modifications to IR after executing NVIDIA's optimizations. 
Below we explain the technical details of how unoptimized and optimized LLVM IR versions have been retrieved from CUDA compiler by our dynamic library. 21 | 22 | ## NVIDIA CUDA compiler overview 23 | 24 | NVIDIA CUDA compiler is a complex set of pipelined code processing binaries. After the input source is preprocessed and decomposed into separate host and device sources, compiler driver (*nvcc*) deploys CUDA-to-LLVM compiler -- *cicc*, which shall be our main point of interest. 25 | 26 | According to the [License For Customer Use of NVIDIA Software](http://www.nvidia.com/content/DriverDownload-March2009/licence.php?lang=us), customer may not reverse engineer, decompile, or disassemble the software, nor attempt in any other manner to obtain the source code. Being in strict compliance with this requirement, we analyzed *cicc* only by means of basic debugging tool and standard C library calls instrumentation. 27 | 28 | ## Unoptimized LLVM IR retrieval 29 | 30 | Retrieval of unoptimized LLVM IR is relatively straight-forward. In order to generate the PTX code, *cicc* deploys libNVVM library functions, which have a documented interface. Instrumentation of the first call to `nvvmAddModuleToProgram` function allows to retrieve the LLVM IR for input CUDA source from the second parameter, which is the LLVM bitcode string. 
This bitcode could be parsed into LLVM Module instance using functions of a compatible LLVM release, and printed as IR: 31 | 32 | ```c++ 33 | string source = ""; 34 | source.reserve(size); 35 | source.assign(bitcode, bitcode + size); 36 | MemoryBuffer *input = MemoryBuffer::getMemBuffer(source); 37 | string err; 38 | LLVMContext &context = getGlobalContext(); 39 | initial_module = ParseBitcodeFile(input, context, &err); 40 | if (!initial_module) 41 | cerr << "Error parsing module bitcode : " << err; 42 | 43 | outs() << *initial_module; 44 | ``` 45 | 46 | On-the-fly modification of unoptimized LLVM could be achieved by exporting LLVM Module back into bitcode string: 47 | 48 | ```c++ 49 | SmallVector<char, 1024> output; 50 | raw_svector_ostream outputStream(output); 51 | WriteBitcodeToFile(initial_module, outputStream); 52 | outputStream.flush(); 53 | 54 | // Call real nvvmAddModuleToProgram 55 | return nvvmAddModuleToProgram_real(prog, output.data(), output.size(), name); 56 | ``` 57 | 58 | Note the unoptimized LLVM IR does not include math and GPU-specific builtins, that are linked-in later. 59 | 60 | ## Optimized LLVM IR retrieval 61 | 62 | The libNVVM library itself statically links to NVIDIA's customized LLVM engine, and like most of other binaries in CUDA Toolkit is fully stripped (no debug info, no function frames, etc.). Fortunately, libNVVM is still dynamically linked against the standard C library, which allows us to analyze memory allocations and data transfers. Instrumentation of `malloc` function reveals Module-sized space allocation in the beginning of `nvvmCompileProgram`: 63 | 64 | ```c++ 65 | void* result = malloc_real(size); 66 | 67 | if (called_compile) 68 | { 69 | if (size == sizeof(Module)) 70 | optimized_module = (Module*)result; 71 | } 72 | ``` 73 | 74 | Luckily, this very Module instance exists during entire compilation process, and accumulates all changes made to input LLVM IR by optimization passes. 
Thus, we only need to find an appropriate moment to intercept this Module and modify its contents. The subsequent call to `localtime` is used as heuristic. Unlike the unoptimized case, this Module could be printed and modified directly, without loading/storing any bitcode. 75 | 76 | Retrieved optimized LLVM IR is linked together with math and GPU-specific builtins and is ready for PTX backend. 77 | 78 | ## Building 79 | 80 | Unlike AMD, which uses the most recent versions of clang++ for HIP compilation, NVIDIA CUDA compiler is historically always far behind the actual release of LLVM. In order to determine the matching LLVM release, we can look into the `cicc` executable: 81 | 82 | ``` 83 | $ strings /usr/local/cuda/nvvm/bin/cicc | grep LLVM | grep 7 84 | LLVM0700H 85 | LLVM0700 86 | LLVM7.0.1 87 | llvm-mc (based on LLVM 7.0.1) 88 | ``` 89 | 90 | Prepare a Docker container with matching releases of CUDA and LLVM pre-installed: 91 | 92 | ```bash 93 | ./docker/build_docker_image.sh \ 94 | -s jammy -d llvm7-ubuntu -t "jammy" \ 95 | --branch release/7.x \ 96 | -i install \ 97 | -- \ 98 | -DLLVM_TARGETS_TO_BUILD="host;NVPTX" \ 99 | -DCMAKE_BUILD_TYPE=Release 100 | ``` 101 | 102 | Compile our dynamic libraries within the Docker container: 103 | 104 | ``` 105 | $ make 106 | g++ -g -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -I/opt/llvm-3.0/include -I/opt/cuda/nvvm/include/ -fPIC cicc.cpp -shared -o libcicc.so -ldl 107 | g++ -g -I/opt/cuda/nvvm/include/ -fPIC nvcc.cpp -shared -o libnvcc.so -ldl 108 | ``` 109 | 110 | ## Usage 111 | 112 | Finally, let's demonstrate LLVM IR on-the-fly modification in action. 
Consider the following input CUDA source: 113 | 114 | ```c++ 115 | $ cat test.cu 116 | extern "C" __device__ void kernel(int* result) { *result = 1; } 117 | ``` 118 | 119 | The LLVM IR retrieval mode is specified by two environment variables: 120 | 121 | * `CICC_MODIFY_UNOPT_MODULE=1` -- retrieve unoptimized LLVM IR and change it as specified in `modifyModule` function (`cicc.cpp` source file) 122 | * `CICC_MODIFY_OPT_MODULE=1` -- retrieve optimized LLVM IR and change it as specified in `modifyModule` function (`cicc.cpp` source file) 123 | 124 | Example `modifyModule` simply adds suffix to all existing function names: 125 | 126 | ```c++ 127 | void modifyModule(Module* module) 128 | { 129 | if (!module) return; 130 | 131 | // Add suffix to function name, for example. 132 | for (Module::iterator i = module->begin(), e = module->end(); i != e; i++) 133 | i->setName(i->getName() + "_modified"); 134 | } 135 | 136 | ``` 137 | 138 | Each of the following two commands deploys the corresponding retrieval mode: 139 | 140 | ``` 141 | CICC_MODIFY_UNOPT_MODULE=1 LD_PRELOAD=./libnvcc.so nvcc -arch=sm_30 test.cu -c -keep 142 | CICC_MODIFY_OPT_MODULE=1 LD_PRELOAD=./libnvcc.so nvcc -arch=sm_30 test.cu -c -keep 143 | ``` 144 | 145 | The `-keep` option is added to store the `test.ptx` file, which could be opened to ensure the LLVM IR modification has landed into output PTX code: 146 | 147 | ``` 148 | $ cat test.ptx 149 | // 150 | // Generated by NVIDIA NVVM Compiler 151 | // Compiler built on Thu Mar 13 19:31:35 2014 (1394735495) 152 | // Cuda compilation tools, release 6.0, V6.0.1 153 | // 154 | 155 | .version 4.0 156 | .target sm_30 157 | .address_size 64 158 | 159 | .visible .func kernel_modified( 160 | .param .b64 kernel_modified_param_0 161 | ) 162 | { 163 | .reg .s32 %r<2>; 164 | .reg .s64 %rd<2>; 165 | 166 | 167 | ld.param.u64 %rd1, [kernel_modified_param_0]; 168 | mov.u32 %r1, 1; 169 | st.u32 [%rd1], %r1; 170 | ret; 171 | } 172 | ``` 173 | 174 | ## Final credits 175 | 176 
| This library has been developed for the purpose of software interoperability and used in compilation of [CERN SixTrack application](https://github.com/apc-llc/sixtrack). 177 | 178 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM llvm7-ubuntu:jammy 2 | 3 | LABEL maintainer="dmitry@kernelgen.org" 4 | 5 | ENV DEBIAN_FRONTEND noninteractive 6 | ENV LC_ALL C.UTF-8 7 | ENV LANG en_US.UTF-8 8 | ENV LANGUAGE en_US.UTF-8 9 | 10 | RUN apt-get update && \ 11 | apt-get -y --no-install-recommends install \ 12 | cmake \ 13 | git \ 14 | ninja-build \ 15 | g++-11 && \ 16 | apt-get clean 17 | 18 | COPY . /root/ 19 | 20 | WORKDIR /root 21 | -------------------------------------------------------------------------------- /docker/build_docker_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #===- llvm/utils/docker/build_docker_image.sh ----------------------------===// 3 | # 4 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 | # See https://llvm.org/LICENSE.txt for license information. 6 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 | # 8 | #===----------------------------------------------------------------------===// 9 | set -e 10 | 11 | IMAGE_SOURCE="" 12 | DOCKER_REPOSITORY="" 13 | DOCKER_TAG="" 14 | BUILDSCRIPT_ARGS="" 15 | CHECKOUT_ARGS="" 16 | CMAKE_ENABLED_PROJECTS="" 17 | 18 | function show_usage() { 19 | cat << EOF 20 | Usage: build_docker_image.sh [options] [-- [cmake_args]...] 21 | 22 | Available options: 23 | General: 24 | -h|--help show this help message 25 | Docker-specific: 26 | -s|--source image source dir (i.e. debian10, nvidia-cuda, etc) 27 | -d|--docker-repository docker repository for the image 28 | -t|--docker-tag docker tag for the image 29 | Checkout arguments: 30 | -b|--branch git branch to checkout, i.e. 
'main', 31 | 'release/10.x' 32 | (default: 'main') 33 | -r|--revision git revision to checkout 34 | -c|--cherrypick revision to cherry-pick. Can be specified multiple times. 35 | Cherry-picks are performed in the sorted order using the 36 | following command: 37 | 'git cherry-pick \$rev'. 38 | -p|--llvm-project Add the project to a list LLVM_ENABLE_PROJECTS, passed to 39 | CMake. 40 | Can be specified multiple times. 41 | -c|--checksums name of a file, containing checksums of llvm checkout. 42 | Script will fail if checksums of the checkout do not 43 | match. 44 | Build-specific: 45 | -i|--install-target name of a cmake install target to build and include in 46 | the resulting archive. Can be specified multiple times. 47 | 48 | Required options: --source and --docker-repository, at least one 49 | --install-target. 50 | 51 | All options after '--' are passed to CMake invocation. 52 | 53 | For example, running: 54 | $ build_docker_image.sh -s debian10 -d mydocker/debian10-clang -t latest \ 55 | -p clang -i install-clang -i install-clang-resource-headers 56 | will produce two docker images: 57 | mydocker/debian10-clang-build:latest - an intermediate image used to compile 58 | clang. 59 | mydocker/clang-debian10:latest - a small image with preinstalled clang. 60 | Please note that this example produces a not very useful installation, since it 61 | doesn't override CMake defaults, which produces a Debug and non-boostrapped 62 | version of clang. 
63 | 64 | To get a 2-stage clang build, you could use this command: 65 | $ ./build_docker_image.sh -s debian10 -d mydocker/clang-debian10 -t "latest" \ 66 | -p clang -i stage2-install-clang -i stage2-install-clang-resource-headers \ 67 | -- \ 68 | -DLLVM_TARGETS_TO_BUILD=Native -DCMAKE_BUILD_TYPE=Release \ 69 | -DBOOTSTRAP_CMAKE_BUILD_TYPE=Release \ 70 | -DCLANG_ENABLE_BOOTSTRAP=ON \ 71 | -DCLANG_BOOTSTRAP_TARGETS="install-clang;install-clang-resource-headers" 72 | EOF 73 | } 74 | 75 | CHECKSUMS_FILE="" 76 | SEEN_INSTALL_TARGET=0 77 | SEEN_CMAKE_ARGS=0 78 | while [[ $# -gt 0 ]]; do 79 | case "$1" in 80 | -h|--help) 81 | show_usage 82 | exit 0 83 | ;; 84 | -s|--source) 85 | shift 86 | IMAGE_SOURCE="$1" 87 | shift 88 | ;; 89 | -d|--docker-repository) 90 | shift 91 | DOCKER_REPOSITORY="$1" 92 | shift 93 | ;; 94 | -t|--docker-tag) 95 | shift 96 | DOCKER_TAG="$1" 97 | shift 98 | ;; 99 | -r|--revision|-c|--cherrypick|-b|--branch) 100 | CHECKOUT_ARGS="$CHECKOUT_ARGS $1 $2" 101 | shift 2 102 | ;; 103 | -i|--install-target) 104 | SEEN_INSTALL_TARGET=1 105 | BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS $1 $2" 106 | shift 2 107 | ;; 108 | -p|--llvm-project) 109 | PROJ="$2" 110 | CMAKE_ENABLED_PROJECTS="$CMAKE_ENABLED_PROJECTS;$PROJ" 111 | shift 2 112 | ;; 113 | -c|--checksums) 114 | shift 115 | CHECKSUMS_FILE="$1" 116 | shift 117 | ;; 118 | --) 119 | shift 120 | BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS -- $*" 121 | SEEN_CMAKE_ARGS=1 122 | shift $# 123 | ;; 124 | *) 125 | echo "Unknown argument $1" 126 | exit 1 127 | ;; 128 | esac 129 | done 130 | 131 | 132 | if [ "$CMAKE_ENABLED_PROJECTS" != "" ]; then 133 | # Remove the leading ';' character. 
134 | CMAKE_ENABLED_PROJECTS="${CMAKE_ENABLED_PROJECTS:1}" 135 | 136 | if [[ $SEEN_CMAKE_ARGS -eq 0 ]]; then 137 | BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS --" 138 | fi 139 | BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS -DLLVM_ENABLE_PROJECTS=$CMAKE_ENABLED_PROJECTS" 140 | fi 141 | 142 | command -v docker >/dev/null || 143 | { 144 | echo "Docker binary cannot be found. Please install Docker to use this script." 145 | exit 1 146 | } 147 | 148 | if [ "$IMAGE_SOURCE" == "" ]; then 149 | echo "Required argument missing: --source" 150 | exit 1 151 | fi 152 | 153 | if [ "$DOCKER_REPOSITORY" == "" ]; then 154 | echo "Required argument missing: --docker-repository" 155 | exit 1 156 | fi 157 | 158 | if [ $SEEN_INSTALL_TARGET -eq 0 ]; then 159 | echo "Please provide at least one --install-target" 160 | exit 1 161 | fi 162 | 163 | SOURCE_DIR=$(dirname $0) 164 | if [ ! -d "$SOURCE_DIR/$IMAGE_SOURCE" ]; then 165 | echo "No sources for '$IMAGE_SOURCE' were found in $SOURCE_DIR" 166 | exit 1 167 | fi 168 | 169 | BUILD_DIR=$(mktemp -d) 170 | trap "rm -rf $BUILD_DIR" EXIT 171 | echo "Using a temporary directory for the build: $BUILD_DIR" 172 | 173 | cp -r "$SOURCE_DIR/$IMAGE_SOURCE" "$BUILD_DIR/$IMAGE_SOURCE" 174 | cp -r "$SOURCE_DIR/scripts" "$BUILD_DIR/scripts" 175 | 176 | mkdir "$BUILD_DIR/checksums" 177 | if [ "$CHECKSUMS_FILE" != "" ]; then 178 | cp "$CHECKSUMS_FILE" "$BUILD_DIR/checksums/checksums.txt" 179 | fi 180 | 181 | if [ "$DOCKER_TAG" != "" ]; then 182 | DOCKER_TAG=":$DOCKER_TAG" 183 | fi 184 | 185 | echo "Building ${DOCKER_REPOSITORY}${DOCKER_TAG} from $IMAGE_SOURCE" 186 | DOCKER_BUILDKIT=1 BUILDKIT_PROGRESS=plain docker build -t "${DOCKER_REPOSITORY}${DOCKER_TAG}" \ 187 | --build-arg "checkout_args=$CHECKOUT_ARGS" \ 188 | --build-arg "buildscript_args=$BUILDSCRIPT_ARGS" \ 189 | -f "$BUILD_DIR/$IMAGE_SOURCE/Dockerfile" \ 190 | "$BUILD_DIR" 191 | echo "Done" 192 | -------------------------------------------------------------------------------- /docker/jammy/Dockerfile: 
-------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:experimental 2 | #===- llvm/utils/docker/jammy/build/Dockerfile -------------------------===// 3 | # 4 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 | # See https://llvm.org/LICENSE.txt for license information. 6 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 | # 8 | #===----------------------------------------------------------------------===// 9 | # Stage 1. Check out LLVM source code and run the build. 10 | FROM nvcr.io/nvidia/nvhpc:22.11-devel-cuda_multi-ubuntu22.04 as builder 11 | LABEL maintainer "dmitry@kernelgen.org" 12 | # Install build dependencies of llvm. 13 | # First, Update the apt's source list and include the sources of the packages. 14 | RUN grep deb /etc/apt/sources.list | \ 15 | sed 's/^deb/deb-src /g' >> /etc/apt/sources.list 16 | # Install compiler, python and subversion. 17 | RUN apt-get update && \ 18 | apt-get install -y --no-install-recommends ca-certificates gnupg \ 19 | build-essential cmake make python3 zlib1g wget subversion unzip git 20 | # Install a newer ninja release. It seems the older version 21 | # randomly crashes when compiling llvm. 22 | RUN wget "https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip" && \ 23 | echo "d2fea9ff33b3ef353161ed906f260d565ca55b8ca0568fa07b1d2cab90a84a07 ninja-linux.zip" \ 24 | | sha256sum -c && \ 25 | unzip ninja-linux.zip -d /usr/local/bin && \ 26 | rm ninja-linux.zip 27 | # Install mold as recommended here: https://dev-docs.kicad.org/en/build/linux/ 28 | RUN git clone https://github.com/rui314/mold.git && \ 29 | mkdir mold/build && \ 30 | cd mold/build && \ 31 | git checkout v1.7.1 && \ 32 | ../install-build-deps.sh && \ 33 | cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=c++ -G Ninja .. && \ 34 | cmake --build . && \ 35 | cmake --install . 
36 | 37 | ADD checksums /checksums 38 | ADD scripts /scripts 39 | 40 | # Checkout the source code. 41 | # Run the build. Results of the build will be available at /tmp/llvm-install/. 42 | ARG checkout_args 43 | ARG buildscript_args 44 | RUN --mount=type=tmpfs,destination=/tmp \ 45 | /scripts/checkout.sh ${checkout_args} && \ 46 | /scripts/build_install_llvm.sh --to /llvm-install ${buildscript_args} 47 | 48 | 49 | # Stage 2. Produce a minimal release image with build results. 50 | FROM nvcr.io/nvidia/nvhpc:22.11-devel-cuda_multi-ubuntu22.04 51 | LABEL maintainer "dmitry@kernelgen.org" 52 | # Install packages for minimal useful image. 53 | RUN apt-get update && \ 54 | apt-get install -y --no-install-recommends libstdc++-9-dev binutils && \ 55 | rm -rf /var/lib/apt/lists/* 56 | # Copy build results of stage 1 to /usr/local. 57 | COPY --from=builder /llvm-install/ /usr/local/ 58 | 59 | -------------------------------------------------------------------------------- /docker/scripts/build.sh: -------------------------------------------------------------------------------- 1 | set -e -x 2 | cd /project 3 | mkdir -p build-docker 4 | cd build-docker 5 | cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_DIR=/usr/local/lib/cmake/llvm -DCMAKE_CXX_COMPILER=g++-11 .. && \ 6 | #cmake --build . 7 | make VERBOSE=1 8 | -------------------------------------------------------------------------------- /docker/scripts/build_install_llvm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #===- llvm/utils/docker/scripts/build_install_llvm.sh ---------------------===// 3 | # 4 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 | # See https://llvm.org/LICENSE.txt for license information. 
6 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 | # 8 | #===-----------------------------------------------------------------------===// 9 | 10 | set -e 11 | 12 | function show_usage() { 13 | cat << EOF 14 | Usage: build_install_llvm.sh [options] -- [cmake-args] 15 | 16 | Run cmake with the specified arguments. Used inside docker container. 17 | Passes additional -DCMAKE_INSTALL_PREFIX and puts the build results into 18 | the directory specified by --to option. 19 | 20 | Available options: 21 | -h|--help show this help message 22 | -i|--install-target name of a cmake install target to build and include in 23 | the resulting archive. Can be specified multiple times. 24 | --to destination directory where to install the targets. 25 | Required options: --to, at least one --install-target. 26 | 27 | All options after '--' are passed to CMake invocation. 28 | EOF 29 | } 30 | 31 | CMAKE_ARGS="" 32 | CMAKE_INSTALL_TARGETS="" 33 | LLVM_INSTALL_DIR="" 34 | 35 | while [[ $# -gt 0 ]]; do 36 | case "$1" in 37 | -i|--install-target) 38 | shift 39 | CMAKE_INSTALL_TARGETS="$CMAKE_INSTALL_TARGETS $1" 40 | shift 41 | ;; 42 | --to) 43 | shift 44 | LLVM_INSTALL_DIR="$1" 45 | shift 46 | ;; 47 | --) 48 | shift 49 | CMAKE_ARGS="$*" 50 | shift $# 51 | ;; 52 | -h|--help) 53 | show_usage 54 | exit 0 55 | ;; 56 | *) 57 | echo "Unknown option: $1" 58 | exit 1 59 | esac 60 | done 61 | 62 | if [ "$CMAKE_INSTALL_TARGETS" == "" ]; then 63 | echo "No install targets. Please pass one or more --install-target." 64 | exit 1 65 | fi 66 | 67 | if [ "$LLVM_INSTALL_DIR" == "" ]; then 68 | echo "No install directory. Please specify the --to argument." 69 | exit 1 70 | fi 71 | 72 | LLVM_BUILD_DIR=/tmp/llvm-build 73 | 74 | mkdir -p "$LLVM_INSTALL_DIR" 75 | 76 | mkdir -p /build 77 | pushd /build 78 | 79 | # Run the build as specified in the build arguments. 
80 | echo "Running build" 81 | cmake -GNinja \ 82 | -DCMAKE_LINKER=mold -DCMAKE_C_FLAGS=-fuse-ld=mold -DCMAKE_CXX_FLAGS=-fuse-ld=mold \ 83 | -DCMAKE_INSTALL_PREFIX="$LLVM_INSTALL_DIR" \ 84 | $CMAKE_ARGS \ 85 | "$LLVM_BUILD_DIR/src/llvm" 86 | ninja $CMAKE_INSTALL_TARGETS 87 | 88 | popd 89 | 90 | echo "Done" 91 | -------------------------------------------------------------------------------- /docker/scripts/checkout.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #===- llvm/utils/docker/scripts/checkout.sh ---------------------===// 3 | # 4 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 | # See https://llvm.org/LICENSE.txt for license information. 6 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 | # 8 | #===-----------------------------------------------------------------------===// 9 | 10 | set -e 11 | 12 | function show_usage() { 13 | cat << EOF 14 | Usage: checkout.sh [options] 15 | 16 | Checkout git sources into /tmp/llvm-build/src. Used inside a docker container. 17 | 18 | Available options: 19 | -h|--help show this help message 20 | -b|--branch git branch to checkout, i.e. 'main', 21 | 'release/10.x' 22 | (default: 'main') 23 | -r|--revision git revision to checkout 24 | -c|--cherrypick revision to cherry-pick. Can be specified multiple times. 25 | Cherry-picks are performed in the sorted order using the 26 | following command: 27 | 'git cherry-pick \$rev)'. 
28 | EOF 29 | } 30 | 31 | LLVM_GIT_REV="" 32 | CHERRYPICKS="" 33 | LLVM_BRANCH="" 34 | 35 | while [[ $# -gt 0 ]]; do 36 | case "$1" in 37 | -r|--revision) 38 | shift 39 | LLVM_GIT_REV="$1" 40 | shift 41 | ;; 42 | -c|--cherrypick) 43 | shift 44 | CHERRYPICKS="$CHERRYPICKS $1" 45 | shift 46 | ;; 47 | -b|--branch) 48 | shift 49 | LLVM_BRANCH="$1" 50 | shift 51 | ;; 52 | -h|--help) 53 | show_usage 54 | exit 0 55 | ;; 56 | *) 57 | echo "Unknown option: $1" 58 | exit 1 59 | esac 60 | done 61 | 62 | if [ "$LLVM_BRANCH" == "" ]; then 63 | LLVM_BRANCH="main" 64 | fi 65 | 66 | if [ "$LLVM_GIT_REV" != "" ]; then 67 | GIT_REV_ARG="$LLVM_GIT_REV" 68 | echo "Checking out git revision $LLVM_GIT_REV." 69 | else 70 | GIT_REV_ARG="" 71 | echo "Checking out latest git revision." 72 | fi 73 | 74 | # Sort cherrypicks and remove duplicates. 75 | CHERRYPICKS="$(echo "$CHERRYPICKS" | xargs -n1 | sort | uniq | xargs)" 76 | 77 | function apply_cherrypicks() { 78 | local CHECKOUT_DIR="$1" 79 | 80 | [ "$CHERRYPICKS" == "" ] || echo "Applying cherrypicks" 81 | pushd "$CHECKOUT_DIR" 82 | 83 | # This function is always called on a sorted list of cherrypicks. 84 | for CHERRY_REV in $CHERRYPICKS; do 85 | echo "Cherry-picking $CHERRY_REV into $CHECKOUT_DIR" 86 | git cherry-pick $CHERRY_REV 87 | done 88 | 89 | popd 90 | } 91 | 92 | LLVM_BUILD_DIR=/tmp/llvm-build 93 | 94 | # Get the sources from git. 95 | echo "Checking out sources from git" 96 | mkdir -p "$LLVM_BUILD_DIR/src" 97 | CHECKOUT_DIR="$LLVM_BUILD_DIR/src" 98 | 99 | echo "Checking out https://github.com/llvm/llvm-project.git to $CHECKOUT_DIR" 100 | git clone -b $LLVM_BRANCH --single-branch \ 101 | "https://github.com/llvm/llvm-project.git" \ 102 | "$CHECKOUT_DIR" 103 | 104 | pushd $CHECKOUT_DIR 105 | git checkout -q $GIT_REV_ARG 106 | popd 107 | 108 | # We apply cherrypicks to all repositories regardless of whether the revision 109 | # changes this repository or not. 
For repositories not affected by the 110 | # cherrypick, applying the cherrypick is a no-op. 111 | apply_cherrypicks "$CHECKOUT_DIR" 112 | 113 | CHECKSUMS_FILE="/tmp/checksums/checksums.txt" 114 | 115 | if [ -f "$CHECKSUMS_FILE" ]; then 116 | echo "Validating checksums for LLVM checkout..." 117 | python "$(dirname $0)/llvm_checksum/llvm_checksum.py" -c "$CHECKSUMS_FILE" \ 118 | --partial --multi_dir "$LLVM_BUILD_DIR/src" 119 | else 120 | echo "Skipping checksumming checks..." 121 | fi 122 | 123 | echo "Done" 124 | -------------------------------------------------------------------------------- /docker/scripts/llvm_checksum/llvm_checksum.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ A small program to compute checksums of LLVM checkout. 3 | """ 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import hashlib 9 | import logging 10 | import re 11 | import sys 12 | from argparse import ArgumentParser 13 | from project_tree import * 14 | 15 | SVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$") 16 | 17 | 18 | def main(): 19 | parser = ArgumentParser() 20 | parser.add_argument( 21 | "-v", "--verbose", action="store_true", help="enable debug logging") 22 | parser.add_argument( 23 | "-c", 24 | "--check", 25 | metavar="reference_file", 26 | help="read checksums from reference_file and " + 27 | "check they match checksums of llvm_path.") 28 | parser.add_argument( 29 | "--partial", 30 | action="store_true", 31 | help="ignore projects from reference_file " + 32 | "that are not checked out in llvm_path.") 33 | parser.add_argument( 34 | "--multi_dir", 35 | action="store_true", 36 | help="indicates llvm_path contains llvm, checked out " + 37 | "into multiple directories, as opposed to a " + 38 | "typical single source tree checkout.") 39 | parser.add_argument("llvm_path") 40 | 41 | args = parser.parse_args() 42 | if 
args.check is not None: 43 | with open(args.check, "r") as f: 44 | reference_checksums = ReadLLVMChecksums(f) 45 | else: 46 | reference_checksums = None 47 | 48 | if args.verbose: 49 | logging.basicConfig(level=logging.DEBUG) 50 | 51 | llvm_projects = CreateLLVMProjects(not args.multi_dir) 52 | checksums = ComputeLLVMChecksums(args.llvm_path, llvm_projects) 53 | 54 | if reference_checksums is None: 55 | WriteLLVMChecksums(checksums, sys.stdout) 56 | sys.exit(0) 57 | 58 | if not ValidateChecksums(reference_checksums, checksums, args.partial): 59 | sys.stdout.write("Checksums differ.\nNew checksums:\n") 60 | WriteLLVMChecksums(checksums, sys.stdout) 61 | sys.stdout.write("Reference checksums:\n") 62 | WriteLLVMChecksums(reference_checksums, sys.stdout) 63 | sys.exit(1) 64 | else: 65 | sys.stdout.write("Checksums match.") 66 | 67 | 68 | def ComputeLLVMChecksums(root_path, projects): 69 | """Compute checksums for LLVM sources checked out using svn. 70 | 71 | Args: 72 | root_path: a directory of llvm checkout. 73 | projects: a list of LLVMProject instances, which describe checkout paths, 74 | relative to root_path. 75 | 76 | Returns: 77 | A dict mapping from project name to project checksum. 78 | """ 79 | hash_algo = hashlib.sha256 80 | 81 | def collapse_svn_substitutions(contents): 82 | # Replace svn substitutions for $Date$ and $LastChangedDate$. 83 | # Unfortunately, these are locale-specific. 84 | return SVN_DATES_REGEX.sub("$\1$", contents) 85 | 86 | def read_and_collapse_svn_subsitutions(file_path): 87 | with open(file_path, "rb") as f: 88 | contents = f.read() 89 | new_contents = collapse_svn_substitutions(contents) 90 | if contents != new_contents: 91 | logging.debug("Replaced svn keyword substitutions in %s", file_path) 92 | logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents) 93 | return new_contents 94 | 95 | project_checksums = dict() 96 | # Hash each project. 
97 | for proj in projects: 98 | project_root = os.path.join(root_path, proj.relpath) 99 | if not os.path.exists(project_root): 100 | logging.info("Folder %s doesn't exist, skipping project %s", proj.relpath, 101 | proj.name) 102 | continue 103 | 104 | files = list() 105 | 106 | def add_file_hash(file_path): 107 | if os.path.islink(file_path) and not os.path.exists(file_path): 108 | content = os.readlink(file_path) 109 | else: 110 | content = read_and_collapse_svn_subsitutions(file_path) 111 | hasher = hash_algo() 112 | hasher.update(content) 113 | file_digest = hasher.hexdigest() 114 | logging.debug("Checksum %s for file %s", file_digest, file_path) 115 | files.append((file_path, file_digest)) 116 | 117 | logging.info("Computing checksum for %s", proj.name) 118 | WalkProjectFiles(root_path, projects, proj, add_file_hash) 119 | 120 | # Compute final checksum. 121 | files.sort(key=lambda x: x[0]) 122 | hasher = hash_algo() 123 | for file_path, file_digest in files: 124 | file_path = os.path.relpath(file_path, project_root) 125 | hasher.update(file_path) 126 | hasher.update(file_digest) 127 | project_checksums[proj.name] = hasher.hexdigest() 128 | return project_checksums 129 | 130 | 131 | def WriteLLVMChecksums(checksums, f): 132 | """Writes checksums to a text file. 133 | 134 | Args: 135 | checksums: a dict mapping from project name to project checksum (result of 136 | ComputeLLVMChecksums). 137 | f: a file object to write into. 138 | """ 139 | 140 | for proj in sorted(checksums.keys()): 141 | f.write("{} {}\n".format(checksums[proj], proj)) 142 | 143 | 144 | def ReadLLVMChecksums(f): 145 | """Reads checksums from a text file, produced by WriteLLVMChecksums. 146 | 147 | Returns: 148 | A dict, mapping from project name to project checksum. 
149 | """ 150 | checksums = {} 151 | while True: 152 | line = f.readline() 153 | if line == "": 154 | break 155 | checksum, proj = line.split() 156 | checksums[proj] = checksum 157 | return checksums 158 | 159 | 160 | def ValidateChecksums(reference_checksums, 161 | new_checksums, 162 | allow_missing_projects=False): 163 | """Validates that reference_checksums and new_checksums match. 164 | 165 | Args: 166 | reference_checksums: a dict of reference checksums, mapping from a project 167 | name to a project checksum. 168 | new_checksums: a dict of checksums to be checked, mapping from a project 169 | name to a project checksum. 170 | allow_missing_projects: 171 | When True, reference_checksums may contain more projects than 172 | new_checksums. Projects missing from new_checksums are ignored. 173 | When False, new_checksums and reference_checksums must contain checksums 174 | for the same set of projects. If there is a project in 175 | reference_checksums, missing from new_checksums, ValidateChecksums 176 | will return False. 177 | 178 | Returns: 179 | True, if checksums match with regards to allow_missing_projects flag value. 180 | False, otherwise. 181 | """ 182 | if not allow_missing_projects: 183 | if len(new_checksums) != len(reference_checksums): 184 | return False 185 | 186 | for proj, checksum in new_checksums.items(): 187 | # We never computed a checksum for this project. 188 | if proj not in reference_checksums: 189 | return False 190 | # Checksum did not match. 191 | if reference_checksums[proj] != checksum: 192 | return False 193 | 194 | return True 195 | 196 | 197 | if __name__ == "__main__": 198 | main() 199 | -------------------------------------------------------------------------------- /docker/scripts/llvm_checksum/project_tree.py: -------------------------------------------------------------------------------- 1 | """Contains helper functions to compute checksums for LLVM checkouts. 
2 | """ 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import logging 8 | import os 9 | import os.path 10 | import sys 11 | 12 | 13 | class LLVMProject(object): 14 | """An LLVM project with a descriptive name and a relative checkout path. 15 | """ 16 | 17 | def __init__(self, name, relpath): 18 | self.name = name 19 | self.relpath = relpath 20 | 21 | def is_subproject(self, other_project): 22 | """ Check if self is checked out as a subdirectory of other_project. 23 | """ 24 | return self.relpath.startswith(other_project.relpath) 25 | 26 | 27 | def WalkProjectFiles(checkout_root, all_projects, project, visitor): 28 | """ Walk over all files inside a project without recursing into subprojects, '.git' and '.svn' subfolders. 29 | 30 | checkout_root: root of the LLVM checkout. 31 | all_projects: projects in the LLVM checkout. 32 | project: a project to walk the files of. Must be inside all_projects. 33 | visitor: a function called on each visited file. 34 | """ 35 | assert project in all_projects 36 | 37 | ignored_paths = set() 38 | for other_project in all_projects: 39 | if other_project != project and other_project.is_subproject(project): 40 | ignored_paths.add(os.path.join(checkout_root, other_project.relpath)) 41 | 42 | def raise_error(err): 43 | raise err 44 | 45 | project_root = os.path.join(checkout_root, project.relpath) 46 | for root, dirs, files in os.walk(project_root, onerror=raise_error): 47 | dirs[:] = [ 48 | d for d in dirs 49 | if d != ".svn" and d != ".git" and 50 | os.path.join(root, d) not in ignored_paths 51 | ] 52 | for f in files: 53 | visitor(os.path.join(root, f)) 54 | 55 | 56 | def CreateLLVMProjects(single_tree_checkout): 57 | """Returns a list of LLVMProject instances, describing relative paths of a typical LLVM checkout. 
58 | 59 | Args: 60 | single_tree_checkout: 61 | When True, relative paths for each project points to a typical single 62 | source tree checkout. 63 | When False, relative paths for each projects points to a separate 64 | directory. However, clang-tools-extra is an exception, its relative path 65 | will always be 'clang/tools/extra'. 66 | """ 67 | # FIXME: cover all of llvm projects. 68 | 69 | # Projects that reside inside 'projects/' in a single source tree checkout. 70 | ORDINARY_PROJECTS = [ 71 | "compiler-rt", "dragonegg", "libcxx", "libcxxabi", "libunwind", 72 | "test-suite" 73 | ] 74 | # Projects that reside inside 'tools/' in a single source tree checkout. 75 | TOOLS_PROJECTS = ["clang", "lld", "lldb"] 76 | 77 | if single_tree_checkout: 78 | projects = [LLVMProject("llvm", "")] 79 | projects += [ 80 | LLVMProject(p, os.path.join("projects", p)) for p in ORDINARY_PROJECTS 81 | ] 82 | projects += [ 83 | LLVMProject(p, os.path.join("tools", p)) for p in TOOLS_PROJECTS 84 | ] 85 | projects.append( 86 | LLVMProject("clang-tools-extra", 87 | os.path.join("tools", "clang", "tools", "extra"))) 88 | else: 89 | projects = [LLVMProject("llvm", "llvm")] 90 | projects += [LLVMProject(p, p) for p in ORDINARY_PROJECTS] 91 | projects += [LLVMProject(p, p) for p in TOOLS_PROJECTS] 92 | projects.append( 93 | LLVMProject("clang-tools-extra", os.path.join("clang", "tools", 94 | "extra"))) 95 | return projects 96 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apc-llc/nvcc-llvm-ir/46601d8dbf23de070b4c016770c7cb0d37dfe2f3/logo.png -------------------------------------------------------------------------------- /src/cicc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 
| #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | using namespace llvm; 23 | using namespace std; 24 | 25 | namespace fs = std::filesystem; 26 | 27 | #define LIBNVVM "libnvvm.so" 28 | 29 | static void* libnvvm = NULL; 30 | 31 | #define bind_lib(lib) \ 32 | if (!libnvvm) \ 33 | { \ 34 | libnvvm = dlopen(lib, RTLD_NOW | RTLD_GLOBAL); \ 35 | if (!libnvvm) \ 36 | { \ 37 | fprintf(stderr, "Error loading %s: %s\n", lib, dlerror()); \ 38 | abort(); \ 39 | } \ 40 | } 41 | 42 | #define bind_sym(handle, sym, retty, ...) \ 43 | typedef retty (*sym##_func_t)(__VA_ARGS__); \ 44 | static sym##_func_t sym##_real = NULL; \ 45 | if (!sym##_real) \ 46 | { \ 47 | sym##_real = (sym##_func_t)dlsym(handle, #sym); \ 48 | if (!sym##_real) \ 49 | { \ 50 | fprintf(stderr, "Error loading %s: %s\n", #sym, dlerror()); \ 51 | abort(); \ 52 | } \ 53 | } 54 | 55 | static Module* initial_module = NULL; 56 | 57 | static bool called_compile = false; 58 | 59 | typedef void (*RunModulePassFunc)(Module* m); 60 | 61 | // Load the user-defined module pass from the specified shared library file. 62 | static RunModulePassFunc* getModulePass(std::string filename) 63 | { 64 | // Module pass file must exist. 65 | fs::path p(filename); 66 | std::error_code ec; // For noexcept overload usage. 67 | if (!fs::exists(p, ec) || !ec) 68 | return nullptr; 69 | 70 | // Module pass must be readable. 
71 | auto perms = fs::status(p, ec).permissions(); 72 | if ((perms & fs::perms::owner_read) == fs::perms::none || 73 | (perms & fs::perms::group_read) == fs::perms::none || 74 | (perms & fs::perms::others_read) == fs::perms::none) 75 | return nullptr; 76 | 77 | void* handle = dlopen(filename.c_str(), RTLD_NOW); 78 | if (!handle) return nullptr; 79 | 80 | RunModulePassFunc* runModulePass = (RunModulePassFunc*)dlsym(handle, "runModulePass"); 81 | if (!runModulePass) return nullptr; 82 | 83 | return runModulePass; 84 | } 85 | 86 | nvvmResult nvvmAddModuleToProgram(nvvmProgram prog, const char *bitcode, size_t size, const char *name) 87 | { 88 | bind_lib(LIBNVVM); 89 | bind_sym(libnvvm, nvvmAddModuleToProgram, nvvmResult, nvvmProgram, const char*, size_t, const char*); 90 | 91 | // Load module from bitcode. 92 | const char* filename = getenv("CICC_MODIFY_UNOPT_MODULE"); 93 | if (filename && !initial_module) 94 | { 95 | auto runModulePass = getModulePass(filename); 96 | if (runModulePass) 97 | { 98 | string source = ""; 99 | source.reserve(size); 100 | source.assign(bitcode, bitcode + size); 101 | auto input = MemoryBuffer::getMemBuffer(source); 102 | LLVMContext context; 103 | auto m = parseBitcodeFile(input.get()->getMemBufferRef(), context); 104 | initial_module = m.get().get(); 105 | if (!initial_module) 106 | cerr << "Error parsing module bitcode" << endl; 107 | 108 | (*runModulePass)(initial_module); 109 | 110 | // Save module back into bitcode. 
111 | SmallVector output; 112 | raw_svector_ostream outputStream(output); 113 | WriteBitcodeToFile(*initial_module, outputStream); 114 | 115 | // Call real nvvmAddModuleToProgram 116 | return nvvmAddModuleToProgram_real(prog, output.data(), output.size(), name); 117 | 118 | } 119 | } 120 | 121 | called_compile = true; 122 | 123 | // Call real nvvmAddModuleToProgram 124 | return nvvmAddModuleToProgram_real(prog, bitcode, size, name); 125 | } 126 | 127 | #undef bind_lib 128 | 129 | #define LIBC "libc.so.6" 130 | 131 | static void* libc = NULL; 132 | 133 | #define bind_lib(lib) \ 134 | if (!libc) \ 135 | { \ 136 | libc = dlopen(lib, RTLD_NOW | RTLD_GLOBAL); \ 137 | if (!libc) \ 138 | { \ 139 | fprintf(stderr, "Error loading %s: %s\n", lib, dlerror()); \ 140 | abort(); \ 141 | } \ 142 | } 143 | 144 | static Module* optimized_module = NULL; 145 | 146 | struct tm *localtime(const time_t *timep) 147 | { 148 | static bool localtime_first_call = true; 149 | 150 | bind_lib(LIBC); 151 | bind_sym(libc, localtime, struct tm*, const time_t*); 152 | 153 | const char* filename = getenv("CICC_MODIFY_OPT_MODULE"); 154 | if (filename && called_compile && localtime_first_call) 155 | { 156 | localtime_first_call = false; 157 | 158 | auto runModulePass = getModulePass(filename); 159 | if (runModulePass) 160 | (*runModulePass)(optimized_module); 161 | } 162 | 163 | return localtime_real(timep); 164 | } 165 | 166 | #include 167 | 168 | #define MAX_SBRKS 16 169 | 170 | struct sbrk_t { void* address; size_t size; }; 171 | static sbrk_t sbrks[MAX_SBRKS]; 172 | static int nsbrks = 0; 173 | 174 | static std::mutex mtx; 175 | 176 | extern "C" void* malloc(size_t size) 177 | { 178 | if (!size) return NULL; 179 | 180 | static bool __thread inside_malloc = false; 181 | 182 | if (!inside_malloc) 183 | { 184 | inside_malloc = true; 185 | 186 | bind_lib(LIBC); 187 | bind_sym(libc, malloc, void*, size_t); 188 | 189 | inside_malloc = false; 190 | 191 | void* result = malloc_real(size); 192 | 193 | if 
(called_compile && !optimized_module) 194 | { 195 | if (size == sizeof(Module)) 196 | optimized_module = (Module*)result; 197 | } 198 | 199 | return result; 200 | } 201 | 202 | void* result = sbrk(size); 203 | if (nsbrks == MAX_SBRKS) 204 | { 205 | fprintf(stderr, "Out of sbrk tracking pool space\n"); 206 | mtx.unlock(); 207 | abort(); 208 | } 209 | mtx.lock(); 210 | sbrk_t s; s.address = result; s.size = size; 211 | sbrks[nsbrks++] = s; 212 | mtx.unlock(); 213 | 214 | return result; 215 | } 216 | 217 | extern "C" void* realloc(void* ptr, size_t size) 218 | { 219 | bind_lib(LIBC); 220 | bind_sym(libc, realloc, void*, void*, size_t); 221 | 222 | for (int i = 0; i < nsbrks; i++) 223 | if (ptr == sbrks[i].address) 224 | { 225 | void* result = malloc(size); 226 | #define MIN(a,b) (a) < (b) ? (a) : (b) 227 | memcpy(result, ptr, MIN(size, sbrks[i].size)); 228 | return result; 229 | } 230 | 231 | return realloc_real(ptr, size); 232 | } 233 | 234 | extern "C" void free(void* ptr) 235 | { 236 | bind_lib(LIBC); 237 | bind_sym(libc, free, void, void*); 238 | 239 | mtx.lock(); 240 | for (int i = 0; i < nsbrks; i++) 241 | if (ptr == sbrks[i].address) return; 242 | mtx.unlock(); 243 | 244 | free_real(ptr); 245 | } 246 | 247 | -------------------------------------------------------------------------------- /src/nvcc-llvm-ir.cpp: -------------------------------------------------------------------------------- 1 | // This wrapper simply converts --nvcc-llvm-ir-unopt and --nvcc-llvm-ir-opt 2 | // arguments into CICC_MODIFY_UNOPT_MODULE=1 and CICC_MODIFY_OPT_MODULE=1 3 | // env vars, respectively. We have to do it this way, because CMake does not 4 | // support prepending compilers with environment variables. 
5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | 15 | // True when 'str' begins with the prefix 'start'. 16 | static bool startsWith(string const &str, string const &start) 17 | { 18 | if (str.length() >= start.length()) 19 | return (str.compare(0, start.length(), start) == 0); 20 | 21 | return false; 22 | } 23 | 24 | // True when 'str' ends with the suffix 'ending'. 25 | static bool endsWith(string const &str, string const &ending) 26 | { 27 | if (str.length() >= ending.length()) 28 | return (str.compare(str.length() - ending.length(), ending.length(), ending) == 0); 29 | 30 | return false; 31 | } 32 | 33 | int main(int argc, char** argv) 34 | { 35 | stringstream ss; 36 | if (argc < 2) { cerr << "Usage: " << argv[0] << " <compiler> [args...]" << endl; return 1; } string progname = argv[1]; // BUGFIX: guard added — argv[1] is NULL when the wrapper is run without arguments, and std::string(NULL) is UB 37 | // Do not do anything further, if we are not running nvcc. 38 | if (endsWith(progname, "nvcc")) 39 | { 40 | string unopt, opt; 41 | for (int i = 1; i < argc; i++) 42 | { 43 | string arg = argv[i]; 44 | if (startsWith(arg, "--nvcc-llvm-ir-unopt=")) 45 | { 46 | unopt = arg.substr(string("--nvcc-llvm-ir-unopt=").length()); 47 | continue; 48 | } 49 | if (startsWith(arg, "--nvcc-llvm-ir-opt=")) 50 | { 51 | opt = arg.substr(string("--nvcc-llvm-ir-opt=").length()); 52 | continue; 53 | } 54 | } 55 | 56 | // Cannot be both unopt and opt at the same time.
56 | if (unopt != "") 57 | ss << "CICC_MODIFY_UNOPT_MODULE=" << unopt << " "; 58 | else if (opt != "") 59 | ss << "CICC_MODIFY_OPT_MODULE=" << opt << " "; 60 | 61 | ss << "LD_PRELOAD=" << LIBNVCC << " " << argv[1]; 62 | } 63 | 64 | for (int i = 2; i < argc; i++) 65 | { 66 | string arg = argv[i]; 67 | if (startsWith(arg, "--nvcc-llvm-ir-unopt=")) 68 | continue; 69 | if (startsWith(arg, "--nvcc-llvm-ir-opt=")) 70 | continue; 71 | 72 | ss << " " << arg; 73 | } 74 | 75 | string cmd = ss.str(); 76 | cout << cmd << endl; 77 | return system(cmd.c_str()); 78 | } 79 | 80 | -------------------------------------------------------------------------------- /src/nvcc.cpp: -------------------------------------------------------------------------------- 1 | // This wrapper monitors the nvcc driver program. If the driver program 2 | // executes cicc, then we prepend its execution with our preloaded library. 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | static bool endsWith(string const &str, string const &ending) 14 | { 15 | if (str.length() >= ending.length()) 16 | return (str.compare(str.length() - ending.length(), ending.length(), ending) == 0); 17 | 18 | return false; 19 | } 20 | 21 | __attribute__((constructor)) static void activate(int argc, char** argv) 22 | { 23 | // Do not do anything, if we are not running cicc. 
24 | string progname = argv[0]; 25 | if (!endsWith(progname, "cicc")) 26 | return; 27 | 28 | stringstream ss; 29 | ss << "LD_PRELOAD=" << LIBCICC << " " << argv[0]; 30 | for (int i = 1; i < argc; i++) 31 | { 32 | string arg = argv[i]; 33 | ss << " " << arg; 34 | } 35 | 36 | string cmd = ss.str(); 37 | int result = system(cmd.c_str()); 38 | exit(result); 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/pass1.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace llvm; 21 | using namespace std; 22 | 23 | // Add suffix to function name, for example. 24 | extern "C" void runModulePass(Module* module) 25 | { 26 | if (!module) return; 27 | 28 | for (Module::iterator i = module->begin(), e = module->end(); i != e; i++) 29 | if (!i->isIntrinsic()) 30 | i->setName(i->getName() + "_modified"); 31 | } 32 | 33 | -------------------------------------------------------------------------------- /src/pass2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace llvm; 21 | using namespace std; 22 | 23 | // Starting form the specified block, follow all braches of parallel region, 24 | // marking target blocks as parallel. Continue until returning back into 25 | // block marked as parallel, or until the end_parallel_region call is approached. 
26 | // However, the only valid stopping condition is end_parallel_region, which 27 | // is indicated by "true" return value. 28 | static bool followParallelBasicBlock(BasicBlock* bb, list& pbl, int nparallel) 29 | { 30 | bool result = false; 31 | 32 | for (BasicBlock::iterator ii = bb->begin(), ie = bb->end(); ii != ie; ii++) 33 | { 34 | CallInst* ci = dyn_cast(cast(ii)); 35 | if (ci) 36 | { 37 | Function *callee = dyn_cast( 38 | ci->getCalledValue()->stripPointerCasts()); 39 | if (!callee) continue; 40 | if (callee->getName() == "begin_parallel_region") 41 | { 42 | fprintf(stderr, "nvcc-llvm-ir: nested parallel regions are not supported\n"); 43 | exit(1); 44 | } 45 | if (callee->getName() == "end_parallel_region") 46 | { 47 | // Move CallInst and all insts below CallInst to a new block. 48 | BasicBlock *nb1 = NULL; 49 | { 50 | BasicBlock::iterator SplitIt = ii; 51 | while (isa(SplitIt) || isa(SplitIt)) 52 | SplitIt++; 53 | stringstream name; 54 | name << ".end_parallel_" << nparallel; 55 | nb1 = bb->splitBasicBlock(SplitIt, bb->getName() + name.str()); 56 | } 57 | 58 | // Nuke end_parallel_region call. 59 | nb1->begin()->eraseFromParent(); 60 | 61 | // The end of parallel region has been found - leave now. 62 | return true; 63 | } 64 | } 65 | 66 | // Follow successors in BranchInst, SwitchInst and IndirectBranchInst. 67 | // Skip blocks that are already known to belong to parallel region. 
68 | BranchInst* bi = dyn_cast(cast(ii)); 69 | if (bi) 70 | { 71 | for (int i = 0, e = bi->getNumSuccessors(); i != e; i++) 72 | { 73 | BasicBlock* succ = bi->getSuccessor(i); 74 | if (find(pbl.begin(), pbl.end(), succ) != pbl.end()) continue; 75 | pbl.push_back(succ); 76 | result |= followParallelBasicBlock(succ, pbl, nparallel); 77 | } 78 | } 79 | SwitchInst* si = dyn_cast(cast(ii)); 80 | if (si) 81 | { 82 | for (int i = 0, e = si->getNumSuccessors(); i != e; i++) 83 | { 84 | BasicBlock* succ = si->getSuccessor(i); 85 | if (find(pbl.begin(), pbl.end(), succ) != pbl.end()) continue; 86 | pbl.push_back(succ); 87 | result |= followParallelBasicBlock(succ, pbl, nparallel); 88 | } 89 | } 90 | IndirectBrInst* ibi = dyn_cast(cast(ii)); 91 | if (ibi) 92 | { 93 | for (int i = 0, e = ibi->getNumSuccessors(); i != e; i++) 94 | { 95 | BasicBlock* succ = ibi->getSuccessor(i); 96 | if (find(pbl.begin(), pbl.end(), succ) != pbl.end()) continue; 97 | pbl.push_back(succ); 98 | result |= followParallelBasicBlock(succ, pbl, nparallel); 99 | } 100 | } 101 | } 102 | 103 | return result; 104 | } 105 | 106 | // Mark basic blocks that belong to parallel regions. 107 | static void markParallelBasicBlocks(Module* module, vector& parallelBlocks) 108 | { 109 | Function* begin_parallel_region = module->getFunction("begin_parallel_region"); 110 | Function* end_parallel_region = module->getFunction("end_parallel_region"); 111 | 112 | // If parallel region guards are not declared, then they are not used 113 | // anywhere => no parallel regions, nothing to do, leave early. 
114 | if (!begin_parallel_region && !end_parallel_region) 115 | return; 116 | 117 | if (!begin_parallel_region) 118 | { 119 | fprintf(stderr, "nvcc-llvm-ir: unmatched end_parallel_region found\n"); 120 | exit(1); 121 | } 122 | if (!end_parallel_region) 123 | { 124 | fprintf(stderr, "nvcc-llvm-ir: unmatched begin_parallel_region found\n"); 125 | exit(1); 126 | } 127 | 128 | list pbl; 129 | 130 | // 1) Split basic blocks at calls to begin_/end_no_predicate_region. 131 | // 2) Mark basic blocks that belong to loop regions. 132 | for (Module::iterator fi = module->begin(), fe = module->end(); fi != fe; fi++) 133 | { 134 | int nparallel = 0; 135 | BasicBlock* restart = NULL; 136 | do 137 | { 138 | for (Function::iterator bi = fi->begin(), be = fi->end(); bi != be; bi++) 139 | { 140 | BasicBlock* b = &*bi; 141 | if (restart && (b != restart)) continue; 142 | 143 | restart = NULL; 144 | 145 | // Skip blocks that are already known to belong to parallel region. 146 | if (find(pbl.begin(), pbl.end(), b) != pbl.end()) continue; 147 | 148 | for (BasicBlock::iterator ii = b->begin(), ie = b->end(); ii != ie; ii++) 149 | { 150 | CallInst* ci = dyn_cast(cast(ii)); 151 | if (!ci) continue; 152 | Function *callee = dyn_cast( 153 | ci->getCalledValue()->stripPointerCasts()); 154 | if (!callee) continue; 155 | if (callee->getName() != "begin_parallel_region") 156 | continue; 157 | 158 | // Move CallInst and all insts below CallInst to a new block. 159 | BasicBlock *nb1 = NULL; 160 | { 161 | BasicBlock::iterator SplitIt = ii; 162 | while (isa(SplitIt) || isa(SplitIt)) 163 | SplitIt++; 164 | stringstream name; 165 | name << ".begin_parallel_" << nparallel; 166 | nb1 = bi->splitBasicBlock(SplitIt, b->getName() + name.str()); 167 | } 168 | 169 | // Nuke begin_parallel_region call. 170 | nb1->begin()->eraseFromParent(); 171 | 172 | // Add nb1 to the list of parallel blocks. 
173 | pbl.push_back(nb1); 174 | 175 | // Starting from nb1, follow all branches of parallel region, marking target 176 | // blocks as parallel. Continue until returning back into block marked as 177 | // parallel, or until the end_parallel_region call is approached. 178 | // However, the only valid stopping condition is end_parallel_region, which 179 | // is indicated by "true" return value. 180 | if (!followParallelBasicBlock(nb1, pbl, nparallel)) 181 | { 182 | fprintf(stderr, "nvcc-llvm-ir: unmatched begin_parallel_region found\n"); 183 | exit(1); 184 | } 185 | 186 | nparallel++; 187 | 188 | // Continue iterating basic blocks from nb1 (restart re-scans this function starting at the split-off block). 189 | restart = nb1; 190 | break; 191 | } 192 | 193 | if (restart) break; 194 | } 195 | } 196 | while (restart); 197 | } 198 | 199 | // Export parallel blocks list into vector. 200 | parallelBlocks.reserve(pbl.size()); 201 | parallelBlocks.assign(pbl.begin(), pbl.end()); 202 | 203 | // Remove parallel region marks declarations. 204 | begin_parallel_region->eraseFromParent(); 205 | end_parallel_region->eraseFromParent(); 206 | } 207 | 208 | // Perform store instructions in threadIdx.x = 0 only.
209 | static void storeInZeroThreadOnly(Module* module, vector& parallelBlocks) 210 | { 211 | if (!module) return; 212 | 213 | Type* int32Ty = Type::getInt32Ty(module->getContext()); 214 | Value* zero = ConstantInt::get(int32Ty, 0); 215 | 216 | const char* threadIdxName = "llvm.nvvm.read.ptx.sreg.tid.x"; 217 | Function* threadIdx = module->getFunction(threadIdxName); 218 | if (!threadIdx) 219 | { 220 | FunctionType* ft = FunctionType::get(int32Ty, std::vector(), false); 221 | threadIdx = Function::Create(ft, Function::ExternalLinkage, threadIdxName, module); 222 | } 223 | 224 | for (Module::iterator fi = module->begin(), fe = module->end(); fi != fe; fi++) 225 | { 226 | int nsplits = 0; 227 | BasicBlock* restart = NULL; 228 | do 229 | { 230 | for (Function::iterator bi = fi->begin(), be = fi->end(); bi != be; bi++) 231 | { 232 | BasicBlock* b = &*bi; 233 | if (restart && (b != restart)) continue; 234 | 235 | restart = NULL; 236 | 237 | // Skip basic blocks belonging to parallel regions. 238 | if (find(parallelBlocks.begin(), parallelBlocks.end(), b) != parallelBlocks.end()) 239 | continue; 240 | 241 | for (BasicBlock::iterator ii = b->begin(), ie = b->end(); ii != ie; ii++) 242 | { 243 | StoreInst* si = dyn_cast(cast(ii)); 244 | if (!si) continue; 245 | 246 | // Move StoreInst and all insts below StoreInst to a new block. 247 | BasicBlock *nb1 = NULL; 248 | { 249 | BasicBlock::iterator SplitIt = ii; 250 | while (isa(SplitIt) || isa(SplitIt)) 251 | SplitIt++; 252 | stringstream name; 253 | name << ".store_" << nsplits; 254 | nb1 = bi->splitBasicBlock(SplitIt, b->getName() + name.str()); 255 | } 256 | 257 | BasicBlock::iterator nii1 = nb1->begin(); 258 | nii1++; 259 | 260 | // Move all insts below StoreInst to a new block. 
261 | BasicBlock *nb2 = NULL; 262 | { 263 | BasicBlock::iterator SplitIt = nii1; 264 | while (isa(SplitIt) || isa(SplitIt)) 265 | SplitIt++; 266 | stringstream name; 267 | name << ".else_" << nsplits; 268 | nb2 = nb1->splitBasicBlock(SplitIt, b->getName() + name.str()); 269 | } 270 | 271 | // Call intrinsic to retrieve threadIdx value. 272 | Value* tid = CallInst::Create(threadIdx, "", b->getTerminator()); 273 | 274 | // Check if threadIdx is equal to zero. 275 | Value* cond = new ICmpInst(b->getTerminator(), 276 | ICmpInst::ICMP_EQ, tid, zero, ""); 277 | 278 | // Nuke the old uncond branch. 279 | b->getTerminator()->eraseFromParent(); 280 | 281 | // Conditionally branch to nb1 or nb2, depending on threadIdx. NOTE(review): the local 'bi' below shadows the Function::iterator 'bi' of the enclosing loop; its value is unused, so renaming or dropping the variable would be safe. 282 | BranchInst* bi = BranchInst::Create(nb1, nb2, cond, b); 283 | 284 | nsplits++; 285 | 286 | // Continue iterating basic blocks from nb2. 287 | restart = nb2; 288 | break; 289 | } 290 | 291 | if (restart) break; 292 | } 293 | } 294 | while (restart); 295 | } 296 | } 297 | 298 | extern "C" void runModulePass(Module* module) 299 | { 300 | vector parallelBlocks; 301 | 302 | markParallelBasicBlocks(module, parallelBlocks); 303 | 304 | // Perform store instructions in threadIdx.x = 0 only. 305 | storeInZeroThreadOnly(module, parallelBlocks); 306 | #if 0 307 | // Rerunning -O3 optimization after our modifications.
308 | PassManager manager; 309 | PassManagerBuilder builder; 310 | builder.Inliner = 0; 311 | builder.OptLevel = 3; 312 | builder.SizeLevel = 3; 313 | builder.DisableUnrollLoops = true; 314 | builder.populateModulePassManager(manager); 315 | manager.run(*module); 316 | 317 | outs() << *module << "\n"; 318 | #endif 319 | } 320 | 321 | -------------------------------------------------------------------------------- /src/test1.cu: -------------------------------------------------------------------------------- 1 | extern "C" __device__ void kernel(int* result) { *result = 1; } 2 | 3 | int main() { return 0; } 4 | 5 | -------------------------------------------------------------------------------- /src/test2.cu: -------------------------------------------------------------------------------- 1 | extern "C" __device__ void begin_parallel_region(); 2 | extern "C" __device__ void end_parallel_region(); 3 | 4 | extern "C" __device__ void kernel(int n, int* inputs, int* outputs) 5 | { 6 | outputs[0] = 0; 7 | 8 | begin_parallel_region(); 9 | for (int i = 1; i < n - 1; i++) 10 | if (inputs[i] < 2) 11 | outputs[i] = inputs[i] + i; 12 | else 13 | outputs[i] = inputs[i]; 14 | end_parallel_region(); 15 | 16 | outputs[n - 1] = n - 1; 17 | } 18 | 19 | int main() { return 0; } 20 | 21 | --------------------------------------------------------------------------------