├── doc ├── CMakeLists.txt ├── slides │ └── ANL_Summer_Status_Update.pdf └── readme │ ├── readme_is_up-to-date.test │ ├── update_doc.sh │ ├── CMakeLists.txt │ └── simple_at.cpp ├── tests ├── benchmark.test ├── compare.sh ├── meta │ ├── bad_signature_a.cpp │ ├── bad_signature_c.cpp │ ├── bad_signature_b.cpp │ ├── bad_signature_f.cpp │ ├── bad_signature_d.cpp │ ├── bad_signature_e.cpp │ ├── bad_signature_g.cpp │ ├── type_list+func_traits.cpp │ └── new_func_traits.cpp ├── doc │ └── lit.cfg.in ├── simple │ ├── int_a.cpp │ ├── static_var_a.cpp │ ├── long_a.cpp │ ├── nortti_a.cpp │ ├── ptr_a.cpp │ ├── static_var_b.cpp │ ├── float_a.cpp │ ├── double_a.cpp │ ├── opt_level.cpp │ ├── compose_bad.cpp │ ├── small_struct.cpp │ ├── thread.cpp │ ├── cache.cpp │ ├── array_bad.cpp │ ├── multi_file+regexp.cpp │ ├── exception_a.cpp │ ├── serialize.cpp │ ├── int_ptr_a.cpp │ ├── unroll.cpp │ ├── serialize_static.cpp │ ├── devirtualization_nohint.cpp │ ├── devirtualization.cpp │ ├── array.cpp │ ├── fun_ptr_a.cpp │ ├── fun_ptr_b.cpp │ ├── fun_ptr_d.cpp │ ├── serialize_multifile.cpp │ ├── fun_ptr_f.cpp │ ├── fun_ptr_c.cpp │ ├── fun_ptr_e.cpp │ ├── compose_ref.cpp │ ├── custom_key_cache.cpp │ └── compose_ptr.cpp ├── install │ ├── CMakeLists.txt │ └── test.cpp.disabled ├── tuner │ ├── stats.cpp.disabled │ ├── intrange_2.cpp │ ├── parallel_compile.cpp │ ├── tile.cpp │ ├── intrange_1.cpp │ ├── lifetime.cpp │ └── sq_matmul.cpp ├── perf │ └── fannkuchredux.cpp └── lit.cfg.in ├── xgboost ├── .gitignore └── get.sh ├── runtime ├── tuner │ ├── Statics.cpp │ ├── KnobSet.cpp │ ├── Util.cpp │ ├── Knob.cpp │ ├── AnalyzingTuner.cpp │ └── Feedback.cpp ├── pass │ ├── ContextAnalysis.cpp │ └── DevirtualizeConstant.cpp ├── InitNativeTarget.cpp ├── Context.cpp ├── Utils.cpp ├── CMakeLists.txt ├── BitcodeTracker.cpp └── Function.cpp ├── include ├── easy │ ├── runtime │ │ ├── LLVMHolder.h │ │ ├── Compat.h │ │ ├── Utils.h │ │ ├── LLVMHolderImpl.h │ │ ├── RuntimePasses.h │ │ ├── Function.h │ │ └── 
BitcodeTracker.h │ ├── attributes.h │ ├── exceptions.h │ ├── options.h │ ├── code_cache.h │ ├── jit.h │ ├── function_wrapper.h │ └── param.h ├── CMakeLists.txt └── tuner │ ├── AnalyzingTuner.h │ ├── JSON.h │ ├── param.h │ ├── RandomTuner.h │ ├── KnobSet.h │ ├── Util.h │ ├── KnobConfig.h │ ├── Knob.h │ ├── CodegenOptions.h │ ├── optimizer.h │ ├── AnnealingTuner.h │ ├── MDUtils.h │ └── LoopKnob.h ├── misc ├── docker │ ├── build_docker.sh │ └── GenDockerfile.py ├── doc │ ├── python.py │ ├── include.py │ └── generate.py └── atjitc.in ├── cmake ├── CMakeLists.txt ├── Polly.cmake ├── LLVM.cmake ├── Valgrind.cmake ├── EasyJitConfig.cmake.in ├── GCD.cmake ├── FindBenchmark.cmake ├── Python.cmake └── XGBoost.cmake ├── .gitignore ├── pass ├── StaticPasses.h ├── RegisterPasses.cpp ├── CMakeLists.txt ├── MayAliasTracer.h ├── LoopNamer.cpp └── MayAliasTracer.cpp ├── benchmark ├── setup.sh ├── benchmark.cpp ├── CMakeLists.txt ├── convolve.h ├── spectralnorm.h └── qsort.h ├── CMakeTests.txt ├── get-llvm.sh ├── TODO.md ├── LICENSE ├── get-llvm-with-polly.sh ├── .gitlab-ci.yml └── CMakeLists.txt /doc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(readme) 2 | -------------------------------------------------------------------------------- /tests/benchmark.test: -------------------------------------------------------------------------------- 1 | REQUIRES: benchmark 2 | RUN: %bin/atjit-benchmark-O0 --benchmark_repetitions=1 3 | -------------------------------------------------------------------------------- /xgboost/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything 2 | * 3 | 4 | # But not these files... 
5 | !.gitignore 6 | !get.sh 7 | -------------------------------------------------------------------------------- /doc/slides/ANL_Summer_Status_Update.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kavon/atJIT/HEAD/doc/slides/ANL_Summer_Status_Update.pdf -------------------------------------------------------------------------------- /runtime/tuner/Statics.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | namespace tuner { 6 | std::atomic KnobTicker = 1; 7 | } 8 | -------------------------------------------------------------------------------- /include/easy/runtime/LLVMHolder.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | namespace easy { 3 | class LLVMHolder { 4 | public: 5 | virtual ~LLVMHolder() = default; 6 | }; 7 | } 8 | -------------------------------------------------------------------------------- /doc/readme/readme_is_up-to-date.test: -------------------------------------------------------------------------------- 1 | // RUN: cd %S 2 | // RUN: bash update_doc.sh > %t.new 3 | // RUN: cp %S/../../README.md %t.old 4 | // RUN: diff %t.new %t.old 5 | -------------------------------------------------------------------------------- /misc/docker/build_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python3 GenDockerfile.py ../../.travis.yml > Dockerfile.easy && 3 | docker build -t easy/test -f Dockerfile.easy . 
4 | -------------------------------------------------------------------------------- /include/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | install(DIRECTORY easy 2 | DESTINATION include FILES_MATCHING PATTERN "*.h") 3 | install(DIRECTORY tuner 4 | DESTINATION include FILES_MATCHING PATTERN "*.h") 5 | -------------------------------------------------------------------------------- /misc/doc/python.py: -------------------------------------------------------------------------------- 1 | import generate 2 | 3 | def on_python(python_code): 4 | exec (python_code) in {'__builtins__':{}}, {} 5 | return 6 | 7 | generate.match_and_expand("python", on_python) 8 | -------------------------------------------------------------------------------- /cmake/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/EasyJitConfig.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/EasyJitConfig.cmake @ONLY) 2 | install(FILES ${CMAKE_CURRENT_BINARY_DIR}/EasyJitConfig.cmake 3 | DESTINATION lib/cmake) 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.ll 2 | *.ninja_deps 3 | *.ninja_log 4 | */CMakeCache.txt 5 | */CMakeFiles/ 6 | */bin/ 7 | */build.ninja 8 | */cmake_install.cmake 9 | */rules.ninja 10 | */.ninja_log 11 | /build/ 12 | /install/ 13 | /playground/ 14 | __pycache__/ 15 | *.py[cod] 16 | /llvm/ 17 | -------------------------------------------------------------------------------- /pass/StaticPasses.h: -------------------------------------------------------------------------------- 1 | #ifndef STATIC_PASSES 2 | #define STATIC_PASSES 3 | 4 | #include 5 | 6 | namespace easy { 7 | llvm::Pass* createRegisterBitcodePass(); 8 | } 9 | 10 | namespace tuner { 11 | llvm::Pass* createLoopNamerPass(); 12 | } 13 | 14 | #endif 15 | 
-------------------------------------------------------------------------------- /cmake/Polly.cmake: -------------------------------------------------------------------------------- 1 | ################# 2 | # find Polly components within LLVM 3 | 4 | set(Polly_DIR "${LLVM_ROOT}/lib/cmake/polly") 5 | 6 | find_package(Polly REQUIRED CONFIG) 7 | 8 | message(STATUS "Using PollyConfig.cmake in ${Polly_DIR}") 9 | 10 | # look in PollyConfig.cmake for more info about what's exported 11 | 12 | #################### 13 | -------------------------------------------------------------------------------- /doc/readme/update_doc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # dir of this script 4 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 5 | pushd $DIR >> /dev/null 6 | 7 | easy_jit_dir=../../ 8 | export PYTHONIOENCODING="utf-8" 9 | 10 | cat README.md.in | \ 11 | python3 ${easy_jit_dir}/misc/doc/python.py | \ 12 | python3 ${easy_jit_dir}/misc/doc/include.py 13 | -------------------------------------------------------------------------------- /tests/compare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # expected input: POSIX standard time output (-p option) 4 | # crashes if $1 is not less than $2 5 | 6 | PAT='s/real //' 7 | 8 | A=`head -n1 $1 | sed "$PAT"` 9 | B=`head -n1 $2 | sed "$PAT"` 10 | 11 | if [[ `echo "$A >= $B" | bc` -eq 1 ]]; then 12 | echo "$A >= $B" 13 | exit 1 14 | fi 15 | 16 | echo "$A < $B" 17 | -------------------------------------------------------------------------------- /cmake/LLVM.cmake: -------------------------------------------------------------------------------- 1 | 2 | # The runtime system uses RTTI, and I'm too lazy 3 | # to convert to LLVM's RTTI system. 4 | set(LLVM_ENABLE_RTTI "ON" CACHE BOOL "") 5 | 6 | # we use things like FileCheck in our test suite. 
7 | set(LLVM_INSTALL_UTILS "ON" CACHE BOOL "") 8 | 9 | # see https://github.com/kavon/atJIT/issues/1 10 | set(LLVM_LINK_LLVM_DYLIB "ON" CACHE BOOL "") 11 | -------------------------------------------------------------------------------- /include/easy/runtime/Compat.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // handle some minor API differences 4 | #if LLVM_VERSION_MAJOR == 6 5 | 6 | #define LLVM_DEBUG DEBUG 7 | #define PASS_MODULE_ARG(M) &(M) 8 | 9 | #elif LLVM_VERSION_MAJOR == 8 10 | 11 | #define PASS_MODULE_ARG(M) (M) 12 | 13 | #else 14 | 15 | #warning "Compatibility with this version of LLVM is unknown!" 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /runtime/pass/ContextAnalysis.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace llvm; 4 | using namespace easy; 5 | 6 | char easy::ContextAnalysis::ID = 0; 7 | 8 | llvm::Pass* easy::createContextAnalysisPass(std::shared_ptr C) { 9 | return new ContextAnalysis(std::move(C)); 10 | } 11 | 12 | static RegisterPass X("", "", true, true); 13 | -------------------------------------------------------------------------------- /runtime/tuner/KnobSet.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | namespace tuner { 5 | 6 | 7 | void applyToKnobs(KnobSetAppFn &F, KnobSet const &KS) { 8 | for (auto V : KS.IntKnobs) F(V); 9 | for (auto V : KS.LoopKnobs) F(V); 10 | } 11 | 12 | void applyToKnobs(KnobIDAppFn &F, KnobSet const &KS) { 13 | for (auto V : KS.IntKnobs) F(V.first); 14 | for (auto V : KS.LoopKnobs) F(V.first); 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /misc/atjitc.in: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | ${CLANGXX_BIN} 
-Xclang -disable-O0-optnone --std=c++${CMAKE_CXX_STANDARD} \ 7 | -Xclang -load -Xclang ${CMAKE_INSTALL_PREFIX}/lib/EasyJitPass.so \ 8 | -I${LLVM_INCLUDE_DIRS} \ 9 | -I${CMAKE_INSTALL_PREFIX}/include \ 10 | -Wl,-rpath,'${CMAKE_INSTALL_PREFIX}/lib' \ 11 | -L${CMAKE_INSTALL_PREFIX}/lib -lATJitRuntime \ 12 | ${LIBLLVM_LINK_FLAGS} \ 13 | "$@" 14 | -------------------------------------------------------------------------------- /tests/meta/bad_signature_a.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %not %atjitc -O2 %s -o %t 2> %t.log 2 | // RUN: %FileCheck %s < %t.log 3 | 4 | #include 5 | #include 6 | 7 | // CHECK: An easy::jit option is expected 8 | 9 | using namespace std::placeholders; 10 | 11 | int foo(int) { 12 | return 0; 13 | } 14 | 15 | int main(int, char** argv) { 16 | 17 | auto foo_ = easy::jit(foo, 1, 2); 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /tests/meta/bad_signature_c.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %not %atjitc -O2 %s -o %t 2> %t.log 2 | // RUN: %FileCheck %s < %t.log 3 | 4 | #include 5 | #include 6 | 7 | // CHECK: Invalid bind, placeholder cannot be bound to a formal argument 8 | 9 | using namespace std::placeholders; 10 | 11 | int foo(float) { 12 | return 0; 13 | } 14 | 15 | int main(int, char** argv) { 16 | 17 | auto foo_ = easy::jit(foo, _2); 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /include/easy/attributes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define CI_SECTION "compiler-interface" 4 | #define JIT_SECTION "easy-jit" 5 | 6 | // mark functions in the easy::jit interface as no inline. 7 | // it's easier for the pass to find the original functions to be jitted. 
8 | #define EASY_JIT_COMPILER_INTERFACE \ 9 | __attribute__((noinline)) __attribute__((section(CI_SECTION))) 10 | 11 | #define EASY_JIT_EXPOSE \ 12 | __attribute__((section(JIT_SECTION))) 13 | -------------------------------------------------------------------------------- /tests/doc/lit.cfg.in: -------------------------------------------------------------------------------- 1 | import lit.formats 2 | import lit.util 3 | 4 | from subprocess import call 5 | 6 | lit_config.load_config(config, os.path.join("@CMAKE_CURRENT_BINARY_DIR@", "./tests/lit.cfg")) 7 | config.test_source_root = "@CMAKE_CURRENT_SOURCE_DIR@/doc" 8 | config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@/tests/doc" 9 | 10 | # for the documentation example 11 | if "@ATJIT_EXAMPLE@" in ["1", "ON"]: 12 | config.available_features.add('example') 13 | -------------------------------------------------------------------------------- /tests/meta/bad_signature_b.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %not %atjitc -O2 %s -o %t 2> %t.log 2 | // RUN: %FileCheck %s < %t.log 3 | 4 | #include 5 | #include 6 | 7 | using namespace std::placeholders; 8 | 9 | int foo(int, int, int) { 10 | return 0; 11 | } 12 | 13 | int main(int, char** argv) { 14 | 15 | auto foo_ = easy::jit(foo, 1, 2); 16 | foo_(); // CHECK: easy::jit: not providing enough argument to actual call 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /benchmark/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # assumes you're using "build" and "install" directories. 4 | # run relative to your atJIT build dir. 5 | 6 | git clone --depth=1 https://github.com/google/benchmark.git 7 | git clone --depth=1 https://github.com/google/googletest.git benchmark/googletest 8 | mkdir benchmark/build 9 | pushd benchmark/build 10 | cmake .. 
-G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=`pwd`/../install 11 | make install -j 4 12 | popd 13 | -------------------------------------------------------------------------------- /cmake/Valgrind.cmake: -------------------------------------------------------------------------------- 1 | ################# 2 | # find valgrind executable 3 | 4 | # defines the $VALGRIND_EXE symbol 5 | 6 | function (requireFound SYMB MSG) 7 | if(NOT ${SYMB}) 8 | message(FATAL_ERROR ${MSG}) 9 | endif() 10 | message(STATUS "Found ${SYMB}: ${${SYMB}}") 11 | endfunction(requireFound) 12 | 13 | # search 14 | find_program(VALGRIND_EXE valgrind) 15 | 16 | # check 17 | requireFound(VALGRIND_EXE "Valgrind executable not found") 18 | 19 | #################### 20 | -------------------------------------------------------------------------------- /tests/meta/bad_signature_f.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %not %atjitc -O2 %s -o %t 2> %t.log 2 | // RUN: %FileCheck %s < %t.log 3 | 4 | #include 5 | #include 6 | 7 | // CHECK: An easy::jit option is expected 8 | 9 | using namespace std::placeholders; 10 | using namespace tuned_param; 11 | 12 | int foo(int) { 13 | return 0; 14 | } 15 | 16 | int main(int, char** argv) { 17 | 18 | auto foo_ = easy::jit(foo, IntRange(-5, 5), IntRange(-5, 5)); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /include/easy/runtime/Utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace llvm { 7 | class LLVMContext; 8 | class Module; 9 | class Function; 10 | } 11 | 12 | namespace easy { 13 | 14 | std::string GetEntryFunctionName(llvm::Module const &M); 15 | void MarkAsEntry(llvm::Function &F); 16 | void UnmarkEntry(llvm::Module &M); 17 | 18 | std::unique_ptr 19 | CloneModuleWithContext(llvm::Module const &LM, llvm::LLVMContext &C); 
20 | 21 | } 22 | -------------------------------------------------------------------------------- /tests/meta/bad_signature_d.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %not %atjitc -O2 %s -o %t 2> %t.log 2 | // RUN: %FileCheck %s < %t.log 3 | 4 | #include 5 | #include 6 | 7 | // CHECK: atJIT tunable parameter's underlying type is mismatched 8 | 9 | using namespace std::placeholders; 10 | using namespace tuned_param; 11 | 12 | int foo(float) { 13 | return 0; 14 | } 15 | 16 | int main(int, char** argv) { 17 | 18 | auto foo_ = easy::jit(foo, IntRange(-5, 5)); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /xgboost/get.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -ex 4 | 5 | # from: https://stackoverflow.com/questions/59895/getting-the-source-directory-of-a-bash-script-from-within 6 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 7 | 8 | ROOT="root" 9 | 10 | pushd $DIR 11 | 12 | rm -rf $ROOT 13 | mkdir $ROOT 14 | 15 | git clone -b 'v0.72' --single-branch --depth 1 --recursive https://github.com/dmlc/xgboost.git $ROOT 16 | 17 | pushd $ROOT 18 | 19 | # ./build.sh 20 | make -j 4 21 | -------------------------------------------------------------------------------- /tests/meta/bad_signature_e.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %not %atjitc -O2 %s -o %t 2> %t.log 2 | // RUN: %FileCheck %s < %t.log 3 | 4 | #include 5 | #include 6 | 7 | // CHECK: easy::jit: not providing enough argument to actual call 8 | 9 | using namespace std::placeholders; 10 | using namespace tuned_param; 11 | 12 | int foo(int, int) { 13 | return 0; 14 | } 15 | 16 | int main(int, char** argv) { 17 | 18 | auto foo_ = easy::jit(foo, IntRange(-5, 5)); 19 | return 0; 20 | } 21 | 
-------------------------------------------------------------------------------- /benchmark/benchmark.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace tuned_param; 13 | using namespace std::placeholders; 14 | 15 | // benchmark components go here 16 | 17 | #include "matmul.h" 18 | 19 | #include "qsort.h" 20 | 21 | #include "spectralnorm.h" 22 | 23 | 24 | BENCHMARK_MAIN(); 25 | -------------------------------------------------------------------------------- /tests/meta/bad_signature_g.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %not %atjitc -O2 %s -o %t 2> %t.log 2 | // RUN: %FileCheck %s < %t.log 3 | 4 | #include 5 | #include 6 | 7 | // CHECK: atJIT tunable parameter's underlying type is mismatched 8 | 9 | using namespace std::placeholders; 10 | using namespace tuned_param; 11 | 12 | // the type must match even in sign 13 | int foo(unsigned int) { 14 | return 0; 15 | } 16 | 17 | int main(int, char** argv) { 18 | 19 | auto foo_ = easy::jit(foo, IntRange(-5, 5)); 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /misc/doc/include.py: -------------------------------------------------------------------------------- 1 | import generate 2 | import re 3 | 4 | 5 | def on_include(args): 6 | args = args.split() 7 | filename = args[0]; 8 | label = args[1]; 9 | 10 | all_code = open(filename.strip()).read() 11 | 12 | inline = re.compile(".*// INLINE FROM HERE #"+ label +"#(?P.*)// TO HERE #" + label + "#.*", flags=re.DOTALL) 13 | code = inline.match(all_code) 14 | code = code.group(1).rstrip().lstrip() 15 | 16 | print(code, end='') 17 | return 18 | 19 | generate.match_and_expand("include", on_include) 20 | -------------------------------------------------------------------------------- 
/runtime/InitNativeTarget.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace llvm; 8 | 9 | namespace { 10 | class InitNativeTarget { 11 | public: 12 | InitNativeTarget() { 13 | LLVMInitializeX86Target(); 14 | LLVMInitializeX86TargetInfo(); 15 | LLVMInitializeX86TargetMC(); 16 | LLVMInitializeX86AsmPrinter(); 17 | sys::DynamicLibrary::LoadLibraryPermanently(nullptr); 18 | } 19 | } Init; 20 | } 21 | -------------------------------------------------------------------------------- /cmake/EasyJitConfig.cmake.in: -------------------------------------------------------------------------------- 1 | # Try to find the atjit library, headers and compiler plugin. 2 | # ATJit_INCLUDE_DIRS - the atjit include directory 3 | # ATJit_LIBRARY_DIRS - library directory needed to use atjit 4 | # ATJit_LIBRARY - library needed to use atjit 5 | # ATJit_PLUGIN - compiler plugin 6 | 7 | set(ATJit_INCLUDE_DIRS "@CMAKE_INSTALL_PREFIX@/include") 8 | set(ATJit_LIBRARY_DIRS "@CMAKE_INSTALL_PREFIX@/lib") 9 | set(ATJit_LIBRARY "ATJitRuntime") 10 | set(ATJit_PLUGIN "@CMAKE_INSTALL_PREFIX@/lib/EasyJitPass@CMAKE_SHARED_LIBRARY_SUFFIX@") 11 | -------------------------------------------------------------------------------- /pass/RegisterPasses.cpp: -------------------------------------------------------------------------------- 1 | #include "StaticPasses.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace llvm; 8 | using namespace easy; 9 | 10 | static void callback(const PassManagerBuilder &, 11 | legacy::PassManagerBase &PM) { 12 | PM.add(easy::createRegisterBitcodePass()); 13 | } 14 | 15 | RegisterStandardPasses Register(PassManagerBuilder::EP_OptimizerLast, callback); 16 | RegisterStandardPasses RegisterO0(PassManagerBuilder::EP_EnabledOnOptLevel0, callback); 17 | -------------------------------------------------------------------------------- 
/include/tuner/AnalyzingTuner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace tuner { 7 | 8 | class AnalyzingTuner : public Tuner { 9 | private: 10 | bool alreadyRun = false; 11 | 12 | protected: 13 | std::shared_ptr Cxt_; 14 | 15 | public: 16 | 17 | AnalyzingTuner(KnobSet KS, std::shared_ptr Cxt) 18 | : Tuner(KS), Cxt_(std::move(Cxt)) {} 19 | 20 | // collects knobs relevant for tuning from the module. 21 | void analyze(llvm::Module &M) override; 22 | 23 | }; 24 | 25 | } // namespace tuner 26 | -------------------------------------------------------------------------------- /runtime/tuner/Util.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | namespace tuner { 9 | 10 | int pow2Bit(uint64_t val) { 11 | // NOTE: if we set the 5th bit, then: 12 | // 0b100000 - 1 = 0b011111, and then 13 | // popcnt(0b011111) = 5 14 | 15 | if (val == 1) 16 | return 0; 17 | 18 | val -= 1; 19 | std::bitset<16> bits(val); 20 | return bits.count(); 21 | } 22 | 23 | void sleep_for(unsigned ms) { 24 | std::this_thread::sleep_for(std::chrono::milliseconds(ms)); 25 | } 26 | 27 | } // end namespace 28 | 29 | int JSON::depth = 0; 30 | -------------------------------------------------------------------------------- /tests/simple/int_a.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | using namespace std::placeholders; 11 | 12 | int add (int a, int b) { 13 | return a+b; 14 | } 15 | 16 | int main() { 17 | easy::FunctionWrapper inc = easy::jit(add, _1, 1); 18 | 19 | // CHECK: inc(4) is 5 20 | // CHECK: inc(5) is 6 21 | // CHECK: inc(6) is 7 22 | // CHECK: inc(7) is 8 23 | for(int v = 4; v != 8; ++v) 24 | printf("inc(%d) is 
%d\n", v, inc(v)); 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /tests/simple/static_var_a.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | using namespace std::placeholders; 11 | 12 | void add (int a, int b) { 13 | printf("inc(%d) is %d\n", a, a+b); 14 | } 15 | 16 | int main() { 17 | easy::FunctionWrapper inc = easy::jit(add, _1, 1); 18 | 19 | // CHECK: inc(4) is 5 20 | // CHECK: inc(5) is 6 21 | // CHECK: inc(6) is 7 22 | // CHECK: inc(7) is 8 23 | for(int v = 4; v != 8; ++v) 24 | inc(v); 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /tests/simple/long_a.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | using namespace std::placeholders; 11 | 12 | int add (long a, long b) { 13 | return a+b; 14 | } 15 | 16 | int main() { 17 | easy::FunctionWrapper inc = easy::jit(add, _1, -1); 18 | 19 | // CHECK: inc(4) is 3 20 | // CHECK: inc(5) is 4 21 | // CHECK: inc(6) is 5 22 | // CHECK: inc(7) is 6 23 | for(int v = 4; v != 8; ++v) 24 | printf("inc(%d) is %d\n", v, inc(v)); 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /tests/simple/nortti_a.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t -fno-rtti 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | using namespace std::placeholders; 11 | 12 | int add (int a, int b) { 13 | return a+b; 14 | } 15 | 16 | int main() { 17 | easy::FunctionWrapper inc = 
easy::jit(add, _1, 1); 18 | 19 | // CHECK: inc(4) is 5 20 | // CHECK: inc(5) is 6 21 | // CHECK: inc(6) is 7 22 | // CHECK: inc(7) is 8 23 | for(int v = 4; v != 8; ++v) 24 | printf("inc(%d) is %d\n", v, inc(v)); 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /tests/simple/ptr_a.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | using namespace std::placeholders; 11 | 12 | int add (int a, int *b) { 13 | return a+*b; 14 | } 15 | 16 | int main() { 17 | int b = 1; 18 | easy::FunctionWrapper inc = easy::jit(add, _1, &b); 19 | 20 | // CHECK: inc(4) is 5 21 | // CHECK: inc(5) is 6 22 | // CHECK: inc(6) is 7 23 | // CHECK: inc(7) is 8 24 | for(int v = 4; v != 8; ++v) 25 | printf("inc(%d) is %d\n", v, inc(v)); 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /tests/simple/static_var_b.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | using namespace std::placeholders; 11 | 12 | void add (int b) { 13 | static int a = 4; 14 | printf("inc(%d) is %d\n", a, a+b); 15 | a++; 16 | } 17 | 18 | int main() { 19 | easy::FunctionWrapper inc = easy::jit(add, 1); 20 | 21 | // CHECK: inc(4) is 5 22 | // CHECK: inc(5) is 6 23 | // CHECK: inc(6) is 7 24 | // CHECK: inc(7) is 8 25 | for(int v = 4; v != 8; ++v) 26 | inc(); 27 | 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /runtime/tuner/Knob.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | namespace tuner { 6 | 7 | template 8 | 
bool operator== (const ScalarRange& A, const ScalarRange& B) { 9 | return ( 10 | A.max() == B.max() && 11 | A.min() == B.min() && 12 | A.getDefault() == B.getDefault() 13 | ); 14 | } 15 | 16 | } // end namespace 17 | 18 | 19 | namespace tuned_param { 20 | bool IntRange::operator== (IntRange const& Other) { 21 | return ( 22 | static_cast const&>(*this) 23 | == static_cast const&>(Other) 24 | ); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /tests/simple/float_a.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | using namespace std::placeholders; 11 | 12 | float add (float a, float b) { 13 | return a+b; 14 | } 15 | 16 | int main() { 17 | easy::FunctionWrapper inc = easy::jit(add, _1, 1); 18 | 19 | // CHECK: inc(4.00) is 5.00 20 | // CHECK: inc(5.00) is 6.00 21 | // CHECK: inc(6.00) is 7.00 22 | // CHECK: inc(7.00) is 8.00 23 | for(int v = 4; v != 8; ++v) 24 | printf("inc(%.2f) is %.2f\n", (float)v, inc(v)); 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /tests/simple/double_a.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | using namespace std::placeholders; 11 | 12 | double add (double a, double b) { 13 | return a+b; 14 | } 15 | 16 | int main() { 17 | easy::FunctionWrapper inc = easy::jit(add, _1, 1); 18 | 19 | // CHECK: inc(4.00) is 5.00 20 | // CHECK: inc(5.00) is 6.00 21 | // CHECK: inc(6.00) is 7.00 22 | // CHECK: inc(7.00) is 8.00 23 | for(int v = 4; v != 8; ++v) 24 | printf("inc(%.2f) is %.2f\n", (double)v, inc(v)); 25 | 26 | return 0; 27 | } 28 | 
-------------------------------------------------------------------------------- /include/easy/exceptions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace easy { 8 | struct exception 9 | : public std::runtime_error { 10 | exception(std::string const &Message, std::string const &Reason) 11 | : std::runtime_error(Message + Reason) {} 12 | virtual ~exception() = default; 13 | }; 14 | } 15 | 16 | #define DefineEasyException(Exception, Message) \ 17 | struct Exception : public easy::exception { \ 18 | Exception() : easy::exception(Message, "") {} \ 19 | Exception(std::string const &Reason) : easy::exception(Message, Reason) {} \ 20 | virtual ~Exception() = default; \ 21 | } 22 | -------------------------------------------------------------------------------- /cmake/GCD.cmake: -------------------------------------------------------------------------------- 1 | ################# 2 | # find Grand Central Dispatch installation 3 | 4 | # defines the $GCD_LIB, and $GCD_INCLUDE_DIR symbols 5 | 6 | function (requireFound SYMB MSG) 7 | if(NOT ${SYMB}) 8 | message(FATAL_ERROR ${MSG}) 9 | endif() 10 | message(STATUS "Found ${SYMB}: ${${SYMB}}") 11 | endfunction(requireFound) 12 | 13 | # search 14 | find_library(GCD_LIB 15 | NAMES dispatch 16 | ) 17 | 18 | find_path(GCD_INCLUDE_DIR 19 | NAMES dispatch/dispatch.h 20 | ) 21 | 22 | # check 23 | requireFound(GCD_LIB "Grand Central Dispatch shared library (libdispatch) not found") 24 | requireFound(GCD_INCLUDE_DIR "Grand Central Dispatch header files not found") 25 | 26 | #################### 27 | -------------------------------------------------------------------------------- /tests/simple/opt_level.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | 
using namespace std::placeholders; 11 | 12 | int add (int a, int b) { 13 | return a+b; 14 | } 15 | 16 | int main() { 17 | 18 | // TODO: I don't know how to test this correctly. 19 | easy::FunctionWrapper inc = easy::jit(add, _1, 1, easy::options::opt_level(0,1)); 20 | 21 | // CHECK: inc(4) is 5 22 | // CHECK: inc(5) is 6 23 | // CHECK: inc(6) is 7 24 | // CHECK: inc(7) is 8 25 | for(int v = 4; v != 8; ++v) 26 | printf("inc(%d) is %d\n", v, inc(v)); 27 | 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /include/easy/runtime/LLVMHolderImpl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | namespace easy { 9 | class LLVMHolderImpl : public easy::LLVMHolder { 10 | public: 11 | 12 | std::unique_ptr Context_; 13 | std::unique_ptr Engine_; 14 | llvm::Module* M_; // the execution engine has the ownership 15 | 16 | LLVMHolderImpl(std::unique_ptr EE, std::unique_ptr C, llvm::Module* M) 17 | : Context_(std::move(C)), Engine_(std::move(EE)), M_(M) { 18 | } 19 | 20 | virtual ~LLVMHolderImpl() = default; 21 | }; 22 | } 23 | -------------------------------------------------------------------------------- /tests/simple/compose_bad.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %not %atjitc %s -o %t 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std::placeholders; 10 | 11 | float mul(float a, float b) { 12 | return a*b; 13 | } 14 | 15 | int accumulate(std::vector const &vec, int acum, int(*fun)(int)) { 16 | int a = acum; 17 | for(int e : vec) 18 | a += fun(e); 19 | return a; 20 | } 21 | 22 | int main(int argc, char** argv) { 23 | 24 | easy::FunctionWrapper mul_by_two = easy::jit(mul, _1, 2.0); 25 | easy::FunctionWrapper const&)> mul_vector_by_two = easy::jit(accumulate, _1, 0, mul_by_two); 26 | 27 | return 0; 28 | } 29 | 
-------------------------------------------------------------------------------- /tests/simple/small_struct.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | using namespace std::placeholders; 11 | 12 | struct Point { 13 | int x; 14 | int y; 15 | }; 16 | 17 | int add (Point a, Point b) { 18 | return a.x+b.x+a.y+b.y; 19 | } 20 | 21 | int main() { 22 | easy::FunctionWrapper inc = easy::jit(add, _1, Point{1,1}); 23 | 24 | // CHECK: inc(4,4) is 10 25 | // CHECK: inc(5,5) is 12 26 | // CHECK: inc(6,6) is 14 27 | // CHECK: inc(7,7) is 16 28 | for(int v = 4; v != 8; ++v) 29 | printf("inc(%d,%d) is %d\n", v, v, inc(Point{v,v})); 30 | 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /tests/simple/thread.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc -lpthread %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std::placeholders; 12 | 13 | int add (int a, int b) { 14 | return a+b; 15 | } 16 | 17 | int main() { 18 | auto inc_future = std::async(std::launch::async, 19 | [](){ return easy::jit(add, _1, 1);}); 20 | 21 | // CHECK: inc(4) is 5 22 | // CHECK: inc(5) is 6 23 | // CHECK: inc(6) is 7 24 | // CHECK: inc(7) is 8 25 | auto inc = inc_future.get(); 26 | for(int v = 4; v != 8; ++v) 27 | printf("inc(%d) is %d\n", v, inc(v)); 28 | 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /pass/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # NOTE: -O2 and -O3 cause the plugin to segfault _only_ on Travis CI with LLVM 6. 
2 | # it happens when the bitcode register pass tries to construct a Twine, so 3 | # there's no straight-forward fix. 4 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O1") 5 | 6 | include(AddLLVM) 7 | 8 | add_llvm_library(EasyJitPass 9 | MODULE 10 | RegisterPasses.cpp 11 | Easy.cpp 12 | MayAliasTracer.cpp 13 | LoopNamer.cpp 14 | PLUGIN_TOOL 15 | clang 16 | ) 17 | 18 | include_directories(${LLVM_INCLUDE_DIRS}) 19 | add_definitions(${LLVM_DEFINITIONS}) 20 | 21 | set(EASY_JIT_PASS ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/EasyJitPass${CMAKE_SHARED_LIBRARY_SUFFIX} PARENT_SCOPE) 22 | 23 | install(TARGETS EasyJitPass 24 | LIBRARY DESTINATION lib) 25 | -------------------------------------------------------------------------------- /tests/simple/cache.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | using namespace std::placeholders; 11 | 12 | int add (int a, int b) { 13 | return a+b; 14 | } 15 | 16 | int main() { 17 | easy::Cache<> C; 18 | 19 | // CHECK: inc(4) is 5 20 | // CHECK: inc(5) is 6 21 | // CHECK: inc(6) is 7 22 | // CHECK: inc(7) is 8 23 | 24 | for(int i = 0; i != 16; ++i) { 25 | auto const &inc = C.jit(add, _1, 1); 26 | 27 | if(!C.has(add, _1, 1)) { 28 | printf("code not in cache!\n"); 29 | return -1; 30 | } 31 | 32 | for(int v = 4; v != 8; ++v) 33 | printf("inc(%d) is %d\n", v, inc(v)); 34 | } 35 | 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /tests/simple/array_bad.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %not %atjitc %s -o %t 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std::placeholders; 13 | using namespace easy::options; 14 | 15 | //////////// 16 | 17 | #define DIM 49 18 | 
# Optional README example. Off by default because it needs OpenCV.
option(ATJIT_EXAMPLE "Build Examples" OFF)

if(ATJIT_EXAMPLE)
  find_package(OpenCV REQUIRED)

  include_directories(${LLVM_INCLUDE_DIRS})
  include_directories(${OpenCV_INCLUDE_DIRS})
  add_definitions(${LLVM_DEFINITIONS})

  # The example must be compiled by the matching clang with the pass plugin
  # loaded; -disable-O0-optnone is passed so unoptimized builds do not get
  # the optnone attribute.
  set(CMAKE_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -disable-O0-optnone -Xclang -load -Xclang ${EASY_JIT_PASS}")

  # link_directories() only affects targets created after the call, so it has
  # to precede add_executable() to have any effect on the example.
  link_directories(${CMAKE_LIBRARY_OUTPUT_DIRECTORY})

  # NOTE(review): confirm camfilter.cpp exists in this directory before
  # enabling ATJIT_EXAMPLE.
  add_executable(easyjit-example camfilter.cpp)
  add_dependencies(easyjit-example atjit-core)

  target_link_libraries(easyjit-example ${OpenCV_LIBS})
  target_link_libraries(easyjit-example ${Benchmark_LIBRARIES})
  target_link_libraries(easyjit-example ATJitRuntime pthread)
endif()
find_package(LLVM 6.0 REQUIRED CONFIG) 13 | 14 | message(STATUS "LLVM Include dirs: ${LLVM_INCLUDE_DIRS}") 15 | 16 | include_directories(${ATJit_INCLUDE_DIRS} ${LLVM_INCLUDE_DIRS}) 17 | link_directories(${ATJit_LIBRARY_DIRS}) 18 | 19 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++17 -Xclang -disable-O0-optnone -Xclang -load -Xclang ${ATJit_PLUGIN} -Wl,-rpath,'${LLVM_LIBRARY_DIR}' -L${LLVM_LIBRARY_DIR} -lLLVM") 20 | 21 | add_executable(InstallTest 22 | test.cpp 23 | ) 24 | 25 | target_link_libraries(InstallTest ${ATJit_LIBRARY}) 26 | -------------------------------------------------------------------------------- /tests/simple/multi_file+regexp.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -DMAIN -c -o %t.main.o 2 | // RUN: %atjitc %s -DLIB -c -o %t.lib.o -mllvm -easy-export="add" 3 | // RUN: %atjitc %t.main.o %t.lib.o -o %t 4 | // RUN: %t > %t.out 5 | // RUN: %FileCheck %s < %t.out 6 | 7 | #ifdef LIB 8 | 9 | extern "C" int add (int a, int b) { 10 | return a+b; 11 | } 12 | 13 | #endif 14 | 15 | #ifdef MAIN 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | using namespace std::placeholders; 23 | 24 | extern "C" int add (int a, int b); 25 | 26 | int main() { 27 | easy::FunctionWrapper inc = easy::jit(add, _1, 1); 28 | 29 | // CHECK: inc(4) is 5 30 | // CHECK: inc(5) is 6 31 | // CHECK: inc(6) is 7 32 | // CHECK: inc(7) is 8 33 | for(int v = 4; v != 8; ++v) 34 | printf("inc(%d) is %d\n", v, inc(v)); 35 | 36 | return 0; 37 | } 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /pass/MayAliasTracer.h: -------------------------------------------------------------------------------- 1 | #ifndef MAY_ALIAS_TRACER 2 | #define MAY_ALIAS_TRACER 3 | 4 | #include 5 | 6 | namespace llvm { 7 | class Value; 8 | class GlobalObject; 9 | } 10 | 11 | namespace easy { 12 | class MayAliasTracer { 13 | llvm::SmallPtrSet GOs_; 14 | 15 | using VSet = 
llvm::SmallPtrSetImpl; 16 | void mayAliasWithStoredValues(llvm::Value* V, VSet &Loaded, VSet &Stored); 17 | void mayAliasWithLoadedValues(llvm::Value* V, VSet &Loaded, VSet &Stored); 18 | 19 | 20 | public: 21 | 22 | MayAliasTracer(llvm::Value* V) { 23 | llvm::SmallPtrSet VLoaded; 24 | llvm::SmallPtrSet VStored; 25 | mayAliasWithLoadedValues(V, VLoaded, VStored); 26 | } 27 | unsigned count(llvm::GlobalObject& GO) const { return GOs_.count(&GO);} 28 | }; 29 | 30 | } 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /tests/install/test.cpp.disabled: -------------------------------------------------------------------------------- 1 | // REQUIRES: install 2 | // 3 | // clean before, if not there may be inconsistencies 4 | // RUN: rm -fr build.ninja CMakeCache.txt CMakeFiles cmake_install.cmake InstallTest rules.ninja 5 | // 6 | // RUN: cmake -DCMAKE_CXX_COMPILER=%clang++ -DATJit_DIR=%install_dir/lib/cmake %S 7 | // RUN: cmake --build . 8 | // RUN: ./InstallTest > %t.out 9 | // RUN: %FileCheck %s < %t.out 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | using namespace std::placeholders; 18 | 19 | void test(int a) { 20 | printf("this is a test %d!\n", a); 21 | } 22 | 23 | int main() { 24 | easy::Cache<> C; 25 | auto test_jit0 = easy::jit(test, 0); 26 | auto const &test_jit1 = C.jit(test, 1); 27 | 28 | // CHECK: this is a test 0! 29 | // CHECK: this is a test 1! 30 | test_jit0(); 31 | test_jit1(); 32 | 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /cmake/FindBenchmark.cmake: -------------------------------------------------------------------------------- 1 | # Try to find the Google Benchmark library and headers. 
2 | # Benchmark_FOUND - system has benchmark lib 3 | # Benchmark_INCLUDE_DIRS - the benchmark include directory 4 | # Benchmark_LIBRARIES - libraries needed to use benchmark 5 | 6 | find_path(Benchmark_INCLUDE_DIR 7 | NAMES benchmark/benchmark.h 8 | PATHS ${BENCHMARK_DIR}/include 9 | DOC "The directory where benchmark includes reside" 10 | ) 11 | 12 | find_library(Benchmark_LIBRARY 13 | NAMES benchmark 14 | PATHS ${BENCHMARK_DIR}/lib 15 | DOC "The benchmark library" 16 | ) 17 | 18 | set(Benchmark_INCLUDE_DIRS ${Benchmark_INCLUDE_DIR}) 19 | set(Benchmark_LIBRARIES ${Benchmark_LIBRARY}) 20 | 21 | include(FindPackageHandleStandardArgs) 22 | find_package_handle_standard_args(Benchmark 23 | FOUND_VAR Benchmark_FOUND 24 | REQUIRED_VARS Benchmark_INCLUDE_DIR Benchmark_LIBRARY 25 | ) 26 | 27 | mark_as_advanced(Benchmark_FOUND) 28 | -------------------------------------------------------------------------------- /tests/simple/exception_a.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | using namespace std::placeholders; 11 | 12 | int add (int a, int b) { 13 | if(a == 8) 14 | throw std::runtime_error{"an expected error occured"}; 15 | return a+b; 16 | } 17 | 18 | int main() { 19 | easy::FunctionWrapper inc = easy::jit(add, _1, 1); 20 | 21 | // CHECK: inc(4) is 5 22 | // CHECK: inc(5) is 6 23 | // CHECK: inc(6) is 7 24 | // CHECK: inc(7) is 8 25 | // CHECK: inc(8) is exception: an expected error occured 26 | // CHECK: inc(9) is 10 27 | for(int v = 4; v != 10; ++v) { 28 | try { 29 | printf("inc(%d) is %d\n", v, inc(v)); 30 | } catch(std::runtime_error &e) { 31 | printf("inc(%d) is exception: %s\n", v, e.what()); 32 | } 33 | } 34 | 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /CMakeTests.txt: 
-------------------------------------------------------------------------------- 1 | find_python_module(lit REQUIRED) 2 | 3 | include(Valgrind) 4 | 5 | find_program(TIME_EXE time) 6 | 7 | set(ALL_TESTS tests/) 8 | 9 | # it is not possible to use add_dependencies on the install target, since it's 10 | # not a "real" target: https://cmake.org/cmake/help/v3.5/command/add_dependencies.html 11 | # NOTE: if you want to shuffle the tests, add --shuffle 12 | add_custom_target(check 13 | COMMAND ${PYTHON_EXEC} -m lit.main ${ALL_TESTS} -v --time-tests -j 4 14 | COMMENT "running regression test suite..." 15 | USES_TERMINAL 16 | ) 17 | 18 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/tests/lit.cfg.in" "${CMAKE_CURRENT_BINARY_DIR}/tests/lit.cfg") 19 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/tests/doc/lit.cfg.in" "${CMAKE_CURRENT_BINARY_DIR}/tests/doc/lit.cfg") 20 | 21 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/tests/compare.sh" "${CMAKE_CURRENT_BINARY_DIR}/tests/compare.sh") 22 | -------------------------------------------------------------------------------- /tests/simple/serialize.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std::placeholders; 13 | 14 | int add (int a, int b) { 15 | return a+b; 16 | } 17 | 18 | int main() { 19 | auto inc_store = easy::jit(add, _1, 1); 20 | 21 | std::stringstream out; 22 | inc_store.serialize(out); 23 | out.flush(); 24 | 25 | std::string buffer = out.str(); 26 | 27 | assert(buffer.size()); 28 | printf("buffer.size() = %lu\n", buffer.size()); 29 | 30 | std::stringstream in(buffer); 31 | auto inc_load = easy::FunctionWrapper::deserialize(in); 32 | 33 | // CHECK: inc(4) is 5 34 | // CHECK: inc(5) is 6 35 | // CHECK: inc(6) is 7 36 | // CHECK: inc(7) is 8 37 | for(int v = 4; v != 8; ++v) 38 | printf("inc(%d) is 
%d\n", v, inc_load(v)); 39 | 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /tests/simple/int_ptr_a.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t "%t.ll" > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | // RUN: %FileCheck --check-prefix=CHECK-IR %s < %t.ll 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | // verify that the variable 'b' is not loaded, and the addition is performed using its constant value 13 | // CHECK-IR-NOT: inttoptr 14 | // CHECK-IR-NOT: load i32 15 | // CHECK-IR: add{{.*}}4321 16 | 17 | using namespace std::placeholders; 18 | 19 | static int add (int a, int *b) { 20 | return a+*b; 21 | } 22 | 23 | int const b = 4321; 24 | 25 | int main(int argc, char** argv) { 26 | easy::FunctionWrapper inc = easy::jit(add, _1, &b, easy::options::dump_ir(argv[1])); 27 | 28 | // CHECK: inc(4) is 4325 29 | // CHECK: inc(5) is 4326 30 | // CHECK: inc(6) is 4327 31 | // CHECK: inc(7) is 4328 32 | for(int v = 4; v != 8; ++v) 33 | printf("inc(%d) is %d\n", v, inc(v)); 34 | 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /tests/simple/unroll.cpp: -------------------------------------------------------------------------------- 1 | // RUN: rm -f %t.ll 2 | // RUN: %atjitc -O2 %s -o %t 3 | // RUN: %t %t.ll > %t.out 4 | // RUN: %FileCheck %s < %t.out 5 | // RUN: %FileCheck --check-prefix=CHECK-IR %s < %t.ll 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | // only one function 14 | // with only one block (~no branch) 15 | // CHECK-IR: define 16 | // CHECK-IR-NOT: define 17 | // CHECK-IR-NOT: br 18 | // CHECK-IR: ret 19 | 20 | using namespace std::placeholders; 21 | 22 | int dot(std::vector a, std::vector b) { 23 | int x = 0; 24 | for(size_t i = 0, n = a.size(); i != n; ++i) { 25 | x += a[i]*b[i]; 26 | } 27 | return x; 28 | } 29 | 
30 | int main(int, char** argv) { 31 | 32 | std::vector a = {1,2,3,4}, 33 | b = {4,3,2,1}; 34 | 35 | auto dot_a = easy::jit(dot, a, _1, easy::options::dump_ir(argv[1])); 36 | int x = dot_a(b); 37 | 38 | // CHECK: dot is 20 39 | printf("dot is %d\n", x); 40 | 41 | return 0; 42 | } 43 | -------------------------------------------------------------------------------- /tests/simple/serialize_static.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std::placeholders; 13 | 14 | static int var = 0; 15 | 16 | int add (int a, int b) { 17 | return a+b+var; 18 | } 19 | 20 | int main() { 21 | auto inc_store = easy::jit(add, _1, 1); 22 | 23 | std::stringstream out; 24 | inc_store.serialize(out); 25 | out.flush(); 26 | 27 | std::string buffer = out.str(); 28 | 29 | assert(buffer.size()); 30 | printf("buffer.size() = %lu\n", buffer.size()); 31 | 32 | std::stringstream in(buffer); 33 | auto inc_load = easy::FunctionWrapper::deserialize(in); 34 | 35 | // CHECK: inc(4) is 6 36 | // CHECK: inc(5) is 8 37 | // CHECK: inc(6) is 10 38 | // CHECK: inc(7) is 12 39 | for(int v = 4; v != 8; ++v) { 40 | var++; 41 | printf("inc(%d) is %d\n", v, inc_load(v)); 42 | } 43 | 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /tests/simple/devirtualization_nohint.cpp: -------------------------------------------------------------------------------- 1 | // RUN: rm -f %t.ll 2 | // RUN: %atjitc -O2 %s -o %t 3 | // RUN: %t %t.ll > %t.out 4 | // RUN: %FileCheck %s < %t.out 5 | // RUN: %FileCheck --check-prefix=CHECK-IR %s < %t.ll 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | // only one function 14 | // reading from a global variable 15 | // CHECK-IR-NOT: = tail call 16 | 17 | 18 | using namespace 
std::placeholders; 19 | 20 | struct Foo { 21 | virtual int doit() { return 1; } 22 | virtual ~Foo() = default; 23 | }; 24 | 25 | struct Bar : Foo { 26 | int doit() override { return 2; } 27 | }; 28 | 29 | int doit(Foo* f) { 30 | return f->doit(); 31 | } 32 | 33 | int main(int argc, char** argv) { 34 | Foo* f = nullptr; 35 | if(argc == 1) 36 | f = new Foo(); 37 | else 38 | f = new Bar(); 39 | 40 | easy::FunctionWrapper easy_doit = easy::jit(doit, f, easy::options::dump_ir(argv[1])); 41 | 42 | // CHECK: doit() is 2 43 | printf("doit() is %d\n", easy_doit()); 44 | 45 | delete f; 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /tests/simple/devirtualization.cpp: -------------------------------------------------------------------------------- 1 | // RUN: rm -f %t.ll 2 | // RUN: %atjitc -O2 %s -o %t 3 | // RUN: %t %t.ll > %t.out 4 | // RUN: %FileCheck %s < %t.out 5 | // RUN: %FileCheck --check-prefix=CHECK-IR %s < %t.ll 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | // only one function 14 | // reading from a global variable 15 | // CHECK-IR-NOT: = tail call 16 | 17 | 18 | using namespace std::placeholders; 19 | 20 | struct Foo { 21 | virtual int EASY_JIT_EXPOSE doit() { return 1; } 22 | virtual ~Foo() = default; 23 | }; 24 | 25 | struct Bar : Foo { 26 | int EASY_JIT_EXPOSE doit() override { return 2; } 27 | }; 28 | 29 | int doit(Foo* f) { 30 | return f->doit(); 31 | } 32 | 33 | int main(int argc, char** argv) { 34 | Foo* f = nullptr; 35 | if(argc == 1) 36 | f = new Foo(); 37 | else 38 | f = new Bar(); 39 | 40 | easy::FunctionWrapper easy_doit = easy::jit(doit, f, easy::options::dump_ir(argv[1])); 41 | 42 | // CHECK: doit() is 2 43 | printf("doit() is %d\n", easy_doit()); 44 | 45 | delete f; 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /misc/doc/generate.py: 
def do_match_and_expand(contents, split, expand_with):
    """Print `contents`, handing every tagged section to `expand_with`.

    Args:
        contents: the text to process.
        split: callable returning `(pre, args, post, matched)` for a text;
            when `matched` is false the text holds no further section.
        expand_with: callable invoked with the text of each section, in
            document order. Whatever it prints appears in place of the
            section.

    Text outside of sections is written to stdout unchanged, without a
    trailing newline being added.
    """
    # Walk the remainder of the document in a loop rather than recursing on
    # it: one stack frame per section overflows the interpreter's recursion
    # limit on documents with many sections.
    while True:
        pre, code, post, match = split(contents)
        if not match:
            print(contents, end='')
            return
        # `pre` is still processed through the same routine, as before.
        do_match_and_expand(pre, split, expand_with)
        expand_with(code)
        contents = post
test2DArray() { 43 | const int DIM = 50; 44 | double Mat[DIM][DIM]; 45 | 46 | auto const &Fn = easy::jit(writeTo2DArray, _1); 47 | Fn(Mat); 48 | } 49 | 50 | //////////// 51 | 52 | 53 | int main(int argc, char** argv) { 54 | 55 | test1DArray(); 56 | test2DArray(); 57 | 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /get-llvm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 4 | 5 | if [ $# -ne 1 ]; then 6 | echo "expected args: " 7 | exit 1 8 | fi 9 | 10 | ############ 11 | # prefer using Ninja if available 12 | command -v ninja 13 | if [ $? -eq 0 ]; then 14 | GENERATOR="Ninja" 15 | BUILD_CMD="ninja install" 16 | else 17 | NUM_CPUS=`getconf _NPROCESSORS_ONLN` 18 | GENERATOR="Unix Makefiles" 19 | BUILD_CMD="make install -j${NUM_CPUS}" 20 | fi 21 | 22 | 23 | cd $1 24 | 25 | ##### 26 | # make sure we're in an empty dir 27 | if [ `ls -1A . | wc -l` -ne 0 ]; then 28 | echo "provided directory must be empty!" 29 | exit 1 30 | fi 31 | 32 | ##### 33 | # get sources 34 | 35 | # PULL LLVM mono repo 36 | git clone https://github.com/llvm/llvm-project.git src 37 | cd ./src 38 | git checkout tags/llvmorg-8.0.1 39 | 40 | cd .. 
// Counts the Collatz steps needed to bring `seed` down to 1: even values are
// halved, odd values n become 3n + 1.
//
// seed    starting value of the sequence.
// returns the number of steps taken; 0 when seed is 1 or is not positive.
int collatz(int seed) {
  // Non-positive seeds never reach 1 (0 maps to itself, negative values
  // cycle among negatives), so looping on them would never terminate.
  if (seed < 1)
    return 0;

  // 3n + 1 can exceed INT_MAX even when the seed itself fits in an int, and
  // signed overflow is undefined behavior; iterate in a wider unsigned type.
  unsigned long long n = static_cast<unsigned long long>(seed);
  int steps = 0;

  while(n != 1) {

    if (n % 2 == 0)
      n /= 2;
    else
      n = 3*n + 1;

    steps += 1;
  }

  return steps;
}
39 | // see: https://oeis.org/A006877 40 | 41 | for (int i = 0; i < ITERS; i++) { 42 | auto const &OptimizedFun = AT.reoptimize(collatz, 43 | IntRange(1, 65536, 6171), 44 | tuner_kind(TunerKind), 45 | blocking(true)); 46 | 47 | OptimizedFun(); 48 | } 49 | 50 | AT.exportStats(); 51 | 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /tests/simple/fun_ptr_a.cpp: -------------------------------------------------------------------------------- 1 | // RUN: rm -f %t.ll 2 | // RUN: %atjitc %s -o %t 3 | // RUN: %t %t.ll > %t.out 4 | // RUN: %FileCheck %s < %t.out 5 | // RUN: %FileCheck --check-prefix=CHECK-IR %s < %t.ll 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | // only one function 14 | // reading from a global variable 15 | // CHECK-IR: @[[GLOBAL:.+]] = external 16 | // CHECK-IR: define 17 | // CHECK-IR-NOT: define 18 | // CHECK-IR-NOT: br 19 | // CHECK-IR: load{{.*}}[[GLOBAL]] 20 | // CHECK-IR: add 21 | // CHECK-IR: store{{.*}}[[GLOBAL]] 22 | // CHECK-IR: ret 23 | 24 | 25 | using namespace std::placeholders; 26 | 27 | static int bubu() { 28 | static int v = 0; 29 | return v++; 30 | } 31 | 32 | static int add (int a, int (*f)()) { 33 | return a+f(); 34 | } 35 | 36 | int main(int argc, char** argv) { 37 | easy::FunctionWrapper inc = easy::jit(add, _1, bubu, easy::options::dump_ir(argv[1])); 38 | 39 | // CHECK: inc(4) is 4 40 | // CHECK: inc(5) is 6 41 | // CHECK: inc(6) is 8 42 | // CHECK: inc(7) is 10 43 | for(int v = 4; v != 8; ++v) 44 | printf("inc(%d) is %d\n", v, inc(v)); 45 | 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /tests/simple/fun_ptr_b.cpp: -------------------------------------------------------------------------------- 1 | // RUN: rm -f %t.ll 2 | // RUN: %atjitc %s -o %t 3 | // RUN: %t %t.ll > %t.out 4 | // RUN: %FileCheck %s < %t.out 5 | // RUN: %FileCheck --check-prefix=CHECK-IR %s < %t.ll 6 | 7 | #include 8 
// Applies `f` to each element of a type-erased array.
//
// data   address of the first element.
// nmemb  number of elements.
// size   size of one element, in bytes.
// f      callback invoked with the address of each element, in order.
static void map (void* data, unsigned nmemb, unsigned size, void (*f)(void*)) {
  // Step a byte pointer from element to element rather than computing
  // `i * size`: that product is evaluated in 32-bit unsigned arithmetic and
  // wraps before it is added to the base address.
  char* elem = static_cast<char*>(data);
  for(unsigned i = 0; i < nmemb; ++i, elem += size)
    f(elem);
}
easy::options::dump_ir(argv[1])); 41 | 42 | // CHECK: inc(4) is 4 43 | // CHECK: inc(5) is 6 44 | // CHECK: inc(6) is 8 45 | // CHECK: inc(7) is 10 46 | for(int v = 4; v != 8; ++v) 47 | printf("inc(%d) is %d\n", v, inc(v)); 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /tests/simple/serialize_multifile.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -DMAIN -c -o %t.main.o 2 | // RUN: %atjitc %s -DLIB -c -o %t.lib.o -mllvm -easy-export="add" 3 | // RUN: %atjitc %t.main.o %t.lib.o -o %t 4 | // RUN: %t > %t.out 5 | // RUN: %FileCheck %s < %t.out 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace std::placeholders; 15 | 16 | #ifdef LIB 17 | 18 | static int var = 0; 19 | 20 | static int add (int a, int b) { 21 | return a+b+(var++); 22 | } 23 | 24 | std::string get_add(int b) { 25 | auto inc_store = easy::jit(add, _1, 1); 26 | 27 | std::ostringstream out; 28 | inc_store.serialize(out); 29 | out.flush(); 30 | 31 | return out.str(); 32 | } 33 | 34 | #endif 35 | 36 | #ifdef MAIN 37 | 38 | std::string get_add(int b); 39 | 40 | int main() { 41 | 42 | std::string bitcode = get_add(1); 43 | std::istringstream in(bitcode); 44 | auto inc_load = easy::FunctionWrapper::deserialize(in); 45 | 46 | // CHECK: inc(4) is 5 47 | // CHECK: inc(5) is 7 48 | // CHECK: inc(6) is 9 49 | // CHECK: inc(7) is 11 50 | for(int v = 4; v != 8; ++v) { 51 | printf("inc(%d) is %d\n", v, inc_load(v)); 52 | } 53 | 54 | return 0; 55 | } 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /tests/tuner/intrange_2.cpp: -------------------------------------------------------------------------------- 1 | // RUN: rm -f %t.ll 2 | // RUN: %atjitc -O2 %s -o %t 3 | // RUN: %t %t.ll 4 | // RUN: %FileCheck --check-prefix=CHECK-IR %s < %t.ll 5 | // RUN: %FileCheck 
--check-prefix=CHECK-IR-BEFOREJIT %s < %t.ll.before 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | 19 | using namespace std::placeholders; 20 | using namespace tuned_param; 21 | 22 | int dispatch(int choice) { 23 | if (choice <= 0) { 24 | return 31337; 25 | } else { 26 | return 44444; 27 | } 28 | } 29 | 30 | int main(int argc, char** argv) { 31 | 32 | { 33 | // test if the JIT compilation properly 34 | // inlines the IntRange's value as a constant, 35 | // which is then used to simplify the dispatcher completely. 36 | // this also tests the default setting on IntRange. 37 | auto F = easy::jit(dispatch, IntRange(0, 1, 1), easy::options::dump_ir(argv[1])); 38 | 39 | // CHECK-IR: define i32 @_Z8dispatchi() local_unnamed_addr #0 { 40 | // CHECK-IR-NOT: br i1 41 | // CHECK-IR-NOT: select 42 | // CHECK-IR: ret i32 44444 43 | 44 | // CHECK-IR-BEFOREJIT: i32 31337 45 | } 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /tests/tuner/parallel_compile.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | // just testing to see if we segfault or otherwise crash 16 | // when stressing the parallel compilation pipeline as much 17 | // as possible. 
18 | 19 | using namespace std::placeholders; 20 | using namespace tuned_param; 21 | using namespace easy::options; 22 | 23 | std::atomic dummy; 24 | 25 | void spin(int val, std::atomic &dummy) { 26 | for (int i = 0; i < val; i++) 27 | dummy = i; 28 | } 29 | 30 | int main(int argc, char** argv) { 31 | 32 | tuner::AutoTuner TunerKind = tuner::AT_Random; 33 | const int ITERS = 100; 34 | int minVal = 9999999; 35 | int maxVal = 99999999; 36 | int dflt = (maxVal - minVal) / 2; 37 | 38 | tuner::ATDriver AT; 39 | 40 | for (int i = 0; i < ITERS; i++) { 41 | auto const &OptimizedFun = AT.reoptimize(spin, 42 | IntRange(minVal, maxVal, dflt), 43 | dummy, 44 | tuner_kind(TunerKind), 45 | feedback_kind(tuner::FB_Total_IgnoreError), 46 | blocking(true) 47 | ); 48 | 49 | OptimizedFun(); 50 | } 51 | 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /tests/simple/fun_ptr_f.cpp: -------------------------------------------------------------------------------- 1 | // RUN: rm -f %t.ll 2 | // RUN: %atjitc %s -o %t 3 | // RUN: %t %t.ll > %t.out 4 | // RUN: %FileCheck %s < %t.out 5 | // RUN: %FileCheck --check-prefix=CHECK-IR %s < %t.ll 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | // only one function, without any call 14 | // CHECK-IR-NOT: call{{ }} 15 | 16 | 17 | using namespace std::placeholders; 18 | 19 | static void foo(void* dat) { 20 | (*(int*)dat) += 1; 21 | } 22 | static void bar(void* dat) { 23 | (*(int*)dat) += 2; 24 | } 25 | 26 | static void map (void* data, unsigned nmemb, unsigned size, void (*f)(void*)) { 27 | for(unsigned i = 0; i < nmemb; ++i) 28 | f((char*)data + i * size); 29 | } 30 | 31 | int main(int argc, char** argv) { 32 | 33 | void (*(come_and_get_some[2]))(void*dat) = {foo, bar}; 34 | 35 | easy::FunctionWrapper map_w = easy::jit(map, _1, _2, _3, come_and_get_some[argc?1:0], easy::options::dump_ir(argv[1])); 36 | 37 | int data[] = {1,2,3,4}; 38 | map_w(data, 
sizeof(data)/sizeof(data[0]), sizeof(data[0])); 39 | 40 | // CHECK: data[0] is 3 41 | // CHECK: data[1] is 4 42 | // CHECK: data[2] is 5 43 | // CHECK: data[3] is 6 44 | for(int v = 0; v != 4; ++v) 45 | printf("data[%d] is %d\n", v, data[v]); 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /tests/simple/fun_ptr_c.cpp: -------------------------------------------------------------------------------- 1 | // RUN: rm -f %t.ll 2 | // RUN: %atjitc %s -o %t 3 | // RUN: %t %t.ll > %t.out 4 | // RUN: %FileCheck %s < %t.out 5 | // RUN: %FileCheck --check-prefix=CHECK-IR %s < %t.ll 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | // only one function, without any call 14 | // CHECK-IR-NOT: call{{ }} 15 | 16 | 17 | using namespace std::placeholders; 18 | 19 | static void foo(void* dat) { 20 | (*(int*)dat) += 1; 21 | } 22 | static void bar(void* dat) { 23 | (*(int*)dat) += 2; 24 | } 25 | 26 | static void map (void* data, unsigned nmemb, unsigned size, void (*f)(void*)) { 27 | for(unsigned i = 0; i < nmemb; ++i) 28 | f((char*)data + i * size); 29 | } 30 | 31 | int main(int argc, char** argv) { 32 | 33 | static void (*(come_and_get_some[]))(void*dat) = {foo, bar}; 34 | 35 | easy::FunctionWrapper map_w = easy::jit(map, _1, _2, _3, come_and_get_some[argc?1:0], easy::options::dump_ir(argv[1])); 36 | 37 | int data[] = {1,2,3,4}; 38 | map_w(data, sizeof(data)/sizeof(data[0]), sizeof(data[0])); 39 | 40 | // CHECK: data[0] is 3 41 | // CHECK: data[1] is 4 42 | // CHECK: data[2] is 5 43 | // CHECK: data[3] is 6 44 | for(int v = 0; v != 4; ++v) 45 | printf("data[%d] is %d\n", v, data[v]); 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /tests/tuner/tile.cpp: -------------------------------------------------------------------------------- 1 | // REQUIRES: pollyknobs 2 | // RUN: rm -f %t.ll 3 | // RUN: %atjitc %s -o %t 4 | // RUN: %t %t.ll 5 
| // RUN: %FileCheck --check-prefix=CHECK-IR %s < %t.ll 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | using namespace std::placeholders; 17 | using namespace easy::options; 18 | 19 | //////////// 20 | 21 | // make sure polly did something 22 | // CHECK-IR: polly.loop_header 23 | 24 | // make sure the tiling metadata was consumed too 25 | // CHECK-IR-NOT: llvm.loop.tile 26 | 27 | 28 | void pragma_id_tile(float *Mat, const int SZ) { 29 | for (int i = 0; i < SZ; i += 1) 30 | for (int j = 0; j < SZ; j += 1) 31 | Mat[(i * SZ) + j] = i + j; 32 | } 33 | 34 | #define DIM 2000 35 | 36 | //////////// 37 | 38 | 39 | int main(int argc, char** argv) { 40 | 41 | tuner::ATDriver AT; 42 | const int ITERS = 100; 43 | std::vector Mat(DIM * DIM); 44 | 45 | for(int i = 0; i < ITERS; i++) { 46 | auto const &OptimizedFun = AT.reoptimize(pragma_id_tile, 47 | _1, _2, 48 | tuner_kind(tuner::AT_Random) 49 | , dump_ir(argv[argc-1]) 50 | , blocking(true) 51 | ); 52 | 53 | OptimizedFun(Mat.data(), DIM); 54 | } 55 | 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /tests/simple/fun_ptr_e.cpp: -------------------------------------------------------------------------------- 1 | // RUN: rm -f %t.ll 2 | // RUN: %atjitc %s -o %t 3 | // RUN: %t %t.ll > %t.out 4 | // RUN: %FileCheck %s < %t.out 5 | // RUN: %FileCheck --check-prefix=CHECK-IR %s < %t.ll 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | // only one function, without any call 14 | // CHECK-IR-NOT: call{{ }} 15 | 16 | 17 | using namespace std::placeholders; 18 | 19 | static void foo(void* dat) { 20 | (*(int*)dat) += 1; 21 | } 22 | static void bar(void* dat) { 23 | (*(int*)dat) += 2; 24 | } 25 | 26 | static void map (void* data, unsigned nmemb, unsigned size, void (*f)(void*)) { 27 | for(unsigned i = 0; i < nmemb; ++i) 28 | f((char*)data + i * size); 29 | } 30 | 31 | int main(int argc, char** 
argv) { 32 | 33 | void (*(come_and_get_some[2]))(void*dat); 34 | come_and_get_some[0] = foo; 35 | come_and_get_some[1] = bar; 36 | 37 | easy::FunctionWrapper map_w = easy::jit(map, _1, _2, _3, come_and_get_some[argc?1:0], easy::options::dump_ir(argv[1])); 38 | 39 | int data[] = {1,2,3,4}; 40 | map_w(data, sizeof(data)/sizeof(data[0]), sizeof(data[0])); 41 | 42 | // CHECK: data[0] is 3 43 | // CHECK: data[1] is 4 44 | // CHECK: data[2] is 5 45 | // CHECK: data[3] is 6 46 | for(int v = 0; v != 4; ++v) 47 | printf("data[%d] is %d\n", v, data[v]); 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /tests/simple/compose_ref.cpp: -------------------------------------------------------------------------------- 1 | // RUN: rm -f %t.ll 2 | // RUN: %atjitc %s -o %t 3 | // RUN: %t 8 1 2 3 4 5 6 7 8 %t.ll > %t.out 4 | // RUN: %FileCheck %s < %t.out 5 | // RUN: %FileCheck --check-prefix=CHECK-IR %s < %t.ll 6 | // 7 | // CHECK: 72 8 | // 9 | // only one function in the final IR 10 | // CHECK-IR: define 11 | // CHECK-IR-NOT: define 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | using namespace std::placeholders; 20 | 21 | int mul(int a, int b) { 22 | return a*b; 23 | } 24 | 25 | int accumulate(std::vector const &vec, int acum, int(&fun)(int)) { 26 | int a = acum; 27 | for(int e : vec) 28 | a += fun(e); 29 | return a; 30 | } 31 | 32 | int main(int argc, char** argv) { 33 | 34 | int n = atoi(argv[1]); 35 | 36 | // read input 37 | std::vector vec; 38 | for(int i = 0; i != n; ++i) 39 | vec.emplace_back(atoi(argv[i+2])); 40 | 41 | // generate code 42 | easy::FunctionWrapper mul_by_two = easy::jit(mul, _1, 2); 43 | easy::FunctionWrapper const&)> mul_vector_by_two = easy::jit(accumulate, _1, 0, mul_by_two, 44 | easy::options::dump_ir(argv[argc-1])); 45 | 46 | // kernel! 
47 | int result = mul_vector_by_two(vec); 48 | 49 | // output 50 | printf("%d\n", result); 51 | 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /runtime/Context.cpp: -------------------------------------------------------------------------------- 1 | #include "easy/runtime/Context.h" 2 | 3 | using namespace easy; 4 | 5 | Context& Context::setParameterIndex(unsigned param_idx) { 6 | return setArg(param_idx); 7 | } 8 | 9 | Context& Context::setParameterInt(int64_t val) { 10 | return setArg(val); 11 | } 12 | 13 | Context& Context::setParameterFloat(double val) { 14 | return setArg(val); 15 | } 16 | 17 | Context& Context::setParameterPointer(const void* val) { 18 | return setArg(val); 19 | } 20 | 21 | Context& Context::setParameterStruct(char const* ptr, size_t size) { 22 | return setArg(ptr, size); 23 | } 24 | 25 | Context& Context::setParameterModule(easy::Function const &F) { 26 | return setArg(F); 27 | } 28 | 29 | Context& Context::setTunableParam(tuned_param::IntRange K) { 30 | return setArg(new tuned_param::IntRange(K)); 31 | } 32 | 33 | bool Context::operator==(const Context& Other) const { 34 | if(getOptLevel() != Other.getOptLevel()) 35 | return false; 36 | 37 | if(size() != Other.size()) 38 | return false; 39 | 40 | if(getTunerKind() != Other.getTunerKind()) 41 | return false; 42 | 43 | for(auto this_it = begin(), other_it = Other.begin(); 44 | this_it != end(); ++this_it, ++other_it) { 45 | ArgumentBase &ThisArg = **this_it; 46 | ArgumentBase &OtherArg = **other_it; 47 | if(!(ThisArg == OtherArg)) 48 | return false; 49 | } 50 | 51 | return true; 52 | } 53 | -------------------------------------------------------------------------------- /tests/simple/custom_key_cache.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 
| 11 | using namespace std::placeholders; 12 | 13 | int add (int a, int b) { 14 | return a+b; 15 | } 16 | 17 | void test_int() { 18 | easy::Cache C; 19 | 20 | for(int i = 0; i != 6; ++i) { 21 | auto const &inc = C.jit(i, add, _1, i); 22 | 23 | if(!C.has(i)) { 24 | printf("code not in cache!\n"); 25 | } 26 | 27 | // CHECK-NOT: code not in cache 28 | // CHECK: inc.int(0) is 0 29 | // CHECK: inc.int(0) is 1 30 | // CHECK: inc.int(0) is 2 31 | // CHECK: inc.int(0) is 3 32 | // CHECK: inc.int(0) is 4 33 | // CHECK: inc.int(0) is 5 34 | 35 | printf("inc.int(%d) is %d\n", 0, inc(0)); 36 | } 37 | } 38 | 39 | void test_string() { 40 | easy::Cache C; 41 | 42 | for(int i = 0; i != 6; ++i) { 43 | auto const &inc = C.jit(std::to_string(i), add, _1, i); 44 | 45 | if(!C.has(std::to_string(i))) { 46 | printf("code not in cache!\n"); 47 | } 48 | 49 | // CHECK-NOT: code not in cache 50 | // CHECK: inc.str(0) is 0 51 | // CHECK: inc.str(0) is 1 52 | // CHECK: inc.str(0) is 2 53 | // CHECK: inc.str(0) is 3 54 | // CHECK: inc.str(0) is 4 55 | // CHECK: inc.str(0) is 5 56 | 57 | printf("inc.str(%d) is %d\n", 0, inc(0)); 58 | } 59 | } 60 | 61 | int main() { 62 | test_int(); 63 | test_string(); 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /pass/LoopNamer.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | using namespace llvm; 8 | 9 | namespace tuner { 10 | 11 | class LoopNamer : public LoopPass { 12 | private: 13 | unsigned LoopIDs = 0; 14 | public: 15 | static char ID; 16 | 17 | LoopNamer() 18 | : LoopPass(ID) {}; 19 | 20 | bool runOnLoop(Loop *Loop, LPPassManager &LPM) override { 21 | MDNode *LoopMD = Loop->getLoopID(); 22 | LLVMContext &Context = Loop->getHeader()->getContext(); 23 | 24 | if (!LoopMD) { 25 | // Setup the first location with a dummy operand for now. 
26 | MDNode *Dummy = MDNode::get(Context, {}); 27 | LoopMD = MDNode::get(Context, {Dummy}); 28 | } 29 | 30 | MDNode* KnobTag = createLoopName(Context, LoopIDs); 31 | MDNode* Wrapper = MDNode::get(Context, {KnobTag}); 32 | 33 | // combine the knob tag with the current LoopMD. 34 | LoopMD = MDNode::concatenate(LoopMD, Wrapper); 35 | 36 | // reinstate the self-loop in the first position of the MD. 37 | LoopMD->replaceOperandWith(0, LoopMD); 38 | 39 | Loop->setLoopID(LoopMD); 40 | 41 | return true; 42 | } 43 | 44 | }; // end class 45 | 46 | char LoopNamer::ID = 0; 47 | static RegisterPass Register("loop-namer", 48 | "Ensure every loop has a name (!llvm.loop metadata)", 49 | false /* only looks at CFG*/, 50 | false /* analysis pass */); 51 | 52 | llvm::Pass* createLoopNamerPass() { 53 | return new LoopNamer(); 54 | } 55 | 56 | } // end namespace 57 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | Todos 2 | ===== 3 | 4 | ## Autotuning 5 | 6 | * Running-time normalization 7 | - first thing to try is to add a user interface for specifying indicators of workload for normalization 8 | - then try to find that out automatically. 9 | - alternative: include the context in the model, 10 | and generate a decision tree in the code, based on what the model learned, that dispatches to differently optimized functions based on inputs. 11 | * Add more benchmarks! 12 | * Experimentation Budget for rate limiting. 13 | - Could use some budget calculation or an annealing system to reduce experimentation 14 | - Perhaps consider the difference between predicted and actual performance in Bayes. 15 | * Inserting passes at different points in the PassManagerBuilder pipeline (i.e., less used or useful-to-run-more passes). 16 | * More asynchrony in the compile job queue (notably, training in Bayes could be async). 
17 | * Use LLVM's PGO data collection insertion and make it available to optimization passes. 18 | * Hyperparameter tuning of the Bayes tuner 19 | * Function workload normalization 20 | * Exporting / persisting results of tuning. 21 | - Could just dump the best human-readable configs to a file and look for it when 22 | constructing the tuner. Another harder option would be to generate an object file and dynamically link. 23 | 24 | ## JIT Compilation 25 | 26 | ### Known Issues 27 | 28 | * Inlining of structures. 29 | - large structure return 30 | 31 | ### Testing 32 | 33 | * Test with: 34 | - member functions 35 | - function objects (jit operator() ?) 36 | - other architectures: ARM ? 37 | - other OS: osx ? 38 | -------------------------------------------------------------------------------- /tests/tuner/intrange_1.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | using namespace std::placeholders; 18 | using namespace tuned_param; 19 | using namespace easy::options; 20 | 21 | void show(int i, int k) { 22 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); 23 | printf("intrange test recieved (%i, %i)\n", i, k); 24 | } 25 | 26 | // (1) make sure the dynamic arg is still making it through 27 | 28 | // CHECK: intrange test recieved ({{-?[0-9]}}, 1) 29 | // CHECK: intrange test recieved ({{-?[0-9]}}, 3) 30 | 31 | // (2) make sure the value changes within the range 32 | 33 | // RUN: grep "intrange test recieved (9" < %t.out 34 | // RUN: grep -E "intrange test recieved \(-?[0-8]" < %t.out 35 | 36 | // (3) TODO: some sort of test that ensures the range is 37 | // inclusive. Random tuning of even a [1,2] range is not stable 38 | // enough for CI. 
39 | 40 | int main(int argc, char** argv) { 41 | 42 | tuner::AutoTuner TunerKind = tuner::AT_Random; 43 | tuner::FeedbackKind FBK = tuner::FB_Total_IgnoreError; 44 | const int ITERS = 5; 45 | 46 | tuner::ATDriver AT; 47 | 48 | for (int i = 0; i < ITERS; i++) { 49 | auto const &OptimizedFun = AT.reoptimize(show, 50 | IntRange(-8, 9, 9), _1, 51 | tuner_kind(TunerKind), feedback_kind(FBK), blocking(true)); 52 | 53 | OptimizedFun(i); 54 | } 55 | 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /cmake/Python.cmake: -------------------------------------------------------------------------------- 1 | include(FindPackageHandleStandardArgs) 2 | 3 | # allow specifying which Python installation to use 4 | if (NOT PYTHON_EXEC) 5 | set(PYTHON_EXEC $ENV{PYTHON_EXEC}) 6 | endif (NOT PYTHON_EXEC) 7 | 8 | if (NOT PYTHON_EXEC) 9 | find_program(PYTHON_EXEC "python${Python_FIND_VERSION}" 10 | DOC "Location of python executable to use") 11 | endif(NOT PYTHON_EXEC) 12 | 13 | execute_process(COMMAND "${PYTHON_EXEC}" "-c" 14 | "import sys; print('%d.%d' % (sys.version_info[0],sys.version_info[1]))" 15 | OUTPUT_VARIABLE PYTHON_VERSION 16 | OUTPUT_STRIP_TRAILING_WHITESPACE) 17 | string(REPLACE "." "" PYTHON_VERSION_NO_DOTS ${PYTHON_VERSION}) 18 | 19 | 20 | function(find_python_module module) 21 | string(TOUPPER ${module} module_upper) 22 | if(NOT PY_${module_upper}) 23 | if(ARGC GREATER 1 AND ARGV1 STREQUAL "REQUIRED") 24 | set(${module}_FIND_REQUIRED TRUE) 25 | endif() 26 | # A module's location is usually a directory, but for binary modules it's a .so file. 
27 | execute_process(COMMAND "${PYTHON_EXEC}" "-c" "import re, ${module}; print(re.compile('/__init__.py.*').sub('',${module}.__file__))" # print() call form so the probe runs on Python 2 and Python 3 28 | RESULT_VARIABLE _${module}_status 29 | OUTPUT_VARIABLE _${module}_location 30 | ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) 31 | if(NOT _${module}_status) 32 | set(PY_${module_upper} ${_${module}_location} CACHE STRING "Location of Python module ${module}") 33 | endif(NOT _${module}_status) 34 | endif(NOT PY_${module_upper}) 35 | find_package_handle_standard_args(PY_${module} DEFAULT_MSG PY_${module_upper}) 36 | endfunction(find_python_module) 37 | -------------------------------------------------------------------------------- /cmake/XGBoost.cmake: -------------------------------------------------------------------------------- 1 | ################# 2 | # find xgboost installation 3 | 4 | # this script automatically includes all needed include dirs, 5 | # and outputs the following variables: 6 | # 7 | # XGB_LIB -- an absolute path to the shared library file for xgboost 8 | # XGB_LIB_DIR -- the directory to the above lib 9 | # 10 | 11 | function (requireFound SYMB MSG) 12 | if(NOT ${SYMB}) 13 | message(FATAL_ERROR ${MSG}) 14 | endif() 15 | message(STATUS "Found ${SYMB}: ${${SYMB}}") 16 | endfunction(requireFound) 17 | 18 | # search 19 | find_library(XGB_LIB 20 | NAMES xgboost 21 | HINTS "${PROJECT_SOURCE_DIR}/xgboost/root/lib" 22 | ) 23 | 24 | find_path(XGB_INCLUDE_DIR 25 | NAMES xgboost/base.h 26 | HINTS "${PROJECT_SOURCE_DIR}/xgboost/root/include" 27 | ) 28 | 29 | find_path(RABIT_INCLUDE_DIR 30 | NAMES rabit/rabit.h 31 | HINTS "${PROJECT_SOURCE_DIR}/xgboost/root/rabit/include" 32 | ) 33 | 34 | find_path(DMLC_INCLUDE_DIR 35 | NAMES dmlc/omp.h 36 | HINTS "${PROJECT_SOURCE_DIR}/xgboost/root/dmlc-core/include" 37 | ) 38 | 39 | # check 40 | requireFound(XGB_LIB "XGBoost shared library (libxgboost) not found") 41 | requireFound(XGB_INCLUDE_DIR "XGBoost include header files not found") 42 | requireFound(RABIT_INCLUDE_DIR "Rabit 
include header files not found") 43 | requireFound(DMLC_INCLUDE_DIR "DMLC include header files not found") 44 | 45 | include_directories(${XGB_INCLUDE_DIR} 46 | ${RABIT_INCLUDE_DIR} 47 | ${DMLC_INCLUDE_DIR} 48 | ) 49 | 50 | get_filename_component(XGB_LIB_DIR ${XGB_LIB} DIRECTORY) 51 | message(STATUS "Found XGB_LIB_DIR: ${XGB_LIB_DIR}") 52 | 53 | #################### 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, Kavon Farvardin. All rights reserved. 4 | 5 | Copyright (c) 2018, Juan Manuel Martinez Caamaño and Serge Guelton and Quarkslab. 6 | All rights reserved. 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are met: 10 | 11 | * Redistributions of source code must retain the above copyright notice, this 12 | list of conditions and the following disclaimer. 13 | 14 | * Redistributions in binary form must reproduce the above copyright notice, 15 | this list of conditions and the following disclaimer in the documentation 16 | and/or other materials provided with the distribution. 17 | 18 | * Neither the name of the copyright holder nor the names of its 19 | contributors may be used to endorse or promote products derived from 20 | this software without specific prior written permission. 21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 23 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 25 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 26 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /include/tuner/JSON.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | // quick and dirty JSON dumper 8 | class JSON { 9 | static int depth; 10 | public: 11 | 12 | static void indent(std::ostream &file) { 13 | for (int i = 0; i < depth; i++) 14 | file << " "; 15 | } 16 | 17 | static void beginObject(std::ostream &file) { 18 | indent(file); 19 | file << "{\n"; 20 | depth++; 21 | } 22 | 23 | static void endObject(std::ostream &file) { 24 | depth--; 25 | indent(file); 26 | file << "}"; 27 | } 28 | 29 | static void beginArray(std::ostream &file) { 30 | indent(file); 31 | file << "[\n"; 32 | depth++; 33 | } 34 | 35 | static void endArray(std::ostream &file) { 36 | depth--; 37 | indent(file); 38 | file << "]"; 39 | } 40 | 41 | static void beginBind(std::ostream &file, std::string key) { 42 | indent(file); 43 | fmt(file, key); 44 | file << " : "; 45 | } 46 | 47 | static void comma(std::ostream &file) { 48 | file << ",\n"; 49 | } 50 | 51 | static void fmt(std::ostream &file, std::string val) { 52 | file << "\"" << val << "\""; 53 | } 54 | 55 | static void fmt(std::ostream &file, const char *val) { 56 | file << "\"" << val << "\""; 57 | } 58 | 59 | template < typename ValTy > 60 | static void fmt(std::ostream &file, ValTy val) { 61 | file << val; 62 | } 63 
| 64 | 65 | 66 | ////////////////////// 67 | // common operations 68 | 69 | template < typename ValTy > 70 | static void output(std::ostream &file, std::string key, ValTy val, bool hasNext = true) { 71 | beginBind(file, key); 72 | fmt(file, val); 73 | if (hasNext) 74 | comma(file); 75 | } 76 | 77 | }; 78 | -------------------------------------------------------------------------------- /tests/simple/compose_ptr.cpp: -------------------------------------------------------------------------------- 1 | // RUN: rm -f %t.ll 2 | // RUN: %atjitc %s -o %t 3 | // RUN: %t 8 1 2 3 4 5 6 7 8 %t.ll > %t.out 4 | // RUN: %FileCheck %s < %t.out 5 | // RUN: %FileCheck --check-prefix=CHECK-IR %s < %t.ll 6 | // 7 | // CHECK: 72 8 | // 9 | // only one function in the final IR 10 | // CHECK-IR: define 11 | // CHECK-IR-NOT: define 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | using namespace std::placeholders; 20 | 21 | int mul(int a, int b) { 22 | return a*b; 23 | } 24 | 25 | int accumulate(std::vector const &vec, int acum, int(*fun)(int)) { 26 | int a = acum; 27 | for(int e : vec) 28 | a += fun(e); 29 | return a; 30 | } 31 | 32 | int main(int argc, char** argv) { 33 | 34 | int n = atoi(argv[1]); 35 | 36 | // read input 37 | std::vector vec; 38 | for(int i = 0; i != n; ++i) 39 | vec.emplace_back(atoi(argv[i+2])); 40 | 41 | // generate code 42 | easy::FunctionWrapper mul_by_two = easy::jit(mul, _1, 2); 43 | 44 | static_assert(easy::is_function_wrapper::value, "Value not detected as function wrapper!"); 45 | static_assert(easy::is_function_wrapper::value, "Reference not detected as function wrapper!"); 46 | static_assert(easy::is_function_wrapper::value, "RReference not detected as function wrapper!"); 47 | 48 | easy::FunctionWrapper const&)> mul_vector_by_two = easy::jit(accumulate, _1, 0, mul_by_two, 49 | easy::options::dump_ir(argv[argc-1])); 50 | 51 | // kernel! 
52 | int result = mul_vector_by_two(vec); 53 | 54 | // output 55 | printf("%d\n", result); 56 | 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /include/tuner/param.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | namespace tuned_param { 9 | 10 | class IntRange : public tuner::ScalarRange { 11 | private: 12 | int lo_, hi_; 13 | int cur, dflt_; 14 | 15 | public: 16 | virtual ~IntRange() = default; 17 | int min() const override { return lo_; } 18 | int max() const override { return hi_; } 19 | int getVal() const override { return cur; } 20 | int getDefault() const override { return dflt_; } 21 | void apply(llvm::Module &M) override {} 22 | void setVal(int val) override { 23 | assert(lo_ <= val && val <= hi_); 24 | cur = val; 25 | } 26 | 27 | std::string getName() const override { 28 | return "tunable param id " + std::to_string(getID()); 29 | } 30 | 31 | IntRange(int lo, int hi) : IntRange(lo, hi, lo) {} 32 | 33 | IntRange(int lo, int hi, int dflt) : lo_(lo), hi_(hi), 34 | dflt_(dflt) { 35 | assert(lo_ <= hi_ && "range doesn't make sense"); 36 | assert(lo_ <= dflt && dflt <= hi_ && "default doesn't make sense"); 37 | setVal(dflt); 38 | } 39 | 40 | size_t hash() const { 41 | return max() ^ min() ^ getDefault(); 42 | } 43 | 44 | bool operator==(IntRange const&); 45 | }; 46 | 47 | } // end namespace tuned_param 48 | 49 | namespace tuner { 50 | template< > 51 | struct is_knob< tuned_param::IntRange > { 52 | static constexpr bool value = true; 53 | using rawTy = int; 54 | }; 55 | } 56 | 57 | namespace std { 58 | template<> struct hash 59 | { 60 | typedef tuned_param::IntRange argument_type; 61 | typedef std::size_t result_type; 62 | result_type operator()(argument_type const& s) const noexcept { 63 | return s.hash(); 64 | } 65 | }; 66 | } // end namespace std 67 | 
-------------------------------------------------------------------------------- /doc/readme/simple_at.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | // CHECK: dec(5) == 4.0 6 | // CHECK: neg(3) == -3.0 7 | // CHECK: fsubJIT(3, 2) == 1.0 8 | // CHECK: 8 - 7 == 1.0 9 | 10 | 11 | // INLINE FROM HERE #ALL# 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | // INLINE FROM HERE #USAGE# 18 | using namespace std::placeholders; 19 | using namespace tuned_param; 20 | 21 | float fsub(float a, float b) { return a-b; } 22 | void wait(int ms) { std::this_thread::sleep_for(std::chrono::milliseconds(ms)); } 23 | int main () { 24 | tuner::ATDriver AT; 25 | // returns a function computing fsub(a, 1.0) 26 | easy::FunctionWrapper const& decrement = AT.reoptimize(fsub, _1, 1.0); 27 | 28 | // returns a function computing fsub(0.0, b) 29 | auto const& negate = AT.reoptimize(fsub, 0.0, _1); 30 | 31 | // returns a function with a fixed `wait` period in the range [1, 500] 32 | auto const& pause = AT.reoptimize(wait, IntRange(1, 500)); 33 | 34 | printf("dec(5) == %f\n", decrement(5)); 35 | printf("neg(3) == %f\n", negate(3)); 36 | pause(); 37 | // ... 
38 | // TO HERE #USAGE# 39 | 40 | // INLINE FROM HERE #TUNERKIND# 41 | using namespace easy::options; 42 | 43 | // returns a function equivalent to fsub(a, b) 44 | auto const& fsubJIT = AT.reoptimize(fsub, _1, _2, 45 | tuner_kind(tuner::AT_Random)); 46 | 47 | printf("fsubJIT(3, 2) == %f\n", fsubJIT(3.0, 2.0)); 48 | // TO HERE #TUNERKIND# 49 | 50 | // INLINE FROM HERE #TUNING# 51 | for (int i = 0; i < 100; ++i) { 52 | auto const& tunedSub7 = 53 | AT.reoptimize(fsub, _1, 7.0, tuner_kind(tuner::AT_Random)); 54 | 55 | printf("8 - 7 == %f\n", tunedSub7(8)); 56 | } 57 | // TO HERE #TUNING# 58 | } 59 | // TO HERE #ALL# 60 | -------------------------------------------------------------------------------- /include/tuner/RandomTuner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | namespace tuner { 11 | 12 | // a tuner that always outputs a completely random configuration 13 | class RandomTuner : public AnalyzingTuner { 14 | protected: 15 | std::mt19937_64 Gen_; // 64-bit mersenne twister random number generator 16 | int aheadOfTimeCount = 0; 17 | 18 | public: 19 | RandomTuner(KnobSet KS, std::shared_ptr Cxt) : AnalyzingTuner(KS, std::move(Cxt)) { 20 | unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); 21 | Gen_ = std::mt19937_64(seed); 22 | } 23 | 24 | // we do not free any knobs, since MPM or other objects 25 | // should end up freeing them. 26 | ~RandomTuner() {} 27 | 28 | GenResult& getNextConfig() override { 29 | KnobConfig KC; 30 | 31 | // if this is the first requested config, we generate the default config. 32 | if (Configs_.empty()) 33 | KC = genDefaultConfig(KS_); 34 | else 35 | KC = genRandomConfig(KS_, Gen_); 36 | 37 | auto Conf = std::make_shared(KC); 38 | auto FB = createFeedback(Cxt_->getFeedbackKind(), PREFERRED_FEEDBACK); 39 | 40 | // keep track of this config. 
41 | Configs_.push_back({Conf, FB}); 42 | return Configs_.back(); 43 | } 44 | 45 | // we always know the next config, so we 46 | // need to bound the number of yes's given 47 | bool shouldCompileNext () override { 48 | bool ans = aheadOfTimeCount < DEFAULT_COMPILE_AHEAD; 49 | if (!ans) 50 | aheadOfTimeCount = 0; 51 | else 52 | aheadOfTimeCount++; 53 | return ans; 54 | } 55 | 56 | }; // end class RandomTuner 57 | 58 | } // namespace tuner 59 | -------------------------------------------------------------------------------- /get-llvm-with-polly.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #### 4 | # This script obtains and builds a version of LLVM that has 5 | # special extensions for supporting polly transformation 6 | # annotations in the LLVM IR. 7 | 8 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 9 | 10 | if [ $# -ne 1 ]; then 11 | echo "expected args: " 12 | exit 1 13 | fi 14 | 15 | ############ 16 | # prefer using Ninja if available 17 | command -v ninja 18 | if [ $? -eq 0 ]; then 19 | GENERATOR="Ninja" 20 | BUILD_CMD="ninja install" 21 | else 22 | NUM_CPUS=`getconf _NPROCESSORS_ONLN` 23 | GENERATOR="Unix Makefiles" 24 | BUILD_CMD="make install -j${NUM_CPUS}" 25 | fi 26 | 27 | echo "WARNING: this script is deprecated. Please consider using vanilla LLVM." 28 | 29 | 30 | cd "$1" || exit 1 # abort instead of checking/cloning in the caller's current directory 31 | 32 | ##### 33 | # make sure we're in an empty dir 34 | if [ `ls -1A . | wc -l` -ne 0 ]; then 35 | echo "provided directory must be empty!" 36 | exit 1 37 | fi 38 | 39 | ##### 40 | # get sources 41 | 42 | # PULL LLVM 43 | git clone --branch pragma --single-branch https://github.com/Meinersbur/llvm.git src 44 | cd ./src 45 | git reset --hard 87877c50435bba433b7fa261574b7e3c4372ea5a 46 | 47 | cd ./tools 48 | 49 | ## PULL CLANG 50 | git clone --branch pragma --single-branch https://github.com/Meinersbur/clang.git 51 | cd ./clang 52 | git reset --hard 2c565d6149be499a22ae3089eae37b0a07057b15 53 | cd .. 
54 | 55 | # PULL POLLY 56 | git clone --branch pragma --single-branch https://github.com/Meinersbur/polly.git 57 | cd ./polly 58 | git reset --hard 0abadf4cea38d9f6cf2236588e551ebfdcb80590 59 | cd .. 60 | 61 | cd ../.. 62 | 63 | ################## 64 | ## configure & build 65 | 66 | mkdir build install 67 | cd ./build 68 | cmake -C "$DIR/cmake/LLVM.cmake" -G "$GENERATOR" -DCMAKE_BUILD_TYPE=Release -DLLVM_TARGETS_TO_BUILD="X86" -DCMAKE_INSTALL_PREFIX=../install ../src 69 | 70 | $BUILD_CMD 71 | -------------------------------------------------------------------------------- /include/tuner/KnobSet.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | namespace tuner { 9 | 10 | // using std::variant & std::visit to try and combine the 11 | // different instances of Knob into a single container 12 | // looks like a royal pain-in-the-butt: 13 | // https://en.cppreference.com/w/cpp/utility/variant/visit 14 | // 15 | // instead, we use abstract function-objects to implement the equivalent 16 | // of a lambda-case in a functional language, e.g., (\x -> case x of ...) 17 | // and write our own generic operations over them. 18 | 19 | 20 | // NOTE if you add another structure member, you must immediately update: 21 | // 22 | // 0. class KnobConfig 23 | // 1. KnobSetAppFn and related abstract visitors. 24 | // 2. applyToKnobs and related generic operations. 
25 | 26 | class KnobSet { 27 | public: 28 | std::unordered_map IntKnobs; 29 | std::unordered_map LoopKnobs; 30 | 31 | size_t size() const { 32 | size_t numVals = 0; 33 | 34 | for (auto const& Entry : IntKnobs) 35 | numVals += Entry.second->size(); 36 | 37 | for (auto const& Entry : LoopKnobs) 38 | numVals += Entry.second->size(); 39 | 40 | return numVals; 41 | } 42 | 43 | }; 44 | 45 | // applies some arbitrary operation to a KnobSet 46 | class KnobSetAppFn { 47 | public: 48 | virtual void operator()(std::pair) = 0; 49 | virtual void operator()(std::pair) = 0; 50 | }; 51 | 52 | // apply an operation over the IDs of a collection of knobs 53 | class KnobIDAppFn { 54 | public: 55 | virtual void operator()(KnobID) = 0; 56 | }; 57 | 58 | void applyToKnobs(KnobSetAppFn &F, KnobSet const &KS); 59 | void applyToKnobs(KnobIDAppFn &F, KnobSet const &KS); 60 | 61 | } 62 | -------------------------------------------------------------------------------- /include/easy/runtime/RuntimePasses.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace easy { 8 | struct ContextAnalysis : 9 | public llvm::ImmutablePass { 10 | 11 | static char ID; 12 | 13 | ContextAnalysis() 14 | : llvm::ImmutablePass(ID), C_(nullptr) {} 15 | ContextAnalysis(std::shared_ptr C) 16 | : llvm::ImmutablePass(ID), C_(std::move(C)) {} 17 | 18 | easy::Context const* getContext() const { 19 | return C_.get(); 20 | } 21 | 22 | private: 23 | 24 | std::shared_ptr C_; 25 | }; 26 | 27 | struct InlineParameters: 28 | public llvm::ModulePass { 29 | 30 | static char ID; 31 | 32 | InlineParameters() 33 | : llvm::ModulePass(ID) {} 34 | InlineParameters(llvm::StringRef TargetName) 35 | : llvm::ModulePass(ID), TargetName_(TargetName) {} 36 | 37 | void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { 38 | AU.addRequired(); 39 | } 40 | 41 | bool runOnModule(llvm::Module &M) override; 42 | 43 | private: 44 | 
llvm::StringRef TargetName_; 45 | }; 46 | 47 | struct DevirtualizeConstant : 48 | public llvm::FunctionPass { 49 | 50 | static char ID; 51 | 52 | DevirtualizeConstant() 53 | : llvm::FunctionPass(ID) {} 54 | DevirtualizeConstant(llvm::StringRef TargetName) 55 | : llvm::FunctionPass(ID), TargetName_(TargetName) {} 56 | 57 | void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { 58 | AU.addRequired(); 59 | } 60 | 61 | bool runOnFunction(llvm::Function &F) override; 62 | 63 | private: 64 | llvm::StringRef TargetName_; 65 | }; 66 | 67 | llvm::Pass* createContextAnalysisPass(std::shared_ptr C); 68 | llvm::Pass* createInlineParametersPass(llvm::StringRef Name); 69 | llvm::Pass* createDevirtualizeConstantPass(llvm::StringRef Name); 70 | } 71 | -------------------------------------------------------------------------------- /include/easy/runtime/Function.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include 8 | 9 | // NOTE(kavon): everything somehow breaks if you try to include any 10 | // type definitions here. you need to forward-declare anything 11 | // you need here instead. 12 | 13 | namespace easy { 14 | class Function; 15 | } 16 | 17 | namespace llvm { 18 | class Module; 19 | class LLVMContext; 20 | } 21 | 22 | namespace tuner { 23 | class Optimizer; 24 | class Feedback; 25 | } 26 | 27 | namespace std { 28 | template<> struct hash 29 | { 30 | typedef easy::Function argument_type; 31 | typedef std::size_t result_type; 32 | result_type operator()(argument_type const& F) const noexcept; 33 | }; 34 | } 35 | 36 | namespace easy { 37 | 38 | class Context; 39 | struct GlobalMapping; 40 | 41 | class Function { 42 | 43 | // do not reorder the fields and do not add virtual methods! 
44 | void* Address; 45 | std::unique_ptr Holder; 46 | 47 | public: 48 | 49 | Function(void* Addr, std::unique_ptr H); 50 | 51 | void* getRawPointer() const { 52 | return Address; 53 | } 54 | 55 | void serialize(std::ostream&) const; 56 | static std::unique_ptr deserialize(std::istream&); 57 | 58 | bool operator==(easy::Function const&) const; 59 | 60 | llvm::Module const& getLLVMModule() const; 61 | 62 | static std::unique_ptr CompileAndWrap ( 63 | const char*Name, GlobalMapping* Globals, 64 | std::unique_ptr LLVMCxt, 65 | std::unique_ptr M, 66 | llvm::CodeGenOpt::Level CGLevel, 67 | bool UseFastISel, 68 | bool UseIPRA 69 | ); 70 | 71 | static void WriteOptimizedToFile(llvm::Module const &M, std::string const& File, bool Append = false); 72 | 73 | friend 74 | std::hash::result_type std::hash::operator()(argument_type const& F) const noexcept; 75 | }; 76 | 77 | } 78 | -------------------------------------------------------------------------------- /include/tuner/Util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define DEFAULT_MIN_TRIALS 2 9 | #define DEFAULT_COMPILE_AHEAD 2 10 | #define DEFAULT_STD_ERR_PCT 10.0 11 | #define COMPILE_JOB_BAILOUT_MS 120'000 12 | 13 | #define BEST_SWAP_ENABLE true 14 | 15 | // GROWTH_RATE * 100 = percent 16 | #define EXPERIMENT_DEPLOY_GROWTH_RATE 0.2 17 | #define EXPERIMENT_MIN_DEPLOY_NS 50'000 18 | 19 | #define PREFERRED_FEEDBACK FB_Total 20 | 21 | namespace tuner { 22 | // a "missing" value indicator 23 | static constexpr float MISSING = std::numeric_limits::quiet_NaN(); 24 | 25 | ///////// 26 | // generates a random integer that is "nearby" an 27 | // existing integer, within the given inclusive range 28 | // [min, max], given the amount of energy we have 29 | // to move away from the current integer. 30 | // Energy must be within [0, 100]. 
31 | // 32 | // NOTE: the returned integer may be equal to the existing one. 33 | template < typename RNE > 34 | int nearbyInt (RNE &Eng, int cur, int min, int max, double energy) { 35 | // 68% of values drawn will be within this distance from the old value. 36 | int range = std::abs(max - min); 37 | int scaledRange = range * (energy / 100.0); 38 | int stdDev = scaledRange / 2.0; 39 | 40 | // sample from a normal distribution, where the mean is 41 | // the old value, and the std deviation is influenced by the energy. 42 | // NOTE: a logistic distribution, which is like a higher kurtosis 43 | // normal distribution, might give us better numbers when the 44 | // energy is low? 45 | std::normal_distribution dist(cur, stdDev); 46 | 47 | // ensure the value is in the right range. 48 | int val = std::round(dist(Eng)); 49 | val = std::max(val, min); 50 | val = std::min(val, max); 51 | 52 | return val; 53 | } 54 | 55 | // log_2(val) where val is a power-of-two. 56 | int pow2Bit(uint64_t val); 57 | 58 | // sleeps the current thread 59 | void sleep_for(unsigned ms); 60 | 61 | } // end namespace tuner 62 | -------------------------------------------------------------------------------- /misc/docker/GenDockerfile.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import sys 3 | 4 | Head = "# Dockerfile derived from easy::jit's .travis.yml" 5 | From = "ubuntu:latest" 6 | Manteiner = "Juan Manuel Martinez Caamaño jmartinezcaamao@gmail.com" 7 | base_packages = ['build-essential', 'python', 'python-pip', 'git', 'wget', 'unzip', 'cmake'] 8 | 9 | travis = yaml.load(open(sys.argv[1])) 10 | travis_sources = travis['addons']['apt']['sources'] 11 | travis_packages = travis['addons']['apt']['packages'] 12 | before_install = travis['before_install'] 13 | script = travis['script'] 14 | 15 | # I could not get a better way to do this 16 | AddSourceCmd = { 17 | "llvm-toolchain-trusty-6.0" : "deb http://apt.llvm.org/trusty/ 
llvm-toolchain-trusty-6.0 main | tee -a /etc/apt/sources.list > /dev/null", 18 | "ubuntu-toolchain-r-test" : "apt-add-repository -y \"ppa:ubuntu-toolchain-r/test\"" 19 | } 20 | 21 | Sources = ["RUN {cmd} \n".format(cmd=AddSourceCmd[source]) for source in travis_sources] 22 | 23 | Apt = """# add sources 24 | RUN apt-get update 25 | RUN apt-get install -y software-properties-common 26 | {AddSources} 27 | # install apt packages, base first, then travis 28 | RUN apt-get update 29 | RUN apt-get upgrade -y 30 | RUN apt-get install -y {base_packages} && \\ 31 | apt-get install -y {travis_packages} 32 | """.format(AddSources = "".join(Sources), base_packages = " ".join(base_packages), travis_packages=" ".join(travis_packages)) 33 | 34 | Checkout = "RUN git clone --depth=50 --branch=${branch} https://github.com/jmmartinez/easy-just-in-time.git easy-just-in-time && cd easy-just-in-time\n" 35 | BeforeInstall = "".join(["RUN cd /easy-just-in-time && {0} \n".format(cmd) for cmd in before_install]) 36 | Run = "RUN cd easy-just-in-time && \\\n" + "".join([" {cmd} && \\ \n".format(cmd=cmd) for cmd in script]) + " echo ok!" 37 | 38 | Template = """{Head} 39 | 40 | FROM {From} 41 | 42 | LABEL manteiner {Manteiner} 43 | 44 | ARG branch=master 45 | 46 | {Apt} 47 | # checkout 48 | {Checkout} 49 | # install other deps 50 | {BeforeInstall} 51 | # compile and test! 
52 | {Run}""" 53 | 54 | print(Template.format(Head=Head, From=From, Manteiner=Manteiner, Apt=Apt, BeforeInstall=BeforeInstall, Checkout=Checkout, Run=Run)) 55 | -------------------------------------------------------------------------------- /include/easy/runtime/BitcodeTracker.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | namespace easy { 11 | 12 | struct GlobalMapping { 13 | const char* Name; 14 | void* Address; 15 | }; 16 | 17 | struct FunctionInfo { 18 | const char* Name; 19 | GlobalMapping* Globals; 20 | const char* Bitcode; 21 | size_t BitcodeLen; 22 | 23 | FunctionInfo(const char* N, GlobalMapping* G, const char* B, size_t BL) 24 | : Name(N), Globals(G), Bitcode(B), BitcodeLen(BL) 25 | { } 26 | }; 27 | 28 | class BitcodeTracker { 29 | 30 | // map function to all the info required for jit compilation 31 | std::unordered_map Functions; 32 | std::unordered_map NameToAddress; 33 | 34 | public: 35 | 36 | void registerFunction(void* FPtr, const char* Name, GlobalMapping* Globals, const char* Bitcode, size_t BitcodeLen) { 37 | Functions.emplace(FPtr, FunctionInfo{Name, Globals, Bitcode, BitcodeLen}); 38 | NameToAddress.emplace(Name, FPtr); 39 | } 40 | 41 | void* getAddress(std::string const &Name); 42 | const char* getName(void*); 43 | std::tuple getNameAndGlobalMapping(void* FPtr); 44 | bool hasGlobalMapping(void* FPtr) const; 45 | 46 | /* 47 | NOTE: on getModule / getModuleWithContext 48 | 49 | "LLVMContext owns and manages the core "global" data of LLVM's core 50 | infrastructure, including the type and constant uniquing tables. 51 | LLVMContext itself provides no locking guarantees, so you should be 52 | careful to have one context per thread." 53 | 54 | Thus, right now, it seems we're stuck with parsing the bitcode into a new 55 | chunk of memory on each JIT event. 
56 | 57 | */ 58 | 59 | using ModuleContextPair = std::pair, std::unique_ptr>; 60 | ModuleContextPair getModule(void* FPtr); 61 | std::unique_ptr getModuleWithContext(void* FPtr, llvm::LLVMContext &C); 62 | 63 | // get the singleton object 64 | static BitcodeTracker& GetTracker(); 65 | }; 66 | 67 | } 68 | -------------------------------------------------------------------------------- /tests/tuner/lifetime.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc -DTUNER_KIND="tuner::AT_None" %s -o %t 2 | // RUN: %valgrind --leak-check=summary %t 2> vgrind.out 3 | // RUN: %FileCheck %s < vgrind.out 4 | 5 | // RUN: %atjitc -DTUNER_KIND="tuner::AT_Random" %s -o %t 6 | // RUN: %valgrind --leak-check=summary %t 2> vgrind.out 7 | // RUN: %FileCheck %s < vgrind.out 8 | 9 | // RUN: %atjitc -DTUNER_KIND="tuner::AT_Anneal" %s -o %t 10 | // RUN: %valgrind --leak-check=summary %t 2> vgrind.out 11 | // RUN: %FileCheck %s < vgrind.out 12 | 13 | // RUN: %atjitc -DTUNER_KIND="tuner::AT_Bayes" %s -o %t 14 | // RUN: %valgrind --leak-check=summary %t 2> vgrind.out 15 | // RUN: %FileCheck %s < vgrind.out 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | // test the destruction of the ATDriver for leaks 29 | 30 | // CHECK: LEAK SUMMARY: 31 | // CHECK-NEXT: definitely lost: 0 bytes in 0 blocks 32 | 33 | using namespace std::placeholders; 34 | using namespace tuned_param; 35 | using namespace easy::options; 36 | 37 | void doNothing(int val) { 38 | return; 39 | } 40 | 41 | int main(int argc, char** argv) { 42 | 43 | const int ITERS = 15; 44 | int minVal = 9999999; 45 | int maxVal = 99999999; 46 | int dflt = (maxVal - minVal) / 2; 47 | 48 | #ifdef TUNER_KIND 49 | tuner::AutoTuner TunerKind = TUNER_KIND; 50 | #endif 51 | 52 | for (int i = 0; i < ITERS; i++) { 53 | tuner::ATDriver AT; 54 | auto const& Func1 = AT.reoptimize(doNothing, IntRange(minVal, maxVal, 
dflt), 55 | tuner_kind(TunerKind), 56 | feedback_kind(tuner::FB_Total_IgnoreError), 57 | blocking(true)); 58 | 59 | auto const& Func2 = AT.reoptimize(doNothing, IntRange(minVal, maxVal, dflt), 60 | tuner_kind(TunerKind), 61 | feedback_kind(tuner::FB_Total_IgnoreError), 62 | blocking(true)); 63 | 64 | 65 | Func1(); 66 | Func2(); 67 | } 68 | 69 | return 0; 70 | } 71 | -------------------------------------------------------------------------------- /benchmark/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | option(BENCHMARK "Enable benchmarking" OFF) 2 | 3 | if(BENCHMARK) 4 | set(CMAKE_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++) 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -disable-O0-optnone -Xclang -load -Xclang ${EASY_JIT_PASS}") 6 | 7 | include_directories(${LLVM_INCLUDE_DIRS}) 8 | add_definitions(${LLVM_DEFINITIONS}) 9 | 10 | find_package(Benchmark REQUIRED) 11 | include_directories(${Benchmark_INCLUDE_DIRS}) 12 | link_directories(${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) 13 | 14 | ######### 15 | add_executable(atjit-benchmark-O0 benchmark.cpp) 16 | add_dependencies(atjit-benchmark-O0 atjit-core) 17 | set_target_properties(atjit-benchmark-O0 PROPERTIES COMPILE_FLAGS "-O0") 18 | target_link_libraries (atjit-benchmark-O0 ${Benchmark_LIBRARIES} ATJitRuntime pthread) 19 | ######### 20 | 21 | ######### 22 | add_executable(atjit-benchmark-O1 benchmark.cpp) 23 | add_dependencies(atjit-benchmark-O1 atjit-core) 24 | set_target_properties(atjit-benchmark-O1 PROPERTIES COMPILE_FLAGS "-O1") 25 | target_link_libraries (atjit-benchmark-O1 ${Benchmark_LIBRARIES} ATJitRuntime pthread) 26 | ######### 27 | 28 | ######### 29 | add_executable(atjit-benchmark-O2 benchmark.cpp) 30 | add_dependencies(atjit-benchmark-O2 atjit-core) 31 | set_target_properties(atjit-benchmark-O2 PROPERTIES COMPILE_FLAGS "-O2") 32 | target_link_libraries (atjit-benchmark-O2 ${Benchmark_LIBRARIES} ATJitRuntime pthread) 33 | ######### 34 | 35 | ######### 
36 | add_executable(atjit-benchmark-O3 benchmark.cpp) 37 | add_dependencies(atjit-benchmark-O3 atjit-core) 38 | set_target_properties(atjit-benchmark-O3 PROPERTIES COMPILE_FLAGS "-O3") 39 | target_link_libraries (atjit-benchmark-O3 ${Benchmark_LIBRARIES} ATJitRuntime pthread) 40 | ######### 41 | 42 | if (${POLLY_KNOBS}) 43 | ######### 44 | add_executable(atjit-benchmark-O3p benchmark.cpp) 45 | add_dependencies(atjit-benchmark-O3p atjit-core) 46 | set_target_properties(atjit-benchmark-O3p PROPERTIES COMPILE_FLAGS "-O3 -mllvm -polly") 47 | target_link_libraries (atjit-benchmark-O3p ${Benchmark_LIBRARIES} ATJitRuntime pthread) 48 | ######### 49 | endif() 50 | 51 | endif() 52 | -------------------------------------------------------------------------------- /tests/meta/type_list+func_traits.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %clangxx %cxxflags %include_flags -c %s -o /dev/null 2 | 3 | #include 4 | #include 5 | 6 | using namespace easy; 7 | using namespace easy::meta; 8 | 9 | int foo(int, bool, float); 10 | 11 | int main() { 12 | static_assert(std::is_same< 13 | type_list::head, 14 | int>::value, 15 | "not same type"); 16 | 17 | static_assert(std::is_same< 18 | type_list::at<0>, 19 | int>::value, 20 | "not same type"); 21 | static_assert(std::is_same< 22 | type_list::at<1>, 23 | bool>::value, 24 | "not same type"); 25 | static_assert(std::is_same< 26 | type_list::at<2>, 27 | float>::value, 28 | "not same type"); 29 | static_assert(type_list::size == 3, 30 | "not correct size"); 31 | static_assert(!type_list::empty, 32 | "detected as empty"); 33 | static_assert(std::is_same< 34 | type_list::tail::head, 35 | bool>::value, 36 | "not same type"); 37 | 38 | using foo_type = decltype(foo); 39 | using foo_traits = function_traits; 40 | 41 | static_assert(std::is_same::value, 42 | "not same type"); 43 | static_assert(std::is_same< 44 | foo_traits::parameter_list, 45 | type_list>::value, 46 | "not same type"); 47 | 
48 | static_assert(std::is_same< 49 | typename meta::init_list<3,void>::type, 50 | type_list>::value, 51 | "not same type"); 52 | static_assert(std::is_same< 53 | typename meta::init_list<0,void>::type, 54 | type_list<>>::value, 55 | "not same type"); 56 | 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /runtime/Utils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | using namespace llvm; 14 | 15 | static const char EasyJitMD[] = "easy::jit"; 16 | static const char EntryTag[] = "entry"; 17 | 18 | std::string easy::GetEntryFunctionName(Module const &M) { 19 | NamedMDNode* MD = M.getNamedMetadata(EasyJitMD); 20 | 21 | for(MDNode *Operand : MD->operands()) { 22 | if(Operand->getNumOperands() != 2) 23 | continue; 24 | MDString* Entry = dyn_cast(Operand->getOperand(0)); 25 | MDString* Name = dyn_cast(Operand->getOperand(1)); 26 | 27 | if(!Entry || !Name || Entry->getString() != EntryTag) 28 | continue; 29 | 30 | return Name->getString(); 31 | } 32 | 33 | report_fatal_error("No entry function in easy::jit module!"); 34 | return ""; 35 | } 36 | 37 | void easy::MarkAsEntry(llvm::Function &F) { 38 | Module &M = *F.getParent(); 39 | LLVMContext &Ctx = F.getContext(); 40 | NamedMDNode* MD = M.getOrInsertNamedMetadata(EasyJitMD); 41 | MDNode* Node = MDNode::get(Ctx, { MDString::get(Ctx, EntryTag), 42 | MDString::get(Ctx, F.getName())}); 43 | MD->addOperand(Node); 44 | } 45 | 46 | void easy::UnmarkEntry(llvm::Module &M) { 47 | NamedMDNode* MD = M.getOrInsertNamedMetadata(EasyJitMD); 48 | M.eraseNamedMetadata(MD); 49 | } 50 | 51 | std::unique_ptr 52 | easy::CloneModuleWithContext(llvm::Module const &LM, llvm::LLVMContext &C) { 53 | // I have not found a better way to do this without having to fully reimplement 54 | // CloneModule 55 | 56 | std::string buf; 
57 | 58 | // write module 59 | { 60 | llvm::raw_string_ostream stream(buf); 61 | llvm::WriteBitcodeToFile(PASS_MODULE_ARG(LM), stream); 62 | stream.flush(); 63 | } 64 | 65 | // read the module 66 | auto MemBuf = llvm::MemoryBuffer::getMemBuffer(llvm::StringRef(buf)); 67 | auto ModuleOrError = llvm::parseBitcodeFile(*MemBuf, C); 68 | if(ModuleOrError.takeError()) 69 | return nullptr; 70 | 71 | auto LMCopy = std::move(ModuleOrError.get()); 72 | return LMCopy; 73 | } 74 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | # cache key for a ref on each job means that this job will check the cache for 2 | # a directory image keyed on that thing, and if it exists, it will load that before 3 | # executing the script. then it will save the image back under the same key when the job 4 | # succeeds. so we copy-paste it all over 5 | 6 | stages: 7 | - prereqs 8 | - build 9 | - test 10 | 11 | # obtain / check for dependencies 12 | deps: 13 | stage: prereqs 14 | script: 15 | # check for required apt packages 16 | - dpkg-query -W libstdc++-8-dev libopencv-dev libdispatch0 libdispatch-dev valgrind 17 | # check for required pip packages 18 | - python -c "import lit" 19 | # build other prereqs 20 | - ./xgboost/get.sh 21 | - mkdir build install 22 | - cd build 23 | - ../benchmark/setup.sh 24 | cache: 25 | key: "$CI_PIPELINE_ID" 26 | paths: 27 | - xgboost/root/lib 28 | - xgboost/root/include 29 | - xgboost/root/rabit/include 30 | - xgboost/root/dmlc-core/include 31 | - build 32 | - install 33 | 34 | 35 | # template for building and testing atjit 36 | .atjit_test: 37 | stage: build 38 | script: 39 | - cd build 40 | - cmake -G "Unix Makefiles" ${CUSTOM_LLVM} -DCMAKE_BUILD_TYPE=${ATJIT_BUILD_TYPE} -DBENCHMARK=ON -DBENCHMARK_DIR=`pwd`/benchmark/install -DATJIT_EXAMPLE=ON -DCMAKE_INSTALL_PREFIX=../install -DPOLLY_KNOBS=OFF .. 
41 | - make -j 4 42 | - make install 43 | - make check 44 | dependencies: 45 | - deps 46 | cache: 47 | key: "$CI_PIPELINE_ID" 48 | paths: 49 | - xgboost/root/lib 50 | - xgboost/root/include 51 | - xgboost/root/rabit/include 52 | - xgboost/root/dmlc-core/include 53 | - build 54 | - install 55 | policy: pull # don't push changes to the cache from this job 56 | 57 | atjit_release: 58 | variables: 59 | ATJIT_BUILD_TYPE: "Release" 60 | CUSTOM_LLVM: "" 61 | extends: .atjit_test 62 | 63 | atjit_debug: 64 | variables: 65 | ATJIT_BUILD_TYPE: "Debug" 66 | CUSTOM_LLVM: "" 67 | extends: .atjit_test 68 | 69 | # TODO: implement method to build and test ./get-llvm.sh script. 70 | # I can't recall how to use gitlab's caching stuff to save state between 71 | # stages. Ideally we would just move to Docker to simplify this. 72 | -------------------------------------------------------------------------------- /runtime/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2") 2 | 3 | # enable logging with streams with loguru 4 | add_definitions(-DLOGURU_WITH_STREAMS=1) 5 | 6 | include(XGBoost) 7 | include(GCD) 8 | 9 | add_library(ATJitRuntime SHARED 10 | BitcodeTracker.cpp 11 | Context.cpp 12 | Function.cpp 13 | InitNativeTarget.cpp 14 | Utils.cpp 15 | loguru.cpp 16 | pass/ContextAnalysis.cpp 17 | pass/DevirtualizeConstant.cpp 18 | pass/InlineParameters.cpp 19 | tuner/Optimizer.cpp 20 | tuner/Feedback.cpp 21 | tuner/AnalyzingTuner.cpp 22 | tuner/LoopKnob.cpp 23 | tuner/LoopSettingGen.cpp 24 | tuner/KnobConfig.cpp 25 | tuner/KnobSet.cpp 26 | tuner/Statics.cpp 27 | tuner/Knob.cpp 28 | tuner/Util.cpp 29 | ) 30 | 31 | # the installed version of the library needs to 32 | # retain its rpath to the non-system-wide libs that we're linking in. 33 | # it will get stripped during install without this. 
34 | set_target_properties(ATJitRuntime PROPERTIES 35 | INSTALL_RPATH "${XGB_LIB_DIR};${LLVM_LIBRARY_DIR}" 36 | INSTALL_RPATH_USE_LINK_PATH TRUE) 37 | 38 | include_directories(${GCD_INCLUDE_DIR}) 39 | 40 | 41 | 42 | if (${POLLY_KNOBS}) 43 | 44 | include(Polly) 45 | 46 | include_directories(${Polly_INCLUDE_DIRS}) 47 | add_definitions(${Polly_DEFINITIONS}) 48 | 49 | # Polly's exported targets already include libLLVM 50 | target_link_libraries(ATJitRuntime PUBLIC Polly) 51 | 52 | else() ######### 53 | 54 | find_library(LIBLLVM_LIB 55 | NAMES LLVM 56 | PATHS "${LLVM_LIBRARY_DIR}" 57 | NO_DEFAULT_PATH 58 | ) 59 | requireFound(LIBLLVM_LIB "Could not find libLLVM !") 60 | 61 | include_directories(${LLVM_INCLUDE_DIRS}) 62 | add_definitions(${LLVM_DEFINITIONS}) 63 | target_link_libraries(ATJitRuntime PUBLIC ${LIBLLVM_LIB}) 64 | 65 | endif() 66 | ######################### 67 | 68 | target_link_libraries(ATJitRuntime PUBLIC ${XGB_LIB}) 69 | target_link_libraries(ATJitRuntime PUBLIC ${GCD_LIB}) 70 | 71 | 72 | set(ATJIT_RUNTIME ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libATJitRuntime${CMAKE_SHARED_LIBRARY_SUFFIX} PARENT_SCOPE) 73 | 74 | install(TARGETS ATJitRuntime 75 | LIBRARY DESTINATION lib) 76 | 77 | configure_file("${ATJIT_ROOT}/misc/atjitc.in" "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/atjitc") 78 | install(FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/atjitc DESTINATION bin 79 | PERMISSIONS OWNER_EXECUTE OWNER_READ ) 80 | -------------------------------------------------------------------------------- /include/easy/options.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #define EASY_NEW_OPTION_STRUCT(Name) \ 6 | struct Name; \ 7 | template<> struct is_option { \ 8 | static constexpr bool value = true; }; \ 9 | struct Name 10 | 11 | #define EASY_HANDLE_OPTION_STRUCT(Name, Ctx) \ 12 | void handle(easy::Context &Ctx) const 13 | 14 | 15 | namespace easy { 16 | namespace options{ 17 | 18 | template 19 | struct 
is_option { 20 | static constexpr bool value = false; 21 | }; 22 | 23 | EASY_NEW_OPTION_STRUCT(opt_level) 24 | : public std::pair { 25 | 26 | opt_level(unsigned OptLevel, unsigned OptSize) 27 | : std::pair(OptLevel,OptSize) {} 28 | 29 | EASY_HANDLE_OPTION_STRUCT(opt_level, C) { 30 | C.setOptLevel(first, second); 31 | } 32 | }; 33 | 34 | // tuner kind option & correspondence. 35 | EASY_NEW_OPTION_STRUCT(tuner_kind) { 36 | 37 | tuner_kind(tuner::AutoTuner kind) 38 | : kind_(kind) {} 39 | 40 | EASY_HANDLE_OPTION_STRUCT(IGNORED, C) { 41 | C.setTunerKind(kind_); 42 | } 43 | 44 | private: 45 | tuner::AutoTuner kind_; 46 | }; 47 | 48 | // the feedback module used to analyze perf 49 | EASY_NEW_OPTION_STRUCT(feedback_kind) { 50 | 51 | feedback_kind(tuner::FeedbackKind val) 52 | : val_(val) {} 53 | 54 | EASY_HANDLE_OPTION_STRUCT(IGNORED, C) { 55 | C.setFeedbackKind(val_); 56 | } 57 | 58 | private: 59 | tuner::FeedbackKind val_; 60 | }; 61 | 62 | // if true, the driver will be more willing to 63 | // wait, i.e. block, on compilation jobs. 
64 | EASY_NEW_OPTION_STRUCT(blocking) { 65 | 66 | blocking(bool val) 67 | : val_(val) {} 68 | 69 | EASY_HANDLE_OPTION_STRUCT(IGNORED, C) { 70 | C.setWaitForCompile(val_); 71 | } 72 | 73 | private: 74 | bool val_; 75 | }; 76 | 77 | // option used for writing the ir to a file, useful for debugging 78 | EASY_NEW_OPTION_STRUCT(dump_ir) { 79 | dump_ir(std::string const &file) 80 | : file_(file), beforeFile_(file + ".before") {} 81 | 82 | EASY_HANDLE_OPTION_STRUCT(dump_ir, C) { 83 | C.setDebugFile(file_); 84 | C.setDebugBeforeFile(beforeFile_); 85 | } 86 | 87 | private: 88 | std::string file_; 89 | std::string beforeFile_; 90 | }; 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /benchmark/convolve.h: -------------------------------------------------------------------------------- 1 | #ifndef BENCH_CONVOLVE 2 | #define BENCH_CONVOLVE 3 | 4 | 5 | void __attribute__((noinline)) kernel(int n, int m, int * image, int const * mask, int* out) { 6 | for(int i = 0; i < n - m; ++i) 7 | for(int j = 0; j < n - m; ++j) 8 | for(int k = 0; k < m; ++k) 9 | for(int l = 0; l < m; ++l) 10 | out[i * (n-m+1) + j] += image[(i+k) * n + j+l] * mask[k *m + l]; 11 | } 12 | 13 | 14 | 15 | static const int mask[3][3] = {{1,2,3},{0,0,0},{3,2,1}}; 16 | 17 | static void BM_convolve_jit(benchmark::State& state) { 18 | using namespace std::placeholders; 19 | int n = state.range(0); 20 | std::vector image(n*n,0); 21 | std::vector out((n-3)*(n-3),0); 22 | benchmark::ClobberMemory(); 23 | 24 | auto my_kernel = easy::jit(kernel, n, 3, _1, &mask[0][0], _2); 25 | for (auto _ : state) { 26 | my_kernel(image.data(), out.data()); 27 | benchmark::ClobberMemory(); 28 | } 29 | } 30 | // NOTE: this test was disabled due to it crashing sometimes. 
See 31 | // here for more info: https://github.com/kavon/atJIT/issues/2 32 | // 33 | // BENCHMARK(BM_convolve_jit)->RangeMultiplier(2)->Range(16,1024); 34 | 35 | static void BM_convolve(benchmark::State& state) { 36 | int n = state.range(0); 37 | std::vector image(n*n,0); 38 | std::vector out((n-3)*(n-3),0); 39 | benchmark::ClobberMemory(); 40 | 41 | for (auto _ : state) { 42 | kernel(n, 3, image.data(), &mask[0][0], out.data()); 43 | benchmark::ClobberMemory(); 44 | } 45 | } 46 | // NOTE: this test sometimes segfaults above 512 too! 47 | // https://travis-ci.org/kavon/atJIT/builds/416869958 48 | // BENCHMARK(BM_convolve)->RangeMultiplier(2)->Range(16,1024); 49 | 50 | static void BM_convolve_compile_jit(benchmark::State& state) { 51 | using namespace std::placeholders; 52 | for (auto _ : state) { 53 | auto my_kernel = easy::jit(kernel, 11, 3, _1, &mask[0][0], _2); 54 | benchmark::ClobberMemory(); 55 | } 56 | } 57 | BENCHMARK(BM_convolve_compile_jit); 58 | 59 | static void BM_convolve_cache_hit_jit(benchmark::State& state) { 60 | using namespace std::placeholders; 61 | static easy::Cache<> cache; 62 | cache.jit(kernel, 11, 3, _1, &mask[0][0], _2); 63 | benchmark::ClobberMemory(); 64 | 65 | for (auto _ : state) { 66 | auto const &my_kernel = cache.jit(kernel, 11, 3, _1, &mask[0][0], _2); 67 | benchmark::ClobberMemory(); 68 | } 69 | } 70 | BENCHMARK(BM_convolve_cache_hit_jit); 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /runtime/tuner/AnalyzingTuner.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | namespace tuner { 11 | 12 | namespace { 13 | 14 | using namespace llvm; 15 | 16 | class LoopKnobCreator : public LoopPass { 17 | private: 18 | KnobSet *KS; 19 | public: 20 | static char ID; 21 | 22 | // required to have a default ctor 23 | LoopKnobCreator() : LoopPass(ID), KS(nullptr) {} 
24 | 25 | LoopKnobCreator(KnobSet *KSet) 26 | : LoopPass(ID), KS(KSet) {}; 27 | 28 | LoopKnob* buildTree(Loop *Loop) { 29 | // build children knobs 30 | std::vector SubLoops; 31 | for (auto Child : Loop->getSubLoops()) { 32 | SubLoops.push_back(buildTree(Child)); 33 | } 34 | 35 | // build self 36 | MDNode *LoopMD = Loop->getLoopID(); 37 | 38 | if (!LoopMD) { 39 | // LoopNamer should have been run when embedding the bitcode. 40 | report_fatal_error("encountered an improperly named loop!"); 41 | } 42 | 43 | LoopKnob *LK = new LoopKnob(getLoopName(LoopMD), std::move(SubLoops), Loop->getLoopDepth()); 44 | 45 | KS->LoopKnobs[LK->getID()] = LK; 46 | 47 | return LK; 48 | } 49 | 50 | bool runOnLoop(Loop *Loop, LPPassManager &LPM) override { 51 | // we have to build top-down, so we only run on top-level loops 52 | if (Loop->getLoopDepth() == 1) 53 | buildTree(Loop); 54 | 55 | return false; 56 | } 57 | 58 | }; // end class 59 | 60 | char LoopKnobCreator::ID = 0; 61 | static RegisterPass Register("loop-knob-creator", 62 | "Collect the names of all loops to make knobs.", 63 | false /* only looks at CFG*/, 64 | false /* analysis pass */); // NOTE it kind of is analysis... 65 | 66 | } // end anonymous namespace 67 | 68 | 69 | void AnalyzingTuner::analyze(llvm::Module &M) { 70 | // only run this once, since the input module 71 | // is fixed for each instance of a Tuner. 
72 | if (alreadyRun) 73 | return; 74 | alreadyRun = true; 75 | 76 | // initialize dependencies 77 | initializeLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry()); 78 | 79 | legacy::PassManager Passes; 80 | Passes.add(new LoopKnobCreator(&KS_)); 81 | Passes.run(M); 82 | 83 | } 84 | 85 | } // end namespace 86 | -------------------------------------------------------------------------------- /pass/MayAliasTracer.cpp: -------------------------------------------------------------------------------- 1 | #include "MayAliasTracer.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace llvm; 9 | 10 | void easy::MayAliasTracer::mayAliasWithStoredValues(Value* V, VSet &Loaded, VSet &Stored) { 11 | if(!Stored.insert(V).second) 12 | return; 13 | if(auto* GO = dyn_cast(V)) 14 | GOs_.insert(GO); 15 | 16 | if(auto * II = dyn_cast(V)) { 17 | if(II->getIntrinsicID() == Intrinsic::memcpy) { 18 | mayAliasWithLoadedValues(II->getArgOperand(1), Loaded, Stored); 19 | } 20 | } 21 | 22 | if(auto* SI = dyn_cast(V)) { 23 | mayAliasWithLoadedValues(SI->getValueOperand(), Loaded, Stored); 24 | } 25 | 26 | if(isa(V)||isa(V)||isa(V)) { 27 | for(User* U : V->users()) { 28 | mayAliasWithStoredValues(U, Loaded, Stored); 29 | } 30 | } 31 | } 32 | 33 | void easy::MayAliasTracer::mayAliasWithLoadedValues(Value * V, VSet &Loaded, VSet &Stored) { 34 | if(!Loaded.insert(V).second) 35 | return; 36 | if(auto* GO = dyn_cast(V)) 37 | GOs_.insert(GO); 38 | 39 | auto mayAliasWithLoadedOperand = [this, &Loaded, &Stored](Value* V) { mayAliasWithLoadedValues(V, Loaded, Stored);}; 40 | 41 | //TODO: generalize that 42 | if(auto* PHI = dyn_cast(V)) { 43 | std::for_each(PHI->op_begin(), PHI->op_end(), mayAliasWithLoadedOperand); 44 | } 45 | if(auto* Select = dyn_cast(V)) { 46 | mayAliasWithLoadedValues(Select->getTrueValue(), Loaded, Stored); 47 | mayAliasWithLoadedValues(Select->getFalseValue(), Loaded, Stored); 48 | } 49 | if(auto* Alloca = dyn_cast(V)) { 50 | 
mayAliasWithStoredValues(Alloca, Loaded, Stored); 51 | } 52 | if(auto *GEP = dyn_cast(V)) { 53 | mayAliasWithLoadedValues(GEP->getPointerOperand(), Loaded, Stored); 54 | } 55 | if(auto *BC = dyn_cast(V)) { 56 | mayAliasWithLoadedValues(BC->getOperand(0), Loaded, Stored); 57 | } 58 | if(auto const* CE = dyn_cast(V)) { 59 | switch(CE->getOpcode()) { 60 | case Instruction::GetElementPtr: 61 | case Instruction::BitCast: 62 | return mayAliasWithLoadedValues(CE->getOperand(0), Loaded, Stored); 63 | default: 64 | ; 65 | } 66 | } 67 | if(auto* OtherGV = dyn_cast(V)) { 68 | if(OtherGV->hasInitializer()) 69 | mayAliasWithLoadedValues(OtherGV->getInitializer(), Loaded, Stored); 70 | mayAliasWithStoredValues(OtherGV, Loaded, Stored); 71 | } 72 | if(auto* CA = dyn_cast(V)) { 73 | std::for_each(CA->op_begin(), CA->op_end(), mayAliasWithLoadedOperand); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /include/tuner/KnobConfig.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | namespace tuner { 12 | 13 | // a data type that is suitable for use by mathematical models. 14 | // Conceptually, it is an ID-indexed snapshot of a KnobSet configuration. 15 | 16 | // NOTE if you add another structure member, you must immediately update: 17 | // 18 | // 0. class KnobSet 19 | // 1. KnobConfigAppFn and related abstract visitors. 20 | // 2. applyToConfig and related generic operations. 
21 | 22 | class KnobConfig { 23 | public: 24 | std::unordered_map IntConfig; 25 | std::unordered_map LoopConfig; 26 | 27 | }; 28 | 29 | template < typename RNE > // meets the requirements of RandomNumberEngine 30 | KnobConfig genRandomConfig(KnobSet const &KS, RNE &Eng); 31 | 32 | extern template KnobConfig genRandomConfig(KnobSet const&, std::mt19937_64&); 33 | 34 | 35 | // energy = [0, 100], one can think of it as a "percentage of change". 36 | template < typename RNE > // meets the requirements of RandomNumberEngine 37 | KnobConfig perturbConfig(KnobConfig KC, KnobSet const &KS, RNE &Eng, float energy); 38 | 39 | extern template KnobConfig perturbConfig(KnobConfig, KnobSet const &, std::mt19937_64 &, float); 40 | 41 | 42 | KnobConfig genDefaultConfig(KnobSet const&); 43 | 44 | void exportConfig(KnobConfig const& KC, 45 | float* mat, const uint64_t row, const uint64_t ncol, 46 | uint64_t const* colToKnob, 47 | bool debug=false); 48 | 49 | class KnobConfigAppFn { 50 | public: 51 | virtual void operator()(std::pair) = 0; 52 | virtual void operator()(std::pair) = 0; 53 | }; 54 | 55 | // a version of the AppFn that only applies to the given knob ID. 56 | // uses lookups to find the Knob in the config. Is reusable. 
57 | class KnobConfigSelFun : public KnobConfigAppFn { 58 | KnobID id_; 59 | public: 60 | KnobConfigSelFun(KnobID id) : id_(id) {} 61 | KnobID getID() const { return id_; } 62 | void setID(KnobID newID) { id_ = newID; } 63 | 64 | virtual void notFound() = 0; 65 | }; 66 | 67 | void applyToConfig(KnobConfigAppFn &F, KnobConfig const &Settings); 68 | void applyToConfig(KnobIDAppFn &F, KnobConfig const &Settings); 69 | void applyToConfig(KnobConfigSelFun &F, KnobConfig const &Settings); 70 | 71 | /////////// 72 | // printing utils 73 | 74 | namespace { 75 | using T = std::pair, std::shared_ptr>; 76 | } 77 | 78 | void dumpConfigInstance (std::ostream &os, KnobSet const& KS, T const &Entry); 79 | void dumpConfig (std::ostream &os, KnobSet const& KS, KnobConfig const &Config); 80 | 81 | } // end namespace 82 | -------------------------------------------------------------------------------- /tests/perf/fannkuchredux.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc -O2 -DBLOCKING_DRIVER %s -o %t.blocking 2 | // RUN: %atjitc -O2 %s -o %t.default 3 | // RUN: %time %t.blocking.time %t.blocking 7 4 | // RUN: %time %t.default.time %t.default 7 5 | 6 | // FIXME: we can't run this comparison reliably because 7 | // the time here includes compile jobs still in the queue. 8 | // plus I don't know if this is a good test cause it's so fast. 9 | 10 | // %compareTimes %t.default.time %t.blocking.time 11 | 12 | 13 | 14 | // NOTE: this test ensures that recompile requests are faster 15 | // if blocking mode is not turned on. 16 | // This test also makes sure blocking mode is off by default. 
17 | 18 | 19 | /* The Computer Language Benchmarks Game 20 | https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ 21 | 22 | contributed by Branimir Maksimovic 23 | */ 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | 30 | typedef unsigned char int_t; 31 | 32 | void rotate(int_t* p, int n) 33 | { 34 | int_t tmp = p[0]; 35 | for(int i = 0; i < n; ++i)p[i]=p[i+1]; 36 | p[n] = tmp; 37 | } 38 | 39 | bool next_permutation(int_t* beg, int n, int_t* c) 40 | { 41 | int i = 1; 42 | while(i=i)c[i++]=0; 46 | else break; 47 | } 48 | if(i>=n)return false; 49 | ++c[i]; 50 | return true; 51 | } 52 | 53 | 54 | struct Result{ 55 | int checksum; 56 | int maxflips; 57 | }; 58 | 59 | Result fannkuch(int n) 60 | { 61 | Result tmp = {0}; 62 | int i=0,permcount=0; 63 | int_t perm[16],tperm[16],cnt[16]={0}; 64 | 65 | std::generate(perm,perm+n,[&i](){ return ++i; }); 66 | 67 | do 68 | { 69 | std::copy(perm,perm+n,tperm); 70 | int flips = 0; 71 | while(tperm[0] != 1) 72 | { 73 | std::reverse(tperm,tperm+tperm[0]); 74 | ++flips; 75 | } 76 | tmp.checksum += (permcount%2 == 0)?flips:-flips; 77 | tmp.maxflips = std::max(tmp.maxflips,flips); 78 | }while(++permcount,next_permutation(perm,n,cnt)); 79 | 80 | return tmp; 81 | } 82 | 83 | int main(int argc, char** argv) 84 | { 85 | int n = 7; 86 | if(argc > 1)n = atoi(argv[1]); 87 | if(n < 3 || n > 16) 88 | { 89 | printf("n should be between [3 and 16]\n"); 90 | return 0; 91 | } 92 | 93 | using namespace easy::options; 94 | using namespace std::placeholders; 95 | tuner::ATDriver AT; 96 | Result r; 97 | 98 | const int ITERS = 50; 99 | for (int i = 0; i < ITERS; i++) { 100 | 101 | auto const& fannkuch_tuned = 102 | AT.reoptimize(fannkuch, n, tuner_kind(tuner::AT_Bayes) 103 | #ifdef BLOCKING_DRIVER 104 | , blocking(true) 105 | #endif 106 | ); 107 | r = fannkuch_tuned(); 108 | 109 | // r = fannkuch(n); 110 | } 111 | 112 | printf("%d\nPfannkuchen(%d) = %d\n",r.checksum,n,r.maxflips); 113 | } 114 | 
-------------------------------------------------------------------------------- /include/easy/code_cache.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace easy { 8 | 9 | namespace { 10 | using AutoKey = std::pair; 11 | 12 | template 13 | class CacheBase { 14 | 15 | public: 16 | 17 | using Key = KeyTy; 18 | 19 | protected: 20 | 21 | std::unordered_map Cache_; 22 | using iterator = typename std::unordered_map::iterator; 23 | 24 | template 25 | auto const & compile_if_not_in_cache(std::pair &CacheEntry, T &&Fun, Args&& ... args) { 26 | using wrapper_ty = decltype(easy::jit(std::forward(Fun), std::forward(args)...)); 27 | 28 | FunctionWrapperBase &FWB = CacheEntry.first->second; 29 | if(CacheEntry.second) { 30 | auto FW = easy::jit(std::forward(Fun), std::forward(args)...); 31 | FWB = std::move(FW); 32 | } 33 | return reinterpret_cast(FWB); 34 | } 35 | }; 36 | } 37 | 38 | template 39 | class Cache : public CacheBase { 40 | public: 41 | 42 | template 43 | auto const& EASY_JIT_COMPILER_INTERFACE jit(Key const &K, T &&Fun, Args&& ... args) { 44 | auto CacheEntry = CacheBase::Cache_.emplace(K, FunctionWrapperBase()); 45 | return CacheBase::compile_if_not_in_cache(CacheEntry, std::forward(Fun), std::forward(args)...); 46 | } 47 | 48 | template 49 | auto const& EASY_JIT_COMPILER_INTERFACE jit(Key &&K, T &&Fun, Args&& ... args) { 50 | auto CacheEntry = CacheBase::Cache_.emplace(K, FunctionWrapperBase()); 51 | return CacheBase::compile_if_not_in_cache(CacheEntry, std::forward(Fun), std::forward(args)...); 52 | } 53 | 54 | bool has(Key const &K) const { 55 | auto const CacheEntry = CacheBase::Cache_.find(K); 56 | return CacheEntry != CacheBase::Cache_.end(); 57 | } 58 | }; 59 | 60 | 61 | template<> 62 | class Cache : public CacheBase { 63 | public: 64 | 65 | template 66 | auto const& EASY_JIT_COMPILER_INTERFACE jit(T &&Fun, Args&& ... 
args) { 67 | void* FunPtr = reinterpret_cast(meta::get_as_pointer(Fun)); 68 | auto CacheEntry = 69 | CacheBase::Cache_.emplace( 70 | Key(FunPtr, get_context_for(std::forward(args)...)), 71 | FunctionWrapperBase()); 72 | return CacheBase::compile_if_not_in_cache(CacheEntry, std::forward(Fun), std::forward(args)...); 73 | } 74 | 75 | template 76 | bool has(T &&Fun, Args&& ... args) const { 77 | void* FunPtr = reinterpret_cast(meta::get_as_pointer(Fun)); 78 | auto const CacheEntry = 79 | CacheBase::Cache_.find(Key(FunPtr, 80 | get_context_for(std::forward(args)...))); 81 | return CacheEntry != Cache_.end(); 82 | } 83 | }; 84 | 85 | } 86 | -------------------------------------------------------------------------------- /tests/lit.cfg.in: -------------------------------------------------------------------------------- 1 | import lit.formats 2 | import lit.util 3 | import os 4 | 5 | config.name = 'atJIT' 6 | config.suffixes = ['.c', '.cpp', '.ll', '.test'] 7 | 8 | config.test_format = lit.formats.ShTest(True) 9 | 10 | config.test_source_root = "@CMAKE_CURRENT_SOURCE_DIR@/tests" 11 | config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@/tests" 12 | 13 | config.environment['PATH'] = os.pathsep.join(["@LLVM_TOOLS_BINARY_DIR@"] + [ config.environment['PATH'] ]) 14 | 15 | runtime_lib = os.path.basename("@ATJIT_RUNTIME@").split('.')[0].replace("lib", "", 1) 16 | runtime_lib_dir = os.path.dirname("@ATJIT_RUNTIME@") 17 | llvm_lib_dir = os.path.join(os.path.dirname("@LLVM_TOOLS_BINARY_DIR@"), "lib") 18 | 19 | includes = ["@ATJIT_ROOT@"] 20 | include_flags = " ".join(["-I'" + os.path.abspath(dir) + "'" for dir in "@LLVM_INCLUDE_DIRS@".split()] + ["-I'" + os.path.join(dir, "include") + "'" for dir in includes] ) 21 | 22 | ld_paths = [runtime_lib_dir, llvm_lib_dir] 23 | ld_flags = "" 24 | for ld_path in ld_paths: 25 | ld_flags = ld_flags + " -L'" + os.path.abspath(ld_path) + "' -rpath '" + os.path.abspath(ld_path) + "' " 26 | 27 | ld_flags = ld_flags + " -l" + runtime_lib 28 | 29 | # 
substitutions 30 | config.substitutions.append(('%bin', "@CMAKE_ARCHIVE_OUTPUT_DIRECTORY@")) 31 | config.substitutions.append(('%install_dir', "@CMAKE_INSTALL_PREFIX@")) 32 | config.substitutions.append(('%llvm_tools_dir', "@LLVM_TOOLS_BINARY_DIR@")) 33 | 34 | common_flags = "-g -Xclang -disable-O0-optnone " 35 | 36 | config.substitutions.append(('%atjitc', "@CMAKE_ARCHIVE_OUTPUT_DIRECTORY@/atjitc")) 37 | config.substitutions.append(('%valgrind', "@VALGRIND_EXE@")) 38 | config.substitutions.append(('%jsonlint', "ruby -e 'require \"json\"; JSON.parse(STDIN.read)'")) 39 | config.substitutions.append(('%time', "@TIME_EXE@ -p -o")) 40 | config.substitutions.append(('%compareTimes', config.test_exec_root + "/compare.sh")) 41 | 42 | config.substitutions.append(('%clangxx', os.path.join("@LLVM_TOOLS_BINARY_DIR@", "clang++"))) 43 | config.substitutions.append(('%clang', os.path.join("@LLVM_TOOLS_BINARY_DIR@", "clang"))) 44 | config.substitutions.append(('%opt', os.path.join("@LLVM_TOOLS_BINARY_DIR@", "opt"))) 45 | config.substitutions.append(('%cxxflags', common_flags + "--std=c++17")) 46 | config.substitutions.append(('%cflags', common_flags)) 47 | config.substitutions.append(('%include_flags', include_flags)) 48 | config.substitutions.append(('%lib_pass', "@EASY_JIT_PASS@")) 49 | config.substitutions.append(('%lib_runtime', "@ATJIT_RUNTIME@")) 50 | config.substitutions.append(('%ld_flags', ld_flags)) 51 | 52 | config.substitutions.append(('%not', "!")) 53 | 54 | config.substitutions.append(('%FileCheck', os.path.join("@LLVM_TOOLS_BINARY_DIR@", "FileCheck"))) 55 | 56 | if "@BENCHMARK@" in ["1", "ON"] : 57 | config.available_features.add('benchmark') 58 | 59 | if "@POLLY_KNOBS@" in ["1", "ON"] : 60 | config.available_features.add('pollyknobs') 61 | 62 | if "@CMAKE_INSTALL_PREFIX@" and os.path.exists(os.path.join("@CMAKE_INSTALL_PREFIX@", "include", "easy")): 63 | config.available_features.add('install') 64 | 
-------------------------------------------------------------------------------- /tests/meta/new_func_traits.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %clangxx %cxxflags %include_flags -c %s -o /dev/null 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace easy; 8 | using namespace easy::meta; 9 | using namespace std::placeholders; 10 | 11 | int foo(int, bool, float, int); 12 | int baz(int, bool); 13 | 14 | int main() { 15 | 16 | using foo_type = decltype(foo); 17 | using baz_type = decltype(baz); 18 | using new_foo_traits_a = new_function_traits>; 19 | using new_foo_traits_b = new_function_traits>; 20 | using new_foo_traits_c = new_function_traits>; 21 | using new_foo_traits_d = new_function_traits>; 22 | using new_foo_traits_e = new_function_traits>; 23 | using new_foo_traits_f = new_function_traits>; 24 | using new_baz_traits_g = new_function_traits>; 25 | using new_baz_traits_h = new_function_traits>; 26 | 27 | // full specialization 28 | static_assert(new_foo_traits_a::parameter_list::empty, "fail A not empty"); 29 | static_assert(new_foo_traits_b::parameter_list::empty, "fail B not empty"); 30 | static_assert(new_foo_traits_c::parameter_list::empty, "fail C not empty"); 31 | 32 | // two int parameters 33 | static_assert(new_foo_traits_d::parameter_list::size == 2, "fail D size"); 34 | static_assert(std::is_same< 35 | typename new_foo_traits_d::parameter_list, 36 | meta::type_list 37 | >::value, "fail D types"); 38 | 39 | // one int parameter 40 | static_assert(new_foo_traits_e::parameter_list::size == 1, "fail E size"); 41 | static_assert(new_foo_traits_f::parameter_list::size == 2, "fail F size"); 42 | 43 | static_assert(std::is_same< 44 | typename new_foo_traits_e::parameter_list, 45 | meta::type_list 46 | >::value, "fail E types"); 47 | static_assert(std::is_same< 48 | typename new_foo_traits_f::parameter_list, 49 | meta::type_list 50 | >::value, "fail F types"); 51 | 52 | 
static_assert(new_baz_traits_g::parameter_list::size == 2, "fail G size"); 53 | static_assert(new_baz_traits_h::parameter_list::size == 2, "fail H size"); 54 | 55 | static_assert(std::is_same< 56 | typename new_baz_traits_g::parameter_list, 57 | meta::type_list 58 | >::value, "fail G types"); 59 | 60 | static_assert(std::is_same< 61 | typename new_baz_traits_h::parameter_list, 62 | meta::type_list 63 | >::value, "fail H types"); 64 | 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /include/easy/jit.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tuner { 16 | class Optimizer; 17 | class Feedback; 18 | } 19 | 20 | namespace easy { 21 | 22 | namespace { 23 | template 24 | FunctionWrapper 25 | WrapFunction(std::unique_ptr F, std::shared_ptr FB, meta::type_list) { 26 | return FunctionWrapper(std::move(F), std::move(FB)); 27 | } 28 | 29 | template 30 | auto jit_with_optimizer(tuner::Optimizer &Opt, T &&Fun) { 31 | 32 | using FunOriginalTy = std::remove_pointer_t>; 33 | 34 | using new_type_traits = meta::new_function_traits>; 35 | using new_return_type = typename new_type_traits::return_type; 36 | using new_parameter_types = typename new_type_traits::parameter_list; 37 | 38 | assert(Opt.getAddr() == meta::get_as_pointer(Fun) && "mismatch between function and optimizer!"); 39 | 40 | auto CompiledFunction = Opt.recompile(); 41 | 42 | auto Wrapper = 43 | WrapFunction(std::move(CompiledFunction.first), std::move(CompiledFunction.second), 44 | typename new_parameter_types::template push_front ()); 45 | return Wrapper; 46 | } 47 | 48 | template 49 | auto jit_with_context(easy::Context const& Cxt, T &&Fun) { 50 | 51 | auto* FunPtr = meta::get_as_pointer(Fun); 52 | tuner::Optimizer Opt(reinterpret_cast(FunPtr), 
std::make_shared(Cxt)); 53 | 54 | return jit_with_optimizer(Opt, std::forward(Fun)); 55 | } 56 | 57 | template 58 | easy::Context get_context_for(Args&& ... args) { 59 | using FunOriginalTy = std::remove_pointer_t>; 60 | static_assert(std::is_function::value, 61 | "easy::jit: supports only on functions and function pointers"); 62 | 63 | using parameter_list = typename meta::function_traits::parameter_list; 64 | 65 | static_assert(parameter_list::size <= sizeof...(Args), 66 | "easy::jit: not providing enough argument to actual call"); 67 | 68 | easy::Context C; 69 | easy::set_parameters(parameter_list(), C, 70 | std::forward(args)...); 71 | return C; 72 | } 73 | 74 | template 75 | std::shared_ptr get_sharable_context_for(Args&& ... args) { 76 | easy::Context C = get_context_for(std::forward(args)...); 77 | auto SharableC = std::make_shared(std::move(C)); 78 | return SharableC; 79 | } 80 | 81 | } // end anonymous namespace 82 | 83 | template 84 | auto EASY_JIT_COMPILER_INTERFACE jit(T &&Fun, Args&& ... args) { 85 | auto C = get_context_for(std::forward(args)...); 86 | return jit_with_context(C, std::forward(Fun)); 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) 2 | 3 | option(LLVM_ENABLE_PLUGINS "Generate build targets for LLVM plugins." ON) 4 | option(FORCE_COLORED_OUTPUT "Always produce ANSI-colored output (GNU/Clang only)." 
TRUE) 5 | option(POLLY_KNOBS "Enable the use of Polly knobs" OFF) 6 | 7 | 8 | if(NOT CMAKE_BUILD_TYPE) 9 | set(CMAKE_BUILD_TYPE Release) 10 | endif() 11 | 12 | message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") 13 | 14 | ############################################ 15 | #### identify and configure LLVM 16 | 17 | if (DEFINED LLVM_ROOT) 18 | set(LLVM_DIR "${LLVM_ROOT}/lib/cmake/llvm") 19 | endif() 20 | 21 | if (DEFINED LLVM_ROOT AND NOT (IS_ABSOLUTE ${LLVM_DIR})) 22 | message(FATAL_ERROR "LLVM_DIR / LLVM_ROOT must be an absolute path. Tried dir: ${LLVM_DIR}") 23 | endif() 24 | 25 | find_package(LLVM REQUIRED CONFIG 26 | NO_CMAKE_PATH NO_CMAKE_ENVIRONMENT_PATH NO_CMAKE_SYSTEM_PATH) 27 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 28 | message(STATUS "Using LLVMConfig.cmake in ${LLVM_DIR}") 29 | message(STATUS "LLVM dylib: ${LLVM_LINK_LLVM_DYLIB}") 30 | message(STATUS "LLVM RTTI: ${LLVM_ENABLE_RTTI}") 31 | 32 | if (${LLVM_VERSION_MAJOR} LESS 8) 33 | message(FATAL_ERROR "atJIT does not support LLVM < 8") 34 | endif() 35 | 36 | if (NOT LLVM_ENABLE_RTTI) 37 | message(FATAL_ERROR "atJIT currently requires LLVM with RTTI.") 38 | endif() 39 | 40 | if (NOT LLVM_LINK_LLVM_DYLIB) 41 | message(FATAL_ERROR "atJIT requires libLLVM built in a certian way") 42 | endif() 43 | 44 | set(LIBLLVM_LINK_FLAGS "-Wl,-rpath,'${LLVM_LIBRARY_DIR}' -L${LLVM_LIBRARY_DIR} -lLLVM") 45 | 46 | ## configure compilation flag and atjitc script for polly 47 | if (${POLLY_KNOBS}) 48 | message(STATUS "Polly Knobs: ON") 49 | add_definitions(-DPOLLY_KNOBS) 50 | else() 51 | message(STATUS "Polly Knobs: OFF") 52 | endif() 53 | 54 | 55 | list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") 56 | 57 | include(Python) 58 | 59 | add_definitions(-DLLVM_VERSION_MAJOR=${LLVM_VERSION_MAJOR}) 60 | include_directories(SYSTEM include) 61 | 62 | set(CLANGXX_BIN "${LLVM_TOOLS_BINARY_DIR}/clang++") 63 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wpedantic 
-Werror=return-type -Wno-unused-parameter") 64 | set(CMAKE_CXX_EXTENSIONS OFF) 65 | 66 | set(CMAKE_CXX_STANDARD 17) 67 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 68 | 69 | # needed for colors when using Ninja 70 | # source: https://medium.com/@alasher/colored-c-compiler-output-with-ninja-clang-gcc-10bfe7f2b949 71 | if (${FORCE_COLORED_OUTPUT}) 72 | if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") 73 | add_compile_options (-fdiagnostics-color) 74 | elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") 75 | add_compile_options (-fcolor-diagnostics) 76 | endif () 77 | endif () 78 | 79 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) 80 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) 81 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) 82 | 83 | set(ATJIT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) 84 | 85 | add_subdirectory(cmake) 86 | add_subdirectory(include) 87 | add_subdirectory(pass) 88 | add_subdirectory(runtime) 89 | 90 | add_custom_target(atjit-core DEPENDS EasyJitPass ATJitRuntime) 91 | 92 | add_subdirectory(doc) 93 | add_subdirectory(benchmark) 94 | 95 | include(CMakeTests.txt) 96 | -------------------------------------------------------------------------------- /tests/tuner/sq_matmul.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %atjitc %s -o %t 2 | // RUN: %t > %t.out 3 | // RUN: %FileCheck %s < %t.out 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace std::placeholders; 14 | 15 | /////////////////// 16 | // utilities for square matrices 17 | 18 | template 19 | T** calloc_mat(const int DIM) { 20 | T** rows = (T**) malloc(DIM * sizeof(T*)); 21 | for(int i = 0; i < DIM; i++) { 22 | rows[i] = (T*) calloc(DIM, sizeof(T)); 23 | } 24 | return rows; 25 | } 26 | 27 | void free_mat(const int DIM, void** mat) { 28 | for (int i = 0; i < DIM; i++) 29 | free(mat[i]); 30 | free(mat); 31 | } 32 | 33 | template 34 
| bool equal_mat(const int DIM, T** a, T** b) { 35 | for (int i = 0; i < DIM; i++) 36 | for (int k = 0; k < DIM; k++) 37 | if (a[i][k] != b[i][k]) 38 | return false; 39 | 40 | return true; 41 | } 42 | 43 | //////////// 44 | 45 | // multiply square matrices 46 | template 47 | T** MatMul(const int DIM, T** aMatrix, T** bMatrix) { 48 | T** product = calloc_mat(DIM); 49 | for (int row = 0; row < DIM; row++) { 50 | for (int col = 0; col < DIM; col++) { 51 | // Multiply the row of A by the column of B to get the row, column of product. 52 | for (int inner = 0; inner < DIM; inner++) { 53 | product[row][col] += aMatrix[row][inner] * bMatrix[inner][col]; 54 | } 55 | } 56 | } 57 | return product; 58 | } 59 | 60 | using ElmTy = int16_t; 61 | 62 | void testWith(tuner::AutoTuner TunerKind, const int ITERS) { 63 | tuner::ATDriver AT; 64 | 65 | int DIM = 100; 66 | 67 | // initialize matrices 68 | ElmTy** aMatrix = calloc_mat(DIM); 69 | ElmTy** bMatrix = calloc_mat(DIM); 70 | 71 | for (int i = 0; i < DIM; i++) { 72 | for (int k = 0; k < DIM; k++) { 73 | if (i == k) 74 | aMatrix[i][k] = 1; 75 | else 76 | aMatrix[i][k] = 0; 77 | 78 | bMatrix[i][k] = (ElmTy) (i+k); 79 | } 80 | } 81 | 82 | for (int i = 0; i < ITERS; i++) { 83 | auto const &OptimizedFun = AT.reoptimize(MatMul, DIM, _1, _2, 84 | easy::options::tuner_kind(TunerKind)); 85 | 86 | ElmTy** ans = OptimizedFun(aMatrix, bMatrix); 87 | 88 | if (!equal_mat(DIM, ans, bMatrix)) { 89 | printf("ERROR!! unexpected matrix multiply result.\n"); 90 | std::exit(1); 91 | } 92 | 93 | free(ans); 94 | } 95 | 96 | free(aMatrix); 97 | free(bMatrix); 98 | 99 | } 100 | 101 | int main(int argc, char** argv) { 102 | 103 | // CHECK: [sq_matmul] start! 
104 | printf("[sq_matmul] start!\n"); 105 | 106 | testWith(tuner::AT_None, 5); 107 | // CHECK: [sq_matmul] noop tuner works 108 | printf("[sq_matmul] noop tuner works\n"); 109 | 110 | testWith(tuner::AT_Random, 150); 111 | // CHECK: [sq_matmul] random tuner works 112 | printf("[sq_matmul] random tuner works\n"); 113 | 114 | testWith(tuner::AT_Bayes, 150); 115 | // CHECK: [sq_matmul] bayes tuner works 116 | printf("[sq_matmul] bayes tuner works\n"); 117 | 118 | testWith(tuner::AT_Anneal, 5); 119 | // CHECK: [sq_matmul] annealing tuner works 120 | printf("[sq_matmul] annealing tuner works\n"); 121 | 122 | return 0; 123 | } 124 | -------------------------------------------------------------------------------- /benchmark/spectralnorm.h: -------------------------------------------------------------------------------- 1 | #ifndef BENCH_SPECNORM 2 | #define BENCH_SPECNORM 3 | 4 | /* The Computer Language Benchmarks Game 5 | * https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ 6 | * 7 | * Contributed by Sebastien Loisel 8 | * Adapted by Kavon Farvardin 9 | */ 10 | 11 | /////////////////////////// 12 | // benchmark code 13 | 14 | #include 15 | #include 16 | 17 | double eval_A(int i, int j) { return 1.0/((i+j)*(i+j+1)/2+i+1); } 18 | 19 | void eval_A_times_u(int N, const double u[], double Au[]) 20 | { 21 | int i,j; 22 | for(i=0;i(state.range(2)); 80 | 81 | for (auto _ : state) { 82 | tuner::ATDriver AT; 83 | auto Tuner = easy::options::tuner_kind(TK); 84 | 85 | for (int i = 0; i < ITERS; i++) { 86 | auto const& my_specnorm = AT.reoptimize(spectralnorm, _1, Tuner); 87 | 88 | my_specnorm(N); 89 | } 90 | // NOTE: we don't want to time the driver's destructor 91 | state.PauseTiming(); 92 | } 93 | } 94 | 95 | 96 | ///////////////////////////// 97 | // benchmark registration 98 | 99 | #define SPECNORM_MIN 150 100 | #define SPECNORM_MAX 300 101 | #define ITER_MIN 256 102 | #define ITER_MAX 2048 103 | 104 | static void SpecnormArgs(benchmark::internal::Benchmark* b) { 105 
| for (tuner::AutoTuner TK : tuner::AllTuners) 106 | for (int i = ITER_MIN; i <= ITER_MAX; i *= 2) 107 | for (int sz = SPECNORM_MIN; sz <= SPECNORM_MAX; sz *= 2) 108 | b->Args({sz, i, TK}); 109 | } 110 | 111 | BENCHMARK(TUNING_spectralnorm) 112 | ->Unit(benchmark::kMillisecond) 113 | ->Apply(SpecnormArgs) 114 | ->UseRealTime(); 115 | 116 | 117 | // cleanup 118 | #undef SPECNORM_MIN 119 | #undef SPECNORM_MAX 120 | #undef ITER_MIN 121 | #undef ITER_MAX 122 | 123 | 124 | #endif // BENCH_SPECNORM 125 | -------------------------------------------------------------------------------- /runtime/BitcodeTracker.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | using namespace easy; 10 | using namespace llvm; 11 | 12 | namespace easy { 13 | DefineEasyException(BitcodeNotRegistered, "Cannot find bitcode."); 14 | DefineEasyException(BitcodeParseError, "Cannot parse bitcode for: "); 15 | } 16 | 17 | BitcodeTracker& BitcodeTracker::GetTracker() { 18 | static BitcodeTracker TheTracker; 19 | return TheTracker; 20 | } 21 | 22 | bool BitcodeTracker::hasGlobalMapping(void* FPtr) const { 23 | auto InfoPtr = Functions.find(FPtr); 24 | return InfoPtr != Functions.end(); 25 | } 26 | 27 | void* BitcodeTracker::getAddress(std::string const &Name) { 28 | auto Addr = NameToAddress.find(Name); 29 | if(Addr == NameToAddress.end()) 30 | return nullptr; 31 | return Addr->second; 32 | } 33 | 34 | std::tuple BitcodeTracker::getNameAndGlobalMapping(void* FPtr) { 35 | auto InfoPtr = Functions.find(FPtr); 36 | if(InfoPtr == Functions.end()) { 37 | throw easy::BitcodeNotRegistered(); 38 | } 39 | 40 | return std::make_tuple(InfoPtr->second.Name, InfoPtr->second.Globals); 41 | } 42 | 43 | const char* BitcodeTracker::getName(void* FPtr) { 44 | auto InfoPtr = Functions.find(FPtr); 45 | if(InfoPtr == Functions.end()) { 46 | throw easy::BitcodeNotRegistered(); 47 | } 48 | 49 | return 
InfoPtr->second.Name; 50 | } 51 | 52 | std::unique_ptr BitcodeTracker::getModuleWithContext(void* FPtr, llvm::LLVMContext &C) { 53 | auto InfoPtr = Functions.find(FPtr); 54 | if(InfoPtr == Functions.end()) { 55 | throw easy::BitcodeNotRegistered(); 56 | } 57 | 58 | auto &Info = InfoPtr->second; 59 | 60 | llvm::StringRef BytecodeStr(Info.Bitcode, Info.BitcodeLen); 61 | std::unique_ptr Buf(llvm::MemoryBuffer::getMemBuffer(BytecodeStr)); 62 | auto ModuleOrErr = 63 | llvm::parseBitcodeFile(Buf->getMemBufferRef(), C); 64 | 65 | if (ModuleOrErr.takeError()) { 66 | throw easy::BitcodeParseError(Info.Name); 67 | } 68 | 69 | return std::move(ModuleOrErr.get()); 70 | } 71 | 72 | #ifdef NDEBUG 73 | class DiagnosticSilencer : public llvm::DiagnosticHandler { 74 | public: 75 | DiagnosticSilencer() {} 76 | bool handleDiagnostics(const DiagnosticInfo &DI) override { return true; } 77 | bool isAnalysisRemarkEnabled(StringRef PassName) const override { return false; } 78 | bool isMissedOptRemarkEnabled(StringRef PassName) const override { return false; } 79 | bool isPassedOptRemarkEnabled(StringRef PassName) const override { return false; } 80 | }; // end class 81 | #endif 82 | 83 | BitcodeTracker::ModuleContextPair BitcodeTracker::getModule(void* FPtr) { 84 | 85 | std::unique_ptr Context(new llvm::LLVMContext()); 86 | 87 | #ifdef NDEBUG 88 | // silence the output as much as possible! 89 | Context->setDiagnosticHandler(std::make_unique()); 90 | Context->setDiagnosticsHotnessThreshold(~0); 91 | #else 92 | // preserve names in the IR for debugging. 
93 | Context->setDiscardValueNames(false); 94 | #endif 95 | 96 | auto Module = getModuleWithContext(FPtr, *Context); 97 | 98 | return ModuleContextPair(std::move(Module), std::move(Context)); 99 | } 100 | 101 | // function to interface with the generated code 102 | extern "C" { 103 | void easy_register(void* FPtr, const char* Name, GlobalMapping* Globals, const char* Bitcode, size_t BitcodeLen) { 104 | BitcodeTracker::GetTracker().registerFunction(FPtr, Name, Globals, Bitcode, BitcodeLen); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /runtime/pass/DevirtualizeConstant.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | using namespace llvm; 16 | 17 | char easy::DevirtualizeConstant::ID = 0; 18 | 19 | llvm::Pass* easy::createDevirtualizeConstantPass(llvm::StringRef Name) { 20 | return new DevirtualizeConstant(Name); 21 | } 22 | 23 | static ConstantInt* getVTableHostAddress(Value& V) { 24 | auto* VTable = dyn_cast(&V); 25 | if(!VTable) 26 | return nullptr; 27 | MDNode *Tag = VTable->getMetadata(LLVMContext::MD_tbaa); 28 | if(!Tag || !Tag->isTBAAVtableAccess()) 29 | return nullptr; 30 | 31 | // that's a vtable 32 | auto* Location = dyn_cast(VTable->getPointerOperand()->stripPointerCasts()); 33 | if(!Location) 34 | return nullptr; 35 | 36 | if(auto* CE = dyn_cast(Location)) { 37 | if(CE->getOpcode() == Instruction::IntToPtr) { 38 | Location = CE->getOperand(0); 39 | } 40 | } 41 | auto* CLocation = dyn_cast(Location); 42 | if(!CLocation) 43 | return nullptr; 44 | return CLocation; 45 | } 46 | 47 | static Function* findFunctionAndLinkModules(Module& M, void* HostValue) { 48 | auto &BT = easy::BitcodeTracker::GetTracker(); 49 | const char* FName = std::get<0>(BT.getNameAndGlobalMapping(HostValue)); 50 
| 51 | if(!FName) 52 | return nullptr; 53 | 54 | std::unique_ptr LM = BT.getModuleWithContext(HostValue, M.getContext()); 55 | 56 | if(!Linker::linkModules(M, std::move(LM), Linker::OverrideFromSrc, 57 | [](Module &, const StringSet<> &){})) 58 | { 59 | GlobalValue *GV = M.getNamedValue(FName); 60 | if(Function* F = dyn_cast(GV)) { 61 | F->setLinkage(Function::PrivateLinkage); 62 | return F; 63 | } 64 | else { 65 | assert(false && "wtf"); 66 | } 67 | } 68 | return nullptr; 69 | } 70 | 71 | bool easy::DevirtualizeConstant::runOnFunction(llvm::Function &F) { 72 | 73 | if(F.getName() != TargetName_) 74 | return false; 75 | 76 | llvm::Module &M = *F.getParent(); 77 | 78 | // easy::Context const &C = getAnalysis().getContext(); 79 | 80 | for(auto& I: instructions(F)) { 81 | auto* VTable = getVTableHostAddress(I); 82 | if(!VTable) 83 | continue; 84 | 85 | void** RuntimeLoadedValue = *(void***)(uintptr_t)(VTable->getZExtValue()); 86 | 87 | // that's generally the load from the table 88 | for(User* U : VTable->users()) { 89 | ConstantExpr* CE = dyn_cast(U); 90 | if(!CE || !CE->isCast()) 91 | continue; 92 | 93 | for(User* U : CE->users()) { 94 | if(auto* CalledPtr = dyn_cast(U)) { 95 | void* CalledPtrHostValue = *RuntimeLoadedValue; 96 | llvm::Function* Called = findFunctionAndLinkModules(M, CalledPtrHostValue); 97 | if(Called) { 98 | for(User* U2 : CalledPtr->users()) { 99 | if(auto* LI = dyn_cast(U2)) 100 | LI->replaceAllUsesWith(Called); 101 | } 102 | } 103 | } 104 | } 105 | } 106 | } 107 | 108 | return true; 109 | } 110 | 111 | static RegisterPass X("","",false, false); 112 | -------------------------------------------------------------------------------- /include/tuner/Knob.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #include 12 | 13 | namespace tuner { 14 | 15 | // polymorphism of Knobs is primarily achieved through 
inheritance. 16 | 17 | // I really wish c++ supported template methods that are virtual! 18 | // We implement the template portion manually with macros. 19 | // If you see things like "HANDLE_CASE" that take a type as a parameter, 20 | // that's what we mean. This is still robust, since a new virtual method will 21 | // cause the compiler to point out places where you haven't updated your code 22 | 23 | using KnobID = uint64_t; 24 | 25 | // used to ensure knob IDs are unique. 26 | // we rely on the fact that 0 is an invalid knob ID 27 | extern std::atomic KnobTicker; 28 | 29 | // Base class for tunable compiler "knobs", which 30 | // are simply tunable components. 31 | template< typename ValTy > 32 | class Knob { 33 | 34 | private: 35 | KnobID id__; 36 | 37 | public: 38 | Knob() { 39 | id__ = KnobTicker.fetch_add(1); 40 | assert(id__ != 0 && "exhausted the knob ticker!"); 41 | } 42 | virtual ~Knob() = default; 43 | // value accessors 44 | virtual ValTy getDefault() const = 0; 45 | virtual ValTy getVal() const = 0; 46 | virtual void setVal(ValTy) = 0; 47 | 48 | virtual void apply(llvm::Module &M) = 0; 49 | 50 | // a unique ID relative to all knobs in the process. 51 | // since multiple instances of autotuners can be created 52 | // per process, this only guarentees uniqueness of each 53 | // instance, it is otherwise unstable. 54 | KnobID getID() const { return id__; } 55 | 56 | virtual std::string getName() const { 57 | return "knob id " + std::to_string(getID()); 58 | } 59 | 60 | // members related to exporting to a flat array 61 | 62 | virtual size_t size() const { return 1; } // num values to be flattened 63 | 64 | }; // end class Knob 65 | 66 | 67 | // represents a knob that can take on values in the range 68 | // [a, b], where a, b are scalar values. 
69 | template < typename ValTy > 70 | class ScalarRange : public Knob { 71 | public: 72 | virtual ~ScalarRange() = default; 73 | 74 | // inclusive ranges 75 | virtual ValTy min() const = 0; 76 | virtual ValTy max() const = 0; 77 | 78 | template 79 | friend bool operator== (ScalarRange const&, ScalarRange const&); 80 | 81 | }; // end class ScalarRange 82 | 83 | 84 | // a boolean-like scalar range 85 | class FlagKnob : public ScalarRange { 86 | private: 87 | static constexpr int TRUE = 1; 88 | static constexpr int FALSE = 0; 89 | int current; 90 | int dflt; 91 | public: 92 | virtual ~FlagKnob() = default; 93 | FlagKnob(bool dflt_) : dflt(dflt_ ? TRUE : FALSE) { 94 | current = dflt; 95 | } 96 | int getDefault() const override { return dflt; } 97 | int getVal() const override { return current; } 98 | void setVal(int newVal) override { 99 | assert(newVal == TRUE || newVal == FALSE); 100 | current = newVal; 101 | } 102 | void apply(llvm::Module &M) override { } // do nothing by default 103 | int min() const override { return FALSE; } 104 | int max() const override { return TRUE; } 105 | 106 | bool getFlag() const { 107 | return current != FALSE; 108 | } 109 | 110 | }; // end class FlagKnob 111 | 112 | //////////////////////// 113 | // handy type aliases and type utilities 114 | 115 | namespace knob_type { 116 | using ScalarInt = tuner::ScalarRange; 117 | } 118 | 119 | // this needs to appear first, before specializations. 
120 | template< typename Any > 121 | struct is_knob { 122 | static constexpr bool value = false; 123 | using rawTy = void; 124 | }; 125 | 126 | 127 | } // namespace tuner 128 | -------------------------------------------------------------------------------- /include/tuner/CodegenOptions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | namespace tuner { 8 | 9 | // a typical, simple integer range knob 10 | class SimpleRange : public ScalarRange { 11 | private: 12 | int dflt; 13 | int current; 14 | public: 15 | SimpleRange(int dflt_) : dflt(dflt_), current(dflt_) {} 16 | int getDefault() const override { return dflt; } 17 | int getVal() const override { return current; } 18 | void setVal(int newVal) override { current = newVal; } 19 | void apply(llvm::Module &M) override { } // can't set it in the module. 20 | }; // end class 21 | 22 | class FastISelOption : public FlagKnob { 23 | public: 24 | FastISelOption(bool dflt_ = false) : FlagKnob(dflt_) {} 25 | std::string getName() const override { 26 | return "use FastISel"; 27 | } 28 | }; // end class 29 | 30 | class IPRAOption : public FlagKnob { 31 | public: 32 | IPRAOption(bool dflt_ = false) : FlagKnob(dflt_) {} 33 | std::string getName() const override { 34 | return "use IPRA"; 35 | } 36 | }; // end class 37 | 38 | // NOTE: we turned off the O0 option for now since 39 | // it is an awful code generator. 
40 | class CodeGenOptLvl : public SimpleRange { 41 | public: 42 | CodeGenOptLvl(int dflt_ = 3) : SimpleRange(dflt_) {} 43 | int min() const override { return 1; } 44 | int max() const override { return 3; } 45 | 46 | std::string getName() const override { 47 | return "codegen opt level"; 48 | } 49 | 50 | llvm::CodeGenOpt::Level getLevel() { 51 | switch (getVal()) { 52 | case 1: 53 | return llvm::CodeGenOpt::Level::Less; 54 | case 2: 55 | return llvm::CodeGenOpt::Level::Default; 56 | case 3: 57 | return llvm::CodeGenOpt::Level::Aggressive; 58 | 59 | case 0: 60 | // return llvm::CodeGenOpt::Level::None; // see NOTE above 61 | default: 62 | throw std::logic_error("invalid codegen optimization level."); 63 | }; 64 | } 65 | }; // end class 66 | 67 | 68 | class OptimizerOptLvl : public SimpleRange { 69 | public: 70 | OptimizerOptLvl(int dflt_ = 3) : SimpleRange(dflt_) {} 71 | int min() const override { return 0; } 72 | int max() const override { return 3; } 73 | 74 | std::string getName() const override { 75 | return "optimizer opt level"; 76 | } 77 | }; // end class 78 | 79 | // NOTE: I believe it's: 80 | // 0 -> no size optimization 81 | // 1 -> -Os 82 | // 2 -> -Oz 83 | class OptimizerSizeLvl : public SimpleRange { 84 | public: 85 | OptimizerSizeLvl(int dflt_ = 0) : SimpleRange(dflt_) {} 86 | int min() const override { return 0; } 87 | int max() const override { return 2; } 88 | 89 | std::string getName() const override { 90 | return "optimizer size opt level"; 91 | } 92 | }; // end class 93 | 94 | class InlineThreshold : public ScalarRange { 95 | llvm::InlineParams Params; 96 | llvm::InlineParams DefaultParams; 97 | 98 | public: 99 | InlineThreshold() { 100 | Params = llvm::getInlineParams(); 101 | DefaultParams = Params; 102 | assert(getVal() <= max() && getVal() >= min()); 103 | } 104 | 105 | InlineThreshold(unsigned OptLevel, unsigned SizeOptLevel) { 106 | Params = llvm::getInlineParams(OptLevel, SizeOptLevel); 107 | DefaultParams = Params; 108 | assert(getVal() 
<= max() && getVal() >= min()); 109 | } 110 | 111 | void setVal(int Threshold) override { 112 | Params.DefaultThreshold = Threshold; 113 | } 114 | 115 | int getVal() const override { 116 | return Params.DefaultThreshold; 117 | } 118 | 119 | int getDefault() const override { 120 | return DefaultParams.DefaultThreshold; 121 | } 122 | 123 | int min() const override { 124 | return -2000; 125 | } 126 | 127 | int max() const override { 128 | return 2000; 129 | } 130 | 131 | std::string getName() const override { 132 | return "inlining threshold"; 133 | } 134 | 135 | void apply(llvm::Module &M) override { } 136 | }; 137 | 138 | } // end namespace 139 | -------------------------------------------------------------------------------- /include/tuner/optimizer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | namespace tuner { 23 | 24 | using CompileResult = 25 | std::pair, std::shared_ptr>; 26 | 27 | struct RecompileRequest { 28 | Optimizer* Opt; 29 | std::optional RetVal; 30 | }; 31 | 32 | struct AddCompileResult { 33 | Optimizer* Opt; 34 | CompileResult Result; 35 | }; 36 | 37 | struct OptimizeResult { 38 | public: 39 | std::unique_ptr M; 40 | std::unique_ptr LLVMCxt; 41 | std::shared_ptr FB; 42 | Optimizer* Opt; 43 | bool End; 44 | llvm::CodeGenOpt::Level CGLevel; 45 | bool FastISel; 46 | bool IPRA; 47 | }; 48 | 49 | namespace opt_status { 50 | enum Value { 51 | Empty, // nothing ready, no workers. 52 | Working, // empty but working on jobs 53 | Ready // at least one is available, maybe working. 54 | }; 55 | } 56 | 57 | 58 | ///// 59 | // each Optimizer instance is an encapsulation of the state of 60 | // dynamically optimizing a specific function. 
61 | // 62 | // This consists of things like the knobs, tuner, 63 | // feedback information, and context of the function. 64 | class Optimizer { 65 | private: 66 | static std::once_flag haveInitPollyPasses_; 67 | 68 | std::shared_ptr Cxt_; 69 | void* Addr_; // the function pointer 70 | 71 | // metadata about the function being compiled 72 | std::tuple GMap_; 73 | 74 | // members related to the pass manager that we need to keep alive. 75 | std::unique_ptr TM_; 76 | bool InitializedSelf_; 77 | 78 | ////////// 79 | // knobs that control the compilation process 80 | CodeGenOptLvl CGOptLvl; 81 | FastISelOption FastISelOpt; 82 | IPRAOption IPRAOpt; 83 | 84 | OptimizerOptLvl OptLvl; 85 | OptimizerSizeLvl OptSz; 86 | InlineThreshold InlineThresh; 87 | 88 | ////////// 89 | // members related to concurrent JIT compilation 90 | 91 | // the initial job queue for recompile requests. 92 | // it is a serial queue that optimizes the IR. 93 | dispatch_queue_t optimizeQ_; 94 | 95 | // a serial job queue for IR -> asm compilation 96 | dispatch_queue_t codegenQ_; 97 | std::atomic recompileActive_ = false; 98 | 99 | // serial list-access queues. The dispatch 100 | // queue is basically a semaphore. 101 | dispatch_queue_t mutate_recompileDone_; 102 | std::list recompileDone_; 103 | std::atomic doneQueueEmpty_ = true; 104 | 105 | 106 | 107 | ///////////// 108 | 109 | std::unique_ptr genPassManager(); 110 | void findContextKnobs(KnobSet &); 111 | 112 | // members related to automatic tuning 113 | Tuner *Tuner_; 114 | bool isNoopTuner_ = false; 115 | 116 | public: 117 | Optimizer(void* Addr, std::shared_ptr Cxt, bool LazyInit = false); 118 | ~Optimizer(); 119 | 120 | // the "lazy" initializer that must be called manually if LazyInit == true 121 | void initialize(); 122 | 123 | easy::Context const* getContext() const; 124 | 125 | void* getAddr() const; 126 | 127 | // NOTE: The answer is imprecise, especially if another thread might call 128 | // `recompile` while this function is executing! 
129 | // We accept this lower reliability in order to keep this 130 | // test very efficient (no synchronization needed). 131 | opt_status::Value status() const; 132 | 133 | //// these callbacks are a bit ugly. 134 | void addToList_callback(AddCompileResult*); 135 | void optimize_callback(); 136 | void codegen_callback(OptimizeResult*); 137 | void obtain_callback(RecompileRequest*); 138 | 139 | CompileResult recompile(); 140 | 141 | bool isNoopTuner() const { return isNoopTuner_; } 142 | 143 | void dumpStats(std::ostream &) const; 144 | 145 | }; // end class 146 | 147 | } // end namespace 148 | -------------------------------------------------------------------------------- /include/easy/function_wrapper.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | namespace easy { 12 | 13 | class FunctionWrapperBase { 14 | 15 | protected: 16 | std::unique_ptr Fun_; 17 | 18 | std::shared_ptr FB_; 19 | 20 | bool noInit_ = true; 21 | 22 | public: 23 | // null object 24 | FunctionWrapperBase() = default; 25 | 26 | // default constructor 27 | FunctionWrapperBase(std::unique_ptr F, std::shared_ptr FB) 28 | : Fun_(std::move(F)), FB_(std::move(FB)), noInit_(false) { } 29 | 30 | // steal the implementation 31 | FunctionWrapperBase(FunctionWrapperBase &&FW) 32 | : Fun_(std::move(FW.Fun_)), FB_(std::move(FW.FB_)), noInit_(FW.noInit_) { } 33 | 34 | FunctionWrapperBase& operator=(FunctionWrapperBase &&FW) { 35 | Fun_ = std::move(FW.Fun_); 36 | FB_ = std::move(FW.FB_); 37 | noInit_ = FW.noInit_; 38 | return *this; 39 | } 40 | 41 | bool isEmpty() const { return noInit_; } 42 | 43 | Function const& getFunction() const { 44 | return *Fun_; 45 | } 46 | 47 | tuner::Feedback& getFeedback() const { 48 | return *FB_; 49 | } 50 | 51 | void* getRawPointer() const { 52 | return getFunction().getRawPointer(); 53 | } 54 | 55 | void serialize(std::ostream& 
os) const { 56 | getFunction().serialize(os); 57 | } 58 | 59 | static FunctionWrapperBase deserialize(std::istream& is) { 60 | std::unique_ptr Fun = Function::deserialize(is); 61 | return FunctionWrapperBase{std::move(Fun), std::make_shared()}; 62 | } 63 | }; 64 | 65 | template 66 | class FunctionWrapper; 67 | 68 | template 69 | class FunctionWrapper : 70 | public FunctionWrapperBase { 71 | public: 72 | FunctionWrapper(std::unique_ptr F) 73 | : FunctionWrapperBase(std::move(F), std::make_shared()) {} 74 | 75 | FunctionWrapper(std::unique_ptr F, std::shared_ptr FB) 76 | : FunctionWrapperBase(std::move(F), std::move(FB)) {} 77 | 78 | template 79 | Ret operator()(Args&& ... args) const { 80 | auto Token = FB_->startMeasurement(); 81 | 82 | auto Result = getFunctionPointer()(std::forward(args)...); 83 | 84 | FB_->endMeasurement(Token); 85 | return Result; 86 | } 87 | 88 | auto getFunctionPointer() const { 89 | return ((Ret(*)(Params...))getRawPointer()); 90 | } 91 | 92 | static FunctionWrapper deserialize(std::istream& is) { 93 | std::unique_ptr Fun = Function::deserialize(is); 94 | return FunctionWrapper{std::move(Fun)}; 95 | } 96 | }; 97 | 98 | // specialization for void return 99 | template 100 | class FunctionWrapper : 101 | public FunctionWrapperBase { 102 | public: 103 | FunctionWrapper(std::unique_ptr F) 104 | : FunctionWrapperBase(std::move(F), std::make_shared()) {} 105 | 106 | FunctionWrapper(std::unique_ptr F, std::shared_ptr FB) 107 | : FunctionWrapperBase(std::move(F), std::move(FB)) {} 108 | 109 | template 110 | void operator()(Args&& ... 
args) const { 111 | auto Token = FB_->startMeasurement(); 112 | 113 | getFunctionPointer()(std::forward(args)...); 114 | 115 | FB_->endMeasurement(Token); 116 | } 117 | 118 | auto getFunctionPointer() const { 119 | return ((void(*)(Params...))getRawPointer()); 120 | } 121 | 122 | static FunctionWrapper deserialize(std::istream& is) { 123 | std::unique_ptr Fun = Function::deserialize(is); 124 | return FunctionWrapper{std::move(Fun)}; 125 | } 126 | }; 127 | 128 | template 129 | struct is_function_wrapper { 130 | 131 | template 132 | struct is_function_wrapper_helper { 133 | static constexpr bool value = false; 134 | }; 135 | 136 | template 137 | struct is_function_wrapper_helper> { 138 | static constexpr bool value = true; 139 | using return_type = Ret; 140 | using params = meta::type_list; 141 | }; 142 | 143 | using helper = is_function_wrapper_helper>; 144 | 145 | static constexpr bool value = helper::value; 146 | }; 147 | 148 | template 149 | struct is_function_wrapper> { 150 | static constexpr bool value = true; 151 | using return_type = Ret; 152 | using params = meta::type_list; 153 | }; 154 | 155 | 156 | } 157 | -------------------------------------------------------------------------------- /runtime/tuner/Feedback.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | namespace tuner { 8 | 9 | template 10 | int64_t elapsedTime(Feedback::TimePoint Start, Feedback::TimePoint End) { 11 | Duration elapsedDur = (End - Start); 12 | return elapsedDur.count(); 13 | } 14 | 15 | // NOTE: This implementation assumes that the sample 16 | // data in each Feedback object is normally distributed. 17 | bool Feedback::betterThan(Feedback& Other) { 18 | this->updateStats(); 19 | Other.updateStats(); 20 | 21 | /* 22 | We perform a two-sample t test aka Welch's unequal variances t-test. 23 | More details: 24 | 1. https://en.wikipedia.org/wiki/Welch%27s_t-test 25 | 2. 
Section 10.2 of Devore & Berk's 26 | "Modern Mathematical Statistics with Applications" 27 | 28 | Null Hypothesis: 29 | o_mean - my_mean <= delta 30 | 31 | Alternative Hypothesis 1: 32 | o_mean - my_mean > delta 33 | i.e. my mean is at least 'delta' lower than theirs, for 34 | delta >= 0. 35 | 36 | Reject null hypothesis with confidence level 100(1-alpha)% if: 37 | test_statistic >= t_{alpha, degrees of freedom} 38 | 39 | */ 40 | 41 | double my_mean = this->expectedValue(); 42 | double my_var = this->variance(); 43 | size_t my_sz = this->sampleSize(); 44 | double my_scaledVar = my_var / my_sz; 45 | 46 | double o_mean = Other.expectedValue(); 47 | double o_var = Other.variance(); 48 | size_t o_sz = Other.sampleSize(); 49 | double o_scaledVar = o_var / o_sz; 50 | 51 | const double DELTA = 0.02 * o_mean; // delta is defined as a % of their mean. 52 | 53 | double test_statistic = (o_mean - my_mean - DELTA) / 54 | std::sqrt(o_scaledVar + my_scaledVar); 55 | 56 | // degrees of freedom 57 | double df = std::trunc( // round down 58 | std::pow(o_scaledVar + my_scaledVar, 2) / 59 | ( (std::pow(o_scaledVar, 2) / (o_sz - 1)) 60 | + (std::pow(my_scaledVar, 2) / (my_sz - 1)) 61 | )); 62 | 63 | DLOG_F(INFO, "delta = %.3f, ts = %.3f, df = %.1f", DELTA, test_statistic, df); 64 | 65 | // TODO: determine the _correct_ critical values 66 | // for a given confidence level to determine 67 | // whether to reject null hyp. 
68 | 69 | // hardcode alpha = 0.05, df = 6 70 | const double THRESH = 1.943; 71 | 72 | return test_statistic >= THRESH; 73 | } 74 | 75 | void calculateBasicStatistics( 76 | std::vector& startBuf, 77 | std::vector& endBuf, 78 | size_t sampleSz, 79 | double& sampleAvg, 80 | double& sampleVariance, 81 | double& sampleErr 82 | ) { 83 | 84 | CHECK_F(sampleSz > 0, "calculating statistics when there is no data."); 85 | 86 | { // compute sample average 87 | int64_t totalTime = 0; 88 | for (size_t i = 0; i < sampleSz; i++) { 89 | int64_t obsTime = elapsedTime(startBuf[i], endBuf[i]); 90 | 91 | DCHECK_F(obsTime > 0, "saw bogus sample time!"); 92 | DCHECK_F(std::numeric_limits::max() - totalTime > obsTime, 93 | "overflow. use a different time unit!"); 94 | 95 | totalTime += obsTime; 96 | } 97 | sampleAvg = ((double) totalTime) / sampleSz; 98 | } 99 | 100 | { // compute sample variance and standard error of the mean 101 | if (sampleSz == 1) { 102 | sampleVariance = 0; 103 | sampleErr = 0; 104 | } else { 105 | int64_t sumSqDiff = 0; 106 | for (size_t i = 0; i < sampleSz; i++) { 107 | sumSqDiff += std::pow(elapsedTime(startBuf[i], endBuf[i]) - sampleAvg, 2); 108 | } 109 | sampleVariance = ((double) sumSqDiff) / (sampleSz - 1); 110 | sampleErr = std::sqrt(sampleVariance) / std::sqrt(sampleSz); 111 | } 112 | } 113 | } 114 | 115 | 116 | std::shared_ptr createFeedback(FeedbackKind requested, 117 | std::optional preferred) { 118 | switch (requested) { 119 | case FB_None: 120 | if (preferred) 121 | return createFeedback(preferred.value(), std::nullopt); 122 | 123 | return std::make_shared(); 124 | 125 | case FB_Total: 126 | return std::make_shared(); 127 | 128 | case FB_Total_IgnoreError: 129 | return std::make_shared(-1); 130 | 131 | case FB_Recent: 132 | return std::make_shared(); 133 | 134 | case FB_Recent_NP: 135 | default: 136 | throw std::runtime_error("createFeedback -- unknown feedback kind!"); 137 | }; 138 | } 139 | 140 | 141 | 142 | } // end namespace 143 | 
-------------------------------------------------------------------------------- /include/easy/param.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | namespace easy { 11 | 12 | namespace { 13 | 14 | // special types 15 | template 16 | struct set_parameter_helper { 17 | 18 | template 19 | struct function_wrapper_specialization_is_possible { 20 | 21 | template 22 | static std::true_type can_assign_fun_pointer(std::remove_pointer_t); 23 | 24 | template 25 | static std::false_type can_assign_fun_pointer (...); 26 | 27 | using type = decltype(can_assign_fun_pointer( 28 | *std::declval().getFunctionPointer())); 29 | 30 | static constexpr bool value { type::value }; 31 | }; 32 | 33 | template 34 | using _if = std::enable_if_t; 35 | 36 | template 37 | static void set_param(Context &C, 38 | _if<(bool)std::is_placeholder::type>::value, Arg>) { 39 | C.setParameterIndex(std::is_placeholder::type>::value-1); 40 | } 41 | 42 | template 43 | static void set_param(Context &C, 44 | _if::value, Arg> &&arg) { 45 | static_assert(function_wrapper_specialization_is_possible::value, 46 | "easy::jit composition is not possible. 
Incompatible types."); 47 | C.setParameterModule(arg.getFunction()); 48 | } 49 | 50 | template 51 | static void set_param(Context &C, 52 | _if>::value, Arg> &&arg) { 53 | static_assert(std::is_same>::rawTy, Param>::value, 54 | "atJIT tunable parameter's underlying type is mismatched"); 55 | C.setTunableParam(arg); 56 | } 57 | 58 | }; 59 | 60 | template<> 61 | struct set_parameter_helper { 62 | 63 | template 64 | using _if = std::enable_if_t; 65 | 66 | template 67 | static void set_param(Context &C, 68 | _if::value, Arg> &&arg) { 69 | C.setParameterInt(std::forward(arg)); 70 | } 71 | 72 | template 73 | static void set_param(Context &C, 74 | _if::value, Arg> &&arg) { 75 | C.setParameterFloat(std::forward(arg)); 76 | } 77 | 78 | template 79 | static void set_param(Context &C, 80 | _if::value, Arg> &&arg) { 81 | C.setParameterTypedPointer(std::forward(arg)); 82 | } 83 | 84 | template 85 | static void set_param(Context &C, 86 | _if::value, Arg> &&arg) { 87 | C.setParameterTypedPointer(std::addressof(arg)); 88 | } 89 | 90 | template 91 | static void set_param(Context &C, 92 | _if::value, Arg> &&arg) { 93 | C.setParameterTypedStruct(std::addressof(arg)); 94 | } 95 | }; 96 | 97 | template 98 | struct set_parameter { 99 | 100 | static constexpr bool is_ph = std::is_placeholder>::value; 101 | static constexpr bool is_fw = easy::is_function_wrapper::value; 102 | static constexpr bool is_knb = tuner::is_knob>::value; 103 | static constexpr bool is_special = is_ph || is_fw || is_knb; 104 | 105 | using help = set_parameter_helper; 106 | }; 107 | 108 | } 109 | 110 | template 111 | void set_options(Context &, NoOptions&& ...) { 112 | static_assert(meta::type_list::empty, "Remaining options to be processed!"); 113 | } 114 | 115 | template 116 | void set_options(Context &C, Option0&& Opt, Options&& ... 
Opts) { 117 | using OptTy = std::decay_t; 118 | OptTy& OptRef = std::ref(Opt); 119 | static_assert(options::is_option::value, "An easy::jit option is expected"); 120 | 121 | OptRef.handle(C); 122 | set_options(C, std::forward(Opts)...); 123 | } 124 | 125 | template 126 | std::enable_if_t 127 | set_parameters(ParameterList, 128 | Context& C, Options&& ... opts) { 129 | set_options(C, std::forward(opts)...); 130 | } 131 | 132 | template 133 | std::enable_if_t 134 | set_parameters(ParameterList, 135 | Context &C, Arg0 &&arg0, Args&& ... args) { 136 | using Param0 = typename ParameterList::head; 137 | using ParametersTail = typename ParameterList::tail; 138 | 139 | set_parameter::help::template set_param(C, std::forward(arg0)); 140 | set_parameters(ParametersTail(), C, std::forward(args)...); 141 | } 142 | 143 | } 144 | -------------------------------------------------------------------------------- /runtime/Function.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | 26 | using namespace easy; 27 | 28 | namespace easy { 29 | DefineEasyException(ExecutionEngineCreateError, "Failed to create execution engine for:"); 30 | DefineEasyException(CouldNotOpenFile, "Failed to file to dump intermediate representation."); 31 | } 32 | 33 | Function::Function(void* Addr, std::unique_ptr H) 34 | : Address(Addr), Holder(std::move(H)) { 35 | } 36 | 37 | static std::unique_ptr GetEngine(std::unique_ptr M, const char *Name, llvm::CodeGenOpt::Level CGLevel, bool UseFastISel, bool UseIPRA) { 38 | llvm::EngineBuilder ebuilder(std::move(M)); 39 | std::string eeError; 40 | 41 | llvm::TargetOptions TO; 42 | TO.EnableFastISel = UseFastISel; 43 | TO.EnableIPRA = 
UseIPRA; 44 | 45 | std::unique_ptr EE(ebuilder.setErrorStr(&eeError) 46 | .setMCPU(llvm::sys::getHostCPUName()) 47 | .setEngineKind(llvm::EngineKind::JIT) 48 | .setOptLevel(CGLevel) 49 | .setTargetOptions(TO) 50 | .create()); 51 | 52 | if(!EE) { 53 | throw easy::ExecutionEngineCreateError(Name); 54 | } 55 | 56 | return EE; 57 | } 58 | 59 | static void MapGlobals(llvm::ExecutionEngine& EE, GlobalMapping* Globals) { 60 | for(GlobalMapping *GM = Globals; GM->Name; ++GM) { 61 | EE.addGlobalMapping(GM->Name, (uint64_t)GM->Address); 62 | } 63 | } 64 | 65 | void Function::WriteOptimizedToFile(llvm::Module const &M, std::string const& File, bool Append) { 66 | if(File.empty()) 67 | return; 68 | std::error_code Error; 69 | auto Mode = Append ? llvm::sys::fs::F_Append : llvm::sys::fs::F_None; 70 | llvm::raw_fd_ostream Out(File, Error, Mode); 71 | 72 | if(Error) 73 | throw CouldNotOpenFile(Error.message()); 74 | 75 | DLOG_S(INFO) << "dumping to file..."; 76 | 77 | Out << M; 78 | 79 | DLOG_S(INFO) << "done"; 80 | } 81 | 82 | std::unique_ptr 83 | Function::CompileAndWrap(const char*Name, GlobalMapping* Globals, 84 | std::unique_ptr LLVMCxt, 85 | std::unique_ptr M, 86 | llvm::CodeGenOpt::Level CGLevel, 87 | bool UseFastISel, 88 | bool UseIPRA) { 89 | 90 | llvm::Module* MPtr = M.get(); 91 | std::unique_ptr EE = GetEngine(std::move(M), Name, CGLevel, UseFastISel, UseIPRA); 92 | 93 | if(Globals) { 94 | MapGlobals(*EE, Globals); 95 | } 96 | 97 | void *Address = (void*)EE->getFunctionAddress(Name); 98 | 99 | assert(Address != 0); 100 | 101 | std::unique_ptr Holder(new easy::LLVMHolderImpl{std::move(EE), std::move(LLVMCxt), MPtr}); 102 | return std::unique_ptr(new Function(Address, std::move(Holder))); 103 | } 104 | 105 | llvm::Module const& Function::getLLVMModule() const { 106 | return *static_cast(*this->Holder).M_; 107 | } 108 | 109 | void easy::Function::serialize(std::ostream& os) const { 110 | std::string buf; 111 | llvm::raw_string_ostream stream(buf); 112 | 113 | 
LLVMHolderImpl const *H = reinterpret_cast(Holder.get()); 114 | llvm::WriteBitcodeToFile(PASS_MODULE_ARG(*(H->M_)), stream); 115 | stream.flush(); 116 | 117 | os << buf; 118 | } 119 | 120 | std::unique_ptr easy::Function::deserialize(std::istream& is) { 121 | 122 | auto &BT = BitcodeTracker::GetTracker(); 123 | 124 | std::string buf(std::istreambuf_iterator(is), {}); // read the entire istream 125 | auto MemBuf = llvm::MemoryBuffer::getMemBuffer(llvm::StringRef(buf)); 126 | 127 | std::unique_ptr Ctx(new llvm::LLVMContext()); 128 | auto ModuleOrError = llvm::parseBitcodeFile(*MemBuf, *Ctx); 129 | if(ModuleOrError.takeError()) { 130 | return nullptr; 131 | } 132 | 133 | auto M = std::move(ModuleOrError.get()); 134 | 135 | std::string FunName = easy::GetEntryFunctionName(*M); 136 | 137 | GlobalMapping* Globals = nullptr; 138 | if(void* OrigFunPtr = BT.getAddress(FunName)) { 139 | std::tie(std::ignore, Globals) = BT.getNameAndGlobalMapping(OrigFunPtr); 140 | } 141 | 142 | return 143 | CompileAndWrap(FunName.c_str(), Globals, 144 | std::move(Ctx), std::move(M), llvm::CodeGenOpt::Level::Aggressive, 145 | /*UseFastISel=*/ false, 146 | /*UseIPRA=*/ false); 147 | } 148 | 149 | bool Function::operator==(easy::Function const& other) const { 150 | LLVMHolderImpl& This = static_cast(*this->Holder); 151 | LLVMHolderImpl& Other = static_cast(*other.Holder); 152 | return This.M_ == Other.M_; 153 | } 154 | 155 | std::hash::result_type 156 | std::hash::operator()(argument_type const& F) const noexcept { 157 | LLVMHolderImpl& This = static_cast(*F.Holder); 158 | return std::hash{}(This.M_); 159 | } 160 | -------------------------------------------------------------------------------- /include/tuner/AnnealingTuner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | namespace tuner { 8 | 9 | ////////////// 10 | // a tuner that uses Simulated Annealing. 
// The algorithm is based on
// the description in:
//
//  Dimitris Bertsimas and John Tsitsiklis. "Simulated annealing."
//  Statistical Science 8, no. 1 (1993): 10-15.
//
// EscapeDifficulty corresponds to d*
//
// The tuner keeps two configurations: `currentState` (the accepted one) and
// `trialState` (a candidate). getNextConfig() keeps handing out whichever of
// the two still lacks a good-quality cost; once both have one it accepts or
// rejects the trial and generates a new trial by perturbing the current state.
//
class AnnealingTuner : public RandomTuner {
  // number of accept/reject decisions made so far; incremented in getNextConfig.
  uint64_t timeStep;
  // FIXME: how should we determine this value?
  // the costs related to it are in units of nanoseconds
  // so perhaps 1ms is a good number for now?
  // in the future this should probably be some percentage of the
  // "default config" cost.
  const double EscapeDifficulty = 1'000'000;

  // energy passed to perturbConfig while timeStep < 2; later steps use a
  // fraction of it (see perturbEnergy).
  const double MaxEnergy = 100.0;
  double MaxTemp; // determined by cooling schedule

  // set once analyze() has created the initial current/trial states.
  // (the identifier is spelled "initalized" throughout this class.)
  bool initalizedFirstState = false;

  GenResult currentState;
  GenResult trialState;
private:
  // Wraps KC in a shared pointer, pairs it with a fresh feedback object,
  // appends the pair to Configs_ and returns a reference to that new element.
  // Every caller in this class copies the result into a GenResult member.
  GenResult& saveConfig(KnobConfig KC) {
    // NOTE(review): make_shared appears without a template argument in this
    // copy of the file -- presumably make_shared<KnobConfig>; confirm against
    // the original source.
    auto Conf = std::make_shared(KC);
    auto FB = createFeedback(Cxt_->getFeedbackKind(), PREFERRED_FEEDBACK);

    // keep track of this config.
    Configs_.push_back({Conf, FB});
    return Configs_.back();
  }

  // True when R's feedback does not yet report good quality.
  // Refreshes the feedback statistics first (through the pointer R.second),
  // even though the method itself is const.
  bool missingCost(GenResult const& R) const {
    R.second->updateStats();
    return R.second->goodQuality() == false;
  }

  // Refreshes R's feedback statistics and returns its expected value, which
  // is what this tuner uses as the cost of a configuration.
  double getCost(GenResult const& R) const {
    R.second->updateStats();
    return R.second->expectedValue();
  }

  // corresponds to T(t)
  // Computes EscapeDifficulty / log(step); only meaningful for step >= 2,
  // which the assert enforces.
  double coolingSchedule(uint64_t step) const {
    // log(0) == -inf  =>  T(0) ~= 0
    // log(1) == 0     =>  T(1) ~= inf
    assert(step > 1 && "unexpected time step");
    return EscapeDifficulty / std::log(step);
  }

  // the amount of "energy" that should be used to perturb the config.
  // MaxEnergy for the first two time steps, afterwards MaxEnergy scaled by
  // the ratio of the current temperature to MaxTemp.
  double perturbEnergy() {
    if (timeStep < 2)
      return MaxEnergy;

    // scale based how cool the system is.
    return (coolingSchedule(timeStep) / MaxTemp) * MaxEnergy;
  }

  // choose the new current state given that we are trying to minimize the
  // "cost", aka, running time.
  // Returns a copy of either trialState or currentState.
  GenResult chooseNextState() {
    // when computing a probability, we know trial > cur, so diff > 0.
    // thus, for the initial time steps, which are weird, probabilities are:
    //
    // T(0) => P(trial) ~= exp(-diff / 0) ~= exp(-inf) ~= 0
    // T(1) => P(trial) ~= exp(-diff / inf) ~= exp(-0) ~= 1

    double cur = getCost(currentState);
    double trial = getCost(trialState);

    // a trial that is no worse than the current state is always accepted.
    if (trial <= cur)
      return trialState;

    // determine the probability of choosing the trial state.
    double prob;

    // handle special edge cases
    // (coolingSchedule asserts on steps 0 and 1, so they are hard-coded here.)
    if (timeStep == 0)
      prob = 0.0;
    else if (timeStep == 1)
      prob = 1.0;
    else
      prob = std::exp(-( (trial - cur) / coolingSchedule(timeStep) ));

    assert(prob >= 0.0 && prob <= 1.0);

    // make the decision
    // the draw lies in [0, 1), so prob == 1.0 always picks the trial and
    // prob == 0.0 always keeps the current state.
    std::uniform_real_distribution<> dis(0, 1); // [0, 1)
    if (dis(Gen_) >= prob)
      return currentState;

    return trialState;
  }


public:
  // Forwards KS and Cxt to RandomTuner, starts at time step 0 and records the
  // temperature at step 2 as MaxTemp.
  // NOTE(review): `std::shared_ptr Cxt` has no template argument in this copy
  // of the file; confirm the pointee type against the original source.
  AnnealingTuner(KnobSet KS, std::shared_ptr Cxt)
    : RandomTuner(KS, std::move(Cxt)) {
      timeStep = 0;
      MaxTemp = coolingSchedule(2); // largest value the schedule takes on.
    }

  // we do not free any knobs, since MPM or other objects
  // should end up freeing them.
  ~AnnealingTuner() {}

  // Runs RandomTuner::analyze and, on the first call only, seeds the search:
  // the current state is the default config, the trial state a random one.
  void analyze(llvm::Module &M) override {
    RandomTuner::analyze(M);

    // we do this here instead of in the constructor because we
    // want the first start to be aware of _all_ knobs.
    if (!initalizedFirstState) {
      currentState = saveConfig(genDefaultConfig(KS_));
      trialState = saveConfig(genRandomConfig(KS_, Gen_));
      initalizedFirstState = true;
    }
  }

  // Always false for this tuner.
  bool shouldCompileNext () override {
    // until we have feedback for the prior config,
    // we can't produce the next one.
    return false;
  }

  // Returns a reference to the member state that should run next: the current
  // or trial state while either still lacks a cost, otherwise a freshly
  // generated trial state (after advancing the time step).
  GenResult& getNextConfig() override {
    // ensure we have a cost for the currentState
    if (missingCost(currentState))
      return currentState;

    // ... and a cost for the trialState
    if (missingCost(trialState))
      return trialState;

    // now we can determine the next state.
    currentState = chooseNextState();
    timeStep++;

    // produce a trial state, which is a neighbor of the new current.
    double energy = perturbEnergy();
    auto curConf = *currentState.first;
    trialState = saveConfig(perturbConfig(curConf, KS_, Gen_, energy));

    return trialState;
  }

  // Prints the step/temperature/energy (only once timeStep > 1, where the
  // cooling schedule is defined), then the best, current and trial configs
  // to std::cout.
  void dump() override {
    if (timeStep > 1)
      std::cout << "step: " << timeStep
                << ", temperature: " << coolingSchedule(timeStep)
                << ", energy: " << perturbEnergy()
                << std::endl;

    if (auto best = bestSeen()) {
      std::cout << "------- best config ----------\n";
      dumpConfigInstance(std::cout, KS_, best.value());
    }

    std::cout << "------- current config --------\n";
    dumpConfigInstance(std::cout, KS_, currentState);

    std::cout << "------- trial config --------\n";
    dumpConfigInstance(std::cout, KS_, trialState);
  }

}; // end class AnnealingTuner

} // namespace tuner

--------------------------------------------------------------------------------
/benchmark/qsort.h:
--------------------------------------------------------------------------------
#ifndef BENCH_QSORT
#define BENCH_QSORT

///////////////////////////
// benchmark code

// three-way comparison of two ints: 1 if a > b, 0 if equal, -1 otherwise.
int int_cmp(int a, int b)
{
  if (a > b)
    return 1;
  if (a == b)
    return 0;
  return -1;
}

// true when v[lo..hi] (both bounds inclusive) never has an element that
// compares greater than its successor. Ranges with < 2 elements are sorted.
bool isSorted(int v[], int lo, int hi, int (*cmp)(int, int)) {
  const int count = hi - lo + 1;
  if (count < 2)
    return true;

  int idx = lo;
  while (idx < hi) {
    if (cmp(v[idx], v[idx+1]) > 0)
      return false;
    idx++;
  }
  return true;
}

/* swap: interchange v[i] and v[j] */
void swap(int v[], int i, int j)
{
  const int held = v[j];
  v[j] = v[i];
  v[i] = held;
}

// insertion sort of v[lo..hi] (inclusive): each element is bubbled left
// until its predecessor no longer compares greater.
void isort(int v[], int lo, int hi, int (*cmp)(int, int)) {
  for (int next = lo + 1; next <= hi; next++) {
    for (int pos = next; pos > lo && cmp(v[pos-1], v[pos]) > 0; pos--)
      swap(v, pos-1, pos);
  }
}

// https://en.wikipedia.org/wiki/Quicksort
// with modifications to support a cutoff to switch to insertion sort.
// sorts v[left..right] (inclusive); slices of at most cutOff elements are
// handed to isort, larger ones are partitioned around the last element.
void __attribute__((noinline)) Qsort(int v[], int left, int right, int (*cmp)(int, int), int cutOff)
{
  const int count = right - left + 1;

  // nothing to do for slices with < 2 elems
  if (count < 2)
    return;

  // small slice: let insertion sort finish it
  if (count <= cutOff) {
    isort(v, left, right, cmp);
    return;
  }

  // partition: everything that compares less than the pivot moves
  // in front of `boundary`.
  const int pivot = v[right];
  int boundary = left;
  int scan = left;
  while (scan < right) {
    if (cmp(v[scan], pivot) < 0) {
      swap(v, boundary, scan);
      boundary++;
    }
    scan++;
  }

  // emplace pivot
  swap(v, boundary, right);

  Qsort(v, left, boundary-1, cmp, cutOff);
  Qsort(v, boundary+1, right, cmp, cutOff);
}


/////////////////////////////////////////////////////////////////


/////////////////////
// benchmark driver

#define ISORT_MAX_CUTOFF 512
#define ISORT_MIN_CUTOFF 4
#define ISORT_IDEAL_CUTOFF 32

// TUNED, with all JIT overheads included.
93 | static void TUNING_qsort(benchmark::State& state) { 94 | const int SZ = state.range(0); 95 | const int ITERS = state.range(1); 96 | tuner::AutoTuner TK = static_cast(state.range(2)); 97 | 98 | std::vector vec(SZ); 99 | std::iota(vec.begin(), vec.end(), 0); 100 | 101 | 102 | for (auto _ : state) { 103 | tuner::ATDriver AT; 104 | auto Tuner = easy::options::tuner_kind(TK); 105 | auto Range = IntRange(ISORT_MIN_CUTOFF, ISORT_MAX_CUTOFF, ISORT_IDEAL_CUTOFF); 106 | 107 | for (int i = 0; i < ITERS; i++) { 108 | state.PauseTiming(); 109 | assert( isSorted(vec.data(), 0, vec.size()-1, int_cmp) ); 110 | std::random_shuffle(vec.begin(), vec.end()); 111 | benchmark::ClobberMemory(); 112 | state.ResumeTiming(); 113 | 114 | 115 | auto const& my_qsort = AT.reoptimize(Qsort, _1, _2, _3, int_cmp, 116 | Range, Tuner); 117 | 118 | my_qsort(vec.data(), 0, vec.size()-1); 119 | } 120 | // NOTE: we don't want to time the driver's destructor 121 | state.PauseTiming(); 122 | } 123 | } 124 | 125 | // just measuring the tuned function. 
126 | static void EXCLUDEDRIVER_qsort(benchmark::State& state) { 127 | const int SZ = state.range(0); 128 | const int ITERS = state.range(1); 129 | tuner::AutoTuner TK = static_cast(state.range(2)); 130 | 131 | std::vector vec(SZ); 132 | std::iota(vec.begin(), vec.end(), 0); 133 | 134 | 135 | for (auto _ : state) { 136 | tuner::ATDriver AT; 137 | auto Tuner = easy::options::tuner_kind(TK); 138 | auto Range = IntRange(ISORT_MIN_CUTOFF, ISORT_MAX_CUTOFF, ISORT_IDEAL_CUTOFF); 139 | 140 | for (int i = 0; i < ITERS; i++) { 141 | state.PauseTiming(); 142 | assert( isSorted(vec.data(), 0, vec.size()-1, int_cmp) ); 143 | std::random_shuffle(vec.begin(), vec.end()); 144 | benchmark::ClobberMemory(); 145 | 146 | auto const& my_qsort = AT.reoptimize(Qsort, _1, _2, _3, int_cmp, 147 | Range, Tuner); 148 | 149 | state.ResumeTiming(); 150 | 151 | my_qsort(vec.data(), 0, vec.size()-1); 152 | } 153 | // NOTE: we don't want to time the driver's destructor 154 | state.PauseTiming(); 155 | } 156 | } 157 | 158 | static void AOT_qsort(benchmark::State& state) { 159 | int SZ = state.range(0); 160 | const int ITERS = state.range(1); 161 | 162 | std::vector vec(SZ); 163 | std::iota(vec.begin(), vec.end(), 0); 164 | 165 | for (auto _ : state) { 166 | for (int i = 0; i < ITERS; i++) { 167 | state.PauseTiming(); 168 | assert( isSorted(vec.data(), 0, vec.size()-1, int_cmp) ); 169 | std::random_shuffle(vec.begin(), vec.end()); 170 | benchmark::ClobberMemory(); 171 | state.ResumeTiming(); 172 | 173 | Qsort(vec.data(), 0, vec.size()-1, int_cmp, ISORT_IDEAL_CUTOFF); 174 | } 175 | } 176 | } 177 | 178 | 179 | ///////////////////////////// 180 | // benchmark registration 181 | 182 | #define QSORT_MIN 32768 183 | #define QSORT_MAX 32768 184 | #define ITER_MIN 50 185 | #define ITER_MAX 400 186 | 187 | static void QSortArgs(benchmark::internal::Benchmark* b) { 188 | for (tuner::AutoTuner TK : tuner::AllTuners) 189 | for (int i = ITER_MIN; i <= ITER_MAX; i *= 2) 190 | for (int sz = QSORT_MIN; sz <= 
QSORT_MAX; sz *= 2) 191 | b->Args({sz, i, TK}); 192 | } 193 | 194 | BENCHMARK(AOT_qsort) 195 | ->Unit(benchmark::kMillisecond) 196 | ->RangeMultiplier(2) 197 | ->Ranges({{QSORT_MIN, QSORT_MAX}, {ITER_MIN, ITER_MAX}}) 198 | ->UseRealTime(); 199 | 200 | BENCHMARK(EXCLUDEDRIVER_qsort) 201 | ->Unit(benchmark::kMillisecond) 202 | ->Apply(QSortArgs) 203 | ->UseRealTime(); 204 | 205 | BENCHMARK(TUNING_qsort) 206 | ->Unit(benchmark::kMillisecond) 207 | ->Apply(QSortArgs) 208 | ->UseRealTime(); 209 | 210 | 211 | // cleanup after ourselves 212 | #undef ISORT_MAX_CUTOFF 213 | #undef ISORT_MIN_CUTOFF 214 | #undef ISORT_IDEAL_CUTOFF 215 | #undef QSORT_MIN 216 | #undef QSORT_MAX 217 | #undef ITER_MIN 218 | #undef ITER_MAX 219 | 220 | 221 | #endif // BENCH_QSORT 222 | -------------------------------------------------------------------------------- /include/tuner/MDUtils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | #pragma GCC diagnostic push 12 | #pragma GCC diagnostic ignored "-Wunused-function" 13 | 14 | // NOTE 15 | // these utils are included in both the clang pass plugin, and 16 | // the runtime library, so we wrap this in an anonymous namespace so 17 | // the definitions can be included wherever needed. 18 | // this may cause "unused function" warnings, so we turn those off for these. 

// NOTE(review): several template argument lists (SmallVector, SmallSetVector,
// std::list, std::vector, dyn_cast) are absent in this copy of the file; the
// code below is reproduced as found -- confirm against the original source.
namespace {

using namespace llvm;

// make sure to keep this in sync with LoopKnob.h
char const* TAG = "llvm.loop.id";

// name of the function-level metadata that holds the group of loop transforms.
char const* TRANSFORM_ATTR = "looptransform";

// Wraps the integer Val (of type Ty) as a Metadata node.
Metadata* mkMDInt(IntegerType* Ty, uint64_t Val, bool isSigned = false) {
  auto ConstInt = ConstantInt::get(Ty, Val, isSigned);
  return ValueAsMetadata::get(ConstInt);
}

// Appends newXForms to the transforms already attached to F under
// TRANSFORM_ATTR (if any) and re-attaches the combined list.
// return val indicates whether the module was changed
// (false only when newXForms is empty).
// NOTE the transform metadata structure should follow
// the work in Kruse's pragma branches
bool addLoopTransformGroup(Function* F, std::list &newXForms) {
  if (newXForms.empty())
    return false;

  SmallVector AllTransforms;
  auto &Ctx = F->getContext();

  // collect the existing transforms, if any
  auto FuncMD = F->getMetadata(TRANSFORM_ATTR);
  if (FuncMD)
    for (auto &X : FuncMD->operands())
      AllTransforms.push_back(X.get());

  // add new transforms to the group
  for (auto X : newXForms)
    AllTransforms.push_back(X);

  auto AllTransformsMD = MDNode::get(Ctx, AllTransforms);
  F->setMetadata(TRANSFORM_ATTR, AllTransformsMD);
  return true;
}

// Builds the node {XFORM_NAME, {names...}, {sizes...}} where, for every entry
// of Dims, `first` is stringified into the name list and `second` becomes a
// 64-bit integer in the size list.
MDNode* createTilingMD(LLVMContext& Cxt, const char* XFORM_NAME, std::vector> Dims) {
  IntegerType* i64 = IntegerType::get(Cxt, 64);

  SmallVector Names;
  SmallVector Sizes;

  for (auto Dim : Dims) {
    Names.push_back(MDString::get(Cxt, std::to_string(Dim.first)));
    Sizes.push_back(mkMDInt(i64, Dim.second));
  }

  // build the arguments to the transform
  Metadata* NameList = MDNode::get(Cxt, Names);
  Metadata* SizeList = MDNode::get(Cxt, Sizes);

  // build the transform node itself
  Metadata* XFormName = MDString::get(Cxt, XFORM_NAME);
  MDNode* Transform = MDNode::get(Cxt, {XFormName, NameList, SizeList});

  return Transform;
}


// Creates the node {TAG, "<id>"} using the current value of LoopIDs as the
// id, and post-increments LoopIDs (it is taken by reference).
MDNode* createLoopName(LLVMContext& Context, unsigned &LoopIDs) {
  // build a Polly-compatible ID for the loop
  MDString *Tag = MDString::get(Context, TAG);
  unsigned IntVal = LoopIDs++;
  MDString* Val = MDString::get(Context, std::to_string(IntVal));

  MDNode *KnobTag = MDNode::get(Context, {Tag, Val});
  return KnobTag;
}

// parse the LoopMD, looking for the tag added by createLoopName
// Returns the numeric id stored in the first two-operand {TAG, string} entry.
// Calls report_fatal_error when no such entry exists or when the id is still
// the ~0 sentinel after parsing.
unsigned getLoopName(MDNode* LoopMD) {
  for (const MDOperand& Op : LoopMD->operands()) {
    MDNode *Entry = dyn_cast(Op.get());
    if (!Entry || Entry->getNumOperands() != 2)
      continue;

    MDString *Tag = dyn_cast(Entry->getOperand(0).get());
    MDString *Val = dyn_cast(Entry->getOperand(1).get());

    if (!Tag || !Val)
      continue;

    if (Tag->getString() != TAG)
      continue;

    llvm::StringRef Str = Val->getString();
    // NOTE(review): the result of getAsInteger is ignored and failure is
    // detected only through the ~0 sentinel, so a genuine id equal to ~0u
    // would also be rejected. Presumably getAsInteger returns true on a parse
    // failure -- confirm against the StringRef docs and consider testing its
    // return value instead. `IntVal == ~0` also compares unsigned with int.
    unsigned IntVal = ~0;
    Str.getAsInteger(10, IntVal);

    if (IntVal == ~0)
      report_fatal_error("bad loop ID metadata on our tag!");

    return IntVal;
  } // end loop

  report_fatal_error("not all loops have an ID tag for tuning");
}

// True when MD is a node with at least one operand whose first operand is a
// string equal to Key.
inline bool matchesLoopOption(Metadata *MD, StringRef &Key) {
  MDNode *MDN = dyn_cast(MD);
  if (!MDN || MDN->getNumOperands() < 1)
    return false;

  MDString *EntryKey = dyn_cast(MDN->getOperand(0).get());

  if (!EntryKey || EntryKey->getString() != Key)
    return false;

  return true;
}

// a functional-style insertion with replacement that preserves all
// non-matching operands of the MDNode, and returns a valid LoopMD.
//
// Every operand matching Key is dropped first. Then:
//   - DeleteOnly == false, Val != nullptr: the entry {Key, Val} is added.
//   - DeleteOnly == false, Val == nullptr: the key-only entry {Key} is added.
//   - DeleteOnly == true: nothing is added; Val must be nullptr (asserted).
//
// example:
//
// BEFORE:
//
// !x = {!x, ... !1, ...}
// !1 = {"loop.vectorize.enable", i32 1}
//
// AFTER THE FOLLOWING ACTION
//
// updateLMD(!x, "loop.vectorize.enable", i32 0)
//
// !y = {!y, ... !2}
// !2 = {"loop.vectorize.enable", i32 0}
//
// The node !y will be returned.
//
MDNode* updateLMD(MDNode *LoopMD, StringRef Key, Metadata* Val, bool DeleteOnly = false) {
  SmallSetVector MDs(LoopMD->op_begin(), LoopMD->op_end());
  LLVMContext &Cxt = LoopMD->getContext();

  // if the loop-control option already exists, remove it.
  MDs.remove_if([&](Metadata *MD) {
    return matchesLoopOption(MD, Key);
  });

  if (!DeleteOnly) {
    // add the new option.
    MDString* KeyMD = MDString::get(Cxt, Key);

    if (Val)
      MDs.insert(MDNode::get(Cxt, {KeyMD, Val}));
    else
      MDs.insert(MDNode::get(Cxt, {KeyMD}));
  } else {
    // deletion is done by key only, we do not try to match values too.
    assert(!Val && "did not expect SOME value during deletion!");
  }

  // create the new MDNode
  MDNode *NewLoopMD = MDNode::get(Cxt, MDs.getArrayRef());

  // since this is Loop metadata, we need to recreate the self-loop.
  // (assumes operand 0 of LoopMD was the old node's self-reference, which
  // this overwrites with the new node -- TODO confirm for all callers.)
  NewLoopMD->replaceOperandWith(0, NewLoopMD);

  return NewLoopMD;
}

} // end namespace

#pragma GCC diagnostic pop

--------------------------------------------------------------------------------
/include/tuner/LoopKnob.h:
--------------------------------------------------------------------------------
#pragma once

#include
#include

#include
#include
#include

namespace tuner {

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"

  // metadata keys used for the loop options below.
  namespace loop_md {
    // make sure to keep this in sync with MDUtils.h
    static char const* TAG = "llvm.loop.id";

    static char const* UNROLL_DISABLE = "llvm.loop.unroll.disable";
    static char const* UNROLL_COUNT = "llvm.loop.unroll.count";
    static char const* UNROLL_FULL = "llvm.loop.unroll.full";
    static char const* VECTORIZE_ENABLE = "llvm.loop.vectorize.enable";
    static char const* VECTORIZE_WIDTH = "llvm.loop.vectorize.width";
    static char const* LICM_VER_DISABLE = "llvm.loop.licm_versioning.disable";
    static char const* INTERLEAVE_COUNT = "llvm.loop.interleave.count";
    static char const* DISTRIBUTE = "llvm.loop.distribute.enable";
    static char const* SECTION = "llvm.loop.tile";
  }

#pragma GCC diagnostic pop

// https://llvm.org/docs/LangRef.html#llvm-loop
//
// NOTE if you add a new option here, make sure to update:
//  1. LoopKnob.cpp::addToLoopMD
//    1a. You might need to update MDUtils.h while doing this.
//  2. operator<<(stream, LoopSetting) and operator== in LoopKnob.cpp
//  3.
//     any generators of a LoopSetting,
//     like genRandomLoopSetting or genNearbyLoopSetting
//
// Every field is an optional; an empty optional is flattened to MISSING.
// NOTE(review): the element types of the std::optional fields (and of the two
// flatten overloads taking an optional) are absent in this copy of the file;
// confirm them against the original source.
struct LoopSetting {
  // hints only
  std::optional VectorizeWidth{}; // 1 = disable entirely, >= 2 suggests width

  // hint only
  std::optional InterleaveCount{}; // 0 = off, 1 = automatic, >=2 is count.

  // TODO: these 3 ought to be combined into 1 integer option
  std::optional UnrollDisable{}; // llvm.loop.unroll.disable
  std::optional UnrollFull{};    // llvm.loop.unroll.full
  std::optional UnrollCount{};   // llvm.loop.unroll.count

  std::optional LICMVerDisable{}; // llvm.loop.licm_versioning.disable

  std::optional Distribute{};

  ///////////////////
  // NOTE: polly-required options follow

  // loop sectioning, aka strip-mining or 1 dimensional tiling.
  // we will use the term "sectioning" throughout the code.
  std::optional Section{};

  ///////////////////////

  // number of floats flatten() writes: the 7 always-present options, plus
  // Section when POLLY_KNOBS is defined. Must be kept in step with flatten().
  size_t size() const {
    return 7
    #ifdef POLLY_KNOBS
      + 1
    #endif
    ;
  }

  // Writes one float per option into slice[0 .. size()-1], in declaration
  // order. Throws std::logic_error if the number of values written differs
  // from LS.size().
  static void flatten(float* slice, LoopSetting LS) {
    size_t i = 0;

    LoopSetting::flatten(slice + i++, LS.VectorizeWidth);

    LoopSetting::flatten(slice + i++, LS.InterleaveCount);

    LoopSetting::flatten(slice + i++, LS.UnrollDisable);
    LoopSetting::flatten(slice + i++, LS.UnrollFull);
    LoopSetting::flatten(slice + i++, LS.UnrollCount);

    LoopSetting::flatten(slice + i++, LS.LICMVerDisable);

    LoopSetting::flatten(slice + i++, LS.Distribute);

    #ifdef POLLY_KNOBS

    LoopSetting::flatten(slice + i++, LS.Section);

    #endif

    if (i != LS.size())
      throw std::logic_error("size does not match expectations");
  }

  // flag-style option: 1.0 / 0.0 when set, MISSING when empty.
  // (MISSING is not defined in this part of the file.)
  static void flatten(float* slice, std::optional opt) {
    if (opt)
      *slice = opt.value() ? 1.0 : 0.0;
    else
      *slice = MISSING;
  }

  // numeric option: the value converted to float when set, MISSING when empty.
  static void flatten(float* slice, std::optional opt) {
    if (opt)
      *slice = (float) opt.value();
    else
      *slice = MISSING;
  }

};

// A knob holding the LoopSetting of one loop, identified by its numeric loop
// ID, together with its nesting depth and its child knobs.
// NOTE(review): the template arguments of the Knob base and of the
// std::vector holding the children are absent in this copy of the file.
class LoopKnob : public Knob {
private:
  LoopSetting Opt;
  unsigned LoopID;
  unsigned nestingDepth;
  std::vector kids;

  // NOTE could probably add some utilities to check the
  // sanity of a loop setting to this class?


public:
  // NOTE(review): the initializer list names `kids` before `nestingDepth`,
  // but members are initialized in declaration order (nestingDepth first).
  // Harmless here since neither depends on the other, but it triggers
  // -Wreorder; consider matching the declaration order.
  LoopKnob (unsigned name, std::vector children_, unsigned depth_)
    : LoopID(name),
      kids(std::move(children_)),
      nestingDepth(depth_) {}

  // the default is a LoopSetting with every option left empty.
  LoopSetting getDefault() const override {
    LoopSetting Empty;
    return Empty;
  }

  // loop structure information
  std::vector& children() { return kids; }
  auto begin() { return kids.begin(); }
  auto end() { return kids.end(); }
  unsigned loopDepth() const { return nestingDepth; }

  LoopSetting getVal() const override { return Opt; }

  void setVal (LoopSetting LS) override { Opt = LS; }

  unsigned getLoopName() const { return LoopID; }

  // defined out of line.
  void apply (llvm::Module &M) override;

  // human-readable name: "loop #" followed by the loop ID.
  virtual std::string getName() const override {
    return "loop #" + std::to_string(getLoopName());
  }

  // number of floats the current setting flattens to (see LoopSetting::size).
  virtual size_t size() const override {
    return Opt.size();
  }

}; // end class


// any specializations of genRandomLoopSetting you would like to use
// should be declared as an extern template here, and then instantiated
// in LoopSettingGen, since I don't want to include the generic impl here.
165 | // see: https://stackoverflow.com/questions/10632251/undefined-reference-to-template-function 166 | template < typename RNE > // meets the requirements of RandomNumberEngine 167 | LoopSetting genRandomLoopSetting(RNE &Eng); 168 | 169 | extern template 170 | LoopSetting genRandomLoopSetting(std::mt19937_64&); 171 | 172 | 173 | template < typename RNE > 174 | LoopSetting genNearbyLoopSetting(RNE &Eng, LoopSetting LS, double energy); 175 | 176 | extern template 177 | LoopSetting genNearbyLoopSetting(std::mt19937_64&, LoopSetting, double); 178 | 179 | 180 | // handy type aliases. 181 | namespace knob_type { 182 | using Loop = LoopKnob; 183 | } 184 | 185 | } // namespace tuner 186 | 187 | std::ostream& operator<<(std::ostream &o, tuner::LoopSetting &LS); 188 | bool operator==(tuner::LoopSetting const& A, tuner::LoopSetting const& B); 189 | bool operator!=(tuner::LoopSetting const& A, tuner::LoopSetting const& B); 190 | --------------------------------------------------------------------------------