├── .gitignore ├── CMakeLists.txt ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.TXT ├── README.md ├── docs ├── Heatmap.png ├── Heatmaps.md └── OptimizingClang.md ├── llvm.patch ├── paper └── reproduce-bolt-cgo19 │ ├── README.md │ ├── breakdown.sh │ ├── clang │ └── Makefile │ └── gcc │ └── Makefile ├── runtime ├── CMakeLists.txt └── instr.cpp ├── src ├── BinaryBasicBlock.cpp ├── BinaryBasicBlock.h ├── BinaryContext.cpp ├── BinaryContext.h ├── BinaryData.cpp ├── BinaryData.h ├── BinaryFunction.cpp ├── BinaryFunction.h ├── BinaryFunctionProfile.cpp ├── BinaryLoop.h ├── BinaryPassManager.cpp ├── BinaryPassManager.h ├── BinarySection.cpp ├── BinarySection.h ├── BoltAddressTranslation.cpp ├── BoltAddressTranslation.h ├── BoltDiff.cpp ├── CMakeLists.txt ├── CacheMetrics.cpp ├── CacheMetrics.h ├── DWARFRewriter.cpp ├── DWARFRewriter.h ├── DataAggregator.cpp ├── DataAggregator.h ├── DataReader.cpp ├── DataReader.h ├── DebugData.cpp ├── DebugData.h ├── DynoStats.cpp ├── DynoStats.h ├── Exceptions.cpp ├── Exceptions.h ├── ExecutableFileMemoryManager.cpp ├── ExecutableFileMemoryManager.h ├── Heatmap.cpp ├── Heatmap.h ├── JumpTable.cpp ├── JumpTable.h ├── MCPlus.h ├── MCPlusBuilder.cpp ├── MCPlusBuilder.h ├── ParallelUtilities.cpp ├── ParallelUtilities.h ├── Passes │ ├── Aligner.cpp │ ├── Aligner.h │ ├── AllocCombiner.cpp │ ├── AllocCombiner.h │ ├── BinaryFunctionCallGraph.cpp │ ├── BinaryFunctionCallGraph.h │ ├── BinaryPasses.cpp │ ├── BinaryPasses.h │ ├── CMakeLists.txt │ ├── CachePlusReorderAlgorithm.cpp │ ├── CallGraph.cpp │ ├── CallGraph.h │ ├── CallGraphWalker.cpp │ ├── CallGraphWalker.h │ ├── DataflowAnalysis.cpp │ ├── DataflowAnalysis.h │ ├── DataflowInfoManager.cpp │ ├── DataflowInfoManager.h │ ├── DominatorAnalysis.h │ ├── FrameAnalysis.cpp │ ├── FrameAnalysis.h │ ├── FrameOptimizer.cpp │ ├── FrameOptimizer.h │ ├── HFSort.cpp │ ├── HFSort.h │ ├── HFSortPlus.cpp │ ├── IdenticalCodeFolding.cpp │ ├── IdenticalCodeFolding.h │ ├── IndirectCallPromotion.cpp │ ├── 
IndirectCallPromotion.h │ ├── Inliner.cpp │ ├── Inliner.h │ ├── Instrumentation.cpp │ ├── Instrumentation.h │ ├── JTFootprintReduction.cpp │ ├── JTFootprintReduction.h │ ├── LFenceInsertion.cpp │ ├── LFenceInsertion.h │ ├── LivenessAnalysis.cpp │ ├── LivenessAnalysis.h │ ├── LongJmp.cpp │ ├── LongJmp.h │ ├── MCF.cpp │ ├── MCF.h │ ├── PLTCall.cpp │ ├── PLTCall.h │ ├── PettisAndHansen.cpp │ ├── ReachingDefOrUse.h │ ├── ReachingInsns.h │ ├── RegAnalysis.cpp │ ├── RegAnalysis.h │ ├── RegReAssign.cpp │ ├── RegReAssign.h │ ├── ReorderAlgorithm.cpp │ ├── ReorderAlgorithm.h │ ├── ReorderData.cpp │ ├── ReorderData.h │ ├── ReorderFunctions.cpp │ ├── ReorderFunctions.h │ ├── ReorderUtils.h │ ├── RetpolineInsertion.cpp │ ├── RetpolineInsertion.h │ ├── ShrinkWrapping.cpp │ ├── ShrinkWrapping.h │ ├── StackAllocationAnalysis.cpp │ ├── StackAllocationAnalysis.h │ ├── StackAvailableExpressions.cpp │ ├── StackAvailableExpressions.h │ ├── StackPointerTracking.cpp │ ├── StackPointerTracking.h │ ├── StackReachingUses.cpp │ ├── StackReachingUses.h │ ├── StokeInfo.cpp │ ├── StokeInfo.h │ ├── ValidateInternalCalls.cpp │ ├── ValidateInternalCalls.h │ ├── VeneerElimination.cpp │ └── VeneerElimination.h ├── ProfileReader.cpp ├── ProfileReader.h ├── ProfileWriter.cpp ├── ProfileWriter.h ├── ProfileYAMLMapping.h ├── Relocation.cpp ├── Relocation.h ├── RewriteInstance.cpp ├── RewriteInstance.h ├── Target │ ├── AArch64 │ │ ├── AArch64MCPlusBuilder.cpp │ │ └── CMakeLists.txt │ ├── CMakeLists.txt │ └── X86 │ │ ├── CMakeLists.txt │ │ └── X86MCPlusBuilder.cpp ├── llvm-bolt.cpp └── merge-fdata │ ├── CMakeLists.txt │ └── merge-fdata.cpp └── test ├── CMakeLists.txt ├── X86 ├── Inputs │ ├── blarge.yaml │ ├── issue20.yaml │ ├── issue26.yaml │ ├── pre-aggregated.txt │ └── srol-bug-input.yaml ├── issue20.test ├── issue26.test ├── pre-aggregated-perf.test └── srol-bug.test ├── lit.cfg.py └── lit.site.cfg.py.in /.gitignore: -------------------------------------------------------------------------------- 1 | 
#==============================================================================# 2 | # This file specifies intentionally untracked files that git should ignore. 3 | # See: http://www.kernel.org/pub/software/scm/git/docs/gitignore.html 4 | # 5 | # This file is intentionally different from the output of `git svn show-ignore`, 6 | # as most of those are useless. 7 | #==============================================================================# 8 | 9 | #==============================================================================# 10 | # File extensions to be ignored anywhere in the tree. 11 | #==============================================================================# 12 | # Temp files created by most text editors. 13 | *~ 14 | # Merge files created by git. 15 | *.orig 16 | # Byte compiled python modules. 17 | *.pyc 18 | # vim swap files 19 | .*.sw? 20 | .sw? 21 | #OS X specific files. 22 | .DS_store 23 | 24 | # Nested build directory 25 | /build 26 | 27 | #==============================================================================# 28 | # Explicit files to ignore (only matches one). 
29 | #==============================================================================# 30 | # Various tag programs 31 | /tags 32 | /TAGS 33 | /GPATH 34 | /GRTAGS 35 | /GSYMS 36 | /GTAGS 37 | .gitusers 38 | autom4te.cache 39 | cscope.files 40 | cscope.out 41 | autoconf/aclocal.m4 42 | autoconf/autom4te.cache 43 | /compile_commands.json 44 | 45 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(ExternalProject) 2 | 3 | set(BOLT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) 4 | set(BOLT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) 5 | set(CMAKE_CXX_STANDARD 14) 6 | 7 | ExternalProject_Add(bolt_rt 8 | SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/runtime" 9 | STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-stamps 10 | BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-bins 11 | CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} 12 | -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} 13 | -DCMAKE_BUILD_TYPE=Release 14 | -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} 15 | -DCMAKE_INSTALL_PREFIX=${LLVM_BINARY_DIR} 16 | # You might want to set this to True if actively developing bolt_rt, otherwise 17 | # cmake will not rebuild it after source code changes 18 | BUILD_ALWAYS True 19 | ) 20 | 21 | install(CODE "execute_process\(COMMAND \${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=\${CMAKE_INSTALL_PREFIX} -P ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-bins/cmake_install.cmake \)" 22 | COMPONENT bolt_rt) 23 | 24 | add_llvm_install_targets(install-bolt_rt 25 | DEPENDS bolt_rt 26 | COMPONENT bolt_rt) 27 | 28 | add_subdirectory(src) 29 | add_subdirectory(test) 30 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 
4 | Please read the [full text](https://code.facebook.com/pages/876921332402685/open-source-code-of-conduct) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to BOLT 2 | 3 | We want to make contributing to BOLT as easy and transparent as 4 | possible. 5 | 6 | ## Coding Standards 7 | 8 | Follow [LLVM Coding Standards](http://llvm.org/docs/CodingStandards.html). 9 | 10 | ## Pull Requests 11 | 12 | We actively welcome your pull requests. 13 | 14 | * Fork the repo and create your branch from `master`. 15 | * Create your PR. 16 | * If you have added code that should be tested, add tests. 17 | * Ensure the test suite passes. 18 | * If the PR is not trivial, your PR description should have details on what the PR 19 | does. 20 | * Every PR needs to be reviewed by at least one [maintainer](#maintainers) 21 | before it can be merged. 22 | * When all of the tests are passing and all other conditions described above 23 | are satisfied, the PR is ready for review and merge. 24 | * If you haven't already, complete the Contributor License Agreement ("CLA"). 25 | 26 | ## Contributor License Agreement ("CLA") 27 | 28 | In order to accept your pull request, we need you to submit a CLA. You only need 29 | to do this once to work on any of Facebook's open source projects. 30 | 31 | Complete your CLA here: <https://code.facebook.com/cla> 32 | 33 | ## Issues 34 | 35 | We use GitHub issues to track public bugs. Please ensure your description is 36 | clear and has sufficient instructions to be able to reproduce the issue. 
37 | 38 | ## Maintainers 39 | 40 | * Maksim Panchenko ([maksfb](https://github.com/maksfb)) 41 | * Rafael Auler ([rafaelauler](https://github.com/rafaelauler)) 42 | 43 | ## License 44 | 45 | By contributing to the project, you agree that your contributions will be licensed 46 | under the LICENSE file in the root directory of this source tree. 47 | -------------------------------------------------------------------------------- /LICENSE.TXT: -------------------------------------------------------------------------------- 1 | ============================================================================== 2 | LLVM Release License 3 | ============================================================================== 4 | University of Illinois/NCSA 5 | Open Source License 6 | 7 | Copyright (c) 2003-2017 University of Illinois at Urbana-Champaign. 8 | All rights reserved. 9 | 10 | Developed by: 11 | 12 | LLVM Team 13 | 14 | University of Illinois at Urbana-Champaign 15 | 16 | http://llvm.org 17 | 18 | Permission is hereby granted, free of charge, to any person obtaining a copy of 19 | this software and associated documentation files (the "Software"), to deal with 20 | the Software without restriction, including without limitation the rights to 21 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 22 | of the Software, and to permit persons to whom the Software is furnished to do 23 | so, subject to the following conditions: 24 | 25 | * Redistributions of source code must retain the above copyright notice, 26 | this list of conditions and the following disclaimers. 27 | 28 | * Redistributions in binary form must reproduce the above copyright notice, 29 | this list of conditions and the following disclaimers in the 30 | documentation and/or other materials provided with the distribution. 
31 | 32 | * Neither the names of the LLVM Team, University of Illinois at 33 | Urbana-Champaign, nor the names of its contributors may be used to 34 | endorse or promote products derived from this Software without specific 35 | prior written permission. 36 | 37 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 38 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 39 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 40 | CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 41 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 42 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE 43 | SOFTWARE. 44 | 45 | ============================================================================== 46 | Copyrights and Licenses for Third Party Software Distributed with LLVM: 47 | ============================================================================== 48 | The LLVM software contains code written by third parties. Such software will 49 | have its own individual LICENSE.TXT file in the directory in which it appears. 50 | This file will describe the copyrights, license, and restrictions which apply 51 | to that code. 52 | 53 | The disclaimer of warranty in the University of Illinois Open Source License 54 | applies to all code in the LLVM Distribution, and nothing in any of the 55 | other licenses gives permission to use the names of the LLVM Team or the 56 | University of Illinois to endorse or promote products derived from this 57 | Software. 
58 | 59 | The following pieces of software have additional or alternate copyrights, 60 | licenses, and/or restrictions: 61 | 62 | Program Directory 63 | ------- --------- 64 | -------------------------------------------------------------------------------- /docs/Heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/signalapp/BOLT/130d2c758964950cf713bddef123104b41642161/docs/Heatmap.png -------------------------------------------------------------------------------- /docs/Heatmaps.md: -------------------------------------------------------------------------------- 1 | # Code Heatmaps 2 | 3 | BOLT has gained the ability to print code heatmaps based on 4 | sampling-based LBR profiles generated by `perf`. The output is produced 5 | in colored ASCII to be displayed in a color-capable terminal. It looks 6 | something like this: 7 | 8 | ![](./Heatmap.png) 9 | 10 | Heatmaps can be generated for BOLTed and non-BOLTed binaries. You can 11 | use them to compare the code layout before and after optimizations. 12 | 13 | To generate a heatmap, start with running your app under `perf`: 14 | 15 | ```bash 16 | $ perf record -e cycles:u -j any,u -- <executable with args> 17 | ``` 18 | or if you want to monitor the existing process(es): 19 | ```bash 20 | $ perf record -e cycles:u -j any,u [-p PID|-a] -- sleep <interval> 21 | ``` 22 | 23 | Note that at the moment running with LBR (`-j any,u` or `-b`) is 24 | a requirement. 25 | 26 | Once the run is complete, and `perf.data` is generated, run BOLT in 27 | a heatmap mode: 28 | 29 | ```bash 30 | $ llvm-bolt heatmap -p perf.data <executable> 31 | ``` 32 | 33 | By default the heatmap will be dumped to *stdout*. You can change it 34 | with `-o <heatmapfile>` option. Each character/block in the heatmap 35 | shows the execution data accumulated for corresponding 64 bytes of 36 | code. You can change this granularity with a `-block-size` option. 37 | E.g. set it to 4096 to see code usage grouped by 4K pages. 
38 | Other useful options are: 39 | 40 | ```bash 41 | -line-size=<uint> - number of entries per line (default 256) 42 | -max-address=<uint> - maximum address considered valid for heatmap (default 4GB) 43 | ``` 44 | 45 | If you prefer to look at the data in a browser (or would like to share 46 | it that way), then you can use an HTML conversion tool. E.g.: 47 | 48 | ```bash 49 | $ aha -b -f <heatmapfile> > <heatmapfile>.html 50 | ``` 51 | -------------------------------------------------------------------------------- /paper/reproduce-bolt-cgo19/breakdown.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | function run_one() { 4 | make clean_measurements 5 | make PERFCOUNTERS="${PERF}" BOLTOPTS="${BOLT}" results_bolt 6 | mkdir -p ../results-${1}-${2} 7 | cp * -v ../results-${1}-${2} 8 | } 9 | 10 | function run() { 11 | make clean_bolted_builds 12 | PERF=" " 13 | run_one ${1} 1 14 | PERF="-e instructions,L1-dcache-load-misses,dTLB-load-misses " 15 | run_one ${1} 2 16 | PERF="-e instructions,L1-icache-load-misses,iTLB-load-misses " 17 | run_one ${1} 3 18 | PERF="-e cycles,instructions,LLC-load-misses " 19 | run_one ${1} 4 20 | } 21 | 22 | function run_suite() { 23 | cd ${1} 24 | 25 | BOLT="-reorder-blocks=cache+ -dyno-stats -use-gnu-stack" 26 | run bb-reorder 27 | 28 | BOLT="-reorder-blocks=cache+ -dyno-stats -use-gnu-stack -icf=1" 29 | run bb-reorder-icf 30 | 31 | BOLT="-reorder-blocks=cache+ -dyno-stats -use-gnu-stack -icf=1 -split-functions=3 -split-all-cold" 32 | run bb-reorder-icf-split 33 | 34 | BOLT="-reorder-blocks=cache+ -reorder-functions=hfsort+ -split-functions=3 -split-all-cold -dyno-stats -icf=1 -use-gnu-stack" 35 | run bb-func 36 | 37 | BOLT="-reorder-blocks=cache+ -reorder-functions=hfsort+ -split-functions=3 -split-all-cold -dyno-stats -icf=1 -use-gnu-stack -simplify-rodata-loads -frame-opt=hot -indirect-call-promotion=jump-tables -indirect-call-promotion-topn=3 -plt=all" 38 | run bb-all 39 | } 40 | 41 | run_suite clang 42 | 43 | 
-------------------------------------------------------------------------------- /runtime/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1.0) 2 | set(CMAKE_CXX_STANDARD 11) 3 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 4 | set(CMAKE_CXX_EXTENSIONS OFF) 5 | 6 | project(libbolt_rt_project) 7 | 8 | add_library(bolt_rt STATIC 9 | instr.cpp 10 | ) 11 | 12 | install(TARGETS bolt_rt DESTINATION lib) 13 | -------------------------------------------------------------------------------- /src/BinaryData.cpp: -------------------------------------------------------------------------------- 1 | //===--- BinaryData.cpp - Representation of section data objects ----------===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 7 | // 8 | //===----------------------------------------------------------------------===// 9 | // 10 | //===----------------------------------------------------------------------===// 11 | 12 | #include "BinaryData.h" 13 | #include "BinarySection.h" 14 | #include "llvm/Support/CommandLine.h" 15 | #include "llvm/Support/Regex.h" 16 | 17 | using namespace llvm; 18 | using namespace bolt; 19 | 20 | #undef DEBUG_TYPE 21 | #define DEBUG_TYPE "bolt" 22 | 23 | namespace opts { 24 | extern cl::OptionCategory BoltCategory; 25 | extern cl::opt Verbosity; 26 | 27 | cl::opt 28 | PrintSymbolAliases("print-aliases", 29 | cl::desc("print aliases when printing objects"), 30 | cl::Hidden, 31 | cl::ZeroOrMore, 32 | cl::cat(BoltCategory)); 33 | } 34 | 35 | bool BinaryData::isAbsolute() const { 36 | return Flags & SymbolRef::SF_Absolute; 37 | } 38 | 39 | bool BinaryData::isMoveable() const { 40 | return (!isAbsolute() && 41 | (IsMoveable && 42 | (!Parent || isTopLevelJumpTable()))); 43 | } 44 | 45 | void BinaryData::merge(const BinaryData *Other) { 46 | assert(!Size || 
!Other->Size || Size == Other->Size); 47 | assert(Address == Other->Address); 48 | assert(*Section == *Other->Section); 49 | assert(OutputOffset == Other->OutputOffset); 50 | assert(OutputSection == Other->OutputSection); 51 | Names.insert(Names.end(), Other->Names.begin(), Other->Names.end()); 52 | Symbols.insert(Symbols.end(), Other->Symbols.begin(), Other->Symbols.end()); 53 | MemData.insert(MemData.end(), Other->MemData.begin(), Other->MemData.end()); 54 | Flags |= Other->Flags; 55 | if (!Size) 56 | Size = Other->Size; 57 | } 58 | 59 | bool BinaryData::hasNameRegex(StringRef NameRegex) const { 60 | Regex MatchName(NameRegex); 61 | for (auto &Name : Names) 62 | if (MatchName.match(Name)) 63 | return true; 64 | return false; 65 | } 66 | 67 | StringRef BinaryData::getSectionName() const { 68 | return getSection().getName(); 69 | } 70 | 71 | StringRef BinaryData::getOutputSectionName() const { 72 | return getOutputSection().getName(); 73 | } 74 | 75 | uint64_t BinaryData::getOutputAddress() const { 76 | assert(OutputSection->getOutputAddress()); 77 | return OutputSection->getOutputAddress() + OutputOffset; 78 | } 79 | 80 | uint64_t BinaryData::getOffset() const { 81 | return Address - getSection().getAddress(); 82 | } 83 | 84 | void BinaryData::setSection(BinarySection &NewSection) { 85 | if (OutputSection == Section) 86 | OutputSection = &NewSection; 87 | Section = &NewSection; 88 | } 89 | 90 | bool BinaryData::isMoved() const { 91 | return (getOffset() != OutputOffset || OutputSection != Section); 92 | } 93 | 94 | void BinaryData::print(raw_ostream &OS) const { 95 | printBrief(OS); 96 | } 97 | 98 | void BinaryData::printBrief(raw_ostream &OS) const { 99 | OS << "("; 100 | 101 | if (isJumpTable()) 102 | OS << "jump-table: "; 103 | else 104 | OS << "object: "; 105 | 106 | OS << getName(); 107 | 108 | if ((opts::PrintSymbolAliases || opts::Verbosity > 1) && Names.size() > 1) { 109 | OS << ", aliases:"; 110 | for (unsigned I = 1u; I < Names.size(); ++I) { 111 | OS << 
(I == 1 ? " (" : ", ") << Names[I]; 112 | } 113 | OS << ")"; 114 | } 115 | 116 | if (Parent) { 117 | OS << " (parent: "; 118 | Parent->printBrief(OS); 119 | OS << ")"; 120 | } 121 | 122 | OS << ", 0x" << Twine::utohexstr(getAddress()) 123 | << ":0x" << Twine::utohexstr(getEndAddress()) 124 | << "/" << getSize() << "/" << getAlignment() 125 | << "/0x" << Twine::utohexstr(Flags); 126 | 127 | if (opts::Verbosity > 1) { 128 | for (auto &MI : memData()) { 129 | OS << ", " << MI; 130 | } 131 | } 132 | 133 | OS << ")"; 134 | } 135 | 136 | BinaryData::BinaryData(StringRef Name, 137 | uint64_t Address, 138 | uint64_t Size, 139 | uint16_t Alignment, 140 | BinarySection &Section, 141 | unsigned Flags) 142 | : Names({Name}), 143 | Section(&Section), 144 | Address(Address), 145 | Size(Size), 146 | Alignment(Alignment), 147 | Flags(Flags), 148 | OutputSection(&Section), 149 | OutputOffset(getOffset()) 150 | { } 151 | -------------------------------------------------------------------------------- /src/BinaryLoop.h: -------------------------------------------------------------------------------- 1 | //===--- BinaryLoop.h - Interface for machine-level loop ------------------===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 7 | // 8 | //===----------------------------------------------------------------------===// 9 | // 10 | // This file defines the BinaryLoop class, which represents a loop in the 11 | // CFG of a binary function, and the BinaryLoopInfo class, which stores 12 | // information about all the loops of a binary function. 
13 | // 14 | //===----------------------------------------------------------------------===// 15 | 16 | #ifndef LLVM_TOOLS_LLVM_BOLT_BINARY_LOOP_H 17 | #define LLVM_TOOLS_LLVM_BOLT_BINARY_LOOP_H 18 | 19 | #include "llvm/ADT/DepthFirstIterator.h" 20 | #include "llvm/Analysis/LoopInfoImpl.h" 21 | #include "llvm/Support/GenericDomTreeConstruction.h" 22 | 23 | namespace llvm { 24 | namespace bolt { 25 | 26 | class BinaryBasicBlock; 27 | 28 | using BinaryDomTreeNode = DomTreeNodeBase; 29 | using BinaryDominatorTree = DomTreeBase; 30 | 31 | class BinaryLoop : public LoopBase { 32 | public: 33 | BinaryLoop() : LoopBase() { } 34 | 35 | // The total count of all the back edges of this loop. 36 | uint64_t TotalBackEdgeCount{0}; 37 | 38 | // The times the loop is entered from outside. 39 | uint64_t EntryCount{0}; 40 | 41 | // The times the loop is exited. 42 | uint64_t ExitCount{0}; 43 | 44 | // Most of the public interface is provided by LoopBase. 45 | 46 | protected: 47 | friend class LoopInfoBase; 48 | explicit BinaryLoop(BinaryBasicBlock *BB) : 49 | LoopBase(BB) { } 50 | }; 51 | 52 | class BinaryLoopInfo : public LoopInfoBase { 53 | public: 54 | BinaryLoopInfo() { } 55 | 56 | unsigned OuterLoops{0}; 57 | unsigned TotalLoops{0}; 58 | unsigned MaximumDepth{0}; 59 | 60 | // Most of the public interface is provided by LoopInfoBase. 61 | }; 62 | 63 | } // namespace bolt 64 | } // namespace llvm 65 | 66 | namespace llvm { 67 | 68 | // BinaryDominatorTree GraphTraits specializations. 
69 | template <> struct GraphTraits 70 | : public DomTreeGraphTraitsBase {}; 72 | 73 | template <> struct GraphTraits 74 | : public DomTreeGraphTraitsBase {}; 76 | 77 | template <> struct GraphTraits 78 | : public GraphTraits { 79 | static NodeRef getEntryNode(bolt::BinaryDominatorTree *DT) { 80 | return DT->getRootNode(); 81 | } 82 | 83 | static nodes_iterator nodes_begin(bolt::BinaryDominatorTree *N) { 84 | return df_begin(getEntryNode(N)); 85 | } 86 | 87 | static nodes_iterator nodes_end(bolt::BinaryDominatorTree *N) { 88 | return df_end(getEntryNode(N)); 89 | } 90 | }; 91 | 92 | } // namespace llvm 93 | 94 | #endif 95 | -------------------------------------------------------------------------------- /src/BinaryPassManager.h: -------------------------------------------------------------------------------- 1 | //===--- BinaryPassManager.h - Binary-level analysis/optimization passes --===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 7 | // 8 | //===----------------------------------------------------------------------===// 9 | // 10 | // A very simple binary-level analysis/optimization passes system. 11 | // 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef LLVM_TOOLS_LLVM_BOLT_BINARY_FUNCTION_PASS_MANAGER_H 15 | #define LLVM_TOOLS_LLVM_BOLT_BINARY_FUNCTION_PASS_MANAGER_H 16 | 17 | #include "BinaryFunction.h" 18 | #include "Passes/BinaryPasses.h" 19 | #include 20 | #include 21 | #include 22 | 23 | namespace llvm { 24 | namespace bolt { 25 | 26 | /// Simple class for managing analyses and optimizations on BinaryFunctions. 
27 | class BinaryFunctionPassManager { 28 | private: 29 | BinaryContext &BC; 30 | std::vector>> Passes; 32 | 33 | public: 34 | static const char TimerGroupName[]; 35 | static const char TimerGroupDesc[]; 36 | 37 | BinaryFunctionPassManager(BinaryContext &BC) 38 | : BC(BC) {} 39 | 40 | /// Adds a pass to this manager based on the value of its corresponding 41 | /// command-line option. 42 | void registerPass(std::unique_ptr Pass, 43 | const bool Run) { 44 | Passes.emplace_back(Run, std::move(Pass)); 45 | } 46 | 47 | /// Adds an unconditionally run pass to this manager. 48 | void registerPass(std::unique_ptr Pass) { 49 | Passes.emplace_back(true, std::move(Pass)); 50 | } 51 | 52 | /// Run all registered passes in the order they were added. 53 | void runPasses(); 54 | 55 | /// Runs all enabled implemented passes on all functions. 56 | static void runAllPasses(BinaryContext &BC); 57 | }; 58 | 59 | } // namespace bolt 60 | } // namespace llvm 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /src/BoltAddressTranslation.h: -------------------------------------------------------------------------------- 1 | //===--- BoltAddressTranslation.h -----------------------------------------===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 
7 | // 8 | //===----------------------------------------------------------------------===// 9 | // 10 | //===----------------------------------------------------------------------===// 11 | 12 | #ifndef LLVM_TOOLS_LLVM_BOLT_BOLTADDRESSTRANSLATION_H 13 | #define LLVM_TOOLS_LLVM_BOLT_BOLTADDRESSTRANSLATION_H 14 | 15 | #include "BinaryContext.h" 16 | #include "llvm/Object/ELFObjectFile.h" 17 | 18 | namespace llvm { 19 | 20 | namespace bolt { 21 | 22 | /// The map of output addresses to input ones to be used when translating 23 | /// samples collected in a binary that was already processed by BOLT. We do not 24 | /// support reoptimizing a binary already processed by BOLT, but we do support 25 | /// collecting samples in a binary processed by BOLT. We then translate samples 26 | /// back to addresses from the input (original) binary, one that can be 27 | /// optimized. The goal is to avoid special deployments of non-bolted binaries 28 | /// just for the purposes of data collection. 29 | /// 30 | /// The in-memory representation of the map is as follows. Each function has its 31 | /// own map. A function is identified by its output address. This is the key to 32 | /// retrieve a translation map. The translation map is a collection of ordered 33 | /// keys identifying the start of a region (relative to the function start) in 34 | /// the output address space (addresses in the binary processed by BOLT). 35 | /// 36 | /// A translation then happens when perf2bolt needs to convert sample addresses 37 | /// in the output address space back to input addresses, valid to run BOLT in 38 | /// the original input binary. To convert, perf2bolt first needs to fetch the 39 | /// translation map for a sample recorded in a given function. It then finds 40 | /// the largest key that is still less than or equal to the recorded address. 41 | /// It then converts this address to use the value of this key. 42 | /// 43 | /// Example translation Map for function foo 44 | /// KEY VALUE BB? 
45 | /// Output offset1 (first BB) Original input offset1 Y 46 | /// ... 47 | /// Output offsetN (last branch) Original input offsetN N 48 | /// 49 | /// The information on whether a given entry is a BB start or an instruction 50 | /// that changes control flow is encoded in the last (highest) bit of VALUE. 51 | /// 52 | /// Notes: 53 | /// Instructions that will never appear in LBR because they do not cause control 54 | /// flow change are omitted from this map. Basic block locations are recorded 55 | /// because they can be a target of a jump (To address in the LBR) and also to 56 | /// recreate the BB layout of this function. We use the BB layout map to 57 | /// recreate fall-through jumps in the profile, given an LBR trace. 58 | class BoltAddressTranslation { 59 | public: 60 | // In-memory representation of the address translation table 61 | using MapTy = std::map; 62 | 63 | /// Name of the ELF section where the table will be serialized to in the 64 | /// output binary 65 | static const char *SECTION_NAME; 66 | 67 | BoltAddressTranslation(BinaryContext &BC) : BC(BC) {} 68 | 69 | /// Write the serialized address translation tables for each reordered 70 | /// function 71 | void write(raw_ostream &OS); 72 | 73 | /// Read the serialized address translation tables and load them internally 74 | /// in memory. Return a parse error if failed. 75 | std::error_code parse(StringRef Buf); 76 | 77 | /// If the maps are loaded in memory, perform the lookup to translate LBR 78 | /// addresses in \p Func. 79 | uint64_t translate(const BinaryFunction &Func, uint64_t Offset, 80 | bool IsBranchSrc) const; 81 | 82 | /// Use the map keys containing basic block addresses to infer fall-throughs 83 | /// taken in the path started at FirstLBR.To and ending at SecondLBR.From. 84 | /// Return NoneType if trace is invalid or the list of fall-throughs 85 | /// otherwise. 
86 | Optional, 16>> 87 | getFallthroughsInTrace(const BinaryFunction &Func, const LBREntry &FirstLBR, 88 | const LBREntry &SecondLBR) const; 89 | 90 | /// If available, fetch the address of the hot part linked to the cold part 91 | /// at \p Address. Return 0 otherwise. 92 | uint64_t fetchParentAddress(uint64_t Address) const; 93 | 94 | /// True if the input binary has a translation table we can use to convert 95 | /// addresses when aggregating profile 96 | bool enabledFor(llvm::object::ELFObjectFileBase *InputFile) const; 97 | 98 | private: 99 | /// Helper to update \p Map by inserting one or more BAT entries reflecting 100 | /// \p BB for function located at \p FuncAddress. At least one entry will be 101 | /// emitted for the start of the BB. More entries may be emitted to cover 102 | /// the location of calls or any instruction that may change control flow. 103 | void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB, 104 | uint64_t FuncAddress); 105 | 106 | BinaryContext &BC; 107 | 108 | std::map Maps; 109 | 110 | /// Links outlined cold blocks to their original function 111 | std::map ColdPartSource; 112 | 113 | /// Identifies the address of a control-flow changing instruction in a 114 | /// translation map entry 115 | const static uint32_t BRANCHENTRY = 0x80000000; 116 | }; 117 | } 118 | 119 | } 120 | 121 | #endif 122 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(merge-fdata) 2 | add_subdirectory(Passes) 3 | add_subdirectory(Target) 4 | 5 | # Get the current git revision for BOLT. 
function(get_version ofn)
  # Location of the generated revision file inside the current build directory.
  set(revision_file ${CMAKE_CURRENT_BINARY_DIR}/${ofn})

  # Ask git (when it can be found) for the commit checked out in the LLVM
  # source tree. BOLT_REVISION is only assigned when the query succeeds.
  find_program(git_executable NAMES git git.exe git.cmd)
  if (git_executable)
    execute_process(COMMAND ${git_executable} rev-parse HEAD
                    WORKING_DIRECTORY ${LLVM_MAIN_SRC_DIR}
                    TIMEOUT 5
                    RESULT_VARIABLE rev_parse_status
                    OUTPUT_VARIABLE rev_parse_stdout)
    if (rev_parse_status EQUAL 0)
      string(STRIP "${rev_parse_stdout}" BOLT_REVISION)
    endif()
  endif()

  # Fall back to an empty revision string when none could be determined.
  if (NOT BOLT_REVISION)
    set(BOLT_REVISION "")
  endif()

  # The file is produced at build time and contains the quoted revision string.
  add_custom_command(OUTPUT ${revision_file}
    COMMAND echo '"${BOLT_REVISION}"' > ${revision_file}
    COMMENT "Generating bogus ${ofn}..."
  )
  set_source_files_properties(${revision_file} PROPERTIES
    GENERATED 1)

  # `make clean' must remove all those generated files:
  set_property(DIRECTORY APPEND
    PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${ofn})

  # Publish the output path so the caller can attach a target to it.
  set(VERSION_OUTPUT ${revision_file} PARENT_SCOPE)
endfunction()

# Creates a public target for generating the revision file.
# Adds custom target `target`, which depends on the file recorded in
# VERSION_OUTPUT, and appends that target to LLVM_COMMON_DEPENDS in the
# caller's scope.
function(add_public_gen_version_target target)
  add_custom_target(${target} DEPENDS ${VERSION_OUTPUT})
  set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} ${target} PARENT_SCOPE)
endfunction()

# Register generation of BoltRevision.inc and expose it as GenBoltRevision.
get_version(BoltRevision.inc)
add_public_gen_version_target(GenBoltRevision)

# LLVM components llvm-bolt links against. Target-specific BOLT libraries are
# appended below depending on LLVM_TARGETS_TO_BUILD.
set(LLVM_LINK_COMPONENTS
  ${LLVM_TARGETS_TO_BUILD}
  BOLTPasses
  CodeGen
  Core
  DebugInfoDWARF
  MC
  MCDisassembler
  MCParser
  Object
  Orcjit
  Support
  )

# Enable the AArch64 backend when "AArch64" occurs in LLVM_TARGETS_TO_BUILD.
string(FIND "${LLVM_TARGETS_TO_BUILD}" "AArch64" POSITION)
if (NOT ${POSITION} EQUAL -1)
  list(APPEND LLVM_LINK_COMPONENTS BOLTTargetAArch64)
  set(BOLT_AArch64 On)
endif()

# Enable the X86 backend when "X86" occurs in LLVM_TARGETS_TO_BUILD.
string(FIND "${LLVM_TARGETS_TO_BUILD}" "X86" POSITION)
if (NOT ${POSITION} EQUAL -1)
  list(APPEND LLVM_LINK_COMPONENTS BOLTTargetX86)
  set(BOLT_X64 On)
endif()

add_llvm_tool(llvm-bolt
  llvm-bolt.cpp
  BinaryBasicBlock.cpp
  BinaryContext.cpp
  BinaryData.cpp
  BinaryFunction.cpp
  BinaryFunctionProfile.cpp
  BinaryPassManager.cpp
  BinarySection.cpp
  BoltAddressTranslation.cpp
  BoltDiff.cpp
  CacheMetrics.cpp
  DataAggregator.cpp
  DataReader.cpp
  DebugData.cpp
  DWARFRewriter.cpp
  DynoStats.cpp
  Exceptions.cpp
  ExecutableFileMemoryManager.cpp
  Heatmap.cpp
  JumpTable.cpp
  MCPlusBuilder.cpp
  ParallelUtilities.cpp
  ProfileReader.cpp
  ProfileWriter.cpp
  Relocation.cpp
  RewriteInstance.cpp

  DEPENDS
  intrinsics_gen
  bolt_rt
  )

# Tell the sources which target backends were enabled above.
if (DEFINED BOLT_AArch64)
  target_compile_definitions(llvm-bolt PRIVATE AARCH64_AVAILABLE)
endif()

if (DEFINED BOLT_X64)
  target_compile_definitions(llvm-bolt PRIVATE X86_AVAILABLE)
endif()

# The same executable is also installed under these tool names.
add_llvm_tool_symlink(perf2bolt llvm-bolt)
add_llvm_tool_symlink(llvm-boltdiff llvm-bolt)
add_llvm_tool_symlink(llvm-bolt-heatmap llvm-bolt)

--------------------------------------------------------------------------------
/src/CacheMetrics.h:
--------------------------------------------------------------------------------
//===- CacheMetrics.h - Interface for instruction cache evaluation --===//
//
// Functions to show metrics of cache lines
//
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_BOLT_CACHEMETRICS_H
#define LLVM_TOOLS_LLVM_BOLT_CACHEMETRICS_H

#include "BinaryFunction.h"
// NOTE(review): the target of the following #include and the element type of
// the std::vector parameter below appear to be missing from this copy of the
// header; restore them from the upstream source before compiling.
#include

namespace llvm {
namespace bolt {
namespace CacheMetrics {

/// Calculate various metrics related to instruction cache performance.
/// \p BinaryFunctions is the set of functions the metrics are computed for.
void printAll(const std::vector &BinaryFunctions);

/// Calculate Extended-TSP metric, which quantifies the expected number of
/// i-cache misses for a given pair of basic blocks. The parameters are:
/// - SrcAddr is the address of the source block;
/// - SrcSize is the size of the source block;
/// - DstAddr is the address of the destination block;
/// - Count is the number of jumps between the pair of blocks.
double extTSPScore(uint64_t SrcAddr,
                   uint64_t SrcSize,
                   uint64_t DstAddr,
                   uint64_t Count);

} // namespace CacheMetrics
} // namespace bolt
} // namespace llvm

#endif // LLVM_TOOLS_LLVM_BOLT_CACHEMETRICS_H

--------------------------------------------------------------------------------
/src/DWARFRewriter.h:
--------------------------------------------------------------------------------
//===--- DWARFRewriter.h --------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_BOLT_DWARF_REWRITER_H
#define LLVM_TOOLS_LLVM_BOLT_DWARF_REWRITER_H

#include "DebugData.h"
#include "RewriteInstance.h"
// NOTE(review): the targets of the two #include lines below, and the template
// arguments of the std:: containers and smart pointers in this class, appear
// to be missing from this copy of the header; restore them from the upstream
// source before compiling.
#include
#include

namespace llvm {

namespace bolt {

class BinaryFunction;

/// Updates DWARF debug information of the rewritten binary. The public entry
/// points are updateDebugInfo(), updateLineTableOffsets() and
/// updateDebugLineInfoForNonSimpleFunctions().
class DWARFRewriter {
  DWARFRewriter() = delete;

  /// Binary context supplied at construction.
  BinaryContext &BC;

  using SectionPatchersType = RewriteInstance::SectionPatchersType;

  /// Section patchers supplied at construction (held by reference).
  SectionPatchersType &SectionPatchers;

  SimpleBinaryPatcher *DebugInfoPatcher{nullptr};

  /// Presumably serializes access to DebugInfoPatcher; confirm in the .cpp.
  std::mutex DebugInfoPatcherMutex;

  DebugAbbrevPatcher *AbbrevPatcher{nullptr};

  /// Presumably serializes access to AbbrevPatcher; confirm in the .cpp.
  std::mutex AbbrevPatcherMutex;

  /// Stores and serializes information that will be put into the .debug_ranges
  /// and .debug_aranges DWARF sections.
  std::unique_ptr RangesSectionsWriter;

  std::unique_ptr LocationListWriter;

  /// Recursively update debug info starting at \p DIE.
  /// NOTE(review): the previous comment referred to parameters named Unit and
  /// Function, which this signature does not have. \p FunctionStack presumably
  /// holds the functions of the enclosing DW_TAG_subprogram nodes of the
  /// current \p DIE, and \p CachedFunction / \p CachedRanges look like lookup
  /// caches shared across calls; confirm against the implementation.
  void updateUnitDebugInfo(
      const DWARFDie DIE, std::vector FunctionStack,
      const BinaryFunction *&CachedFunction,
      std::map &CachedRanges);

  /// Patches the binary for an object's address ranges to be updated.
  /// The object can be anything that has associated address ranges via either
  /// DW_AT_low/high_pc or DW_AT_ranges (e.g. functions, lexical blocks, etc).
  /// \p DIE is the object's DIE in the input binary.
  /// \p DebugRangesOffset is the offset in .debug_ranges of the object's
  /// new address ranges in the output binary.
  void updateDWARFObjectAddressRanges(const DWARFDie DIE,
                                      uint64_t DebugRangesOffset);

  /// Generate new contents for .debug_ranges and .debug_aranges section.
  void finalizeDebugSections();

  /// Patches the binary for DWARF address ranges (e.g. in functions and lexical
  /// blocks) to be updated.
  void updateDebugAddressRanges();

  /// Rewrite .gdb_index section if present.
  void updateGdbIndexSection();

  /// Abbreviations that were converted to use DW_AT_ranges.
  std::set ConvertedRangesAbbrevs;

  /// DIEs with abbrevs that were not converted to DW_AT_ranges.
  /// We only update those when all DIEs have been processed to guarantee that
  /// the abbrev (which is shared) is intact.
  std::map>> PendingRanges;

  /// Convert \p Abbrev from using a simple DW_AT_(low|high)_pc range to
  /// DW_AT_ranges.
  void convertToRanges(const DWARFAbbreviationDeclaration *Abbrev);

  /// Update \p DIE that was using DW_AT_(low|high)_pc with DW_AT_ranges offset.
  void convertToRanges(DWARFDie DIE, uint64_t RangesSectionOffset);

  /// Same as above, but takes a vector of \p Ranges as a parameter.
  void convertToRanges(DWARFDie DIE, const DebugAddressRangesVector &Ranges);

  /// Patch DW_AT_(low|high)_pc values for the \p DIE based on \p Range.
  void patchLowHigh(DWARFDie DIE, DebugAddressRange Range);

  /// Convert pending ranges associated with the given \p Abbrev.
  void convertPending(const DWARFAbbreviationDeclaration *Abbrev);

  /// Once all DIEs were seen, update DW_AT_(low|high)_pc values.
  void flushPendingRanges();

public:
  /// Bind the rewriter to \p BC and \p SectionPatchers. Both are stored by
  /// reference, so they must outlive this object.
  DWARFRewriter(BinaryContext &BC,
                SectionPatchersType &SectionPatchers)
    : BC(BC), SectionPatchers(SectionPatchers) {}

  /// Main function for updating the DWARF debug info.
  void updateDebugInfo();

  /// Computes output .debug_line line table offsets for each compile unit,
  /// and updates stmt_list for a corresponding compile unit.
  void updateLineTableOffsets();

  /// Updates debug line information for non-simple functions, which are not
  /// rewritten.
  void updateDebugLineInfoForNonSimpleFunctions();
};

} // namespace bolt
} // namespace llvm

#endif

--------------------------------------------------------------------------------
/src/Exceptions.h:
--------------------------------------------------------------------------------
//===-- Exceptions.h - Helpers for processing C++ exceptions --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_BOLT_EXCEPTIONS_H
#define LLVM_TOOLS_LLVM_BOLT_EXCEPTIONS_H

#include "BinaryContext.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
#include "llvm/Support/Casting.h"
// NOTE(review): the target of the following #include and the template
// arguments of std::vector / std::map / iterator_range in this header appear
// to be missing from this copy; restore them from the upstream source.
#include

namespace llvm {
namespace bolt {

class BinaryFunction;
class RewriteInstance;

/// \brief Wraps up information to read all CFI instructions and feed them to a
/// BinaryFunction, as well as rewriting CFI sections.
class CFIReaderWriter {
public:
  /// Build the reader from the frame information in \p EHFrame.
  explicit CFIReaderWriter(const DWARFDebugFrame &EHFrame);

  /// Attach CFI information to \p Function.
  /// NOTE(review): the meaning of the boolean result is not visible in this
  /// header; presumably true on success. Confirm in Exceptions.cpp.
  bool fillCFIInfoFor(BinaryFunction &Function) const;

  /// Generate .eh_frame_hdr from old and new .eh_frame sections.
  ///
  /// Take FDEs from the \p NewEHFrame unless their initial_pc is listed
  /// in \p FailedAddresses. All other entries are taken from the
  /// \p OldEHFrame.
  ///
  /// \p EHFrameHeaderAddress specifies location of .eh_frame_hdr,
  /// and is required for relative addressing used in the section.
  std::vector generateEHFrameHeader(
      const DWARFDebugFrame &OldEHFrame,
      const DWARFDebugFrame &NewEHFrame,
      uint64_t EHFrameHeaderAddress,
      std::vector &FailedAddresses) const;

  using FDEsMap = std::map;
  using fde_iterator = FDEsMap::const_iterator;

  /// Get all FDEs discovered by this reader.
  iterator_range fdes() const {
    return iterator_range(FDEs.begin(), FDEs.end());
  }

  /// Direct read-only access to the underlying FDE map.
  const FDEsMap &getFDEs() const {
    return FDEs;
  }

private:
  /// FDEs collected by this reader, exposed through fdes() / getFDEs().
  FDEsMap FDEs;
};

} // namespace bolt
} // namespace llvm

#endif

--------------------------------------------------------------------------------
/src/ExecutableFileMemoryManager.cpp:
--------------------------------------------------------------------------------
//===--- ExecutableFileMemoryManager.cpp ----------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#include "ExecutableFileMemoryManager.h"
#include "RewriteInstance.h"

#undef DEBUG_TYPE
#define DEBUG_TYPE "efmm"

using namespace llvm;
using namespace object;
using namespace bolt;

namespace llvm {

namespace bolt {

/// Common implementation behind allocateCodeSection() / allocateDataSection().
///
/// Debug sections seen while ObjectsLoaded is still zero are copied into a
/// freshly allocated buffer and registered with the BinaryContext as note
/// sections. Every other section is allocated through SectionMemoryManager
/// and registered as an SHT_PROGBITS section.
///
/// \returns the start of the memory backing the section.
uint8_t *ExecutableFileMemoryManager::allocateSection(intptr_t Size,
                                                      unsigned Alignment,
                                                      unsigned SectionID,
                                                      StringRef SectionName,
                                                      bool IsCode,
                                                      bool IsReadOnly) {
  // Register a debug section as a note section.
  if (!ObjectsLoaded && RewriteInstance::isDebugSection(SectionName)) {
    // NOTE(review): this buffer is never deleted in this function; ownership
    // presumably passes to the section registered below. Confirm.
    uint8_t *DataCopy = new uint8_t[Size];
    auto &Section = BC.registerOrUpdateNoteSection(SectionName,
                                                   DataCopy,
                                                   Size,
                                                   Alignment);
    Section.setSectionID(SectionID);
    assert(!Section.isAllocatable() && "note sections cannot be allocatable");
    return DataCopy;
  }

  // Allocatable section: let the base memory manager provide the storage.
  uint8_t *Ret;
  if (IsCode) {
    Ret = SectionMemoryManager::allocateCodeSection(Size, Alignment,
                                                    SectionID, SectionName);
  } else {
    Ret = SectionMemoryManager::allocateDataSection(Size, Alignment,
                                                    SectionID, SectionName,
                                                    IsReadOnly);
  }

  const auto Flags = BinarySection::getFlags(IsReadOnly, IsCode, true);
  // NOTE(review): SmallVector's template arguments appear to be missing from
  // this copy of the file. Buf backs the renamed SectionName below and must
  // stay alive for as long as that StringRef is used.
  SmallVector Buf;
  // Sections of objects loaded after the first one get a unique
  // ".bolt.extra.<N>" suffix.
  if (ObjectsLoaded > 0)
    SectionName = (Twine(SectionName) + ".bolt.extra." + Twine(ObjectsLoaded))
                      .toStringRef(Buf);

  auto &Section = BC.registerOrUpdateSection(SectionName,
                                             ELF::SHT_PROGBITS,
                                             Flags,
                                             Ret,
                                             Size,
                                             Alignment);
  Section.setSectionID(SectionID);
  assert(Section.isAllocatable() &&
         "verify that allocatable is marked as allocatable");

  DEBUG(dbgs() << "BOLT: allocating " << (Section.isLocal() ? "local " : "")
               << (IsCode ? "code" : (IsReadOnly ? "read-only data" : "data"))
               << " section : " << SectionName
               << " with size " << Size << ", alignment " << Alignment
               << " at 0x" << Ret << ", ID = " << SectionID << "\n");

  return Ret;
}

/// Notifier for non-allocatable (note) section.
/// Registers a copy of \p Data (made with copyByteArray) as a note section
/// and returns the registered section's output data.
uint8_t *ExecutableFileMemoryManager::recordNoteSection(
    const uint8_t *Data,
    uintptr_t Size,
    unsigned Alignment,
    unsigned SectionID,
    StringRef SectionName) {
  // NOTE(review): the target type of the reinterpret_cast below appears to be
  // missing from this copy of the file.
  DEBUG(dbgs() << "BOLT: note section "
               << SectionName
               << " with size " << Size << ", alignment " << Alignment
               << " at 0x"
               << Twine::utohexstr(reinterpret_cast(Data)) << '\n');
  auto &Section = BC.registerOrUpdateNoteSection(SectionName,
                                                 copyByteArray(Data, Size),
                                                 Size,
                                                 Alignment);
  Section.setSectionID(SectionID);
  assert(!Section.isAllocatable() && "note sections cannot be allocatable");
  return Section.getOutputData();
}

/// Counts one more loaded object, then defers to the base class.
bool ExecutableFileMemoryManager::finalizeMemory(std::string *ErrMsg) {
  DEBUG(dbgs() << "BOLT: finalizeMemory()\n");
  ++ObjectsLoaded;
  return SectionMemoryManager::finalizeMemory(ErrMsg);
}

ExecutableFileMemoryManager::~ExecutableFileMemoryManager() { }

}

}

--------------------------------------------------------------------------------
/src/ExecutableFileMemoryManager.h:
--------------------------------------------------------------------------------
//===--- ExecutableFileMemoryManager.h ------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_BOLT_EXECUTABLE_FILE_MEMORY_MANAGER_H
#define LLVM_TOOLS_LLVM_BOLT_EXECUTABLE_FILE_MEMORY_MANAGER_H

#include "BinaryContext.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/Support/raw_ostream.h"

namespace llvm {

namespace bolt {

/// Location and extent of a segment, both in memory and in the file.
struct SegmentInfo {
  uint64_t Address; ///< Address of the segment in memory.
  uint64_t Size; ///< Size of the segment in memory.
  uint64_t FileOffset; ///< Offset in the file.
  uint64_t FileSize; ///< Size in file.

  /// Print all four fields to \p OS as hexadecimal values.
  void print(raw_ostream &OS) const {
    OS << "SegmentInfo { Address: 0x"
       << Twine::utohexstr(Address) << ", Size: 0x"
       << Twine::utohexstr(Size) << ", FileOffset: 0x"
       << Twine::utohexstr(FileOffset) << ", FileSize: 0x"
       << Twine::utohexstr(FileSize) << "}";
  };
};

/// Stream \p SegInfo to \p OS using SegmentInfo::print().
inline raw_ostream &operator<<(raw_ostream &OS, const SegmentInfo &SegInfo) {
  SegInfo.print(OS);
  return OS;
}

/// Class responsible for allocating and managing code and data sections.
class ExecutableFileMemoryManager : public SectionMemoryManager {
private:
  /// Shared implementation of allocateCodeSection() and allocateDataSection().
  uint8_t *allocateSection(intptr_t Size,
                           unsigned Alignment,
                           unsigned SectionID,
                           StringRef SectionName,
                           bool IsCode,
                           bool IsReadOnly);
  /// Binary context supplied at construction.
  BinaryContext &BC;
  /// Value reported by allowStubAllocation().
  bool AllowStubs;

public:
  // Our linker's main purpose is to handle a single object file, created
  // by RewriteInstance after reading the input binary and reordering it.
  // After objects finish loading, we increment this. Therefore, whenever
  // this is greater than zero, we are dealing with additional objects that
  // will not be managed by BinaryContext but only exist to support linking
  // user-supplied objects into the main input executable.
  uint32_t ObjectsLoaded{0};

  /// [start memory address] -> [segment info] mapping.
  /// NOTE(review): the template arguments of this std::map appear to be
  /// missing from this copy of the header.
  std::map SegmentMapInfo;

  ExecutableFileMemoryManager(BinaryContext &BC, bool AllowStubs)
    : BC(BC), AllowStubs(AllowStubs) {}

  ~ExecutableFileMemoryManager();

  /// Code sections are allocated with IsCode and IsReadOnly both true.
  uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
                               unsigned SectionID,
                               StringRef SectionName) override {
    return allocateSection(Size, Alignment, SectionID, SectionName,
                           /*IsCode=*/true, true);
  }

  /// Data sections forward the caller's \p IsReadOnly unchanged.
  uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
                               unsigned SectionID, StringRef SectionName,
                               bool IsReadOnly) override {
    return allocateSection(Size, Alignment, SectionID, SectionName,
                           /*IsCode=*/false, IsReadOnly);
  }

  /// Notifier for non-allocatable (note) sections; defined in the .cpp file.
  uint8_t *recordNoteSection(const uint8_t *Data, uintptr_t Size,
                             unsigned Alignment, unsigned SectionID,
                             StringRef SectionName) override;

  bool allowStubAllocation() const override { return AllowStubs; }

  /// Declared here and defined in the .cpp file.
  bool finalizeMemory(std::string *ErrMsg = nullptr) override;
};

} // namespace bolt
} // namespace llvm

#endif

--------------------------------------------------------------------------------
/src/Heatmap.h:
--------------------------------------------------------------------------------
//===-- Heatmap.h -----------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_BOLT_HEATMAP_H
#define LLVM_TOOLS_LLVM_BOLT_HEATMAP_H

#include "llvm/Support/raw_ostream.h"
// NOTE(review): the target of the following #include and the template
// arguments of std::map / std::numeric_limits in this class appear to be
// missing from this copy of the header; restore them from upstream.
#include

namespace llvm {
namespace bolt {

/// Counts samples per fixed-size address bucket.
class Heatmap {
  /// Number of bytes per entry in the heat map.
  /// Used as a divisor in registerAddress(), so it must be non-zero.
  size_t BucketSize;

  /// Maximum address that is considered to be valid.
  uint64_t MaxAddress;

  /// Count invalid ranges.
  uint64_t NumSkippedRanges{0};

  /// Map buckets to the number of samples.
  std::map Map;

public:
  /// \p BucketSize defaults to 4096 bytes. Addresses above \p MaxAddress are
  /// ignored.
  explicit Heatmap(uint64_t BucketSize = 4096,
                   uint64_t MaxAddress = std::numeric_limits::max())
    : BucketSize(BucketSize), MaxAddress(MaxAddress)
  {};

  /// True if \p Address is above MaxAddress and must not be recorded.
  inline bool ignoreAddress(uint64_t Address) const {
    return Address > MaxAddress;
  }

  /// Register a single sample at \p Address.
  void registerAddress(uint64_t Address) {
    if (!ignoreAddress(Address))
      ++Map[Address / BucketSize];
  }

  /// Register \p Count samples at [\p StartAddress, \p EndAddress ].
  void registerAddressRange(uint64_t StartAddress, uint64_t EndAddress,
                            uint64_t Count);

  /// Return the number of ranges that failed to register.
  uint64_t getNumInvalidRanges() const {
    return NumSkippedRanges;
  }

  /// Print the heat map to the file named \p FileName (defined in the .cpp).
  void print(StringRef FileName) const;

  /// Print the heat map to \p OS (defined in the .cpp).
  void print(raw_ostream &OS) const;

  /// Number of buckets that currently have an entry in the map.
  size_t size() const {
    return Map.size();
  }
};

} // namespace bolt
} // namespace llvm

#endif

--------------------------------------------------------------------------------
/src/JumpTable.h:
--------------------------------------------------------------------------------
//===--- JumpTable.h - Representation of a jump table ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_BOLT_JUMP_TABLE_H
#define LLVM_TOOLS_LLVM_BOLT_JUMP_TABLE_H

#include "BinaryData.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
// NOTE(review): the targets of the two #include lines below appear to be
// missing from this copy of the header.
#include
#include

namespace llvm {
namespace bolt {

/// Levels of jump table support, ordered from none to most aggressive.
enum JumpTableSupportLevel : char {
  JTS_NONE = 0, ///< Disable jump tables support.
  JTS_BASIC = 1, ///< Enable basic jump tables support (in-place).
  JTS_MOVE = 2, ///< Move jump tables to a separate section.
  JTS_SPLIT = 3, ///< Enable hot/cold splitting of jump tables.
  JTS_AGGRESSIVE = 4, ///< Aggressive splitting of jump tables.
};

class BinaryFunction;

/// Representation of a jump table.
///
/// The jump table may include other jump tables that are referenced by
/// a different label at a different offset in this jump table.
39 | class JumpTable : public BinaryData { 40 | friend class BinaryContext; 41 | 42 | JumpTable() = delete; 43 | JumpTable(const JumpTable &) = delete; 44 | JumpTable &operator=(const JumpTable &) = delete; 45 | 46 | public: 47 | enum JumpTableType : char { 48 | JTT_NORMAL, 49 | JTT_PIC, 50 | }; 51 | 52 | /// Branch statistics for jump table entries. 53 | struct JumpInfo { 54 | uint64_t Mispreds{0}; 55 | uint64_t Count{0}; 56 | }; 57 | 58 | /// Size of the entry used for storage. 59 | std::size_t EntrySize; 60 | 61 | /// Size of the entry size we will write (we may use a more compact layout) 62 | std::size_t OutputEntrySize; 63 | 64 | /// The type of this jump table. 65 | JumpTableType Type; 66 | 67 | /// All the entries as labels. 68 | std::vector Entries; 69 | 70 | /// All the entries as offsets into a function. Invalid after CFG is built. 71 | using OffsetsType = std::vector; 72 | OffsetsType OffsetEntries; 73 | 74 | /// Map ->