├── LICENSE ├── README.md ├── ch1 ├── FileCheckExamples │ ├── README.md │ ├── ex1 │ │ ├── README.md │ │ ├── check-file.txt │ │ ├── input.txt │ │ └── run.sh │ ├── ex2 │ │ ├── README.md │ │ ├── check-file.txt │ │ ├── input.txt │ │ └── run.sh │ ├── ex3 │ │ ├── README.md │ │ ├── check-file.txt │ │ ├── input.txt │ │ └── run.sh │ ├── ex4 │ │ ├── README.md │ │ ├── check-file.txt │ │ ├── input.txt │ │ └── run.sh │ └── ex5 │ │ ├── README.md │ │ ├── check-file.txt │ │ ├── input.txt │ │ └── run.sh ├── README.md ├── quiz │ └── ex8-9 │ │ ├── README.md │ │ ├── check-file.txt │ │ ├── input.txt │ │ └── run.sh └── setup_env.sh ├── ch10 ├── debug_capabilities │ ├── CMakeLists.txt │ ├── README.md │ ├── before_slp_vectorizer_hadd_vector.ll │ └── hadd_vector.ll ├── incorrect_optimization │ ├── CMakeLists.txt │ ├── README.md │ ├── bugged_input.ll │ ├── bugpoint-check.sh │ ├── check.sh │ ├── main.cpp │ └── populate_function.cpp ├── undefined_behavior │ ├── CMakeLists.txt │ ├── README.md │ ├── main.cpp │ └── populate_function.cpp └── use_after_free │ ├── CMakeLists.txt │ ├── README.md │ ├── main.cpp │ └── populate_function.cpp ├── ch11 ├── instr_info │ ├── CMakeLists.txt │ ├── MyTargetInstrInfo.cpp │ ├── MyTargetInstrInfo.h │ ├── MyTargetRegisterInfo.cpp │ ├── MyTargetRegisterInfo.h │ ├── README.md │ ├── instrinfo.td │ ├── main.cpp │ ├── mytarget.td │ ├── reginfo.td │ ├── solution-instrinfo.td │ └── yourTurn-instrinfo.td ├── mir_format │ ├── README.md │ ├── full-dump.mir │ ├── input.ll │ ├── shrunk-dump.mir │ └── simplified-dump.mir └── register_units │ ├── CMakeLists.txt │ ├── README.md │ ├── SolutionRegisterInfo.cpp │ ├── SolutionRegisterInfo.h │ ├── YourTurnRegisterInfo.cpp │ ├── YourTurnRegisterInfo.h │ ├── main.cpp │ ├── solution-reginfo.td │ └── yourTurn-reginfo.td ├── ch13 ├── CMakeLists.txt ├── README.md └── input.ll ├── ch3 ├── README.md ├── input.c ├── irreducible.c ├── llvm_ir │ ├── CMakeLists.txt │ ├── README.md │ ├── input.c │ ├── main.cpp │ ├── solution │ │ └── 
populate_function.cpp │ └── your_turn │ │ └── populate_function.cpp └── machineir │ ├── CMakeLists.txt │ ├── README.md │ ├── main.cpp │ ├── solution │ └── populate_function.cpp │ └── your_turn │ └── populate_function.cpp ├── ch4 ├── implicit_func_scope_change │ ├── CMakeLists.txt │ ├── README.md │ └── main.cpp └── simple_cst_propagation │ ├── CMakeLists.txt │ ├── README.md │ ├── input.c │ ├── main.cpp │ ├── solution │ └── populate_function.cpp │ └── your_turn │ └── populate_function.cpp ├── ch5 ├── your_first_pass │ ├── CMakeLists.txt │ ├── README.md │ ├── main.cpp │ ├── solution │ │ ├── passWithLegacyPM.cpp │ │ ├── passWithNewPM.cpp │ │ └── passWithNewPM.h │ └── your_turn │ │ ├── passWithLegacyPM.cpp │ │ ├── passWithNewPM.cpp │ │ └── passWithNewPM.h └── your_first_pipeline │ ├── CMakeLists.txt │ ├── README.md │ ├── main.cpp │ ├── solution │ ├── passPipelineWithLegacyPM.cpp │ └── passPipelineWithNewPM.cpp │ └── your_turn │ ├── passPipelineWithLegacyPM.cpp │ └── passPipelineWithNewPM.cpp ├── ch6 ├── CMakeLists.txt ├── README.md ├── multiclass-with-def-type.td ├── multiclass.td ├── my-first-gisel.td └── person.td ├── ch7 ├── CMakeLists.txt ├── README.md ├── access_struct_type.ll ├── anonymous_type.ll ├── array_type.ll ├── check_vec_int_ty.cpp ├── datalayout_alignment.ll ├── endianness.c ├── full_example.ll ├── hadd_vector.c ├── impact_of_abi.c └── named_type.ll ├── ch8 ├── CMakeLists.txt ├── README.md ├── argpromotion.ll ├── canonical_form.ll ├── dce.ll ├── deadargelim.ll ├── indvars.ll ├── inline.ll ├── lcssa.ll ├── licm.ll ├── load-store-vectorizer.ll ├── loop-reduce.c ├── loop-reduce.ll ├── loop-unroll.ll ├── loop-vectorize.c ├── loop-vectorize.ll ├── reassociate.ll ├── simplifycfg.ll ├── slp-vectorizer.ll ├── value_tracking.ll └── xor.ll └── cmake └── utils ├── llc-run.cmake ├── opt-run.cmake └── set-llvm-install-prefix.cmake /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Packt 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

LLVM Code Generation, First Edition

2 | 3 |

A deep dive into compiler backend development

4 | 5 |

Quentin Colombet

6 | 7 |

This is the code repository for LLVM Code Generation, First Edition, published by Packt.

8 | 9 | 10 |

11 | 12 |       13 | Free PDF 14 |       15 | Graphic Bundle 16 |       17 | Amazon 18 |       19 |

20 |
21 |

About the book

22 | 23 | LLVM Code Generation, First Edition 24 | 25 | 26 | The LLVM infrastructure is a popular compiler ecosystem widely used in the tech industry and academia. This technology is crucial for both experienced and aspiring compiler developers looking to make an impact in the field. Written by Quentin Colombet, a veteran LLVM contributor and architect of the GlobalISel framework, this book provides a primer on the main aspects of LLVM, with an emphasis on its backend infrastructure; that is, everything needed to transform the intermediate representation (IR) produced by frontends like Clang into assembly code and object files. 27 | You’ll learn how to write an optimizing code generator for a toy backend in LLVM. The chapters will guide you step by step through building this backend while exploring key concepts, such as the ABI, cost model, and register allocation. You’ll also find out how to express these concepts using LLVM's existing infrastructure and how established backends address these challenges. Furthermore, the book features code snippets that demonstrate the actual APIs. 28 | By the end of this book, you’ll have gained a deeper understanding of LLVM. The concepts presented are expected to remain stable across different LLVM versions, making this book a reliable quick reference guide for understanding LLVM.
29 |
30 |

Key Learnings

31 | 48 | 49 |
50 | 51 |
52 |

Chapters

53 | LLVM Code Generation, First Edition 54 |
    55 | 56 |
  1. Building LLVM and Understanding the Directory Structure
  2. 57 | 58 |
  3. Contributing to LLVM
  4. 59 | 60 |
  5. Compiler Basics and How They Map to LLVM APIs
  6. 61 | 62 |
  7. Writing Your First Optimization
  8. 63 | 64 |
  9. Dealing with Pass Managers
  10. 65 | 66 |
  11. TableGen – LLVM Swiss Army Knife for Modeling
  12. 67 | 68 |
  13. Understanding LLVM IR
  14. 69 | 70 |
  15. Survey of the Existing Passes
  16. 71 | 72 |
  17. Introducing Target-Specific Constructs
  18. 73 | 74 |
  19. Hands-On Debugging LLVM IR Passes
  20. 75 | 76 |
  21. Getting Started with the Backend
  22. 77 | 78 |
  23. Getting Started with the Machine Code Layer
  24. 79 | 80 |
  25. The Machine Pass Pipeline
  26. 81 | 82 |
  27. Getting Started with Instruction Selection
  28. 83 | 84 |
  29. Instruction Selection: The IR Building Phase
  30. 85 | 86 |
  31. Instruction Selection: The Legalization Phase
  32. 87 | 88 |
  33. Instruction Selection: The Selection Phase and Beyond
  34. 89 | 90 |
  35. Instruction Scheduling
  36. 91 | 92 |
  37. Register Allocation
  38. 93 | 94 |
  39. Lowering of the Stack Layout
  40. 95 | 96 |
  41. Getting Started with the Assembler
  42. 97 | 98 |
99 | 100 |
101 | 102 | 103 |
104 |

Requirements for this book

105 | To follow the instructions in this book, you need LLVM 20 installed on your system, running on Windows, macOS, or Linux operating systems. 106 | 107 | Navigate to the different `chX` directories, look at the examples provided, and do the exercises when applicable. 108 | Each directory has its own README.md with specific directions. 109 | 110 | Note: 111 | The exercises have been tested with the open source repository of LLVM at the Git hash 424c2d9b7e4d from February 13th 2025, which is LLVM 20.1.1. 112 | 113 | Some of the exercises interact directly with the LLVM C++ API. This API has no stability guarantee; therefore, it is possible that newer or older versions of LLVM will not work with these exercises. 114 | 115 | For the exercises that require a version of LLVM handy, if you build your own, make sure to use the `CMAKE_INSTALL_PREFIX` cmake variable to set the install path, then build the `install` target. 116 | 117 | Then, you will need to provide this path to CMake in the different exercises. 118 | 119 | Follow the READMEs in the different directories when you get there. 120 |
121 | 122 |
123 |

Get to know the author

124 | 125 | _Quentin Colombet_ is a veteran LLVM contributor specializing in compiler backends. He is the architect of the new instruction selection framework (GlobalISel) and code owner of the LLVM register allocators. With over two decades of experience, he has worked on compiler backends for a variety of architectures, including GPU, CPU, microcontrollers, DSP, and ASICs. Quentin joined Apple in 2012 and has contributed to x86, AArch64, and Apple GPU backends. He is passionate about helping newcomers onboard the LLVM infrastructure, having mentored interns and new hires over the years. 126 |
127 | 128 |
129 |

Other Related Books

130 | 141 | 142 |
143 | 144 | 145 | ## Errata 146 | 147 | * Page 11: In the command `$ git clone https://github.com/llvm/llvm/project.git`, the URL should be `https://github.com/llvm/llvm-project.git`. Therefore, the first line becomes `$ git clone https://github.com/llvm/llvm-project.git`. 148 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/README.md: -------------------------------------------------------------------------------- 1 | This directory contains examples of how FileCheck can be used. 2 | 3 | The examples are sorted in increasing order of complexity. 4 | - ex1: Uses only the simplest form of directives 5 | - ex2: Shows how to use more than one prefix 6 | - ex3: Introduces keywords 7 | - ex4: Shows how to add regex in the mix 8 | - ex5: Introduces variables 9 | 10 | Each example lives in its own directory. 11 | Each directory follows the same structure: 12 | - `README.md` describes what there is to see in this example 13 | - `run.sh` contains the command to run to demonstrate the specific example 14 | - `input.txt` contains the input of the example 15 | - `check-file.txt` contains the patterns that FileCheck will match in `run.sh` 16 | 17 | To run the example: 18 | - Make sure that FileCheck is in your `PATH` 19 | - Change directory to exN 20 | - Open `run.sh` to see what is being tested 21 | - Either: 22 | - run the commands manually by copy/pasting them, or 23 | - execute `bash run.sh` 24 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex1/README.md: -------------------------------------------------------------------------------- 1 | This example shows the most basic use case of FileCheck. 2 | It uses the default prefix and performs simple matches. 3 | 4 | Look at the check file `check-file.txt` for additional comments. 
5 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex1/check-file.txt: -------------------------------------------------------------------------------- 1 | Notice how freeflow text doesn't bother FileCheck. 2 | Notice also that FileCheck doesn't care by default about the number of spaces 3 | CHECK: I feel 4 | CHECK: great 5 | CHECK: today 6 | CHECK: How about you? 7 | CHECK: I don 8 | CHECK: Meh 9 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex1/input.txt: -------------------------------------------------------------------------------- 1 | I feel 2 | 3 | great 4 | today 5 | 6 | How about you? 7 | This line doesn't matter 8 | as well as this one 9 | I don't know 10 | 11 | Meh 12 | 13 | The end 14 | 15 | or is it? 16 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | FileCheck --input-file input.txt check-file.txt 4 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex2/README.md: -------------------------------------------------------------------------------- 1 | This example shows how to use several prefixes with FileCheck. 2 | 3 | We use the `check-prefixes` command line option to match expressions that start 4 | with a different keyword than CHECK. 5 | 6 | Using this option, you can use several prefixes but only use a subset of what 7 | your check file holds. 
8 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex2/check-file.txt: -------------------------------------------------------------------------------- 1 | CHECK: match with the CHECK prefix 2 | SECOND: match with the SECOND prefix 3 | CHECK: Matching prefix can be interleaved 4 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex2/input.txt: -------------------------------------------------------------------------------- 1 | line that we want to match with the CHECK prefix 2 | line that we want to match with the SECOND prefix 3 | 4 | Matching prefix can be interleaved. 5 | FileCheck follows the all of them 6 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex2/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | FileCheck --input-file input.txt check-file.txt --check-prefixes CHECK,SECOND 4 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex3/README.md: -------------------------------------------------------------------------------- 1 | This example introduces the FileCheck keywords. 2 | The keywords are appended to the check prefix with -. 3 | 4 | In this example you can see a few keywords in action: 5 | - SAME: match on the same line 6 | - DAG: match order does not matter between different DAG directives 7 | - NEXT: match exactly on the next line 8 | - NOT: make sure this pattern is not matched 9 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex3/check-file.txt: -------------------------------------------------------------------------------- 1 | CHECK: I 2 | CHECK-SAME: feel 3 | CHECK: great 4 | CHECK-NEXT: today 5 | CHECK: How about you? 
6 | CHECK-DAG: Meh 7 | CHECK-DAG: I don 8 | 9 | CHECK-NOT: or is it 10 | CHECK: The end 11 | CHECK: or is it 12 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex3/input.txt: -------------------------------------------------------------------------------- 1 | I feel 2 | 3 | great 4 | today 5 | 6 | How about you? 7 | This line doesn't matter 8 | as well as this one 9 | I don't know 10 | 11 | Meh 12 | 13 | The end 14 | 15 | or is it? 16 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex3/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | FileCheck --input-file input.txt check-file.txt 4 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex4/README.md: -------------------------------------------------------------------------------- 1 | This example shows how to use regex in FileCheck. 2 | 3 | Regex are marked by the `{{regex}}` delimiters in the FileCheck commands. 
4 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex4/check-file.txt: -------------------------------------------------------------------------------- 1 | CHECK: match with the {{[a-zA-Z]+}} regex 2 | CHECK: match with the {{.*}} regex 3 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex4/input.txt: -------------------------------------------------------------------------------- 1 | line that won't match with the regex 2 | line that we want to match with the regex 3 | line that we want to match with the first regex 4 | line that we want to match with the second regex 5 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex4/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | FileCheck --input-file input.txt check-file.txt 4 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex5/README.md: -------------------------------------------------------------------------------- 1 | This example shows how to use variables in FileCheck. 2 | 3 | Variables allow you to capture a pattern (regex) and then reuse that pattern in later 4 | FileCheck commands. 5 | Variables use the `[[variable]]` delimiters. 6 | They are defined with `[[variable:regex]]` and used via `[[variable]]`. 7 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex5/check-file.txt: -------------------------------------------------------------------------------- 1 | Match whatever is not a space for our mystery word 2 | CHECK: with the word [[OUR_LABEL:[^ ]*]] 3 | Match the beginning of a line using the regex ^ 4 | CHECK: {{^}}[[OUR_LABEL]]: 5 | CHECK-NEXT: Congratulation! 
6 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex5/input.txt: -------------------------------------------------------------------------------- 1 | we want to match the lines that start with the word Fancy 2 | 3 | Ignore this line 4 | Ignore that line 5 | Match the next line use a variable that captures Fancy 6 | Fancy: 7 | Congratulation! 8 | -------------------------------------------------------------------------------- /ch1/FileCheckExamples/ex5/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | FileCheck --input-file input.txt check-file.txt 4 | -------------------------------------------------------------------------------- /ch1/README.md: -------------------------------------------------------------------------------- 1 | See the readme at FileCheckExamples/README.md to get started with this chapter's exercises. 2 | 3 | The script at `setup_env.sh` is a helper script that you can use to setup your 4 | environment to develop with LLVM. 5 | It will install required tools like CMake and so on. 6 | -------------------------------------------------------------------------------- /ch1/quiz/ex8-9/README.md: -------------------------------------------------------------------------------- 1 | Same explanation as what we did for the FileCheckExamples. 2 | Please see the README from that directory (ch1/FileCheckExamples). 
3 | -------------------------------------------------------------------------------- /ch1/quiz/ex8-9/check-file.txt: -------------------------------------------------------------------------------- 1 | CHECK: This is an example of match 2 | SECOND-CHECK-NOT: Do not want to see that 3 | CHECK: with FileCheck 4 | CHECK-NEXT: This line needs to happen exactly after “With FileCheck” 5 | SECOND-CHECK-NOT: Neither this 6 | CHECK: Over 7 | -------------------------------------------------------------------------------- /ch1/quiz/ex8-9/input.txt: -------------------------------------------------------------------------------- 1 | This is an example of match 2 | # Do not want to see that 3 | with FileCheck 4 | This line needs to happen exactly after “With FileCheck” 5 | 6 | # Neither this 7 | Over 8 | -------------------------------------------------------------------------------- /ch1/quiz/ex8-9/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo ex8 4 | FileCheck --input-file input.txt check-file.txt 5 | 6 | echo ex9 7 | # Here we use two prefixes to enable the second set of check lines in check-file.txt. 8 | # Alternatively, we could have written a new check-file. 9 | 10 | # This will reject input.txt, since the faulty lines are here. 11 | echo expected fail 12 | FileCheck --input-file input.txt check-file.txt --check-prefixes='CHECK,SECOND-CHECK' 13 | 14 | echo expected pass 15 | grep -v '^#' input.txt | FileCheck check-file.txt --check-prefixes='CHECK,SECOND-CHECK' 16 | 17 | -------------------------------------------------------------------------------- /ch1/setup_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Note: the URLs are customized for a macOS installation. 4 | 5 | # Exit if any command fails. 
# Abort on the first failing command.
# NOTE: needs_binary (below) switches this off again with `set +e`.
set -e

# The script takes exactly one argument: the directory to install into.
if [ $# -ne 1 ]; then
  echo "Convenient script to install the required binaries"
  echo "to build and test LLVM on macOS"
  echo ""
  echo "Usage $0 'where_to_install_path'"
  # Exit statuses are limited to 0-255, so use a plain non-zero code.
  exit 1
fi

install_prefix="$1"
# Poor man's path massaging to get an absolute path: anything that does not
# already start with '/' is taken relative to the current directory.
case "${install_prefix}" in
  /*) ;;
  *) install_prefix="${PWD}/${install_prefix}" ;;
esac

final_bin_dir="${install_prefix}/bin"
# Quoted so that a prefix containing spaces stays a single path.
mkdir -p "${final_bin_dir}"

# Couple of helper functions for easier-to-read code.

# needs_binary <name>
# Returns 0 when <name> resolves through PATH to a regular file, 1 otherwise.
# Callers store the status in a need_<tool> variable, so in those variables
# 0 means "already available" and 1 means "has to be installed".
needs_binary () {
  # Callers read the result through `$?` on the line after the call. Under
  # `set -e` a non-zero return would abort the script before they get there,
  # so errexit is disabled here (and stays disabled afterwards).
  set +e
  bin_name=$1
  if [ -f "$(command -v "${bin_name}")" ]; then
    return 0
  fi
  return 1
}

# print_need_install <status> <name>
# Tells the user that <name> was found (status 0) and will not be installed.
print_need_install () {
  need_bin=$1
  bin_name=$2
  if [ "${need_bin}" -eq 0 ]; then
    echo "Found '${bin_name}' skipping installation for this package"
    echo "> Consider removing '${bin_name}' from your path to force the install"
    echo ""
  fi
}

# print_path_to_add
# Prints where the tools were installed and the PATH export line to use.
print_path_to_add () {
  echo "Tools installed by this script are available at:"
  echo "'${final_bin_dir}'"
  echo ""
  echo "Please consider adding this location to your path"
  echo "export PATH=${final_bin_dir}:\${PATH}"
}

# Names of the packages to download

# Git is shipped as part of Xcode on macOS.
# We'll get it through the Xcode install.
#git_pkg=${install_prefix}/git.tgz
#git_url=""
needs_binary 'git'
need_git=$?
print_need_install "${need_git}" 'git'

# CMake's info.
cmake_pkg="${install_prefix}/cmake.tgz"
cmake_url="https://github.com/Kitware/CMake/releases/download/v3.28.0/cmake-3.28.0-macos-universal.tar.gz"
needs_binary 'cmake'
need_cmake=$?
print_need_install "${need_cmake}" 'cmake'

# The LLVM foundation doesn't ship a binary for macOS.
# We have to rely on the Xcode installer for that.
#llvm_pkg=${install_prefix}/llvm.tgz
#llvm_url=""
needs_binary 'clang'
need_llvm=$?
print_need_install "${need_llvm}" 'clang'

# Ninja's info.
ninja_pkg="${install_prefix}/ninja.zip"
ninja_url="https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-mac.zip"
needs_binary 'ninja'
need_ninja=$?
print_need_install "${need_ninja}" 'ninja'

# Python's info.
python_pkg="${install_prefix}/python.pkg"
# The python install step later in this script downloads from ${python_url};
# it was never defined, which left curl without a URL.
# NOTE(review): pinned to the 3.12.0 macOS installer -- confirm the version.
python_url="https://www.python.org/ftp/python/3.12.0/python-3.12.0-macos11.pkg"
needs_binary 'python3'
need_python=$?
print_need_install "${need_python}" 'python3'

# download_artifact <display_name> <destination_file> <url>
# Downloads <url> into <destination_file> unless that file already exists.
# Returns curl's exit status when a download is attempted.
download_artifact() {
  pkg_name=$1
  pkg_filename=$2
  url=$3

  if [ -f "${pkg_filename}" ]; then
    echo "Found ${pkg_name} package at '${pkg_filename}'"
    echo "> Skipping download"
    echo "> Consider removing this file if the package is out-of-date"
    echo ""
  else
    echo "Downloading ${pkg_name}"
    # --fail: on an HTTP error, do not save the server's error page as the
    # package; it would be mistaken for a valid cached download on the next run.
    curl --fail --location "${url}" --output "${pkg_filename}"
  fi
}

# Download and install Ninja, if needed.
if [ "${need_ninja}" -eq 1 ]; then
  download_artifact "ninja" "${ninja_pkg}" "${ninja_url}"
  ninja_bin="${final_bin_dir}/ninja"
  if [ -f "${ninja_bin}" ]; then
    echo "Ninja already found at '${ninja_bin}'"
    echo "> Consider removing it if this is not the right version"
    echo ""
  else
    echo "Installing ninja"
    # Quoted so that an install prefix containing spaces stays a single path.
    unzip "${ninja_pkg}" -d "${final_bin_dir}"
  fi
fi

# Download and install CMake, if needed.
125 | if [ ${need_cmake} -eq 1 ]; then 126 | download_artifact "cmake" "${cmake_pkg}" "${cmake_url}" 127 | cmake_bin="${final_bin_dir}/cmake" 128 | if [ -f "${cmake_bin}" ]; then 129 | echo "CMake already found at '${cmake_bin}'" 130 | echo "> Consider removing it if this is not the right version" 131 | echo "" 132 | else 133 | echo "Installing CMake" 134 | tar xzf ${cmake_pkg} -C ${install_prefix} 135 | path_to_cmake=`find ${install_prefix} -name cmake | grep '/bin/cmake'` 136 | ln -sf ${path_to_cmake} ${cmake_bin} 137 | fi 138 | fi 139 | 140 | # Exit if all the other packages are already available. 141 | if [ ${need_python} -eq 0 ] && [ ${need_git} -eq 0 ] && [ ${need_llvm} -eq 0 ]; then 142 | print_path_to_add 143 | exit 0 144 | fi 145 | 146 | echo "/!\ You have to set the install prefix yourself for the next installations" 147 | read -p "Continue Y/n: " user_input 148 | 149 | if [ "${user_input}" == "n" ] || [ "${user_input}" == "N" ]; 150 | then 151 | echo "Python and Xcode CLI tools not installed" 152 | exit 0 153 | fi 154 | 155 | if [ "${user_input}" != "y" ] && [ "${user_input}" != "Y" ] && [ "${user_input}" != "" ]; 156 | then 157 | echo "error: invalid response" 158 | exit -1 159 | fi 160 | 161 | # Install python if needed. 162 | if [ ${need_python} -eq 1 ]; then 163 | download_artifact "python" "${python_pkg}" "${python_url}" 164 | echo "Installing python... Please follow installer instructions." 165 | open ${python_pkg} 166 | fi 167 | 168 | read -p "Press any key to continue" -n1 169 | 170 | # On macOS the most official way to get git and clang is through xcode. 171 | # if clang and git are already available, assume that they are correct. 172 | if [ ${need_git} -eq 1 ] || [ ${need_llvm} -eq 1 ]; then 173 | echo "Installing Xcode CLI tools... Please follow installer instructions." 
174 | xcode-select --install 175 | fi 176 | 177 | print_path_to_add 178 | exit 0 179 | -------------------------------------------------------------------------------- /ch10/debug_capabilities/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_STANDARD 17) 2 | 3 | cmake_minimum_required(VERSION 3.22) 4 | 5 | project(CH8 6 | LANGUAGES CXX C) 7 | 8 | include(../../cmake/utils/set-llvm-install-prefix.cmake) 9 | include(../../cmake/utils/opt-run.cmake) 10 | 11 | set(CURR_TARGET 12 | hadd_vector_log 13 | ) 14 | 15 | set(OPT_RUN_DEPENDENCIES) 16 | # Normal run. 17 | add_run_opt(OPT_RUN_DEPENDENCIES "-O3" hadd_vector.ll) 18 | # Run with print of the IR after all. 19 | add_run_opt(OPT_RUN_DEPENDENCIES "-O3 -print-after-all" hadd_vector.ll print_after_all) 20 | # Run with print of only the part we want. 21 | add_run_opt(OPT_RUN_DEPENDENCIES "-O3 -print-module-scope -print-before=slp-vectorizer" hadd_vector.ll print_before_slp_vectorizer) 22 | # Run the SLP vectorizer with debug log enabled. 23 | # Note: this works only if the provided LLVM was built with asserts. 24 | add_run_opt(OPT_RUN_DEPENDENCIES "-passes=slp-vectorizer -debug-only=SLP" before_slp_vectorizer_hadd_vector.ll debug_slp_vectorizer) 25 | 26 | # Create an executable target that depends on the generated file 27 | add_custom_target(${CURR_TARGET} 28 | DEPENDS ${OPT_RUN_DEPENDENCIES} 29 | ) 30 | 31 | set_target_properties(${CURR_TARGET} PROPERTIES EXCLUDE_FROM_ALL 0) 32 | -------------------------------------------------------------------------------- /ch10/debug_capabilities/README.md: -------------------------------------------------------------------------------- 1 | # Enable LLVM debug capabilities # 2 | 3 | In this exercise, we run an input module, `hadd_vector.ll` in the `O3` optimization pipeline (with `opt`). 4 | 5 | The goal here is for you to find when the 4 adds in the input IR gets replaced in one horizontal add (llvm.vector.reduce.add.) 
6 | 7 | To find this, you need to use `-print-after-all` to find which pass does the transformation you are looking for. 8 | 9 | Then use `-debug-only=found_pass` to see how it does the transformation. 10 | 11 | Note: the `-debugxxx` options work only on builds of LLVM that have assertions enabled. 12 | 13 | The steps below show you how to produce the interesting command line invocations. 14 | 15 | ## Configure your build directory ## 16 | 17 | ```bash 18 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild . 19 | ``` 20 | 21 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option). 22 | 23 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed. 24 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package. 25 | 26 | Note: Again, if you use a plain release, the steps that use `-debugxxx` will fail. 27 | 28 | ## Build ## 29 | 30 | ```bash 31 | ninja -C build 32 | ``` 33 | 34 | This builds the default target in the build directory. 35 | 36 | This should produce in the `build` directory (showing only the relevant files): 37 | * `hadd_vector.out.ll`: The output IR of the `opt -O3` pipeline 38 | * `print_after_all_hadd_vector.out.ll.txt`: The command line output of `print-after-all` for `O3`. 39 | * `print_before_slp_vectorizer_hadd_vector.out.ll`: The input IR before the SLP vectorizer (the full module not just the function.) 40 | * `debug_slp_vectorizer_before_slp_vectorizer_hadd_vector.out.ll.txt`: The debug output of the SLP vectorizer. 41 | 42 | ## Solution ## 43 | 44 | The optimization we want to identify here is the SLP vectorizer (as you may have guessed from the dump we saved :)).
45 | 46 | -------------------------------------------------------------------------------- /ch10/debug_capabilities/before_slp_vectorizer_hadd_vector.ll: -------------------------------------------------------------------------------- 1 | ; *** IR Dump Before SLPVectorizerPass on hadd *** 2 | ; ModuleID = '/Users/qcolombet/clones/open/How-to-build-an-LLVM-backend/ch10/debug_capabilities/hadd_vector.ll' 3 | source_filename = "hadd_vector.c" 4 | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" 5 | target triple = "arm64-apple-macosx14.0.0" 6 | 7 | ; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(argmem: read) uwtable(sync) 8 | define i32 @hadd(ptr nocapture noundef readonly %arg) local_unnamed_addr #0 { 9 | bb: 10 | %i3 = load i32, ptr %arg, align 4 11 | %i5 = getelementptr inbounds i8, ptr %arg, i64 4 12 | %i6 = load i32, ptr %i5, align 4 13 | %i7 = add nsw i32 %i6, %i3 14 | %i9 = getelementptr inbounds i8, ptr %arg, i64 8 15 | %i10 = load i32, ptr %i9, align 4 16 | %i11 = add nsw i32 %i7, %i10 17 | %i13 = getelementptr inbounds i8, ptr %arg, i64 12 18 | %i14 = load i32, ptr %i13, align 4 19 | %i15 = add nsw i32 %i11, %i14 20 | ret i32 %i15 21 | } 22 | 23 | attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(argmem: read) uwtable(sync) "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "probe-stack"="__chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } 24 | 25 | !llvm.module.flags = !{!0, !1, !2, !3, !4} 26 | !llvm.ident = !{!5} 27 | 28 | !0 = !{i32 2, !"SDK Version", [2 x i32] [i32 14, i32 4]} 29 | !1 = !{i32 1, !"wchar_size", i32 4} 30 | !2 = !{i32 8, !"PIC Level", i32 2} 31 | !3 = !{i32 7, !"uwtable", i32 1} 32 | !4 = !{i32 7, !"frame-pointer", i32 
; Function Attrs: noinline nounwind ssp uwtable(sync)
; Horizontal add over the four i32 fields of a %struct.Vec4 reached through
; %arg. The incoming pointer is spilled to a stack slot and reloaded before
; every field access; each field is addressed with a struct GEP (indices 0..3)
; and the loaded values are summed with scalar `add nsw` instructions.
define i32 @hadd(ptr noundef %arg) #0 {
bb:
  ; Stack slot holding the pointer argument.
  %i = alloca ptr, align 8
  store ptr %arg, ptr %i, align 8
  ; Field 0.
  %i1 = load ptr, ptr %i, align 8
  %i2 = getelementptr inbounds %struct.Vec4, ptr %i1, i32 0, i32 0
  %i3 = load i32, ptr %i2, align 4
  ; Field 1, added to field 0.
  %i4 = load ptr, ptr %i, align 8
  %i5 = getelementptr inbounds %struct.Vec4, ptr %i4, i32 0, i32 1
  %i6 = load i32, ptr %i5, align 4
  %i7 = add nsw i32 %i3, %i6
  ; Field 2, added to the running sum.
  %i8 = load ptr, ptr %i, align 8
  %i9 = getelementptr inbounds %struct.Vec4, ptr %i8, i32 0, i32 2
  %i10 = load i32, ptr %i9, align 4
  %i11 = add nsw i32 %i7, %i10
  ; Field 3, added to the running sum.
  %i12 = load ptr, ptr %i, align 8
  %i13 = getelementptr inbounds %struct.Vec4, ptr %i12, i32 0, i32 3
  %i14 = load i32, ptr %i13, align 4
  %i15 = add nsw i32 %i11, %i14
  ; The total of the four fields is the result.
  ret i32 %i15
}
# Build script for the ch10 "incorrect optimization" exercise.
# Produces the `buggy_cst_propagation` executable from main.cpp and
# populate_function.cpp and links it against the LLVM component libraries.
cmake_minimum_required(VERSION 3.22)

project(CH10
  LANGUAGES CXX C)

set(CMAKE_CXX_STANDARD 17)

include(../../cmake/utils/set-llvm-install-prefix.cmake)

set(CURR_TARGET
  buggy_cst_propagation
)

set(SRC
  "main.cpp"
  "populate_function.cpp"
)

set(INC
)

# Now build our tools
add_executable(${CURR_TARGET} ${SRC} ${INC})

# Attach -fno-rtti to the target instead of overwriting CMAKE_CXX_FLAGS, so
# that flags given at configure time (for instance
# -DCMAKE_CXX_FLAGS=-fsanitize=address) are not silently discarded.
target_compile_options(${CURR_TARGET} PRIVATE -fno-rtti)

# Find the libraries that correspond to the LLVM components
# that we wish to use
llvm_map_components_to_libnames(llvm_libs support core transformutils)

# Link against LLVM libraries
target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
11 | 12 | Hint, when reducing the issue, make sure to filter out the case where the input module is empty. 13 | Otherwise, the reduction will converge towards this case. 14 | 15 | Hint 2, another similar instruction does the right thing in this implementation. 16 | 17 | Hint 3, the correct implementation is available in the ch4 directory. 18 | 19 | ## Configure your build directory ## 20 | 21 | ```bash 22 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild . 23 | ``` 24 | 25 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option). 26 | 27 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed. 28 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package. 29 | 30 | ## Build ## 31 | 32 | ```bash 33 | ninja -C build 34 | ``` 35 | 36 | This builds the default target in the build directory. 37 | 38 | This should produce in the `build` directory a binary named `buggy_cst_propagation`. 39 | 40 | ## Run ## 41 | 42 | ```bash 43 | ./build/buggy_cst_propagation [input.ll|.bc] 44 | ``` 45 | 46 | This will run the buggy implementation on `input.ll`. 47 | Without any input, it runs on a pre-defined IR hardcoded in the main function. 48 | The code should work on the pre-defined IR. 49 | 50 | Next, running the following command should crash the program: 51 | ```bash 52 | ./build/buggy_cst_propagation bugged_input.ll 53 | ``` 54 | 55 | Now, your job is to reduce the IR in `bugged_input.ll` to find the minimal set of instructions that exposes the issue. 56 | The goal of this exercise is to have you play with `llvm-reduce` or `bugpoint`, which are utilities provided by the LLVM infrastructure. 
57 | 58 | ## Solution ## 59 | 60 | ### With `llvm-reduce` ### 61 | 62 | `llvm-reduce` assumes it found what you are looking for when the given test script returns success (status == 0). 63 | 64 | We use that in `check.sh`. 65 | 66 | To reproduce the problem with `llvm-reduce`, you should run the following command line: 67 | ```bash 68 | llvm-reduce --test=check.sh bugged_input.ll 69 | ``` 70 | 71 | This command runs `llvm-reduce` while using `check.sh` as the compilation and testing step. 72 | 73 | Check the content of this file in this directory. 74 | 75 | The resulting IR should resemble: 76 | ``` 77 | define i32 @bar() { 78 | bb: 79 | %i3 = sdiv i32 0, 0 80 | ret i32 0 81 | } 82 | ``` 83 | 84 | ### With `bugpoint` ### 85 | 86 | `bugpoint` is the opposite of `llvm-reduce` and assumes it found what you are looking for when the test script returns failure (status != 0). 87 | 88 | We use that in `bugpoint-check.sh`. 89 | 90 | To reproduce the problem with `bugpoint`, you should run the following command line: 91 | ```bash 92 | bugpoint --compile-command=./bugpoint-check.sh --run-llc --compile-custom bugged_input.ll 93 | ``` 94 | 95 | This command tells `bugpoint` that the compile command and status are reported by `bugpoint-check.sh`. 96 | More specifically the combination of `--compile-custom` and `--compile-command` says that we use a custom compile command and that command is given by the related option. 97 | 98 | Then, we use `llc` (`--run-llc`) as the runner, meaning that `bugpoint` should not try to interpret or run the IR with the "safe" version. 99 | 100 | The resulting IR should resemble: 101 | ``` 102 | define void @bar() { 103 | bb: 104 | %i3 = sdiv i32 3, 0 105 | unreachable 106 | } 107 | ``` 108 | 109 | Remember to use `opt -S` on the bitcode (`.bc`) to see the textual IR. 
#!/bin/bash
# Test driver for `bugpoint --compile-custom`.
# Runs the tool under test on the arguments bugpoint passes in and maps its
# exit status to what bugpoint expects: non-zero means "interesting".
#
# "$@" is quoted so that an input path containing whitespace reaches the
# tool as a single argument.
./build/buggy_cst_propagation "$@"

status=$?
# We filter out $status == 1 because this is our error code
# when the input file is empty, which is not what we are trying
# bugpoint to converge to.
if [ "$status" -ne 0 ] && [ "$status" -ne 1 ]; then
  # bugpoint expects an error code when something interesting happened.
  # Use a status inside the valid 0-255 range.
  exit 1
fi

exit 0
5 | # We filter out $status == 1 because this is our error code 6 | # when the input file is empty, which is not what we are trying 7 | # llvm-reduce to converge to. 8 | if [ $status -ne 0 ] && [ $status -ne 1 ]; then 9 | # llvm-reduce expects 0 when something interesting happens. 10 | exit 0 11 | fi 12 | 13 | exit -1 14 | -------------------------------------------------------------------------------- /ch10/incorrect_optimization/main.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/AsmParser/Parser.h" // For parseAssemblyString. 2 | #include "llvm/IR/Function.h" 3 | #include "llvm/IR/LLVMContext.h" 4 | #include "llvm/IR/Module.h" 5 | #include "llvm/IR/Verifier.h" 6 | #include "llvm/IRReader/IRReader.h" // For parseIRFile. 7 | #include "llvm/Support/Debug.h" // For errs(). 8 | #include "llvm/Support/SourceMgr.h" // For SMDiagnostic. 9 | 10 | using namespace llvm; 11 | 12 | extern bool buggyConstantPropagation(llvm::Function &); 13 | 14 | bool checkFunctionCorrectness(llvm::Function &Res) { 15 | Res.print(errs()); 16 | // verifyFunction returns true if it finds errors and 17 | // print them on the provided output stream (errs() here). 18 | if (verifyFunction(Res, &errs())) { 19 | errs() << Res.getName() << " does not verify\n"; 20 | return false; 21 | } 22 | return true; 23 | } 24 | 25 | // Default input in case no file was provided. 
26 | const char *InputIR = 27 | "define i32 @foo(i32 noundef %arg) {\n" 28 | "bb:\n" 29 | " %i = shl i32 5, 3\n" 30 | " %i1 = icmp ne i32 %arg, 0\n" 31 | " br i1 %i1, label %bb2, label %bb4\n" 32 | "\n" 33 | "bb2:\n" 34 | " %i3 = sdiv i32 %i, 5\n" 35 | " br label %bb6\n" 36 | "\n" 37 | "bb4:\n" 38 | " %i5 = or i32 %i, 3855\n" 39 | " br label %bb6\n" 40 | "\n" 41 | "bb6:\n" 42 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n" 43 | " ret i32 %.0\n" 44 | "}\n" 45 | "\n" 46 | "define i32 @bar(i32 noundef %arg) {\n" 47 | "bb:\n" 48 | " %i = shl i32 -1, 3\n" 49 | " %i1 = icmp ne i32 %arg, 0\n" 50 | " br i1 %i1, label %bb2, label %bb4\n" 51 | "\n" 52 | "bb2:\n" 53 | " %i3 = udiv i32 %i, 3\n" 54 | " br label %bb6\n" 55 | "\n" 56 | "bb4:\n" 57 | " %i5 = or i32 %i, 3855\n" 58 | " br label %bb6\n" 59 | "\n" 60 | "bb6:\n" 61 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n" 62 | " %i7 = add i32 %.0, 1\n" 63 | " ret i32 %i7\n" 64 | "}\n"; 65 | 66 | int main(int argc, char **argv) { 67 | LLVMContext Context; 68 | SMDiagnostic Err; 69 | std::unique_ptr MyModule; 70 | // To be able to play with the optimization a little bit, 71 | // support a mode where you can feed your own IR files. 72 | if (argc == 2) { 73 | outs() << "Reading module from '" << argv[1] << "'\n"; 74 | MyModule = parseIRFile(argv[1], Err, Context); 75 | } else { 76 | MyModule = parseAssemblyString(InputIR, Err, Context); 77 | } 78 | if (!MyModule) { 79 | errs() << "Unable to build module\n"; 80 | return -1; 81 | } 82 | 83 | bool hadError = false; 84 | for (Function &Func : *MyModule) { 85 | outs() << "Processing function '" << Func.getName() << "'\n"; 86 | Func.print(outs()); 87 | 88 | // Clone the function before the optimization to make sure each 89 | // implementation sees the same input. 
90 | 91 | outs() << "\n\n## Reference implementation\n"; 92 | bool solutionDidSomething = buggyConstantPropagation(Func); 93 | bool solutionIsCorrect = checkFunctionCorrectness(Func); 94 | 95 | if (!solutionIsCorrect) { 96 | hadError = true; 97 | errs() << "Solution does not verify:\n" 98 | "- provided implementation(" 99 | << (solutionIsCorrect ? "passed" : "failed") 100 | << ")\n"; 101 | } 102 | 103 | outs() << "\n######\n"; 104 | } 105 | 106 | return !hadError; 107 | } 108 | -------------------------------------------------------------------------------- /ch10/incorrect_optimization/populate_function.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/ADT/APInt.h" 2 | #include "llvm/ADT/PostOrderIterator.h" // For ReversePostOrderTraversal. 3 | #include "llvm/IR/BasicBlock.h" 4 | #include "llvm/IR/CFG.h" // To instantiate RPOTraversal. 5 | #include "llvm/IR/Constants.h" // For ConstantInt. 6 | #include "llvm/IR/Function.h" 7 | #include "llvm/IR/InstrTypes.h" // For BinaryOperator, etc. 8 | #include "llvm/IR/Instruction.h" 9 | #include "llvm/IR/LLVMContext.h" 10 | #include "llvm/IR/Module.h" 11 | #include "llvm/Support/Debug.h" // For errs(). 12 | 13 | #include 14 | 15 | using namespace llvm; 16 | 17 | // Helper function to deal with binary instructions. 18 | static Value *visitBinary(Instruction &Instr, LLVMContext &Ctxt, 19 | std::optional (*Computation)(const APInt &, 20 | const APInt &)) { 21 | assert(isa(Instr) && "This is meant for binary instruction"); 22 | 23 | auto *LHS = dyn_cast(Instr.getOperand(0)); 24 | auto *RHS = dyn_cast(Instr.getOperand(1)); 25 | if (!LHS || !RHS) 26 | return nullptr; 27 | 28 | // FIXME: Technically this API is not precise enough because we may want to 29 | // produce poison value for e.g., a division by zero. 
30 | std::optional Res = Computation(LHS->getValue(), RHS->getValue()); 31 | if (!Res.has_value()) 32 | return nullptr; 33 | auto NewConstant = ConstantInt::get(Ctxt, *Res); 34 | return NewConstant; 35 | } 36 | 37 | // Takes \p Foo and apply a simple constant propagation optimization. 38 | // \returns true if \p Foo was modified (i.e., something had been constant 39 | // propagated), false otherwise. 40 | bool buggyConstantPropagation(Function &Foo) { 41 | 42 | if (Foo.empty()) 43 | return false; 44 | 45 | LLVMContext &Ctxt = Foo.getParent()->getContext(); 46 | bool MadeChanges = false; 47 | 48 | ReversePostOrderTraversal RPOT(&Foo); 49 | for (BasicBlock *BB : RPOT) { 50 | // Early increment to be able to remove the instruction that we replaced 51 | // on-the-fly. The alternative is to accumulate the instructions to remove 52 | // in a worklist and delete them afterwards. 53 | for (Instruction &Instr : make_early_inc_range(*BB)) { 54 | Value *NewConstant = nullptr; 55 | switch (Instr.getOpcode()) { 56 | case Instruction::Add: 57 | NewConstant = visitBinary( 58 | Instr, Ctxt, 59 | [](const APInt &A, const APInt &B) -> std::optional { 60 | return A + B; 61 | }); 62 | break; 63 | case Instruction::Sub: 64 | NewConstant = visitBinary( 65 | Instr, Ctxt, 66 | [](const APInt &A, const APInt &B) -> std::optional { 67 | return A - B; 68 | }); 69 | break; 70 | case Instruction::Mul: 71 | NewConstant = visitBinary( 72 | Instr, Ctxt, 73 | [](const APInt &A, const APInt &B) -> std::optional { 74 | return A * B; 75 | }); 76 | break; 77 | case Instruction::SDiv: 78 | NewConstant = visitBinary( 79 | Instr, Ctxt, 80 | [](const APInt &A, const APInt &B) -> std::optional { 81 | return A.sdiv(B); 82 | }); 83 | break; 84 | case Instruction::UDiv: 85 | NewConstant = visitBinary( 86 | Instr, Ctxt, 87 | [](const APInt &A, const APInt &B) -> std::optional { 88 | if (B.isZero()) 89 | return std::nullopt; 90 | return A.udiv(B); 91 | }); 92 | break; 93 | case Instruction::Shl: 94 | 
NewConstant = visitBinary( 95 | Instr, Ctxt, 96 | [](const APInt &A, const APInt &B) -> std::optional { 97 | return A.shl(B); 98 | }); 99 | break; 100 | case Instruction::LShr: 101 | NewConstant = visitBinary( 102 | Instr, Ctxt, 103 | [](const APInt &A, const APInt &B) -> std::optional { 104 | return A.lshr(B); 105 | }); 106 | break; 107 | case Instruction::AShr: 108 | NewConstant = visitBinary( 109 | Instr, Ctxt, 110 | [](const APInt &A, const APInt &B) -> std::optional { 111 | return A.ashr(B); 112 | }); 113 | break; 114 | case Instruction::And: 115 | NewConstant = visitBinary( 116 | Instr, Ctxt, 117 | [](const APInt &A, const APInt &B) -> std::optional { 118 | return A & B; 119 | }); 120 | break; 121 | case Instruction::Or: 122 | NewConstant = visitBinary( 123 | Instr, Ctxt, 124 | [](const APInt &A, const APInt &B) -> std::optional { 125 | return A | B; 126 | }); 127 | break; 128 | case Instruction::Xor: 129 | NewConstant = visitBinary( 130 | Instr, Ctxt, 131 | [](const APInt &A, const APInt &B) -> std::optional { 132 | return A ^ B; 133 | }); 134 | break; 135 | 136 | default: 137 | break; 138 | } 139 | if (NewConstant) { 140 | Instr.replaceAllUsesWith(NewConstant); 141 | Instr.eraseFromParent(); 142 | MadeChanges = true; 143 | } 144 | } 145 | } 146 | return MadeChanges; 147 | } 148 | -------------------------------------------------------------------------------- /ch10/undefined_behavior/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_STANDARD 17) 2 | 3 | cmake_minimum_required(VERSION 3.22) 4 | 5 | project(CH10 6 | LANGUAGES CXX C) 7 | 8 | include(../../cmake/utils/set-llvm-install-prefix.cmake) 9 | 10 | set(CURR_TARGET 11 | fct_with_ub 12 | ) 13 | 14 | set(SRC 15 | "main.cpp" 16 | "populate_function.cpp" 17 | ) 18 | 19 | set(INC 20 | ) 21 | 22 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") 23 | 24 | # Now build our tools 25 | add_executable(${CURR_TARGET} ${SRC} ${INC}) 26 | 27 | # Find 
the libraries that correspond to the LLVM components 28 | # that we wish to use 29 | llvm_map_components_to_libnames(llvm_libs support core) 30 | 31 | # Link against LLVM libraries 32 | target_link_libraries(${CURR_TARGET} ${llvm_libs}) 33 | -------------------------------------------------------------------------------- /ch10/undefined_behavior/README.md: -------------------------------------------------------------------------------- 1 | # Easily find undefined behavior # 2 | 3 | In this exercise you need to leverage the undefined behavior sanitizer to find what is wrong with the `fctWithUB` function implemented in `populate_function.cpp`. 4 | 5 | Build the code using the steps below. 6 | 7 | Run it, play with different values. 8 | 9 | Everything is fine, right? 10 | 11 | No, this program relies on undefined behavior. 12 | 13 | Rebuild with the undefined behavior sanitizer enabled and find the issues. 14 | 15 | To fix the UB, please implement the specifications put as comments before the implementation of `fctWithUB`. 16 | 17 | ## Configure your build directory ## 18 | 19 | ```bash 20 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild . 21 | ``` 22 | 23 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option). 24 | 25 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed. 26 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package. 27 | 28 | ## Build ## 29 | 30 | ```bash 31 | ninja -C build 32 | ``` 33 | 34 | This builds the default target in the build directory. 35 | 36 | This should produce in the `build` directory a binary named `fct_with_ub`. 37 | 38 | ## Run ## 39 | 40 | ```bash 41 | ./build/fct_with_ub -- someNumber 42 | ``` 43 | 44 | This will run the function that relies on UB on `someNumber`. 45 | 46 | By default `someNumber == 12`. 
47 | 48 | 49 | ## Solution ## 50 | 51 | Configure your build with the undefined behavior sanitizer enabled and build: 52 | ```bash 53 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild_w_ubsan . -DCMAKE_CXX_FLAGS=-fsanitize=undefined 54 | ninja -C build_w_ubsan 55 | ``` 56 | 57 | Then run the application: 58 | ```bash 59 | build_w_ubsan/fct_with_ub -- someNumber 60 | ``` 61 | 62 | If you use a negative number, you should get an error that resembles: 63 | ``` 64 | $ ./build_w_ubsan/fct_with_ub -- -25 65 | ch10/undefined_behavior/populate_function.cpp:7:17: runtime error: shift exponent -25 is negative 66 | SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior ch10/undefined_behavior/populate_function.cpp:7:17 in 67 | fctWithUB(-25) == 2519 68 | ``` 69 | 70 | Now, if you run with a number bigger than 31, you should get: 71 | ``` 72 | $ ./build_w_ubsan/fct_with_ub -- 32 73 | ch10/undefined_behavior/populate_function.cpp:7:17: runtime error: shift exponent 32 is too large for 32-bit type 'int' 74 | ``` 75 | 76 | In both cases, this tells you that at line 7 of `populate_function.cpp` we are relying on some specific undefined behavior and we should fix that. 77 | -------------------------------------------------------------------------------- /ch10/undefined_behavior/main.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/Support/CommandLine.h" // For cl::xxx. 2 | #include "llvm/Support/Debug.h" // For dbgs(). 
# Build script for the ch10 "use after free" exercise.
# Produces the `use_after_free` executable from main.cpp and
# populate_function.cpp and links it against the LLVM component libraries.
cmake_minimum_required(VERSION 3.22)

project(CH10
  LANGUAGES CXX C)

set(CMAKE_CXX_STANDARD 17)

include(../../cmake/utils/set-llvm-install-prefix.cmake)

set(CURR_TARGET
  use_after_free
)

set(SRC
  "main.cpp"
  "populate_function.cpp"
)

set(INC
)

# Now build our tools
add_executable(${CURR_TARGET} ${SRC} ${INC})

# -fno-rtti is attached to the target; CMAKE_CXX_FLAGS is left entirely to
# the user (for instance -DCMAKE_CXX_FLAGS=-fsanitize=address).
target_compile_options(${CURR_TARGET} PRIVATE -fno-rtti)

# Find the libraries that correspond to the LLVM components
# that we wish to use
llvm_map_components_to_libnames(llvm_libs support core)

# Link against LLVM libraries
target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
-------------------------------------------------------------------------------- 1 | # Easily find memory corruption # 2 | 3 | In this exercise you need to leverage the address sanitizer to find what is wrong with the `buggyBuildModule` function implemented in `populate_function.cpp`. 4 | 5 | Build the code using the steps below. 6 | 7 | Run it and observe the crash. 8 | 9 | Now, think about how you can find this crash easily. 10 | 11 | Bonus: Fix the crash! 12 | 13 | You can look at `ch3/llvm_ir` for a correct way of doing the `buildModule` implementation. 14 | 15 | ## Configure your build directory ## 16 | 17 | ```bash 18 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild . 19 | ``` 20 | 21 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option). 22 | 23 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed. 24 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package. 25 | 26 | ## Build ## 27 | 28 | ```bash 29 | ninja -C build 30 | ``` 31 | 32 | This builds the default target in the build directory. 33 | 34 | This should produce in the `build` directory a binary named `use_after_free`. 35 | 36 | ## Run ## 37 | 38 | ```bash 39 | ./build/use_after_free 40 | ``` 41 | 42 | This will run the buggy build module implementation. 43 | 44 | At this point it should crash. 45 | 46 | ## Solution ## 47 | 48 | Configure your build with address sanitizer enabled and build: 49 | ```bash 50 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild_w_asan . 
-DCMAKE_CXX_FLAGS=-fsanitize=address 51 | ninja -C build_w_asan 52 | ``` 53 | 54 | Then run the application: 55 | ```bash 56 | build_w_asan/use_after_free 57 | ``` 58 | 59 | And you should see an output resembling: 60 | ``` 61 | ================================================================= 62 | ==37427==ERROR: AddressSanitizer: heap-use-after-free on address 0x000108f03ca0 at pc 0x000105baf828 bp 0x00016b1a6830 sp 0x00016b1a5ff0 63 | READ of size 15 at 0x000108f03ca0 thread T0 64 | #0 0x105baf824 in wrap_memchr+0x27c (libclang_rt.asan_osx_dynamic.dylib:arm64e+0x1b824) 65 | #1 0x1945b7cac in std::__1::basic_string, std::__1::allocator>::find(char, unsigned long) const+0x38 (libc++.1.dylib:arm64e+0x1bcac) 66 | #2 0x104d3c968 in llvm::Module::print(llvm::raw_ostream&, llvm::AssemblyAnnotationWriter*, bool, bool) const+0x9b0 (use_after_free:arm64+0x1000e4968) 67 | #3 0x104c5a088 in main main.cpp:18 68 | #4 0x1942fe0dc () 69 | 70 | 0x000108f03ca0 is located 160 bytes inside of 752-byte region [0x000108f03c00,0x000108f03ef0) 71 | freed by thread T0 here: 72 | #0 0x105bf5b8c in wrap__ZdlPv+0x74 (libclang_rt.asan_osx_dynamic.dylib:arm64e+0x61b8c) 73 | #1 0x104e836f4 in llvm::LLVMContextImpl::~LLVMContextImpl()+0xf4 (use_after_free:arm64+0x10022b6f4) 74 | #2 0x104e81910 in llvm::LLVMContext::~LLVMContext()+0x18 (use_after_free:arm64+0x100229910) 75 | #3 0x104c5cab4 in buggyBuildModule() populate_function.cpp:152 76 | #4 0x104c59fc8 in main main.cpp:11 77 | #5 0x1942fe0dc () 78 | 79 | previously allocated by thread T0 here: 80 | #0 0x105bf574c in wrap__Znwm+0x74 (libclang_rt.asan_osx_dynamic.dylib:arm64e+0x6174c) 81 | #1 0x104c5ce78 in std::__1::__unique_if::__unique_single std::__1::make_unique[abi:ue170006](char const (&) [16], llvm::LLVMContext&) unique_ptr.h:689 82 | #2 0x104c5b394 in buggyBuildModule() populate_function.cpp:71 83 | #3 0x104c59fc8 in main main.cpp:11 84 | #4 0x1942fe0dc () 85 | 86 | SUMMARY: AddressSanitizer: heap-use-after-free 
(libclang_rt.asan_osx_dynamic.dylib:arm64e+0x1b824) in wrap_memchr+0x27c 87 | Shadow bytes around the buggy address: 88 | 0x000108f03a00: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 89 | 0x000108f03a80: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 90 | 0x000108f03b00: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 91 | 0x000108f03b80: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 92 | 0x000108f03c00: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 93 | =>0x000108f03c80: fd fd fd fd[fd]fd fd fd fd fd fd fd fd fd fd fd 94 | 0x000108f03d00: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 95 | 0x000108f03d80: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 96 | 0x000108f03e00: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 97 | 0x000108f03e80: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fa fa 98 | 0x000108f03f00: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 99 | Shadow byte legend (one shadow byte represents 8 application bytes): 100 | Addressable: 00 101 | Partially addressable: 01 02 03 04 05 06 07 102 | Heap left redzone: fa 103 | Freed heap region: fd 104 | Stack left redzone: f1 105 | Stack mid redzone: f2 106 | Stack right redzone: f3 107 | Stack after return: f5 108 | Stack use after scope: f8 109 | Global redzone: f9 110 | Global init order: f6 111 | Poisoned by user: f7 112 | Container overflow: fc 113 | Array cookie: ac 114 | Intra object redzone: bb 115 | ASan internal: fe 116 | Left alloca redzone: ca 117 | Right alloca redzone: cb 118 | ==37427==ABORTING 119 | Abort trap: 6 120 | ``` 121 | 122 | What this tells you is that at line 18 of `main.cpp` we are using an object that has been deallocated at line 152 of `populate_function.cpp`. 123 | 124 | Furthermore, it tells us that this object was freed with `~LLVMContext`, i.e., the destructor of the LLVMContext. 125 | 126 | Therefore the problem here is that our LLVMContext does not have the proper scope. 
127 | 128 | Indeed it is local to `buggyBuildModule`, whereas its live-range must out-live its Module, which in this case goes beyond `buggyBuildModule`. 129 | -------------------------------------------------------------------------------- /ch10/use_after_free/main.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/Module.h" 2 | #include "llvm/IR/Verifier.h" 3 | #include "llvm/Support/Debug.h" // For errs(). 4 | 5 | using namespace llvm; 6 | 7 | extern std::unique_ptr buggyBuildModule(); 8 | 9 | int main(int argc, char **argv) { 10 | bool hadError = false; 11 | std::unique_ptr CurModule = buggyBuildModule(); 12 | outs() << "\n\n## Processing module\n"; 13 | if (!CurModule) { 14 | outs() << "Nothing built\n"; 15 | return 0; 16 | } 17 | 18 | CurModule->print(errs(), /*AssemblyAnnotationWriter=*/nullptr); 19 | // verifyModule returns true if it finds errors and 20 | // print them on the provided output stream (errs() here). 21 | if (verifyModule(*CurModule, &errs())) { 22 | errs() << "Impl does not verify\n"; 23 | hadError |= true; 24 | } 25 | 26 | return !hadError; 27 | } 28 | -------------------------------------------------------------------------------- /ch10/use_after_free/populate_function.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/ADT/ArrayRef.h" 2 | #include "llvm/IR/BasicBlock.h" 3 | #include "llvm/IR/Constants.h" // For ConstantInt. 4 | #include "llvm/IR/DerivedTypes.h" // For PointerType, FunctionType. 5 | #include "llvm/IR/Function.h" 6 | #include "llvm/IR/IRBuilder.h" 7 | #include "llvm/IR/LLVMContext.h" 8 | #include "llvm/IR/Module.h" 9 | #include "llvm/IR/Type.h" 10 | #include "llvm/Support/Debug.h" // For errs(). 
11 | 12 | #include // For unique_ptr 13 | 14 | using namespace llvm; 15 | 16 | // The goal of this function is to build a Module that 17 | // represents the lowering of the following foo, a C function: 18 | // extern int baz(); 19 | // extern void bar(int); 20 | // void foo(int a, int b) { 21 | // int var = a + b; 22 | // if (var == 0xFF) { 23 | // bar(var); 24 | // var = baz(); 25 | // } 26 | // bar(var); 27 | // } 28 | // 29 | // The IR for this snippet (at O0) is: 30 | // define void @foo(i32 %arg, i32 %arg1) { 31 | // bb: 32 | // %i = alloca i32 33 | // %i2 = alloca i32 34 | // %i3 = alloca i32 35 | // store i32 %arg, ptr %i 36 | // store i32 %arg1, ptr %i2 37 | // %i4 = load i32, ptr %i 38 | // %i5 = load i32, ptr %i2 39 | // %i6 = add i32 %i4, %i5 40 | // store i32 %i6, ptr %i3 41 | // %i7 = load i32, ptr %i3 42 | // %i8 = icmp eq i32 %i7, 255 43 | // br i1 %i8, label %bb9, label %bb12 44 | // 45 | // bb9: 46 | // %i10 = load i32, ptr %i3 47 | // call void @bar(i32 %i10) 48 | // %i11 = call i32 @baz() 49 | // store i32 %i11, ptr %i3 50 | // br label %bb12 51 | // 52 | // bb12: 53 | // %i13 = load i32, ptr %i3 54 | // call void @bar(i32 %i13) 55 | // ret void 56 | // } 57 | // 58 | // declare void @bar(i32) 59 | // declare i32 @baz(...) 60 | // 61 | // This function contains a bug, can you spot it? 62 | std::unique_ptr buggyBuildModule() { 63 | LLVMContext Ctxt; 64 | // Create the types that we will use over and over; 65 | Type *Int32Ty = Type::getInt32Ty(Ctxt); 66 | Type *VoidTy = Type::getVoidTy(Ctxt); 67 | Type *PtrTy = PointerType::get(Ctxt, /*AddrSpace=*/0); 68 | 69 | // Create the high level module. 70 | std::unique_ptr MyModule = 71 | std::make_unique("Solution Module", Ctxt); 72 | 73 | // Populate all the functions (just declaration for now.) 74 | // Starting with baz. 
75 | FunctionType *BazTy = 76 | FunctionType::get(/*RetTy=*/Int32Ty, /*isVarArg=*/false); 77 | Function *BazFunc = 78 | cast(MyModule->getOrInsertFunction("baz", BazTy).getCallee()); 79 | 80 | // bar. 81 | FunctionType *BarTy = 82 | FunctionType::get(VoidTy, /*ArgsTy=*/ArrayRef(Int32Ty), false); 83 | Function *BarFunc = 84 | cast(MyModule->getOrInsertFunction("bar", BarTy).getCallee()); 85 | 86 | // foo. 87 | FunctionType *FooTy = 88 | FunctionType::get(VoidTy, /*ArgsTy*/ ArrayRef({Int32Ty, Int32Ty}), false); 89 | Function *FooFunc = 90 | cast(MyModule->getOrInsertFunction("foo", FooTy).getCallee()); 91 | 92 | // Next, create the structure for foo. 93 | BasicBlock *BB = BasicBlock::Create(Ctxt, /*Name=*/"bb", /*Parent=*/FooFunc); 94 | BasicBlock *BB9 = 95 | BasicBlock::Create(Ctxt, /*Name=*/"bb9", /*Parent=*/FooFunc); 96 | BasicBlock *BB12 = 97 | BasicBlock::Create(Ctxt, /*Name=*/"bb12", /*Parent=*/FooFunc); 98 | 99 | // Populate bb. 100 | IRBuilder Builder(BB); 101 | // Allocate stack space for the local variables. 102 | Value *I = Builder.CreateAlloca(Int32Ty); 103 | Value *I2 = Builder.CreateAlloca(Int32Ty); 104 | Value *I3 = Builder.CreateAlloca(Int32Ty); 105 | // Get arg and arg1 from foo. 106 | Value *Arg = FooFunc->getArg(0); 107 | Value *Arg1 = FooFunc->getArg(1); 108 | // Store them in the "local" variables. 109 | Builder.CreateStore(Arg, I); 110 | Builder.CreateStore(Arg1, I2); 111 | // Reload from the local variables. 112 | Value *I4 = Builder.CreateLoad(Int32Ty, I); 113 | Value *I5 = Builder.CreateLoad(Int32Ty, I2); 114 | // Do the add. 115 | Value *I6 = Builder.CreateAdd(I4, I5); 116 | // Store to local variable i3. 117 | Builder.CreateStore(I6, I3); 118 | // Reload from i3 (now you understand why O0 is slow!!) 119 | Value *I7 = Builder.CreateLoad(Int32Ty, I3); 120 | // Compare. 121 | Value *Cst255 = ConstantInt::get(Int32Ty, 255); 122 | Value *I8 = Builder.CreateICmpEQ(I7, Cst255); 123 | // Then jump. 
# Build script for the ch11 instruction-info exercise.
#
# Runs llvm-tblgen on mytarget.td to generate the register-info and
# instr-info .inc files, then builds the `print_instr` tool that includes them.

# Must be the first command: it sets the policy defaults for everything below.
cmake_minimum_required(VERSION 3.22)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

project(CH11
  LANGUAGES CXX C)

include("${CMAKE_CURRENT_LIST_DIR}/../../cmake/utils/set-llvm-install-prefix.cmake")

# Hook up the TableGen tooling.
set(LLVM_TABLEGEN_EXE "${LLVM_TOOLS_BINARY_DIR}/llvm-tblgen")
include("${LLVM_CMAKE_DIR}/AddLLVM.cmake")
include("${LLVM_CMAKE_DIR}/TableGen.cmake")

# Create the different TableGen outputs.
set(LLVM_TARGET_DEFINITIONS mytarget.td)
tablegen(LLVM MyTargetGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM MyTargetGenInstrInfo.inc -gen-instr-info)

# Register a target for all the TableGen outputs.
add_public_tablegen_target(CommonTableGen)

# Make that target part of the `all` target.
set_target_properties(CommonTableGen PROPERTIES EXCLUDE_FROM_ALL 0)

set(CURR_TARGET print_instr)

# Now build our tool. Headers are listed so IDE generators show them.
add_executable(${CURR_TARGET}
  main.cpp
  MyTargetInstrInfo.cpp
  MyTargetRegisterInfo.cpp
  MyTargetInstrInfo.h
  MyTargetRegisterInfo.h
)

# The generated .inc files must exist before the sources are compiled.
add_dependencies(${CURR_TARGET} CommonTableGen)

# Scoped to the tool instead of appended to the global CMAKE_CXX_FLAGS.
target_compile_options(${CURR_TARGET} PRIVATE -fno-rtti)

# To find the generated files.
target_include_directories(${CURR_TARGET} PRIVATE
  "${CMAKE_CURRENT_SOURCE_DIR}/.."
  "${CMAKE_CURRENT_BINARY_DIR}"
)

# Find the libraries that correspond to the LLVM components
# that we wish to use.
llvm_map_components_to_libnames(llvm_libs support core mc codegen)

# Link against LLVM libraries.
target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
-------------------------------------------------------------------------------- 1 | #ifndef LLVM_LIB_TARGET_MYTARGET_MYTARGETINSTRINFO_H 2 | #define LLVM_LIB_TARGET_MYTARGET_MYTARGETINSTRINFO_H 3 | 4 | #include "MyTargetRegisterInfo.h" // For the definition of the register class. 5 | #include "llvm/CodeGen/TargetInstrInfo.h" 6 | 7 | #define GET_INSTRINFO_HEADER 8 | #define GET_INSTRINFO_ENUM // This should be in MC 9 | #define GET_INSTRINFO_MC_HELPER_DECLS // This should be in MC 10 | #include "MyTargetGenInstrInfo.inc" 11 | 12 | namespace llvm { 13 | 14 | class MyTargetInstrInfo : public MyTargetGenInstrInfo { 15 | public: 16 | MyTargetInstrInfo(); 17 | }; 18 | } // namespace llvm 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /ch11/instr_info/MyTargetRegisterInfo.cpp: -------------------------------------------------------------------------------- 1 | #include "MyTargetRegisterInfo.h" 2 | 3 | #include "llvm/CodeGen/MachineFunction.h" 4 | #include "llvm/CodeGen/TargetFrameLowering.h" 5 | #include "llvm/CodeGen/TargetRegisterInfo.h" 6 | #include "llvm/CodeGen/TargetSubtargetInfo.h" 7 | 8 | namespace llvm { 9 | class MyTargetFrameLowering : public TargetFrameLowering { 10 | public: 11 | MyTargetFrameLowering() 12 | : TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16), 13 | false /*StackRealignable*/) {} 14 | }; 15 | } // namespace llvm 16 | 17 | #define GET_REGINFO_TARGET_DESC 18 | #define GET_REGINFO_MC_DESC 19 | #include "MyTargetGenRegisterInfo.inc" 20 | -------------------------------------------------------------------------------- /ch11/instr_info/MyTargetRegisterInfo.h: -------------------------------------------------------------------------------- 1 | #ifndef __MYTARGETREGISTERINFO_H__ 2 | #define __MYTARGETREGISTERINFO_H__ 3 | 4 | #include "llvm/ADT/BitVector.h" 5 | #include "llvm/CodeGen/MachineBasicBlock.h" 6 | #include 7 | 8 | #define GET_REGINFO_HEADER 9 | #define GET_REGINFO_ENUM // 
Technically this one belongs to MC. 10 | #include "MyTargetGenRegisterInfo.inc" 11 | 12 | namespace llvm { 13 | class MachineFunction; 14 | 15 | class MyTargetRegisterInfo : public MyTargetGenRegisterInfo { 16 | public: 17 | MyTargetRegisterInfo() : MyTargetGenRegisterInfo(Register()) {} 18 | 19 | BitVector getReservedRegs(const MachineFunction &MF) const override { 20 | return BitVector(); 21 | } 22 | 23 | const MCPhysReg * 24 | getCalleeSavedRegs(const MachineFunction *MF) const override { 25 | return nullptr; 26 | } 27 | 28 | bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, 29 | unsigned FIOperandNum, 30 | RegScavenger *RS = nullptr) const override { 31 | return false; 32 | } 33 | 34 | Register getFrameRegister(const MachineFunction &MF) const override { 35 | return Register(); 36 | } 37 | }; 38 | 39 | } // end namespace llvm. 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /ch11/instr_info/README.md: -------------------------------------------------------------------------------- 1 | # Write your first instruction description # 2 | 3 | In this exercise, we print all the non-generic instructions defined for the backend identified by `MyTarget`. 4 | 5 | The goal of this exercise is to familiarize yourself with adding instruction in TableGen using the instr-info TableGen backend. 6 | 7 | To do that, read the direction in `yourTurn-instrinfo.td`, implement the missing instructions and run the commands to test your solution by following the steps in the next two sections. 8 | 9 | ## Configure your build directory ## 10 | 11 | ```bash 12 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=/lib/cmake/llvm/ -Bbuild . 13 | ``` 14 | 15 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option). 16 | 17 | You must have a version of LLVM installed at `` for this to succeed. 
18 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package. 19 | 20 | ## Build ## 21 | 22 | ```bash 23 | ninja -C build 24 | ``` 25 | 26 | This builds the default target in the build directory. 27 | 28 | Then run: 29 | ```bash 30 | ./build/print_instr 31 | ``` 32 | 33 | This will print all the non-generic instructions. 34 | 35 | You should see your added instructions and if you followed the directions, these instructions should have the same profile as the ones printed with the `SOLUTION_` prefix. 36 | 37 | ## Solution ## 38 | 39 | Look at the content of `solution-instrinfo.td` for a possible solution. 40 | -------------------------------------------------------------------------------- /ch11/instr_info/instrinfo.td: -------------------------------------------------------------------------------- 1 | def ADDi32: Instruction<> { 2 | let Namespace = "MyTarget"; 3 | let OutOperandList = (outs GPR32:$dst); 4 | let InOperandList = (ins GPR32:$src0, GPR32:$src1); 5 | } 6 | 7 | include "solution-instrinfo.td" 8 | include "yourTurn-instrinfo.td" 9 | -------------------------------------------------------------------------------- /ch11/instr_info/main.cpp: -------------------------------------------------------------------------------- 1 | #include "MyTargetInstrInfo.h" 2 | #include "MyTargetRegisterInfo.h" 3 | 4 | #include "llvm/CodeGen/TargetRegisterInfo.h" 5 | #include "llvm/Support/Debug.h" // For dbgs().
6 | 7 | using namespace llvm; 8 | 9 | int main() { 10 | MyTargetRegisterInfo MyTRI; 11 | MyTargetInstrInfo MyTII; 12 | TargetRegisterInfo *RegInfos[] = {&MyTRI}; 13 | unsigned NbInstrs = MyTII.getNumOpcodes(); 14 | dbgs() << "Found " << NbInstrs << " instructions for MyTarget.\n"; 15 | dbgs() << "Print the non-generic ones:\n"; 16 | for (unsigned i = 0; i != NbInstrs; ++i) { 17 | const MCInstrDesc &InstrDesc = MyTII.get(i); 18 | // Skip the generic opcode to focus on the target specific ones. 19 | if (InstrDesc.isPseudo()) 20 | continue; 21 | 22 | dbgs() << MyTII.getName(i) << ":isAsCheapAsMove(" 23 | << InstrDesc.isAsCheapAsAMove() << ")\t"; 24 | for (auto [index, MCOI] : enumerate(InstrDesc.operands())) { 25 | if (MCOI.OperandType == MCOI::OperandType::OPERAND_REGISTER) { 26 | if (index < InstrDesc.getNumDefs()) 27 | dbgs() << "(def)"; 28 | dbgs() << MyTRI.getRegClassName(MyTRI.getRegClass(MCOI.RegClass)); 29 | } else if (MCOI.OperandType == MCOI::OperandType::OPERAND_IMMEDIATE) { 30 | dbgs() << "imm"; 31 | } else 32 | dbgs() << "other"; 33 | dbgs() << ", "; 34 | } 35 | dbgs() << '\n'; 36 | } 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /ch11/instr_info/mytarget.td: -------------------------------------------------------------------------------- 1 | include "llvm/Target/Target.td" 2 | 3 | include "reginfo.td" 4 | include "instrinfo.td" 5 | 6 | def MyTargetii: InstrInfo; 7 | def MyTarget : Target { 8 | let InstructionSet = MyTargetii; 9 | } 10 | 11 | -------------------------------------------------------------------------------- /ch11/instr_info/reginfo.td: -------------------------------------------------------------------------------- 1 | let Namespace = "MyTarget" in { 2 | // +-----+-----+ 3 | // 64-bit | d0 | d1 | 4 | // +--+--+--+--+ 5 | // 32-bit |s0|s1|s2|s3| 6 | // +--+--+--+--+ 7 | 8 | // Declare the different subregister for our target. 9 | // Parameters are size and offset. 
10 | // Indices for singles in double. 11 | def sub32_low: SubRegIndex<32>; 12 | def sub32_high: SubRegIndex<32, 32>; 13 | 14 | // single registers. 15 | def s0 : Register<"s0">; 16 | def s1 : Register<"s1">; 17 | def s2 : Register<"s2">; 18 | def s3 : Register<"s3">; 19 | 20 | // double registers are made of two single registers. 21 | // Each double register covers its own pair of 22 | // single registers: d0 = {s0, s1}, d1 = {s2, s3}. 23 | def d0 : Register<"d0"> { 24 | let SubRegIndices = [sub32_low, sub32_high]; 25 | let SubRegs = [s0, s1]; 26 | } 27 | def d1 : Register<"d1"> { 28 | let SubRegIndices = [sub32_low, sub32_high]; 29 | let SubRegs = [s2, s3]; 30 | } 31 | } // end namespace MyTarget. 32 | 33 | def GPR32 : RegisterClass<"MyTarget", [i32], 32, (sequence "s%u", 0, 3)>; 34 | def GPR64 : RegisterClass<"MyTarget", [i64], 64, (add d0, d1)>; 35 | -------------------------------------------------------------------------------- /ch11/instr_info/solution-instrinfo.td: -------------------------------------------------------------------------------- 1 | def SOLUTION_BREAKVALUE : Instruction<> { 2 | let Namespace = "MyTarget"; 3 | let OutOperandList = (outs GPR32:$dst0, GPR32:$dst1); 4 | let InOperandList = (ins GPR64:$src0); 5 | } 6 | 7 | def SOLUTION_LOADIMM32 : Instruction<> { 8 | let Namespace = "MyTarget"; 9 | let isAsCheapAsAMove = true; 10 | let OutOperandList = (outs GPR32:$dst); 11 | let InOperandList = (ins i32imm:$imm); 12 | } 13 | 14 | -------------------------------------------------------------------------------- /ch11/instr_info/yourTurn-instrinfo.td: -------------------------------------------------------------------------------- 1 | // Your turn: 2 | 3 | // BREAKVALUE opcode 4 | // - Add an instruction that creates two 32-bit values out of a 64-bit value. 5 | // - The input value will be on the GPR64 register class and the output 6 | // values on the GPR32 register class. 7 | // - The instruction needs to be in the MyTarget namespace.
8 | // - The name of the opcode must start with YT (this is just to avoid name 9 | // collision with the solution.) 10 | 11 | // LOADIMM32 opcode 12 | // - Add an instruction that materializes a 32-bit immediate value 13 | // into a 32-bit register. 14 | // - The input value will be an immediate operand and the output 15 | // value on a GPR32 register class. 16 | // - The instruction needs to be in the MyTarget namespace. 17 | // - The name of the opcode must start with YT (this is just to avoid name 18 | // collision with the solution.) 19 | // - The instruction needs to be marked isAsCheapAsAMove. 20 | // 21 | // Hint for the immediate operand: look for the appropriate record name 22 | // under the OPERAND_IMMEDIATE OperandType in llvm/include/llvm/Target/Target.td 23 | -------------------------------------------------------------------------------- /ch11/mir_format/README.md: -------------------------------------------------------------------------------- 1 | In this directory, you can find examples of `mir` (Machine IR) files. 2 | 3 | To produce a `mir` file you can use the `-stop-before=<pass-name>` or `-stop-after=<pass-name>` option of `llc`. 4 | 5 | In this directory, we have one input LLVM IR file that we used to produce: 6 | - A pure `mir` file `full-dump.mir`, and 7 | - A simplified one `simplified-dump.mir`, and 8 | - A shrunk one `shrunk-dump.mir` 9 | 10 | The pure one has all the fields set, whereas the simplified one only contains the fields that have non-default values or cannot be recomputed. 11 | The shrunk one shows you the kind of manual editing you can make to reduce a file even more, in particular how the LLVM IR section can be removed.
12 | 13 | -------------------------------------------------------------------------------- /ch11/mir_format/full-dump.mir: -------------------------------------------------------------------------------- 1 | --- | 2 | ; ModuleID = 'input.ll' 3 | source_filename = "input.ll" 4 | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" 5 | target triple = "arm64-apple-macosx14.0.0" 6 | 7 | define i32 @_Z3fooPxx(ptr nocapture noundef readonly %arg, i64 noundef %arg1) local_unnamed_addr { 8 | bb: 9 | %i = icmp slt i64 %arg1, 0 10 | br i1 %i, label %bb2, label %bb4 11 | 12 | bb2: ; preds = %bb 13 | %i3 = tail call i64 @_Z3barx(i64 noundef %arg1) 14 | br label %bb4 15 | 16 | bb4: ; preds = %bb2, %bb 17 | %i5 = phi i64 [ %i3, %bb2 ], [ %arg1, %bb ] 18 | %i6 = add nsw i64 %i5, 18 19 | %i7 = getelementptr inbounds i64, ptr %arg, i64 %i6 20 | %i8 = load i64, ptr %i7, align 8 21 | %i9 = add nsw i64 %i8, %i5 22 | %i10 = trunc i64 %i9 to i32 23 | ret i32 %i10 24 | } 25 | 26 | declare i64 @_Z3barx(i64 noundef) local_unnamed_addr 27 | 28 | ... 
29 | --- 30 | name: _Z3fooPxx 31 | alignment: 4 32 | exposesReturnsTwice: false 33 | legalized: false 34 | regBankSelected: false 35 | selected: false 36 | failedISel: false 37 | tracksRegLiveness: true 38 | hasWinCFI: false 39 | callsEHReturn: false 40 | callsUnwindInit: false 41 | hasEHCatchret: false 42 | hasEHScopes: false 43 | hasEHFunclets: false 44 | isOutlined: false 45 | debugInstrRef: false 46 | failsVerification: false 47 | tracksDebugUserValues: false 48 | registers: 49 | - { id: 0, class: gpr64all, preferred-register: '' } 50 | - { id: 1, class: gpr64, preferred-register: '' } 51 | - { id: 2, class: gpr64, preferred-register: '' } 52 | - { id: 3, class: gpr64, preferred-register: '' } 53 | - { id: 4, class: gpr64all, preferred-register: '' } 54 | - { id: 5, class: gpr64common, preferred-register: '' } 55 | - { id: 6, class: gpr32, preferred-register: '' } 56 | - { id: 7, class: gpr32, preferred-register: '' } 57 | - { id: 8, class: gpr32, preferred-register: '' } 58 | liveins: 59 | - { reg: '$x0', virtual-reg: '%2' } 60 | - { reg: '$x1', virtual-reg: '%3' } 61 | frameInfo: 62 | isFrameAddressTaken: false 63 | isReturnAddressTaken: false 64 | hasStackMap: false 65 | hasPatchPoint: false 66 | stackSize: 0 67 | offsetAdjustment: 0 68 | maxAlignment: 1 69 | adjustsStack: true 70 | hasCalls: true 71 | stackProtector: '' 72 | functionContext: '' 73 | maxCallFrameSize: 0 74 | cvBytesOfCalleeSavedRegisters: 0 75 | hasOpaqueSPAdjustment: false 76 | hasVAStart: false 77 | hasMustTailInVarArgFunc: false 78 | hasTailCall: false 79 | isCalleeSavedInfoValid: false 80 | localFrameSize: 0 81 | savePoint: '' 82 | restorePoint: '' 83 | fixedStack: [] 84 | stack: [] 85 | entry_values: [] 86 | callSites: [] 87 | debugValueSubstitutions: [] 88 | constants: [] 89 | machineFunctionInfo: {} 90 | body: | 91 | bb.0.bb: 92 | successors: %bb.1(0x30000000), %bb.2(0x50000000) 93 | liveins: $x0, $x1 94 | 95 | %3:gpr64 = COPY $x1 96 | %2:gpr64 = COPY $x0 97 | TBZX %3, 63, %bb.2 98 | 
B %bb.1 99 | 100 | bb.1.bb2: 101 | successors: %bb.2(0x80000000) 102 | 103 | ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp 104 | $x0 = COPY %3 105 | BL @_Z3barx, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0 106 | ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp 107 | %4:gpr64all = COPY $x0 108 | %0:gpr64all = COPY %4 109 | 110 | bb.2.bb4: 111 | %1:gpr64 = PHI %3, %bb.0, %0, %bb.1 112 | %5:gpr64common = ADDXrs %2, %1, 3 113 | %6:gpr32 = LDRWui killed %5, 36 :: (load (s32) from %ir.i7, align 8) 114 | %7:gpr32 = COPY %1.sub_32 115 | $w0 = ADDWrr %6, %7 116 | RET_ReallyLR implicit $w0 117 | 118 | ... 119 | -------------------------------------------------------------------------------- /ch11/mir_format/input.ll: -------------------------------------------------------------------------------- 1 | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" 2 | target triple = "arm64-apple-macosx14.0.0" 3 | 4 | define i32 @_Z3fooPxx(ptr nocapture noundef readonly %arg, i64 noundef %arg1) local_unnamed_addr { 5 | bb: 6 | %i = icmp slt i64 %arg1, 0 7 | br i1 %i, label %bb2, label %bb4 8 | 9 | bb2: ; preds = %bb 10 | %i3 = tail call i64 @_Z3barx(i64 noundef %arg1) 11 | br label %bb4 12 | 13 | bb4: ; preds = %bb2, %bb 14 | %i5 = phi i64 [ %i3, %bb2 ], [ %arg1, %bb ] 15 | %i6 = add nsw i64 %i5, 18 16 | %i7 = getelementptr inbounds i64, ptr %arg, i64 %i6 17 | %i8 = load i64, ptr %i7, align 8 18 | %i9 = add nsw i64 %i8, %i5 19 | %i10 = trunc i64 %i9 to i32 20 | ret i32 %i10 21 | } 22 | 23 | declare i64 @_Z3barx(i64 noundef) local_unnamed_addr 24 | -------------------------------------------------------------------------------- /ch11/mir_format/shrunk-dump.mir: -------------------------------------------------------------------------------- 1 | --- 2 | name: _Z3fooPxx 3 | tracksRegLiveness: true 4 | body: | 5 | bb.0: 6 | liveins: $x0, $x1 7 | 8 | %3:gpr64 = COPY $x1 9 | %2:gpr64 = COPY $x0 10 | 
TBZX %3, 63, %bb.2 11 | B %bb.1 12 | 13 | bb.1: 14 | ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp 15 | $x0 = COPY %3 16 | BL 123, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0 17 | ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp 18 | %4:gpr64all = COPY $x0 19 | %0:gpr64all = COPY %4 20 | 21 | bb.2: 22 | %1:gpr64 = PHI %3, %bb.0, %0, %bb.1 23 | %5:gpr64common = ADDXrs %2, %1, 3 24 | %6:gpr32 = LDRWui killed %5, 36 :: (load (s32), align 8) 25 | %7:gpr32 = COPY %1.sub_32 26 | $w0 = ADDWrr %6, %7 27 | RET_ReallyLR implicit $w0 28 | 29 | ... 30 | -------------------------------------------------------------------------------- /ch11/mir_format/simplified-dump.mir: -------------------------------------------------------------------------------- 1 | --- | 2 | ; ModuleID = 'input.ll' 3 | source_filename = "input.ll" 4 | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" 5 | target triple = "arm64-apple-macosx14.0.0" 6 | 7 | define i32 @_Z3fooPxx(ptr nocapture noundef readonly %arg, i64 noundef %arg1) local_unnamed_addr { 8 | bb: 9 | %i = icmp slt i64 %arg1, 0 10 | br i1 %i, label %bb2, label %bb4 11 | 12 | bb2: ; preds = %bb 13 | %i3 = tail call i64 @_Z3barx(i64 noundef %arg1) 14 | br label %bb4 15 | 16 | bb4: ; preds = %bb2, %bb 17 | %i5 = phi i64 [ %i3, %bb2 ], [ %arg1, %bb ] 18 | %i6 = add nsw i64 %i5, 18 19 | %i7 = getelementptr inbounds i64, ptr %arg, i64 %i6 20 | %i8 = load i64, ptr %i7, align 8 21 | %i9 = add nsw i64 %i8, %i5 22 | %i10 = trunc i64 %i9 to i32 23 | ret i32 %i10 24 | } 25 | 26 | declare i64 @_Z3barx(i64 noundef) local_unnamed_addr 27 | 28 | ... 
# Build script for the ch11 register-units exercise.
#
# Runs llvm-tblgen on both the solution and the "your turn" register
# descriptions, then builds the `print_regunit` tool that includes the
# generated .inc files.

# Must be the first command: it sets the policy defaults for everything below.
cmake_minimum_required(VERSION 3.22)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

project(CH11
  LANGUAGES CXX C)

include("${CMAKE_CURRENT_LIST_DIR}/../../cmake/utils/set-llvm-install-prefix.cmake")

# Hook up the TableGen tooling.
set(LLVM_TABLEGEN_EXE "${LLVM_TOOLS_BINARY_DIR}/llvm-tblgen")
include("${LLVM_CMAKE_DIR}/AddLLVM.cmake")
include("${LLVM_CMAKE_DIR}/TableGen.cmake")

# Create the different TableGen outputs.
# LLVM_TARGET_DEFINITIONS selects the input file for the tablegen() calls that
# follow it, so it is reset before each output.
set(LLVM_TARGET_DEFINITIONS solution-reginfo.td)
tablegen(LLVM SolutionGenRegisterInfo.inc -gen-register-info)

set(LLVM_TARGET_DEFINITIONS yourTurn-reginfo.td)
tablegen(LLVM YourTurnGenRegisterInfo.inc -gen-register-info)

# Register a target for all the TableGen outputs.
add_public_tablegen_target(CommonTableGen)

# Make that target part of the `all` target.
set_target_properties(CommonTableGen PROPERTIES EXCLUDE_FROM_ALL 0)

set(CURR_TARGET print_regunit)

# Now build our tool. Headers are listed so IDE generators show them.
add_executable(${CURR_TARGET}
  main.cpp
  SolutionRegisterInfo.cpp
  YourTurnRegisterInfo.cpp
  SolutionRegisterInfo.h
  YourTurnRegisterInfo.h
)

# The generated .inc files must exist before the sources are compiled.
add_dependencies(${CURR_TARGET} CommonTableGen)

# Scoped to the tool instead of appended to the global CMAKE_CXX_FLAGS.
target_compile_options(${CURR_TARGET} PRIVATE -fno-rtti)

# To find the generated files.
target_include_directories(${CURR_TARGET} PRIVATE
  "${CMAKE_CURRENT_SOURCE_DIR}/.."
  "${CMAKE_CURRENT_BINARY_DIR}"
)

# Find the libraries that correspond to the LLVM components
# that we wish to use.
llvm_map_components_to_libnames(llvm_libs support core mc codegen)

# Link against LLVM libraries.
target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
4 | 5 | The goal is to teach you how to write the register description for registers, register units, and register classes and use them in the register-info TableGen backend. 6 | 7 | Follow the directions in `yourTurn-reginfo.td` and implement the described register hierarchy. 8 | 9 | Then, use the steps below to test your changes. 10 | 11 | ## Configure your build directory ## 12 | 13 | ```bash 14 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm/ -Bbuild . 15 | ``` 16 | 17 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option). 18 | 19 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed. 20 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package. 21 | 22 | ## Build ## 23 | 24 | ```bash 25 | ninja -C build 26 | ``` 27 | 28 | This builds the default target in the build directory. 29 | 30 | Then run: 31 | ```bash 32 | ./build/print_regunit 33 | ``` 34 | 35 | This will print the register info for both the solution and what you implemented. 36 | 37 | You should aim to have your register info (printed under the `RegisterInfo for YourTurn` section) look exactly like what is under the `RegisterInfo for Solution` section. 38 | 39 | ## Solution ## 40 | 41 | Look at the content of `solution-reginfo.td` for a possible solution.
42 | -------------------------------------------------------------------------------- /ch11/register_units/SolutionRegisterInfo.cpp: -------------------------------------------------------------------------------- 1 | #include "SolutionRegisterInfo.h" 2 | 3 | #include "llvm/CodeGen/MachineFunction.h" 4 | #include "llvm/CodeGen/TargetFrameLowering.h" 5 | #include "llvm/CodeGen/TargetRegisterInfo.h" 6 | #include "llvm/CodeGen/TargetSubtargetInfo.h" 7 | 8 | namespace llvm { 9 | class SolutionFrameLowering : public TargetFrameLowering { 10 | public: 11 | SolutionFrameLowering() 12 | : TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16), 13 | false /*StackRealignable*/) {} 14 | }; 15 | } // namespace llvm 16 | 17 | #define GET_REGINFO_TARGET_DESC 18 | #define GET_REGINFO_MC_DESC 19 | #include "SolutionGenRegisterInfo.inc" 20 | -------------------------------------------------------------------------------- /ch11/register_units/SolutionRegisterInfo.h: -------------------------------------------------------------------------------- 1 | #ifndef __SOLUTIONREGISTERINFO_H__ 2 | #define __SOLUTIONREGISTERINFO_H__ 3 | 4 | #include "llvm/ADT/BitVector.h" 5 | #include "llvm/CodeGen/MachineBasicBlock.h" 6 | #include 7 | 8 | #define GET_REGINFO_HEADER 9 | #define GET_REGINFO_ENUM // Technically this one belongs to MC. 
10 | #include "SolutionGenRegisterInfo.inc" 11 | 12 | namespace llvm { 13 | class MachineFunction; 14 | 15 | class SolutionRegisterInfo : public SolutionGenRegisterInfo { 16 | public: 17 | SolutionRegisterInfo() : SolutionGenRegisterInfo(Register()) {} 18 | 19 | BitVector getReservedRegs(const MachineFunction &MF) const override { 20 | return BitVector(); 21 | } 22 | 23 | const MCPhysReg * 24 | getCalleeSavedRegs(const MachineFunction *MF) const override { 25 | return nullptr; 26 | } 27 | 28 | bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, 29 | unsigned FIOperandNum, 30 | RegScavenger *RS = nullptr) const override { 31 | return false; 32 | } 33 | 34 | Register getFrameRegister(const MachineFunction &MF) const override { 35 | return Register(); 36 | } 37 | }; 38 | 39 | } // end namespace llvm. 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /ch11/register_units/YourTurnRegisterInfo.cpp: -------------------------------------------------------------------------------- 1 | #include "YourTurnRegisterInfo.h" 2 | 3 | #include "llvm/CodeGen/MachineFunction.h" 4 | #include "llvm/CodeGen/TargetFrameLowering.h" 5 | #include "llvm/CodeGen/TargetRegisterInfo.h" 6 | #include "llvm/CodeGen/TargetSubtargetInfo.h" 7 | 8 | namespace llvm { 9 | class YourTurnFrameLowering : public TargetFrameLowering { 10 | public: 11 | YourTurnFrameLowering() 12 | : TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16), 13 | false /*StackRealignable*/) {} 14 | }; 15 | } // namespace llvm 16 | 17 | #define GET_REGINFO_TARGET_DESC 18 | #define GET_REGINFO_MC_DESC 19 | #include "YourTurnGenRegisterInfo.inc" 20 | -------------------------------------------------------------------------------- /ch11/register_units/YourTurnRegisterInfo.h: -------------------------------------------------------------------------------- 1 | #ifndef __YOURTURNREGISTERINFOS_H__ 2 | #define __YOURTURNREGISTERINFOS_H__ 3 | 4 | #include 
"llvm/ADT/BitVector.h" 5 | #include "llvm/CodeGen/MachineBasicBlock.h" 6 | #include 7 | 8 | #define GET_REGINFO_HEADER 9 | #define GET_REGINFO_ENUM // Technically this one belongs to MC. 10 | #include "YourTurnGenRegisterInfo.inc" 11 | 12 | namespace llvm { 13 | class MachineFunction; 14 | 15 | class YourTurnRegisterInfo : public YourTurnGenRegisterInfo { 16 | public: 17 | YourTurnRegisterInfo() : YourTurnGenRegisterInfo(Register()) {} 18 | 19 | BitVector getReservedRegs(const MachineFunction &MF) const override { 20 | return BitVector(); 21 | } 22 | 23 | const MCPhysReg * 24 | getCalleeSavedRegs(const MachineFunction *MF) const override { 25 | return nullptr; 26 | } 27 | 28 | bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, 29 | unsigned FIOperandNum, 30 | RegScavenger *RS = nullptr) const override { 31 | return false; 32 | } 33 | 34 | Register getFrameRegister(const MachineFunction &MF) const override { 35 | return Register(); 36 | } 37 | }; 38 | 39 | } // end namespace llvm. 40 | #endif 41 | -------------------------------------------------------------------------------- /ch11/register_units/main.cpp: -------------------------------------------------------------------------------- 1 | #include "SolutionRegisterInfo.h" 2 | #include "YourTurnRegisterInfo.h" 3 | 4 | #include "llvm/CodeGen/TargetRegisterInfo.h" 5 | #include "llvm/MC/MCRegisterInfo.h" // For the RegUnit iterators. 6 | #include "llvm/Support/Debug.h" // For dbgs(). 
7 | 8 | using namespace llvm; 9 | 10 | int main() { 11 | SolutionRegisterInfo SolutionRI; 12 | YourTurnRegisterInfo YourTurnRI; 13 | TargetRegisterInfo *RegInfos[] = {&SolutionRI, &YourTurnRI}; 14 | const char *RIName[] = {"Solution", "YourTurn"}; 15 | // For each register description, dump its register units, then its register classes. 16 | for (unsigned i = 0; i < sizeof(RegInfos) / sizeof(RegInfos[0]); ++i) { 17 | dbgs() << "===== RegisterInfo for " << RIName[i] << "=====\n"; 18 | TargetRegisterInfo *TRI = RegInfos[i]; 19 | dbgs() << "== RegisterUnit ==\n"; 20 | // Traverse all the units and print out which registers each one touches. 21 | for (unsigned Unit = 0, E = TRI->getNumRegUnits(); Unit != E; ++Unit) { 22 | dbgs() << "RegUnit " << Unit << ":\t"; 23 | for (MCRegUnitRootIterator RI(Unit, TRI); RI.isValid(); ++RI) { 24 | for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) { 25 | dbgs() << TRI->getName(*SI) << ", "; 26 | } 27 | } 28 | dbgs() << "\n"; // End the line once per unit, however many roots it has. 29 | } 30 | dbgs() << "== RegisterClass ==\n"; 31 | for (const TargetRegisterClass *RegClass : TRI->regclasses()) { 32 | dbgs() << "RegClass " << TRI->getRegClassName(RegClass) << ":\t"; 33 | for (Register Reg : *RegClass) { 34 | dbgs() << TRI->getName(Reg) << ", "; 35 | } 36 | dbgs() << "\n"; 37 | } 38 | dbgs() << "======= End RegisterInfo ========\n\n\n"; 39 | } 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /ch11/register_units/solution-reginfo.td: -------------------------------------------------------------------------------- 1 | include "llvm/Target/Target.td" 2 | 3 | let Namespace = "Solution" in { 4 | // Describe the following hierarchy. 
5 | // +-----------+-----------+-----------+ 6 | // 128-bit quad | q0 | q1 | q2 | 7 | // +-----+-----+-----+-----+-----------+ 8 | // 64-bit double | d0 | d1 | d2 | | | 9 | // +--+--+--+--+-----+-----+-----------+ 10 | // 32-bit single |s0|s1|s2| | | | 11 | // +--+--+--+--+-----------+-----------+ 12 | // 13 | // Empty cells mean that no architectural register exists for that cell (it 14 | // is not addressable.) 15 | 16 | // Declare the different subregister indices for our target. 17 | // Parameters are size and offset. 18 | // Indices for singles in double. 19 | def sub32_low: SubRegIndex<32>; 20 | def sub32_high: SubRegIndex<32, 32>; 21 | 22 | // Indices for doubles in quad. 23 | def sub64_low: SubRegIndex<64>; 24 | def sub64_high: SubRegIndex<64, 64>; 25 | 26 | 27 | // single registers. 28 | def s0 : Register<"s0">; 29 | def s1 : Register<"s1">; 30 | def s2 : Register<"s2">; 31 | 32 | // double registers are made of two single registers. 33 | // The pattern is a bit irregular because we assume 34 | // we have only 3 single registers. 35 | def d0 : Register<"d0"> { 36 | let SubRegIndices = [sub32_low, sub32_high]; 37 | let SubRegs = [s0, s1]; 38 | let CoveredBySubRegs = true; 39 | } 40 | def d1 : Register<"d1"> { // Only the low half (s2) is addressable, so d1 is not covered by its subregs. 41 | let SubRegIndices = [sub32_low]; 42 | let SubRegs = [s2]; 43 | } 44 | def d2 : Register<"d2">; // No addressable single register inside d2. 45 | 46 | // quad registers are made of two double registers. 47 | // Similar pattern as double registers. 48 | def q0 : Register<"q0"> { 49 | let SubRegIndices = [sub64_low, sub64_high]; 50 | let SubRegs = [d0, d1]; 51 | let CoveredBySubRegs = true; 52 | } 53 | def q1 : Register<"q1"> { // Only the low half (d2) is addressable, so q1 is not covered by its subregs. 54 | let SubRegIndices = [sub64_low]; 55 | let SubRegs = [d2]; 56 | } 57 | def q2 : Register<"q2">; // No addressable double register inside q2. 58 | } // end namespace Solution. 59 | 60 | // Bonus point: register classes. 
61 | def SINGLES : RegisterClass<"Solution", [f32], 32, (sequence "s%u", 0, 2)>; 62 | def DOUBLES : RegisterClass<"Solution", [f64], 64, (sequence "d%u", 0, 2)>; 63 | def QUADS : RegisterClass<"Solution", [f128], 128, (sequence "q%u", 0, 2)>; 64 | 65 | // Boilerplate to get the TableGen backend happy. 66 | def myii: InstrInfo; 67 | def Solution : Target { 68 | let InstructionSet = myii; 69 | } 70 | -------------------------------------------------------------------------------- /ch11/register_units/yourTurn-reginfo.td: -------------------------------------------------------------------------------- 1 | include "llvm/Target/Target.td" 2 | 3 | def : HwMode<"", []>; 4 | def myii: InstrInfo; 5 | def YourTurn : Target { 6 | let InstructionSet = myii; 7 | } 8 | 9 | let Namespace = "yourTurn" in { 10 | // Complete this description to describe a register hierarchy that looks like: 11 | // +-----------+-----------+-----------+ 12 | // 128-bit quad | q0 | q1 | q2 | 13 | // +-----+-----+-----+-----+-----------+ 14 | // 64-bit double | d0 | d1 | d2 | | | 15 | // +--+--+--+--+-----+-----+-----------+ 16 | // 32-bit single |s0|s1|s2| | | | 17 | // +--+--+--+--+-----------+-----------+ 18 | // 19 | // Empty cells mean that no architectural register exists for that cell (it 20 | // is not addressable.) 21 | def s0 : Register<"s0">; 22 | 23 | } // end namespace yourTurn. 24 | 25 | // Bonus point, create 1 register class per level. 26 | // They would map respectively to f128, f64, and f32. 
27 | def SINGLES : RegisterClass<"yourTurn", [f32], 32, (add s0)>; 28 | 29 | -------------------------------------------------------------------------------- /ch13/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.22) 2 | 3 | set(CMAKE_CXX_STANDARD 17) 4 | 5 | project(CH13 6 | LANGUAGES CXX C) 7 | 8 | include(../cmake/utils/set-llvm-install-prefix.cmake) 9 | include(../cmake/utils/llc-run.cmake) 10 | 11 | set(CURR_TARGET 12 | produce-mir 13 | ) 14 | 15 | set(LLC_RUN_DEPENDENCIES) 16 | 17 | # Args are: 18 | # - llc options 19 | # - input file 20 | # - output file 21 | add_run_llc(LLC_RUN_DEPENDENCIES "-stop-before=peephole-opt" "input.ll" "ssa.mir") 22 | add_run_llc(LLC_RUN_DEPENDENCIES "-stop-after=phi-node-elimination" "input.ll" "no-phi.mir") 23 | add_run_llc(LLC_RUN_DEPENDENCIES "-stop-after=virtregmap" "input.ll" "no-vreg.mir") 24 | 25 | # Create a custom target that depends on the generated files 26 | add_custom_target(${CURR_TARGET} 27 | DEPENDS ${LLC_RUN_DEPENDENCIES} 28 | ) 29 | 30 | set_target_properties(${CURR_TARGET} PROPERTIES EXCLUDE_FROM_ALL 0) 31 | -------------------------------------------------------------------------------- /ch13/README.md: -------------------------------------------------------------------------------- 1 | In this chapter you can discover the transformations performed by some of the optimization passes. 2 | 3 | To see that in action, first setup your build directory: 4 | ```bash 5 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=/lib/cmake/llvm -Bbuild . 6 | ``` 7 | 8 | Then, when you'll invoke `ninja`, it will run all the passes exercised in this folder. 9 | ```bash 10 | ninja -Cbuild 11 | ``` 12 | 13 | This will produce files named `xxx.mir` in the `build` directory. 
14 | 15 | Each output file is produced by stopping the pass pipeline at a specific point: 16 | - `ssa`: before the SSA machine optimizations 17 | - `no-phi`: after the PHI elimination pass 18 | - `no-vreg`: after register allocation 19 | 20 | In any case, you can see the command used to produce the output by running the `ninja` command with the `-v` option. 21 | -------------------------------------------------------------------------------- /ch13/input.ll: -------------------------------------------------------------------------------- 1 | define i64 @def_in_loop_use_outside(i64 %src, i64 %upper_bound) { 2 | entry: 3 | br label %loop 4 | 5 | loop: 6 | %iv = phi i64 [0, %entry], [%iv_plus_1, %loop] 7 | %iv_plus_1 = add i64 %iv, 1 8 | %cond = icmp ult i64 %iv_plus_1, %upper_bound 9 | br i1 %cond, label %loop, label %end 10 | 11 | end: 12 | %tmp = add i64 %iv_plus_1, %src 13 | %res = add i64 %tmp, %iv_plus_1 14 | ret i64 %res 15 | } 16 | -------------------------------------------------------------------------------- /ch3/README.md: -------------------------------------------------------------------------------- 1 | This directory contains some of the examples that are used in Chapter 3. 2 | 3 | For further examples and exercises, please navigate to the subdirectories. 
4 | -------------------------------------------------------------------------------- /ch3/input.c: -------------------------------------------------------------------------------- 1 | extern int baz(); 2 | extern void bar(int); 3 | 4 | void foo(int a, int b) { 5 | int var = a + b; 6 | if (var == 0xFF) { 7 | bar(var); 8 | var = baz(); 9 | } 10 | bar(var); 11 | } 12 | 13 | -------------------------------------------------------------------------------- /ch3/irreducible.c: -------------------------------------------------------------------------------- 1 | extern void someFct(); 2 | 3 | int irreducible(int shouldSkip1stCall) { 4 | int i = 0; 5 | if (shouldSkip1stCall) 6 | goto SKIP; 7 | do { 8 | someFct(); 9 | SKIP:; 10 | } while (++i < 7); 11 | return 32; 12 | } 13 | -------------------------------------------------------------------------------- /ch3/llvm_ir/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.22) 2 | 3 | set(CMAKE_CXX_STANDARD 17) 4 | 5 | project(CH3 6 | LANGUAGES CXX C) 7 | 8 | include(../../cmake/utils/set-llvm-install-prefix.cmake) 9 | 10 | set(CURR_TARGET 11 | build_ir 12 | ) 13 | 14 | set(SRC 15 | "main.cpp" 16 | "solution/populate_function.cpp" 17 | "your_turn/populate_function.cpp" 18 | ) 19 | 20 | set(INC 21 | ) 22 | 23 | # Now build our tools 24 | add_executable(${CURR_TARGET} ${SRC} ${INC}) 25 | 26 | # Disable RTTI for this target only, instead of overwriting CMAKE_CXX_FLAGS 27 | # (which would discard any flags supplied by the user or the toolchain). 28 | target_compile_options(${CURR_TARGET} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-fno-rtti>) 29 | 30 | # Find the libraries that correspond to the LLVM components 31 | # that we wish to use 32 | llvm_map_components_to_libnames(llvm_libs support core) 33 | 34 | # Link against LLVM libraries 35 | target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs}) 36 | -------------------------------------------------------------------------------- /ch3/llvm_ir/README.md: -------------------------------------------------------------------------------- 1 | # Building LLVM IR # 2 | 3 | In this exercise, you will build your first function using 
the LLVM intermediate representation (IR). 4 | 5 | Open `your_turn/populate_function.cpp` and implement the `myBuildModule` function according to the comment 6 | in that file. 7 | Then, follow the steps below to configure and build the test program and check that your solution matches the output of the provided solution. 8 | 9 | In other words, check that the printed LLVM IR after `## Processing module from Your solution implementation` line matches the solution printed after the `## Processing module from Reference implementation` line. 10 | 11 | ## Configuring your environment ## 12 | 13 | 14 | ```bash 15 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=/lib/cmake/llvm -Bbuild . 16 | ninja -Cbuild 17 | ``` 18 | 19 | ## Running the example ## 20 | 21 | ```bash 22 | ./build/build_ir 23 | ``` 24 | 25 | ## Solution ## 26 | 27 | If your output doesn't match the solution, look at `solution/populate_function.cpp` to see how to implement the desired IR. 28 | -------------------------------------------------------------------------------- /ch3/llvm_ir/input.c: -------------------------------------------------------------------------------- 1 | extern int baz(); 2 | 3 | extern void bar(int); 4 | 5 | void foo(int a, int b) { 6 | int var = a + b; 7 | if (var == 0xFF) { 8 | bar(var); 9 | var = baz(); 10 | } 11 | bar(var); 12 | } 13 | -------------------------------------------------------------------------------- /ch3/llvm_ir/main.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/Module.h" 2 | #include "llvm/IR/Verifier.h" 3 | #include "llvm/Support/Debug.h" // For errs(). 
4 | 5 | using namespace llvm; 6 | 7 | extern std::unique_ptr<Module> myBuildModule(LLVMContext &); 8 | extern std::unique_ptr<Module> solutionBuildModule(LLVMContext &); 9 | // Builds the reference module and the exercise module, prints each one and verifies it. 10 | int main(int argc, char **argv) { 11 | LLVMContext Ctxt; 12 | bool hadError = false; 13 | for (int i = 0; i != 2; ++i) { 14 | bool isRefImpl = i == 0; 15 | std::unique_ptr<Module> CurModule = 16 | isRefImpl ? solutionBuildModule(Ctxt) : myBuildModule(Ctxt); 17 | const char *msg = isRefImpl ? "Reference" : "Your solution"; 18 | 19 | outs() << "\n\n## Processing module from " << msg << " implementation\n"; 20 | if (!CurModule) { 21 | outs() << "Nothing built\n"; 22 | continue; 23 | } 24 | 25 | CurModule->print(errs(), /*AssemblyAnnotationWriter=*/nullptr); 26 | // verifyModule returns true if it finds errors and 27 | // prints them on the provided output stream (errs() here). 28 | if (verifyModule(*CurModule, &errs())) { 29 | errs() << msg << " does not verify\n"; 30 | hadError = true; 31 | } 32 | } 33 | 34 | return hadError ? 1 : 0; // Exit status: 0 when every built module verified, 1 otherwise. 35 | } 36 | -------------------------------------------------------------------------------- /ch3/llvm_ir/solution/populate_function.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/ADT/ArrayRef.h" 2 | #include "llvm/IR/BasicBlock.h" 3 | #include "llvm/IR/Constants.h" // For ConstantInt. 4 | #include "llvm/IR/DerivedTypes.h" // For PointerType, FunctionType. 5 | #include "llvm/IR/Function.h" 6 | #include "llvm/IR/IRBuilder.h" 7 | #include "llvm/IR/LLVMContext.h" 8 | #include "llvm/IR/Module.h" 9 | #include "llvm/IR/Type.h" 10 | #include "llvm/Support/Debug.h" // For errs(). 
11 | 12 | #include // For unique_ptr 13 | 14 | using namespace llvm; 15 | 16 | // The goal of this function is to build a Module that 17 | // represents the lowering of the following foo, a C function: 18 | // extern int baz(); 19 | // extern void bar(int); 20 | // void foo(int a, int b) { 21 | // int var = a + b; 22 | // if (var == 0xFF) { 23 | // bar(var); 24 | // var = baz(); 25 | // } 26 | // bar(var); 27 | // } 28 | // 29 | // The IR for this snippet (at O0) is: 30 | // define void @foo(i32 %arg, i32 %arg1) { 31 | // bb: 32 | // %i = alloca i32 33 | // %i2 = alloca i32 34 | // %i3 = alloca i32 35 | // store i32 %arg, ptr %i 36 | // store i32 %arg1, ptr %i2 37 | // %i4 = load i32, ptr %i 38 | // %i5 = load i32, ptr %i2 39 | // %i6 = add i32 %i4, %i5 40 | // store i32 %i6, ptr %i3 41 | // %i7 = load i32, ptr %i3 42 | // %i8 = icmp eq i32 %i7, 255 43 | // br i1 %i8, label %bb9, label %bb12 44 | // 45 | // bb9: 46 | // %i10 = load i32, ptr %i3 47 | // call void @bar(i32 %i10) 48 | // %i11 = call i32 @baz() 49 | // store i32 %i11, ptr %i3 50 | // br label %bb12 51 | // 52 | // bb12: 53 | // %i13 = load i32, ptr %i3 54 | // call void @bar(i32 %i13) 55 | // ret void 56 | // } 57 | // 58 | // declare void @bar(i32) 59 | // declare i32 @baz(...) 60 | std::unique_ptr solutionBuildModule(LLVMContext &Ctxt) { 61 | // Create the types that we will use over and over; 62 | Type *Int32Ty = Type::getInt32Ty(Ctxt); 63 | Type *VoidTy = Type::getVoidTy(Ctxt); 64 | Type *PtrTy = PointerType::get(Ctxt, /*AddrSpace=*/0); 65 | 66 | // Create the high level module. 67 | std::unique_ptr MyModule = 68 | std::make_unique("Solution Module", Ctxt); 69 | 70 | // Populate all the functions (just declaration for now.) 71 | // Starting with baz. 72 | FunctionType *BazTy = 73 | FunctionType::get(/*RetTy=*/Int32Ty, /*isVarArg=*/false); 74 | Function *BazFunc = 75 | cast(MyModule->getOrInsertFunction("baz", BazTy).getCallee()); 76 | 77 | // bar. 
78 | FunctionType *BarTy = 79 | FunctionType::get(VoidTy, /*ArgsTy=*/ArrayRef(Int32Ty), false); 80 | Function *BarFunc = 81 | cast(MyModule->getOrInsertFunction("bar", BarTy).getCallee()); 82 | 83 | // foo. 84 | FunctionType *FooTy = 85 | FunctionType::get(VoidTy, /*ArgsTy*/ ArrayRef({Int32Ty, Int32Ty}), false); 86 | Function *FooFunc = 87 | cast(MyModule->getOrInsertFunction("foo", FooTy).getCallee()); 88 | 89 | // Next, create the structure for foo. 90 | BasicBlock *BB = BasicBlock::Create(Ctxt, /*Name=*/"bb", /*Parent=*/FooFunc); 91 | BasicBlock *BB9 = 92 | BasicBlock::Create(Ctxt, /*Name=*/"bb9", /*Parent=*/FooFunc); 93 | BasicBlock *BB12 = 94 | BasicBlock::Create(Ctxt, /*Name=*/"bb12", /*Parent=*/FooFunc); 95 | 96 | // Populate bb. 97 | IRBuilder Builder(BB); 98 | // Allocate stack space for the local variables. 99 | Value *I = Builder.CreateAlloca(Int32Ty); 100 | Value *I2 = Builder.CreateAlloca(Int32Ty); 101 | Value *I3 = Builder.CreateAlloca(Int32Ty); 102 | // Get arg and arg1 from foo. 103 | Value *Arg = FooFunc->getArg(0); 104 | Value *Arg1 = FooFunc->getArg(1); 105 | // Store them in the "local" variables. 106 | Builder.CreateStore(Arg, I); 107 | Builder.CreateStore(Arg1, I2); 108 | // Reload from the local variables. 109 | Value *I4 = Builder.CreateLoad(Int32Ty, I); 110 | Value *I5 = Builder.CreateLoad(Int32Ty, I2); 111 | // Do the add. 112 | Value *I6 = Builder.CreateAdd(I4, I5); 113 | // Store to local variable i3. 114 | Builder.CreateStore(I6, I3); 115 | // Reload from i3 (now you understand why O0 is slow!!) 116 | Value *I7 = Builder.CreateLoad(Int32Ty, I3); 117 | // Compare. 118 | Value *Cst255 = ConstantInt::get(Int32Ty, 255); 119 | Value *I8 = Builder.CreateICmpEQ(I7, Cst255); 120 | // Then jump. 121 | Builder.CreateCondBr(I8, BB9, BB12); 122 | 123 | // Populate bb9. 124 | // Reset the builder on the next basic block. 125 | Builder.SetInsertPoint(BB9); 126 | // Reload the local variable i3. 
127 | Value *I10 = Builder.CreateLoad(Int32Ty, I3); 128 | // Call bar with i10. 129 | Builder.CreateCall(BarFunc->getFunctionType(), BarFunc, ArrayRef(I10)); 130 | // Call baz. 131 | Value *I11 = Builder.CreateCall(BazFunc->getFunctionType(), BazFunc); 132 | // Store the result in the local variable i3. 133 | Builder.CreateStore(I11, I3); 134 | // Jump to the next block. 135 | Builder.CreateBr(BB12); 136 | 137 | // Populate bb12. 138 | // Reset the builder on the next basic block. 139 | Builder.SetInsertPoint(BB12); 140 | // Reload the local variable I3. 141 | Value *I13 = Builder.CreateLoad(Int32Ty, I3); 142 | // Call bar on i13. 143 | Builder.CreateCall(BarFunc->getFunctionType(), BarFunc, ArrayRef(I13)); 144 | // Create the final return. 145 | // Remember all basic block must end with a terminator. 146 | Builder.CreateRetVoid(); 147 | 148 | return MyModule; 149 | } 150 | -------------------------------------------------------------------------------- /ch3/llvm_ir/your_turn/populate_function.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/ADT/ArrayRef.h" 2 | #include "llvm/IR/BasicBlock.h" 3 | #include "llvm/IR/Constants.h" // For ConstantInt. 4 | #include "llvm/IR/DerivedTypes.h" // For PointerType, FunctionType. 5 | #include "llvm/IR/Function.h" 6 | #include "llvm/IR/IRBuilder.h" 7 | #include "llvm/IR/LLVMContext.h" 8 | #include "llvm/IR/Module.h" 9 | #include "llvm/IR/Type.h" 10 | #include "llvm/Support/Debug.h" // For errs(). 
11 | 12 | #include // For unique_ptr 13 | 14 | using namespace llvm; 15 | 16 | // The goal of this function is to build a Module that 17 | // represents the lowering of the following foo, a C function: 18 | // extern int baz(); 19 | // extern void bar(int); 20 | // void foo(int a, int b) { 21 | // int var = a + b; 22 | // if (var == 0xFF) { 23 | // bar(var); 24 | // var = baz(); 25 | // } 26 | // bar(var); 27 | // } 28 | // 29 | // The IR for this snippet (at O0) is: 30 | // define void @foo(i32 %arg, i32 %arg1) { 31 | // bb: 32 | // %i = alloca i32 33 | // %i2 = alloca i32 34 | // %i3 = alloca i32 35 | // store i32 %arg, ptr %i 36 | // store i32 %arg1, ptr %i2 37 | // %i4 = load i32, ptr %i 38 | // %i5 = load i32, ptr %i2 39 | // %i6 = add i32 %i4, %i5 40 | // store i32 %i6, ptr %i3 41 | // %i7 = load i32, ptr %i3 42 | // %i8 = icmp eq i32 %i7, 255 43 | // br i1 %i8, label %bb9, label %bb12 44 | // 45 | // bb9: 46 | // %i10 = load i32, ptr %i3 47 | // call void @bar(i32 %i10) 48 | // %i11 = call i32 @baz() 49 | // store i32 %i11, ptr %i3 50 | // br label %bb12 51 | // 52 | // bb12: 53 | // %i13 = load i32, ptr %i3 54 | // call void @bar(i32 %i13) 55 | // ret void 56 | // } 57 | // 58 | // declare void @bar(i32) 59 | // declare i32 @baz(...) 
60 | std::unique_ptr myBuildModule(LLVMContext &Ctxt) { return nullptr; } 61 | -------------------------------------------------------------------------------- /ch3/machineir/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.22) 2 | 3 | set(CMAKE_CXX_STANDARD 17) 4 | 5 | project(CH3 6 | LANGUAGES CXX C) 7 | 8 | include(../../cmake/utils/set-llvm-install-prefix.cmake) 9 | 10 | set(CURR_TARGET 11 | build_mir 12 | ) 13 | 14 | set(SRC 15 | "main.cpp" 16 | "solution/populate_function.cpp" 17 | "your_turn/populate_function.cpp" 18 | ) 19 | 20 | set(INC 21 | ) 22 | 23 | 24 | # Now build our tools 25 | add_executable(${CURR_TARGET} ${SRC} ${INC}) 26 | 27 | # Find the libraries that correspond to the LLVM components 28 | # that we wish to use 29 | llvm_map_components_to_libnames(llvm_libs support core AllTargetsCodeGens) 30 | 31 | # Link against LLVM libraries (PRIVATE: nothing consumes this executable's link interface) 32 | target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs}) 33 | -------------------------------------------------------------------------------- /ch3/machineir/README.md: -------------------------------------------------------------------------------- 1 | # Building Machine IR # 2 | 3 | In this exercise, you will build your first function using the Machine intermediate representation (IR). 4 | 5 | Open `your_turn/populate_function.cpp` and implement the `populateMachineIR` function according to the comment 6 | in that file. 7 | 8 | We recommend to start by running the program once to see the output of the solution. 9 | 10 | This way you will get an idea of which opcodes you should use for your machine instructions. 11 | 12 | At the end, your produced IR should match what is printed between the `# Machine code for function solution_foo` and `# End machine code for function solution_foo.` lines. 13 | 14 | Your function is printed between the `# Machine code for function foo` and `# End machine code for function foo.` lines. 
15 | 16 | ## Configuring your environment ## 17 | 18 | 19 | ```bash 20 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=/lib/cmake/llvm -Bbuild . 21 | ninja -Cbuild 22 | ``` 23 | 24 | ## Running the example ## 25 | 26 | ```bash 27 | ./build/build_mir 28 | ``` 29 | 30 | ## Solution ## 31 | 32 | If your output doesn't match the solution, look at `solution/populate_function.cpp` to see how to implement the desired IR. 33 | -------------------------------------------------------------------------------- /ch3/machineir/main.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/CodeGen/CodeGenTargetMachineImpl.h" 2 | #include "llvm/CodeGen/MachineBasicBlock.h" 3 | #include "llvm/CodeGen/MachineFunction.h" 4 | #include "llvm/CodeGen/MachineModuleInfo.h" 5 | #include "llvm/CodeGen/Register.h" 6 | #include "llvm/CodeGen/TargetRegisterInfo.h" 7 | #include "llvm/CodeGen/TargetSubtargetInfo.h" 8 | #include "llvm/IR/DerivedTypes.h" 9 | #include "llvm/IR/Function.h" 10 | #include "llvm/IR/LLVMContext.h" 11 | #include "llvm/IR/Module.h" 12 | #include "llvm/MC/TargetRegistry.h" 13 | #include "llvm/Support/TargetSelect.h" // For InitializeAllTargets 14 | #include "llvm/Target/TargetMachine.h" 15 | #include "llvm/TargetParser/Triple.h" 16 | 17 | using namespace llvm; 18 | 19 | extern MachineFunction *solutionPopulateMachineIR(MachineModuleInfo &, 20 | llvm::Function &, Register, 21 | Register); 22 | extern MachineFunction *populateMachineIR(MachineModuleInfo &, 23 | llvm::Function &, Register, 24 | Register); 25 | 26 | bool checkFunctionCorrectness(MachineFunction *Res, Register W0, Register W1) { 27 | // Take care of the liveness since we did not explain how to do that. 28 | MachineBasicBlock *EntryBB = Res->empty() ? 
nullptr : &*Res->begin(); 29 | if (EntryBB) { 30 | EntryBB->addLiveIn(W0); 31 | EntryBB->addLiveIn(W1); 32 | } 33 | Res->print(errs()); 34 | if (!Res->verify()) { 35 | errs() << Res->getName() << " does not verify\n"; 36 | return false; 37 | } 38 | return true; 39 | } 40 | 41 | int main() { 42 | // We have to initialize all the targets to get the registry initialized. 43 | InitializeAllTargets(); 44 | // We need the MC layer as well to query the register information. 45 | InitializeAllTargetMCs(); 46 | 47 | auto TT(Triple::normalize("aarch64--")); 48 | std::string Error; 49 | const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error); 50 | if (!TheTarget) { 51 | errs() << TT << " is not available with this build of LLVM\n"; 52 | return -1; 53 | } 54 | auto *LLVMTM = static_cast( 55 | TheTarget->createTargetMachine(TT, "", "", TargetOptions(), std::nullopt, 56 | std::nullopt, CodeGenOptLevel::Default)); 57 | MachineModuleInfoWrapperPass MMIWP(LLVMTM); 58 | LLVMContext Context; 59 | Module MyModule("MyModule", Context); 60 | MyModule.setDataLayout(LLVMTM->createDataLayout()); 61 | 62 | Function *SolutionFoo = Function::Create( 63 | FunctionType::get(Type::getVoidTy(Context), /*IsVarArg=*/false), 64 | Function::ExternalLinkage, "solution_foo", MyModule); 65 | const TargetSubtargetInfo *STI = LLVMTM->getSubtargetImpl(*SolutionFoo); 66 | const TargetRegisterInfo *TRI = STI->getRegisterInfo(); 67 | 68 | // Find the indices for W0 and W1. 69 | // Since we are not in AArch64 library we don't have access to the AArch64::W0 70 | // enums. 
71 | StringRef W0Str = "W0"; 72 | StringRef W1Str = "W1"; 73 | Register W0 = 0; 74 | Register W1 = 0; 75 | for (unsigned i = 1, e = TRI->getNumRegs(); i != e && (!W0 || !W1); ++i) { 76 | if (!W0 && W0Str == TRI->getName(i)) { 77 | W0 = i; 78 | continue; 79 | } 80 | if (!W1 && W1Str == TRI->getName(i)) { 81 | W1 = i; 82 | continue; 83 | } 84 | } 85 | 86 | if (!W0 || !W1) { 87 | errs() << "Failed to found physical registers w0 and w1\n"; 88 | return -1; 89 | } 90 | 91 | MachineFunction *Res = 92 | solutionPopulateMachineIR(MMIWP.getMMI(), *SolutionFoo, W0, W1); 93 | bool solutionIsCorrect = checkFunctionCorrectness(Res, W0, W1); 94 | 95 | Function *Foo = Function::Create( 96 | FunctionType::get(Type::getVoidTy(Context), /*IsVarArg=*/false), 97 | Function::ExternalLinkage, "foo", MyModule); 98 | 99 | MachineFunction *YourTurnRes = 100 | populateMachineIR(MMIWP.getMMI(), *Foo, W0, W1); 101 | bool yourTurnIsCorrect = checkFunctionCorrectness(YourTurnRes, W0, W1); 102 | 103 | 104 | return !(solutionIsCorrect && yourTurnIsCorrect); 105 | } 106 | -------------------------------------------------------------------------------- /ch3/machineir/solution/populate_function.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 2 | #include "llvm/CodeGen/MachineFrameInfo.h" // For CreateStackObject. 3 | #include "llvm/CodeGen/MachineFunction.h" 4 | #include "llvm/CodeGen/MachineMemOperand.h" // For MachinePointerInfo. 5 | #include "llvm/CodeGen/MachineModuleInfo.h" 6 | #include "llvm/CodeGen/MachineRegisterInfo.h" 7 | #include "llvm/CodeGen/Register.h" 8 | #include "llvm/CodeGen/TargetOpcodes.h" 9 | #include "llvm/CodeGenTypes/LowLevelType.h" // For LLT. 10 | #include "llvm/IR/Function.h" 11 | #include "llvm/IR/InstrTypes.h" // For ICMP_EQ. 
12 | 13 | using namespace llvm; 14 | 15 | // The goal of this function is to build a MachineFunction that 16 | // represents the lowering of the following foo, a C function: 17 | // extern int baz(); 18 | // extern void bar(int); 19 | // void foo(int a, int b) { 20 | // int var = a + b; 21 | // if (var == 0xFF) { 22 | // bar(var); 23 | // var = baz(); 24 | // } 25 | // bar(var); 26 | // } 27 | // 28 | // The proposed ABI is: 29 | // - 32-bit arguments are passed through registers: w0, w1 30 | // - 32-bit returned values are passed through registers: w0, w1 31 | // w0 and w1 are given as argument of this Function. 32 | // 33 | // The local variable named var is expected to live on the stack. 34 | MachineFunction *solutionPopulateMachineIR(MachineModuleInfo &MMI, 35 | Function &Foo, Register W0, 36 | Register W1) { 37 | MachineFunction &MF = MMI.getOrCreateMachineFunction(Foo); 38 | // Create the 3 basic blocks that compose Foo. 39 | MachineBasicBlock *EntryBB = MF.CreateMachineBasicBlock(); 40 | MF.push_back(EntryBB); 41 | MachineBasicBlock *ThenBB = MF.CreateMachineBasicBlock(); 42 | MF.push_back(ThenBB); 43 | MachineBasicBlock *ExitBB = MF.CreateMachineBasicBlock(); 44 | MF.push_back(ExitBB); 45 | 46 | // Create the configuration of the CFG. 47 | EntryBB->addSuccessor(ThenBB); 48 | EntryBB->addSuccessor(ExitBB); 49 | ThenBB->addSuccessor(ExitBB); 50 | 51 | // The type for bool. 52 | LLT I1 = LLT::scalar(1); 53 | // The type of var. 54 | LLT I32 = LLT::scalar(32); 55 | MachinePointerInfo PtrInfo; 56 | Align VarStackAlign(4); 57 | // The type for the address of var. 58 | LLT VarAddrLLT = LLT::pointer(/*AddressSpace=*/0, /*SizeInBits=*/64); 59 | // The stack slot for var. 60 | int FrameIndex = MF.getFrameInfo().CreateStackObject(32, VarStackAlign, 61 | /*IsSpillSlot=*/false); 62 | 63 | // Populate entry. 64 | MachineIRBuilder MIBuilder(*EntryBB, EntryBB->end()); 65 | // Get the input arguments. 
66 | Register A = MIBuilder.buildCopy(I32, W0).getReg(0); 67 | Register B = MIBuilder.buildCopy(I32, W1).getReg(0); 68 | // Get the stack slot for var. 69 | Register VarStackAddr = 70 | MIBuilder.buildFrameIndex(VarAddrLLT, FrameIndex).getReg(0); 71 | // Add. 72 | Register ResAdd = MIBuilder.buildAdd(I32, A, B).getReg(0); 73 | // Write result to var's address. 74 | MIBuilder.buildStore(ResAdd, VarStackAddr, PtrInfo, VarStackAlign); 75 | // Build the ICmp 76 | Register Cst0xFF = MIBuilder.buildConstant(I32, 0xFF).getReg(0); 77 | Register ReloadedVar0 = 78 | MIBuilder.buildLoad(I32, VarStackAddr, PtrInfo, VarStackAlign).getReg(0); 79 | Register Cmp = 80 | MIBuilder.buildICmp(CmpInst::ICMP_EQ, I1, ReloadedVar0, Cst0xFF) 81 | .getReg(0); 82 | // Conditional branch. 83 | // If true jump to ThenBB. 84 | MIBuilder.buildBrCond(Cmp, *ThenBB); 85 | // Otherwise jump to ExitBB; 86 | MIBuilder.buildBr(*ExitBB); 87 | 88 | // ThenBB 89 | // Reset MIBuilder to point at the end of ThenBB. 90 | MIBuilder.setInsertPt(*ThenBB, ThenBB->end()); 91 | // Put var in W0 for the call to bar. 92 | Register ReloadedVar1 = 93 | MIBuilder.buildLoad(I32, VarStackAddr, PtrInfo, VarStackAlign).getReg(0); 94 | MIBuilder.buildCopy(W0, ReloadedVar1); 95 | // Fake call to bar. 96 | MIBuilder.buildInstr(TargetOpcode::INLINEASM, {}, {}) 97 | .addExternalSymbol("bl @bar") 98 | .addImm(0) 99 | .addReg(W0, RegState::Implicit); 100 | // Fake call to baz. 101 | MIBuilder.buildInstr(TargetOpcode::INLINEASM, {}, {}) 102 | .addExternalSymbol("bl @baz") 103 | .addImm(0) 104 | .addReg(W0, RegState::Implicit | RegState::Define); 105 | // Copy the result of baz to var. 106 | Register ResOfBaz = MIBuilder.buildCopy(I32, W0).getReg(0); 107 | MIBuilder.buildStore(ResOfBaz, VarStackAddr, PtrInfo, VarStackAlign); 108 | // Fallthrough to exit BB, no need for a terminator 109 | 110 | // ExitBB 111 | // Reset MIBuilder to point at the end of ExitBB. 
112 | MIBuilder.setInsertPt(*ExitBB, ExitBB->end()); 113 | // Put var in W0 for the call to bar. 114 | Register ReloadedVar2 = 115 | MIBuilder.buildLoad(I32, VarStackAddr, PtrInfo, VarStackAlign).getReg(0); 116 | MIBuilder.buildCopy(W0, ReloadedVar2); 117 | // Fake call to bar. 118 | MIBuilder.buildInstr(TargetOpcode::INLINEASM, {}, {}) 119 | .addExternalSymbol("bl @bar") 120 | .addImm(0) 121 | .addReg(W0, RegState::Implicit); 122 | // End of the function, return void; 123 | MIBuilder.buildInstr(TargetOpcode::INLINEASM, {}, {}) 124 | .addExternalSymbol("ret") 125 | .addImm(0); 126 | return &MF; 127 | } 128 | -------------------------------------------------------------------------------- /ch3/machineir/your_turn/populate_function.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 2 | #include "llvm/CodeGen/MachineFrameInfo.h" // For CreateStackObject. 3 | #include "llvm/CodeGen/MachineFunction.h" 4 | #include "llvm/CodeGen/MachineMemOperand.h" // For MachinePointerInfo. 5 | #include "llvm/CodeGen/MachineModuleInfo.h" 6 | #include "llvm/CodeGen/MachineRegisterInfo.h" 7 | #include "llvm/CodeGen/Register.h" 8 | #include "llvm/CodeGen/TargetOpcodes.h" // For INLINEASM. 9 | #include "llvm/CodeGenTypes/LowLevelType.h" // For LLT. 10 | #include "llvm/IR/Function.h" 11 | #include "llvm/IR/InstrTypes.h" // For ICMP_EQ. 
12 | 13 | using namespace llvm; 14 | 15 | // The goal of this function is to build a MachineFunction that 16 | // represents the lowering of the following foo, a C function: 17 | // extern int baz(); 18 | // extern void bar(int); 19 | // void foo(int a, int b) { 20 | // int var = a + b; 21 | // if (var == 0xFF) { 22 | // bar(var); 23 | // var = baz(); 24 | // } 25 | // bar(var); 26 | // } 27 | // 28 | // The proposed ABI is: 29 | // - 32-bit arguments are passed through registers: w0, w1 30 | // - 32-bit returned values are passed through registers: w0, w1 31 | // w0 and w1 are given as argument of this Function. 32 | // 33 | // The local variable named var is expected to live on the stack. 34 | MachineFunction *populateMachineIR(MachineModuleInfo &MMI, Function &Foo, 35 | Register W0, Register W1) { 36 | MachineFunction &MF = MMI.getOrCreateMachineFunction(Foo); 37 | 38 | // The type for bool. 39 | LLT I1 = LLT::scalar(1); 40 | // The type of var. 41 | LLT I32 = LLT::scalar(32); 42 | 43 | // To use to create load and store for var. 44 | MachinePointerInfo PtrInfo; 45 | Align VarStackAlign(4); 46 | 47 | // The type for the address of var. 48 | LLT VarAddrLLT = LLT::pointer(/*AddressSpace=*/0, /*SizeInBits=*/64); 49 | 50 | // The stack slot for var. 51 | int FrameIndex = MF.getFrameInfo().CreateStackObject(32, VarStackAlign, 52 | /*IsSpillSlot=*/false); 53 | 54 | // TODO: Populate MF. 55 | return &MF; 56 | } 57 | -------------------------------------------------------------------------------- /ch4/implicit_func_scope_change/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_STANDARD 17) 2 | 3 | cmake_minimum_required(VERSION 3.22) 4 | 5 | project(CH4 6 | LANGUAGES CXX C) 7 | 8 | set(CURR_TARGET 9 | implicit_change_of_scope 10 | ) 11 | 12 | # Add the search path for LLVM. 
13 | include(../../cmake/utils/set-llvm-install-prefix.cmake) 14 | 15 | set(SRC 16 | "main.cpp" 17 | ) 18 | 19 | set(INC 20 | ) 21 | 22 | set(CMAKE_CXX_FLAGS -fno-rtti) 23 | 24 | # Now build our tools 25 | add_executable(${CURR_TARGET} ${SRC} ${INC}) 26 | 27 | # Find the libraries that correspond to the LLVM components 28 | # that we wish to use 29 | llvm_map_components_to_libnames(llvm_libs core asmparser) 30 | 31 | # Link against LLVM libraries 32 | target_link_libraries(${CURR_TARGET} ${llvm_libs}) 33 | -------------------------------------------------------------------------------- /ch4/implicit_func_scope_change/README.md: -------------------------------------------------------------------------------- 1 | # Implicit change of scope # 2 | 3 | This example illustrates the implicit change of scope, meaning that when you follow the def-use chains of certain values you may jump around different functions. 4 | 5 | Unsurprisingly, this is true for global variables, for instance, but it also happens with constants. 6 | 7 | To see this in action, configure, build and run this example by following the next steps: 8 | ```bash 9 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild . 10 | ninja -Cbuild 11 | ./build/implicit_change_of_scope 12 | ``` 13 | 14 | Running `implicit_change_of_scope` will print some messages when jumping from one function to another. 15 | 16 | Look at the `InputIR` string in `main.cpp` to see if you can rebuild in your head the def-use chains and explain the change of scope from `bar` to `foo`. 17 | -------------------------------------------------------------------------------- /ch4/implicit_func_scope_change/main.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/AsmParser/Parser.h" // For parseAssemblyString.
2 | #include "llvm/IR/BasicBlock.h" 3 | #include "llvm/IR/Function.h" 4 | #include "llvm/IR/Instruction.h" 5 | #include "llvm/IR/Instructions.h" // For LoadInst. 6 | #include "llvm/IR/LLVMContext.h" 7 | #include "llvm/IR/Module.h" 8 | #include "llvm/IR/Use.h" 9 | #include "llvm/IR/User.h" 10 | #include "llvm/Support/Debug.h" // For errs(). 11 | #include "llvm/Support/SourceMgr.h" // For SMDiagnostic. 12 | 13 | using namespace llvm; 14 | 15 | const char *InputIR = 16 | "@global = external global ptr, align 8\n" 17 | "@other_global = local_unnamed_addr global ptr @global, align 8\n" 18 | "\n" 19 | "define signext i8 @foo() {\n" 20 | "bb:\n" 21 | " %i = load ptr, ptr @global, align 8\n" 22 | " %i1 = load i8, ptr %i, align 1\n" 23 | " ret i8 %i1\n" 24 | "}\n" 25 | "\n" 26 | "define signext i8 @bar() {\n" 27 | "bb:\n" 28 | " %i = load ptr, ptr @global, align 8\n" 29 | " %i1 = load i8, ptr %i, align 1\n" 30 | " ret i8 %i1\n" 31 | "}\n"; 32 | 33 | int main() { 34 | LLVMContext Context; 35 | SMDiagnostic Err; 36 | std::unique_ptr MyModule = parseAssemblyString(InputIR, Err, Context); 37 | Function *BarFunc = MyModule->getFunction("bar"); 38 | 39 | BasicBlock &Entry = *BarFunc->begin(); 40 | 41 | auto &BarRes = *cast(Entry.begin()); 42 | 43 | Value *Global = BarRes.getOperand(0); 44 | for (User *UserOfGlobal : Global->users()) { 45 | auto *UserInstr = dyn_cast(UserOfGlobal); 46 | if (!UserInstr) { 47 | errs() << "Found a non-instruction use of global: " << *UserOfGlobal 48 | << '\n'; 49 | continue; 50 | } 51 | Function *UserFunc = UserInstr->getParent()->getParent(); 52 | if (UserFunc != BarFunc) 53 | errs() << "Went from bar to " << UserFunc->getName() << '\n'; 54 | } 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /ch4/simple_cst_propagation/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_STANDARD 17) 2 | 3 | cmake_minimum_required(VERSION 3.22) 4 
| 5 | project(CH4 6 | LANGUAGES CXX C) 7 | 8 | include(../../cmake/utils/set-llvm-install-prefix.cmake) 9 | 10 | set(CURR_TARGET 11 | simple_cst_propagation 12 | ) 13 | 14 | set(SRC 15 | "main.cpp" 16 | "solution/populate_function.cpp" 17 | "your_turn/populate_function.cpp" 18 | ) 19 | 20 | set(INC 21 | ) 22 | 23 | set(CMAKE_CXX_FLAGS -fno-rtti) 24 | 25 | # Now build our tools 26 | add_executable(${CURR_TARGET} ${SRC} ${INC}) 27 | 28 | # Find the libraries that correspond to the LLVM components 29 | # that we wish to use 30 | llvm_map_components_to_libnames(llvm_libs support core transformutils) 31 | 32 | # Link against LLVM libraries 33 | target_link_libraries(${CURR_TARGET} ${llvm_libs}) 34 | -------------------------------------------------------------------------------- /ch4/simple_cst_propagation/README.md: -------------------------------------------------------------------------------- 1 | # Build your own simple constant propagation # 2 | 3 | In this exercise you need to populate the code in `your_turn/populate_function.cpp` by following the instructions given in the comments of the `myConstantPropagation` function. 4 | 5 | At any point in the development, you can build and test your code using the commands given below. 6 | 7 | If you get stuck, feel free to look at the reference implementation in `solution/populate_function.cpp`. 8 | 9 | When running the program, you will see that it prints which implementation managed to transform the input IR. 10 | Try to beat the reference implementation by supporting more cases than it does! 11 | 12 | ## Configure your build directory ## 13 | 14 | ```bash 15 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild . 16 | ``` 17 | 18 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option). 19 | 20 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed.
21 | Either build and install your own LLVM (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package. 22 | 23 | ## Build ## 24 | 25 | ```bash 26 | ninja -C build 27 | ``` 28 | 29 | This builds the default target in the build directory. 30 | 31 | This should produce a binary named `simple_cst_propagation` in the `build` directory. 32 | 33 | ## Run ## 34 | 35 | ```bash 36 | ./build/simple_cst_propagation [input.ll|.bc] 37 | ``` 38 | 39 | This will run both the reference implementation and your implementation on `input.ll` if specified or the default input if not. 40 | 41 | The run will apply both implementations to the input and will check whether an optimization happened, and whether the resulting IR is correct. 42 | 43 | It will also report which implementation managed to optimize the input IR. 44 | 45 | To see how this is put together, take a look at `main.cpp`. 46 | 47 | For each function in the input IR, the output will look like this: 48 | ``` 49 | Processing function '<function name>' 50 | [The input IR for function <function name>] 51 | 52 | ## Reference implementation 53 | [Resulting IR after the reference optimization] 54 | 55 | 56 | ## Your implementation 57 | [Resulting IR after your optimization] 58 | 59 | 60 | 61 | 62 | ###### 63 | ``` 64 | 65 | The message printed after each function is processed will tell you if you did better than the reference implementation, i.e., you transformed the input IR and the reference implementation did not.
66 | 67 | ## Producing an input ## 68 | 69 | Using the following command line, you can produce from a C file an input to give to your program: 70 | ```bash 71 | clang -o - -S -emit-llvm test.c -O0 | sed -e 's#optnone##g' | /bin/opt -S -passes=mem2reg,instnamer > input.ll 72 | ``` 73 | 74 | optnone => remove the attribute that prevents optimizations 75 | mem2reg => get rid of stack accesses / build SSA 76 | instnamer => get rid of the implicit variables 77 | -------------------------------------------------------------------------------- /ch4/simple_cst_propagation/input.c: -------------------------------------------------------------------------------- 1 | int foo(int tst) { 2 | int a = 3 + 2; 3 | a <<= 3; 4 | if (tst) 5 | a /= 5; 6 | else 7 | a |= 0xF0F; 8 | return a; 9 | } 10 | 11 | int bar(int tst) { 12 | unsigned a = 0xFFFFFFFF; 13 | a <<= 3; 14 | if (tst) 15 | a /= 3; 16 | else 17 | a |= 0xF0F; 18 | ++a; 19 | return (int)a; 20 | } 21 | 22 | -------------------------------------------------------------------------------- /ch4/simple_cst_propagation/main.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/AsmParser/Parser.h" // For parseAssemblyString. 2 | #include "llvm/IR/Function.h" 3 | #include "llvm/IR/LLVMContext.h" 4 | #include "llvm/IR/Module.h" 5 | #include "llvm/IR/Verifier.h" 6 | #include "llvm/IRReader/IRReader.h" // For parseIRFile. 7 | #include "llvm/Support/Debug.h" // For errs(). 8 | #include "llvm/Support/SourceMgr.h" // For SMDiagnostic. 9 | #include "llvm/Transforms/Utils/Cloning.h" // For CloneFunction. 10 | 11 | using namespace llvm; 12 | 13 | extern bool myConstantPropagation(llvm::Function &); 14 | extern bool solutionConstantPropagation(llvm::Function &); 15 | 16 | bool checkFunctionCorrectness(llvm::Function &Res) { 17 | Res.print(errs()); 18 | // verifyFunction returns true if it finds errors and 19 | // print them on the provided output stream (errs() here). 
20 | if (verifyFunction(Res, &errs())) { 21 | errs() << Res.getName() << " does not verify\n"; 22 | return false; 23 | } 24 | return true; 25 | } 26 | 27 | // Default input in case no file was provided. 28 | const char *InputIR = 29 | "define i32 @foo(i32 noundef %arg) {\n" 30 | "bb:\n" 31 | " %i = shl i32 5, 3\n" 32 | " %i1 = icmp ne i32 %arg, 0\n" 33 | " br i1 %i1, label %bb2, label %bb4\n" 34 | "\n" 35 | "bb2: ; preds = %bb\n" 36 | " %i3 = sdiv i32 %i, 5\n" 37 | " br label %bb6\n" 38 | "\n" 39 | "bb4: ; preds = %bb\n" 40 | " %i5 = or i32 %i, 3855\n" 41 | " br label %bb6\n" 42 | "\n" 43 | "bb6: ; preds = %bb4, %bb2\n" 44 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n" 45 | " ret i32 %.0\n" 46 | "}\n" 47 | "\n" 48 | "define i32 @bar(i32 noundef %arg) {\n" 49 | "bb:\n" 50 | " %i = shl i32 -1, 3\n" 51 | " %i1 = icmp ne i32 %arg, 0\n" 52 | " br i1 %i1, label %bb2, label %bb4\n" 53 | "\n" 54 | "bb2: ; preds = %bb\n" 55 | " %i3 = udiv i32 %i, 3\n" 56 | " br label %bb6\n" 57 | "\n" 58 | "bb4: ; preds = %bb\n" 59 | " %i5 = or i32 %i, 3855\n" 60 | " br label %bb6\n" 61 | "\n" 62 | "bb6: ; preds = %bb4, %bb2\n" 63 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n" 64 | " %i7 = add i32 %.0, 1\n" 65 | " ret i32 %i7\n" 66 | "}\n"; 67 | 68 | int main(int argc, char **argv) { 69 | LLVMContext Context; 70 | SMDiagnostic Err; 71 | std::unique_ptr MyModule; 72 | // To be able to play with the optimization a little bit, 73 | // support a mode where you can feed your own IR files. 74 | if (argc == 2) { 75 | outs() << "Reading module from '" << argv[1] << "'\n"; 76 | MyModule = parseIRFile(argv[1], Err, Context); 77 | } else { 78 | MyModule = parseAssemblyString(InputIR, Err, Context); 79 | } 80 | if (!MyModule) { 81 | errs() << "Unable to build module\n"; 82 | return -1; 83 | } 84 | 85 | // Put all the functions in a worklist and process them afterwards. 
86 | // We do that because we clone the functions on the fly to have the 87 | // same input to both the reference implementation and the user provided 88 | // solution and we don't want to process cloned functions afterwards and 89 | // clone them and so on. 90 | SmallVector Worklist; 91 | for (Function &Func : *MyModule) 92 | Worklist.push_back(&Func); 93 | 94 | // Map for the cloned functions. 95 | ValueToValueMapTy VMap; 96 | bool hadError = false; 97 | for (Function *Func : Worklist) { 98 | outs() << "Processing function '" << Func->getName() << '\n'; 99 | Func->print(outs()); 100 | 101 | // Clone the function before the optimization to make sure each 102 | // implementation sees the same input. 103 | auto *ClonedFunc = CloneFunction(Func, VMap); 104 | 105 | outs() << "\n\n## Reference implementation\n"; 106 | bool solutionDidSomething = solutionConstantPropagation(*ClonedFunc); 107 | bool solutionIsCorrect = checkFunctionCorrectness(*ClonedFunc); 108 | 109 | outs() << "\n\n## Your implementation\n"; 110 | bool yourTurnDidSomething = myConstantPropagation(*Func); 111 | bool yourTurnIsCorrect = checkFunctionCorrectness(*Func); 112 | 113 | if (!(solutionIsCorrect && yourTurnIsCorrect)) { 114 | hadError = true; 115 | errs() << "Solution does not verify:\n" 116 | "- provided implementation(" 117 | << (solutionIsCorrect ? "passed" : "failed") 118 | << ")\n" 119 | " - your implementation(" 120 | << (yourTurnIsCorrect ? "passed" : "failed") << ")\n"; 121 | } 122 | 123 | outs() << '\n'; 124 | if (solutionDidSomething) { 125 | if (!yourTurnDidSomething) 126 | outs() 127 | << "The solution managed to optimize something but you did not\n"; 128 | else 129 | outs() << "Both your solution and the provided solution did something, " 130 | "which one is better?\n"; 131 | } else if (yourTurnDidSomething) { 132 | outs() << "Nice! 
You managed to optimize something whereas the provided " 133 | "solution did not!"; 134 | } 135 | outs() << "######\n"; 136 | } 137 | 138 | return !hadError; 139 | } 140 | -------------------------------------------------------------------------------- /ch4/simple_cst_propagation/solution/populate_function.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/ADT/APInt.h" 2 | #include "llvm/ADT/PostOrderIterator.h" // For ReversePostOrderTraversal. 3 | #include "llvm/IR/BasicBlock.h" 4 | #include "llvm/IR/CFG.h" // To instantiate RPOTraversal. 5 | #include "llvm/IR/Constants.h" // For ConstantInt. 6 | #include "llvm/IR/Function.h" 7 | #include "llvm/IR/InstrTypes.h" // For BinaryOperator, etc. 8 | #include "llvm/IR/Instruction.h" 9 | #include "llvm/IR/LLVMContext.h" 10 | #include "llvm/IR/Module.h" 11 | #include "llvm/Support/Debug.h" // For errs(). 12 | 13 | #include 14 | 15 | using namespace llvm; 16 | 17 | // Helper function to deal with binary instructions. 18 | static Value *visitBinary(Instruction &Instr, LLVMContext &Ctxt, 19 | std::optional (*Computation)(const APInt &, 20 | const APInt &)) { 21 | assert(isa(Instr) && "This is meant for binary instruction"); 22 | 23 | auto *LHS = dyn_cast(Instr.getOperand(0)); 24 | auto *RHS = dyn_cast(Instr.getOperand(1)); 25 | if (!LHS || !RHS) 26 | return nullptr; 27 | 28 | // FIXME: Technically this API is not precise enough because we may want to 29 | // produce poison value for e.g., a division by zero. 30 | std::optional Res = Computation(LHS->getValue(), RHS->getValue()); 31 | if (!Res.has_value()) 32 | return nullptr; 33 | auto NewConstant = ConstantInt::get(Ctxt, *Res); 34 | return NewConstant; 35 | } 36 | 37 | // Takes \p Foo and apply a simple constant propagation optimization. 38 | // \returns true if \p Foo was modified (i.e., something had been constant 39 | // propagated), false otherwise. 
40 | bool solutionConstantPropagation(Function &Foo) { 41 | // RPO construction chokes on empty functions instead of producing an empty 42 | // traversal. 43 | // Just skip such entry. 44 | if (Foo.empty()) 45 | return false; 46 | 47 | LLVMContext &Ctxt = Foo.getParent()->getContext(); 48 | bool MadeChanges = false; 49 | 50 | ReversePostOrderTraversal RPOT(&Foo); 51 | for (BasicBlock *BB : RPOT) { 52 | // Early increment to be able to remove the instruction that we replaced 53 | // on-the-fly. The alternative is to accumulate the instructions to remove 54 | // in a worklist and delete them afterwards. 55 | for (Instruction &Instr : make_early_inc_range(*BB)) { 56 | Value *NewConstant = nullptr; 57 | switch (Instr.getOpcode()) { 58 | case Instruction::Add: 59 | NewConstant = visitBinary( 60 | Instr, Ctxt, 61 | [](const APInt &A, const APInt &B) -> std::optional { 62 | return A + B; 63 | }); 64 | break; 65 | case Instruction::Sub: 66 | NewConstant = visitBinary( 67 | Instr, Ctxt, 68 | [](const APInt &A, const APInt &B) -> std::optional { 69 | return A - B; 70 | }); 71 | break; 72 | case Instruction::Mul: 73 | NewConstant = visitBinary( 74 | Instr, Ctxt, 75 | [](const APInt &A, const APInt &B) -> std::optional { 76 | return A * B; 77 | }); 78 | break; 79 | case Instruction::SDiv: 80 | NewConstant = visitBinary( 81 | Instr, Ctxt, 82 | [](const APInt &A, const APInt &B) -> std::optional { 83 | if (B.isZero()) 84 | return std::nullopt; 85 | return A.sdiv(B); 86 | }); 87 | break; 88 | case Instruction::UDiv: 89 | NewConstant = visitBinary( 90 | Instr, Ctxt, 91 | [](const APInt &A, const APInt &B) -> std::optional { 92 | if (B.isZero()) 93 | return std::nullopt; 94 | return A.udiv(B); 95 | }); 96 | break; 97 | case Instruction::Shl: 98 | NewConstant = visitBinary( 99 | Instr, Ctxt, 100 | [](const APInt &A, const APInt &B) -> std::optional { 101 | return A.shl(B); 102 | }); 103 | break; 104 | case Instruction::LShr: 105 | NewConstant = visitBinary( 106 | Instr, Ctxt, 107 | 
[](const APInt &A, const APInt &B) -> std::optional { 108 | return A.lshr(B); 109 | }); 110 | break; 111 | case Instruction::AShr: 112 | NewConstant = visitBinary( 113 | Instr, Ctxt, 114 | [](const APInt &A, const APInt &B) -> std::optional { 115 | return A.ashr(B); 116 | }); 117 | break; 118 | case Instruction::And: 119 | NewConstant = visitBinary( 120 | Instr, Ctxt, 121 | [](const APInt &A, const APInt &B) -> std::optional { 122 | return A & B; 123 | }); 124 | break; 125 | case Instruction::Or: 126 | NewConstant = visitBinary( 127 | Instr, Ctxt, 128 | [](const APInt &A, const APInt &B) -> std::optional { 129 | return A | B; 130 | }); 131 | break; 132 | case Instruction::Xor: 133 | NewConstant = visitBinary( 134 | Instr, Ctxt, 135 | [](const APInt &A, const APInt &B) -> std::optional { 136 | return A ^ B; 137 | }); 138 | break; 139 | 140 | default: 141 | break; 142 | } 143 | if (NewConstant) { 144 | Instr.replaceAllUsesWith(NewConstant); 145 | Instr.eraseFromParent(); 146 | MadeChanges = true; 147 | } 148 | } 149 | } 150 | return MadeChanges; 151 | } 152 | -------------------------------------------------------------------------------- /ch4/simple_cst_propagation/your_turn/populate_function.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 2 | #include "llvm/CodeGen/MachineFrameInfo.h" // For CreateStackObject. 3 | #include "llvm/CodeGen/MachineFunction.h" 4 | #include "llvm/CodeGen/MachineMemOperand.h" // For MachinePointerInfo. 5 | #include "llvm/CodeGen/MachineModuleInfo.h" 6 | #include "llvm/CodeGen/MachineRegisterInfo.h" 7 | #include "llvm/CodeGen/Register.h" 8 | #include "llvm/CodeGen/TargetOpcodes.h" // For INLINEASM. 9 | #include "llvm/CodeGenTypes/LowLevelType.h" // For LLT. 10 | #include "llvm/IR/Function.h" 11 | #include "llvm/IR/InstrTypes.h" // For ICMP_EQ. 
12 | 13 | using namespace llvm; 14 | 15 | // Takes \p Foo and apply a simple constant propagation optimization. 16 | // \returns true if \p Foo was modified (i.e., something had been constant 17 | // propagated), false otherwise. 18 | bool myConstantPropagation(Function &Foo) { 19 | // TODO: populate this function. 20 | return false; 21 | } 22 | -------------------------------------------------------------------------------- /ch5/your_first_pass/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_STANDARD 17) 2 | 3 | cmake_minimum_required(VERSION 3.22) 4 | 5 | project(CH5 6 | LANGUAGES CXX C) 7 | 8 | include(../../cmake/utils/set-llvm-install-prefix.cmake) 9 | 10 | set(CURR_TARGET 11 | your_first_pass 12 | ) 13 | 14 | set(SRC 15 | "main.cpp" 16 | # We use the skeleton of the optimization we did in ch4 to create 17 | # our pass. 18 | "../../ch4/simple_cst_propagation/solution/populate_function.cpp" 19 | "solution/passWithNewPM.cpp" 20 | "solution/passWithLegacyPM.cpp" 21 | "your_turn/passWithNewPM.cpp" 22 | "your_turn/passWithLegacyPM.cpp" 23 | ) 24 | 25 | set(INC 26 | ) 27 | 28 | set(CMAKE_CXX_FLAGS -fno-rtti) 29 | 30 | # Now build our tools 31 | add_executable(${CURR_TARGET} ${SRC} ${INC}) 32 | 33 | # Find the libraries that correspond to the LLVM components 34 | # that we wish to use 35 | llvm_map_components_to_libnames(llvm_libs support core transformutils) 36 | 37 | # Link against LLVM libraries 38 | target_link_libraries(${CURR_TARGET} ${llvm_libs}) 39 | -------------------------------------------------------------------------------- /ch5/your_first_pass/README.md: -------------------------------------------------------------------------------- 1 | # Connect your first optimization pass # 2 | 3 | In this exercise you will learn to connect the optimization that we provided in Chapter 4 in both the legacy and the new pass manager. 
4 | 5 | Go to `your_turn/passWithLegacyPM.cpp` and `your_turn/passWithNewPM.cpp` to implement the connection with the legacy pass manager and the new pass manager respectively. 6 | 7 | Then, update `your_turn/passWithNewPM.h` and set the macro `YOUR_TURN_IS_READY` to `1` to enable the processing of your optimizations. 8 | 9 | We put `TODO` comments in the places you have to update to materialize the connection. 10 | 11 | For the body of your optimization, just call the provided `solutionConstantPropagation` function, which is the implementation of the simple constant propagation that we saw in Chapter 4. 12 | 13 | ## Configuring your build environment ## 14 | 15 | ```bash 16 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild . 17 | ``` 18 | 19 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option). 20 | 21 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed. 22 | Either build and install your own LLVM (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package. 23 | 24 | ## Build ## 25 | 26 | ```bash 27 | ninja -C build 28 | ``` 29 | 30 | This builds the default target in the build directory. 31 | 32 | This should produce a binary named `your_first_pass` in the `build` directory. 33 | 34 | ## Run ## 35 | 36 | ```bash 37 | ./build/your_first_pass [input.ll|.bc] 38 | ``` 39 | 40 | This will run both the reference implementation and your implementation on `input.ll` if specified or the default input if not. 41 | 42 | The run will apply both implementations to the input and will check whether the resulting IR is still correct. 43 | 44 | To see how this is put together, take a look at `main.cpp`. 45 | 46 | ## Solution ## 47 | 48 | Look into the `solution` directory to see how to implement the connection of your optimization with both pass managers.
49 | 50 | ## Producing an input ## 51 | 52 | Using the following command line, you can produce from a C file an input to give to your program: 53 | ```bash 54 | clang -o - -S -emit-llvm test.c -O0 | sed -e 's#optnone##g' | /bin/opt -S -passes=mem2reg,instnamer > input.ll 55 | ``` 56 | 57 | optnone => remove the attribute that prevents optimizations 58 | mem2reg => get rid of stack accesses / build SSA 59 | instnamer => get rid of the implicit variables 60 | -------------------------------------------------------------------------------- /ch5/your_first_pass/main.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/ADT/STLExtras.h" // For llvm::all_of. 2 | #include "llvm/AsmParser/Parser.h" // For parseAssemblyString. 3 | #include "llvm/IR/Function.h" 4 | #include "llvm/IR/LLVMContext.h" 5 | #include "llvm/IR/LegacyPassManager.h" // For legacy::PassManager. 6 | #include "llvm/IR/Module.h" 7 | #include "llvm/IR/PassInstrumentation.h" 8 | #include "llvm/IR/PassManager.h" // For the new PassManager. 9 | #include "llvm/IR/Verifier.h" 10 | #include "llvm/IRReader/IRReader.h" // For parseIRFile. 11 | #include "llvm/Support/Debug.h" // For errs(). 12 | #include "llvm/Support/SourceMgr.h" // For SMDiagnostic. 13 | #include "llvm/Transforms/Utils/Cloning.h" // For CloneModule. 14 | 15 | #include "solution/passWithNewPM.h" 16 | #include "your_turn/passWithNewPM.h" 17 | 18 | using namespace llvm; 19 | 20 | extern Pass *createSolutionPassForLegacyPM(); 21 | extern Pass *createYourTurnPassForLegacyPM(); 22 | 23 | bool checkFunctionCorrectness(const llvm::Function &Res) { 24 | Res.print(errs()); 25 | // verifyFunction returns true if it finds errors and 26 | // print them on the provided output stream (errs() here). 27 | if (verifyFunction(Res, &errs())) { 28 | errs() << Res.getName() << " does not verify\n"; 29 | return false; 30 | } 31 | return true; 32 | } 33 | 34 | // Default input in case no file was provided. 
35 | const char *InputIR = 36 | "define i32 @foo(i32 noundef %arg) {\n" 37 | "bb:\n" 38 | " %i = shl i32 5, 3\n" 39 | " %i1 = icmp ne i32 %arg, 0\n" 40 | " br i1 %i1, label %bb2, label %bb4\n" 41 | "\n" 42 | "bb2: ; preds = %bb\n" 43 | " %i3 = sdiv i32 %i, 5\n" 44 | " br label %bb6\n" 45 | "\n" 46 | "bb4: ; preds = %bb\n" 47 | " %i5 = or i32 %i, 3855\n" 48 | " br label %bb6\n" 49 | "\n" 50 | "bb6: ; preds = %bb4, %bb2\n" 51 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n" 52 | " ret i32 %.0\n" 53 | "}\n" 54 | "\n" 55 | "define i32 @bar(i32 noundef %arg) {\n" 56 | "bb:\n" 57 | " %i = shl i32 -1, 3\n" 58 | " %i1 = icmp ne i32 %arg, 0\n" 59 | " br i1 %i1, label %bb2, label %bb4\n" 60 | "\n" 61 | "bb2: ; preds = %bb\n" 62 | " %i3 = udiv i32 %i, 3\n" 63 | " br label %bb6\n" 64 | "\n" 65 | "bb4: ; preds = %bb\n" 66 | " %i5 = or i32 %i, 3855\n" 67 | " br label %bb6\n" 68 | "\n" 69 | "bb6: ; preds = %bb4, %bb2\n" 70 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n" 71 | " %i7 = add i32 %.0, 1\n" 72 | " ret i32 %i7\n" 73 | "}\n"; 74 | 75 | int main(int argc, char **argv) { 76 | LLVMContext Context; 77 | SMDiagnostic Err; 78 | std::unique_ptr MyModule; 79 | // To be able to play with the optimization a little bit, 80 | // support a mode where you can feed your own IR files. 81 | if (argc == 2) { 82 | outs() << "Reading module from '" << argv[1] << "'\n"; 83 | MyModule = parseIRFile(argv[1], Err, Context); 84 | } else { 85 | MyModule = parseAssemblyString(InputIR, Err, Context); 86 | } 87 | if (!MyModule) { 88 | errs() << "Unable to build module\n"; 89 | return -1; 90 | } 91 | 92 | bool hadError = false; 93 | for (int i = 0; i < 2; ++i) { 94 | outs() << "Checking the run for " 95 | << ((i == 0) ? "Provided solution" : "Your turn") << "\n"; 96 | // Start with the legacy pass manager. 97 | std::unique_ptr ModuleForLegacyPM = CloneModule(*MyModule); 98 | 99 | legacy::PassManager LegacyPM; 100 | 101 | Pass *PassForLegacyPM = (i == 0) ? 
createSolutionPassForLegacyPM() 102 | : createYourTurnPassForLegacyPM(); 103 | if (PassForLegacyPM) 104 | LegacyPM.add(PassForLegacyPM); 105 | 106 | LegacyPM.run(*ModuleForLegacyPM); 107 | bool solutionIsCorrect = 108 | all_of(ModuleForLegacyPM->functions(), 109 | [](const Function &F) { return checkFunctionCorrectness(F); }); 110 | hadError |= !solutionIsCorrect; 111 | 112 | // Then do the same thing with the new pass manager. 113 | std::unique_ptr ModuleForNewPM = CloneModule(*MyModule); 114 | 115 | // CAREFUL the order of the manager is important here since the destructor 116 | // needs to be called in the right order otherwise it will crash. 117 | FunctionAnalysisManager FAM; 118 | ModuleAnalysisManager MAM; 119 | // Register the passes used implicitly at the start of the pipeline. 120 | MAM.registerPass([&] { return PassInstrumentationAnalysis(); }); 121 | // Well this one is not as implicit, we ask for it with the call to the 122 | // adaptor below. 123 | MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); }); 124 | FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); }); 125 | ModulePassManager NewPM; 126 | // Wrap the function pass into a module pass. 
127 | if (i == 0) 128 | NewPM.addPass(createModuleToFunctionPassAdaptor( 129 | SolutionConstantPropagationNewPass())); 130 | #if YOUR_TURN_IS_READY 131 | else 132 | NewPM.addPass(createModuleToFunctionPassAdaptor( 133 | YourTurnConstantPropagationNewPass())); 134 | #endif 135 | NewPM.run(*ModuleForNewPM, MAM); 136 | 137 | solutionIsCorrect = 138 | all_of(ModuleForNewPM->functions(), 139 | [](const Function &F) { return checkFunctionCorrectness(F); }); 140 | hadError |= !solutionIsCorrect; 141 | } 142 | return !hadError; 143 | } 144 | -------------------------------------------------------------------------------- /ch5/your_first_pass/solution/passWithLegacyPM.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/Function.h" 2 | #include "llvm/Pass.h" // For FunctionPass & INITIALIZE_PASS. 3 | #include "llvm/Support/Debug.h" // For errs(). 4 | 5 | using namespace llvm; 6 | 7 | extern bool solutionConstantPropagation(llvm::Function &); 8 | 9 | // The implementation of this function is generated at the end of this file. See 10 | // INITIALIZE_PASS. 11 | namespace llvm { 12 | void initializeSolutionConstantPropagationPass(PassRegistry &); 13 | }; 14 | 15 | namespace { 16 | // The scope we want to modify is a Function. 17 | class SolutionConstantPropagation : public FunctionPass { 18 | public: 19 | // Used for the pass registry. 20 | static char ID; 21 | 22 | SolutionConstantPropagation() : FunctionPass(ID) { 23 | // Technically we do not need to put this in the constructor. 24 | // Usually this call lives in the uber InitializeAllXXX. 25 | initializeSolutionConstantPropagationPass(*PassRegistry::getPassRegistry()); 26 | } 27 | 28 | // Main function of a FunctionPass. 29 | // Returns true if any change was made to F. 
30 | bool runOnFunction(Function &F) override { 31 | errs() << "Solution Legacy called on " << F.getName() << '\n'; 32 | return solutionConstantPropagation(F); 33 | } 34 | }; 35 | } // End anonymous namespace. 36 | 37 | // Anchor for this pass' ID. 38 | char SolutionConstantPropagation::ID = 0; 39 | 40 | // Initialize function used for the pass registration. 41 | // This hooks up the command line option and gives general information 42 | // about the pass' properties. 43 | // This macro generates a llvm::initialize##passImplementationName##Pass 44 | // function. 45 | INITIALIZE_PASS(/*passImplementationName=*/SolutionConstantPropagation, 46 | /*commandLineArgName=*/"legacy-solution", 47 | /*name=*/"Legacy Solution", /*isCFGOnly=*/false, 48 | /*isAnalysis=*/false); 49 | 50 | Pass *createSolutionPassForLegacyPM() { 51 | return new SolutionConstantPropagation(); 52 | } 53 | -------------------------------------------------------------------------------- /ch5/your_first_pass/solution/passWithNewPM.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/Function.h" 2 | #include "llvm/Support/Debug.h" // For errs(). 3 | 4 | #include "passWithNewPM.h" 5 | 6 | using namespace llvm; 7 | 8 | extern bool solutionConstantPropagation(llvm::Function &); 9 | 10 | PreservedAnalyses 11 | SolutionConstantPropagationNewPass::run(Function &F, 12 | FunctionAnalysisManager &AM) { 13 | errs() << "Solution New PM on " << F.getName() << "\n"; 14 | bool MadeChanges = solutionConstantPropagation(F); 15 | if (!MadeChanges) 16 | return PreservedAnalyses::all(); 17 | // Even if we made changes, we didn't touch the CFG. 18 | // So every analysis that only depends on the CFG is still current. 
19 | PreservedAnalyses PA; 20 | PA.preserveSet<CFGAnalyses>(); 21 | return PA; 22 | } 23 | -------------------------------------------------------------------------------- /ch5/your_first_pass/solution/passWithNewPM.h: -------------------------------------------------------------------------------- 1 | #ifndef SOLUTION_PASSWITHNEWPM_H 2 | #define SOLUTION_PASSWITHNEWPM_H 3 | #include "llvm/IR/PassManager.h" // For PassInfoMixin. 4 | 5 | namespace llvm { 6 | class Function; 7 | }; 8 | 9 | class SolutionConstantPropagationNewPass 10 | : public llvm::PassInfoMixin<SolutionConstantPropagationNewPass> { 11 | public: 12 | llvm::PreservedAnalyses run(llvm::Function &F, 13 | llvm::FunctionAnalysisManager &AM); 14 | }; 15 | #endif 16 | -------------------------------------------------------------------------------- /ch5/your_first_pass/your_turn/passWithLegacyPM.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/Function.h" 2 | #include "llvm/Pass.h" // For FunctionPass & INITIALIZE_PASS. 3 | #include "llvm/Support/Debug.h" // For errs(). 4 | 5 | using namespace llvm; 6 | 7 | extern bool solutionConstantPropagation(llvm::Function &); 8 | 9 | // The implementation of this function is generated at the end of this file. See 10 | // INITIALIZE_PASS. 11 | namespace llvm { 12 | void initializeYourTurnConstantPropagationPass(PassRegistry &); 13 | }; 14 | 15 | namespace { 16 | class YourTurnConstantPropagation /* TODO: Fill in the blanks */ { 17 | public: 18 | YourTurnConstantPropagation() /* TODO: Fill in the blanks */ {} 19 | 20 | // TODO: Fill in the blanks. 21 | }; 22 | } // End anonymous namespace. 23 | 24 | // TODO: Remove and add proper implementation 25 | void llvm::initializeYourTurnConstantPropagationPass(PassRegistry &) {} 26 | 27 | Pass *createYourTurnPassForLegacyPM() { 28 | return nullptr; // TODO: Fill in the blanks. 
29 | } 30 | -------------------------------------------------------------------------------- /ch5/your_first_pass/your_turn/passWithNewPM.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/Function.h" 2 | #include "llvm/Support/Debug.h" // For errs(). 3 | 4 | #include "passWithNewPM.h" 5 | 6 | using namespace llvm; 7 | 8 | extern bool solutionConstantPropagation(llvm::Function &); 9 | 10 | // TODO: Fill in the blanks. 11 | -------------------------------------------------------------------------------- /ch5/your_first_pass/your_turn/passWithNewPM.h: -------------------------------------------------------------------------------- 1 | #ifndef YOURTURN_PASSWITHNEWPM_H 2 | #define YOURTURN_PASSWITHNEWPM_H 3 | #include "llvm/IR/PassManager.h" // For PassInfoMixin. 4 | 5 | namespace llvm { 6 | class Function; 7 | }; 8 | 9 | class YourTurnConstantPropagationNewPass /* TODO: Fill in the blanks*/ { 10 | public: 11 | }; 12 | 13 | /* TODO: Change this into 1 when you're ready to try */ 14 | #define YOUR_TURN_IS_READY 0 15 | #endif 16 | -------------------------------------------------------------------------------- /ch5/your_first_pipeline/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_STANDARD 17) 2 | 3 | cmake_minimum_required(VERSION 3.22) 4 | 5 | project(CH5 6 | LANGUAGES CXX C) 7 | 8 | include(../../cmake/utils/set-llvm-install-prefix.cmake) 9 | 10 | set(CURR_TARGET 11 | your_first_pipeline 12 | ) 13 | 14 | set(SRC 15 | "main.cpp" 16 | "solution/passPipelineWithNewPM.cpp" 17 | "solution/passPipelineWithLegacyPM.cpp" 18 | "your_turn/passPipelineWithNewPM.cpp" 19 | "your_turn/passPipelineWithLegacyPM.cpp" 20 | ) 21 | 22 | set(INC 23 | ) 24 | 25 | set(CMAKE_CXX_FLAGS -fno-rtti) 26 | 27 | # Now build our tools 28 | add_executable(${CURR_TARGET} ${SRC} ${INC}) 29 | 30 | # Find the libraries that correspond to the LLVM components 31 | # that we wish to 
use 32 | llvm_map_components_to_libnames(llvm_libs support core transformutils passes ipo) 33 | 34 | # Link against LLVM libraries 35 | target_link_libraries(${CURR_TARGET} ${llvm_libs}) 36 | -------------------------------------------------------------------------------- /ch5/your_first_pipeline/README.md: -------------------------------------------------------------------------------- 1 | # Build your first pass pipeline # 2 | 3 | In this exercise you will practice how to create your first pass pipeline with both the legacy and the new pass manager. 4 | 5 | Go to `your_turn/passPipelineWithLegacyPM.cpp` and `your_turn/passPipelineWithNewPM.cpp` to implement the pass pipeline for the legacy pass manager and the new pass manager respectively. 6 | 7 | For this exercise, you will need to implement a pass pipeline that consists of the three following optimizations, in this order: 8 | - The mem2reg pass (called PromoteMemoryToRegister) 9 | - The instcombine pass 10 | - The always inliner pass 11 | 12 | ## Configuring your build environment ## 13 | 14 | ```bash 15 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild . 16 | ``` 17 | 18 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option). 19 | 20 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed. 21 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package. 22 | 23 | ## Build ## 24 | 25 | ```bash 26 | ninja -C build 27 | ``` 28 | 29 | This builds the default target in the build directory. 30 | 31 | This should produce in the `build` directory a binary named `your_first_pipeline`. 32 | 33 | ## Run ## 34 | 35 | ```bash 36 | ./build/your_first_pipeline [input.ll|.bc] 37 | ``` 38 | 39 | This will run both the reference implementation and your implementation on `input.ll` if specified or the default input if not. 
40 | 41 | Check that the output looks similar for both your pipeline and the reference pipeline. 42 | 43 | ## Solution ## 44 | 45 | Look into the `solution` directory to see how to implement the connection of your optimization with both pass managers. 46 | 47 | ## Producing an input ## 48 | 49 | Using the following command line, you can produce from a C file an input to give to your program: 50 | ```bash 51 | clang -o - -S -emit-llvm test.c -O0 | sed -e 's#optnone##g' | <path/to/llvm/install>/bin/opt -S -passes=mem2reg,instnamer > input.ll 52 | ``` 53 | 54 | optnone => remove the attribute that prevents optimizations 55 | mem2reg => get rid of stack accesses / build SSA 56 | instnamer => get rid of the implicit variables 57 | -------------------------------------------------------------------------------- /ch5/your_first_pipeline/main.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/ADT/STLExtras.h" // For llvm::all_of. 2 | #include "llvm/ADT/STLFunctionalExtras.h" // For llvm::function_ref. 3 | #include "llvm/AsmParser/Parser.h" // For parseAssemblyString. 4 | #include "llvm/IR/Function.h" 5 | #include "llvm/IR/LLVMContext.h" 6 | #include "llvm/IR/LegacyPassManager.h" // For legacy::PassManager. 7 | #include "llvm/IR/Module.h" 8 | #include "llvm/IR/PassManager.h" // For the new PassManager. 9 | #include "llvm/IR/PassTimingInfo.h" // For reportAndResetTimings. 10 | #include "llvm/IR/Verifier.h" 11 | #include "llvm/IRReader/IRReader.h" // For parseIRFile. 12 | #include "llvm/Support/CommandLine.h" // For cl::. 13 | #include "llvm/Support/Debug.h" // For errs(). 14 | #include "llvm/Support/SourceMgr.h" // For SMDiagnostic. 15 | #include "llvm/Transforms/Utils/Cloning.h" // For CloneModule. 
16 | 17 | using namespace llvm; 18 | 19 | extern void runSolutionPassPipelineForLegacyPM(Module &); 20 | extern void runSolutionPassPipelineForNewPM(Module &); 21 | extern void runYourTurnPassPipelineForLegacyPM(Module &); 22 | extern void runYourTurnPassPipelineForNewPM(Module &); 23 | 24 | bool checkFunctionCorrectness(const llvm::Function &Res) { 25 | // verifyFunction returns true if it finds errors and 26 | // prints them on the provided output stream (errs() here). 27 | if (verifyFunction(Res, &errs())) { 28 | errs() << Res.getName() << " does not verify\n"; 29 | return false; 30 | } 31 | return true; 32 | } 33 | 34 | // Default input in case no file was provided. 35 | const char *InputIR = 36 | "define i32 @foo(i32 noundef %arg) {\n" 37 | "bb:\n" 38 | " %i = shl i32 5, 3\n" 39 | " %i1 = icmp ne i32 %arg, 0\n" 40 | " br i1 %i1, label %bb2, label %bb4\n" 41 | "\n" 42 | "bb2: ; preds = %bb\n" 43 | " %i3 = sdiv i32 %i, 5\n" 44 | " br label %bb6\n" 45 | "\n" 46 | "bb4: ; preds = %bb\n" 47 | " %i5 = or i32 %i, 3855\n" 48 | " br label %bb6\n" 49 | "\n" 50 | "bb6: ; preds = %bb4, %bb2\n" 51 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n" 52 | " ret i32 %.0\n" 53 | "}\n" 54 | "\n" 55 | "define i32 @bar(i32 noundef %arg) {\n" 56 | "bb:\n" 57 | " %i = shl i32 -1, 3\n" 58 | " %i1 = icmp ne i32 %arg, 0\n" 59 | " br i1 %i1, label %bb2, label %bb4\n" 60 | "\n" 61 | "bb2: ; preds = %bb\n" 62 | " %i3 = udiv i32 %i, 3\n" 63 | " br label %bb6\n" 64 | "\n" 65 | "bb4: ; preds = %bb\n" 66 | " %i5 = or i32 %i, 3855\n" 67 | " br label %bb6\n" 68 | "\n" 69 | "bb6: ; preds = %bb4, %bb2\n" 70 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n" 71 | " %i7 = add i32 %.0, 1\n" 72 | " ret i32 %i7\n" 73 | "}\n"; 74 | 75 | static cl::opt<std::string> InputFilename(cl::Positional, 76 | cl::desc("")); 77 | 78 | int main(int argc, char **argv) { 79 | LLVMContext Context; 80 | SMDiagnostic Err; 81 | std::unique_ptr<Module> MyModule; 82 | 83 | cl::ParseCommandLineOptions(argc, argv, "CH5 pipeline example\n"); 84 | 85 
| // To be able to play with the optimization a little bit, 86 | // support a mode where you can feed your own IR files. 87 | if (!InputFilename.empty()) { 88 | outs() << "Reading module from '" << InputFilename << "'\n"; 89 | MyModule = parseIRFile(InputFilename, Err, Context); 90 | } else { 91 | MyModule = parseAssemblyString(InputIR, Err, Context); 92 | } 93 | if (!MyModule) { 94 | errs() << "Unable to build module\n"; 95 | return -1; 96 | } 97 | 98 | function_ref<void(Module &)> functions[] = { 99 | runSolutionPassPipelineForLegacyPM, runSolutionPassPipelineForNewPM, 100 | runYourTurnPassPipelineForLegacyPM, runYourTurnPassPipelineForNewPM}; 101 | 102 | bool hadError = false; 103 | for (int i = 0; i < 4; ++i) { 104 | outs() << "Checking the run for " 105 | << ((i < 2) ? "Provided solution" : "Your turn"); 106 | outs() << " on the " << ((i & 1) ? "new" : "legacy") << " pass manager\n"; 107 | // Run the selected pipeline on a fresh clone of the input module. 108 | std::unique_ptr<Module> ModuleForLegacyPM = CloneModule(*MyModule); 109 | functions[i](*ModuleForLegacyPM); 110 | 111 | reportAndResetTimings(&outs()); 112 | 113 | bool solutionIsCorrect = 114 | all_of(ModuleForLegacyPM->functions(), 115 | [](const Function &F) { return checkFunctionCorrectness(F); }); 116 | hadError |= !solutionIsCorrect; 117 | } 118 | return hadError ? 1 : 0; // Exit status 0 means every function verified. 119 | } 120 | -------------------------------------------------------------------------------- /ch5/your_first_pipeline/solution/passPipelineWithLegacyPM.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/LegacyPassManager.h" // For legacy::PassManager. 2 | #include "llvm/Transforms/IPO/AlwaysInliner.h" // For inliner pass. 3 | #include "llvm/Transforms/InstCombine/InstCombine.h" // For instcombine pass. 4 | #include "llvm/Transforms/Utils.h" // For mem2reg pass. 
5 | 6 | using namespace llvm; 7 | 8 | void runSolutionPassPipelineForLegacyPM(Module &MyModule) { 9 | legacy::PassManager LegacyPM; 10 | 11 | LegacyPM.add(createPromoteMemoryToRegisterPass()); 12 | LegacyPM.add(createInstructionCombiningPass()); 13 | LegacyPM.add(createAlwaysInlinerLegacyPass()); 14 | 15 | LegacyPM.run(MyModule); 16 | } -------------------------------------------------------------------------------- /ch5/your_first_pipeline/solution/passPipelineWithNewPM.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/Module.h" 2 | #include "llvm/IR/PassManager.h" // For the new PassManager. 3 | #include "llvm/Passes/PassBuilder.h" 4 | #include "llvm/Passes/StandardInstrumentations.h" 5 | #include "llvm/Transforms/IPO/AlwaysInliner.h" // For inliner pass. 6 | #include "llvm/Transforms/InstCombine/InstCombine.h" // For instcombine pass. 7 | #include "llvm/Transforms/Utils/Mem2Reg.h" // For mem2reg pass. 8 | 9 | using namespace llvm; 10 | 11 | void runSolutionPassPipelineForNewPM(Module &MyModule) { 12 | LLVMContext &Context = MyModule.getContext(); 13 | // CAREFUL the order of the manager is important here since the destructor 14 | // needs to be called in the right order otherwise it will crash. 15 | FunctionAnalysisManager FAM; 16 | ModuleAnalysisManager MAM; 17 | 18 | PassInstrumentationCallbacks PIC; 19 | PrintPassOptions PrintPassOpts; 20 | PrintPassOpts.Verbose = true; 21 | PrintPassOpts.SkipAnalyses = false; 22 | PrintPassOpts.Indent = true; 23 | StandardInstrumentations SI(Context, /*DebugLogging=*/true, 24 | /*VerifyEachPass=*/false, PrintPassOpts); 25 | SI.registerCallbacks(PIC, &MAM); 26 | 27 | // Register the passes used implicitly at the start of the pipeline. 28 | // And enable logging. 
29 | MAM.registerPass([&] { return PassInstrumentationAnalysis(&PIC); }); 30 | FAM.registerPass([&] { return PassInstrumentationAnalysis(&PIC); }); 31 | // Well this one is not as implicit, we ask for it with the call to the 32 | // adaptor below. 33 | MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); }); 34 | FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); }); 35 | ModulePassManager NewPM; 36 | FunctionPassManager FPMgr; 37 | 38 | // Populate the XXXAnalysisManager with the IDs for all the passes. 39 | PassBuilder PB; 40 | PB.registerFunctionAnalyses(FAM); 41 | PB.registerModuleAnalyses(MAM); 42 | 43 | // Wrap the function pass into a module pass. 44 | #ifndef USE_MODULE_MGR 45 | FPMgr.addPass(PromotePass()); 46 | FPMgr.addPass(InstCombinePass()); 47 | #else 48 | NewPM.addPass(createModuleToFunctionPassAdaptor(PromotePass())); 49 | NewPM.addPass(createModuleToFunctionPassAdaptor(InstCombinePass())); 50 | #endif 51 | NewPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPMgr))); 52 | NewPM.addPass(AlwaysInlinerPass()); 53 | NewPM.run(MyModule, MAM); 54 | } 55 | -------------------------------------------------------------------------------- /ch5/your_first_pipeline/your_turn/passPipelineWithLegacyPM.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/LegacyPassManager.h" // For legacy::PassManager. 2 | 3 | using namespace llvm; 4 | 5 | void runYourTurnPassPipelineForLegacyPM(Module &MyModule) {} 6 | -------------------------------------------------------------------------------- /ch5/your_first_pipeline/your_turn/passPipelineWithNewPM.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/Module.h" 2 | #include "llvm/IR/PassManager.h" // For the new PassManager. 
3 | 4 | using namespace llvm; 5 | 6 | void runYourTurnPassPipelineForNewPM(Module &MyModule) {} 7 | -------------------------------------------------------------------------------- /ch6/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.22) 2 | 3 | set(CMAKE_CXX_STANDARD 17) 4 | 5 | project(CH6 6 | LANGUAGES CXX C) 7 | 8 | include(../cmake/utils/set-llvm-install-prefix.cmake) 9 | 10 | # Hook up the TableGen tooling. 11 | set(LLVM_TABLEGEN_EXE ${LLVM_TOOLS_BINARY_DIR}/llvm-tblgen) 12 | include(${LLVM_CMAKE_DIR}/AddLLVM.cmake) 13 | include(${LLVM_CMAKE_DIR}/TableGen.cmake) 14 | 15 | # Create the different TableGen outputs. 16 | set(LLVM_TARGET_DEFINITIONS my-first-gisel.td) 17 | tablegen(LLVM GlobalISel.inc -gen-global-isel) 18 | # The input is named person.td on disk; the case must match on case-sensitive file systems. 19 | set(LLVM_TARGET_DEFINITIONS person.td) 20 | tablegen(LLVM Person.inc -print-records) 21 | 22 | set(LLVM_TARGET_DEFINITIONS multiclass.td) 23 | tablegen(LLVM multiclass.inc -print-records) 24 | 25 | set(LLVM_TARGET_DEFINITIONS multiclass-with-def-type.td) 26 | tablegen(LLVM multiclass-with-def-type.inc -print-records) 27 | 28 | # Register a target for all the TableGen outputs. 29 | add_public_tablegen_target(CommonTableGen) 30 | 31 | # Make that target part of the `all` target. 32 | set_target_properties(CommonTableGen PROPERTIES EXCLUDE_FROM_ALL 0) 33 | -------------------------------------------------------------------------------- /ch6/README.md: -------------------------------------------------------------------------------- 1 | # Getting started with TableGen # 2 | 3 | This directory contains examples of TableGen inputs. 4 | 5 | The goal here is to familiarize yourself with the TableGen syntax and understand how the records are expanded. 6 | 7 | ## Configuring your build environment ## 8 | 9 | ```bash 10 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild . 
11 | ``` 12 | 13 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option). 14 | 15 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed. 16 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package. 17 | 18 | ## Build ## 19 | 20 | ```bash 21 | ninja -C build 22 | ``` 23 | 24 | This builds the default target in the build directory. 25 | 26 | This should produce in the `build` directory one `.inc` file per `.td` file in the source directory. 27 | 28 | Look at the produced `.inc` and the related `.td` and try to see if the expansion matches your understanding after reading Chapter 6. 29 | -------------------------------------------------------------------------------- /ch6/multiclass-with-def-type.td: -------------------------------------------------------------------------------- 1 | class Gift { 2 | string label = "Just a gift"; 3 | } 4 | 5 | multiclass Bundle<string base> { 6 | def A { 7 | string name = !strconcat(base, "-", "A"); 8 | int price = 12; 9 | int weight = 1; 10 | } 11 | def B { 12 | string name = !strconcat(base, "-", "B"); 13 | string tag = "special"; 14 | } 15 | def C: Gift { 16 | string color = "red"; 17 | } 18 | } 19 | 20 | class ShippingPrice<int arg> { 21 | int shippingPrice = arg; 22 | } 23 | 24 | defm valuedBundle : Bundle<"valued">, ShippingPrice<5>; 25 | 26 | def AnotherRecord { 27 | list<Gift> gifts = [valuedBundleC]; 28 | } 29 | -------------------------------------------------------------------------------- /ch6/multiclass.td: -------------------------------------------------------------------------------- 1 | multiclass Bundle<string base> { 2 | def A { 3 | string name = !strconcat(base, "-", "A"); 4 | int price = 12; 5 | int weight = 1; 6 | } 7 | def B { 8 | string name = !strconcat(base, "-", "B"); 9 | string tag = "special"; 10 | } 11 | } 12 | 13 | class ShippingPrice<int arg> { 14 | int shippingPrice = arg; 15 | } 16 | 17 | 
defm valuedBundle : Bundle<"valued">, ShippingPrice<5>; 18 | -------------------------------------------------------------------------------- /ch6/my-first-gisel.td: -------------------------------------------------------------------------------- 1 | include "llvm/Target/Target.td" 2 | 3 | def : HwMode<"", []>; 4 | def myii: InstrInfo; 5 | def : Target { 6 | let InstructionSet = myii; 7 | } 8 | def x0 : Register<"x0"> ; 9 | def GPR32 : RegisterClass<"myNamespace", [i32], 32, (add x0)>; 10 | -------------------------------------------------------------------------------- /ch6/person.td: -------------------------------------------------------------------------------- 1 | class Person { 2 | int _age = age; 3 | string _name = name; 4 | } 5 | 6 | def A : Person<23, "A">; 7 | def B : Person<64, "B">; 8 | def /*Anonym*/: Person<43>; 9 | -------------------------------------------------------------------------------- /ch7/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_STANDARD 17) 2 | 3 | cmake_minimum_required(VERSION 3.22) 4 | 5 | project(CH7 6 | LANGUAGES CXX C) 7 | 8 | include(../cmake/utils/set-llvm-install-prefix.cmake) 9 | 10 | set(CURR_TARGET 11 | check_vec_int_ty 12 | ) 13 | 14 | set(SRC 15 | "check_vec_int_ty.cpp" 16 | ) 17 | 18 | set(INC 19 | ) 20 | 21 | set(CMAKE_CXX_FLAGS -fno-rtti) 22 | 23 | # Now build our tools 24 | add_executable(${CURR_TARGET} ${SRC} ${INC}) 25 | 26 | # Find the libraries that correspond to the LLVM components 27 | # that we wish to use 28 | llvm_map_components_to_libnames(llvm_libs support core irreader) 29 | 30 | # Link against LLVM libraries 31 | target_link_libraries(${CURR_TARGET} ${llvm_libs}) 32 | -------------------------------------------------------------------------------- /ch7/README.md: -------------------------------------------------------------------------------- 1 | # Illustrate different LLVM IR constructs # 2 | 3 | This directory contains the examples 
that illustrate the different LLVM IR constructs introduced in Chapter 7. 4 | 5 | For the horizontal add example, please run: 6 | ```bash 7 | clang -emit-llvm hadd_vector.c -S -o - -O0 | sed -e 's#optnone##g' | <path/to/llvm/install>/bin/opt -S -passes=mem2reg,instnamer 8 | ``` 9 | 10 | Additionally the `check_vec_int_ty.cpp` file shows the different ways to check that a type is a vector of integers. 11 | 12 | Look at the implementation of `isVectorOfIntV1` and `isVectorOfIntV2` in this file. 13 | 14 | To run `check_vec_int_ty.cpp` follow the steps below. 15 | 16 | 17 | 18 | ## Configuring your build environment ## 19 | 20 | ```bash 21 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild . 22 | ``` 23 | 24 | ## To build ## 25 | 26 | ```bash 27 | ninja -Cbuild 28 | ``` 29 | 30 | ## To run ## 31 | 32 | ```bash 33 | ./build/check_vec_int_ty [input.ll] 34 | ``` 35 | 36 | You can provide an input for `check_vec_int_ty` if you want. 37 | 38 | 39 | ## To produce an input for the `check_vec_int_ty` executable ## 40 | 41 | To produce an input: 42 | ```bash 43 | clang -o - -S -emit-llvm test.c -O0 | sed -e 's#optnone##g' | <path/to/llvm/install>/bin/opt -S -passes=mem2reg,instnamer > input.ll 44 | ``` 45 | 46 | optnone => remove the attribute that prevents optimizations 47 | mem2reg => get rid of stack accesses / build SSA 48 | instnamer => get rid of the implicit variables 49 | 50 | -------------------------------------------------------------------------------- /ch7/access_struct_type.ll: -------------------------------------------------------------------------------- 1 | %my.type = type {i32, {ptr, half}, {i32, i1, i1}} 2 | 3 | define half @useOfMyType(ptr %dst) { 4 | %addr_half_field = getelementptr inbounds %my.type, ptr %dst, i64 0, i32 1, i32 1 5 | %res = load half, ptr %addr_half_field, align 8 6 | ret half %res 7 | } 8 | -------------------------------------------------------------------------------- /ch7/anonymous_type.ll: 
-------------------------------------------------------------------------------- 1 | define void @useOfUnknownType(ptr %dst) { 2 | store {i32, ptr} zeroinitializer, ptr %dst, align 8 3 | ret void 4 | } 5 | -------------------------------------------------------------------------------- /ch7/array_type.ll: -------------------------------------------------------------------------------- 1 | define void @useOfArrayType(ptr %dst) { 2 | store [12 x [36 x i32]] zeroinitializer, ptr %dst, align 8 3 | ret void 4 | } 5 | -------------------------------------------------------------------------------- /ch7/check_vec_int_ty.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/AsmParser/Parser.h" // For parseAssemblyString. 2 | #include "llvm/IR/DerivedTypes.h" // For VectorType and IntegerType. 3 | #include "llvm/IR/Instruction.h" 4 | #include "llvm/IR/LLVMContext.h" 5 | #include "llvm/IR/Module.h" 6 | #include "llvm/IR/Type.h" 7 | #include "llvm/IR/Verifier.h" 8 | #include "llvm/IRReader/IRReader.h" // For parseIRFile. 9 | #include "llvm/Support/Debug.h" // For errs(). 10 | #include "llvm/Support/SourceMgr.h" // For SMDiagnostic. 11 | 12 | using namespace llvm; 13 | 14 | // Default input in case no file was provided. 
15 | const char *InputIR = 16 | "define <2 x i32> @foo(i32 %src) {\n" 17 | "bb:\n" 18 | " %simd = insertelement <2 x i32> poison, i32 %src, i32 0\n" 19 | " %input = insertelement <2 x i32> %simd, i32 %src, i32 1\n" 20 | " %res = add <2 x i32> %input, %input\n" 21 | " ret <2 x i32> %res\n" 22 | "}\n"; 23 | // V1: check the result type with dyn_cast/isa on the concrete Type subclasses. 24 | bool isVectorOfIntV1(Instruction &Add) { 25 | auto *VecTy = dyn_cast<VectorType>(Add.getType()); 26 | return VecTy && isa<IntegerType>(VecTy->getElementType()); 27 | } 28 | // V2: same check, using only the query helpers exposed by llvm::Type. 29 | bool isVectorOfIntV2(Instruction &Add) { 30 | Type *Ty = Add.getType(); 31 | return Ty->isVectorTy() && Ty->getScalarType()->isIntegerTy(); 32 | } 33 | 34 | int main(int argc, char **argv) { 35 | LLVMContext Context; 36 | SMDiagnostic Err; 37 | std::unique_ptr<Module> MyModule; 38 | // To be able to play with the optimization a little bit, 39 | // support a mode where you can feed your own IR files. 40 | if (argc == 2) { 41 | outs() << "Reading module from '" << argv[1] << "'\n"; 42 | MyModule = parseIRFile(argv[1], Err, Context); 43 | } else { 44 | MyModule = parseAssemblyString(InputIR, Err, Context); 45 | } 46 | if (!MyModule) { 47 | errs() << "Unable to build module\n"; 48 | return -1; 49 | } 50 | 51 | for (Function &Func : *MyModule) { 52 | for (BasicBlock &BB : Func) { 53 | for (Instruction &Inst : BB) { 54 | if (Inst.getOpcode() != Instruction::Add) 55 | continue; 56 | bool IsVecIntTy1 = isVectorOfIntV1(Inst); 57 | bool IsVecIntTy2 = isVectorOfIntV2(Inst); 58 | if (IsVecIntTy1 != IsVecIntTy2) 59 | errs() << "Found a mismatch with v1 & v2 for " << Inst << '\n'; 60 | if (IsVecIntTy1) 61 | outs() << "Found an add of vector int:\n" << Inst << '\n'; 62 | } 63 | } 64 | } 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /ch7/datalayout_alignment.ll: -------------------------------------------------------------------------------- 1 | define i32 @foo(ptr %src) { 2 | %res = load i32, ptr %src 3 | ret i32 %res 4 | } 5 | 
-------------------------------------------------------------------------------- /ch7/endianness.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | /* Both helpers accumulate in an unsigned value and read each byte as unsigned char, so bytes >= 0x80 are not sign-extended. */ 3 | int buildIntLittleEndian(const char *input) { 4 | unsigned res = 0; 5 | for (unsigned i = 0; i < sizeof(res); ++i) { 6 | res |= (unsigned)(unsigned char)input[i] << (8 * i); 7 | } 8 | return (int)res; 9 | } 10 | 11 | int buildIntBigEndian(const char *input) { 12 | unsigned res = 0; 13 | for (unsigned i = 0; i < sizeof(res); ++i) { 14 | res <<= 8; 15 | res |= (unsigned char)input[i]; 16 | } 17 | return (int)res; 18 | } 19 | 20 | int main() { 21 | char input[] = {0x01, 0x02, 0x03, 0x04}; 22 | int val = buildIntLittleEndian(input); 23 | int valBig = buildIntBigEndian(input); 24 | printf("little: 0x%08x\nbig: 0x%08x\n", val, valBig); 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /ch7/full_example.ll: -------------------------------------------------------------------------------- 1 | define i32 @foo(i32, i32, i32 %arg) noinline noreturn { 2 | entry: 3 | %myid = add i32 %0, %1 4 | %31 = mul i32 %myid, 2 5 | %45 = shl i32 %31, 5 6 | %"00~random~00" = udiv i32 %45, %arg 7 | br label %46 8 | 9 | br label %47 10 | 11 | 47: 12 | ret i32 %"00~random~00" 13 | } 14 | 15 | !0 = !{!"zero"} 16 | !2 = !{!"two"} 17 | !1 = !{!"one"} 18 | ; A named metadata. 
19 | !name = !{!0, !1, !2} 20 | -------------------------------------------------------------------------------- /ch7/hadd_vector.c: -------------------------------------------------------------------------------- 1 | typedef struct { 2 | int a, b, c, d; 3 | } Vec4; 4 | 5 | int hadd(const Vec4 *input) { 6 | return input->a + input->b + input->c + input->d; 7 | } 8 | -------------------------------------------------------------------------------- /ch7/impact_of_abi.c: -------------------------------------------------------------------------------- 1 | typedef struct { 2 | int a, b, c, d; 3 | } BigStruct; 4 | 5 | BigStruct bigStructReturned() { 6 | BigStruct big = {0, 0, 0, 0}; 7 | return big; 8 | } 9 | -------------------------------------------------------------------------------- /ch7/named_type.ll: -------------------------------------------------------------------------------- 1 | %my.type = type { i32, ptr } 2 | 3 | define void @useOfMyType(ptr %dst) { 4 | store %my.type zeroinitializer, ptr %dst, align 8 5 | ret void 6 | } 7 | -------------------------------------------------------------------------------- /ch8/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_STANDARD 17) 2 | 3 | cmake_minimum_required(VERSION 3.22) 4 | 5 | project(CH8 6 | LANGUAGES CXX C) 7 | 8 | include(../cmake/utils/set-llvm-install-prefix.cmake) 9 | include(../cmake/utils/opt-run.cmake) 10 | 11 | set(CURR_TARGET 12 | produce-ir 13 | ) 14 | 15 | set(OPT_RUN_DEPENDENCIES) 16 | 17 | # Add the canonicalization examples. 18 | add_run_passes(OPT_RUN_DEPENDENCIES "instcombine" canonical_form.ll) 19 | add_run_passes(OPT_RUN_DEPENDENCIES "instcombine" xor.ll) 20 | # Add the value tracking examples. 21 | add_run_passes(OPT_RUN_DEPENDENCIES "instcombine" value_tracking.ll) 22 | 23 | # Add all the examples which have the same filename as the pass name. 
24 | set(standard_passes 25 | lcssa 26 | argpromotion 27 | dce 28 | deadargelim 29 | indvars 30 | inline 31 | licm 32 | load-store-vectorizer 33 | loop-reduce 34 | loop-unroll 35 | loop-vectorize 36 | reassociate 37 | simplifycfg 38 | slp-vectorizer 39 | ) 40 | 41 | foreach(standard_pass IN LISTS standard_passes) 42 | add_run_passes(OPT_RUN_DEPENDENCIES "${standard_pass}" ${standard_pass}.ll) 43 | endforeach() 44 | 45 | # Create a custom target that depends on the generated files 46 | add_custom_target(${CURR_TARGET} 47 | DEPENDS ${OPT_RUN_DEPENDENCIES} 48 | ) 49 | 50 | set_target_properties(${CURR_TARGET} PROPERTIES EXCLUDE_FROM_ALL 0) 51 | -------------------------------------------------------------------------------- /ch8/README.md: -------------------------------------------------------------------------------- 1 | In this chapter you can discover the transformations performed by some of the optimization passes. 2 | 3 | To see that in action, first set up your build directory: 4 | ```bash 5 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild . 6 | ``` 7 | 8 | Then, when you invoke `ninja`, it will run all the passes exercised in this folder. 9 | ```bash 10 | ninja -Cbuild 11 | ``` 12 | 13 | This will produce files named `xx.out.ll` in the `build` directory. 14 | 15 | For each `xx.out.ll` file, you'll find the input IR in this chapter's directory with the name `xx.ll`. 16 | For instance, the input IR of `dce.out.ll` is `dce.ll`. 17 | 18 | The output file is produced by applying the pass with the same name as the input file to the input file. 19 | E.g., we apply the `dce` pass to the `dce.ll` input file to produce `dce.out.ll`. 20 | 21 | A few files do not follow this convention. 22 | These are `xor.ll`, `canonical_form.ll`, and `value_tracking.ll`. 23 | All three files exercise `instcombine`. 24 | 25 | In any case, you can see the command used to produce the output by running the `ninja` command with the `-v` option. 
26 | 27 | Also if you want to rerun one (or several) passes, you can just modify the related input IR file (e.g., by using `touch xx.ll`) and rerun the Ninja command. 28 | 29 | For this chapter, look at the differences between the input and output IRs to see what happened. 30 | You can for instance use the following command: 31 | ```bash 32 | diff -U10 xx.ll build/xx.out.ll 33 | ``` 34 | -------------------------------------------------------------------------------- /ch8/argpromotion.ll: -------------------------------------------------------------------------------- 1 | define i64 @foo() { 2 | %local = alloca i64 3 | store i64 2, ptr %local 4 | %res = call i64 @bar(ptr %local) 5 | ret i64 %res 6 | } 7 | 8 | define internal i64 @bar(ptr %local) { 9 | %val = load i64, ptr %local 10 | %res = add i64 %val, 2 11 | ret i64 %res 12 | } 13 | 14 | -------------------------------------------------------------------------------- /ch8/canonical_form.ll: -------------------------------------------------------------------------------- 1 | define i64 @canonical_form(i64 %b, i64 %c) { 2 | %a = sub i64 %b, %c 3 | ret i64 %a 4 | } 5 | 6 | define i64 @non_canonical_form(i64 %b, i64 %c) { 7 | %neg_c = sub i64 0, %c 8 | %a = add i64 %b, %neg_c 9 | ret i64 %a 10 | } 11 | -------------------------------------------------------------------------------- /ch8/dce.ll: -------------------------------------------------------------------------------- 1 | define i64 @foo(i64 %in) { 2 | %dead = add i64 %in, %in 3 | %res = mul i64 %in, 2 4 | ret i64 %res 5 | } 6 | -------------------------------------------------------------------------------- /ch8/deadargelim.ll: -------------------------------------------------------------------------------- 1 | define i64 @foo() { 2 | %local = alloca i64 3 | %local2 = alloca i64 4 | store i64 2, ptr %local 5 | store i64 2, ptr %local2 6 | %res = call i64 @bar(ptr %local, ptr %local2) 7 | ret i64 %res 8 | } 9 | 10 | define internal i64 @bar(ptr %local, ptr 
%local2) { 11 | %val = load i64, ptr %local 12 | %res = add i64 %val, 2 13 | ret i64 %res 14 | } 15 | 16 | -------------------------------------------------------------------------------- /ch8/indvars.ll: -------------------------------------------------------------------------------- 1 | define i64 @foo(i64 %src, i64 %ub) { 2 | entry: 3 | br label %loop 4 | 5 | loop: 6 | %iv = phi i64 [0, %entry], [%iv1, %loop] 7 | %iv1 = add i64 %iv, 1 8 | %cond = icmp ult i64 %iv1, %ub 9 | br i1 %cond, label %loop, label %end 10 | 11 | end: 12 | %tmp = add i64 %iv1, %src 13 | %res = add i64 %tmp, %iv1 14 | ret i64 %res 15 | } 16 | -------------------------------------------------------------------------------- /ch8/inline.ll: -------------------------------------------------------------------------------- 1 | define i64 @foo() { 2 | %local = alloca i64 3 | store i64 2, ptr %local 4 | %res = call i64 @bar(ptr %local) 5 | ret i64 %res 6 | } 7 | 8 | define i64 @bar(ptr %local) { 9 | %val = load i64, ptr %local 10 | %res = add i64 %val, 2 11 | ret i64 %res 12 | } 13 | 14 | -------------------------------------------------------------------------------- /ch8/lcssa.ll: -------------------------------------------------------------------------------- 1 | define i64 @def_in_loop_use_outside(i64 %src, i64 %upper_bound) { 2 | entry: 3 | br label %loop 4 | 5 | loop: 6 | %iv = phi i64 [0, %entry], [%iv_plus_1, %loop] 7 | %iv_plus_1 = add i64 %iv, 1 8 | %cond = icmp ult i64 %iv_plus_1, %upper_bound 9 | br i1 %cond, label %loop, label %end 10 | 11 | end: 12 | %tmp = add i64 %iv_plus_1, %src 13 | %res = add i64 %tmp, %iv_plus_1 14 | ret i64 %res 15 | } 16 | -------------------------------------------------------------------------------- /ch8/licm.ll: -------------------------------------------------------------------------------- 1 | define i64 @foo(i64 %src, i64 %ub, ptr %addr) { 2 | entry: 3 | br label %loop 4 | 5 | loop: 6 | %iv = phi i64 [0, %entry], [%iv1, %loop] 7 | %offset = load i64, 
ptr %addr 8 | %iv1 = add i64 %iv, %offset 9 | %cond = icmp ult i64 %iv1, %ub 10 | br i1 %cond, label %loop, label %end 11 | 12 | end: 13 | %res = add i64 %src, %iv1 14 | ret i64 %res 15 | } 16 | -------------------------------------------------------------------------------- /ch8/load-store-vectorizer.ll: -------------------------------------------------------------------------------- 1 | target triple="aarch64-apple-ios" 2 | 3 | define void @bar(ptr %src, ptr %dst) { 4 | %v0 = load i64, ptr %src 5 | %src1 = getelementptr i64, ptr %src, i64 1 6 | %v1 = load i64, ptr %src1 7 | store i64 %v0, ptr %dst 8 | %dst1 = getelementptr i64, ptr %dst, i64 1 9 | store i64 %v1, ptr %dst1 10 | ret void 11 | } 12 | -------------------------------------------------------------------------------- /ch8/loop-reduce.c: -------------------------------------------------------------------------------- 1 | long long foo(long long *in, long long ub) { 2 | for (long long i = 0; i < ub; ++i) 3 | if (in[i] != 0) 4 | return i; 5 | return -1; 6 | } 7 | -------------------------------------------------------------------------------- /ch8/loop-reduce.ll: -------------------------------------------------------------------------------- 1 | target triple = "aarch64-apple-ios" 2 | 3 | define i64 @foo(ptr %arg, i64 %ub) { 4 | bb: 5 | br label %bb3 6 | 7 | bb3: 8 | %idx = phi i64 [ 0, %bb ], [ %i9, %bb8 ] 9 | %i = icmp slt i64 %idx, %ub 10 | br i1 %i, label %bb4, label %bb10 11 | 12 | bb4: 13 | %i5 = getelementptr inbounds i64, ptr %arg, i64 %idx 14 | %i6 = load i64, ptr %i5 15 | %i7 = icmp ne i64 %i6, 0 16 | br i1 %i7, label %bb10, label %bb8 17 | 18 | bb8: 19 | %i9 = add nsw i64 %idx, 1 20 | br label %bb3 21 | 22 | bb10: 23 | %res = phi i64 [ %idx, %bb4 ], [ -1, %bb3 ] 24 | ret i64 %res 25 | } 26 | -------------------------------------------------------------------------------- /ch8/loop-unroll.ll: -------------------------------------------------------------------------------- 1 | define i64 @foo(ptr 
%arg) { 2 | bb: 3 | br label %bb3 4 | 5 | bb3: 6 | %idx = phi i64 [ 0, %bb ], [ %i9, %bb8 ] 7 | %i = icmp slt i64 %idx, 3 8 | br i1 %i, label %bb4, label %bb10 9 | 10 | bb4: 11 | %i5 = getelementptr inbounds i64, ptr %arg, i64 %idx 12 | %i6 = load i64, ptr %i5 13 | %i7 = icmp ne i64 %i6, 0 14 | br i1 %i7, label %bb10, label %bb8 15 | 16 | bb8: 17 | %i9 = add nsw i64 %idx, 1 18 | br label %bb3 19 | 20 | bb10: 21 | %res = phi i64 [ %idx, %bb4 ], [ -1, %bb3 ] 22 | ret i64 %res 23 | } 24 | -------------------------------------------------------------------------------- /ch8/loop-vectorize.c: -------------------------------------------------------------------------------- 1 | void foo(short * restrict dst, short * restrict A, short * restrict B ) { 2 | for (long long i = 0; i != 24; ++i) 3 | dst[i] = A[i] + B[i]; 4 | } 5 | -------------------------------------------------------------------------------- /ch8/loop-vectorize.ll: -------------------------------------------------------------------------------- 1 | target triple = "arm64-apple-macosx14.0.0" 2 | 3 | 4 | define void @foo(ptr noalias noundef %arg, ptr noalias noundef %arg1, ptr noalias noundef %arg2) { 5 | bb: 6 | br label %bb3 7 | 8 | bb3: 9 | %idx = phi i64 [ 0, %bb ], [ %i14, %bb4 ] 10 | %i = icmp ne i64 %idx, 24 11 | br i1 %i, label %bb4, label %bb15 12 | 13 | bb4: 14 | %i5 = getelementptr inbounds i16, ptr %arg1, i64 %idx 15 | %i6 = load i16, ptr %i5 16 | %i7 = sext i16 %i6 to i32 17 | %i8 = getelementptr inbounds i16, ptr %arg2, i64 %idx 18 | %i9 = load i16, ptr %i8 19 | %i10 = sext i16 %i9 to i32 20 | %i11 = add nsw i32 %i7, %i10 21 | %i12 = trunc i32 %i11 to i16 22 | %i13 = getelementptr inbounds i16, ptr %arg, i64 %idx 23 | store i16 %i12, ptr %i13 24 | %i14 = add nsw i64 %idx, 1 25 | br label %bb3 26 | 27 | bb15: 28 | ret void 29 | } 30 | -------------------------------------------------------------------------------- /ch8/reassociate.ll: 
-------------------------------------------------------------------------------- 1 | define i64 @foo(i64 %in0, i64 %in1) { 2 | %v0 = add i64 %in0, %in1 3 | %v1 = add i64 %v0, 2 4 | %v2 = sub i64 %v1, %in1 5 | ret i64 %v2 6 | } 7 | -------------------------------------------------------------------------------- /ch8/simplifycfg.ll: -------------------------------------------------------------------------------- 1 | define i64 @foo(i64 %src, i64 %ub) { 2 | entry: 3 | br label %loop 4 | 5 | loop: 6 | br i1 false, label %loop, label %end 7 | 8 | end: 9 | %umax = call i64 @llvm.umax.i64(i64 %ub, i64 1) 10 | %tmp = add i64 %umax, %src 11 | %res = add i64 %tmp, %umax 12 | ret i64 %res 13 | } 14 | 15 | declare i64 @llvm.umax.i64(i64, i64) 16 | -------------------------------------------------------------------------------- /ch8/slp-vectorizer.ll: -------------------------------------------------------------------------------- 1 | ;target triple="aarch64-apple-ios" 2 | 3 | define <2 x i64> @foo(i64 %in0, i64 %in1) { 4 | %v0 = add i64 %in0, 2 5 | %v1 = add i64 %in1, 5 6 | %partial = insertelement <2 x i64> poison, i64 %v0, i32 0 7 | %res = insertelement <2 x i64> %partial, i64 %v1, i32 1 8 | ret <2 x i64> %res 9 | } 10 | -------------------------------------------------------------------------------- /ch8/value_tracking.ll: -------------------------------------------------------------------------------- 1 | define i1 @foo(i64 %b) { 2 | %a = and i64 %b, u0xfffffffffffffffc 3 | %mod = urem i64 %a, 2 4 | %cond = icmp eq i64 %mod, 0 5 | ret i1 %cond 6 | } 7 | -------------------------------------------------------------------------------- /ch8/xor.ll: -------------------------------------------------------------------------------- 1 | define i64 @xor(i64 %x) { 2 | %res = xor i64 %x, %x 3 | ret i64 %res 4 | } 5 | -------------------------------------------------------------------------------- /cmake/utils/llc-run.cmake: 
-------------------------------------------------------------------------------- 1 | # Add a custom command to generate the output by running passes with llc. 2 | # PARENT_VAR is used to accumulate the targets that need to be built. An optional 4th argument is prefixed to the output name; llc's stdout/stderr then go to <output>.txt instead of /dev/null. 3 | function(add_run_llc PARENT_VAR LLC_ARGS INPUT) 4 | set(INPUT_FULL_PATH ${CMAKE_SOURCE_DIR}/${INPUT}) 5 | string(REPLACE ".ll" ".out.mir" OUTPUT ${INPUT}) 6 | # Spaces are escaped in CMake. 7 | # To issue spaces, we need to use a list of values. 8 | # Do the translation here so that at the high level it remains natural and 9 | # we can use spaces to declare our command line options. 10 | string(REPLACE " " ";" LLC_ARGS "${LLC_ARGS}") # Quoted: an empty or ;-separated LLC_ARGS must stay a single input argument. 11 | if(${ARGC} GREATER 3) 12 | set(OUTPUT "${ARGV3}_${OUTPUT}") 13 | endif() 14 | set(OUTPUT_FULL_PATH ${CMAKE_BINARY_DIR}/${OUTPUT}) 15 | set(REDIRECT "/dev/null") 16 | if(${ARGC} GREATER 3) 17 | set(REDIRECT "${OUTPUT_FULL_PATH}.txt") 18 | endif() 19 | add_custom_command( 20 | OUTPUT ${OUTPUT} 21 | COMMAND ${LLVM_TOOLS_BINARY_DIR}/llc ${LLC_ARGS} ${INPUT_FULL_PATH} -o ${OUTPUT_FULL_PATH} > ${REDIRECT} 2>&1 22 | DEPENDS ${INPUT} 23 | COMMENT "Generating code with llc" 24 | ) 25 | list(APPEND ${PARENT_VAR} ${OUTPUT}) 26 | set(${PARENT_VAR} ${${PARENT_VAR}} PARENT_SCOPE) 27 | endfunction() 28 | -------------------------------------------------------------------------------- /cmake/utils/opt-run.cmake: -------------------------------------------------------------------------------- 1 | # Add a custom command to generate the output by running passes with opt. 2 | # PARENT_VAR is used to accumulate the targets that needs to be built. 3 | function(add_run_opt PARENT_VAR OPT_ARGS INPUT) 4 | set(INPUT_FULL_PATH ${CMAKE_SOURCE_DIR}/${INPUT}) 5 | string(REPLACE ".ll" ".out.ll" OUTPUT ${INPUT}) 6 | # Spaces are escaped in CMake. 7 | # To issue spaces, we need to use a list of values.
8 | # Do the translation here so that at the high level it remains natural and 9 | # we can use spaces to declare our command line options. 10 | string(REPLACE " " ";" OPT_ARGS "${OPT_ARGS}") # Quoted: an empty or ;-separated OPT_ARGS must stay a single input argument. 11 | if(${ARGC} GREATER 3) 12 | set(OUTPUT "${ARGV3}_${OUTPUT}") 13 | endif() 14 | set(OUTPUT_FULL_PATH ${CMAKE_BINARY_DIR}/${OUTPUT}) 15 | set(REDIRECT "/dev/null") 16 | if(${ARGC} GREATER 3) 17 | set(REDIRECT "${OUTPUT_FULL_PATH}.txt") 18 | endif() 19 | add_custom_command( 20 | OUTPUT ${OUTPUT} 21 | COMMAND ${LLVM_TOOLS_BINARY_DIR}/opt -S ${OPT_ARGS} ${INPUT_FULL_PATH} -o ${OUTPUT_FULL_PATH} > ${REDIRECT} 2>&1 22 | DEPENDS ${INPUT} 23 | COMMENT "Generating code with opt" 24 | ) 25 | list(APPEND ${PARENT_VAR} ${OUTPUT}) 26 | set(${PARENT_VAR} ${${PARENT_VAR}} PARENT_SCOPE) 27 | endfunction() 28 | 29 | function(add_run_passes PARENT_VAR OPT_ARGS INPUT) 30 | set(OPT_ARGS_W_PASS "-passes=${OPT_ARGS}") 31 | add_run_opt(${PARENT_VAR} "${OPT_ARGS_W_PASS}" ${INPUT}) # Quoted so the option string always lands in add_run_opt's OPT_ARGS parameter. 32 | # Propagate the result back one level. 33 | set(${PARENT_VAR} ${${PARENT_VAR}} PARENT_SCOPE) 34 | endfunction() 35 | 36 | -------------------------------------------------------------------------------- /cmake/utils/set-llvm-install-prefix.cmake: -------------------------------------------------------------------------------- 1 | # Helper cmake file to set the include and library search paths of LLVM. 2 | find_package(LLVM REQUIRED CONFIG) 3 | 4 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 5 | message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") 6 | 7 | include_directories(${LLVM_INCLUDE_DIRS}) 8 | separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) 9 | add_definitions(${LLVM_DEFINITIONS_LIST}) 10 | 11 | --------------------------------------------------------------------------------