├── LICENSE
├── README.md
├── ch1
├── FileCheckExamples
│ ├── README.md
│ ├── ex1
│ │ ├── README.md
│ │ ├── check-file.txt
│ │ ├── input.txt
│ │ └── run.sh
│ ├── ex2
│ │ ├── README.md
│ │ ├── check-file.txt
│ │ ├── input.txt
│ │ └── run.sh
│ ├── ex3
│ │ ├── README.md
│ │ ├── check-file.txt
│ │ ├── input.txt
│ │ └── run.sh
│ ├── ex4
│ │ ├── README.md
│ │ ├── check-file.txt
│ │ ├── input.txt
│ │ └── run.sh
│ └── ex5
│ │ ├── README.md
│ │ ├── check-file.txt
│ │ ├── input.txt
│ │ └── run.sh
├── README.md
├── quiz
│ └── ex8-9
│ │ ├── README.md
│ │ ├── check-file.txt
│ │ ├── input.txt
│ │ └── run.sh
└── setup_env.sh
├── ch10
├── debug_capabilities
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── before_slp_vectorizer_hadd_vector.ll
│ └── hadd_vector.ll
├── incorrect_optimization
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── bugged_input.ll
│ ├── bugpoint-check.sh
│ ├── check.sh
│ ├── main.cpp
│ └── populate_function.cpp
├── undefined_behavior
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── main.cpp
│ └── populate_function.cpp
└── use_after_free
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── main.cpp
│ └── populate_function.cpp
├── ch11
├── instr_info
│ ├── CMakeLists.txt
│ ├── MyTargetInstrInfo.cpp
│ ├── MyTargetInstrInfo.h
│ ├── MyTargetRegisterInfo.cpp
│ ├── MyTargetRegisterInfo.h
│ ├── README.md
│ ├── instrinfo.td
│ ├── main.cpp
│ ├── mytarget.td
│ ├── reginfo.td
│ ├── solution-instrinfo.td
│ └── yourTurn-instrinfo.td
├── mir_format
│ ├── README.md
│ ├── full-dump.mir
│ ├── input.ll
│ ├── shrunk-dump.mir
│ └── simplified-dump.mir
└── register_units
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── SolutionRegisterInfo.cpp
│ ├── SolutionRegisterInfo.h
│ ├── YourTurnRegisterInfo.cpp
│ ├── YourTurnRegisterInfo.h
│ ├── main.cpp
│ ├── solution-reginfo.td
│ └── yourTurn-reginfo.td
├── ch13
├── CMakeLists.txt
├── README.md
└── input.ll
├── ch3
├── README.md
├── input.c
├── irreducible.c
├── llvm_ir
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── input.c
│ ├── main.cpp
│ ├── solution
│ │ └── populate_function.cpp
│ └── your_turn
│ │ └── populate_function.cpp
└── machineir
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── main.cpp
│ ├── solution
│ └── populate_function.cpp
│ └── your_turn
│ └── populate_function.cpp
├── ch4
├── implicit_func_scope_change
│ ├── CMakeLists.txt
│ ├── README.md
│ └── main.cpp
└── simple_cst_propagation
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── input.c
│ ├── main.cpp
│ ├── solution
│ └── populate_function.cpp
│ └── your_turn
│ └── populate_function.cpp
├── ch5
├── your_first_pass
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── main.cpp
│ ├── solution
│ │ ├── passWithLegacyPM.cpp
│ │ ├── passWithNewPM.cpp
│ │ └── passWithNewPM.h
│ └── your_turn
│ │ ├── passWithLegacyPM.cpp
│ │ ├── passWithNewPM.cpp
│ │ └── passWithNewPM.h
└── your_first_pipeline
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── main.cpp
│ ├── solution
│ ├── passPipelineWithLegacyPM.cpp
│ └── passPipelineWithNewPM.cpp
│ └── your_turn
│ ├── passPipelineWithLegacyPM.cpp
│ └── passPipelineWithNewPM.cpp
├── ch6
├── CMakeLists.txt
├── README.md
├── multiclass-with-def-type.td
├── multiclass.td
├── my-first-gisel.td
└── person.td
├── ch7
├── CMakeLists.txt
├── README.md
├── access_struct_type.ll
├── anonymous_type.ll
├── array_type.ll
├── check_vec_int_ty.cpp
├── datalayout_alignment.ll
├── endianness.c
├── full_example.ll
├── hadd_vector.c
├── impact_of_abi.c
└── named_type.ll
├── ch8
├── CMakeLists.txt
├── README.md
├── argpromotion.ll
├── canonical_form.ll
├── dce.ll
├── deadargelim.ll
├── indvars.ll
├── inline.ll
├── lcssa.ll
├── licm.ll
├── load-store-vectorizer.ll
├── loop-reduce.c
├── loop-reduce.ll
├── loop-unroll.ll
├── loop-vectorize.c
├── loop-vectorize.ll
├── reassociate.ll
├── simplifycfg.ll
├── slp-vectorizer.ll
├── value_tracking.ll
└── xor.ll
└── cmake
└── utils
├── llc-run.cmake
├── opt-run.cmake
└── set-llvm-install-prefix.cmake
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Packt
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LLVM Code Generation, First Edition
2 |
3 | A deep dive into compiler backend development
4 |
5 | Quentin Colombet
6 |
7 | This is the code repository for LLVM Code Generation, First Edition , published by Packt.
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | About the book
22 |
23 |
24 |
25 |
26 | The LLVM infrastructure is a popular compiler ecosystem widely used in the tech industry and academia. This technology is crucial for both experienced and aspiring compiler developers looking to make an impact in the field. Written by Quentin Colombet, a veteran LLVM contributor and architect of the GlobalISel framework, this book provides a primer on the main aspects of LLVM, with an emphasis on its backend infrastructure; that is, everything needed to transform the intermediate representation (IR) produced by frontends like Clang into assembly code and object files.
27 | You’ll learn how to write an optimizing code generator for a toy backend in LLVM. The chapters will guide you step by step through building this backend while exploring key concepts, such as the ABI, cost model, and register allocation. You’ll also find out how to express these concepts using LLVM's existing infrastructure and how established backends address these challenges. Furthermore, the book features code snippets that demonstrate the actual APIs.
28 | By the end of this book, you’ll have gained a deeper understanding of LLVM. The concepts presented are expected to remain stable across different LLVM versions, making this book a reliable quick reference guide for understanding LLVM.
29 |
30 | Key Learnings
31 |
32 |
33 | Understand essential compiler concepts, such as SSA, dominance, and ABI
34 |
35 | Build and extend LLVM backends for creating custom compiler features
36 |
37 | Optimize code by manipulating LLVM's Intermediate Representation
38 |
39 | Contribute effectively to LLVM open-source projects and development
40 |
41 | Develop debugging skills for LLVM optimizations and passes
42 |
43 | Grasp how encoding and (dis)assembling work in the context of compilers
44 |
45 | Utilize LLVM's TableGen DSL for creating custom compiler models
46 |
47 |
48 |
49 |
50 |
51 |
52 | Chapters
53 |
54 |
55 |
56 | Building LLVM and Understanding the Directory Structure
57 |
58 | Contributing to LLVM
59 |
60 | Compiler Basics and How They Map to LLVM APIs
61 |
62 | Writing Your First Optimization
63 |
64 | Dealing with Pass Managers
65 |
66 | TableGen – LLVM Swiss Army Knife for Modeling
67 |
68 | Understanding LLVM IR
69 |
70 | Survey of the Existing Passes
71 |
72 | Introducing Target-Specific Constructs
73 |
74 | Hands-On Debugging LLVM IR Passes
75 |
76 | Getting Started with the Backend
77 |
78 | Getting Started with the Machine Code Layer
79 |
80 | The Machine Pass Pipeline
81 |
82 | Getting Started with Instruction Selection
83 |
84 | Instruction Selection: The IR Building Phase
85 |
86 | Instruction Selection: The Legalization Phase
87 |
88 | Instruction Selection: The Selection Phase and Beyond
89 |
90 | Instruction Scheduling
91 |
92 | Register Allocation
93 |
94 | Lowering of the Stack Layout
95 |
96 | Getting Started with the Assembler
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 | Requirements for this book
105 | To follow the instructions in this book, you need LLVM 20 installed on your system, running on Windows, macOS, or Linux operating systems.
106 |
107 | Navigate to the different `chX` directories, look at the examples provided, and do the exercises when applicable.
108 | Each directory has its own README.md with specific directions.
109 |
110 | Note:
111 | The exercises have been tested with the open source repository of LLVM at the Git hash 424c2d9b7e4d from February 13th, 2025, which corresponds to LLVM 20.1.1.
112 |
113 | Some of the exercises interact directly with the LLVM C++ API. This API has no stability guarantee; therefore, it is possible that newer or older versions of LLVM will not work with these exercises.
114 |
115 | For the exercises that require an LLVM installation, if you build your own, make sure to use the `CMAKE_INSTALL_PREFIX` CMake variable to set the install path, then build the `install` target.
116 |
117 | Then, you will need to provide this path to CMake in the different exercises.
118 |
119 | Follow the READMEs in the different directories when you get there.
120 |
121 |
122 |
123 | Get to know the author
124 |
125 | _Quentin Colombet_ is a veteran LLVM contributor specializing in compiler backends. He is the architect of the new instruction selection framework (GlobalISel) and code owner of the LLVM register allocators. With over two decades of experience, he has worked on compiler backends for a variety of architectures, including GPU, CPU, microcontrollers, DSP, and ASICs. Quentin joined Apple in 2012 and has contributed to x86, Aarch64, and Apple GPU backends. He is passionate about helping newcomers onboard the LLVM infrastructure, having mentored interns and new hires over the years.
126 |
127 |
128 |
129 | Other Related Books
130 |
141 |
142 |
143 |
144 |
145 | ## Errata
146 |
147 | * Page 11: In the command `$ git clone https://github.com/llvm/llvm/project.git`, the URL should be `https://github.com/llvm/llvm-project.git`. Therefore, the first line becomes `$ git clone https://github.com/llvm/llvm-project.git`.
148 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/README.md:
--------------------------------------------------------------------------------
1 | This directory contains examples of how FileCheck can be used.
2 |
3 | The examples are sorted in increasing order of complexity.
4 | - ex1: Uses only the simplest form of directives
5 | - ex2: Shows how to use more than one prefix
6 | - ex3: Introduces keywords
7 | - ex4: Shows how to add regex in the mix
8 | - ex5: Introduces variables
9 |
10 | Each example lives in its own directory.
11 | Each directory follows the same structure:
12 | - `README.md` describes what there is to see in this example
13 | - `run.sh` contains the command to run to demonstrate the specific example
14 | - `input.txt` contains the input of the example
15 | - `check-file.txt` contains the patterns that FileCheck will match in `run.sh`
16 |
17 | To run the example:
18 | - Make sure that FileCheck is in your `PATH`
19 | - Change directory to exN
20 | - Open `run.sh` to see what is being tested
21 | - Either:
22 | - run the commands manually by copy/pasting them, or
23 | - execute `bash run.sh`
24 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex1/README.md:
--------------------------------------------------------------------------------
1 | This example shows the most basic use case of FileCheck.
2 | It uses the default prefix and performs simple matches.
3 |
4 | Look at the check file `check-file.txt` for additional comments.
5 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex1/check-file.txt:
--------------------------------------------------------------------------------
1 | Notice how freeflow text doesn't bother FileCheck.
2 | Notice also that FileCheck doesn't care by default about the number of spaces
3 | CHECK: I feel
4 | CHECK: great
5 | CHECK: today
6 | CHECK: How about you?
7 | CHECK: I don
8 | CHECK: Meh
9 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex1/input.txt:
--------------------------------------------------------------------------------
1 | I feel
2 |
3 | great
4 | today
5 |
6 | How about you?
7 | This line doesn't matter
8 | as well as this one
9 | I don't know
10 |
11 | Meh
12 |
13 | The end
14 |
15 | or is it?
16 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex1/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Check input.txt against the CHECK directives written in check-file.txt.
3 | FileCheck --input-file input.txt check-file.txt
4 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex2/README.md:
--------------------------------------------------------------------------------
1 | This example shows how to use several prefixes with FileCheck.
2 |
3 | We use the `check-prefixes` command line option to match expressions that start
4 | with a different keyword than CHECK.
5 |
6 | Using this option, you can use several prefixes but only use a subset of what
7 | your check file holds.
8 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex2/check-file.txt:
--------------------------------------------------------------------------------
1 | CHECK: match with the CHECK prefix
2 | SECOND: match with the SECOND prefix
3 | CHECK: Matching prefix can be interleaved
4 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex2/input.txt:
--------------------------------------------------------------------------------
1 | line that we want to match with the CHECK prefix
2 | line that we want to match with the SECOND prefix
3 |
4 | Matching prefix can be interleaved.
5 | FileCheck follows the all of them
6 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex2/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Check input.txt with both the CHECK and SECOND prefixes enabled.
3 | FileCheck --input-file input.txt check-file.txt --check-prefixes CHECK,SECOND
4 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex3/README.md:
--------------------------------------------------------------------------------
1 | This example introduces the FileCheck keywords.
2 | The keywords are appended to the check prefix with -.
3 |
4 | In this example you can see a few keywords in action:
5 | - SAME: match on the same line
6 | - DAG: match order does not matter between different DAG directives
7 | - NEXT: match exactly on the next line
8 | - NOT: make sure this pattern is not matched
9 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex3/check-file.txt:
--------------------------------------------------------------------------------
1 | CHECK: I
2 | CHECK-SAME: feel
3 | CHECK: great
4 | CHECK-NEXT: today
5 | CHECK: How about you?
6 | CHECK-DAG: Meh
7 | CHECK-DAG: I don
8 |
9 | CHECK-NOT: or is it
10 | CHECK: The end
11 | CHECK: or is it
12 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex3/input.txt:
--------------------------------------------------------------------------------
1 | I feel
2 |
3 | great
4 | today
5 |
6 | How about you?
7 | This line doesn't matter
8 | as well as this one
9 | I don't know
10 |
11 | Meh
12 |
13 | The end
14 |
15 | or is it?
16 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex3/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Check input.txt against the keyword directives (SAME, NEXT, DAG, NOT) in check-file.txt.
3 | FileCheck --input-file input.txt check-file.txt
4 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex4/README.md:
--------------------------------------------------------------------------------
1 | This example shows how to use regex in FileCheck.
2 |
3 | Regex are marked by the `{{regex}}` delimiters in the FileCheck commands.
4 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex4/check-file.txt:
--------------------------------------------------------------------------------
1 | CHECK: match with the {{[a-zA-Z]+}} regex
2 | CHECK: match with the {{.*}} regex
3 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex4/input.txt:
--------------------------------------------------------------------------------
1 | line that won't match with the regex
2 | line that we want to match with the regex
3 | line that we want to match with the first regex
4 | line that we want to match with the second regex
5 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex4/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Check input.txt against the regex-based directives in check-file.txt.
3 | FileCheck --input-file input.txt check-file.txt
4 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex5/README.md:
--------------------------------------------------------------------------------
1 | This example shows how to use variables in FileCheck.
2 |
3 | Variables allow you to capture a pattern (regex) and then reuse it in later
4 | FileCheck commands.
5 | Variables use the `[[variable]]` delimiters.
6 | They are defined with `[[variable:regex]]` and used via `[[variable]]`.
7 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex5/check-file.txt:
--------------------------------------------------------------------------------
1 | Match whatever is not a space for our mystery word
2 | CHECK: with the word [[OUR_LABEL:[^ ]*]]
3 | Match the beginning of a line using the regex ^
4 | CHECK: {{^}}[[OUR_LABEL]]:
5 | CHECK-NEXT: Congratulation!
6 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex5/input.txt:
--------------------------------------------------------------------------------
1 | we want to match the lines that start with the word Fancy
2 |
3 | Ignore this line
4 | Ignore that line
5 | Match the next line use a variable that captures Fancy
6 | Fancy:
7 | Congratulation!
8 |
--------------------------------------------------------------------------------
/ch1/FileCheckExamples/ex5/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Check input.txt against the variable-based directives in check-file.txt.
3 | FileCheck --input-file input.txt check-file.txt
4 |
--------------------------------------------------------------------------------
/ch1/README.md:
--------------------------------------------------------------------------------
1 | See the readme at FileCheckExamples/README.md to get started with this chapter's exercises.
2 |
3 | The script at `setup_env.sh` is a helper script that you can use to set up your
4 | environment to develop with LLVM.
5 | It will install required tools like CMake and so on.
6 |
--------------------------------------------------------------------------------
/ch1/quiz/ex8-9/README.md:
--------------------------------------------------------------------------------
1 | Same explanation as what we did for the FileCheckExamples.
2 | Please see the README from that directory (ch1/FileCheckExamples).
3 |
--------------------------------------------------------------------------------
/ch1/quiz/ex8-9/check-file.txt:
--------------------------------------------------------------------------------
1 | CHECK: This is an example of match
2 | SECOND-CHECK-NOT: Do not want to see that
3 | CHECK: with FileCheck
4 | CHECK-NEXT: This line needs to happen exactly after “With FileCheck”
5 | SECOND-CHECK-NOT: Neither this
6 | CHECK: Over
7 |
--------------------------------------------------------------------------------
/ch1/quiz/ex8-9/input.txt:
--------------------------------------------------------------------------------
1 | This is an example of match
2 | # Do not want to see that
3 | with FileCheck
4 | This line needs to happen exactly after “With FileCheck”
5 |
6 | # Neither this
7 | Over
8 |
--------------------------------------------------------------------------------
/ch1/quiz/ex8-9/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Runs the FileCheck invocations for quiz exercises 8 and 9 on input.txt.
3 | echo ex8
4 | FileCheck --input-file input.txt check-file.txt
5 |
6 | echo ex9
7 | # Here we use two prefixes to enable the second set of check lines in check-file.txt.
8 | # Alternatively, we could have written a new check-file.
9 |
10 | # This will reject input.txt, since the faulty lines (the ones starting with '#') are still there.
11 | echo expected fail
12 | FileCheck --input-file input.txt check-file.txt --check-prefixes='CHECK,SECOND-CHECK'
13 | # Filter out the lines starting with '#' and feed the rest to FileCheck through stdin.
14 | echo expected pass
15 | grep -v '^#' input.txt | FileCheck check-file.txt --check-prefixes='CHECK,SECOND-CHECK'
16 |
17 |
--------------------------------------------------------------------------------
/ch1/setup_env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Note: the URLs are customized for a macOS installation.
4 |
5 | # Exit if any command fails (needs_binary later switches this back off).
6 | set -e
7 |
8 | if [ $# -ne 1 ]; then
9 |   echo "Convenient script to install the required binaries"
10 |   echo "to build and test LLVM on macOS"
11 |   echo ""
12 |   echo "Usage $0 'where_to_install_path'"
13 |   exit 1  # Exit statuses are limited to 0-255; -1 is out of range.
14 | fi
15 |
16 | install_prefix="$1"
17 | # Turn a relative install prefix into an absolute path.
18 | case "${install_prefix}" in
19 |   /*) ;;
20 |   *) install_prefix="${PWD}/${install_prefix}" ;;
21 | esac
22 |
23 | final_bin_dir="${install_prefix}/bin"
24 | mkdir -p "${final_bin_dir}"
25 |
26 | # Couple of helper functions for easier-to-read code.
27 | needs_binary () {
28 |   # Return 0 when tool $1 resolves to a file on PATH, 1 otherwise.
29 |   # errexit stays off from here on: callers read $? after a non-zero return.
30 |   set +e
31 |   if [ -f "$(command -v "$1")" ]; then
32 |     return 0
33 |   fi
34 |   return 1
35 | }
36 |
37 | print_need_install () {
38 | need_bin=$1
39 | bin_name=$2
40 | if [ ${need_bin} -eq 0 ]; then
41 | echo "Found '${bin_name}' skipping installation for this package"
42 | echo "> Consider removing '${bin_name}' from your path to force the install"
43 | echo ""
44 | fi
45 | }
46 |
47 | print_path_to_add () {
48 |   # Tell the user where the tools were put and how to expose them through PATH.
49 |   printf '%s\n' \
50 |     "Tools installed by this script are available at:" \
51 |     "'${final_bin_dir}'" "" \
52 |     "Please consider adding this location to your path" "export PATH=${final_bin_dir}:\${PATH}"
53 | }
54 |
55 | # Names of the packages to download.
56 | # For every tool, need_<tool> is 0 when the tool is already on PATH and 1 when
57 | # it still has to be installed.
58 |
59 | # Git is vended as part of Xcode on macOS.
60 | # We'll get it through the Xcode install.
61 | needs_binary 'git'
62 | need_git=$?
63 | print_need_install ${need_git} 'git'
64 |
65 | # CMake's info.
66 | cmake_pkg="${install_prefix}/cmake.tgz"
67 | cmake_url="https://github.com/Kitware/CMake/releases/download/v3.28.0/cmake-3.28.0-macos-universal.tar.gz"
68 | needs_binary 'cmake'
69 | need_cmake=$?
70 | print_need_install ${need_cmake} 'cmake'
71 |
72 | # The LLVM foundation doesn't vend a binary for macOS.
73 | # We have to resort to the Xcode installer for that.
74 | needs_binary 'clang'
75 | need_llvm=$?
76 | print_need_install ${need_llvm} 'clang'
77 |
78 | # Ninja's info.
79 | ninja_pkg="${install_prefix}/ninja.zip"
80 | ninja_url="https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-mac.zip"
81 | needs_binary 'ninja'
82 | need_ninja=$?
83 | print_need_install ${need_ninja} 'ninja'
84 |
85 | # Python's info.
86 | # python.org macOS installer package; it is opened interactively later on.
87 | python_pkg="${install_prefix}/python.pkg"
88 | python_url="https://www.python.org/ftp/python/3.12.1/python-3.12.1-macos11.pkg"
89 | needs_binary 'python3'
90 | need_python=$?
91 | print_need_install ${need_python} 'python3'
92 |
93 | # Helper function to download the packages we need.
94 | download_artifact() {
95 |   # Usage: download_artifact <name> <destination file> <url>
96 |   # Fetches <url> into <destination file> unless that file already exists.
97 |   pkg_name=$1
98 |   pkg_filename=$2
99 |   url=$3
100 |   if [ -f "${pkg_filename}" ]; then
101 |     echo "Found ${pkg_name} package at '${pkg_filename}'"
102 |     echo "> Skipping download"
103 |     echo "> Consider removing this file if the package is out-of-date"
104 |     echo ""
105 |   else
106 |     echo "Downloading ${pkg_name}"
107 |     curl --fail --location "${url}" --output "${pkg_filename}" || { rm -f "${pkg_filename}"; echo "error: could not download ${pkg_name}" >&2; exit 1; }
108 |   fi
109 | }
109 |
110 | # Download and install Ninja, if needed (paths are quoted so a prefix with spaces works).
111 | if [ "${need_ninja}" -eq 1 ]; then
112 |   download_artifact "ninja" "${ninja_pkg}" "${ninja_url}"
113 |   ninja_bin="${final_bin_dir}/ninja"
114 |   if [ -f "${ninja_bin}" ]; then
115 |     echo "Ninja already found at '${ninja_bin}'"
116 |     echo "> Consider removing it if this is not the right version"
117 |     echo ""
118 |   else
119 |     echo "Installing ninja"
120 |     unzip "${ninja_pkg}" -d "${final_bin_dir}"
121 |   fi
122 | fi
123 |
124 | # Download and install CMake, if needed; the binary is symlinked into the bin directory.
125 | if [ "${need_cmake}" -eq 1 ]; then
126 |   download_artifact "cmake" "${cmake_pkg}" "${cmake_url}"
127 |   cmake_bin="${final_bin_dir}/cmake"
128 |   if [ -f "${cmake_bin}" ]; then
129 |     echo "CMake already found at '${cmake_bin}'"
130 |     echo "> Consider removing it if this is not the right version"
131 |     echo ""
132 |   else
133 |     echo "Installing CMake"
134 |     tar xzf "${cmake_pkg}" -C "${install_prefix}"
135 |     path_to_cmake=$(find "${install_prefix}" -name cmake | grep '/bin/cmake' | head -n 1)  # keep one match for ln
136 |     ln -sf "${path_to_cmake}" "${cmake_bin}"
137 |   fi
138 | fi
139 |
140 | # Exit if all the other packages are already available.
141 | if [ "${need_python}" -eq 0 ] && [ "${need_git}" -eq 0 ] && [ "${need_llvm}" -eq 0 ]; then
142 |   print_path_to_add
143 |   exit 0
144 | fi
145 |
146 | echo "/!\ You have to set the install prefix yourself for the next installations"
147 | read -r -p "Continue Y/n: " user_input
148 |
149 | if [ "${user_input}" == "n" ] || [ "${user_input}" == "N" ];
150 | then
151 |   echo "Python and Xcode CLI tools not installed"
152 |   exit 0
153 | fi
154 |
155 | if [ "${user_input}" != "y" ] && [ "${user_input}" != "Y" ] && [ "${user_input}" != "" ];
156 | then
157 |   echo "error: invalid response"
158 |   exit 1  # Exit statuses are limited to 0-255; -1 is out of range.
159 | fi
160 |
161 | # Install python if needed.
162 | if [ "${need_python}" -eq 1 ]; then
163 |   download_artifact "python" "${python_pkg}" "${python_url:?python_url is not set}"
164 |   echo "Installing python... Please follow installer instructions."
165 |   open "${python_pkg}"
166 |   # Give the user time to go through the installer before continuing.
167 |   read -r -p "Press any key to continue" -n1
168 | fi
169 |
170 | # On macOS the most official way to get git and clang is through Xcode.
171 | # If clang and git are already available, assume that they are correct.
172 | if [ "${need_git}" -eq 1 ] || [ "${need_llvm}" -eq 1 ]; then
173 |   echo "Installing Xcode CLI tools... Please follow installer instructions."
174 |   xcode-select --install
175 | fi
176 |
177 | print_path_to_add
178 | exit 0
179 |
--------------------------------------------------------------------------------
/ch10/debug_capabilities/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.22)
2 | # Keep cmake_minimum_required first: it establishes the policy defaults for everything below.
3 | set(CMAKE_CXX_STANDARD 17)
4 |
5 | project(CH8
6 |   LANGUAGES CXX C)
7 |
8 | include(../../cmake/utils/set-llvm-install-prefix.cmake)
9 | include(../../cmake/utils/opt-run.cmake)
10 |
11 | set(CURR_TARGET
12 |   hadd_vector_log
13 | )
14 |
15 | set(OPT_RUN_DEPENDENCIES)
16 | # Normal run.
17 | add_run_opt(OPT_RUN_DEPENDENCIES "-O3" hadd_vector.ll)
18 | # Run with print of the IR after all.
19 | add_run_opt(OPT_RUN_DEPENDENCIES "-O3 -print-after-all" hadd_vector.ll print_after_all)
20 | # Run with print of only the part we want.
21 | add_run_opt(OPT_RUN_DEPENDENCIES "-O3 -print-module-scope -print-before=slp-vectorizer" hadd_vector.ll print_before_slp_vectorizer)
22 | # Run the SLP vectorizer with debug log enabled.
23 | # Note: this works only if the provided LLVM was built with asserts.
24 | add_run_opt(OPT_RUN_DEPENDENCIES "-passes=slp-vectorizer -debug-only=SLP" before_slp_vectorizer_hadd_vector.ll debug_slp_vectorizer)
25 |
26 | # Custom (non-executable) target that depends on the files listed in OPT_RUN_DEPENDENCIES.
27 | add_custom_target(${CURR_TARGET}
28 |   DEPENDS ${OPT_RUN_DEPENDENCIES}
29 | )
30 | # Make the custom target part of the default `all` build.
31 | set_target_properties(${CURR_TARGET} PROPERTIES EXCLUDE_FROM_ALL 0)
32 |
--------------------------------------------------------------------------------
/ch10/debug_capabilities/README.md:
--------------------------------------------------------------------------------
1 | # Enable LLVM debug capabilities #
2 |
3 | In this exercise, we run an input module, `hadd_vector.ll` in the `O3` optimization pipeline (with `opt`).
4 |
5 | The goal here is for you to find when the 4 adds in the input IR get replaced by one horizontal add (`llvm.vector.reduce.add`).
6 |
7 | To find this, you need to use `-print-after-all` to find which pass does the transformation you are looking for.
8 |
9 | Then use `-debug-only=found_pass` to see how it does the transformation.
10 |
11 | Note: the `-debugxxx` options work only on builds of LLVM that have assertions enabled.
12 |
13 | The steps below show you how to produce the interesting command line invocations.
14 |
15 | ## Configure your build directory ##
16 |
17 | ```bash
18 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild .
19 | ```
20 |
21 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option).
22 |
23 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed.
24 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package.
25 |
26 | Note: Again, if you use a plain release, the steps that use `-debugxxx` will fail.
27 |
28 | ## Build ##
29 |
30 | ```bash
31 | ninja -C build
32 | ```
33 |
34 | This builds the default target in the build directory.
35 |
36 | This should produce in the `build` directory (showing only the relevant files):
37 | * `hadd_vector.out.ll`: The output IR of the `opt -O3` pipeline
38 | * `print_after_all_hadd_vector.out.ll.txt`: The command line output of `print-after-all` for `O3`.
39 | * `print_before_slp_vectorizer_hadd_vector.out.ll`: The input IR before the SLP vectorizer (the full module not just the function.)
40 | * `debug_slp_vectorizer_before_slp_vectorizer_hadd_vector.out.ll.txt`: The debug output of the SLP vectorizer.
41 |
42 | ## Solution ##
43 |
44 | The optimization we want to identify here is the SLP vectorizer (as you may have guessed from the dump we saved :)).
45 |
46 |
--------------------------------------------------------------------------------
/ch10/debug_capabilities/before_slp_vectorizer_hadd_vector.ll:
--------------------------------------------------------------------------------
1 | ; *** IR Dump Before SLPVectorizerPass on hadd ***
2 | ; ModuleID = '/Users/qcolombet/clones/open/How-to-build-an-LLVM-backend/ch10/debug_capabilities/hadd_vector.ll'
3 | source_filename = "hadd_vector.c"
4 | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
5 | target triple = "arm64-apple-macosx14.0.0"
6 |
7 | ; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(argmem: read) uwtable(sync)
8 | define i32 @hadd(ptr nocapture noundef readonly %arg) local_unnamed_addr #0 { ; returns the sum of four i32 values read from %arg
9 | bb:
10 | %i3 = load i32, ptr %arg, align 4 ; i32 at byte offset 0 of %arg
11 | %i5 = getelementptr inbounds i8, ptr %arg, i64 4
12 | %i6 = load i32, ptr %i5, align 4 ; i32 at byte offset 4
13 | %i7 = add nsw i32 %i6, %i3 ; first add of the scalar chain
14 | %i9 = getelementptr inbounds i8, ptr %arg, i64 8
15 | %i10 = load i32, ptr %i9, align 4 ; i32 at byte offset 8
16 | %i11 = add nsw i32 %i7, %i10 ; second add of the scalar chain
17 | %i13 = getelementptr inbounds i8, ptr %arg, i64 12
18 | %i14 = load i32, ptr %i13, align 4 ; i32 at byte offset 12
19 | %i15 = add nsw i32 %i11, %i14 ; third add of the scalar chain
20 | ret i32 %i15 ; total of the four loaded values
21 | }
22 |
23 | attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(argmem: read) uwtable(sync) "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "probe-stack"="__chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
24 |
25 | !llvm.module.flags = !{!0, !1, !2, !3, !4}
26 | !llvm.ident = !{!5}
27 |
28 | !0 = !{i32 2, !"SDK Version", [2 x i32] [i32 14, i32 4]}
29 | !1 = !{i32 1, !"wchar_size", i32 4}
30 | !2 = !{i32 8, !"PIC Level", i32 2}
31 | !3 = !{i32 7, !"uwtable", i32 1}
32 | !4 = !{i32 7, !"frame-pointer", i32 1}
33 | !5 = !{!"Apple clang version 15.0.0 (clang-1500.3.9.4)"}
34 |
--------------------------------------------------------------------------------
/ch10/debug_capabilities/hadd_vector.ll:
--------------------------------------------------------------------------------
1 | ; ModuleID = ''
2 | source_filename = "hadd_vector.c"
3 | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
4 | target triple = "arm64-apple-macosx14.0.0"
5 |
6 | %struct.Vec4 = type { i32, i32, i32, i32 }
7 |
8 | ; Function Attrs: noinline nounwind ssp uwtable(sync)
9 | define i32 @hadd(ptr noundef %arg) #0 { ; returns the sum of the four i32 fields of the %struct.Vec4 that %arg points to
10 | bb:
11 | %i = alloca ptr, align 8 ; stack slot that holds the incoming pointer
12 | store ptr %arg, ptr %i, align 8
13 | %i1 = load ptr, ptr %i, align 8 ; the pointer is reloaded from the slot before each field access
14 | %i2 = getelementptr inbounds %struct.Vec4, ptr %i1, i32 0, i32 0 ; address of field 0
15 | %i3 = load i32, ptr %i2, align 4
16 | %i4 = load ptr, ptr %i, align 8
17 | %i5 = getelementptr inbounds %struct.Vec4, ptr %i4, i32 0, i32 1 ; address of field 1
18 | %i6 = load i32, ptr %i5, align 4
19 | %i7 = add nsw i32 %i3, %i6 ; field 0 + field 1
20 | %i8 = load ptr, ptr %i, align 8
21 | %i9 = getelementptr inbounds %struct.Vec4, ptr %i8, i32 0, i32 2 ; address of field 2
22 | %i10 = load i32, ptr %i9, align 4
23 | %i11 = add nsw i32 %i7, %i10
24 | %i12 = load ptr, ptr %i, align 8
25 | %i13 = getelementptr inbounds %struct.Vec4, ptr %i12, i32 0, i32 3 ; address of field 3
26 | %i14 = load i32, ptr %i13, align 4
27 | %i15 = add nsw i32 %i11, %i14 ; sum of all four fields
28 | ret i32 %i15
29 | }
30 |
31 | attributes #0 = { noinline nounwind ssp uwtable(sync) "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "probe-stack"="__chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
32 |
33 | !llvm.module.flags = !{!0, !1, !2, !3, !4}
34 | !llvm.ident = !{!5}
35 |
36 | !0 = !{i32 2, !"SDK Version", [2 x i32] [i32 14, i32 4]}
37 | !1 = !{i32 1, !"wchar_size", i32 4}
38 | !2 = !{i32 8, !"PIC Level", i32 2}
39 | !3 = !{i32 7, !"uwtable", i32 1}
40 | !4 = !{i32 7, !"frame-pointer", i32 1}
41 | !5 = !{!"Apple clang version 15.0.0 (clang-1500.3.9.4)"}
42 |
--------------------------------------------------------------------------------
/ch10/incorrect_optimization/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Build script for the ch10 "incorrect optimization" exercise (target: buggy_cst_propagation).
2 | # cmake_minimum_required() is called first so policies are set before any other command runs.
3 | cmake_minimum_required(VERSION 3.22)
4 |
5 | project(CH10
6 |   LANGUAGES CXX C)
7 |
8 | set(CMAKE_CXX_STANDARD 17)
9 |
10 | # NOTE(review): presumably makes the LLVM CMake package and helpers available; confirm in that file.
11 | include(../../cmake/utils/set-llvm-install-prefix.cmake)
12 |
13 | set(CURR_TARGET
14 |   buggy_cst_propagation
15 | )
16 |
17 | set(SRC
18 |   "main.cpp"
19 |   "populate_function.cpp"
20 | )
21 |
22 | # Append instead of overwriting so flags given with -DCMAKE_CXX_FLAGS=... (e.g. sanitizers) are kept.
23 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
24 |
25 | # Now build our tools
26 | add_executable(${CURR_TARGET} ${SRC})
27 |
28 | # Find the libraries that correspond to the LLVM components that we wish to use
29 | llvm_map_components_to_libnames(llvm_libs support core transformutils)
30 |
31 | # Link against LLVM libraries (PRIVATE: nothing consumes this executable's link interface)
32 | target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
33 |
--------------------------------------------------------------------------------
/ch10/incorrect_optimization/README.md:
--------------------------------------------------------------------------------
1 | # Use LLVM tools to reduce issues #
2 |
3 | In this exercise you need to leverage `llvm-reduce` or `bugpoint` to produce the smallest possible test case for the broken constant propagation implemented in `populate_function.cpp`.
4 |
5 | Build the code using the steps below.
6 |
7 | Then use `llvm-reduce` or `bugpoint` on `bugged_input.ll` to try to reduce the input IR.
8 | The goal here is for you to figure out the `llvm-reduce` (or `bugpoint`) command line and supply the right script to drive the tool where you need.
9 |
10 | When you have the reduced IR, try to fix the issue.
11 |
12 | Hint, when reducing the issue, make sure to filter out the case where the input module is empty.
13 | The reduction will converge towards this empty module otherwise.
14 |
15 | Hint 2, another similar instruction does the right thing in this implementation.
16 |
17 | Hint 3, the correct implementation is available in the ch4 directory.
18 |
19 | ## Configure your build directory ##
20 |
21 | ```bash
22 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild .
23 | ```
24 |
25 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option).
26 |
27 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed.
28 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package.
29 |
30 | ## Build ##
31 |
32 | ```bash
33 | ninja -C build
34 | ```
35 |
36 | This builds the default target in the build directory.
37 |
38 | This should produce in the `build` directory a binary named `buggy_cst_propagation`.
39 |
40 | ## Run ##
41 |
42 | ```bash
43 | ./build/buggy_cst_propagation [input.ll|.bc]
44 | ```
45 |
46 | This will run the buggy implementation on `input.ll`.
47 | Without any input, it runs on a pre-defined IR hardcoded in the main function.
48 | The code should work on the pre-defined IR.
49 |
50 | Next, running the following command should crash the program:
51 | ```bash
52 | ./build/buggy_cst_propagation bugged_input.ll
53 | ```
54 |
55 | Now, your job is to reduce the IR in `bugged_input.ll` to find the minimal set of instructions that exposes the issue.
56 | The goal of this exercise is to have you play with `llvm-reduce` or `bugpoint`, which are utilities provided by the LLVM infrastructure.
57 |
58 | ## Solution ##
59 |
60 | ### With `llvm-reduce` ###
61 |
62 | `llvm-reduce` assumes it found what you are looking for when the given test script returns success (status == 0)
63 |
64 | We use that in `check.sh`.
65 |
66 | To reproduce the problem with `llvm-reduce`, you should run the following command line:
67 | ```bash
68 | llvm-reduce --test=check.sh bugged_input.ll
69 | ```
70 |
71 | This command runs `llvm-reduce` while using `check.sh` as the compilation and testing step.
72 |
73 | Check the content of this file in this directory.
74 |
75 | The resulting IR should resemble:
76 | ```
77 | define i32 @bar() {
78 | bb:
79 | %i3 = sdiv i32 0, 0
80 | ret i32 0
81 | }
82 | ```
83 |
84 | ### With `bugpoint` ###
85 |
86 | `bugpoint` is the opposite of `llvm-reduce` and assumes it found what you are looking for when the test script returns failure (status != 0).
87 |
88 | We use that in `bugpoint-check.sh`.
89 |
90 | To reproduce the problem with `bugpoint`, you should run the following command line:
91 | ```bash
92 | bugpoint --compile-command=./bugpoint-check.sh --run-llc --compile-custom bugged_input.ll
93 | ```
94 |
95 | This command tells `bugpoint` that the compile command and status is reported by `bugpoint-check.sh`.
96 | More specifically the combination of `--compile-custom` and `--compile-command` says that we use a custom compile command and that command is given by the related option.
97 |
98 | Then, we use `llc` (`--run-llc`) as the runner, meaning that `bugpoint` should not try to interpret or run the IR with the "safe" version.
99 |
100 | The resulting IR should resemble:
101 | ```
102 | define void @bar() {
103 | bb:
104 | %i3 = sdiv i32 3, 0
105 | unreachable
106 | }
107 | ```
108 |
109 | Remember to use `opt -S` on the bitcode (`.bc`) to see the textual IR.
110 |
--------------------------------------------------------------------------------
/ch10/incorrect_optimization/bugged_input.ll:
--------------------------------------------------------------------------------
1 | define i32 @foo(i32 noundef %arg) { ; two-way branch on %arg; the bb2 path divides with udiv
2 | bb:
3 | %i = ashr i32 6, 3 ; both operands are constants
4 | %i1 = icmp ne i32 %arg, 0
5 | br i1 %i1, label %bb2, label %bb4 ; %arg != 0 goes to bb2, otherwise bb4
6 |
7 | bb2:
8 | %i3 = udiv i32 3, %i ; unsigned division whose divisor is %i
9 | br label %bb6
10 |
11 | bb4:
12 | %i5 = or i32 %i, 3855
13 | br label %bb6
14 |
15 | bb6:
16 | %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ] ; value coming from whichever branch was taken
17 | %i7 = add i32 %.0, 1
18 | ret i32 %i7
19 | }
20 |
21 | define i32 @bar(i32 noundef %arg) { ; two-way branch on %arg; the bb2 path divides with sdiv
22 | bb:
23 | %i = ashr i32 6, 3 ; both operands are constants
24 | %i1 = icmp ne i32 %arg, 0
25 | br i1 %i1, label %bb2, label %bb4 ; %arg != 0 goes to bb2, otherwise bb4
26 |
27 | bb2:
28 | %i3 = sdiv i32 3, %i ; signed division whose divisor is %i
29 | br label %bb6
30 |
31 | bb4:
32 | %i5 = or i32 %i, 3855
33 | br label %bb6
34 |
35 | bb6:
36 | %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ] ; value coming from whichever branch was taken
37 | %i7 = add i32 %.0, 1
38 | ret i32 %i7
39 | }
40 |
--------------------------------------------------------------------------------
/ch10/incorrect_optimization/bugpoint-check.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # bugpoint compile command: exits non-zero only when the tool ends with a status other than 0 or 1.
3 | ./build/buggy_cst_propagation "$@"
4 | status=$?
5 | # Statuses 0 and 1 are regular exit codes of the tool (1 is what it reports
6 | # for an empty input module), which is not what we want bugpoint to converge to.
7 | # Any other status (e.g. the process was killed by a signal) is the interesting case.
8 | if [ "$status" -ne 0 ] && [ "$status" -ne 1 ]; then
9 |   # bugpoint expects an error code when something interesting happened.
10 |   exit 1
11 | fi
12 |
13 | exit 0
14 |
--------------------------------------------------------------------------------
/ch10/incorrect_optimization/check.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # llvm-reduce interestingness test: exits 0 only when the tool ends with a status other than 0 or 1.
3 | ./build/buggy_cst_propagation "$@"
4 | status=$?
5 | # Statuses 0 and 1 are regular exit codes of the tool (1 is what it reports
6 | # for an empty input module), which is not what we want llvm-reduce to converge to.
7 | # Any other status (e.g. the process was killed by a signal) is the interesting case.
8 | if [ "$status" -ne 0 ] && [ "$status" -ne 1 ]; then
9 |   # llvm-reduce expects 0 when something interesting happens.
10 |   exit 0
11 | fi
12 |
13 | exit 1
14 |
--------------------------------------------------------------------------------
/ch10/incorrect_optimization/main.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/AsmParser/Parser.h" // For parseAssemblyString.
2 | #include "llvm/IR/Function.h"
3 | #include "llvm/IR/LLVMContext.h"
4 | #include "llvm/IR/Module.h"
5 | #include "llvm/IR/Verifier.h"
6 | #include "llvm/IRReader/IRReader.h" // For parseIRFile.
7 | #include "llvm/Support/Debug.h" // For errs().
8 | #include "llvm/Support/SourceMgr.h" // For SMDiagnostic.
9 |
10 | using namespace llvm;
11 |
12 | extern bool buggyConstantPropagation(llvm::Function &);
13 |
14 | bool checkFunctionCorrectness(llvm::Function &Res) {
15 |   // Always dump the function first so it is visible even when it is broken.
16 |   Res.print(errs());
17 |   // verifyFunction() prints what it finds on errs() and returns true when
18 |   // the function is malformed, hence the negated return value below.
19 |   const bool IsBroken = verifyFunction(Res, &errs());
20 |   if (IsBroken)
21 |     errs() << Res.getName() << " does not verify\n";
22 |   return !IsBroken;
23 | }
24 |
25 | // Default textual IR (functions @foo and @bar), parsed by main() when no input file is given.
26 | const char *InputIR =
27 | "define i32 @foo(i32 noundef %arg) {\n"
28 | "bb:\n"
29 | " %i = shl i32 5, 3\n"
30 | " %i1 = icmp ne i32 %arg, 0\n"
31 | " br i1 %i1, label %bb2, label %bb4\n"
32 | "\n"
33 | "bb2:\n"
34 | " %i3 = sdiv i32 %i, 5\n"
35 | " br label %bb6\n"
36 | "\n"
37 | "bb4:\n"
38 | " %i5 = or i32 %i, 3855\n"
39 | " br label %bb6\n"
40 | "\n"
41 | "bb6:\n"
42 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n"
43 | " ret i32 %.0\n"
44 | "}\n"
45 | "\n"
46 | "define i32 @bar(i32 noundef %arg) {\n"
47 | "bb:\n"
48 | " %i = shl i32 -1, 3\n"
49 | " %i1 = icmp ne i32 %arg, 0\n"
50 | " br i1 %i1, label %bb2, label %bb4\n"
51 | "\n"
52 | "bb2:\n"
53 | " %i3 = udiv i32 %i, 3\n"
54 | " br label %bb6\n"
55 | "\n"
56 | "bb4:\n"
57 | " %i5 = or i32 %i, 3855\n"
58 | " br label %bb6\n"
59 | "\n"
60 | "bb6:\n"
61 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n"
62 | " %i7 = add i32 %.0, 1\n"
63 | " ret i32 %i7\n"
64 | "}\n";
65 |
66 | int main(int argc, char **argv) {
67 |   LLVMContext Context;
68 |   SMDiagnostic Err;
69 |   // With exactly one argument, treat it as an IR file to load;
70 |   // otherwise fall back to the built-in InputIR module.
71 |   const bool ReadFromFile = (argc == 2);
72 |   if (ReadFromFile)
73 |     outs() << "Reading module from '" << argv[1] << "'\n";
74 |   auto MyModule = ReadFromFile ? parseIRFile(argv[1], Err, Context)
75 |                                : parseAssemblyString(InputIR, Err, Context);
76 |   if (!MyModule) {
77 |     errs() << "Unable to build module\n";
78 |     return -1;
79 |   }
80 |
81 |   bool AnyFailure = false;
82 |   for (Function &Func : *MyModule) {
83 |     outs() << "Processing function '" << Func.getName() << "'\n";
84 |     Func.print(outs());
85 |
86 |     // Run the pass under test in place, then dump and verify
87 |     // whatever it left behind.
88 |     outs() << "\n\n## Reference implementation\n";
89 |     buggyConstantPropagation(Func);
90 |     const bool Verified = checkFunctionCorrectness(Func);
91 |
92 |     if (!Verified) {
93 |       AnyFailure = true;
94 |       errs() << "Solution does not verify:\n"
95 |                 "- provided implementation("
96 |              << "failed"
97 |              << ")\n";
98 |     }
99 |
100 |     outs() << "\n######\n";
101 |   }
102 |
103 |   // NOTE: the exit status is 1 when every function verified and 0 when one
104 |   // did not. check.sh and bugpoint-check.sh treat both values as
105 |   // "uninteresting", so only a crash drives the reduction.
106 |   return !AnyFailure;
107 | }
108 |
--------------------------------------------------------------------------------
/ch10/incorrect_optimization/populate_function.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/ADT/APInt.h"
2 | #include "llvm/ADT/PostOrderIterator.h" // For ReversePostOrderTraversal.
3 | #include "llvm/IR/BasicBlock.h"
4 | #include "llvm/IR/CFG.h" // To instantiate RPOTraversal.
5 | #include "llvm/IR/Constants.h" // For ConstantInt.
6 | #include "llvm/IR/Function.h"
7 | #include "llvm/IR/InstrTypes.h" // For BinaryOperator, etc.
8 | #include "llvm/IR/Instruction.h"
9 | #include "llvm/IR/LLVMContext.h"
10 | #include "llvm/IR/Module.h"
11 | #include "llvm/Support/Debug.h" // For errs().
12 |
13 | #include
14 |
15 | using namespace llvm;
16 |
17 | // Helper for binary instructions: applies \p Computation to the two operands of \p Instr when both are constants. \returns the new constant, or nullptr when an operand is not a constant or \p Computation yields no value.
18 | static Value *visitBinary(Instruction &Instr, LLVMContext &Ctxt,
19 | std::optional (*Computation)(const APInt &,
20 | const APInt &)) {
21 | assert(isa(Instr) && "This is meant for binary instruction");
22 | // dyn_cast yields nullptr for an operand that is not of the requested constant kind.
23 | auto *LHS = dyn_cast(Instr.getOperand(0));
24 | auto *RHS = dyn_cast(Instr.getOperand(1));
25 | if (!LHS || !RHS)
26 | return nullptr;
27 |
28 | // FIXME: Technically this API is not precise enough because we may want to
29 | // produce poison value for e.g., a division by zero.
30 | std::optional Res = Computation(LHS->getValue(), RHS->getValue());
31 | if (!Res.has_value())
32 | return nullptr;
33 | auto NewConstant = ConstantInt::get(Ctxt, *Res);
34 | return NewConstant;
35 | }
36 |
37 | // Applies a simple constant propagation to \p Foo: supported binary instructions
38 | // whose two operands are constants are replaced by the computed constant and erased.
39 | // \returns true if \p Foo was modified, false otherwise.
40 | bool buggyConstantPropagation(Function &Foo) {
41 | // NOTE: this implementation is intentionally broken; it is the subject of the reduction exercise described in the README.
42 | if (Foo.empty())
43 | return false;
44 |
45 | LLVMContext &Ctxt = Foo.getParent()->getContext();
46 | bool MadeChanges = false;
47 | // Walk the basic blocks of the function in reverse post-order.
48 | ReversePostOrderTraversal RPOT(&Foo);
49 | for (BasicBlock *BB : RPOT) {
50 | // Early increment to be able to remove the instruction that we replaced
51 | // on-the-fly. The alternative is to accumulate the instructions to remove
52 | // in a worklist and delete them afterwards.
53 | for (Instruction &Instr : make_early_inc_range(*BB)) {
54 | Value *NewConstant = nullptr;
55 | switch (Instr.getOpcode()) {
56 | case Instruction::Add:
57 | NewConstant = visitBinary(
58 | Instr, Ctxt,
59 | [](const APInt &A, const APInt &B) -> std::optional {
60 | return A + B;
61 | });
62 | break;
63 | case Instruction::Sub:
64 | NewConstant = visitBinary(
65 | Instr, Ctxt,
66 | [](const APInt &A, const APInt &B) -> std::optional {
67 | return A - B;
68 | });
69 | break;
70 | case Instruction::Mul:
71 | NewConstant = visitBinary(
72 | Instr, Ctxt,
73 | [](const APInt &A, const APInt &B) -> std::optional {
74 | return A * B;
75 | });
76 | break;
77 | case Instruction::SDiv:
78 | NewConstant = visitBinary(
79 | Instr, Ctxt,
80 | [](const APInt &A, const APInt &B) -> std::optional {
81 | return A.sdiv(B);
82 | });
83 | break;
84 | case Instruction::UDiv:
85 | NewConstant = visitBinary(
86 | Instr, Ctxt,
87 | [](const APInt &A, const APInt &B) -> std::optional {
88 | if (B.isZero()) // Refuse to fold when the divisor is zero.
89 | return std::nullopt;
90 | return A.udiv(B);
91 | });
92 | break;
93 | case Instruction::Shl:
94 | NewConstant = visitBinary(
95 | Instr, Ctxt,
96 | [](const APInt &A, const APInt &B) -> std::optional {
97 | return A.shl(B);
98 | });
99 | break;
100 | case Instruction::LShr:
101 | NewConstant = visitBinary(
102 | Instr, Ctxt,
103 | [](const APInt &A, const APInt &B) -> std::optional {
104 | return A.lshr(B);
105 | });
106 | break;
107 | case Instruction::AShr:
108 | NewConstant = visitBinary(
109 | Instr, Ctxt,
110 | [](const APInt &A, const APInt &B) -> std::optional {
111 | return A.ashr(B);
112 | });
113 | break;
114 | case Instruction::And:
115 | NewConstant = visitBinary(
116 | Instr, Ctxt,
117 | [](const APInt &A, const APInt &B) -> std::optional {
118 | return A & B;
119 | });
120 | break;
121 | case Instruction::Or:
122 | NewConstant = visitBinary(
123 | Instr, Ctxt,
124 | [](const APInt &A, const APInt &B) -> std::optional {
125 | return A | B;
126 | });
127 | break;
128 | case Instruction::Xor:
129 | NewConstant = visitBinary(
130 | Instr, Ctxt,
131 | [](const APInt &A, const APInt &B) -> std::optional {
132 | return A ^ B;
133 | });
134 | break;
135 | // Every other opcode is left untouched (NewConstant stays null).
136 | default:
137 | break;
138 | }
139 | if (NewConstant) {
140 | Instr.replaceAllUsesWith(NewConstant);
141 | Instr.eraseFromParent();
142 | MadeChanges = true;
143 | }
144 | }
145 | }
146 | return MadeChanges;
147 | }
148 |
--------------------------------------------------------------------------------
/ch10/undefined_behavior/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Build script for the ch10 undefined-behavior exercise (target: fct_with_ub).
2 | # cmake_minimum_required() is called first so policies are set before any other command runs.
3 | cmake_minimum_required(VERSION 3.22)
4 |
5 | project(CH10
6 |   LANGUAGES CXX C)
7 |
8 | set(CMAKE_CXX_STANDARD 17)
9 |
10 | # NOTE(review): presumably makes the LLVM CMake package and helpers available; confirm in that file.
11 | include(../../cmake/utils/set-llvm-install-prefix.cmake)
12 |
13 | set(CURR_TARGET
14 |   fct_with_ub
15 | )
16 |
17 | set(SRC
18 |   "main.cpp"
19 |   "populate_function.cpp"
20 | )
21 |
22 | # Appended so flags given with -DCMAKE_CXX_FLAGS=... (e.g. -fsanitize=undefined) are kept.
23 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
24 |
25 | # Now build our tools
26 | add_executable(${CURR_TARGET} ${SRC})
27 |
28 | # Find the libraries that correspond to the LLVM components that we wish to use
29 | llvm_map_components_to_libnames(llvm_libs support core)
30 |
31 | # Link against LLVM libraries (PRIVATE: nothing consumes this executable's link interface)
32 | target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
33 |
--------------------------------------------------------------------------------
/ch10/undefined_behavior/README.md:
--------------------------------------------------------------------------------
1 | # Easily find undefined behavior #
2 |
3 | In this exercise you need to leverage the undefined sanitizer to find what is wrong with the `fctWithUB` function implemented in `populate_function.cpp`.
4 |
5 | Build the code using the steps below.
6 |
7 | Run it, play with different values.
8 |
9 | Everything is fine, right?
10 |
11 | No, this program relies on undefined behavior.
12 |
13 | Rebuild with the undefined behavior sanitizer enabled and find the problems.
14 |
15 | To fix the UB, please implement the specifications put as comments before the implementation of `fctWithUB`.
16 |
17 | ## Configure your build directory ##
18 |
19 | ```bash
20 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild .
21 | ```
22 |
23 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option).
24 |
25 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed.
26 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package.
27 |
28 | ## Build ##
29 |
30 | ```bash
31 | ninja -C build
32 | ```
33 |
34 | This builds the default target in the build directory.
35 |
36 | This should produce in the `build` directory a binary named `fct_with_ub`.
37 |
38 | ## Run ##
39 |
40 | ```bash
41 | ./build/fct_with_ub -- someNumber
42 | ```
43 |
44 | This will run the function that relies on UB on `someNumber`.
45 |
46 | By default `someNumber == 12`.
47 |
48 |
49 | ## Solution ##
50 |
51 | Configure your build with undefined sanitizer enabled and build:
52 | ```bash
53 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild_w_ubsan . -DCMAKE_CXX_FLAGS=-fsanitize=undefined
54 | ninja -C build_w_ubsan
55 | ```
56 |
57 | Then run the application:
58 | ```bash
59 | build_w_ubsan/fct_with_ub -- someNumber
60 | ```
61 |
62 | If you use a negative number, you should get an error that resembles:
63 | ```
64 | $ ./build_w_ubsan/fct_with_ub -- -25
65 | ch10/undefined_behavior/populate_function.cpp:7:17: runtime error: shift exponent -25 is negative
66 | SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior ch10/undefined_behavior/populate_function.cpp:7:17 in
67 | fctWithUB(-25) == 2519
68 | ```
69 |
70 | Now, if you run with a number bigger than 31, you should get:
71 | ```
72 | $ ./build_w_ubsan/fct_with_ub -- 32
73 | ch10/undefined_behavior/populate_function.cpp:7:17: runtime error: shift exponent 32 is too large for 32-bit type 'int'
74 | ```
75 |
76 | In both cases, this tells you that at line 7 of `populate_function.cpp` we are relying on some specific undefined behavior and we should fix that.
77 |
--------------------------------------------------------------------------------
/ch10/undefined_behavior/main.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/Support/CommandLine.h" // For cl::xxx.
2 | #include "llvm/Support/Debug.h" // For dbgs().
3 |
4 | using namespace llvm;
5 |
6 | extern int fctWithUB(int a);
7 |
8 | static cl::opt InputNumber(cl::Positional, cl::desc(" "), // positional command-line argument
9 | cl::init(12)); // initial value used when no number is given
10 |
11 | int main(int argc, char **argv) {
12 |   cl::ParseCommandLineOptions(argc, argv, "CH10 UB sanitizer\n");
13 |   // Read the option once and finish the call before anything is printed.
14 |   const int Input = InputNumber;
15 |   const int Shifted = fctWithUB(Input);
16 |   dbgs() << "fctWithUB(" << Input << ") == " << Shifted << '\n';
17 |   return 0;
18 | }
19 |
--------------------------------------------------------------------------------
/ch10/undefined_behavior/populate_function.cpp:
--------------------------------------------------------------------------------
1 | // Specification: shift 322512 left by `input` bits.
2 | // If input is > 31 we want to clamp to 31.
3 | // If input is < 0 we want to shift right instead of left (and still clamp to
4 | // 31.)
5 | // As it is, this function has undefined behavior: `input` is used as the shift amount without any range check.
6 | int fctWithUB(int input) {
7 | return 322512 >> input; // NOTE(review): this shifts right although the specification above says left -- confirm which is intended.
8 | }
9 |
--------------------------------------------------------------------------------
/ch10/use_after_free/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Build script for the ch10 use-after-free exercise (target: use_after_free).
2 | # cmake_minimum_required() is called first so policies are set before any other command runs.
3 | cmake_minimum_required(VERSION 3.22)
4 |
5 | project(CH10
6 |   LANGUAGES CXX C)
7 |
8 | set(CMAKE_CXX_STANDARD 17)
9 |
10 | # NOTE(review): presumably makes the LLVM CMake package and helpers available; confirm in that file.
11 | include(../../cmake/utils/set-llvm-install-prefix.cmake)
12 |
13 | set(CURR_TARGET
14 |   use_after_free
15 | )
16 |
17 | set(SRC
18 |   "main.cpp"
19 |   "populate_function.cpp"
20 | )
21 |
22 | # Appended so flags given with -DCMAKE_CXX_FLAGS=... (e.g. -fsanitize=address) are kept.
23 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
24 |
25 | # Now build our tools
26 | add_executable(${CURR_TARGET} ${SRC})
27 |
28 | # Find the libraries that correspond to the LLVM components that we wish to use
29 | llvm_map_components_to_libnames(llvm_libs support core)
30 |
31 | # Link against LLVM libraries (PRIVATE: nothing consumes this executable's link interface)
32 | target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
33 |
--------------------------------------------------------------------------------
/ch10/use_after_free/README.md:
--------------------------------------------------------------------------------
1 | # Easily find memory corruption #
2 |
3 | In this exercise you need to leverage the address sanitizer to find what is wrong with the `buggyBuildModule` function implemented in `populate_function.cpp`.
4 |
5 | Build the code using the steps below.
6 |
7 | Run it and observe the crash.
8 |
9 | Now, think how you can find this crash easily.
10 |
11 | Bonus: Fix the crash!
12 |
13 | You can look at `ch3/build_ir` for a correct way of doing the `buildModule` implementation.
14 |
15 | ## Configure your build directory ##
16 |
17 | ```bash
18 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild .
19 | ```
20 |
21 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option).
22 |
23 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed.
24 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package.
25 |
26 | ## Build ##
27 |
28 | ```bash
29 | ninja -C build
30 | ```
31 |
32 | This builds the default target in the build directory.
33 |
34 | This should produce in the `build` directory a binary named `use_after_free`.
35 |
36 | ## Run ##
37 |
38 | ```bash
39 | ./build/use_after_free
40 | ```
41 |
42 | This will run the buggy build module implementation.
43 |
44 | At this point it should crash.
45 |
46 | ## Solution ##
47 |
48 | Configure your build with address sanitizer enabled and build:
49 | ```bash
50 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild_w_asan . -DCMAKE_CXX_FLAGS=-fsanitize=address
51 | ninja -C build_w_asan
52 | ```
53 |
54 | Then run the application:
55 | ```bash
56 | build_w_asan/use_after_free
57 | ```
58 |
59 | And you should see an output resembling:
60 | ```
61 | =================================================================
62 | ==37427==ERROR: AddressSanitizer: heap-use-after-free on address 0x000108f03ca0 at pc 0x000105baf828 bp 0x00016b1a6830 sp 0x00016b1a5ff0
63 | READ of size 15 at 0x000108f03ca0 thread T0
64 | #0 0x105baf824 in wrap_memchr+0x27c (libclang_rt.asan_osx_dynamic.dylib:arm64e+0x1b824)
65 | #1 0x1945b7cac in std::__1::basic_string, std::__1::allocator>::find(char, unsigned long) const+0x38 (libc++.1.dylib:arm64e+0x1bcac)
66 | #2 0x104d3c968 in llvm::Module::print(llvm::raw_ostream&, llvm::AssemblyAnnotationWriter*, bool, bool) const+0x9b0 (use_after_free:arm64+0x1000e4968)
67 | #3 0x104c5a088 in main main.cpp:18
68 | #4 0x1942fe0dc ()
69 |
70 | 0x000108f03ca0 is located 160 bytes inside of 752-byte region [0x000108f03c00,0x000108f03ef0)
71 | freed by thread T0 here:
72 | #0 0x105bf5b8c in wrap__ZdlPv+0x74 (libclang_rt.asan_osx_dynamic.dylib:arm64e+0x61b8c)
73 | #1 0x104e836f4 in llvm::LLVMContextImpl::~LLVMContextImpl()+0xf4 (use_after_free:arm64+0x10022b6f4)
74 | #2 0x104e81910 in llvm::LLVMContext::~LLVMContext()+0x18 (use_after_free:arm64+0x100229910)
75 | #3 0x104c5cab4 in buggyBuildModule() populate_function.cpp:152
76 | #4 0x104c59fc8 in main main.cpp:11
77 | #5 0x1942fe0dc ()
78 |
79 | previously allocated by thread T0 here:
80 | #0 0x105bf574c in wrap__Znwm+0x74 (libclang_rt.asan_osx_dynamic.dylib:arm64e+0x6174c)
81 | #1 0x104c5ce78 in std::__1::__unique_if::__unique_single std::__1::make_unique[abi:ue170006](char const (&) [16], llvm::LLVMContext&) unique_ptr.h:689
82 | #2 0x104c5b394 in buggyBuildModule() populate_function.cpp:71
83 | #3 0x104c59fc8 in main main.cpp:11
84 | #4 0x1942fe0dc ()
85 |
86 | SUMMARY: AddressSanitizer: heap-use-after-free (libclang_rt.asan_osx_dynamic.dylib:arm64e+0x1b824) in wrap_memchr+0x27c
87 | Shadow bytes around the buggy address:
88 | 0x000108f03a00: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
89 | 0x000108f03a80: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
90 | 0x000108f03b00: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
91 | 0x000108f03b80: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
92 | 0x000108f03c00: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
93 | =>0x000108f03c80: fd fd fd fd[fd]fd fd fd fd fd fd fd fd fd fd fd
94 | 0x000108f03d00: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
95 | 0x000108f03d80: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
96 | 0x000108f03e00: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
97 | 0x000108f03e80: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fa fa
98 | 0x000108f03f00: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
99 | Shadow byte legend (one shadow byte represents 8 application bytes):
100 | Addressable: 00
101 | Partially addressable: 01 02 03 04 05 06 07
102 | Heap left redzone: fa
103 | Freed heap region: fd
104 | Stack left redzone: f1
105 | Stack mid redzone: f2
106 | Stack right redzone: f3
107 | Stack after return: f5
108 | Stack use after scope: f8
109 | Global redzone: f9
110 | Global init order: f6
111 | Poisoned by user: f7
112 | Container overflow: fc
113 | Array cookie: ac
114 | Intra object redzone: bb
115 | ASan internal: fe
116 | Left alloca redzone: ca
117 | Right alloca redzone: cb
118 | ==37427==ABORTING
119 | Abort trap: 6
120 | ```
121 |
122 | What this tells you is that at line 18 of `main.cpp` we are using an object that has been deallocated at line 152 of `populate_function.cpp`.
123 |
124 | Furthermore, it tells us that this object was freed with `~LLVMContext`, i.e., the destructor of the LLVMContext.
125 |
126 | Therefore the problem here is that our LLVMContext does not have the proper scope.
127 |
128 | Indeed it is local to `buggyBuildModule`, whereas its live-range must out-live its Module, which in this case goes beyond `buggyBuildModule`.
129 |
--------------------------------------------------------------------------------
/ch10/use_after_free/main.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/IR/Module.h"
2 | #include "llvm/IR/Verifier.h"
3 | #include "llvm/Support/Debug.h" // For errs().
4 |
5 | using namespace llvm;
6 |
7 | extern std::unique_ptr buggyBuildModule();
8 |
9 | int main(int argc, char **argv) {
10 |   // Build the module, print it, then run the IR verifier on it.
11 |   auto CurModule = buggyBuildModule();
12 |   outs() << "\n\n## Processing module\n";
13 |   if (!CurModule) {
14 |     outs() << "Nothing built\n";
15 |     return 0;
16 |   }
17 |
18 |   CurModule->print(errs(), /*AssemblyAnnotationWriter=*/nullptr);
19 |   // verifyModule returns true if it finds errors and
20 |   // prints them on the provided output stream (errs() here).
21 |   const bool HadError = verifyModule(*CurModule, &errs());
22 |   if (HadError)
23 |     errs() << "Impl does not verify\n";
24 |
25 |   // Exit status follows the usual convention: 0 on success, 1 when the
26 |   // module failed verification (the previous `!hadError` inverted this).
27 |   return HadError ? 1 : 0; }
28 |
--------------------------------------------------------------------------------
/ch10/use_after_free/populate_function.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/ADT/ArrayRef.h"
2 | #include "llvm/IR/BasicBlock.h"
3 | #include "llvm/IR/Constants.h" // For ConstantInt.
4 | #include "llvm/IR/DerivedTypes.h" // For PointerType, FunctionType.
5 | #include "llvm/IR/Function.h"
6 | #include "llvm/IR/IRBuilder.h"
7 | #include "llvm/IR/LLVMContext.h"
8 | #include "llvm/IR/Module.h"
9 | #include "llvm/IR/Type.h"
10 | #include "llvm/Support/Debug.h" // For errs().
11 |
12 | #include <memory> // For unique_ptr
13 |
14 | using namespace llvm;
15 |
16 | // The goal of this function is to build a Module that
17 | // represents the lowering of the following foo, a C function:
18 | // extern int baz();
19 | // extern void bar(int);
20 | // void foo(int a, int b) {
21 | // int var = a + b;
22 | // if (var == 0xFF) {
23 | // bar(var);
24 | // var = baz();
25 | // }
26 | // bar(var);
27 | // }
28 | //
29 | // The IR for this snippet (at O0) is:
30 | // define void @foo(i32 %arg, i32 %arg1) {
31 | // bb:
32 | // %i = alloca i32
33 | // %i2 = alloca i32
34 | // %i3 = alloca i32
35 | // store i32 %arg, ptr %i
36 | // store i32 %arg1, ptr %i2
37 | // %i4 = load i32, ptr %i
38 | // %i5 = load i32, ptr %i2
39 | // %i6 = add i32 %i4, %i5
40 | // store i32 %i6, ptr %i3
41 | // %i7 = load i32, ptr %i3
42 | // %i8 = icmp eq i32 %i7, 255
43 | // br i1 %i8, label %bb9, label %bb12
44 | //
45 | // bb9:
46 | // %i10 = load i32, ptr %i3
47 | // call void @bar(i32 %i10)
48 | // %i11 = call i32 @baz()
49 | // store i32 %i11, ptr %i3
50 | // br label %bb12
51 | //
52 | // bb12:
53 | // %i13 = load i32, ptr %i3
54 | // call void @bar(i32 %i13)
55 | // ret void
56 | // }
57 | //
58 | // declare void @bar(i32)
59 | // declare i32 @baz(...)
60 | //
61 | // This function contains a bug, can you spot it?
62 | std::unique_ptr<Module> buggyBuildModule() {
63 |   LLVMContext Ctxt;
64 |   // Create the types that we will use over and over;
65 |   Type *Int32Ty = Type::getInt32Ty(Ctxt);
66 |   Type *VoidTy = Type::getVoidTy(Ctxt);
67 |   Type *PtrTy = PointerType::get(Ctxt, /*AddrSpace=*/0);
68 |
69 |   // Create the high level module.
70 |   std::unique_ptr<Module> MyModule =
71 |       std::make_unique<Module>("Solution Module", Ctxt);
72 |
73 |   // Populate all the functions (just declaration for now.)
74 |   // Starting with baz.
75 |   FunctionType *BazTy =
76 |       FunctionType::get(/*RetTy=*/Int32Ty, /*isVarArg=*/false);
77 |   Function *BazFunc =
78 |       cast<Function>(MyModule->getOrInsertFunction("baz", BazTy).getCallee());
79 |
80 |   // bar.
81 |   FunctionType *BarTy =
82 |       FunctionType::get(VoidTy, /*ArgsTy=*/ArrayRef(Int32Ty), false);
83 |   Function *BarFunc =
84 |       cast<Function>(MyModule->getOrInsertFunction("bar", BarTy).getCallee());
85 |
86 |   // foo.
87 |   FunctionType *FooTy =
88 |       FunctionType::get(VoidTy, /*ArgsTy*/ ArrayRef({Int32Ty, Int32Ty}), false);
89 |   Function *FooFunc =
90 |       cast<Function>(MyModule->getOrInsertFunction("foo", FooTy).getCallee());
91 |
92 |   // Next, create the structure for foo.
93 |   BasicBlock *BB = BasicBlock::Create(Ctxt, /*Name=*/"bb", /*Parent=*/FooFunc);
94 |   BasicBlock *BB9 =
95 |       BasicBlock::Create(Ctxt, /*Name=*/"bb9", /*Parent=*/FooFunc);
96 |   BasicBlock *BB12 =
97 |       BasicBlock::Create(Ctxt, /*Name=*/"bb12", /*Parent=*/FooFunc);
98 |
99 |   // Populate bb.
100 |   IRBuilder Builder(BB);
101 |   // Allocate stack space for the local variables.
102 |   Value *I = Builder.CreateAlloca(Int32Ty);
103 |   Value *I2 = Builder.CreateAlloca(Int32Ty);
104 |   Value *I3 = Builder.CreateAlloca(Int32Ty);
105 |   // Get arg and arg1 from foo.
106 |   Value *Arg = FooFunc->getArg(0);
107 |   Value *Arg1 = FooFunc->getArg(1);
108 |   // Store them in the "local" variables.
109 |   Builder.CreateStore(Arg, I);
110 |   Builder.CreateStore(Arg1, I2);
111 |   // Reload from the local variables.
112 |   Value *I4 = Builder.CreateLoad(Int32Ty, I);
113 |   Value *I5 = Builder.CreateLoad(Int32Ty, I2);
114 |   // Do the add.
115 |   Value *I6 = Builder.CreateAdd(I4, I5);
116 |   // Store to local variable i3.
117 |   Builder.CreateStore(I6, I3);
118 |   // Reload from i3 (now you understand why O0 is slow!!)
119 |   Value *I7 = Builder.CreateLoad(Int32Ty, I3);
120 |   // Compare.
121 |   Value *Cst255 = ConstantInt::get(Int32Ty, 255);
122 |   Value *I8 = Builder.CreateICmpEQ(I7, Cst255);
123 |   // Then jump.
124 |   Builder.CreateCondBr(I8, BB9, BB12);
125 |
126 |   // Populate bb9.
127 |   // Reset the builder on the next basic block.
128 |   Builder.SetInsertPoint(BB9);
129 |   // Reload the local variable i3.
130 |   Value *I10 = Builder.CreateLoad(Int32Ty, I3);
131 |   // Call bar with i10.
132 |   Builder.CreateCall(BarFunc->getFunctionType(), BarFunc, ArrayRef(I10));
133 |   // Call baz.
134 |   Value *I11 = Builder.CreateCall(BazFunc->getFunctionType(), BazFunc);
135 |   // Store the result in the local variable i3.
136 |   Builder.CreateStore(I11, I3);
137 |   // Jump to the next block.
138 |   Builder.CreateBr(BB12);
139 |
140 |   // Populate bb12.
141 |   // Reset the builder on the next basic block.
142 |   Builder.SetInsertPoint(BB12);
143 |   // Reload the local variable I3.
144 |   Value *I13 = Builder.CreateLoad(Int32Ty, I3);
145 |   // Call bar on i13.
146 |   Builder.CreateCall(BarFunc->getFunctionType(), BarFunc, ArrayRef(I13));
147 |   // Create the final return.
148 |   // Remember all basic block must end with a terminator.
149 |   Builder.CreateRetVoid();
150 |
151 |   return MyModule;
152 | }
153 |
--------------------------------------------------------------------------------
/ch11/instr_info/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Build script for the `print_instr` tool: runs llvm-tblgen on mytarget.td and
2 | # compiles the tool against the generated register/instruction descriptions.
3 | cmake_minimum_required(VERSION 3.22)
4 |
5 | project(CH11
6 |   LANGUAGES CXX C)
7 |
8 | # Require C++17; fail at configure time instead of silently using an older one.
9 | set(CMAKE_CXX_STANDARD 17)
10 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
11 |
12 | include(../../cmake/utils/set-llvm-install-prefix.cmake)
13 |
14 | # Hook up the TableGen tooling.
15 | set(LLVM_TABLEGEN_EXE "${LLVM_TOOLS_BINARY_DIR}/llvm-tblgen")
16 | include("${LLVM_CMAKE_DIR}/AddLLVM.cmake")
17 | include("${LLVM_CMAKE_DIR}/TableGen.cmake")
18 |
19 | # Create the different TableGen outputs.
20 | set(LLVM_TARGET_DEFINITIONS mytarget.td)
21 | tablegen(LLVM MyTargetGenRegisterInfo.inc -gen-register-info)
22 | tablegen(LLVM MyTargetGenInstrInfo.inc -gen-instr-info)
23 |
24 | # Register a target for all the TableGen outputs.
25 | add_public_tablegen_target(CommonTableGen)
26 |
27 | # Make that target part of the `all` target.
28 | set_target_properties(CommonTableGen PROPERTIES EXCLUDE_FROM_ALL 0)
29 |
30 | set(CURR_TARGET
31 |   print_instr
32 | )
33 |
34 | set(SRC
35 |   "main.cpp"
36 |   "MyTargetInstrInfo.cpp"
37 |   "MyTargetRegisterInfo.cpp"
38 | )
39 |
40 | set(INC
41 |   "MyTargetInstrInfo.h"
42 |   "MyTargetRegisterInfo.h"
43 | )
44 |
45 | # Now build our tools
46 | add_executable(${CURR_TARGET} ${SRC} ${INC})
47 | # The sources include the generated .inc files, so TableGen must run first.
48 | add_dependencies(${CURR_TARGET} CommonTableGen)
49 |
50 | # Compile the C++ sources without RTTI (scoped to this target only).
51 | target_compile_options(${CURR_TARGET} PRIVATE
52 |   "$<$<COMPILE_LANGUAGE:CXX>:-fno-rtti>")
53 |
54 | # Find the libraries that correspond to the LLVM components
55 | # that we wish to use
56 | llvm_map_components_to_libnames(llvm_libs support core mc codegen)
57 |
58 | # Link against LLVM libraries
59 | target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
60 |
61 | # To find the generated files
62 | target_include_directories(${CURR_TARGET} PRIVATE
63 |   "${CMAKE_CURRENT_SOURCE_DIR}/.."
64 |   "${CMAKE_CURRENT_BINARY_DIR}")
65 |
--------------------------------------------------------------------------------
/ch11/instr_info/MyTargetInstrInfo.cpp:
--------------------------------------------------------------------------------
1 | #include "MyTargetInstrInfo.h"
2 | #include "llvm/ADT/SmallVector.h"
3 | #include "llvm/CodeGen/MachineBasicBlock.h"
4 | #include "llvm/CodeGen/MachineInstrBuilder.h"
5 | #include "llvm/IR/DebugLoc.h"
6 | #include "llvm/Support/ErrorHandling.h"
7 | #include
8 | #include
9 |
10 | #define GET_INSTRINFO_CTOR_DTOR
11 | #define GET_INSTRINFO_MC_DESC // This should be in MC
12 | #define GET_INSTRINFO_MC_HELPERS // This should be in MC
13 | #include "MyTargetGenInstrInfo.inc"
14 |
15 | using namespace llvm;
16 | // Default construction only forwards to the TableGen-generated base class.
17 | MyTargetInstrInfo::MyTargetInstrInfo() : MyTargetGenInstrInfo() {}
18 |
--------------------------------------------------------------------------------
/ch11/instr_info/MyTargetInstrInfo.h:
--------------------------------------------------------------------------------
1 | #ifndef LLVM_LIB_TARGET_MYTARGET_MYTARGETINSTRINFO_H
2 | #define LLVM_LIB_TARGET_MYTARGET_MYTARGETINSTRINFO_H
3 |
4 | #include "MyTargetRegisterInfo.h" // For the definition of the register class.
5 | #include "llvm/CodeGen/TargetInstrInfo.h"
6 |
7 | #define GET_INSTRINFO_HEADER
8 | #define GET_INSTRINFO_ENUM // This should be in MC
9 | #define GET_INSTRINFO_MC_HELPER_DECLS // This should be in MC
10 | #include "MyTargetGenInstrInfo.inc"
11 |
12 | namespace llvm {
13 | // MyTarget's instruction info: a thin wrapper over the generated MyTargetGenInstrInfo.
14 | class MyTargetInstrInfo : public MyTargetGenInstrInfo {
15 | public:
16 | MyTargetInstrInfo(); // Defined out of line in MyTargetInstrInfo.cpp.
17 | };
18 | } // namespace llvm
19 |
20 | #endif
21 |
--------------------------------------------------------------------------------
/ch11/instr_info/MyTargetRegisterInfo.cpp:
--------------------------------------------------------------------------------
1 | #include "MyTargetRegisterInfo.h"
2 |
3 | #include "llvm/CodeGen/MachineFunction.h"
4 | #include "llvm/CodeGen/TargetFrameLowering.h"
5 | #include "llvm/CodeGen/TargetRegisterInfo.h"
6 | #include "llvm/CodeGen/TargetSubtargetInfo.h"
7 | // Frame lowering stub: the stack grows down, uses 16-byte alignment, and is not realignable.
8 | namespace llvm {
9 | class MyTargetFrameLowering : public TargetFrameLowering { // NOTE(review): presumably needed by the generated code included below - confirm.
10 | public:
11 | MyTargetFrameLowering()
12 | : TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16),
13 | false /*StackRealignable*/) {}
14 | };
15 | } // namespace llvm
16 |
17 | #define GET_REGINFO_TARGET_DESC
18 | #define GET_REGINFO_MC_DESC
19 | #include "MyTargetGenRegisterInfo.inc"
20 |
--------------------------------------------------------------------------------
/ch11/instr_info/MyTargetRegisterInfo.h:
--------------------------------------------------------------------------------
1 | #ifndef __MYTARGETREGISTERINFO_H__
2 | #define __MYTARGETREGISTERINFO_H__
3 |
4 | #include "llvm/ADT/BitVector.h"
5 | #include "llvm/CodeGen/MachineBasicBlock.h"
6 | #include
7 |
8 | #define GET_REGINFO_HEADER
9 | #define GET_REGINFO_ENUM // Technically this one belongs to MC.
10 | #include "MyTargetGenRegisterInfo.inc"
11 |
12 | namespace llvm {
13 | class MachineFunction;
14 | // Minimal TargetRegisterInfo for MyTarget on top of the TableGen-generated base.
15 | class MyTargetRegisterInfo : public MyTargetGenRegisterInfo {
16 | public:
17 |   MyTargetRegisterInfo() : MyTargetGenRegisterInfo(Register()) {}
18 |   // No register is reserved; the set is sized so any register can index it.
19 |   BitVector getReservedRegs(const MachineFunction &MF) const override {
20 |     return BitVector(getNumRegs());
21 |   }
22 |   // No register is callee-saved: return an empty, null-terminated list.
23 |   const MCPhysReg *
24 |   getCalleeSavedRegs(const MachineFunction *MF) const override {
25 |     static const MCPhysReg NoCalleeSavedRegs[] = {0};
26 |     return NoCalleeSavedRegs;
27 |   }
28 |   // Stub: frame indices are left untouched and false is always returned.
29 |   bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
30 |                            unsigned FIOperandNum,
31 |                            RegScavenger *RS = nullptr) const override {
32 |     return false;
33 |   }
34 |   Register getFrameRegister(const MachineFunction &MF) const override {
35 |     return Register(); // Stub: no frame register is reported.
36 |   }
37 | };
38 |
39 | } // end namespace llvm.
40 |
41 | #endif
42 |
--------------------------------------------------------------------------------
/ch11/instr_info/README.md:
--------------------------------------------------------------------------------
1 | # Write your first instruction description #
2 |
3 | In this exercise, we print all the non-generic instructions defined for the backend identified by `MyTarget`.
4 |
5 | The goal of this exercise is to familiarize yourself with adding instructions in TableGen using the instr-info TableGen backend.
6 |
7 | To do that, read the directions in `yourTurn-instrinfo.td`, implement the missing instructions, and run the commands to test your solution by following the steps in the next two sections.
8 |
9 | ## Configure your build directory ##
10 |
11 | ```bash
12 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm/ -Bbuild .
13 | ```
14 |
15 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option).
16 |
17 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed.
18 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package.
19 |
20 | ## Build ##
21 |
22 | ```bash
23 | ninja -C build
24 | ```
25 |
26 | This builds the default target in the build directory.
27 |
28 | Then run:
29 | ```bash
30 | ./build/print_instr
31 | ```
32 |
33 | This will print all the non-generic instructions.
34 |
35 | You should see your added instructions and, if you followed the directions, these instructions should have the same profile as the ones printed with the `SOLUTION_` prefix.
36 |
37 | ## Solution ##
38 |
39 | Look at the content of `solution-instrinfo.td` for a possible solution.
40 |
--------------------------------------------------------------------------------
/ch11/instr_info/instrinfo.td:
--------------------------------------------------------------------------------
1 | def ADDi32: Instruction<> { // Reference instruction: one GPR32 output, two GPR32 inputs.
2 | let Namespace = "MyTarget";
3 | let OutOperandList = (outs GPR32:$dst);
4 | let InOperandList = (ins GPR32:$src0, GPR32:$src1);
5 | }
6 |
7 | include "solution-instrinfo.td"
8 | include "yourTurn-instrinfo.td"
9 |
--------------------------------------------------------------------------------
/ch11/instr_info/main.cpp:
--------------------------------------------------------------------------------
1 | #include "MyTargetInstrInfo.h"
2 | #include "MyTargetRegisterInfo.h"
3 |
4 | #include "llvm/CodeGen/TargetRegisterInfo.h"
5 | #include "llvm/Support/Debug.h" // For dbgs().
6 |
7 | using namespace llvm;
8 |
9 | // Print every non-pseudo instruction of MyTarget together with its
10 | // isAsCheapAsAMove flag and the kind of each of its operands.
11 | int main() {
12 |   MyTargetRegisterInfo MyTRI;
13 |   MyTargetInstrInfo MyTII;
14 |   unsigned NbInstrs = MyTII.getNumOpcodes();
15 |   dbgs() << "Found " << NbInstrs << " instructions for MyTarget.\n";
16 |   dbgs() << "Print the non-generic ones:\n";
17 |   for (unsigned Opc = 0; Opc != NbInstrs; ++Opc) {
18 |     const MCInstrDesc &InstrDesc = MyTII.get(Opc);
19 |     // Skip the generic opcodes (detected through their pseudo flag) to focus
20 |     // on the target specific ones.
21 |     if (InstrDesc.isPseudo())
22 |       continue;
23 |
24 |     dbgs() << MyTII.getName(Opc) << ":isAsCheapAsMove("
25 |            << InstrDesc.isAsCheapAsAMove() << ")\t";
26 |     // The binding is not called MCOI so that it does not shadow the MCOI
27 |     // namespace used in the comparisons below.
28 |     for (auto [OpIdx, OpInfo] : enumerate(InstrDesc.operands())) {
29 |       if (OpInfo.OperandType == MCOI::OperandType::OPERAND_REGISTER) {
30 |         // The first getNumDefs() operands are the definitions.
31 |         if (OpIdx < InstrDesc.getNumDefs())
32 |           dbgs() << "(def)";
33 |         dbgs() << MyTRI.getRegClassName(MyTRI.getRegClass(OpInfo.RegClass));
34 |       } else if (OpInfo.OperandType == MCOI::OperandType::OPERAND_IMMEDIATE) {
35 |         dbgs() << "imm";
36 |       } else
37 |         dbgs() << "other";
38 |       dbgs() << ", ";
39 |     }
40 |     dbgs() << '\n';
41 |   }
42 |   return 0;
43 | }
44 |
--------------------------------------------------------------------------------
/ch11/instr_info/mytarget.td:
--------------------------------------------------------------------------------
1 | include "llvm/Target/Target.td"
2 |
3 | include "reginfo.td"
4 | include "instrinfo.td"
5 | // Top-level target record; its instruction set is the InstrInfo record MyTargetii.
6 | def MyTargetii: InstrInfo;
7 | def MyTarget : Target {
8 | let InstructionSet = MyTargetii;
9 | }
10 |
11 |
--------------------------------------------------------------------------------
/ch11/instr_info/reginfo.td:
--------------------------------------------------------------------------------
1 | let Namespace = "MyTarget" in {
2 | //         +-----+-----+
3 | // 64-bit  | d0  | d1  |
4 | //         +--+--+--+--+
5 | // 32-bit  |s0|s1|s2|s3|
6 | //         +--+--+--+--+
7 |
8 | // Declare the different subregister indices for our target.
9 | // Parameters are size and offset (the offset is omitted for sub32_low).
10 | // Indices for singles in double.
11 | def sub32_low: SubRegIndex<32>;
12 | def sub32_high: SubRegIndex<32, 32>;
13 |
14 | // single registers.
15 | def s0 : Register<"s0">;
16 | def s1 : Register<"s1">;
17 | def s2 : Register<"s2">;
18 | def s3 : Register<"s3">;
19 |
20 | // double registers are made of two single registers:
21 | // d0 is made of s0 (sub32_low) and s1 (sub32_high),
22 | // d1 is made of s2 (sub32_low) and s3 (sub32_high).
23 | def d0 : Register<"d0"> {
24 | let SubRegIndices = [sub32_low, sub32_high];
25 | let SubRegs = [s0, s1];
26 | }
27 | def d1 : Register<"d1"> {
28 | let SubRegIndices = [sub32_low, sub32_high];
29 | let SubRegs = [s2, s3];
30 | }
31 | } // end namespace MyTarget.
32 | // Register classes: GPR32 holds s0-s3 (i32) and GPR64 holds d0-d1 (i64).
33 | def GPR32 : RegisterClass<"MyTarget", [i32], 32, (sequence "s%u", 0, 3)>;
34 | def GPR64 : RegisterClass<"MyTarget", [i64], 64, (add d0, d1)>;
35 |
--------------------------------------------------------------------------------
/ch11/instr_info/solution-instrinfo.td:
--------------------------------------------------------------------------------
1 | def SOLUTION_BREAKVALUE : Instruction<> { // One GPR64 input, two GPR32 outputs.
2 | let Namespace = "MyTarget";
3 | let OutOperandList = (outs GPR32:$dst0, GPR32:$dst1);
4 | let InOperandList = (ins GPR64:$src0);
5 | }
6 | // One i32 immediate input, one GPR32 output; flagged as cheap as a move.
7 | def SOLUTION_LOADIMM32 : Instruction<> {
8 | let Namespace = "MyTarget";
9 | let isAsCheapAsAMove = true;
10 | let OutOperandList = (outs GPR32:$dst);
11 | let InOperandList = (ins i32imm:$imm);
12 | }
13 |
14 |
--------------------------------------------------------------------------------
/ch11/instr_info/yourTurn-instrinfo.td:
--------------------------------------------------------------------------------
1 | // Your turn:
2 |
3 | // BREAKVALUE opcode
4 | // - Add an instruction that creates two 32-bit values out of a 64-bit value.
5 | // - The input value will be on the GPR64 register class and the output
6 | // values on the GPR32 register class.
7 | // - The instruction needs to be in the MyTarget namespace.
8 | // - The name of the opcode must start with YT (this is just to avoid name
9 | // collision with the solution.)
10 |
11 | // LOADIMM32 opcode
12 | // - Add an instruction that materializes a 32-bit immediate value
13 | // into a 32-bit register.
14 | // - The input value will be an immediate operand and the output
15 | // value on a GPR32 register class.
16 | // - The instruction needs to be in the MyTarget namespace.
17 | // - The name of the opcode must start with YT (this is just to avoid name
18 | // collision with the solution.)
19 | // - The instruction needs to be marked isAsCheapAsAMove.
20 | //
21 | // Hint for the immediate operand, look for the appropriate record name
22 | // under the OPERAND_IMMEDIATE OperandType in llvm/include/llvm/Target/Target.td
23 |
--------------------------------------------------------------------------------
/ch11/mir_format/README.md:
--------------------------------------------------------------------------------
1 | In this directory, you can find examples of `mir` (Machine IR) files.
2 |
3 | To produce a `mir` file you can use the `-stop-before=<pass-name>` or `-stop-after=<pass-name>` option of `llc`.
4 |
5 | In this directory, we have one input LLVM IR file that we used to produce:
6 | - A pure `mir` file `full-dump.mir`, and
7 | - A simplified one `simplified-dump.mir`, and
8 | - A shrunk one `shrunk-dump.mir`
9 |
10 | The pure one has all the fields set, whereas the simplified one only contains the fields that have non-default values or cannot be recomputed.
11 | The shrunk one shows you the kind of manual editing you can make to reduce a file even more, in particular how the LLVM IR section can be removed.
12 |
13 |
--------------------------------------------------------------------------------
/ch11/mir_format/full-dump.mir:
--------------------------------------------------------------------------------
1 | --- |
2 | ; ModuleID = 'input.ll'
3 | source_filename = "input.ll"
4 | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
5 | target triple = "arm64-apple-macosx14.0.0"
6 |
7 | define i32 @_Z3fooPxx(ptr nocapture noundef readonly %arg, i64 noundef %arg1) local_unnamed_addr {
8 | bb:
9 | %i = icmp slt i64 %arg1, 0
10 | br i1 %i, label %bb2, label %bb4
11 |
12 | bb2: ; preds = %bb
13 | %i3 = tail call i64 @_Z3barx(i64 noundef %arg1)
14 | br label %bb4
15 |
16 | bb4: ; preds = %bb2, %bb
17 | %i5 = phi i64 [ %i3, %bb2 ], [ %arg1, %bb ]
18 | %i6 = add nsw i64 %i5, 18
19 | %i7 = getelementptr inbounds i64, ptr %arg, i64 %i6
20 | %i8 = load i64, ptr %i7, align 8
21 | %i9 = add nsw i64 %i8, %i5
22 | %i10 = trunc i64 %i9 to i32
23 | ret i32 %i10
24 | }
25 |
26 | declare i64 @_Z3barx(i64 noundef) local_unnamed_addr
27 |
28 | ...
29 | ---
30 | name: _Z3fooPxx
31 | alignment: 4
32 | exposesReturnsTwice: false
33 | legalized: false
34 | regBankSelected: false
35 | selected: false
36 | failedISel: false
37 | tracksRegLiveness: true
38 | hasWinCFI: false
39 | callsEHReturn: false
40 | callsUnwindInit: false
41 | hasEHCatchret: false
42 | hasEHScopes: false
43 | hasEHFunclets: false
44 | isOutlined: false
45 | debugInstrRef: false
46 | failsVerification: false
47 | tracksDebugUserValues: false
48 | registers:
49 | - { id: 0, class: gpr64all, preferred-register: '' }
50 | - { id: 1, class: gpr64, preferred-register: '' }
51 | - { id: 2, class: gpr64, preferred-register: '' }
52 | - { id: 3, class: gpr64, preferred-register: '' }
53 | - { id: 4, class: gpr64all, preferred-register: '' }
54 | - { id: 5, class: gpr64common, preferred-register: '' }
55 | - { id: 6, class: gpr32, preferred-register: '' }
56 | - { id: 7, class: gpr32, preferred-register: '' }
57 | - { id: 8, class: gpr32, preferred-register: '' }
58 | liveins:
59 | - { reg: '$x0', virtual-reg: '%2' }
60 | - { reg: '$x1', virtual-reg: '%3' }
61 | frameInfo:
62 | isFrameAddressTaken: false
63 | isReturnAddressTaken: false
64 | hasStackMap: false
65 | hasPatchPoint: false
66 | stackSize: 0
67 | offsetAdjustment: 0
68 | maxAlignment: 1
69 | adjustsStack: true
70 | hasCalls: true
71 | stackProtector: ''
72 | functionContext: ''
73 | maxCallFrameSize: 0
74 | cvBytesOfCalleeSavedRegisters: 0
75 | hasOpaqueSPAdjustment: false
76 | hasVAStart: false
77 | hasMustTailInVarArgFunc: false
78 | hasTailCall: false
79 | isCalleeSavedInfoValid: false
80 | localFrameSize: 0
81 | savePoint: ''
82 | restorePoint: ''
83 | fixedStack: []
84 | stack: []
85 | entry_values: []
86 | callSites: []
87 | debugValueSubstitutions: []
88 | constants: []
89 | machineFunctionInfo: {}
90 | body: |
91 | bb.0.bb:
92 | successors: %bb.1(0x30000000), %bb.2(0x50000000)
93 | liveins: $x0, $x1
94 |
95 | %3:gpr64 = COPY $x1
96 | %2:gpr64 = COPY $x0
97 | TBZX %3, 63, %bb.2
98 | B %bb.1
99 |
100 | bb.1.bb2:
101 | successors: %bb.2(0x80000000)
102 |
103 | ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
104 | $x0 = COPY %3
105 | BL @_Z3barx, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0
106 | ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
107 | %4:gpr64all = COPY $x0
108 | %0:gpr64all = COPY %4
109 |
110 | bb.2.bb4:
111 | %1:gpr64 = PHI %3, %bb.0, %0, %bb.1
112 | %5:gpr64common = ADDXrs %2, %1, 3
113 | %6:gpr32 = LDRWui killed %5, 36 :: (load (s32) from %ir.i7, align 8)
114 | %7:gpr32 = COPY %1.sub_32
115 | $w0 = ADDWrr %6, %7
116 | RET_ReallyLR implicit $w0
117 |
118 | ...
119 |
--------------------------------------------------------------------------------
/ch11/mir_format/input.ll:
--------------------------------------------------------------------------------
1 | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
2 | target triple = "arm64-apple-macosx14.0.0"
3 |
4 | define i32 @_Z3fooPxx(ptr nocapture noundef readonly %arg, i64 noundef %arg1) local_unnamed_addr {
5 | bb:
6 | %i = icmp slt i64 %arg1, 0
7 | br i1 %i, label %bb2, label %bb4
8 |
9 | bb2: ; preds = %bb
10 | %i3 = tail call i64 @_Z3barx(i64 noundef %arg1)
11 | br label %bb4
12 |
13 | bb4: ; preds = %bb2, %bb
14 | %i5 = phi i64 [ %i3, %bb2 ], [ %arg1, %bb ]
15 | %i6 = add nsw i64 %i5, 18
16 | %i7 = getelementptr inbounds i64, ptr %arg, i64 %i6
17 | %i8 = load i64, ptr %i7, align 8
18 | %i9 = add nsw i64 %i8, %i5
19 | %i10 = trunc i64 %i9 to i32
20 | ret i32 %i10
21 | }
22 |
23 | declare i64 @_Z3barx(i64 noundef) local_unnamed_addr
24 |
--------------------------------------------------------------------------------
/ch11/mir_format/shrunk-dump.mir:
--------------------------------------------------------------------------------
1 | ---
2 | name: _Z3fooPxx
3 | tracksRegLiveness: true
4 | body: |
5 | bb.0:
6 | liveins: $x0, $x1
7 |
8 | %3:gpr64 = COPY $x1
9 | %2:gpr64 = COPY $x0
10 | TBZX %3, 63, %bb.2
11 | B %bb.1
12 |
13 | bb.1:
14 | ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
15 | $x0 = COPY %3
16 | BL 123, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0
17 | ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
18 | %4:gpr64all = COPY $x0
19 | %0:gpr64all = COPY %4
20 |
21 | bb.2:
22 | %1:gpr64 = PHI %3, %bb.0, %0, %bb.1
23 | %5:gpr64common = ADDXrs %2, %1, 3
24 | %6:gpr32 = LDRWui killed %5, 36 :: (load (s32), align 8)
25 | %7:gpr32 = COPY %1.sub_32
26 | $w0 = ADDWrr %6, %7
27 | RET_ReallyLR implicit $w0
28 |
29 | ...
30 |
--------------------------------------------------------------------------------
/ch11/mir_format/simplified-dump.mir:
--------------------------------------------------------------------------------
1 | --- |
2 | ; ModuleID = 'input.ll'
3 | source_filename = "input.ll"
4 | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
5 | target triple = "arm64-apple-macosx14.0.0"
6 |
7 | define i32 @_Z3fooPxx(ptr nocapture noundef readonly %arg, i64 noundef %arg1) local_unnamed_addr {
8 | bb:
9 | %i = icmp slt i64 %arg1, 0
10 | br i1 %i, label %bb2, label %bb4
11 |
12 | bb2: ; preds = %bb
13 | %i3 = tail call i64 @_Z3barx(i64 noundef %arg1)
14 | br label %bb4
15 |
16 | bb4: ; preds = %bb2, %bb
17 | %i5 = phi i64 [ %i3, %bb2 ], [ %arg1, %bb ]
18 | %i6 = add nsw i64 %i5, 18
19 | %i7 = getelementptr inbounds i64, ptr %arg, i64 %i6
20 | %i8 = load i64, ptr %i7, align 8
21 | %i9 = add nsw i64 %i8, %i5
22 | %i10 = trunc i64 %i9 to i32
23 | ret i32 %i10
24 | }
25 |
26 | declare i64 @_Z3barx(i64 noundef) local_unnamed_addr
27 |
28 | ...
29 | ---
30 | name: _Z3fooPxx
31 | alignment: 4
32 | tracksRegLiveness: true
33 | registers:
34 | - { id: 0, class: gpr64all }
35 | - { id: 1, class: gpr64 }
36 | - { id: 2, class: gpr64 }
37 | - { id: 3, class: gpr64 }
38 | - { id: 4, class: gpr64all }
39 | - { id: 5, class: gpr64common }
40 | - { id: 6, class: gpr32 }
41 | - { id: 7, class: gpr32 }
42 | - { id: 8, class: gpr32 }
43 | liveins:
44 | - { reg: '$x0', virtual-reg: '%2' }
45 | - { reg: '$x1', virtual-reg: '%3' }
46 | frameInfo:
47 | maxAlignment: 1
48 | adjustsStack: true
49 | hasCalls: true
50 | maxCallFrameSize: 0
51 | machineFunctionInfo: {}
52 | body: |
53 | bb.0.bb:
54 | successors: %bb.1(0x30000000), %bb.2(0x50000000)
55 | liveins: $x0, $x1
56 |
57 | %3:gpr64 = COPY $x1
58 | %2:gpr64 = COPY $x0
59 | TBZX %3, 63, %bb.2
60 | B %bb.1
61 |
62 | bb.1.bb2:
63 | ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
64 | $x0 = COPY %3
65 | BL @_Z3barx, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0
66 | ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
67 | %4:gpr64all = COPY $x0
68 | %0:gpr64all = COPY %4
69 |
70 | bb.2.bb4:
71 | %1:gpr64 = PHI %3, %bb.0, %0, %bb.1
72 | %5:gpr64common = ADDXrs %2, %1, 3
73 | %6:gpr32 = LDRWui killed %5, 36 :: (load (s32) from %ir.i7, align 8)
74 | %7:gpr32 = COPY %1.sub_32
75 | $w0 = ADDWrr %6, %7
76 | RET_ReallyLR implicit $w0
77 |
78 | ...
79 |
--------------------------------------------------------------------------------
/ch11/register_units/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Build script for the `print_regunit` tool: runs llvm-tblgen on both register
2 | # descriptions and compiles the tool against the generated register info.
3 | cmake_minimum_required(VERSION 3.22)
4 |
5 | project(CH11
6 |   LANGUAGES CXX C)
7 |
8 | # Require C++17; fail at configure time instead of silently using an older one.
9 | set(CMAKE_CXX_STANDARD 17)
10 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
11 |
12 | include(../../cmake/utils/set-llvm-install-prefix.cmake)
13 |
14 | # Hook up the TableGen tooling.
15 | set(LLVM_TABLEGEN_EXE "${LLVM_TOOLS_BINARY_DIR}/llvm-tblgen")
16 | include("${LLVM_CMAKE_DIR}/AddLLVM.cmake")
17 | include("${LLVM_CMAKE_DIR}/TableGen.cmake")
18 |
19 | # Create the different TableGen outputs.
20 | set(LLVM_TARGET_DEFINITIONS solution-reginfo.td)
21 | tablegen(LLVM SolutionGenRegisterInfo.inc -gen-register-info)
22 |
23 | set(LLVM_TARGET_DEFINITIONS yourTurn-reginfo.td)
24 | tablegen(LLVM YourTurnGenRegisterInfo.inc -gen-register-info)
25 |
26 | # Register a target for all the TableGen outputs.
27 | add_public_tablegen_target(CommonTableGen)
28 |
29 | # Make that target part of the `all` target.
30 | set_target_properties(CommonTableGen PROPERTIES EXCLUDE_FROM_ALL 0)
31 |
32 | set(CURR_TARGET
33 |   print_regunit
34 | )
35 |
36 | set(SRC
37 |   "main.cpp"
38 |   "SolutionRegisterInfo.cpp"
39 |   "YourTurnRegisterInfo.cpp"
40 | )
41 |
42 | set(INC
43 |   "SolutionRegisterInfo.h"
44 |   "YourTurnRegisterInfo.h"
45 | )
46 |
47 | # Now build our tools
48 | add_executable(${CURR_TARGET} ${SRC} ${INC})
49 | # The sources include the generated .inc files, so TableGen must run first.
50 | add_dependencies(${CURR_TARGET} CommonTableGen)
51 |
52 | # Compile the C++ sources without RTTI (scoped to this target only).
53 | target_compile_options(${CURR_TARGET} PRIVATE
54 |   "$<$<COMPILE_LANGUAGE:CXX>:-fno-rtti>")
55 |
56 | # Find the libraries that correspond to the LLVM components
57 | # that we wish to use
58 | llvm_map_components_to_libnames(llvm_libs support core mc codegen)
59 |
60 | # Link against LLVM libraries
61 | target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
62 |
63 | # To find the generated files
64 | target_include_directories(${CURR_TARGET} PRIVATE
65 |   "${CMAKE_CURRENT_SOURCE_DIR}/.."
66 |   "${CMAKE_CURRENT_BINARY_DIR}")
67 |
--------------------------------------------------------------------------------
/ch11/register_units/README.md:
--------------------------------------------------------------------------------
1 | # Write your first register description #
2 |
3 | In this exercise, we print all the registers of a specific backend.
4 |
5 | The goal is to teach you how to write the register description for registers, register units, and register classes and use them in the register-info TableGen backend.
6 |
7 | Follow the directions in `yourTurn-reginfo.td` and implement the described register hierarchy.
8 |
9 | Then, use the steps below to test your changes.
10 |
11 | ## Configure your build directory ##
12 |
13 | ```bash
14 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm/ -Bbuild .
15 | ```
16 |
17 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option).
18 |
19 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed.
20 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package.
21 |
22 | ## Build ##
23 |
24 | ```bash
25 | ninja -C build
26 | ```
27 |
28 | This builds the default target in the build directory.
29 |
30 | Then run:
31 | ```bash
32 | ./build/print_regunit
33 | ```
34 |
35 | This will print the register info for both the solution and what you implemented.
36 |
37 | You should aim to have your register info (printed under the `RegisterInfo for YourTurn` section) look exactly like what is under the `RegisterInfo for Solution` section.
38 |
39 | ## Solution ##
40 |
41 | Look at the content of `solution-reginfo.td` for a possible solution.
42 |
--------------------------------------------------------------------------------
/ch11/register_units/SolutionRegisterInfo.cpp:
--------------------------------------------------------------------------------
1 | #include "SolutionRegisterInfo.h"
2 |
3 | #include "llvm/CodeGen/MachineFunction.h"
4 | #include "llvm/CodeGen/TargetFrameLowering.h"
5 | #include "llvm/CodeGen/TargetRegisterInfo.h"
6 | #include "llvm/CodeGen/TargetSubtargetInfo.h"
7 | // Frame lowering stub: the stack grows down, uses 16-byte alignment, and is not realignable.
8 | namespace llvm {
9 | class SolutionFrameLowering : public TargetFrameLowering { // NOTE(review): presumably needed by the generated code included below - confirm.
10 | public:
11 | SolutionFrameLowering()
12 | : TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16),
13 | false /*StackRealignable*/) {}
14 | };
15 | } // namespace llvm
16 |
17 | #define GET_REGINFO_TARGET_DESC
18 | #define GET_REGINFO_MC_DESC
19 | #include "SolutionGenRegisterInfo.inc"
20 |
--------------------------------------------------------------------------------
/ch11/register_units/SolutionRegisterInfo.h:
--------------------------------------------------------------------------------
1 | #ifndef __SOLUTIONREGISTERINFO_H__
2 | #define __SOLUTIONREGISTERINFO_H__
3 |
4 | #include "llvm/ADT/BitVector.h"
5 | #include "llvm/CodeGen/MachineBasicBlock.h"
6 | #include
7 |
8 | #define GET_REGINFO_HEADER
9 | #define GET_REGINFO_ENUM // Technically this one belongs to MC.
10 | #include "SolutionGenRegisterInfo.inc"
11 |
12 | namespace llvm {
13 | class MachineFunction;
14 |
15 | class SolutionRegisterInfo : public SolutionGenRegisterInfo { // Register info for the Solution target; every override below is a stub with a fixed return value.
16 | public:
17 | SolutionRegisterInfo() : SolutionGenRegisterInfo(Register()) {} // Hands a default-constructed Register to the generated base class.
18 |
19 | BitVector getReservedRegs(const MachineFunction &MF) const override {
20 | return BitVector(); // Always an empty BitVector; MF is not consulted.
21 | }
22 |
23 | const MCPhysReg *
24 | getCalleeSavedRegs(const MachineFunction *MF) const override {
25 | return nullptr; // No callee-saved list is provided.
26 | }
27 |
28 | bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
29 | unsigned FIOperandNum,
30 | RegScavenger *RS = nullptr) const override {
31 | return false; // Leaves the instruction untouched and always reports false.
32 | }
33 |
34 | Register getFrameRegister(const MachineFunction &MF) const override {
35 | return Register(); // Default-constructed Register, regardless of MF.
36 | }
37 | };
38 |
39 | } // end namespace llvm.
40 |
41 | #endif
42 |
--------------------------------------------------------------------------------
/ch11/register_units/YourTurnRegisterInfo.cpp:
--------------------------------------------------------------------------------
1 | #include "YourTurnRegisterInfo.h"
2 |
3 | #include "llvm/CodeGen/MachineFunction.h"
4 | #include "llvm/CodeGen/TargetFrameLowering.h"
5 | #include "llvm/CodeGen/TargetRegisterInfo.h"
6 | #include "llvm/CodeGen/TargetSubtargetInfo.h"
7 |
8 | namespace llvm {
9 | class YourTurnFrameLowering : public TargetFrameLowering { // Minimal frame-lowering type: it only forwards fixed values to the base-class constructor.
10 | public:
11 | YourTurnFrameLowering()
12 | : TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16), // NOTE(review): presumably stack alignment, local-area offset and transient alignment -- confirm against TargetFrameLowering.
13 | false /*StackRealignable*/) {}
14 | }; // NOTE(review): appears to exist so that the generated YourTurnGenRegisterInfo.inc included below can name this type -- confirm.
15 | } // namespace llvm
16 |
17 | #define GET_REGINFO_TARGET_DESC
18 | #define GET_REGINFO_MC_DESC
19 | #include "YourTurnGenRegisterInfo.inc"
20 |
--------------------------------------------------------------------------------
/ch11/register_units/YourTurnRegisterInfo.h:
--------------------------------------------------------------------------------
1 | #ifndef __YOURTURNREGISTERINFOS_H__
2 | #define __YOURTURNREGISTERINFOS_H__
3 |
4 | #include "llvm/ADT/BitVector.h"
5 | #include "llvm/CodeGen/MachineBasicBlock.h"
6 | #include
7 |
8 | #define GET_REGINFO_HEADER
9 | #define GET_REGINFO_ENUM // Technically this one belongs to MC.
10 | #include "YourTurnGenRegisterInfo.inc"
11 |
12 | namespace llvm {
13 | class MachineFunction;
14 |
15 | class YourTurnRegisterInfo : public YourTurnGenRegisterInfo { // Register info for the YourTurn target; every override below is a stub with a fixed return value.
16 | public:
17 | YourTurnRegisterInfo() : YourTurnGenRegisterInfo(Register()) {} // Hands a default-constructed Register to the generated base class.
18 |
19 | BitVector getReservedRegs(const MachineFunction &MF) const override {
20 | return BitVector(); // Always an empty BitVector; MF is not consulted.
21 | }
22 |
23 | const MCPhysReg *
24 | getCalleeSavedRegs(const MachineFunction *MF) const override {
25 | return nullptr; // No callee-saved list is provided.
26 | }
27 |
28 | bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
29 | unsigned FIOperandNum,
30 | RegScavenger *RS = nullptr) const override {
31 | return false; // Leaves the instruction untouched and always reports false.
32 | }
33 |
34 | Register getFrameRegister(const MachineFunction &MF) const override {
35 | return Register(); // Default-constructed Register, regardless of MF.
36 | }
37 | };
38 |
39 | } // end namespace llvm.
40 | #endif
41 |
--------------------------------------------------------------------------------
/ch11/register_units/main.cpp:
--------------------------------------------------------------------------------
1 | #include "SolutionRegisterInfo.h"
2 | #include "YourTurnRegisterInfo.h"
3 |
4 | #include "llvm/CodeGen/TargetRegisterInfo.h"
5 | #include "llvm/MC/MCRegisterInfo.h" // For the RegUnit iterators.
6 | #include "llvm/Support/Debug.h" // For dbgs().
7 |
8 | using namespace llvm;
9 |
10 | // Dumps, for the reference description and then for the exercise one, every
11 | // register unit together with the registers reaching it, followed by the
12 | // members of every register class. All output goes to dbgs(); always returns 0.
13 | int main() {
14 |   SolutionRegisterInfo SolutionRI;
15 |   YourTurnRegisterInfo YourTurnRI;
16 |   TargetRegisterInfo *RegInfos[] = {&SolutionRI, &YourTurnRI};
17 |   const char *RIName[] = {"Solution", "YourTurn"};
18 |   // Go through the register units and print them
19 |   for (unsigned i = 0; i < sizeof(RegInfos) / sizeof(RegInfos[0]); ++i) {
20 |     dbgs() << "===== RegisterInfo for " << RIName[i] << " =====\n";
21 |     TargetRegisterInfo *TRI = RegInfos[i];
22 |     dbgs() << "== RegisterUnit ==\n";
23 |     // Traverse all the units and print out which registers it touches.
24 |     for (unsigned Unit = 0, E = TRI->getNumRegUnits(); Unit != E; ++Unit) {
25 |       dbgs() << "RegUnit " << Unit << ":\t";
26 |       // For each root of the unit, list the root itself and its super-registers.
27 |       for (MCRegUnitRootIterator RI(Unit, TRI); RI.isValid(); ++RI) {
28 |         for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) {
29 |           dbgs() << TRI->getName(*SI) << ", ";
30 |         }
31 |       }
32 |       // End the line once per unit so that a unit with several roots still
33 |       // prints on a single "RegUnit N:" line.
34 |       dbgs() << "\n";
35 |     }
36 |     dbgs() << "== RegisterClass ==\n";
37 |     for (const TargetRegisterClass *RegClass : TRI->regclasses()) {
38 |       dbgs() << "RegClass " << TRI->getRegClassName(RegClass) << ":\t";
39 |       for (Register Reg : *RegClass) {
40 |         dbgs() << TRI->getName(Reg) << ", ";
41 |       }
42 |       dbgs() << "\n";
43 |     }
44 |     dbgs() << "======= End RegisterInfo ========\n\n\n";
45 |   }
46 |   return 0;
47 | }
48 |
--------------------------------------------------------------------------------
/ch11/register_units/solution-reginfo.td:
--------------------------------------------------------------------------------
1 | include "llvm/Target/Target.td"
2 |
3 | let Namespace = "Solution" in {
4 | // Describe the following hierarchy.
5 | // +-----------+-----------+-----------+
6 | // 128-bit quad | q0 | q1 | q2 |
7 | // +-----+-----+-----+-----+-----------+
8 | // 64-bit double | d0 | d1 | d2 | | |
9 | // +--+--+--+--+-----+-----+-----------+
10 | // 32-bit single |s0|s1|s2| | | |
11 | // +--+--+--+--+-----------+-----------+
12 | //
13 | // Empty cells mean that no architectural register exists for that cell (it
14 | // is not addressable.)
15 |
16 | // Declare the different subregister for our target.
17 | // Parameters are size and offset.
18 | // Indices for singles in double.
19 | def sub32_low: SubRegIndex<32>;
20 | def sub32_high: SubRegIndex<32, 32>;
21 |
22 | // Indices for doubles in quad.
23 | def sub64_low: SubRegIndex<64>;
24 | def sub64_high: SubRegIndex<64, 64>;
25 |
26 |
27 | // single registers.
28 | def s0 : Register<"s0">;
29 | def s1 : Register<"s1">;
30 | def s2 : Register<"s2">;
31 |
32 | // double registers are made of two single registers.
33 | // The pattern is a bit irregular because we assume
34 | // we have only 3 single registers.
35 | def d0 : Register<"d0"> {
36 | let SubRegIndices = [sub32_low, sub32_high];
37 | let SubRegs = [s0, s1];
38 | let CoveredBySubRegs = true;
39 | }
40 | def d1 : Register<"d1"> {
41 | let SubRegIndices = [sub32_low];
42 | let SubRegs = [s2];
43 | }
44 | def d2 : Register<"d2">;
45 |
46 | // quad registers are made of two double registers.
47 | // Similar pattern as double registers.
48 | def q0 : Register<"q0"> {
49 | let SubRegIndices = [sub64_low, sub64_high];
50 | let SubRegs = [d0, d1];
51 | let CoveredBySubRegs = true;
52 | }
53 | def q1 : Register<"q1"> {
54 | let SubRegIndices = [sub64_low];
55 | let SubRegs = [d2];
56 | }
57 | def q2 : Register<"q2">;
58 | } // end namespace Solution.
59 |
60 | // Bonus point: register classes.
61 | def SINGLES : RegisterClass<"Solution", [f32], 32, (sequence "s%u", 0, 2)>;
62 | def DOUBLES : RegisterClass<"Solution", [f64], 64, (sequence "d%u", 0, 2)>;
63 | def QUADS : RegisterClass<"Solution", [f128], 128, (sequence "q%u", 0, 2)>;
64 |
65 | // Boilerplate to get the TableGen backend happy.
66 | def myii: InstrInfo;
67 | def Solution : Target {
68 | let InstructionSet = myii;
69 | }
70 |
--------------------------------------------------------------------------------
/ch11/register_units/yourTurn-reginfo.td:
--------------------------------------------------------------------------------
1 | include "llvm/Target/Target.td"
2 |
3 | def : HwMode<"", []>;
4 | def myii: InstrInfo;
5 | def YourTurn : Target {
6 | let InstructionSet = myii;
7 | }
8 |
9 | let Namespace = "yourTurn" in {
10 | // Complete this description to describe a register hierarchy that looks like:
11 | // +-----------+-----------+-----------+
12 | // 128-bit quad | q0 | q1 | q2 |
13 | // +-----+-----+-----+-----+-----------+
14 | // 64-bit double | d0 | d1 | d2 | | |
15 | // +--+--+--+--+-----+-----+-----------+
16 | // 32-bit single |s0|s1|s2| | | |
17 | // +--+--+--+--+-----------+-----------+
18 | //
19 | // Empty cells mean that no architectural register exists for that cell (it
20 | // is not addressable.)
21 | def s0 : Register<"s0">;
22 |
23 | } // end namespace yourTurn.
24 |
25 | // Bonus point, create 1 register class per level.
26 | // They would map respectively to f128, f64, and f32.
27 | def SINGLES : RegisterClass<"yourTurn", [f32], 32, (add s0)>;
28 |
29 |
--------------------------------------------------------------------------------
/ch13/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # cmake_minimum_required() has to run before anything else so that the policy
2 | # defaults it selects apply to the whole file.
3 | cmake_minimum_required(VERSION 3.22)
4 |
5 | project(CH13
6 |   LANGUAGES CXX C)
7 |
8 | set(CMAKE_CXX_STANDARD 17)
9 |
10 | # Shared helpers; add_run_llc() used below is expected to come from
11 | # llc-run.cmake.
12 | include(../cmake/utils/set-llvm-install-prefix.cmake)
13 | include(../cmake/utils/llc-run.cmake)
14 |
15 | set(CURR_TARGET
16 |   produce-mir
17 | )
18 |
19 | # Starts out unset; every add_run_llc() call below receives this variable's
20 | # name (presumably to append the file it generates -- see llc-run.cmake).
21 | set(LLC_RUN_DEPENDENCIES)
22 |
23 | # After the dependency-list variable, args are:
24 | # - llc options
25 | # - input file
26 | # - output file
27 | add_run_llc(LLC_RUN_DEPENDENCIES "-stop-before=peephole-opt" "input.ll" "ssa.mir")
28 | add_run_llc(LLC_RUN_DEPENDENCIES "-stop-after=phi-node-elimination" "input.ll" "no-phi.mir")
29 | add_run_llc(LLC_RUN_DEPENDENCIES "-stop-after=virtregmap" "input.ll" "no-vreg.mir")
30 |
31 | # Custom target (nothing is compiled) that depends on the generated files.
32 | # ALL makes it part of the default build, so a plain `ninja` produces them.
33 | add_custom_target(${CURR_TARGET} ALL
34 |   DEPENDS ${LLC_RUN_DEPENDENCIES}
35 | )
36 |
--------------------------------------------------------------------------------
/ch13/README.md:
--------------------------------------------------------------------------------
1 | In this chapter you can discover the transformations performed by some of the optimization passes.
2 |
3 | To see that in action, first setup your build directory:
4 | ```bash
5 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild .
6 | ```
7 |
8 | Then, when you invoke `ninja`, it runs all the passes exercised in this folder.
9 | ```bash
10 | ninja -Cbuild
11 | ```
12 |
13 | This will produce files named `xxx.mir` in the `build` directory.
14 |
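15 | Each output file is produced by stopping the pass pipeline at a specific point:
16 | - `ssa.mir`: before the SSA machine optimizations (`-stop-before=peephole-opt`)
17 | - `no-phi.mir`: after the PHI elimination pass (`-stop-after=phi-node-elimination`)
18 | - `no-vreg.mir`: after register allocation, once virtual registers have been rewritten (`-stop-after=virtregmap`)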
19 |
20 | In any case, you can see the command used to produce the output by running the `ninja` command with the `-v` option.
21 |
--------------------------------------------------------------------------------
/ch13/input.ll:
--------------------------------------------------------------------------------
1 | define i64 @def_in_loop_use_outside(i64 %src, i64 %upper_bound) { ; Counts in a loop, then uses the value defined inside the loop after it.
2 | entry:
3 | br label %loop
4 |
5 | loop:
6 | %iv = phi i64 [0, %entry], [%iv_plus_1, %loop] ; 0 when coming from entry, the incremented value on the back edge.
7 | %iv_plus_1 = add i64 %iv, 1
8 | %cond = icmp ult i64 %iv_plus_1, %upper_bound ; Unsigned compare: keep looping while %iv_plus_1 < %upper_bound.
9 | br i1 %cond, label %loop, label %end
10 |
11 | end:
12 | %tmp = add i64 %iv_plus_1, %src ; %iv_plus_1 is defined in %loop and used here, outside of it.
13 | %res = add i64 %tmp, %iv_plus_1 ; Result is %src + 2 * %iv_plus_1.
14 | ret i64 %res
15 | }
16 |
--------------------------------------------------------------------------------
/ch3/README.md:
--------------------------------------------------------------------------------
1 | This directory contains some of the examples that are used in Chapter 3.
2 |
3 | For further examples and exercises, please navigate in the subdirectories.
4 |
--------------------------------------------------------------------------------
/ch3/input.c:
--------------------------------------------------------------------------------
1 | extern int baz();
2 | extern void bar(int);
3 |
4 | void foo(int a, int b) { // Passes a + b to bar; when the sum is 0xFF it is first reported, then replaced by baz()'s result.
5 | int var = a + b;
6 | if (var == 0xFF) {
7 | bar(var);
8 | var = baz(); // var is overwritten only on this path.
9 | }
10 | bar(var); // Runs on both paths, with whichever value var holds.
11 | }
12 |
13 |
--------------------------------------------------------------------------------
/ch3/irreducible.c:
--------------------------------------------------------------------------------
1 | extern void someFct();
2 |
3 | int irreducible(int shouldSkip1stCall) { // Loop with two entry points: its top, and the SKIP label reached through the goto.
4 | int i = 0;
5 | if (shouldSkip1stCall)
6 | goto SKIP; // Jumps into the middle of the loop body, bypassing the first someFct() call.
7 | do {
8 | someFct();
9 | SKIP:;
10 | } while (++i < 7);
11 | return 32; // The result does not depend on the argument or on i.
12 | }
13 |
--------------------------------------------------------------------------------
/ch3/llvm_ir/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # cmake_minimum_required() has to run before anything else so that the policy
2 | # defaults it selects apply to the whole file.
3 | cmake_minimum_required(VERSION 3.22)
4 |
5 | project(CH3
6 |   LANGUAGES CXX C)
7 |
8 | set(CMAKE_CXX_STANDARD 17)
9 |
10 | # Shared helper; llvm_map_components_to_libnames() used below is expected to be
11 | # available once it has run.
12 | include(../../cmake/utils/set-llvm-install-prefix.cmake)
13 |
14 | set(CURR_TARGET
15 |   build_ir
16 | )
17 |
18 | set(SRC
19 |   "main.cpp"
20 |   "solution/populate_function.cpp"
21 |   "your_turn/populate_function.cpp"
22 | )
23 |
24 | set(INC
25 | )
26 |
27 | # Now build our tools
28 | add_executable(${CURR_TARGET} ${SRC} ${INC})
29 |
30 | # Disable RTTI for this target only, instead of overwriting CMAKE_CXX_FLAGS
31 | # (which would discard any flags supplied by the user).
32 | # NOTE(review): presumably needed to match an LLVM built without RTTI.
33 | target_compile_options(${CURR_TARGET}
34 |   PRIVATE
35 |     $<$<COMPILE_LANGUAGE:CXX>:-fno-rtti>
36 | )
37 |
38 | # Find the libraries that correspond to the LLVM components
39 | # that we wish to use
40 | llvm_map_components_to_libnames(llvm_libs support core)
41 |
42 | # Link against LLVM libraries
43 | target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
44 |
--------------------------------------------------------------------------------
/ch3/llvm_ir/README.md:
--------------------------------------------------------------------------------
1 | # Building LLVM IR #
2 |
3 | In this exercise, you will build your first function using the LLVM intermediate representation (IR).
4 |
5 | Open `your_turn/populate_function.cpp` and implement the `myBuildModule` function according to the comment
6 | in that file.
7 | Then, follow the steps below to configure and build the test program and check that your solution matches the output of the provided solution.
8 |
9 | In other words, check that the LLVM IR printed after the `## Processing module from Your solution implementation` line matches the IR printed after the `## Processing module from Reference implementation` line.
10 |
11 | ## Configuring your environment ##
12 |
13 |
14 | ```bash
15 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild .
16 | ninja -Cbuild
17 | ```
18 |
19 | ## Running the example ##
20 |
21 | ```bash
22 | ./build/build_ir
23 | ```
24 |
25 | ## Solution ##
26 |
27 | If your output doesn't match the solution, look at `solution/populate_function.cpp` to see how to implement the desired IR.
28 |
--------------------------------------------------------------------------------
/ch3/llvm_ir/input.c:
--------------------------------------------------------------------------------
1 | extern int baz();
2 |
3 | extern void bar(int);
4 |
5 | void foo(int a, int b) { // Passes a + b to bar; when the sum is 0xFF it is first reported, then replaced by baz()'s result.
6 | int var = a + b;
7 | if (var == 0xFF) {
8 | bar(var);
9 | var = baz(); // var is overwritten only on this path.
10 | }
11 | bar(var); // Runs on both paths, with whichever value var holds.
12 | }
13 |
--------------------------------------------------------------------------------
/ch3/llvm_ir/main.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/IR/Module.h"
2 | #include "llvm/IR/Verifier.h"
3 | #include "llvm/Support/Debug.h" // For errs().
4 |
5 | using namespace llvm;
6 |
7 | // Module builders implemented in your_turn/ and solution/ respectively.
8 | extern std::unique_ptr<Module> myBuildModule(LLVMContext &);
9 | extern std::unique_ptr<Module> solutionBuildModule(LLVMContext &);
10 |
11 | // Builds the reference module, then the exercise module, and prints and
12 | // verifies each of them.
13 | // A builder that returns nullptr is reported as "Nothing built" and skipped.
14 | // Returns 0 when every module that was built verifies, 1 otherwise.
15 | int main(int argc, char **argv) {
16 |   LLVMContext Ctxt;
17 |   bool hadError = false;
18 |   for (int i = 0; i != 2; ++i) {
19 |     bool isRefImpl = i == 0;
20 |     std::unique_ptr<Module> CurModule =
21 |         isRefImpl ? solutionBuildModule(Ctxt) : myBuildModule(Ctxt);
22 |     const char *msg = isRefImpl ? "Reference" : "Your solution";
23 |
24 |     outs() << "\n\n## Processing module from " << msg << " implementation\n";
25 |     if (!CurModule) {
26 |       outs() << "Nothing built\n";
27 |       continue;
28 |     }
29 |
30 |     CurModule->print(errs(), /*AssemblyAnnotationWriter=*/nullptr);
31 |     // verifyModule returns true if it finds errors and
32 |     // print them on the provided output stream (errs() here).
33 |     if (verifyModule(*CurModule, &errs())) {
34 |       errs() << msg << " does not verify\n";
35 |       hadError = true;
36 |     }
37 |   }
38 |
39 |   // Exit status convention: 0 means success, so report 1 only on error.
40 |   return hadError;
41 | }
42 |
--------------------------------------------------------------------------------
/ch3/llvm_ir/solution/populate_function.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/ADT/ArrayRef.h"
2 | #include "llvm/IR/BasicBlock.h"
3 | #include "llvm/IR/Constants.h" // For ConstantInt.
4 | #include "llvm/IR/DerivedTypes.h" // For PointerType, FunctionType.
5 | #include "llvm/IR/Function.h"
6 | #include "llvm/IR/IRBuilder.h"
7 | #include "llvm/IR/LLVMContext.h"
8 | #include "llvm/IR/Module.h"
9 | #include "llvm/IR/Type.h"
10 | #include "llvm/Support/Debug.h" // For errs().
11 |
12 | #include <memory> // For unique_ptr
13 |
14 | using namespace llvm;
15 |
16 | // The goal of this function is to build a Module that
17 | // represents the lowering of the following foo, a C function:
18 | // extern int baz();
19 | // extern void bar(int);
20 | // void foo(int a, int b) {
21 | // int var = a + b;
22 | // if (var == 0xFF) {
23 | // bar(var);
24 | // var = baz();
25 | // }
26 | // bar(var);
27 | // }
28 | //
29 | // The IR for this snippet (at O0) is:
30 | // define void @foo(i32 %arg, i32 %arg1) {
31 | // bb:
32 | // %i = alloca i32
33 | // %i2 = alloca i32
34 | // %i3 = alloca i32
35 | // store i32 %arg, ptr %i
36 | // store i32 %arg1, ptr %i2
37 | // %i4 = load i32, ptr %i
38 | // %i5 = load i32, ptr %i2
39 | // %i6 = add i32 %i4, %i5
40 | // store i32 %i6, ptr %i3
41 | // %i7 = load i32, ptr %i3
42 | // %i8 = icmp eq i32 %i7, 255
43 | // br i1 %i8, label %bb9, label %bb12
44 | //
45 | // bb9:
46 | // %i10 = load i32, ptr %i3
47 | // call void @bar(i32 %i10)
48 | // %i11 = call i32 @baz()
49 | // store i32 %i11, ptr %i3
50 | // br label %bb12
51 | //
52 | // bb12:
53 | // %i13 = load i32, ptr %i3
54 | // call void @bar(i32 %i13)
55 | // ret void
56 | // }
57 | //
58 | // declare void @bar(i32)
59 | // declare i32 @baz()
60 | // Reference implementation: declares baz and bar, defines foo block by block
61 | // with an IRBuilder, and returns the module that owns all three functions.
62 | std::unique_ptr<Module> solutionBuildModule(LLVMContext &Ctxt) {
63 |   // Create the types that we will use over and over.
64 |   Type *Int32Ty = Type::getInt32Ty(Ctxt);
65 |   Type *VoidTy = Type::getVoidTy(Ctxt);
66 |
67 |   // Create the high level module.
68 |   std::unique_ptr<Module> MyModule =
69 |       std::make_unique<Module>("Solution Module", Ctxt);
70 |
71 |   // Populate all the functions (just declaration for now.)
72 |   // Starting with baz.
73 |   FunctionType *BazTy =
74 |       FunctionType::get(/*RetTy=*/Int32Ty, /*isVarArg=*/false);
75 |   Function *BazFunc =
76 |       cast<Function>(MyModule->getOrInsertFunction("baz", BazTy).getCallee());
77 |
78 |   // bar.
79 |   FunctionType *BarTy =
80 |       FunctionType::get(VoidTy, /*ArgsTy=*/ArrayRef(Int32Ty), false);
81 |   Function *BarFunc =
82 |       cast<Function>(MyModule->getOrInsertFunction("bar", BarTy).getCallee());
83 |
84 |   // foo.
85 |   FunctionType *FooTy =
86 |       FunctionType::get(VoidTy, /*ArgsTy*/ ArrayRef({Int32Ty, Int32Ty}), false);
87 |   Function *FooFunc =
88 |       cast<Function>(MyModule->getOrInsertFunction("foo", FooTy).getCallee());
89 |
90 |   // Next, create the structure for foo.
91 |   // The blocks are appended to foo in creation order: bb, bb9, bb12.
92 |   BasicBlock *BB = BasicBlock::Create(Ctxt, /*Name=*/"bb", /*Parent=*/FooFunc);
93 |   BasicBlock *BB9 =
94 |       BasicBlock::Create(Ctxt, /*Name=*/"bb9", /*Parent=*/FooFunc);
95 |   BasicBlock *BB12 =
96 |       BasicBlock::Create(Ctxt, /*Name=*/"bb12", /*Parent=*/FooFunc);
97 |
98 |   // Populate bb.
99 |   IRBuilder Builder(BB);
100 |   // Allocate stack space for the local variables.
101 |   Value *I = Builder.CreateAlloca(Int32Ty);
102 |   Value *I2 = Builder.CreateAlloca(Int32Ty);
103 |   Value *I3 = Builder.CreateAlloca(Int32Ty);
104 |   // Get arg and arg1 from foo.
105 |   Value *Arg = FooFunc->getArg(0);
106 |   Value *Arg1 = FooFunc->getArg(1);
107 |   // Store them in the "local" variables.
108 |   Builder.CreateStore(Arg, I);
109 |   Builder.CreateStore(Arg1, I2);
110 |   // Reload from the local variables.
111 |   Value *I4 = Builder.CreateLoad(Int32Ty, I);
112 |   Value *I5 = Builder.CreateLoad(Int32Ty, I2);
113 |   // Do the add.
114 |   Value *I6 = Builder.CreateAdd(I4, I5);
115 |   // Store to local variable i3.
116 |   Builder.CreateStore(I6, I3);
117 |   // Reload from i3 (now you understand why O0 is slow!!)
118 |   Value *I7 = Builder.CreateLoad(Int32Ty, I3);
119 |   // Compare.
120 |   Value *Cst255 = ConstantInt::get(Int32Ty, 255);
121 |   Value *I8 = Builder.CreateICmpEQ(I7, Cst255);
122 |   // Then jump.
123 |   Builder.CreateCondBr(I8, BB9, BB12);
124 |
125 |   // Populate bb9.
126 |   // Reset the builder on the next basic block.
127 |   Builder.SetInsertPoint(BB9);
128 |   // Reload the local variable i3.
129 |   Value *I10 = Builder.CreateLoad(Int32Ty, I3);
130 |   // Call bar with i10.
131 |   Builder.CreateCall(BarFunc->getFunctionType(), BarFunc, ArrayRef(I10));
132 |   // Call baz.
133 |   Value *I11 = Builder.CreateCall(BazFunc->getFunctionType(), BazFunc);
134 |   // Store the result in the local variable i3.
135 |   Builder.CreateStore(I11, I3);
136 |   // Jump to the next block.
137 |   Builder.CreateBr(BB12);
138 |
139 |   // Populate bb12.
140 |   // Reset the builder on the next basic block.
141 |   Builder.SetInsertPoint(BB12);
142 |   // Reload the local variable I3.
143 |   Value *I13 = Builder.CreateLoad(Int32Ty, I3);
144 |   // Call bar on i13.
145 |   Builder.CreateCall(BarFunc->getFunctionType(), BarFunc, ArrayRef(I13));
146 |   // Create the final return.
147 |   // Remember all basic block must end with a terminator.
148 |   Builder.CreateRetVoid();
149 |
150 |   return MyModule;
151 | }
152 |
--------------------------------------------------------------------------------
/ch3/llvm_ir/your_turn/populate_function.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/ADT/ArrayRef.h"
2 | #include "llvm/IR/BasicBlock.h"
3 | #include "llvm/IR/Constants.h" // For ConstantInt.
4 | #include "llvm/IR/DerivedTypes.h" // For PointerType, FunctionType.
5 | #include "llvm/IR/Function.h"
6 | #include "llvm/IR/IRBuilder.h"
7 | #include "llvm/IR/LLVMContext.h"
8 | #include "llvm/IR/Module.h"
9 | #include "llvm/IR/Type.h"
10 | #include "llvm/Support/Debug.h" // For errs().
11 |
12 | #include // For unique_ptr
13 |
14 | using namespace llvm;
15 |
16 | // The goal of this function is to build a Module that
17 | // represents the lowering of the following foo, a C function:
18 | // extern int baz();
19 | // extern void bar(int);
20 | // void foo(int a, int b) {
21 | // int var = a + b;
22 | // if (var == 0xFF) {
23 | // bar(var);
24 | // var = baz();
25 | // }
26 | // bar(var);
27 | // }
28 | //
29 | // The IR for this snippet (at O0) is:
30 | // define void @foo(i32 %arg, i32 %arg1) {
31 | // bb:
32 | // %i = alloca i32
33 | // %i2 = alloca i32
34 | // %i3 = alloca i32
35 | // store i32 %arg, ptr %i
36 | // store i32 %arg1, ptr %i2
37 | // %i4 = load i32, ptr %i
38 | // %i5 = load i32, ptr %i2
39 | // %i6 = add i32 %i4, %i5
40 | // store i32 %i6, ptr %i3
41 | // %i7 = load i32, ptr %i3
42 | // %i8 = icmp eq i32 %i7, 255
43 | // br i1 %i8, label %bb9, label %bb12
44 | //
45 | // bb9:
46 | // %i10 = load i32, ptr %i3
47 | // call void @bar(i32 %i10)
48 | // %i11 = call i32 @baz()
49 | // store i32 %i11, ptr %i3
50 | // br label %bb12
51 | //
52 | // bb12:
53 | // %i13 = load i32, ptr %i3
54 | // call void @bar(i32 %i13)
55 | // ret void
56 | // }
57 | //
58 | // declare void @bar(i32)
59 | // declare i32 @baz()
60 | // Exercise stub: implement this so that it builds and returns the module
61 | // described above. As long as it returns nullptr, nothing is built for the
62 | // exercise side.
63 | std::unique_ptr<Module> myBuildModule(LLVMContext &Ctxt) { return nullptr; }
64 |
--------------------------------------------------------------------------------
/ch3/machineir/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # cmake_minimum_required() has to run before anything else so that the policy
2 | # defaults it selects apply to the whole file.
3 | cmake_minimum_required(VERSION 3.22)
4 |
5 | project(CH3
6 |   LANGUAGES CXX C)
7 |
8 | set(CMAKE_CXX_STANDARD 17)
9 |
10 | # Shared helper; llvm_map_components_to_libnames() used below is expected to be
11 | # available once it has run.
12 | include(../../cmake/utils/set-llvm-install-prefix.cmake)
13 |
14 | set(CURR_TARGET
15 |   build_mir
16 | )
17 |
18 | set(SRC
19 |   "main.cpp"
20 |   "solution/populate_function.cpp"
21 |   "your_turn/populate_function.cpp"
22 | )
23 |
24 | set(INC
25 | )
26 |
27 | # Now build our tools
28 | add_executable(${CURR_TARGET} ${SRC} ${INC})
29 |
30 | # Find the libraries that correspond to the LLVM components
31 | # that we wish to use
32 | llvm_map_components_to_libnames(llvm_libs support core AllTargetsCodeGens)
33 |
34 | # Link against LLVM libraries
35 | target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
36 |
--------------------------------------------------------------------------------
/ch3/machineir/README.md:
--------------------------------------------------------------------------------
1 | # Building Machine IR #
2 |
3 | In this exercise, you will build your first function using the Machine intermediate representation (IR).
4 |
5 | Open `your_turn/populate_function.cpp` and implement the `populateMachineIR` function according to the comment
6 | in that file.
7 |
8 | We recommend starting by running the program once to see the output of the solution.
9 |
10 | This way you will get an idea of which opcodes you should use for your machine instructions.
11 |
12 | At the end, your produced IR should match what is printed between the `# Machine code for function solution_foo` and `# End machine code for function solution_foo.` lines.
13 |
14 | Your function is printed between the `# Machine code for function foo` and `# End machine code for function foo.` lines.
15 |
16 | ## Configuring your environment ##
17 |
18 |
19 | ```bash
20 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild .
21 | ninja -Cbuild
22 | ```
23 |
24 | ## Running the example ##
25 |
26 | ```bash
27 | ./build/build_mir
28 | ```
29 |
30 | ## Solution ##
31 |
32 | If your output doesn't match the solution, look at `solution/populate_function.cpp` to see how to implement the desired IR.
33 |
--------------------------------------------------------------------------------
/ch3/machineir/main.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/CodeGen/CodeGenTargetMachineImpl.h"
2 | #include "llvm/CodeGen/MachineBasicBlock.h"
3 | #include "llvm/CodeGen/MachineFunction.h"
4 | #include "llvm/CodeGen/MachineModuleInfo.h"
5 | #include "llvm/CodeGen/Register.h"
6 | #include "llvm/CodeGen/TargetRegisterInfo.h"
7 | #include "llvm/CodeGen/TargetSubtargetInfo.h"
8 | #include "llvm/IR/DerivedTypes.h"
9 | #include "llvm/IR/Function.h"
10 | #include "llvm/IR/LLVMContext.h"
11 | #include "llvm/IR/Module.h"
12 | #include "llvm/MC/TargetRegistry.h"
13 | #include "llvm/Support/TargetSelect.h" // For InitializeAllTargets
14 | #include "llvm/Target/TargetMachine.h"
15 | #include "llvm/TargetParser/Triple.h"
16 |
17 | using namespace llvm;
18 |
19 | extern MachineFunction *solutionPopulateMachineIR(MachineModuleInfo &,
20 | llvm::Function &, Register,
21 | Register);
22 | extern MachineFunction *populateMachineIR(MachineModuleInfo &,
23 | llvm::Function &, Register,
24 | Register);
25 |
26 | // Prints Res to errs() and runs the machine verifier on it.
27 | // W0 and W1 are first marked live-in on the entry block (when there is one).
28 | // Returns true when the function verifies. A null Res is reported and counts
29 | // as a failure instead of being dereferenced.
30 | bool checkFunctionCorrectness(MachineFunction *Res, Register W0, Register W1) {
31 |   if (!Res) {
32 |     errs() << "Nothing built\n";
33 |     return false;
34 |   }
35 |   // Take care of the liveness since we did not explain how to do that.
36 |   MachineBasicBlock *EntryBB = Res->empty() ? nullptr : &*Res->begin();
37 |   if (EntryBB) {
38 |     EntryBB->addLiveIn(W0);
39 |     EntryBB->addLiveIn(W1);
40 |   }
41 |   Res->print(errs());
42 |   if (!Res->verify()) {
43 |     errs() << Res->getName() << " does not verify\n";
44 |     return false;
45 |   }
46 |   return true;
47 | }
48 |
49 | // Sets up an AArch64 target machine, looks up the physical registers W0 and
50 | // W1 by name, then builds and checks the reference function (solution_foo)
51 | // and the exercise function (foo).
52 | // Returns 0 when both functions verify, 1 when at least one does not, and -1
53 | // when the target or the registers cannot be found.
54 | int main() {
55 |   // We have to initialize all the targets to get the registry initialized.
56 |   InitializeAllTargets();
57 |   // We need the MC layer as well to query the register information.
58 |   InitializeAllTargetMCs();
59 |
60 |   auto TT(Triple::normalize("aarch64--"));
61 |   std::string Error;
62 |   const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error);
63 |   if (!TheTarget) {
64 |     errs() << TT << " is not available with this build of LLVM\n";
65 |     return -1;
66 |   }
67 |   auto *LLVMTM = static_cast<CodeGenTargetMachineImpl *>(
68 |       TheTarget->createTargetMachine(TT, "", "", TargetOptions(), std::nullopt,
69 |                                      std::nullopt, CodeGenOptLevel::Default));
70 |   MachineModuleInfoWrapperPass MMIWP(LLVMTM);
71 |   LLVMContext Context;
72 |   Module MyModule("MyModule", Context);
73 |   MyModule.setDataLayout(LLVMTM->createDataLayout());
74 |
75 |   Function *SolutionFoo = Function::Create(
76 |       FunctionType::get(Type::getVoidTy(Context), /*IsVarArg=*/false),
77 |       Function::ExternalLinkage, "solution_foo", MyModule);
78 |   const TargetSubtargetInfo *STI = LLVMTM->getSubtargetImpl(*SolutionFoo);
79 |   const TargetRegisterInfo *TRI = STI->getRegisterInfo();
80 |
81 |   // Find the indices for W0 and W1.
82 |   // Since we are not in AArch64 library we don't have access to the AArch64::W0
83 |   // enums.
84 |   StringRef W0Str = "W0";
85 |   StringRef W1Str = "W1";
86 |   Register W0 = 0;
87 |   Register W1 = 0;
88 |   // The scan starts at 1 (0 doubles as the "not found yet" value above) and
89 |   // stops as soon as both registers are known.
90 |   for (unsigned i = 1, e = TRI->getNumRegs(); i != e && (!W0 || !W1); ++i) {
91 |     if (!W0 && W0Str == TRI->getName(i)) {
92 |       W0 = i;
93 |       continue;
94 |     }
95 |     if (!W1 && W1Str == TRI->getName(i)) {
96 |       W1 = i;
97 |       continue;
98 |     }
99 |   }
100 |
101 |   if (!W0 || !W1) {
102 |     errs() << "Failed to found physical registers w0 and w1\n";
103 |     return -1;
104 |   }
105 |
106 |   MachineFunction *Res =
107 |       solutionPopulateMachineIR(MMIWP.getMMI(), *SolutionFoo, W0, W1);
108 |   bool solutionIsCorrect = checkFunctionCorrectness(Res, W0, W1);
109 |
110 |   Function *Foo = Function::Create(
111 |       FunctionType::get(Type::getVoidTy(Context), /*IsVarArg=*/false),
112 |       Function::ExternalLinkage, "foo", MyModule);
113 |
114 |   MachineFunction *YourTurnRes =
115 |       populateMachineIR(MMIWP.getMMI(), *Foo, W0, W1);
116 |   bool yourTurnIsCorrect = checkFunctionCorrectness(YourTurnRes, W0, W1);
117 |
118 |   return !(solutionIsCorrect && yourTurnIsCorrect);
119 | }
120 |
--------------------------------------------------------------------------------
/ch3/machineir/solution/populate_function.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
2 | #include "llvm/CodeGen/MachineFrameInfo.h" // For CreateStackObject.
3 | #include "llvm/CodeGen/MachineFunction.h"
4 | #include "llvm/CodeGen/MachineMemOperand.h" // For MachinePointerInfo.
5 | #include "llvm/CodeGen/MachineModuleInfo.h"
6 | #include "llvm/CodeGen/MachineRegisterInfo.h"
7 | #include "llvm/CodeGen/Register.h"
8 | #include "llvm/CodeGen/TargetOpcodes.h"
9 | #include "llvm/CodeGenTypes/LowLevelType.h" // For LLT.
10 | #include "llvm/IR/Function.h"
11 | #include "llvm/IR/InstrTypes.h" // For ICMP_EQ.
12 |
13 | using namespace llvm;
14 |
15 | // The goal of this function is to build a MachineFunction that
16 | // represents the lowering of the following foo, a C function:
17 | // extern int baz();
18 | // extern void bar(int);
19 | // void foo(int a, int b) {
20 | // int var = a + b;
21 | // if (var == 0xFF) {
22 | // bar(var);
23 | // var = baz();
24 | // }
25 | // bar(var);
26 | // }
27 | //
28 | // The proposed ABI is:
29 | // - 32-bit arguments are passed through registers: w0, w1
30 | // - 32-bit returned values are passed through registers: w0, w1
31 | // w0 and w1 are given as argument of this Function.
32 | //
33 | // The local variable named var is expected to live on the stack.
34 | MachineFunction *solutionPopulateMachineIR(MachineModuleInfo &MMI,
35 |                                            Function &Foo, Register W0,
36 |                                            Register W1) {
37 |   MachineFunction &MF = MMI.getOrCreateMachineFunction(Foo);
38 |
39 |   // Foo is made of three blocks laid out as entry, then, exit.
40 |   // Each block is appended to MF right after being created.
41 |   auto AppendBlock = [&MF]() {
42 |     MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
43 |     MF.push_back(NewBB);
44 |     return NewBB;
45 |   };
46 |   MachineBasicBlock *Entry = AppendBlock();
47 |   MachineBasicBlock *Then = AppendBlock();
48 |   MachineBasicBlock *Exit = AppendBlock();
49 |
50 |   // CFG edges: entry -> {then, exit} and then -> exit.
51 |   Entry->addSuccessor(Then);
52 |   Entry->addSuccessor(Exit);
53 |   Then->addSuccessor(Exit);
54 |
55 |   // Low-level types: s1 for the comparison result, s32 for var, and a
56 |   // 64-bit pointer in address space 0 for the address of var.
57 |   const LLT BoolTy = LLT::scalar(1);
58 |   const LLT IntTy = LLT::scalar(32);
59 |   const LLT VarPtrTy = LLT::pointer(/*AddressSpace=*/0, /*SizeInBits=*/64);
60 |
61 |   // Memory operand description shared by every access to var.
62 |   MachinePointerInfo VarPtrInfo;
63 |   Align VarAlign(4);
64 |   // The stack slot for var.
65 |   // NOTE(review): the first argument of CreateStackObject is presumably a
66 |   // size in bytes, so 32 looks larger than an int needs -- confirm.
67 |   int VarFI = MF.getFrameInfo().CreateStackObject(32, VarAlign,
68 |                                                   /*IsSpillSlot=*/false);
69 |
70 |   // ---- entry ----
71 |   MachineIRBuilder Builder(*Entry, Entry->end());
72 |   // The incoming arguments a and b are read from W0 and W1.
73 |   Register ArgA = Builder.buildCopy(IntTy, W0).getReg(0);
74 |   Register ArgB = Builder.buildCopy(IntTy, W1).getReg(0);
75 |   // Materialize the address of var's stack slot.
76 |   Register VarAddr = Builder.buildFrameIndex(VarPtrTy, VarFI).getReg(0);
77 |
78 |   // Helpers so that every access to var is emitted the same way. They all
79 |   // insert at the current insertion point of Builder.
80 |   auto LoadVar = [&]() -> Register {
81 |     return Builder.buildLoad(IntTy, VarAddr, VarPtrInfo, VarAlign).getReg(0);
82 |   };
83 |   auto StoreVar = [&](Register NewValue) {
84 |     Builder.buildStore(NewValue, VarAddr, VarPtrInfo, VarAlign);
85 |   };
86 |   // Calls and returns are faked with INLINEASM carrying the given text.
87 |   auto FakeInstr = [&](const char *AsmText) {
88 |     return Builder.buildInstr(TargetOpcode::INLINEASM, {}, {})
89 |         .addExternalSymbol(AsmText)
90 |         .addImm(0);
91 |   };
92 |   // bar(var): reload var, move it into W0, then emit the fake call.
93 |   auto CallBarWithVar = [&]() {
94 |     Register Reloaded = LoadVar();
95 |     Builder.buildCopy(W0, Reloaded);
96 |     FakeInstr("bl @bar").addReg(W0, RegState::Implicit);
97 |   };
98 |
99 |   // var = a + b;
100 |   StoreVar(Builder.buildAdd(IntTy, ArgA, ArgB).getReg(0));
101 |   // if (var == 0xFF): the constant is built first, then var is reloaded.
102 |   Register Cst0xFF = Builder.buildConstant(IntTy, 0xFF).getReg(0);
103 |   Register VarInEntry = LoadVar();
104 |   Register IsEqual =
105 |       Builder.buildICmp(CmpInst::ICMP_EQ, BoolTy, VarInEntry, Cst0xFF)
106 |           .getReg(0);
107 |   // Taken branch goes to then, the unconditional one goes to exit.
108 |   Builder.buildBrCond(IsEqual, *Then);
109 |   Builder.buildBr(*Exit);
110 |
111 |   // ---- then ----
112 |   Builder.setInsertPt(*Then, Then->end());
113 |   CallBarWithVar();
114 |   // var = baz(); the fake call defines W0, which is then copied into var.
115 |   FakeInstr("bl @baz").addReg(W0, RegState::Implicit | RegState::Define);
116 |   StoreVar(Builder.buildCopy(IntTy, W0).getReg(0));
117 |   // No terminator: then falls through to exit, the next block in layout.
118 |
119 |   // ---- exit ----
120 |   Builder.setInsertPt(*Exit, Exit->end());
121 |   CallBarWithVar();
122 |   // End of the function, return void.
123 |   FakeInstr("ret");
124 |   return &MF;
125 | }
126 |
--------------------------------------------------------------------------------
/ch3/machineir/your_turn/populate_function.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
2 | #include "llvm/CodeGen/MachineFrameInfo.h" // For CreateStackObject.
3 | #include "llvm/CodeGen/MachineFunction.h"
4 | #include "llvm/CodeGen/MachineMemOperand.h" // For MachinePointerInfo.
5 | #include "llvm/CodeGen/MachineModuleInfo.h"
6 | #include "llvm/CodeGen/MachineRegisterInfo.h"
7 | #include "llvm/CodeGen/Register.h"
8 | #include "llvm/CodeGen/TargetOpcodes.h" // For INLINEASM.
9 | #include "llvm/CodeGenTypes/LowLevelType.h" // For LLT.
10 | #include "llvm/IR/Function.h"
11 | #include "llvm/IR/InstrTypes.h" // For ICMP_EQ.
12 |
13 | using namespace llvm;
14 |
15 | // The goal of this function is to build a MachineFunction that
16 | // represents the lowering of the following foo, a C function:
17 | // extern int baz();
18 | // extern void bar(int);
19 | // void foo(int a, int b) {
20 | // int var = a + b;
21 | // if (var == 0xFF) {
22 | // bar(var);
23 | // var = baz();
24 | // }
25 | // bar(var);
26 | // }
27 | //
28 | // The proposed ABI is:
29 | // - 32-bit arguments are passed through registers: w0, w1
30 | // - 32-bit returned values are passed through registers: w0, w1
31 | // w0 and w1 are given as argument of this Function.
32 | //
33 | // The local variable named var is expected to live on the stack.
34 | MachineFunction *populateMachineIR(MachineModuleInfo &MMI, Function &Foo,
35 |                                    Register W0, Register W1) {
36 |   MachineFunction &MF = MMI.getOrCreateMachineFunction(Foo);
37 |
38 |   // Scaffolding for the exercise: the declarations below are not used yet,
39 |   // they are the building blocks for the code to write under the TODO.
40 |
41 |   // s32 is the type of var, s1 is the type for bool.
42 |   LLT I32 = LLT::scalar(32);
43 |   LLT I1 = LLT::scalar(1);
44 |   // The address of var: a 64-bit pointer in address space 0.
45 |   LLT VarAddrLLT = LLT::pointer(/*AddressSpace=*/0, /*SizeInBits=*/64);
46 |
47 |   // Pointer info and alignment to hand to the loads and stores of var.
48 |   Align VarStackAlign = Align(4);
49 |   MachinePointerInfo PtrInfo;
50 |
51 |   // Reserve the stack slot holding var; this is not a spill slot.
52 |   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
53 |   int FrameIndex =
54 |       FrameInfo.CreateStackObject(32, VarStackAlign, /*IsSpillSlot=*/false);
55 |
56 |   // TODO: Populate MF.
57 |   return &MF;
58 | }
59 |
--------------------------------------------------------------------------------
/ch4/implicit_func_scope_change/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Standalone build of the `implicit_change_of_scope` example.
2 | # cmake_minimum_required() comes first so that policies are set before
3 | # anything else runs.
4 | cmake_minimum_required(VERSION 3.22)
5 |
6 | project(CH4
7 |   LANGUAGES CXX C)
8 |
9 | # Build as C++17 and fail at configure time if the compiler cannot do it.
10 | set(CMAKE_CXX_STANDARD 17)
11 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
12 |
13 | set(CURR_TARGET
14 |   implicit_change_of_scope
15 | )
16 |
17 | # Add the search path for LLVM.
18 | # NOTE(review): this helper is presumably also what loads the LLVM package
19 | # providing llvm_map_components_to_libnames() -- confirm.
20 | include(../../cmake/utils/set-llvm-install-prefix.cmake)
21 |
22 | # Now build our tools
23 | add_executable(${CURR_TARGET}
24 |   main.cpp
25 | )
26 |
27 | # Disable RTTI for this target only. Overwriting CMAKE_CXX_FLAGS would
28 | # silently drop any flags supplied by the user or the toolchain.
29 | target_compile_options(${CURR_TARGET} PRIVATE -fno-rtti)
30 |
31 | # Find the libraries that correspond to the LLVM components
32 | # that we wish to use
33 | llvm_map_components_to_libnames(llvm_libs core asmparser)
34 |
35 | # Link against LLVM libraries
36 | target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
37 |
--------------------------------------------------------------------------------
/ch4/implicit_func_scope_change/README.md:
--------------------------------------------------------------------------------
1 | # Implicit change of scope #
2 |
3 | This example illustrates the implicit change of scope, meaning that when you follow the def-use chains of certain values you may jump around different functions.
4 |
5 | This is true unsurprisingly for instance with global variables, but it happens as well with constants.
6 |
7 | To see this in action, configure, build and run this example by following the next steps:
8 | ```bash
9 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild .
10 | ninja -Cbuild
11 | ./build/implicit_change_of_scope
12 | ```
13 |
14 | Running `implicit_change_of_scope` will print some messages when jumping from one function to another.
15 |
16 | Look at the `InputIR` string in `main.cpp` to see if you can rebuild in your head the def-use chains and explain the change of scope from `bar` to `foo`.
17 |
--------------------------------------------------------------------------------
/ch4/implicit_func_scope_change/main.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/AsmParser/Parser.h" // For parseAssemblyString.
2 | #include "llvm/IR/BasicBlock.h"
3 | #include "llvm/IR/Function.h"
4 | #include "llvm/IR/Instruction.h"
5 | #include "llvm/IR/Instructions.h" // For LoadInst.
6 | #include "llvm/IR/LLVMContext.h"
7 | #include "llvm/IR/Module.h"
8 | #include "llvm/IR/Use.h"
9 | #include "llvm/IR/User.h"
10 | #include "llvm/Support/Debug.h" // For errs().
11 | #include "llvm/Support/SourceMgr.h" // For SMDiagnostic.
12 |
13 | using namespace llvm;
14 |
15 | const char *InputIR =
16 | "@global = external global ptr, align 8\n"
17 | "@other_global = local_unnamed_addr global ptr @global, align 8\n"
18 | "\n"
19 | "define signext i8 @foo() {\n"
20 | "bb:\n"
21 | " %i = load ptr, ptr @global, align 8\n"
22 | " %i1 = load i8, ptr %i, align 1\n"
23 | " ret i8 %i1\n"
24 | "}\n"
25 | "\n"
26 | "define signext i8 @bar() {\n"
27 | "bb:\n"
28 | " %i = load ptr, ptr @global, align 8\n"
29 | " %i1 = load i8, ptr %i, align 1\n"
30 | " ret i8 %i1\n"
31 | "}\n";
32 |
33 | // Parses InputIR, takes the first instruction of @bar (a load) and walks
34 | // the users of the value it loads from. Every user that is an instruction
35 | // living in a function other than @bar is reported, which shows that
36 | // following def-use chains can leave the current function.
37 | // \returns 0 on success, 1 if the embedded IR could not be used.
38 | int main() {
39 |   LLVMContext Context;
40 |   SMDiagnostic Err;
41 |   std::unique_ptr<Module> MyModule = parseAssemblyString(InputIR, Err, Context);
42 |   if (!MyModule) {
43 |     // Report why the parsing failed instead of dereferencing a null module.
44 |     Err.print("implicit_change_of_scope", errs());
45 |     return 1;
46 |   }
47 |
48 |   Function *BarFunc = MyModule->getFunction("bar");
49 |   if (!BarFunc || BarFunc->empty()) {
50 |     errs() << "The input IR does not define 'bar'\n";
51 |     return 1;
52 |   }
53 |
54 |   BasicBlock &Entry = *BarFunc->begin();
55 |
56 |   // The first instruction of bar is expected to be a load; cast<> asserts it.
57 |   auto &BarRes = cast<LoadInst>(Entry.front());
58 |
59 |   // Operand 0 of the load is the pointer it reads from.
60 |   Value *Global = BarRes.getOperand(0);
61 |   for (User *UserOfGlobal : Global->users()) {
62 |     auto *UserInstr = dyn_cast<Instruction>(UserOfGlobal);
63 |     if (!UserInstr) {
64 |       // Users that are not instructions do not belong to any function.
65 |       errs() << "Found a non-instruction use of global: " << *UserOfGlobal
66 |              << '\n';
67 |       continue;
68 |     }
69 |     // Instruction -> basic block -> function.
70 |     Function *UserFunc = UserInstr->getParent()->getParent();
71 |     if (UserFunc != BarFunc)
72 |       errs() << "Went from bar to " << UserFunc->getName() << '\n';
73 |   }
74 |   return 0;
75 | }
76 |
--------------------------------------------------------------------------------
/ch4/simple_cst_propagation/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Standalone build of the `simple_cst_propagation` exercise.
2 | # cmake_minimum_required() comes first so that policies are set before
3 | # anything else runs.
4 | cmake_minimum_required(VERSION 3.22)
5 |
6 | project(CH4
7 |   LANGUAGES CXX C)
8 |
9 | # Build as C++17 and fail at configure time if the compiler cannot do it.
10 | set(CMAKE_CXX_STANDARD 17)
11 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
12 |
13 | # Add the search path for LLVM.
14 | # NOTE(review): this helper is presumably also what loads the LLVM package
15 | # providing llvm_map_components_to_libnames() -- confirm.
16 | include(../../cmake/utils/set-llvm-install-prefix.cmake)
17 |
18 | set(CURR_TARGET
19 |   simple_cst_propagation
20 | )
21 |
22 | # Now build our tools: the driver, the reference implementation and the
23 | # implementation to be written by the reader.
24 | add_executable(${CURR_TARGET}
25 |   main.cpp
26 |   solution/populate_function.cpp
27 |   your_turn/populate_function.cpp
28 | )
29 |
30 | # Disable RTTI for this target only. Overwriting CMAKE_CXX_FLAGS would
31 | # silently drop any flags supplied by the user or the toolchain.
32 | target_compile_options(${CURR_TARGET} PRIVATE -fno-rtti)
33 |
34 | # Find the libraries that correspond to the LLVM components
35 | # that we wish to use.
36 | # asmparser and irreader are listed explicitly because main.cpp calls
37 | # parseAssemblyString() and parseIRFile() directly.
38 | llvm_map_components_to_libnames(llvm_libs
39 |   support
40 |   core
41 |   asmparser
42 |   irreader
43 |   transformutils
44 | )
45 |
46 | # Link against LLVM libraries
47 | target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
48 |
--------------------------------------------------------------------------------
/ch4/simple_cst_propagation/README.md:
--------------------------------------------------------------------------------
1 | # Build your own simple constant propagation #
2 |
3 | In this exercise you need to populate the code in `your_turn/populate_function.cpp` by following the instruction given in the comments of the `myConstantPropagation` function.
4 |
5 | At any point in the development, you can build and test your code using the commands given below.
6 |
7 | If you get stuck, feel free to look at the reference implementation in `solution/populate_function.cpp`.
8 |
9 | When running the program you will see that it prints which implementation managed to transform the input IR.
10 | Try to beat the reference implementation by supporting more cases than it does!
11 |
12 | ## Configure your build directory ##
13 |
14 | ```bash
15 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild .
16 | ```
17 |
18 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option).
19 |
20 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed.
21 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package.
22 |
23 | ## Build ##
24 |
25 | ```bash
26 | ninja -C build
27 | ```
28 |
29 | This builds the default target in the build directory.
30 |
31 | This should produce in the `build` directory a binary named `simple_cst_propagation`.
32 |
33 | ## Run ##
34 |
35 | ```bash
36 | ./build/simple_cst_propagation [input.ll|.bc]
37 | ```
38 |
39 | This will run both the reference implementation and your implementation on `input.ll` if specified or the default input if not.
40 |
41 | The run will apply both implementations to the input and will check whether an optimization happened, and if the resulting IR is correct.
42 |
43 | It will also report which implementation managed to optimize the input IR.
44 |
45 | To see how this is articulated, take a look at `main.cpp`.
46 |
47 | For each function in the input IR, the output will look like this:
48 | ```
49 | Processing function '<function name>'
50 | [The input IR for function <function name>]
51 |
52 | ## Reference implementation
53 | [Resulting IR after the reference optimization]
54 |
55 |
56 | ## Your implementation
57 | [Resulting IR after your optimization]
58 |
59 |
60 |
61 |
62 | ######
63 | ```
64 |
65 | The message after each processing will tell you if you did better than the reference implementation, i.e., you transformed the input IR and the reference implementation did not.
66 |
67 | ## Producing an input ##
68 |
69 | Using the following command line, you can produce from a C file an input to give to your program:
70 | ```bash
71 | clang -o - -S -emit-llvm test.c -O0 | sed -e 's#optnone##g' | <path/to/llvm/install>/bin/opt -S -passes=mem2reg,instnamer > input.ll
72 | ```
73 |
74 | optnone => remove the attribute that prevents optimizations
75 | mem2reg => get rid of stack accesses / build SSA
76 | instnamer => get rid of the implicit variables
77 |
--------------------------------------------------------------------------------
/ch4/simple_cst_propagation/input.c:
--------------------------------------------------------------------------------
1 | int foo(int tst) { // Signed case: only literals feed `a`, so its value is known on each path.
2 | int a = 3 + 2; // a == 5
3 | a <<= 3; // a == 40
4 | if (tst)
5 | a /= 5; // signed division: a == 8
6 | else
7 | a |= 0xF0F; // a == 0xF2F
8 | return a;
9 | }
10 |
11 | int bar(int tst) { // Unsigned counterpart of foo, with one extra increment after the branches join.
12 | unsigned a = 0xFFFFFFFF;
13 | a <<= 3; // assuming a 32-bit unsigned, a == 0xFFFFFFF8
14 | if (tst)
15 | a /= 3; // unsigned division
16 | else
17 | a |= 0xF0F; // sets the low bits cleared by the shift (given a 32-bit unsigned)
18 | ++a; // executed on both paths
19 | return (int)a;
20 | }
21 |
22 |
--------------------------------------------------------------------------------
/ch4/simple_cst_propagation/main.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/AsmParser/Parser.h" // For parseAssemblyString.
2 | #include "llvm/IR/Function.h"
3 | #include "llvm/IR/LLVMContext.h"
4 | #include "llvm/IR/Module.h"
5 | #include "llvm/IR/Verifier.h"
6 | #include "llvm/IRReader/IRReader.h" // For parseIRFile.
7 | #include "llvm/Support/Debug.h" // For errs().
8 | #include "llvm/Support/SourceMgr.h" // For SMDiagnostic.
9 | #include "llvm/Transforms/Utils/Cloning.h" // For CloneFunction.
10 |
11 | using namespace llvm;
12 |
13 | extern bool myConstantPropagation(llvm::Function &);
14 | extern bool solutionConstantPropagation(llvm::Function &);
15 |
16 | bool checkFunctionCorrectness(llvm::Function &Res) {
17 |   // Dump the function first so that the IR shows up before any diagnostic.
18 |   Res.print(errs());
19 |   // verifyFunction answers true when the function is broken, in which case
20 |   // the errors it found have been printed on errs().
21 |   const bool IsBroken = verifyFunction(Res, &errs());
22 |   if (IsBroken)
23 |     errs() << Res.getName() << " does not verify\n";
24 |   return !IsBroken;
25 | }
26 |
27 | // Default textual LLVM IR (functions @foo and @bar) parsed when no input file is given on the command line.
28 | const char *InputIR =
29 | "define i32 @foo(i32 noundef %arg) {\n"
30 | "bb:\n"
31 | " %i = shl i32 5, 3\n"
32 | " %i1 = icmp ne i32 %arg, 0\n"
33 | " br i1 %i1, label %bb2, label %bb4\n"
34 | "\n"
35 | "bb2: ; preds = %bb\n"
36 | " %i3 = sdiv i32 %i, 5\n"
37 | " br label %bb6\n"
38 | "\n"
39 | "bb4: ; preds = %bb\n"
40 | " %i5 = or i32 %i, 3855\n"
41 | " br label %bb6\n"
42 | "\n"
43 | "bb6: ; preds = %bb4, %bb2\n"
44 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n"
45 | " ret i32 %.0\n"
46 | "}\n"
47 | "\n"
48 | "define i32 @bar(i32 noundef %arg) {\n"
49 | "bb:\n"
50 | " %i = shl i32 -1, 3\n"
51 | " %i1 = icmp ne i32 %arg, 0\n"
52 | " br i1 %i1, label %bb2, label %bb4\n"
53 | "\n"
54 | "bb2: ; preds = %bb\n"
55 | " %i3 = udiv i32 %i, 3\n"
56 | " br label %bb6\n"
57 | "\n"
58 | "bb4: ; preds = %bb\n"
59 | " %i5 = or i32 %i, 3855\n"
60 | " br label %bb6\n"
61 | "\n"
62 | "bb6: ; preds = %bb4, %bb2\n"
63 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n"
64 | " %i7 = add i32 %.0, 1\n"
65 | " ret i32 %i7\n"
66 | "}\n";
67 |
68 | // Runs the reference and the user-provided constant propagation on every
69 | // function of the input module and reports which one transformed the IR.
70 | // The module is read from the file given as the sole argument, or built
71 | // from InputIR when no argument is given.
72 | // \returns 0 on success, 1 if a resulting function failed verification and
73 | // -1 if the module could not be built.
74 | int main(int argc, char **argv) {
75 |   LLVMContext Context;
76 |   SMDiagnostic Err;
77 |   std::unique_ptr<Module> MyModule;
78 |   // To be able to play with the optimization a little bit,
79 |   // support a mode where you can feed your own IR files.
80 |   if (argc == 2) {
81 |     outs() << "Reading module from '" << argv[1] << "'\n";
82 |     MyModule = parseIRFile(argv[1], Err, Context);
83 |   } else {
84 |     MyModule = parseAssemblyString(InputIR, Err, Context);
85 |   }
86 |   if (!MyModule) {
87 |     errs() << "Unable to build module\n";
88 |     // Show the parser diagnostic so the user knows what is wrong.
89 |     Err.print(argv[0], errs());
90 |     return -1;
91 |   }
92 |
93 |   // Put all the functions in a worklist and process them afterwards.
94 |   // We do that because we clone the functions on the fly to have the
95 |   // same input to both the reference implementation and the user provided
96 |   // solution and we don't want to process cloned functions afterwards and
97 |   // clone them and so on.
98 |   SmallVector<Function *> Worklist;
99 |   for (Function &Func : *MyModule)
100 |     Worklist.push_back(&Func);
101 |
102 |   // Map for the cloned functions.
103 |   ValueToValueMapTy VMap;
104 |   bool hadError = false;
105 |   for (Function *Func : Worklist) {
106 |     outs() << "Processing function '" << Func->getName() << "'\n";
107 |     Func->print(outs());
108 |
109 |     // Clone the function before the optimization to make sure each
110 |     // implementation sees the same input.
111 |     auto *ClonedFunc = CloneFunction(Func, VMap);
112 |
113 |     outs() << "\n\n## Reference implementation\n";
114 |     bool solutionDidSomething = solutionConstantPropagation(*ClonedFunc);
115 |     bool solutionIsCorrect = checkFunctionCorrectness(*ClonedFunc);
116 |
117 |     outs() << "\n\n## Your implementation\n";
118 |     bool yourTurnDidSomething = myConstantPropagation(*Func);
119 |     bool yourTurnIsCorrect = checkFunctionCorrectness(*Func);
120 |
121 |     if (!(solutionIsCorrect && yourTurnIsCorrect)) {
122 |       hadError = true;
123 |       errs() << "Solution does not verify:\n"
124 |                 "- provided implementation("
125 |              << (solutionIsCorrect ? "passed" : "failed")
126 |              << ")\n"
127 |                 " - your implementation("
128 |              << (yourTurnIsCorrect ? "passed" : "failed") << ")\n";
129 |     }
130 |
131 |     outs() << '\n';
132 |     if (solutionDidSomething) {
133 |       if (!yourTurnDidSomething)
134 |         outs()
135 |             << "The solution managed to optimize something but you did not\n";
136 |       else
137 |         outs() << "Both your solution and the provided solution did something, "
138 |                   "which one is better?\n";
139 |     } else if (yourTurnDidSomething) {
140 |       // End with a newline so that the separator starts on its own line.
141 |       outs() << "Nice! You managed to optimize something whereas the provided "
142 |                 "solution did not!\n";
143 |     }
144 |     outs() << "######\n";
145 |   }
146 |
147 |   // A process exit code of 0 means success: only fail when a function did
148 |   // not verify.
149 |   return hadError ? 1 : 0;
150 | }
151 |
--------------------------------------------------------------------------------
/ch4/simple_cst_propagation/solution/populate_function.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/ADT/APInt.h"
2 | #include "llvm/ADT/PostOrderIterator.h" // For ReversePostOrderTraversal.
3 | #include "llvm/IR/BasicBlock.h"
4 | #include "llvm/IR/CFG.h" // To instantiate RPOTraversal.
5 | #include "llvm/IR/Constants.h" // For ConstantInt.
6 | #include "llvm/IR/Function.h"
7 | #include "llvm/IR/InstrTypes.h" // For BinaryOperator, etc.
8 | #include "llvm/IR/Instruction.h"
9 | #include "llvm/IR/LLVMContext.h"
10 | #include "llvm/IR/Module.h"
11 | #include "llvm/Support/Debug.h" // For errs().
12 |
13 | #include <optional>
14 |
15 | using namespace llvm;
16 |
17 | // Folds the binary instruction \p Instr when both operands are ConstantInt.
18 | // \returns the folded constant, or nullptr if an operand is not a constant
19 | // integer or if \p Computation declines to fold (returns std::nullopt).
20 | static Value *visitBinary(Instruction &Instr, LLVMContext &Ctxt,
21 |                           std::optional<APInt> (*Computation)(const APInt &,
22 |                                                              const APInt &)) {
23 |   assert(isa<BinaryOperator>(Instr) && "This is meant for binary instruction");
24 |   auto *LHS = dyn_cast<ConstantInt>(Instr.getOperand(0));
25 |   auto *RHS = dyn_cast<ConstantInt>(Instr.getOperand(1));
26 |   if (!LHS || !RHS)
27 |     return nullptr;
28 |
29 |   // FIXME: Technically this API is not precise enough because we may want to
30 |   // produce poison value for e.g., a division by zero.
31 |   std::optional<APInt> Res = Computation(LHS->getValue(), RHS->getValue());
32 |   if (!Res.has_value())
33 |     return nullptr;
34 |   return ConstantInt::get(Ctxt, *Res);
35 | }
36 |
37 | // Signature of the routines that fold two constant operands. A folder
38 | // returns std::nullopt when it refuses to fold its inputs.
39 | using BinaryFolder = std::optional<APInt> (*)(const APInt &, const APInt &);
40 |
41 | // \returns the folder for the binary opcode \p Opcode, or nullptr when this
42 | // optimization does not handle that opcode.
43 | static BinaryFolder getFolderFor(unsigned Opcode) {
44 |   switch (Opcode) {
45 |   case Instruction::Add:
46 |     return [](const APInt &A, const APInt &B) -> std::optional<APInt> {
47 |       return A + B;
48 |     };
49 |   case Instruction::Sub:
50 |     return [](const APInt &A, const APInt &B) -> std::optional<APInt> {
51 |       return A - B;
52 |     };
53 |   case Instruction::Mul:
54 |     return [](const APInt &A, const APInt &B) -> std::optional<APInt> {
55 |       return A * B;
56 |     };
57 |   case Instruction::SDiv:
58 |     return [](const APInt &A, const APInt &B) -> std::optional<APInt> {
59 |       // Leave divisions by zero alone.
60 |       if (B.isZero())
61 |         return std::nullopt;
62 |       return A.sdiv(B);
63 |     };
64 |   case Instruction::UDiv:
65 |     return [](const APInt &A, const APInt &B) -> std::optional<APInt> {
66 |       // Leave divisions by zero alone.
67 |       if (B.isZero())
68 |         return std::nullopt;
69 |       return A.udiv(B);
70 |     };
71 |   case Instruction::Shl:
72 |     return [](const APInt &A, const APInt &B) -> std::optional<APInt> {
73 |       return A.shl(B);
74 |     };
75 |   case Instruction::LShr:
76 |     return [](const APInt &A, const APInt &B) -> std::optional<APInt> {
77 |       return A.lshr(B);
78 |     };
79 |   case Instruction::AShr:
80 |     return [](const APInt &A, const APInt &B) -> std::optional<APInt> {
81 |       return A.ashr(B);
82 |     };
83 |   case Instruction::And:
84 |     return [](const APInt &A, const APInt &B) -> std::optional<APInt> {
85 |       return A & B;
86 |     };
87 |   case Instruction::Or:
88 |     return [](const APInt &A, const APInt &B) -> std::optional<APInt> {
89 |       return A | B;
90 |     };
91 |   case Instruction::Xor:
92 |     return [](const APInt &A, const APInt &B) -> std::optional<APInt> {
93 |       return A ^ B;
94 |     };
95 |   default:
96 |     return nullptr;
97 |   }
98 | }
99 |
100 | // Takes \p Foo and apply a simple constant propagation optimization.
101 | // \returns true if \p Foo was modified (i.e., something had been constant
102 | // propagated), false otherwise.
103 | bool solutionConstantPropagation(Function &Foo) {
104 |   // RPO construction chokes on empty functions instead of producing an empty
105 |   // traversal.
106 |   // Just skip such entry.
107 |   if (Foo.empty())
108 |     return false;
109 |
110 |   LLVMContext &Ctxt = Foo.getParent()->getContext();
111 |   bool MadeChanges = false;
112 |
113 |   // Blocks are visited in reverse post-order.
114 |   ReversePostOrderTraversal<Function *> RPOT(&Foo);
115 |   for (BasicBlock *BB : RPOT) {
116 |     // Early increment to be able to remove the instruction that we replaced
117 |     // on-the-fly. The alternative is to accumulate the instructions to remove
118 |     // in a worklist and delete them afterwards.
119 |     for (Instruction &Instr : make_early_inc_range(*BB)) {
120 |       BinaryFolder Folder = getFolderFor(Instr.getOpcode());
121 |       if (!Folder)
122 |         continue;
123 |       Value *NewConstant = visitBinary(Instr, Ctxt, Folder);
124 |       if (!NewConstant)
125 |         continue;
126 |       // Forward the constant to the users, then drop the dead instruction.
127 |       Instr.replaceAllUsesWith(NewConstant);
128 |       Instr.eraseFromParent();
129 |       MadeChanges = true;
130 |     }
131 |   }
132 |   return MadeChanges;
133 | }
134 |
--------------------------------------------------------------------------------
/ch4/simple_cst_propagation/your_turn/populate_function.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
2 | #include "llvm/CodeGen/MachineFrameInfo.h" // For CreateStackObject.
3 | #include "llvm/CodeGen/MachineFunction.h"
4 | #include "llvm/CodeGen/MachineMemOperand.h" // For MachinePointerInfo.
5 | #include "llvm/CodeGen/MachineModuleInfo.h"
6 | #include "llvm/CodeGen/MachineRegisterInfo.h"
7 | #include "llvm/CodeGen/Register.h"
8 | #include "llvm/CodeGen/TargetOpcodes.h" // For INLINEASM.
9 | #include "llvm/CodeGenTypes/LowLevelType.h" // For LLT.
10 | #include "llvm/IR/Function.h"
11 | #include "llvm/IR/InstrTypes.h" // For ICMP_EQ.
12 |
13 | using namespace llvm;
14 |
15 | // Exercise entry point: run a simple constant propagation over \p Foo.
16 | // \returns true when \p Foo was changed (i.e., at least one value got
17 | // constant propagated) and false when it was left untouched.
18 | bool myConstantPropagation(Function &Foo) {
19 |   // TODO: populate this function.
20 |   const bool MadeChanges = false;
21 |   return MadeChanges;
22 | }
23 |
--------------------------------------------------------------------------------
/ch5/your_first_pass/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Standalone build of the `your_first_pass` exercise.
2 | # cmake_minimum_required() comes first so that policies are set before
3 | # anything else runs.
4 | cmake_minimum_required(VERSION 3.22)
5 |
6 | project(CH5
7 |   LANGUAGES CXX C)
8 |
9 | # Build as C++17 and fail at configure time if the compiler cannot do it.
10 | set(CMAKE_CXX_STANDARD 17)
11 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
12 |
13 | # Add the search path for LLVM.
14 | # NOTE(review): this helper is presumably also what loads the LLVM package
15 | # providing llvm_map_components_to_libnames() -- confirm.
16 | include(../../cmake/utils/set-llvm-install-prefix.cmake)
17 |
18 | set(CURR_TARGET
19 |   your_first_pass
20 | )
21 |
22 | # Now build our tools
23 | add_executable(${CURR_TARGET}
24 |   main.cpp
25 |   # We use the skeleton of the optimization we did in ch4 to create
26 |   # our pass.
27 |   ../../ch4/simple_cst_propagation/solution/populate_function.cpp
28 |   solution/passWithNewPM.cpp
29 |   solution/passWithLegacyPM.cpp
30 |   your_turn/passWithNewPM.cpp
31 |   your_turn/passWithLegacyPM.cpp
32 | )
33 |
34 | # Disable RTTI for this target only. Overwriting CMAKE_CXX_FLAGS would
35 | # silently drop any flags supplied by the user or the toolchain.
36 | target_compile_options(${CURR_TARGET} PRIVATE -fno-rtti)
37 |
38 | # Find the libraries that correspond to the LLVM components
39 | # that we wish to use.
40 | # asmparser and irreader are listed explicitly because main.cpp calls
41 | # parseAssemblyString() and parseIRFile() directly.
42 | llvm_map_components_to_libnames(llvm_libs
43 |   support
44 |   core
45 |   asmparser
46 |   irreader
47 |   transformutils
48 | )
49 |
50 | # Link against LLVM libraries
51 | target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
52 |
--------------------------------------------------------------------------------
/ch5/your_first_pass/README.md:
--------------------------------------------------------------------------------
1 | # Connect your first optimization pass #
2 |
3 | In this exercise you will learn to connect the optimization that we provided in Chapter 4 in both the legacy and the new pass manager.
4 |
5 | Go to `your_turn/passWithLegacyPM.cpp` and `your_turn/passWithNewPM.cpp` to implement the connection with the legacy pass manager and the new pass manager respectively.
6 |
7 | Then, update `your_turn/passWithNewPM.h` and set the macro `YOUR_TURN_IS_READY` to `1` to enable the processing of your optimizations.
8 |
9 | We put `TODO` comments in the places you have to update to materialize the connection.
10 |
11 | For the body of your optimization just call the provided `solutionConstantPropagation` function, which is the implementation of the simple constant propagation that we saw in Chapter 4.
12 |
13 | ## Configuring your build environment ##
14 |
15 | ```bash
16 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild .
17 | ```
18 |
19 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option).
20 |
21 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed.
22 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package.
23 |
24 | ## Build ##
25 |
26 | ```bash
27 | ninja -C build
28 | ```
29 |
30 | This builds the default target in the build directory.
31 |
32 | This should produce in the `build` directory a binary named `your_first_pass`.
33 |
34 | ## Run ##
35 |
36 | ```bash
37 | ./build/your_first_pass [input.ll|.bc]
38 | ```
39 |
40 | This will run both the reference implementation and your implementation on `input.ll` if specified or the default input if not.
41 |
42 | The run will apply both implementations to the input and will check whether the resulting IR is still correct.
43 |
44 | To see how this is articulated, take a look at `main.cpp`.
45 |
46 | ## Solution ##
47 |
48 | Look into the `solution` directory to see how to implement the connection of your optimization with both pass managers.
49 |
50 | ## Producing an input ##
51 |
52 | Using the following command line, you can produce from a C file an input to give to your program:
53 | ```bash
54 | clang -o - -S -emit-llvm test.c -O0 | sed -e 's#optnone##g' | <path/to/llvm/install>/bin/opt -S -passes=mem2reg,instnamer > input.ll
55 | ```
56 |
57 | optnone => remove the attribute that prevents optimizations
58 | mem2reg => get rid of stack accesses / build SSA
59 | instnamer => get rid of the implicit variables
60 |
--------------------------------------------------------------------------------
/ch5/your_first_pass/main.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/ADT/STLExtras.h" // For llvm::all_of.
2 | #include "llvm/AsmParser/Parser.h" // For parseAssemblyString.
3 | #include "llvm/IR/Function.h"
4 | #include "llvm/IR/LLVMContext.h"
5 | #include "llvm/IR/LegacyPassManager.h" // For legacy::PassManager.
6 | #include "llvm/IR/Module.h"
7 | #include "llvm/IR/PassInstrumentation.h"
8 | #include "llvm/IR/PassManager.h" // For the new PassManager.
9 | #include "llvm/IR/Verifier.h"
10 | #include "llvm/IRReader/IRReader.h" // For parseIRFile.
11 | #include "llvm/Support/Debug.h" // For errs().
12 | #include "llvm/Support/SourceMgr.h" // For SMDiagnostic.
13 | #include "llvm/Transforms/Utils/Cloning.h" // For CloneModule.
14 |
15 | #include "solution/passWithNewPM.h"
16 | #include "your_turn/passWithNewPM.h"
17 |
18 | using namespace llvm;
19 |
20 | extern Pass *createSolutionPassForLegacyPM();
21 | extern Pass *createYourTurnPassForLegacyPM();
22 |
23 | bool checkFunctionCorrectness(const llvm::Function &Res) {
24 |   // Dump the function first so that the IR shows up before any diagnostic.
25 |   Res.print(errs());
26 |   // verifyFunction answers true when the function is broken, in which case
27 |   // the errors it found have been printed on errs().
28 |   const bool IsBroken = verifyFunction(Res, &errs());
29 |   if (IsBroken)
30 |     errs() << Res.getName() << " does not verify\n";
31 |   return !IsBroken;
32 | }
33 |
34 | // Default textual LLVM IR (functions @foo and @bar) parsed when no input file is given on the command line.
35 | const char *InputIR =
36 | "define i32 @foo(i32 noundef %arg) {\n"
37 | "bb:\n"
38 | " %i = shl i32 5, 3\n"
39 | " %i1 = icmp ne i32 %arg, 0\n"
40 | " br i1 %i1, label %bb2, label %bb4\n"
41 | "\n"
42 | "bb2: ; preds = %bb\n"
43 | " %i3 = sdiv i32 %i, 5\n"
44 | " br label %bb6\n"
45 | "\n"
46 | "bb4: ; preds = %bb\n"
47 | " %i5 = or i32 %i, 3855\n"
48 | " br label %bb6\n"
49 | "\n"
50 | "bb6: ; preds = %bb4, %bb2\n"
51 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n"
52 | " ret i32 %.0\n"
53 | "}\n"
54 | "\n"
55 | "define i32 @bar(i32 noundef %arg) {\n"
56 | "bb:\n"
57 | " %i = shl i32 -1, 3\n"
58 | " %i1 = icmp ne i32 %arg, 0\n"
59 | " br i1 %i1, label %bb2, label %bb4\n"
60 | "\n"
61 | "bb2: ; preds = %bb\n"
62 | " %i3 = udiv i32 %i, 3\n"
63 | " br label %bb6\n"
64 | "\n"
65 | "bb4: ; preds = %bb\n"
66 | " %i5 = or i32 %i, 3855\n"
67 | " br label %bb6\n"
68 | "\n"
69 | "bb6: ; preds = %bb4, %bb2\n"
70 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n"
71 | " %i7 = add i32 %.0, 1\n"
72 | " ret i32 %i7\n"
73 | "}\n";
74 |
75 | int main(int argc, char **argv) {
76 | LLVMContext Context;
77 | SMDiagnostic Err;
78 | std::unique_ptr MyModule;
79 | // To be able to play with the optimization a little bit,
80 | // support a mode where you can feed your own IR files.
81 | if (argc == 2) {
82 | outs() << "Reading module from '" << argv[1] << "'\n";
83 | MyModule = parseIRFile(argv[1], Err, Context);
84 | } else {
85 | MyModule = parseAssemblyString(InputIR, Err, Context);
86 | }
87 | if (!MyModule) {
88 | errs() << "Unable to build module\n";
89 | return -1;
90 | }
91 |
92 | bool hadError = false;
93 | for (int i = 0; i < 2; ++i) {
94 | outs() << "Checking the run for "
95 | << ((i == 0) ? "Provided solution" : "Your turn") << "\n";
96 | // Start with the legacy pass manager.
97 | std::unique_ptr ModuleForLegacyPM = CloneModule(*MyModule);
98 |
99 | legacy::PassManager LegacyPM;
100 |
101 | Pass *PassForLegacyPM = (i == 0) ? createSolutionPassForLegacyPM()
102 | : createYourTurnPassForLegacyPM();
103 | if (PassForLegacyPM)
104 | LegacyPM.add(PassForLegacyPM);
105 |
106 | LegacyPM.run(*ModuleForLegacyPM);
107 | bool solutionIsCorrect =
108 | all_of(ModuleForLegacyPM->functions(),
109 | [](const Function &F) { return checkFunctionCorrectness(F); });
110 | hadError |= !solutionIsCorrect;
111 |
112 | // Then do the same thing with the new pass manager.
113 | std::unique_ptr ModuleForNewPM = CloneModule(*MyModule);
114 |
115 | // CAREFUL the order of the manager is important here since the destructor
116 | // needs to be called in the right order otherwise it will crash.
117 | FunctionAnalysisManager FAM;
118 | ModuleAnalysisManager MAM;
119 | // Register the passes used implicitly at the start of the pipeline.
120 | MAM.registerPass([&] { return PassInstrumentationAnalysis(); });
121 | // Well this one is not as implicit, we ask for it with the call to the
122 | // adaptor below.
123 | MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); });
124 | FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); });
125 | ModulePassManager NewPM;
126 | // Wrap the function pass into a module pass.
127 | if (i == 0)
128 | NewPM.addPass(createModuleToFunctionPassAdaptor(
129 | SolutionConstantPropagationNewPass()));
130 | #if YOUR_TURN_IS_READY
131 | else
132 | NewPM.addPass(createModuleToFunctionPassAdaptor(
133 | YourTurnConstantPropagationNewPass()));
134 | #endif
135 | NewPM.run(*ModuleForNewPM, MAM);
136 |
137 | solutionIsCorrect =
138 | all_of(ModuleForNewPM->functions(),
139 | [](const Function &F) { return checkFunctionCorrectness(F); });
140 | hadError |= !solutionIsCorrect;
141 | }
142 | return !hadError;
143 | }
144 |
--------------------------------------------------------------------------------
/ch5/your_first_pass/solution/passWithLegacyPM.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/IR/Function.h"
2 | #include "llvm/Pass.h" // For FunctionPass & INITIALIZE_PASS.
3 | #include "llvm/Support/Debug.h" // For errs().
4 |
5 | using namespace llvm;
6 |
7 | extern bool solutionConstantPropagation(llvm::Function &);
8 |
9 | // The implementation of this function is generated at the end of this file. See
10 | // INITIALIZE_PASS.
11 | namespace llvm {
12 | void initializeSolutionConstantPropagationPass(PassRegistry &);
13 | };
14 |
15 | namespace {
16 | // The scope we want to modify is a Function.
17 | class SolutionConstantPropagation : public FunctionPass {
18 | public:
19 | // Used for the pass registry.
20 | static char ID;
21 |
22 | SolutionConstantPropagation() : FunctionPass(ID) {
23 | // Technically we do not need to put this in the constructor.
24 | // Usually this call lives in the uber InitializeAllXXX.
25 | initializeSolutionConstantPropagationPass(*PassRegistry::getPassRegistry());
26 | }
27 |
28 | // Main function of a FunctionPass.
29 | // Returns true if any change was made to F.
30 | bool runOnFunction(Function &F) override {
31 | errs() << "Solution Legacy called on " << F.getName() << '\n';
32 | return solutionConstantPropagation(F);
33 | }
34 | };
35 | } // End anonymous namespace.
36 |
// Anchor for this pass' ID.
char SolutionConstantPropagation::ID = 0;

// Initialize function used for the pass registration.
// This hooks up the command line option and gives general information
// about the pass' properties.
// This macro generates a llvm::initialize##passImplementationName##Pass
// function.
INITIALIZE_PASS(/*passImplementationName=*/SolutionConstantPropagation,
                /*commandLineArgName=*/"legacy-solution",
                /*name=*/"Legacy Solution", /*isCFGOnly=*/false,
                /*isAnalysis=*/false);

// Factory for the legacy pass manager flavor of the solution.
// Returns a heap-allocated pass.
// NOTE(review): presumably the pass manager it is added to takes ownership;
// confirm against the caller.
Pass *createSolutionPassForLegacyPM() {
  return new SolutionConstantPropagation();
}
53 |
--------------------------------------------------------------------------------
/ch5/your_first_pass/solution/passWithNewPM.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/IR/Function.h"
2 | #include "llvm/Support/Debug.h" // For errs().
3 |
4 | #include "passWithNewPM.h"
5 |
6 | using namespace llvm;
7 |
8 | extern bool solutionConstantPropagation(llvm::Function &);
9 |
10 | PreservedAnalyses
11 | SolutionConstantPropagationNewPass::run(Function &F,
12 | FunctionAnalysisManager &AM) {
13 | errs() << "Solution New PM on " << F.getName() << "\n";
14 | bool MadeChanges = solutionConstantPropagation(F);
15 | if (!MadeChanges)
16 | return PreservedAnalyses::all();
17 | // Even if we made changes, we didn't touched the CFG.
18 | // So everything on that is still current.
19 | PreservedAnalyses PA;
20 | PA.preserveSet();
21 | return PA;
22 | }
23 |
--------------------------------------------------------------------------------
/ch5/your_first_pass/solution/passWithNewPM.h:
--------------------------------------------------------------------------------
1 | #ifndef SOLUTION_PASSWITHNEWPM_H
2 | #define SOLUTION_PASSWITHNEWPM_H
3 | #include "llvm/IR/PassManager.h" // For PassInfoMixin.
4 |
5 | namespace llvm {
6 | class Function;
7 | };
8 |
9 | class SolutionConstantPropagationNewPass
10 | : public llvm::PassInfoMixin {
11 | public:
12 | llvm::PreservedAnalyses run(llvm::Function &F,
13 | llvm::FunctionAnalysisManager &AM);
14 | };
15 | #endif
16 |
--------------------------------------------------------------------------------
/ch5/your_first_pass/your_turn/passWithLegacyPM.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/IR/Function.h"
2 | #include "llvm/Pass.h" // For FunctionPass & INITIALIZE_PASS.
3 | #include "llvm/Support/Debug.h" // For errs().
4 |
5 | using namespace llvm;
6 |
7 | extern bool solutionConstantPropagation(llvm::Function &);
8 |
9 | // The implementation of this function is generated at the end of this file. See
10 | // INITIALIZE_PASS.
11 | namespace llvm {
12 | void initializeYourTurnConstantPropagationPass(PassRegistry &);
13 | };
14 |
15 | namespace {
16 | class YourTurnConstantPropagation /* TODO: Fill in the blanks */ {
17 | public:
18 | YourTurnConstantPropagation() /* TODO: Fill in the blanks */ {}
19 |
20 | // TODO: Fill in the blanks.
21 | };
22 | } // End anonymous namespace.
23 |
24 | // TODO: Remove and add proper implementation
25 | void llvm::initializeYourTurnConstantPropagationPass(PassRegistry &) {}
26 |
27 | Pass *createYourTurnPassForLegacyPM() {
28 | return nullptr; // TODO: Fill in the blanks.
29 | }
30 |
--------------------------------------------------------------------------------
/ch5/your_first_pass/your_turn/passWithNewPM.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/IR/Function.h"
2 | #include "llvm/Support/Debug.h" // For errs().
3 |
4 | #include "passWithNewPM.h"
5 |
6 | using namespace llvm;
7 |
8 | extern bool solutionConstantPropagation(llvm::Function &);
9 |
10 | // TODO: Fill in the blanks.
11 |
--------------------------------------------------------------------------------
/ch5/your_first_pass/your_turn/passWithNewPM.h:
--------------------------------------------------------------------------------
1 | #ifndef YOURTURN_PASSWITHNEWPM_H
2 | #define YOURTURN_PASSWITHNEWPM_H
3 | #include "llvm/IR/PassManager.h" // For PassInfoMixin.
4 |
5 | namespace llvm {
6 | class Function;
7 | };
8 |
9 | class YourTurnConstantPropagationNewPass /* TODO: Fill in the blanks*/ {
10 | public:
11 | };
12 |
13 | /* TODO: Change this into 1 when you're ready to try */
14 | #define YOUR_TURN_IS_READY 0
15 | #endif
16 |
--------------------------------------------------------------------------------
/ch5/your_first_pipeline/CMakeLists.txt:
--------------------------------------------------------------------------------
# Build script for the "your first pipeline" exercise.
# cmake_minimum_required comes first so that policy defaults are set before
# any other command runs.
cmake_minimum_required(VERSION 3.22)

project(CH5
  LANGUAGES CXX C)

# Compile everything as C++17, and fail if the compiler cannot do it.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

include(../../cmake/utils/set-llvm-install-prefix.cmake)

set(CURR_TARGET
  your_first_pipeline
)

set(SRC
  "main.cpp"
  "solution/passPipelineWithNewPM.cpp"
  "solution/passPipelineWithLegacyPM.cpp"
  "your_turn/passPipelineWithNewPM.cpp"
  "your_turn/passPipelineWithLegacyPM.cpp"
)

# Now build our tools
add_executable(${CURR_TARGET} ${SRC})

# Disable RTTI on this target only. Setting CMAKE_CXX_FLAGS directly would
# throw away any flags supplied by the user.
target_compile_options(${CURR_TARGET} PRIVATE -fno-rtti)

# Find the libraries that correspond to the LLVM components
# that we wish to use
llvm_map_components_to_libnames(llvm_libs support core transformutils passes ipo)

# Link against LLVM libraries
target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
36 |
--------------------------------------------------------------------------------
/ch5/your_first_pipeline/README.md:
--------------------------------------------------------------------------------
1 | # Build your first pass pipeline #
2 |
3 | In this exercise you will practice how to create your first pass pipeline with both the legacy and the new pass manager.
4 |
5 | Go to `your_turn/passPipelineWithLegacyPM.cpp` and `your_turn/passPipelineWithNewPM.cpp` to implement the pass pipeline for the legacy pass manager and the new pass manager respectively.
6 |
7 | For this exercise, you will need to implement a pass pipeline that consists of the three following optimizations, in this order:
8 | - The mem2reg pass (called PromoteMemoryToRegister)
9 | - The instcombine pass
10 | - The always inliner pass
11 |
12 | ## Configuring your build environment ##
13 |
14 | ```bash
15 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild .
16 | ```
17 |
18 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option).
19 |
20 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed.
21 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package.
22 |
23 | ## Build ##
24 |
25 | ```bash
26 | ninja -C build
27 | ```
28 |
29 | This builds the default target in the build directory.
30 |
31 | This should produce in the `build` directory a binary named `your_first_pipeline`.
32 |
33 | ## Run ##
34 |
35 | ```bash
36 | ./build/your_first_pipeline [input.ll|.bc]
37 | ```
38 |
39 | This will run both the reference implementation and your implementation on `input.ll` if specified or the default input if not.
40 |
41 | Check that the output looks similar for both your pipeline and the reference pipeline.
42 |
43 | ## Solution ##
44 |
45 | Look into the `solution` directory to see how to implement the pass pipeline with both pass managers.
46 |
47 | ## Producing an input ##
48 |
49 | Using the following command line, you can produce from a C file an input to give to your program:
50 | ```bash
51 | clang -o - -S -emit-llvm test.c -O0 | sed -e 's#optnone##g' | <path/to/llvm/install>/bin/opt -S -passes=mem2reg,instnamer > input.ll
52 | ```
53 |
54 | optnone => remove the attribute that prevents optimizations
55 | mem2reg => get rid of stack accesses / build SSA
56 | instnamer => get rid of the implicit variables
57 |
--------------------------------------------------------------------------------
/ch5/your_first_pipeline/main.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/ADT/STLExtras.h" // For llvm::all_of.
2 | #include "llvm/ADT/STLFunctionalExtras.h" // For llvm::function_ref.
3 | #include "llvm/AsmParser/Parser.h" // For parseAssemblyString.
4 | #include "llvm/IR/Function.h"
5 | #include "llvm/IR/LLVMContext.h"
6 | #include "llvm/IR/LegacyPassManager.h" // For legacy::PassManager.
7 | #include "llvm/IR/Module.h"
8 | #include "llvm/IR/PassManager.h" // For the new PassManager.
9 | #include "llvm/IR/PassTimingInfo.h" // For reportAndResetTimings.
10 | #include "llvm/IR/Verifier.h"
11 | #include "llvm/IRReader/IRReader.h" // For parseIRFile.
12 | #include "llvm/Support/CommandLine.h" // For cl::.
13 | #include "llvm/Support/Debug.h" // For errs().
14 | #include "llvm/Support/SourceMgr.h" // For SMDiagnostic.
15 | #include "llvm/Transforms/Utils/Cloning.h" // For CloneModule.
16 |
17 | using namespace llvm;
18 |
19 | extern void runSolutionPassPipelineForLegacyPM(Module &);
20 | extern void runSolutionPassPipelineForNewPM(Module &);
21 | extern void runYourTurnPassPipelineForLegacyPM(Module &);
22 | extern void runYourTurnPassPipelineForNewPM(Module &);
23 |
24 | bool checkFunctionCorrectness(const llvm::Function &Res) {
25 | // verifyFunction returns true if it finds errors and
26 | // print them on the provided output stream (errs() here).
27 | if (verifyFunction(Res, &errs())) {
28 | errs() << Res.getName() << " does not verify\n";
29 | return false;
30 | }
31 | return true;
32 | }
33 |
34 | // Default input in case no file was provided.
35 | const char *InputIR =
36 | "define i32 @foo(i32 noundef %arg) {\n"
37 | "bb:\n"
38 | " %i = shl i32 5, 3\n"
39 | " %i1 = icmp ne i32 %arg, 0\n"
40 | " br i1 %i1, label %bb2, label %bb4\n"
41 | "\n"
42 | "bb2: ; preds = %bb\n"
43 | " %i3 = sdiv i32 %i, 5\n"
44 | " br label %bb6\n"
45 | "\n"
46 | "bb4: ; preds = %bb\n"
47 | " %i5 = or i32 %i, 3855\n"
48 | " br label %bb6\n"
49 | "\n"
50 | "bb6: ; preds = %bb4, %bb2\n"
51 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n"
52 | " ret i32 %.0\n"
53 | "}\n"
54 | "\n"
55 | "define i32 @bar(i32 noundef %arg) {\n"
56 | "bb:\n"
57 | " %i = shl i32 -1, 3\n"
58 | " %i1 = icmp ne i32 %arg, 0\n"
59 | " br i1 %i1, label %bb2, label %bb4\n"
60 | "\n"
61 | "bb2: ; preds = %bb\n"
62 | " %i3 = udiv i32 %i, 3\n"
63 | " br label %bb6\n"
64 | "\n"
65 | "bb4: ; preds = %bb\n"
66 | " %i5 = or i32 %i, 3855\n"
67 | " br label %bb6\n"
68 | "\n"
69 | "bb6: ; preds = %bb4, %bb2\n"
70 | " %.0 = phi i32 [ %i3, %bb2 ], [ %i5, %bb4 ]\n"
71 | " %i7 = add i32 %.0, 1\n"
72 | " ret i32 %i7\n"
73 | "}\n";
74 |
75 | static cl::opt InputFilename(cl::Positional,
76 | cl::desc(" "));
77 |
78 | int main(int argc, char **argv) {
79 | LLVMContext Context;
80 | SMDiagnostic Err;
81 | std::unique_ptr MyModule;
82 |
83 | cl::ParseCommandLineOptions(argc, argv, "CH5 pipeline example\n");
84 |
85 | // To be able to play with the optimization a little bit,
86 | // support a mode where you can feed your own IR files.
87 | if (!InputFilename.empty()) {
88 | outs() << "Reading module from '" << InputFilename << "'\n";
89 | MyModule = parseIRFile(InputFilename, Err, Context);
90 | } else {
91 | MyModule = parseAssemblyString(InputIR, Err, Context);
92 | }
93 | if (!MyModule) {
94 | errs() << "Unable to build module\n";
95 | return -1;
96 | }
97 |
98 | function_ref functions[] = {
99 | runSolutionPassPipelineForLegacyPM, runSolutionPassPipelineForNewPM,
100 | runYourTurnPassPipelineForLegacyPM, runYourTurnPassPipelineForNewPM};
101 |
102 | bool hadError = false;
103 | for (int i = 0; i < 4; ++i) {
104 | outs() << "Checking the run for "
105 | << ((i < 2) ? "Provided solution" : "Your turn");
106 | outs() << " on the " << ((i & 1) ? "new" : "legacy") << " pass manager\n";
107 | // Start with the legacy pass manager.
108 | std::unique_ptr ModuleForLegacyPM = CloneModule(*MyModule);
109 | functions[i](*ModuleForLegacyPM);
110 |
111 | reportAndResetTimings(&outs());
112 |
113 | bool solutionIsCorrect =
114 | all_of(ModuleForLegacyPM->functions(),
115 | [](const Function &F) { return checkFunctionCorrectness(F); });
116 | hadError |= !solutionIsCorrect;
117 | }
118 | return !hadError;
119 | }
120 |
--------------------------------------------------------------------------------
/ch5/your_first_pipeline/solution/passPipelineWithLegacyPM.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/IR/LegacyPassManager.h" // For legacy::PassManager.
2 | #include "llvm/Transforms/IPO/AlwaysInliner.h" // For inliner pass.
3 | #include "llvm/Transforms/InstCombine/InstCombine.h" // For instcombine pass.
4 | #include "llvm/Transforms/Utils.h" // For mem2reg pass.
5 |
6 | using namespace llvm;
7 |
8 | void runSolutionPassPipelineForLegacyPM(Module &MyModule) {
9 | legacy::PassManager LegacyPM;
10 |
11 | LegacyPM.add(createPromoteMemoryToRegisterPass());
12 | LegacyPM.add(createInstructionCombiningPass());
13 | LegacyPM.add(createAlwaysInlinerLegacyPass());
14 |
15 | LegacyPM.run(MyModule);
16 | }
--------------------------------------------------------------------------------
/ch5/your_first_pipeline/solution/passPipelineWithNewPM.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/IR/Module.h"
2 | #include "llvm/IR/PassManager.h" // For the new PassManager.
3 | #include "llvm/Passes/PassBuilder.h"
4 | #include "llvm/Passes/StandardInstrumentations.h"
5 | #include "llvm/Transforms/IPO/AlwaysInliner.h" // For inliner pass.
6 | #include "llvm/Transforms/InstCombine/InstCombine.h" // For instcombine pass.
7 | #include "llvm/Transforms/Utils/Mem2Reg.h" // For mem2reg pass.
8 |
9 | using namespace llvm;
10 |
11 | void runSolutionPassPipelineForNewPM(Module &MyModule) {
12 | LLVMContext &Context = MyModule.getContext();
13 | // CAREFUL the order of the manager is important here since the destructor
14 | // needs to be called in the right order otherwise it will crash.
15 | FunctionAnalysisManager FAM;
16 | ModuleAnalysisManager MAM;
17 |
18 | PassInstrumentationCallbacks PIC;
19 | PrintPassOptions PrintPassOpts;
20 | PrintPassOpts.Verbose = true;
21 | PrintPassOpts.SkipAnalyses = false;
22 | PrintPassOpts.Indent = true;
23 | StandardInstrumentations SI(Context, /*DebugLogging=*/true,
24 | /*VerifyEachPass=*/false, PrintPassOpts);
25 | SI.registerCallbacks(PIC, &MAM);
26 |
27 | // Register the passes used implicitly at the start of the pipeline.
28 | // And enable logging.
29 | MAM.registerPass([&] { return PassInstrumentationAnalysis(&PIC); });
30 | FAM.registerPass([&] { return PassInstrumentationAnalysis(&PIC); });
31 | // Well this one is not as implicit, we ask for it with the call to the
32 | // adaptor below.
33 | MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); });
34 | FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); });
35 | ModulePassManager NewPM;
36 | FunctionPassManager FPMgr;
37 |
38 | // Populate the XXXAnalysisManager with the IDs for all the passes.
39 | PassBuilder PB;
40 | PB.registerFunctionAnalyses(FAM);
41 | PB.registerModuleAnalyses(MAM);
42 |
43 | // Wrap the function pass into a module pass.
44 | #ifndef USE_MODULE_MGR
45 | FPMgr.addPass(PromotePass());
46 | FPMgr.addPass(InstCombinePass());
47 | #else
48 | NewPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
49 | NewPM.addPass(createModuleToFunctionPassAdaptor(InstCombinePass()));
50 | #endif
51 | NewPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPMgr)));
52 | NewPM.addPass(AlwaysInlinerPass());
53 | NewPM.run(MyModule, MAM);
54 | }
55 |
--------------------------------------------------------------------------------
/ch5/your_first_pipeline/your_turn/passPipelineWithLegacyPM.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/IR/LegacyPassManager.h" // For legacy::PassManager.
2 |
3 | using namespace llvm;
4 |
5 | void runYourTurnPassPipelineForLegacyPM(Module &MyModule) {}
6 |
--------------------------------------------------------------------------------
/ch5/your_first_pipeline/your_turn/passPipelineWithNewPM.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/IR/Module.h"
2 | #include "llvm/IR/PassManager.h" // For the new PassManager.
3 |
4 | using namespace llvm;
5 |
6 | void runYourTurnPassPipelineForNewPM(Module &MyModule) {}
7 |
--------------------------------------------------------------------------------
/ch6/CMakeLists.txt:
--------------------------------------------------------------------------------
# Build script for the Chapter 6 TableGen examples: every .td file of this
# directory is expanded into a .inc file in the build directory.
# cmake_minimum_required comes first so that policy defaults are set before
# any other command runs.
cmake_minimum_required(VERSION 3.22)

project(CH6
  LANGUAGES CXX C)

set(CMAKE_CXX_STANDARD 17)

include(../cmake/utils/set-llvm-install-prefix.cmake)

# Hook up the TableGen tooling.
set(LLVM_TABLEGEN_EXE "${LLVM_TOOLS_BINARY_DIR}/llvm-tblgen")
include("${LLVM_CMAKE_DIR}/AddLLVM.cmake")
include("${LLVM_CMAKE_DIR}/TableGen.cmake")

# Create the different TableGen outputs.
set(LLVM_TARGET_DEFINITIONS my-first-gisel.td)
tablegen(LLVM GlobalISel.inc -gen-global-isel)

# The input file is spelled person.td on disk; the capitalized spelling only
# resolves on case-insensitive file systems. The output name is unchanged.
set(LLVM_TARGET_DEFINITIONS person.td)
tablegen(LLVM Person.inc -print-records)

set(LLVM_TARGET_DEFINITIONS multiclass.td)
tablegen(LLVM multiclass.inc -print-records)

set(LLVM_TARGET_DEFINITIONS multiclass-with-def-type.td)
tablegen(LLVM multiclass-with-def-type.inc -print-records)

# Register a target for all the TableGen outputs.
add_public_tablegen_target(CommonTableGen)

# Make that target part of the `all` target.
set_target_properties(CommonTableGen PROPERTIES EXCLUDE_FROM_ALL 0)
33 |
--------------------------------------------------------------------------------
/ch6/README.md:
--------------------------------------------------------------------------------
1 | # Getting started with TableGen #
2 |
3 | This directory contains examples of TableGen inputs.
4 |
5 | The goal here is to familiarize yourself with the TableGen syntax and understand how the records are expanded.
6 |
7 | ## Configuring your build environment ##
8 |
9 | ```bash
10 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild .
11 | ```
12 |
13 | This will initialize your build directory in `build` (the `-B` option) with Ninja (`-G` option).
14 |
15 | You must have a version of LLVM installed at `<path/to/llvm/install>` for this to succeed.
16 | Either build and install your own llvm (with the `install` target from your LLVM build) or install an [official LLVM release](https://releases.llvm.org/) package.
17 |
18 | ## Build ##
19 |
20 | ```bash
21 | ninja -C build
22 | ```
23 |
24 | This builds the default target in the build directory.
25 |
26 | This should produce in the `build` directory one `.inc` file per `.td` file in the source directory.
27 |
28 | Look at the produced `.inc` and the related `.td` and try to see if the expansion matches your understanding after reading Chapter 6.
29 |
--------------------------------------------------------------------------------
/ch6/multiclass-with-def-type.td:
--------------------------------------------------------------------------------
1 | class Gift {
2 | string label = "Just a gift";
3 | }
4 |
// A multiclass defining three records at once. `base` is the template
// argument used to build the `name` field of the A and B records.
multiclass Bundle<string base> {
  def A {
    string name = !strconcat(base, "-", "A");
    int price = 12;
    int weight = 1;
  }
  def B {
    string name = !strconcat(base, "-", "B");
    string tag = "special";
  }
  // Unlike A and B, this record derives from the Gift class.
  def C: Gift {
    string color = "red";
  }
}
19 |
// Class with one template argument, copied into the shippingPrice field.
class ShippingPrice<int arg> {
  int shippingPrice = arg;
}
23 |
24 | defm valuedBundle : Bundle<"valued">, ShippingPrice<5>;
25 |
// valuedBundleC can be stored in a list of Gift because the C record of the
// Bundle multiclass derives from Gift.
def AnotherRecord {
  list<Gift> gifts = [valuedBundleC];
}
29 |
--------------------------------------------------------------------------------
/ch6/multiclass.td:
--------------------------------------------------------------------------------
// A multiclass defining two records at once. `base` is the template argument
// used to build the `name` field of each record.
multiclass Bundle<string base> {
  def A {
    string name = !strconcat(base, "-", "A");
    int price = 12;
    int weight = 1;
  }
  def B {
    string name = !strconcat(base, "-", "B");
    string tag = "special";
  }
}
12 |
// Class with one template argument, copied into the shippingPrice field.
class ShippingPrice<int arg> {
  int shippingPrice = arg;
}
16 |
17 | defm valuedBundle : Bundle<"valued">, ShippingPrice<5>;
18 |
--------------------------------------------------------------------------------
/ch6/my-first-gisel.td:
--------------------------------------------------------------------------------
1 | include "llvm/Target/Target.td"
2 |
3 | def : HwMode<"", []>;
4 | def myii: InstrInfo;
5 | def : Target {
6 | let InstructionSet = myii;
7 | }
8 | def x0 : Register<"x0"> ;
9 | def GPR32 : RegisterClass<"myNamespace", [i32], 32, (add x0)>;
10 |
--------------------------------------------------------------------------------
/ch6/person.td:
--------------------------------------------------------------------------------
// Class with two template arguments, copied into the _age and _name fields.
// `name` needs a default value because the anonymous record below only
// provides an age.
// NOTE(review): the default value used here is a placeholder; confirm the
// intended one.
class Person<int age, string name = "unknown"> {
  int _age = age;
  string _name = name;
}
5 |
6 | def A : Person<23, "A">;
7 | def B : Person<64, "B">;
8 | def /*Anonym*/: Person<43>;
9 |
--------------------------------------------------------------------------------
/ch7/CMakeLists.txt:
--------------------------------------------------------------------------------
# Build script for the Chapter 7 check_vec_int_ty example.
# cmake_minimum_required comes first so that policy defaults are set before
# any other command runs.
cmake_minimum_required(VERSION 3.22)

project(CH7
  LANGUAGES CXX C)

# Compile everything as C++17, and fail if the compiler cannot do it.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

include(../cmake/utils/set-llvm-install-prefix.cmake)

set(CURR_TARGET
  check_vec_int_ty
)

set(SRC
  "check_vec_int_ty.cpp"
)

# Now build our tools
add_executable(${CURR_TARGET} ${SRC})

# Disable RTTI on this target only. Setting CMAKE_CXX_FLAGS directly would
# throw away any flags supplied by the user.
target_compile_options(${CURR_TARGET} PRIVATE -fno-rtti)

# Find the libraries that correspond to the LLVM components
# that we wish to use
llvm_map_components_to_libnames(llvm_libs support core irreader)

# Link against LLVM libraries
target_link_libraries(${CURR_TARGET} PRIVATE ${llvm_libs})
32 |
--------------------------------------------------------------------------------
/ch7/README.md:
--------------------------------------------------------------------------------
1 | # Illustrate different LLVM IR constructs #
2 |
3 | This directory contains the examples that illustrate the different LLVM IR constructs introduced in Chapter 7.
4 |
5 | For the horizontal add example, please run:
6 | ```bash
7 | clang -emit-llvm hadd_vector.c -S -o - -O0 | sed -e 's#optnone##g' | <path/to/llvm/install>/bin/opt -S -passes=mem2reg,instnamer
8 | ```
9 |
10 | Additionally, the `check_vec_int_ty.cpp` file shows different ways to check that a type is a vector of integers.
11 |
12 | Look at the implementation of `isVectorOfIntV1` and `isVectorOfIntV2` in this file.
13 |
14 | To run `check_vec_int_ty.cpp` follow the steps below.
15 |
16 |
17 |
18 | ## Configuring your build environment ##
19 |
20 | ```bash
21 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild .
22 | ```
23 |
24 | ## To build ##
25 |
26 | ```bash
27 | ninja -Cbuild
28 | ```
29 |
30 | ## To run ##
31 |
32 | ```bash
33 | ./build/check_vec_int_ty [input.ll]
34 | ```
35 |
36 | You can provide an input for `check_vec_int_ty` if you want.
37 |
38 |
39 | ## To produce an input for the `check_vec_int_ty` executable ##
40 |
41 | To produce an input:
42 | ```bash
43 | clang -o - -S -emit-llvm test.c -O0 | sed -e 's#optnone##g' | <path/to/llvm/install>/bin/opt -S -passes=mem2reg,instnamer > input.ll
44 | ```
45 |
46 | optnone => remove the attribute that prevents optimizations
47 | mem2reg => get rid of stack accesses / build SSA
48 | instnamer => get rid of the implicit variables
49 |
50 |
--------------------------------------------------------------------------------
/ch7/access_struct_type.ll:
--------------------------------------------------------------------------------
1 | %my.type = type {i32, {ptr, half}, {i32, i1, i1}}
2 |
; Loads the half nested inside the %my.type object pointed to by %dst.
define half @useOfMyType(ptr %dst) {
  ; Indices: i64 0 selects the %my.type object itself, i32 1 its second field
  ; ({ptr, half}), and the last i32 1 the half inside that field.
  %addr_half_field = getelementptr inbounds %my.type, ptr %dst, i64 0, i32 1, i32 1
  %res = load half, ptr %addr_half_field, align 8
  ret half %res
}
8 |
--------------------------------------------------------------------------------
/ch7/anonymous_type.ll:
--------------------------------------------------------------------------------
; Stores a zero-initialized value of a struct type to %dst. The struct type is
; spelled out literally in the instruction instead of being declared under a
; name.
define void @useOfUnknownType(ptr %dst) {
  store {i32, ptr} zeroinitializer, ptr %dst, align 8
  ret void
}
5 |
--------------------------------------------------------------------------------
/ch7/array_type.ll:
--------------------------------------------------------------------------------
; Stores a zero-initialized two-dimensional array to %dst:
; 12 elements, each of them an array of 36 i32.
define void @useOfArrayType(ptr %dst) {
  store [12 x [36 x i32]] zeroinitializer, ptr %dst, align 8
  ret void
}
5 |
--------------------------------------------------------------------------------
/ch7/check_vec_int_ty.cpp:
--------------------------------------------------------------------------------
1 | #include "llvm/AsmParser/Parser.h" // For parseAssemblyString.
2 | #include "llvm/IR/DerivedTypes.h" // For VectorType and IntegerType.
3 | #include "llvm/IR/Instruction.h"
4 | #include "llvm/IR/LLVMContext.h"
5 | #include "llvm/IR/Module.h"
6 | #include "llvm/IR/Type.h"
7 | #include "llvm/IR/Verifier.h"
8 | #include "llvm/IRReader/IRReader.h" // For parseIRFile.
9 | #include "llvm/Support/Debug.h" // For errs().
10 | #include "llvm/Support/SourceMgr.h" // For SMDiagnostic.
11 |
12 | using namespace llvm;
13 |
14 | // Default input in case no file was provided.
15 | const char *InputIR =
16 | "define <2 x i32> @foo(i32 %src) {\n"
17 | "bb:\n"
18 | " %simd = insertelement <2 x i32> poison, i32 %src, i32 0\n"
19 | " %input = insertelement <2 x i32> %simd, i32 %src, i32 1\n"
20 | " %res = add <2 x i32> %input, %input\n"
21 | " ret <2 x i32> %res\n"
22 | "}\n";
23 |
24 | bool isVectorOfIntV1(Instruction &Add) {
25 | auto *VecTy = dyn_cast(Add.getType());
26 | return VecTy && isa(VecTy->getElementType());
27 | }
28 |
29 | bool isVectorOfIntV2(Instruction &Add) {
30 | Type *Ty = Add.getType();
31 | return Ty->isVectorTy() && Ty->getScalarType()->isIntegerTy();
32 | }
33 |
34 | int main(int argc, char **argv) {
35 | LLVMContext Context;
36 | SMDiagnostic Err;
37 | std::unique_ptr MyModule;
38 | // To be able to play with the optimization a little bit,
39 | // support a mode where you can feed your own IR files.
40 | if (argc == 2) {
41 | outs() << "Reading module from '" << argv[1] << "'\n";
42 | MyModule = parseIRFile(argv[1], Err, Context);
43 | } else {
44 | MyModule = parseAssemblyString(InputIR, Err, Context);
45 | }
46 | if (!MyModule) {
47 | errs() << "Unable to build module\n";
48 | return -1;
49 | }
50 |
51 | for (Function &Func : *MyModule) {
52 | for (BasicBlock &BB : Func) {
53 | for (Instruction &Inst : BB) {
54 | if (Inst.getOpcode() != Instruction::Add)
55 | continue;
56 | bool IsVecIntTy1 = isVectorOfIntV1(Inst);
57 | bool IsVecIntTy2 = isVectorOfIntV2(Inst);
58 | if (IsVecIntTy1 != IsVecIntTy2)
59 | errs() << "Found a mismatch with v1 & v2 for " << Inst << '\n';
60 | if (IsVecIntTy1)
61 | outs() << "Found an add of vector int:\n" << Inst << '\n';
62 | }
63 | }
64 | }
65 |
66 | return 0;
67 | }
68 |
--------------------------------------------------------------------------------
/ch7/datalayout_alignment.ll:
--------------------------------------------------------------------------------
; Loads an i32 from %src and returns it.
define i32 @foo(ptr %src) {
  ; Note that this load carries no explicit `align`.
  %res = load i32, ptr %src
  ret i32 %res
}
5 |
--------------------------------------------------------------------------------
/ch7/endianness.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 |
/* Builds an int out of the first sizeof(int) bytes of input, input[0] being
 * the least significant byte. */
int buildIntLittleEndian(const char *input) {
  /* Accumulate in an unsigned value: shifting bits into the sign bit of a
   * signed int is not well defined. */
  unsigned res = 0;
  for (unsigned i = 0; i < sizeof(res); ++i) {
    /* Go through unsigned char: plain char may be signed, in which case a
     * byte >= 0x80 would be sign-extended and set all the higher bits. */
    res |= (unsigned)(unsigned char)input[i] << (8 * i);
  }
  return (int)res;
}
10 |
/* Builds an int out of the first sizeof(int) bytes of input, input[0] being
 * the most significant byte. */
int buildIntBigEndian(const char *input) {
  /* Accumulate in an unsigned value: shifting bits into the sign bit of a
   * signed int is not well defined. */
  unsigned res = 0;
  for (unsigned i = 0; i < sizeof(res); ++i) {
    res <<= 8;
    /* Go through unsigned char: plain char may be signed, in which case a
     * byte >= 0x80 would be sign-extended and overwrite the bytes already
     * accumulated. */
    res |= (unsigned char)input[i];
  }
  return (int)res;
}
19 |
/* Interprets the same four bytes with both byte orders and prints the two
 * resulting values in hexadecimal. */
int main() {
  char input[] = {0x01, 0x02, 0x03, 0x04};
  printf("little: 0x%08x\nbig: 0x%08x\n", buildIntLittleEndian(input),
         buildIntBigEndian(input));
  return 0;
}
27 |
--------------------------------------------------------------------------------
/ch7/full_example.ll:
--------------------------------------------------------------------------------
1 | define i32 @foo(i32, i32, i32 %arg) noinline noreturn { ; The first two parameters are unnamed; the body refers to them as %0 and %1.
2 | entry:
3 | %myid = add i32 %0, %1
4 | %31 = mul i32 %myid, 2 ; Value with an explicit number instead of a name.
5 | %45 = shl i32 %31, 5
6 | %"00~random~00" = udiv i32 %45, %arg ; Quoted name: it contains `~`, which a plain identifier cannot.
7 | br label %46 ; Targets the block right below, which has no label line of its own.
8 |
9 | br label %47
10 |
11 | 47:
12 | ret i32 %"00~random~00"
13 | }
14 |
15 | !0 = !{!"zero"}
16 | !2 = !{!"two"} ; Defined before !1: the nodes are not listed in numeric order.
17 | !1 = !{!"one"}
18 | ; A named metadata.
19 | !name = !{!0, !1, !2} ; Refers to the three nodes defined above.
20 |
--------------------------------------------------------------------------------
/ch7/hadd_vector.c:
--------------------------------------------------------------------------------
1 | typedef struct { /* Four int fields, summed by hadd() below. */
2 | int a, b, c, d;
3 | } Vec4;
4 |
5 | int hadd(const Vec4 *input) { /* Returns the sum of the four fields of *input. */
6 | return input->a + input->b + input->c + input->d;
7 | }
8 |
--------------------------------------------------------------------------------
/ch7/impact_of_abi.c:
--------------------------------------------------------------------------------
1 | typedef struct { /* Four int fields; bigStructReturned() returns one of these by value. */
2 | int a, b, c, d;
3 | } BigStruct;
4 |
5 | BigStruct bigStructReturned() { /* Returns a BigStruct with all four fields set to 0. */
6 | BigStruct big = {0, 0, 0, 0};
7 | return big;
8 | }
9 |
--------------------------------------------------------------------------------
/ch7/named_type.ll:
--------------------------------------------------------------------------------
1 | %my.type = type { i32, ptr } ; Named struct type: an i32 followed by a pointer.
2 |
3 | define void @useOfMyType(ptr %dst) { ; Stores an all-zero %my.type through %dst.
4 | store %my.type zeroinitializer, ptr %dst, align 8
5 | ret void
6 | }
7 |
--------------------------------------------------------------------------------
/ch8/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # cmake_minimum_required comes first: it sets the policies for what follows.
2 | cmake_minimum_required(VERSION 3.22)
3 |
4 | set(CMAKE_CXX_STANDARD 17)
5 |
6 | project(CH8
7 |   LANGUAGES CXX C)
8 |
9 | # No library or executable is declared in this file: the build only runs `opt`
10 | # on the example IR files through add_run_passes (from opt-run.cmake).
11 | include(../cmake/utils/set-llvm-install-prefix.cmake)
12 | include(../cmake/utils/opt-run.cmake)
13 |
14 | set(CURR_TARGET
15 |   produce-ir
16 | )
17 |
18 | # Filled by add_run_passes with one generated file per example.
19 | set(OPT_RUN_DEPENDENCIES)
20 |
21 | # Add the canonicalization examples.
22 | add_run_passes(OPT_RUN_DEPENDENCIES "instcombine" canonical_form.ll)
23 | add_run_passes(OPT_RUN_DEPENDENCIES "instcombine" xor.ll)
24 | # Add the value tracking examples.
25 | add_run_passes(OPT_RUN_DEPENDENCIES "instcombine" value_tracking.ll)
26 |
27 | # Add all the examples which have the same filename as the pass name.
28 | set(standard_passes
29 |   lcssa
30 |   argpromotion
31 |   dce
32 |   deadargelim
33 |   indvars
34 |   inline
35 |   licm
36 |   load-store-vectorizer
37 |   loop-reduce
38 |   loop-unroll
39 |   loop-vectorize
40 |   reassociate
41 |   simplifycfg
42 |   slp-vectorizer
43 | )
44 |
45 | foreach(standard_pass IN LISTS standard_passes)
46 |   add_run_passes(OPT_RUN_DEPENDENCIES "${standard_pass}" "${standard_pass}.ll")
47 | endforeach()
48 |
49 | # Custom target (not an executable) that depends on every generated file.
50 | # ALL puts it in the default build, so a plain `ninja` runs all the passes.
51 | add_custom_target(${CURR_TARGET} ALL
52 |   DEPENDS ${OPT_RUN_DEPENDENCIES}
53 | )
54 |
--------------------------------------------------------------------------------
/ch8/README.md:
--------------------------------------------------------------------------------
1 | In this chapter you can discover the transformations performed by some of the optimization passes.
2 |
3 | To see that in action, first set up your build directory:
4 | ```bash
5 | cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLLVM_DIR=<path/to/llvm/install>/lib/cmake/llvm -Bbuild .
6 | ```
7 |
8 | Then, when you invoke `ninja`, it will run all the passes exercised in this folder:
9 | ```bash
10 | ninja -Cbuild
11 | ```
12 |
13 | This will produce files named `xx.out.ll` in the `build` directory.
14 |
15 | For each `xx.out.ll` file, you'll find the input IR in this chapter's directory with the name `xx.ll`.
16 | For instance, the input IR of `dce.out.ll` is `dce.ll`.
17 |
18 | The output file is produced by applying the pass with the same name as the input file to the input file.
19 | E.g., we apply the `dce` pass to the `dce.ll` input file to produce `dce.out.ll`.
20 |
21 | A few files do not follow this convention.
22 | These are `xor.ll`, `canonical_form.ll`, and `value_tracking.ll`.
23 | All three files exercise `instcombine`.
24 |
25 | In any case, you can see the command used to produce the output by running the `ninja` command with the `-v` option.
26 |
27 | Also, if you want to rerun one (or several) passes, just modify the related input IR file (or simply update its timestamp with `touch xx.ll`) and rerun the Ninja command.
28 |
29 | For this chapter, look at the differences between the input and output IRs to see what happened.
30 | You can for instance use the following command:
31 | ```bash
32 | diff -U10 xx.ll build/xx.out.ll
33 | ```
34 |
--------------------------------------------------------------------------------
/ch8/argpromotion.ll:
--------------------------------------------------------------------------------
1 | define i64 @foo() { ; Calls @bar with the address of a stack slot that holds 2.
2 | %local = alloca i64
3 | store i64 2, ptr %local
4 | %res = call i64 @bar(ptr %local) ; The value reaches @bar through a pointer.
5 | ret i64 %res
6 | }
7 |
8 | define internal i64 @bar(ptr %local) { ; Returns the i64 loaded through %local, plus 2.
9 | %val = load i64, ptr %local ; The only use of %local is this load.
10 | %res = add i64 %val, 2
11 | ret i64 %res
12 | }
13 |
14 |
--------------------------------------------------------------------------------
/ch8/canonical_form.ll:
--------------------------------------------------------------------------------
1 | define i64 @canonical_form(i64 %b, i64 %c) { ; Computes %b - %c with a single sub.
2 | %a = sub i64 %b, %c
3 | ret i64 %a
4 | }
5 |
6 | define i64 @non_canonical_form(i64 %b, i64 %c) { ; Computes %b + (0 - %c), which is the same value as %b - %c.
7 | %neg_c = sub i64 0, %c ; Negation of %c.
8 | %a = add i64 %b, %neg_c
9 | ret i64 %a
10 | }
11 |
--------------------------------------------------------------------------------
/ch8/dce.ll:
--------------------------------------------------------------------------------
1 | define i64 @foo(i64 %in) { ; Returns %in * 2.
2 | %dead = add i64 %in, %in ; %dead is never used below.
3 | %res = mul i64 %in, 2
4 | ret i64 %res
5 | }
6 |
--------------------------------------------------------------------------------
/ch8/deadargelim.ll:
--------------------------------------------------------------------------------
1 | define i64 @foo() { ; Calls @bar with the addresses of two stack slots that both hold 2.
2 | %local = alloca i64
3 | %local2 = alloca i64
4 | store i64 2, ptr %local
5 | store i64 2, ptr %local2
6 | %res = call i64 @bar(ptr %local, ptr %local2)
7 | ret i64 %res
8 | }
9 |
10 | define internal i64 @bar(ptr %local, ptr %local2) { ; Returns the i64 loaded through %local, plus 2; %local2 is never used in this body.
11 | %val = load i64, ptr %local
12 | %res = add i64 %val, 2
13 | ret i64 %res
14 | }
15 |
16 |
--------------------------------------------------------------------------------
/ch8/indvars.ll:
--------------------------------------------------------------------------------
1 | define i64 @foo(i64 %src, i64 %ub) { ; Counts up in %loop, then combines the final count with %src.
2 | entry:
3 | br label %loop
4 |
5 | loop:
6 | %iv = phi i64 [0, %entry], [%iv1, %loop] ; 0 when coming from %entry, %iv1 on every back edge.
7 | %iv1 = add i64 %iv, 1
8 | %cond = icmp ult i64 %iv1, %ub ; Unsigned compare: loop again while %iv1 < %ub.
9 | br i1 %cond, label %loop, label %end
10 |
11 | end:
12 | %tmp = add i64 %iv1, %src ; %iv1 is defined inside the loop and used here, after it.
13 | %res = add i64 %tmp, %iv1
14 | ret i64 %res
15 | }
16 |
--------------------------------------------------------------------------------
/ch8/inline.ll:
--------------------------------------------------------------------------------
1 | define i64 @foo() { ; Calls @bar with the address of a stack slot that holds 2.
2 | %local = alloca i64
3 | store i64 2, ptr %local
4 | %res = call i64 @bar(ptr %local) ; Direct call to @bar, which is defined in this module.
5 | ret i64 %res
6 | }
7 |
8 | define i64 @bar(ptr %local) { ; Returns the i64 loaded through %local, plus 2; not marked `internal`.
9 | %val = load i64, ptr %local
10 | %res = add i64 %val, 2
11 | ret i64 %res
12 | }
13 |
14 |
--------------------------------------------------------------------------------
/ch8/lcssa.ll:
--------------------------------------------------------------------------------
1 | define i64 @def_in_loop_use_outside(i64 %src, i64 %upper_bound) { ; Counts up in %loop, then uses the final count in %end.
2 | entry:
3 | br label %loop
4 |
5 | loop:
6 | %iv = phi i64 [0, %entry], [%iv_plus_1, %loop] ; 0 when coming from %entry, %iv_plus_1 on every back edge.
7 | %iv_plus_1 = add i64 %iv, 1
8 | %cond = icmp ult i64 %iv_plus_1, %upper_bound ; Unsigned compare: loop again while %iv_plus_1 < %upper_bound.
9 | br i1 %cond, label %loop, label %end
10 |
11 | end:
12 | %tmp = add i64 %iv_plus_1, %src ; First use, outside the loop, of a value defined inside it.
13 | %res = add i64 %tmp, %iv_plus_1 ; Second use of that same value outside the loop.
14 | ret i64 %res
15 | }
16 |
--------------------------------------------------------------------------------
/ch8/licm.ll:
--------------------------------------------------------------------------------
1 | define i64 @foo(i64 %src, i64 %ub, ptr %addr) { ; Keeps adding the i64 found at %addr to %iv until the sum is no longer below %ub.
2 | entry:
3 | br label %loop
4 |
5 | loop:
6 | %iv = phi i64 [0, %entry], [%iv1, %loop]
7 | %offset = load i64, ptr %addr ; %addr is a function argument: the same address on every iteration.
8 | %iv1 = add i64 %iv, %offset
9 | %cond = icmp ult i64 %iv1, %ub
10 | br i1 %cond, label %loop, label %end
11 |
12 | end:
13 | %res = add i64 %src, %iv1
14 | ret i64 %res
15 | }
16 |
--------------------------------------------------------------------------------
/ch8/load-store-vectorizer.ll:
--------------------------------------------------------------------------------
1 | target triple="aarch64-apple-ios"
2 |
3 | define void @bar(ptr %src, ptr %dst) { ; Copies two consecutive i64 values from %src to %dst.
4 | %v0 = load i64, ptr %src
5 | %src1 = getelementptr i64, ptr %src, i64 1 ; Address of the i64 that follows the one at %src.
6 | %v1 = load i64, ptr %src1
7 | store i64 %v0, ptr %dst
8 | %dst1 = getelementptr i64, ptr %dst, i64 1 ; Address of the i64 that follows the one at %dst.
9 | store i64 %v1, ptr %dst1
10 | ret void
11 | }
12 |
--------------------------------------------------------------------------------
/ch8/loop-reduce.c:
--------------------------------------------------------------------------------
1 | long long foo(long long *in, long long ub) { /* Returns the index of the first non-zero element among in[0] .. in[ub - 1], or -1 if there is none. */
2 | for (long long i = 0; i < ub; ++i)
3 | if (in[i] != 0)
4 | return i;
5 | return -1;
6 | }
7 |
--------------------------------------------------------------------------------
/ch8/loop-reduce.ll:
--------------------------------------------------------------------------------
1 | target triple = "aarch64-apple-ios"
2 |
3 | define i64 @foo(ptr %arg, i64 %ub) { ; Returns the first index below %ub whose i64 element of %arg is non-zero, or -1.
4 | bb:
5 | br label %bb3
6 |
7 | bb3: ; Loop header: goes to bb10 once %idx is no longer below %ub (signed compare).
8 | %idx = phi i64 [ 0, %bb ], [ %i9, %bb8 ]
9 | %i = icmp slt i64 %idx, %ub
10 | br i1 %i, label %bb4, label %bb10
11 |
12 | bb4: ; Loads element %idx and leaves the loop if it is non-zero.
13 | %i5 = getelementptr inbounds i64, ptr %arg, i64 %idx
14 | %i6 = load i64, ptr %i5
15 | %i7 = icmp ne i64 %i6, 0
16 | br i1 %i7, label %bb10, label %bb8
17 |
18 | bb8: ; Moves on to the next index.
19 | %i9 = add nsw i64 %idx, 1
20 | br label %bb3
21 |
22 | bb10:
23 | %res = phi i64 [ %idx, %bb4 ], [ -1, %bb3 ] ; %idx when coming from the match in bb4, -1 when coming from the bound check in bb3.
24 | ret i64 %res
25 | }
26 |
--------------------------------------------------------------------------------
/ch8/loop-unroll.ll:
--------------------------------------------------------------------------------
1 | define i64 @foo(ptr %arg) { ; Returns the first index in [0, 3) whose i64 element of %arg is non-zero, or -1.
2 | bb:
3 | br label %bb3
4 |
5 | bb3: ; Loop header: the bound is the constant 3.
6 | %idx = phi i64 [ 0, %bb ], [ %i9, %bb8 ]
7 | %i = icmp slt i64 %idx, 3
8 | br i1 %i, label %bb4, label %bb10
9 |
10 | bb4: ; Loads element %idx and leaves the loop if it is non-zero.
11 | %i5 = getelementptr inbounds i64, ptr %arg, i64 %idx
12 | %i6 = load i64, ptr %i5
13 | %i7 = icmp ne i64 %i6, 0
14 | br i1 %i7, label %bb10, label %bb8
15 |
16 | bb8: ; Moves on to the next index.
17 | %i9 = add nsw i64 %idx, 1
18 | br label %bb3
19 |
20 | bb10:
21 | %res = phi i64 [ %idx, %bb4 ], [ -1, %bb3 ] ; %idx when coming from the match in bb4, -1 when coming from the bound check in bb3.
22 | ret i64 %res
23 | }
24 |
--------------------------------------------------------------------------------
/ch8/loop-vectorize.c:
--------------------------------------------------------------------------------
1 | void foo(short * restrict dst, short * restrict A, short * restrict B ) { /* Stores A[i] + B[i] into dst[i] for i = 0 .. 23. */
2 | for (long long i = 0; i != 24; ++i)
3 | dst[i] = A[i] + B[i];
4 | }
5 |
--------------------------------------------------------------------------------
/ch8/loop-vectorize.ll:
--------------------------------------------------------------------------------
1 | target triple = "arm64-apple-macosx14.0.0"
2 |
3 |
4 | define void @foo(ptr noalias noundef %arg, ptr noalias noundef %arg1, ptr noalias noundef %arg2) { ; For every index in [0, 24): stores %arg1[idx] + %arg2[idx] into %arg[idx], on i16 elements.
5 | bb:
6 | br label %bb3
7 |
8 | bb3: ; Loop header: leaves for bb15 once %idx equals 24.
9 | %idx = phi i64 [ 0, %bb ], [ %i14, %bb4 ]
10 | %i = icmp ne i64 %idx, 24
11 | br i1 %i, label %bb4, label %bb15
12 |
13 | bb4:
14 | %i5 = getelementptr inbounds i16, ptr %arg1, i64 %idx
15 | %i6 = load i16, ptr %i5
16 | %i7 = sext i16 %i6 to i32 ; Both operands are sign-extended to i32 before the add.
17 | %i8 = getelementptr inbounds i16, ptr %arg2, i64 %idx
18 | %i9 = load i16, ptr %i8
19 | %i10 = sext i16 %i9 to i32
20 | %i11 = add nsw i32 %i7, %i10
21 | %i12 = trunc i32 %i11 to i16 ; The i32 sum is truncated back to i16 before the store.
22 | %i13 = getelementptr inbounds i16, ptr %arg, i64 %idx
23 | store i16 %i12, ptr %i13
24 | %i14 = add nsw i64 %idx, 1
25 | br label %bb3
26 |
27 | bb15:
28 | ret void
29 | }
30 |
--------------------------------------------------------------------------------
/ch8/reassociate.ll:
--------------------------------------------------------------------------------
1 | define i64 @foo(i64 %in0, i64 %in1) { ; Computes ((%in0 + %in1) + 2) - %in1.
2 | %v0 = add i64 %in0, %in1
3 | %v1 = add i64 %v0, 2
4 | %v2 = sub i64 %v1, %in1 ; Subtracts the %in1 that was added in %v0.
5 | ret i64 %v2
6 | }
7 |
--------------------------------------------------------------------------------
/ch8/simplifycfg.ll:
--------------------------------------------------------------------------------
1 | define i64 @foo(i64 %src, i64 %ub) { ; Returns %src + 2 * umax(%ub, 1).
2 | entry:
3 | br label %loop
4 |
5 | loop:
6 | br i1 false, label %loop, label %end ; The condition is the constant false: the edge back to %loop is never taken.
7 |
8 | end:
9 | %umax = call i64 @llvm.umax.i64(i64 %ub, i64 1)
10 | %tmp = add i64 %umax, %src
11 | %res = add i64 %tmp, %umax
12 | ret i64 %res
13 | }
14 |
15 | declare i64 @llvm.umax.i64(i64, i64)
16 |
--------------------------------------------------------------------------------
/ch8/slp-vectorizer.ll:
--------------------------------------------------------------------------------
1 | ;target triple="aarch64-apple-ios"
2 |
3 | define <2 x i64> @foo(i64 %in0, i64 %in1) { ; Returns the vector <%in0 + 2, %in1 + 5>.
4 | %v0 = add i64 %in0, 2 ; Scalar add feeding lane 0.
5 | %v1 = add i64 %in1, 5 ; Scalar add feeding lane 1.
6 | %partial = insertelement <2 x i64> poison, i64 %v0, i32 0
7 | %res = insertelement <2 x i64> %partial, i64 %v1, i32 1
8 | ret <2 x i64> %res
9 | }
10 |
--------------------------------------------------------------------------------
/ch8/value_tracking.ll:
--------------------------------------------------------------------------------
1 | define i1 @foo(i64 %b) { ; Tests whether (%b with its two lowest bits cleared) is a multiple of 2.
2 | %a = and i64 %b, u0xfffffffffffffffc ; The mask has every bit set except the two lowest ones.
3 | %mod = urem i64 %a, 2 ; The lowest bit of %a is 0, so this remainder is 0.
4 | %cond = icmp eq i64 %mod, 0
5 | ret i1 %cond
6 | }
7 |
--------------------------------------------------------------------------------
/ch8/xor.ll:
--------------------------------------------------------------------------------
1 | define i64 @xor(i64 %x) { ; Returns %x xor %x.
2 | %res = xor i64 %x, %x ; Both operands are the same value.
3 | ret i64 %res
4 | }
5 |
--------------------------------------------------------------------------------
/cmake/utils/llc-run.cmake:
--------------------------------------------------------------------------------
1 | # add_run_llc(<PARENT_VAR> <LLC_ARGS> <INPUT> [<PREFIX>])
2 | #
3 | # Add a custom command that generates an output file by running llc on
4 | # <INPUT>, and append that output to the list named <PARENT_VAR> in the
5 | # caller's scope.
6 | #
7 | #   PARENT_VAR  Name of the list accumulating the files that need to be built.
8 | #   LLC_ARGS    llc command line options, as one space-separated string.
9 | #   INPUT       Input file, relative to CMAKE_SOURCE_DIR.
10 | #   PREFIX      Optional. When given, the output is named <PREFIX>_<output>
11 | #               and what llc prints goes to <output>.txt instead of /dev/null.
12 | #
13 | # The output lands in CMAKE_BINARY_DIR and is named after <INPUT>, with the
14 | # `.ll` extension replaced by `.out.mir`.
15 | function(add_run_llc PARENT_VAR LLC_ARGS INPUT)
16 |   set(INPUT_FULL_PATH "${CMAKE_SOURCE_DIR}/${INPUT}")
17 |   # Anchor on the end of the name so that only the extension is rewritten.
18 |   string(REGEX REPLACE "\\.ll$" ".out.mir" OUTPUT "${INPUT}")
19 |   # Spaces are escaped in CMake.
20 |   # To issue spaces, we need to use a list of values.
21 |   # Do the translation here so that at the high level it remains natural and
22 |   # we can use spaces to declare our command line options.
23 |   # The expansion is quoted so that an empty LLC_ARGS is accepted too.
24 |   string(REPLACE " " ";" LLC_ARGS "${LLC_ARGS}")
25 |   if(ARGC GREATER 3)
26 |     set(OUTPUT "${ARGV3}_${OUTPUT}")
27 |     set(REDIRECT "${CMAKE_BINARY_DIR}/${OUTPUT}.txt")
28 |   else()
29 |     set(REDIRECT "/dev/null")
30 |   endif()
31 |   set(OUTPUT_FULL_PATH "${CMAKE_BINARY_DIR}/${OUTPUT}")
32 |   # DEPENDS names the very file that is handed to llc.
33 |   add_custom_command(
34 |     OUTPUT ${OUTPUT}
35 |     COMMAND ${LLVM_TOOLS_BINARY_DIR}/llc ${LLC_ARGS} ${INPUT_FULL_PATH} -o ${OUTPUT_FULL_PATH} > ${REDIRECT} 2>&1
36 |     DEPENDS ${INPUT_FULL_PATH}
37 |     COMMENT "Generating code with llc"
38 |     VERBATIM
39 |   )
40 |   list(APPEND ${PARENT_VAR} ${OUTPUT})
41 |   set(${PARENT_VAR} ${${PARENT_VAR}} PARENT_SCOPE)
42 | endfunction()
43 |
--------------------------------------------------------------------------------
/cmake/utils/opt-run.cmake:
--------------------------------------------------------------------------------
1 | # add_run_opt(<PARENT_VAR> <OPT_ARGS> <INPUT> [<PREFIX>])
2 | #
3 | # Add a custom command that generates an output file by running `opt -S` on
4 | # <INPUT>, and append that output to the list named <PARENT_VAR> in the
5 | # caller's scope.
6 | #
7 | #   PARENT_VAR  Name of the list accumulating the files that need to be built.
8 | #   OPT_ARGS    opt command line options, as one space-separated string.
9 | #   INPUT       Input file, relative to CMAKE_SOURCE_DIR.
10 | #   PREFIX      Optional. When given, the output is named <PREFIX>_<output>
11 | #               and what opt prints goes to <output>.txt instead of /dev/null.
12 | #
13 | # The output lands in CMAKE_BINARY_DIR and is named after <INPUT>, with the
14 | # `.ll` extension replaced by `.out.ll`.
15 | function(add_run_opt PARENT_VAR OPT_ARGS INPUT)
16 |   set(INPUT_FULL_PATH "${CMAKE_SOURCE_DIR}/${INPUT}")
17 |   # Anchor on the end of the name so that only the extension is rewritten.
18 |   string(REGEX REPLACE "\\.ll$" ".out.ll" OUTPUT "${INPUT}")
19 |   # Spaces are escaped in CMake.
20 |   # To issue spaces, we need to use a list of values.
21 |   # Do the translation here so that at the high level it remains natural and
22 |   # we can use spaces to declare our command line options.
23 |   # The expansion is quoted so that an empty OPT_ARGS is accepted too.
24 |   string(REPLACE " " ";" OPT_ARGS "${OPT_ARGS}")
25 |   if(ARGC GREATER 3)
26 |     set(OUTPUT "${ARGV3}_${OUTPUT}")
27 |     set(REDIRECT "${CMAKE_BINARY_DIR}/${OUTPUT}.txt")
28 |   else()
29 |     set(REDIRECT "/dev/null")
30 |   endif()
31 |   set(OUTPUT_FULL_PATH "${CMAKE_BINARY_DIR}/${OUTPUT}")
32 |   # DEPENDS names the very file that is handed to opt.
33 |   add_custom_command(
34 |     OUTPUT ${OUTPUT}
35 |     COMMAND ${LLVM_TOOLS_BINARY_DIR}/opt -S ${OPT_ARGS} ${INPUT_FULL_PATH} -o ${OUTPUT_FULL_PATH} > ${REDIRECT} 2>&1
36 |     DEPENDS ${INPUT_FULL_PATH}
37 |     COMMENT "Generating code with opt"
38 |     VERBATIM
39 |   )
40 |   list(APPEND ${PARENT_VAR} ${OUTPUT})
41 |   set(${PARENT_VAR} ${${PARENT_VAR}} PARENT_SCOPE)
42 | endfunction()
43 |
44 | # add_run_passes(<PARENT_VAR> <OPT_ARGS> <INPUT> [<PREFIX>])
45 | #
46 | # Shorthand for add_run_opt where <OPT_ARGS> is what follows `-passes=` on the
47 | # opt command line. The optional <PREFIX> is forwarded to add_run_opt.
48 | function(add_run_passes PARENT_VAR OPT_ARGS INPUT)
49 |   # Quoted so that each value reaches add_run_opt as exactly one argument.
50 |   add_run_opt(${PARENT_VAR} "-passes=${OPT_ARGS}" "${INPUT}" ${ARGN})
51 |   # Propagate the result back one level.
52 |   set(${PARENT_VAR} ${${PARENT_VAR}} PARENT_SCOPE)
53 | endfunction()
54 |
--------------------------------------------------------------------------------
/cmake/utils/set-llvm-install-prefix.cmake:
--------------------------------------------------------------------------------
1 | # Finds LLVM through its CMake package configuration, then adds its include
2 | # directories and its definitions to the directory that includes this file.
3 | find_package(LLVM REQUIRED CONFIG)
4 |
5 | # Turn the LLVM_DEFINITIONS string into a list before handing it over.
6 | separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
7 | add_definitions(${LLVM_DEFINITIONS_LIST})
8 | include_directories(${LLVM_INCLUDE_DIRS})
9 |
10 | # Report which LLVM package was picked up.
11 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
12 | message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
13 |
--------------------------------------------------------------------------------