├── .gitignore ├── Cargo.toml ├── README.md ├── Vagrantfile ├── in.ex ├── out.ll └── src └── main.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | .vagrant 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "llvm-example" 3 | version = "0.1.0" 4 | authors = ["Ulysse Carion "] 5 | 6 | [[bin]] 7 | name = "main" 8 | 9 | [dependencies] 10 | llvm-sys = "0.2" 11 | peg = "0.3" 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Writing an LLVM-IR compiler in Rust: Getting started 2 | 3 | This is the code accompanying [*Writing an LLVM-IR compiler in Rust: Getting 4 | started*](http://blog.ulysse.io/2016/07/03/llvm-getting-started.html). 5 | 6 | # License (MIT) 7 | 8 | The MIT License (MIT) 9 | Copyright (c) 2016 Ulysse Carion 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy of 12 | this software and associated documentation files (the "Software"), to deal in 13 | the Software without restriction, including without limitation the rights to 14 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 15 | the Software, and to permit persons to whom the Software is furnished to do so, 16 | subject to the following conditions: 17 | 18 | The above copyright notice and this permission notice shall be included in all 19 | copies or substantial portions of the Software. 20 | 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 23 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 24 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 25 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 26 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 27 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | Vagrant.configure("2") do |config| 2 | config.vm.box = "bento/ubuntu-16.04" 3 | end 4 | -------------------------------------------------------------------------------- /in.ex: -------------------------------------------------------------------------------- 1 | a = 1 2 | b = 0 3 | c = if a { 4 | if b { 5 | 11 6 | } else { 7 | 40 8 | } 9 | } else { 10 | if b { 11 | 10 12 | } else { 13 | 20 14 | } 15 | } 16 | c + 2 17 | -------------------------------------------------------------------------------- /out.ll: -------------------------------------------------------------------------------- 1 | ; ModuleID = 'example_module' 2 | 3 | define i64 @main() { 4 | entry: 5 | %a = alloca i64 6 | %b = alloca i64 7 | %c = alloca i64 8 | store i64 1, i64* %a 9 | store i64 0, i64* %b 10 | %a1 = load i64, i64* %a 11 | %is_nonzero = icmp ne i64 %a1, 0 12 | br i1 %is_nonzero, label %entry2, label %entry3 13 | 14 | entry2: ; preds = %entry 15 | %b5 = load i64, i64* %b 16 | %is_nonzero6 = icmp ne i64 %b5, 0 17 | br i1 %is_nonzero6, label %entry7, label %entry8 18 | 19 | entry3: ; preds = %entry 20 | %b10 = load i64, i64* %b 21 | %is_nonzero11 = icmp ne i64 %b10, 0 22 | br i1 %is_nonzero11, label %entry12, label %entry13 23 | 24 | entry4: ; preds = %entry14, %entry9 25 | %iftmp16 = phi i64 [ %iftmp, %entry9 ], [ %iftmp15, %entry14 ] 26 | store i64 %iftmp16, i64* %c 27 | %c17 = load i64, i64* %c 28 | %addtmp = add i64 %c17, 2 29 | ret i64 %addtmp 30 | 31 | entry7: ; preds = %entry2 32 | br label %entry9 33 | 34 | entry8: ; preds = %entry2 
35 | br label %entry9 36 | 37 | entry9: ; preds = %entry8, %entry7 38 | %iftmp = phi i64 [ 11, %entry7 ], [ 40, %entry8 ] 39 | br label %entry4 40 | 41 | entry12: ; preds = %entry3 42 | br label %entry14 43 | 44 | entry13: ; preds = %entry3 45 | br label %entry14 46 | 47 | entry14: ; preds = %entry13, %entry12 48 | %iftmp15 = phi i64 [ 10, %entry12 ], [ 20, %entry13 ] 49 | br label %entry4 50 | } 51 | -------------------------------------------------------------------------------- /src/main.rs: --------------------------------------------------------------------------------
#![feature(plugin)]
#![plugin(peg_syntax_ext)]

extern crate llvm_sys as llvm;

use std::collections::{HashMap, HashSet};
use std::ffi::{CStr, CString};
use std::fs::File;
use std::io::Read;
use std::os::raw::c_char;
use std::ptr;

use llvm::prelude::*;

/// Reads the program in `in.ex`, parses it, and writes the generated LLVM IR
/// to `out.ll`.
///
/// # Panics
///
/// Panics if `in.ex` cannot be opened or read, or if it does not parse.
fn main() {
    let mut input = String::new();
    let mut f = File::open("in.ex").expect("could not open `in.ex`");
    f.read_to_string(&mut input).expect("could not read `in.ex`");

    let parsed_input = parser::program(&input).expect("could not parse `in.ex`");

    unsafe {
        codegen(parsed_input);
    }
}

/// A node of the parsed program.
#[derive(Debug)]
pub enum Expr {
    /// An integer literal, kept as the matched source text.
    Literal(String),
    /// A read of the named variable.
    Ref(String),
    /// `name = value`; evaluates to the assigned value.
    Assign(String, Box<Expr>),
    /// `lhs + rhs`
    Add(Box<Expr>, Box<Expr>),
    /// `lhs - rhs`
    Sub(Box<Expr>, Box<Expr>),
    /// `lhs * rhs`
    Mul(Box<Expr>, Box<Expr>),
    /// `lhs / rhs`
    Div(Box<Expr>, Box<Expr>),
    /// `if condition { then_body } else { else_body }`; evaluates to the last
    /// expression of whichever body runs.
    If(Box<Expr>, Vec<Expr>, Vec<Expr>),
}

// `product` and `sum` are that way to get operator precedence.
//
// NOTE: both rules recurse on their right-hand side, so chains of the same
// precedence group to the right: `a - b - c` parses as `a - (b - c)`.
peg! parser(r#"
use super::Expr;

#[pub]
program -> Vec<Expr>
    = statements

statements -> Vec<Expr>
    = e:(expression ** ("\n" _)) "\n" { e }

expression -> Expr
    = if_expression
    / i:identifier _ "=" _ s:expression { Expr::Assign(i, Box::new(s)) }
    / sum

if_expression -> Expr
    = "if" _ e:expression _ "{\n" _ then_body:statements _ "}" _ "else" _ "{\n" _ else_body:statements _ "}" {
        Expr::If(Box::new(e), then_body, else_body)
    }

sum -> Expr
    = a:product _ "+" _ b:sum { Expr::Add(Box::new(a), Box::new(b)) }
    / a:product _ "-" _ b:sum { Expr::Sub(Box::new(a), Box::new(b)) }
    / product

product -> Expr
    = a:ref_or_literal _ "*" _ b:product { Expr::Mul(Box::new(a), Box::new(b)) }
    / a:ref_or_literal _ "/" _ b:product { Expr::Div(Box::new(a), Box::new(b)) }
    / ref_or_literal

ref_or_literal -> Expr
    = i:identifier { Expr::Ref(i) }
    / int_literal

identifier -> String
    = [a-zA-Z]+ { match_str.to_owned() }

int_literal -> Expr
    = [0-9]+ { Expr::Literal(match_str.to_owned()) }

_ = [ \t]*
"#);

/// Generates a module containing a single `main` function that evaluates
/// `input` and returns the value of its last expression (or 0 for an empty
/// program), then writes the module's IR to `out.ll`.
///
/// # Safety
///
/// Calls straight into the LLVM C API. All LLVM objects are created and
/// disposed inside this function.
///
/// # Panics
///
/// Panics if the IR cannot be written to `out.ll`, or if code generation for
/// any expression panics (see `codegen_expr`).
unsafe fn codegen(input: Vec<Expr>) {
    let context = llvm::core::LLVMContextCreate();
    // The module must live in the same context as the types and the builder
    // used to populate it; `LLVMModuleCreateWithName` would put it in the
    // global context instead.
    let module = llvm::core::LLVMModuleCreateWithNameInContext(b"example_module\0".as_ptr() as *const _, context);
    let builder = llvm::core::LLVMCreateBuilderInContext(context);

    // In LLVM, you get your types from functions.
    let int_type = llvm::core::LLVMInt64TypeInContext(context);
    let function_type = llvm::core::LLVMFunctionType(int_type, ptr::null_mut(), 0, 0);
    let function = llvm::core::LLVMAddFunction(module, b"main\0".as_ptr() as *const _, function_type);

    let entry_name = CString::new("entry").unwrap();
    let bb = llvm::core::LLVMAppendBasicBlockInContext(context, function, entry_name.as_ptr());
    llvm::core::LLVMPositionBuilderAtEnd(builder, bb);

    // Reserve a stack slot for every variable before emitting any other
    // instruction, so all `alloca`s end up in the entry block.
    let mut names = HashMap::new();
    insert_allocations(context, builder, &mut names, &input);

    let zero = llvm::core::LLVMConstInt(int_type, 0, 0);

    let mut return_value = zero; // return value on empty program
    for expr in input {
        return_value = codegen_expr(context, builder, function, &mut names, expr);
    }
    llvm::core::LLVMBuildRet(builder, return_value);

    // Instead of dumping to stdout, let's write out the IR to `out.ll`.
    // LLVM stores an error string through the out-pointer on failure, so it
    // has to point at real storage rather than be null.
    let out_file = CString::new("out.ll").unwrap();
    let mut error_message: *mut c_char = ptr::null_mut();
    let failed = llvm::core::LLVMPrintModuleToFile(module, out_file.as_ptr(), &mut error_message);
    let write_error = if failed != 0 {
        if error_message.is_null() {
            Some(String::from("unknown error"))
        } else {
            let message = CStr::from_ptr(error_message).to_string_lossy().into_owned();
            llvm::core::LLVMDisposeMessage(error_message);
            Some(message)
        }
    } else {
        None
    };

    llvm::core::LLVMDisposeBuilder(builder);
    llvm::core::LLVMDisposeModule(module);
    llvm::core::LLVMContextDispose(context);

    // Report the failure only after the LLVM objects have been released.
    if let Some(message) = write_error {
        panic!("could not write `out.ll`: {}", message);
    }
}

/// Records, in first-assignment order, every variable name assigned anywhere
/// inside `expr`, including inside `if` bodies and on the right-hand side of
/// other assignments.
///
/// `seen` de-duplicates names; `ordered` receives each new name exactly once.
fn collect_assigned_names<'a>(expr: &'a Expr, seen: &mut HashSet<&'a str>, ordered: &mut Vec<&'a str>) {
    match *expr {
        Expr::Literal(_) | Expr::Ref(_) => {},

        Expr::Assign(ref name, ref value) => {
            if seen.insert(name.as_str()) {
                ordered.push(name.as_str());
            }
            collect_assigned_names(value, seen, ordered);
        },

        Expr::Add(ref lhs, ref rhs) |
        Expr::Sub(ref lhs, ref rhs) |
        Expr::Mul(ref lhs, ref rhs) |
        Expr::Div(ref lhs, ref rhs) => {
            collect_assigned_names(lhs, seen, ordered);
            collect_assigned_names(rhs, seen, ordered);
        },

        Expr::If(ref condition, ref then_body, ref else_body) => {
            collect_assigned_names(condition, seen, ordered);
            for body_expr in then_body.iter().chain(else_body.iter()) {
                collect_assigned_names(body_expr, seen, ordered);
            }
        },
    }
}

/// Emits one `alloca` of a 64-bit integer for every variable assigned anywhere
/// in `exprs` and records the resulting pointer in `names` under the
/// variable's name.
///
/// The allocas are emitted at the builder's current position, in the order
/// the variables are first assigned in the program.
///
/// # Safety
///
/// `context` and `builder` must be valid LLVM handles, and `builder` must be
/// positioned inside a basic block.
unsafe fn insert_allocations(context: LLVMContextRef, builder: LLVMBuilderRef, names: &mut HashMap<String, LLVMValueRef>, exprs: &[Expr]) {
    let mut seen = HashSet::new();
    let mut ordered = Vec::new();
    for expr in exprs {
        collect_assigned_names(expr, &mut seen, &mut ordered);
    }

    let int_type = llvm::core::LLVMInt64TypeInContext(context);
    for variable_name in ordered {
        let name = CString::new(variable_name.as_bytes()).expect("identifiers never contain NUL bytes");
        let pointer = llvm::core::LLVMBuildAlloca(builder, int_type, name.as_ptr());

        names.insert(variable_name.to_owned(), pointer);
    }
}

// When you write out instructions in LLVM, you get back `LLVMValueRef`s. You
// can then use these references in other instructions.

/// Emits the instructions that evaluate `expr` at the builder's current
/// position and returns the value the expression evaluates to.
///
/// `func` is the function new basic blocks are appended to (needed for `if`),
/// and `names` maps each variable to the stack slot created by
/// `insert_allocations`.
///
/// # Safety
///
/// `context`, `builder` and `func` must be valid LLVM handles belonging to the
/// same context, and `builder` must be positioned inside a basic block of
/// `func`.
///
/// # Panics
///
/// Panics if an integer literal does not fit in 64 bits, or if a variable is
/// read or assigned without having a stack slot in `names`.
unsafe fn codegen_expr(context: LLVMContextRef, builder: LLVMBuilderRef, func: LLVMValueRef, names: &mut HashMap<String, LLVMValueRef>, expr: Expr) -> LLVMValueRef {
    match expr {
        Expr::Literal(int_literal) => {
            let int_type = llvm::core::LLVMInt64TypeInContext(context);
            let value = int_literal.parse().expect("integer literal does not fit in 64 bits");
            llvm::core::LLVMConstInt(int_type, value, 0)
        },

        Expr::Add(lhs, rhs) => {
            let lhs = codegen_expr(context, builder, func, names, *lhs);
            let rhs = codegen_expr(context, builder, func, names, *rhs);

            let name = CString::new("addtmp").unwrap();
            llvm::core::LLVMBuildAdd(builder, lhs, rhs, name.as_ptr())
        },

        Expr::Sub(lhs, rhs) => {
            let lhs = codegen_expr(context, builder, func, names, *lhs);
            let rhs = codegen_expr(context, builder, func, names, *rhs);

            let name = CString::new("subtmp").unwrap();
            llvm::core::LLVMBuildSub(builder, lhs, rhs, name.as_ptr())
        },

        Expr::Mul(lhs, rhs) => {
            let lhs = codegen_expr(context, builder, func, names, *lhs);
            let rhs = codegen_expr(context, builder, func, names, *rhs);

            let name = CString::new("multmp").unwrap();
            llvm::core::LLVMBuildMul(builder, lhs, rhs, name.as_ptr())
        },

        Expr::Div(lhs, rhs) => {
            let lhs = codegen_expr(context, builder, func, names, *lhs);
            let rhs = codegen_expr(context, builder, func, names, *rhs);

            // Unsigned division: both operands are interpreted as unsigned
            // 64-bit values.
            let name = CString::new("divtmp").unwrap();
            llvm::core::LLVMBuildUDiv(builder, lhs, rhs, name.as_ptr())
        },

        Expr::Ref(name) => {
            let pointer = match names.get(&name) {
                Some(pointer) => *pointer,
                None => panic!("use of undefined variable `{}`", name),
            };
            let name = CString::new(name).unwrap();
            llvm::core::LLVMBuildLoad(builder, pointer, name.as_ptr())
        },

        Expr::Assign(name, expr) => {
            let new_value = codegen_expr(context, builder, func, names, *expr);
            let pointer = match names.get(&name) {
                Some(pointer) => *pointer,
                None => panic!("no stack slot was allocated for variable `{}`", name),
            };
            llvm::core::LLVMBuildStore(builder, new_value, pointer);
            new_value
        },

        Expr::If(condition, then_body, else_body) => {
            // Any non-zero condition value counts as true.
            let condition_value = codegen_expr(context, builder, func, names, *condition);
            let int_type = llvm::core::LLVMInt64TypeInContext(context);
            let zero = llvm::core::LLVMConstInt(int_type, 0, 0);

            let name = CString::new("is_nonzero").unwrap();
            let is_nonzero = llvm::core::LLVMBuildICmp(builder, llvm::LLVMIntPredicate::LLVMIntNE, condition_value, zero, name.as_ptr());

            let entry_name = CString::new("entry").unwrap();
            let then_block = llvm::core::LLVMAppendBasicBlockInContext(context, func, entry_name.as_ptr());
            let else_block = llvm::core::LLVMAppendBasicBlockInContext(context, func, entry_name.as_ptr());
            let merge_block = llvm::core::LLVMAppendBasicBlockInContext(context, func, entry_name.as_ptr());

            llvm::core::LLVMBuildCondBr(builder, is_nonzero, then_block, else_block);

            llvm::core::LLVMPositionBuilderAtEnd(builder, then_block);
            let mut then_return = zero; // value of an empty body
            for expr in then_body {
                then_return = codegen_expr(context, builder, func, names, expr);
            }
            llvm::core::LLVMBuildBr(builder, merge_block);
            // A nested `if` moves the builder into its own merge block, so the
            // block that actually branches to `merge_block` has to be re-read
            // from the builder for the phi node below.
            let then_block = llvm::core::LLVMGetInsertBlock(builder);

            llvm::core::LLVMPositionBuilderAtEnd(builder, else_block);
            let mut else_return = zero; // value of an empty body
            for expr in else_body {
                else_return = codegen_expr(context, builder, func, names, expr);
            }
            llvm::core::LLVMBuildBr(builder, merge_block);
            let else_block = llvm::core::LLVMGetInsertBlock(builder);

            llvm::core::LLVMPositionBuilderAtEnd(builder, merge_block);
            let phi_name = CString::new("iftmp").unwrap();
            let phi = llvm::core::LLVMBuildPhi(builder, int_type, phi_name.as_ptr());

            let mut values = vec![then_return, else_return];
            let mut blocks = vec![then_block, else_block];

            llvm::core::LLVMAddIncoming(phi, values.as_mut_ptr(), blocks.as_mut_ptr(), 2);
            phi
        },
    }
}
--------------------------------------------------------------------------------