├── The Extended-Kaleidoscope Language Specification - v0.2.pdf ├── The Extended-Kaleidoscope Project - Compiler Requirements - v0.2.pdf ├── input.txt ├── constants.py ├── ekcc.py ├── README.md ├── short_llvmlite_examples ├── array_example.py ├── while_loop_example.py ├── array_example_with_variables.py └── for_loop_example.py ├── llvm_binder.py ├── input.ast.yaml ├── analyzer.py ├── lexerAndParser.py └── IR.py /The Extended-Kaleidoscope Language Specification - v0.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/symhom/Kaleidoscope_Compiler/HEAD/The Extended-Kaleidoscope Language Specification - v0.2.pdf -------------------------------------------------------------------------------- /The Extended-Kaleidoscope Project - Compiler Requirements - v0.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/symhom/Kaleidoscope_Compiler/HEAD/The Extended-Kaleidoscope Project - Compiler Requirements - v0.2.pdf -------------------------------------------------------------------------------- /input.txt: -------------------------------------------------------------------------------- 1 | def int fib ( int $n) 2 | { 3 | if ($n < 1 ) 4 | return 0; 5 | if ($n ==1) 6 | return 1; 7 | int $a = fib ($n - 1 ); 8 | int $b = fib ($n - 2 ); 9 | return $a + $b; 10 | } 11 | def void inc (ref int $n) { 12 | $n = $n + 1; 13 | } 14 | 15 | 16 | 17 | def int run () { 18 | int $a = fib(7); 19 | return $a; 20 | } 21 | -------------------------------------------------------------------------------- /constants.py: -------------------------------------------------------------------------------- 1 | error = 'error' 2 | blk = 'blk' 3 | uop = 'uop' 4 | typ = 'type' 5 | varExp = 'varval' 6 | var = 'var' 7 | vars_ = 'vars' 8 | name = 'name' 9 | vdecl = 'vdecl' 10 | vdecls = 'vdecls' 11 | globid = 'globid' 12 | stmts = 'stmts' 13 | stmt = 'stmt' 14 | cond = 'cond' 15 
| op = 'op' 16 | contents = 'contents' 17 | rhs = 'rhs' 18 | lhs = 'lhs' 19 | else_stmt = 'else_stmt' 20 | exp = 'exp' 21 | expstmt = 'expstmt' 22 | vardeclstmt = 'vardeclstmt' 23 | node = 'node' 24 | exps = 'exps' 25 | params = 'params' 26 | ret_type = 'ret_type' 27 | value = 'value' 28 | func = 'func' 29 | funcs = 'funcs' 30 | tdecls = 'tdecls' 31 | extern = 'extern' 32 | externs = 'externs' 33 | prog = 'prog' 34 | assign = 'assign' 35 | ret = 'ret' 36 | whileStmt = 'while' 37 | ifStmt = 'if' 38 | printStmt = "print" 39 | litExp = "lit" 40 | slitExp = "slit" 41 | funcCallExp = "funccall" 42 | binop = 'binop' 43 | cint_args= "def" 44 | -------------------------------------------------------------------------------- /ekcc.py: -------------------------------------------------------------------------------- 1 | from yaml import dump 2 | import argparse 3 | import mmap 4 | import sys 5 | import lexerAndParser 6 | import analyzer 7 | import IR 8 | import llvm_binder 9 | 10 | def readFile(fileName): 11 | f = open(fileName,"r") 12 | mMap = mmap.mmap(f.fileno(),0, prot = mmap.PROT_READ) 13 | stringFile =str(mMap[:]) 14 | stringFile = stringFile[2:-1] 15 | data = mMap[:].decode('ascii') 16 | return data 17 | 18 | def emitAst(fileName, output): 19 | yaml = dump(output, default_flow_style=False) 20 | file = open(fileName, 'w') 21 | file.write(yaml) 22 | file.close() 23 | 24 | 25 | def emit_ir(fileName, module): 26 | file = open(fileName, 'w') 27 | file.write( 28 | str(module) 29 | ) 30 | file.close() 31 | 32 | 33 | if __name__== "__main__": 34 | parser = argparse.ArgumentParser( 35 | description='Sarah and Hao\' Compiler') 36 | parser.add_argument('input_file', metavar='input_file', 37 | help='input file name') 38 | parser.add_argument('-emit-ast', action='store_true', 39 | default=False, 40 | dest='boolean_emit_ast', 41 | help='generate ast'), 42 | parser.add_argument('-emit-llvm', action='store_true', 43 | default=False, 44 | dest='boolean_emit_llvm', 45 | help='generate 
ast') 46 | parser.add_argument('-jit', action='store_true', 47 | default=False, 48 | dest='boolean_jit', 49 | help='generate ast'), 50 | parser.add_argument('-o', action='store', 51 | dest='output_file', 52 | help='output file name', 53 | required=False) 54 | parser.add_argument('-O3)', action='store_true', 55 | dest='optimization3', 56 | help='optimization IR', 57 | required=False) 58 | parser.add_argument('sysarg', nargs='*') 59 | args = parser.parse_args() 60 | if not args.boolean_jit and args.output_file is None: 61 | raise RuntimeError("at least one of -jit or -o should be specified!") 62 | 63 | code = readFile(args.input_file) 64 | ast = lexerAndParser.toAst(code) 65 | if ast is None: 66 | raise RuntimeError('AST parsing failure') 67 | errors = analyzer.semanticsCheck(ast) 68 | 69 | if args.boolean_emit_ast: 70 | emitAst(args.input_file.rsplit('.', 1)[0] + '.ast.yaml', ast) 71 | 72 | module = IR.mainFunc(ast, args.sysarg) 73 | 74 | 75 | if args.boolean_jit: 76 | module = llvm_binder.bind(module, args.sysarg, optimize = args.optimization3) 77 | # module = str(module) 78 | 79 | if args.boolean_emit_llvm: 80 | emit_ir(args.output_file.rsplit('.', 1)[0] + '.ll', module) 81 | 82 | exitCode = len(errors) 83 | print('exit: ' + str(exitCode)) 84 | sys.exit(exitCode) 85 | 86 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kaleidoscope_Compiler 2 | 3 | tldr: This is a python compiler that utilizes PLY to tokenize and parse Kaleidoscope code into an abstract syntax tree (AST). It then uses llvmlite to create the IR and JIT the code. Details about the language can be found in the "The Extended-Kaleidoscope" pdfs. 4 | 5 | example command line where input.txt contains the Kaleidsocope Code: 6 | python ekcc.py -emit-ast -jit -O input.txt 7 | 8 | input.txt contains example code. input.ast.yaml is the yaml ast of the example code. 
9 | 10 | ## Python code 11 | 1. ekcc.py: The main script. 12 | 2. constants.py: Where the global constants are defined. 13 | 3. lexerAndParser.py: Uses the PLY library to tokenize and parse the code to create an AST in the form of dictionaries and lists. http://www.dabeaz.com/ply/ply.html 14 | 4. analyzer.py: Does some error checking by recursively going through the AST. 15 | - In 'vdecl', the type may not be void 16 | - In ref 'type', the type may not be void or a reference type 17 | - All functions must be declared/defined before being used 18 | - The initialization expression for a reference variable (including function arguments) must be a variable. 19 | - All programs must define exactly one function named “run” which returns an integer (the program exit status) and takes no arguments. 20 | - Every expression should have a type (int, float, etc). When printing the AST, the type of each expression should be part of the AST nodes for each expression. 21 | 5. IR.py: Recursively traverses the AST and uses llvmlite to build the intermediate representation. Llvmlite is a lightweight LLVM-Python binding. https://llvmlite.readthedocs.io/en/latest/index.html 22 | 6. llvm_binder.py: Actually binds, compiles, and executes the IR. It injects LLVM IR code into the IR module string for the print functions. It can also run LLVM optimization passes on the module before executing it. 23 | 24 | ## short_llvmlite_examples 25 | The folder contains short self-contained examples/tutorials of llvmlite code. They do not generate the IR by recursively going through the AST, but instead build it up manually line by line. It includes llvmlite examples of creating arrays, 'for' loops, and 'while' loops. 26 | 27 | #### array_example.py 28 | It goes over a simple example that creates an array and accesses a value from it. Creates the IR representation of the code below. 
29 | 30 | ``` 31 | def int main() 32 | array = [3,5,8]; 33 | return array[1]; 34 | ``` 35 | 36 | #### array_example_with_variables.py 37 | It goes over a slightly different way to make arrays, especially when the array values are only known at run time. Creates the IR representation of the code below. 38 | ``` 39 | def int main() 40 | int x = 3; 41 | int y = x * 2 -1; 42 | array = [x,y,8]; 43 | return array[1]; 44 | ``` 45 | 46 | #### for_loop_example.py 47 | Llvmlite example of creating the LLVM IR of a 'for' loop. It is very similar to the 'while' loop example. It also uses a symbol table to keep track of the 'i' value and the array. Creates the IR representation of the code below. 48 | 49 | ``` 50 | def int_array_length_3 main() 51 | array = [3,5,8] 52 | 53 | for (int i = 0; i < 3; i++) 54 | { 55 | array[i] = array[i] + 1 56 | } 57 | return array; 58 | ``` 59 | 60 | #### while_loop_example.py 61 | Llvmlite example of creating the LLVM IR of a 'while' loop. Creates the IR representation of the code below. 
62 | ``` 63 | def int main() 64 | int x = 3; 65 | int i = 1; 66 | 67 | while (i < 5){ 68 | x = x * 2; 69 | i = i + 1; 70 | } 71 | 72 | return x; 73 | 74 | ``` 75 | -------------------------------------------------------------------------------- /short_llvmlite_examples/array_example.py: -------------------------------------------------------------------------------- 1 | from llvmlite import ir 2 | import llvmlite 3 | import llvmlite.binding as llvm 4 | from ctypes import CFUNCTYPE, c_int, c_float 5 | 6 | 7 | 8 | llvm.initialize() 9 | llvm.initialize_native_target() 10 | llvm.initialize_native_asmprinter() 11 | 12 | ##################### 13 | # The script generates the IR, executes the following code using llvmlite, and returns the value 5: 14 | 15 | # def int main() 16 | # array = [3,5,8]; 17 | # return array[1]; 18 | 19 | ###################################### 20 | # generate the IR code 21 | i32 = ir.IntType(32) 22 | f32 = ir.FloatType() 23 | 24 | #make a module 25 | module = ir.Module(name = "array_example") 26 | 27 | # define function parameters for function "main" 28 | return_type = i32 #return void 29 | argument_types = list() #can add ir.IntType(#), ir.FloatType() for arguments 30 | func_name = "main" 31 | 32 | #make a function 33 | fnty = ir.FunctionType(return_type, argument_types) 34 | main_func = ir.Function(module, fnty, name=func_name) 35 | 36 | # append basic block named 'entry', and make builder 37 | # blocks generally have 1 entry and exit point, with no branches within the block 38 | block = main_func.append_basic_block('entry') 39 | builder = ir.IRBuilder(block) 40 | 41 | # define array with length of 3 and type of i32 42 | # arrays can't have different types within it 43 | array_example = [3,5,8] 44 | array_type = ir.ArrayType(i32, len(array_example)) #According to documentation, the second argument has to be an Python Integer. It can't be ir.Constant(i32, 3) for example. 
45 | arr = ir.Constant(array_type, array_example) 46 | ptr = builder.alloca(array_type) #allocate memory 47 | builder.store(arr, ptr) 48 | 49 | #to obtain these values. Let's say we want to get index 1 50 | int_0 = ir.Constant(i32, 0) 51 | index1 = ir.Constant(i32, 1) 52 | 53 | #allocate for the number 1 54 | ptr_arg = builder.alloca(i32) 55 | builder.store(index1, ptr_arg) 56 | value = builder.load(ptr_arg) 57 | 58 | #the address of array[index] that we want 59 | address = builder.gep(ptr, [int_0,value]) #you need int_0 60 | # I would avoid using IRbuilder.extract_value(agg, index), because the index has to be a Python integer, 61 | # and not a loaded value from the IR representation unlike IRbuider.gep. For example... 62 | 63 | # variable = a *b+ 1 64 | # array = [1,2,3] 65 | # array[variable] can't use extract_value because you don't know what value 'variable' 66 | # is until run time, and extract_value uses python integer 67 | # array[0] can use extract_value, because you know at compile time that the index is '0' and can use the python integer '0' 68 | 69 | 70 | # we return this value 71 | builder.ret(builder.load(address)) 72 | #End of IR generation 73 | ############################ 74 | #Excute the generated IR without any optimizations. Nothing special is required. 75 | llvm_ir_parsed = llvm.parse_assembly(str(module)) 76 | llvm_ir_parsed.verify() 77 | 78 | # JIT 79 | target_machine = llvm.Target.from_default_triple().create_target_machine() 80 | engine = llvm.create_mcjit_compiler(llvm_ir_parsed, target_machine) 81 | engine.finalize_object() 82 | 83 | #Run the function with name func_name. This is why it makes sense to have a 'main' function that calls other functions. 
84 | entry = engine.get_function_address(func_name) 85 | cfunc = CFUNCTYPE(c_int)(entry) 86 | result = cfunc() 87 | 88 | print('The llvm IR generated is:') 89 | print(module) 90 | print() 91 | print(f'It returns {result}') 92 | 93 | #the result printed out is 94 | 95 | # The llvm IR generated is: 96 | # ; ModuleID = "array_example" 97 | # target triple = "unknown-unknown-unknown" 98 | # target datalayout = "" 99 | 100 | # define i32 @"main"() 101 | # { 102 | # entry: 103 | # %".2" = alloca [3 x i32] 104 | # store [3 x i32] [i32 3, i32 5, i32 8], [3 x i32]* %".2" 105 | # %".4" = alloca i32 106 | # store i32 1, i32* %".4" 107 | # %".6" = load i32, i32* %".4" 108 | # %".7" = getelementptr [3 x i32], [3 x i32]* %".2", i32 0, i32 %".6" 109 | # %".8" = load i32, i32* %".7" 110 | # ret i32 %".8" 111 | # } 112 | 113 | 114 | 115 | # It returns 5 116 | -------------------------------------------------------------------------------- /llvm_binder.py: -------------------------------------------------------------------------------- 1 | import llvmlite.binding as llvm 2 | from ctypes import CFUNCTYPE, c_int, c_float 3 | 4 | def inject_built_in(module): 5 | built_in = 'define void @"printFloat"(float) #0 { %2 = alloca float, align 4 store float %0, float* %2, align 4 %3 = load float, float* %2, align 4 %4 = fpext float %3 to double %5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double %4) ret void}define void @"printInt"(i32) #0 { %2 = alloca i32, align 4 store i32 %0, i32* %2, align 4 %3 = load i32, i32* %2, align 4 %4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i32 0, i32 0), i32 %3) ret void}define void @"printString"(i8*) #0 { %2 = alloca i8*, align 8 store i8* %0, i8** %2, align 8 %3 = load i8*, i8** %2, align 8 %4 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i32 0, i32 0), i8* %3) ret void}' 6 | string_declare = '''@.str = private unnamed_addr constant [4 x i8] c"%f\\0A\\00", align 1@.str.1 = private unnamed_addr constant [4 x i8] c"%d\\0A\\00", align 1@.str.2 = private unnamed_addr constant [4 x i8] c"%s\\0A\\00", align 1''' 7 | strings = break_run(str(module)) 8 | return string_declare + strings[0] + built_in + strings[1] 9 | 10 | 11 | def break_run(module_string): 12 | module_string = module_string.replace('declare void @"printFloat"(float %".1")', "") 13 | module_string = module_string.replace('declare void @"printInt"(i32 %".1")', "") 14 | module_string = module_string.replace('declare void @"printString"(i8* %".1")', "") 15 | 16 | index = module_string.index('define i32 @"run"()') 17 | results = [module_string[0:index], module_string[index:]] 18 | return results 19 | 20 | 21 | def bind(module, *args, optimize = False): 22 | module = inject_built_in(module) 23 | 24 | llvm_ir_parsed = llvm.parse_assembly(str(module)) 25 | if False: 26 | #general way of optimizing 27 | # print("from optimize") 28 | pmb = llvm.create_pass_manager_builder() 29 | pmb.opt_level = 3 30 | 31 | fpm = llvm.create_function_pass_manager(llvm_ir_parsed) 32 | pmb.populate(fpm) 33 | 34 | pm = llvm.create_module_pass_manager() 35 | pmb.populate(pm) 36 | a = pm.run(llvm_ir_parsed) 37 | # print(f'something was optimized {a}') 38 | 39 | 40 | #################################################################### 41 | if optimize: 42 | #more specific way of optimizing 43 | opt_manager = llvm.PassManagerBuilder() 44 | mod_manager = llvm.ModulePassManager() 45 | 46 | mod_manager.add_constant_merge_pass() 47 | mod_manager.add_dead_arg_elimination_pass() 48 | mod_manager.add_function_inlining_pass(225) 49 | mod_manager.add_global_dce_pass() 50 | mod_manager.add_global_optimizer_pass() 51 | mod_manager.add_ipsccp_pass() 52 | mod_manager.add_dead_code_elimination_pass() 53 | 
mod_manager.add_cfg_simplification_pass() 54 | mod_manager.add_gvn_pass() 55 | mod_manager.add_instruction_combining_pass() 56 | mod_manager.add_licm_pass() 57 | mod_manager.add_sccp_pass() 58 | mod_manager.add_type_based_alias_analysis_pass() 59 | mod_manager.add_basic_alias_analysis_pass() 60 | 61 | mod_manager.run(llvm_ir_parsed) 62 | 63 | #################################################################### 64 | 65 | llvm_ir_parsed.verify() 66 | 67 | 68 | # JIT 69 | target_machine = llvm.Target.from_default_triple().create_target_machine() 70 | engine = llvm.create_mcjit_compiler(llvm_ir_parsed, target_machine) 71 | engine.finalize_object() 72 | 73 | entry = engine.get_function_address("run") 74 | 75 | # arg_types = [] 76 | # for arg in args: 77 | # if type(arg) == int: 78 | # arg_types.append(c_int) 79 | # elif type(arg) == float: 80 | # arg_types.append(c_float) 81 | # 82 | cfunc = CFUNCTYPE(c_int)(entry) 83 | # if len(arg_types) != 0: 84 | # cfunc = CFUNCTYPE(*arg_types)(entry) 85 | 86 | # arg_values = [] 87 | # for arg in args: 88 | # if type(arg) == int: 89 | # arg_values.append(arg) 90 | # elif type(arg) == float: 91 | # arg_values.append(c_float(arg)) 92 | 93 | # result = cfunc(*arg_values) 94 | result = cfunc() 95 | print() 96 | print("program returns: {}".format(result)) 97 | return llvm_ir_parsed 98 | -------------------------------------------------------------------------------- /input.ast.yaml: -------------------------------------------------------------------------------- 1 | funcList: 2 | fib: int 3 | inc: void 4 | run: int 5 | funcs: 6 | funcs: 7 | - blk: 8 | contents: 9 | name: stmts 10 | stmts: 11 | - cond: 12 | lhs: 13 | name: varval 14 | type: int 15 | var: n 16 | name: binop 17 | op: lt 18 | rhs: 19 | name: lit 20 | type: int 21 | value: 1 22 | type: int 23 | name: if 24 | stmt: 25 | exp: 26 | name: lit 27 | type: int 28 | value: 0 29 | knownVariables: 30 | n: int 31 | name: ret 32 | - cond: 33 | lhs: 34 | name: varval 35 | type: int 36 | 
var: n 37 | name: binop 38 | op: eq 39 | rhs: 40 | name: lit 41 | type: int 42 | value: 1 43 | type: int 44 | name: if 45 | stmt: 46 | exp: 47 | name: lit 48 | type: int 49 | value: 1 50 | knownVariables: 51 | n: int 52 | name: ret 53 | - exp: 54 | globid: fib 55 | name: funccall 56 | params: 57 | exps: 58 | - lhs: 59 | name: varval 60 | type: int 61 | var: n 62 | name: binop 63 | op: sub 64 | rhs: 65 | name: lit 66 | type: int 67 | value: 1 68 | type: int 69 | type: int 70 | name: vardeclstmt 71 | vdecl: 72 | node: vdecl 73 | type: int 74 | var: a 75 | - exp: 76 | globid: fib 77 | name: funccall 78 | params: 79 | exps: 80 | - lhs: 81 | name: varval 82 | type: int 83 | var: n 84 | name: binop 85 | op: sub 86 | rhs: 87 | name: lit 88 | type: int 89 | value: 2 90 | type: int 91 | type: int 92 | name: vardeclstmt 93 | vdecl: 94 | node: vdecl 95 | type: int 96 | var: b 97 | - exp: 98 | lhs: 99 | name: varval 100 | type: int 101 | var: a 102 | name: binop 103 | op: add 104 | rhs: 105 | name: varval 106 | type: int 107 | var: b 108 | type: int 109 | name: ret 110 | knownVariables: 111 | a: int 112 | b: int 113 | n: int 114 | name: blk 115 | globid: fib 116 | name: func 117 | ret_type: int 118 | vdecls: 119 | name: vdecls 120 | vars: 121 | - node: vdecl 122 | type: int 123 | var: n 124 | - blk: 125 | contents: 126 | name: stmts 127 | stmts: 128 | - exp: 129 | exp: 130 | lhs: 131 | name: varval 132 | type: int 133 | var: n 134 | name: binop 135 | op: add 136 | rhs: 137 | name: lit 138 | type: int 139 | value: 1 140 | type: int 141 | name: assign 142 | type: int 143 | var: n 144 | name: expstmt 145 | knownVariables: 146 | n: int 147 | name: blk 148 | globid: inc 149 | name: func 150 | ret_type: void 151 | vdecls: 152 | name: vdecls 153 | vars: 154 | - node: vdecl 155 | type: ref int 156 | var: n 157 | - blk: 158 | contents: 159 | name: stmts 160 | stmts: 161 | - exp: 162 | globid: fib 163 | name: funccall 164 | params: 165 | exps: 166 | - name: lit 167 | type: int 168 | 
value: 7 169 | type: int 170 | name: vardeclstmt 171 | vdecl: 172 | node: vdecl 173 | type: int 174 | var: a 175 | - exp: 176 | name: varval 177 | type: int 178 | var: a 179 | name: ret 180 | knownVariables: 181 | a: int 182 | name: blk 183 | globid: run 184 | name: func 185 | ret_type: int 186 | name: funcs 187 | name: prog 188 | -------------------------------------------------------------------------------- /short_llvmlite_examples/while_loop_example.py: -------------------------------------------------------------------------------- 1 | from llvmlite import ir 2 | import llvmlite 3 | import llvmlite.binding as llvm 4 | from ctypes import CFUNCTYPE, c_int, c_float 5 | 6 | 7 | 8 | llvm.initialize() 9 | llvm.initialize_native_target() 10 | llvm.initialize_native_asmprinter() 11 | 12 | ##################### 13 | # The script generates the IR, executes the following code using llvmlite, and returns the value 48: 14 | 15 | # def int main() 16 | # int x = 3; 17 | # int i = 1; 18 | 19 | # while (i < 5){ 20 | # x = x * 2; 21 | # i = i + 1; 22 | # } 23 | 24 | # return x; 25 | 26 | # generate the IR code 27 | ###################################### 28 | # the initial part: define module, function, basic block, etc 29 | 30 | i32 = ir.IntType(32) #integer with 32 bits 31 | 32 | #make a module 33 | module = ir.Module(name = "array_example") 34 | 35 | # define function parameters for function "main" 36 | return_type = i32 #return int 37 | argument_types = list() #can add ir.IntType(#), ir.FloatType() for arguments 38 | func_name = "main" 39 | 40 | #make a function 41 | fnty = ir.FunctionType(return_type, argument_types) 42 | main_func = ir.Function(module, fnty, name=func_name) 43 | 44 | # append basic block named 'entry', and make builder 45 | # blocks generally have 1 entry and exit point, with no branches within the block 46 | block = main_func.append_basic_block('entry') 47 | builder = ir.IRBuilder(block) 48 | 49 | 50 | ######################################## 51 | # 
symbol table generation, key = variable name, value = pointer 52 | 53 | x_value = ir.Constant(i32, 3) #create the values 54 | i_value = ir.Constant(i32, 1) 55 | x_pointer = builder.alloca(i32) #create the addresses 56 | i_pointer = builder.alloca(i32) 57 | builder.store(x_value, x_pointer) #store those values at those addresses 58 | builder.store(i_value, i_pointer) 59 | 60 | symbol_table ={"x":x_pointer, "i":i_pointer} 61 | 62 | ########################################## 63 | # while loop. 64 | 65 | w_body_block = builder.append_basic_block("w_body") 66 | w_after_block = builder.append_basic_block("w_after") 67 | 68 | # head 69 | # initial checking of while (i < 5) 70 | constant_5 = ir.Constant(i32, 5) 71 | current_i_value = builder.load(symbol_table["i"]) #loads the value of i_pointer 72 | cond_head = builder.icmp_signed('<', current_i_value, constant_5, name="lt") #returns boolean, which is ir.IntType(1) 73 | 74 | #for the first checking of (i<5), it could go straight from the the head to w_after_block 75 | # if i is already greater than 5. It needs to check whether to start the loop at all. 76 | builder.cbranch(cond_head, w_body_block, w_after_block) 77 | 78 | # body 79 | builder.position_at_start(w_body_block) 80 | current_x_value = builder.load(symbol_table["x"]) 81 | current_i_value = builder.load(symbol_table["i"]) 82 | 83 | # x = x * 2 84 | # i = i + 1 85 | new_x_value = builder.mul(current_x_value, ir.Constant(i32, 2), name='mul') 86 | new_i_value = builder.add(current_i_value, ir.Constant(i32,1), name="add") 87 | builder.store(new_x_value, symbol_table["x"]) #store the new x value at the x pointer 88 | builder.store(new_i_value, symbol_table["i"]) 89 | 90 | #at the end of the w_body_block, you need to check i < 5 again, because there's a branch possibility 91 | # if true, it returns to the top of the w_body_block. 
If false, it exits the loop 92 | cond_body = builder.icmp_signed('<', new_i_value, constant_5, name="lt") 93 | builder.cbranch(cond_body, w_body_block, w_after_block) 94 | # after 95 | builder.position_at_start(w_after_block) 96 | 97 | ############################## 98 | # return x 99 | x_address = symbol_table["x"] 100 | x_value = builder.load(x_address) 101 | # we return this value 102 | 103 | builder.ret(x_value) 104 | 105 | #End of IR generation 106 | ############################ 107 | # Excute the generated IR without any optimizations. Nothing special is required in this part. 108 | llvm_ir_parsed = llvm.parse_assembly(str(module)) 109 | llvm_ir_parsed.verify() 110 | 111 | # JIT 112 | target_machine = llvm.Target.from_default_triple().create_target_machine() 113 | engine = llvm.create_mcjit_compiler(llvm_ir_parsed, target_machine) 114 | engine.finalize_object() 115 | 116 | #Run the function with name func_name. This is why it makes sense to have a 'main' function that calls other functions. 
117 | entry = engine.get_function_address(func_name) 118 | cfunc = CFUNCTYPE(c_int)(entry) 119 | result = cfunc() 120 | 121 | print('The llvm IR generated is:') 122 | print(module) 123 | print() 124 | print(f'It returns {result}') 125 | 126 | 127 | # The llvm IR generated is: 128 | # ; ModuleID = "array_example" 129 | # target triple = "unknown-unknown-unknown" 130 | # target datalayout = "" 131 | 132 | # define i32 @"main"() 133 | # { 134 | # entry: 135 | # %".2" = alloca i32 136 | # %".3" = alloca i32 137 | # store i32 3, i32* %".2" 138 | # store i32 1, i32* %".3" 139 | # %".6" = load i32, i32* %".3" 140 | # %"lt" = icmp slt i32 %".6", 5 141 | # br i1 %"lt", label %"w_body", label %"w_after" 142 | # w_body: 143 | # %".8" = load i32, i32* %".2" 144 | # %".9" = load i32, i32* %".3" 145 | # %"mul" = mul i32 %".8", 2 146 | # %"add" = add i32 %".9", 1 147 | # store i32 %"mul", i32* %".2" 148 | # store i32 %"add", i32* %".3" 149 | # %"lt.1" = icmp slt i32 %"add", 5 150 | # br i1 %"lt.1", label %"w_body", label %"w_after" 151 | # w_after: 152 | # %".13" = load i32, i32* %".2" 153 | # ret i32 %".13" 154 | # } 155 | 156 | 157 | # It returns 48 -------------------------------------------------------------------------------- /short_llvmlite_examples/array_example_with_variables.py: -------------------------------------------------------------------------------- 1 | from llvmlite import ir 2 | import llvmlite 3 | import llvmlite.binding as llvm 4 | from ctypes import CFUNCTYPE, c_int, c_float 5 | 6 | 7 | 8 | llvm.initialize() 9 | llvm.initialize_native_target() 10 | llvm.initialize_native_asmprinter() 11 | 12 | ##################### 13 | # The script generates the IR, executes the following code using llvmlite, and returns the value 5: 14 | 15 | # def int main() 16 | # int x = 3; 17 | # int y = x * 2 -1; 18 | # array = [x,y,8]; 19 | # return array[1]; 20 | 21 | ###################################### 22 | # generate the IR code 23 | # the initial part: define module, 
function, basic block, etc 24 | 25 | i32 = ir.IntType(32) #integer with 32 bits 26 | 27 | #make a module 28 | module = ir.Module(name = "array_example") 29 | 30 | # define function parameters for function "main" 31 | return_type = i32 #return int 32 | argument_types = list() #can add ir.IntType(#), ir.FloatType() for arguments 33 | func_name = "main" 34 | 35 | #make a function 36 | fnty = ir.FunctionType(return_type, argument_types) 37 | main_func = ir.Function(module, fnty, name=func_name) 38 | 39 | # append basic block named 'entry', and make builder 40 | # blocks generally have 1 entry and exit point, with no branches within the block 41 | block = main_func.append_basic_block('entry') 42 | builder = ir.IRBuilder(block) 43 | 44 | ###################################### 45 | #alternative method - allocate memory for array 46 | array_type = ir.ArrayType(i32, 3) #3 integers of bit 32 47 | array_pointer = builder.alloca(array_type) #pointer to array 48 | #no values have been stored yet 49 | 50 | i32_0 = ir.Constant(i32, 0) 51 | i32_1 = ir.Constant(i32, 1) 52 | i32_2 = ir.Constant(i32, 2) 53 | 54 | 55 | #can also just loop through to get this 56 | pointer_to_index_0 = builder.gep(array_pointer, [i32_0, i32_0]) #gets address of array[0] 57 | pointer_to_index_1 = builder.gep(array_pointer, [i32_0, i32_1]) #gets address of array[1] 58 | pointer_to_index_2 = builder.gep(array_pointer, [i32_0, i32_2]) #gets address of array[2] 59 | 60 | # then you can just store whatever value you want using the pointer directly to the array 61 | # builder.store(value,pointer_to_index), as opposed to using builder.alloca(i32) for an individual value 62 | 63 | 64 | 65 | 66 | ####################################### 67 | # Example using symbol table, and assigning value to certain indices 68 | 69 | # For the variable stuff, we don't define a variable 'x' in the IR. 
70 | # Instead, we can keep track of it in a symbol table where key = variable_name, value = IR pointer 71 | # It is a bit overkill to use a symbol table for this code, but symbol tables can be used to maintain 72 | # information about scope 73 | symbol_table = {} 74 | #key = variable name, value = pointer 75 | 76 | # int x = 3 77 | symbol_table["x"] = pointer_to_index_0 #add variable 'x' and its pointer to symbol table 78 | x_value = ir.Constant(i32, 3) # 79 | builder.store(x_value, pointer_to_index_0) #store the value i32 3 to array[0] 80 | 81 | #int y, allocate memory, add it to symbol table, etc 82 | symbol_table["y"] = pointer_to_index_1 83 | 84 | # below does x * 2 85 | rhs = builder.load(symbol_table["x"]) #get the pointer from the variable "x" 86 | lhs = ir.Constant(i32, 2) 87 | y_value = builder.mul(lhs,rhs, name = 'mul') 88 | # subtracts 1 from x*2 89 | rhs = y_value 90 | lhs = ir.Constant(i32, 1) 91 | y_value = builder.sub(rhs, lhs, name ='sub') 92 | 93 | #store the y_value to index 1 94 | builder.store(y_value, pointer_to_index_1) 95 | 96 | #allocate space for the constant 8 97 | value_8 = ir.Constant(i32, 8) 98 | builder.store(value_8, pointer_to_index_2) 99 | 100 | ###################################### 101 | # getting the value of a certain index, in this case, index 1 102 | 103 | address = builder.gep(array_pointer, [i32_0,i32_1]) 104 | value = builder.load(address) 105 | # we return this value 106 | 107 | builder.ret(value) 108 | 109 | 110 | #End of IR generation 111 | ############################ 112 | # Excute the generated IR without any optimizations. Nothing special is required in this part. 113 | llvm_ir_parsed = llvm.parse_assembly(str(module)) 114 | llvm_ir_parsed.verify() 115 | 116 | # JIT 117 | target_machine = llvm.Target.from_default_triple().create_target_machine() 118 | engine = llvm.create_mcjit_compiler(llvm_ir_parsed, target_machine) 119 | engine.finalize_object() 120 | 121 | #Run the function with name func_name. 
This is why it makes sense to have a 'main' function that calls other functions. 122 | entry = engine.get_function_address(func_name) 123 | cfunc = CFUNCTYPE(c_int)(entry) 124 | result = cfunc() 125 | 126 | print('The llvm IR generated is:') 127 | print(module) 128 | print() 129 | print(f'It returns {result}') 130 | 131 | # The llvm IR generated is: 132 | # ; ModuleID = "array_example" 133 | # target triple = "unknown-unknown-unknown" 134 | # target datalayout = "" 135 | 136 | # define i32 @"main"() 137 | # { 138 | # entry: 139 | # %".2" = alloca [3 x i32] 140 | # %".3" = getelementptr [3 x i32], [3 x i32]* %".2", i32 0, i32 0 141 | # %".4" = getelementptr [3 x i32], [3 x i32]* %".2", i32 0, i32 1 142 | # %".5" = getelementptr [3 x i32], [3 x i32]* %".2", i32 0, i32 2 143 | # store i32 3, i32* %".3" 144 | # %".7" = load i32, i32* %".3" 145 | # %"mul" = mul i32 2, %".7" 146 | # %"sub" = sub i32 %"mul", 1 147 | # store i32 %"sub", i32* %".4" 148 | # store i32 8, i32* %".5" 149 | # %".10" = getelementptr [3 x i32], [3 x i32]* %".2", i32 0, i32 1 150 | # %".11" = load i32, i32* %".10" 151 | # ret i32 %".11" 152 | # } 153 | 154 | 155 | # It returns 5 156 | 157 | -------------------------------------------------------------------------------- /short_llvmlite_examples/for_loop_example.py: -------------------------------------------------------------------------------- 1 | from llvmlite import ir 2 | import llvmlite 3 | import llvmlite.binding as llvm 4 | from ctypes import CFUNCTYPE, c_int, c_float 5 | 6 | 7 | 8 | llvm.initialize() 9 | llvm.initialize_native_target() 10 | llvm.initialize_native_asmprinter() 11 | 12 | ##################### 13 | # The script generates the IR, executes the following code using llvmlite, and returns [4,6,9] 14 | 15 | # def int_array_length_3 main() 16 | # array = [3,5,8] 17 | 18 | # for (int i = 0; i < 3; i++) 19 | # { 20 | # array[i] = array[i] + 1 21 | # } 22 | # return array; 23 | 24 | 25 | # generate the IR code 26 | 
###################################### 27 | # the initial part: define module, function, basic block, etc 28 | 29 | i32 = ir.IntType(32) #integer with 32 bits 30 | i32_0 = ir.Constant(i32,0) 31 | i32_1 = ir.Constant(i32,1) 32 | i32_3 = ir.Constant(i32,3) 33 | 34 | #make a module 35 | module = ir.Module(name = "array_example") 36 | 37 | # define function parameters for function "main" 38 | return_type = ir.ArrayType(i32, 3) #the return type is an array with 3 32-bit integers 39 | 40 | argument_types = list() #can add ir.IntType(#), ir.FloatType() for arguments 41 | func_name = "main" 42 | 43 | #make a function 44 | fnty = ir.FunctionType(return_type, argument_types) 45 | main_func = ir.Function(module, fnty, name=func_name) 46 | 47 | # append basic block named 'entry', and make builder 48 | # blocks generally have 1 entry and exit point, with no branches within the block 49 | block = main_func.append_basic_block('entry') 50 | builder = ir.IRBuilder(block) 51 | 52 | 53 | ######################################## 54 | #array = [3,5,8] 55 | array_example = [3,5,8] 56 | array_type = ir.ArrayType(i32, 3) #According to documentation, the second argument has to be an Python Integer. It can't be ir.Constant(i32, 3) for example. 57 | arr = ir.Constant(array_type, array_example) 58 | ptr = builder.alloca(array_type) #allocate memory 59 | builder.store(arr, ptr) 60 | 61 | #add variable 'array' to the symbol table 62 | symbol_table = {"array":ptr} 63 | 64 | # 65 | for_body_block = builder.append_basic_block("for_body") 66 | for_after_block = builder.append_basic_block("for_after") 67 | 68 | #initiailize i = 0 69 | #for (int i = 0;...) 
part 70 | i_ptr = builder.alloca(i32) 71 | i_value = i32_0 72 | builder.store(i_value, i_ptr) #store the value 0 to the address allocated 73 | symbol_table["i"] = i_ptr 74 | 75 | #does the initial i <3; Since i = 0, this is trivial 76 | 77 | current_i_value = builder.load(symbol_table["i"]) 78 | cond_head = builder.icmp_signed('<', current_i_value, i32_3, name="lt") #returns boolean, which is ir.IntType(1) 79 | 80 | #branches depending on whether cond_head is true or false 81 | builder.cbranch(cond_head, for_body_block, for_after_block) 82 | builder.position_at_start(for_body_block) 83 | 84 | #array[i] = array[i] + 1 85 | current_i_value = builder.load(symbol_table["i"]) #gets value of i (0,1 or 2) 86 | array_i_pointer = builder.gep(symbol_table["array"], [i32_0,current_i_value]) #accesses array[i] 87 | array_i_value = builder.load(array_i_pointer) 88 | new_array_i_value = builder.add(array_i_value, i32_1, name="add") #array[i] + 1 89 | builder.store(new_array_i_value, array_i_pointer) #store the new value of array[i] 90 | 91 | 92 | #i++ 93 | new_i_value = builder.add(current_i_value, i32_1, name="add") 94 | builder.store(new_i_value, symbol_table["i"]) #store the new value of i at the i pointer 95 | 96 | #compare i < 3 97 | cond_body = builder.icmp_signed('<', new_i_value, i32_3, name="lt") 98 | builder.cbranch(cond_body, for_body_block, for_after_block) #iterate again if true, leave if false 99 | 100 | # after 101 | builder.position_at_start(for_after_block) 102 | ############################################## 103 | #after for loop 104 | #return array 105 | # array = builder.load(symbol_table["array"]) 106 | # builder.ret(array) 107 | 108 | 109 | # return x 110 | address = symbol_table["array"] 111 | value = builder.load(address) 112 | # we return this value 113 | 114 | builder.ret(value) 115 | 116 | 117 | 118 | #End of IR generation 119 | ############################ 120 | # Excute the generated IR without any optimizations. 
Nothing special is required in this part. 121 | llvm_ir_parsed = llvm.parse_assembly(str(module)) 122 | llvm_ir_parsed.verify() 123 | 124 | # JIT 125 | target_machine = llvm.Target.from_default_triple().create_target_machine() 126 | engine = llvm.create_mcjit_compiler(llvm_ir_parsed, target_machine) 127 | engine.finalize_object() 128 | 129 | #Run the function with name func_name. This is why it makes sense to have a 'main' function that calls other functions. 130 | entry = engine.get_function_address(func_name) 131 | cfunc = CFUNCTYPE(c_int)(entry) 132 | result = cfunc() 133 | 134 | print('The llvm IR generated is:') 135 | print(module) 136 | 137 | 138 | # The llvm IR generated is: 139 | # ; ModuleID = "array_example" 140 | # target triple = "unknown-unknown-unknown" 141 | # target datalayout = "" 142 | 143 | # define [3 x i32] @"main"() 144 | # { 145 | # entry: 146 | # %".2" = alloca [3 x i32] 147 | # store [3 x i32] [i32 3, i32 5, i32 8], [3 x i32]* %".2" 148 | # %".4" = alloca i32 149 | # store i32 0, i32* %".4" 150 | # %".6" = load i32, i32* %".4" 151 | # %"lt" = icmp slt i32 %".6", 3 152 | # br i1 %"lt", label %"for_body", label %"for_after" 153 | # for_body: 154 | # %".8" = load i32, i32* %".4" 155 | # %".9" = getelementptr [3 x i32], [3 x i32]* %".2", i32 0, i32 %".8" 156 | # %".10" = load i32, i32* %".9" 157 | # %"add" = add i32 %".10", 1 158 | # store i32 %"add", i32* %".9" 159 | # %"add.1" = add i32 %".8", 1 160 | # store i32 %"add.1", i32* %".4" 161 | # %"lt.1" = icmp slt i32 %"add.1", 3 162 | # br i1 %"lt.1", label %"for_body", label %"for_after" 163 | # for_after: 164 | # %".14" = load [3 x i32], [3 x i32]* %".2" 165 | # ret [3 x i32] %".14" 166 | # } 167 | -------------------------------------------------------------------------------- /analyzer.py: -------------------------------------------------------------------------------- 1 | from yaml import dump 2 | import copy 3 | 4 | 5 | def semanticsCheck(ast): 6 | errors = [] 7 | knownFunctions = {} 8 | 9 
def semanticsCheck(ast):
    """Run every semantic check on *ast* and return the list of error strings.

    The AST is modified in place: ``functionOrderCheck`` stores the known
    functions under ``ast['funcList']`` and ``types`` annotates expressions
    with a ``'type'`` entry.
    """
    errors = []
    knownFunctions = {}

    vdeclVoidCheck(ast, errors)
    refVoidCheck(ast, errors)
    functionOrderCheck(ast, errors, knownFunctions)
    refInitializationCheck(ast, errors)
    funcitonRefTypeCheck(ast, errors)

    # types() returns the same (mutated) AST, so its result is not needed.
    types(ast, knownFunctions)
    return errors


def vdeclVoidCheck(ast, errorList):
    """Append an error for every variable declaration whose type is void.

    Covers both declaration statements (``'vdecl'`` entries) and function
    parameters (the ``'vars'`` list of a ``'vdecls'`` node); the latter were
    previously never inspected.
    """
    declarations = list(find('vdecl', ast))
    for parameters in find('vars', ast):
        declarations.extend(parameters)

    for declaration in declarations:
        if declaration['type'] == 'void':
            # The old message carried invisible zero-width characters where
            # the <vdecl> placeholder had been lost.
            errorList.append('error: In <vdecl>, the type may not be void.')


def refVoidCheck(ast, errors):
    """Append an error for every ``ref`` type that wraps void or another ref.

    Type strings are collected from ``'types'`` lists (extern signatures),
    ``'ret_type'`` entries and ``'type'`` entries anywhere in the tree.
    """
    candidates = []
    for group in find('types', ast):
        candidates.extend(group)
    candidates.extend(find('ret_type', ast))
    candidates.extend(find('type', ast))

    for t in candidates:
        if not isinstance(t, str):
            continue
        words = t.split()
        refCount = words.count('ref')
        # 'ref void' and 'ref ref int' (also with 'noalias') are both invalid.
        if refCount and ('void' in words or refCount > 1):
            errors.append("error: In ref <type>, the type may not be void or itself a reference type.")


def functionOrderCheck(ast, errorList, knownFunctions):
    """Check declaration order and the constraints on the ``run`` function.

    Fills *knownFunctions* (name -> return type) with the externs first and
    then each function in source order, and stores that dict under
    ``ast['funcList']``.
    """
    for extern in ast.get('externs', {}).get('externs', []):
        knownFunctions[extern['globid']] = extern['ret_type']

    runCount = 0
    for func in ast['funcs']['funcs']:
        if func['globid'] == "run":
            runCount += 1
            if "vdecls" in func:
                errorList.append("error: The 'run' function cannot have arguments.")

        # Register the function before scanning its body so that it may call
        # itself recursively.
        knownFunctions[func['globid']] = func['ret_type']

        # Every 'globid' found below the function node is either its own name
        # or the target of a call inside its body.
        for calledName in find('globid', func):
            if calledName not in knownFunctions:
                errorList.append("error: All functions must be declared and/or defined before they are used")

    if "run" in knownFunctions and knownFunctions["run"] != "int":
        errorList.append("error: The 'run' function must return an int")

    if runCount != 1:
        errorList.append("error: All programs must define exactly one function named 'run'")

    ast['funcList'] = knownFunctions


def _iterNodes(node):
    """Yield every dict reachable from *node* through dicts and lists."""
    if isinstance(node, dict):
        yield node
        for child in node.values():
            yield from _iterNodes(child)
    elif isinstance(node, list):
        for child in node:
            yield from _iterNodes(child)


def refInitializationCheck(ast, errors):
    """Append an error for every ref variable not initialised from a variable.

    The whole tree is walked, so declarations nested inside if/while bodies
    and inner blocks are checked too. Any initialiser other than a plain
    variable expression (node name ``'varval'``) is rejected, not only
    literals.
    """
    for node in _iterNodes(ast):
        if node.get('name') != 'vardeclstmt':
            continue
        if 'ref' not in node['vdecl']['type']:
            continue
        if node['exp']['name'] != 'varval':
            errors.append("error: The initialization expression for a reference variable (including "
                          + "function arguments) must be a variable.")


def funcitonRefTypeCheck(ast, errors):
    """Append an error for every function whose return type contains ``ref``."""
    for func in ast['funcs']['funcs']:
        if 'ref' in func['ret_type']:
            errors.append("error: A function may not return a ref type.")


def find(key, dictionary):
    """Yield every value stored under *key* in a nested dict/list structure.

    A matched value is yielded as-is and is NOT searched any further;
    ``blkRecurs`` relies on this to obtain only the statements that belong
    directly to a block. Non-dict arguments yield nothing.
    """
    if not isinstance(dictionary, dict):
        return
    for k, v in dictionary.items():
        if k == key:
            yield v
        elif isinstance(v, dict):
            yield from find(key, v)
        elif isinstance(v, list):
            for item in v:
                yield from find(key, item)


def types(ast, knownFunctions):
    """Annotate every expression of every function with its type.

    Each function body gets a ``'knownVariables'`` dict seeded with the
    parameters; reference qualifiers are stripped from parameter types.
    Returns the same (mutated) *ast*.
    """
    for func in ast['funcs']['funcs']:
        knownVariables = {}
        if 'vdecls' in func:
            for param in func['vdecls']['vars']:
                declared = param['type']
                if 'ref' in declared and 'noalias' in declared:
                    declared = declared[len('noalias ref '):]
                elif 'ref' in declared:
                    declared = declared[len('ref '):]
                knownVariables[param['var']] = declared

        func['blk']['knownVariables'] = knownVariables
        blkRecurs(func['blk'], knownFunctions)

    return ast


def stmtRecurs(stmt, knownFunctions, knownVariables):
    """Type-annotate one statement, recursing into nested statements.

    Blocks and while loops get their own deep copy of the variable table
    (stored under ``'knownVariables'``).
    """
    if 'vdecl' in stmt:
        vdecl = stmt['vdecl']
        knownVariables[vdecl['var']] = vdecl['type']

    kind = stmt['name']
    if kind in ('blk', 'while'):
        scope = copy.deepcopy(knownVariables)
        stmt['knownVariables'] = scope
        body = stmt.get('stmt')
        if kind == 'while' and isinstance(body, dict) and body.get('name') != 'blk':
            # A loop body that is a single statement holds no 'stmts' list,
            # so blkRecurs would never reach it; annotate it directly.
            stmtRecurs(body, knownFunctions, scope)
        else:
            blkRecurs(stmt, knownFunctions)
    elif kind == 'if':
        stmt['stmt']['knownVariables'] = copy.deepcopy(knownVariables)
        stmtRecurs(stmt['stmt'], knownFunctions, knownVariables)
        if 'else_stmt' in stmt:
            stmt['else_stmt']['knownVariables'] = copy.deepcopy(knownVariables)
            stmtRecurs(stmt['else_stmt'], knownFunctions, knownVariables)

    if 'cond' in stmt:
        recurs2(stmt['cond'], knownVariables, knownFunctions)
        return None
    if 'exp' not in stmt:  # e.g. a bare `return;`
        return None
    recurs2(stmt['exp'], knownVariables, knownFunctions)


def blkRecurs(blk, knownFunctions):
    """Type-annotate the statements found below *blk*.

    Uses the variable table already stored in ``blk['knownVariables']``.
    """
    knownVariables = blk['knownVariables']
    for statements in find('stmts', blk):
        for stmt in statements:
            stmtRecurs(stmt, knownFunctions, knownVariables)


# function arguments can be void and what not, or strings
def recurs2(exp, knownVars, knownFunctions):
    """Compute, store and return the type of expression node *exp*.

    Nodes that already carry a ``'type'`` are returned unchanged. Raises
    RuntimeError for a list argument or an unknown function name, and
    KeyError for a variable missing from *knownVars*. Returns None for a
    node kind that is not handled here.
    """
    if isinstance(exp, list):
        # Expressions are always dicts; a list here means the caller is wrong.
        raise RuntimeError('please remove me and take a look if the code makes sense')

    if 'type' in exp:
        return exp['type']

    kind = exp['name']

    if kind == 'slit':
        exp['type'] = 'slit'
        return 'slit'

    if kind == 'assign':
        assignedType = recurs2(exp['exp'], knownVars, knownFunctions)
        exp['type'] = assignedType
        knownVars[exp['var']] = assignedType
        return assignedType

    if 'var' in exp:
        exp['type'] = knownVars[exp['var']]
        return exp['type']

    # A call takes the callee's return type from knownFunctions.
    if kind == 'funccall':
        functionName = exp['globid']
        if functionName not in knownFunctions:
            raise RuntimeError('function name unknown: ' + functionName)
        exp['type'] = knownFunctions[functionName]
        if 'exps' in exp['params']:
            for paramExp in exp['params']['exps']:
                recurs2(paramExp, knownVars, knownFunctions)
        return exp['type']

    # Unary operators keep the type of their operand.
    if kind == 'uop':
        exp['type'] = recurs2(exp['exp'], knownVars, knownFunctions)
        return exp['type']

    if kind == 'binop':
        # recurs2 is a no-op for operands that are already typed.
        recurs2(exp['lhs'], knownVars, knownFunctions)
        recurs2(exp['rhs'], knownVars, knownFunctions)
        exp['type'] = calculateType(exp['lhs'], exp['rhs'])
        # Comparison and logical operators produce booleans, which are ints
        # in this language.
        if exp['op'] in ('eq', 'lt', 'gt', 'logAnd', 'logOr'):
            exp['type'] = 'int'
        return exp['type']
229 | # 230 | # 231 | # if 'type' not in exp['lhs']: 232 | # exp['type'] = recurs2(exp['lhs'], knownVars, knownFunctions) 233 | # return exp['type'] 234 | # if 'type' not in exp['rhs']: 235 | # exp['type'] = recurs2(exp['rhs'], knownVars, knownFunctions) 236 | # return exp['type'] 237 | 238 | 239 | def calculateType(lhs, rhs): 240 | if lhs['type'] == 'float' or rhs['type'] == 'float': 241 | lhs['type'] = 'float' 242 | rhs['type'] = 'float' 243 | return 'float' 244 | elif lhs['type'] == 'sfloat' or rhs['type'] == 'sfloat': 245 | lhs['type'] = 'sfloat' 246 | rhs['type'] = 'sfloat' 247 | return 'sfloat' 248 | elif lhs['type'] == 'int' or rhs['type'] == 'int': 249 | lhs['type'] = 'int' 250 | rhs['type'] = 'int' 251 | return 'int' 252 | elif lhs['type'] == 'cint' or rhs['type'] == 'cint': 253 | lhs['type'] = 'cint' 254 | rhs['type'] = 'cint' 255 | return 'cint' 256 | return "undefined" 257 | -------------------------------------------------------------------------------- /lexerAndParser.py: -------------------------------------------------------------------------------- 1 | import ply.lex as lex 2 | import ply.yacc as yacc 3 | import sys 4 | from constants import * 5 | 6 | 7 | names ={} 8 | RESERVED = { 9 | 'if' : 'If', 10 | 'return' : 'Return', 11 | 'while' : 'While', 12 | 'else' : 'Else', 13 | 'print' : 'Print', 14 | 'def' : 'DEF', 15 | 'int' : 'int', 16 | 'cint' : 'cint', 17 | 'float' : 'float', 18 | 'sfloat' : 'sfloat', 19 | 'void' : 'void', 20 | 'ref' : 'ref', 21 | 'noalias' : 'noalias', 22 | 'extern' : 'EXTERN' 23 | } 24 | 25 | tokens = [ 26 | 'lit', 27 | 'slit', 28 | 29 | 'PLUS', 30 | 'Minus', 31 | 'Multiply', 32 | 'Divide', 33 | 'Equal', 34 | 35 | 'Equality', 36 | 'lessThan', 37 | 'greaterThan', 38 | 'logicalOr', 39 | 'logicalAnd', 40 | 'logicalNegation', 41 | 42 | 'LParen', 43 | 'RParen', 44 | 'LBracket', 45 | 'RBracket', 46 | 'COMMA', 47 | 48 | 'newline', 49 | 'var', 50 | 'GLOBID', 51 | 'Comment', 52 | 'Semicolon' 53 | ] + list(RESERVED.values()) 54 | 55 | 56 
| # Regular expression rules for simple tokens 57 | t_PLUS = r"\+" 58 | t_Minus = r'-' 59 | t_Multiply = r'\*' 60 | t_Divide = r'/' 61 | 62 | t_Equality = r'==' 63 | t_Equal = r'=' 64 | 65 | t_lessThan = r'<' 66 | t_greaterThan = r'>' 67 | t_logicalOr = r'\|\|' 68 | t_logicalNegation = r'!' 69 | t_logicalAnd = r'&&' 70 | 71 | t_LParen = r'\(' 72 | t_RParen = r'\)' 73 | t_LBracket = r'{' 74 | t_RBracket = r'}' 75 | t_COMMA = r',' 76 | 77 | t_Semicolon = r';' 78 | 79 | # t_slit = r'"[^"]*"' 80 | 81 | def t_slit(t): 82 | r'"[^"]*"' 83 | t.value = t.value[1:-1] 84 | return t 85 | 86 | 87 | ############## comment ############## 88 | def t_Comment(t): 89 | r'\#.*' 90 | # print(t.value + 'ignored') 91 | pass 92 | 93 | ############## var ############## 94 | def t_var(t): 95 | r'[$][\s]*[a-zA-Z_][a-zA-Z0-9_]*' 96 | t.value = t.value[1:].strip() 97 | return t 98 | 99 | ############## lit ############## 100 | # Check for reserved words 101 | def t_lit(t): 102 | r'[0-9]+(\.[0-9]+)?' 103 | t.value = t.value.replace(" ", "") 104 | if '.' 
in t.value: 105 | t.value = float(t.value) 106 | else: 107 | t.value = int(t.value) 108 | t.type = 'lit' 109 | return t 110 | 111 | ############## globid ############## 112 | def t_GLOBID(t): 113 | r'[a-zA-Z_][a-zA-Z_0-9]*' 114 | t.type = RESERVED.get(t.value, "GLOBID") 115 | return t 116 | #code for t_ID gotten from http://www.dabeaz.com/ply/ply.html#ply_nn3 117 | 118 | 119 | # Define a rule so we can track line numbers 120 | def t_newline(t): 121 | r'\n+' 122 | t.lexer.lineno += len(t.value) 123 | pass 124 | 125 | #code from t_ignore to while loop is from dabeaz 126 | t_ignore = ' \t' 127 | # r'[ \t]+ 128 | 129 | # Error handling rule 130 | def t_error(t): 131 | print('Illegal character: ' + t.value[0]) 132 | t.lexer.skip(1) 133 | 134 | # Build the lexer 135 | lexer = lex.lex() 136 | 137 | 138 | ################################################# 139 | # parsing # 140 | ################################################# 141 | 142 | ############### prog ############## 143 | def p_prog(p): 144 | '''prog : funcs 145 | | externs funcs''' 146 | if len(p) == 2: 147 | p[0] = {name: prog, funcs: p[1]} 148 | else: 149 | p[0] = {name: prog, funcs: p[2], externs: p[1]} 150 | 151 | def p_externs(p): 152 | '''externs : extern 153 | | extern externs''' 154 | if len(p) == 2: 155 | p[0] = {name: externs, externs:[p[1]]} 156 | else: 157 | appendByKey(p[2], externs, p[1]) 158 | p[0] = p[2] 159 | 160 | def p_funcs(p): 161 | '''funcs : func 162 | | func funcs''' 163 | if len(p) == 2: 164 | p[0] = {name: funcs, funcs:[p[1]]} 165 | else: 166 | appendByKey(p[2], funcs, p[1]) 167 | p[0] = p[2] 168 | 169 | ############### extern ############## 170 | def p_extern(p): 171 | '''extern : EXTERN TYPE GLOBID LParen RParen Semicolon''' 172 | p[0] = {name: extern, ret_type: p[2], globid:p[3]} 173 | 174 | def p_externWithTypes(p): 175 | '''extern : EXTERN TYPE GLOBID LParen tdecls RParen Semicolon''' 176 | p[0] = {name: extern, ret_type: p[2], globid:p[3], tdecls: p[5]} 177 | 178 | # ############## 
func ############## 179 | def p_func(p): 180 | '''func : DEF TYPE GLOBID LParen RParen blk''' 181 | p[0] = {name: func, ret_type: p[2], globid: p[3], blk: p[6]} 182 | 183 | def p_funcWithParams(p): 184 | '''func : DEF TYPE GLOBID LParen vdecls RParen blk''' 185 | p[0] = {name: func, ret_type: p[2], globid: p[3], vdecls: p[5], blk:p[7]} 186 | 187 | # ############## blk ############## 188 | def p_blk(p): 189 | '''blk : LBracket stmts RBracket''' 190 | p[0] = {name: blk, contents: p[2]} 191 | 192 | def p_blkEmpty(p): 193 | '''blk : LBracket RBracket''' 194 | p[0] = {name: blk} 195 | 196 | # ############# stmts ############## 197 | def p_statements(p): 198 | '''stmts : stmt 199 | | stmt stmts''' 200 | if len(p) == 2: 201 | p[0] = {name: stmts, stmts: [p[1]]} 202 | else : 203 | appendByKey(p[2], stmts, p[1]) 204 | p[0] = p[2] 205 | 206 | # ############# stmt ############## 207 | def p_blkStmt(p): 208 | '''stmt : blk''' 209 | p[0] = {name: blk, contents: p[1]} 210 | 211 | def p_return(p): 212 | '''stmt : Return Semicolon 213 | | Return exp Semicolon''' 214 | if len(p) == 4: 215 | p[0] = {name: ret, exp: p[2]} 216 | else: 217 | p[0] = {name: ret} 218 | 219 | 220 | def p_vdeclStmt(p): 221 | '''stmt : vdecl Equal exp Semicolon''' 222 | 223 | #because so far, we assume literals are either ints or floats 224 | #we need to account for cints and sfloats 225 | if "sfloat" in p[1]["type"]: 226 | p[3]["type"] = "sfloat" 227 | if "cint" in p[1]["type"]: 228 | p[3]["type"] = "cint" 229 | 230 | p[0] = {name: vardeclstmt, vdecl: p[1], exp: p[3]} 231 | 232 | def p_expSemi(p): 233 | '''stmt : exp Semicolon''' 234 | p[0] = {name: expstmt, exp: p[1]} 235 | 236 | def p_while(p): 237 | '''stmt : While LParen exp RParen stmt''' 238 | p[0] = {name: whileStmt, cond: p[3], stmt: p[5]} 239 | 240 | def p_if(p): 241 | '''stmt : If LParen exp RParen stmt 242 | | If LParen exp RParen stmt Else stmt''' 243 | if len(p) == 6: 244 | p[0] = {name: ifStmt, cond: p[3], stmt: p[5]} 245 | else: 246 | p[0] = 
{name: ifStmt, cond: p[3], stmt: p[5], else_stmt: p[7]} 247 | 248 | def p_print(p): 249 | '''stmt : Print exp Semicolon''' 250 | p[0] = {name : printStmt, exp : p[2]} 251 | 252 | # ############## exps ############## 253 | def p_exps(p): 254 | ''' exps : exp 255 | | exp COMMA exps''' 256 | if len(p) == 2: 257 | p[0] = {exps: [p[1]]} 258 | else: 259 | appendByKey(p[3], exps, p[1]) 260 | p[0] = p[3] 261 | 262 | def p_expParen(p): 263 | '''exp : LParen exp RParen''' 264 | p[0] = p[2] 265 | 266 | def p_exp(p): 267 | '''exp : lit''' 268 | if '.' in str(p[1]): 269 | p[0] = {name: litExp, value: p[1], typ: "sfloat"} 270 | else: 271 | p[0] = {name: litExp, value: p[1], typ: "int"} 272 | 273 | def p_slit(p): 274 | '''exp : slit''' 275 | p[0] = {name: slitExp, value: p[1], typ: "slit"} 276 | 277 | def p_expBinOpUop(p): 278 | '''exp : binop 279 | | uop''' 280 | p[0] = p[1] 281 | 282 | def p_var(p): 283 | '''exp : var''' 284 | p[0] = {name: varExp, var: p[1]} 285 | 286 | def p_expGlobid(p): 287 | '''exp : GLOBID expWrapper''' 288 | p[0] = {name: funcCallExp, globid: p[1], params: p[2]} 289 | 290 | def p_expWrapper(p): 291 | '''expWrapper : LParen RParen 292 | | LParen exps RParen''' 293 | if len(p) == 3: 294 | p[0] = [] 295 | else: 296 | p[0] = p[2] 297 | 298 | ############ binop ############## 299 | def p_binop(p): 300 | '''binop : exp Multiply exp 301 | | exp PLUS exp 302 | | exp Divide exp 303 | | exp Minus exp 304 | | var Equal exp 305 | | exp Equality exp 306 | | exp lessThan exp 307 | | exp greaterThan exp 308 | | exp logicalAnd exp 309 | | exp logicalOr exp''' 310 | if p[2] == '*': 311 | p[0] = {"name": binop, "lhs": p[1], "op": 'mul', "rhs": p[3]} 312 | elif p[2] == '/': 313 | p[0] = {"name": binop, "lhs": p[1], "op": 'div', "rhs": p[3]} 314 | elif p[2] == '+': 315 | p[0] = {"name": binop, "lhs": p[1], "op": 'add', "rhs": p[3]} 316 | elif p[2] == '-': 317 | p[0] = {"name": binop, "lhs": p[1], "op": 'sub', "rhs": p[3]} 318 | elif p[2] == '=': 319 | p[0] = {"name": 
assign, "var": p[1], "exp": p[3]} 320 | elif p[2] == '==': 321 | p[0] = {"name": binop, "lhs": p[1], "op": 'eq', "rhs": p[3]} 322 | elif p[2] == '<': 323 | p[0] = {"name": binop, "lhs": p[1], "op": 'lt', "rhs": p[3]} 324 | elif p[2] == '>': 325 | p[0] = {"name": binop, "lhs": p[1], "op": 'gt', "rhs": p[3]} 326 | elif p[2] == '&&': 327 | p[0] = {"name": binop, "lhs": p[1], "op": 'logAnd', "rhs": p[3]} 328 | elif p[2] == '||': 329 | p[0] = {"name": binop, "lhs": p[1], "op": 'logOr', "rhs": p[3]} 330 | 331 | ############## uop ############## 332 | def p_uop(p): 333 | '''uop : Minus exp %prec UOP 334 | | logicalNegation exp %prec UOP''' 335 | if p[1] == "-": 336 | p[0] = {"name" : uop, "uopType": "Minus", "exp": p[2] } 337 | else: 338 | p[0] = {"name" : uop, "uopType": "logicalNeg", "exp": p[2]} 339 | 340 | ############## var ############## 341 | def p_vdecls(p): 342 | '''vdecls : vdecl COMMA vdecls 343 | | vdecl''' 344 | if len(p) == 4: 345 | appendByKey(p[3], vars_, p[1]) 346 | p[0] = p[3] 347 | else: 348 | p[0] = {name: vdecls, vars_: [p[1]]} 349 | 350 | def p_vdeclare(p): 351 | '''vdecl : TYPE var''' 352 | p[0] = {node: vdecl, typ: p[1], var: p[2]} 353 | 354 | 355 | def p_tdecls(p): 356 | '''tdecls : TYPE 357 | | TYPE COMMA tdecls''' 358 | if len(p) == 2: 359 | p[0] = {name: tdecls, 'types': [p[1]]} 360 | else : 361 | appendByKey(p[3], 'types', p[1]) 362 | p[0] = p[3] 363 | 364 | def p_type(p): 365 | '''TYPE : int 366 | | float 367 | | cint 368 | | sfloat 369 | | void''' 370 | p[0] = p[1] 371 | 372 | def p_refType(p): 373 | '''TYPE : ref TYPE''' 374 | p[0] = 'ref ' + p[2] 375 | 376 | 377 | def p_refTypeNoAlias(p): 378 | '''TYPE : noalias ref TYPE''' 379 | 380 | p[0] = 'noalias ref ' + p[3] 381 | 382 | 383 | precedence = ( 384 | ('right', 'Equal'), 385 | ('left', 'logicalOr'), 386 | ('left', 'logicalAnd'), 387 | ('left', 'Equality'), 388 | ('left', 'lessThan', 'greaterThan'), 389 | ('left', 'PLUS', 'Minus'), 390 | ('left','Multiply','Divide'), 391 | ('left','UOP'), 
392 | ) 393 | 394 | ############# helper ############ 395 | 396 | def errorOut(msg): 397 | print(error + ": " + msg) 398 | 399 | def appendByKey(dictonary, key, value): 400 | dictonary[key].insert(0,value) 401 | return dictonary 402 | 403 | ############# main ############ 404 | def toAst(code): 405 | yacc.yacc() 406 | ast = yacc.parse(code, debug=False) 407 | return ast 408 | -------------------------------------------------------------------------------- /IR.py: -------------------------------------------------------------------------------- 1 | from llvmlite import ir 2 | import llvmlite.binding as llvm 3 | import constants as c 4 | import copy 5 | from ctypes import CFUNCTYPE, c_int, c_float 6 | import llvm_binder 7 | 8 | 9 | llvm.initialize() 10 | llvm.initialize_native_target() 11 | llvm.initialize_native_asmprinter() 12 | 13 | i32 = ir.IntType(32) 14 | i1 = ir.IntType(1) 15 | f32 = ir.FloatType() 16 | 17 | 18 | def ir_type(string): 19 | if "ref" in string: 20 | if "int" in string: 21 | return ir.PointerType(i32) 22 | return ir.PointerType(f32) 23 | if "int" in string: 24 | return i32 25 | if "sfloat" in string: 26 | return f32 27 | if "float" in string: 28 | return f32 29 | return ir.VoidType() 30 | 31 | 32 | def externs(extern, module, *sysArgs): 33 | returnType = ir_type(extern["ret_type"]) 34 | 35 | args = list() 36 | if "tdecls" in extern: 37 | for arg in extern["tdecls"]["types"]: 38 | args.append(ir_type(arg)) 39 | 40 | if extern["globid"] == "getarg": 41 | getArg(module, *sysArgs) 42 | 43 | elif extern["globid"] == "getargf": 44 | getArgf(module, *sysArgs) 45 | pass 46 | 47 | else: 48 | fnty = ir.FunctionType(returnType, args) # func = ir.Function(module, functionType, name = i["globid"] ) 49 | func = ir.Function(module, fnty, name=extern["globid"]) 50 | 51 | 52 | def getArg(module, sysArgs): 53 | sysArgs = [ 54 | int(float(value)) for value in sysArgs 55 | ] 56 | array_type = ir.ArrayType(i32, len(sysArgs)) 57 | arr = ir.Constant(array_type, sysArgs) 
def _emit_arg_getter(module, name, elem_type, values):
    """Define ``elem_type name(i32 index)`` in *module*, returning values[index].

    The values are baked into the function as a constant array that is copied
    to a stack slot so it can be indexed with a run-time index. No bounds
    check is emitted.
    """
    array_type = ir.ArrayType(elem_type, len(values))
    table = ir.Constant(array_type, values)

    fnty = ir.FunctionType(elem_type, [i32])
    func = ir.Function(module, fnty, name=name)
    builder = ir.IRBuilder(func.append_basic_block("entry"))

    # One store of the complete constant is enough; the former per-element
    # insert_value calls discarded their result and re-stored the same array.
    ptr = builder.alloca(array_type)
    builder.store(table, ptr)

    index = func.args[0]
    address = builder.gep(ptr, [ir.Constant(i32, 0), index])
    builder.ret(builder.load(address))


def getArg(module, sysArgs):
    """Define ``i32 getarg(i32)``, serving *sysArgs* truncated to integers."""
    _emit_arg_getter(module, "getarg", i32, [int(float(value)) for value in sysArgs])


def getArgf(module, sysArgs):
    """Define ``float getargf(i32)``, serving *sysArgs* converted to floats."""
    _emit_arg_getter(module, "getargf", f32, [float(value) for value in sysArgs])


def funcs(ast, module, known_funcs):
    """Generate the IR function for one ``func`` AST node.

    Registers the function in *known_funcs* as
    ``(function type, per-parameter cint flags)`` before its body is
    generated. Returns the function type for void functions and None
    otherwise. Raises RuntimeError when a non-void function body does not
    report a return.
    """
    func_name = ast["globid"]
    symbols = {}
    symbols['cint'] = set()
    symbols[c.cint_args] = {}
    symbols[c.cint_args][func_name] = []

    returnType = ir_type(ast['ret_type'])
    argument_types = []
    args = []
    if "vdecls" in ast:
        argument_types, args = vdecls(ast["vdecls"], symbols, func_name)

    fnty = ir.FunctionType(returnType, argument_types)
    func = ir.Function(module, fnty, name=func_name)
    known_funcs[func_name] = (fnty, symbols[c.cint_args][func_name])  # add parameter info
    populate_known_funcs(symbols, known_funcs)

    entry = func.append_basic_block('entry')
    builder = ir.IRBuilder(entry)

    for value, var_name, var_type in zip(func.args, args, argument_types):
        if var_type.is_pointer:
            # Reference parameters already are pointers; use them directly.
            symbols[var_name] = value
        else:
            # Value parameters get a stack slot so they can be reassigned.
            slot = builder.alloca(var_type)
            symbols[var_name] = slot
            builder.store(value, slot)

    returned = pure_blk(ast["blk"], builder, symbols)
    if ast[c.ret_type] == 'void':
        # Only add the implicit return when the body did not already end the
        # current block; a basic block accepts a single terminator.
        if not builder.block.is_terminated:
            builder.ret_void()
        return fnty
    if not returned:
        raise RuntimeError("function missing return statement")


def pure_blk(blk, builder, symbols):
    """Generate the statements of *blk* in a copied symbol scope.

    Generation stops after the first statement that reports a return.
    Returns None for an empty block, otherwise whether a statement returned.
    """
    if c.contents not in blk:
        return None
    # Shallow copy: declarations made inside the block do not leak outward.
    legacy = copy.copy(symbols)
    returned = False
    for statement in blk[c.contents][c.stmts]:
        returned = stmt(statement, builder, legacy) or returned
        if returned:
            return returned
    return returned


def populate_known_funcs(symbols, known_funcs):
    """Copy every known function into *symbols*.

    ``symbols[name]`` receives the function type and
    ``symbols[c.cint_args][name]`` the per-parameter cint flags.
    """
    for name, t in known_funcs.items():
        # Other symbol entries are IR pointers; storing function types under
        # function names is fine because calls do not use these values as
        # pointers.
        symbols[name] = t[0]
        symbols[c.cint_args][name] = t[1]  # e.g. {"fib": [True, False]}


def vdecls(vdec, symbols, function_name):
    """Collect the parameters of a ``vdecls`` node.

    Records cint parameters in ``symbols["cint"]`` and one boolean per
    parameter in ``symbols[c.cint_args][function_name]``. Returns
    ``[list of IR types, list of parameter names]``.
    """
    variableList = []
    args = []
    for param in vdec["vars"]:
        is_cint = "cint" in param["type"]
        if is_cint:
            symbols["cint"].add(param["var"])
        symbols[c.cint_args][function_name].append(is_cint)
        variableList.append(ir_type(param["type"]))
        args.append(param["var"])
    return [variableList, args]


def blk_stmt(stmt, builder, symbols):
    """Generate a block statement; its ``contents`` entry is the block node."""
    return pure_blk(stmt[c.contents], builder, symbols)


def stmt(ast, builder, symbols):
    """Generate IR for one statement node.

    Returns the result of the handler for if, return and block statements
    (used by pure_blk to detect returns) and None for every other statement.
    Raises RuntimeError for an unknown statement name.
    """
    name = ast["name"]
    if name == 'while':
        whileStmt(ast, builder, symbols)

    elif name == 'if':
        # if_then makes own blocks
        return ifStmt(ast, builder, symbols)

    elif name == 'ret':
        return returnStmt(ast, builder, symbols)

    elif name == 'vardeclstmt':
        vardeclstmt(ast, builder, symbols)

    elif name == 'expstmt':
        # No new block is needed: the statement is just `exp ;`.
        expression(ast[c.exp], symbols, builder)

    elif name == 'blk':
        return blk_stmt(ast, builder, symbols)

    elif name == c.printStmt:
        printStmt(ast, builder, symbols)

    else:
        raise RuntimeError('this is not processed: ' + str(ast))


def convert_to_string(builder, ir_object):
    """Emit a call to ``floatToString`` or ``intToString`` for *ir_object*."""
    helper = 'floatToString' if ir_object.type == f32 else 'intToString'
    fn = builder.module.globals.get(helper)
    return builder.call(fn, [ir_object])


def print_pointer_number(ir_pointer, builder):
    """Load the value behind *ir_pointer* and print it."""
    print_numbers(builder.load(ir_pointer), builder)


def print_numbers(ir_object, builder):
    """Emit a call to ``printFloat`` or ``printInt`` for a numeric value.

    Pointers are loaded first; i1 values are zero-extended to i32.
    """
    if ir_object.type.is_pointer:
        return print_pointer_number(ir_object, builder)

    if ir_object.type == f32:
        fn = builder.module.globals.get('printFloat')
    else:
        fn = builder.module.globals.get('printInt')
        if ir_object.type == i1:
            ir_object = builder.zext(ir_object, i32)
    builder.call(fn, [ir_object])


# printf
def printStmt(ast, builder, symbols):
    """Generate a print statement for a number or a string literal.

    A Python ``str`` result from ``expression`` is treated as a string
    literal and passed to ``printString``; anything else goes to
    ``print_numbers``. Empty strings print nothing.
    """
    # adapted from https://github.com/cea-sec/miasm/blob/master/miasm2/jitter/llvmconvert.py
    s = expression(ast["exp"], symbols, builder)
    if not isinstance(s, str):
        return print_numbers(s, builder)
    if len(s) == 0:
        return None

    # printString receives only a pointer, so the buffer has to be
    # NUL-terminated for the callee to find its end.
    b = bytearray(s.encode('ascii')) + bytearray(1)
    s_bytes = ir.Constant(ir.ArrayType(ir.IntType(8), len(b)), b)

    # reuse or create the global constant holding the text
    global_fmt = find_global_constant(builder, s, s_bytes)
    ptr_fmt = builder.bitcast(global_fmt, ir.IntType(8).as_pointer())
    fn = builder.module.globals.get('printString')
    builder.call(fn, [ptr_fmt])


# make the global variable or find it
def find_global_constant(builder, name, value):
    """Return a global constant initialised with *value*, creating it if needed.

    An existing global called *name* is reused only when it is a global
    variable of the same type. If the name belongs to something else (for
    example a function with the same name as the printed text), a new global
    with a de-duplicated name is created instead.
    """
    # adapted from https://github.com/cea-sec/miasm/blob/master/miasm2/jitter/llvmconvert.py
    module = builder.module
    existing = module.globals.get(name)
    if existing is None:
        glob_name = name
    elif isinstance(existing, ir.GlobalVariable) and existing.value_type == value.type:
        return existing
    else:
        glob_name = module.get_unique_name(name)

    glob = ir.GlobalVariable(module, value.type, name=glob_name)
    glob.global_constant = True
    glob.initializer = value
    return glob


def whileStmt(ast, builder, symbols):
    """Generate a while loop.

    The condition is evaluated once before entering the body and again at
    the end of every iteration.
    """
    w_body_block = builder.append_basic_block("w_body")
    w_after_block = builder.append_basic_block("w_after")

    # head
    cond_head = expression(ast[c.cond], symbols, builder)
    builder.cbranch(cond_head, w_body_block, w_after_block)
    # body
    builder.position_at_start(w_body_block)
    stmt(ast["stmt"], builder, symbols)
    # A body that already ended its block (e.g. with a return) must not get
    # the back-edge branch appended as a second terminator.
    if not builder.block.is_terminated:
        cond_body = expression(ast[c.cond], symbols, builder)
        builder.cbranch(cond_body, w_body_block, w_after_block)
    # after
    builder.position_at_start(w_after_block)
def ifStmt(ast, builder, symbols):
    """Emit an if / if-else statement.

    Returns True only when there is an else branch and both branches reported
    a return; otherwise False.
    """
    cond = expression(ast["cond"], symbols, builder)
    if "else_stmt" not in ast:
        with builder.if_then(cond):
            stmt(ast["stmt"], builder, symbols)
        return False

    with builder.if_else(cond) as (then, otherwise):
        with then:
            returned_then = stmt(ast["stmt"], builder, symbols)
        with otherwise:
            returned_else = stmt(ast["else_stmt"], builder, symbols)

    returned = bool(returned_then and returned_else)
    if returned:
        # Both arms returned, so nothing jumps to the merge block the builder
        # is now positioned on; detach it from the function.
        endif = builder.block
        builder.function.blocks.remove(endif)
    return returned


def returnStmt(ast, builder, symbols):
    """Emit ``ret`` (or ``ret void`` when there is no expression).

    A pointer result is loaded before being returned. Always returns True so
    callers can tell that the current block has been terminated.
    """
    if "exp" in ast:
        ret_exp = expression(ast["exp"], symbols, builder)
        builder.ret(extract_value(ret_exp, builder))
    else:
        builder.ret_void()
    return True


def vardeclstmt(ast, builder, symbols):
    """Emit a local variable declaration with its initialiser.

    'ref' declarations are delegated to ``ref_var_decl_stmt``. Otherwise a
    stack slot is allocated, bound in ``symbols`` (before the initialiser is
    evaluated), and the initialiser is converted to the slot type and stored.

    Raises:
        RuntimeError: if llvmlite rejects the store with a TypeError.
    """
    var_declaration = ast[c.vdecl]
    var_type = var_declaration[c.typ]
    var_name = var_declaration[c.var]
    # No "already defined" check on purpose: an inner declaration is allowed
    # to clash with (shadow) an outer one.
    if 'ref' in var_type:
        return ref_var_decl_stmt(ast, builder, symbols)

    vtype = to_ir_type(var_type)
    ptr = builder.alloca(vtype)
    symbols[var_name] = ptr
    cint = "cint" in var_type
    if cint:
        symbols["cint"].add(var_name)
    value = expression(ast[c.exp], symbols, builder, cint=cint)
    value = extract_value(value, builder)

    if vtype != value.type:
        if vtype == f32:
            value = _to_f32(builder, value)
        elif vtype == i32:
            value = _to_i32(builder, value)

    try:
        builder.store(value, ptr)
    except TypeError as err:
        raise RuntimeError('error converting: ' + str(ast), err) from err


def ref_var_decl_stmt(ast, builder, symbols):
    """Bind a 'ref' variable to whatever its initialiser expression yields.

    No storage is allocated and no type check is done on either side.
    """
    var_name = ast[c.vdecl][c.var]
    symbols[var_name] = expression(ast[c.exp], symbols, builder)


def _to_f32(builder, value):
    """Convert an i1 (unsigned) or i32 (signed) value to f32; else unchanged."""
    if value.type == i1:
        return builder.uitofp(value, f32)
    if value.type == i32:
        return builder.sitofp(value, f32)
    return value


def _to_i32(builder, value):
    """Convert an i1 (zero-extend) or f32 (truncate) value to i32; else unchanged."""
    if value.type == i1:
        return builder.zext(value, i32)
    if value.type == f32:
        return builder.fptosi(value, i32)
    return value


def binary_convert(builder, il):
    """Reduce a value to an i1 truth value (non-zero means true).

    Pointers are loaded first. i32 and f32 values are compared against zero;
    an fptosi to i1 would be poison for anything other than 0 or -1. Values
    of any other type are returned unchanged.
    """
    if il.type.is_pointer:
        il = builder.load(il)
    if il.type == i32:
        return builder.icmp_signed('!=', il, ir.Constant(i32, 0))
    if il.type == f32:
        return builder.fcmp_unordered('!=', il, ir.Constant(f32, 0))
    return il


def extract_value(exp, builder):
    """Load ``exp`` if it is a pointer, otherwise return it unchanged."""
    if exp.type.is_pointer:
        return builder.load(exp)
    return exp


def binop(ast, symbols, builder, target_type, cint = False):
    """Emit a binary operation.

    ``ast["op"]`` is one of logAnd, logOr, mul, div, add, sub, eq, lt, gt.
    Unless an operand is i1 or the operator is logical, both operands are
    first converted to the family named by ``target_type`` ("float" or a name
    containing "int"). With ``cint`` set, integer arithmetic goes through
    ``check_int``. Returns the IR value, or None when no branch matches.

    Raises:
        RuntimeError: wrapping a ValueError/AttributeError from the builder.
    """
    # Evaluate both operands before loading either one.
    lhs = expression(ast["lhs"], symbols, builder, cint = cint)
    rhs = expression(ast["rhs"], symbols, builder, cint = cint)
    lhs = extract_value(lhs, builder)
    rhs = extract_value(rhs, builder)
    exp_type = target_type
    op = ast["op"]
    logical = op in ("logAnd", "logOr")

    if lhs.type != i1 and rhs.type != i1 and not logical:
        if "float" in exp_type:
            lhs = _to_f32(builder, lhs)
            rhs = _to_f32(builder, rhs)
        if "int" in exp_type:
            if lhs.type != i32:
                lhs = builder.fptosi(lhs, i32)
            if rhs.type != i32:
                rhs = builder.fptosi(rhs, i32)

    flags = ["fast"] if target_type == "float" else []
    comparisons = {'eq': '==', 'lt': '<', 'gt': '>'}

    try:
        if logical:
            if lhs.type != rhs.type:
                lhs = binary_convert(builder, lhs)
                rhs = binary_convert(builder, rhs)
            # fast-math flags are not valid on integer and/or, so none are passed
            if op == "logAnd":
                return builder.and_(lhs, rhs, name="logAnd")
            return builder.or_(lhs, rhs, name="logOr")

        if cint:
            return check_int(lhs, rhs, builder, op)

        if "int" in exp_type:
            int_arith = {'mul': builder.mul, 'div': builder.sdiv,
                         'add': builder.add, 'sub': builder.sub}
            if op in int_arith:
                return int_arith[op](lhs, rhs, name=op)
            if op in comparisons:
                return builder.icmp_signed(comparisons[op], lhs, rhs, name=op)
        elif "float" in exp_type:
            float_arith = {'mul': builder.fmul, 'div': builder.fdiv,
                           'add': builder.fadd, 'sub': builder.fsub}
            if op in float_arith:
                return float_arith[op](lhs, rhs, name=op, flags = flags)
            if op in comparisons:
                return builder.fcmp_ordered(comparisons[op], lhs, rhs,
                                            name=op, flags = flags)
    except (ValueError, AttributeError) as err:
        raise RuntimeError('error processing: ' + str(ast), err) from err


def check_int(lhs, rhs, builder, op):
    """Emit overflow-checked i32 arithmetic for cint operands.

    mul/add/sub use the ``*_with_overflow`` operations and div is checked by
    hand; on overflow the generated code prints the overflow message and then
    continues with the wrapped result. eq/lt/gt are plain signed comparisons.
    Returns None for any other ``op``.
    """
    if op == 'div':
        # Signed division overflows only for INT_MIN / -1.
        lhs_is_min = builder.icmp_signed('==', lhs, ir.Constant(i32, -2147483648), name="eq")
        rhs_is_m1 = builder.icmp_signed('==', rhs, ir.Constant(i32, -1), name="eq")
        is_overflow = builder.and_(lhs_is_min, rhs_is_m1, name="div_overflow")
        with builder.if_then(is_overflow):
            overflows(None, builder)
        # Divide by 1 in the overflow case: same wrapped result (INT_MIN),
        # but without executing an undefined sdiv.
        safe_rhs = builder.select(is_overflow, ir.Constant(i32, 1), rhs)
        return builder.sdiv(lhs, safe_rhs, name='div')

    checked_ops = {
        'mul': builder.smul_with_overflow,
        'add': builder.sadd_with_overflow,
        'sub': builder.ssub_with_overflow,
    }
    if op in checked_ops:
        result = checked_ops[op](lhs, rhs, name=op)
        is_overflow = builder.extract_value(result, 1)
        with builder.if_then(is_overflow):
            overflows(None, builder)
        return builder.extract_value(result, 0)

    comparisons = {'eq': '==', 'lt': '<', 'gt': '>'}
    if op in comparisons:
        return builder.icmp_signed(comparisons[op], lhs, rhs, name=op)


class Error2147483648(Exception):
    """Raised by ``expression`` for the literal 2147483648 under unary minus."""
    pass


def uop(ast, symbols, builder, cint=False):
    """Emit a unary operation: "Minus", or logical negation for anything else.

    The literal ``-2147483648`` is folded to a constant directly. Under
    ``cint`` the generated code prints the overflow message when negating
    INT_MIN at run time.
    """
    is_minus = ast["uopType"] == "Minus"
    try:
        # The one-larger literal limit only makes sense under a minus sign.
        uop_value = expression(ast["exp"], symbols, builder, cint,
                               neg=is_minus, exception=is_minus)
    except Error2147483648:
        return ir.Constant(i32, -2147483648)
    if uop_value.type.is_pointer:
        uop_value = builder.load(uop_value)

    if not is_minus:
        return builder.not_(uop_value, name="logicalNeg")

    if uop_value.type == i32:
        if cint:
            is_overflow = builder.icmp_signed('==', uop_value, ir.Constant(i32, -2147483648))
            with builder.if_then(is_overflow):
                overflows(None, builder)
        return builder.neg(uop_value, name="Minus")

    f32_0 = ir.Constant(f32, 0)
    return builder.fsub(f32_0, uop_value, name='sub', flags = ["fast"])


def deference(builder, p):
    """Load ``p`` if it is a pointer, otherwise return it unchanged."""
    if p.type.is_pointer:
        return builder.load(p)
    return p


def expression(ast, symbols, builder, cint = False, neg=False, exception=False):
    """Emit IR for an expression node, dispatching on ``ast[c.name]``.

    Args:
        cint: the value is destined for a cint, so overflow checks are emitted.
        neg: the literal sits directly under a unary minus (limit is one larger).
        exception: raise ``Error2147483648`` for the i32 literal 2147483648 so
            ``uop`` can fold ``-2147483648``.

    Returns an IR value; the *pointer* stored in ``symbols`` for a variable
    (callers load it); the Python string for a string literal; None for an
    assignment.

    Raises:
        RuntimeError: unknown node, undefined variable or function, or a
            missing key while converting.
    """
    name = ast[c.name]
    try:
        if name == c.uop:
            return uop(ast, symbols, builder, cint)

        if name == c.litExp:
            value = ast['value']
            lit_type = to_ir_type(ast['type'])
            if cint:
                limit = 2147483647
                if neg:
                    limit += 1
                if value > limit or value < -2147483648:
                    overflows(ast, builder)
            # uop answers this with an i32 constant, so only raise for i32 literals
            if exception and value == 2147483648 and lit_type == i32:
                raise Error2147483648
            return ir.Constant(lit_type, value)

        if name == c.slitExp:
            return ast["value"]

        if name == c.varExp:
            var_id = ast[c.var]
            try:
                # the pointer is returned, not the loaded value
                return symbols[var_id]
            except TypeError as err:
                raise RuntimeError('error parsing: ' + str(ast), err) from err

        if name == c.funcCallExp:
            function_name = ast[c.globid]
            fn = builder.module.globals.get(function_name)
            if fn is None:
                raise RuntimeError(f'{function_name} has not been defined')
            params = ast[c.params]
            if function_name != "getarg" and function_name != "getargf":
                parameters = prepare_parameters(function_name, symbols, params, builder)
            else:
                parameters = [
                    deference(builder, expression(param, symbols, builder))
                    for param in params[c.exps]
                ]
            return builder.call(fn, parameters)

        if name == c.binop:
            return binop(ast, symbols, builder, ast[c.typ], cint = cint)

        if name == c.assign:
            var_name = ast["var"]
            if var_name not in symbols:
                raise RuntimeError(f'{var_name} has not been defined')

            ptr = symbols[var_name]
            if var_name in symbols["cint"]:
                # the AST node is re-tagged in place
                ast["type"] = "cint"

            value = expression(ast["exp"], symbols, builder, cint = "cint" in ast["type"])
            store_helper(builder, ptr, value)
            return None

        raise RuntimeError('Not processed: ' + str(ast))

    except KeyError as err:
        raise RuntimeError('error converting: ' + str(ast), err) from err


def prepare_parameters(function_name, symbols, params, builder):
    """Build the argument list for a call to ``function_name``.

    For a pointer-typed parameter the argument must be a plain variable,
    whose pointer is passed as is. Every other argument is evaluated (with
    the cint flag recorded for that parameter) and loaded if it is a pointer.

    Raises:
        RuntimeError: a non-variable is passed for a pointer parameter.
    """
    parameters = []
    if len(params) > 0:
        fnArgs = symbols[function_name].args
        for index, param in enumerate(params[c.exps]):
            if fnArgs[index].is_pointer:
                if c.var not in param:
                    raise RuntimeError("non-variable object passed as ref type")
                parameters.append(symbols[param[c.var]])
            else:
                cint = symbols[c.cint_args][function_name][index]
                value = expression(param, symbols, builder, cint=cint)
                parameters.append(deference(builder, value))
    return parameters


def store_helper(builder, ptr, value):
    """Store ``value`` through ``ptr``, converting to an i32/f32 pointee type.

    A pointer ``value`` is loaded first. Always returns None.
    """
    if value.type.is_pointer:
        value = builder.load(value)

    if ptr.type.pointee == i32:
        value = _to_i32(builder, value)
    elif ptr.type.pointee == f32:
        value = _to_f32(builder, value)

    builder.store(value, ptr)
    return None


def to_ir_type(_type):
    """Alias for ``ir_type``."""
    return ir_type(_type)


def overflows(ast, builder):
    """Emit IR that prints the cint overflow message (``ast`` is unused)."""
    overf = {"exp":
                 {"value": "Error: cint value overflowed", "name": "slit"}
             }
    printStmt(overf, builder, None)


def convert_externs(ast, module, *sysArgs):
    """Convert every entry of ``ast["externs"]`` with ``externs``."""
    for extern in ast["externs"]:
        externs(extern, module, *sysArgs)


def convert_funcs(ast, module, known_funcs):
    """Convert every entry of ``ast['funcs']`` with ``funcs``."""
    for func in ast['funcs']:
        funcs(func, module, known_funcs)


def convert(ast, module, *sysArgs):
    """Convert a whole program: externs (if any), built-ins, then functions.

    ``known_funcs`` comes from ``ast['funcList']`` and is shared by all
    function conversions.
    """
    if "externs" in ast:
        convert_externs(ast["externs"], module, *sysArgs)
    # known_funcs is passed down from the program level to each function
    known_funcs = ast['funcList']

    define_built_ins(module, known_funcs)

    convert_funcs(ast["funcs"], module, known_funcs)


#########
#### make printf function
#### code from https://github.com/cea-sec/miasm/blob/master/miasm2/jitter/llvmconvert.py
#### search for printf to find it easier


def define_built_ins(module, known_funcs):
    """Declare printf, printString, printInt and printFloat in ``module``.

    Also records ``known_funcs["printf"] = "slit"``.
    NOTE(review): ``populate_known_funcs`` indexes each known_funcs value with
    [0] and [1], which for this string yields 's' and 'l' -- confirm that is
    intended.
    """
    char_pointer = ir.IntType(8).as_pointer()
    fnty = ir.FunctionType(ir.IntType(32), [char_pointer], var_arg=True)
    ir.Function(module, fnty, name="printf")
    known_funcs["printf"] = "slit"
    fnty = ir.FunctionType(ir.VoidType(), [char_pointer])
    ir.Function(module, fnty, name="printString")
    fnty = ir.FunctionType(ir.VoidType(), [i32])
    ir.Function(module, fnty, name="printInt")
    fnty = ir.FunctionType(ir.VoidType(), [f32])
    ir.Function(module, fnty, name="printFloat")


def mainFunc(ast, *args):
    """Create the "prog" module, convert ``ast`` into it and return it."""
    module = ir.Module(name="prog")
    convert(ast, module, *args)
    return module