├── src ├── .merlin ├── _tags ├── TODO ├── blockInterp.mli ├── regAlloc.mli ├── typeCheck.mli ├── frontEnd.mli ├── constProp.mli ├── compileFunction.mli ├── shrinkImmediates.mli ├── instrSelX86.mli ├── liveVarAnalysis.mli ├── lineariseCfg.mli ├── unnestExp.mli ├── interp.ml ├── astInterp.mli ├── tokens.mli ├── frontEnd.ml ├── Makefile ├── README.md ├── sourceAst.mli ├── blockStructure.mli ├── .depend ├── compile.ml ├── compileFunction.ml ├── lineariseCfg.ml ├── util.ml ├── shrinkImmediates.ml ├── blockInterp.ml ├── regAlloc.ml ├── tokens.ml ├── unnestExp.ml ├── x86.ml ├── liveVarAnalysis.ml ├── astInterp.ml ├── constProp.ml ├── typeCheck.ml ├── instrSelX86.ml ├── blockStructure.ml └── sourceAst.ml ├── Makefile ├── runtime ├── Makefile ├── io.c └── array.c ├── .gitignore ├── tests ├── div.expl ├── fact.expl ├── runtime_errors.expl ├── insert_sort.expl ├── insert_sort.c ├── scoping.expl ├── Makefile ├── quicksort.expl └── array.expl └── README.md /src/.merlin: -------------------------------------------------------------------------------- 1 | B _build 2 | -------------------------------------------------------------------------------- /src/_tags: -------------------------------------------------------------------------------- 1 | true : package(str) 2 | true : bin_annot, debug, principal, safe_string, strict_formats, strict_sequence, warn(A-42-4) 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | make -C src 3 | make -C runtime 4 | make -C tests 5 | 6 | clean: 7 | make -C src clean 8 | make -C runtime clean 9 | make -C tests clean 10 | -------------------------------------------------------------------------------- /src/TODO: -------------------------------------------------------------------------------- 1 | Const prop for x * 2^n -> x shift n 2 | Do some simple register allocation 3 | Add OCaml warning 4, and manually annotate all of the matches where it should apply. 4 | -------------------------------------------------------------------------------- /runtime/Makefile: -------------------------------------------------------------------------------- 1 | all : array.o io.o 2 | 3 | array.o : array.c 4 | gcc -c array.c -o array.o -std=c99 5 | 6 | io.o : io.c 7 | gcc -c io.c -o io.o -std=c99 8 | 9 | clean : 10 | rm -f io.o array.o 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OCaml executables 2 | src/*.byte 3 | src/*.native 4 | src/_build 5 | 6 | # Test executables and intermediate things 7 | tests/*.s 8 | tests/*.o 9 | tests/*.exe 10 | 11 | runtime/*.o 12 | 13 | # Vim temporary files 14 | .*.swp 15 | -------------------------------------------------------------------------------- /tests/div.expl: -------------------------------------------------------------------------------- 1 | //test division 2 | 3 | let x : int = test(0, 0, 100) 4 | 5 | function test(x : int) (y : int) (z : int) : int 6 | { 7 | let x : int = 0 8 | let y : int = 0 9 | input x 10 | y := 10 / x 11 | output y 12 | output z // output 100 (to make sure z passed in RDX is not clobbered *) 13 | return y 14 | } 15 | -------------------------------------------------------------------------------- /tests/fact.expl: -------------------------------------------------------------------------------- 1 | // Input a number and print its factorial 2 | 3 | let v : int = exec(0) 4 | 5 | function iter_fact (x : int) : int { 6 | let c : int = 1 7 | let z : int = 1 8 | while 0 < x { 9 | z := z * x 10 | x := x - c 11 | } 12 | return z 13 | } 14 | 15 | function rec_fact (x : int) : int { 16 | let r : int = 1 17 | if x = 0 then 18 | return r 19 | else { 20 | r := x * rec_fact(x - 1) 21 | return r 22 | } 23 | } 24 | 25 | function exec (x : int) : int { 26 | let y : int = 0 27 | let r : int = 0 28 | input x 29 | y := iter_fact(x) 30 | output y 31 | y := rec_fact(x) 32 | output y 33 | return r 34 | } 35 | -------------------------------------------------------------------------------- /tests/runtime_errors.expl: -------------------------------------------------------------------------------- 1 | // Test for exceptions. Uncommenting any of the below should have an exception 2 | 3 | //let r1 : int = array_bound1(0) 4 | //let r2 : int = array_bound2(0) 5 | //let r3 : int = 110/0 6 | //let r4 : int = null(0) 7 | //let r5 : int = a[2] 8 | //let r6 : int = null2(a) 9 | let a : array 1 = array[10] 10 | 11 | function array_bound1 (x : int) : int { 12 | let y : array 1 = array [10] 13 | x := y[10] 14 | return x 15 | } 16 | 17 | function array_bound2 (x : int) : int { 18 | let y : array 1 = array [10] 19 | y[10] := x 20 | return x 21 | } 22 | 23 | function null (x : int) : int { 24 | let r5 : int = a[0] 25 | let a : array 1 = array[10] 26 | return x 27 | } 28 | 29 | function null2 (x : array 1) : int { 30 | let i : int = x[0] 31 | return i 32 | } 33 | -------------------------------------------------------------------------------- /tests/insert_sort.expl: -------------------------------------------------------------------------------- 1 | // insertion sort 2 | 3 | let r : int = test(1) 4 | 5 | function test (i : int) : int 6 | { 7 | let len : int = 0 8 | let a : array 1 = array[0] 9 | let i : int = 0 10 | let y : int = 0 11 | input len 12 | a := array[len] 13 | while i < len { 14 | input y 15 | a[i] := y 16 | i := i + 1 17 | } 18 | 19 | i := sort(a, len) 20 | 21 | i := 0 22 | while i < len { 23 | y := a[i] 24 | output y 25 | i := i + 1 26 | } 27 | return i 28 | } 29 | 30 | function sort (a : array 1) (len : int) : int { 31 | let i : int = 1 32 | let y : int = 0 33 | let j : int = 0 34 | while i < len { 35 | y := a[i] 36 | j := i - 1 37 | while (!(j < 0) && (a[j] > y)) { 38 | a[j+1] := a[j] 39 | j := j - 1 40 | } 41 | a[j+1] := y 42 | i := i + 1 43 | } 44 | return i 45 | } 46 | -------------------------------------------------------------------------------- /src/blockInterp.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2016 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* An interpreter for CFGs *) 20 | 21 | val interp_prog : int -> BlockStructure.cfg -> unit 22 | -------------------------------------------------------------------------------- /src/regAlloc.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2016 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | open BlockStructure 20 | 21 | val reg_alloc : (SourceAst.id * int) list -> int -> cfg -> (cfg * int) 22 | -------------------------------------------------------------------------------- /src/typeCheck.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* The type checker *) 20 | 21 | open SourceAst 22 | 23 | type t 24 | val type_prog : prog -> prog 25 | -------------------------------------------------------------------------------- /src/frontEnd.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2016 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* The front end packaging lexing, parsing and type checking *) 20 | 21 | val front_end : string -> bool -> SourceAst.prog 22 | -------------------------------------------------------------------------------- /src/constProp.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2016 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* Do constant propagation and folding *) 20 | 21 | open SourceAst 22 | val prop_stmts : exp Idmap.t -> stmt list -> exp Idmap.t * stmt list 23 | -------------------------------------------------------------------------------- /src/compileFunction.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* Compiler for a single function *) 20 | 21 | val compile_fun : bool -> string -> BlockStructure.Varset.t -> SourceAst.func -> SourceAst.id * X86.instruction list 22 | 23 | -------------------------------------------------------------------------------- /tests/insert_sort.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | extern int64_t input(); 5 | extern void output(int64_t i); 6 | 7 | int sort(int64_t a[], int64_t len) { 8 | int64_t i = 1; 9 | int64_t y = 1; 10 | int64_t j = 1; 11 | while (i < len) { 12 | int64_t y = a[i]; 13 | int64_t j = i - 1; 14 | while (!(j < 0) && (a[j] > y)) { 15 | a[j+1] = a[j]; 16 | j = j - 1; 17 | } 18 | a[j+1] = y; 19 | i = i + 1; 20 | } 21 | return i; 22 | } 23 | 24 | int64_t test(int64_t i2) { 25 | int64_t len = 0; 26 | int64_t *a; 27 | int64_t i = 0; 28 | int64_t y = 0; 29 | len = input(); 30 | a = malloc(len * sizeof(int64_t)); 31 | while (i < len) { 32 | int64_t y = input(); 33 | a[i] = y; 34 | i = i + 1; 35 | } 36 | 37 | i = sort(a,len); 38 | 39 | i = 0; 40 | while (i < len) { 41 | int64_t y = a[i]; 42 | output(y); 43 | i = i + 1; 44 | } 45 | return i; 46 | } 47 | 48 | int64_t r; 49 | 50 | int main(int argc, char** argv) { 51 | r = test(1); 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /src/shrinkImmediates.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2016 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* Ensure that all immediate arguments fit into 32 bits. We assume that 20 | constant propagation has ensured that no operation has two immediate 21 | arguments, and we maintain that property here. *) 22 | 23 | val shrink_imm : BlockStructure.cfg -> BlockStructure.cfg 24 | -------------------------------------------------------------------------------- /src/instrSelX86.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2016 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* Convert linearised three-address code to x86-64 *) 20 | 21 | val num_regs : int 22 | val argument_reg_numbers : int list 23 | val be_to_x86 : bool -> BlockStructure.block_elem -> X86.instruction list 24 | val to_x86 : bool -> LineariseCfg.linear list -> int -> X86.instruction list 25 | -------------------------------------------------------------------------------- /tests/scoping.expl: -------------------------------------------------------------------------------- 1 | // Test/demonstrate variable scoping 2 | 3 | // should print 1, 11, 12, 8, 5, 9, 10, then 100 plus those, then 4 | // 101, 11, 12, 16, 13, 9, 10, then 100 plus those, then finally 5 | // 201, 2, 3, 4 6 | 7 | let g : int = 1 8 | let x : int = 2 9 | let y : int = 3 10 | let z : int = 4 11 | 12 | let r : int = test1(5, 6, 7, 8) 13 | let r2 : int = test1(13, 14, 15, 16) 14 | let r3 : int = test2(1) 15 | 16 | function test1 (a : int) (b : int) (y : int) (z : int) : int { 17 | let b : int = 9 18 | let c : int = 10 19 | let x : int = 11 20 | let y : int = 12 21 | 22 | output g 23 | output x 24 | output y 25 | output z 26 | output a 27 | output b 28 | output c 29 | 30 | g := g + 100 31 | x := x + 100 32 | y := y + 100 33 | z := z + 100 34 | a := a + 100 35 | b := b + 100 36 | c := c + 100 37 | 38 | output g 39 | output x 40 | output y 41 | output z 42 | output a 43 | output b 44 | output c 45 | 46 | return x 47 | } 48 | 49 | function test2 (aa : int) : int { 50 | output g 51 | output x 52 | output y 53 | output z 54 | return aa 55 | } 56 | -------------------------------------------------------------------------------- /src/liveVarAnalysis.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2016 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | open BlockStructure 20 | 21 | type cfg_annot = { gen : Varset.t; kill : Varset.t; live_exit : Varset.t } 22 | 23 | type cfg = (cfg_entry * cfg_annot) list 24 | 25 | val pp_cfg : Format.formatter -> cfg -> unit 26 | 27 | val lva : Varset.t -> BlockStructure.cfg -> cfg 28 | 29 | val remove_unused_writes : cfg -> cfg 30 | -------------------------------------------------------------------------------- /src/lineariseCfg.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* Flatten the CFG into a list of three-address code. *) 20 | 21 | open BlockStructure 22 | type linear = 23 | | Instr of block_elem 24 | | CJump of test * bool * string (* jump to string if var is bool *) 25 | | Jump of string 26 | | Label of string 27 | | Return of var option 28 | 29 | type linear_list = linear list 30 | 31 | val cfg_to_linear : cfg -> linear list 32 | 33 | val pp_linear_list : Format.formatter -> linear list -> unit 34 | 35 | val init_traversal : cfg -> cfg_entry Util.Intmap.t 36 | -------------------------------------------------------------------------------- /tests/Makefile: -------------------------------------------------------------------------------- 1 | # Try to detect the operating system to set flags for the assembler. 2 | # OSX uses macho64 binary format, and needs an _ prefixed before all global 3 | # symbols. Linux uses elf64 binary format, and the symbols are all fine as they 4 | # are. 5 | OS := $(shell uname) 6 | ifeq ($(OS), Darwin) 7 | FORMAT = macho64 8 | PREFIX = --prefix _ 9 | else 10 | FORMAT = elf64 11 | PREFIX = 12 | endif 13 | 14 | COMPILER = ../src/compile.byte 15 | EXEC_FILES = fact.exe array.exe insert_sort.exe quicksort.exe div.exe scoping.exe runtime_errors.exe 16 | 17 | .PHONY : all clean 18 | 19 | all : $(EXEC_FILES) 20 | 21 | %.s %.dot : %.expl $(COMPILER) 22 | $(COMPILER) $(OPTIONS) $< 23 | 24 | %.o : %.s 25 | nasm -f $(FORMAT) $(PREFIX) $< 26 | 27 | %.o : %.c 28 | 29 | # Use gcc to do the linking since we want to include libc, and our 30 | # object code starts from a C-style main function. We do not want to 31 | # enter at _start (which is where the OS dumps you on process 32 | # creation), because there's a bunch of book keeping and alignment 33 | # stuff to sort out. gcc/libc include code to do that for us. 34 | %.exe : %.o ../runtime/io.o ../runtime/array.o 35 | gcc ../runtime/io.o ../runtime/array.o $< -o $@ 36 | 37 | %.pdf : %.dot 38 | dot -Tpdf $< > $@ 39 | 40 | .PRECIOUS : %.s 41 | 42 | clean : 43 | rm -f *.o *.s $(EXEC_FILES) *.pdf *.dot 44 | -------------------------------------------------------------------------------- /src/unnestExp.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2016 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* Flatten expressions so that they follow this grammar. We don't introduce a 20 | new type, but do define predicates on the SourceAst.exp type. 21 | 22 | type ae = 23 | | Num of int64 24 | | Bool of bool 25 | | Ident of SourceAst.id 26 | 27 | type flat_exp = 28 | | Num of int64 29 | | Bool of bool 30 | | Ident of SourceAst.id * ae list 31 | | Op of ae * op * ae 32 | | Uop of ae 33 | | Array of ae list 34 | *) 35 | 36 | open SourceAst 37 | val unnest : stmt list -> stmt list 38 | val is_atomic : exp -> bool 39 | val is_flat : exp -> bool 40 | -------------------------------------------------------------------------------- /src/interp.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2016 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* Driver for interpreter executable. *) 20 | 21 | let filename = ref None;; 22 | 23 | let usage_msg = 24 | "example interpreter \nexample usage: " ^ Sys.argv.(0)^ " test.expl\n";; 25 | 26 | let _ = 27 | Arg.parse [] 28 | (fun s -> 29 | match !filename with 30 | | None -> 31 | filename := Some s 32 | | Some s' -> 33 | (Format.printf "Error: given multiple files to run: %s and %s\n" s' s; 34 | exit 1)) 35 | usage_msg;; 36 | 37 | let _ = 38 | match !filename with 39 | | None -> 40 | (print_string usage_msg; 41 | exit 1) 42 | | Some filename -> 43 | AstInterp.interp_prog (FrontEnd.front_end filename false) 44 | -------------------------------------------------------------------------------- /src/astInterp.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* An interpreter for ASTs *) 20 | 21 | (* For when the interpreter crashed, such as array bounds violations *) 22 | exception Crash of string 23 | 24 | (* For errors that a well-typed program can't have *) 25 | exception TypeError 26 | 27 | (* Values are either integers or n-dimensional arrays of integers. 28 | We keep multi-dimensional arrays in a single dimensional one and include a 29 | list of how big each dimension is. 30 | We represent bools as numbers: true = 1L and false = 0L *) 31 | type val_t = 32 | | Vint of int64 33 | | Varray of int list * int64 array 34 | 35 | val do_op : Tokens.op -> int64 -> int64 -> int64 36 | 37 | val interp_prog : SourceAst.prog -> unit 38 | -------------------------------------------------------------------------------- /runtime/io.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | */ 18 | 19 | /* Trivial I/O runtime library */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | // Read in a 64 bit signed integer and return it. First print a > as prompt. 27 | int64_t input() { 28 | int64_t i; 29 | printf("> "); 30 | fflush(stdout); 31 | scanf("%" PRId64, &i); 32 | if (errno == ERANGE) { 33 | printf("Input number does not fit in a 64-bit signed integer\n"); 34 | exit(1); 35 | } 36 | return i; 37 | } 38 | 39 | // Print a given 64-bt 40 | void output(int64_t i) { 41 | printf("%" PRId64 "\n", i); 42 | return; 43 | } 44 | 45 | // signal an error 46 | void signal_error(int64_t errn) { 47 | switch (errn) { 48 | case 0: 49 | printf("array bounds error\n"); 50 | break; 51 | case 1: 52 | printf("null pointer dereference\n"); 53 | break; 54 | default: 55 | printf("unknown runtime error\n"); 56 | break; 57 | } 58 | exit(1); 59 | } 60 | -------------------------------------------------------------------------------- /src/tokens.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* The language's tokens, and a simple lexer *) 20 | 21 | type op = 22 | | Plus 23 | | Minus 24 | | Times 25 | | Div 26 | | Lshift 27 | | BitOr 28 | | BitAnd 29 | | Lt 30 | | Gt 31 | | Eq 32 | | And 33 | | Or 34 | 35 | type uop = 36 | | Not 37 | 38 | type token = 39 | | Num of int64 40 | | Ident of string 41 | | Op of op 42 | | Uop of uop 43 | | Lparen 44 | | Rparen 45 | | Lcurly 46 | | Rcurly 47 | | Lbrac 48 | | Rbrac 49 | | Colon 50 | | Comma 51 | | While 52 | | Do 53 | | If 54 | | Then 55 | | Else 56 | | Assign 57 | | True 58 | | False 59 | | Input 60 | | Output 61 | | Array 62 | | Int 63 | | Bool 64 | | Let 65 | | Function 66 | | Return 67 | 68 | type tok_loc = (token * int) 69 | 70 | val show_uop : uop -> string 71 | val show_op : op -> string 72 | val show_token : token -> string 73 | val pp_tok_loc : Format.formatter -> tok_loc -> unit 74 | val lex : string -> int -> int -> tok_loc list 75 | -------------------------------------------------------------------------------- /tests/quicksort.expl: -------------------------------------------------------------------------------- 1 | // Quicksort 2 | 3 | let r : int = test(1) 4 | 5 | function test (i : int) : int 6 | { 7 | let len : int = 0 8 | let a : array 1 = array[0] 9 | let i : int = 0 10 | let y : int = 0 11 | input len 12 | a := array[len] 13 | while i < len { 14 | input y 15 | a[i] := y 16 | i := i + 1 17 | } 18 | 19 | i := sort(a, 0, len-1) 20 | 21 | i := 0 22 | while i < len { 23 | y := a[i] 24 | output y 25 | i := i + 1 26 | } 27 | return i 28 | } 29 | 30 | // Partition an array between lower and upper (inclusive), so that everything 31 | // before (and including) the returned index is <= than the pivot and 32 | // everything after is >= the pivot. The return value is strictly less than 33 | // upper. Assume that lower < upper. This is Tony Hoare's partitioning 34 | // algorithm. 35 | function partition (a : array 1) (pivot : int) (lower : int) (upper : int) : int { 36 | let temp : int = 0 37 | lower := lower - 1 38 | upper := upper + 1 39 | do { 40 | do 41 | lower := lower + 1 42 | while (a[lower] < pivot) 43 | do 44 | upper := upper - 1 45 | while (a[upper] > pivot) 46 | if (lower < upper) then { 47 | temp := a[lower] 48 | a[lower] := a[upper] 49 | a[upper] := temp 50 | } 51 | else 52 | return upper 53 | } while true 54 | return upper 55 | } 56 | 57 | // Sort the array between first and last, inclusive. Return a dummy value. 58 | function sort (a : array 1) (first : int) (last : int) : int { 59 | let mid : int = 0 60 | let temp : int = 0 61 | 62 | if (first = last) then 63 | return temp 64 | else { 65 | mid := partition(a, a[first], first, last) 66 | temp := sort(a,first,mid) 67 | temp := sort(a,mid+1,last) 68 | return temp 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/frontEnd.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* The front end. It packages lexing, parsing and type checking for use by the 20 | interpreter and type checker both. *) 21 | 22 | open Util 23 | 24 | (* Parse and type check the given filename. *) 25 | let front_end (filename : string) (debug : bool) : SourceAst.prog 26 | = 27 | if Filename.check_suffix filename ".expl" then 28 | try 29 | let input = Std.input_file filename in 30 | let toks = Tokens.lex input 0 1 in 31 | if debug then 32 | Format.printf "%a@\n@\n" (pp_list Tokens.pp_tok_loc) toks; 33 | let ast = SourceAst.parse_program toks in 34 | if debug then 35 | Format.printf "%a@\n@\n" SourceAst.pp_program ast; 36 | let ast2 = TypeCheck.type_prog ast in 37 | if debug then 38 | Format.printf "%a@\n@\n" SourceAst.pp_program ast2; 39 | ast2 40 | with 41 | | BadInput s -> 42 | Format.printf "%s\n" s; 43 | exit 1 44 | else 45 | (Format.printf "Expects filename ending in .expl\n"; 46 | exit 1) 47 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | PACKAGES = str 2 | FLAGS = -bin-annot -g -principal -safe-string -strict-formats -strict-sequence -w +a-42-4 3 | 4 | FILES = util tokens sourceAst typeCheck frontEnd constProp unnestExp \ 5 | blockStructure shrinkImmediates liveVarAnalysis regAlloc lineariseCfg x86 \ 6 | instrSelX86 compileFunction astInterp interp compile 7 | 8 | FILES_ML = $(addsuffix .ml,$(FILES)) 9 | FILES_MLI = $(addsuffix .mli,$(FILES)) 10 | FILES_CMI = $(addsuffix .cmi,$(FILES)) 11 | FILES_CMT = $(addsuffix .cmt,$(FILES)) 12 | FILES_CMTI = $(addsuffix .cmti,$(FILES)) 13 | FILES_CMO = $(addsuffix .cmo,$(FILES)) 14 | FILES_CMX = $(addsuffix .cmx,$(FILES)) 15 | FILES_O = $(addsuffix .o,$(FILES)) 16 | 17 | .PHONY : ocb-compile.byte ocb-compile.native ocb-interp.byte ocb-interp.native all 18 | 19 | all : ocb-interp.byte ocb-compile.byte 20 | 21 | ocb-interp.byte : 22 | ocamlbuild -use-ocamlfind interp.byte 23 | 24 | ocb-interp.native : 25 | ocamlbuild -use-ocamlfind interp.native 26 | 27 | interp.byte : $(FILES_CMO) 28 | ocamlfind ocamlc -linkpkg -package $(PACKAGES) $(FILES_CMO) -o $@ 29 | 30 | interp.native : $(FILES_CMX) 31 | ocamlfind ocamlopt -linkpkg -package $(PACKAGES) $(FILES_CMX) -o $@ 32 | 33 | ocb-compile.byte : 34 | ocamlbuild -use-ocamlfind compile.byte 35 | 36 | ocb-compile.native : 37 | ocamlbuild -use-ocamlfind compile.native 38 | 39 | compile.byte : $(FILES_CMO) 40 | ocamlfind ocamlc -linkpkg -package $(PACKAGES) $(FILES_CMO) -o $@ 41 | 42 | compile.native : $(FILES_CMX) 43 | ocamlfind ocamlopt -linkpkg -package $(PACKAGES) $(FILES_CMX) -o $@ 44 | 45 | %.cmi : %.mli 46 | ocamlfind ocamlc -package $(PACKAGES) $(FLAGS) -o $@ -c $< 47 | 48 | %.cmo : %.ml 49 | ocamlfind ocamlc -package $(PACKAGES) $(FLAGS) -o $@ -c $< 50 | 51 | %.cmx : %.ml 52 | ocamlfind ocamlopt -package $(PACKAGES) $(FLAGS) -o $@ -c $< 53 | 54 | .PHONY : depend clean 55 | 56 | clean : 57 | ocamlbuild -clean 58 | rm -f $(FILES_CMI) $(FILES_CMTI) $(FILES_CMT) $(FILES_CMO) $(FILES_CMX) $(FILES_O) compile.byte compile.native interp.byte interp.native 59 | 60 | depend : 61 | ocamlfind ocamldep $(FILES_ML) $(FILES_MLI) > .depend 62 | 63 | include .depend 64 | -------------------------------------------------------------------------------- /src/README.md: -------------------------------------------------------------------------------- 1 | Compiler structure 2 | ------------------ 3 | 4 | The top-level files are _compile.ml_ and _interp.ml_, driving a compiler and 5 | interpreter respectively. 6 | 7 | Front end 8 | --------- 9 | 10 | The front-end in _frontEnd.ml_ packages a lexer (_tokens.ml_), parser 11 | (_sourceAst.ml_), and type checker (_typeCheck.ml_) and produces an AST. It is 12 | shared by the compiler and interpreter. 13 | 14 | AST transformations 15 | ------------------- 16 | 17 | The AST goes through a series of transformations 18 | 19 | - _constProp.ml_: Propagates and folds constants. 20 | The compiler later relies on there being no operators with two constant 21 | operands. 22 | - _unnestExp.ml_: Un-nests expressions. 23 | Flattens out expressions, using assignments to temporary variables. Ensures 24 | that no expressions contain sub-expressions with operators in them. Also 25 | makes all array accesses single-dimensional. 26 | - _blockStructure.ml_: Transforms the AST to a control-flow graph of basic 27 | blocks. Requires expressions to be un-nested and && and || to be removed. 28 | Inserts array bounds checks. 29 | 30 | Basic-block transformations 31 | --------------------------- 32 | 33 | - _shrinkImmediates.ml_: Removes all constants that don't fit into 32 bits 34 | (since that's the maximum size of an immediate on x86-64), possibly 35 | introducing assignments to temporary variables. 36 | - _liveVarAnalysis.ml_: Performs live variable analysis, and removes unused 37 | assignments. 38 | - _regAlloc.ml_: Performs register allocation. This changes all identifiers from 39 | named variables to registers and stack offsets. 40 | - _lineariseCFG.ml_: Flatten the CFG, introducing labels and branches. 41 | 42 | Back end 43 | ------- 44 | - _x86.ml_: An AST for the small subset of x86-64 that the compiler generates. 45 | Includes a printer for it in NASM syntax. 46 | - _instrSelX86.ml_: Generate x86 code. 47 | - _compileFunction.ml_: packages up the compiler for single functions 48 | 49 | Misc 50 | ---- 51 | - _util.ml_: Misc. utility functions. 52 | - _astInterp.ml_ and _blockInterp.ml_: Interpreters for the AST and CFG. 53 | -------------------------------------------------------------------------------- /tests/array.expl: -------------------------------------------------------------------------------- 1 | // Basic testing of arrays 2 | 3 | let r1 : int = test1(0) 4 | let r2 : int = test2(0) 5 | let g : int = 2 6 | 7 | let a : array 2 = array[4][4] 8 | let f : int = fill(0) 9 | let b : array 1 = test3(a) 10 | let c : int = test4(0) 11 | 12 | function fill(x : int) : int { 13 | let i : int = 0 14 | let j : int = 0 15 | while i < 4 { 16 | j := 0 17 | while j < 4 { 18 | a[i][j] := i+j 19 | j := j + 1 20 | } 21 | i := i + 1 22 | } 23 | return x 24 | } 25 | 26 | function test1 (x : int) : int 27 | { 28 | let i : int = 0 29 | let x : array 1 = array[10] 30 | let j : int = 0 31 | output i // should print 0 32 | while i < 10 { 33 | x[i] := i 34 | i := i + 1 35 | } 36 | output i // should print 10 37 | j := x[0] 38 | output j // should print 0 39 | j := x[9] 40 | output j // should print 9 41 | return j 42 | } 43 | 44 | function test2 (x : int) : int 45 | { 46 | let x : array 3 = array[3][5][7] 47 | let count : int = 0 48 | let i : int = 0 49 | let j : int = 0 50 | let k : int = 0 51 | let o : int = 0 52 | while i < 3 { 53 | j := 0 54 | while j < 5 { 55 | k := 0 56 | while k < 7 { 57 | x[i][j][k] := count 58 | count := count + 1 59 | k := k + 1 60 | } 61 | j := j + 1 62 | } 63 | i := i + 1 64 | } 65 | o := x[0][0][0] 66 | output o // should print 0 67 | o := x[1][3][4] 68 | output o // should print 60 69 | o := x[2][4][6] 70 | output o //should print 104 71 | return o 72 | } 73 | 74 | function test3 (a : array 2) : array 1 { 75 | let b : array 1 = array[4] 76 | let i : int = 0 77 | let j : int = 0 78 | while i < 4 { 79 | j := 0 80 | while j < 4 { 81 | b[i] := b[i] + a[i][j] 82 | j := j + 1 83 | } 84 | i := i + 1 85 | } 86 | return b 87 | } 88 | 89 | function test4 (x : int) : int { 90 | let i : int = 0 91 | while i < 4 { 92 | x := b[i] 93 | output x // 6, 10, 14, 18 94 | i := i + 1 95 | } 96 | x := b[g] // 14 97 | output x 98 | return x 99 | } 100 | -------------------------------------------------------------------------------- /src/sourceAst.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* The language's AST, and a predictive, recursive descent parser. See the 20 | ../README.md for the grammar. *) 21 | 22 | type scope = 23 | | Global 24 | | Parameter 25 | | Local 26 | 27 | val compare_scope : scope -> scope -> int 28 | 29 | type id = 30 | | Source of string * scope option 31 | | Temp of string * int 32 | 33 | module Idmap : Map.S with type key = id 34 | 35 | type exp = 36 | | Ident of id * exp list 37 | | Call of id * exp list 38 | | Num of int64 39 | | Bool of bool 40 | | Op of exp * Tokens.op * exp 41 | | Uop of Tokens.uop * exp 42 | (* Allocate a new array of given dimensions. Initialise to 0 *) 43 | | Array of exp list 44 | 45 | type stmt = 46 | | Assign of id * exp list * exp 47 | (* A generalised do/while loop. Always execute the first statement, then 48 | the test, then repeatedly do the 2nd, then first statement and then test 49 | 'while e s' becomes DoWhile (Stmts [], e, s) and 'do s while e' becomes 50 | DoWhile (s, e, Stmts []) *) 51 | | DoWhile of stmt * exp * stmt 52 | | Ite of exp * stmt * stmt 53 | | Stmts of stmt list 54 | | In of id 55 | | Out of id 56 | | Return of id option 57 | | Loc of stmt * int (* annotate a statement with it's source line number *) 58 | 59 | type typ = 60 | | Int 61 | | Bool 62 | (* An int array with the given number of dimensions *) 63 | | Array of int 64 | 65 | type var_dec = { var_name : id; typ : typ; init : exp; loc : int option } 66 | 67 | type func = { fun_name : id; params : (id * typ) list; ret : typ; 68 | locals : var_dec list; body : stmt list; loc : int option } 69 | 70 | type prog = { globals : var_dec list; funcs : func list } 71 | 72 | val show_id : id -> string 73 | val pp_stmt : Format.formatter -> stmt -> unit 74 | val pp_stmts : Format.formatter -> stmt list -> unit 75 | val pp_program : Format.formatter -> prog -> unit 76 | val parse_program : (Tokens.token * int) list -> prog 77 | val stmts_to_stmt : stmt list -> stmt 78 | -------------------------------------------------------------------------------- /src/blockStructure.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* A control flow graph representation with basic blocks, and the source AST -> 20 | CGF algorithm. Also compiles arrays to loads and stores. *) 21 | 22 | type var = 23 | | Vreg of int 24 | | Stack of int 25 | | Global of string 26 | | NamedSource of string * SourceAst.scope 27 | | NamedTmp of string * int 28 | 29 | module Varset : sig 30 | include Set.S with type elt = var 31 | val show : t -> string 32 | val pp : Format.formatter -> t -> unit 33 | end 34 | 35 | module Varmap : sig 36 | include Map.S with type key = var 37 | end 38 | 39 | type atomic_exp = 40 | | Ident of var 41 | | Num of int64 42 | 43 | type block_elem = 44 | | AssignOp of var * atomic_exp * Tokens.op * atomic_exp 45 | | AssignAtom of var * atomic_exp 46 | (* Ld (x,y,e) represents x := *(y+e) *) 47 | | Ld of var * var * atomic_exp 48 | (* St (x,e1,e2) represents *(x+e1) := e2 *) 49 | | St of var * atomic_exp * atomic_exp 50 | (* Call (x, f, aes) represents x := f(aes) *) 51 | | Call of var option * string * atomic_exp list 52 | (* BoundCheck (a1, a2) represents assert (a1 >= 0 && a1 < a2) *) 53 | | BoundCheck of atomic_exp * atomic_exp 54 | | NullCheck of var 55 | 56 | type basic_block = block_elem list 57 | 58 | type test_op = 59 | | Lt 60 | | Gt 61 | | Eq 62 | 63 | type test = atomic_exp * test_op * atomic_exp 64 | 65 | type next_block = 66 | | Return of var option 67 | | Next of int 68 | (* The first int is the block number if the ident is true, and the second if 69 | * it is false *) 70 | | Branch of test * int * int 71 | 72 | type cfg_entry = { bnum : int; elems : block_elem list; next : next_block; 73 | mutable started : bool; mutable finished : bool } 74 | 75 | type cfg = cfg_entry list 76 | 77 | val id_to_var : SourceAst.id -> var 78 | 79 | val show_var : var -> string 80 | val pp_block_elem : Format.formatter -> block_elem -> unit 81 | val pp_test : Format.formatter -> test -> unit 82 | val pp_cfg : Format.formatter -> cfg -> unit 83 | val pp_cfg_entry : Format.formatter -> cfg_entry -> unit 84 | val build_cfg : SourceAst.stmt list -> cfg 85 | 86 | val cfg_to_graphviz : Format.formatter -> cfg -> unit 87 | -------------------------------------------------------------------------------- /src/.depend: -------------------------------------------------------------------------------- 1 | astInterp.cmo : util.cmo tokens.cmi sourceAst.cmi astInterp.cmi 2 | astInterp.cmx : util.cmx tokens.cmx sourceAst.cmx astInterp.cmi 3 | astInterp.cmi : tokens.cmi sourceAst.cmi 4 | blockInterp.cmo : util.cmo lineariseCfg.cmi blockStructure.cmi astInterp.cmi \ 5 | blockInterp.cmi 6 | blockInterp.cmx : util.cmx lineariseCfg.cmx blockStructure.cmx astInterp.cmx \ 7 | blockInterp.cmi 8 | blockInterp.cmi : blockStructure.cmi 9 | blockStructure.cmo : util.cmo tokens.cmi sourceAst.cmi blockStructure.cmi 10 | blockStructure.cmx : util.cmx tokens.cmx sourceAst.cmx blockStructure.cmi 11 | blockStructure.cmi : tokens.cmi sourceAst.cmi 12 | compile.cmo : x86.cmo util.cmo sourceAst.cmi instrSelX86.cmi frontEnd.cmi \ 13 | compileFunction.cmi blockStructure.cmi 14 | compile.cmx : x86.cmx util.cmx sourceAst.cmx instrSelX86.cmx frontEnd.cmx \ 15 | compileFunction.cmx blockStructure.cmx 16 | compileFunction.cmo : x86.cmo unnestExp.cmi sourceAst.cmi \ 17 | shrinkImmediates.cmi regAlloc.cmi liveVarAnalysis.cmi lineariseCfg.cmi \ 18 | instrSelX86.cmi constProp.cmi blockStructure.cmi compileFunction.cmi 19 | compileFunction.cmx : x86.cmx unnestExp.cmx sourceAst.cmx \ 20 | shrinkImmediates.cmx regAlloc.cmx liveVarAnalysis.cmx lineariseCfg.cmx \ 21 | instrSelX86.cmx constProp.cmx blockStructure.cmx compileFunction.cmi 22 | compileFunction.cmi : x86.cmo sourceAst.cmi 23 | constProp.cmo : util.cmo tokens.cmi sourceAst.cmi constProp.cmi 24 | constProp.cmx : util.cmx tokens.cmx sourceAst.cmx constProp.cmi 25 | constProp.cmi : sourceAst.cmi 26 | frontEnd.cmo : util.cmo typeCheck.cmi tokens.cmi sourceAst.cmi frontEnd.cmi 27 | frontEnd.cmx : util.cmx typeCheck.cmx tokens.cmx sourceAst.cmx frontEnd.cmi 28 | frontEnd.cmi : sourceAst.cmi 29 | instrSelX86.cmo : x86.cmo util.cmo tokens.cmi lineariseCfg.cmi \ 30 | blockStructure.cmi instrSelX86.cmi 31 | instrSelX86.cmx : x86.cmx util.cmx tokens.cmx lineariseCfg.cmx \ 32 | blockStructure.cmx instrSelX86.cmi 33 | instrSelX86.cmi : x86.cmo lineariseCfg.cmi blockStructure.cmi 34 | interp.cmo : frontEnd.cmi astInterp.cmi 35 | interp.cmx : frontEnd.cmx astInterp.cmx 36 | lineariseCfg.cmo : util.cmo blockStructure.cmi lineariseCfg.cmi 37 | lineariseCfg.cmx : util.cmx blockStructure.cmx lineariseCfg.cmi 38 | lineariseCfg.cmi : util.cmo blockStructure.cmi 39 | liveVarAnalysis.cmo : util.cmo tokens.cmi blockStructure.cmi \ 40 | liveVarAnalysis.cmi 41 | liveVarAnalysis.cmx : util.cmx tokens.cmx blockStructure.cmx \ 42 | liveVarAnalysis.cmi 43 | liveVarAnalysis.cmi : util.cmo blockStructure.cmi 44 | regAlloc.cmo : util.cmo blockStructure.cmi regAlloc.cmi 45 | regAlloc.cmx : util.cmx blockStructure.cmx regAlloc.cmi 46 | regAlloc.cmi : blockStructure.cmi 47 | shrinkImmediates.cmo : tokens.cmi blockStructure.cmi shrinkImmediates.cmi 48 | shrinkImmediates.cmx : tokens.cmx blockStructure.cmx shrinkImmediates.cmi 49 | shrinkImmediates.cmi : blockStructure.cmi 50 | sourceAst.cmo : util.cmo tokens.cmi sourceAst.cmi 51 | sourceAst.cmx : util.cmx tokens.cmx sourceAst.cmi 52 | sourceAst.cmi : tokens.cmi 53 | tokens.cmo : util.cmo tokens.cmi 54 | tokens.cmx : util.cmx tokens.cmi 55 | tokens.cmi : 56 | typeCheck.cmo : util.cmo tokens.cmi sourceAst.cmi typeCheck.cmi 57 | typeCheck.cmx : util.cmx tokens.cmx sourceAst.cmx typeCheck.cmi 58 | typeCheck.cmi : sourceAst.cmi 59 | unnestExp.cmo : util.cmo tokens.cmi sourceAst.cmi unnestExp.cmi 60 | unnestExp.cmx : util.cmx tokens.cmx sourceAst.cmx unnestExp.cmi 61 | unnestExp.cmi : sourceAst.cmi 62 | util.cmo : 63 | util.cmx : 64 | x86.cmo : util.cmo 65 | x86.cmx : util.cmx 66 | -------------------------------------------------------------------------------- /runtime/array.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Example compiler 3 | * Copyright (C) 2015-2016 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | */ 18 | 19 | /* Allocation of multi-dimensional arrays of 64-bit ints. For an array of 20 | * length n, store the length of the array in element 0, and the array in 1 to 21 | * n inclusive. We're casting between int64_t and int64_t* which is not 22 | * guaranteed to work by the C standard, but seems to work on standard OS X and 23 | * Linux C compilers. */ 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | /* Since the first element of an array will have a different type than the 30 | * rest, and the last dimension's array will have numbers rather than pointers. */ 31 | union elt { 32 | uint64_t num; 33 | union elt *ptr; 34 | }; 35 | 36 | typedef union elt elt; 37 | 38 | #define S sizeof(elt) 39 | 40 | /* A 1 dimensional array */ 41 | elt* allocate1(int64_t dim1) { 42 | elt* x = malloc(S*(dim1+1)); 43 | for (unsigned long i = 0; i < dim1; i++) 44 | x[i+1].num = 0; 45 | x[0].num = dim1; 46 | return x; 47 | } 48 | 49 | /* A 2 dimensional array is an array of arrays */ 50 | elt* allocate2(int64_t dim1, int64_t dim2) { 51 | elt* x = malloc(S*(dim1+1)); 52 | for (unsigned long i = 0; i < dim1; i++) { 53 | elt* y = malloc(S*(dim2+1)); 54 | x[i+1].ptr = y; 55 | for (int64_t j = 0; j < dim2; j++) 56 | y[j+1].num = 0; 57 | y[0].num = dim2; 58 | } 59 | x[0].num = dim1; 60 | return x; 61 | } 62 | 63 | /* Allocate dimensions dim to num_dim where the length of each dimension is 64 | * given by the list dims[] */ 65 | elt* allocate_n_help(int64_t dim, int64_t num_dim, int64_t dims[]) { 66 | if (dim == num_dim - 1) 67 | return allocate1(dims[dim]); 68 | else { 69 | elt* x = malloc(S*(dims[dim]+1)); 70 | for (unsigned long i = 0; i < dims[dim]; i++) { 71 | x[i+1].ptr = allocate_n_help(dim+1, num_dim, dims); 72 | } 73 | x[0].num = dims[dim]; 74 | return x; 75 | } 76 | } 77 | 78 | elt* allocate_n(int64_t num_dim, int64_t dims[]) { 79 | return allocate_n_help(0, num_dim, dims); 80 | } 81 | 82 | elt* allocate3(int64_t dim1, int64_t dim2, int64_t dim3) { 83 | int64_t dims[] = {dim1, dim2, dim3}; 84 | return allocate_n(3, dims); 85 | } 86 | 87 | elt* allocate4(int64_t dim1, int64_t dim2, int64_t dim3, int64_t dim4) { 88 | int64_t dims[] = {dim1, dim2, dim3, dim4}; 89 | return allocate_n(4, dims); 90 | } 91 | 92 | elt* allocate5(int64_t dim1, int64_t dim2, int64_t dim3, int64_t dim4, int64_t dim5) { 93 | int64_t dims[] = {dim1, dim2, dim3, dim4, dim5}; 94 | return allocate_n(5, dims); 95 | } 96 | 97 | elt* allocate6(int64_t dim1, int64_t dim2, int64_t dim3, int64_t dim4, int64_t dim5, int64_t dim6) { 98 | int64_t dims[] = {dim1, dim2, dim3, dim4, dim5, dim6}; 99 | return allocate_n(6, dims); 100 | } 101 | 102 | elt* allocate7(int64_t dim1, int64_t dim2, int64_t dim3, int64_t dim4, int64_t dim5, int64_t dim6, int64_t dim7) { 103 | int64_t dims[] = {dim1, dim2, dim3, dim4, dim5, dim6, dim7}; 104 | return allocate_n(7, dims); 105 | } 106 | -------------------------------------------------------------------------------- /src/compile.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* The main driver for the compiler executable. *) 20 | 21 | (* Command-line arguments *) 22 | let filename_ref = ref None;; 23 | let safe_ref = ref true;; 24 | 25 | let options = Arg.align ([("-safe", Arg.Bool (fun b -> safe_ref := b), 26 | "\tdo null pointer and array bounds checks") 27 | ]);; 28 | 29 | let usage_msg = 30 | "example compiler \nexample usage: " ^ Sys.argv.(0)^ " test.expl\n" 31 | 32 | let _ = 33 | Arg.parse options 34 | (fun s -> 35 | match !filename_ref with 36 | | None -> 37 | filename_ref := Some s 38 | | Some s' -> 39 | (Format.printf "Error: given multiple files to process: %s and %s\n" 40 | s' s; 41 | exit 1)) 42 | usage_msg 43 | 44 | let filename = 45 | match !filename_ref with 46 | | None -> 47 | (print_string usage_msg; 48 | exit 1) 49 | | Some filename -> 50 | filename 51 | 52 | let prog = FrontEnd.front_end filename false;; 53 | 54 | open SourceAst 55 | 56 | (* Build a main function that just runs the initialisation of all of the 57 | globals *) 58 | let main_function = 59 | { fun_name = Source ("main", None); params = []; ret = Int; 60 | locals = []; 61 | body = List.map (fun d -> Assign (d.var_name, [], d.init)) prog.globals @ 62 | [Return None]; 63 | loc = None } 64 | 65 | let global_id_to_var (g : SourceAst.id) : BlockStructure.var = 66 | match g with 67 | | Source (i, Some Global) -> BlockStructure.NamedSource (i, Global) 68 | | _ -> assert false 69 | 70 | let globals = 71 | List.fold_right 72 | (fun d globals -> 73 | BlockStructure.Varset.add (global_id_to_var d.var_name) globals) 74 | prog.globals 75 | BlockStructure.Varset.empty ;; 76 | 77 | let functions = 78 | List.map (CompileFunction.compile_fun !safe_ref filename globals) 79 | (main_function::prog.funcs);; 80 | 81 | open Format 82 | 83 | let outfile = open_out (Filename.chop_extension filename ^ ".s");; 84 | let fmt = formatter_of_out_channel outfile;; 85 | (* Assembly wrapper *) 86 | fprintf fmt "[section .text align=16]@\n";; 87 | fprintf fmt "global main@\n@\n";; 88 | fprintf fmt "extern signal_error@\n";; 89 | fprintf fmt "extern input@\n";; 90 | fprintf fmt "extern output@\n";; 91 | fprintf fmt "extern allocate1@\n";; 92 | fprintf fmt "extern allocate2@\n";; 93 | fprintf fmt "extern allocate3@\n";; 94 | fprintf fmt "extern allocate4@\n";; 95 | fprintf fmt "extern allocate5@\n";; 96 | fprintf fmt "extern allocate6@\n";; 97 | fprintf fmt "extern allocate7@\n@\n";; 98 | List.iter 99 | (fun (name, code) -> fprintf fmt "%s:@\n%a" (show_id name) X86.pp_instr_list code) 100 | functions;; 101 | fprintf fmt "bound_error:@\n%a" 102 | (fun fmt instr -> X86.pp_instr_list fmt (InstrSelX86.be_to_x86 !safe_ref instr)) 103 | (BlockStructure.Call (None, "signal_error", [BlockStructure.Num 0L]));; 104 | fprintf fmt "null_error:@\n%a" 105 | (fun fmt instr -> X86.pp_instr_list fmt (InstrSelX86.be_to_x86 !safe_ref instr)) 106 | (BlockStructure.Call (None, "signal_error", [BlockStructure.Num 1L]));; 107 | (* bss segment for the global variables, all initialised to 0 *) 108 | fprintf fmt "[section .bss align=16]@\n";; 109 | fprintf fmt "default rel@\n";; 110 | List.iter (fun d -> fprintf fmt "%s: resq 1\n" (show_id d.var_name)) prog.globals;; 111 | close_out outfile;; 112 | -------------------------------------------------------------------------------- /src/compileFunction.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* Compiler for a single function *) 20 | 21 | open SourceAst 22 | 23 | (* We treat a local variable declarations as simple assignment to the variable. 24 | *) 25 | let init_var_dec_to_0 (d : var_dec) : stmt = 26 | Assign (d.var_name, [], Num 0L) 27 | 28 | let var_dec_to_stmt (d : var_dec) : stmt = 29 | Assign (d.var_name, [], d.init) 30 | 31 | (* globals should contain all globals variables, so that we can tell the live 32 | variable analysis that they must be live when the function returns. 33 | *) 34 | let compile_fun safe filename (globals : BlockStructure.Varset.t) (f : func) 35 | : id * X86.instruction list = 36 | let ast = 37 | (* Zero out local variables, in case any of the initialisations are out of 38 | order and refer to uninitialised variables. Later removal of dead writes 39 | should be able to remove these where there is no problem *) 40 | List.map init_var_dec_to_0 f.locals @ 41 | List.map var_dec_to_stmt f.locals @ 42 | f.body in 43 | 44 | let (_,opt_ast) = ConstProp.prop_stmts SourceAst.Idmap.empty ast in 45 | (*Format.printf "@\n%a@\n" SourceAst.pp_stmts opt_ast; *) 46 | 47 | let no_nest_ast = UnnestExp.unnest opt_ast in 48 | (*Format.printf "@\n%a@\n" SourceAst.pp_stmts no_nest_ast; *) 49 | 50 | let cfg = BlockStructure.build_cfg no_nest_ast in 51 | (*Format.printf "@\n%a@\n" BlockStructure.pp_cfg cfg; *) 52 | (* Print the CFG in dot format. Process the .dot file with dot -Tpdf FILENAME > FILENAME.pdf. 53 | dot is part of the graphviz package http://www.graphviz.org *) 54 | (* 55 | let outfile = open_out (Filename.chop_extension filename ^ "_" ^ show_id f.fun_name ^ ".dot") in 56 | let fmt = Format.formatter_of_out_channel outfile in 57 | Format.fprintf fmt "%a" BlockStructure.cfg_to_graphviz cfg; 58 | close_out outfile; 59 | *) 60 | 61 | let cfg' = ShrinkImmediates.shrink_imm cfg in 62 | (* Format.printf "@\n%a@\n" BlockStructure.pp_cfg cfg';*) 63 | 64 | let lva_cfg0 = LiveVarAnalysis.lva globals cfg' in 65 | (* Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg0; *) 66 | 67 | let lva_cfg1 = LiveVarAnalysis.remove_unused_writes lva_cfg0 in 68 | (*Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg1;*) 69 | 70 | 71 | (* Iterate analysis for examples like this: 72 | 73 | input c 74 | x := c 75 | y := 1 76 | if (c = 0) then 77 | y := x 78 | else 79 | y := x 80 | z := y 81 | a := z 82 | output x 83 | 84 | *) 85 | 86 | let lva_cfg2 = LiveVarAnalysis.lva globals (List.map fst lva_cfg1) in 87 | (* Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg2; *) 88 | 89 | let lva_cfg3 = LiveVarAnalysis.remove_unused_writes lva_cfg2 in 90 | (* Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg3; *) 91 | 92 | let lva_cfg4 = LiveVarAnalysis.lva globals (List.map fst lva_cfg3) in 93 | (* Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg4;*) 94 | 95 | let (reg_cfg, num_stack) = 96 | RegAlloc.reg_alloc (Util.zip (List.map fst f.params) InstrSelX86.argument_reg_numbers) 97 | InstrSelX86.num_regs 98 | (List.map fst lva_cfg4) 99 | in 100 | (* Format.printf "@\n%a@\n" BlockStructure.pp_cfg reg_cfg; *) 101 | 102 | let linear = LineariseCfg.cfg_to_linear reg_cfg in 103 | (* Format.printf "@\n%a@\n" LineariseCfg.pp_linear_list linear; *) 104 | 105 | let x86 = InstrSelX86.to_x86 safe linear num_stack in 106 | 107 | (f.fun_name, x86) 108 | -------------------------------------------------------------------------------- /src/lineariseCfg.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* Flatten the CFG into a list of three-address code. *) 20 | (* We generate labels of the form .block%d. The prefix . makes nasm treat them 21 | as local to the function, so we don't have problems assembling files with 22 | multiple functions and therefore duplicate block labels. *) 23 | 24 | open BlockStructure 25 | 26 | type linear = 27 | | Instr of block_elem 28 | | CJump of test * bool * string (* jump to string if the test is bool *) 29 | | Jump of string 30 | | Label of string 31 | | Return of var option 32 | 33 | let pp_linear (fmt : Format.formatter) (l : linear) : unit = 34 | match l with 35 | | Instr b -> 36 | Format.fprintf fmt " %a@\n" pp_block_elem b 37 | | CJump (v, b, s) -> 38 | Format.fprintf fmt " if (%a) = %s goto %s@\n" 39 | pp_test v 40 | (if b then "true" else "false") 41 | s 42 | | Jump s -> 43 | Format.fprintf fmt " goto %s@\n" s 44 | | Label s -> 45 | Format.fprintf fmt "%s:@\n" s 46 | | Return None -> 47 | Format.fprintf fmt " return@\n" 48 | | Return (Some v) -> 49 | Format.fprintf fmt " return %s@\n" (show_var v) 50 | 51 | type linear_list = linear list 52 | 53 | let rec pp_linear_list (fmt : Format.formatter) (ls : linear list) = 54 | match ls with 55 | | [] -> () 56 | | x::y -> 57 | (pp_linear fmt x; 58 | pp_linear_list fmt y) 59 | 60 | module I = Util.Intmap 61 | 62 | (* Initialise the mutable visited fields to false, and put CFG into a map *) 63 | let init_traversal (cfg : cfg) : cfg_entry I.t = 64 | List.iter (fun x -> x.started <- false; x.finished <- false) cfg; 65 | List.fold_left (fun map x -> I.add x.bnum x map) I.empty cfg 66 | 67 | (* Linearise the reachable cfg, starting from the block with index next_block, 68 | but don't do already visited blocks *) 69 | (* This is essentially a depth-first search, pre-order traversal *) 70 | let rec cfg_to_linear (next_block : int) (cfg : cfg_entry I.t) : linear list = 71 | let b = I.find next_block cfg in 72 | if b.finished then 73 | (* Don't output the block twice *) 74 | [] 75 | else 76 | (b.finished <- true; 77 | Label (".block" ^ string_of_int b.bnum) :: 78 | List.map (fun x -> Instr x) b.elems @ 79 | match b.next with 80 | | Return v -> [(Return v : linear)] 81 | | Next i -> 82 | if (I.find i cfg).started then 83 | (* We've started the next block, so we'll just jump to it *) 84 | [Jump (".block" ^ string_of_int i)] 85 | else 86 | (* We haven't started the next block, so we can put it here and omit 87 | the jump *) 88 | ((I.find i cfg).started <- true; 89 | cfg_to_linear i cfg) 90 | | Branch (v, t1, t2) -> 91 | let c1 = I.find t1 cfg in 92 | let c2 = I.find t2 cfg in 93 | match (c1.started, c2.started) with 94 | | (false, false) -> 95 | c1.started <- true; 96 | c2.started <- true; 97 | CJump (v, true, ".block" ^ string_of_int t1) :: 98 | cfg_to_linear t2 cfg @ 99 | cfg_to_linear t1 cfg 100 | | (true, true) -> 101 | [CJump (v, true, ".block" ^ string_of_int t1); 102 | Jump (".block" ^ string_of_int t2)] 103 | | (true, false) -> 104 | (c2.started <- true; 105 | [CJump (v, true, ".block" ^ string_of_int t1)] @ 106 | cfg_to_linear t2 cfg) 107 | | (false, true) -> 108 | (c1.started <- true; 109 | [CJump (v, false, ".block" ^ string_of_int t2)] @ 110 | cfg_to_linear t1 cfg)) 111 | 112 | let cfg_to_linear cfg = 113 | cfg_to_linear 0 (init_traversal cfg) 114 | -------------------------------------------------------------------------------- /src/util.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* General utility functions *) 20 | 21 | (* For lex/parse/type errors *) 22 | exception BadInput of string 23 | (* For cases that shouldn't be able to happen in the compiler *) 24 | 25 | exception InternalError of string 26 | 27 | (* Maps with string keys *) 28 | module Strmap = Map.Make(String) 29 | 30 | (* Maps with integer keys *) 31 | module Intmap = 32 | Map.Make(struct type t = int let (compare : int -> int -> int) = compare end) 33 | 34 | (* Build the list [0, ..., n-1] *) 35 | let count (n : int) : int list = 36 | let rec f n next = 37 | if n <= 0 then 38 | [] 39 | else 40 | next :: f (n - 1) (next + 1) 41 | in 42 | f n 0 43 | 44 | (* Combine 2 lists into a list of pairs, truncating the longer list *) 45 | let rec zip (l1 : 'a list) (l2 : 'b list) : ('a * 'b) list = 46 | match (l1,l2) with 47 | | (h1::t1, h2::t2) -> (h1,h2) :: zip t1 t2 48 | | _ -> [] 49 | 50 | (* Convert a list to a ; separated string *) 51 | let show_list' (d1 : string) (d2 : string) (show : 'a -> string) (l : 'a list) 52 | : string = 53 | let rec f l = 54 | match l with 55 | | [] -> "" 56 | | [h] -> show h 57 | | (h::t) -> 58 | show h ^ "; " ^ f t 59 | in 60 | d1 ^ f l ^ d2 61 | 62 | let show_list show = show_list' "[" "]" show 63 | let show_set show = show_list' "{" "}" show 64 | 65 | (* Pretty-pring a list, separated by ; *) 66 | let pp_list' (d1 : string) (d2 : string) (pp : Format.formatter -> 'a -> unit) 67 | (fmt : Format.formatter) (l : 'a list) 68 | : unit = 69 | let rec f fmt l = 70 | match l with 71 | | [] -> () 72 | | [h] -> pp fmt h 73 | | (h::t) -> 74 | Format.fprintf fmt "%a;@ %a" 75 | pp h 76 | f t 77 | in 78 | Format.fprintf fmt "%s@[%a@]%s" d1 f l d2 79 | 80 | let pp_list pp = pp_list' "[" "]" pp 81 | let pp_set pp = pp_list' "{" "}" pp 82 | 83 | let option_map (f : 'a -> 'b) (x : 'a option) : 'b option = 84 | match x with 85 | | None -> None 86 | | Some y -> Some (f y) 87 | 88 | (* Compare two options, where None is less than Some *) 89 | let option_compare (c : 'a -> 'b -> int) (o1 : 'a option) (o2 :'b option) 90 | : int = 91 | match (o1, o2) with 92 | | (None, None) -> 0 93 | | (Some _, None) -> 1 94 | | (None, Some _) -> -1 95 | | (Some x, Some y) -> c x y 96 | 97 | module Std = struct 98 | (* Copy and paste from extlib to get input_file without making a dependency. 99 | extlib is LGPL 2.1, and so this sub-module is too. 100 | https://github.com/ygrek/ocaml-extlib/blob/33f744ddb28d6a0f4c96832145e1a6e384644709/src/std.ml *) 101 | 102 | let finally handler f x = 103 | let r = ( 104 | try 105 | f x 106 | with 107 | e -> handler (); raise e 108 | ) in 109 | handler (); 110 | r 111 | 112 | let buf_len = 8192 113 | 114 | let input_all ic = 115 | let rec loop acc total buf ofs = 116 | let n = input ic buf ofs (buf_len - ofs) in 117 | if n = 0 then 118 | let res = Bytes.create total in 119 | let pos = total - ofs in 120 | let _ = Bytes.blit buf 0 res pos ofs in 121 | let coll pos buf = 122 | let new_pos = pos - buf_len in 123 | Bytes.blit buf 0 res new_pos buf_len; 124 | new_pos in 125 | let _ = List.fold_left coll pos acc in 126 | (* [res] doesn't escape and will not be mutated again *) 127 | Bytes.unsafe_to_string res 128 | else 129 | let new_ofs = ofs + n in 130 | let new_total = total + n in 131 | if new_ofs = buf_len then 132 | loop (buf :: acc) new_total (Bytes.create buf_len) 0 133 | else loop acc new_total buf new_ofs in 134 | loop [] 0 (Bytes.create buf_len) 0 135 | 136 | let input_file ?(bin=false) fname = 137 | let ch = (if bin then open_in_bin else open_in) fname in 138 | finally (fun () -> close_in ch) input_all ch 139 | 140 | end 141 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A simple compiler for a simple imperative language targeting x86-64 assembly 2 | (for systems that follow the System V ABI, so most operating systems **except** 3 | Windows). 4 | 5 | Running the compiler 6 | -------------------- 7 | 8 | The compiler relies on OCaml and the ocamlfind library. It is tested with OCaml 9 | versions 4.02.3 and 4.04.0, and ocamlfind 1.5.6 and 1.7.1. Ocamlfind can be 10 | installed via opam. If you have installed anything via Opam (for example, 11 | Merlin, or ocp-indent), ocamlfind is probably installed already. Opam installs 12 | everything into `.opam` in your home directory: 13 | ``` 14 | opam install ocamlfind 15 | ``` 16 | 17 | To compile the compiler run `make` in the `src` directory. This should produce 18 | `compile.byte` and `interp.byte` executables. Both take a single command-line 19 | argument: a source file name with the `.expl` extension. `interp.byte` runs the 20 | file, `compile.byte` compiles it, generating an x86-46 assembly `.s` file in 21 | *nasm* syntax. 22 | 23 | Compiling target programs 24 | ------------------------- 25 | 26 | First run `make` in the `runtime` directory to compile the very simple runtime 27 | library (using gcc). 28 | 29 | On Linux, use `nasm -f elf64 FILENAME.s` to assemble the compiler's output for 30 | *FILENAME*. On Mac, use `nasm -f macho64 --prefix _ FILENAME.s`. Then use `gcc 31 | COMPILER_DIR/runtime/io.o FILENAME.o -o FILENAME` to link the program with the 32 | runtime library. 33 | 34 | See the `tests` directory for some example programs. 35 | 36 | The source language 37 | ------------------- 38 | 39 | Keywords are `+ - * / | & << < > = || && ! := do while if then else input output true false array return function let bool int` 40 | 41 | Identifiers are strings of letters, underscores, and digits (not starting with 42 | a digit) that are not keywords. 43 | 44 | Numbers are sequences of digits that fit into a 64-bit signed integer. 45 | 46 | Comments start with '//' and last until the end of the line. 47 | 48 | All numerical operations are on signed, 2s complement 64-bit integers. 49 | 50 | Multi-dimensional arrays are supported. Array elements are 64-bit integers. 51 | 52 | op ::= 53 | | `+` --- Addition 54 | | `-` --- Subtraction 55 | | `*` --- Multiplication 56 | | `/` --- Division 57 | | `|` --- Bitwise or 58 | | `&` --- Bitwise and 59 | | `<<` --- Left shift 60 | | `<` --- Less than 61 | | `>` --- Greater than 62 | | `=` --- Equality 63 | | `||` --- Logical or 64 | | `&&` --- Logical and 65 | 66 | uop ::= 67 | | `!` --- Logical negation 68 | | `-` --- Unary minus 69 | 70 | indices ::= 71 | | `[` exp `]` indices 72 | | epsilon 73 | 74 | args ::= 75 | | exp 76 | | exp `,` args 77 | 78 | atomic_exp ::= 79 | | identifier indices --- Variable use and array indexing 80 | | identifier `(` args `)` --- Function call 81 | | number --- Integer constant 82 | | `true` --- Boolean constant 83 | | `false` --- Boolean constant 84 | | uop atomic_exp --- Unary operation 85 | | `array` indices --- Array allocation 86 | | `(` exp `)` --- Parenthesised expression 87 | 88 | exp ::= 89 | | atomic_exp op atomic_exp --- Binary operation 90 | | atomic_exp 91 | 92 | stmt ::= 93 | | identifier indices `:=` exp 94 | | `while` exp stmt 95 | | `do` stmt `while` exp 96 | | `if` exp `then` stmt `else` stmt 97 | | `{` stmts `}` 98 | | `input` identifier 99 | | `output` identifier 100 | | `return` identifier 101 | 102 | stmts ::= 103 | | epsilon 104 | | stmt stmts 105 | 106 | typ ::= 107 | | `int` --- a 64-bit signed integer 108 | | `bool` --- a boolean 109 | | `array` number --- an n dimensional array of 64-bit signed integers 110 | 111 | params ::= 112 | | `(` identifier `:` type `)` 113 | | `(` identifier `:` type `)` params 114 | 115 | var_decs ::= 116 | | epsilon 117 | | `let` identifier `:` typ `=` exp var_decs 118 | 119 | functions ::= 120 | | epsilon 121 | | `function` identifier params `:` typ `{` var_decs stmts `}` funcs 122 | 123 | program ::= 124 | | var_decs functions 125 | 126 | Loading the compiler in utop 127 | ---------------------------- 128 | 129 | First build the compiler, by running make from the src directory. In utop load 130 | the packages that the compiler uses with the `#require` command: 131 | 132 | ``` 133 | #require "str";; 134 | ``` 135 | 136 | You can add this line to the `.ocamlinit` file in your home directory, so that 137 | you don't have to manually enter it each time you start a new utop session. 138 | The contents of `.ocamlinit` are run each time you start a new utop. 139 | 140 | The OCaml compilation manager (ocamlbuild) stores all of the compiled OCaml 141 | sources in the `_build` directory, with the extension `.cmo`. The following 142 | tells utop to look there for source files. 143 | ``` 144 | #directory "_build";; 145 | ``` 146 | 147 | To load a particular module, for example, LineariseCfg, use the `#load_rec` command. 148 | ``` 149 | #load_rec "lineariseCfg.cmo";; 150 | ``` 151 | 152 | You can then open the module if you want (`open LineariseCfg`), or call 153 | functions directly (`LineariseCfg.cfg_to_linear`). Loading a compiled module in 154 | this way only gives you access to the values and functions that are exported in 155 | the corresponding `.mli` file. Often a good way to work on a file is to 156 | `#load_rec` all of the modules that it depends on, and then `#use` the file. 157 | -------------------------------------------------------------------------------- /src/shrinkImmediates.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* Ensure that all immediate arguments fit into 32 bits. We assume that 20 | constant propagation has ensured that no operation has two immediate 21 | operands, and we maintain that property here. Also, remove immediate 22 | operands from division, since the x86 signed division does not support them. 23 | *) 24 | 25 | open BlockStructure 26 | 27 | let tmp_var = NamedTmp ("SI",0) 28 | let tmp_var2 = NamedTmp ("SI",1) 29 | 30 | (* Build a series of assignments that puts the immediate n into the dest 31 | register, using only immediates of 32 bits or smaller *) 32 | let assign_imm (dest : var) (n : int64) : block_elem list = 33 | [AssignAtom (dest, Num (Int64.shift_right_logical n 32)); 34 | AssignOp (dest, Ident dest, Tokens.Lshift, Num 32L); 35 | AssignOp (dest, Ident dest, Tokens.BitOr, 36 | Num (Int64.logand n 0x00000000FFFFFFFFL))] 37 | 38 | let is_imm (a : atomic_exp) : bool = 39 | match a with 40 | | Ident _ -> false 41 | | Num _ -> true 42 | 43 | (* If the atomic_exp is a large immediate constant (takes more than 32 bits), 44 | then return the int64, else None. Relies on int64 being 2s complement. If 45 | it's negative (the top bit is 1), then check that the top 33 bits are 1. It 46 | it's non-negative, check that the top 33 bits are 0. 32 won't suffice, 47 | because when we truncate to 32 bits only, the top bit needs to be the sign 48 | bit. *) 49 | let get_large_imm (a : atomic_exp) : int64 option = 50 | match a with 51 | | Num n -> 52 | (* This is an arithmetic shift. *) 53 | let topmost = Int64.shift_right n 31 in 54 | if Int64.compare topmost 0L = 0 || 55 | Int64.compare topmost 0xFFFFFFFFFFFFFFFFL = 0 then 56 | None 57 | else 58 | Some n 59 | | _ -> None 60 | 61 | module T = Tokens 62 | 63 | let shrink_imm_elem (e : block_elem) : block_elem list = 64 | match e with 65 | | AssignOp (dest, a1, T.Div, a2) -> 66 | (* division cannot have immediate operands *) 67 | if is_imm a1 && is_imm a2 then 68 | [AssignAtom (tmp_var, a1); 69 | AssignAtom (tmp_var2, a2); 70 | AssignOp (dest, Ident tmp_var, T.Div, Ident tmp_var2)] 71 | else if is_imm a1 then 72 | [AssignAtom (tmp_var, a1); 73 | AssignOp (dest, Ident tmp_var, T.Div, a2)] 74 | else if is_imm a2 then 75 | [AssignAtom (tmp_var, a2); 76 | AssignOp (dest, a1, T.Div, Ident tmp_var)] 77 | else 78 | [e] 79 | | AssignOp (dest, a1, op, a2) -> 80 | assert (not (is_imm a1 && is_imm a2)); 81 | (match get_large_imm a1 with 82 | | Some n -> 83 | assign_imm tmp_var n @ 84 | [AssignOp (dest, Ident tmp_var, op, a2)] 85 | | None -> 86 | (match get_large_imm a2 with 87 | | Some n -> 88 | assign_imm tmp_var n @ 89 | [AssignOp (dest, a1, op, Ident tmp_var)] 90 | | None -> 91 | [e])) 92 | | AssignAtom (dest, a) -> 93 | (match get_large_imm a with 94 | | Some n -> assign_imm dest n 95 | | None -> [e]) 96 | | Ld (v1, v2, a) -> 97 | (match get_large_imm a with 98 | | Some n -> 99 | assign_imm tmp_var n @ [Ld(v1, v2, Ident tmp_var)] 100 | | None -> [e]) 101 | | St (r, a1, a2) -> 102 | (match (get_large_imm a1, get_large_imm a2) with 103 | | (None, None) -> [e] 104 | | (Some n1, None) -> 105 | assign_imm tmp_var n1 @ [St(r, Ident tmp_var, a2)] 106 | | (None, Some n2) -> 107 | assign_imm tmp_var n2 @ [St(r, a1, Ident tmp_var)] 108 | | (Some n1, Some n2) -> 109 | assign_imm tmp_var n1 @ 110 | assign_imm tmp_var2 n2 @ 111 | [St(r, Ident tmp_var, Ident tmp_var2)]) 112 | | Call (v, f, aes) -> 113 | let (s, es, _) = 114 | List.fold_right 115 | (fun (ae : atomic_exp) ((s : block_elem list), es, n) -> 116 | match get_large_imm ae with 117 | | None -> (s, ae::es, n) 118 | | Some imm -> 119 | (assign_imm (NamedTmp ("SI",n)) imm @ s, Ident tmp_var::es, n + 1)) 120 | aes 121 | ([], [], 2) 122 | in 123 | s @ [Call (v, f, es)] 124 | | BoundCheck (a1, a2) -> 125 | assert (not (is_imm a1 && is_imm a2)); 126 | (match get_large_imm a1 with 127 | | Some n -> 128 | assign_imm tmp_var n @ 129 | [BoundCheck (Ident tmp_var, a2)] 130 | | None -> 131 | (match get_large_imm a2 with 132 | | Some n -> 133 | assign_imm tmp_var n @ 134 | [BoundCheck (a1, Ident tmp_var)] 135 | | None -> 136 | [e])) 137 | | NullCheck v -> [NullCheck v] 138 | 139 | let shrink_imm (cfg : cfg) : cfg = 140 | List.map 141 | (fun cfg_entry -> 142 | { cfg_entry with 143 | elems = 144 | List.flatten 145 | (List.map shrink_imm_elem cfg_entry.elems) }) 146 | cfg 147 | -------------------------------------------------------------------------------- /src/blockInterp.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2016 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* An interpreter for CFGs *) 20 | 21 | open BlockStructure 22 | open AstInterp 23 | 24 | type store_t = int64 Varmap.t 25 | 26 | type heap_t = { mutable next_address : int; heap : bytes } 27 | 28 | (* Convert int64 to 8 bytes in little-endian order and place them in the 29 | given bytes at the given index *) 30 | let int64_to_bytes (b: bytes) index num : unit = 31 | let rec loop i inc = 32 | if inc < 8 then 33 | (Bytes.set b (index + inc) (Char.chr (Int64.to_int (Int64.logand 0xFFL i))); 34 | loop (Int64.shift_right i 8) (inc + 1)) 35 | else 36 | () 37 | in 38 | loop num 0 39 | 40 | (* Read 8 bytes in little-endian order into an int64 *) 41 | let bytes_to_int64 (b : bytes) index : int64 = 42 | let rec loop i inc = 43 | if inc >= 0 then 44 | let next = 45 | Int64.logor 46 | (Int64.shift_left i 8) 47 | (Int64.of_int (Char.code (Bytes.get b (index + inc)))) 48 | in 49 | loop next (inc - 1) 50 | else 51 | i 52 | in 53 | loop 0L 7 54 | 55 | let alloc1 (heap : heap_t) (n : int64) : int64 = 56 | let old_next = heap.next_address in 57 | let new_next = old_next + 8 * ((Int64.to_int n) + 1) in 58 | if new_next <= Bytes.length heap.heap then 59 | (heap.next_address <- new_next; 60 | int64_to_bytes heap.heap old_next n; 61 | Int64.of_int old_next) 62 | else 63 | raise (Crash "Out of memory") 64 | 65 | let rec alloc (heap : heap_t) (ns : int64 list) : int64 = 66 | match ns with 67 | | [] -> raise (Util.InternalError "Empty alloc") 68 | | [n] -> alloc1 heap n 69 | | n::ns -> 70 | let v = alloc1 heap n in 71 | for i = 0 to (Int64.to_int n) do 72 | int64_to_bytes heap.heap (Int64.to_int v + 8 * i) 73 | (alloc heap ns) 74 | done; 75 | v 76 | 77 | let interp_atomic_exp (store : store_t) (ae : atomic_exp) : int64 = 78 | match ae with 79 | | Ident x -> Varmap.find x store 80 | | Num i -> i 81 | 82 | let interp_block_elem (store : store_t) (heap : heap_t) (be : block_elem) 83 | : store_t = 84 | match be with 85 | | AssignOp (x, ae1, op, ae2) -> 86 | let n1 = interp_atomic_exp store ae1 in 87 | let n2 = interp_atomic_exp store ae2 in 88 | Varmap.add x (do_op op n1 n2) store 89 | | AssignAtom (x, ae) -> 90 | Varmap.add x (interp_atomic_exp store ae) store 91 | | Ld (x1, x2, ae) -> 92 | let n1 = Varmap.find x2 store in 93 | let n2 = interp_atomic_exp store ae in 94 | Varmap.add 95 | x1 96 | (bytes_to_int64 heap.heap (Int64.to_int (Int64.add n1 n2))) 97 | store 98 | | St (x, ae1, ae2) -> 99 | let n1 = Varmap.find x store in 100 | let n2 = interp_atomic_exp store ae1 in 101 | let n3 = interp_atomic_exp store ae2 in 102 | int64_to_bytes heap.heap (Int64.to_int (Int64.add n1 n2)) n3; 103 | store 104 | | Call (Some x, "input", []) -> 105 | Printf.printf "> "; 106 | (try 107 | let n = Int64.of_string (read_line ()) in 108 | Varmap.add x n store 109 | with Failure _ -> raise (Crash "not a 64-bit integer")) 110 | | Call (None, "output", [i]) -> 111 | begin 112 | print_string (Int64.to_string (interp_atomic_exp store i)); 113 | print_newline (); 114 | store 115 | end 116 | | Call (Some x, 117 | ("allocate1" | "allocate2" | "allocate3" | "allocate4" | "allocate5" | 118 | "allocate6" | "allocate7"), 119 | aes) -> 120 | let ns = List.map (interp_atomic_exp store) aes in 121 | Varmap.add x (alloc heap ns) store 122 | | Call _ -> 123 | raise (Util.InternalError "Unknown function call in blockInterp") 124 | 125 | let rec interp_basic_block (store : store_t) (heap : heap_t) 126 | (bb : basic_block) : store_t = 127 | match bb with 128 | | [] -> store 129 | | be::bes -> 130 | let store' = interp_block_elem store heap be in 131 | interp_basic_block store' heap bes 132 | 133 | let interp_next_block store nb : int = 134 | match nb with 135 | | End -> 0 136 | | Next i -> i 137 | | Branch ((ae1, op, ae2), i1, i2) -> 138 | let n1 = interp_atomic_exp store ae1 in 139 | let n2 = interp_atomic_exp store ae2 in 140 | match op with 141 | | Eq -> if Int64.compare n1 n2 = 0 then i1 else i2 142 | | Lt -> if Int64.compare n1 n2 < 0 then i1 else i2 143 | | Gt -> if Int64.compare n1 n2 > 0 then i1 else i2 144 | 145 | let interp_cfg (store : store_t) (heap : heap_t) (cfg : cfg) : unit = 146 | let cfgmap = LineariseCfg.init_traversal cfg in 147 | let rec loop (index : int) store = 148 | let node = Util.Intmap.find index cfgmap in 149 | let store' = interp_basic_block store heap node.elems in 150 | let next_index = interp_next_block store node.next in 151 | loop next_index store' 152 | in 153 | loop 1 store 154 | 155 | let interp_prog (heap_size : int) (cfg : cfg) : unit = 156 | interp_cfg 157 | Varmap.empty 158 | { next_address = 0; heap = Bytes.make heap_size '\000'} 159 | cfg 160 | -------------------------------------------------------------------------------- /src/regAlloc.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* A very simple register allocator. Global variables never get put in a 20 | register. Function parameters stay in the registers that they are passed in. 21 | Local variables get put in the remaining registers in no particular order. 22 | The rest are put on the stack *) 23 | 24 | open Util 25 | open BlockStructure 26 | 27 | let get_vars_ae (ae : atomic_exp) (vars : Varset.t) : Varset.t = 28 | match ae with 29 | | Ident r -> Varset.add r vars 30 | | Num _ -> vars 31 | 32 | let get_vars_be (be : block_elem) (vars : Varset.t) : Varset.t = 33 | match be with 34 | | AssignOp (r, ae1, _, ae2) -> 35 | get_vars_ae ae1 (get_vars_ae ae2 (Varset.add r vars)) 36 | | AssignAtom (r, ae) -> get_vars_ae ae (Varset.add r vars) 37 | | Ld (v1, v2, ae) -> 38 | get_vars_ae ae (Varset.add v1 (Varset.add v2 vars)) 39 | | St (r, ae1, ae2) -> 40 | get_vars_ae ae1 (get_vars_ae ae2 (Varset.add r vars)) 41 | | Call (None, _, aes) -> 42 | List.fold_right get_vars_ae aes vars 43 | | Call (Some i, _, aes) -> 44 | List.fold_right get_vars_ae aes (Varset.add i vars) 45 | | BoundCheck (a1, a2) -> 46 | get_vars_ae a1 (get_vars_ae a2 vars) 47 | | NullCheck v -> 48 | Varset.add v vars 49 | 50 | let get_vars_test (ae1, _, ae2) (vars : Varset.t) : Varset.t = 51 | List.fold_right get_vars_ae [ae1; ae2] vars 52 | 53 | let get_vars_nb (nb : next_block) (vars : Varset.t) : Varset.t = 54 | match nb with 55 | | Return None -> vars 56 | | Return (Some v) -> Varset.add v vars 57 | | Next _ -> vars 58 | | Branch (r, _, _) -> get_vars_test r vars 59 | 60 | let get_vars_block (b : cfg_entry) (vars : Varset.t) : Varset.t = 61 | List.fold_right get_vars_be b.elems (get_vars_nb b.next vars) 62 | 63 | (* Get all of the variables mentioned in the cfg *) 64 | let get_vars (cfg : cfg) : Varset.t = 65 | List.fold_right get_vars_block cfg Varset.empty 66 | 67 | let is_global v = 68 | match v with 69 | | NamedSource (_, SourceAst.Global) -> true 70 | | _ -> false 71 | 72 | let is_param v = 73 | match v with 74 | | NamedSource (_, SourceAst.Parameter) -> true 75 | | _ -> false 76 | 77 | let is_local v = 78 | match v with 79 | | NamedSource (_, SourceAst.Local) -> true 80 | | NamedTmp _ -> true 81 | | _ -> false 82 | 83 | (* Given the vars used in a function, allocate them into registers. Ignore the 84 | live ranges and clash graph. This is the simplest strategy that will yield a 85 | working program. Return also the number of stack slots used. *) 86 | (* TODO: actually put some locals into registers *) 87 | let build_regalloc_map (fun_params : (SourceAst.id * int) list) (vars : Varset.t) 88 | : (var Varmap.t * int) = 89 | let vars = Varset.elements vars in 90 | let (globals, vars) = List.partition is_global vars in 91 | let (locals, vars) = List.partition is_local vars in 92 | assert (List.for_all is_param vars); (* There shouldn't be any allocated registers already *) 93 | let global_map = 94 | List.map 95 | (fun g -> 96 | match g with 97 | | NamedSource (i, _) as n -> (n, Global i) 98 | | _ -> assert false) 99 | globals 100 | in 101 | let param_map = 102 | List.map (fun (p, i) -> (id_to_var p, Vreg i)) fun_params 103 | in 104 | let local_map = 105 | List.map2 (fun l i -> (l, Stack i)) locals (count (List.length locals)) 106 | in 107 | let m = 108 | List.fold_right (fun (k,v) m -> Varmap.add k v m) 109 | (global_map @ param_map @ local_map) 110 | Varmap.empty 111 | in 112 | (m, List.length locals) 113 | 114 | let reg_alloc_ae (map : var Varmap.t) (ae : atomic_exp) : atomic_exp = 115 | match ae with 116 | | Ident v -> 117 | Ident (Varmap.find v map) 118 | | Num x -> Num x 119 | 120 | let reg_alloc_be (map : var Varmap.t) (be : block_elem) : block_elem = 121 | match be with 122 | | AssignOp (v, ae1, op, ae2) -> 123 | AssignOp (Varmap.find v map, reg_alloc_ae map ae1, op, reg_alloc_ae map ae2) 124 | | AssignAtom (v, ae) -> 125 | AssignAtom (Varmap.find v map, reg_alloc_ae map ae) 126 | | Ld (v1, v2, ae) -> 127 | Ld (Varmap.find v1 map, Varmap.find v2 map, reg_alloc_ae map ae) 128 | | St (v, ae1, ae2) -> 129 | St (Varmap.find v map, reg_alloc_ae map ae1, reg_alloc_ae map ae2) 130 | | Call (v, f, aes) -> 131 | Call (Util.option_map (fun v -> Varmap.find v map) v, f, 132 | List.map (reg_alloc_ae map) aes) 133 | | BoundCheck (a1, a2) -> 134 | BoundCheck (reg_alloc_ae map a1, reg_alloc_ae map a2) 135 | | NullCheck v -> 136 | NullCheck (Varmap.find v map) 137 | 138 | let reg_alloc_test (map : var Varmap.t) (ae1, op, ae2) : test = 139 | (reg_alloc_ae map ae1, op, reg_alloc_ae map ae2) 140 | 141 | let reg_alloc_nb (map : var Varmap.t) (nb : next_block) : next_block = 142 | match nb with 143 | | Return (Some v) -> Return (Some (Varmap.find v map)) 144 | | Return None -> Return None 145 | | Next i -> Next i 146 | | Branch (t, t1, t2) -> 147 | Branch (reg_alloc_test map t, t1, t2) 148 | 149 | (* fun_params is an association list mapping the function's parameters to the 150 | registers that they were passed in. num_regs is the total number of 151 | registers that can be used. The returned int is the number of variables 152 | put on the stack *) 153 | let reg_alloc (fun_params : (SourceAst.id * int) list) (num_regs : int) 154 | (cfg : cfg) 155 | : (cfg * int) = 156 | let (map, num_stacks) = build_regalloc_map fun_params (get_vars cfg) in 157 | let cfg = 158 | List.map (fun entry -> { bnum = entry.bnum; 159 | elems = List.map (reg_alloc_be map) entry.elems; 160 | next = reg_alloc_nb map entry.next; 161 | finished = false; 162 | started = false }) cfg in 163 | (cfg, num_stacks) 164 | -------------------------------------------------------------------------------- /src/tokens.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* The language's tokens, and a simple lexer *) 20 | 21 | open Util 22 | 23 | (* Primitive operators *) 24 | (* When extending with operators that can have side effects, be careful to 25 | check the rest of the compiler. Some optimisations can only be done on 26 | side-effect-free operations. Right now only Div can have an effect 27 | (divide-by-zero exception) *) 28 | 29 | type op = 30 | (* integer operations *) 31 | | Plus 32 | | Minus 33 | | Times 34 | | Div 35 | | Lshift 36 | | BitOr 37 | | BitAnd 38 | (* comparisons *) 39 | | Lt 40 | | Gt 41 | | Eq 42 | (* boolean operations *) 43 | | And 44 | | Or 45 | 46 | type uop = 47 | (* boolean negation *) 48 | | Not 49 | 50 | let show_op op = 51 | match op with 52 | | Plus -> "+" 53 | | Minus -> "-" 54 | | Times -> "*" 55 | | Div -> "/" 56 | | Lt -> "<" 57 | | Gt -> ">" 58 | | Eq -> "=" 59 | | And -> "&&" 60 | | Or -> "||" 61 | | Lshift -> "<<" 62 | | BitOr -> "|" 63 | | BitAnd -> "&" 64 | 65 | let show_uop (uop : uop) : string = 66 | match uop with 67 | | Not -> "!" 68 | 69 | type token = 70 | | Num of int64 71 | | Ident of string (* identifiers represent various names, including function 72 | and variable names *) 73 | | Op of op 74 | | Uop of uop 75 | (* Punctuation *) 76 | | Lparen 77 | | Rparen 78 | | Lcurly 79 | | Rcurly 80 | | Lbrac 81 | | Rbrac 82 | | Colon 83 | | Comma 84 | (* Identifier-like keywords *) 85 | | While 86 | | Do 87 | | If 88 | | Then 89 | | Else 90 | | Assign 91 | | True 92 | | False 93 | | Input 94 | | Output 95 | | Array 96 | | Int 97 | | Bool 98 | | Let 99 | | Function 100 | | Return 101 | 102 | let show_token (t : token) : string = 103 | match t with 104 | | Num i -> Int64.to_string i 105 | | Ident s -> s 106 | | Op o -> show_op o 107 | | Uop u -> show_uop u 108 | | Lparen -> "(" 109 | | Rparen -> ")" 110 | | Lcurly -> "{" 111 | | Rcurly -> "}" 112 | | Lbrac -> "[" 113 | | Rbrac -> "]" 114 | | Comma -> "," 115 | | Colon -> ":" 116 | | While -> "while" 117 | | Do -> "do" 118 | | If -> "if" 119 | | Then -> "then" 120 | | Else -> "else" 121 | | Assign -> ":=" 122 | | True -> "true" 123 | | False -> "false" 124 | | Input -> "input" 125 | | Output -> "output" 126 | | Array -> "array" 127 | | Int -> "int" 128 | | Bool -> "bool" 129 | | Let -> "let" 130 | | Function -> "function" 131 | | Return -> "return" 132 | 133 | (* Pretty-print a token *) 134 | let pp_token (fmt : Format.formatter) (t : token) = 135 | Format.fprintf fmt "%s" (show_token t) 136 | 137 | (* Tokens annotated with which line number they appear on *) 138 | type tok_loc = (token * int) 139 | 140 | (* Pretty-print a token and location *) 141 | let pp_tok_loc (fmt : Format.formatter) ((t, l) : tok_loc) = 142 | Format.fprintf fmt "(%a, %d)" pp_token t l 143 | 144 | (* The mapping of keword strings to the corresponding tokens *) 145 | let keywords : (string * token) list = 146 | (* Derive the mapping from the to_string functions to avoid duplication *) 147 | (show_uop Not, Uop Not) :: 148 | List.map (fun o -> (show_op o, Op o)) 149 | [Plus; Minus; Times; Div; Lt; Gt; Eq; And; Or; Lshift; BitOr; BitAnd] @ 150 | List.map (fun t -> (show_token t, t)) 151 | [Do; While; If; Then; Else; Array; Assign; True; Input; Output; False; 152 | Lparen; Rparen; Lcurly; Rcurly; Lbrac; Rbrac; Int; Bool; Colon; Let; 153 | Return; Function; Comma] 154 | 155 | (* Map each keyword string to its corresponding token *) 156 | let keyword_map : token Strmap.t = 157 | List.fold_left (fun m (k,v) -> Strmap.add k v m) Strmap.empty keywords 158 | 159 | (* Regular expressions that describe various syntactic entities *) 160 | (* Build the keyword regexp out of the mapping to avoid duplication *) 161 | let keyword_re = 162 | Str.regexp 163 | (String.concat "\\|" 164 | (List.map (fun (s, _) -> Str.quote s) keywords)) 165 | let number_re = Str.regexp "[0-9]+" 166 | let ident_re = Str.regexp "[a-zA-Z_][a-zA-Z0-9_]*" 167 | let space_re = Str.regexp "[ \t]+\\|//.*" 168 | let newline_re = Str.regexp "\n\\|\r\n" 169 | 170 | (* Read all the tokens from s, using pos to index into the string and line_n 171 | to track the current line number, for error reporting later on. Return them 172 | in a list. *) 173 | (* The lex function repeatedly finds which regexp matches the string, starting 174 | from the current position, adds that token to the result, and then updates 175 | the starting position to find the next token *) 176 | let rec lex (s : string) (pos : int) (line_n : int) : tok_loc list = 177 | if pos >= String.length s then 178 | [] 179 | else if Str.string_match space_re s pos then 180 | lex s (Str.match_end ()) line_n 181 | else if Str.string_match newline_re s pos then 182 | lex s (Str.match_end ()) (line_n + 1) 183 | else if Str.string_match ident_re s pos then 184 | let id = Str.matched_string s in 185 | if Strmap.mem id keyword_map then 186 | (* The identifier is also a keyword *) 187 | let tok = Strmap.find id keyword_map in 188 | (tok, line_n) :: lex s (Str.match_end ()) line_n 189 | else 190 | (Ident id, line_n) :: lex s (Str.match_end ()) line_n 191 | else if Str.string_match keyword_re s pos then 192 | let tok = Strmap.find (Str.matched_string s) keyword_map in 193 | (tok, line_n) :: lex s (Str.match_end ()) line_n 194 | else if Str.string_match number_re s pos then 195 | let num = 196 | try Int64.of_string (Str.matched_string s) 197 | with Failure _ -> 198 | raise (BadInput ("Lex error: integer constant too big " ^ 199 | Str.matched_string s ^ 200 | " on line " ^ 201 | string_of_int line_n)) 202 | in 203 | (Num num, line_n) :: lex s (Str.match_end ()) line_n 204 | else 205 | raise (BadInput ("lex error: at character '" ^ 206 | String.sub s pos 1 ^ 207 | "' on line " ^ 208 | string_of_int line_n)) 209 | -------------------------------------------------------------------------------- /src/unnestExp.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* Flatten expressions so that they follow this grammar. We don't introduce a 20 | new type, but do define predicates on the SourceAst.exp type. 21 | 22 | type ae = 23 | | Num of int64 24 | | Bool of bool 25 | | Ident of SourceAst.id 26 | 27 | type flat_exp = 28 | | Num of int64 29 | | Bool of bool 30 | | Ident of SourceAst.id * ae 31 | | Op of ae * op * ae 32 | | Uop of ae 33 | | Array of ae list 34 | | Call of SourceAst.id * ae list 35 | *) 36 | 37 | open SourceAst 38 | module T = Tokens 39 | 40 | let is_atomic (e : exp) : bool = 41 | match e with 42 | | Num _ -> true 43 | | Bool _ -> true 44 | | Ident (_, []) -> true 45 | | Ident (_, _) -> false 46 | | Op _ -> false 47 | | Uop _ -> false 48 | | Array _ -> false 49 | | Call _ -> false 50 | 51 | let is_flat (e : exp) : bool = 52 | match e with 53 | | Num _ -> true 54 | | Bool _ -> true 55 | | Ident (_, []) -> true 56 | | Ident (_, [e]) -> is_atomic e 57 | | Ident (_, _) -> false 58 | | Op (e1, _, e2) -> is_atomic e1 && is_atomic e2 59 | | Uop (_, e) -> is_atomic e 60 | | Array es -> List.for_all is_atomic es 61 | | Call (_, es) -> List.for_all is_atomic es 62 | 63 | let unnest (stmts : stmt list) : stmt list = 64 | 65 | (* Generate unique names for temporary variables *) 66 | let next_ident = ref 0 in 67 | let get_ident () : id = 68 | let x = !next_ident in 69 | next_ident := (!next_ident) + 1; 70 | Temp ("UE", x) 71 | in 72 | 73 | (* indices must all be atomic, according to is_atomic above. Returns a flat 74 | expression according to is_flat. *) 75 | let rec unnest_indices (arr : id) (indices : exp list) : stmt list * exp = 76 | match indices with 77 | | [] -> ([], Ident (arr, [])) 78 | | [i] -> ([], Ident (arr, [i])) 79 | | (i::indices) -> 80 | let id = get_ident () in 81 | let (s, e) = unnest_indices id indices in 82 | (Assign (id, [], Ident (arr, [i])) :: s, 83 | e) 84 | in 85 | 86 | (* Flatten out and expression into a list of statements and an expression by 87 | using temporary variables to store the results of each sub-expression. The 88 | expression returned satisfies is_flat above. 89 | 90 | Warning: As implemented, this is O(n^2) because of the list appending. 91 | This is unlikely to matter for human written code, as it is O(n^2) where n 92 | is the size of an expression, not the whole program. However, for 93 | compiling machine-generated code, this could be a problem. A little care 94 | could be taken to collect results in a tree-structure and then convert it 95 | into a list. *) 96 | let rec unnest_exp (e : exp) : stmt list * exp = 97 | match e with 98 | | Num i -> ([], Num i) 99 | | Bool b -> ([], Bool b) 100 | | Ident (i, es) -> 101 | let (s_list, aes) = List.split (List.map unnest_exp_atomic es) in 102 | let (s_list2, ae) = unnest_indices i aes in 103 | (List.flatten s_list @ s_list2, ae) 104 | | Op (e1, T.And, e2) -> 105 | let (s1, f1) = unnest_exp e1 in 106 | let (s2, f2) = unnest_exp e2 in 107 | (* f1 && f2 --> (if f1 then tmp := f2 else t := false); tmp *) 108 | let tmp = get_ident () in 109 | (s1 @ 110 | [Ite (f1, 111 | Stmts (s2 @ [Assign (tmp, [], f2)]), 112 | Assign (tmp, [], Bool false))], 113 | (Ident (tmp, []))) 114 | | Op (e1, T.Or, e2) -> 115 | let (s1, f1) = unnest_exp e1 in 116 | let (s2, f2) = unnest_exp e2 in 117 | (* f1 || f2 --> (if f1 then tmp := true else tmp := f2 ); tmp *) 118 | let tmp = get_ident () in 119 | (s1 @ 120 | [Ite (f1, 121 | Assign (tmp, [], Bool true), 122 | Stmts (s2 @ [Assign (tmp, [], f2)]))], 123 | (Ident (tmp, []))) 124 | | Op (e1, op, e2) -> 125 | let (s1, a1) = unnest_exp_atomic e1 in 126 | let (s2, a2) = unnest_exp_atomic e2 in 127 | (s1 @ s2, Op (a1, op, a2)) 128 | | Uop (uop, e) -> 129 | let (s, a) = unnest_exp_atomic e in 130 | (s, Uop (uop, a)) 131 | | Array es -> 132 | let (s_list, aes) = List.split (List.map unnest_exp_atomic es) in 133 | (List.flatten s_list, Array aes) 134 | | Call (f, es) -> 135 | let (s_list, aes) = List.split (List.map unnest_exp_atomic es) in 136 | (List.flatten s_list, Call (f, aes)) 137 | 138 | 139 | (* Similar to unnest_exp, but ensures that the returned exp is atomic, rather 140 | than just flat. *) 141 | and unnest_exp_atomic (e : exp) : stmt list * exp = 142 | let (s, f) = unnest_exp e in 143 | if is_atomic f then 144 | (s, f) 145 | else 146 | let id = get_ident () in 147 | (s @ [Assign (id, [], f)], Ident (id, [])) 148 | 149 | and unnest_exp_for_test (e : exp) : stmt list * exp = 150 | let (s, f) = unnest_exp e in 151 | match f with 152 | | Ident (_, [_]) -> 153 | let id = get_ident () in 154 | (s @ [Assign (id, [], f)], Ident (id, [])) 155 | | _ -> (s, f) 156 | in 157 | 158 | let rec unnest_stmt (s : stmt) : stmt list = 159 | match s with 160 | | Assign (x, [], e) -> 161 | let (s, f) = unnest_exp e in 162 | s @ [Assign (x, [], f)] 163 | | Assign (x, es, e) -> 164 | let (s_list, aes) = List.split (List.map unnest_exp_atomic es) in 165 | let (s, f) = unnest_exp_atomic e in 166 | (match unnest_indices x aes with 167 | | (s', Ident (i, [])) -> 168 | List.flatten s_list @ s' @ s @ [Assign (i, [], f)] 169 | | (s', Ident (i, [f'])) -> 170 | List.flatten s_list @ s' @ s @ [Assign (i, [f'], f)] 171 | | _ -> assert false) 172 | | DoWhile (s0, e, s1) -> 173 | let s0' = unnest_stmt s0 in 174 | let (se1, e') = unnest_exp_for_test e in 175 | let s1' = unnest_stmt s1 in 176 | [DoWhile (stmts_to_stmt (s0' @ se1), e', stmts_to_stmt s1')] 177 | | Ite (e, s1, s2) -> 178 | let (se, e') = unnest_exp_for_test e in 179 | let s1' = unnest_stmt s1 in 180 | let s2' = unnest_stmt s2 in 181 | se @ [Ite (e', stmts_to_stmt s1', stmts_to_stmt s2')] 182 | | Stmts s_list -> 183 | List.flatten (List.map unnest_stmt s_list) 184 | | In id -> [In id] 185 | | Out id -> [Out id] 186 | | Loc (stmt, _) -> 187 | unnest_stmt stmt 188 | | Return id -> [Return id] 189 | in 190 | 191 | List.flatten (List.map unnest_stmt stmts) 192 | -------------------------------------------------------------------------------- /src/x86.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2016 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* A fragment of x86-64 AST, and printing in NASM syntax *) 20 | (* Translated from HOL4 in examples/machine-code/instruction-set-models/x86_64 *) 21 | 22 | open Format 23 | open Util 24 | 25 | (* 64-bit registers *) 26 | type reg = 27 | | RAX | RBX | RCX | RDX | RSP | RBP | RSI | RDI 28 | | R8 | R9 | R10 | R11 | R12 | R13 | R14 | R15 29 | 30 | let show_reg r = 31 | match r with 32 | | RAX -> "rax" 33 | | RBX -> "rbx" 34 | | RCX -> "rcx" 35 | | RDX -> "rdx" 36 | | RSP -> "rsp" 37 | | RBP -> "rbp" 38 | | RSI -> "rsi" 39 | | RDI -> "rdi" 40 | | R8 -> "r8" 41 | | R9 -> "r9" 42 | | R10 -> "r10" 43 | | R11 -> "r11" 44 | | R12 -> "r12" 45 | | R13 -> "r13" 46 | | R14 -> "r14" 47 | | R15 -> "r15" 48 | 49 | let pp_reg fmt r = 50 | fprintf fmt "%s" (show_reg r) 51 | 52 | type byte_reg = B of reg 53 | 54 | let show_byte_reg (B r) = 55 | match r with 56 | | RAX -> "al" 57 | | RBX -> "bl" 58 | | RCX -> "cl" 59 | | RDX -> "dl" 60 | | RSP -> "spl" 61 | | RBP -> "bpl" 62 | | RSI -> "sil" 63 | | RDI -> "dil" 64 | | R8 -> "r8b" 65 | | R9 -> "r9b" 66 | | R10 -> "r10b" 67 | | R11 -> "r11b" 68 | | R12 -> "r12b" 69 | | R13 -> "r13b" 70 | | R14 -> "r14b" 71 | | R15 -> "r15b" 72 | 73 | let pp_byte_reg fmt r = 74 | fprintf fmt "%s" (show_byte_reg r) 75 | 76 | type address_component = 77 | | EAScaled of int * reg 78 | | EAReg of reg 79 | | EAConst of int64 80 | 81 | type displacement = 82 | | Concrete_disp of int64 83 | | Label_disp of string 84 | 85 | type rm = 86 | | Zr of reg (* register *) 87 | (* Zm (Some (scale, index), Some base, Some displacement) = 88 | mem[scale * index + base + displacement] *) 89 | | Zm of (int * reg) option * reg option * displacement option 90 | 91 | let rec simple_add_exp fmt l = 92 | match l with 93 | | [] -> () 94 | | [Some x] -> fprintf fmt "%s" x 95 | | [None] -> fprintf fmt "0" 96 | | Some x :: y -> fprintf fmt "%s + %a" x simple_add_exp y 97 | | None :: y -> simple_add_exp fmt y 98 | 99 | let show_displacement d = 100 | match d with 101 | | Concrete_disp i -> Int64.to_string i 102 | | Label_disp l -> l ^ "_Global" 103 | 104 | let pp_rm fmt rm = 105 | match rm with 106 | | Zr r -> fprintf fmt "%a" pp_reg r 107 | | Zm (idx,base,disp) -> 108 | fprintf fmt "qword [%a]" 109 | simple_add_exp [option_map (fun (scale,i) -> show_reg i ^ " * " ^ string_of_int scale) idx; 110 | option_map show_reg base; 111 | option_map show_displacement disp] 112 | 113 | type dest_src = 114 | | Zrm_i of rm * int64 (* mnemonic r/mXX, immXX (sign-extended) *) 115 | | Zrm_r of rm * reg (* mnemonic r/mXX, rXX *) 116 | | Zr_rm of reg * rm (* mnemonic rXX, r/mXX *) 117 | 118 | let pp_dest_src fmt ds = 119 | match ds with 120 | | Zrm_i (rm, i) -> 121 | fprintf fmt "%a, %Ld" 122 | pp_rm rm 123 | i 124 | | Zrm_r (rm, reg) -> 125 | fprintf fmt "%a, %a" 126 | pp_rm rm 127 | pp_reg reg 128 | | Zr_rm (reg, rm) -> 129 | fprintf fmt "%a, %a" 130 | pp_reg reg 131 | pp_rm rm 132 | 133 | type imm_rm = 134 | | Zi_rm of rm (* r/mXX *) 135 | | Zi of int64 (* sign-extended immediate *) 136 | 137 | let pp_imm_rm fmt ir = 138 | match ir with 139 | | Zi_rm rm -> pp_rm fmt rm 140 | | Zi i -> fprintf fmt "%Ld" i 141 | 142 | type binop_name = 143 | | Zadc | Zadd | Zand | Zcmp | Zor | Zshl | Zshr 144 | | Zsar | Zsub | Zsbb | Ztest | Zxor 145 | 146 | let show_binop_name b = 147 | match b with 148 | | Zadc -> "adc" 149 | | Zadd -> "add" 150 | | Zand -> "and" 151 | | Zcmp -> "cmp" 152 | | Zor -> "or" 153 | | Zshl -> "shl" 154 | | Zshr -> "shr" 155 | | Zsar -> "sar" 156 | | Zsub -> "sub" 157 | | Zsbb -> "sbb" 158 | | Ztest -> "test" 159 | | Zxor -> "xor" 160 | 161 | let pp_binop_name fmt b = 162 | fprintf fmt "%s" (show_binop_name b) 163 | 164 | type monop_name = Zdec | Zinc | Znot | Zneg 165 | 166 | let show_monop_name b = 167 | match b with 168 | | Zdec -> "dec" 169 | | Zinc -> "inc" 170 | | Znot -> "not" 171 | | Zneg -> "neg" 172 | 173 | let pp_monop_name fmt b = 174 | fprintf fmt "%s" (show_monop_name b) 175 | 176 | type cond = (* this list is not complete *) 177 | | Z_ALWAYS (* N = not *) 178 | | Z_E | Z_NE (* E = equal *) 179 | | Z_S | Z_NS (* S = signed *) 180 | | Z_A | Z_NA (* A = above *) 181 | | Z_B | Z_NB (* B = below *) 182 | | Z_L | Z_NL (* L = less *) 183 | | Z_G | Z_NG (* L = greater *) 184 | 185 | let pp_cond fmt c = 186 | fprintf fmt "%s" 187 | (match c with 188 | | Z_ALWAYS -> "mp" 189 | | Z_E -> "e" 190 | | Z_NE -> "ne" 191 | | Z_S -> "s" 192 | | Z_NS -> "ns" 193 | | Z_A -> "a" 194 | | Z_NA -> "na" 195 | | Z_B -> "b" 196 | | Z_NB -> "nb" 197 | | Z_L -> "l" 198 | | Z_NL -> "nl" 199 | | Z_G -> "g" 200 | | Z_NG -> "ng") 201 | 202 | type instruction = 203 | | Zlabel of string 204 | | Zbinop of binop_name * dest_src 205 | | Zmonop of monop_name * rm 206 | (* 207 | | Zcmpxchg of rm * reg 208 | | Zxadd of rm * reg 209 | | Zxchg of rm * reg 210 | *) 211 | (* either reg := reg * rm; or reg := rm * int *) 212 | | Zimul of reg * rm * int64 option 213 | | Zidiv of rm (* RAX := RDX,RAX / rm; RDX := RDX,RAX mod rm *) 214 | | Zlea of dest_src 215 | | Zpop of rm 216 | | Zpush of imm_rm 217 | | Zcall of string 218 | | Zret 219 | | Zleave 220 | | Zcpuid 221 | | Zmov of dest_src 222 | (* | Zmovzx of dest_src *) 223 | (* jcc includes jmp rel, i.e. unconditional relative jumps. *) 224 | | Zjcc of cond * string 225 | | Zjmp of rm (* jmp excludes relative jumps, see jcc. *) 226 | | Zset of cond * byte_reg (* Set operates on a byte of memory or 227 | register. Here, we'll only use registers 228 | *) 229 | 230 | let pp_instruction fmt i = 231 | match i with 232 | | Zlabel s -> 233 | fprintf fmt "%s:" s 234 | | Zbinop (n, ds) -> 235 | fprintf fmt "%a %a" 236 | pp_binop_name n 237 | pp_dest_src ds 238 | | Zmonop (n, rm) -> 239 | fprintf fmt "%a %a" 240 | pp_monop_name n 241 | pp_rm rm 242 | | Zimul (r1, rm2, None) -> 243 | (* r1 := r1 * rm2 *) 244 | fprintf fmt "imul %a, %a" 245 | pp_reg r1 246 | pp_rm rm2 247 | | Zimul (r1, rm2, Some i) -> 248 | (* r1 := rm2 * i *) 249 | fprintf fmt "imul %a, %a, %Ld" 250 | pp_reg r1 251 | pp_rm rm2 252 | i 253 | | Zidiv rm -> 254 | fprintf fmt "idiv %a" 255 | pp_rm rm 256 | | Zlea ds -> 257 | fprintf fmt "lea %a" 258 | pp_dest_src ds 259 | | Zpop rm -> 260 | fprintf fmt "pop %a" 261 | pp_rm rm 262 | | Zpush ir -> 263 | fprintf fmt "push %a" 264 | pp_imm_rm ir 265 | | Zcall lab -> 266 | fprintf fmt "call %s" lab 267 | | Zret -> 268 | fprintf fmt "ret" 269 | | Zleave -> 270 | fprintf fmt "leave" 271 | | Zcpuid -> 272 | fprintf fmt "cpuid" 273 | | Zmov ds -> 274 | fprintf fmt "mov %a" 275 | pp_dest_src ds 276 | | Zjcc (c, lab) -> 277 | fprintf fmt "j%a %s" 278 | pp_cond c 279 | lab 280 | | Zjmp rm -> 281 | fprintf fmt "jmp %a" 282 | pp_rm rm 283 | | Zset (cond, reg) -> 284 | fprintf fmt "set%a %a" 285 | pp_cond cond 286 | pp_byte_reg reg 287 | 288 | let pp_instr_list fmt il = 289 | List.iter 290 | (fun i -> 291 | match i with 292 | | Zlabel _ -> fprintf fmt "%a@\n" pp_instruction i 293 | | _ -> fprintf fmt " %a@\n" pp_instruction i) 294 | il 295 | -------------------------------------------------------------------------------- /src/liveVarAnalysis.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | open Util 20 | open BlockStructure 21 | 22 | type cfg_annot = { gen : Varset.t; kill : Varset.t; live_exit : Varset.t } 23 | 24 | type cfg = (cfg_entry * cfg_annot) list 25 | 26 | let pp_cfg_annot fmt c = 27 | Format.fprintf fmt "{@[gen=%a;@ kill=%a; live_exit=%a@]}" 28 | Varset.pp c.gen 29 | Varset.pp c.kill 30 | Varset.pp c.live_exit 31 | 32 | let pp_cfg1 fmt (c,a) = 33 | Format.fprintf fmt "(@[%a@ %a@])" 34 | BlockStructure.pp_cfg_entry c 35 | pp_cfg_annot a 36 | 37 | let pp_cfg fmt l = pp_list pp_cfg1 fmt l 38 | 39 | (* Add the identifier in a to the generation set *) 40 | let add_gen (a : atomic_exp) (gen : Varset.t) : Varset.t = 41 | match a with 42 | | Num _ -> gen 43 | | Ident i -> Varset.add i gen 44 | 45 | (* Add the identifiers in aes to the generation set *) 46 | let rec add_gen_list (aes : atomic_exp list) (gen : Varset.t) : Varset.t = 47 | match aes with 48 | | [] -> gen 49 | | a::aes -> add_gen_list aes (add_gen a gen) 50 | 51 | (* Compute the gen and kill sets for a basic block, live_exit is set to empty *) 52 | let analyse_block (b : basic_block) : cfg_annot = 53 | let rec analyse_block gen kill b = 54 | match b with 55 | | [] -> (gen, kill) 56 | | AssignOp (i, a1, _, a2) :: b -> 57 | analyse_block (add_gen a1 (add_gen a2 (Varset.remove i gen))) 58 | (Varset.add i kill) 59 | b 60 | | AssignAtom (i, a) :: b -> 61 | analyse_block (add_gen a (Varset.remove i gen)) (Varset.add i kill) b 62 | | Ld (i, j, a) :: b -> 63 | analyse_block (add_gen a (Varset.add j (Varset.remove i gen))) 64 | (Varset.add i kill) 65 | b 66 | | St (i, a1, a2) :: b -> 67 | analyse_block (add_gen a1 (add_gen a2 (Varset.add i gen))) 68 | kill 69 | b 70 | | Call (None, _, aes) :: b -> 71 | analyse_block (add_gen_list aes gen) kill b 72 | | Call (Some i, _, aes) :: b -> 73 | analyse_block (add_gen_list aes (Varset.remove i gen)) (Varset.add i kill) b 74 | | BoundCheck (a1, a2) :: b -> 75 | analyse_block (add_gen a1 (add_gen a2 gen)) kill b 76 | | NullCheck v :: b -> 77 | analyse_block (Varset.add v gen) kill b 78 | in 79 | let (gen,kill) = analyse_block Varset.empty Varset.empty (List.rev b) in 80 | { gen = gen; kill = kill; live_exit = Varset.empty } 81 | 82 | (* Split the annotated cfg into the predecessors of node n, and the other nodes 83 | *) 84 | let find_preds n cfg = 85 | List.partition 86 | (fun (entry, _) -> 87 | match entry.next with 88 | | Return _ -> false 89 | | Next n' -> 90 | n = n' 91 | | Branch (_, n1, n2) -> 92 | n = n1 || n = n2) 93 | cfg 94 | 95 | let add_test_vars (ae1, _, ae2) vars = 96 | let vars' = 97 | match ae1 with 98 | | Ident v -> Varset.add v vars 99 | | _ -> vars 100 | in 101 | match ae2 with 102 | | Ident v -> Varset.add v vars' 103 | | _ -> vars' 104 | 105 | (* What's live right before we are exit a block: initially if the block 106 | returns, then all the globals are alive, since other parts of the code might 107 | read them. Also the returned value is live. For a branch, any variable used 108 | in the condition is live. *) 109 | let init_live_exit (globals : Varset.t) (a : cfg_annot) (next : next_block) 110 | : cfg_annot = 111 | match next with 112 | | Return None -> { a with live_exit = globals } 113 | | Return (Some i) -> { a with live_exit = Varset.add i globals } 114 | | Next _ -> a 115 | | Branch (i, _, _) -> 116 | { a with live_exit = add_test_vars i Varset.empty } 117 | 118 | (* Do live variable analysis, returning an annotated cfg *) 119 | let lva (globals : Varset.t) (cfg : BlockStructure.cfg) : cfg = 120 | (* Initial annotations for all of the blocks. *) 121 | let init_worklist = 122 | List.map 123 | (fun entry -> 124 | (entry, 125 | init_live_exit globals (analyse_block entry.elems) entry.next)) 126 | cfg 127 | in 128 | (* Update one block from the worklist. 129 | The worklist and finished_list partition the whole cfg. That is, they must 130 | contain all of the blocks between them, with no duplication. NB, this is 131 | slightly different than the naive worklist algorithm. Here we add a 132 | node to the worklist only when its live_exit would change based on the current 133 | node's live_entry. *) 134 | let rec do_one (worklist : cfg) (finished_list : cfg) : cfg = 135 | match worklist with 136 | | [] -> finished_list 137 | | ((entry, annot) as node) :: worklist -> 138 | let live_entry = 139 | Varset.union annot.gen (Varset.diff annot.live_exit annot.kill) 140 | in 141 | (* Updates contains node's predecessors from the worklist input, 142 | and worklist contains the remaining non-predecessors. *) 143 | let (updates, worklist) = find_preds entry.bnum worklist in 144 | (* Possible_updates contains node's predecessors from the finished_list, 145 | possibly including itself, and finished contains the remaining 146 | non-predecessors. *) 147 | let (possible_updates, finished) = 148 | find_preds entry.bnum (node::finished_list) 149 | in 150 | (* Finished' contains all possible_updates whose live_exits are supersets 151 | of our live entry. There is no need to update them, because no nodes would 152 | be added to their live_exit. updates' contains those previously finished nodes 153 | whose live_exits will change *) 154 | let (finished', updates') = 155 | List.partition 156 | (fun (_, annot) -> Varset.subset live_entry annot.live_exit) 157 | possible_updates 158 | in 159 | (* Update the live_exits of the nodes needing updated and add them to the 160 | worklist *) 161 | let new_worklist = 162 | List.map 163 | (fun (entry, annot) -> 164 | (entry, { annot with 165 | live_exit = Varset.union annot.live_exit live_entry})) 166 | (updates @ updates') 167 | @ 168 | worklist 169 | in 170 | do_one new_worklist (finished' @ finished) 171 | in 172 | do_one (List.rev init_worklist) [] 173 | 174 | (* Check that the operator cannot have a side-effect *) 175 | let pure_op op : bool = 176 | match op with 177 | | Tokens.Div -> false (* divide by zero *) 178 | | _ -> true 179 | 180 | (* Remove unused writes from elems, assuming that elems is presented backwards, 181 | and live is the set of variables live on exiting the block *) 182 | let rec local_remove_unused_writes (live : Varset.t) (elems : block_elem list) 183 | : block_elem list = 184 | match elems with 185 | | [] -> [] 186 | | AssignOp (i, a1, op, a2) :: b -> 187 | if Varset.mem i live || not (pure_op op) then 188 | AssignOp (i, a1, op, a2) :: 189 | local_remove_unused_writes 190 | (add_gen a1 (add_gen a2 (Varset.remove i live))) b 191 | else 192 | local_remove_unused_writes live b 193 | | AssignAtom (i, a) :: b -> 194 | if Varset.mem i live then 195 | AssignAtom (i, a) :: 196 | local_remove_unused_writes (add_gen a (Varset.remove i live)) b 197 | else 198 | local_remove_unused_writes live b 199 | | Ld (i, j, a) :: b -> 200 | if Varset.mem i live then 201 | Ld (i, j, a) :: 202 | local_remove_unused_writes (add_gen a (Varset.add j (Varset.remove i live))) b 203 | else 204 | local_remove_unused_writes live b 205 | | St (i, a1, a2) :: b -> 206 | St (i, a1, a2) :: 207 | local_remove_unused_writes (add_gen a1 (add_gen a2 (Varset.add i live))) b 208 | | Call (i, f, aes) :: b -> 209 | Call (i, f, aes) :: 210 | let live' = 211 | match i with 212 | | None -> live 213 | | Some v -> Varset.remove v live 214 | in 215 | local_remove_unused_writes (add_gen_list aes live') b 216 | | BoundCheck (a1, a2) :: b -> 217 | BoundCheck (a1, a2) :: local_remove_unused_writes (add_gen a1 (add_gen a2 live)) b 218 | | NullCheck v :: b -> 219 | NullCheck v :: local_remove_unused_writes (Varset.add v live) b 220 | 221 | let remove_unused_writes (cfg : cfg) : cfg = 222 | List.map 223 | (fun (entry, annot) -> 224 | let new_elems = 225 | local_remove_unused_writes annot.live_exit (List.rev entry.elems) 226 | in 227 | ({entry with elems = List.rev new_elems}, annot)) 228 | cfg 229 | -------------------------------------------------------------------------------- /src/astInterp.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* An interpreter for ASTs. This is meta circular in the sense that we use 20 | OCaml refs for mutable variables, and we represent arrays with mutable OCaml 21 | arrays. We also model function returns with exceptions. *) 22 | 23 | open SourceAst 24 | module T = Tokens 25 | 26 | (* For when the interpreter crashed, such as array bounds violations *) 27 | exception Crash of string 28 | 29 | (* For errors that a well-typed program can't have *) 30 | exception TypeError 31 | 32 | (* Values are either integers or n-dimensional arrays of integers. 33 | We keep multi-dimensional arrays in a single dimensional one and include a 34 | list of how big each dimension is. 35 | We represent bools as numbers: true = 1L and false = 0L *) 36 | type val_t = 37 | | Vint of int64 38 | | Varray of int list * int64 array 39 | 40 | let val_t_to_int (v : val_t) : int64 = 41 | match v with 42 | | Vint i -> i 43 | | Varray _ -> raise TypeError 44 | 45 | let val_t_to_array (v : val_t) : int list * int64 array = 46 | match v with 47 | | Vint 0L -> raise (Crash "null pointer exception") 48 | | Vint _ -> raise TypeError 49 | | Varray (dims, a) -> (dims, a) 50 | 51 | (* Given the array's dimensions, work out the slot for a particular set of 52 | indices. Sizes and indices must have the same length. Return None if one 53 | of the indices is out of bounds, i.e. greater than the size. *) 54 | let array_address (sizes : int list) (indices : int list) : int option = 55 | (* acc keeps track of the product of the dimensions seen so far *) 56 | let rec f sizes indices (acc : int) = 57 | match (sizes, indices) with 58 | | ([], []) -> 0 59 | | (s1::sizes, i1::indices) -> 60 | i1 * acc + f sizes indices (s1 * acc) 61 | | _ -> 62 | raise TypeError 63 | in 64 | if List.for_all2 (fun s i -> i < s) sizes indices then 65 | Some (f sizes indices 1) 66 | else 67 | None 68 | 69 | let bool_to_int64 (b : bool) : int64 = 70 | if b then 1L else 0L 71 | 72 | let int64_to_bool (i : int64) : bool = 73 | if Int64.compare i 0L = 0 then false else true 74 | 75 | (* Do a primitive operation, but not && or || *) 76 | let do_op op (n1 : int64) (n2 : int64) : int64 = 77 | match op with 78 | | T.Plus -> Int64.add n1 n2 79 | | T.Minus -> Int64.sub n1 n2 80 | | T.Times -> Int64.mul n1 n2 81 | | T.Div -> 82 | if n2 = 0L then 83 | raise (Crash "Division by 0") 84 | else 85 | Int64.div n1 n2 86 | | T.Lt -> bool_to_int64 (Int64.compare n1 n2 < 0) 87 | | T.Gt -> bool_to_int64 (Int64.compare n1 n2 > 0) 88 | | T.Eq -> bool_to_int64 (Int64.compare n1 n2 = 0) 89 | | T.Lshift -> 90 | let i = Int64.to_int n2 in 91 | (* Int64.shift_left is wacky outside of this interval *) 92 | if 0 <= i && i < 64 then 93 | Int64.shift_left n1 i 94 | else 95 | 0L 96 | | T.BitOr -> Int64.logor n1 n2 97 | | T.BitAnd -> Int64.logand n1 n2 98 | | T.And | T.Or -> assert false 99 | 100 | let do_uop uop (n : int64) : int64 = 101 | match uop with 102 | | T.Not -> 103 | bool_to_int64 (not (int64_to_bool n)) 104 | 105 | (* The environment will have the code of all of the functions that we can call, 106 | it will also keep the variable bindings in a map of OCaml references. 107 | Assignments to a variables will be interpreted as assignments to the 108 | corresponding reference. Note that an assignment to a variable (of scalar or 109 | array type) mutates the binding for that variable in the environment, 110 | whereas an assignment to an array element (i.e., an array update), actually 111 | changes the array value without modifying the environment binding for the 112 | array. 113 | *) 114 | type env_t = { funs : func Idmap.t; vars : val_t ref Idmap.t } 115 | 116 | (* To model return control flow *) 117 | exception Return_exn of val_t 118 | 119 | (* Add the function's arguments to the environment env at a call site. Assume 120 | that the two input lists are the same length. This is ensured by the type 121 | checker. We also know there are no duplicate parameter names. *) 122 | let rec add_arguments (params : (id * _) list) (args : val_t list) (env : env_t) 123 | : env_t = 124 | match (params, args) with 125 | | ([], []) -> env 126 | | ((x,_)::params, v::args) -> 127 | (* Allocate a mutable reference to store the parameter's value in. Then the 128 | function's body can assign to the variable by changing the binding. *) 129 | let binding = ref v in 130 | add_arguments params args { env with vars = Idmap.add x binding env.vars } 131 | | (_, _) -> 132 | raise TypeError 133 | 134 | (* Compute the value of an expression *) 135 | let rec interp_exp (env : env_t) (e : exp) : val_t = 136 | match e with 137 | | Ident (i, []) -> 138 | !(Idmap.find i env.vars) (* i will be in the environment in a well-typed 139 | program *) 140 | | Ident (i, iexps) -> 141 | let (sizes, a) = val_t_to_array !(Idmap.find i env.vars) in 142 | let indices = 143 | List.map (fun e -> (Int64.to_int (val_t_to_int (interp_exp env e)))) 144 | iexps 145 | in 146 | (match array_address sizes indices with 147 | | Some x -> Vint (Array.get a x) 148 | | None -> raise (Crash "array index out of bounds")) 149 | | Call (f, args) -> 150 | (* f will be in the environment in a well-typed program *) 151 | let func = Idmap.find f env.funs in 152 | let arg_vals = List.map (interp_exp env) args in 153 | let new_env1 = add_arguments func.params arg_vals env in 154 | let new_env2 = interp_var_decs new_env1 func.locals in 155 | (try 156 | List.iter (interp_stmt new_env2) func.body; 157 | (* We know this cannot happen because of the type checker, each path in 158 | each function must have a return. Otherwise, we would have to return 159 | a default value *of the right type* *) 160 | raise TypeError 161 | with Return_exn v -> v) 162 | | Num n -> Vint n 163 | | Bool b -> Vint (bool_to_int64 b) 164 | | Op (e1, T.And, e2) -> 165 | (match interp_exp env e1 with 166 | | Vint n -> 167 | if int64_to_bool n then 168 | interp_exp env e2 169 | else 170 | Vint n 171 | | _ -> raise TypeError) 172 | | Op (e1, T.Or, e2) -> 173 | (match interp_exp env e1 with 174 | | Vint n -> 175 | if int64_to_bool n then 176 | Vint n 177 | else 178 | interp_exp env e2 179 | | _ -> raise TypeError) 180 | | Op (e1, op, e2) -> 181 | (match (interp_exp env e1, interp_exp env e2) with 182 | | (Vint n1, Vint n2) -> Vint (do_op op n1 n2) 183 | | _ -> raise TypeError) 184 | | Uop (uop, e) -> 185 | (match interp_exp env e with 186 | | Vint n -> Vint (do_uop uop n) 187 | | _ -> raise TypeError) 188 | | Array iexps -> 189 | let indices = 190 | List.map (fun e -> (Int64.to_int (val_t_to_int (interp_exp env e)))) 191 | iexps 192 | in 193 | Varray (indices, 194 | Array.make (List.fold_right (fun x y -> x * y) indices 1) 0L) 195 | 196 | (* Run the initialisation expressions of the variables, and add bindings to the 197 | resulting values into the environment *) 198 | and interp_var_decs (env : env_t) (decs : var_dec list) : env_t = 199 | (* Start the variables at 0, because they can be referenced before they are 200 | initialised. NB, this allows null pointer exceptions (or crashes) into the 201 | language, since the variable can have array type. This is a consequence 202 | of having each global and local scope be a single recursive scope. *) 203 | let new_env = 204 | List.fold_right 205 | (fun v env -> 206 | let binding = ref (Vint 0L) in 207 | Idmap.add v.var_name binding env) 208 | decs 209 | env.vars 210 | in 211 | let env = { env with vars = new_env } in 212 | (* Now run all of the initialisation expressions *) 213 | List.iter 214 | (fun d -> 215 | let v = interp_exp env d.init in 216 | Idmap.find d.var_name env.vars := v) 217 | decs; 218 | env 219 | 220 | (* Run a statement *) 221 | and interp_stmt (env : env_t) (s : stmt) : unit = 222 | match s with 223 | | Assign (i, [], e) -> 224 | let v = interp_exp env e in 225 | Idmap.find i env.vars := v 226 | | Assign (i, iexps, e) -> 227 | (match !(Idmap.find i env.vars) with 228 | | Varray (sizes, a) -> 229 | (let indices = 230 | List.map (fun e -> (Int64.to_int (val_t_to_int (interp_exp env e)))) 231 | iexps 232 | in 233 | match array_address sizes indices with 234 | | None -> raise (Crash "array index out of bounds") 235 | | Some x -> 236 | let v = val_t_to_int (interp_exp env e) in 237 | Array.set a x v) 238 | | Vint 0L -> raise (Crash "null pointer exception") 239 | | Vint _ -> raise TypeError) 240 | | DoWhile (head_s, e, body_s) -> 241 | interp_stmt env head_s; 242 | if int64_to_bool (val_t_to_int (interp_exp env e)) then 243 | (interp_stmt env body_s; 244 | interp_stmt env s) 245 | else 246 | () 247 | | Ite (e, s1, s2) -> 248 | if int64_to_bool (val_t_to_int (interp_exp env e)) then 249 | interp_stmt env s1 250 | else 251 | interp_stmt env s2 252 | | Stmts sl -> 253 | List.iter (interp_stmt env) sl 254 | | In i -> 255 | Printf.printf "> "; 256 | (try 257 | let n = Int64.of_string (read_line ()) in 258 | Idmap.find i env.vars := Vint n 259 | with Failure _ -> raise (Crash "not a 64-bit integer")) 260 | | Out i -> 261 | (print_string (Int64.to_string (val_t_to_int !(Idmap.find i env.vars))); 262 | print_newline ()) 263 | | Return None -> 264 | raise (Return_exn (Vint 0L)) 265 | | Return (Some i) -> 266 | raise (Return_exn !(Idmap.find i env.vars)) 267 | | Loc (s, _) -> interp_stmt env s 268 | 269 | let interp_prog (p : prog) : unit = 270 | let fun_env = 271 | List.fold_right (fun f env -> Idmap.add f.fun_name f env) 272 | p.funcs Idmap.empty 273 | in 274 | ignore (interp_var_decs { funs = fun_env; vars = Idmap.empty } p.globals) 275 | -------------------------------------------------------------------------------- /src/constProp.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* Do constant propagation and folding. Later compiler phases assume that no 20 | operation has 2 constant arguments, so this needs to be guaranteed here. *) 21 | 22 | open SourceAst 23 | module T = Tokens 24 | 25 | (* Return n, such that 2^n = i, or None if there is no such *) 26 | let log2 (i : int64) : int option = 27 | (* Linear search for the least significant 1 in the binary represenatation. 28 | A binary search might be faster, but probably not worth the hassle. *) 29 | let rec f (i : int64) (shifted : int) : int option = 30 | if Int64.logand i 0x1L = 0L then 31 | f (Int64.shift_right i 1) (shifted + 1) 32 | else if i = 0x1L then 33 | Some shifted 34 | else 35 | None 36 | in 37 | if i > 0L then 38 | f i 0 39 | else 40 | None 41 | 42 | (* Decides whether evaluating an expression might have a side-effect *) 43 | let rec might_have_effect (e : exp) : bool = 44 | match e with 45 | | Ident (_, []) -> false 46 | | Ident (_, _) -> true (* Array bound check failure *) 47 | | Num _ | Bool _ -> false 48 | | Op (e1, op, e2) -> 49 | if op = T.Div then 50 | true (* Divide by zero error *) 51 | else 52 | might_have_effect e1 || might_have_effect e2 53 | | Uop (_, e) -> might_have_effect e 54 | | Array es -> List.exists might_have_effect es 55 | | Call _ -> true (* The called function could do something *) 56 | 57 | (* Check whether two expressions are equal, and don't have effects. This lets 58 | some operators remove them. Note that we are just comparing the structure of 59 | the expressions, so we won't see that x + (y + z) is equal to (x + y) + z, 60 | etc. *) 61 | let ok_remove_eq_exp (e1 : exp) (e2 : exp) : bool = 62 | e1 = e2 && 63 | not (might_have_effect e1) && 64 | not (might_have_effect e2) 65 | 66 | let between_0_63 (i : int64) : bool = 67 | Int64.compare i (-1L) = 1 && Int64.compare i 64L = -1 68 | 69 | (* Statically evaluate an expression according to the identifier values in env, 70 | don't try to follow constants in arrays. Operates in a single bottom-up pass. 71 | Doesn't do anything with associativity or commutativity, so things like 72 | (x + 1) + 2 don't get changed. *) 73 | let rec fold_exp (env : exp Idmap.t) (e : exp) : exp = 74 | match e with 75 | | Ident (i, []) -> 76 | (try Idmap.find i env 77 | with Not_found -> e) 78 | | Ident (i, es) -> Ident (i, List.map (fold_exp env) es) 79 | | Num n -> Num n 80 | | Bool b -> Bool b 81 | | Op (e1, op, e2) -> 82 | let o1 = fold_exp env e1 in 83 | let o2 = fold_exp env e2 in 84 | (match (o1, op, o2) with 85 | (* Plus *) 86 | | (Num n1, T.Plus, Num n2) -> Num (Int64.add n1 n2) 87 | | (Num 0L, T.Plus, e) | (e, T.Plus, Num 0L) -> e 88 | 89 | (* Minus *) 90 | | (Num n1, T.Minus, Num n2) -> Num (Int64.sub n1 n2) 91 | | (e, T.Minus, Num 0L) -> e 92 | | (e1, T.Minus, e2) when ok_remove_eq_exp e1 e2 -> Num 0L 93 | 94 | (* Times *) 95 | | (Num n1, T.Times, Num n2) -> Num (Int64.mul n1 n2) 96 | | (e, T.Times, Num 1L) | (Num 1L, T.Times, e) -> e 97 | | (Num -1L, T.Times, e) | (e, T.Times, Num -1L) -> 98 | Op (Num 0L, T.Minus, e) 99 | (* Can't use or-patterns for the following two, according to compiler warning *) 100 | | (Num 0L, T.Times, e) when not (might_have_effect e) -> 101 | Num 0L 102 | | (e, T.Times, Num 0L) when not (might_have_effect e) -> 103 | Num 0L 104 | | (e, T.Times, Num n) | (Num n, T.Times, e) -> 105 | (match log2 (Int64.abs n) with 106 | | None -> Op (o1, op, o2) 107 | | Some log -> 108 | let shift_op = Op (e, T.Lshift, Num (Int64.of_int log)) in 109 | if n < 0L then 110 | Op (Num 0L, T.Minus, shift_op) 111 | else 112 | shift_op) 113 | 114 | (* Div *) 115 | | (Num n1, T.Div, Num n2) when n2 <> 0L -> Num (Int64.div n1 n2) 116 | | (Num _, T.Div, Num 0L) -> 117 | (* This phase needs to guarantee that no operation has two constant 118 | arguments. It doesn't matter what the numerator is, when the 119 | divisor is 0, so we can just use an arbitrary variable. *) 120 | Op (Ident (Temp ("CP", 0), []), T.Div, Num 0L) 121 | | (e, T.Div, Num 1L) -> e 122 | 123 | (* Less *) 124 | | (Num n1, T.Lt, Num n2) -> Bool (Int64.compare n1 n2 < 0) 125 | | (e1, T.Lt, e2) when ok_remove_eq_exp e1 e2 -> Bool false 126 | 127 | (* Greater *) 128 | | (Num n1, T.Gt, Num n2) -> Bool (Int64.compare n1 n2 > 0) 129 | | (e1, T.Gt, e2) when ok_remove_eq_exp e1 e2 -> Bool false 130 | 131 | (* Equal *) 132 | | (Num n1, T.Eq, Num n2) -> Bool (Int64.compare n1 n2 = 0) 133 | | (e1, T.Eq, e2) when ok_remove_eq_exp e1 e2 -> Bool true 134 | 135 | (* Shift left *) 136 | | (Num n1, T.Lshift, Num n2) -> 137 | (* Ocaml's shift_left is only defined between 0 and 63 inclusive *) 138 | if between_0_63 n2 then 139 | Num (Int64.shift_left n1 (Int64.to_int n2)) 140 | else 141 | Num 0L 142 | | (e, T.Lshift, Num 0L) -> e 143 | | (Num 0L, T.Lshift, e) when not (might_have_effect e) -> 144 | Num 0L 145 | 146 | (* Bitwise or *) 147 | | (Num n1, T.BitOr, Num n2) -> Num (Int64.logor n1 n2) 148 | | (Num 0L, T.BitOr, e) | (e, T.BitOr, Num 0L) -> e 149 | (* Can't use or-patterns for the following two, according to compiler warning *) 150 | | (Num 0xFFFFFFFFFFFFFFFFL, T.BitOr, e) when not (might_have_effect e) -> 151 | Num 0xFFFFFFFFFFFFFFFFL 152 | | (e, T.BitOr, Num 0xFFFFFFFFFFFFFFFFL) when not (might_have_effect e) -> 153 | Num 0xFFFFFFFFFFFFFFFFL 154 | 155 | (* Bitwise and *) 156 | | (Num n1, T.BitAnd, Num n2) -> Num (Int64.logand n1 n2) 157 | | (Num 0xFFFFFFFFFFFFFFFFL, T.BitAnd, e) 158 | | (e, T.BitAnd, Num 0xFFFFFFFFFFFFFFFFL) -> e 159 | (* Can't use or-patterns for the following two, according to compiler warning *) 160 | | (Num 0L, T.BitAnd, e) when not (might_have_effect e) -> Num 0L 161 | | (e, T.BitAnd, Num 0L) when not (might_have_effect e) -> Num 0L 162 | 163 | (* And *) 164 | | (Bool true, T.And, e) | (e, T.And, Bool true) -> e 165 | | (Bool false, T.And, _) -> Bool false 166 | | (e, T.And, Bool false) when not (might_have_effect e) -> Bool false 167 | | (e1, T.And, e2) when ok_remove_eq_exp e1 e2 -> e1 168 | 169 | (* Or *) 170 | | (Bool false, T.Or, e) | (e, T.Or, Bool false) -> e 171 | | (Bool true, T.Or, _) -> Bool true 172 | | (e, T.Or, Bool true) when not (might_have_effect e) -> Bool true 173 | | (e1, T.Or, e2) when ok_remove_eq_exp e1 e2 -> e1 174 | 175 | | _ -> Op (o1, op, o2)) 176 | | Uop (uop, e) -> 177 | let o = fold_exp env e in 178 | (match (uop, o) with 179 | | (T.Not, Bool b) -> Bool (not b) 180 | | (T.Not, Uop (T.Not, e)) -> e 181 | | _ -> Uop (uop, o)) 182 | | Array es -> 183 | Array (List.map (fold_exp env) es) 184 | | Call (f, es) -> 185 | Call (f, List.map (fold_exp env) es) 186 | 187 | let is_const (e : exp) : bool = 188 | match e with 189 | | Num _ | Bool _ -> true 190 | | _ -> false 191 | 192 | let same_const (e1 : exp) (e2 : exp) : bool = 193 | match (e1,e2) with 194 | | (Num n1, Num n2) -> Int64.compare n1 n2 = 0 195 | | (Bool b1, Bool b2) -> b1 = b2 196 | | _ -> false 197 | 198 | (* If v1 and v2 contain the same constant, return it. Otherwise return None *) 199 | let merge_constants (_ : id) (v1 : exp option) (v2 : exp option) 200 | : exp option = 201 | match (v1,v2) with 202 | | (Some e1, Some e2) -> 203 | if same_const e1 e2 then 204 | Some e1 205 | else None 206 | | _ -> None 207 | 208 | (* Do constant propagation. Accumulate an environment of definitely known 209 | constants at the end of stmts, given definitely known constants env at the 210 | start. *) 211 | let rec prop_stmts (env : exp Idmap.t) (stmts : stmt list) 212 | : exp Idmap.t * stmt list = 213 | match stmts with 214 | | [] -> (env,[]) 215 | | Assign (x, [], e) :: stmts -> 216 | let o1 = fold_exp env e in 217 | let first_env = 218 | if is_const o1 then Idmap.add x o1 env else Idmap.remove x env 219 | in 220 | let (env',stmts') = prop_stmts first_env stmts in 221 | (env', Assign (x, [], o1) :: stmts') 222 | | Assign (x, es, e) :: stmts -> 223 | let o = fold_exp env e in 224 | let os = List.map (fold_exp env) es in 225 | let (env', stmts') = prop_stmts env stmts in 226 | (env', Assign (x, os, o) :: stmts') 227 | | DoWhile (s0, e, s1) :: stmts -> 228 | (* s0 is always executed *) 229 | let (env_init, t) = prop_stmt env s0 in 230 | let o = fold_exp env_init e in 231 | (match o with 232 | | Bool false -> 233 | let (env', stmts') = prop_stmts env_init stmts in 234 | (env', t::stmts') 235 | | _ -> 236 | (* From this point, the loop might execute s1;s0 0 or more times, so we 237 | calculate env1 which contains the constants that are constant no 238 | matter how many times the loop is executed *) 239 | let env1 = prop_loop_body env_init [s1; s0] in 240 | (* Now re-calculate the constant propagation for the things constant no 241 | matter how many times we iterate the loop *) 242 | let (_, t1) = prop_stmt env1 s1 in 243 | let o = fold_exp env1 e in 244 | let (_, t0) = prop_stmt env1 s0 in 245 | let (env',stmts') = prop_stmts env1 stmts in 246 | (env', DoWhile (t0, o, t1) :: stmts')) 247 | | Ite (e, s1, s2) :: stmts -> 248 | let o1 = fold_exp env e in 249 | (match o1 with 250 | | Bool true -> 251 | let (env1, os1) = prop_stmt env s1 in 252 | let (env', stmts') = prop_stmts env1 stmts in 253 | (env', os1::stmts') 254 | | Bool false -> 255 | let (env2, os2) = prop_stmt env s2 in 256 | let (env', stmts') = prop_stmts env2 stmts in 257 | (env', os2::stmts') 258 | | _ -> 259 | let (env1, os1) = prop_stmt env s1 in 260 | let (env2, os2) = prop_stmt env s2 in 261 | (* Only include constants that are known to be the same at the end of 262 | both branches of the Ite for the subsequent statements. *) 263 | let (env',stmts') = 264 | prop_stmts (Idmap.merge merge_constants env1 env2) stmts 265 | in 266 | (env', Ite (o1, os1, os2) :: stmts')) 267 | | Stmts (stmts1) :: stmts -> 268 | let (env1, os1) = prop_stmts env stmts1 in 269 | let (env', stmts') = prop_stmts env1 stmts in 270 | (env', Stmts (os1) :: stmts') 271 | | In x :: stmts -> 272 | let (env',stmts') = prop_stmts (Idmap.remove x env) stmts in 273 | (env', In x :: stmts') 274 | | Out x :: stmts -> 275 | let (env',stmts') = prop_stmts env stmts in 276 | (env', Out x :: stmts') 277 | | Loc (s, ln) :: stmts -> 278 | let (env1, o1) = prop_stmt env s in 279 | let (env', stmts') = prop_stmts env1 stmts in 280 | (env', Loc (o1, ln) :: stmts') 281 | | Return x :: _ -> 282 | (* We can't carry on past a return *) 283 | (env, [Return x]) 284 | 285 | and prop_stmt env (stmt : stmt) = 286 | match prop_stmts env [stmt] with 287 | | (env,[os]) -> (env,os) 288 | | _ -> assert false 289 | 290 | (* Given possibly known constants env at the start, compute the definitely 291 | known constants at the end, assuming that stmts is run in a loop body an 292 | unknown number of times. *) 293 | and prop_loop_body (env : exp Idmap.t) (stmts : stmt list) : exp Idmap.t = 294 | let (env', _) = prop_stmts env stmts in 295 | (* The next approximation of constants at the start *) 296 | let env'' = Idmap.merge merge_constants env env' in 297 | if Idmap.equal same_const env env'' then 298 | (* Same as last time, fixed point reached *) 299 | env'' 300 | else 301 | prop_loop_body env'' stmts 302 | -------------------------------------------------------------------------------- /src/typeCheck.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* The type checker. In addition to checking the types, we build a new AST that 20 | annotates the scope of all of the variables. Each use of a variable either 21 | refers to a global variable, a local variable, or a function parameter. The 22 | type checker keeps track of which sort each variable is and annotates each 23 | use with this information. This allows the register allocator and back end 24 | to treat them differently. *) 25 | 26 | open Util 27 | open SourceAst 28 | module T = Tokens 29 | 30 | (* Types *) 31 | type t = 32 | | Tint 33 | | Tbool 34 | | Tarray of int (* The number of dimensions that the array has *) 35 | 36 | let show_t t = 37 | match t with 38 | | Tint -> "int" 39 | | Tbool -> "bool" 40 | | Tarray n -> "array " ^ string_of_int n 41 | 42 | (* Map functions to their types, and variables to their types and scope *) 43 | type env_t = { funs : (t list * t) Idmap.t; vars : (t * scope) Idmap.t } 44 | 45 | (* Raise a type error *) 46 | let type_error (ln : int option) (msg : string) : 'a = 47 | match ln with 48 | | Some ln -> 49 | raise (BadInput ("Type error on line " ^ string_of_int ln ^ ": " ^ msg)) 50 | | None -> 51 | raise (BadInput ("Type error at unknown location: " ^ msg)) 52 | 53 | (* Annotate an identifier with its scope *) 54 | let add_scope (id : id) (s : scope) : id = 55 | match id with 56 | | Source (i, None) -> Source (i, Some s) 57 | | Source (_, Some _) -> 58 | raise (InternalError "scoped identifier during type checking") 59 | | Temp (x, y) -> Temp (x, y) 60 | 61 | (* Compute the type of an expression, or raise BadInput if there is a type 62 | error. Also return a new scope-annotated version of the expression. *) 63 | let rec type_exp (ln : int option) (env : env_t) (e : exp) : t * exp = 64 | match e with 65 | | Ident (i, es) -> 66 | let (t, scope) = 67 | try Idmap.find i env.vars 68 | with Not_found -> 69 | type_error ln ("reference to undeclared variable " ^ show_id i) 70 | in 71 | let (ts, es') = List.split (List.map (type_exp ln env) es) in 72 | let l = List.length ts in 73 | let annotated_exp = Ident (add_scope i scope, es') in 74 | (* If there are no indices, then just use the type of the identifier, even 75 | if it is of array type. If there are indices, they must fully index the 76 | array, which always contains ints *) 77 | if l = 0 then 78 | (t, annotated_exp) 79 | else 80 | (match t with 81 | | Tarray num_dims -> 82 | if num_dims = l && List.for_all (fun x -> x = Tint) ts then 83 | (Tint, annotated_exp) 84 | else if num_dims = l then 85 | type_error ln "array index with non-integer type" 86 | else 87 | type_error ln ("array reference with " ^ string_of_int l ^ 88 | " indices, expected " ^ string_of_int num_dims) 89 | | _ -> 90 | type_error ln ("attempt to index non-array variable " ^ show_id i)) 91 | | Call (f, args) -> 92 | let (param_types, ret_type) = 93 | try Idmap.find f env.funs 94 | with Not_found -> 95 | type_error ln ("call to undefined function " ^ show_id f) 96 | in 97 | let (ts, es') = List.split (List.map (type_exp ln env) args) in 98 | (* A local function to check that the parameter and argument types match 99 | up. Use n to keep track of which position, for the error message. Assume 100 | that the lists are of the same length. *) 101 | let rec check (n : int) (pts : t list) (ats : t list) : unit = 102 | match (pts, ats) with 103 | | ([], []) -> () 104 | | (pt::pts, at::ats) -> 105 | if pt = at then check (n+1) pts ats else 106 | type_error ln ("function call given argument of type " ^ show_t at ^ 107 | " but expected type " ^ show_t pt ^ 108 | " in argument number " ^ string_of_int n) 109 | | _ -> assert false 110 | in 111 | if List.length param_types <> List.length args then 112 | type_error ln ("function call expects " ^ 113 | string_of_int (List.length param_types) ^ 114 | " arguments, given " ^ 115 | string_of_int (List.length ts)) 116 | else 117 | check 1 param_types ts; 118 | (ret_type, Call (f, es')) 119 | | Num n -> (Tint, Num n) 120 | | Bool b -> (Tbool, Bool b) 121 | | Op (e1, op, e2) -> 122 | let (t1, e1') = type_exp ln env e1 in 123 | let (t2, e2') = type_exp ln env e2 in 124 | let t = 125 | match (t1, op, t2) with 126 | | (Tbool, (T.And | T.Or | T.Eq), Tbool) -> Tbool 127 | | (Tint, (T.Plus | T.Minus | T.Times | T.Div | T.Lshift | T.BitOr | 128 | T.BitAnd), Tint) -> 129 | Tint 130 | | (Tint, (T.Lt | T.Eq | T.Gt), Tint) -> Tbool 131 | | (t1, _, t2) -> 132 | type_error ln ("operator " ^ T.show_op op ^ 133 | " applied to expressions of types " ^ show_t t1 ^ 134 | " and " ^ show_t t2) 135 | in 136 | (t, Op (e1', op, e2')) 137 | | Uop (uop, e) -> 138 | let (t, e') = type_exp ln env e in 139 | (match (uop, t) with 140 | | (T.Not, Tbool) -> (Tbool, Uop (uop, e')) 141 | | (T.Not, t) -> 142 | type_error ln ("operator " ^ T.show_uop uop ^ 143 | " applied to expression of type " ^ show_t t)) 144 | | Array es -> 145 | let (ts, es') = List.split (List.map (type_exp ln env) es) in 146 | let l = List.length ts in 147 | if l = 0 then 148 | type_error ln "attempt to allocate array with 0 dimensions" 149 | else if List.for_all (fun x -> x = Tint) ts then 150 | (Tarray l, Array es') 151 | else 152 | type_error ln "array dimension with non-integer type" 153 | 154 | (* Type check an identifier without array indices *) 155 | let type_simple_ident (ln : int option) (env : env_t) (i : id) : t * id = 156 | match type_exp ln env (Ident (i, [])) with 157 | | (t, Ident (i', [])) -> (t, i') 158 | | _ -> assert false (* type_exp does not change the shape of the expression *) 159 | 160 | (* Type check a statement. Raise BadInput if there is an error. return gives 161 | the return type of the enclosing function. ln gives the current line number. 162 | Return a scope annotated version of the statement. Also strip the location 163 | annotations. 164 | *) 165 | let rec type_stmt (ln : int option) (env :env_t) (return : t) (stmt : stmt) 166 | : stmt = 167 | match stmt with 168 | | In i -> 169 | let (t, i') = type_simple_ident ln env i in 170 | if t <> Tint then 171 | type_error ln "input with non-integer type" 172 | else 173 | In i' 174 | | Out i -> 175 | let (t, i') = type_simple_ident ln env i in 176 | if t <> Tint then 177 | type_error ln "output with non-integer type" 178 | else 179 | Out i' 180 | | Return (Some i) -> 181 | let (t, i') = type_simple_ident ln env i in 182 | if t <> return then 183 | type_error ln ("return has type " ^ show_t t ^ 184 | " in a function with return type " ^ 185 | show_t return) 186 | else 187 | Return (Some i') 188 | | Return None -> 189 | type_error ln "return without value" 190 | | Assign (x, es, e) -> 191 | (match type_exp ln env (Ident (x, es)) with 192 | | (t1, Ident (x', es')) -> 193 | let (t2, e') = type_exp ln env e in 194 | if t1 <> t2 then 195 | type_error ln ("assignment to variable of type " ^ show_t t1 ^ " with type " ^ 196 | show_t t2) 197 | else 198 | Assign (x', es', e') 199 | | _ -> assert false) 200 | | DoWhile (s1, e, s2) -> 201 | let s1' = type_stmt ln env return s1 in 202 | (match type_exp ln env e with 203 | | (Tbool, e') -> 204 | let s2' = type_stmt ln env return s2 in 205 | DoWhile (s1', e', s2') 206 | | _ -> 207 | type_error ln "do/while test of non-bool type") 208 | | Ite (e, s1, s2) -> 209 | (match type_exp ln env e with 210 | | (Tbool, e') -> 211 | let s1' = type_stmt ln env return s1 in 212 | let s2' = type_stmt ln env return s2 in 213 | Ite (e', s1', s2') 214 | | _ -> type_error ln "if test of non-bool type") 215 | | Stmts (s_list) -> 216 | Stmts (List.map (type_stmt ln env return) s_list) 217 | | Loc (s, ln') -> 218 | type_stmt (Some ln') env return s 219 | 220 | let source_typ_to_t (t : SourceAst.typ) : t = 221 | match t with 222 | | Int -> Tint 223 | | Bool -> Tbool 224 | | Array n -> Tarray n 225 | 226 | (* Get the declared types of all of the parameters. Raise an exception if a 227 | duplicate name is found, using the location ln. Accumulate the answer in 228 | param_env. *) 229 | let rec get_param_types (ln : int option) (params : (id * typ) list) 230 | (param_env : (t * scope) Idmap.t) 231 | : (t * scope) Idmap.t = 232 | match params with 233 | | [] -> param_env 234 | | (x,t)::params -> 235 | if Idmap.mem x param_env then 236 | type_error ln ("duplicate function parameter " ^ show_id x) 237 | else 238 | get_param_types ln params 239 | (Idmap.add x (source_typ_to_t t, Parameter) param_env) 240 | 241 | (* Get the declared types of all of the variables. Raise an exception if a 242 | duplicate is found. Accumulate the answer in var_env. *) 243 | let rec get_var_types (s : scope) (vars : var_dec list) 244 | (var_env : (t * scope) Idmap.t) 245 | : (t * scope) Idmap.t = 246 | match vars with 247 | | [] -> var_env 248 | | v::vars -> 249 | if Idmap.mem v.var_name var_env then 250 | type_error v.loc ("duplicate variable definition " ^ show_id v.var_name) 251 | else 252 | get_var_types s vars 253 | (Idmap.add v.var_name (source_typ_to_t v.typ, s) var_env) 254 | 255 | (* Check the init expressions on a variable declaration. Return a 256 | scope-annotated version of the declaration. *) 257 | let type_var_dec (s : scope) (env : env_t) (dec : var_dec) : var_dec = 258 | let (t, init') = type_exp dec.loc env dec.init in 259 | if t = source_typ_to_t dec.typ then 260 | { dec with var_name = add_scope dec.var_name s; init = init' } 261 | else 262 | type_error dec.loc ("initialisation of variable typed " ^ 263 | show_t (source_typ_to_t dec.typ) ^ 264 | " to expression of type " ^ show_t t) 265 | 266 | (* See the documentation for Map.merge in the OCaml standard library *) 267 | let merge_keep_first _ (x : 'a option) (y : 'a option) : 'a option = 268 | match (x,y) with 269 | | (None, None) -> None 270 | | (Some x, Some _) -> Some x 271 | | (Some x, None) -> Some x 272 | | (None, Some y) -> Some y 273 | 274 | (* Determine whether that each control-flow path through the stmts must include 275 | a return. This way we know that a function cannot fall off the end, without 276 | returning a value of the correct type. *) 277 | let rec check_return_paths (stmts : stmt list) : bool = 278 | match stmts with 279 | | [] -> false 280 | | Return _ :: _ -> true 281 | | s::stmts -> 282 | if check_return_paths stmts then 283 | true 284 | else (* The remainder might not return, so the first statement must *) 285 | match s with 286 | | Return _ -> assert false (* already checked for this case *) 287 | | In _ | Out _ | Assign _ -> false 288 | | Loc (s,_) -> check_return_paths [s] 289 | | Ite (_,s1,s2) -> 290 | check_return_paths [s1] && check_return_paths [s2] 291 | | DoWhile (s1, _, _) -> 292 | check_return_paths [s1] 293 | | Stmts s -> check_return_paths s 294 | 295 | (* Check a function. Return a scope-annotated version is there are no errors. *) 296 | let type_function (env : env_t) (f : func) : func = 297 | let param_env = get_param_types f.loc f.params Idmap.empty in 298 | let local_env = get_var_types Local f.locals Idmap.empty in 299 | (* Add the parameter and locals to the env. Ensure that the locals shadow the 300 | parameters and globals, and that the parameters shadow the globals. *) 301 | let env1 = Idmap.merge merge_keep_first param_env env.vars in 302 | let env2 = Idmap.merge merge_keep_first local_env env1 in 303 | let new_env = { env with vars = env2 } in 304 | let locals' = List.map (type_var_dec Local new_env) f.locals in 305 | let body' = 306 | List.map (type_stmt None new_env (source_typ_to_t f.ret)) f.body 307 | in 308 | if not (check_return_paths f.body) then 309 | type_error f.loc "function might not return" 310 | else 311 | { f with 312 | params = List.map (fun (i,t) -> (add_scope i Parameter, t)) f.params; 313 | locals = locals'; 314 | body = body' } 315 | 316 | (* Get the declared types of all of the functions. Raise an exception if a 317 | duplicate is found. Accumulate the answer in fun_env. *) 318 | let rec get_function_types (funcs : func list) (fun_env : (t list * t) Idmap.t) 319 | : (t list * t) Idmap.t = 320 | match funcs with 321 | | [] -> fun_env 322 | | f::funcs -> 323 | if Idmap.mem f.fun_name fun_env then 324 | type_error f.loc ("duplicate function definition " ^ show_id f.fun_name) 325 | else 326 | get_function_types funcs 327 | (Idmap.add f.fun_name 328 | (List.map (fun (_,t) -> source_typ_to_t t) f.params, 329 | source_typ_to_t f.ret) 330 | fun_env) 331 | 332 | let type_prog (p : prog) : prog = 333 | (* Get the types of the globals and functions. *) 334 | let env = 335 | { funs = get_function_types p.funcs Idmap.empty; 336 | vars = get_var_types Global p.globals Idmap.empty } 337 | in 338 | (* Check the function bodies and global initialisations in the environment 339 | with types for all of the globals and functions. This means that the 340 | top-level is one recursive scope and each function body and global 341 | initialisation can refer to any of the others (or itself). *) 342 | let globals' = List.map (type_var_dec Global env) p.globals in 343 | let funcs' = List.map (type_function env) p.funcs in 344 | { funcs = funcs'; globals = globals' } 345 | -------------------------------------------------------------------------------- /src/instrSelX86.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* Convert linearised three-address code to x86-64 *) 20 | 21 | exception TODO of string 22 | 23 | open BlockStructure 24 | open X86 25 | module L = LineariseCfg 26 | module T = Tokens 27 | 28 | let tok_to_binop t = 29 | match t with 30 | | T.Plus -> Zadd 31 | | T.Minus -> Zsub 32 | | T.Lshift -> Zshl 33 | | T.BitOr -> Zor 34 | | T.BitAnd -> Zand 35 | | _ -> assert false 36 | 37 | (* Save RSP and RBP for stack stuff, 38 | save RAX and RDX for scratch, and index RAX by -1 39 | RAX is chosen because division needs it. Alas, it also needs RDX, which is 40 | used in the calling convention, so we keep R10 as scratch instead. *) 41 | let reg_numbers = 42 | [(-1, RAX); 43 | (0, RBX); 44 | (1, RCX); 45 | (2, RDX); 46 | (3, RSI); 47 | (4, RDI); 48 | (5, R8); 49 | (6, R9); 50 | (7, R11); 51 | (8, R12); 52 | (9, R13); 53 | (10, R14); 54 | (11, R15)] 55 | let reg_numbers_flip = 56 | List.map (fun (x,y) -> (y,x)) reg_numbers 57 | 58 | let reg_list = [RDI; RSI; RDX; RCX; R8; R9] 59 | let num_regs = List.length reg_numbers - 1 60 | let argument_reg_numbers = 61 | List.map (fun r -> (List.assoc r reg_numbers_flip)) reg_list 62 | 63 | (* Assume that we have kept RAX and R10 free for scratch space *) 64 | let r_scratch = RAX 65 | let r_scratch2 = R10 66 | 67 | (* Convert a variable, which can be a register, global or stack slot, to a rm *) 68 | let var_to_rm (v : var) : rm = 69 | match v with 70 | | Vreg i -> Zr (List.assoc i reg_numbers) 71 | | Stack i -> Zm (None, Some RBP, Some (Concrete_disp (Int64.of_int (-8 * (i+1))))) 72 | | Global g -> Zm (None, None, Some (Label_disp g)) 73 | | n -> raise (Util.InternalError ("named variable in instrSelX86: " ^ show_var n)) 74 | 75 | let rm_rm_to_dest_src (dest_rm : rm) (src_rm : rm) 76 | : instruction list * dest_src = 77 | match (dest_rm, src_rm) with 78 | | (Zr r, _) -> ([], Zr_rm (r, src_rm)) 79 | | (_, Zr r) -> ([], Zrm_r (dest_rm, r)) 80 | | (Zm _, Zm _) -> 81 | ([Zmov (Zr_rm (r_scratch, src_rm))], Zrm_r (dest_rm, r_scratch)) 82 | 83 | let rm_ae_to_dest_src (dest_rm : rm) (src_ae : atomic_exp) 84 | : instruction list * dest_src = 85 | match src_ae with 86 | | Num i -> ([], Zrm_i (dest_rm, i)) 87 | | Ident src_var -> rm_rm_to_dest_src dest_rm (var_to_rm src_var) 88 | 89 | (* Convert a heap reference, which is a variable to be deferenced, offset by 90 | another variable or immediate *) 91 | let heap_to_rm (base : var) (offset : atomic_exp) : instruction list * rm = 92 | match (base, offset) with 93 | (* Simple addresses *) 94 | | (Vreg b, Num o) -> 95 | ([], Zm (None, Some (List.assoc b reg_numbers), Some (Concrete_disp o))) 96 | | ((Stack _ | Global _ ) as v, Num o) -> 97 | ([Zmov (Zr_rm (r_scratch2, var_to_rm v))], 98 | Zm (None, Some r_scratch2, Some (Concrete_disp o))) 99 | | (Vreg b, Ident (Vreg o)) -> 100 | ([], 101 | Zm (Some (1, List.assoc o reg_numbers), 102 | Some (List.assoc b reg_numbers), 103 | None)) 104 | | ((Stack _ | Global _) as v, Ident (Vreg r)) 105 | (* 1 indirection *) 106 | | (Vreg r, Ident ((Stack _ | Global _) as v)) -> 107 | ([Zmov (Zr_rm (r_scratch2, var_to_rm v))], 108 | Zm (Some (1, List.assoc r reg_numbers), 109 | Some r_scratch2, 110 | None)) 111 | (* 2 indirections *) 112 | | (((Stack _ | Global _) as v1), Ident ((Stack _ | Global _) as v2)) -> 113 | ([Zmov (Zr_rm (r_scratch2, var_to_rm v1)); 114 | Zbinop (Zadd, Zr_rm (r_scratch2, var_to_rm v2))], 115 | Zm (None, 116 | Some r_scratch2, 117 | None)) 118 | | ((NamedSource _ | NamedTmp _), _) -> 119 | raise (Util.InternalError "Named variables in instrSelX86") 120 | | (_, Ident (NamedSource _ | NamedTmp _)) -> 121 | raise (Util.InternalError "Named variables in instrSelX86") 122 | 123 | (* Build the operation for r := r op ae *) 124 | let build_to_reg_op (op : Tokens.op) (r : reg) (ae : atomic_exp) 125 | : instruction list = 126 | match (op, ae) with 127 | | ((T.Plus | T.Minus | T.Lshift | T.BitOr | T.BitAnd), Num i) -> 128 | [Zbinop (tok_to_binop op, Zrm_i (Zr r, i))] 129 | | ((T.Plus | T.Minus | T.Lshift | T.BitOr | T.BitAnd), Ident v) -> 130 | [Zbinop (tok_to_binop op, Zr_rm (r, var_to_rm v))] 131 | | (T.Times, Num i) -> 132 | [Zimul (r, Zr r, Some i)] 133 | | (T.Times, Ident v) -> 134 | [Zimul (r, var_to_rm v, None)] 135 | | (T.Div, _) -> 136 | [Zmov (Zr_rm (r_scratch2, Zr RDX)); 137 | Zmov (Zrm_i (Zr RDX, 0L)); 138 | Zmov (Zr_rm (RAX, Zr r)); 139 | (match ae with 140 | | Ident v -> Zidiv (var_to_rm v) 141 | | Num _ -> 142 | raise (Util.InternalError "Division by immediate constant")); 143 | Zmov (Zr_rm (r, Zr RAX)); 144 | Zmov (Zr_rm (RDX, Zr r_scratch2))] 145 | | ((T.Lt | T.Gt | T.Eq), _) -> 146 | assert false 147 | | ((T.And | T.Or), _) -> 148 | assert false 149 | 150 | let reverse_op op = 151 | match op with 152 | | T.Gt -> T.Lt 153 | | T.Lt -> T.Gt 154 | | T.Eq -> T.Eq 155 | | _ -> assert false 156 | 157 | let op_to_cond op = 158 | match op with 159 | | T.Gt -> Z_G 160 | | T.Lt -> Z_L 161 | | T.Eq -> Z_E 162 | | _ -> assert false 163 | 164 | (* ------------ Calling convention stuff ------------ *) 165 | 166 | (* Don't save RAX since it is our scratch. Remember to do an even number before 167 | a call for alignment, which must be 16-bytes at (external) function calls. 168 | This is why we also push R10, even though it is scratch too. *) 169 | let caller_save = 170 | [Zpush (Zi_rm (Zr RCX)); 171 | Zpush (Zi_rm (Zr RDX)); 172 | Zpush (Zi_rm (Zr RSI)); 173 | Zpush (Zi_rm (Zr RDI)); 174 | Zpush (Zi_rm (Zr R8)); 175 | Zpush (Zi_rm (Zr R9)); 176 | Zpush (Zi_rm (Zr R10)); 177 | Zpush (Zi_rm (Zr R11))] 178 | 179 | let caller_restore = 180 | [Zpop (Zr R11); 181 | Zpop (Zr R10); 182 | Zpop (Zr R9); 183 | Zpop (Zr R8); 184 | Zpop (Zr RDI); 185 | Zpop (Zr RSI); 186 | Zpop (Zr RDX); 187 | Zpop (Zr RCX)] 188 | 189 | let callee_save = 190 | [Zpush (Zi_rm (Zr RBX)); 191 | Zpush (Zi_rm (Zr R12)); 192 | Zpush (Zi_rm (Zr R13)); 193 | Zpush (Zi_rm (Zr R14)); 194 | Zpush (Zi_rm (Zr R15))] 195 | 196 | let callee_restore = 197 | [Zpop (Zr R15); 198 | Zpop (Zr R14); 199 | Zpop (Zr R13); 200 | Zpop (Zr R12); 201 | Zpop (Zr RBX)] 202 | 203 | (* The order that the calling convention puts arguments into registers *) 204 | let reg_list = [RDI; RSI; RDX; RCX; R8; R9] 205 | 206 | (* Where the saved versions of the argument registers are on the stack. Depends 207 | on the order in caller_save. This is relative to RSP. *) 208 | let stack_reg_to_offset r = 209 | match r with 210 | | RDI -> 32L 211 | | RSI -> 40L 212 | | RDX -> 48L 213 | | RCX -> 56L 214 | | R8 -> 24L 215 | | _ -> assert false 216 | 217 | (* Move the value of ae into the register dest_r for argument passing. If ae 218 | refers to any regs in overwritten_regs, which contains the registers 219 | overwitten already for previous arguments, then use the stored version on 220 | the stack. *) 221 | let setup_arg (overwritten_regs : reg list) (dest_r : reg) (ae : atomic_exp) 222 | : dest_src option = 223 | match ae with 224 | | Num n -> Some (Zrm_i (Zr dest_r, n)) 225 | | Ident (Stack i) -> Some (Zr_rm (dest_r, var_to_rm (Stack i))) 226 | | Ident (Vreg src_r) -> 227 | let src_r = List.assoc src_r reg_numbers in 228 | if src_r = dest_r then 229 | None 230 | else if List.mem src_r overwritten_regs then 231 | Some (Zr_rm (dest_r, 232 | Zm (None, Some RSP, Some (Concrete_disp (stack_reg_to_offset src_r))))) 233 | else 234 | Some (Zr_rm (dest_r, Zr src_r)) 235 | | Ident (Global i) -> Some (Zr_rm (dest_r, var_to_rm (Global i))) 236 | | Ident ((NamedSource _ | NamedTmp _) as v) -> 237 | raise (Util.InternalError ("Named variable in instrSelX86: " ^ show_var v)) 238 | 239 | (* Move the values of aes into the registers and then the stack for argument 240 | passing. If ae refers to any regs in overwritten_regs, which contains the 241 | registers overwitten already for previous arguments, then use the stored 242 | version on the stack. *) 243 | let rec setup_args (aes : atomic_exp list) (remaining_regs : reg list) 244 | (overwritten_regs : reg list) : instruction list = 245 | match (aes, remaining_regs) with 246 | | ([], _) -> [] 247 | | (a :: aes, next :: regs) -> 248 | (match setup_arg overwritten_regs next a with 249 | | None -> [] 250 | | Some arg -> [Zmov arg]) 251 | @ 252 | setup_args aes regs (next :: overwritten_regs) 253 | | _ -> 254 | raise (TODO "InstrSelX86 does not support function calls with more than 6 arguments") 255 | 256 | (* --------------- End calling convention ------------- *) 257 | 258 | let op_to_cc b op = 259 | match (b, op) with 260 | | (true, Lt) -> 261 | Z_L 262 | | (false, Lt) -> 263 | Z_NL 264 | | (true, Gt) -> 265 | Z_G 266 | | (false, Gt) -> 267 | Z_NG 268 | | (true, Eq) -> 269 | Z_E 270 | | (false, Eq) -> 271 | Z_NE 272 | 273 | let reverse_op2 op = 274 | match op with 275 | | Gt -> Lt 276 | | Lt -> Gt 277 | | Eq -> Eq 278 | 279 | (* Return a boolean true if the condition needs to be negated *) 280 | let test_to_x86 ae1 op ae2 b (label : string) : instruction list = 281 | match (ae1, ae2) with 282 | | (Ident i, _) -> 283 | let (instrs, dest_src) = rm_ae_to_dest_src (var_to_rm i) ae2 in 284 | instrs @ 285 | [Zbinop (Zcmp, dest_src); 286 | Zjcc (op_to_cc b op, label)] 287 | | (_, Ident i) -> 288 | let (instrs, dest_src) = rm_ae_to_dest_src (var_to_rm i) ae1 in 289 | instrs @ 290 | [Zbinop (Zcmp, dest_src); 291 | Zjcc (op_to_cc b (reverse_op2 op), label)] 292 | | (Num n1, Num n2) -> 293 | let do_jump = 294 | match op with 295 | | Gt -> (Int64.compare n1 n2 > 0) = b 296 | | Lt -> (Int64.compare n1 n2 < 0) = b 297 | | Eq -> (Int64.compare n1 n2 = 0) = b 298 | in 299 | if do_jump then 300 | [Zjcc (Z_ALWAYS, label)] 301 | else 302 | [] 303 | 304 | let rec be_to_x86 safe be : instruction list = 305 | match be with 306 | | AssignOp (v, Num imm, ((T.Lt | T.Gt | T.Eq) as op), ae2) -> 307 | (* constant prop ensures both aren't immediate *) 308 | be_to_x86 safe (AssignOp (v, ae2, reverse_op op, Num imm)) 309 | | AssignOp (v, Ident v2, ((T.Lt | T.Gt | T.Eq) as op), ae2) -> 310 | let (instrs, cmp_arg) = rm_ae_to_dest_src (var_to_rm v2) ae2 in 311 | let cmp_instrs = instrs @ [Zbinop (Zcmp, cmp_arg)] in 312 | (match var_to_rm v with 313 | | Zm _ as m -> 314 | cmp_instrs @ 315 | [Zset (op_to_cond op, B r_scratch); 316 | Zbinop (Zand, Zrm_i (Zr r_scratch, 1L)); 317 | Zmov (Zrm_r (m, r_scratch))] 318 | | Zr r -> 319 | cmp_instrs @ 320 | [Zset (op_to_cond op, B r); 321 | Zbinop (Zand, Zrm_i (Zr r, 1L))]) 322 | | AssignOp (v, ae1, ((T.Plus | T.Minus | T.Lshift | T.BitOr 323 | | T.BitAnd | T.Times | T.Div) as op), ae2) -> 324 | (match (var_to_rm v, ae1) with 325 | | (Zr r1, Num imm2) -> 326 | (* r1 := imm2 op m/r3 --> mov r1, imm2; op r1, m/r3 *) 327 | Zmov (Zrm_i (Zr r1, imm2)) :: build_to_reg_op op r1 ae2 328 | | (Zr r1, Ident var) -> 329 | (* r1 := m/r2 op m/r3/imm3 --> mov r1, m/r2; op r1, m/r3/imm3 *) 330 | Zmov (Zr_rm (r1, var_to_rm var)) :: build_to_reg_op op r1 ae2 331 | | (Zm _ as m1, Num imm2) -> 332 | (* m1 := imm2 op m/r3 --> 333 | mov r_scratch, imm2; op r_scratch, m/r3; mov m1, r_scratch *) 334 | Zmov (Zrm_i (Zr r_scratch, imm2)) :: 335 | build_to_reg_op op r_scratch ae2 @ 336 | [Zmov (Zrm_r (m1, r_scratch))] 337 | | (Zm _ as m1, Ident var) -> 338 | (* m1 := m/r2 op m/r3/imm3 --> 339 | mov r_scratch, m/r2; op r_scratch, m/r3/imm3; mov m1, r_scratch *) 340 | Zmov (Zr_rm (r_scratch, var_to_rm var)) :: 341 | build_to_reg_op op r_scratch ae2 @ 342 | [Zmov (Zrm_r (m1, r_scratch))]) 343 | | AssignOp (_, _, (T.And | T.Or), _) -> 344 | raise (Util.InternalError "And/Or in instrSelX86") 345 | | AssignAtom (v, ae) -> 346 | let (instrs, mov_arg) = rm_ae_to_dest_src (var_to_rm v) ae in 347 | instrs @ [Zmov mov_arg] 348 | | Ld (v1, v2, ae) -> 349 | let (instrs, src_rm) = heap_to_rm v2 ae in 350 | let (instrs2, dest_src) = rm_rm_to_dest_src (var_to_rm v1) src_rm in 351 | instrs @ 352 | instrs2 @ 353 | [Zmov dest_src] 354 | | St (v, ae1, ae2) -> 355 | let (instrs, dest_rm) = heap_to_rm v ae1 in 356 | let (instrs2, dest_src) = rm_ae_to_dest_src dest_rm ae2 in 357 | instrs @ 358 | instrs2 @ 359 | [Zmov dest_src] 360 | | Call (v, f, aes) -> 361 | let alloc_name = f in 362 | caller_save @ 363 | setup_args aes reg_list [] @ 364 | [Zcall alloc_name] @ 365 | caller_restore @ 366 | (match v with 367 | | None -> [] 368 | | Some v -> [Zmov (Zrm_r (var_to_rm v, r_scratch))]) 369 | | BoundCheck (a1, a2) -> 370 | if safe then 371 | test_to_x86 a1 Lt (Num 0L) true "bound_error" @ 372 | test_to_x86 a1 Lt a2 false "bound_error" 373 | else 374 | [] 375 | | NullCheck v -> 376 | if safe then 377 | test_to_x86 (Ident v) Eq (Num 0L) true "null_error" 378 | else 379 | [] 380 | 381 | (* Translate a function body to x86. safe determines whether to do null and 382 | bounds checks *) 383 | let to_x86 (safe : bool) (ll : L.linear list) (num_stack : int) 384 | : instruction list = 385 | (* We have to keep RSP 16 byte aligned, add a qword if necessary *) 386 | let num_stack = 387 | if num_stack mod 2 = 0 then 388 | (* We need to do an odd number of pushes over all, since the call has 389 | pushed rip for us. There are an even number of callee saved registers, 390 | including rbp. *) 391 | num_stack + 1 392 | else 393 | num_stack 394 | in 395 | (* Save the old base pointer *) 396 | [Zpush (Zi_rm (Zr RBP))] @ 397 | (* Set a new base pointer *) 398 | [Zmov (Zr_rm (RBP, Zr RSP))] @ 399 | (* Extend the stack for the local variables *) 400 | [Zbinop (Zsub, Zrm_i (Zr RSP, Int64.mul (Int64.of_int num_stack) 8L))] @ 401 | (* Save other the callee save registers, odd number *) 402 | callee_save @ 403 | List.flatten 404 | (List.map 405 | (fun l -> 406 | match l with 407 | | L.Instr be -> be_to_x86 safe be 408 | | L.Return None -> callee_restore @ [Zleave; Zret] 409 | | L.Return (Some v) -> Zmov (Zr_rm (RAX, var_to_rm v)) :: callee_restore @ [Zleave; Zret] 410 | | L.CJump ((ae1, op, ae2), b, s) -> 411 | test_to_x86 ae1 op ae2 b s 412 | | L.Jump s -> 413 | [Zjcc (Z_ALWAYS, s)] 414 | | L.Label s -> 415 | [Zlabel s]) 416 | ll) 417 | -------------------------------------------------------------------------------- /src/blockStructure.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* A control flow graph representation with basic blocks, and the source AST -> 20 | CGF algorithm. Also compiles arrays to loads and stores. *) 21 | 22 | open Util 23 | module S = SourceAst 24 | 25 | type var = 26 | | Vreg of int 27 | | Stack of int 28 | | Global of string 29 | | NamedSource of string * SourceAst.scope 30 | | NamedTmp of string * int 31 | 32 | let var_cmp_index (v : var) : int = 33 | match v with 34 | | Vreg _ -> 0 35 | | Stack _ -> 1 36 | | Global _ -> 2 37 | | NamedSource _ -> 3 38 | | NamedTmp _ -> 4 39 | 40 | let compare_var v1 v2 = 41 | match (v1, v2) with 42 | | (Vreg i1, Vreg i2) -> compare i1 i2 43 | | (Stack i1, Stack i2) -> compare i1 i2 44 | | (Global s1, Global s2) -> String.compare s1 s2 45 | | (NamedSource (s1, scope1), NamedSource (s2, scope2)) -> 46 | let c = SourceAst.compare_scope scope1 scope2 in 47 | if c = 0 then 48 | String.compare s1 s2 49 | else 50 | c 51 | | (NamedTmp (s1, i1), NamedTmp (s2, i2)) -> 52 | let c = String.compare s1 s2 in 53 | if c = 0 then 54 | compare i1 i2 55 | else 56 | c 57 | | _ -> compare (var_cmp_index v1) (var_cmp_index v2) 58 | 59 | let show_var v = 60 | match v with 61 | | Vreg i -> "_r_" ^ string_of_int i 62 | | Stack i -> "_s_" ^ string_of_int i 63 | | Global i -> "_g_" ^ i 64 | | NamedSource (s,_) -> s 65 | | NamedTmp (s, i) -> "_tmp_" ^ s ^ string_of_int i 66 | 67 | let pp_var fmt v = 68 | Format.fprintf fmt "%s" (show_var v) 69 | 70 | module VarCmp = struct 71 | type t = var 72 | let compare = compare_var 73 | end 74 | 75 | module Varset' = Set.Make(VarCmp) 76 | 77 | module Varset = struct 78 | include Varset' 79 | let show s = show_list show_var (elements s) 80 | let pp fmt s = Format.fprintf fmt "%a" (pp_set pp_var) (elements s) 81 | end 82 | 83 | module Varmap = Map.Make(VarCmp) 84 | 85 | (* Atomic expressions *) 86 | type atomic_exp = 87 | | Ident of var 88 | | Num of int64 89 | 90 | let show_atomic_exp ae = 91 | match ae with 92 | | Ident v -> show_var v 93 | | Num i -> Int64.to_string i 94 | 95 | let pp_atomic_exp fmt ae = 96 | Format.fprintf fmt "%s" (show_atomic_exp ae) 97 | 98 | (* A single entry in a basic block *) 99 | type block_elem = 100 | | AssignOp of var * atomic_exp * Tokens.op * atomic_exp 101 | | AssignAtom of var * atomic_exp 102 | (* Ld (x,y,e) represents x := *(y+e) *) 103 | | Ld of var * var * atomic_exp 104 | (* St (x,e1,e2) represents *(x+e1) := e2 *) 105 | | St of var * atomic_exp * atomic_exp 106 | (* Call (x, f, aes) represents x := f(aes) *) 107 | | Call of var option * string * atomic_exp list 108 | (* BoundCheck (a1, a2) represents assert (a1 >= 0 && a1 < a2) *) 109 | | BoundCheck of atomic_exp * atomic_exp 110 | (* NullCheck v represents assert (v <> 0) *) 111 | | NullCheck of var 112 | 113 | let pp_block_elem fmt be = 114 | match be with 115 | | AssignOp (v, ae1, op, ae2) -> 116 | Format.fprintf fmt "%a := %a %s %a" 117 | pp_var v 118 | pp_atomic_exp ae1 119 | (Tokens.show_op op) 120 | pp_atomic_exp ae2 121 | | AssignAtom (v, ae) -> 122 | Format.fprintf fmt "%a := %a" 123 | pp_var v 124 | pp_atomic_exp ae 125 | | Ld (v1, v2, ae) -> 126 | Format.fprintf fmt "%a := *(%a+%a)" 127 | pp_var v1 128 | pp_var v2 129 | pp_atomic_exp ae 130 | | St (v, ae1, ae2) -> 131 | Format.fprintf fmt "*(%a+%a) := %a" 132 | pp_var v 133 | pp_atomic_exp ae1 134 | pp_atomic_exp ae2 135 | | Call (Some v, x, aes) -> 136 | Format.fprintf fmt "%a := %s%a" 137 | pp_var v 138 | x 139 | (pp_list pp_atomic_exp) aes 140 | | Call (None, x, aes) -> 141 | Format.fprintf fmt "%s%a" 142 | x 143 | (pp_list pp_atomic_exp) aes 144 | | BoundCheck (a1, a2) -> 145 | Format.fprintf fmt "assert (%a >= 0 && %a < %a)" 146 | pp_atomic_exp a1 147 | pp_atomic_exp a1 148 | pp_atomic_exp a2 149 | | NullCheck v -> 150 | Format.fprintf fmt "assert (%a <> 0)" 151 | pp_var v 152 | 153 | 154 | let show_block_elem be = 155 | pp_block_elem Format.str_formatter be; 156 | Format.flush_str_formatter () 157 | 158 | type basic_block = block_elem list 159 | 160 | type test_op = 161 | | Lt 162 | | Gt 163 | | Eq 164 | 165 | let pp_test_op fmt op = 166 | match op with 167 | | Lt -> Format.fprintf fmt "<" 168 | | Gt -> Format.fprintf fmt ">" 169 | | Eq -> Format.fprintf fmt "=" 170 | 171 | type test = atomic_exp * test_op * atomic_exp 172 | 173 | let pp_test fmt (ae1, op, ae2) = 174 | Format.fprintf fmt "%a %a %a" 175 | pp_atomic_exp ae1 176 | pp_test_op op 177 | pp_atomic_exp ae2 178 | 179 | (* A basic block is either at the end of the function, returning a var, or 180 | there is an unconditional jump out of it, or a branch out of it to the 181 | blocks indexed by the int. *) 182 | type next_block = 183 | | Return of var option 184 | | Next of int 185 | (* The first int is the block number if the ident is true, and the second if 186 | * it is false *) 187 | | Branch of test * int * int 188 | 189 | let pp_next_block fmt nb = 190 | match nb with 191 | | Return (Some v) -> Format.fprintf fmt "return %a" pp_var v 192 | | Return None -> Format.fprintf fmt "return" 193 | | Next i -> Format.fprintf fmt "B%d" i 194 | | Branch (t,i1,i2) -> Format.fprintf fmt "if@ %a@ then@ B%d@ else@ B%d" 195 | pp_test t 196 | i1 197 | i2 198 | 199 | (* An adjacency list representation for the CFG *) 200 | type cfg_entry = 201 | { bnum : int; elems : block_elem list; next : next_block; 202 | mutable started : bool; mutable finished : bool } 203 | 204 | let pp_cfg_entry fmt e = 205 | Format.fprintf fmt "@[[B%d:@ %a@ %a]@]" 206 | e.bnum 207 | (pp_list pp_block_elem) e.elems 208 | pp_next_block e.next 209 | 210 | type cfg = cfg_entry list 211 | 212 | let pp_cfg fmt es = pp_list pp_cfg_entry fmt es 213 | 214 | let cfg_to_graphviz fmt (cfg : cfg) : unit = 215 | Format.fprintf fmt "digraph {@\nnode[shape=box]@\n%a@\n}" 216 | (fun fmt cfg -> 217 | List.iter 218 | (fun entry -> 219 | Format.fprintf fmt "%d[label=\"%a\"]" 220 | entry.bnum 221 | (fun fmt e -> 222 | if entry.bnum = 0 then 223 | Format.fprintf fmt "ENTRY\\l" 224 | else 225 | (); 226 | List.iter (fun x -> Format.fprintf fmt "%s\\l" (show_block_elem x)) 227 | e.elems; 228 | match e.next with 229 | | Branch (t,_,_) -> Format.fprintf fmt "%a\\l" pp_test t 230 | | Return None -> Format.fprintf fmt "RETURN\\l" 231 | | Return (Some i) -> Format.fprintf fmt "RETURN %s\\l" (show_var i) 232 | | _ -> ()) 233 | entry; 234 | Format.fprintf fmt "@\n"; 235 | match entry.next with 236 | | Return _ -> () 237 | | Next i -> 238 | Format.fprintf fmt "%d->%d@\n" entry.bnum i 239 | | Branch (_, i1, i2) -> 240 | Format.fprintf fmt "%d->%d[label=1]@\n" entry.bnum i1; 241 | Format.fprintf fmt "%d->%d[label=0]@\n" entry.bnum i2) 242 | cfg) 243 | cfg 244 | 245 | let id_to_var (i : S.id) : var = 246 | match i with 247 | | S.Source (s, None) -> 248 | raise (InternalError ("un-scoped source identifier in blockStructure: " ^ 249 | S.show_id (S.Source (s, None)))) 250 | | S.Source (s, Some scope) -> NamedSource (s, scope) 251 | | S.Temp (s, i) -> NamedTmp (s, i) 252 | 253 | let bool_to_num b = 254 | if b then Num 1L else Num 0L 255 | 256 | (* Convert an amomic source expression *) 257 | let exp_to_atomic (e : S.exp) : atomic_exp = 258 | match e with 259 | | S.Ident (id, []) -> Ident (id_to_var id) 260 | | S.Num n -> Num n 261 | | S.Bool b -> bool_to_num b 262 | | S.Ident (_, _::_) | S.Op _ | S.Uop _ | S.Array _ | S.Call _ -> 263 | raise (InternalError "non-flat expression in blockStructure") 264 | 265 | let tmp_var = NamedTmp("BS", 0) 266 | 267 | (* Convert x := e into block elements, returned in *reverse* order *) 268 | let flat_e_to_assign (x : S.id) (e : S.exp) : block_elem list = 269 | let v = id_to_var x in 270 | match e with 271 | | S.Ident (id, []) -> 272 | [AssignAtom (v, Ident (id_to_var id))] 273 | | S.Ident (id, [ae]) -> 274 | let ae = exp_to_atomic ae in 275 | (* v := id[ae] --> tmp_var := *id; 276 | assert ae < length_var; 277 | v := *(id + (ae+1) * 8) *) 278 | let get_len = Ld (tmp_var, id_to_var id, Num 0L) in 279 | (match ae with 280 | | Num n -> 281 | [Ld (v, id_to_var id, Num (Int64.shift_left (Int64.add n 1L) 3)); 282 | BoundCheck (ae, Ident tmp_var); 283 | get_len; 284 | NullCheck (id_to_var id)] 285 | | _ -> 286 | [Ld (v, id_to_var id, Ident tmp_var); 287 | AssignOp (tmp_var, Ident tmp_var, Tokens.Lshift, Num 3L); 288 | AssignOp (tmp_var, ae, Tokens.Plus, Num 1L); 289 | BoundCheck (ae, Ident tmp_var); 290 | get_len; 291 | NullCheck (id_to_var id)]) 292 | | S.Ident (_, _::_::_) -> 293 | raise (InternalError "multi-dimension array index in blockStructure") 294 | | S.Num n -> [AssignAtom (v, Num n)] 295 | | S.Bool b -> [AssignAtom (v, bool_to_num b)] 296 | | S.Op (ae1, op, ae2) -> 297 | [AssignOp (v, exp_to_atomic ae1, op, exp_to_atomic ae2)] 298 | | S.Uop (Tokens.Not, ae) -> 299 | (* !x == (x = false) *) 300 | [AssignOp (v, exp_to_atomic ae, Tokens.Eq, Num 0L)] 301 | | S.Array es -> 302 | [Call (Some v, "allocate" ^ string_of_int (List.length es), 303 | List.map exp_to_atomic es)] 304 | | S.Call (f, es) -> 305 | [Call (Some v, S.show_id f, List.map exp_to_atomic es)] 306 | 307 | let op_to_test_op op = 308 | match op with 309 | | Tokens.Lt -> Lt 310 | | Tokens.Gt -> Gt 311 | | Tokens.Eq -> Eq 312 | | _ -> raise (InternalError "non-test operator in test in blockStructure") 313 | 314 | let flat_exp_to_test (e : S.exp) : test = 315 | match e with 316 | | S.Ident (id, []) -> 317 | (Ident (id_to_var id), Eq, Num 1L) 318 | | S.Ident (_, _::_) -> 319 | raise (InternalError "array index in test position in blockStructure") 320 | | S.Num _ -> 321 | raise (InternalError "number in test position in blockStructure") 322 | | S.Bool b -> 323 | (bool_to_num b, Eq, Num 1L) 324 | | S.Op (ae1, op, ae2) -> 325 | (exp_to_atomic ae1, op_to_test_op op, exp_to_atomic ae2) 326 | | S.Uop (Tokens.Not, ae) -> 327 | (* !x == (x = false) *) 328 | (exp_to_atomic ae, Eq, Num 0L) 329 | | S.Array _ -> 330 | raise (InternalError "array alloc test in blockStructure") 331 | | S.Call _ -> 332 | raise (InternalError "function call test in blockStructure") 333 | 334 | (* Build the control-flow graph *) 335 | let build_cfg (stmts : S.stmt list) : cfg = 336 | (* A counter to get new indices for blocks *) 337 | let next_block_num = ref 0 in 338 | let get_block_num () = 339 | let x = !next_block_num in 340 | next_block_num := 1 + !next_block_num; 341 | x 342 | in 343 | 344 | (* Store the cfg here as we build it *) 345 | let the_cfg = ref [] in 346 | let add_block (num : int) (block : basic_block) (next : next_block) : unit = 347 | the_cfg := { bnum = num; elems = List.rev block; next = next; 348 | started = false; finished = false} :: !the_cfg 349 | in 350 | 351 | (* Convert stmts to basic blocks, and add them to the_cfg. 352 | block_num is the index for the block being created. 353 | block_acc contains the elements seen so far, in *reverse* order. 354 | ret_block is the control flow out of the block being created. 355 | 356 | The AST must be in a restricted form: 357 | - The expressions must all be unnested 358 | (i.e. UnnestExp.is_flat returns true). 359 | - && and || must have been removed. 360 | - Multi-dimensional array indexing must have been removed. 361 | - No statements can follow a return 362 | - All paths must terminate in a return *) 363 | let rec find_blocks (block_num : int) (block_acc : basic_block) 364 | (following_block : next_block) (stmts : S.stmt list) : unit = 365 | match stmts with 366 | | [] -> add_block block_num block_acc following_block 367 | | S.Assign (x, [], e) :: s1 -> 368 | find_blocks block_num (flat_e_to_assign x e @ block_acc) following_block s1 369 | | S.Assign (x, [ae], e) :: s1 -> 370 | let ae = exp_to_atomic ae in 371 | (* x[ae] := e --> tmp_var := *x; 372 | assert ae < length_var; 373 | *(x + (ae+1) * 8) := e *) 374 | let get_len = Ld (tmp_var, id_to_var x, Num 0L) in 375 | let new_block_elems = 376 | (match ae with 377 | | Num n -> 378 | [St (id_to_var x, Num (Int64.shift_left (Int64.add n 1L) 3), 379 | exp_to_atomic e); 380 | BoundCheck (ae, Ident tmp_var); 381 | get_len; 382 | NullCheck (id_to_var x)] 383 | | _ -> 384 | [St (id_to_var x, Ident tmp_var, exp_to_atomic e); 385 | AssignOp (tmp_var, Ident tmp_var, Tokens.Lshift, Num 3L); 386 | AssignOp (tmp_var, ae, Tokens.Plus, Num 1L); 387 | BoundCheck (ae, Ident tmp_var); 388 | get_len; 389 | NullCheck (id_to_var x)]) 390 | in 391 | find_blocks block_num (new_block_elems @ block_acc) following_block s1 392 | | S.Assign (_, _::_::_, _) :: _ -> 393 | raise (InternalError "multi-dimension array index in blockStructure") 394 | | S.Stmts s1 :: s2 -> 395 | (* Treat { s1 ... sn } s1' ... sn' as though it were 396 | s1 ... sn s1' ... sn' *) 397 | find_blocks block_num block_acc following_block (s1 @ s2) 398 | | S.DoWhile (s0, e, s1) :: s2 -> 399 | let header_block_n = get_block_num () in 400 | let body_block_n = get_block_num () in 401 | add_block block_num block_acc (Next header_block_n); 402 | find_blocks body_block_n [] (Next header_block_n) [s1]; 403 | (match (s2, following_block) with 404 | | ([], Next i) -> 405 | find_blocks header_block_n [] 406 | (Branch (flat_exp_to_test e, body_block_n, i)) 407 | [s0] 408 | | _ -> 409 | let following_block_n = get_block_num () in 410 | find_blocks header_block_n [] 411 | (Branch (flat_exp_to_test e, body_block_n, following_block_n)) 412 | [s0]; 413 | find_blocks following_block_n [] following_block s2) 414 | | S.Ite (e, s1, s2) :: s3 -> 415 | let true_block_n = get_block_num () in 416 | let false_block_n = get_block_num () in 417 | add_block block_num block_acc 418 | (Branch (flat_exp_to_test e, true_block_n, false_block_n)); 419 | if s3 = [] then 420 | (find_blocks true_block_n [] following_block [s1]; 421 | find_blocks false_block_n [] following_block [s2]) 422 | else 423 | let following_block_n = get_block_num () in 424 | find_blocks true_block_n [] (Next following_block_n) [s1]; 425 | find_blocks false_block_n [] (Next following_block_n) [s2]; 426 | find_blocks following_block_n [] following_block s3 427 | | S.In x :: s -> 428 | find_blocks block_num (Call (Some (id_to_var x),"input", []) :: block_acc) 429 | following_block s 430 | | S.Out x :: s -> 431 | find_blocks block_num 432 | (Call (None, "output", [Ident (id_to_var x)]) :: block_acc) following_block s 433 | | ((S.Loc (s1, _)) :: s2) -> 434 | find_blocks block_num block_acc following_block (s1::s2) 435 | | [S.Return (Some x)] -> 436 | add_block block_num block_acc (Return (Some (id_to_var x))) 437 | | [S.Return None] -> 438 | add_block block_num block_acc (Return None) 439 | | S.Return _ :: _ -> 440 | raise (InternalError "return followed by statements in blockStructure") 441 | 442 | in 443 | 444 | (* Later on we rely on the starting block being #0 *) 445 | let init_block = get_block_num () in 446 | find_blocks init_block [] (Return None) stmts; 447 | !the_cfg 448 | -------------------------------------------------------------------------------- /src/sourceAst.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Example compiler 3 | * Copyright (C) 2015-2017 Scott Owens 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | *) 18 | 19 | (* The language's AST, and a predictive, recursive descent parser. See the 20 | ../README.md for the grammar. *) 21 | 22 | open Util 23 | module T = Tokens 24 | module F = Format 25 | 26 | (* Type of identifiers. Source ones come from the original program, and Temp 27 | ones come from intermediate compilation stages. This makes it easy to avoid 28 | unintentional conflicts. The string on a Temp should name the stage that 29 | introduced it. The scope indicates where the identifier is bound, if that is 30 | known. We expect the parse to generate unknown scopes and for the type 31 | checker to fill them in. All Temps are considered local in scope. *) 32 | type scope = 33 | | Global 34 | | Parameter 35 | | Local 36 | 37 | type id = 38 | | Source of string * scope option 39 | | Temp of string * int 40 | 41 | let show_scope (s : scope) : string = 42 | match s with 43 | | Global -> "Global" 44 | | Parameter -> "Parameter" 45 | | Local -> "Local" 46 | 47 | let show_id (i : id) : string = 48 | match i with 49 | | Source (s, None) -> s 50 | | Source (s, Some scope) -> s ^ "_" ^ show_scope scope 51 | | Temp (s,i) -> "_tmp_" ^ s ^ string_of_int i 52 | 53 | (* Pretty-print an identifier *) 54 | let pp_id (fmt : Format.formatter) (i : id) : unit = 55 | F.fprintf fmt "%s" (show_id i) 56 | 57 | (* Construct a total order on ids so that we can use them as keys in maps. 58 | OCaml maps are implemented with balanced binary trees *) 59 | let scope_to_int s = 60 | match s with 61 | | Global -> 0 62 | | Parameter -> 1 63 | | Local -> 2 64 | 65 | (* Compare two scopes *) 66 | let compare_scope (s1 : scope) (s2 : scope) : int = 67 | compare (scope_to_int s1) (scope_to_int s2) 68 | 69 | (* Compare two identidiers *) 70 | let compare_id (id1 : id) (id2 : id) : int = 71 | match (id1, id2) with 72 | | (Source (s1, scope1), Source (s2, scope2)) -> 73 | let c = 74 | option_compare compare_scope scope1 scope2 75 | in 76 | if c = 0 then 77 | String.compare s1 s2 78 | else 79 | c 80 | | (Temp _, Source _) -> -1 81 | | (Source _, Temp _) -> 1 82 | | (Temp (s1,i1), Temp (s2,i2)) -> 83 | let c = String.compare s1 s2 in 84 | if c = 0 then 85 | compare i1 i2 86 | else 87 | c 88 | 89 | module Idord = struct 90 | type t = id 91 | let compare = compare_id 92 | end 93 | 94 | (* Build Map module specialised to keys of type id *) 95 | module Idmap = Map.Make(Idord) 96 | 97 | (* AST of expressions *) 98 | type exp = 99 | | Ident of id * exp list 100 | | Call of id * exp list 101 | | Num of int64 102 | | Bool of bool 103 | | Op of exp * T.op * exp 104 | | Uop of T.uop * exp 105 | (* Allocate a new array of given dimensions. Initialise to 0 *) 106 | | Array of exp list 107 | 108 | (* pretty printing for expressions *) 109 | 110 | (* Print a list, where each element is enclosed in [] *) 111 | let rec pp_array_list (f : F.formatter -> 'a -> unit) 112 | (fmt : F.formatter) (l : 'a list) 113 | : unit = 114 | match l with 115 | | [] -> () 116 | | (h::t) -> 117 | F.fprintf fmt "[@[%a@]]@,%a" 118 | f h 119 | (pp_array_list f) t 120 | 121 | (* Print a list, surrounded by (), separating the items with , *) 122 | let pp_call (f : F.formatter -> 'a -> unit) (fmt : F.formatter) 123 | (l : 'a list) 124 | : unit = 125 | let rec pp fmt l = 126 | match l with 127 | | [] -> () 128 | | [h] -> 129 | F.fprintf fmt "%a" 130 | f h 131 | | (h::t) -> 132 | F.fprintf fmt "%a,@ %a" 133 | f h 134 | pp t 135 | in 136 | F.fprintf fmt "(@[%a@])" 137 | pp l 138 | 139 | (* Pretty print an expression *) 140 | let rec pp_exp (fmt : F.formatter) (exp : exp) : unit = 141 | match exp with 142 | | Ident (id, []) -> 143 | F.fprintf fmt "%a" 144 | pp_id id 145 | | Ident (id, es) -> 146 | F.fprintf fmt "%a%a" 147 | pp_id id 148 | (pp_array_list pp_exp) es 149 | | Call (id, es) -> 150 | F.fprintf fmt "%a%a" 151 | pp_id id 152 | (pp_call pp_exp) es 153 | | Num n -> F.fprintf fmt "%Ld" n 154 | | Bool true -> F.fprintf fmt "true" 155 | | Bool false -> F.fprintf fmt "false" 156 | | Op (e1, op, e2) -> 157 | F.fprintf fmt "(@[%a@ %s@ %a@])" 158 | pp_exp e1 159 | (Tokens.show_op op) 160 | pp_exp e2 161 | | Uop (uop, e) -> 162 | F.fprintf fmt "@[<2>%s@ %a@]" 163 | (Tokens.show_uop uop) 164 | pp_exp e 165 | | Array es -> 166 | F.fprintf fmt "@[<2>array@ %a@]" 167 | (pp_array_list pp_exp) es 168 | 169 | (* AST of statements *) 170 | type stmt = 171 | | Assign of id * exp list * exp 172 | (* A generalised do/while loop. Always execute the first statement, then 173 | the test, then repeatedly do the 2nd, then first statement and then test 174 | 'while e s' becomes DoWhile (Stmts [], e, s) and 'do s while e' becomes 175 | DoWhile (s, e, Stmts []) *) 176 | | DoWhile of stmt * exp * stmt 177 | | Ite of exp * stmt * stmt 178 | | Stmts of stmt list 179 | | In of id 180 | | Out of id 181 | | Return of id option 182 | | Loc of stmt * int (* annotate a statement with it's source line number *) 183 | 184 | (* Pretty-print a statement *) 185 | let rec pp_stmt (fmt : F.formatter) (stmt : stmt) : unit = 186 | match stmt with 187 | | Assign (id, [], e) -> 188 | F.fprintf fmt "@[<2>%a :=@ %a@]" 189 | pp_id id 190 | pp_exp e 191 | | Assign (id, es, e) -> 192 | F.fprintf fmt "@[<2>%a%a :=@ %a@]" 193 | pp_id id 194 | (pp_array_list pp_exp) es 195 | pp_exp e 196 | | DoWhile (Stmts [], e, s) -> 197 | F.fprintf fmt "@[<2>while@ %a@ %a@]" 198 | pp_exp e 199 | pp_stmt s 200 | | DoWhile (s, e, Stmts []) -> 201 | F.fprintf fmt "@[<2>do@ %a@ while@ %a@]" 202 | pp_stmt s 203 | pp_exp e 204 | | DoWhile (s1, e, s2) -> 205 | F.fprintf fmt "@[<2>do@ %a@ while@ %a@ %a@]" 206 | pp_stmt s1 207 | pp_exp e 208 | pp_stmt s2 209 | | Ite (e, s1, s2) -> 210 | F.fprintf fmt "@[<2>if@ %a@ then@ %a@ else@ %a@]" 211 | pp_exp e 212 | pp_stmt s1 213 | pp_stmt s2 214 | | Stmts slist -> 215 | F.fprintf fmt "{@\n%a}" 216 | pp_stmts slist 217 | | In i -> 218 | F.fprintf fmt "@[<2>input@ %a@]" 219 | pp_id i 220 | | Out i -> 221 | F.fprintf fmt "@[<2>output@ %a@]" 222 | pp_id i 223 | | Return None -> 224 | F.fprintf fmt "return" 225 | | Return (Some i) -> 226 | F.fprintf fmt "@[<2>return@ %a@]" 227 | pp_id i 228 | | Loc (s, _) -> 229 | pp_stmt fmt s 230 | 231 | and pp_stmts (fmt : F.formatter) (stmts : stmt list) : unit = 232 | let rec pp fmt stmts = 233 | match stmts with 234 | | [] -> () 235 | | stmt::stmts -> 236 | F.fprintf fmt "%a@\n%a" 237 | pp_stmt stmt 238 | pp stmts 239 | in 240 | F.fprintf fmt "@[%a@]" 241 | pp stmts 242 | 243 | (* AST of types *) 244 | type typ = 245 | | Int 246 | | Bool 247 | (* An int array with the given number of dimensions *) 248 | | Array of int 249 | 250 | (* Pretty-print a type *) 251 | let pp_typ (fmt : F.formatter) (t : typ) : unit = 252 | match t with 253 | | Int -> F.fprintf fmt "int" 254 | | Bool -> F.fprintf fmt "bool" 255 | | Array n -> F.fprintf fmt "array@ %d" n 256 | 257 | (* AST of variable and function declarations *) 258 | type var_dec = { var_name : id; typ : typ; init : exp; loc : int option } 259 | 260 | type func = { fun_name : id; params : (id * typ) list; ret : typ; 261 | locals : var_dec list; body : stmt list; loc : int option} 262 | 263 | (* Pretty-print a variable declaration *) 264 | let pp_var_dec (fmt : F.formatter) (d : var_dec) : unit = 265 | F.fprintf fmt "@[<2>let@ %a@ :@ %a@ =@ %a@]" 266 | pp_id d.var_name 267 | pp_typ d.typ 268 | pp_exp d.init 269 | 270 | (* Pretty-print variable declarations, 1 per line *) 271 | let pp_var_decs (fmt : F.formatter) (decs : var_dec list) : unit = 272 | let rec pp fmt decs = 273 | match decs with 274 | | [] -> () 275 | | var_dec::decs -> 276 | F.fprintf fmt "%a@\n%a" 277 | pp_var_dec var_dec 278 | pp decs 279 | in 280 | F.fprintf fmt "@[%a@]" 281 | pp decs 282 | 283 | (* Pretty-print function parameters *) 284 | let rec pp_params (fmt : F.formatter) (params : (id * typ) list) : unit = 285 | match params with 286 | | [] -> () 287 | | (n,t)::params -> 288 | F.fprintf fmt "@[(%a@ :@ %a)@]@ %a" 289 | pp_id n 290 | pp_typ t 291 | pp_params params 292 | 293 | (* Pretty-print a function *) 294 | let pp_func (fmt : F.formatter) (func : func) : unit = 295 | F.fprintf fmt "@[<2>function@ %a@ %a@ :@ %a@ {@\n%a%a}@]@\n" 296 | pp_id func.fun_name 297 | pp_params func.params 298 | pp_typ func.ret 299 | pp_var_decs func.locals 300 | pp_stmts func.body 301 | 302 | (* AST of complete programs *) 303 | type prog = { globals : var_dec list; funcs : func list } 304 | 305 | (* Pretty-print a program *) 306 | let pp_program (fmt : F.formatter) (p : prog) : unit = 307 | pp_var_decs fmt p.globals; 308 | List.iter (pp_func fmt) p.funcs 309 | 310 | (* Raise a parse error *) 311 | let parse_error (ln : int) (msg : string) : 'a = 312 | raise (BadInput ("Parse error on line " ^ string_of_int ln ^ ": " ^ msg)) 313 | 314 | (* Raise a parse error explaining the expected token and the token actually there *) 315 | let parse_error_expect (ln : int) (given : T.token) (expect : T.token) 316 | (where : string) 317 | : 'a = 318 | raise (BadInput ("Parse error on line " ^ string_of_int ln ^ ": expected " ^ 319 | T.show_token expect ^ " in " ^ where ^ " but found " ^ 320 | T.show_token given)) 321 | 322 | 323 | (* Raise a parse error because the end of file was reached unexpectedly *) 324 | let eof_error (expect : string) : 'a = 325 | raise (BadInput ("Parse error: end of file while parsing " ^ expect)) 326 | 327 | (* Convert the first expression in toks into an AST. Return it with the left 328 | over tokens. *) 329 | let rec parse_atomic_exp (toks : T.tok_loc list) : exp * T.tok_loc list = 330 | match toks with 331 | | [] -> eof_error "an expression" 332 | | (T.Ident i, ln) :: (T.Lparen, _) ::toks -> 333 | let (args, toks) = parse_args ln toks in 334 | (Call (Source (i,None), args), toks) 335 | | (T.Ident i, _) :: toks -> 336 | let (indices, toks) = parse_indices toks in 337 | (Ident (Source (i,None), indices), toks) 338 | | (T.Num n, _) :: toks -> (Num n, toks) 339 | | (T.True, _) :: toks -> (Bool true, toks) 340 | | (T.False, _) :: toks -> (Bool false, toks) 341 | | (T.Op T.Minus, _) :: toks -> 342 | let (e, toks) = parse_atomic_exp toks in 343 | (Op (Num 0L, T.Minus, e), toks) 344 | | (T.Uop uop, _) :: toks -> 345 | let (e, toks) = parse_atomic_exp toks in 346 | (Uop (uop, e), toks) 347 | | (T.Array, _) :: toks -> 348 | let (indices, toks) = parse_indices toks in 349 | (Array indices, toks) 350 | | (T.Lparen, _) :: toks -> 351 | (match parse_exp toks with 352 | | (_, []) -> eof_error "a parenthesized expression" 353 | | (e, (T.Rparen, _) :: toks) -> 354 | (e, toks) 355 | | (_, (t, ln)::_) -> 356 | parse_error_expect ln t T.Rparen "a parenthesized expression") 357 | | (t, ln) :: _ -> 358 | parse_error ln ("bad expression, beginning with " ^ T.show_token t) 359 | 360 | and parse_exp (toks : T.tok_loc list) : exp * T.tok_loc list = 361 | match parse_atomic_exp toks with 362 | | (e1, (T.Op o, _) :: toks) -> 363 | let (e2, toks) = parse_atomic_exp toks in 364 | (Op (e1, o, e2), toks) 365 | | (e1, toks) -> (e1, toks) 366 | 367 | (* Parse 0 or more array indices. Return them with the left over tokens. *) 368 | and parse_indices (toks : T.tok_loc list) : exp list * T.tok_loc list = 369 | match toks with 370 | | (T.Lbrac, _) :: toks -> 371 | (match parse_exp toks with 372 | | (_, []) -> eof_error "an array index" 373 | | (e, (T.Rbrac, _) :: toks) -> 374 | let (es,toks) = parse_indices toks in 375 | (e::es, toks) 376 | | (_, (t, ln) :: _) -> 377 | parse_error_expect ln t T.Rbrac "an array index") 378 | | _ -> ([], toks) 379 | 380 | (* Parse 1 or more comma-separated expressions. Return them with the left over 381 | tokens. End on a closing parenthesis *) 382 | and parse_args ln (toks : T.tok_loc list) : exp list * T.tok_loc list = 383 | match parse_exp toks with 384 | | (_, []) -> eof_error "a function call expression" 385 | | (e, (T.Comma, _)::toks) -> 386 | let (es, toks) = parse_args ln toks in 387 | (e::es, toks) 388 | | (e, (T.Rparen, _)::toks) -> 389 | ([e], toks) 390 | | (_, (t, ln) :: _) -> 391 | parse_error ln ("expected " ^ T.show_token T.Comma ^ " or " ^ 392 | T.show_token T.Rparen ^ 393 | " in a function call expression but found " ^ 394 | T.show_token t) 395 | 396 | (* Convert the first statement in toks into an AST. Return it with the left 397 | over tokens *) 398 | let rec parse_stmt (toks : T.tok_loc list) : stmt * T.tok_loc list = 399 | match toks with 400 | | [] -> eof_error "a statement" 401 | | (T.Ident x, ln) :: toks -> 402 | (match parse_indices toks with 403 | | (_, []) -> eof_error "an assignment statement" 404 | | (indices, (T.Assign, _) :: toks) -> 405 | let (e, toks) = parse_exp toks in 406 | (Loc (Assign (Source (x, None), indices, e), ln), toks) 407 | | (_, (t, ln) :: _) -> 408 | parse_error_expect ln t T.Assign "an assignment statement") 409 | | (T.While, ln) :: toks -> 410 | let (e, toks) = parse_exp toks in 411 | let (s, toks) = parse_stmt toks in 412 | (Loc (DoWhile (Stmts [], e, s), ln), toks) 413 | | (T.Do, ln) :: toks -> 414 | (match parse_stmt toks with 415 | | (_, []) -> eof_error "a do statement" 416 | | (s, (T.While, _)::toks) -> 417 | let (e, toks) = parse_exp toks in 418 | (Loc (DoWhile (s, e, Stmts []), ln), toks) 419 | | (_, (t, ln) :: _) -> 420 | parse_error_expect ln t T.While "a do statement") 421 | | (T.If, ln) :: toks -> 422 | (match parse_exp toks with 423 | | (_, []) -> eof_error "an if statement" 424 | | (e, (T.Then, _) :: toks) -> 425 | (match parse_stmt toks with 426 | | (_, []) -> eof_error "an if statement" 427 | | (s1, (T.Else, _) :: toks) -> 428 | let (s2, toks) = parse_stmt toks in 429 | (Loc (Ite (e, s1, s2), ln), toks) 430 | | (_, (t, ln) :: _) -> 431 | parse_error_expect ln t T.Else "an if statement") 432 | | (_, (t, ln) :: _) -> 433 | parse_error_expect ln t T.Then "an if statement") 434 | | (T.Lcurly, ln) :: toks -> 435 | let (s_list, toks) = parse_stmt_list toks in 436 | (Loc (Stmts (s_list), ln), toks) 437 | | (T.Input, ln) :: (T.Ident x, _) :: toks -> (Loc (In (Source (x,None)), ln), toks) 438 | | (T.Output, ln) :: (T.Ident x, _) :: toks -> (Loc (Out (Source (x,None)), ln), toks) 439 | | (T.Return, ln) :: (T.Ident x, _) :: toks -> (Loc (Return (Some (Source (x,None))), ln), toks) 440 | | (t, ln) :: _ -> 441 | parse_error ln ("bad statement, beginning with a " ^ T.show_token t) 442 | 443 | (* Convert all of the statement in toks into an AST, stopping on a }. Return 444 | them with the left over tokens, not including the } *) 445 | and parse_stmt_list (toks : T.tok_loc list) : stmt list * T.tok_loc list = 446 | match toks with 447 | | ((T.Rcurly, _) :: toks) -> ([], toks) 448 | | _ -> 449 | let (s, toks) = parse_stmt toks in 450 | let (s_list, toks) = parse_stmt_list toks in 451 | (s::s_list, toks) 452 | 453 | (* Convert the first typ in toks into an AST. Return it with the left over 454 | tokens. *) 455 | let parse_typ (toks : T.tok_loc list) : typ * T.tok_loc list = 456 | match toks with 457 | | [] -> eof_error "a type" 458 | | (T.Int, _) :: toks -> (Int, toks) 459 | | (T.Bool, _) :: toks -> (Bool, toks) 460 | | (T.Array, _) :: (T.Num n, _) :: toks -> (Array (Int64.to_int n), toks) 461 | | (t,ln) :: _ -> 462 | parse_error ln ("bad type, beginning with a " ^ T.show_token t) 463 | 464 | (* Convert the first function parameter toks into an AST. Return it with the 465 | left over tokens. *) 466 | let parse_param (toks : T.tok_loc list) : (id * typ) * T.tok_loc list = 467 | match toks with 468 | | [] -> eof_error "a function parameter" 469 | | (T.Lparen, _) :: (T.Ident x, _) :: (T.Colon,_) :: toks -> 470 | (match parse_typ toks with 471 | | (_, []) -> eof_error "a function parameter" 472 | | (t, (T.Rparen, _)::toks) -> 473 | ((Source (x,None), t), toks) 474 | | (_, (t, ln) :: _) -> 475 | parse_error_expect ln t T.Rparen "a function parameter") 476 | | (t, ln) :: _ -> parse_error_expect ln t T.Lparen "a function parameter" 477 | 478 | (* Convert a list of function parameters in toks into an AST. Return it with 479 | the left over tokens. Recognise when the list is over by the next token not 480 | being Lparen *) 481 | let rec parse_param_list (toks : T.tok_loc list) : (id * typ) list * T.tok_loc list = 482 | match toks with 483 | | ((T.Lparen, _) :: _) -> 484 | let (v, toks) = parse_param toks in 485 | let (v_list, toks) = parse_param_list toks in 486 | (v::v_list, toks) 487 | | _ -> ([], toks) 488 | 489 | 490 | (* Convert the first variable declaration in toks into an AST. Return it with 491 | the left over tokens. *) 492 | let parse_var_dec (toks : T.tok_loc list) : var_dec * T.tok_loc list = 493 | match toks with 494 | | [] -> eof_error "a variable declaration" 495 | | (T.Let, ln) :: (T.Ident x, _) :: (T.Colon,_) :: toks -> 496 | (match parse_typ toks with 497 | | (_, []) -> eof_error "a variable declaration" 498 | | (t, (T.Op T.Eq, _)::toks) -> 499 | let (e, toks) = parse_exp toks in 500 | ({ var_name = Source (x,None); typ = t; init = e; loc = Some ln }, toks) 501 | | (_, (t, ln) :: _) -> 502 | parse_error_expect ln t (T.Op T.Eq) "a variable declaration") 503 | | (t, ln) :: _ -> parse_error_expect ln t T.Let "a variable declaration" 504 | 505 | (* Convert a list of variable declaration in toks into an AST. Return it with 506 | the left over tokens. Recognise when the list is over by the next token not 507 | being Let *) 508 | let rec parse_var_dec_list (toks : T.tok_loc list) : var_dec list * T.tok_loc list = 509 | match toks with 510 | | (T.Let, _) :: _ -> 511 | let (v, toks) = parse_var_dec toks in 512 | let (v_list, toks) = parse_var_dec_list toks in 513 | (v::v_list, toks) 514 | | _ -> ([], toks) 515 | 516 | (* Convert the first function declaration in toks into an AST. Return it with 517 | the left over tokens. *) 518 | let parse_func (toks : T.tok_loc list) : func * T.tok_loc list = 519 | match toks with 520 | | [] -> eof_error "a function declaration" 521 | | (T.Function, ln) :: (T.Ident x, _) :: toks -> 522 | (match parse_param_list toks with 523 | | (_, []) -> eof_error "a function declaration" 524 | | (params, (T.Colon, _) :: toks) -> 525 | if List.length params <> 0 then 526 | (match parse_typ toks with 527 | | (_, []) -> eof_error "a function declaration" 528 | | (t, (T.Lcurly, _) :: toks) -> 529 | let (var_decs, toks) = parse_var_dec_list toks in 530 | let (stmts, toks) = parse_stmt_list toks in 531 | ({ fun_name = Source (x,None); 532 | params = params; 533 | ret = t; 534 | locals = var_decs; 535 | body = stmts; 536 | loc = Some ln }, toks) 537 | | (_, (t, ln) :: _) -> 538 | parse_error_expect ln t T.Lcurly "a function declaration") 539 | else 540 | parse_error ln 541 | "bad function declaration, functions must have at least 1 parameter" 542 | | (_, (t, ln) :: _) -> 543 | parse_error_expect ln t T.Colon "a function declaration") 544 | | (t, ln) :: _ -> parse_error_expect ln t T.Function "a function declaration" 545 | 546 | (* Parse global variable declarations, and then function declarations until the 547 | end of file *) 548 | let parse_program (toks : T.tok_loc list) : prog = 549 | let (var_decs, toks) = parse_var_dec_list toks in 550 | let rec parse_funs toks = 551 | match toks with 552 | | [] -> [] 553 | | _ -> 554 | let (f, toks) = parse_func toks in 555 | f :: parse_funs toks 556 | in 557 | { globals = var_decs; funcs = parse_funs toks } 558 | 559 | let stmts_to_stmt (s : stmt list) : stmt = 560 | match s with 561 | | [s1] -> s1 562 | | _ -> Stmts s 563 | --------------------------------------------------------------------------------