├── src
    ├── .merlin
    ├── _tags
    ├── TODO
    ├── blockInterp.mli
    ├── regAlloc.mli
    ├── typeCheck.mli
    ├── frontEnd.mli
    ├── constProp.mli
    ├── compileFunction.mli
    ├── shrinkImmediates.mli
    ├── instrSelX86.mli
    ├── liveVarAnalysis.mli
    ├── lineariseCfg.mli
    ├── unnestExp.mli
    ├── interp.ml
    ├── astInterp.mli
    ├── tokens.mli
    ├── frontEnd.ml
    ├── Makefile
    ├── README.md
    ├── sourceAst.mli
    ├── blockStructure.mli
    ├── .depend
    ├── compile.ml
    ├── compileFunction.ml
    ├── lineariseCfg.ml
    ├── util.ml
    ├── shrinkImmediates.ml
    ├── blockInterp.ml
    ├── regAlloc.ml
    ├── tokens.ml
    ├── unnestExp.ml
    ├── x86.ml
    ├── liveVarAnalysis.ml
    ├── astInterp.ml
    ├── constProp.ml
    ├── typeCheck.ml
    ├── instrSelX86.ml
    ├── blockStructure.ml
    └── sourceAst.ml
├── Makefile
├── runtime
    ├── Makefile
    ├── io.c
    └── array.c
├── .gitignore
├── tests
    ├── div.expl
    ├── fact.expl
    ├── runtime_errors.expl
    ├── insert_sort.expl
    ├── insert_sort.c
    ├── scoping.expl
    ├── Makefile
    ├── quicksort.expl
    └── array.expl
└── README.md


/src/.merlin:
--------------------------------------------------------------------------------
1 | B _build
2 | 


--------------------------------------------------------------------------------
/src/_tags:
--------------------------------------------------------------------------------
1 | true : package(str)
2 | true : bin_annot, debug, principal, safe_string, strict_formats, strict_sequence, warn(A-42-4)
3 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | all:
 2 | 	make -C src
 3 | 	make -C runtime
 4 | 	make -C tests
 5 | 
 6 | clean:
 7 | 	make -C src clean
 8 | 	make -C runtime clean
 9 | 	make -C tests clean
10 | 


--------------------------------------------------------------------------------
/src/TODO:
--------------------------------------------------------------------------------
1 | Const prop for x * 2^n -> x shift n
2 | Do some simple register allocation
3 | Add OCaml warning 4, and manually annotate all of the matches where it should apply.
4 | 


--------------------------------------------------------------------------------
/runtime/Makefile:
--------------------------------------------------------------------------------
 1 | all : array.o io.o
 2 | 
 3 | array.o : array.c
 4 | 	gcc -c array.c -o array.o -std=c99
 5 | 
 6 | io.o : io.c
 7 | 	gcc -c io.c -o io.o -std=c99
 8 | 
 9 | clean :
10 | 	rm -f io.o array.o
11 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # OCaml executables
 2 | src/*.byte
 3 | src/*.native
 4 | src/_build
 5 | 
 6 | # Test executables and intermediate things
 7 | tests/*.s
 8 | tests/*.o
 9 | tests/*.exe
10 | 
11 | runtime/*.o
12 | 
13 | # Vim temporary files
14 | .*.swp
15 | 


--------------------------------------------------------------------------------
/tests/div.expl:
--------------------------------------------------------------------------------
 1 | //test division
 2 | 
 3 | let x : int = test(0, 0, 100)
 4 | 
 5 | function test(x : int) (y : int) (z : int) : int
 6 | {
 7 |   let x : int = 0
 8 |   let y : int = 0
 9 |   input x
10 |   y := 10 / x
11 |   output y
12 |   output z // output 100 (to make sure z passed in RDX is not clobbered *)
13 |   return y
14 | }
15 | 


--------------------------------------------------------------------------------
/tests/fact.expl:
--------------------------------------------------------------------------------
 1 | // Input a number and print its factorial
 2 | 
 3 | let v : int = exec(0)
 4 | 
 5 | function iter_fact (x : int) : int {
 6 |   let c : int = 1
 7 |   let z : int = 1
 8 |   while 0 < x {
 9 |     z := z * x
10 |     x := x - c
11 |   }
12 |   return z
13 | }
14 | 
15 | function rec_fact (x : int) : int {
16 |   let r : int = 1
17 |   if x = 0 then
18 |     return r
19 |   else {
20 |     r := x * rec_fact(x - 1)
21 |     return r
22 |   }
23 | }
24 | 
25 | function exec (x : int) : int {
26 |   let y : int = 0
27 |   let r : int = 0
28 |   input x
29 |   y := iter_fact(x)
30 |   output y
31 |   y := rec_fact(x)
32 |   output y
33 |   return r
34 | }
35 | 


--------------------------------------------------------------------------------
/tests/runtime_errors.expl:
--------------------------------------------------------------------------------
 1 | // Test for exceptions. Uncommenting any of the below should have an exception
 2 | 
 3 | //let r1 : int = array_bound1(0)
 4 | //let r2 : int = array_bound2(0)
 5 | //let r3 : int = 110/0
 6 | //let r4 : int = null(0)
 7 | //let r5 : int = a[2]
 8 | //let r6 : int = null2(a)
 9 | let a : array 1 = array[10]
10 | 
11 | function array_bound1 (x : int) : int {
12 |   let y : array 1 = array [10]
13 |   x := y[10]
14 |   return x
15 | }
16 | 
17 | function array_bound2 (x : int) : int {
18 |   let y : array 1 = array [10]
19 |   y[10] := x
20 |   return x
21 | }
22 | 
23 | function null (x : int) : int {
24 |   let r5 : int = a[0]
25 |   let a : array 1 = array[10]
26 |   return x
27 | }
28 | 
29 | function null2 (x : array 1) : int {
30 |   let i : int = x[0]
31 |   return i
32 | }
33 | 


--------------------------------------------------------------------------------
/tests/insert_sort.expl:
--------------------------------------------------------------------------------
 1 | // insertion sort
 2 | 
 3 | let r : int = test(1)
 4 | 
 5 | function test (i : int) : int
 6 | {
 7 |   let len : int = 0
 8 |   let a : array 1 = array[0]
 9 |   let i : int = 0
10 |   let y : int = 0
11 |   input len
12 |   a := array[len]
13 |   while i < len {
14 |     input y
15 |     a[i] := y
16 |     i := i + 1
17 |   }
18 | 
19 |   i := sort(a, len)
20 | 
21 |   i := 0
22 |   while i < len {
23 |     y := a[i]
24 |     output y
25 |     i := i + 1
26 |   }
27 |   return i
28 | }
29 | 
30 | function sort (a : array 1) (len : int) : int {
31 |   let i : int = 1
32 |   let y : int = 0
33 |   let j : int = 0
34 |   while i < len {
35 |     y := a[i]
36 |     j := i - 1
37 |     while (!(j < 0) && (a[j] > y)) {
38 |       a[j+1] := a[j]
39 |       j := j - 1
40 |     }
41 |     a[j+1] := y
42 |     i := i + 1
43 |   }
44 |   return i
45 | }
46 | 


--------------------------------------------------------------------------------
/src/blockInterp.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2016 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* An interpreter for CFGs *)
20 | 
21 | val interp_prog : int -> BlockStructure.cfg -> unit
22 | 


--------------------------------------------------------------------------------
/src/regAlloc.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2016 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | open BlockStructure
20 | 
21 | val reg_alloc : (SourceAst.id * int) list -> int -> cfg -> (cfg * int)
22 | 


--------------------------------------------------------------------------------
/src/typeCheck.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2017 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* The type checker *)
20 | 
21 | open SourceAst
22 | 
23 | type t
24 | val type_prog : prog -> prog
25 | 


--------------------------------------------------------------------------------
/src/frontEnd.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2016 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* The front end packaging lexing, parsing and type checking *)
20 | 
21 | val front_end : string -> bool -> SourceAst.prog
22 | 


--------------------------------------------------------------------------------
/src/constProp.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2016 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* Do constant propagation and folding *)
20 | 
21 | open SourceAst
22 | val prop_stmts : exp Idmap.t -> stmt list -> exp Idmap.t * stmt list
23 | 


--------------------------------------------------------------------------------
/src/compileFunction.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2017 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* Compiler for a single function *)
20 | 
21 | val compile_fun : bool -> string -> BlockStructure.Varset.t -> SourceAst.func -> SourceAst.id * X86.instruction list
22 | 
23 | 


--------------------------------------------------------------------------------
/tests/insert_sort.c:
--------------------------------------------------------------------------------
 1 | #include <inttypes.h>
 2 | #include <stdlib.h>
 3 | 
 4 | extern int64_t input();
 5 | extern void output(int64_t i);
 6 | 
 7 | int sort(int64_t a[], int64_t len) {
 8 |   int64_t i = 1;
 9 |   int64_t y = 1;
10 |   int64_t j = 1;
11 |   while (i < len) {
12 |     int64_t y = a[i];
13 |     int64_t j = i - 1;
14 |     while (!(j < 0) && (a[j] > y)) {
15 |       a[j+1] = a[j];
16 |       j = j - 1;
17 |     }
18 |     a[j+1] = y;
19 |     i = i + 1;
20 |   }
21 |   return i;
22 | }
23 | 
24 | int64_t test(int64_t i2) {
25 |   int64_t len = 0;
26 |   int64_t *a;
27 |   int64_t i = 0;
28 |   int64_t y = 0;
29 |   len = input();
30 |   a = malloc(len * sizeof(int64_t));
31 |   while (i < len) {
32 |     int64_t y = input();
33 |     a[i] = y;
34 |     i = i + 1;
35 |   }
36 | 
37 |   i = sort(a,len);
38 | 
39 |   i = 0;
40 |   while (i < len) {
41 |     int64_t y = a[i];
42 |     output(y);
43 |     i = i + 1;
44 |   }
45 |   return i;
46 | }
47 | 
48 | int64_t r;
49 | 
50 | int main(int argc, char** argv) {
51 |   r = test(1);
52 |   return 0;
53 | }
54 | 


--------------------------------------------------------------------------------
/src/shrinkImmediates.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2016 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* Ensure that all immediate arguments fit into 32 bits.  We assume that
20 |    constant propagation has ensured that no operation has two immediate
21 |    arguments, and we maintain that property here. *)
22 | 
23 | val shrink_imm : BlockStructure.cfg -> BlockStructure.cfg
24 | 


--------------------------------------------------------------------------------
/src/instrSelX86.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2016 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* Convert linearised three-address code to x86-64 *)
20 | 
21 | val num_regs : int
22 | val argument_reg_numbers : int list
23 | val be_to_x86 : bool -> BlockStructure.block_elem -> X86.instruction list
24 | val to_x86 : bool -> LineariseCfg.linear list -> int -> X86.instruction list
25 | 


--------------------------------------------------------------------------------
/tests/scoping.expl:
--------------------------------------------------------------------------------
 1 | // Test/demonstrate variable scoping
 2 | 
 3 | // should print 1, 11, 12, 8, 5, 9, 10, then 100 plus those, then
 4 | // 101, 11, 12, 16, 13, 9, 10, then 100 plus those, then finally
 5 | // 201, 2, 3, 4
 6 | 
 7 | let g : int = 1
 8 | let x : int = 2
 9 | let y : int = 3
10 | let z : int = 4
11 | 
12 | let r : int = test1(5, 6, 7, 8)
13 | let r2 : int = test1(13, 14, 15, 16)
14 | let r3 : int = test2(1)
15 | 
16 | function test1 (a : int) (b : int) (y : int) (z : int) : int {
17 |   let b : int = 9
18 |   let c : int = 10
19 |   let x : int = 11
20 |   let y : int = 12
21 | 
22 |   output g
23 |   output x
24 |   output y
25 |   output z
26 |   output a
27 |   output b
28 |   output c
29 | 
30 |   g := g + 100
31 |   x := x + 100
32 |   y := y + 100
33 |   z := z + 100
34 |   a := a + 100
35 |   b := b + 100
36 |   c := c + 100
37 | 
38 |   output g
39 |   output x
40 |   output y
41 |   output z
42 |   output a
43 |   output b
44 |   output c
45 | 
46 |   return x
47 | }
48 | 
49 | function test2 (aa : int) : int {
50 |   output g
51 |   output x
52 |   output y
53 |   output z
54 |   return aa
55 | }
56 | 


--------------------------------------------------------------------------------
/src/liveVarAnalysis.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2016 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | open BlockStructure
20 | 
21 | type cfg_annot = { gen : Varset.t; kill : Varset.t; live_exit : Varset.t }
22 | 
23 | type cfg = (cfg_entry * cfg_annot) list
24 | 
25 | val pp_cfg : Format.formatter -> cfg -> unit
26 | 
27 | val lva : Varset.t -> BlockStructure.cfg -> cfg
28 | 
29 | val remove_unused_writes : cfg -> cfg
30 | 


--------------------------------------------------------------------------------
/src/lineariseCfg.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2017 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* Flatten the CFG into a list of three-address code. *)
20 | 
21 | open BlockStructure
22 | type linear =
23 |   | Instr of block_elem
24 |   | CJump of test * bool * string (* jump to string if var is bool *)
25 |   | Jump of string
26 |   | Label of string
27 |   | Return of var option
28 | 
29 | type linear_list = linear list
30 | 
31 | val cfg_to_linear : cfg -> linear list
32 | 
33 | val pp_linear_list : Format.formatter -> linear list -> unit
34 | 
35 | val init_traversal : cfg -> cfg_entry Util.Intmap.t
36 | 


--------------------------------------------------------------------------------
/tests/Makefile:
--------------------------------------------------------------------------------
 1 | # Try to detect the operating system to set flags for the assembler.
 2 | # OSX uses macho64 binary format, and needs an _ prefixed before all global
 3 | # symbols. Linux uses elf64 binary format, and the symbols are all fine as they
 4 | # are.
 5 | OS := $(shell uname)
 6 | ifeq ($(OS), Darwin)
 7 | FORMAT = macho64
 8 | PREFIX = --prefix _
 9 | else
10 | FORMAT = elf64
11 | PREFIX =
12 | endif
13 | 
14 | COMPILER = ../src/compile.byte
15 | EXEC_FILES = fact.exe array.exe insert_sort.exe quicksort.exe div.exe scoping.exe runtime_errors.exe
16 | 
17 | .PHONY : all clean
18 | 
19 | all : $(EXEC_FILES)
20 | 
21 | %.s %.dot : %.expl $(COMPILER)
22 | 	$(COMPILER) $(OPTIONS) $<
23 | 
24 | %.o : %.s
25 | 	nasm -f $(FORMAT) $(PREFIX) $<
26 | 
27 | %.o : %.c
28 | 
29 | # Use gcc to do the linking since we want to include libc, and our
30 | # object code starts from  a C-style main function. We do not want to
31 | # enter at _start (which is where the OS dumps you on process
32 | # creation), because there's a bunch of book keeping and alignment
33 | # stuff to sort out. gcc/libc include code to do that for us.
34 | %.exe : %.o ../runtime/io.o ../runtime/array.o
35 | 	gcc ../runtime/io.o ../runtime/array.o $< -o $@
36 | 
37 | %.pdf : %.dot
38 | 	dot -Tpdf $< > $@
39 | 
40 | .PRECIOUS : %.s
41 | 
42 | clean :
43 | 	rm -f *.o *.s $(EXEC_FILES) *.pdf *.dot
44 | 


--------------------------------------------------------------------------------
/src/unnestExp.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2016 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* Flatten expressions so that they follow this grammar. We don't introduce a
20 |    new type, but do define predicates on the SourceAst.exp type. 
21 | 
22 |    type ae =
23 |    | Num of int64
24 |    | Bool of bool
25 |    | Ident of SourceAst.id
26 | 
27 |    type flat_exp =
28 |    | Num of int64
29 |    | Bool of bool
30 |    | Ident of SourceAst.id * ae list
31 |    | Op of ae * op * ae
32 |    | Uop of ae
33 |    | Array of ae list
34 |  *)
35 | 
36 | open SourceAst
37 | val unnest : stmt list -> stmt list
38 | val is_atomic : exp -> bool
39 | val is_flat : exp -> bool
40 | 


--------------------------------------------------------------------------------
/src/interp.ml:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2016 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* Driver for interpreter executable. *)
20 | 
21 | let filename = ref None;;
22 | 
23 | let usage_msg =
24 |   "example interpreter \nexample usage:    " ^ Sys.argv.(0)^ " test.expl\n";;
25 | 
26 | let _ =
27 |   Arg.parse []
28 |     (fun s ->
29 |        match !filename with
30 |        | None ->
31 |          filename := Some s
32 |        | Some s' ->
33 |          (Format.printf "Error: given multiple files to run: %s and %s\n" s' s;
34 |           exit 1))
35 |     usage_msg;;
36 | 
37 | let _ =
38 |   match !filename with
39 |   | None ->
40 |     (print_string usage_msg;
41 |      exit 1)
42 |   | Some filename ->
43 |     AstInterp.interp_prog (FrontEnd.front_end filename false)
44 | 


--------------------------------------------------------------------------------
/src/astInterp.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2017 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* An interpreter for ASTs *)
20 | 
21 | (* For when the interpreter crashed, such as array bounds violations *)
22 | exception Crash of string
23 | 
24 | (* For errors that a well-typed program can't have *)
25 | exception TypeError
26 | 
27 | (* Values are either integers or n-dimensional arrays of integers.
28 |    We keep multi-dimensional arrays in a single dimensional one and include a
29 |    list of how big each dimension is.
30 |    We represent bools as numbers: true = 1L and false = 0L *)
31 | type val_t =
32 |   | Vint of int64
33 |   | Varray of int list * int64 array
34 | 
35 | val do_op : Tokens.op -> int64 -> int64 -> int64
36 | 
37 | val interp_prog : SourceAst.prog -> unit
38 | 


--------------------------------------------------------------------------------
/runtime/io.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2017 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | 
19 | /* Trivial I/O runtime library */
20 | 
21 | #include <stdio.h>
22 | #include <stdlib.h>
23 | #include <errno.h>
24 | #include <inttypes.h>
25 | 
26 | // Read in a 64 bit signed integer and return it. First print a > as prompt.
27 | int64_t input() {
28 |   int64_t i;
29 |   printf("> ");
30 |   fflush(stdout);
31 |   scanf("%" PRId64, &i);
32 |   if (errno == ERANGE) {
33 |     printf("Input number does not fit in a 64-bit signed integer\n");
34 |     exit(1);
35 |   }
36 |   return i;
37 | }
38 | 
39 | // Print a given 64-bt
40 | void output(int64_t i) {
41 |   printf("%" PRId64 "\n", i);
42 |   return;
43 | }
44 | 
45 | // signal an error
46 | void signal_error(int64_t errn) {
47 |   switch (errn) {
48 |     case 0:
49 |       printf("array bounds error\n");
50 |       break;
51 |     case 1:
52 |       printf("null pointer dereference\n");
53 |       break;
54 |     default:
55 |       printf("unknown runtime error\n");
56 |       break;
57 |   }
58 |   exit(1);
59 | }
60 | 


--------------------------------------------------------------------------------
/src/tokens.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2017 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* The language's tokens, and a simple lexer *)
20 | 
21 | type op =
22 |   | Plus
23 |   | Minus
24 |   | Times
25 |   | Div
26 |   | Lshift
27 |   | BitOr
28 |   | BitAnd
29 |   | Lt
30 |   | Gt
31 |   | Eq
32 |   | And
33 |   | Or
34 | 
35 | type uop =
36 |   | Not
37 | 
38 | type token =
39 |   | Num of int64
40 |   | Ident of string
41 |   | Op of op
42 |   | Uop of uop
43 |   | Lparen
44 |   | Rparen
45 |   | Lcurly
46 |   | Rcurly
47 |   | Lbrac
48 |   | Rbrac
49 |   | Colon
50 |   | Comma
51 |   | While
52 |   | Do
53 |   | If
54 |   | Then
55 |   | Else
56 |   | Assign
57 |   | True
58 |   | False
59 |   | Input
60 |   | Output
61 |   | Array
62 |   | Int
63 |   | Bool
64 |   | Let
65 |   | Function
66 |   | Return
67 | 
68 | type tok_loc = (token * int)
69 | 
70 | val show_uop : uop -> string
71 | val show_op : op -> string
72 | val show_token : token -> string
73 | val pp_tok_loc : Format.formatter -> tok_loc -> unit
74 | val lex : string -> int -> int -> tok_loc list
75 | 


--------------------------------------------------------------------------------
/tests/quicksort.expl:
--------------------------------------------------------------------------------
 1 | // Quicksort
 2 | 
 3 | let r : int = test(1)
 4 | 
 5 | function test (i : int) : int
 6 | {
 7 |   let len : int = 0
 8 |   let a : array 1 = array[0]
 9 |   let i : int = 0
10 |   let y : int = 0
11 |   input len
12 |   a := array[len]
13 |   while i < len {
14 |     input y
15 |     a[i] := y
16 |     i := i + 1
17 |   }
18 | 
19 |   i := sort(a, 0, len-1)
20 | 
21 |   i := 0
22 |   while i < len {
23 |     y := a[i]
24 |     output y
25 |     i := i + 1
26 |   }
27 |   return i
28 | }
29 | 
30 | // Partition an array between lower and upper (inclusive), so that everything
31 | // before (and including) the returned index is <= than the pivot and
32 | // everything after is >= the pivot. The return value is strictly less than
33 | // upper. Assume that lower < upper. This is Tony Hoare's partitioning
34 | // algorithm.
35 | function partition (a : array 1) (pivot : int) (lower : int) (upper : int) : int {
36 |   let temp : int = 0
37 |   lower := lower - 1
38 |   upper := upper + 1
39 |   do {
40 |     do
41 |       lower := lower + 1
42 |     while (a[lower] < pivot)
43 |     do
44 |       upper := upper - 1
45 |     while (a[upper] > pivot)
46 |     if (lower < upper) then {
47 |       temp := a[lower]
48 |       a[lower] := a[upper]
49 |       a[upper] := temp
50 |     }
51 |     else
52 |       return upper
53 |   } while true
54 |   return upper
55 | }
56 | 
57 | // Sort the array between first and last, inclusive. Return a dummy value.
58 | function sort (a : array 1) (first : int) (last : int) : int {
59 |   let mid : int = 0
60 |   let temp : int = 0
61 | 
62 |   if (first = last) then
63 |     return temp
64 |   else {
65 |     mid := partition(a, a[first], first, last)
66 |     temp := sort(a,first,mid)
67 |     temp := sort(a,mid+1,last)
68 |     return temp
69 |   }
70 | }
71 | 


--------------------------------------------------------------------------------
/src/frontEnd.ml:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2017 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* The front end. It packages lexing, parsing and type checking for use by the
20 |    interpreter and type checker both. *)
21 | 
22 | open Util
23 | 
24 | (* Parse and type check the given filename. *)
25 | let front_end (filename : string) (debug : bool) : SourceAst.prog
26 |   =
27 |   if Filename.check_suffix filename ".expl" then
28 |     try
29 |       let input = Std.input_file filename in
30 |       let toks = Tokens.lex input 0 1 in
31 |       if debug then
32 |         Format.printf "%a@\n@\n" (pp_list Tokens.pp_tok_loc) toks;
33 |       let ast = SourceAst.parse_program toks in
34 |       if debug then
35 |         Format.printf "%a@\n@\n" SourceAst.pp_program ast;
36 |       let ast2 = TypeCheck.type_prog ast in
37 |       if debug then
38 |         Format.printf "%a@\n@\n" SourceAst.pp_program ast2;
39 |       ast2
40 |     with
41 |     | BadInput s ->
42 |       Format.printf "%s\n" s;
43 |       exit 1
44 |   else
45 |     (Format.printf "Expects filename ending in .expl\n";
46 |      exit 1)
47 | 


--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
 1 | PACKAGES = str
 2 | FLAGS = -bin-annot -g -principal -safe-string -strict-formats -strict-sequence -w +a-42-4
 3 | 
 4 | FILES = util tokens sourceAst typeCheck frontEnd constProp unnestExp \
 5 | blockStructure shrinkImmediates liveVarAnalysis regAlloc lineariseCfg x86 \
 6 | instrSelX86 compileFunction astInterp interp compile
 7 | 
 8 | FILES_ML = $(addsuffix .ml,$(FILES))
 9 | FILES_MLI = $(addsuffix .mli,$(FILES))
10 | FILES_CMI = $(addsuffix .cmi,$(FILES))
11 | FILES_CMT = $(addsuffix .cmt,$(FILES))
12 | FILES_CMTI = $(addsuffix .cmti,$(FILES))
13 | FILES_CMO = $(addsuffix .cmo,$(FILES))
14 | FILES_CMX = $(addsuffix .cmx,$(FILES))
15 | FILES_O = $(addsuffix .o,$(FILES))
16 | 
17 | .PHONY : ocb-compile.byte ocb-compile.native ocb-interp.byte ocb-interp.native all
18 | 
19 | all : ocb-interp.byte ocb-compile.byte
20 | 
21 | ocb-interp.byte :
22 | 	ocamlbuild -use-ocamlfind interp.byte
23 | 
24 | ocb-interp.native :
25 | 	ocamlbuild -use-ocamlfind interp.native
26 | 
27 | interp.byte : $(FILES_CMO)
28 | 	ocamlfind ocamlc -linkpkg -package $(PACKAGES) $(FILES_CMO) -o $@
29 | 
30 | interp.native : $(FILES_CMX)
31 | 	ocamlfind ocamlopt -linkpkg -package $(PACKAGES) $(FILES_CMX) -o $@
32 | 
33 | ocb-compile.byte :
34 | 	ocamlbuild -use-ocamlfind compile.byte
35 | 
36 | ocb-compile.native :
37 | 	ocamlbuild -use-ocamlfind compile.native
38 | 
39 | compile.byte : $(FILES_CMO)
40 | 	ocamlfind ocamlc -linkpkg -package $(PACKAGES) $(FILES_CMO) -o $@
41 | 
42 | compile.native : $(FILES_CMX)
43 | 	ocamlfind ocamlopt -linkpkg -package $(PACKAGES) $(FILES_CMX) -o $@
44 | 
45 | %.cmi : %.mli
46 | 	ocamlfind ocamlc -package $(PACKAGES) $(FLAGS) -o $@ -c $<
47 | 
48 | %.cmo : %.ml
49 | 	ocamlfind ocamlc -package $(PACKAGES) $(FLAGS) -o $@ -c $<
50 | 
51 | %.cmx : %.ml
52 | 	ocamlfind ocamlopt -package $(PACKAGES) $(FLAGS) -o $@ -c $<
53 | 
54 | .PHONY : depend clean
55 | 
56 | clean :
57 | 	ocamlbuild -clean
58 | 	rm -f $(FILES_CMI) $(FILES_CMTI) $(FILES_CMT) $(FILES_CMO) $(FILES_CMX) $(FILES_O) compile.byte compile.native interp.byte interp.native
59 | 
60 | depend :
61 | 	ocamlfind ocamldep $(FILES_ML) $(FILES_MLI) > .depend
62 | 
63 | include .depend
64 | 


--------------------------------------------------------------------------------
/src/README.md:
--------------------------------------------------------------------------------
 1 | Compiler structure
 2 | ------------------
 3 | 
 4 | The top-level files are _compile.ml_ and _interp.ml_, driving a compiler and
 5 | interpreter respectively.
 6 | 
 7 | Front end
 8 | ---------
 9 | 
10 | The front-end in _frontEnd.ml_ packages a lexer (_tokens.ml_), parser
11 | (_sourceAst.ml_), and type checker (_typeCheck.ml_) and produces an AST. It is
12 | shared by the compiler and interpreter.
13 | 
14 | AST transformations
15 | -------------------
16 | 
17 | The AST goes through a series of transformations
18 | 
19 | - _constProp.ml_: Propagates and folds constants.
20 |   The compiler later relies on there being no operators with two constant
21 |   operands.
22 | - _unnestExp.ml_: Un-nests expressions.
23 |   Flattens out expressions, using assignments to temporary variables. Ensures
24 |   that no expressions contain sub-expressions with operators in them. Also
25 |   makes all array accesses single-dimensional.
26 | - _blockStructure.ml_: Transforms the AST to a control-flow graph of basic
27 |   blocks. Requires expressions to be un-nested and && and || to be removed.
28 |   Inserts array bounds checks.
29 | 
30 | Basic-block transformations
31 | ---------------------------
32 | 
33 | - _shrinkImmediates.ml_: Removes all constants that don't fit into 32 bits
34 |   (since that's the maximum size of an immediate on x86-64), possibly
35 |   introducing assignments to temporary variables.
36 | - _liveVarAnalysis.ml_: Performs live variable analysis, and removes unused
37 |   assignments.
38 | - _regAlloc.ml_: Performs register allocation. This changes all identifiers from
39 |   named variables to registers and stack offsets.
40 | - _lineariseCFG.ml_: Flatten the CFG, introducing labels and branches.
41 | 
42 | Back end
43 | -------
44 | - _x86.ml_: An AST for the small subset of x86-64 that the compiler generates.
45 |   Includes a printer for it in NASM syntax.
46 | - _instrSelX86.ml_: Generate x86 code.
47 | - _compileFunction.ml_: packages up the compiler for single functions
48 | 
49 | Misc
50 | ----
51 | - _util.ml_: Misc. utility functions.
52 | - _astInterp.ml_ and _blockInterp.ml_: Interpreters for the AST and CFG.
53 | 


--------------------------------------------------------------------------------
/tests/array.expl:
--------------------------------------------------------------------------------
  1 | // Basic testing of arrays
  2 | 
  3 | let r1 : int = test1(0)
  4 | let r2 : int = test2(0)
  5 | let g : int = 2
  6 | 
  7 | let a : array 2 = array[4][4]
  8 | let f : int = fill(0)
  9 | let b : array 1 = test3(a)
 10 | let c : int = test4(0)
 11 | 
 12 | function fill(x : int) : int {
 13 |   let i : int = 0
 14 |   let j : int = 0
 15 |   while i < 4 {
 16 |     j := 0
 17 |     while j < 4 {
 18 |       a[i][j] := i+j
 19 |       j := j + 1
 20 |     }
 21 |     i := i + 1
 22 |   }
 23 |   return x
 24 | }
 25 | 
 26 | function test1 (x : int) : int
 27 | {
 28 |   let i : int = 0
 29 |   let x : array 1 = array[10]
 30 |   let j : int = 0
 31 |   output i // should print 0
 32 |   while i < 10 {
 33 |     x[i] := i
 34 |     i := i + 1
 35 |   }
 36 |   output i // should print 10
 37 |   j := x[0]
 38 |   output j // should print 0
 39 |   j := x[9]
 40 |   output j // should print 9
 41 |   return j
 42 | }
 43 | 
 44 | function test2 (x : int) : int
 45 | {
 46 |   let x : array 3 = array[3][5][7]
 47 |   let count : int = 0
 48 |   let i : int = 0
 49 |   let j : int = 0
 50 |   let k : int = 0
 51 |   let o : int = 0
 52 |   while i < 3 {
 53 |     j := 0
 54 |     while j < 5 {
 55 |       k := 0
 56 |       while k < 7 {
 57 |         x[i][j][k] := count
 58 |         count := count + 1
 59 |         k := k + 1
 60 |       }
 61 |       j := j + 1
 62 |     }
 63 |     i := i + 1
 64 |   }
 65 |   o := x[0][0][0]
 66 |   output o // should print 0
 67 |   o := x[1][3][4]
 68 |   output o // should print 60
 69 |   o := x[2][4][6]
 70 |   output o //should print 104
 71 |   return o
 72 | }
 73 | 
 74 | function test3 (a : array 2) : array 1 {
 75 |   let b : array 1 = array[4]
 76 |   let i : int = 0
 77 |   let j : int = 0
 78 |   while i < 4 {
 79 |     j := 0
 80 |     while j < 4 {
 81 |       b[i] := b[i] + a[i][j]
 82 |       j := j + 1
 83 |     }
 84 |     i := i + 1
 85 |   }
 86 |   return b
 87 | }
 88 | 
 89 | function test4 (x : int) : int {
 90 |   let i : int = 0
 91 |   while i < 4 {
 92 |     x := b[i]
 93 |     output x // 6, 10, 14, 18
 94 |     i := i + 1
 95 |   }
 96 |   x := b[g] // 14
 97 |   output x
 98 |   return x
 99 | }
100 | 


--------------------------------------------------------------------------------
/src/sourceAst.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2017 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* The language's AST, and a predictive, recursive descent parser. See the
20 |    ../README.md for the grammar. *)
21 | 
22 | type scope =
23 |   | Global
24 |   | Parameter
25 |   | Local
26 | 
27 | val compare_scope : scope -> scope -> int
28 | 
29 | type id =
30 |   | Source of string * scope option
31 |   | Temp of string * int
32 | 
33 | module Idmap : Map.S with type key = id
34 | 
35 | type exp =
36 |   | Ident of id * exp list
37 |   | Call of id * exp list
38 |   | Num of int64
39 |   | Bool of bool
40 |   | Op of exp * Tokens.op * exp
41 |   | Uop of Tokens.uop * exp
42 |   (* Allocate a new array of given dimensions. Initialise to 0 *)
43 |   | Array of exp list
44 | 
45 | type stmt =
46 |   | Assign of id * exp list * exp
47 |   (* A generalised do/while loop. Always execute the first statement, then
48 |      the test, then repeatedly do the 2nd, then first statement and then test
49 |      'while e s' becomes DoWhile (Stmts [], e, s) and 'do s while e' becomes
50 |      DoWhile (s, e, Stmts []) *)
51 |   | DoWhile of stmt * exp * stmt
52 |   | Ite of exp * stmt * stmt
53 |   | Stmts of stmt list
54 |   | In of id
55 |   | Out of id
56 |   | Return of id option
57 |   | Loc of stmt * int (* annotate a statement with it's source line number *)
58 | 
59 | type typ =
60 |   | Int
61 |   | Bool
62 |   (* An int array with the given number of dimensions *)
63 |   | Array of int
64 | 
65 | type var_dec = { var_name : id; typ : typ; init : exp; loc : int option }
66 | 
67 | type func = { fun_name : id; params : (id * typ) list; ret : typ;
68 |               locals : var_dec list; body : stmt list; loc : int option }
69 | 
70 | type prog = { globals : var_dec list; funcs : func list }
71 | 
72 | val show_id : id -> string
73 | val pp_stmt : Format.formatter -> stmt -> unit
74 | val pp_stmts : Format.formatter -> stmt list -> unit
75 | val pp_program : Format.formatter -> prog -> unit
76 | val parse_program : (Tokens.token * int) list -> prog
77 | val stmts_to_stmt : stmt list -> stmt
78 | 


--------------------------------------------------------------------------------
/src/blockStructure.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Example compiler
 3 |  * Copyright (C) 2015-2017 Scott Owens
 4 |  *
 5 |  *  This program is free software: you can redistribute it and/or modify
 6 |  *  it under the terms of the GNU General Public License as published by
 7 |  *  the Free Software Foundation, either version 3 of the License, or
 8 |  *  (at your option) any later version.
 9 |  *
10 |  *  This program is distributed in the hope that it will be useful,
11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  *  GNU General Public License for more details.
14 |  *
15 |  *  You should have received a copy of the GNU General Public License
16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | *)
18 | 
19 | (* A control flow graph representation with basic blocks, and the source AST ->
20 |    CGF algorithm. Also compiles arrays to loads and stores. *)
21 | 
22 | type var =
23 |   | Vreg of int
24 |   | Stack of int
25 |   | Global of string
26 |   | NamedSource of string * SourceAst.scope
27 |   | NamedTmp of string * int
28 | 
29 | module Varset : sig
30 |   include Set.S with type elt = var
31 |   val show : t -> string
32 |   val pp : Format.formatter -> t -> unit
33 | end
34 | 
35 | module Varmap : sig
36 |   include Map.S with type key = var
37 | end
38 | 
39 | type atomic_exp =
40 |   | Ident of var
41 |   | Num of int64
42 | 
43 | type block_elem =
44 |   | AssignOp of var * atomic_exp * Tokens.op * atomic_exp
45 |   | AssignAtom of var * atomic_exp
46 |   (* Ld (x,y,e) represents x := *(y+e) *)
47 |   | Ld of var * var * atomic_exp
48 |   (* St (x,e1,e2) represents *(x+e1) := e2 *)
49 |   | St of var * atomic_exp * atomic_exp
50 |   (* Call (x, f, aes) represents x := f(aes) *)
51 |   | Call of var option * string * atomic_exp list
52 |   (* BoundCheck (a1, a2) represents assert (a1 >= 0 && a1 < a2) *)
53 |   | BoundCheck of atomic_exp * atomic_exp
54 |   | NullCheck of var
55 | 
56 | type basic_block = block_elem list
57 | 
58 | type test_op =
59 |   | Lt
60 |   | Gt
61 |   | Eq
62 | 
63 | type test = atomic_exp * test_op * atomic_exp
64 | 
65 | type next_block =
66 |   | Return of var option
67 |   | Next of int
68 |   (* The first int is the block number if the ident is true, and the second if
69 |    * it is false *)
70 |   | Branch of test * int * int
71 | 
72 | type cfg_entry = { bnum : int; elems : block_elem list; next : next_block;
73 |                    mutable started : bool; mutable finished : bool }
74 | 
75 | type cfg = cfg_entry list
76 | 
77 | val id_to_var : SourceAst.id -> var
78 | 
79 | val show_var : var -> string
80 | val pp_block_elem : Format.formatter -> block_elem -> unit
81 | val pp_test : Format.formatter -> test -> unit
82 | val pp_cfg : Format.formatter -> cfg -> unit
83 | val pp_cfg_entry : Format.formatter -> cfg_entry -> unit
84 | val build_cfg : SourceAst.stmt list -> cfg
85 | 
86 | val cfg_to_graphviz : Format.formatter -> cfg -> unit
87 | 


--------------------------------------------------------------------------------
/src/.depend:
--------------------------------------------------------------------------------
 1 | astInterp.cmo : util.cmo tokens.cmi sourceAst.cmi astInterp.cmi
 2 | astInterp.cmx : util.cmx tokens.cmx sourceAst.cmx astInterp.cmi
 3 | astInterp.cmi : tokens.cmi sourceAst.cmi
 4 | blockInterp.cmo : util.cmo lineariseCfg.cmi blockStructure.cmi astInterp.cmi \
 5 |     blockInterp.cmi
 6 | blockInterp.cmx : util.cmx lineariseCfg.cmx blockStructure.cmx astInterp.cmx \
 7 |     blockInterp.cmi
 8 | blockInterp.cmi : blockStructure.cmi
 9 | blockStructure.cmo : util.cmo tokens.cmi sourceAst.cmi blockStructure.cmi
10 | blockStructure.cmx : util.cmx tokens.cmx sourceAst.cmx blockStructure.cmi
11 | blockStructure.cmi : tokens.cmi sourceAst.cmi
12 | compile.cmo : x86.cmo util.cmo sourceAst.cmi instrSelX86.cmi frontEnd.cmi \
13 |     compileFunction.cmi blockStructure.cmi
14 | compile.cmx : x86.cmx util.cmx sourceAst.cmx instrSelX86.cmx frontEnd.cmx \
15 |     compileFunction.cmx blockStructure.cmx
16 | compileFunction.cmo : x86.cmo unnestExp.cmi sourceAst.cmi \
17 |     shrinkImmediates.cmi regAlloc.cmi liveVarAnalysis.cmi lineariseCfg.cmi \
18 |     instrSelX86.cmi constProp.cmi blockStructure.cmi compileFunction.cmi
19 | compileFunction.cmx : x86.cmx unnestExp.cmx sourceAst.cmx \
20 |     shrinkImmediates.cmx regAlloc.cmx liveVarAnalysis.cmx lineariseCfg.cmx \
21 |     instrSelX86.cmx constProp.cmx blockStructure.cmx compileFunction.cmi
22 | compileFunction.cmi : x86.cmo sourceAst.cmi
23 | constProp.cmo : util.cmo tokens.cmi sourceAst.cmi constProp.cmi
24 | constProp.cmx : util.cmx tokens.cmx sourceAst.cmx constProp.cmi
25 | constProp.cmi : sourceAst.cmi
26 | frontEnd.cmo : util.cmo typeCheck.cmi tokens.cmi sourceAst.cmi frontEnd.cmi
27 | frontEnd.cmx : util.cmx typeCheck.cmx tokens.cmx sourceAst.cmx frontEnd.cmi
28 | frontEnd.cmi : sourceAst.cmi
29 | instrSelX86.cmo : x86.cmo util.cmo tokens.cmi lineariseCfg.cmi \
30 |     blockStructure.cmi instrSelX86.cmi
31 | instrSelX86.cmx : x86.cmx util.cmx tokens.cmx lineariseCfg.cmx \
32 |     blockStructure.cmx instrSelX86.cmi
33 | instrSelX86.cmi : x86.cmo lineariseCfg.cmi blockStructure.cmi
34 | interp.cmo : frontEnd.cmi astInterp.cmi
35 | interp.cmx : frontEnd.cmx astInterp.cmx
36 | lineariseCfg.cmo : util.cmo blockStructure.cmi lineariseCfg.cmi
37 | lineariseCfg.cmx : util.cmx blockStructure.cmx lineariseCfg.cmi
38 | lineariseCfg.cmi : util.cmo blockStructure.cmi
39 | liveVarAnalysis.cmo : util.cmo tokens.cmi blockStructure.cmi \
40 |     liveVarAnalysis.cmi
41 | liveVarAnalysis.cmx : util.cmx tokens.cmx blockStructure.cmx \
42 |     liveVarAnalysis.cmi
43 | liveVarAnalysis.cmi : util.cmo blockStructure.cmi
44 | regAlloc.cmo : util.cmo blockStructure.cmi regAlloc.cmi
45 | regAlloc.cmx : util.cmx blockStructure.cmx regAlloc.cmi
46 | regAlloc.cmi : blockStructure.cmi
47 | shrinkImmediates.cmo : tokens.cmi blockStructure.cmi shrinkImmediates.cmi
48 | shrinkImmediates.cmx : tokens.cmx blockStructure.cmx shrinkImmediates.cmi
49 | shrinkImmediates.cmi : blockStructure.cmi
50 | sourceAst.cmo : util.cmo tokens.cmi sourceAst.cmi
51 | sourceAst.cmx : util.cmx tokens.cmx sourceAst.cmi
52 | sourceAst.cmi : tokens.cmi
53 | tokens.cmo : util.cmo tokens.cmi
54 | tokens.cmx : util.cmx tokens.cmi
55 | tokens.cmi :
56 | typeCheck.cmo : util.cmo tokens.cmi sourceAst.cmi typeCheck.cmi
57 | typeCheck.cmx : util.cmx tokens.cmx sourceAst.cmx typeCheck.cmi
58 | typeCheck.cmi : sourceAst.cmi
59 | unnestExp.cmo : util.cmo tokens.cmi sourceAst.cmi unnestExp.cmi
60 | unnestExp.cmx : util.cmx tokens.cmx sourceAst.cmx unnestExp.cmi
61 | unnestExp.cmi : sourceAst.cmi
62 | util.cmo :
63 | util.cmx :
64 | x86.cmo : util.cmo
65 | x86.cmx : util.cmx
66 | 


--------------------------------------------------------------------------------
/runtime/array.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2016 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | */
 18 | 
 19 | /* Allocation of multi-dimensional arrays of 64-bit ints. For an array of
 20 |  * length n, store the length of the array in element 0, and the array in 1 to
 21 |  * n inclusive. We're casting between int64_t and int64_t* which is not
 22 |  * guaranteed to work by the C standard, but seems to work on standard OS X and
 23 |  * Linux C compilers. */
 24 | 
 25 | #include <stdlib.h>
 26 | #include <stdint.h>
 27 | #include <assert.h>
 28 | 
 29 | /* Since the first element of an array will have a different type than the
 30 |  * rest, and the last dimension's array will have numbers rather than pointers. */
 31 | union elt {
 32 |   uint64_t num;
 33 |   union elt *ptr;
 34 | };
 35 | 
 36 | typedef union elt elt;
 37 | 
 38 | #define S sizeof(elt)
 39 | 
 40 | /* A 1 dimensional array */
 41 | elt* allocate1(int64_t dim1) {
 42 |   elt* x = malloc(S*(dim1+1));
 43 |   for (unsigned long i = 0; i < dim1; i++)
 44 |     x[i+1].num = 0;
 45 |   x[0].num = dim1;
 46 |   return x;
 47 | }
 48 | 
 49 | /* A 2 dimensional array is an array of arrays */
 50 | elt* allocate2(int64_t dim1, int64_t dim2) {
 51 |   elt* x = malloc(S*(dim1+1));
 52 |   for (unsigned long i = 0; i < dim1; i++) {
 53 |     elt* y = malloc(S*(dim2+1));
 54 |     x[i+1].ptr = y;
 55 |     for (int64_t j = 0; j < dim2; j++)
 56 |       y[j+1].num = 0;
 57 |     y[0].num = dim2;
 58 |   }
 59 |   x[0].num = dim1;
 60 |   return x;
 61 | }
 62 | 
 63 | /* Allocate dimensions dim to num_dim where the length of each dimension is
 64 |  * given by the list dims[] */
 65 | elt* allocate_n_help(int64_t dim, int64_t num_dim, int64_t dims[]) {
 66 |   if (dim == num_dim - 1)
 67 |     return allocate1(dims[dim]);
 68 |   else {
 69 |     elt* x = malloc(S*(dims[dim]+1));
 70 |     for (unsigned long i = 0; i < dims[dim]; i++) {
 71 |       x[i+1].ptr = allocate_n_help(dim+1, num_dim, dims);
 72 |     }
 73 |     x[0].num = dims[dim];
 74 |     return x;
 75 |   }
 76 | }
 77 | 
 78 | elt* allocate_n(int64_t num_dim, int64_t dims[]) {
 79 |   return allocate_n_help(0, num_dim, dims);
 80 | }
 81 | 
 82 | elt* allocate3(int64_t dim1, int64_t dim2, int64_t dim3) {
 83 |   int64_t dims[] = {dim1, dim2, dim3};
 84 |   return allocate_n(3, dims);
 85 | }
 86 | 
 87 | elt* allocate4(int64_t dim1, int64_t dim2, int64_t dim3, int64_t dim4) {
 88 |   int64_t dims[] = {dim1, dim2, dim3, dim4};
 89 |   return allocate_n(4, dims);
 90 | }
 91 | 
 92 | elt* allocate5(int64_t dim1, int64_t dim2, int64_t dim3, int64_t dim4, int64_t dim5) {
 93 |   int64_t dims[] = {dim1, dim2, dim3, dim4, dim5};
 94 |   return allocate_n(5, dims);
 95 | }
 96 | 
 97 | elt* allocate6(int64_t dim1, int64_t dim2, int64_t dim3, int64_t dim4, int64_t dim5, int64_t dim6) {
 98 |   int64_t dims[] = {dim1, dim2, dim3, dim4, dim5, dim6};
 99 |   return allocate_n(6, dims);
100 | }
101 | 
102 | elt* allocate7(int64_t dim1, int64_t dim2, int64_t dim3, int64_t dim4, int64_t dim5, int64_t dim6, int64_t dim7) {
103 |   int64_t dims[] = {dim1, dim2, dim3, dim4, dim5, dim6, dim7};
104 |   return allocate_n(7, dims);
105 | }
106 | 


--------------------------------------------------------------------------------
/src/compile.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* The main driver for the compiler executable. *)
 20 | 
 21 | (* Command-line arguments *)
 22 | let filename_ref = ref None;;
 23 | let safe_ref = ref true;;
 24 | 
 25 | let options = Arg.align ([("-safe", Arg.Bool (fun b -> safe_ref := b),
 26 |                            "\tdo null pointer and array bounds checks")
 27 |  ]);;
 28 | 
 29 | let usage_msg =
 30 |   "example compiler \nexample usage:    " ^ Sys.argv.(0)^ " test.expl\n"
 31 | 
 32 | let _ =
 33 |   Arg.parse options
 34 |     (fun s ->
 35 |        match !filename_ref with
 36 |        | None ->
 37 |          filename_ref := Some s
 38 |        | Some s' ->
 39 |          (Format.printf "Error: given multiple files to process: %s and %s\n"
 40 |             s' s;
 41 |           exit 1))
 42 |     usage_msg
 43 | 
 44 | let filename =
 45 |  match !filename_ref with
 46 |   | None ->
 47 |     (print_string usage_msg;
 48 |      exit 1)
 49 |   | Some filename ->
 50 |     filename
 51 | 
 52 | let prog = FrontEnd.front_end filename false;;
 53 | 
 54 | open SourceAst
 55 | 
 56 | (* Build a main function that just runs the initialisation of all of the
 57 |    globals *)
 58 | let main_function =
 59 |   { fun_name = Source ("main", None); params = []; ret = Int;
 60 |     locals = [];
 61 |     body = List.map (fun d -> Assign (d.var_name, [], d.init)) prog.globals @
 62 |            [Return None];
 63 |     loc = None }
 64 | 
 65 | let global_id_to_var (g : SourceAst.id) : BlockStructure.var =
 66 |   match g with
 67 |   | Source (i, Some Global) -> BlockStructure.NamedSource (i, Global)
 68 |   | _ -> assert false
 69 | 
 70 | let globals =
 71 |   List.fold_right
 72 |     (fun d globals ->
 73 |        BlockStructure.Varset.add (global_id_to_var d.var_name) globals)
 74 |     prog.globals
 75 |     BlockStructure.Varset.empty ;;
 76 | 
 77 | let functions =
 78 |   List.map (CompileFunction.compile_fun !safe_ref filename globals)
 79 |     (main_function::prog.funcs);;
 80 | 
 81 | open Format
 82 | 
 83 | let outfile = open_out (Filename.chop_extension filename ^ ".s");;
 84 | let fmt = formatter_of_out_channel outfile;;
 85 | (* Assembly wrapper *)
 86 | fprintf fmt "[section .text align=16]@\n";;
 87 | fprintf fmt "global main@\n@\n";;
 88 | fprintf fmt "extern signal_error@\n";;
 89 | fprintf fmt "extern input@\n";;
 90 | fprintf fmt "extern output@\n";;
 91 | fprintf fmt "extern allocate1@\n";;
 92 | fprintf fmt "extern allocate2@\n";;
 93 | fprintf fmt "extern allocate3@\n";;
 94 | fprintf fmt "extern allocate4@\n";;
 95 | fprintf fmt "extern allocate5@\n";;
 96 | fprintf fmt "extern allocate6@\n";;
 97 | fprintf fmt "extern allocate7@\n@\n";;
 98 | List.iter
 99 |   (fun (name, code) -> fprintf fmt "%s:@\n%a" (show_id name) X86.pp_instr_list code)
100 |   functions;;
101 | fprintf fmt "bound_error:@\n%a"
102 |   (fun fmt instr -> X86.pp_instr_list fmt (InstrSelX86.be_to_x86 !safe_ref instr))
103 |   (BlockStructure.Call (None, "signal_error", [BlockStructure.Num 0L]));;
104 | fprintf fmt "null_error:@\n%a"
105 |   (fun fmt instr -> X86.pp_instr_list fmt (InstrSelX86.be_to_x86 !safe_ref instr))
106 |   (BlockStructure.Call (None, "signal_error", [BlockStructure.Num 1L]));;
107 | (* bss segment for the global variables, all initialised to 0 *)
108 | fprintf fmt "[section .bss align=16]@\n";;
109 | fprintf fmt "default rel@\n";;
110 | List.iter (fun d -> fprintf fmt "%s: resq 1\n" (show_id d.var_name)) prog.globals;;
111 | close_out outfile;;
112 | 


--------------------------------------------------------------------------------
/src/compileFunction.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* Compiler for a single function *)
 20 | 
 21 | open SourceAst
 22 | 
 23 | (* We treat a local variable declarations as simple assignment to the variable.
 24 | *)
 25 | let init_var_dec_to_0 (d : var_dec) : stmt =
 26 |   Assign (d.var_name, [], Num 0L)
 27 | 
 28 | let var_dec_to_stmt (d : var_dec) : stmt =
 29 |   Assign (d.var_name, [], d.init)
 30 | 
 31 | (* globals should contain all globals variables, so that we can tell the live
 32 |    variable analysis that they must be live when the function returns.
 33 | *)
 34 | let compile_fun safe filename (globals : BlockStructure.Varset.t) (f : func)
 35 |   : id * X86.instruction list =
 36 |   let ast =
 37 |     (* Zero out local variables, in case any of the initialisations are out of
 38 |        order and refer to uninitialised variables. Later removal of dead writes
 39 |        should be able to remove these where there is no problem *)
 40 |     List.map init_var_dec_to_0 f.locals @
 41 |     List.map var_dec_to_stmt f.locals @
 42 |     f.body in
 43 | 
 44 |   let (_,opt_ast) = ConstProp.prop_stmts SourceAst.Idmap.empty ast in
 45 |    (*Format.printf "@\n%a@\n" SourceAst.pp_stmts opt_ast; *)
 46 | 
 47 |   let no_nest_ast = UnnestExp.unnest opt_ast in
 48 |   (*Format.printf "@\n%a@\n" SourceAst.pp_stmts no_nest_ast; *)
 49 | 
 50 |   let cfg = BlockStructure.build_cfg no_nest_ast in
 51 |    (*Format.printf "@\n%a@\n" BlockStructure.pp_cfg cfg; *)
 52 |   (* Print the CFG in dot format. Process the .dot file with dot -Tpdf FILENAME > FILENAME.pdf.
 53 |      dot is part of the graphviz package http://www.graphviz.org *)
 54 |   (*
 55 |   let outfile = open_out (Filename.chop_extension filename ^ "_" ^ show_id f.fun_name ^ ".dot") in
 56 |   let fmt = Format.formatter_of_out_channel outfile in
 57 |   Format.fprintf fmt "%a" BlockStructure.cfg_to_graphviz cfg;
 58 |   close_out outfile;
 59 |      *)
 60 | 
 61 |   let cfg' = ShrinkImmediates.shrink_imm cfg in
 62 |   (* Format.printf "@\n%a@\n" BlockStructure.pp_cfg cfg';*)
 63 | 
 64 |   let lva_cfg0 = LiveVarAnalysis.lva globals cfg' in
 65 |   (* Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg0; *)
 66 | 
 67 |   let lva_cfg1 = LiveVarAnalysis.remove_unused_writes lva_cfg0 in
 68 |   (*Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg1;*)
 69 | 
 70 | 
 71 |   (* Iterate analysis for examples like this:
 72 | 
 73 |    input c
 74 |    x := c
 75 |    y := 1
 76 |    if (c = 0) then
 77 |    y := x
 78 |    else
 79 |    y := x
 80 |    z := y
 81 |    a := z
 82 |    output x
 83 | 
 84 |   *)
 85 | 
 86 |   let lva_cfg2 = LiveVarAnalysis.lva globals (List.map fst lva_cfg1) in
 87 |   (* Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg2; *)
 88 | 
 89 |   let lva_cfg3 = LiveVarAnalysis.remove_unused_writes lva_cfg2 in
 90 |   (* Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg3; *)
 91 | 
 92 |   let lva_cfg4 = LiveVarAnalysis.lva globals (List.map fst lva_cfg3) in
 93 |   (* Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg4;*)
 94 | 
 95 |   let (reg_cfg, num_stack) =
 96 |     RegAlloc.reg_alloc (Util.zip (List.map fst f.params) InstrSelX86.argument_reg_numbers)
 97 |       InstrSelX86.num_regs
 98 |       (List.map fst lva_cfg4)
 99 |   in
100 |    (* Format.printf "@\n%a@\n" BlockStructure.pp_cfg reg_cfg; *)
101 | 
102 |   let linear = LineariseCfg.cfg_to_linear reg_cfg in
103 |   (* Format.printf "@\n%a@\n" LineariseCfg.pp_linear_list linear; *)
104 | 
105 |   let x86 = InstrSelX86.to_x86 safe linear num_stack in
106 | 
107 |   (f.fun_name, x86)
108 | 


--------------------------------------------------------------------------------
/src/lineariseCfg.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* Flatten the CFG into a list of three-address code. *)
 20 | (* We generate labels of the form .block%d. The prefix . makes nasm treat them
 21 |    as local to the function, so we don't have problems assembling files with
 22 |    multiple functions and therefore duplicate block labels. *)
 23 | 
 24 | open BlockStructure
 25 | 
 26 | type linear =
 27 |   | Instr of block_elem
 28 |   | CJump of test * bool * string (* jump to string if the test is bool *)
 29 |   | Jump of string
 30 |   | Label of string
 31 |   | Return of var option
 32 | 
 33 | let pp_linear (fmt : Format.formatter) (l : linear) : unit =
 34 |   match l with
 35 |   | Instr b ->
 36 |     Format.fprintf fmt "  %a@\n" pp_block_elem b
 37 |   | CJump (v, b, s) ->
 38 |     Format.fprintf fmt "  if (%a) = %s goto %s@\n"
 39 |       pp_test v
 40 |       (if b then "true" else "false")
 41 |       s
 42 |   | Jump s ->
 43 |     Format.fprintf fmt "  goto %s@\n" s
 44 |   | Label s ->
 45 |     Format.fprintf fmt "%s:@\n" s
 46 |   | Return None ->
 47 |     Format.fprintf fmt "  return@\n"
 48 |   | Return (Some v) ->
 49 |     Format.fprintf fmt "  return %s@\n" (show_var v)
 50 | 
 51 | type linear_list = linear list
 52 | 
 53 | let rec pp_linear_list (fmt : Format.formatter) (ls : linear list) =
 54 |   match ls with
 55 |   | [] -> ()
 56 |   | x::y ->
 57 |     (pp_linear fmt x;
 58 |      pp_linear_list fmt y)
 59 | 
 60 | module I = Util.Intmap
 61 | 
 62 | (* Initialise the mutable visited fields to false, and put CFG into a map *)
 63 | let init_traversal (cfg : cfg) : cfg_entry I.t =
 64 |   List.iter (fun x -> x.started <- false; x.finished <- false) cfg;
 65 |   List.fold_left (fun map x -> I.add x.bnum x map) I.empty cfg
 66 | 
 67 | (* Linearise the reachable cfg, starting from the block with index next_block,
 68 |    but don't do already visited blocks *)
 69 | (* This is essentially a depth-first search, pre-order traversal *)
 70 | let rec cfg_to_linear (next_block : int) (cfg : cfg_entry I.t) : linear list =
 71 |   let b = I.find next_block cfg in
 72 |   if b.finished then
 73 |     (* Don't output the block twice *)
 74 |     []
 75 |   else
 76 |     (b.finished <- true;
 77 |      Label (".block" ^ string_of_int b.bnum) ::
 78 |      List.map (fun x -> Instr x) b.elems @
 79 |      match b.next with
 80 |      | Return v -> [(Return v : linear)]
 81 |      | Next i ->
 82 |        if (I.find i cfg).started then
 83 |          (* We've started the next block, so we'll just jump to it *)
 84 |          [Jump (".block" ^ string_of_int i)]
 85 |        else
 86 |          (* We haven't started the next block, so we can put it here and omit
 87 |             the jump *)
 88 |          ((I.find i cfg).started <- true;
 89 |           cfg_to_linear i cfg)
 90 |      | Branch (v, t1, t2) ->
 91 |        let c1 = I.find t1 cfg in
 92 |        let c2 = I.find t2 cfg in
 93 |        match (c1.started, c2.started) with
 94 |        | (false, false) ->
 95 |          c1.started <- true;
 96 |          c2.started <- true;
 97 |          CJump (v, true, ".block"  ^ string_of_int t1) ::
 98 |          cfg_to_linear t2 cfg @
 99 |          cfg_to_linear t1 cfg
100 |        | (true, true) ->
101 |          [CJump (v, true, ".block"  ^ string_of_int t1);
102 |           Jump (".block" ^ string_of_int t2)]
103 |        | (true, false) ->
104 |          (c2.started <- true;
105 |           [CJump (v, true, ".block"  ^ string_of_int t1)] @
106 |           cfg_to_linear t2 cfg)
107 |        | (false, true) ->
108 |          (c1.started <- true;
109 |           [CJump (v, false, ".block"  ^ string_of_int t2)] @
110 |           cfg_to_linear t1 cfg))
111 | 
112 | let cfg_to_linear cfg =
113 |   cfg_to_linear 0 (init_traversal cfg)
114 | 


--------------------------------------------------------------------------------
/src/util.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* General utility functions *)
 20 | 
 21 | (* For lex/parse/type errors *)
 22 | exception BadInput of string
 23 | (* For cases that shouldn't be able to happen in the compiler *)
 24 | 
 25 | exception InternalError of string
 26 | 
 27 | (* Maps with string keys *)
 28 | module Strmap = Map.Make(String)
 29 | 
 30 | (* Maps with integer keys *)
 31 | module Intmap =
 32 |   Map.Make(struct type t = int let (compare : int -> int -> int) = compare end)
 33 | 
 34 | (* Build the list [0, ..., n-1] *)
 35 | let count (n : int) : int list =
 36 |   let rec f n next =
 37 |     if n <= 0 then
 38 |       []
 39 |     else
 40 |       next :: f (n - 1) (next + 1)
 41 |   in
 42 |   f n 0
 43 | 
 44 | (* Combine 2 lists into a list of pairs, truncating the longer list *)
 45 | let rec zip (l1 : 'a list) (l2 : 'b list) : ('a * 'b) list =
 46 |   match (l1,l2) with
 47 |   | (h1::t1, h2::t2) -> (h1,h2) :: zip t1 t2
 48 |   | _ -> []
 49 | 
 50 | (* Convert a list to a ; separated string *)
 51 | let show_list' (d1 : string) (d2 : string) (show : 'a -> string) (l : 'a list)
 52 |   : string =
 53 |   let rec f l =
 54 |     match l with
 55 |     | [] -> ""
 56 |     | [h] -> show h
 57 |     | (h::t) ->
 58 |       show h ^ "; " ^ f t
 59 |   in
 60 |     d1 ^ f l ^ d2
 61 | 
 62 | let show_list show = show_list' "[" "]" show
 63 | let show_set show = show_list' "{" "}" show
 64 | 
 65 | (* Pretty-pring a list, separated by ; *)
 66 | let pp_list' (d1 : string) (d2 : string) (pp : Format.formatter -> 'a -> unit)
 67 |     (fmt : Format.formatter) (l : 'a list)
 68 |   : unit =
 69 |   let rec f fmt l =
 70 |     match l with
 71 |     | [] -> ()
 72 |     | [h] -> pp fmt h
 73 |     | (h::t) ->
 74 |       Format.fprintf fmt "%a;@ %a"
 75 |         pp h
 76 |         f t
 77 |   in
 78 |     Format.fprintf fmt "%s@[%a@]%s" d1 f l d2
 79 | 
 80 | let pp_list pp = pp_list' "[" "]" pp
 81 | let pp_set pp = pp_list' "{" "}" pp
 82 | 
 83 | let option_map (f : 'a -> 'b) (x : 'a option) : 'b option =
 84 |   match x with
 85 |   | None -> None
 86 |   | Some y -> Some (f y)
 87 | 
 88 | (* Compare two options, where None is less than Some *)
 89 | let option_compare (c : 'a -> 'b -> int) (o1 : 'a option) (o2 :'b option)
 90 |   : int =
 91 |   match (o1, o2) with
 92 |   | (None, None) -> 0
 93 |   | (Some _, None) -> 1
 94 |   | (None, Some _) -> -1
 95 |   | (Some x, Some y) -> c x y
 96 | 
 97 | module Std = struct
 98 |   (* Copy and paste from extlib to get input_file without making a dependency.
 99 |      extlib is LGPL 2.1, and so this sub-module is too.
100 |      https://github.com/ygrek/ocaml-extlib/blob/33f744ddb28d6a0f4c96832145e1a6e384644709/src/std.ml *)
101 | 
102 |   let finally handler f x =
103 |     let r = (
104 |       try
105 |         f x
106 |       with
107 |         e -> handler (); raise e
108 |     ) in
109 |     handler ();
110 |     r
111 | 
112 |   let buf_len = 8192
113 | 
114 |   let input_all ic =
115 |     let rec loop acc total buf ofs =
116 |       let n = input ic buf ofs (buf_len - ofs) in
117 |       if n = 0 then
118 |         let res = Bytes.create total in
119 |         let pos = total - ofs in
120 |         let _ = Bytes.blit buf 0 res pos ofs in
121 |         let coll pos buf =
122 |           let new_pos = pos - buf_len in
123 |           Bytes.blit buf 0 res new_pos buf_len;
124 |           new_pos in
125 |         let _ = List.fold_left coll pos acc in
126 |         (* [res] doesn't escape and will not be mutated again *)
127 |         Bytes.unsafe_to_string res
128 |       else
129 |         let new_ofs = ofs + n in
130 |         let new_total = total + n in
131 |         if new_ofs = buf_len then
132 |           loop (buf :: acc) new_total (Bytes.create buf_len) 0
133 |         else loop acc new_total buf new_ofs in
134 |     loop [] 0 (Bytes.create buf_len) 0
135 | 
136 |   let input_file ?(bin=false) fname =
137 |     let ch = (if bin then open_in_bin else open_in) fname in
138 |     finally (fun () -> close_in ch) input_all ch
139 | 
140 | end
141 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | A simple compiler for a simple imperative language targeting x86-64 assembly
  2 | (for systems that follow the System V ABI, so most operating systems **except**
  3 | Windows).
  4 | 
  5 | Running the compiler
  6 | --------------------
  7 | 
  8 | The compiler relies on OCaml and the ocamlfind library. It is tested with OCaml
  9 | versions 4.02.3 and 4.04.0, and ocamlfind 1.5.6 and 1.7.1. Ocamlfind can be
 10 | installed via opam. If you have installed anything via Opam (for example,
 11 | Merlin, or ocp-indent), ocamlfind is probably installed already. Opam installs
 12 | everything into `.opam` in your home directory:
 13 | ```
 14 | opam install ocamlfind
 15 | ```
 16 | 
 17 | To compile the compiler run `make` in the `src` directory. This should produce
 18 | `compile.byte` and `interp.byte` executables. Both take a single command-line
 19 | argument: a source file name with the `.expl` extension. `interp.byte` runs the
 20 | file, `compile.byte` compiles it, generating an x86-46 assembly `.s` file in
 21 | *nasm* syntax.
 22 | 
 23 | Compiling target programs
 24 | -------------------------
 25 | 
 26 | First run `make` in the `runtime` directory to compile the very simple runtime
 27 | library (using gcc).
 28 | 
 29 | On Linux, use `nasm -f elf64 FILENAME.s` to assemble the compiler's output for
 30 | *FILENAME*. On Mac, use `nasm -f macho64 --prefix _ FILENAME.s`. Then use `gcc
 31 | COMPILER_DIR/runtime/io.o FILENAME.o -o FILENAME` to link the program with the
 32 | runtime library.
 33 | 
 34 | See the `tests` directory for some example programs.
 35 | 
 36 | The source language
 37 | -------------------
 38 | 
 39 | Keywords are `+ - * / | & << < > = || && ! := do while if then else input output true false array return function let bool int`
 40 | 
 41 | Identifiers are strings of letters, underscores, and digits (not starting with
 42 | a digit) that are not keywords.
 43 | 
 44 | Numbers are sequences of digits that fit into a 64-bit signed integer.
 45 | 
 46 | Comments start with '//' and last until the end of the line.
 47 | 
 48 | All numerical operations are on signed, 2s complement 64-bit integers.
 49 | 
 50 | Multi-dimensional arrays are supported. Array elements are 64-bit integers.
 51 | 
 52 | op ::=  
 53 | | `+`  --- Addition  
 54 | | `-`  --- Subtraction  
 55 | | `*`  --- Multiplication  
 56 | | `/`  --- Division  
 57 | | `|`  --- Bitwise or  
 58 | | `&`  --- Bitwise and  
 59 | | `<<` --- Left shift  
 60 | | `<`  --- Less than  
 61 | | `>`  --- Greater than  
 62 | | `=`  --- Equality  
 63 | | `||` --- Logical or  
 64 | | `&&` --- Logical and
 65 | 
 66 | uop ::=  
 67 | | `!`  --- Logical negation  
 68 | | `-`  --- Unary minus
 69 | 
 70 | indices ::=  
 71 | | `[` exp `]` indices  
 72 | | epsilon
 73 | 
 74 | args ::=  
 75 | | exp  
 76 | | exp `,` args
 77 | 
 78 | atomic_exp ::=  
 79 | | identifier indices --- Variable use and array indexing  
 80 | | identifier `(` args `)` --- Function call  
 81 | | number             --- Integer constant  
 82 | | `true`             --- Boolean constant  
 83 | | `false`            --- Boolean constant  
 84 | | uop atomic_exp     --- Unary operation  
 85 | | `array` indices    --- Array allocation  
 86 | | `(` exp `)`        --- Parenthesised expression
 87 | 
 88 | exp ::=  
 89 | | atomic_exp op atomic_exp --- Binary operation  
 90 | | atomic_exp
 91 | 
 92 | stmt ::=  
 93 | | identifier indices `:=` exp  
 94 | | `while` exp stmt  
 95 | | `do` stmt `while` exp  
 96 | | `if` exp `then` stmt `else` stmt  
 97 | | `{` stmts `}`  
 98 | | `input` identifier  
 99 | | `output` identifier  
100 | | `return` identifier
101 | 
102 | stmts ::=  
103 | | epsilon  
104 | | stmt stmts
105 | 
106 | typ ::=  
107 | | `int`          --- a 64-bit signed integer  
108 | | `bool`         --- a boolean  
109 | | `array` number --- an n dimensional array of 64-bit signed integers
110 | 
111 | params ::=  
112 | | `(` identifier `:` type `)`  
113 | |  `(` identifier `:` type `)` params
114 | 
115 | var_decs ::=  
116 | | epsilon  
117 | | `let` identifier `:` typ `=` exp var_decs
118 | 
119 | functions ::=  
120 | | epsilon  
121 | | `function` identifier params `:` typ `{` var_decs stmts `}` funcs
122 | 
123 | program ::=  
124 | | var_decs functions
125 | 
126 | Loading the compiler in utop
127 | ----------------------------
128 | 
129 | First build the compiler, by running make from the src directory. In utop load
130 | the packages that the compiler uses with the `#require` command:
131 | 
132 | ```
133 | #require "str";;
134 | ```
135 | 
136 | You can add this line to the `.ocamlinit` file in your home directory, so that
137 | you don't have to manually enter it each time you start a new utop session.
138 | The contents of `.ocamlinit` are run each time you start a new utop.
139 | 
140 | The OCaml compilation manager (ocamlbuild) stores all of the compiled OCaml
141 | sources in the `_build` directory, with the extension `.cmo`. The following
142 | tells utop to look there for source files.
143 | ```
144 | #directory "_build";;
145 | ```
146 | 
147 | To load a particular module, for example, LineariseCfg, use the `#load_rec` command.
148 | ```
149 | #load_rec "lineariseCfg.cmo";;
150 | ```
151 | 
152 | You can then open the module if you want (`open LineariseCfg`), or call
153 | functions directly (`LineariseCfg.cfg_to_linear`). Loading a compiled module in
154 | this way only gives you access to the values and functions that are exported in
155 | the corresponding `.mli` file. Often a good way to work on a file is to
156 | `#load_rec` all of the modules that it depends on, and then `#use` the file.
157 | 


--------------------------------------------------------------------------------
/src/shrinkImmediates.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* Ensure that all immediate arguments fit into 32 bits.  We assume that
 20 |    constant propagation has ensured that no operation has two immediate
 21 |    operands, and we maintain that property here. Also, remove immediate
 22 |    operands from division, since the x86 signed division does not support them.
 23 | *)
 24 | 
 25 | open BlockStructure
 26 | 
 27 | let tmp_var = NamedTmp ("SI",0)
 28 | let tmp_var2 = NamedTmp ("SI",1)
 29 | 
 30 | (* Build a series of assignments that puts the immediate n into the dest
 31 |    register, using only immediates of 32 bits or smaller *)
 32 | let assign_imm (dest : var) (n : int64) : block_elem list =
 33 |   [AssignAtom (dest, Num (Int64.shift_right_logical n 32));
 34 |    AssignOp (dest, Ident dest, Tokens.Lshift, Num 32L);
 35 |    AssignOp (dest, Ident dest, Tokens.BitOr,
 36 |              Num (Int64.logand n 0x00000000FFFFFFFFL))]
 37 | 
 38 | let is_imm (a : atomic_exp) : bool =
 39 |   match a with
 40 |   | Ident _ -> false
 41 |   | Num _ -> true
 42 | 
 43 | (* If the atomic_exp is a large immediate constant (takes more than 32 bits),
 44 |    then return the int64, else None. Relies on int64 being 2s complement. If
 45 |    it's negative (the top bit is 1), then check that the top 33 bits are 1. It
 46 |    it's non-negative, check that the top 33 bits are 0. 32 won't suffice,
 47 |    because when we truncate to 32 bits only, the top bit needs to be the sign
 48 |    bit. *)
 49 | let get_large_imm (a : atomic_exp) : int64 option =
 50 |   match a with
 51 |   | Num n ->
 52 |     (* This is an arithmetic shift. *)
 53 |     let topmost = Int64.shift_right n 31 in
 54 |     if Int64.compare topmost 0L = 0 ||
 55 |        Int64.compare topmost 0xFFFFFFFFFFFFFFFFL = 0 then
 56 |       None
 57 |     else
 58 |       Some n
 59 |   | _ -> None
 60 | 
 61 | module T = Tokens
 62 | 
 63 | let shrink_imm_elem (e : block_elem) : block_elem list =
 64 |   match e with
 65 |   | AssignOp (dest, a1, T.Div, a2) ->
 66 |     (* division cannot have immediate operands *)
 67 |     if is_imm a1 && is_imm a2 then
 68 |       [AssignAtom (tmp_var, a1);
 69 |        AssignAtom (tmp_var2, a2);
 70 |        AssignOp (dest, Ident tmp_var, T.Div, Ident tmp_var2)]
 71 |     else if is_imm a1 then
 72 |       [AssignAtom (tmp_var, a1);
 73 |        AssignOp (dest, Ident tmp_var, T.Div, a2)]
 74 |     else if is_imm a2 then
 75 |       [AssignAtom (tmp_var, a2);
 76 |        AssignOp (dest, a1, T.Div, Ident tmp_var)]
 77 |     else
 78 |       [e]
 79 |   | AssignOp (dest, a1, op, a2) ->
 80 |     assert (not (is_imm a1 && is_imm a2));
 81 |     (match get_large_imm a1 with
 82 |      | Some n ->
 83 |        assign_imm tmp_var n @
 84 |        [AssignOp (dest, Ident tmp_var, op, a2)]
 85 |      | None ->
 86 |        (match get_large_imm a2 with
 87 |         | Some n ->
 88 |           assign_imm tmp_var n @
 89 |           [AssignOp (dest, a1, op, Ident tmp_var)]
 90 |         | None ->
 91 |           [e]))
 92 |   | AssignAtom (dest, a) ->
 93 |     (match get_large_imm a with
 94 |      | Some n -> assign_imm dest n
 95 |      | None -> [e])
 96 |   | Ld (v1, v2, a) ->
 97 |     (match get_large_imm a with
 98 |      | Some n ->
 99 |        assign_imm tmp_var n @ [Ld(v1, v2, Ident tmp_var)]
100 |      | None -> [e])
101 |   | St (r, a1, a2) ->
102 |     (match (get_large_imm a1, get_large_imm a2) with
103 |      | (None, None) -> [e]
104 |      | (Some n1, None) ->
105 |        assign_imm tmp_var n1 @ [St(r, Ident tmp_var, a2)]
106 |      | (None, Some n2) ->
107 |        assign_imm tmp_var n2 @ [St(r, a1, Ident tmp_var)]
108 |      | (Some n1, Some n2) ->
109 |        assign_imm tmp_var n1 @
110 |        assign_imm tmp_var2 n2 @
111 |        [St(r, Ident tmp_var, Ident tmp_var2)])
112 |   | Call (v, f, aes) ->
113 |     let (s, es, _) =
114 |       List.fold_right
115 |         (fun (ae : atomic_exp) ((s : block_elem list), es, n) ->
116 |            match get_large_imm ae with
117 |            | None -> (s, ae::es, n)
118 |            | Some imm ->
119 |              (assign_imm (NamedTmp ("SI",n)) imm @ s, Ident tmp_var::es, n + 1))
120 |         aes
121 |         ([], [], 2)
122 |     in
123 |     s @ [Call (v, f, es)]
124 |   | BoundCheck (a1, a2) ->
125 |     assert (not (is_imm a1 && is_imm a2));
126 |     (match get_large_imm a1 with
127 |      | Some n ->
128 |        assign_imm tmp_var n @
129 |        [BoundCheck (Ident tmp_var, a2)]
130 |      | None ->
131 |        (match get_large_imm a2 with
132 |         | Some n ->
133 |           assign_imm tmp_var n @
134 |           [BoundCheck (a1, Ident tmp_var)]
135 |         | None ->
136 |           [e]))
137 |   | NullCheck v -> [NullCheck v]
138 | 
139 | let shrink_imm (cfg : cfg) : cfg =
140 |   List.map
141 |     (fun cfg_entry ->
142 |        { cfg_entry with
143 |          elems =
144 |            List.flatten
145 |              (List.map shrink_imm_elem cfg_entry.elems) })
146 |     cfg
147 | 


--------------------------------------------------------------------------------
/src/blockInterp.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2016 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* An interpreter for CFGs *)
 20 | 
 21 | open BlockStructure
 22 | open AstInterp
 23 | 
 24 | type store_t = int64 Varmap.t
 25 | 
 26 | type heap_t = { mutable next_address : int; heap : bytes }
 27 | 
 28 | (* Convert int64 to 8 bytes in little-endian order and place them in the
 29 |    given bytes at the given index *)
 30 | let int64_to_bytes (b: bytes) index num : unit =
 31 |   let rec loop i inc =
 32 |     if inc < 8 then
 33 |       (Bytes.set b (index + inc) (Char.chr (Int64.to_int (Int64.logand 0xFFL i)));
 34 |        loop (Int64.shift_right i 8) (inc + 1))
 35 |     else
 36 |       ()
 37 |   in
 38 |   loop num 0
 39 | 
 40 | (* Read 8 bytes in little-endian order into an int64 *)
 41 | let bytes_to_int64 (b : bytes) index : int64 =
 42 |   let rec loop i inc =
 43 |     if inc >= 0 then
 44 |       let next =
 45 |         Int64.logor
 46 |           (Int64.shift_left i 8)
 47 |           (Int64.of_int (Char.code (Bytes.get b (index + inc))))
 48 |       in
 49 |       loop next (inc - 1)
 50 |     else
 51 |       i
 52 |   in
 53 |   loop 0L 7
 54 | 
 55 | let alloc1 (heap : heap_t) (n : int64) : int64 =
 56 |   let old_next = heap.next_address in
 57 |   let new_next = old_next + 8 * ((Int64.to_int n) + 1) in
 58 |   if new_next <= Bytes.length heap.heap then
 59 |     (heap.next_address <- new_next;
 60 |      int64_to_bytes heap.heap old_next n;
 61 |      Int64.of_int old_next)
 62 |   else
 63 |     raise (Crash "Out of memory")
 64 | 
 65 | let rec alloc (heap : heap_t) (ns : int64 list) : int64 =
 66 |   match ns with
 67 |   | [] -> raise (Util.InternalError "Empty alloc")
 68 |   | [n] -> alloc1 heap n
 69 |   | n::ns ->
 70 |     let v = alloc1 heap n in
 71 |     for i = 0 to (Int64.to_int n) do
 72 |       int64_to_bytes heap.heap (Int64.to_int v + 8 * i)
 73 |         (alloc heap ns)
 74 |     done;
 75 |     v
 76 | 
 77 | let interp_atomic_exp (store : store_t) (ae : atomic_exp) : int64 =
 78 |   match ae with
 79 |   | Ident x -> Varmap.find x store
 80 |   | Num i -> i
 81 | 
 82 | let interp_block_elem (store : store_t) (heap : heap_t) (be : block_elem)
 83 |   : store_t =
 84 |   match be with
 85 |   | AssignOp (x, ae1, op, ae2) ->
 86 |     let n1 = interp_atomic_exp store ae1 in
 87 |     let n2 = interp_atomic_exp store ae2 in
 88 |     Varmap.add x (do_op op n1 n2) store
 89 |   | AssignAtom (x, ae) ->
 90 |     Varmap.add x (interp_atomic_exp store ae) store
 91 |   | Ld (x1, x2, ae) ->
 92 |     let n1 = Varmap.find x2 store in
 93 |     let n2 = interp_atomic_exp store ae in
 94 |     Varmap.add
 95 |       x1
 96 |       (bytes_to_int64 heap.heap (Int64.to_int (Int64.add n1 n2)))
 97 |       store
 98 |   | St (x, ae1, ae2) ->
 99 |     let n1 = Varmap.find x store in
100 |     let n2 = interp_atomic_exp store ae1 in
101 |     let n3 = interp_atomic_exp store ae2 in
102 |     int64_to_bytes heap.heap (Int64.to_int (Int64.add n1 n2)) n3;
103 |     store
104 |   | Call (Some x, "input", []) ->
105 |     Printf.printf "> ";
106 |     (try
107 |        let n = Int64.of_string (read_line ()) in
108 |        Varmap.add x n store
109 |      with Failure _ -> raise (Crash "not a 64-bit integer"))
110 |   | Call (None, "output", [i]) ->
111 |     begin
112 |       print_string (Int64.to_string (interp_atomic_exp store i));
113 |       print_newline ();
114 |       store
115 |     end
116 |   | Call (Some x,
117 |           ("allocate1" | "allocate2" | "allocate3" | "allocate4" | "allocate5" |
118 |            "allocate6" | "allocate7"),
119 |           aes) ->
120 |     let ns = List.map (interp_atomic_exp store) aes in
121 |     Varmap.add x (alloc heap ns) store
122 |   | Call _ ->
123 |     raise (Util.InternalError "Unknown function call in blockInterp")
124 | 
125 | let rec interp_basic_block (store : store_t) (heap : heap_t)
126 |     (bb : basic_block) : store_t =
127 |   match bb with
128 |   | [] -> store
129 |   | be::bes ->
130 |     let store' = interp_block_elem store heap be in
131 |     interp_basic_block store' heap bes
132 | 
133 | let interp_next_block store nb : int =
134 |   match nb with
135 |   | End -> 0
136 |   | Next i -> i
137 |   | Branch ((ae1, op, ae2), i1, i2) ->
138 |     let n1 = interp_atomic_exp store ae1 in
139 |     let n2 = interp_atomic_exp store ae2 in
140 |     match op with
141 |     | Eq -> if Int64.compare n1 n2 = 0 then i1 else i2
142 |     | Lt -> if Int64.compare n1 n2 < 0 then i1 else i2
143 |     | Gt -> if Int64.compare n1 n2 > 0 then i1 else i2
144 | 
145 | let interp_cfg  (store : store_t) (heap : heap_t) (cfg : cfg) : unit =
146 |   let cfgmap = LineariseCfg.init_traversal cfg in
147 |   let rec loop (index : int) store =
148 |     let node = Util.Intmap.find index cfgmap in
149 |     let store' = interp_basic_block store heap node.elems in
150 |     let next_index = interp_next_block store node.next in
151 |     loop next_index store'
152 |   in
153 |   loop 1 store
154 | 
155 | let interp_prog (heap_size : int) (cfg : cfg) : unit =
156 |   interp_cfg
157 |     Varmap.empty
158 |     { next_address = 0; heap = Bytes.make heap_size '\000'}
159 |     cfg
160 | 


--------------------------------------------------------------------------------
/src/regAlloc.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* A very simple register allocator. Global variables never get put in a
 20 |    register. Function parameters stay in the registers that they are passed in.
 21 |    Local variables get put in the remaining registers in no particular order.
 22 |    The rest are put on the stack *)
 23 | 
 24 | open Util
 25 | open BlockStructure
 26 | 
 27 | let get_vars_ae (ae : atomic_exp) (vars : Varset.t) : Varset.t =
 28 |   match ae with
 29 |   | Ident r -> Varset.add r vars
 30 |   | Num _ -> vars
 31 | 
 32 | let get_vars_be (be : block_elem) (vars : Varset.t) : Varset.t =
 33 |   match be with
 34 |   | AssignOp (r, ae1, _, ae2) ->
 35 |     get_vars_ae ae1 (get_vars_ae ae2 (Varset.add r vars))
 36 |   | AssignAtom (r, ae) -> get_vars_ae ae (Varset.add r vars)
 37 |   | Ld (v1, v2, ae) ->
 38 |     get_vars_ae ae (Varset.add v1 (Varset.add v2 vars))
 39 |   | St (r, ae1, ae2) ->
 40 |     get_vars_ae ae1 (get_vars_ae ae2 (Varset.add r vars))
 41 |   | Call (None, _, aes) ->
 42 |     List.fold_right get_vars_ae aes vars
 43 |   | Call (Some i, _, aes) ->
 44 |     List.fold_right get_vars_ae aes (Varset.add i vars)
 45 |   | BoundCheck (a1, a2) ->
 46 |     get_vars_ae a1 (get_vars_ae a2 vars)
 47 |   | NullCheck v ->
 48 |     Varset.add v vars
 49 | 
 50 | let get_vars_test (ae1, _, ae2) (vars : Varset.t) : Varset.t =
 51 |   List.fold_right get_vars_ae [ae1; ae2] vars
 52 | 
 53 | let get_vars_nb (nb : next_block) (vars : Varset.t) : Varset.t =
 54 |   match nb with
 55 |   | Return None -> vars
 56 |   | Return (Some v) -> Varset.add v vars
 57 |   | Next _ -> vars
 58 |   | Branch (r, _, _) -> get_vars_test r vars
 59 | 
 60 | let get_vars_block (b : cfg_entry) (vars : Varset.t) : Varset.t =
 61 |   List.fold_right get_vars_be b.elems (get_vars_nb b.next vars)
 62 | 
 63 | (* Get all of the variables mentioned in the cfg *)
 64 | let get_vars (cfg : cfg) : Varset.t =
 65 |   List.fold_right get_vars_block cfg Varset.empty
 66 | 
 67 | let is_global v =
 68 |   match v with
 69 |   | NamedSource (_, SourceAst.Global) -> true
 70 |   | _ -> false
 71 | 
 72 | let is_param v =
 73 |   match v with
 74 |   | NamedSource (_, SourceAst.Parameter) -> true
 75 |   | _ -> false
 76 | 
 77 | let is_local v =
 78 |   match v with
 79 |   | NamedSource (_, SourceAst.Local) -> true
 80 |   | NamedTmp _ -> true
 81 |   | _ -> false
 82 | 
 83 | (* Given the vars used in a function, allocate them into registers. Ignore the
 84 |    live ranges and clash graph. This is the simplest strategy that will yield a
 85 |    working program. Return also the number of stack slots used. *)
 86 | (* TODO: actually put some locals into registers *)
 87 | let build_regalloc_map (fun_params : (SourceAst.id * int) list) (vars : Varset.t)
 88 |   : (var Varmap.t * int) =
 89 |   let vars = Varset.elements vars in
 90 |   let (globals, vars) = List.partition is_global vars in
 91 |   let (locals, vars) = List.partition is_local vars in
 92 |   assert (List.for_all is_param vars); (* There shouldn't be any allocated registers already *)
 93 |   let global_map =
 94 |     List.map
 95 |       (fun g ->
 96 |          match g with
 97 |          | NamedSource (i, _) as n -> (n, Global i)
 98 |          | _ -> assert false)
 99 |       globals
100 |   in
101 |   let param_map =
102 |     List.map (fun (p, i) -> (id_to_var p, Vreg i)) fun_params
103 |   in
104 |   let local_map =
105 |     List.map2 (fun l i -> (l, Stack i)) locals (count (List.length locals))
106 |   in
107 |   let m =
108 |     List.fold_right (fun (k,v) m -> Varmap.add k v m)
109 |       (global_map @ param_map @ local_map)
110 |       Varmap.empty
111 |   in
112 |   (m, List.length locals)
113 | 
114 | let reg_alloc_ae (map : var Varmap.t) (ae : atomic_exp) : atomic_exp =
115 |   match ae with
116 |   | Ident v ->
117 |     Ident (Varmap.find v map)
118 |   | Num x -> Num x
119 | 
120 | let reg_alloc_be (map : var Varmap.t) (be : block_elem) : block_elem =
121 |   match be with
122 |   | AssignOp (v, ae1, op, ae2) ->
123 |     AssignOp (Varmap.find v map, reg_alloc_ae map ae1, op, reg_alloc_ae map ae2)
124 |   | AssignAtom (v, ae) ->
125 |     AssignAtom (Varmap.find v map, reg_alloc_ae map ae)
126 |   | Ld (v1, v2, ae) ->
127 |     Ld (Varmap.find v1 map, Varmap.find v2 map, reg_alloc_ae map ae)
128 |   | St (v, ae1, ae2) ->
129 |     St (Varmap.find v map, reg_alloc_ae map ae1, reg_alloc_ae map ae2)
130 |   | Call (v, f, aes) ->
131 |     Call (Util.option_map (fun v -> Varmap.find v map) v, f,
132 |           List.map (reg_alloc_ae map) aes)
133 |   | BoundCheck (a1, a2) ->
134 |     BoundCheck (reg_alloc_ae map a1, reg_alloc_ae map a2)
135 |   | NullCheck v ->
136 |     NullCheck (Varmap.find v map)
137 | 
138 | let reg_alloc_test (map : var Varmap.t) (ae1, op, ae2) : test =
139 |   (reg_alloc_ae map ae1, op, reg_alloc_ae map ae2)
140 | 
141 | let reg_alloc_nb (map : var Varmap.t) (nb : next_block) : next_block =
142 |   match nb with
143 |   | Return (Some v) -> Return (Some (Varmap.find v map))
144 |   | Return None -> Return None
145 |   | Next i -> Next i
146 |   | Branch (t, t1, t2) ->
147 |     Branch (reg_alloc_test map t, t1, t2)
148 | 
149 | (* fun_params is an association list mapping the function's parameters to the
150 |    registers that they were passed in. num_regs is the total number of
151 |    registers that can be used. The returned int is the number of variables
152 |    put on the stack *)
153 | let reg_alloc (fun_params : (SourceAst.id * int) list) (num_regs : int)
154 |     (cfg : cfg)
155 |   : (cfg * int) =
156 |   let (map, num_stacks) = build_regalloc_map fun_params (get_vars cfg) in
157 |   let cfg =
158 |     List.map (fun entry -> { bnum = entry.bnum;
159 |                              elems = List.map (reg_alloc_be map) entry.elems;
160 |                              next = reg_alloc_nb map entry.next;
161 |                              finished = false;
162 |                              started = false }) cfg in
163 |   (cfg, num_stacks)
164 | 


--------------------------------------------------------------------------------
/src/tokens.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* The language's tokens, and a simple lexer *)
 20 | 
 21 | open Util
 22 | 
 23 | (* Primitive operators *)
 24 | (* When extending with operators that can have side effects, be careful to
 25 |    check the rest of the compiler. Some optimisations can only be done on
 26 |    side-effect-free operations. Right now only Div can have an effect
 27 |    (divide-by-zero exception) *)
 28 | 
 29 | type op =
 30 |   (* integer operations *)
 31 |   | Plus
 32 |   | Minus
 33 |   | Times
 34 |   | Div
 35 |   | Lshift
 36 |   | BitOr
 37 |   | BitAnd
 38 |   (* comparisons *)
 39 |   | Lt
 40 |   | Gt
 41 |   | Eq
 42 |   (* boolean operations *)
 43 |   | And
 44 |   | Or
 45 | 
 46 | type uop =
 47 |   (* boolean negation *)
 48 |   | Not
 49 | 
 50 | let show_op op =
 51 |   match op with
 52 |   | Plus -> "+"
 53 |   | Minus -> "-"
 54 |   | Times -> "*"
 55 |   | Div -> "/"
 56 |   | Lt -> "<"
 57 |   | Gt -> ">"
 58 |   | Eq -> "="
 59 |   | And -> "&&"
 60 |   | Or -> "||"
 61 |   | Lshift -> "<<"
 62 |   | BitOr -> "|"
 63 |   | BitAnd -> "&"
 64 | 
 65 | let show_uop (uop : uop) : string =
 66 |   match uop with
 67 |   | Not -> "!"
 68 | 
 69 | type token =
 70 |   | Num of int64
 71 |   | Ident of string (* identifiers represent various names, including function
 72 |                        and variable names *)
 73 |   | Op of op
 74 |   | Uop of uop
 75 |   (* Punctuation *)
 76 |   | Lparen
 77 |   | Rparen
 78 |   | Lcurly
 79 |   | Rcurly
 80 |   | Lbrac
 81 |   | Rbrac
 82 |   | Colon
 83 |   | Comma
 84 |   (* Identifier-like keywords *)
 85 |   | While
 86 |   | Do
 87 |   | If
 88 |   | Then
 89 |   | Else
 90 |   | Assign
 91 |   | True
 92 |   | False
 93 |   | Input
 94 |   | Output
 95 |   | Array
 96 |   | Int
 97 |   | Bool
 98 |   | Let
 99 |   | Function
100 |   | Return
101 | 
102 | let show_token (t : token) : string =
103 |   match t with
104 |   | Num i -> Int64.to_string i
105 |   | Ident s -> s
106 |   | Op o -> show_op o
107 |   | Uop u -> show_uop u
108 |   | Lparen -> "("
109 |   | Rparen -> ")"
110 |   | Lcurly -> "{"
111 |   | Rcurly -> "}"
112 |   | Lbrac -> "["
113 |   | Rbrac -> "]"
114 |   | Comma -> ","
115 |   | Colon -> ":"
116 |   | While -> "while"
117 |   | Do -> "do"
118 |   | If -> "if"
119 |   | Then -> "then"
120 |   | Else -> "else"
121 |   | Assign -> ":="
122 |   | True -> "true"
123 |   | False -> "false"
124 |   | Input -> "input"
125 |   | Output -> "output"
126 |   | Array -> "array"
127 |   | Int -> "int"
128 |   | Bool -> "bool"
129 |   | Let -> "let"
130 |   | Function -> "function"
131 |   | Return -> "return"
132 | 
133 | (* Pretty-print a token *)
134 | let pp_token (fmt : Format.formatter) (t : token) =
135 |   Format.fprintf fmt "%s" (show_token t)
136 | 
137 | (* Tokens annotated with which line number they appear on *)
138 | type tok_loc = (token * int)
139 | 
140 | (* Pretty-print a token and location *)
141 | let pp_tok_loc (fmt : Format.formatter) ((t, l) : tok_loc) =
142 |   Format.fprintf fmt "(%a, %d)" pp_token t l
143 | 
144 | (* The mapping of keword strings to the corresponding tokens *)
145 | let keywords : (string * token) list =
146 |   (* Derive the mapping from the to_string functions to avoid duplication *)
147 |   (show_uop Not, Uop Not) ::
148 |   List.map (fun o -> (show_op o, Op o))
149 |     [Plus; Minus; Times; Div; Lt; Gt; Eq; And; Or; Lshift; BitOr; BitAnd] @
150 |   List.map (fun t -> (show_token t, t))
151 |     [Do; While; If; Then; Else; Array; Assign; True; Input; Output; False;
152 |      Lparen; Rparen; Lcurly; Rcurly; Lbrac; Rbrac; Int; Bool; Colon; Let;
153 |      Return; Function; Comma]
154 | 
155 | (* Map each keyword string to its corresponding token *)
156 | let keyword_map : token Strmap.t =
157 |   List.fold_left (fun m (k,v) -> Strmap.add k v m) Strmap.empty keywords
158 | 
159 | (* Regular expressions that describe various syntactic entities *)
160 | (* Build the keyword regexp out of the mapping to avoid duplication *)
161 | let keyword_re =
162 |   Str.regexp
163 |     (String.concat "\\|"
164 |        (List.map (fun (s, _) -> Str.quote s) keywords))
165 | let number_re = Str.regexp "[0-9]+"
166 | let ident_re = Str.regexp "[a-zA-Z_][a-zA-Z0-9_]*"
167 | let space_re = Str.regexp "[ \t]+\\|//.*"
168 | let newline_re = Str.regexp "\n\\|\r\n"
169 | 
170 | (* Read all the tokens from s, using pos to index into the string and line_n
171 |    to track the current line number, for error reporting later on. Return them
172 |    in a list. *)
173 | (* The lex function repeatedly finds which regexp matches the string, starting
174 |    from the current position, adds that token to the result, and then updates
175 |    the starting position to find the next token *)
176 | let rec lex (s : string) (pos : int) (line_n : int) : tok_loc list =
177 |   if pos >= String.length s then
178 |     []
179 |   else if Str.string_match space_re s pos then
180 |     lex s (Str.match_end ()) line_n
181 |   else if Str.string_match newline_re s pos then
182 |     lex s (Str.match_end ()) (line_n + 1)
183 |   else if Str.string_match ident_re s pos then
184 |     let id = Str.matched_string s in
185 |     if Strmap.mem id keyword_map then
186 |       (* The identifier is also a keyword *)
187 |       let tok = Strmap.find id keyword_map in
188 |       (tok, line_n) :: lex s (Str.match_end ()) line_n
189 |     else
190 |       (Ident id, line_n) :: lex s (Str.match_end ()) line_n
191 |   else if Str.string_match keyword_re s pos then
192 |     let tok = Strmap.find (Str.matched_string s) keyword_map in
193 |     (tok, line_n) :: lex s (Str.match_end ()) line_n
194 |   else if Str.string_match number_re s pos then
195 |     let num =
196 |       try Int64.of_string (Str.matched_string s)
197 |       with Failure _ ->
198 |         raise (BadInput ("Lex error: integer constant too big " ^
199 |                          Str.matched_string s ^
200 |                          " on line " ^
201 |                          string_of_int line_n))
202 |     in
203 |     (Num num, line_n) :: lex s (Str.match_end ()) line_n
204 |   else
205 |     raise (BadInput ("lex error: at character '" ^
206 |                      String.sub s pos 1 ^
207 |                      "' on line " ^
208 |                      string_of_int line_n))
209 | 


--------------------------------------------------------------------------------
/src/unnestExp.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* Flatten expressions so that they follow this grammar. We don't introduce a
 20 |    new type, but do define predicates on the SourceAst.exp type.
 21 | 
 22 |    type ae =
 23 |    | Num of int64
 24 |    | Bool of bool
 25 |    | Ident of SourceAst.id
 26 | 
 27 |    type flat_exp =
 28 |    | Num of int64
 29 |    | Bool of bool
 30 |    | Ident of SourceAst.id * ae
 31 |    | Op of ae * op * ae
 32 |    | Uop of ae
 33 |    | Array of ae list
 34 |    | Call of SourceAst.id * ae list
 35 |  *)
 36 | 
 37 | open SourceAst
 38 | module T = Tokens
 39 | 
 40 | let is_atomic (e : exp) : bool =
 41 |   match e with
 42 |   | Num _ -> true
 43 |   | Bool _ -> true
 44 |   | Ident (_, []) -> true
 45 |   | Ident (_, _) -> false
 46 |   | Op _ -> false
 47 |   | Uop _ -> false
 48 |   | Array _ -> false
 49 |   | Call _ -> false
 50 | 
 51 | let is_flat (e : exp) : bool =
 52 |   match e with
 53 |   | Num _ -> true
 54 |   | Bool _ -> true
 55 |   | Ident (_, []) -> true
 56 |   | Ident (_, [e]) -> is_atomic e
 57 |   | Ident (_, _) -> false
 58 |   | Op (e1, _, e2) -> is_atomic e1 && is_atomic e2
 59 |   | Uop (_, e) -> is_atomic e
 60 |   | Array es -> List.for_all is_atomic es
 61 |   | Call (_, es) -> List.for_all is_atomic es
 62 | 
 63 | let unnest (stmts : stmt list) : stmt list =
 64 | 
 65 |   (* Generate unique names for temporary variables *)
 66 |   let next_ident = ref 0 in
 67 |   let get_ident () : id =
 68 |     let x = !next_ident in
 69 |     next_ident := (!next_ident) + 1;
 70 |     Temp ("UE", x)
 71 |   in
 72 | 
 73 |   (* indices must all be atomic, according to is_atomic above. Returns a flat
 74 |      expression according to is_flat. *)
 75 |   let rec unnest_indices (arr : id) (indices : exp list) : stmt list * exp =
 76 |     match indices with
 77 |     | [] -> ([], Ident (arr, []))
 78 |     | [i] -> ([], Ident (arr, [i]))
 79 |     | (i::indices) ->
 80 |       let id = get_ident () in
 81 |       let (s, e) = unnest_indices id indices in
 82 |       (Assign (id, [], Ident (arr, [i])) :: s,
 83 |        e)
 84 |   in
 85 | 
 86 |   (* Flatten out and expression into a list of statements and an expression by
 87 |      using temporary variables to store the results of each sub-expression. The
 88 |      expression returned satisfies is_flat above.
 89 | 
 90 |      Warning: As implemented, this is O(n^2) because of the list appending.
 91 |      This is unlikely to matter for human written code, as it is O(n^2) where n
 92 |      is the size of an expression, not the whole program. However, for
 93 |      compiling machine-generated code, this could be a problem. A little care
 94 |      could be taken to collect results in a tree-structure and then convert it
 95 |      into a list. *)
 96 |   let rec unnest_exp (e : exp) : stmt list * exp =
 97 |     match e with
 98 |     | Num i -> ([], Num i)
 99 |     | Bool b -> ([], Bool b)
100 |     | Ident (i, es) ->
101 |       let (s_list, aes) = List.split (List.map unnest_exp_atomic es) in
102 |       let (s_list2, ae) = unnest_indices i aes in
103 |       (List.flatten s_list @ s_list2, ae)
104 |     | Op (e1, T.And, e2) ->
105 |       let (s1, f1) = unnest_exp e1 in
106 |       let (s2, f2) = unnest_exp e2 in
107 |       (* f1 && f2 --> (if f1 then tmp := f2 else t := false); tmp *)
108 |       let tmp = get_ident () in
109 |       (s1 @
110 |        [Ite (f1,
111 |              Stmts (s2 @ [Assign (tmp, [], f2)]),
112 |              Assign (tmp, [], Bool false))],
113 |        (Ident (tmp, [])))
114 |     | Op (e1, T.Or, e2) ->
115 |       let (s1, f1) = unnest_exp e1 in
116 |       let (s2, f2) = unnest_exp e2 in
117 |       (* f1 || f2 --> (if f1 then tmp := true else tmp := f2 ); tmp *)
118 |       let tmp = get_ident () in
119 |       (s1 @
120 |        [Ite (f1,
121 |              Assign (tmp, [], Bool true),
122 |              Stmts (s2 @ [Assign (tmp, [], f2)]))],
123 |        (Ident (tmp, [])))
124 |     | Op (e1, op, e2) ->
125 |       let (s1, a1) = unnest_exp_atomic e1 in
126 |       let (s2, a2) = unnest_exp_atomic e2 in
127 |       (s1 @ s2, Op (a1, op, a2))
128 |     | Uop (uop, e) ->
129 |       let (s, a) = unnest_exp_atomic e in
130 |       (s, Uop (uop, a))
131 |     | Array es ->
132 |       let (s_list, aes) = List.split (List.map unnest_exp_atomic es) in
133 |       (List.flatten s_list, Array aes)
134 |     | Call (f, es) ->
135 |       let (s_list, aes) = List.split (List.map unnest_exp_atomic es) in
136 |       (List.flatten s_list, Call  (f, aes))
137 | 
138 | 
139 | (* Similar to unnest_exp, but ensures that the returned exp is atomic, rather
140 |    than just flat.  *)
141 |   and unnest_exp_atomic (e : exp) : stmt list * exp =
142 |     let (s, f) = unnest_exp e in
143 |     if is_atomic f then
144 |       (s, f)
145 |     else
146 |       let id = get_ident () in
147 |       (s @ [Assign (id, [], f)], Ident (id, []))
148 | 
149 |   and unnest_exp_for_test (e : exp) : stmt list * exp =
150 |     let (s, f) = unnest_exp e in
151 |     match f with
152 |     | Ident (_, [_]) ->
153 |       let id = get_ident () in
154 |       (s @ [Assign (id, [], f)], Ident (id, []))
155 |     | _ -> (s, f)
156 |   in
157 | 
158 |   let rec unnest_stmt (s : stmt) : stmt list =
159 |     match s with
160 |     | Assign (x, [], e) ->
161 |       let (s, f) = unnest_exp e in
162 |       s @ [Assign (x, [], f)]
163 |     | Assign (x, es, e) ->
164 |       let (s_list, aes) = List.split (List.map unnest_exp_atomic es) in
165 |       let (s, f) = unnest_exp_atomic e in
166 |       (match unnest_indices x aes with
167 |        | (s', Ident (i, [])) ->
168 |          List.flatten s_list @ s' @ s @ [Assign (i, [], f)]
169 |        | (s', Ident (i, [f'])) ->
170 |          List.flatten s_list @ s' @ s @ [Assign (i, [f'], f)]
171 |        | _ -> assert false)
172 |     | DoWhile (s0, e, s1) ->
173 |       let s0' = unnest_stmt s0 in
174 |       let (se1, e') = unnest_exp_for_test e in
175 |       let s1' = unnest_stmt s1 in
176 |       [DoWhile (stmts_to_stmt (s0' @ se1), e', stmts_to_stmt s1')]
177 |     | Ite (e, s1, s2) ->
178 |       let (se, e') = unnest_exp_for_test e in
179 |       let s1' = unnest_stmt s1 in
180 |       let s2' = unnest_stmt s2 in
181 |       se @ [Ite (e', stmts_to_stmt s1', stmts_to_stmt s2')]
182 |     | Stmts s_list ->
183 |       List.flatten (List.map unnest_stmt s_list)
184 |     | In id -> [In id]
185 |     | Out id -> [Out id]
186 |     | Loc (stmt, _) ->
187 |       unnest_stmt stmt
188 |     | Return id -> [Return id]
189 |   in
190 | 
191 |   List.flatten (List.map unnest_stmt stmts)
192 | 


--------------------------------------------------------------------------------
/src/x86.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2016 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* A fragment of x86-64 AST, and printing in NASM syntax *)
 20 | (* Translated from HOL4 in examples/machine-code/instruction-set-models/x86_64 *)
 21 | 
 22 | open Format
 23 | open Util
 24 | 
 25 | (* 64-bit registers *)
 26 | type reg =
 27 |   | RAX | RBX | RCX  | RDX  | RSP  | RBP  | RSI  | RDI
 28 |   | R8 | R9 | R10 | R11 | R12 | R13 | R14 | R15
 29 | 
 30 | let show_reg r =
 31 |   match r with
 32 |   | RAX -> "rax"
 33 |   | RBX -> "rbx"
 34 |   | RCX -> "rcx"
 35 |   | RDX -> "rdx"
 36 |   | RSP -> "rsp"
 37 |   | RBP -> "rbp"
 38 |   | RSI -> "rsi"
 39 |   | RDI -> "rdi"
 40 |   | R8 -> "r8"
 41 |   | R9 -> "r9"
 42 |   | R10 -> "r10"
 43 |   | R11 -> "r11"
 44 |   | R12 -> "r12"
 45 |   | R13 -> "r13"
 46 |   | R14 -> "r14"
 47 |   | R15 -> "r15"
 48 | 
 49 | let pp_reg fmt r =
 50 |   fprintf fmt "%s" (show_reg r)
 51 | 
 52 | type byte_reg = B of reg
 53 | 
 54 | let show_byte_reg (B r) =
 55 |   match r with
 56 |   | RAX -> "al"
 57 |   | RBX -> "bl"
 58 |   | RCX -> "cl"
 59 |   | RDX -> "dl"
 60 |   | RSP -> "spl"
 61 |   | RBP -> "bpl"
 62 |   | RSI -> "sil"
 63 |   | RDI -> "dil"
 64 |   | R8 -> "r8b"
 65 |   | R9 -> "r9b"
 66 |   | R10 -> "r10b"
 67 |   | R11 -> "r11b"
 68 |   | R12 -> "r12b"
 69 |   | R13 -> "r13b"
 70 |   | R14 -> "r14b"
 71 |   | R15 -> "r15b"
 72 | 
 73 | let pp_byte_reg fmt r =
 74 |   fprintf fmt "%s" (show_byte_reg r)
 75 | 
 76 | type address_component =
 77 |   | EAScaled of int * reg
 78 |   | EAReg of reg
 79 |   | EAConst of int64
 80 | 
 81 | type displacement =
 82 |   | Concrete_disp of int64
 83 |   | Label_disp of string
 84 | 
 85 | type rm =
 86 |   | Zr of reg                                          (* register *)
 87 |   (* Zm (Some (scale, index), Some base, Some displacement) =
 88 |      mem[scale * index + base + displacement] *)
 89 |   | Zm of (int * reg) option * reg option * displacement option
 90 | 
 91 | let rec simple_add_exp fmt l =
 92 |   match l with
 93 |   | [] -> ()
 94 |   | [Some x] -> fprintf fmt "%s" x
 95 |   | [None] -> fprintf fmt "0"
 96 |   | Some x :: y -> fprintf fmt "%s + %a" x simple_add_exp y
 97 |   | None :: y -> simple_add_exp fmt y
 98 | 
 99 | let show_displacement d =
100 |   match d with
101 |   | Concrete_disp i -> Int64.to_string i
102 |   | Label_disp l -> l ^ "_Global"
103 | 
104 | let pp_rm fmt rm =
105 |   match rm with
106 |   | Zr r -> fprintf fmt "%a" pp_reg r
107 |   | Zm (idx,base,disp) ->
108 |     fprintf fmt "qword [%a]"
109 |       simple_add_exp [option_map (fun (scale,i) -> show_reg i ^ " * " ^ string_of_int scale) idx;
110 |                       option_map show_reg base;
111 |                       option_map show_displacement disp]
112 | 
113 | type dest_src =
114 |   | Zrm_i of rm  * int64  (* mnemonic r/mXX, immXX (sign-extended) *)
115 |   | Zrm_r of rm  * reg    (* mnemonic r/mXX, rXX *)
116 |   | Zr_rm of reg * rm     (* mnemonic rXX, r/mXX *)
117 | 
118 | let pp_dest_src fmt ds =
119 |   match ds with
120 |   | Zrm_i (rm, i) ->
121 |     fprintf fmt "%a, %Ld"
122 |       pp_rm rm
123 |       i
124 |   | Zrm_r (rm, reg) ->
125 |     fprintf fmt "%a, %a"
126 |       pp_rm rm
127 |       pp_reg reg
128 |   | Zr_rm (reg, rm) ->
129 |     fprintf fmt "%a, %a"
130 |       pp_reg reg
131 |       pp_rm rm
132 | 
133 | type imm_rm =
134 |   | Zi_rm of rm      (* r/mXX *)
135 |   | Zi    of int64   (* sign-extended immediate *)
136 | 
137 | let pp_imm_rm fmt ir =
138 |   match ir with
139 |   | Zi_rm rm -> pp_rm fmt rm
140 |   | Zi i -> fprintf fmt "%Ld" i
141 | 
142 | type binop_name =
143 |   | Zadc | Zadd | Zand | Zcmp | Zor | Zshl | Zshr
144 |   | Zsar | Zsub | Zsbb | Ztest | Zxor
145 | 
146 | let show_binop_name b =
147 |   match b with
148 |   | Zadc -> "adc"
149 |   | Zadd -> "add"
150 |   | Zand -> "and"
151 |   | Zcmp -> "cmp"
152 |   | Zor -> "or"
153 |   | Zshl -> "shl"
154 |   | Zshr -> "shr"
155 |   | Zsar -> "sar"
156 |   | Zsub -> "sub"
157 |   | Zsbb -> "sbb"
158 |   | Ztest -> "test"
159 |   | Zxor -> "xor"
160 | 
161 | let pp_binop_name fmt b =
162 |   fprintf fmt "%s" (show_binop_name b)
163 | 
164 | type monop_name = Zdec | Zinc | Znot | Zneg
165 | 
166 | let show_monop_name b =
167 |   match b with
168 |   | Zdec -> "dec"
169 |   | Zinc -> "inc"
170 |   | Znot -> "not"
171 |   | Zneg -> "neg"
172 | 
173 | let pp_monop_name fmt b =
174 |   fprintf fmt "%s" (show_monop_name b)
175 | 
176 | type cond = (* this list is not complete *)
177 |   | Z_ALWAYS            (* N = not     *)
178 |   | Z_E | Z_NE          (* E = equal   *)
179 |   | Z_S | Z_NS          (* S = signed  *)
180 |   | Z_A | Z_NA          (* A = above   *)
181 |   | Z_B | Z_NB          (* B = below   *)
182 |   | Z_L | Z_NL          (* L = less *)
183 |   | Z_G | Z_NG          (* L = greater *)
184 | 
185 | let pp_cond fmt c =
186 |   fprintf fmt "%s"
187 |     (match c with
188 |      | Z_ALWAYS -> "mp"
189 |      | Z_E -> "e"
190 |      | Z_NE -> "ne"
191 |      | Z_S -> "s"
192 |      | Z_NS -> "ns"
193 |      | Z_A -> "a"
194 |      | Z_NA -> "na"
195 |      | Z_B -> "b"
196 |      | Z_NB -> "nb"
197 |      | Z_L -> "l"
198 |      | Z_NL -> "nl"
199 |      | Z_G -> "g"
200 |      | Z_NG -> "ng")
201 | 
202 | type instruction =
203 |   | Zlabel     of string
204 |   | Zbinop     of binop_name * dest_src
205 |   | Zmonop     of monop_name * rm
206 | (*
207 |   | Zcmpxchg   of rm * reg
208 |   | Zxadd      of rm * reg
209 |   | Zxchg      of rm * reg
210 |    *)
211 |   (* either reg := reg * rm; or reg := rm * int *)
212 |   | Zimul      of reg * rm * int64 option
213 |   | Zidiv      of rm (* RAX := RDX,RAX / rm; RDX := RDX,RAX mod rm *)
214 |   | Zlea       of dest_src
215 |   | Zpop       of rm
216 |   | Zpush      of imm_rm
217 |   | Zcall      of string
218 |   | Zret
219 |   | Zleave
220 |   | Zcpuid
221 |   | Zmov       of dest_src
222 |   (* | Zmovzx     of dest_src *)
223 |   (* jcc includes jmp rel, i.e. unconditional relative jumps. *)
224 |   | Zjcc       of cond * string
225 |   | Zjmp       of rm                (* jmp excludes relative jumps, see jcc. *)
226 |   | Zset       of cond * byte_reg   (* Set operates on a byte of memory or
227 |                                        register. Here, we'll only use registers
228 |                                     *)
229 | 
230 | let pp_instruction fmt i =
231 |   match i with
232 |   | Zlabel s ->
233 |     fprintf fmt "%s:" s
234 |   | Zbinop (n, ds) ->
235 |     fprintf fmt "%a %a"
236 |       pp_binop_name n
237 |       pp_dest_src ds
238 |   | Zmonop (n, rm) ->
239 |     fprintf fmt "%a %a"
240 |       pp_monop_name n
241 |       pp_rm rm
242 |   | Zimul (r1, rm2, None) ->
243 |     (* r1 := r1 * rm2 *)
244 |     fprintf fmt "imul %a, %a"
245 |       pp_reg r1
246 |       pp_rm rm2
247 |   | Zimul (r1, rm2, Some i) ->
248 |     (* r1 := rm2 * i *)
249 |     fprintf fmt "imul %a, %a, %Ld"
250 |       pp_reg r1
251 |       pp_rm rm2
252 |       i
253 |   | Zidiv rm ->
254 |     fprintf fmt "idiv %a"
255 |       pp_rm rm
256 |   | Zlea ds ->
257 |     fprintf fmt "lea %a"
258 |       pp_dest_src ds
259 |   | Zpop rm ->
260 |     fprintf fmt "pop %a"
261 |       pp_rm rm
262 |   | Zpush ir ->
263 |     fprintf fmt "push %a"
264 |       pp_imm_rm ir
265 |   | Zcall lab ->
266 |     fprintf fmt "call %s" lab
267 |   | Zret ->
268 |     fprintf fmt "ret"
269 |   | Zleave ->
270 |     fprintf fmt "leave"
271 |   | Zcpuid ->
272 |     fprintf fmt "cpuid"
273 |   | Zmov ds ->
274 |     fprintf fmt "mov %a"
275 |       pp_dest_src ds
276 |   | Zjcc (c, lab) ->
277 |     fprintf fmt "j%a %s"
278 |       pp_cond c
279 |       lab
280 |   | Zjmp rm ->
281 |     fprintf fmt "jmp %a"
282 |       pp_rm rm
283 |   | Zset (cond, reg) ->
284 |     fprintf fmt "set%a %a"
285 |       pp_cond cond
286 |       pp_byte_reg reg
287 | 
288 | let pp_instr_list fmt il =
289 |   List.iter
290 |     (fun i ->
291 |        match i with
292 |        | Zlabel _ -> fprintf fmt "%a@\n" pp_instruction i
293 |        | _ -> fprintf fmt "  %a@\n" pp_instruction i)
294 |     il
295 | 


--------------------------------------------------------------------------------
/src/liveVarAnalysis.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | open Util
 20 | open BlockStructure
 21 | 
 22 | type cfg_annot = { gen : Varset.t; kill : Varset.t; live_exit : Varset.t }
 23 | 
 24 | type cfg = (cfg_entry * cfg_annot) list
 25 | 
 26 | let pp_cfg_annot fmt c =
 27 |   Format.fprintf fmt "{@[gen=%a;@ kill=%a; live_exit=%a@]}"
 28 |     Varset.pp c.gen
 29 |     Varset.pp c.kill
 30 |     Varset.pp c.live_exit
 31 | 
 32 | let pp_cfg1 fmt (c,a) =
 33 |   Format.fprintf fmt "(@[%a@ %a@])"
 34 |     BlockStructure.pp_cfg_entry c
 35 |     pp_cfg_annot a
 36 | 
 37 | let pp_cfg fmt l = pp_list pp_cfg1 fmt l
 38 | 
 39 | (* Add the identifier in a to the generation set *)
 40 | let add_gen (a : atomic_exp) (gen : Varset.t) : Varset.t =
 41 |   match a with
 42 |   | Num _ -> gen
 43 |   | Ident i -> Varset.add i gen
 44 | 
 45 | (* Add the identifiers in aes to the generation set *)
 46 | let rec add_gen_list (aes : atomic_exp list) (gen : Varset.t) : Varset.t =
 47 |   match aes with
 48 |   | [] -> gen
 49 |   | a::aes -> add_gen_list aes (add_gen a gen)
 50 | 
 51 | (* Compute the gen and kill sets for a basic block, live_exit is set to empty *)
 52 | let analyse_block (b : basic_block) : cfg_annot =
 53 |   let rec analyse_block gen kill b =
 54 |     match b with
 55 |     | [] -> (gen, kill)
 56 |     | AssignOp (i, a1, _, a2) :: b ->
 57 |       analyse_block (add_gen a1 (add_gen a2 (Varset.remove i gen)))
 58 |         (Varset.add i kill)
 59 |         b
 60 |     | AssignAtom (i, a) :: b ->
 61 |       analyse_block (add_gen a (Varset.remove i gen)) (Varset.add i kill) b
 62 |     | Ld (i, j, a) :: b ->
 63 |       analyse_block (add_gen a (Varset.add j (Varset.remove i gen)))
 64 |         (Varset.add i kill)
 65 |         b
 66 |     | St (i, a1, a2) :: b ->
 67 |       analyse_block (add_gen a1 (add_gen a2 (Varset.add i gen)))
 68 |         kill
 69 |         b
 70 |     | Call (None, _, aes) :: b ->
 71 |       analyse_block (add_gen_list aes gen) kill b
 72 |     | Call (Some i, _, aes) :: b ->
 73 |       analyse_block (add_gen_list aes (Varset.remove i gen)) (Varset.add i kill) b
 74 |     | BoundCheck (a1, a2) :: b ->
 75 |       analyse_block (add_gen a1 (add_gen a2 gen)) kill b
 76 |     | NullCheck v :: b ->
 77 |       analyse_block (Varset.add v gen) kill b
 78 |   in
 79 |   let (gen,kill) = analyse_block Varset.empty Varset.empty (List.rev b) in
 80 |   { gen = gen; kill = kill; live_exit = Varset.empty }
 81 | 
 82 | (* Split the annotated cfg into the predecessors of node n, and the other nodes
 83 | *)
 84 | let find_preds n cfg =
 85 |   List.partition
 86 |     (fun (entry, _) ->
 87 |        match entry.next with
 88 |        | Return _ -> false
 89 |        | Next n' ->
 90 |          n = n'
 91 |        | Branch (_, n1, n2) ->
 92 |          n = n1 || n = n2)
 93 |     cfg
 94 | 
 95 | let add_test_vars (ae1, _, ae2) vars =
 96 |   let vars' =
 97 |     match ae1 with
 98 |     | Ident v -> Varset.add v vars
 99 |     | _ -> vars
100 |   in
101 |   match ae2 with
102 |     | Ident v -> Varset.add v vars'
103 |     | _ -> vars'
104 | 
105 | (* What's live right before we are exit a block: initially if the block
106 |    returns, then all the globals are alive, since other parts of the code might
107 |    read them. Also the returned value is live. For a branch, any variable used
108 |    in the condition is live. *)
109 | let init_live_exit (globals : Varset.t) (a : cfg_annot) (next : next_block)
110 |   : cfg_annot =
111 |   match next with
112 |   | Return None -> { a with live_exit = globals }
113 |   | Return (Some i) -> { a with live_exit = Varset.add i globals }
114 |   | Next _ -> a
115 |   | Branch (i, _, _) ->
116 |     { a with live_exit = add_test_vars i Varset.empty }
117 | 
118 | (* Do live variable analysis, returning an annotated cfg *)
119 | let lva (globals : Varset.t) (cfg : BlockStructure.cfg) : cfg =
120 |   (* Initial annotations for all of the blocks. *)
121 |   let init_worklist =
122 |     List.map
123 |       (fun entry ->
124 |          (entry,
125 |           init_live_exit globals (analyse_block entry.elems) entry.next))
126 |       cfg
127 |   in
128 |   (* Update one block from the worklist.
129 |     The worklist and finished_list partition the whole cfg. That is, they must
130 |      contain all of the blocks between them, with no duplication. NB, this is
131 |      slightly different than the naive worklist algorithm. Here we add a
132 |      node to the worklist only when its live_exit would change based on the current
133 |      node's live_entry. *)
134 |   let rec do_one (worklist : cfg) (finished_list : cfg) : cfg =
135 |     match worklist with
136 |     | [] -> finished_list
137 |     | ((entry, annot) as node) :: worklist ->
138 |       let live_entry =
139 |         Varset.union annot.gen (Varset.diff annot.live_exit annot.kill)
140 |       in
141 |       (* Updates contains node's predecessors from the worklist input,
142 |          and worklist contains the remaining non-predecessors. *)
143 |       let (updates, worklist) = find_preds entry.bnum worklist in
144 |       (* Possible_updates contains node's predecessors from the finished_list,
145 |          possibly including itself, and finished contains the remaining
146 |          non-predecessors. *)
147 |       let (possible_updates, finished) =
148 |         find_preds entry.bnum (node::finished_list)
149 |       in
150 |       (* Finished' contains all possible_updates whose live_exits are supersets
151 |          of our live entry. There is no need to update them, because no nodes would
152 |          be added to their live_exit. updates' contains those previously finished nodes
153 |          whose live_exits will change *)
154 |       let (finished', updates') =
155 |         List.partition
156 |           (fun (_, annot) -> Varset.subset live_entry annot.live_exit)
157 |           possible_updates
158 |       in
159 |       (* Update the live_exits of the nodes needing updated and add them to the
160 |          worklist *)
161 |       let new_worklist =
162 |         List.map
163 |           (fun (entry, annot) ->
164 |              (entry, { annot with
165 |                        live_exit = Varset.union annot.live_exit live_entry}))
166 |           (updates @ updates')
167 |         @
168 |         worklist
169 |       in
170 |       do_one new_worklist (finished' @ finished)
171 |   in
172 |   do_one (List.rev init_worklist) []
173 | 
174 | (* Check that the operator cannot have a side-effect *)
175 | let pure_op op : bool =
176 |   match op with
177 |   | Tokens.Div -> false (* divide by zero *)
178 |   | _ -> true
179 | 
180 | (* Remove unused writes from elems, assuming that elems is presented backwards,
181 |    and live is the set of variables live on exiting the block *)
182 | let rec local_remove_unused_writes (live : Varset.t) (elems : block_elem list)
183 |   : block_elem list =
184 |   match elems with
185 |   | [] -> []
186 |   | AssignOp (i, a1, op, a2) :: b ->
187 |     if Varset.mem i live || not (pure_op op) then
188 |       AssignOp (i, a1, op, a2) ::
189 |       local_remove_unused_writes
190 |         (add_gen a1 (add_gen a2 (Varset.remove i live))) b
191 |     else
192 |       local_remove_unused_writes live b
193 |   | AssignAtom (i, a) :: b ->
194 |     if Varset.mem i live then
195 |       AssignAtom (i, a) ::
196 |       local_remove_unused_writes (add_gen a (Varset.remove i live)) b
197 |     else
198 |       local_remove_unused_writes live b
199 |   | Ld (i, j, a) :: b ->
200 |     if Varset.mem i live then
201 |       Ld (i, j, a) ::
202 |       local_remove_unused_writes (add_gen a (Varset.add j (Varset.remove i live))) b
203 |     else
204 |       local_remove_unused_writes live b
205 |   | St (i, a1, a2) :: b ->
206 |     St (i, a1, a2) ::
207 |     local_remove_unused_writes (add_gen a1 (add_gen a2 (Varset.add i live))) b
208 |   | Call (i, f, aes) :: b ->
209 |     Call (i, f, aes) ::
210 |     let live' =
211 |       match i with
212 |       | None -> live
213 |       | Some v -> Varset.remove v live
214 |     in
215 |     local_remove_unused_writes (add_gen_list aes live') b
216 |   | BoundCheck (a1, a2) :: b ->
217 |     BoundCheck (a1, a2) :: local_remove_unused_writes (add_gen a1 (add_gen a2 live)) b
218 |   | NullCheck v :: b ->
219 |     NullCheck v :: local_remove_unused_writes (Varset.add v live) b
220 | 
221 | let remove_unused_writes (cfg : cfg) : cfg =
222 |   List.map
223 |     (fun (entry, annot) ->
224 |        let new_elems =
225 |          local_remove_unused_writes annot.live_exit (List.rev entry.elems)
226 |        in
227 |        ({entry with elems = List.rev new_elems}, annot))
228 |     cfg
229 | 


--------------------------------------------------------------------------------
/src/astInterp.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* An interpreter for ASTs. This is meta circular in the sense that we use
 20 |    OCaml refs for mutable variables, and we represent arrays with mutable OCaml
 21 |    arrays. We also model function returns with exceptions. *)
 22 | 
 23 | open SourceAst
 24 | module T = Tokens
 25 | 
 26 | (* For when the interpreter crashed, such as array bounds violations *)
 27 | exception Crash of string
 28 | 
 29 | (* For errors that a well-typed program can't have *)
 30 | exception TypeError
 31 | 
 32 | (* Values are either integers or n-dimensional arrays of integers.
 33 |    We keep multi-dimensional arrays in a single dimensional one and include a
 34 |    list of how big each dimension is.
 35 |    We represent bools as numbers: true = 1L and false = 0L *)
 36 | type val_t =
 37 |   | Vint of int64
 38 |   | Varray of int list * int64 array
 39 | 
 40 | let val_t_to_int (v : val_t) : int64 =
 41 |   match v with
 42 |   | Vint i -> i
 43 |   | Varray _ -> raise TypeError
 44 | 
 45 | let val_t_to_array (v : val_t) : int list * int64 array =
 46 |   match v with
 47 |   | Vint 0L -> raise (Crash "null pointer exception")
 48 |   | Vint _ -> raise TypeError
 49 |   | Varray (dims, a) -> (dims, a)
 50 | 
 51 | (* Given the array's dimensions, work out the slot for a particular set of
 52 |    indices. Sizes and indices must have the same length.  Return None if one
 53 |    of the indices is out of bounds, i.e. greater than the size. *)
 54 | let array_address (sizes : int list) (indices : int list) : int option =
 55 |   (* acc keeps track of the product of the dimensions seen so far *)
 56 |   let rec f sizes indices (acc : int) =
 57 |     match (sizes, indices) with
 58 |     | ([], []) -> 0
 59 |     | (s1::sizes, i1::indices) ->
 60 |         i1 * acc + f sizes indices (s1 * acc)
 61 |     | _ ->
 62 |       raise TypeError
 63 |   in
 64 |   if List.for_all2 (fun s i -> i < s) sizes indices then
 65 |     Some (f sizes indices 1)
 66 |   else
 67 |     None
 68 | 
 69 | let bool_to_int64 (b : bool) : int64 =
 70 |   if b then 1L else 0L
 71 | 
 72 | let int64_to_bool (i : int64) : bool =
 73 |   if Int64.compare i 0L = 0 then false else true
 74 | 
 75 | (* Do a primitive operation, but not && or || *)
 76 | let do_op op (n1 : int64) (n2 : int64) : int64 =
 77 |   match op with
 78 |   | T.Plus -> Int64.add n1 n2
 79 |   | T.Minus -> Int64.sub n1 n2
 80 |   | T.Times -> Int64.mul n1 n2
 81 |   | T.Div ->
 82 |     if n2 = 0L then
 83 |       raise (Crash "Division by 0")
 84 |     else
 85 |       Int64.div n1 n2
 86 |   | T.Lt -> bool_to_int64 (Int64.compare n1 n2 < 0)
 87 |   | T.Gt -> bool_to_int64 (Int64.compare n1 n2 > 0)
 88 |   | T.Eq -> bool_to_int64 (Int64.compare n1 n2 = 0)
 89 |   | T.Lshift ->
 90 |     let i = Int64.to_int n2 in
 91 |     (* Int64.shift_left is wacky outside of this interval *)
 92 |     if 0 <= i && i < 64 then
 93 |       Int64.shift_left n1 i
 94 |     else
 95 |       0L
 96 |   | T.BitOr -> Int64.logor n1 n2
 97 |   | T.BitAnd -> Int64.logand n1 n2
 98 |   | T.And | T.Or -> assert false
 99 | 
100 | let do_uop uop (n : int64) : int64 =
101 |   match uop with
102 |   | T.Not ->
103 |     bool_to_int64 (not (int64_to_bool n))
104 | 
105 | (* The environment will have the code of all of the functions that we can call,
106 |    it will also keep the variable bindings in a map of OCaml references.
107 |    Assignments to a variables will be interpreted as assignments to the
108 |    corresponding reference. Note that an assignment to a variable (of scalar or
109 |    array type) mutates the binding for that variable in the environment,
110 |    whereas an assignment to an array element (i.e., an array update), actually
111 |    changes the array value without modifying the environment binding for the
112 |    array.
113 | *)
114 | type env_t = { funs : func Idmap.t; vars : val_t ref Idmap.t }
115 | 
116 | (* To model return control flow *)
117 | exception Return_exn of val_t
118 | 
119 | (* Add the function's arguments to the environment env at a call site. Assume
120 |    that the two input lists are the same length. This is ensured by the type
121 |    checker. We also know there are no duplicate parameter names. *)
122 | let rec add_arguments (params : (id * _) list) (args : val_t list) (env : env_t)
123 |   : env_t =
124 |   match (params, args) with
125 |   | ([], []) -> env
126 |   | ((x,_)::params, v::args) ->
127 |     (* Allocate a mutable reference to store the parameter's value in. Then the
128 |        function's body can assign to the variable by changing the binding. *)
129 |     let binding = ref v in
130 |     add_arguments params args { env with vars = Idmap.add x binding env.vars }
131 |   | (_, _) ->
132 |     raise TypeError
133 | 
134 | (* Compute the value of an expression *)
135 | let rec interp_exp (env : env_t) (e : exp) : val_t =
136 |   match e with
137 |   | Ident (i, []) ->
138 |     !(Idmap.find i env.vars) (* i will be in the environment in a well-typed
139 |                                 program *)
140 |   | Ident (i, iexps) ->
141 |     let (sizes, a) = val_t_to_array !(Idmap.find i env.vars) in
142 |     let indices =
143 |       List.map (fun e -> (Int64.to_int (val_t_to_int (interp_exp env e))))
144 |         iexps
145 |     in
146 |     (match array_address sizes indices with
147 |      | Some x -> Vint (Array.get a x)
148 |      | None -> raise (Crash "array index out of bounds"))
149 |   | Call (f, args) ->
150 |     (* f will be in the environment in a well-typed program *)
151 |     let func = Idmap.find f env.funs in
152 |     let arg_vals = List.map (interp_exp env) args in
153 |     let new_env1 = add_arguments func.params arg_vals env in
154 |     let new_env2 = interp_var_decs new_env1 func.locals in
155 |     (try
156 |        List.iter (interp_stmt new_env2) func.body;
157 |        (* We know this cannot happen because of the type checker, each path in
158 |           each function must have a return. Otherwise, we would have to return
159 |           a default value *of the right type* *)
160 |        raise TypeError
161 |      with Return_exn v -> v)
162 |   | Num n -> Vint n
163 |   | Bool b -> Vint (bool_to_int64 b)
164 |   | Op (e1, T.And, e2) ->
165 |     (match interp_exp env e1 with
166 |      | Vint n ->
167 |        if int64_to_bool n then
168 |          interp_exp env e2
169 |        else
170 |          Vint n
171 |      | _ -> raise TypeError)
172 |    | Op (e1, T.Or, e2) ->
173 |     (match interp_exp env e1 with
174 |      | Vint n ->
175 |        if int64_to_bool n then
176 |          Vint n
177 |        else
178 |          interp_exp env e2
179 |      | _ -> raise TypeError)
180 |   | Op (e1, op, e2) ->
181 |     (match (interp_exp env e1, interp_exp env e2) with
182 |      | (Vint n1, Vint n2) -> Vint (do_op op n1 n2)
183 |      | _ -> raise TypeError)
184 |   | Uop (uop, e) ->
185 |     (match interp_exp env e with
186 |      | Vint n -> Vint (do_uop uop n)
187 |      | _ -> raise TypeError)
188 |   | Array iexps ->
189 |     let indices =
190 |       List.map (fun e -> (Int64.to_int (val_t_to_int (interp_exp env e))))
191 |         iexps
192 |     in
193 |     Varray (indices,
194 |             Array.make (List.fold_right (fun x y -> x * y) indices 1) 0L)
195 | 
196 | (* Run the initialisation expressions of the variables, and add bindings to the
197 |    resulting values into the environment *)
198 | and interp_var_decs (env : env_t) (decs : var_dec list) : env_t =
199 |   (* Start the variables at 0, because they can be referenced before they are
200 |      initialised. NB, this allows null pointer exceptions (or crashes) into the
201 |      language, since the variable can have array type.  This is a consequence
202 |      of having each global and local scope be a single recursive scope. *)
203 |   let new_env =
204 |     List.fold_right
205 |       (fun v env ->
206 |         let binding = ref (Vint 0L) in
207 |          Idmap.add v.var_name binding env)
208 |       decs
209 |       env.vars
210 |   in
211 |   let env = { env with vars = new_env } in
212 |   (* Now run all of the initialisation expressions *)
213 |   List.iter
214 |     (fun d ->
215 |        let v = interp_exp env d.init in
216 |        Idmap.find d.var_name env.vars := v)
217 |     decs;
218 |   env
219 | 
220 | (* Run a statement *)
221 | and interp_stmt (env : env_t) (s : stmt) : unit =
222 |   match s with
223 |   | Assign (i, [], e) ->
224 |     let v = interp_exp env e in
225 |     Idmap.find i env.vars := v
226 |   | Assign (i, iexps, e) ->
227 |     (match !(Idmap.find i env.vars) with
228 |      | Varray (sizes, a) ->
229 |        (let indices =
230 |           List.map (fun e -> (Int64.to_int (val_t_to_int (interp_exp env e))))
231 |             iexps
232 |         in
233 |         match array_address sizes indices with
234 |         | None -> raise (Crash "array index out of bounds")
235 |         | Some x ->
236 |           let v = val_t_to_int (interp_exp env e) in
237 |           Array.set a x v)
238 |      | Vint 0L -> raise (Crash "null pointer exception")
239 |      | Vint _ -> raise TypeError)
240 |   | DoWhile (head_s, e, body_s) ->
241 |     interp_stmt env head_s;
242 |     if int64_to_bool (val_t_to_int (interp_exp env e)) then
243 |       (interp_stmt env body_s;
244 |        interp_stmt env s)
245 |     else
246 |       ()
247 |   | Ite (e, s1, s2) ->
248 |     if int64_to_bool (val_t_to_int (interp_exp env e)) then
249 |       interp_stmt env s1
250 |     else
251 |       interp_stmt env s2
252 |   | Stmts sl ->
253 |     List.iter (interp_stmt env) sl
254 |   | In i ->
255 |     Printf.printf "> ";
256 |     (try
257 |        let n = Int64.of_string (read_line ()) in
258 |        Idmap.find i env.vars := Vint n
259 |      with Failure _ -> raise (Crash "not a 64-bit integer"))
260 |   | Out i ->
261 |     (print_string (Int64.to_string (val_t_to_int !(Idmap.find i env.vars)));
262 |      print_newline ())
263 |   | Return None ->
264 |     raise (Return_exn (Vint 0L))
265 |   | Return (Some i) ->
266 |     raise (Return_exn !(Idmap.find i env.vars))
267 |   | Loc (s, _) -> interp_stmt env s
268 | 
269 | let interp_prog (p : prog) : unit =
270 |   let fun_env =
271 |     List.fold_right (fun f env -> Idmap.add f.fun_name f env)
272 |       p.funcs Idmap.empty
273 |   in
274 |   ignore (interp_var_decs { funs = fun_env; vars = Idmap.empty } p.globals)
275 | 


--------------------------------------------------------------------------------
/src/constProp.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* Do constant propagation and folding. Later compiler phases assume that no
 20 |    operation has 2 constant arguments, so this needs to be guaranteed here. *)
 21 | 
 22 | open SourceAst
 23 | module T = Tokens
 24 | 
 25 | (* Return n, such that 2^n = i, or None if there is no such *)
 26 | let log2 (i : int64) : int option =
 27 |   (* Linear search for the least significant 1 in the binary represenatation.
 28 |      A binary search might be faster, but probably not worth the hassle. *)
 29 |   let rec f (i : int64) (shifted : int) : int option =
 30 |     if Int64.logand i 0x1L = 0L then
 31 |       f (Int64.shift_right i 1) (shifted + 1)
 32 |     else if i = 0x1L then
 33 |       Some shifted
 34 |     else
 35 |       None
 36 |   in
 37 |   if i > 0L then
 38 |     f i 0
 39 |   else
 40 |     None
 41 | 
 42 | (* Decides whether evaluating an expression might have a side-effect *)
 43 | let rec might_have_effect (e : exp) : bool =
 44 |   match e with
 45 |   | Ident (_, []) -> false
 46 |   | Ident (_, _) -> true (* Array bound check failure *)
 47 |   | Num _ | Bool _ -> false
 48 |   | Op (e1, op, e2) ->
 49 |     if op = T.Div then
 50 |       true (* Divide by zero error *)
 51 |     else
 52 |       might_have_effect e1 || might_have_effect e2
 53 |   | Uop (_, e) -> might_have_effect e
 54 |   | Array es -> List.exists might_have_effect es
 55 |   | Call _ -> true (* The called function could do something *)
 56 | 
 57 | (* Check whether two expressions are equal, and don't have effects. This lets
 58 |    some operators remove them. Note that we are just comparing the structure of
 59 |    the expressions, so we won't see that x + (y + z) is equal to (x + y) + z,
 60 |    etc. *)
 61 | let ok_remove_eq_exp (e1 : exp) (e2 : exp) : bool =
 62 |   e1 = e2 &&
 63 |   not (might_have_effect e1) &&
 64 |   not (might_have_effect e2)
 65 | 
 66 | let between_0_63 (i : int64) : bool =
 67 |   Int64.compare i (-1L) = 1 && Int64.compare i 64L = -1
 68 | 
 69 | (* Statically evaluate an expression according to the identifier values in env,
 70 |    don't try to follow constants in arrays. Operates in a single bottom-up pass.
 71 |    Doesn't do anything with associativity or commutativity, so things like
 72 |    (x + 1) + 2 don't get changed. *)
 73 | let rec fold_exp (env : exp Idmap.t) (e : exp) : exp =
 74 |   match e with
 75 |   | Ident (i, []) ->
 76 |     (try Idmap.find i env
 77 |      with Not_found -> e)
 78 |   | Ident (i, es) -> Ident (i, List.map (fold_exp env) es)
 79 |   | Num n -> Num n
 80 |   | Bool b -> Bool b
 81 |   | Op (e1, op, e2) ->
 82 |     let o1 = fold_exp env e1 in
 83 |     let o2 = fold_exp env e2 in
 84 |     (match (o1, op, o2) with
 85 |      (* Plus *)
 86 |      | (Num n1, T.Plus, Num n2) -> Num (Int64.add n1 n2)
 87 |      | (Num 0L, T.Plus, e) | (e, T.Plus, Num 0L) -> e
 88 | 
 89 |      (* Minus *)
 90 |      | (Num n1, T.Minus, Num n2) -> Num (Int64.sub n1 n2)
 91 |      | (e, T.Minus, Num 0L) -> e
 92 |      | (e1, T.Minus, e2) when ok_remove_eq_exp e1 e2 -> Num 0L
 93 | 
 94 |      (* Times *)
 95 |      | (Num n1, T.Times, Num n2) -> Num (Int64.mul n1 n2)
 96 |      | (e, T.Times, Num 1L) | (Num 1L, T.Times, e) -> e
 97 |      | (Num -1L, T.Times, e) | (e, T.Times, Num -1L) ->
 98 |         Op (Num 0L, T.Minus, e)
 99 |      (* Can't use or-patterns for the following two, according to compiler warning *)
100 |      | (Num 0L, T.Times, e) when not (might_have_effect e) ->
101 |          Num 0L
102 |      | (e, T.Times, Num 0L) when not (might_have_effect e) ->
103 |          Num 0L
104 |      | (e, T.Times, Num n) | (Num n, T.Times, e) ->
105 |        (match log2 (Int64.abs n) with
106 |         | None -> Op (o1, op, o2)
107 |         | Some log ->
108 |           let shift_op = Op (e, T.Lshift, Num (Int64.of_int log)) in
109 |           if n < 0L then
110 |             Op (Num 0L, T.Minus, shift_op)
111 |           else
112 |             shift_op)
113 | 
114 |      (* Div *)
115 |      | (Num n1, T.Div, Num n2) when n2 <> 0L -> Num (Int64.div n1 n2)
116 |      | (Num _, T.Div, Num 0L) ->
117 |        (* This phase needs to guarantee that no operation has two constant
118 |           arguments. It doesn't matter what the numerator is, when the
119 |           divisor is 0, so we can just use an arbitrary variable. *)
120 |        Op (Ident (Temp ("CP", 0), []), T.Div, Num 0L)
121 |      | (e, T.Div, Num 1L) -> e
122 | 
123 |      (* Less *)
124 |      | (Num n1, T.Lt, Num n2) -> Bool (Int64.compare n1 n2 < 0)
125 |      | (e1, T.Lt, e2) when ok_remove_eq_exp e1 e2 -> Bool false
126 | 
127 |      (* Greater *)
128 |      | (Num n1, T.Gt, Num n2) -> Bool (Int64.compare n1 n2 > 0)
129 |      | (e1, T.Gt, e2) when ok_remove_eq_exp e1 e2 -> Bool false
130 | 
131 |      (* Equal *)
132 |      | (Num n1, T.Eq, Num n2) -> Bool (Int64.compare n1 n2 = 0)
133 |      | (e1, T.Eq, e2) when ok_remove_eq_exp e1 e2 -> Bool true
134 | 
135 |      (* Shift left *)
136 |      | (Num n1, T.Lshift, Num n2) ->
137 |        (* Ocaml's shift_left is only defined between 0 and 63 inclusive *)
138 |        if between_0_63 n2 then
139 |          Num (Int64.shift_left n1 (Int64.to_int n2))
140 |        else
141 |          Num 0L
142 |      | (e, T.Lshift, Num 0L) -> e
143 |      | (Num 0L, T.Lshift, e) when not (might_have_effect e) ->
144 |        Num 0L
145 | 
146 |      (* Bitwise or *)
147 |      | (Num n1, T.BitOr, Num n2) -> Num (Int64.logor n1 n2)
148 |      | (Num 0L, T.BitOr, e) | (e, T.BitOr, Num 0L) -> e
149 |      (* Can't use or-patterns for the following two, according to compiler warning *)
150 |      | (Num 0xFFFFFFFFFFFFFFFFL, T.BitOr, e) when not (might_have_effect e) ->
151 |        Num 0xFFFFFFFFFFFFFFFFL
152 |      | (e, T.BitOr, Num 0xFFFFFFFFFFFFFFFFL) when not (might_have_effect e) ->
153 |        Num 0xFFFFFFFFFFFFFFFFL
154 | 
155 |      (* Bitwise and *)
156 |      | (Num n1, T.BitAnd, Num n2) -> Num (Int64.logand n1 n2)
157 |      | (Num 0xFFFFFFFFFFFFFFFFL, T.BitAnd, e)
158 |      | (e, T.BitAnd, Num 0xFFFFFFFFFFFFFFFFL) -> e
159 |      (* Can't use or-patterns for the following two, according to compiler warning *)
160 |      | (Num 0L, T.BitAnd, e) when not (might_have_effect e) -> Num 0L
161 |      | (e, T.BitAnd, Num 0L) when not (might_have_effect e) -> Num 0L
162 | 
163 |      (* And *)
164 |      | (Bool true, T.And, e) | (e, T.And, Bool true) -> e
165 |      | (Bool false, T.And, _) -> Bool false
166 |      | (e, T.And, Bool false) when not (might_have_effect e) -> Bool false
167 |      | (e1, T.And, e2) when ok_remove_eq_exp e1 e2 -> e1
168 | 
169 |      (* Or *)
170 |      | (Bool false, T.Or, e) | (e, T.Or, Bool false) -> e
171 |      | (Bool true, T.Or, _) -> Bool true
172 |      | (e, T.Or, Bool true) when not (might_have_effect e) -> Bool true
173 |      | (e1, T.Or, e2) when ok_remove_eq_exp e1 e2 -> e1
174 | 
175 |      | _ -> Op (o1, op, o2))
176 |   | Uop (uop, e) ->
177 |     let o = fold_exp env e in
178 |     (match (uop, o) with
179 |      | (T.Not, Bool b) -> Bool (not b)
180 |      | (T.Not, Uop (T.Not, e)) -> e
181 |      | _ -> Uop (uop, o))
182 |   | Array es ->
183 |     Array (List.map (fold_exp env) es)
184 |   | Call (f, es) ->
185 |     Call (f, List.map (fold_exp env) es)
186 | 
187 | let is_const (e : exp) : bool =
188 |   match e with
189 |   | Num _ | Bool _ -> true
190 |   | _ -> false
191 | 
192 | let same_const (e1 : exp) (e2 : exp) : bool =
193 |   match (e1,e2) with
194 |   | (Num n1, Num n2) -> Int64.compare n1 n2 = 0
195 |   | (Bool b1, Bool b2) -> b1 = b2
196 |   | _ -> false
197 | 
198 | (* If v1 and v2 contain the same constant, return it. Otherwise return None *)
199 | let merge_constants (_ : id) (v1 : exp option) (v2 : exp option)
200 |   : exp option =
201 |   match (v1,v2) with
202 |   | (Some e1, Some e2) ->
203 |     if same_const e1 e2 then
204 |       Some e1
205 |     else None
206 |   | _ -> None
207 | 
208 | (* Do constant propagation. Accumulate an environment of definitely known
209 |    constants at the end of stmts, given definitely known constants env at the
210 |    start. *)
211 | let rec prop_stmts (env : exp Idmap.t) (stmts : stmt list)
212 |   : exp Idmap.t * stmt list =
213 |   match stmts with
214 |   | [] -> (env,[])
215 |   | Assign (x, [], e) :: stmts ->
216 |     let o1 = fold_exp env e in
217 |     let first_env =
218 |       if is_const o1 then Idmap.add x o1 env else Idmap.remove x env
219 |     in
220 |     let (env',stmts') = prop_stmts first_env stmts in
221 |     (env', Assign (x, [], o1) :: stmts')
222 |   | Assign (x, es, e) :: stmts ->
223 |     let o = fold_exp env e in
224 |     let os = List.map (fold_exp env) es in
225 |     let (env', stmts') = prop_stmts env stmts in
226 |     (env', Assign (x, os, o) :: stmts')
227 |   | DoWhile (s0, e, s1) :: stmts ->
228 |     (* s0 is always executed *)
229 |     let (env_init, t) = prop_stmt env s0 in
230 |     let o = fold_exp env_init e in
231 |     (match o with
232 |      | Bool false ->
233 |        let (env', stmts') = prop_stmts env_init stmts in
234 |        (env', t::stmts')
235 |      | _ ->
236 |        (* From this point, the loop might execute s1;s0 0 or more times, so we
237 |           calculate env1 which contains the constants that are constant no
238 |           matter how many times the loop is executed *)
239 |        let env1 = prop_loop_body env_init [s1; s0] in
240 |        (* Now re-calculate the constant propagation for the things constant no
241 |           matter how many times we iterate the loop *)
242 |        let (_, t1) = prop_stmt env1 s1 in
243 |        let o = fold_exp env1 e in
244 |        let (_, t0) = prop_stmt env1 s0 in
245 |        let (env',stmts') = prop_stmts env1 stmts in
246 |        (env', DoWhile (t0, o, t1) :: stmts'))
247 |   | Ite (e, s1, s2) :: stmts ->
248 |     let o1 = fold_exp env e in
249 |     (match o1 with
250 |      | Bool true ->
251 |        let (env1, os1) = prop_stmt env s1 in
252 |        let (env', stmts') = prop_stmts env1 stmts in
253 |        (env', os1::stmts')
254 |      | Bool false ->
255 |        let (env2, os2) = prop_stmt env s2 in
256 |        let (env', stmts') = prop_stmts env2 stmts in
257 |        (env', os2::stmts')
258 |      | _ ->
259 |        let (env1, os1) = prop_stmt env s1 in
260 |        let (env2, os2) = prop_stmt env s2 in
261 |        (* Only include constants that are known to be the same at the end of
262 |           both branches of the Ite for the subsequent statements. *)
263 |        let (env',stmts') =
264 |          prop_stmts (Idmap.merge merge_constants env1 env2) stmts
265 |        in
266 |        (env', Ite (o1, os1, os2) :: stmts'))
267 |   | Stmts (stmts1) :: stmts ->
268 |     let (env1, os1) = prop_stmts env stmts1 in
269 |     let (env', stmts') = prop_stmts env1 stmts in
270 |     (env', Stmts (os1) :: stmts')
271 |   | In x :: stmts ->
272 |     let (env',stmts') = prop_stmts (Idmap.remove x env) stmts in
273 |     (env', In x :: stmts')
274 |   | Out x :: stmts ->
275 |     let (env',stmts') = prop_stmts env stmts in
276 |     (env', Out x :: stmts')
277 |   | Loc (s, ln) :: stmts ->
278 |     let (env1, o1) = prop_stmt env s in
279 |     let (env', stmts') = prop_stmts env1 stmts in
280 |     (env', Loc (o1, ln) :: stmts')
281 |   | Return x :: _ ->
282 |     (* We can't carry on past a return *)
283 |     (env, [Return x])
284 | 
285 | and prop_stmt env (stmt : stmt) =
286 |   match prop_stmts env [stmt] with
287 |   | (env,[os]) -> (env,os)
288 |   | _ -> assert false
289 | 
290 | (* Given possibly known constants env at the start, compute the definitely
291 |    known constants at the end, assuming that stmts is run in a loop body an
292 |    unknown number of times. *)
293 | and prop_loop_body (env : exp Idmap.t) (stmts : stmt list) : exp Idmap.t =
294 |   let (env', _) = prop_stmts env stmts in
295 |   (* The next approximation of constants at the start *)
296 |   let env'' = Idmap.merge merge_constants env env' in
297 |   if Idmap.equal same_const env env'' then
298 |     (* Same as last time, fixed point reached *)
299 |     env''
300 |   else
301 |     prop_loop_body env'' stmts
302 | 


--------------------------------------------------------------------------------
/src/typeCheck.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* The type checker. In addition to checking the types, we build a new AST that
 20 |    annotates the scope of all of the variables. Each use of a variable either
 21 |    refers to a global variable, a local variable, or a function parameter. The
 22 |    type checker keeps track of which sort each variable is and annotates each
 23 |    use with this information. This allows the register allocator and back end
 24 |    to treat them differently. *)
 25 | 
 26 | open Util
 27 | open SourceAst
 28 | module T = Tokens
 29 | 
 30 | (* Types *)
 31 | type t =
 32 |   | Tint
 33 |   | Tbool
 34 |   | Tarray of int (* The number of dimensions that the array has *)
 35 | 
 36 | let show_t t =
 37 |   match t with
 38 |   | Tint -> "int"
 39 |   | Tbool -> "bool"
 40 |   | Tarray n -> "array " ^ string_of_int n
 41 | 
 42 | (* Map functions to their types, and variables to their types and scope *)
 43 | type env_t = { funs : (t list * t) Idmap.t; vars : (t * scope) Idmap.t }
 44 | 
 45 | (* Raise a type error *)
 46 | let type_error (ln : int option) (msg : string) : 'a =
 47 |   match ln with
 48 |   | Some ln ->
 49 |     raise (BadInput ("Type error on line " ^ string_of_int ln ^ ": " ^ msg))
 50 |   | None ->
 51 |     raise (BadInput ("Type error at unknown location: " ^ msg))
 52 | 
 53 | (* Annotate an identifier with its scope *)
 54 | let add_scope (id : id) (s : scope) : id =
 55 |   match id with
 56 |   | Source (i, None) -> Source (i, Some s)
 57 |   | Source (_, Some _) ->
 58 |     raise (InternalError "scoped identifier during type checking")
 59 |   | Temp (x, y) -> Temp (x, y)
 60 | 
 61 | (* Compute the type of an expression, or raise BadInput if there is a type
 62 |    error. Also return a new scope-annotated version of the expression. *)
 63 | let rec type_exp (ln : int option) (env : env_t) (e : exp) : t * exp =
 64 |   match e with
 65 |   | Ident (i, es) ->
 66 |     let (t, scope) =
 67 |       try Idmap.find i env.vars
 68 |       with Not_found ->
 69 |         type_error ln ("reference to undeclared variable " ^ show_id i)
 70 |     in
 71 |     let (ts, es') = List.split (List.map (type_exp ln env) es) in
 72 |     let l = List.length ts in
 73 |     let annotated_exp = Ident (add_scope i scope, es') in
 74 |     (* If there are no indices, then just use the type of the identifier, even
 75 |        if it is of array type. If there are indices, they must fully index the
 76 |        array, which always contains ints *)
 77 |     if l = 0 then
 78 |       (t, annotated_exp)
 79 |     else
 80 |       (match t with
 81 |        | Tarray num_dims ->
 82 |          if num_dims = l && List.for_all (fun x -> x = Tint) ts then
 83 |            (Tint, annotated_exp)
 84 |          else if num_dims = l then
 85 |            type_error ln "array index with non-integer type"
 86 |          else
 87 |            type_error ln ("array reference with " ^ string_of_int l ^
 88 |                           " indices, expected " ^ string_of_int num_dims)
 89 |        | _ ->
 90 |          type_error ln ("attempt to index non-array variable " ^ show_id i))
 91 |   | Call (f, args) ->
 92 |     let (param_types, ret_type) =
 93 |       try Idmap.find f env.funs
 94 |       with Not_found ->
 95 |         type_error ln ("call to undefined function " ^ show_id f)
 96 |     in
 97 |     let (ts, es') = List.split (List.map (type_exp ln env) args) in
 98 |     (* A local function to check that the parameter and argument types match
 99 |        up. Use n to keep track of which position, for the error message. Assume
100 |        that the lists are of the same length. *)
101 |     let rec check (n : int) (pts : t list) (ats : t list) : unit =
102 |       match (pts, ats) with
103 |       | ([], []) -> ()
104 |       | (pt::pts, at::ats) ->
105 |         if pt = at then check (n+1) pts ats else
106 |           type_error ln ("function call given argument of type " ^ show_t at ^
107 |                          " but expected type " ^ show_t pt ^
108 |                          " in argument number " ^ string_of_int n)
109 |       | _ -> assert false
110 |     in
111 |     if List.length param_types <> List.length args then
112 |       type_error ln ("function call expects " ^
113 |                      string_of_int (List.length param_types) ^
114 |                      " arguments, given " ^
115 |                      string_of_int (List.length ts))
116 |     else
117 |       check 1 param_types ts;
118 |     (ret_type, Call (f, es'))
119 |   | Num n -> (Tint, Num n)
120 |   | Bool b -> (Tbool, Bool b)
121 |   | Op (e1, op, e2) ->
122 |     let (t1, e1') = type_exp ln env e1 in
123 |     let (t2, e2') = type_exp ln env e2 in
124 |     let t =
125 |       match (t1, op, t2) with
126 |       | (Tbool, (T.And | T.Or | T.Eq), Tbool) -> Tbool
127 |       | (Tint, (T.Plus | T.Minus | T.Times | T.Div | T.Lshift | T.BitOr |
128 |                 T.BitAnd), Tint) ->
129 |         Tint
130 |       | (Tint, (T.Lt | T.Eq | T.Gt), Tint) -> Tbool
131 |       | (t1, _, t2) ->
132 |         type_error ln ("operator " ^ T.show_op op ^
133 |                        " applied to expressions of types " ^ show_t t1 ^
134 |                        " and " ^ show_t t2)
135 |     in
136 |     (t, Op (e1', op, e2'))
137 |   | Uop (uop, e) ->
138 |     let (t, e') = type_exp ln env e in
139 |     (match (uop, t) with
140 |      | (T.Not, Tbool) -> (Tbool, Uop (uop, e'))
141 |      | (T.Not, t) ->
142 |        type_error ln ("operator " ^ T.show_uop uop ^
143 |                       " applied to expression of type " ^ show_t t))
144 |   | Array es ->
145 |     let (ts, es') = List.split (List.map (type_exp ln env) es) in
146 |     let l = List.length ts in
147 |     if l = 0 then
148 |       type_error ln "attempt to allocate array with 0 dimensions"
149 |     else if List.for_all (fun x -> x = Tint) ts then
150 |       (Tarray l, Array es')
151 |     else
152 |       type_error ln "array dimension with non-integer type"
153 | 
154 | (* Type check an identifier without array indices *)
155 | let type_simple_ident (ln : int option) (env : env_t) (i : id) : t * id =
156 |   match type_exp ln env (Ident (i, [])) with
157 |   | (t, Ident (i', [])) -> (t, i')
158 |   | _ -> assert false (* type_exp does not change the shape of the expression *)
159 | 
160 | (* Type check a statement. Raise BadInput if there is an error. return gives
161 |    the return type of the enclosing function. ln gives the current line number.
162 |    Return a scope annotated version of the statement. Also strip the location
163 |    annotations.
164 | *)
165 | let rec type_stmt (ln : int option) (env :env_t) (return : t) (stmt : stmt)
166 |   : stmt =
167 |   match stmt with
168 |   | In i ->
169 |     let (t, i') = type_simple_ident ln env i in
170 |     if t <> Tint then
171 |       type_error ln "input with non-integer type"
172 |     else
173 |       In i'
174 |   | Out i ->
175 |     let (t, i') = type_simple_ident ln env i in
176 |     if t <> Tint then
177 |       type_error ln "output with non-integer type"
178 |     else
179 |       Out i'
180 |   | Return (Some i) ->
181 |     let (t, i') = type_simple_ident ln env i in
182 |     if t <> return then
183 |       type_error ln ("return has type " ^ show_t t ^
184 |                      " in a function with return type " ^
185 |                      show_t return)
186 |     else
187 |       Return (Some i')
188 |   | Return None ->
189 |     type_error ln "return without value"
190 |   | Assign (x, es, e) ->
191 |     (match type_exp ln env (Ident (x, es)) with
192 |      | (t1, Ident (x', es')) ->
193 |        let (t2, e') = type_exp ln env e in
194 |        if t1 <> t2 then
195 |          type_error ln ("assignment to variable of type " ^ show_t t1 ^ " with type " ^
196 |                         show_t t2)
197 |        else
198 |          Assign (x', es', e')
199 |      | _ -> assert false)
200 |   | DoWhile (s1, e, s2) ->
201 |     let s1' = type_stmt ln env return s1 in
202 |     (match type_exp ln env e with
203 |      | (Tbool, e') ->
204 |        let s2' = type_stmt ln env return s2 in
205 |        DoWhile (s1', e', s2')
206 |      | _ ->
207 |       type_error ln "do/while test of non-bool type")
208 |   | Ite (e, s1, s2) ->
209 |     (match type_exp ln env e with
210 |      | (Tbool, e') ->
211 |        let s1' = type_stmt ln env return s1 in
212 |        let s2' = type_stmt ln env return s2 in
213 |        Ite (e', s1', s2')
214 |      | _ -> type_error ln "if test of non-bool type")
215 |   | Stmts (s_list) ->
216 |     Stmts (List.map (type_stmt ln env return) s_list)
217 |   | Loc (s, ln') ->
218 |     type_stmt (Some ln') env return s
219 | 
220 | let source_typ_to_t (t : SourceAst.typ) : t =
221 |   match t with
222 |   | Int -> Tint
223 |   | Bool -> Tbool
224 |   | Array n -> Tarray n
225 | 
226 | (* Get the declared types of all of the parameters. Raise an exception if a
227 |    duplicate name is found, using the location ln. Accumulate the answer in
228 |    param_env. *)
229 | let rec get_param_types (ln : int option) (params : (id * typ) list)
230 |     (param_env : (t * scope) Idmap.t)
231 |   : (t * scope) Idmap.t =
232 |   match params with
233 |   | [] -> param_env
234 |   | (x,t)::params ->
235 |     if Idmap.mem x param_env then
236 |       type_error ln ("duplicate function parameter " ^ show_id x)
237 |     else
238 |       get_param_types ln params
239 |         (Idmap.add x (source_typ_to_t t, Parameter) param_env)
240 | 
241 | (* Get the declared types of all of the variables.  Raise an exception if a
242 |    duplicate is found. Accumulate the answer in var_env. *)
243 | let rec get_var_types (s : scope) (vars : var_dec list)
244 |     (var_env : (t * scope) Idmap.t)
245 |   : (t * scope) Idmap.t =
246 |   match vars with
247 |   | [] -> var_env
248 |   | v::vars ->
249 |     if Idmap.mem v.var_name var_env then
250 |       type_error v.loc ("duplicate variable definition " ^ show_id v.var_name)
251 |     else
252 |       get_var_types s vars
253 |         (Idmap.add v.var_name (source_typ_to_t v.typ, s) var_env)
254 | 
255 | (* Check the init expressions on a variable declaration. Return a
256 |    scope-annotated version of the declaration. *)
257 | let type_var_dec (s : scope) (env : env_t) (dec : var_dec) : var_dec =
258 |   let (t, init') = type_exp dec.loc env dec.init in
259 |   if t = source_typ_to_t dec.typ then
260 |     { dec with var_name = add_scope dec.var_name s; init = init' }
261 |   else
262 |     type_error dec.loc ("initialisation of variable typed " ^
263 |                         show_t (source_typ_to_t dec.typ) ^
264 |                         " to expression of type " ^ show_t t)
265 | 
266 | (* See the documentation for Map.merge in the OCaml standard library *)
267 | let merge_keep_first _ (x : 'a option) (y : 'a option) : 'a option =
268 |   match (x,y) with
269 |   | (None, None) -> None
270 |   | (Some x, Some _) -> Some x
271 |   | (Some x, None) -> Some x
272 |   | (None, Some y) -> Some y
273 | 
274 | (* Determine whether that each control-flow path through the stmts must include
275 |    a return.  This way we know that a function cannot fall off the end, without
276 |    returning a value of the correct type. *)
277 | let rec check_return_paths (stmts : stmt list) : bool =
278 |   match stmts with
279 |   | [] -> false
280 |   | Return _ :: _ -> true
281 |   | s::stmts ->
282 |     if check_return_paths stmts then
283 |       true
284 |     else (* The remainder might not return, so the first statement must *)
285 |       match s with
286 |       | Return _ -> assert false (* already checked for this case *)
287 |       | In _ | Out _ | Assign _ -> false
288 |       | Loc (s,_) -> check_return_paths [s]
289 |       | Ite (_,s1,s2) ->
290 |         check_return_paths [s1] && check_return_paths [s2]
291 |       | DoWhile (s1, _, _) ->
292 |         check_return_paths [s1]
293 |       | Stmts s -> check_return_paths s
294 | 
295 | (* Check a function. Return a scope-annotated version is there are no errors. *)
296 | let type_function (env : env_t) (f : func) : func =
297 |   let param_env = get_param_types f.loc f.params Idmap.empty in
298 |   let local_env = get_var_types Local f.locals Idmap.empty in
299 |   (* Add the parameter and locals to the env. Ensure that the locals shadow the
300 |      parameters and globals, and that the parameters shadow the globals. *)
301 |   let env1 = Idmap.merge merge_keep_first param_env env.vars in
302 |   let env2 = Idmap.merge merge_keep_first local_env env1 in
303 |   let new_env = { env with vars = env2 } in
304 |   let locals' = List.map (type_var_dec Local new_env) f.locals in
305 |   let body' =
306 |     List.map (type_stmt None new_env (source_typ_to_t f.ret)) f.body
307 |   in
308 |   if not (check_return_paths f.body) then
309 |     type_error f.loc "function might not return"
310 |   else
311 |     { f with
312 |       params = List.map (fun (i,t) -> (add_scope i Parameter, t)) f.params;
313 |       locals = locals';
314 |       body = body' }
315 | 
316 | (* Get the declared types of all of the functions. Raise an exception if a
317 |    duplicate is found. Accumulate the answer in fun_env. *)
318 | let rec get_function_types (funcs : func list) (fun_env : (t list * t) Idmap.t)
319 |   : (t list * t) Idmap.t =
320 |   match funcs with
321 |   | [] -> fun_env
322 |   | f::funcs ->
323 |     if Idmap.mem f.fun_name fun_env then
324 |       type_error f.loc ("duplicate function definition " ^ show_id f.fun_name)
325 |     else
326 |       get_function_types funcs
327 |         (Idmap.add f.fun_name
328 |            (List.map (fun (_,t) -> source_typ_to_t t) f.params,
329 |             source_typ_to_t f.ret)
330 |            fun_env)
331 | 
332 | let type_prog (p : prog) : prog =
333 |   (* Get the types of the globals and functions. *)
334 |   let env =
335 |     { funs = get_function_types p.funcs Idmap.empty;
336 |       vars = get_var_types Global p.globals Idmap.empty }
337 |   in
338 |   (* Check the function bodies and global initialisations in the environment
339 |      with types for all of the globals and functions. This means that the
340 |      top-level is one recursive scope and each function body and global
341 |      initialisation can refer to any of the others (or itself). *)
342 |   let globals' = List.map (type_var_dec Global env) p.globals in
343 |   let funcs' = List.map (type_function env) p.funcs in
344 |   { funcs = funcs'; globals = globals' }
345 | 


--------------------------------------------------------------------------------
/src/instrSelX86.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* Convert linearised three-address code to x86-64 *)
 20 | 
 21 | exception TODO of string
 22 | 
 23 | open BlockStructure
 24 | open X86
 25 | module L = LineariseCfg
 26 | module T = Tokens
 27 | 
 28 | let tok_to_binop t =
 29 |   match t with
 30 |   | T.Plus -> Zadd
 31 |   | T.Minus -> Zsub
 32 |   | T.Lshift -> Zshl
 33 |   | T.BitOr -> Zor
 34 |   | T.BitAnd -> Zand
 35 |   | _ -> assert false
 36 | 
 37 | (* Save RSP and RBP for stack stuff,
 38 |    save RAX and RDX for scratch, and index RAX by -1
 39 |    RAX is chosen because division needs it. Alas, it also needs RDX, which is
 40 |    used in the calling convention, so we keep R10 as scratch instead. *)
 41 | let reg_numbers =
 42 |   [(-1, RAX);
 43 |    (0, RBX);
 44 |    (1, RCX);
 45 |    (2, RDX);
 46 |    (3, RSI);
 47 |    (4, RDI);
 48 |    (5, R8);
 49 |    (6, R9);
 50 |    (7, R11);
 51 |    (8, R12);
 52 |    (9, R13);
 53 |    (10, R14);
 54 |    (11, R15)]
 55 | let reg_numbers_flip =
 56 |   List.map (fun (x,y) -> (y,x)) reg_numbers
 57 | 
 58 | let reg_list = [RDI; RSI; RDX; RCX; R8; R9]
 59 | let num_regs = List.length reg_numbers - 1
 60 | let argument_reg_numbers =
 61 |   List.map (fun r -> (List.assoc r reg_numbers_flip)) reg_list
 62 | 
 63 | (* Assume that we have kept RAX and R10 free for scratch space *)
 64 | let r_scratch = RAX
 65 | let r_scratch2 = R10
 66 | 
 67 | (* Convert a variable, which can be a register, global or stack slot, to a rm *)
 68 | let var_to_rm (v : var) : rm =
 69 |   match v with
 70 |   | Vreg i -> Zr (List.assoc i reg_numbers)
 71 |   | Stack i -> Zm (None, Some RBP, Some (Concrete_disp (Int64.of_int (-8 * (i+1)))))
 72 |   | Global g -> Zm (None, None, Some (Label_disp g))
 73 |   | n -> raise (Util.InternalError ("named variable in instrSelX86: " ^ show_var n))
 74 | 
 75 | let rm_rm_to_dest_src (dest_rm : rm) (src_rm : rm)
 76 |   : instruction list * dest_src =
 77 |   match (dest_rm, src_rm) with
 78 |   | (Zr r, _) -> ([], Zr_rm (r, src_rm))
 79 |   | (_, Zr r) -> ([], Zrm_r (dest_rm, r))
 80 |   | (Zm _, Zm _) ->
 81 |     ([Zmov (Zr_rm (r_scratch, src_rm))], Zrm_r (dest_rm, r_scratch))
 82 | 
 83 | let rm_ae_to_dest_src (dest_rm : rm) (src_ae : atomic_exp)
 84 |   : instruction list * dest_src =
 85 |   match src_ae with
 86 |   | Num i -> ([], Zrm_i (dest_rm, i))
 87 |   | Ident src_var -> rm_rm_to_dest_src dest_rm (var_to_rm src_var)
 88 | 
 89 | (* Convert a heap reference, which is a variable to be deferenced, offset by
 90 |    another variable or immediate *)
 91 | let heap_to_rm (base : var) (offset : atomic_exp) : instruction list * rm =
 92 |   match (base, offset) with
 93 |   (* Simple addresses *)
 94 |   | (Vreg b, Num o) ->
 95 |     ([], Zm (None, Some (List.assoc b reg_numbers), Some (Concrete_disp o)))
 96 |   | ((Stack _ | Global _ ) as v, Num o) ->
 97 |     ([Zmov (Zr_rm (r_scratch2, var_to_rm v))],
 98 |      Zm (None, Some r_scratch2, Some (Concrete_disp o)))
 99 |   | (Vreg b, Ident (Vreg o)) ->
100 |     ([],
101 |      Zm (Some (1, List.assoc o reg_numbers),
102 |          Some (List.assoc b reg_numbers),
103 |          None))
104 |   | ((Stack _ | Global _) as v, Ident (Vreg r))
105 |   (* 1 indirection *)
106 |   | (Vreg r, Ident ((Stack _ | Global _) as v)) ->
107 |     ([Zmov (Zr_rm (r_scratch2, var_to_rm v))],
108 |      Zm (Some (1, List.assoc r reg_numbers),
109 |          Some r_scratch2,
110 |          None))
111 |   (* 2 indirections *)
112 |   | (((Stack _ | Global _) as v1), Ident ((Stack _ | Global _) as v2)) ->
113 |     ([Zmov (Zr_rm (r_scratch2, var_to_rm v1));
114 |       Zbinop (Zadd, Zr_rm (r_scratch2, var_to_rm v2))],
115 |      Zm (None,
116 |          Some r_scratch2,
117 |          None))
118 |   | ((NamedSource _ | NamedTmp _), _) ->
119 |     raise (Util.InternalError "Named variables in instrSelX86")
120 |   | (_, Ident (NamedSource _ | NamedTmp _)) ->
121 |     raise (Util.InternalError "Named variables in instrSelX86")
122 | 
123 | (* Build the operation for r := r op ae *)
124 | let build_to_reg_op (op : Tokens.op) (r : reg) (ae : atomic_exp)
125 |   : instruction list =
126 |   match (op, ae) with
127 |   | ((T.Plus | T.Minus | T.Lshift | T.BitOr | T.BitAnd), Num i) ->
128 |     [Zbinop (tok_to_binop op, Zrm_i (Zr r, i))]
129 |   | ((T.Plus | T.Minus | T.Lshift | T.BitOr | T.BitAnd), Ident v) ->
130 |     [Zbinop (tok_to_binop op, Zr_rm (r, var_to_rm v))]
131 |   | (T.Times, Num i) ->
132 |     [Zimul (r, Zr r, Some i)]
133 |   | (T.Times, Ident v) ->
134 |     [Zimul (r, var_to_rm v, None)]
135 |   | (T.Div, _) ->
136 |     [Zmov (Zr_rm (r_scratch2, Zr RDX));
137 |      Zmov (Zrm_i (Zr RDX, 0L));
138 |      Zmov (Zr_rm (RAX, Zr r));
139 |      (match ae with
140 |       | Ident v -> Zidiv (var_to_rm v)
141 |       | Num _ ->
142 |         raise (Util.InternalError "Division by immediate constant"));
143 |      Zmov (Zr_rm (r, Zr RAX));
144 |      Zmov (Zr_rm (RDX, Zr r_scratch2))]
145 |   | ((T.Lt | T.Gt | T.Eq), _) ->
146 |     assert false
147 |   | ((T.And | T.Or), _) ->
148 |     assert false
149 | 
150 | let reverse_op op =
151 |   match op with
152 |   | T.Gt -> T.Lt
153 |   | T.Lt -> T.Gt
154 |   | T.Eq -> T.Eq
155 |   | _ -> assert false
156 | 
157 | let op_to_cond op =
158 |   match op with
159 |   | T.Gt -> Z_G
160 |   | T.Lt -> Z_L
161 |   | T.Eq -> Z_E
162 |   | _ -> assert false
163 | 
164 | (* ------------ Calling convention stuff ------------ *)
165 | 
166 | (* Don't save RAX since it is our scratch. Remember to do an even number before
167 |    a call for alignment, which must be 16-bytes at (external) function calls.
168 |    This is why we also push R10, even though it is scratch too. *)
169 | let caller_save =
170 |   [Zpush (Zi_rm (Zr RCX));
171 |    Zpush (Zi_rm (Zr RDX));
172 |    Zpush (Zi_rm (Zr RSI));
173 |    Zpush (Zi_rm (Zr RDI));
174 |    Zpush (Zi_rm (Zr R8));
175 |    Zpush (Zi_rm (Zr R9));
176 |    Zpush (Zi_rm (Zr R10));
177 |    Zpush (Zi_rm (Zr R11))]
178 | 
179 | let caller_restore =
180 |   [Zpop (Zr R11);
181 |    Zpop (Zr R10);
182 |    Zpop (Zr R9);
183 |    Zpop (Zr R8);
184 |    Zpop (Zr RDI);
185 |    Zpop (Zr RSI);
186 |    Zpop (Zr RDX);
187 |    Zpop (Zr RCX)]
188 | 
189 | let callee_save =
190 |   [Zpush (Zi_rm (Zr RBX));
191 |    Zpush (Zi_rm (Zr R12));
192 |    Zpush (Zi_rm (Zr R13));
193 |    Zpush (Zi_rm (Zr R14));
194 |    Zpush (Zi_rm (Zr R15))]
195 | 
196 | let callee_restore =
197 |   [Zpop (Zr R15);
198 |    Zpop (Zr R14);
199 |    Zpop (Zr R13);
200 |    Zpop (Zr R12);
201 |    Zpop (Zr RBX)]
202 | 
203 | (* The order that the calling convention puts arguments into registers *)
204 | let reg_list = [RDI; RSI; RDX; RCX; R8; R9]
205 | 
206 | (* Where the saved versions of the argument registers are on the stack. Depends
207 |    on the order in caller_save. This is relative to RSP. *)
208 | let stack_reg_to_offset r =
209 |   match r with
210 |   | RDI -> 32L
211 |   | RSI -> 40L
212 |   | RDX -> 48L
213 |   | RCX -> 56L
214 |   | R8 -> 24L
215 |   | _ -> assert false
216 | 
217 | (* Move the value of ae into the register dest_r for argument passing. If ae
218 |    refers to any regs in overwritten_regs, which contains the registers
219 |    overwitten already for previous arguments, then use the stored version on
220 |    the stack. *)
221 | let setup_arg (overwritten_regs : reg list) (dest_r : reg) (ae : atomic_exp)
222 |   : dest_src option =
223 |   match ae with
224 |   | Num n -> Some (Zrm_i (Zr dest_r, n))
225 |   | Ident (Stack i) -> Some (Zr_rm (dest_r, var_to_rm (Stack i)))
226 |   | Ident (Vreg src_r) ->
227 |     let src_r = List.assoc src_r reg_numbers in
228 |     if src_r = dest_r then
229 |       None
230 |     else if List.mem src_r overwritten_regs then
231 |       Some (Zr_rm (dest_r,
232 |                    Zm (None, Some RSP, Some (Concrete_disp (stack_reg_to_offset src_r)))))
233 |     else
234 |       Some (Zr_rm (dest_r, Zr src_r))
235 |   | Ident (Global i) -> Some (Zr_rm (dest_r, var_to_rm (Global i)))
236 |   | Ident ((NamedSource _ | NamedTmp _) as v) ->
237 |     raise (Util.InternalError ("Named variable in instrSelX86: " ^ show_var v))
238 | 
239 | (* Move the values of aes into the registers and then the stack for argument
240 |    passing. If ae refers to any regs in overwritten_regs, which contains the
241 |    registers overwitten already for previous arguments, then use the stored
242 |    version on the stack. *)
243 | let rec setup_args (aes : atomic_exp list) (remaining_regs : reg list)
244 |     (overwritten_regs : reg list) : instruction list =
245 |   match (aes, remaining_regs) with
246 |   | ([], _) -> []
247 |   | (a :: aes, next :: regs) ->
248 |     (match setup_arg overwritten_regs next a with
249 |      | None -> []
250 |      | Some arg -> [Zmov arg])
251 |     @
252 |     setup_args aes regs (next :: overwritten_regs)
253 |   | _ ->
254 |     raise (TODO "InstrSelX86 does not support function calls with more than 6 arguments")
255 | 
256 | (* --------------- End calling convention ------------- *)
257 | 
258 | let op_to_cc b op =
259 |   match (b, op) with
260 |   | (true, Lt) ->
261 |     Z_L
262 |   | (false, Lt) ->
263 |     Z_NL
264 |   | (true, Gt) ->
265 |     Z_G
266 |   | (false, Gt) ->
267 |     Z_NG
268 |   | (true, Eq) ->
269 |     Z_E
270 |   | (false, Eq) ->
271 |     Z_NE
272 | 
273 | let reverse_op2 op =
274 |   match op with
275 |   | Gt -> Lt
276 |   | Lt -> Gt
277 |   | Eq -> Eq
278 | 
279 | (* Return a boolean true if the condition needs to be negated *)
280 | let test_to_x86 ae1 op ae2 b (label : string) : instruction list =
281 |   match (ae1, ae2) with
282 |   | (Ident i, _) ->
283 |     let (instrs, dest_src) = rm_ae_to_dest_src (var_to_rm i) ae2 in
284 |     instrs @
285 |     [Zbinop (Zcmp, dest_src);
286 |      Zjcc (op_to_cc b op, label)]
287 |   | (_, Ident i) ->
288 |     let (instrs, dest_src) = rm_ae_to_dest_src (var_to_rm i) ae1 in
289 |     instrs @
290 |     [Zbinop (Zcmp, dest_src);
291 |      Zjcc (op_to_cc b (reverse_op2 op), label)]
292 |   | (Num n1, Num n2) ->
293 |     let do_jump =
294 |       match op with
295 |       | Gt -> (Int64.compare n1 n2 > 0) = b
296 |       | Lt -> (Int64.compare n1 n2 < 0) = b
297 |       | Eq -> (Int64.compare n1 n2 = 0) = b
298 |     in
299 |     if do_jump then
300 |       [Zjcc (Z_ALWAYS, label)]
301 |     else
302 |       []
303 | 
304 | let rec be_to_x86 safe be : instruction list =
305 |   match be with
306 |   | AssignOp (v, Num imm, ((T.Lt | T.Gt | T.Eq) as op), ae2) ->
307 |     (* constant prop ensures both aren't immediate *)
308 |     be_to_x86 safe (AssignOp (v, ae2, reverse_op op, Num imm))
309 |   | AssignOp (v, Ident v2, ((T.Lt | T.Gt | T.Eq) as op), ae2) ->
310 |     let (instrs, cmp_arg) = rm_ae_to_dest_src (var_to_rm v2) ae2 in
311 |     let cmp_instrs = instrs @ [Zbinop (Zcmp, cmp_arg)] in
312 |     (match var_to_rm v with
313 |      | Zm _ as m ->
314 |        cmp_instrs @
315 |        [Zset (op_to_cond op, B r_scratch);
316 |         Zbinop (Zand, Zrm_i (Zr r_scratch, 1L));
317 |         Zmov (Zrm_r (m, r_scratch))]
318 |      | Zr r ->
319 |        cmp_instrs @
320 |        [Zset (op_to_cond op, B r);
321 |         Zbinop (Zand, Zrm_i (Zr r, 1L))])
322 |   | AssignOp (v, ae1, ((T.Plus | T.Minus | T.Lshift | T.BitOr
323 |                        | T.BitAnd | T.Times | T.Div) as op), ae2) ->
324 |     (match (var_to_rm v, ae1) with
325 |      | (Zr r1, Num imm2) ->
326 |        (* r1 := imm2 op m/r3 --> mov r1, imm2; op r1, m/r3 *)
327 |        Zmov (Zrm_i (Zr r1, imm2)) :: build_to_reg_op op r1 ae2
328 |      | (Zr r1, Ident var) ->
329 |        (* r1 := m/r2 op m/r3/imm3 --> mov r1, m/r2; op r1, m/r3/imm3 *)
330 |        Zmov (Zr_rm (r1, var_to_rm var)) :: build_to_reg_op op r1 ae2
331 |      | (Zm _ as m1, Num imm2) ->
332 |        (* m1 := imm2 op m/r3 -->
333 |           mov r_scratch, imm2; op r_scratch, m/r3; mov m1, r_scratch *)
334 |        Zmov (Zrm_i (Zr r_scratch, imm2)) ::
335 |        build_to_reg_op op r_scratch ae2 @
336 |        [Zmov (Zrm_r (m1, r_scratch))]
337 |      | (Zm _ as m1, Ident var) ->
338 |        (* m1 := m/r2 op m/r3/imm3 -->
339 |           mov r_scratch, m/r2; op r_scratch, m/r3/imm3; mov m1, r_scratch *)
340 |        Zmov (Zr_rm (r_scratch, var_to_rm var)) ::
341 |        build_to_reg_op op r_scratch ae2 @
342 |        [Zmov (Zrm_r (m1, r_scratch))])
343 |   | AssignOp (_, _, (T.And | T.Or), _) ->
344 |     raise (Util.InternalError "And/Or in instrSelX86")
345 |   | AssignAtom (v, ae) ->
346 |     let (instrs, mov_arg) = rm_ae_to_dest_src (var_to_rm v) ae in
347 |     instrs @ [Zmov mov_arg]
348 |   | Ld (v1, v2, ae) ->
349 |     let (instrs, src_rm) = heap_to_rm v2 ae in
350 |     let (instrs2, dest_src) = rm_rm_to_dest_src (var_to_rm v1) src_rm in
351 |     instrs @
352 |     instrs2 @
353 |     [Zmov dest_src]
354 |   | St (v, ae1, ae2) ->
355 |     let (instrs, dest_rm) = heap_to_rm v ae1 in
356 |     let (instrs2, dest_src) = rm_ae_to_dest_src dest_rm ae2 in
357 |     instrs @
358 |     instrs2 @
359 |     [Zmov dest_src]
360 |   | Call (v, f, aes) ->
361 |     let alloc_name = f in
362 |     caller_save @
363 |     setup_args aes reg_list [] @
364 |     [Zcall alloc_name] @
365 |     caller_restore @
366 |     (match v with
367 |      | None -> []
368 |      | Some v -> [Zmov (Zrm_r (var_to_rm v, r_scratch))])
369 |   | BoundCheck (a1, a2) ->
370 |     if safe then
371 |       test_to_x86 a1 Lt (Num 0L) true "bound_error" @
372 |       test_to_x86 a1 Lt a2 false "bound_error"
373 |     else
374 |       []
375 |   | NullCheck v ->
376 |     if safe then
377 |       test_to_x86 (Ident v) Eq (Num 0L) true "null_error"
378 |     else
379 |       []
380 | 
381 | (* Translate a function body to x86. safe determines whether to do null and
382 |    bounds checks *)
383 | let to_x86 (safe : bool) (ll : L.linear list) (num_stack : int)
384 |   : instruction list =
385 |   (* We have to keep RSP 16 byte aligned, add a qword if necessary *)
386 |   let num_stack =
387 |     if num_stack mod 2 = 0 then
388 |       (* We need to do an odd number of pushes over all, since the call has
389 |          pushed rip for us. There are an even number of callee saved registers,
390 |          including rbp. *)
391 |       num_stack + 1
392 |     else
393 |       num_stack
394 |   in
395 |   (* Save the old base pointer *)
396 |   [Zpush (Zi_rm (Zr RBP))] @
397 |   (* Set a new base pointer *)
398 |   [Zmov (Zr_rm (RBP, Zr RSP))] @
399 |   (* Extend the stack for the local variables *)
400 |   [Zbinop (Zsub, Zrm_i (Zr RSP, Int64.mul (Int64.of_int num_stack) 8L))] @
401 |   (* Save other the callee save registers, odd number *)
402 |   callee_save @
403 |   List.flatten
404 |     (List.map
405 |        (fun l ->
406 |           match l with
407 |           | L.Instr be -> be_to_x86 safe be
408 |           | L.Return None -> callee_restore @ [Zleave; Zret]
409 |           | L.Return (Some v) -> Zmov (Zr_rm (RAX, var_to_rm v)) :: callee_restore @ [Zleave; Zret]
410 |           | L.CJump ((ae1, op, ae2), b, s) ->
411 |             test_to_x86 ae1 op ae2 b s
412 |           | L.Jump s ->
413 |             [Zjcc (Z_ALWAYS, s)]
414 |           | L.Label s ->
415 |             [Zlabel s])
416 |        ll)
417 | 


--------------------------------------------------------------------------------
/src/blockStructure.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* A control flow graph representation with basic blocks, and the source AST ->
 20 |    CGF algorithm. Also compiles arrays to loads and stores. *)
 21 | 
 22 | open Util
 23 | module S = SourceAst
 24 | 
 25 | type var =
 26 |   | Vreg of int
 27 |   | Stack of int
 28 |   | Global of string
 29 |   | NamedSource of string * SourceAst.scope
 30 |   | NamedTmp of string * int
 31 | 
 32 | let var_cmp_index (v : var) : int =
 33 |   match v with
 34 |   | Vreg _ -> 0
 35 |   | Stack _ -> 1
 36 |   | Global _ -> 2
 37 |   | NamedSource _ -> 3
 38 |   | NamedTmp _ -> 4
 39 | 
 40 | let compare_var v1 v2 =
 41 |   match (v1, v2) with
 42 |   | (Vreg i1, Vreg i2) -> compare i1 i2
 43 |   | (Stack i1, Stack i2) -> compare i1 i2
 44 |   | (Global s1, Global s2) -> String.compare s1 s2
 45 |   | (NamedSource (s1, scope1), NamedSource (s2, scope2)) ->
 46 |     let c = SourceAst.compare_scope scope1 scope2 in
 47 |     if c = 0 then
 48 |       String.compare s1 s2
 49 |     else
 50 |       c
 51 |   | (NamedTmp (s1, i1), NamedTmp (s2, i2)) ->
 52 |     let c = String.compare s1 s2 in
 53 |     if c = 0 then
 54 |       compare i1 i2
 55 |     else
 56 |       c
 57 |   | _ -> compare (var_cmp_index v1) (var_cmp_index v2)
 58 | 
 59 | let show_var v =
 60 |   match v with
 61 |   | Vreg i -> "_r_" ^ string_of_int i
 62 |   | Stack i -> "_s_" ^ string_of_int i
 63 |   | Global i -> "_g_" ^ i
 64 |   | NamedSource (s,_) -> s
 65 |   | NamedTmp (s, i) -> "_tmp_" ^ s ^ string_of_int i
 66 | 
 67 | let pp_var fmt v =
 68 |   Format.fprintf fmt "%s" (show_var v)
 69 | 
 70 | module VarCmp = struct
 71 |   type t = var
 72 |   let compare = compare_var
 73 | end
 74 | 
 75 | module Varset' = Set.Make(VarCmp)
 76 | 
 77 | module Varset = struct
 78 |   include Varset'
 79 |   let show s = show_list show_var (elements s)
 80 |   let pp fmt s = Format.fprintf fmt "%a" (pp_set pp_var) (elements s)
 81 | end
 82 | 
 83 | module Varmap = Map.Make(VarCmp)
 84 | 
 85 | (* Atomic expressions *)
 86 | type atomic_exp =
 87 |   | Ident of var
 88 |   | Num of int64
 89 | 
 90 | let show_atomic_exp ae =
 91 |   match ae with
 92 |   | Ident v -> show_var v
 93 |   | Num i -> Int64.to_string i
 94 | 
 95 | let pp_atomic_exp fmt ae =
 96 |   Format.fprintf fmt "%s" (show_atomic_exp ae)
 97 | 
 98 | (* A single entry in a basic block *)
 99 | type block_elem =
100 |   | AssignOp of var * atomic_exp * Tokens.op * atomic_exp
101 |   | AssignAtom of var * atomic_exp
102 |   (* Ld (x,y,e) represents x := *(y+e) *)
103 |   | Ld of var * var * atomic_exp
104 |   (* St (x,e1,e2) represents *(x+e1) := e2 *)
105 |   | St of var * atomic_exp * atomic_exp
106 |   (* Call (x, f, aes) represents x := f(aes) *)
107 |   | Call of var option * string * atomic_exp list
108 |   (* BoundCheck (a1, a2) represents assert (a1 >= 0 && a1 < a2) *)
109 |   | BoundCheck of atomic_exp * atomic_exp
110 |   (* NullCheck v represents assert (v <> 0) *)
111 |   | NullCheck of var
112 | 
113 | let pp_block_elem fmt be =
114 |   match be with
115 |   | AssignOp (v, ae1, op, ae2) ->
116 |     Format.fprintf fmt "%a := %a %s %a"
117 |       pp_var v
118 |       pp_atomic_exp ae1
119 |       (Tokens.show_op op)
120 |       pp_atomic_exp ae2
121 |   | AssignAtom (v, ae) ->
122 |     Format.fprintf fmt "%a := %a"
123 |       pp_var v
124 |       pp_atomic_exp ae
125 |   | Ld (v1, v2, ae) ->
126 |     Format.fprintf fmt "%a := *(%a+%a)"
127 |       pp_var v1
128 |       pp_var v2
129 |       pp_atomic_exp ae
130 |   | St (v, ae1, ae2) ->
131 |     Format.fprintf fmt "*(%a+%a) := %a"
132 |       pp_var v
133 |       pp_atomic_exp ae1
134 |       pp_atomic_exp ae2
135 |   | Call (Some v, x, aes) ->
136 |     Format.fprintf fmt "%a := %s%a"
137 |       pp_var v
138 |       x
139 |       (pp_list pp_atomic_exp) aes
140 |   | Call (None, x, aes) ->
141 |     Format.fprintf fmt "%s%a"
142 |       x
143 |       (pp_list pp_atomic_exp) aes
144 |   | BoundCheck (a1, a2) ->
145 |     Format.fprintf fmt "assert (%a >= 0 && %a < %a)"
146 |       pp_atomic_exp a1
147 |       pp_atomic_exp a1
148 |       pp_atomic_exp a2
149 |   | NullCheck v ->
150 |     Format.fprintf fmt "assert (%a <> 0)"
151 |       pp_var v
152 | 
153 | 
154 | let show_block_elem be =
155 |   pp_block_elem Format.str_formatter be;
156 |   Format.flush_str_formatter ()
157 | 
158 | type basic_block = block_elem list
159 | 
160 | type test_op =
161 |   | Lt
162 |   | Gt
163 |   | Eq
164 | 
165 | let pp_test_op fmt op =
166 |   match op with
167 |   | Lt -> Format.fprintf fmt "<"
168 |   | Gt -> Format.fprintf fmt ">"
169 |   | Eq -> Format.fprintf fmt "="
170 | 
171 | type test = atomic_exp * test_op * atomic_exp
172 | 
173 | let pp_test fmt (ae1, op, ae2) =
174 |   Format.fprintf fmt "%a %a %a"
175 |     pp_atomic_exp ae1
176 |     pp_test_op op
177 |     pp_atomic_exp ae2
178 | 
179 | (* A basic block is either at the end of the function, returning a var, or
180 |    there is an unconditional jump out of it, or a branch out of it to the
181 |    blocks indexed by the int. *)
182 | type next_block =
183 |   | Return of var option
184 |   | Next of int
185 |   (* The first int is the block number if the ident is true, and the second if
186 |    * it is false *)
187 |   | Branch of test * int * int
188 | 
189 | let pp_next_block fmt nb =
190 |   match nb with
191 |   | Return (Some v) -> Format.fprintf fmt "return %a" pp_var v
192 |   | Return None -> Format.fprintf fmt "return"
193 |   | Next i -> Format.fprintf fmt "B%d" i
194 |   | Branch (t,i1,i2) -> Format.fprintf fmt "if@ %a@ then@ B%d@ else@ B%d"
195 |                           pp_test t
196 |                           i1
197 |                           i2
198 | 
199 | (* An adjacency list representation for the CFG *)
200 | type cfg_entry =
201 |   { bnum : int; elems : block_elem list; next : next_block;
202 |     mutable started : bool; mutable finished : bool }
203 | 
204 | let pp_cfg_entry fmt e =
205 |   Format.fprintf fmt "@[[B%d:@ %a@ %a]@]"
206 |     e.bnum
207 |     (pp_list pp_block_elem) e.elems
208 |     pp_next_block e.next
209 | 
210 | type cfg = cfg_entry list
211 | 
212 | let pp_cfg fmt es = pp_list pp_cfg_entry fmt es
213 | 
214 | let cfg_to_graphviz fmt (cfg : cfg) : unit =
215 |   Format.fprintf fmt "digraph {@\nnode[shape=box]@\n%a@\n}"
216 |     (fun fmt cfg ->
217 |        List.iter
218 |          (fun entry ->
219 |             Format.fprintf fmt "%d[label=\"%a\"]"
220 |               entry.bnum
221 |               (fun fmt e ->
222 |                  if entry.bnum = 0 then
223 |                    Format.fprintf fmt "ENTRY\\l"
224 |                  else
225 |                    ();
226 |                  List.iter (fun x -> Format.fprintf fmt "%s\\l" (show_block_elem x))
227 |                    e.elems;
228 |                  match e.next with
229 |                  | Branch (t,_,_) -> Format.fprintf fmt "%a\\l" pp_test t
230 |                  | Return None -> Format.fprintf fmt "RETURN\\l"
231 |                  | Return (Some i) -> Format.fprintf fmt "RETURN %s\\l" (show_var i)
232 |                  | _ -> ())
233 |               entry;
234 |             Format.fprintf fmt "@\n";
235 |             match entry.next with
236 |             | Return _ -> ()
237 |             | Next i ->
238 |               Format.fprintf fmt "%d->%d@\n" entry.bnum i
239 |             | Branch (_, i1, i2) ->
240 |               Format.fprintf fmt "%d->%d[label=1]@\n" entry.bnum i1;
241 |               Format.fprintf fmt "%d->%d[label=0]@\n" entry.bnum i2)
242 |          cfg)
243 |     cfg
244 | 
245 | let id_to_var (i : S.id) : var =
246 |   match i with
247 |   | S.Source (s, None) ->
248 |     raise (InternalError ("un-scoped source identifier in blockStructure: " ^
249 |                           S.show_id (S.Source (s, None))))
250 |   | S.Source (s, Some scope) -> NamedSource (s, scope)
251 |   | S.Temp (s, i) -> NamedTmp (s, i)
252 | 
253 | let bool_to_num b =
254 |   if b then Num 1L else Num 0L
255 | 
256 | (* Convert an amomic source expression *)
257 | let exp_to_atomic (e : S.exp) : atomic_exp =
258 |   match e with
259 |   | S.Ident (id, []) -> Ident (id_to_var id)
260 |   | S.Num n -> Num n
261 |   | S.Bool b -> bool_to_num b
262 |   | S.Ident (_, _::_) | S.Op _ | S.Uop _ | S.Array _ | S.Call _ ->
263 |     raise (InternalError "non-flat expression in blockStructure")
264 | 
265 | let tmp_var = NamedTmp("BS", 0)
266 | 
267 | (* Convert x := e into block elements, returned in *reverse* order *)
268 | let flat_e_to_assign (x : S.id) (e : S.exp) : block_elem list =
269 |   let v = id_to_var x in
270 |   match e with
271 |   | S.Ident (id, []) ->
272 |     [AssignAtom (v, Ident (id_to_var id))]
273 |   | S.Ident (id, [ae]) ->
274 |     let ae = exp_to_atomic ae in
275 |     (* v := id[ae] --> tmp_var := *id;
276 |                        assert ae < length_var;
277 |                        v := *(id + (ae+1) * 8) *)
278 |     let get_len = Ld (tmp_var, id_to_var id, Num 0L) in
279 |     (match ae with
280 |      | Num n ->
281 |        [Ld (v, id_to_var id, Num (Int64.shift_left (Int64.add n 1L) 3));
282 |         BoundCheck (ae, Ident tmp_var);
283 |         get_len;
284 |         NullCheck (id_to_var id)]
285 |      | _ ->
286 |        [Ld (v, id_to_var id, Ident tmp_var);
287 |         AssignOp (tmp_var, Ident tmp_var, Tokens.Lshift, Num 3L);
288 |         AssignOp (tmp_var, ae, Tokens.Plus, Num 1L);
289 |         BoundCheck (ae, Ident tmp_var);
290 |         get_len;
291 |         NullCheck (id_to_var id)])
292 |   | S.Ident (_, _::_::_) ->
293 |     raise (InternalError "multi-dimension array index in blockStructure")
294 |   | S.Num n -> [AssignAtom (v, Num n)]
295 |   | S.Bool b -> [AssignAtom (v, bool_to_num b)]
296 |   | S.Op (ae1, op, ae2) ->
297 |     [AssignOp (v, exp_to_atomic ae1, op, exp_to_atomic ae2)]
298 |   | S.Uop (Tokens.Not, ae) ->
299 |     (* !x == (x = false) *)
300 |     [AssignOp (v, exp_to_atomic ae, Tokens.Eq, Num 0L)]
301 |   | S.Array es ->
302 |     [Call (Some v, "allocate" ^ string_of_int (List.length es),
303 |            List.map exp_to_atomic es)]
304 |   | S.Call (f, es) ->
305 |     [Call (Some v, S.show_id f, List.map exp_to_atomic es)]
306 | 
307 | let op_to_test_op op =
308 |   match op with
309 |   | Tokens.Lt -> Lt
310 |   | Tokens.Gt -> Gt
311 |   | Tokens.Eq -> Eq
312 |   | _ -> raise (InternalError "non-test operator in test in blockStructure")
313 | 
314 | let flat_exp_to_test (e : S.exp) : test =
315 |   match e with
316 |   | S.Ident (id, []) ->
317 |     (Ident (id_to_var id), Eq, Num 1L)
318 |   | S.Ident (_, _::_) ->
319 |     raise (InternalError "array index in test position in blockStructure")
320 |   | S.Num _ ->
321 |     raise (InternalError "number in test position in blockStructure")
322 |   | S.Bool b ->
323 |     (bool_to_num b, Eq, Num 1L)
324 |   | S.Op (ae1, op, ae2) ->
325 |     (exp_to_atomic ae1, op_to_test_op op, exp_to_atomic ae2)
326 |   | S.Uop (Tokens.Not, ae) ->
327 |     (* !x == (x = false) *)
328 |     (exp_to_atomic ae, Eq, Num 0L)
329 |   | S.Array _ ->
330 |     raise (InternalError "array alloc test in blockStructure")
331 |   | S.Call _ ->
332 |     raise (InternalError "function call test in blockStructure")
333 | 
334 | (* Build the control-flow graph *)
335 | let build_cfg (stmts : S.stmt list) : cfg =
336 |   (* A counter to get new indices for blocks *)
337 |   let next_block_num = ref 0 in
338 |   let get_block_num () =
339 |     let x = !next_block_num in
340 |     next_block_num := 1 + !next_block_num;
341 |     x
342 |   in
343 | 
344 |   (* Store the cfg here as we build it *)
345 |   let the_cfg = ref [] in
346 |   let add_block (num : int) (block : basic_block) (next : next_block) : unit =
347 |     the_cfg := { bnum = num; elems = List.rev block; next = next;
348 |                  started = false; finished = false} :: !the_cfg
349 |   in
350 | 
351 |   (* Convert stmts to basic blocks, and add them to the_cfg.
352 |      block_num is the index for the block being created.
353 |      block_acc contains the elements seen so far, in *reverse* order.
354 |      ret_block is the control flow out of the block being created.
355 | 
356 |      The AST must be in a restricted form:
357 |      - The expressions must all be unnested
358 |        (i.e. UnnestExp.is_flat returns true).
359 |      - && and || must have been removed.
360 |      - Multi-dimensional array indexing must have been removed.
361 |      - No statements can follow a return
362 |      - All paths must terminate in a return *)
363 |   let rec find_blocks (block_num : int) (block_acc : basic_block)
364 |       (following_block : next_block) (stmts : S.stmt list) : unit =
365 |     match stmts with
366 |     | [] -> add_block block_num block_acc following_block
367 |     | S.Assign (x, [], e) :: s1 ->
368 |       find_blocks block_num (flat_e_to_assign x e @ block_acc) following_block s1
369 |     | S.Assign (x, [ae], e) :: s1 ->
370 |       let ae = exp_to_atomic ae in
371 |       (* x[ae] := e --> tmp_var := *x;
372 |                         assert ae < length_var;
373 |                         *(x + (ae+1) * 8) := e *)
374 |       let get_len = Ld (tmp_var, id_to_var x, Num 0L) in
375 |       let new_block_elems =
376 |         (match ae with
377 |          | Num n ->
378 |            [St (id_to_var x, Num (Int64.shift_left (Int64.add n 1L) 3),
379 |                 exp_to_atomic e);
380 |             BoundCheck (ae, Ident tmp_var);
381 |             get_len;
382 |             NullCheck (id_to_var x)]
383 |          | _ ->
384 |            [St (id_to_var x, Ident tmp_var, exp_to_atomic e);
385 |             AssignOp (tmp_var, Ident tmp_var, Tokens.Lshift, Num 3L);
386 |             AssignOp (tmp_var, ae, Tokens.Plus, Num 1L);
387 |             BoundCheck (ae, Ident tmp_var);
388 |             get_len;
389 |             NullCheck (id_to_var x)])
390 |       in
391 |       find_blocks block_num (new_block_elems @ block_acc) following_block s1
392 |     | S.Assign (_, _::_::_, _) :: _ ->
393 |       raise (InternalError "multi-dimension array index in blockStructure")
394 |     | S.Stmts s1 :: s2 ->
395 |       (* Treat { s1 ... sn } s1' ... sn' as though it were
396 |          s1 ... sn s1' ... sn' *)
397 |       find_blocks block_num block_acc following_block (s1 @ s2)
398 |     | S.DoWhile (s0, e, s1) :: s2 ->
399 |       let header_block_n = get_block_num () in
400 |       let body_block_n = get_block_num () in
401 |       add_block block_num block_acc (Next header_block_n);
402 |       find_blocks body_block_n [] (Next header_block_n) [s1];
403 |       (match (s2, following_block) with
404 |        | ([], Next i) ->
405 |          find_blocks header_block_n []
406 |            (Branch (flat_exp_to_test e, body_block_n, i))
407 |            [s0]
408 |        | _ ->
409 |          let following_block_n = get_block_num () in
410 |          find_blocks header_block_n []
411 |            (Branch (flat_exp_to_test e, body_block_n, following_block_n))
412 |            [s0];
413 |          find_blocks following_block_n [] following_block s2)
414 |     | S.Ite (e, s1, s2) :: s3 ->
415 |       let true_block_n = get_block_num () in
416 |       let false_block_n = get_block_num () in
417 |       add_block block_num block_acc
418 |         (Branch (flat_exp_to_test e, true_block_n, false_block_n));
419 |       if s3 = [] then
420 |         (find_blocks true_block_n [] following_block [s1];
421 |          find_blocks false_block_n [] following_block [s2])
422 |       else
423 |         let following_block_n = get_block_num () in
424 |         find_blocks true_block_n [] (Next following_block_n) [s1];
425 |         find_blocks false_block_n [] (Next following_block_n) [s2];
426 |         find_blocks following_block_n [] following_block s3
427 |     | S.In x :: s ->
428 |       find_blocks block_num (Call (Some (id_to_var x),"input", []) :: block_acc)
429 |         following_block s
430 |     | S.Out x :: s ->
431 |       find_blocks block_num
432 |         (Call (None, "output", [Ident (id_to_var x)]) :: block_acc) following_block s
433 |     | ((S.Loc (s1, _)) :: s2) ->
434 |       find_blocks block_num block_acc following_block (s1::s2)
435 |     | [S.Return (Some x)] ->
436 |       add_block block_num block_acc (Return (Some (id_to_var x)))
437 |     | [S.Return None] ->
438 |       add_block block_num block_acc (Return None)
439 |     | S.Return _ :: _ ->
440 |       raise (InternalError "return followed by statements in blockStructure")
441 | 
442 |   in
443 | 
444 |   (* Later on we rely on the starting block being #0 *)
445 |   let init_block = get_block_num () in
446 |   find_blocks init_block [] (Return None) stmts;
447 |   !the_cfg
448 | 


--------------------------------------------------------------------------------
/src/sourceAst.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * Example compiler
  3 |  * Copyright (C) 2015-2017 Scott Owens
  4 |  *
  5 |  *  This program is free software: you can redistribute it and/or modify
  6 |  *  it under the terms of the GNU General Public License as published by
  7 |  *  the Free Software Foundation, either version 3 of the License, or
  8 |  *  (at your option) any later version.
  9 |  *
 10 |  *  This program is distributed in the hope that it will be useful,
 11 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  *  GNU General Public License for more details.
 14 |  *
 15 |  *  You should have received a copy of the GNU General Public License
 16 |  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 | *)
 18 | 
 19 | (* The language's AST, and a predictive, recursive descent parser. See the
 20 |    ../README.md for the grammar. *)
 21 | 
 22 | open Util
 23 | module T = Tokens
 24 | module F = Format
 25 | 
 26 | (* Type of identifiers. Source ones come from the original program, and Temp
 27 |    ones come from intermediate compilation stages. This makes it easy to avoid
 28 |    unintentional conflicts. The string on a Temp should name the stage that
 29 |    introduced it. The scope indicates where the identifier is bound, if that is
 30 |    known. We expect the parse to generate unknown scopes and for the type
 31 |    checker to fill them in. All Temps are considered local in scope. *)
 32 | type scope =
 33 |   | Global
 34 |   | Parameter
 35 |   | Local
 36 | 
 37 | type id =
 38 |   | Source of string * scope option
 39 |   | Temp of string * int
 40 | 
 41 | let show_scope (s : scope) : string =
 42 |   match s with
 43 |   | Global -> "Global"
 44 |   | Parameter -> "Parameter"
 45 |   | Local -> "Local"
 46 | 
 47 | let show_id (i : id) : string =
 48 |   match i with
 49 |   | Source (s, None) -> s
 50 |   | Source (s, Some scope) -> s ^ "_" ^ show_scope scope
 51 |   | Temp (s,i) -> "_tmp_" ^ s ^ string_of_int i
 52 | 
 53 | (* Pretty-print an identifier *)
 54 | let pp_id (fmt : Format.formatter) (i : id) : unit =
 55 |   F.fprintf fmt "%s" (show_id i)
 56 | 
 57 | (* Construct a total order on ids so that we can use them as keys in maps.
 58 |    OCaml maps are implemented with balanced binary trees *)
 59 | let scope_to_int s =
 60 |   match s with
 61 |   | Global -> 0
 62 |   | Parameter -> 1
 63 |   | Local -> 2
 64 | 
 65 | (* Compare two scopes *)
 66 | let compare_scope (s1 : scope) (s2 : scope) : int =
 67 |   compare (scope_to_int s1) (scope_to_int s2)
 68 | 
 69 | (* Compare two identidiers *)
 70 | let compare_id (id1 : id) (id2 : id) : int =
 71 |   match (id1, id2) with
 72 |   | (Source (s1, scope1), Source (s2, scope2)) ->
 73 |     let c =
 74 |       option_compare compare_scope scope1 scope2
 75 |     in
 76 |     if c = 0 then
 77 |       String.compare s1 s2
 78 |     else
 79 |       c
 80 |   | (Temp _, Source _) -> -1
 81 |   | (Source _, Temp _) -> 1
 82 |   | (Temp (s1,i1), Temp (s2,i2)) ->
 83 |     let c = String.compare s1 s2 in
 84 |     if c = 0 then
 85 |       compare i1 i2
 86 |     else
 87 |       c
 88 | 
 89 | module Idord = struct
 90 |   type t = id
 91 |   let compare = compare_id
 92 | end
 93 | 
 94 | (* Build Map module specialised to keys of type id *)
 95 | module Idmap = Map.Make(Idord)
 96 | 
 97 | (* AST of expressions *)
 98 | type exp =
 99 |   | Ident of id * exp list
100 |   | Call of id * exp list
101 |   | Num of int64
102 |   | Bool of bool
103 |   | Op of exp * T.op * exp
104 |   | Uop of T.uop * exp
105 |   (* Allocate a new array of given dimensions. Initialise to 0 *)
106 |   | Array of exp list
107 | 
108 | (* pretty printing for expressions *)
109 | 
110 | (* Print a list, where each element is enclosed in [] *)
111 | let rec pp_array_list (f : F.formatter -> 'a -> unit)
112 |     (fmt : F.formatter) (l : 'a list)
113 |   : unit =
114 |   match l with
115 |   | [] -> ()
116 |   | (h::t) ->
117 |     F.fprintf fmt "[@[%a@]]@,%a"
118 |       f h
119 |       (pp_array_list f) t
120 | 
121 | (* Print a list, surrounded by (), separating the items with , *)
122 | let pp_call (f : F.formatter -> 'a -> unit) (fmt : F.formatter)
123 |     (l : 'a list)
124 |   : unit =
125 |   let rec pp fmt l =
126 |     match l with
127 |     | [] -> ()
128 |     | [h] ->
129 |       F.fprintf fmt "%a"
130 |         f h
131 |     | (h::t) ->
132 |       F.fprintf fmt "%a,@ %a"
133 |         f h
134 |         pp t
135 |   in
136 |   F.fprintf fmt "(@[%a@])"
137 |     pp l
138 | 
139 | (* Pretty print an expression *)
140 | let rec pp_exp (fmt : F.formatter) (exp : exp) : unit =
141 |   match exp with
142 |   | Ident (id, []) ->
143 |     F.fprintf fmt "%a"
144 |       pp_id id
145 |   | Ident (id, es) ->
146 |     F.fprintf fmt "%a%a"
147 |       pp_id id
148 |       (pp_array_list pp_exp) es
149 |   | Call (id, es) ->
150 |     F.fprintf fmt "%a%a"
151 |       pp_id id
152 |       (pp_call pp_exp) es
153 |   | Num n -> F.fprintf fmt "%Ld" n
154 |   | Bool true -> F.fprintf fmt "true"
155 |   | Bool false -> F.fprintf fmt "false"
156 |   | Op (e1, op, e2) ->
157 |     F.fprintf fmt "(@[%a@ %s@ %a@])"
158 |       pp_exp e1
159 |       (Tokens.show_op op)
160 |       pp_exp e2
161 |   | Uop (uop, e) ->
162 |     F.fprintf fmt "@[<2>%s@ %a@]"
163 |       (Tokens.show_uop uop)
164 |       pp_exp e
165 |   | Array es ->
166 |     F.fprintf fmt "@[<2>array@ %a@]"
167 |       (pp_array_list pp_exp) es
168 | 
169 | (* AST of statements *)
170 | type stmt =
171 |   | Assign of id * exp list * exp
172 |   (* A generalised do/while loop. Always execute the first statement, then
173 |      the test, then repeatedly do the 2nd, then first statement and then test
174 |      'while e s' becomes DoWhile (Stmts [], e, s) and 'do s while e' becomes
175 |      DoWhile (s, e, Stmts []) *)
176 |   | DoWhile of stmt * exp * stmt
177 |   | Ite of exp * stmt * stmt
178 |   | Stmts of stmt list
179 |   | In of id
180 |   | Out of id
181 |   | Return of id option
182 |   | Loc of stmt * int (* annotate a statement with it's source line number *)
183 | 
184 | (* Pretty-print a statement *)
185 | let rec pp_stmt (fmt : F.formatter) (stmt : stmt) : unit =
186 |   match stmt with
187 |   | Assign (id, [], e) ->
188 |     F.fprintf fmt "@[<2>%a :=@ %a@]"
189 |       pp_id id
190 |       pp_exp e
191 |   | Assign (id, es, e) ->
192 |     F.fprintf fmt "@[<2>%a%a :=@ %a@]"
193 |       pp_id id
194 |       (pp_array_list pp_exp) es
195 |       pp_exp e
196 |   | DoWhile (Stmts [], e, s) ->
197 |     F.fprintf fmt "@[<2>while@ %a@ %a@]"
198 |       pp_exp e
199 |       pp_stmt s
200 |   | DoWhile (s, e, Stmts []) ->
201 |     F.fprintf fmt "@[<2>do@ %a@ while@ %a@]"
202 |       pp_stmt s
203 |       pp_exp e
204 |   | DoWhile (s1, e, s2) ->
205 |     F.fprintf fmt "@[<2>do@ %a@ while@ %a@ %a@]"
206 |       pp_stmt s1
207 |       pp_exp e
208 |       pp_stmt s2
209 |   | Ite (e, s1, s2) ->
210 |     F.fprintf fmt "@[<2>if@ %a@ then@ %a@ else@ %a@]"
211 |       pp_exp e
212 |       pp_stmt s1
213 |       pp_stmt s2
214 |   | Stmts slist ->
215 |     F.fprintf fmt "{@\n%a}"
216 |       pp_stmts slist
217 |   | In i ->
218 |     F.fprintf fmt "@[<2>input@ %a@]"
219 |       pp_id i
220 |   | Out i ->
221 |     F.fprintf fmt "@[<2>output@ %a@]"
222 |       pp_id i
223 |   | Return None ->
224 |     F.fprintf fmt "return"
225 |   | Return (Some i) ->
226 |     F.fprintf fmt "@[<2>return@ %a@]"
227 |       pp_id i
228 |   | Loc (s, _) ->
229 |     pp_stmt fmt s
230 | 
231 | and pp_stmts (fmt : F.formatter) (stmts : stmt list) : unit =
232 |   let rec pp fmt stmts =
233 |     match stmts with
234 |     | [] -> ()
235 |     | stmt::stmts ->
236 |       F.fprintf fmt "%a@\n%a"
237 |         pp_stmt stmt
238 |         pp stmts
239 |   in
240 |   F.fprintf fmt "@[<v>%a@]"
241 |     pp stmts
242 | 
243 | (* AST of types *)
244 | type typ =
245 |   | Int
246 |   | Bool
247 |   (* An int array with the given number of dimensions *)
248 |   | Array of int
249 | 
250 | (* Pretty-print a type *)
251 | let pp_typ (fmt : F.formatter) (t : typ) : unit =
252 |   match t with
253 |   | Int -> F.fprintf fmt "int"
254 |   | Bool -> F.fprintf fmt "bool"
255 |   | Array n -> F.fprintf fmt "array@ %d" n
256 | 
257 | (* AST of variable and function declarations *)
258 | type var_dec = { var_name : id; typ : typ; init : exp; loc : int option }
259 | 
260 | type func = { fun_name : id; params : (id * typ) list; ret : typ;
261 |               locals : var_dec list; body : stmt list; loc : int option}
262 | 
263 | (* Pretty-print a variable declaration *)
264 | let pp_var_dec (fmt : F.formatter) (d : var_dec) : unit =
265 |   F.fprintf fmt "@[<2>let@ %a@ :@ %a@ =@ %a@]"
266 |     pp_id d.var_name
267 |     pp_typ d.typ
268 |     pp_exp d.init
269 | 
270 | (* Pretty-print variable declarations, 1 per line *)
271 | let pp_var_decs (fmt : F.formatter) (decs : var_dec list) : unit =
272 |   let rec pp fmt decs =
273 |     match decs with
274 |     | [] -> ()
275 |     | var_dec::decs ->
276 |       F.fprintf fmt "%a@\n%a"
277 |         pp_var_dec var_dec
278 |         pp decs
279 |   in
280 |   F.fprintf fmt "@[<v>%a@]"
281 |     pp decs
282 | 
283 | (* Pretty-print function parameters *)
284 | let rec pp_params (fmt : F.formatter) (params : (id * typ) list) : unit =
285 |   match params with
286 |   | [] -> ()
287 |   | (n,t)::params ->
288 |     F.fprintf fmt "@[(%a@ :@ %a)@]@ %a"
289 |       pp_id n
290 |       pp_typ t
291 |       pp_params params
292 | 
293 | (* Pretty-print a function *)
294 | let pp_func (fmt : F.formatter) (func : func) : unit =
295 |   F.fprintf fmt "@[<2>function@ %a@ %a@ :@ %a@ {@\n%a%a}@]@\n"
296 |     pp_id func.fun_name
297 |     pp_params func.params
298 |     pp_typ func.ret
299 |     pp_var_decs func.locals
300 |     pp_stmts func.body
301 | 
302 | (* AST of complete programs *)
303 | type prog = { globals : var_dec list; funcs : func list }
304 | 
305 | (* Pretty-print a program *)
306 | let pp_program (fmt : F.formatter) (p : prog) : unit =
307 |   pp_var_decs fmt p.globals;
308 |   List.iter (pp_func fmt) p.funcs
309 | 
310 | (* Raise a parse error *)
311 | let parse_error (ln : int) (msg : string) : 'a =
312 |   raise (BadInput ("Parse error on line " ^ string_of_int ln ^ ": " ^ msg))
313 | 
314 | (* Raise a parse error explaining the expected token and the token actually there *)
315 | let parse_error_expect (ln : int) (given : T.token) (expect : T.token)
316 |     (where : string)
317 |   : 'a =
318 |   raise (BadInput ("Parse error on line " ^ string_of_int ln ^ ": expected " ^
319 |                    T.show_token expect ^ " in " ^ where ^ " but found " ^
320 |                    T.show_token given))
321 | 
322 | 
323 | (* Raise a parse error because the end of file was reached unexpectedly *)
324 | let eof_error (expect : string) : 'a =
325 |   raise (BadInput ("Parse error: end of file while parsing " ^ expect))
326 | 
327 | (* Convert the first expression in toks into an AST. Return it with the left
328 |    over tokens. *)
329 | let rec parse_atomic_exp (toks : T.tok_loc list) : exp * T.tok_loc list =
330 |   match toks with
331 |   | [] -> eof_error "an expression"
332 |   | (T.Ident i, ln) :: (T.Lparen, _) ::toks ->
333 |     let (args, toks) = parse_args ln toks in
334 |     (Call (Source (i,None), args), toks)
335 |   | (T.Ident i, _) :: toks ->
336 |     let (indices, toks) = parse_indices toks in
337 |     (Ident (Source (i,None), indices), toks)
338 |   | (T.Num n, _) :: toks -> (Num n, toks)
339 |   | (T.True, _) :: toks -> (Bool true, toks)
340 |   | (T.False, _) :: toks -> (Bool false, toks)
341 |   | (T.Op T.Minus, _) :: toks ->
342 |     let (e, toks) = parse_atomic_exp toks in
343 |     (Op (Num 0L, T.Minus, e), toks)
344 |   | (T.Uop uop, _) :: toks ->
345 |     let (e, toks) = parse_atomic_exp toks in
346 |     (Uop (uop, e), toks)
347 |   | (T.Array, _) :: toks ->
348 |     let (indices, toks) = parse_indices toks in
349 |     (Array indices, toks)
350 |   | (T.Lparen, _) :: toks ->
351 |     (match parse_exp toks with
352 |      | (_, []) -> eof_error "a parenthesized expression"
353 |      | (e, (T.Rparen, _) :: toks) ->
354 |        (e, toks)
355 |      | (_, (t, ln)::_) ->
356 |        parse_error_expect ln t T.Rparen "a parenthesized expression")
357 |   | (t, ln) :: _ ->
358 |     parse_error ln ("bad expression, beginning with " ^ T.show_token t)
359 | 
360 | and parse_exp (toks : T.tok_loc list) : exp * T.tok_loc list =
361 |   match parse_atomic_exp toks with
362 |   | (e1, (T.Op o, _) :: toks) ->
363 |     let (e2, toks) = parse_atomic_exp toks in
364 |     (Op (e1, o, e2), toks)
365 |   | (e1, toks) -> (e1, toks)
366 | 
367 | (* Parse 0 or more array indices. Return them with the left over tokens. *)
368 | and parse_indices (toks : T.tok_loc list) : exp list * T.tok_loc list =
369 |   match toks with
370 |   | (T.Lbrac, _) :: toks ->
371 |     (match parse_exp toks with
372 |      | (_, []) -> eof_error "an array index"
373 |      | (e, (T.Rbrac, _) :: toks) ->
374 |        let (es,toks) = parse_indices toks in
375 |        (e::es, toks)
376 |      | (_, (t, ln) :: _) ->
377 |        parse_error_expect ln t T.Rbrac "an array index")
378 |   | _ -> ([], toks)
379 | 
380 | (* Parse 1 or more comma-separated expressions. Return them with the left over
381 |    tokens. End on a closing parenthesis *)
382 | and parse_args ln (toks : T.tok_loc list) : exp list * T.tok_loc list =
383 |   match parse_exp toks with
384 |   | (_, []) -> eof_error "a function call expression"
385 |   | (e, (T.Comma, _)::toks) ->
386 |     let (es, toks) = parse_args ln toks in
387 |     (e::es, toks)
388 |   | (e, (T.Rparen, _)::toks) ->
389 |     ([e], toks)
390 |   | (_, (t, ln) :: _) ->
391 |     parse_error ln ("expected " ^ T.show_token T.Comma ^ " or " ^
392 |                     T.show_token T.Rparen ^
393 |                     " in a function call expression but found " ^
394 |                     T.show_token t)
395 | 
396 | (* Convert the first statement in toks into an AST. Return it with the left
397 |    over tokens *)
398 | let rec parse_stmt (toks : T.tok_loc list) : stmt * T.tok_loc list =
399 |   match toks with
400 |   | [] -> eof_error "a statement"
401 |   | (T.Ident x, ln) :: toks ->
402 |     (match parse_indices toks with
403 |      | (_, []) -> eof_error "an assignment statement"
404 |      | (indices, (T.Assign, _) :: toks) ->
405 |        let (e, toks) = parse_exp toks in
406 |        (Loc (Assign (Source (x, None), indices, e), ln), toks)
407 |      | (_, (t, ln) :: _) ->
408 |        parse_error_expect ln t T.Assign "an assignment statement")
409 |   | (T.While, ln) :: toks ->
410 |     let (e, toks) = parse_exp toks in
411 |     let (s, toks) = parse_stmt toks in
412 |     (Loc (DoWhile (Stmts [], e, s), ln), toks)
413 |   | (T.Do, ln) :: toks ->
414 |     (match parse_stmt toks with
415 |      | (_, []) -> eof_error "a do statement"
416 |      | (s, (T.While, _)::toks) ->
417 |        let (e, toks) = parse_exp toks in
418 |        (Loc (DoWhile (s, e, Stmts []), ln), toks)
419 |      | (_, (t, ln) :: _) ->
420 |        parse_error_expect ln t T.While "a do statement")
421 |   | (T.If, ln) :: toks ->
422 |     (match parse_exp toks with
423 |      | (_, []) -> eof_error "an if statement"
424 |      | (e, (T.Then, _) :: toks) ->
425 |        (match parse_stmt toks with
426 |         | (_, []) -> eof_error "an if statement"
427 |         | (s1, (T.Else, _) :: toks) ->
428 |           let (s2, toks) = parse_stmt toks in
429 |           (Loc (Ite (e, s1, s2), ln), toks)
430 |         | (_, (t, ln) :: _) ->
431 |           parse_error_expect ln t T.Else "an if statement")
432 |      | (_, (t, ln) :: _) ->
433 |        parse_error_expect ln t T.Then "an if statement")
434 |   | (T.Lcurly, ln) :: toks ->
435 |     let (s_list, toks) = parse_stmt_list toks in
436 |     (Loc (Stmts (s_list), ln), toks)
437 |   | (T.Input, ln) :: (T.Ident x, _) :: toks -> (Loc (In (Source (x,None)), ln), toks)
438 |   | (T.Output, ln) :: (T.Ident x, _) :: toks -> (Loc (Out (Source (x,None)), ln), toks)
439 |   | (T.Return, ln) :: (T.Ident x, _) :: toks -> (Loc (Return (Some (Source (x,None))), ln), toks)
440 |   | (t, ln) :: _ ->
441 |     parse_error ln ("bad statement, beginning with a " ^ T.show_token t)
442 | 
443 | (* Convert all of the statement in toks into an AST, stopping on a }. Return
444 |    them with the left over tokens, not including the } *)
445 | and parse_stmt_list (toks : T.tok_loc list) : stmt list * T.tok_loc list =
446 |   match toks with
447 |   | ((T.Rcurly, _) :: toks) -> ([], toks)
448 |   | _ ->
449 |     let (s, toks) = parse_stmt toks in
450 |     let (s_list, toks) = parse_stmt_list toks in
451 |     (s::s_list, toks)
452 | 
453 | (* Convert the first typ in toks into an AST. Return it with the left over
454 |    tokens. *)
455 | let parse_typ (toks : T.tok_loc list) : typ * T.tok_loc list =
456 |   match toks with
457 |   | [] -> eof_error "a type"
458 |   | (T.Int, _) :: toks -> (Int, toks)
459 |   | (T.Bool, _) :: toks -> (Bool, toks)
460 |   | (T.Array, _) :: (T.Num n, _) :: toks -> (Array (Int64.to_int n), toks)
461 |   | (t,ln) :: _ ->
462 |     parse_error ln ("bad type, beginning with a " ^ T.show_token t)
463 | 
464 | (* Convert the first function parameter toks into an AST. Return it with the
465 |    left over tokens. *)
466 | let parse_param (toks : T.tok_loc list) : (id * typ) * T.tok_loc list =
467 |   match toks with
468 |   | [] -> eof_error "a function parameter"
469 |   | (T.Lparen, _) :: (T.Ident x, _) :: (T.Colon,_) :: toks ->
470 |     (match parse_typ toks with
471 |      | (_, []) -> eof_error "a function parameter"
472 |      | (t, (T.Rparen, _)::toks) ->
473 |        ((Source (x,None), t), toks)
474 |      | (_, (t, ln) :: _) ->
475 |        parse_error_expect ln t T.Rparen "a function parameter")
476 |   | (t, ln) :: _ -> parse_error_expect ln t T.Lparen "a function parameter"
477 | 
478 | (* Convert a list of function parameters in toks into an AST. Return it with
479 |    the left over tokens. Recognise when the list is over by the next token not
480 |    being Lparen *)
481 | let rec parse_param_list (toks : T.tok_loc list) : (id * typ) list * T.tok_loc list =
482 |   match toks with
483 |   | ((T.Lparen, _) :: _) ->
484 |     let (v, toks) = parse_param toks in
485 |     let (v_list, toks) = parse_param_list toks in
486 |     (v::v_list, toks)
487 |   | _ -> ([], toks)
488 | 
489 | 
490 | (* Convert the first variable declaration in toks into an AST. Return it with
491 |    the left over tokens. *)
492 | let parse_var_dec (toks : T.tok_loc list) : var_dec * T.tok_loc list =
493 |   match toks with
494 |   | [] -> eof_error "a variable declaration"
495 |   | (T.Let, ln) :: (T.Ident x, _) :: (T.Colon,_) :: toks ->
496 |     (match parse_typ toks with
497 |      | (_, []) -> eof_error "a variable declaration"
498 |      | (t, (T.Op T.Eq, _)::toks) ->
499 |        let (e, toks) = parse_exp toks in
500 |        ({ var_name = Source (x,None); typ = t; init = e; loc = Some ln }, toks)
501 |      | (_, (t, ln) :: _) ->
502 |        parse_error_expect ln t (T.Op T.Eq) "a variable declaration")
503 |   | (t, ln) :: _ -> parse_error_expect ln t T.Let "a variable declaration"
504 | 
505 | (* Convert a list of variable declaration in toks into an AST. Return it with
506 |    the left over tokens. Recognise when the list is over by the next token not
507 |    being Let *)
508 | let rec parse_var_dec_list (toks : T.tok_loc list) : var_dec list * T.tok_loc list =
509 |   match toks with
510 |   | (T.Let, _) :: _ ->
511 |     let (v, toks) = parse_var_dec toks in
512 |     let (v_list, toks) = parse_var_dec_list toks in
513 |     (v::v_list, toks)
514 |   | _ -> ([], toks)
515 | 
516 | (* Convert the first function declaration in toks into an AST. Return it with
517 |    the left over tokens. *)
518 | let parse_func (toks : T.tok_loc list) : func * T.tok_loc list =
519 |   match toks with
520 |   | [] -> eof_error "a function declaration"
521 |   | (T.Function, ln) :: (T.Ident x, _) :: toks ->
522 |     (match parse_param_list toks with
523 |      | (_, []) -> eof_error "a function declaration"
524 |      | (params, (T.Colon, _) :: toks) ->
525 |        if List.length params <> 0 then
526 |          (match parse_typ toks with
527 |           | (_, []) -> eof_error "a function declaration"
528 |           | (t, (T.Lcurly, _) :: toks) ->
529 |             let (var_decs, toks) = parse_var_dec_list toks in
530 |             let (stmts, toks) = parse_stmt_list toks in
531 |             ({ fun_name = Source (x,None);
532 |                params = params;
533 |                ret = t;
534 |                locals = var_decs;
535 |                body = stmts;
536 |                loc = Some ln }, toks)
537 |           | (_, (t, ln) :: _) ->
538 |             parse_error_expect ln t T.Lcurly "a function declaration")
539 |        else
540 |          parse_error ln
541 |            "bad function declaration, functions must have at least 1 parameter"
542 |      | (_, (t, ln) :: _) ->
543 |        parse_error_expect ln t T.Colon "a function declaration")
544 |   | (t, ln) :: _ -> parse_error_expect ln t T.Function "a function declaration"
545 | 
546 | (* Parse global variable declarations, and then function declarations until the
547 |    end of file *)
548 | let parse_program (toks : T.tok_loc list) : prog =
549 |   let (var_decs, toks) = parse_var_dec_list toks in
550 |   let rec parse_funs toks =
551 |     match toks with
552 |     | [] -> []
553 |     | _ ->
554 |       let (f, toks) = parse_func toks in
555 |       f :: parse_funs toks
556 |   in
557 |   { globals = var_decs; funcs = parse_funs toks }
558 | 
559 | let stmts_to_stmt (s : stmt list) : stmt =
560 |   match s with
561 |   | [s1] -> s1
562 |   | _ -> Stmts s
563 | 


--------------------------------------------------------------------------------