├── src
├── .merlin
├── _tags
├── TODO
├── blockInterp.mli
├── regAlloc.mli
├── typeCheck.mli
├── frontEnd.mli
├── constProp.mli
├── compileFunction.mli
├── shrinkImmediates.mli
├── instrSelX86.mli
├── liveVarAnalysis.mli
├── lineariseCfg.mli
├── unnestExp.mli
├── interp.ml
├── astInterp.mli
├── tokens.mli
├── frontEnd.ml
├── Makefile
├── README.md
├── sourceAst.mli
├── blockStructure.mli
├── .depend
├── compile.ml
├── compileFunction.ml
├── lineariseCfg.ml
├── util.ml
├── shrinkImmediates.ml
├── blockInterp.ml
├── regAlloc.ml
├── tokens.ml
├── unnestExp.ml
├── x86.ml
├── liveVarAnalysis.ml
├── astInterp.ml
├── constProp.ml
├── typeCheck.ml
├── instrSelX86.ml
├── blockStructure.ml
└── sourceAst.ml
├── Makefile
├── runtime
├── Makefile
├── io.c
└── array.c
├── .gitignore
├── tests
├── div.expl
├── fact.expl
├── runtime_errors.expl
├── insert_sort.expl
├── insert_sort.c
├── scoping.expl
├── Makefile
├── quicksort.expl
└── array.expl
└── README.md
/src/.merlin:
--------------------------------------------------------------------------------
1 | B _build
2 |
--------------------------------------------------------------------------------
/src/_tags:
--------------------------------------------------------------------------------
1 | true : package(str)
2 | true : bin_annot, debug, principal, safe_string, strict_formats, strict_sequence, warn(A-42-4)
3 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | make -C src
3 | make -C runtime
4 | make -C tests
5 |
6 | clean:
7 | make -C src clean
8 | make -C runtime clean
9 | make -C tests clean
10 |
--------------------------------------------------------------------------------
/src/TODO:
--------------------------------------------------------------------------------
1 | Const prop for x * 2^n -> x shift n
2 | Do some simple register allocation
3 | Add OCaml warning 4, and manually annotate all of the matches where it should apply.
4 |
--------------------------------------------------------------------------------
/runtime/Makefile:
--------------------------------------------------------------------------------
1 | all : array.o io.o
2 |
3 | array.o : array.c
4 | gcc -c array.c -o array.o -std=c99
5 |
6 | io.o : io.c
7 | gcc -c io.c -o io.o -std=c99
8 |
9 | clean :
10 | rm -f io.o array.o
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # OCaml executables
2 | src/*.byte
3 | src/*.native
4 | src/_build
5 |
6 | # Test executables and intermediate things
7 | tests/*.s
8 | tests/*.o
9 | tests/*.exe
10 |
11 | runtime/*.o
12 |
13 | # Vim temporary files
14 | .*.swp
15 |
--------------------------------------------------------------------------------
/tests/div.expl:
--------------------------------------------------------------------------------
1 | //test division
2 |
3 | let x : int = test(0, 0, 100)
4 |
5 | function test(x : int) (y : int) (z : int) : int
6 | {
7 | let x : int = 0
8 | let y : int = 0
9 | input x
10 | y := 10 / x
11 | output y
12 | output z // output 100 (to make sure z passed in RDX is not clobbered *)
13 | return y
14 | }
15 |
--------------------------------------------------------------------------------
/tests/fact.expl:
--------------------------------------------------------------------------------
1 | // Input a number and print its factorial
2 |
3 | let v : int = exec(0)
4 |
5 | function iter_fact (x : int) : int {
6 | let c : int = 1
7 | let z : int = 1
8 | while 0 < x {
9 | z := z * x
10 | x := x - c
11 | }
12 | return z
13 | }
14 |
15 | function rec_fact (x : int) : int {
16 | let r : int = 1
17 | if x = 0 then
18 | return r
19 | else {
20 | r := x * rec_fact(x - 1)
21 | return r
22 | }
23 | }
24 |
25 | function exec (x : int) : int {
26 | let y : int = 0
27 | let r : int = 0
28 | input x
29 | y := iter_fact(x)
30 | output y
31 | y := rec_fact(x)
32 | output y
33 | return r
34 | }
35 |
--------------------------------------------------------------------------------
/tests/runtime_errors.expl:
--------------------------------------------------------------------------------
1 | // Test for exceptions. Uncommenting any of the below should have an exception
2 |
3 | //let r1 : int = array_bound1(0)
4 | //let r2 : int = array_bound2(0)
5 | //let r3 : int = 110/0
6 | //let r4 : int = null(0)
7 | //let r5 : int = a[2]
8 | //let r6 : int = null2(a)
9 | let a : array 1 = array[10]
10 |
11 | function array_bound1 (x : int) : int {
12 | let y : array 1 = array [10]
13 | x := y[10]
14 | return x
15 | }
16 |
17 | function array_bound2 (x : int) : int {
18 | let y : array 1 = array [10]
19 | y[10] := x
20 | return x
21 | }
22 |
23 | function null (x : int) : int {
24 | let r5 : int = a[0]
25 | let a : array 1 = array[10]
26 | return x
27 | }
28 |
29 | function null2 (x : array 1) : int {
30 | let i : int = x[0]
31 | return i
32 | }
33 |
--------------------------------------------------------------------------------
/tests/insert_sort.expl:
--------------------------------------------------------------------------------
1 | // insertion sort
2 |
3 | let r : int = test(1)
4 |
5 | function test (i : int) : int
6 | {
7 | let len : int = 0
8 | let a : array 1 = array[0]
9 | let i : int = 0
10 | let y : int = 0
11 | input len
12 | a := array[len]
13 | while i < len {
14 | input y
15 | a[i] := y
16 | i := i + 1
17 | }
18 |
19 | i := sort(a, len)
20 |
21 | i := 0
22 | while i < len {
23 | y := a[i]
24 | output y
25 | i := i + 1
26 | }
27 | return i
28 | }
29 |
30 | function sort (a : array 1) (len : int) : int {
31 | let i : int = 1
32 | let y : int = 0
33 | let j : int = 0
34 | while i < len {
35 | y := a[i]
36 | j := i - 1
37 | while (!(j < 0) && (a[j] > y)) {
38 | a[j+1] := a[j]
39 | j := j - 1
40 | }
41 | a[j+1] := y
42 | i := i + 1
43 | }
44 | return i
45 | }
46 |
--------------------------------------------------------------------------------
/src/blockInterp.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2016 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* An interpreter for CFGs *)
20 |
21 | val interp_prog : int -> BlockStructure.cfg -> unit
22 |
--------------------------------------------------------------------------------
/src/regAlloc.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2016 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | open BlockStructure
20 |
21 | val reg_alloc : (SourceAst.id * int) list -> int -> cfg -> (cfg * int)
22 |
--------------------------------------------------------------------------------
/src/typeCheck.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* The type checker *)
20 |
21 | open SourceAst
22 |
23 | type t
24 | val type_prog : prog -> prog
25 |
--------------------------------------------------------------------------------
/src/frontEnd.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2016 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* The front end packaging lexing, parsing and type checking *)
20 |
21 | val front_end : string -> bool -> SourceAst.prog
22 |
--------------------------------------------------------------------------------
/src/constProp.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2016 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* Do constant propagation and folding *)
20 |
21 | open SourceAst
22 | val prop_stmts : exp Idmap.t -> stmt list -> exp Idmap.t * stmt list
23 |
--------------------------------------------------------------------------------
/src/compileFunction.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* Compiler for a single function *)
20 |
21 | val compile_fun : bool -> string -> BlockStructure.Varset.t -> SourceAst.func -> SourceAst.id * X86.instruction list
22 |
23 |
--------------------------------------------------------------------------------
/tests/insert_sort.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | extern int64_t input();
5 | extern void output(int64_t i);
6 |
7 | int sort(int64_t a[], int64_t len) {
8 | int64_t i = 1;
9 | int64_t y = 1;
10 | int64_t j = 1;
11 | while (i < len) {
12 | int64_t y = a[i];
13 | int64_t j = i - 1;
14 | while (!(j < 0) && (a[j] > y)) {
15 | a[j+1] = a[j];
16 | j = j - 1;
17 | }
18 | a[j+1] = y;
19 | i = i + 1;
20 | }
21 | return i;
22 | }
23 |
24 | int64_t test(int64_t i2) {
25 | int64_t len = 0;
26 | int64_t *a;
27 | int64_t i = 0;
28 | int64_t y = 0;
29 | len = input();
30 | a = malloc(len * sizeof(int64_t));
31 | while (i < len) {
32 | int64_t y = input();
33 | a[i] = y;
34 | i = i + 1;
35 | }
36 |
37 | i = sort(a,len);
38 |
39 | i = 0;
40 | while (i < len) {
41 | int64_t y = a[i];
42 | output(y);
43 | i = i + 1;
44 | }
45 | return i;
46 | }
47 |
48 | int64_t r;
49 |
50 | int main(int argc, char** argv) {
51 | r = test(1);
52 | return 0;
53 | }
54 |
--------------------------------------------------------------------------------
/src/shrinkImmediates.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2016 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* Ensure that all immediate arguments fit into 32 bits. We assume that
20 | constant propagation has ensured that no operation has two immediate
21 | arguments, and we maintain that property here. *)
22 |
23 | val shrink_imm : BlockStructure.cfg -> BlockStructure.cfg
24 |
--------------------------------------------------------------------------------
/src/instrSelX86.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2016 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* Convert linearised three-address code to x86-64 *)
20 |
21 | val num_regs : int
22 | val argument_reg_numbers : int list
23 | val be_to_x86 : bool -> BlockStructure.block_elem -> X86.instruction list
24 | val to_x86 : bool -> LineariseCfg.linear list -> int -> X86.instruction list
25 |
--------------------------------------------------------------------------------
/tests/scoping.expl:
--------------------------------------------------------------------------------
1 | // Test/demonstrate variable scoping
2 |
3 | // should print 1, 11, 12, 8, 5, 9, 10, then 100 plus those, then
4 | // 101, 11, 12, 16, 13, 9, 10, then 100 plus those, then finally
5 | // 201, 2, 3, 4
6 |
7 | let g : int = 1
8 | let x : int = 2
9 | let y : int = 3
10 | let z : int = 4
11 |
12 | let r : int = test1(5, 6, 7, 8)
13 | let r2 : int = test1(13, 14, 15, 16)
14 | let r3 : int = test2(1)
15 |
16 | function test1 (a : int) (b : int) (y : int) (z : int) : int {
17 | let b : int = 9
18 | let c : int = 10
19 | let x : int = 11
20 | let y : int = 12
21 |
22 | output g
23 | output x
24 | output y
25 | output z
26 | output a
27 | output b
28 | output c
29 |
30 | g := g + 100
31 | x := x + 100
32 | y := y + 100
33 | z := z + 100
34 | a := a + 100
35 | b := b + 100
36 | c := c + 100
37 |
38 | output g
39 | output x
40 | output y
41 | output z
42 | output a
43 | output b
44 | output c
45 |
46 | return x
47 | }
48 |
49 | function test2 (aa : int) : int {
50 | output g
51 | output x
52 | output y
53 | output z
54 | return aa
55 | }
56 |
--------------------------------------------------------------------------------
/src/liveVarAnalysis.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2016 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | open BlockStructure
20 |
21 | type cfg_annot = { gen : Varset.t; kill : Varset.t; live_exit : Varset.t }
22 |
23 | type cfg = (cfg_entry * cfg_annot) list
24 |
25 | val pp_cfg : Format.formatter -> cfg -> unit
26 |
27 | val lva : Varset.t -> BlockStructure.cfg -> cfg
28 |
29 | val remove_unused_writes : cfg -> cfg
30 |
--------------------------------------------------------------------------------
/src/lineariseCfg.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* Flatten the CFG into a list of three-address code. *)
20 |
21 | open BlockStructure
22 | type linear =
23 | | Instr of block_elem
24 | | CJump of test * bool * string (* jump to string if var is bool *)
25 | | Jump of string
26 | | Label of string
27 | | Return of var option
28 |
29 | type linear_list = linear list
30 |
31 | val cfg_to_linear : cfg -> linear list
32 |
33 | val pp_linear_list : Format.formatter -> linear list -> unit
34 |
35 | val init_traversal : cfg -> cfg_entry Util.Intmap.t
36 |
--------------------------------------------------------------------------------
/tests/Makefile:
--------------------------------------------------------------------------------
1 | # Try to detect the operating system to set flags for the assembler.
2 | # OSX uses macho64 binary format, and needs an _ prefixed before all global
3 | # symbols. Linux uses elf64 binary format, and the symbols are all fine as they
4 | # are.
5 | OS := $(shell uname)
6 | ifeq ($(OS), Darwin)
7 | FORMAT = macho64
8 | PREFIX = --prefix _
9 | else
10 | FORMAT = elf64
11 | PREFIX =
12 | endif
13 |
14 | COMPILER = ../src/compile.byte
15 | EXEC_FILES = fact.exe array.exe insert_sort.exe quicksort.exe div.exe scoping.exe runtime_errors.exe
16 |
17 | .PHONY : all clean
18 |
19 | all : $(EXEC_FILES)
20 |
21 | %.s %.dot : %.expl $(COMPILER)
22 | $(COMPILER) $(OPTIONS) $<
23 |
24 | %.o : %.s
25 | nasm -f $(FORMAT) $(PREFIX) $<
26 |
27 | %.o : %.c
28 |
29 | # Use gcc to do the linking since we want to include libc, and our
30 | # object code starts from a C-style main function. We do not want to
31 | # enter at _start (which is where the OS dumps you on process
32 | # creation), because there's a bunch of book keeping and alignment
33 | # stuff to sort out. gcc/libc include code to do that for us.
34 | %.exe : %.o ../runtime/io.o ../runtime/array.o
35 | gcc ../runtime/io.o ../runtime/array.o $< -o $@
36 |
37 | %.pdf : %.dot
38 | dot -Tpdf $< > $@
39 |
40 | .PRECIOUS : %.s
41 |
42 | clean :
43 | rm -f *.o *.s $(EXEC_FILES) *.pdf *.dot
44 |
--------------------------------------------------------------------------------
/src/unnestExp.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2016 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* Flatten expressions so that they follow this grammar. We don't introduce a
20 | new type, but do define predicates on the SourceAst.exp type.
21 |
22 | type ae =
23 | | Num of int64
24 | | Bool of bool
25 | | Ident of SourceAst.id
26 |
27 | type flat_exp =
28 | | Num of int64
29 | | Bool of bool
30 | | Ident of SourceAst.id * ae list
31 | | Op of ae * op * ae
32 | | Uop of ae
33 | | Array of ae list
34 | *)
35 |
36 | open SourceAst
37 | val unnest : stmt list -> stmt list
38 | val is_atomic : exp -> bool
39 | val is_flat : exp -> bool
40 |
--------------------------------------------------------------------------------
/src/interp.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2016 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* Driver for interpreter executable. *)
20 |
21 | let filename = ref None;;
22 |
23 | let usage_msg =
24 | "example interpreter \nexample usage: " ^ Sys.argv.(0)^ " test.expl\n";;
25 |
26 | let _ =
27 | Arg.parse []
28 | (fun s ->
29 | match !filename with
30 | | None ->
31 | filename := Some s
32 | | Some s' ->
33 | (Format.printf "Error: given multiple files to run: %s and %s\n" s' s;
34 | exit 1))
35 | usage_msg;;
36 |
37 | let _ =
38 | match !filename with
39 | | None ->
40 | (print_string usage_msg;
41 | exit 1)
42 | | Some filename ->
43 | AstInterp.interp_prog (FrontEnd.front_end filename false)
44 |
--------------------------------------------------------------------------------
/src/astInterp.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* An interpreter for ASTs *)
20 |
21 | (* For when the interpreter crashed, such as array bounds violations *)
22 | exception Crash of string
23 |
24 | (* For errors that a well-typed program can't have *)
25 | exception TypeError
26 |
27 | (* Values are either integers or n-dimensional arrays of integers.
28 | We keep multi-dimensional arrays in a single dimensional one and include a
29 | list of how big each dimension is.
30 | We represent bools as numbers: true = 1L and false = 0L *)
31 | type val_t =
32 | | Vint of int64
33 | | Varray of int list * int64 array
34 |
35 | val do_op : Tokens.op -> int64 -> int64 -> int64
36 |
37 | val interp_prog : SourceAst.prog -> unit
38 |
--------------------------------------------------------------------------------
/runtime/io.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | */
18 |
19 | /* Trivial I/O runtime library */
20 |
21 | #include
22 | #include
23 | #include
24 | #include
25 |
26 | // Read in a 64 bit signed integer and return it. First print a > as prompt.
27 | int64_t input() {
28 | int64_t i;
29 | printf("> ");
30 | fflush(stdout);
31 | scanf("%" PRId64, &i);
32 | if (errno == ERANGE) {
33 | printf("Input number does not fit in a 64-bit signed integer\n");
34 | exit(1);
35 | }
36 | return i;
37 | }
38 |
39 | // Print a given 64-bt
40 | void output(int64_t i) {
41 | printf("%" PRId64 "\n", i);
42 | return;
43 | }
44 |
45 | // signal an error
46 | void signal_error(int64_t errn) {
47 | switch (errn) {
48 | case 0:
49 | printf("array bounds error\n");
50 | break;
51 | case 1:
52 | printf("null pointer dereference\n");
53 | break;
54 | default:
55 | printf("unknown runtime error\n");
56 | break;
57 | }
58 | exit(1);
59 | }
60 |
--------------------------------------------------------------------------------
/src/tokens.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* The language's tokens, and a simple lexer *)
20 |
21 | type op =
22 | | Plus
23 | | Minus
24 | | Times
25 | | Div
26 | | Lshift
27 | | BitOr
28 | | BitAnd
29 | | Lt
30 | | Gt
31 | | Eq
32 | | And
33 | | Or
34 |
35 | type uop =
36 | | Not
37 |
38 | type token =
39 | | Num of int64
40 | | Ident of string
41 | | Op of op
42 | | Uop of uop
43 | | Lparen
44 | | Rparen
45 | | Lcurly
46 | | Rcurly
47 | | Lbrac
48 | | Rbrac
49 | | Colon
50 | | Comma
51 | | While
52 | | Do
53 | | If
54 | | Then
55 | | Else
56 | | Assign
57 | | True
58 | | False
59 | | Input
60 | | Output
61 | | Array
62 | | Int
63 | | Bool
64 | | Let
65 | | Function
66 | | Return
67 |
68 | type tok_loc = (token * int)
69 |
70 | val show_uop : uop -> string
71 | val show_op : op -> string
72 | val show_token : token -> string
73 | val pp_tok_loc : Format.formatter -> tok_loc -> unit
74 | val lex : string -> int -> int -> tok_loc list
75 |
--------------------------------------------------------------------------------
/tests/quicksort.expl:
--------------------------------------------------------------------------------
1 | // Quicksort
2 |
3 | let r : int = test(1)
4 |
5 | function test (i : int) : int
6 | {
7 | let len : int = 0
8 | let a : array 1 = array[0]
9 | let i : int = 0
10 | let y : int = 0
11 | input len
12 | a := array[len]
13 | while i < len {
14 | input y
15 | a[i] := y
16 | i := i + 1
17 | }
18 |
19 | i := sort(a, 0, len-1)
20 |
21 | i := 0
22 | while i < len {
23 | y := a[i]
24 | output y
25 | i := i + 1
26 | }
27 | return i
28 | }
29 |
30 | // Partition an array between lower and upper (inclusive), so that everything
31 | // before (and including) the returned index is <= than the pivot and
32 | // everything after is >= the pivot. The return value is strictly less than
33 | // upper. Assume that lower < upper. This is Tony Hoare's partitioning
34 | // algorithm.
35 | function partition (a : array 1) (pivot : int) (lower : int) (upper : int) : int {
36 | let temp : int = 0
37 | lower := lower - 1
38 | upper := upper + 1
39 | do {
40 | do
41 | lower := lower + 1
42 | while (a[lower] < pivot)
43 | do
44 | upper := upper - 1
45 | while (a[upper] > pivot)
46 | if (lower < upper) then {
47 | temp := a[lower]
48 | a[lower] := a[upper]
49 | a[upper] := temp
50 | }
51 | else
52 | return upper
53 | } while true
54 | return upper
55 | }
56 |
57 | // Sort the array between first and last, inclusive. Return a dummy value.
58 | function sort (a : array 1) (first : int) (last : int) : int {
59 | let mid : int = 0
60 | let temp : int = 0
61 |
62 | if (first = last) then
63 | return temp
64 | else {
65 | mid := partition(a, a[first], first, last)
66 | temp := sort(a,first,mid)
67 | temp := sort(a,mid+1,last)
68 | return temp
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/src/frontEnd.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* The front end. It packages lexing, parsing and type checking for use by the
20 | interpreter and type checker both. *)
21 |
22 | open Util
23 |
24 | (* Parse and type check the given filename. *)
25 | let front_end (filename : string) (debug : bool) : SourceAst.prog
26 | =
27 | if Filename.check_suffix filename ".expl" then
28 | try
29 | let input = Std.input_file filename in
30 | let toks = Tokens.lex input 0 1 in
31 | if debug then
32 | Format.printf "%a@\n@\n" (pp_list Tokens.pp_tok_loc) toks;
33 | let ast = SourceAst.parse_program toks in
34 | if debug then
35 | Format.printf "%a@\n@\n" SourceAst.pp_program ast;
36 | let ast2 = TypeCheck.type_prog ast in
37 | if debug then
38 | Format.printf "%a@\n@\n" SourceAst.pp_program ast2;
39 | ast2
40 | with
41 | | BadInput s ->
42 | Format.printf "%s\n" s;
43 | exit 1
44 | else
45 | (Format.printf "Expects filename ending in .expl\n";
46 | exit 1)
47 |
--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
1 | PACKAGES = str
2 | FLAGS = -bin-annot -g -principal -safe-string -strict-formats -strict-sequence -w +a-42-4
3 |
4 | FILES = util tokens sourceAst typeCheck frontEnd constProp unnestExp \
5 | blockStructure shrinkImmediates liveVarAnalysis regAlloc lineariseCfg x86 \
6 | instrSelX86 compileFunction astInterp interp compile
7 |
8 | FILES_ML = $(addsuffix .ml,$(FILES))
9 | FILES_MLI = $(addsuffix .mli,$(FILES))
10 | FILES_CMI = $(addsuffix .cmi,$(FILES))
11 | FILES_CMT = $(addsuffix .cmt,$(FILES))
12 | FILES_CMTI = $(addsuffix .cmti,$(FILES))
13 | FILES_CMO = $(addsuffix .cmo,$(FILES))
14 | FILES_CMX = $(addsuffix .cmx,$(FILES))
15 | FILES_O = $(addsuffix .o,$(FILES))
16 |
17 | .PHONY : ocb-compile.byte ocb-compile.native ocb-interp.byte ocb-interp.native all
18 |
19 | all : ocb-interp.byte ocb-compile.byte
20 |
21 | ocb-interp.byte :
22 | ocamlbuild -use-ocamlfind interp.byte
23 |
24 | ocb-interp.native :
25 | ocamlbuild -use-ocamlfind interp.native
26 |
27 | interp.byte : $(FILES_CMO)
28 | ocamlfind ocamlc -linkpkg -package $(PACKAGES) $(FILES_CMO) -o $@
29 |
30 | interp.native : $(FILES_CMX)
31 | ocamlfind ocamlopt -linkpkg -package $(PACKAGES) $(FILES_CMX) -o $@
32 |
33 | ocb-compile.byte :
34 | ocamlbuild -use-ocamlfind compile.byte
35 |
36 | ocb-compile.native :
37 | ocamlbuild -use-ocamlfind compile.native
38 |
39 | compile.byte : $(FILES_CMO)
40 | ocamlfind ocamlc -linkpkg -package $(PACKAGES) $(FILES_CMO) -o $@
41 |
42 | compile.native : $(FILES_CMX)
43 | ocamlfind ocamlopt -linkpkg -package $(PACKAGES) $(FILES_CMX) -o $@
44 |
45 | %.cmi : %.mli
46 | ocamlfind ocamlc -package $(PACKAGES) $(FLAGS) -o $@ -c $<
47 |
48 | %.cmo : %.ml
49 | ocamlfind ocamlc -package $(PACKAGES) $(FLAGS) -o $@ -c $<
50 |
51 | %.cmx : %.ml
52 | ocamlfind ocamlopt -package $(PACKAGES) $(FLAGS) -o $@ -c $<
53 |
54 | .PHONY : depend clean
55 |
56 | clean :
57 | ocamlbuild -clean
58 | rm -f $(FILES_CMI) $(FILES_CMTI) $(FILES_CMT) $(FILES_CMO) $(FILES_CMX) $(FILES_O) compile.byte compile.native interp.byte interp.native
59 |
60 | depend :
61 | ocamlfind ocamldep $(FILES_ML) $(FILES_MLI) > .depend
62 |
63 | include .depend
64 |
--------------------------------------------------------------------------------
/src/README.md:
--------------------------------------------------------------------------------
1 | Compiler structure
2 | ------------------
3 |
4 | The top-level files are _compile.ml_ and _interp.ml_, driving a compiler and
5 | interpreter respectively.
6 |
7 | Front end
8 | ---------
9 |
10 | The front-end in _frontEnd.ml_ packages a lexer (_tokens.ml_), parser
11 | (_sourceAst.ml_), and type checker (_typeCheck.ml_) and produces an AST. It is
12 | shared by the compiler and interpreter.
13 |
14 | AST transformations
15 | -------------------
16 |
17 | The AST goes through a series of transformations
18 |
19 | - _constProp.ml_: Propagates and folds constants.
20 | The compiler later relies on there being no operators with two constant
21 | operands.
22 | - _unnestExp.ml_: Un-nests expressions.
23 | Flattens out expressions, using assignments to temporary variables. Ensures
24 | that no expressions contain sub-expressions with operators in them. Also
25 | makes all array accesses single-dimensional.
26 | - _blockStructure.ml_: Transforms the AST to a control-flow graph of basic
27 | blocks. Requires expressions to be un-nested and && and || to be removed.
28 | Inserts array bounds checks.
29 |
30 | Basic-block transformations
31 | ---------------------------
32 |
33 | - _shrinkImmediates.ml_: Removes all constants that don't fit into 32 bits
34 | (since that's the maximum size of an immediate on x86-64), possibly
35 | introducing assignments to temporary variables.
36 | - _liveVarAnalysis.ml_: Performs live variable analysis, and removes unused
37 | assignments.
38 | - _regAlloc.ml_: Performs register allocation. This changes all identifiers from
39 | named variables to registers and stack offsets.
40 | - _lineariseCFG.ml_: Flatten the CFG, introducing labels and branches.
41 |
42 | Back end
43 | -------
44 | - _x86.ml_: An AST for the small subset of x86-64 that the compiler generates.
45 | Includes a printer for it in NASM syntax.
46 | - _instrSelX86.ml_: Generate x86 code.
47 | - _compileFunction.ml_: packages up the compiler for single functions
48 |
49 | Misc
50 | ----
51 | - _util.ml_: Misc. utility functions.
52 | - _astInterp.ml_ and _blockInterp.ml_: Interpreters for the AST and CFG.
53 |
--------------------------------------------------------------------------------
/tests/array.expl:
--------------------------------------------------------------------------------
1 | // Basic testing of arrays
2 |
3 | let r1 : int = test1(0)
4 | let r2 : int = test2(0)
5 | let g : int = 2
6 |
7 | let a : array 2 = array[4][4]
8 | let f : int = fill(0)
9 | let b : array 1 = test3(a)
10 | let c : int = test4(0)
11 |
12 | function fill(x : int) : int {
13 | let i : int = 0
14 | let j : int = 0
15 | while i < 4 {
16 | j := 0
17 | while j < 4 {
18 | a[i][j] := i+j
19 | j := j + 1
20 | }
21 | i := i + 1
22 | }
23 | return x
24 | }
25 |
26 | function test1 (x : int) : int
27 | {
28 | let i : int = 0
29 | let x : array 1 = array[10]
30 | let j : int = 0
31 | output i // should print 0
32 | while i < 10 {
33 | x[i] := i
34 | i := i + 1
35 | }
36 | output i // should print 10
37 | j := x[0]
38 | output j // should print 0
39 | j := x[9]
40 | output j // should print 9
41 | return j
42 | }
43 |
44 | function test2 (x : int) : int
45 | {
46 | let x : array 3 = array[3][5][7]
47 | let count : int = 0
48 | let i : int = 0
49 | let j : int = 0
50 | let k : int = 0
51 | let o : int = 0
52 | while i < 3 {
53 | j := 0
54 | while j < 5 {
55 | k := 0
56 | while k < 7 {
57 | x[i][j][k] := count
58 | count := count + 1
59 | k := k + 1
60 | }
61 | j := j + 1
62 | }
63 | i := i + 1
64 | }
65 | o := x[0][0][0]
66 | output o // should print 0
67 | o := x[1][3][4]
68 | output o // should print 60
69 | o := x[2][4][6]
70 | output o //should print 104
71 | return o
72 | }
73 |
74 | function test3 (a : array 2) : array 1 {
75 | let b : array 1 = array[4]
76 | let i : int = 0
77 | let j : int = 0
78 | while i < 4 {
79 | j := 0
80 | while j < 4 {
81 | b[i] := b[i] + a[i][j]
82 | j := j + 1
83 | }
84 | i := i + 1
85 | }
86 | return b
87 | }
88 |
89 | function test4 (x : int) : int {
90 | let i : int = 0
91 | while i < 4 {
92 | x := b[i]
93 | output x // 6, 10, 14, 18
94 | i := i + 1
95 | }
96 | x := b[g] // 14
97 | output x
98 | return x
99 | }
100 |
--------------------------------------------------------------------------------
/src/sourceAst.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* The language's AST, and a predictive, recursive descent parser. See the
20 | ../README.md for the grammar. *)
21 |
22 | type scope =
23 | | Global
24 | | Parameter
25 | | Local
26 |
27 | val compare_scope : scope -> scope -> int
28 |
29 | type id =
30 | | Source of string * scope option
31 | | Temp of string * int
32 |
33 | module Idmap : Map.S with type key = id
34 |
35 | type exp =
36 | | Ident of id * exp list
37 | | Call of id * exp list
38 | | Num of int64
39 | | Bool of bool
40 | | Op of exp * Tokens.op * exp
41 | | Uop of Tokens.uop * exp
42 | (* Allocate a new array of given dimensions. Initialise to 0 *)
43 | | Array of exp list
44 |
45 | type stmt =
46 | | Assign of id * exp list * exp
47 | (* A generalised do/while loop. Always execute the first statement, then
48 | the test, then repeatedly do the 2nd, then first statement and then test
49 | 'while e s' becomes DoWhile (Stmts [], e, s) and 'do s while e' becomes
50 | DoWhile (s, e, Stmts []) *)
51 | | DoWhile of stmt * exp * stmt
52 | | Ite of exp * stmt * stmt
53 | | Stmts of stmt list
54 | | In of id
55 | | Out of id
56 | | Return of id option
57 | | Loc of stmt * int (* annotate a statement with it's source line number *)
58 |
59 | type typ =
60 | | Int
61 | | Bool
62 | (* An int array with the given number of dimensions *)
63 | | Array of int
64 |
65 | type var_dec = { var_name : id; typ : typ; init : exp; loc : int option }
66 |
67 | type func = { fun_name : id; params : (id * typ) list; ret : typ;
68 | locals : var_dec list; body : stmt list; loc : int option }
69 |
70 | type prog = { globals : var_dec list; funcs : func list }
71 |
72 | val show_id : id -> string
73 | val pp_stmt : Format.formatter -> stmt -> unit
74 | val pp_stmts : Format.formatter -> stmt list -> unit
75 | val pp_program : Format.formatter -> prog -> unit
76 | val parse_program : (Tokens.token * int) list -> prog
77 | val stmts_to_stmt : stmt list -> stmt
78 |
--------------------------------------------------------------------------------
/src/blockStructure.mli:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* A control flow graph representation with basic blocks, and the source AST ->
20 | CGF algorithm. Also compiles arrays to loads and stores. *)
21 |
22 | type var =
23 | | Vreg of int
24 | | Stack of int
25 | | Global of string
26 | | NamedSource of string * SourceAst.scope
27 | | NamedTmp of string * int
28 |
29 | module Varset : sig
30 | include Set.S with type elt = var
31 | val show : t -> string
32 | val pp : Format.formatter -> t -> unit
33 | end
34 |
35 | module Varmap : sig
36 | include Map.S with type key = var
37 | end
38 |
39 | type atomic_exp =
40 | | Ident of var
41 | | Num of int64
42 |
43 | type block_elem =
44 | | AssignOp of var * atomic_exp * Tokens.op * atomic_exp
45 | | AssignAtom of var * atomic_exp
46 | (* Ld (x,y,e) represents x := *(y+e) *)
47 | | Ld of var * var * atomic_exp
48 | (* St (x,e1,e2) represents *(x+e1) := e2 *)
49 | | St of var * atomic_exp * atomic_exp
50 | (* Call (x, f, aes) represents x := f(aes) *)
51 | | Call of var option * string * atomic_exp list
52 | (* BoundCheck (a1, a2) represents assert (a1 >= 0 && a1 < a2) *)
53 | | BoundCheck of atomic_exp * atomic_exp
54 | | NullCheck of var
55 |
56 | type basic_block = block_elem list
57 |
58 | type test_op =
59 | | Lt
60 | | Gt
61 | | Eq
62 |
63 | type test = atomic_exp * test_op * atomic_exp
64 |
65 | type next_block =
66 | | Return of var option
67 | | Next of int
68 | (* The first int is the block number if the ident is true, and the second if
69 | * it is false *)
70 | | Branch of test * int * int
71 |
72 | type cfg_entry = { bnum : int; elems : block_elem list; next : next_block;
73 | mutable started : bool; mutable finished : bool }
74 |
75 | type cfg = cfg_entry list
76 |
77 | val id_to_var : SourceAst.id -> var
78 |
79 | val show_var : var -> string
80 | val pp_block_elem : Format.formatter -> block_elem -> unit
81 | val pp_test : Format.formatter -> test -> unit
82 | val pp_cfg : Format.formatter -> cfg -> unit
83 | val pp_cfg_entry : Format.formatter -> cfg_entry -> unit
84 | val build_cfg : SourceAst.stmt list -> cfg
85 |
86 | val cfg_to_graphviz : Format.formatter -> cfg -> unit
87 |
--------------------------------------------------------------------------------
/src/.depend:
--------------------------------------------------------------------------------
1 | astInterp.cmo : util.cmo tokens.cmi sourceAst.cmi astInterp.cmi
2 | astInterp.cmx : util.cmx tokens.cmx sourceAst.cmx astInterp.cmi
3 | astInterp.cmi : tokens.cmi sourceAst.cmi
4 | blockInterp.cmo : util.cmo lineariseCfg.cmi blockStructure.cmi astInterp.cmi \
5 | blockInterp.cmi
6 | blockInterp.cmx : util.cmx lineariseCfg.cmx blockStructure.cmx astInterp.cmx \
7 | blockInterp.cmi
8 | blockInterp.cmi : blockStructure.cmi
9 | blockStructure.cmo : util.cmo tokens.cmi sourceAst.cmi blockStructure.cmi
10 | blockStructure.cmx : util.cmx tokens.cmx sourceAst.cmx blockStructure.cmi
11 | blockStructure.cmi : tokens.cmi sourceAst.cmi
12 | compile.cmo : x86.cmo util.cmo sourceAst.cmi instrSelX86.cmi frontEnd.cmi \
13 | compileFunction.cmi blockStructure.cmi
14 | compile.cmx : x86.cmx util.cmx sourceAst.cmx instrSelX86.cmx frontEnd.cmx \
15 | compileFunction.cmx blockStructure.cmx
16 | compileFunction.cmo : x86.cmo unnestExp.cmi sourceAst.cmi \
17 | shrinkImmediates.cmi regAlloc.cmi liveVarAnalysis.cmi lineariseCfg.cmi \
18 | instrSelX86.cmi constProp.cmi blockStructure.cmi compileFunction.cmi
19 | compileFunction.cmx : x86.cmx unnestExp.cmx sourceAst.cmx \
20 | shrinkImmediates.cmx regAlloc.cmx liveVarAnalysis.cmx lineariseCfg.cmx \
21 | instrSelX86.cmx constProp.cmx blockStructure.cmx compileFunction.cmi
22 | compileFunction.cmi : x86.cmo sourceAst.cmi
23 | constProp.cmo : util.cmo tokens.cmi sourceAst.cmi constProp.cmi
24 | constProp.cmx : util.cmx tokens.cmx sourceAst.cmx constProp.cmi
25 | constProp.cmi : sourceAst.cmi
26 | frontEnd.cmo : util.cmo typeCheck.cmi tokens.cmi sourceAst.cmi frontEnd.cmi
27 | frontEnd.cmx : util.cmx typeCheck.cmx tokens.cmx sourceAst.cmx frontEnd.cmi
28 | frontEnd.cmi : sourceAst.cmi
29 | instrSelX86.cmo : x86.cmo util.cmo tokens.cmi lineariseCfg.cmi \
30 | blockStructure.cmi instrSelX86.cmi
31 | instrSelX86.cmx : x86.cmx util.cmx tokens.cmx lineariseCfg.cmx \
32 | blockStructure.cmx instrSelX86.cmi
33 | instrSelX86.cmi : x86.cmo lineariseCfg.cmi blockStructure.cmi
34 | interp.cmo : frontEnd.cmi astInterp.cmi
35 | interp.cmx : frontEnd.cmx astInterp.cmx
36 | lineariseCfg.cmo : util.cmo blockStructure.cmi lineariseCfg.cmi
37 | lineariseCfg.cmx : util.cmx blockStructure.cmx lineariseCfg.cmi
38 | lineariseCfg.cmi : util.cmo blockStructure.cmi
39 | liveVarAnalysis.cmo : util.cmo tokens.cmi blockStructure.cmi \
40 | liveVarAnalysis.cmi
41 | liveVarAnalysis.cmx : util.cmx tokens.cmx blockStructure.cmx \
42 | liveVarAnalysis.cmi
43 | liveVarAnalysis.cmi : util.cmo blockStructure.cmi
44 | regAlloc.cmo : util.cmo blockStructure.cmi regAlloc.cmi
45 | regAlloc.cmx : util.cmx blockStructure.cmx regAlloc.cmi
46 | regAlloc.cmi : blockStructure.cmi
47 | shrinkImmediates.cmo : tokens.cmi blockStructure.cmi shrinkImmediates.cmi
48 | shrinkImmediates.cmx : tokens.cmx blockStructure.cmx shrinkImmediates.cmi
49 | shrinkImmediates.cmi : blockStructure.cmi
50 | sourceAst.cmo : util.cmo tokens.cmi sourceAst.cmi
51 | sourceAst.cmx : util.cmx tokens.cmx sourceAst.cmi
52 | sourceAst.cmi : tokens.cmi
53 | tokens.cmo : util.cmo tokens.cmi
54 | tokens.cmx : util.cmx tokens.cmi
55 | tokens.cmi :
56 | typeCheck.cmo : util.cmo tokens.cmi sourceAst.cmi typeCheck.cmi
57 | typeCheck.cmx : util.cmx tokens.cmx sourceAst.cmx typeCheck.cmi
58 | typeCheck.cmi : sourceAst.cmi
59 | unnestExp.cmo : util.cmo tokens.cmi sourceAst.cmi unnestExp.cmi
60 | unnestExp.cmx : util.cmx tokens.cmx sourceAst.cmx unnestExp.cmi
61 | unnestExp.cmi : sourceAst.cmi
62 | util.cmo :
63 | util.cmx :
64 | x86.cmo : util.cmo
65 | x86.cmx : util.cmx
66 |
--------------------------------------------------------------------------------
/runtime/array.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Example compiler
3 | * Copyright (C) 2015-2016 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | */
18 |
19 | /* Allocation of multi-dimensional arrays of 64-bit ints. For an array of
20 | * length n, store the length of the array in element 0, and the array in 1 to
21 | * n inclusive. We're casting between int64_t and int64_t* which is not
22 | * guaranteed to work by the C standard, but seems to work on standard OS X and
23 | * Linux C compilers. */
24 |
25 | #include
26 | #include
27 | #include
28 |
29 | /* Since the first element of an array will have a different type than the
30 | * rest, and the last dimension's array will have numbers rather than pointers. */
31 | union elt {
32 | uint64_t num;
33 | union elt *ptr;
34 | };
35 |
36 | typedef union elt elt;
37 |
38 | #define S sizeof(elt)
39 |
40 | /* A 1 dimensional array */
41 | elt* allocate1(int64_t dim1) {
42 | elt* x = malloc(S*(dim1+1));
43 | for (unsigned long i = 0; i < dim1; i++)
44 | x[i+1].num = 0;
45 | x[0].num = dim1;
46 | return x;
47 | }
48 |
49 | /* A 2 dimensional array is an array of arrays */
50 | elt* allocate2(int64_t dim1, int64_t dim2) {
51 | elt* x = malloc(S*(dim1+1));
52 | for (unsigned long i = 0; i < dim1; i++) {
53 | elt* y = malloc(S*(dim2+1));
54 | x[i+1].ptr = y;
55 | for (int64_t j = 0; j < dim2; j++)
56 | y[j+1].num = 0;
57 | y[0].num = dim2;
58 | }
59 | x[0].num = dim1;
60 | return x;
61 | }
62 |
63 | /* Allocate dimensions dim to num_dim where the length of each dimension is
64 | * given by the list dims[] */
65 | elt* allocate_n_help(int64_t dim, int64_t num_dim, int64_t dims[]) {
66 | if (dim == num_dim - 1)
67 | return allocate1(dims[dim]);
68 | else {
69 | elt* x = malloc(S*(dims[dim]+1));
70 | for (unsigned long i = 0; i < dims[dim]; i++) {
71 | x[i+1].ptr = allocate_n_help(dim+1, num_dim, dims);
72 | }
73 | x[0].num = dims[dim];
74 | return x;
75 | }
76 | }
77 |
78 | elt* allocate_n(int64_t num_dim, int64_t dims[]) {
79 | return allocate_n_help(0, num_dim, dims);
80 | }
81 |
82 | elt* allocate3(int64_t dim1, int64_t dim2, int64_t dim3) {
83 | int64_t dims[] = {dim1, dim2, dim3};
84 | return allocate_n(3, dims);
85 | }
86 |
87 | elt* allocate4(int64_t dim1, int64_t dim2, int64_t dim3, int64_t dim4) {
88 | int64_t dims[] = {dim1, dim2, dim3, dim4};
89 | return allocate_n(4, dims);
90 | }
91 |
92 | elt* allocate5(int64_t dim1, int64_t dim2, int64_t dim3, int64_t dim4, int64_t dim5) {
93 | int64_t dims[] = {dim1, dim2, dim3, dim4, dim5};
94 | return allocate_n(5, dims);
95 | }
96 |
97 | elt* allocate6(int64_t dim1, int64_t dim2, int64_t dim3, int64_t dim4, int64_t dim5, int64_t dim6) {
98 | int64_t dims[] = {dim1, dim2, dim3, dim4, dim5, dim6};
99 | return allocate_n(6, dims);
100 | }
101 |
102 | elt* allocate7(int64_t dim1, int64_t dim2, int64_t dim3, int64_t dim4, int64_t dim5, int64_t dim6, int64_t dim7) {
103 | int64_t dims[] = {dim1, dim2, dim3, dim4, dim5, dim6, dim7};
104 | return allocate_n(7, dims);
105 | }
106 |
--------------------------------------------------------------------------------
/src/compile.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* The main driver for the compiler executable. *)
20 |
21 | (* Command-line arguments *)
22 | let filename_ref = ref None;;
23 | let safe_ref = ref true;;
24 |
25 | let options = Arg.align ([("-safe", Arg.Bool (fun b -> safe_ref := b),
26 | "\tdo null pointer and array bounds checks")
27 | ]);;
28 |
29 | let usage_msg =
30 | "example compiler \nexample usage: " ^ Sys.argv.(0)^ " test.expl\n"
31 |
32 | let _ =
33 | Arg.parse options
34 | (fun s ->
35 | match !filename_ref with
36 | | None ->
37 | filename_ref := Some s
38 | | Some s' ->
39 | (Format.printf "Error: given multiple files to process: %s and %s\n"
40 | s' s;
41 | exit 1))
42 | usage_msg
43 |
44 | let filename =
45 | match !filename_ref with
46 | | None ->
47 | (print_string usage_msg;
48 | exit 1)
49 | | Some filename ->
50 | filename
51 |
52 | let prog = FrontEnd.front_end filename false;;
53 |
54 | open SourceAst
55 |
56 | (* Build a main function that just runs the initialisation of all of the
57 | globals *)
58 | let main_function =
59 | { fun_name = Source ("main", None); params = []; ret = Int;
60 | locals = [];
61 | body = List.map (fun d -> Assign (d.var_name, [], d.init)) prog.globals @
62 | [Return None];
63 | loc = None }
64 |
65 | let global_id_to_var (g : SourceAst.id) : BlockStructure.var =
66 | match g with
67 | | Source (i, Some Global) -> BlockStructure.NamedSource (i, Global)
68 | | _ -> assert false
69 |
70 | let globals =
71 | List.fold_right
72 | (fun d globals ->
73 | BlockStructure.Varset.add (global_id_to_var d.var_name) globals)
74 | prog.globals
75 | BlockStructure.Varset.empty ;;
76 |
77 | let functions =
78 | List.map (CompileFunction.compile_fun !safe_ref filename globals)
79 | (main_function::prog.funcs);;
80 |
81 | open Format
82 |
83 | let outfile = open_out (Filename.chop_extension filename ^ ".s");;
84 | let fmt = formatter_of_out_channel outfile;;
85 | (* Assembly wrapper *)
86 | fprintf fmt "[section .text align=16]@\n";;
87 | fprintf fmt "global main@\n@\n";;
88 | fprintf fmt "extern signal_error@\n";;
89 | fprintf fmt "extern input@\n";;
90 | fprintf fmt "extern output@\n";;
91 | fprintf fmt "extern allocate1@\n";;
92 | fprintf fmt "extern allocate2@\n";;
93 | fprintf fmt "extern allocate3@\n";;
94 | fprintf fmt "extern allocate4@\n";;
95 | fprintf fmt "extern allocate5@\n";;
96 | fprintf fmt "extern allocate6@\n";;
97 | fprintf fmt "extern allocate7@\n@\n";;
98 | List.iter
99 | (fun (name, code) -> fprintf fmt "%s:@\n%a" (show_id name) X86.pp_instr_list code)
100 | functions;;
101 | fprintf fmt "bound_error:@\n%a"
102 | (fun fmt instr -> X86.pp_instr_list fmt (InstrSelX86.be_to_x86 !safe_ref instr))
103 | (BlockStructure.Call (None, "signal_error", [BlockStructure.Num 0L]));;
104 | fprintf fmt "null_error:@\n%a"
105 | (fun fmt instr -> X86.pp_instr_list fmt (InstrSelX86.be_to_x86 !safe_ref instr))
106 | (BlockStructure.Call (None, "signal_error", [BlockStructure.Num 1L]));;
107 | (* bss segment for the global variables, all initialised to 0 *)
108 | fprintf fmt "[section .bss align=16]@\n";;
109 | fprintf fmt "default rel@\n";;
110 | List.iter (fun d -> fprintf fmt "%s: resq 1\n" (show_id d.var_name)) prog.globals;;
111 | close_out outfile;;
112 |
--------------------------------------------------------------------------------
/src/compileFunction.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* Compiler for a single function *)
20 |
21 | open SourceAst
22 |
23 | (* We treat a local variable declarations as simple assignment to the variable.
24 | *)
25 | let init_var_dec_to_0 (d : var_dec) : stmt =
26 | Assign (d.var_name, [], Num 0L)
27 |
28 | let var_dec_to_stmt (d : var_dec) : stmt =
29 | Assign (d.var_name, [], d.init)
30 |
31 | (* globals should contain all globals variables, so that we can tell the live
32 | variable analysis that they must be live when the function returns.
33 | *)
34 | let compile_fun safe filename (globals : BlockStructure.Varset.t) (f : func)
35 | : id * X86.instruction list =
36 | let ast =
37 | (* Zero out local variables, in case any of the initialisations are out of
38 | order and refer to uninitialised variables. Later removal of dead writes
39 | should be able to remove these where there is no problem *)
40 | List.map init_var_dec_to_0 f.locals @
41 | List.map var_dec_to_stmt f.locals @
42 | f.body in
43 |
44 | let (_,opt_ast) = ConstProp.prop_stmts SourceAst.Idmap.empty ast in
45 | (*Format.printf "@\n%a@\n" SourceAst.pp_stmts opt_ast; *)
46 |
47 | let no_nest_ast = UnnestExp.unnest opt_ast in
48 | (*Format.printf "@\n%a@\n" SourceAst.pp_stmts no_nest_ast; *)
49 |
50 | let cfg = BlockStructure.build_cfg no_nest_ast in
51 | (*Format.printf "@\n%a@\n" BlockStructure.pp_cfg cfg; *)
52 | (* Print the CFG in dot format. Process the .dot file with dot -Tpdf FILENAME > FILENAME.pdf.
53 | dot is part of the graphviz package http://www.graphviz.org *)
54 | (*
55 | let outfile = open_out (Filename.chop_extension filename ^ "_" ^ show_id f.fun_name ^ ".dot") in
56 | let fmt = Format.formatter_of_out_channel outfile in
57 | Format.fprintf fmt "%a" BlockStructure.cfg_to_graphviz cfg;
58 | close_out outfile;
59 | *)
60 |
61 | let cfg' = ShrinkImmediates.shrink_imm cfg in
62 | (* Format.printf "@\n%a@\n" BlockStructure.pp_cfg cfg';*)
63 |
64 | let lva_cfg0 = LiveVarAnalysis.lva globals cfg' in
65 | (* Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg0; *)
66 |
67 | let lva_cfg1 = LiveVarAnalysis.remove_unused_writes lva_cfg0 in
68 | (*Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg1;*)
69 |
70 |
71 | (* Iterate analysis for examples like this:
72 |
73 | input c
74 | x := c
75 | y := 1
76 | if (c = 0) then
77 | y := x
78 | else
79 | y := x
80 | z := y
81 | a := z
82 | output x
83 |
84 | *)
85 |
86 | let lva_cfg2 = LiveVarAnalysis.lva globals (List.map fst lva_cfg1) in
87 | (* Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg2; *)
88 |
89 | let lva_cfg3 = LiveVarAnalysis.remove_unused_writes lva_cfg2 in
90 | (* Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg3; *)
91 |
92 | let lva_cfg4 = LiveVarAnalysis.lva globals (List.map fst lva_cfg3) in
93 | (* Format.printf "@\n%a@\n" LiveVarAnalysis.pp_cfg lva_cfg4;*)
94 |
95 | let (reg_cfg, num_stack) =
96 | RegAlloc.reg_alloc (Util.zip (List.map fst f.params) InstrSelX86.argument_reg_numbers)
97 | InstrSelX86.num_regs
98 | (List.map fst lva_cfg4)
99 | in
100 | (* Format.printf "@\n%a@\n" BlockStructure.pp_cfg reg_cfg; *)
101 |
102 | let linear = LineariseCfg.cfg_to_linear reg_cfg in
103 | (* Format.printf "@\n%a@\n" LineariseCfg.pp_linear_list linear; *)
104 |
105 | let x86 = InstrSelX86.to_x86 safe linear num_stack in
106 |
107 | (f.fun_name, x86)
108 |
--------------------------------------------------------------------------------
/src/lineariseCfg.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* Flatten the CFG into a list of three-address code. *)
20 | (* We generate labels of the form .block%d. The prefix . makes nasm treat them
21 | as local to the function, so we don't have problems assembling files with
22 | multiple functions and therefore duplicate block labels. *)
23 |
24 | open BlockStructure
25 |
26 | type linear =
27 | | Instr of block_elem
28 | | CJump of test * bool * string (* jump to string if the test is bool *)
29 | | Jump of string
30 | | Label of string
31 | | Return of var option
32 |
33 | let pp_linear (fmt : Format.formatter) (l : linear) : unit =
34 | match l with
35 | | Instr b ->
36 | Format.fprintf fmt " %a@\n" pp_block_elem b
37 | | CJump (v, b, s) ->
38 | Format.fprintf fmt " if (%a) = %s goto %s@\n"
39 | pp_test v
40 | (if b then "true" else "false")
41 | s
42 | | Jump s ->
43 | Format.fprintf fmt " goto %s@\n" s
44 | | Label s ->
45 | Format.fprintf fmt "%s:@\n" s
46 | | Return None ->
47 | Format.fprintf fmt " return@\n"
48 | | Return (Some v) ->
49 | Format.fprintf fmt " return %s@\n" (show_var v)
50 |
51 | type linear_list = linear list
52 |
53 | let rec pp_linear_list (fmt : Format.formatter) (ls : linear list) =
54 | match ls with
55 | | [] -> ()
56 | | x::y ->
57 | (pp_linear fmt x;
58 | pp_linear_list fmt y)
59 |
60 | module I = Util.Intmap
61 |
62 | (* Initialise the mutable visited fields to false, and put CFG into a map *)
63 | let init_traversal (cfg : cfg) : cfg_entry I.t =
64 | List.iter (fun x -> x.started <- false; x.finished <- false) cfg;
65 | List.fold_left (fun map x -> I.add x.bnum x map) I.empty cfg
66 |
67 | (* Linearise the reachable cfg, starting from the block with index next_block,
68 | but don't do already visited blocks *)
69 | (* This is essentially a depth-first search, pre-order traversal *)
70 | let rec cfg_to_linear (next_block : int) (cfg : cfg_entry I.t) : linear list =
71 | let b = I.find next_block cfg in
72 | if b.finished then
73 | (* Don't output the block twice *)
74 | []
75 | else
76 | (b.finished <- true;
77 | Label (".block" ^ string_of_int b.bnum) ::
78 | List.map (fun x -> Instr x) b.elems @
79 | match b.next with
80 | | Return v -> [(Return v : linear)]
81 | | Next i ->
82 | if (I.find i cfg).started then
83 | (* We've started the next block, so we'll just jump to it *)
84 | [Jump (".block" ^ string_of_int i)]
85 | else
86 | (* We haven't started the next block, so we can put it here and omit
87 | the jump *)
88 | ((I.find i cfg).started <- true;
89 | cfg_to_linear i cfg)
90 | | Branch (v, t1, t2) ->
91 | let c1 = I.find t1 cfg in
92 | let c2 = I.find t2 cfg in
93 | match (c1.started, c2.started) with
94 | | (false, false) ->
95 | c1.started <- true;
96 | c2.started <- true;
97 | CJump (v, true, ".block" ^ string_of_int t1) ::
98 | cfg_to_linear t2 cfg @
99 | cfg_to_linear t1 cfg
100 | | (true, true) ->
101 | [CJump (v, true, ".block" ^ string_of_int t1);
102 | Jump (".block" ^ string_of_int t2)]
103 | | (true, false) ->
104 | (c2.started <- true;
105 | [CJump (v, true, ".block" ^ string_of_int t1)] @
106 | cfg_to_linear t2 cfg)
107 | | (false, true) ->
108 | (c1.started <- true;
109 | [CJump (v, false, ".block" ^ string_of_int t2)] @
110 | cfg_to_linear t1 cfg))
111 |
112 | let cfg_to_linear cfg =
113 | cfg_to_linear 0 (init_traversal cfg)
114 |
--------------------------------------------------------------------------------
/src/util.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* General utility functions *)
20 |
21 | (* For lex/parse/type errors *)
22 | exception BadInput of string
23 | (* For cases that shouldn't be able to happen in the compiler *)
24 |
25 | exception InternalError of string
26 |
27 | (* Maps with string keys *)
28 | module Strmap = Map.Make(String)
29 |
30 | (* Maps with integer keys *)
31 | module Intmap =
32 | Map.Make(struct type t = int let (compare : int -> int -> int) = compare end)
33 |
34 | (* Build the list [0, ..., n-1] *)
35 | let count (n : int) : int list =
36 | let rec f n next =
37 | if n <= 0 then
38 | []
39 | else
40 | next :: f (n - 1) (next + 1)
41 | in
42 | f n 0
43 |
44 | (* Combine 2 lists into a list of pairs, truncating the longer list *)
45 | let rec zip (l1 : 'a list) (l2 : 'b list) : ('a * 'b) list =
46 | match (l1,l2) with
47 | | (h1::t1, h2::t2) -> (h1,h2) :: zip t1 t2
48 | | _ -> []
49 |
50 | (* Convert a list to a ; separated string *)
51 | let show_list' (d1 : string) (d2 : string) (show : 'a -> string) (l : 'a list)
52 | : string =
53 | let rec f l =
54 | match l with
55 | | [] -> ""
56 | | [h] -> show h
57 | | (h::t) ->
58 | show h ^ "; " ^ f t
59 | in
60 | d1 ^ f l ^ d2
61 |
62 | let show_list show = show_list' "[" "]" show
63 | let show_set show = show_list' "{" "}" show
64 |
65 | (* Pretty-pring a list, separated by ; *)
66 | let pp_list' (d1 : string) (d2 : string) (pp : Format.formatter -> 'a -> unit)
67 | (fmt : Format.formatter) (l : 'a list)
68 | : unit =
69 | let rec f fmt l =
70 | match l with
71 | | [] -> ()
72 | | [h] -> pp fmt h
73 | | (h::t) ->
74 | Format.fprintf fmt "%a;@ %a"
75 | pp h
76 | f t
77 | in
78 | Format.fprintf fmt "%s@[%a@]%s" d1 f l d2
79 |
80 | let pp_list pp = pp_list' "[" "]" pp
81 | let pp_set pp = pp_list' "{" "}" pp
82 |
83 | let option_map (f : 'a -> 'b) (x : 'a option) : 'b option =
84 | match x with
85 | | None -> None
86 | | Some y -> Some (f y)
87 |
88 | (* Compare two options, where None is less than Some *)
89 | let option_compare (c : 'a -> 'b -> int) (o1 : 'a option) (o2 :'b option)
90 | : int =
91 | match (o1, o2) with
92 | | (None, None) -> 0
93 | | (Some _, None) -> 1
94 | | (None, Some _) -> -1
95 | | (Some x, Some y) -> c x y
96 |
97 | module Std = struct
98 | (* Copy and paste from extlib to get input_file without making a dependency.
99 | extlib is LGPL 2.1, and so this sub-module is too.
100 | https://github.com/ygrek/ocaml-extlib/blob/33f744ddb28d6a0f4c96832145e1a6e384644709/src/std.ml *)
101 |
102 | let finally handler f x =
103 | let r = (
104 | try
105 | f x
106 | with
107 | e -> handler (); raise e
108 | ) in
109 | handler ();
110 | r
111 |
112 | let buf_len = 8192
113 |
114 | let input_all ic =
115 | let rec loop acc total buf ofs =
116 | let n = input ic buf ofs (buf_len - ofs) in
117 | if n = 0 then
118 | let res = Bytes.create total in
119 | let pos = total - ofs in
120 | let _ = Bytes.blit buf 0 res pos ofs in
121 | let coll pos buf =
122 | let new_pos = pos - buf_len in
123 | Bytes.blit buf 0 res new_pos buf_len;
124 | new_pos in
125 | let _ = List.fold_left coll pos acc in
126 | (* [res] doesn't escape and will not be mutated again *)
127 | Bytes.unsafe_to_string res
128 | else
129 | let new_ofs = ofs + n in
130 | let new_total = total + n in
131 | if new_ofs = buf_len then
132 | loop (buf :: acc) new_total (Bytes.create buf_len) 0
133 | else loop acc new_total buf new_ofs in
134 | loop [] 0 (Bytes.create buf_len) 0
135 |
136 | let input_file ?(bin=false) fname =
137 | let ch = (if bin then open_in_bin else open_in) fname in
138 | finally (fun () -> close_in ch) input_all ch
139 |
140 | end
141 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | A simple compiler for a simple imperative language targeting x86-64 assembly
2 | (for systems that follow the System V ABI, so most operating systems **except**
3 | Windows).
4 |
5 | Running the compiler
6 | --------------------
7 |
8 | The compiler relies on OCaml and the ocamlfind library. It is tested with OCaml
9 | versions 4.02.3 and 4.04.0, and ocamlfind 1.5.6 and 1.7.1. Ocamlfind can be
10 | installed via opam. If you have installed anything via Opam (for example,
11 | Merlin, or ocp-indent), ocamlfind is probably installed already. Opam installs
12 | everything into `.opam` in your home directory:
13 | ```
14 | opam install ocamlfind
15 | ```
16 |
17 | To compile the compiler run `make` in the `src` directory. This should produce
18 | `compile.byte` and `interp.byte` executables. Both take a single command-line
19 | argument: a source file name with the `.expl` extension. `interp.byte` runs the
20 | file, `compile.byte` compiles it, generating an x86-46 assembly `.s` file in
21 | *nasm* syntax.
22 |
23 | Compiling target programs
24 | -------------------------
25 |
26 | First run `make` in the `runtime` directory to compile the very simple runtime
27 | library (using gcc).
28 |
29 | On Linux, use `nasm -f elf64 FILENAME.s` to assemble the compiler's output for
30 | *FILENAME*. On Mac, use `nasm -f macho64 --prefix _ FILENAME.s`. Then use `gcc
31 | COMPILER_DIR/runtime/io.o FILENAME.o -o FILENAME` to link the program with the
32 | runtime library.
33 |
34 | See the `tests` directory for some example programs.
35 |
36 | The source language
37 | -------------------
38 |
39 | Keywords are `+ - * / | & << < > = || && ! := do while if then else input output true false array return function let bool int`
40 |
41 | Identifiers are strings of letters, underscores, and digits (not starting with
42 | a digit) that are not keywords.
43 |
44 | Numbers are sequences of digits that fit into a 64-bit signed integer.
45 |
46 | Comments start with '//' and last until the end of the line.
47 |
48 | All numerical operations are on signed, 2s complement 64-bit integers.
49 |
50 | Multi-dimensional arrays are supported. Array elements are 64-bit integers.
51 |
52 | op ::=
53 | | `+` --- Addition
54 | | `-` --- Subtraction
55 | | `*` --- Multiplication
56 | | `/` --- Division
57 | | `|` --- Bitwise or
58 | | `&` --- Bitwise and
59 | | `<<` --- Left shift
60 | | `<` --- Less than
61 | | `>` --- Greater than
62 | | `=` --- Equality
63 | | `||` --- Logical or
64 | | `&&` --- Logical and
65 |
66 | uop ::=
67 | | `!` --- Logical negation
68 | | `-` --- Unary minus
69 |
70 | indices ::=
71 | | `[` exp `]` indices
72 | | epsilon
73 |
74 | args ::=
75 | | exp
76 | | exp `,` args
77 |
78 | atomic_exp ::=
79 | | identifier indices --- Variable use and array indexing
80 | | identifier `(` args `)` --- Function call
81 | | number --- Integer constant
82 | | `true` --- Boolean constant
83 | | `false` --- Boolean constant
84 | | uop atomic_exp --- Unary operation
85 | | `array` indices --- Array allocation
86 | | `(` exp `)` --- Parenthesised expression
87 |
88 | exp ::=
89 | | atomic_exp op atomic_exp --- Binary operation
90 | | atomic_exp
91 |
92 | stmt ::=
93 | | identifier indices `:=` exp
94 | | `while` exp stmt
95 | | `do` stmt `while` exp
96 | | `if` exp `then` stmt `else` stmt
97 | | `{` stmts `}`
98 | | `input` identifier
99 | | `output` identifier
100 | | `return` identifier
101 |
102 | stmts ::=
103 | | epsilon
104 | | stmt stmts
105 |
106 | typ ::=
107 | | `int` --- a 64-bit signed integer
108 | | `bool` --- a boolean
109 | | `array` number --- an n dimensional array of 64-bit signed integers
110 |
111 | params ::=
112 | | `(` identifier `:` type `)`
113 | | `(` identifier `:` type `)` params
114 |
115 | var_decs ::=
116 | | epsilon
117 | | `let` identifier `:` typ `=` exp var_decs
118 |
119 | functions ::=
120 | | epsilon
121 | | `function` identifier params `:` typ `{` var_decs stmts `}` funcs
122 |
123 | program ::=
124 | | var_decs functions
125 |
126 | Loading the compiler in utop
127 | ----------------------------
128 |
129 | First build the compiler, by running make from the src directory. In utop load
130 | the packages that the compiler uses with the `#require` command:
131 |
132 | ```
133 | #require "str";;
134 | ```
135 |
136 | You can add this line to the `.ocamlinit` file in your home directory, so that
137 | you don't have to manually enter it each time you start a new utop session.
138 | The contents of `.ocamlinit` are run each time you start a new utop.
139 |
140 | The OCaml compilation manager (ocamlbuild) stores all of the compiled OCaml
141 | sources in the `_build` directory, with the extension `.cmo`. The following
142 | tells utop to look there for source files.
143 | ```
144 | #directory "_build";;
145 | ```
146 |
147 | To load a particular module, for example, LineariseCfg, use the `#load_rec` command.
148 | ```
149 | #load_rec "lineariseCfg.cmo";;
150 | ```
151 |
152 | You can then open the module if you want (`open LineariseCfg`), or call
153 | functions directly (`LineariseCfg.cfg_to_linear`). Loading a compiled module in
154 | this way only gives you access to the values and functions that are exported in
155 | the corresponding `.mli` file. Often a good way to work on a file is to
156 | `#load_rec` all of the modules that it depends on, and then `#use` the file.
157 |
--------------------------------------------------------------------------------
/src/shrinkImmediates.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* Ensure that all immediate arguments fit into 32 bits. We assume that
20 | constant propagation has ensured that no operation has two immediate
21 | operands, and we maintain that property here. Also, remove immediate
22 | operands from division, since the x86 signed division does not support them.
23 | *)
24 |
25 | open BlockStructure
26 |
27 | let tmp_var = NamedTmp ("SI",0)
28 | let tmp_var2 = NamedTmp ("SI",1)
29 |
30 | (* Build a series of assignments that puts the immediate n into the dest
31 | register, using only immediates of 32 bits or smaller *)
32 | let assign_imm (dest : var) (n : int64) : block_elem list =
33 | [AssignAtom (dest, Num (Int64.shift_right_logical n 32));
34 | AssignOp (dest, Ident dest, Tokens.Lshift, Num 32L);
35 | AssignOp (dest, Ident dest, Tokens.BitOr,
36 | Num (Int64.logand n 0x00000000FFFFFFFFL))]
37 |
38 | let is_imm (a : atomic_exp) : bool =
39 | match a with
40 | | Ident _ -> false
41 | | Num _ -> true
42 |
43 | (* If the atomic_exp is a large immediate constant (takes more than 32 bits),
44 | then return the int64, else None. Relies on int64 being 2s complement. If
45 | it's negative (the top bit is 1), then check that the top 33 bits are 1. It
46 | it's non-negative, check that the top 33 bits are 0. 32 won't suffice,
47 | because when we truncate to 32 bits only, the top bit needs to be the sign
48 | bit. *)
49 | let get_large_imm (a : atomic_exp) : int64 option =
50 | match a with
51 | | Num n ->
52 | (* This is an arithmetic shift. *)
53 | let topmost = Int64.shift_right n 31 in
54 | if Int64.compare topmost 0L = 0 ||
55 | Int64.compare topmost 0xFFFFFFFFFFFFFFFFL = 0 then
56 | None
57 | else
58 | Some n
59 | | _ -> None
60 |
61 | module T = Tokens
62 |
63 | let shrink_imm_elem (e : block_elem) : block_elem list =
64 | match e with
65 | | AssignOp (dest, a1, T.Div, a2) ->
66 | (* division cannot have immediate operands *)
67 | if is_imm a1 && is_imm a2 then
68 | [AssignAtom (tmp_var, a1);
69 | AssignAtom (tmp_var2, a2);
70 | AssignOp (dest, Ident tmp_var, T.Div, Ident tmp_var2)]
71 | else if is_imm a1 then
72 | [AssignAtom (tmp_var, a1);
73 | AssignOp (dest, Ident tmp_var, T.Div, a2)]
74 | else if is_imm a2 then
75 | [AssignAtom (tmp_var, a2);
76 | AssignOp (dest, a1, T.Div, Ident tmp_var)]
77 | else
78 | [e]
79 | | AssignOp (dest, a1, op, a2) ->
80 | assert (not (is_imm a1 && is_imm a2));
81 | (match get_large_imm a1 with
82 | | Some n ->
83 | assign_imm tmp_var n @
84 | [AssignOp (dest, Ident tmp_var, op, a2)]
85 | | None ->
86 | (match get_large_imm a2 with
87 | | Some n ->
88 | assign_imm tmp_var n @
89 | [AssignOp (dest, a1, op, Ident tmp_var)]
90 | | None ->
91 | [e]))
92 | | AssignAtom (dest, a) ->
93 | (match get_large_imm a with
94 | | Some n -> assign_imm dest n
95 | | None -> [e])
96 | | Ld (v1, v2, a) ->
97 | (match get_large_imm a with
98 | | Some n ->
99 | assign_imm tmp_var n @ [Ld(v1, v2, Ident tmp_var)]
100 | | None -> [e])
101 | | St (r, a1, a2) ->
102 | (match (get_large_imm a1, get_large_imm a2) with
103 | | (None, None) -> [e]
104 | | (Some n1, None) ->
105 | assign_imm tmp_var n1 @ [St(r, Ident tmp_var, a2)]
106 | | (None, Some n2) ->
107 | assign_imm tmp_var n2 @ [St(r, a1, Ident tmp_var)]
108 | | (Some n1, Some n2) ->
109 | assign_imm tmp_var n1 @
110 | assign_imm tmp_var2 n2 @
111 | [St(r, Ident tmp_var, Ident tmp_var2)])
112 | | Call (v, f, aes) ->
113 | let (s, es, _) =
114 | List.fold_right
115 | (fun (ae : atomic_exp) ((s : block_elem list), es, n) ->
116 | match get_large_imm ae with
117 | | None -> (s, ae::es, n)
118 | | Some imm ->
119 | (assign_imm (NamedTmp ("SI",n)) imm @ s, Ident tmp_var::es, n + 1))
120 | aes
121 | ([], [], 2)
122 | in
123 | s @ [Call (v, f, es)]
124 | | BoundCheck (a1, a2) ->
125 | assert (not (is_imm a1 && is_imm a2));
126 | (match get_large_imm a1 with
127 | | Some n ->
128 | assign_imm tmp_var n @
129 | [BoundCheck (Ident tmp_var, a2)]
130 | | None ->
131 | (match get_large_imm a2 with
132 | | Some n ->
133 | assign_imm tmp_var n @
134 | [BoundCheck (a1, Ident tmp_var)]
135 | | None ->
136 | [e]))
137 | | NullCheck v -> [NullCheck v]
138 |
139 | let shrink_imm (cfg : cfg) : cfg =
140 | List.map
141 | (fun cfg_entry ->
142 | { cfg_entry with
143 | elems =
144 | List.flatten
145 | (List.map shrink_imm_elem cfg_entry.elems) })
146 | cfg
147 |
--------------------------------------------------------------------------------
/src/blockInterp.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2016 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* An interpreter for CFGs *)
20 |
21 | open BlockStructure
22 | open AstInterp
23 |
24 | type store_t = int64 Varmap.t
25 |
26 | type heap_t = { mutable next_address : int; heap : bytes }
27 |
28 | (* Convert int64 to 8 bytes in little-endian order and place them in the
29 | given bytes at the given index *)
30 | let int64_to_bytes (b: bytes) index num : unit =
31 | let rec loop i inc =
32 | if inc < 8 then
33 | (Bytes.set b (index + inc) (Char.chr (Int64.to_int (Int64.logand 0xFFL i)));
34 | loop (Int64.shift_right i 8) (inc + 1))
35 | else
36 | ()
37 | in
38 | loop num 0
39 |
40 | (* Read 8 bytes in little-endian order into an int64 *)
41 | let bytes_to_int64 (b : bytes) index : int64 =
42 | let rec loop i inc =
43 | if inc >= 0 then
44 | let next =
45 | Int64.logor
46 | (Int64.shift_left i 8)
47 | (Int64.of_int (Char.code (Bytes.get b (index + inc))))
48 | in
49 | loop next (inc - 1)
50 | else
51 | i
52 | in
53 | loop 0L 7
54 |
55 | let alloc1 (heap : heap_t) (n : int64) : int64 =
56 | let old_next = heap.next_address in
57 | let new_next = old_next + 8 * ((Int64.to_int n) + 1) in
58 | if new_next <= Bytes.length heap.heap then
59 | (heap.next_address <- new_next;
60 | int64_to_bytes heap.heap old_next n;
61 | Int64.of_int old_next)
62 | else
63 | raise (Crash "Out of memory")
64 |
65 | let rec alloc (heap : heap_t) (ns : int64 list) : int64 =
66 | match ns with
67 | | [] -> raise (Util.InternalError "Empty alloc")
68 | | [n] -> alloc1 heap n
69 | | n::ns ->
70 | let v = alloc1 heap n in
71 | for i = 0 to (Int64.to_int n) do
72 | int64_to_bytes heap.heap (Int64.to_int v + 8 * i)
73 | (alloc heap ns)
74 | done;
75 | v
76 |
77 | let interp_atomic_exp (store : store_t) (ae : atomic_exp) : int64 =
78 | match ae with
79 | | Ident x -> Varmap.find x store
80 | | Num i -> i
81 |
82 | let interp_block_elem (store : store_t) (heap : heap_t) (be : block_elem)
83 | : store_t =
84 | match be with
85 | | AssignOp (x, ae1, op, ae2) ->
86 | let n1 = interp_atomic_exp store ae1 in
87 | let n2 = interp_atomic_exp store ae2 in
88 | Varmap.add x (do_op op n1 n2) store
89 | | AssignAtom (x, ae) ->
90 | Varmap.add x (interp_atomic_exp store ae) store
91 | | Ld (x1, x2, ae) ->
92 | let n1 = Varmap.find x2 store in
93 | let n2 = interp_atomic_exp store ae in
94 | Varmap.add
95 | x1
96 | (bytes_to_int64 heap.heap (Int64.to_int (Int64.add n1 n2)))
97 | store
98 | | St (x, ae1, ae2) ->
99 | let n1 = Varmap.find x store in
100 | let n2 = interp_atomic_exp store ae1 in
101 | let n3 = interp_atomic_exp store ae2 in
102 | int64_to_bytes heap.heap (Int64.to_int (Int64.add n1 n2)) n3;
103 | store
104 | | Call (Some x, "input", []) ->
105 | Printf.printf "> ";
106 | (try
107 | let n = Int64.of_string (read_line ()) in
108 | Varmap.add x n store
109 | with Failure _ -> raise (Crash "not a 64-bit integer"))
110 | | Call (None, "output", [i]) ->
111 | begin
112 | print_string (Int64.to_string (interp_atomic_exp store i));
113 | print_newline ();
114 | store
115 | end
116 | | Call (Some x,
117 | ("allocate1" | "allocate2" | "allocate3" | "allocate4" | "allocate5" |
118 | "allocate6" | "allocate7"),
119 | aes) ->
120 | let ns = List.map (interp_atomic_exp store) aes in
121 | Varmap.add x (alloc heap ns) store
122 | | Call _ ->
123 | raise (Util.InternalError "Unknown function call in blockInterp")
124 |
125 | let rec interp_basic_block (store : store_t) (heap : heap_t)
126 | (bb : basic_block) : store_t =
127 | match bb with
128 | | [] -> store
129 | | be::bes ->
130 | let store' = interp_block_elem store heap be in
131 | interp_basic_block store' heap bes
132 |
133 | let interp_next_block store nb : int =
134 | match nb with
135 | | End -> 0
136 | | Next i -> i
137 | | Branch ((ae1, op, ae2), i1, i2) ->
138 | let n1 = interp_atomic_exp store ae1 in
139 | let n2 = interp_atomic_exp store ae2 in
140 | match op with
141 | | Eq -> if Int64.compare n1 n2 = 0 then i1 else i2
142 | | Lt -> if Int64.compare n1 n2 < 0 then i1 else i2
143 | | Gt -> if Int64.compare n1 n2 > 0 then i1 else i2
144 |
145 | let interp_cfg (store : store_t) (heap : heap_t) (cfg : cfg) : unit =
146 | let cfgmap = LineariseCfg.init_traversal cfg in
147 | let rec loop (index : int) store =
148 | let node = Util.Intmap.find index cfgmap in
149 | let store' = interp_basic_block store heap node.elems in
150 | let next_index = interp_next_block store node.next in
151 | loop next_index store'
152 | in
153 | loop 1 store
154 |
155 | let interp_prog (heap_size : int) (cfg : cfg) : unit =
156 | interp_cfg
157 | Varmap.empty
158 | { next_address = 0; heap = Bytes.make heap_size '\000'}
159 | cfg
160 |
--------------------------------------------------------------------------------
/src/regAlloc.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* A very simple register allocator. Global variables never get put in a
20 | register. Function parameters stay in the registers that they are passed in.
21 | Local variables get put in the remaining registers in no particular order.
22 | The rest are put on the stack *)
23 |
24 | open Util
25 | open BlockStructure
26 |
27 | let get_vars_ae (ae : atomic_exp) (vars : Varset.t) : Varset.t =
28 | match ae with
29 | | Ident r -> Varset.add r vars
30 | | Num _ -> vars
31 |
32 | let get_vars_be (be : block_elem) (vars : Varset.t) : Varset.t =
33 | match be with
34 | | AssignOp (r, ae1, _, ae2) ->
35 | get_vars_ae ae1 (get_vars_ae ae2 (Varset.add r vars))
36 | | AssignAtom (r, ae) -> get_vars_ae ae (Varset.add r vars)
37 | | Ld (v1, v2, ae) ->
38 | get_vars_ae ae (Varset.add v1 (Varset.add v2 vars))
39 | | St (r, ae1, ae2) ->
40 | get_vars_ae ae1 (get_vars_ae ae2 (Varset.add r vars))
41 | | Call (None, _, aes) ->
42 | List.fold_right get_vars_ae aes vars
43 | | Call (Some i, _, aes) ->
44 | List.fold_right get_vars_ae aes (Varset.add i vars)
45 | | BoundCheck (a1, a2) ->
46 | get_vars_ae a1 (get_vars_ae a2 vars)
47 | | NullCheck v ->
48 | Varset.add v vars
49 |
50 | let get_vars_test (ae1, _, ae2) (vars : Varset.t) : Varset.t =
51 | List.fold_right get_vars_ae [ae1; ae2] vars
52 |
53 | let get_vars_nb (nb : next_block) (vars : Varset.t) : Varset.t =
54 | match nb with
55 | | Return None -> vars
56 | | Return (Some v) -> Varset.add v vars
57 | | Next _ -> vars
58 | | Branch (r, _, _) -> get_vars_test r vars
59 |
60 | let get_vars_block (b : cfg_entry) (vars : Varset.t) : Varset.t =
61 | List.fold_right get_vars_be b.elems (get_vars_nb b.next vars)
62 |
63 | (* Get all of the variables mentioned in the cfg *)
64 | let get_vars (cfg : cfg) : Varset.t =
65 | List.fold_right get_vars_block cfg Varset.empty
66 |
67 | let is_global v =
68 | match v with
69 | | NamedSource (_, SourceAst.Global) -> true
70 | | _ -> false
71 |
72 | let is_param v =
73 | match v with
74 | | NamedSource (_, SourceAst.Parameter) -> true
75 | | _ -> false
76 |
77 | let is_local v =
78 | match v with
79 | | NamedSource (_, SourceAst.Local) -> true
80 | | NamedTmp _ -> true
81 | | _ -> false
82 |
83 | (* Given the vars used in a function, allocate them into registers. Ignore the
84 | live ranges and clash graph. This is the simplest strategy that will yield a
85 | working program. Return also the number of stack slots used. *)
86 | (* TODO: actually put some locals into registers *)
87 | let build_regalloc_map (fun_params : (SourceAst.id * int) list) (vars : Varset.t)
88 | : (var Varmap.t * int) =
89 | let vars = Varset.elements vars in
90 | let (globals, vars) = List.partition is_global vars in
91 | let (locals, vars) = List.partition is_local vars in
92 | assert (List.for_all is_param vars); (* There shouldn't be any allocated registers already *)
93 | let global_map =
94 | List.map
95 | (fun g ->
96 | match g with
97 | | NamedSource (i, _) as n -> (n, Global i)
98 | | _ -> assert false)
99 | globals
100 | in
101 | let param_map =
102 | List.map (fun (p, i) -> (id_to_var p, Vreg i)) fun_params
103 | in
104 | let local_map =
105 | List.map2 (fun l i -> (l, Stack i)) locals (count (List.length locals))
106 | in
107 | let m =
108 | List.fold_right (fun (k,v) m -> Varmap.add k v m)
109 | (global_map @ param_map @ local_map)
110 | Varmap.empty
111 | in
112 | (m, List.length locals)
113 |
114 | let reg_alloc_ae (map : var Varmap.t) (ae : atomic_exp) : atomic_exp =
115 | match ae with
116 | | Ident v ->
117 | Ident (Varmap.find v map)
118 | | Num x -> Num x
119 |
120 | let reg_alloc_be (map : var Varmap.t) (be : block_elem) : block_elem =
121 | match be with
122 | | AssignOp (v, ae1, op, ae2) ->
123 | AssignOp (Varmap.find v map, reg_alloc_ae map ae1, op, reg_alloc_ae map ae2)
124 | | AssignAtom (v, ae) ->
125 | AssignAtom (Varmap.find v map, reg_alloc_ae map ae)
126 | | Ld (v1, v2, ae) ->
127 | Ld (Varmap.find v1 map, Varmap.find v2 map, reg_alloc_ae map ae)
128 | | St (v, ae1, ae2) ->
129 | St (Varmap.find v map, reg_alloc_ae map ae1, reg_alloc_ae map ae2)
130 | | Call (v, f, aes) ->
131 | Call (Util.option_map (fun v -> Varmap.find v map) v, f,
132 | List.map (reg_alloc_ae map) aes)
133 | | BoundCheck (a1, a2) ->
134 | BoundCheck (reg_alloc_ae map a1, reg_alloc_ae map a2)
135 | | NullCheck v ->
136 | NullCheck (Varmap.find v map)
137 |
138 | let reg_alloc_test (map : var Varmap.t) (ae1, op, ae2) : test =
139 | (reg_alloc_ae map ae1, op, reg_alloc_ae map ae2)
140 |
141 | let reg_alloc_nb (map : var Varmap.t) (nb : next_block) : next_block =
142 | match nb with
143 | | Return (Some v) -> Return (Some (Varmap.find v map))
144 | | Return None -> Return None
145 | | Next i -> Next i
146 | | Branch (t, t1, t2) ->
147 | Branch (reg_alloc_test map t, t1, t2)
148 |
149 | (* fun_params is an association list mapping the function's parameters to the
150 | registers that they were passed in. num_regs is the total number of
151 | registers that can be used. The returned int is the number of variables
152 | put on the stack *)
153 | let reg_alloc (fun_params : (SourceAst.id * int) list) (num_regs : int)
154 | (cfg : cfg)
155 | : (cfg * int) =
156 | let (map, num_stacks) = build_regalloc_map fun_params (get_vars cfg) in
157 | let cfg =
158 | List.map (fun entry -> { bnum = entry.bnum;
159 | elems = List.map (reg_alloc_be map) entry.elems;
160 | next = reg_alloc_nb map entry.next;
161 | finished = false;
162 | started = false }) cfg in
163 | (cfg, num_stacks)
164 |
--------------------------------------------------------------------------------
/src/tokens.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* The language's tokens, and a simple lexer *)
20 |
21 | open Util
22 |
23 | (* Primitive operators *)
24 | (* When extending with operators that can have side effects, be careful to
25 | check the rest of the compiler. Some optimisations can only be done on
26 | side-effect-free operations. Right now only Div can have an effect
27 | (divide-by-zero exception) *)
28 |
29 | type op =
30 | (* integer operations *)
31 | | Plus
32 | | Minus
33 | | Times
34 | | Div
35 | | Lshift
36 | | BitOr
37 | | BitAnd
38 | (* comparisons *)
39 | | Lt
40 | | Gt
41 | | Eq
42 | (* boolean operations *)
43 | | And
44 | | Or
45 |
46 | type uop =
47 | (* boolean negation *)
48 | | Not
49 |
50 | let show_op op =
51 | match op with
52 | | Plus -> "+"
53 | | Minus -> "-"
54 | | Times -> "*"
55 | | Div -> "/"
56 | | Lt -> "<"
57 | | Gt -> ">"
58 | | Eq -> "="
59 | | And -> "&&"
60 | | Or -> "||"
61 | | Lshift -> "<<"
62 | | BitOr -> "|"
63 | | BitAnd -> "&"
64 |
65 | let show_uop (uop : uop) : string =
66 | match uop with
67 | | Not -> "!"
68 |
69 | type token =
70 | | Num of int64
71 | | Ident of string (* identifiers represent various names, including function
72 | and variable names *)
73 | | Op of op
74 | | Uop of uop
75 | (* Punctuation *)
76 | | Lparen
77 | | Rparen
78 | | Lcurly
79 | | Rcurly
80 | | Lbrac
81 | | Rbrac
82 | | Colon
83 | | Comma
84 | (* Identifier-like keywords *)
85 | | While
86 | | Do
87 | | If
88 | | Then
89 | | Else
90 | | Assign
91 | | True
92 | | False
93 | | Input
94 | | Output
95 | | Array
96 | | Int
97 | | Bool
98 | | Let
99 | | Function
100 | | Return
101 |
102 | let show_token (t : token) : string =
103 | match t with
104 | | Num i -> Int64.to_string i
105 | | Ident s -> s
106 | | Op o -> show_op o
107 | | Uop u -> show_uop u
108 | | Lparen -> "("
109 | | Rparen -> ")"
110 | | Lcurly -> "{"
111 | | Rcurly -> "}"
112 | | Lbrac -> "["
113 | | Rbrac -> "]"
114 | | Comma -> ","
115 | | Colon -> ":"
116 | | While -> "while"
117 | | Do -> "do"
118 | | If -> "if"
119 | | Then -> "then"
120 | | Else -> "else"
121 | | Assign -> ":="
122 | | True -> "true"
123 | | False -> "false"
124 | | Input -> "input"
125 | | Output -> "output"
126 | | Array -> "array"
127 | | Int -> "int"
128 | | Bool -> "bool"
129 | | Let -> "let"
130 | | Function -> "function"
131 | | Return -> "return"
132 |
133 | (* Pretty-print a token *)
134 | let pp_token (fmt : Format.formatter) (t : token) =
135 | Format.fprintf fmt "%s" (show_token t)
136 |
137 | (* Tokens annotated with which line number they appear on *)
138 | type tok_loc = (token * int)
139 |
140 | (* Pretty-print a token and location *)
141 | let pp_tok_loc (fmt : Format.formatter) ((t, l) : tok_loc) =
142 | Format.fprintf fmt "(%a, %d)" pp_token t l
143 |
144 | (* The mapping of keword strings to the corresponding tokens *)
145 | let keywords : (string * token) list =
146 | (* Derive the mapping from the to_string functions to avoid duplication *)
147 | (show_uop Not, Uop Not) ::
148 | List.map (fun o -> (show_op o, Op o))
149 | [Plus; Minus; Times; Div; Lt; Gt; Eq; And; Or; Lshift; BitOr; BitAnd] @
150 | List.map (fun t -> (show_token t, t))
151 | [Do; While; If; Then; Else; Array; Assign; True; Input; Output; False;
152 | Lparen; Rparen; Lcurly; Rcurly; Lbrac; Rbrac; Int; Bool; Colon; Let;
153 | Return; Function; Comma]
154 |
155 | (* Map each keyword string to its corresponding token *)
156 | let keyword_map : token Strmap.t =
157 | List.fold_left (fun m (k,v) -> Strmap.add k v m) Strmap.empty keywords
158 |
159 | (* Regular expressions that describe various syntactic entities *)
160 | (* Build the keyword regexp out of the mapping to avoid duplication *)
161 | let keyword_re =
162 | Str.regexp
163 | (String.concat "\\|"
164 | (List.map (fun (s, _) -> Str.quote s) keywords))
165 | let number_re = Str.regexp "[0-9]+"
166 | let ident_re = Str.regexp "[a-zA-Z_][a-zA-Z0-9_]*"
167 | let space_re = Str.regexp "[ \t]+\\|//.*"
168 | let newline_re = Str.regexp "\n\\|\r\n"
169 |
170 | (* Read all the tokens from s, using pos to index into the string and line_n
171 | to track the current line number, for error reporting later on. Return them
172 | in a list. *)
173 | (* The lex function repeatedly finds which regexp matches the string, starting
174 | from the current position, adds that token to the result, and then updates
175 | the starting position to find the next token *)
176 | let rec lex (s : string) (pos : int) (line_n : int) : tok_loc list =
177 | if pos >= String.length s then
178 | []
179 | else if Str.string_match space_re s pos then
180 | lex s (Str.match_end ()) line_n
181 | else if Str.string_match newline_re s pos then
182 | lex s (Str.match_end ()) (line_n + 1)
183 | else if Str.string_match ident_re s pos then
184 | let id = Str.matched_string s in
185 | if Strmap.mem id keyword_map then
186 | (* The identifier is also a keyword *)
187 | let tok = Strmap.find id keyword_map in
188 | (tok, line_n) :: lex s (Str.match_end ()) line_n
189 | else
190 | (Ident id, line_n) :: lex s (Str.match_end ()) line_n
191 | else if Str.string_match keyword_re s pos then
192 | let tok = Strmap.find (Str.matched_string s) keyword_map in
193 | (tok, line_n) :: lex s (Str.match_end ()) line_n
194 | else if Str.string_match number_re s pos then
195 | let num =
196 | try Int64.of_string (Str.matched_string s)
197 | with Failure _ ->
198 | raise (BadInput ("Lex error: integer constant too big " ^
199 | Str.matched_string s ^
200 | " on line " ^
201 | string_of_int line_n))
202 | in
203 | (Num num, line_n) :: lex s (Str.match_end ()) line_n
204 | else
205 | raise (BadInput ("lex error: at character '" ^
206 | String.sub s pos 1 ^
207 | "' on line " ^
208 | string_of_int line_n))
209 |
--------------------------------------------------------------------------------
/src/unnestExp.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* Flatten expressions so that they follow this grammar. We don't introduce a
20 | new type, but do define predicates on the SourceAst.exp type.
21 |
22 | type ae =
23 | | Num of int64
24 | | Bool of bool
25 | | Ident of SourceAst.id
26 |
27 | type flat_exp =
28 | | Num of int64
29 | | Bool of bool
30 | | Ident of SourceAst.id * ae
31 | | Op of ae * op * ae
32 | | Uop of ae
33 | | Array of ae list
34 | | Call of SourceAst.id * ae list
35 | *)
36 |
37 | open SourceAst
38 | module T = Tokens
39 |
40 | let is_atomic (e : exp) : bool =
41 | match e with
42 | | Num _ -> true
43 | | Bool _ -> true
44 | | Ident (_, []) -> true
45 | | Ident (_, _) -> false
46 | | Op _ -> false
47 | | Uop _ -> false
48 | | Array _ -> false
49 | | Call _ -> false
50 |
51 | let is_flat (e : exp) : bool =
52 | match e with
53 | | Num _ -> true
54 | | Bool _ -> true
55 | | Ident (_, []) -> true
56 | | Ident (_, [e]) -> is_atomic e
57 | | Ident (_, _) -> false
58 | | Op (e1, _, e2) -> is_atomic e1 && is_atomic e2
59 | | Uop (_, e) -> is_atomic e
60 | | Array es -> List.for_all is_atomic es
61 | | Call (_, es) -> List.for_all is_atomic es
62 |
63 | let unnest (stmts : stmt list) : stmt list =
64 |
65 | (* Generate unique names for temporary variables *)
66 | let next_ident = ref 0 in
67 | let get_ident () : id =
68 | let x = !next_ident in
69 | next_ident := (!next_ident) + 1;
70 | Temp ("UE", x)
71 | in
72 |
73 | (* indices must all be atomic, according to is_atomic above. Returns a flat
74 | expression according to is_flat. *)
75 | let rec unnest_indices (arr : id) (indices : exp list) : stmt list * exp =
76 | match indices with
77 | | [] -> ([], Ident (arr, []))
78 | | [i] -> ([], Ident (arr, [i]))
79 | | (i::indices) ->
80 | let id = get_ident () in
81 | let (s, e) = unnest_indices id indices in
82 | (Assign (id, [], Ident (arr, [i])) :: s,
83 | e)
84 | in
85 |
86 | (* Flatten out and expression into a list of statements and an expression by
87 | using temporary variables to store the results of each sub-expression. The
88 | expression returned satisfies is_flat above.
89 |
90 | Warning: As implemented, this is O(n^2) because of the list appending.
91 | This is unlikely to matter for human written code, as it is O(n^2) where n
92 | is the size of an expression, not the whole program. However, for
93 | compiling machine-generated code, this could be a problem. A little care
94 | could be taken to collect results in a tree-structure and then convert it
95 | into a list. *)
96 | let rec unnest_exp (e : exp) : stmt list * exp =
97 | match e with
98 | | Num i -> ([], Num i)
99 | | Bool b -> ([], Bool b)
100 | | Ident (i, es) ->
101 | let (s_list, aes) = List.split (List.map unnest_exp_atomic es) in
102 | let (s_list2, ae) = unnest_indices i aes in
103 | (List.flatten s_list @ s_list2, ae)
104 | | Op (e1, T.And, e2) ->
105 | let (s1, f1) = unnest_exp e1 in
106 | let (s2, f2) = unnest_exp e2 in
107 | (* f1 && f2 --> (if f1 then tmp := f2 else t := false); tmp *)
108 | let tmp = get_ident () in
109 | (s1 @
110 | [Ite (f1,
111 | Stmts (s2 @ [Assign (tmp, [], f2)]),
112 | Assign (tmp, [], Bool false))],
113 | (Ident (tmp, [])))
114 | | Op (e1, T.Or, e2) ->
115 | let (s1, f1) = unnest_exp e1 in
116 | let (s2, f2) = unnest_exp e2 in
117 | (* f1 || f2 --> (if f1 then tmp := true else tmp := f2 ); tmp *)
118 | let tmp = get_ident () in
119 | (s1 @
120 | [Ite (f1,
121 | Assign (tmp, [], Bool true),
122 | Stmts (s2 @ [Assign (tmp, [], f2)]))],
123 | (Ident (tmp, [])))
124 | | Op (e1, op, e2) ->
125 | let (s1, a1) = unnest_exp_atomic e1 in
126 | let (s2, a2) = unnest_exp_atomic e2 in
127 | (s1 @ s2, Op (a1, op, a2))
128 | | Uop (uop, e) ->
129 | let (s, a) = unnest_exp_atomic e in
130 | (s, Uop (uop, a))
131 | | Array es ->
132 | let (s_list, aes) = List.split (List.map unnest_exp_atomic es) in
133 | (List.flatten s_list, Array aes)
134 | | Call (f, es) ->
135 | let (s_list, aes) = List.split (List.map unnest_exp_atomic es) in
136 | (List.flatten s_list, Call (f, aes))
137 |
138 |
139 | (* Similar to unnest_exp, but ensures that the returned exp is atomic, rather
140 | than just flat. *)
141 | and unnest_exp_atomic (e : exp) : stmt list * exp =
142 | let (s, f) = unnest_exp e in
143 | if is_atomic f then
144 | (s, f)
145 | else
146 | let id = get_ident () in
147 | (s @ [Assign (id, [], f)], Ident (id, []))
148 |
149 | and unnest_exp_for_test (e : exp) : stmt list * exp =
150 | let (s, f) = unnest_exp e in
151 | match f with
152 | | Ident (_, [_]) ->
153 | let id = get_ident () in
154 | (s @ [Assign (id, [], f)], Ident (id, []))
155 | | _ -> (s, f)
156 | in
157 |
158 | let rec unnest_stmt (s : stmt) : stmt list =
159 | match s with
160 | | Assign (x, [], e) ->
161 | let (s, f) = unnest_exp e in
162 | s @ [Assign (x, [], f)]
163 | | Assign (x, es, e) ->
164 | let (s_list, aes) = List.split (List.map unnest_exp_atomic es) in
165 | let (s, f) = unnest_exp_atomic e in
166 | (match unnest_indices x aes with
167 | | (s', Ident (i, [])) ->
168 | List.flatten s_list @ s' @ s @ [Assign (i, [], f)]
169 | | (s', Ident (i, [f'])) ->
170 | List.flatten s_list @ s' @ s @ [Assign (i, [f'], f)]
171 | | _ -> assert false)
172 | | DoWhile (s0, e, s1) ->
173 | let s0' = unnest_stmt s0 in
174 | let (se1, e') = unnest_exp_for_test e in
175 | let s1' = unnest_stmt s1 in
176 | [DoWhile (stmts_to_stmt (s0' @ se1), e', stmts_to_stmt s1')]
177 | | Ite (e, s1, s2) ->
178 | let (se, e') = unnest_exp_for_test e in
179 | let s1' = unnest_stmt s1 in
180 | let s2' = unnest_stmt s2 in
181 | se @ [Ite (e', stmts_to_stmt s1', stmts_to_stmt s2')]
182 | | Stmts s_list ->
183 | List.flatten (List.map unnest_stmt s_list)
184 | | In id -> [In id]
185 | | Out id -> [Out id]
186 | | Loc (stmt, _) ->
187 | unnest_stmt stmt
188 | | Return id -> [Return id]
189 | in
190 |
191 | List.flatten (List.map unnest_stmt stmts)
192 |
--------------------------------------------------------------------------------
/src/x86.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2016 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* A fragment of x86-64 AST, and printing in NASM syntax *)
20 | (* Translated from HOL4 in examples/machine-code/instruction-set-models/x86_64 *)
21 |
22 | open Format
23 | open Util
24 |
25 | (* 64-bit registers *)
26 | type reg =
27 | | RAX | RBX | RCX | RDX | RSP | RBP | RSI | RDI
28 | | R8 | R9 | R10 | R11 | R12 | R13 | R14 | R15
29 |
30 | let show_reg r =
31 | match r with
32 | | RAX -> "rax"
33 | | RBX -> "rbx"
34 | | RCX -> "rcx"
35 | | RDX -> "rdx"
36 | | RSP -> "rsp"
37 | | RBP -> "rbp"
38 | | RSI -> "rsi"
39 | | RDI -> "rdi"
40 | | R8 -> "r8"
41 | | R9 -> "r9"
42 | | R10 -> "r10"
43 | | R11 -> "r11"
44 | | R12 -> "r12"
45 | | R13 -> "r13"
46 | | R14 -> "r14"
47 | | R15 -> "r15"
48 |
49 | let pp_reg fmt r =
50 | fprintf fmt "%s" (show_reg r)
51 |
52 | type byte_reg = B of reg
53 |
54 | let show_byte_reg (B r) =
55 | match r with
56 | | RAX -> "al"
57 | | RBX -> "bl"
58 | | RCX -> "cl"
59 | | RDX -> "dl"
60 | | RSP -> "spl"
61 | | RBP -> "bpl"
62 | | RSI -> "sil"
63 | | RDI -> "dil"
64 | | R8 -> "r8b"
65 | | R9 -> "r9b"
66 | | R10 -> "r10b"
67 | | R11 -> "r11b"
68 | | R12 -> "r12b"
69 | | R13 -> "r13b"
70 | | R14 -> "r14b"
71 | | R15 -> "r15b"
72 |
73 | let pp_byte_reg fmt r =
74 | fprintf fmt "%s" (show_byte_reg r)
75 |
76 | type address_component =
77 | | EAScaled of int * reg
78 | | EAReg of reg
79 | | EAConst of int64
80 |
81 | type displacement =
82 | | Concrete_disp of int64
83 | | Label_disp of string
84 |
85 | type rm =
86 | | Zr of reg (* register *)
87 | (* Zm (Some (scale, index), Some base, Some displacement) =
88 | mem[scale * index + base + displacement] *)
89 | | Zm of (int * reg) option * reg option * displacement option
90 |
91 | let rec simple_add_exp fmt l =
92 | match l with
93 | | [] -> ()
94 | | [Some x] -> fprintf fmt "%s" x
95 | | [None] -> fprintf fmt "0"
96 | | Some x :: y -> fprintf fmt "%s + %a" x simple_add_exp y
97 | | None :: y -> simple_add_exp fmt y
98 |
99 | let show_displacement d =
100 | match d with
101 | | Concrete_disp i -> Int64.to_string i
102 | | Label_disp l -> l ^ "_Global"
103 |
104 | let pp_rm fmt rm =
105 | match rm with
106 | | Zr r -> fprintf fmt "%a" pp_reg r
107 | | Zm (idx,base,disp) ->
108 | fprintf fmt "qword [%a]"
109 | simple_add_exp [option_map (fun (scale,i) -> show_reg i ^ " * " ^ string_of_int scale) idx;
110 | option_map show_reg base;
111 | option_map show_displacement disp]
112 |
113 | type dest_src =
114 | | Zrm_i of rm * int64 (* mnemonic r/mXX, immXX (sign-extended) *)
115 | | Zrm_r of rm * reg (* mnemonic r/mXX, rXX *)
116 | | Zr_rm of reg * rm (* mnemonic rXX, r/mXX *)
117 |
118 | let pp_dest_src fmt ds =
119 | match ds with
120 | | Zrm_i (rm, i) ->
121 | fprintf fmt "%a, %Ld"
122 | pp_rm rm
123 | i
124 | | Zrm_r (rm, reg) ->
125 | fprintf fmt "%a, %a"
126 | pp_rm rm
127 | pp_reg reg
128 | | Zr_rm (reg, rm) ->
129 | fprintf fmt "%a, %a"
130 | pp_reg reg
131 | pp_rm rm
132 |
133 | type imm_rm =
134 | | Zi_rm of rm (* r/mXX *)
135 | | Zi of int64 (* sign-extended immediate *)
136 |
137 | let pp_imm_rm fmt ir =
138 | match ir with
139 | | Zi_rm rm -> pp_rm fmt rm
140 | | Zi i -> fprintf fmt "%Ld" i
141 |
142 | type binop_name =
143 | | Zadc | Zadd | Zand | Zcmp | Zor | Zshl | Zshr
144 | | Zsar | Zsub | Zsbb | Ztest | Zxor
145 |
146 | let show_binop_name b =
147 | match b with
148 | | Zadc -> "adc"
149 | | Zadd -> "add"
150 | | Zand -> "and"
151 | | Zcmp -> "cmp"
152 | | Zor -> "or"
153 | | Zshl -> "shl"
154 | | Zshr -> "shr"
155 | | Zsar -> "sar"
156 | | Zsub -> "sub"
157 | | Zsbb -> "sbb"
158 | | Ztest -> "test"
159 | | Zxor -> "xor"
160 |
161 | let pp_binop_name fmt b =
162 | fprintf fmt "%s" (show_binop_name b)
163 |
164 | type monop_name = Zdec | Zinc | Znot | Zneg
165 |
166 | let show_monop_name b =
167 | match b with
168 | | Zdec -> "dec"
169 | | Zinc -> "inc"
170 | | Znot -> "not"
171 | | Zneg -> "neg"
172 |
173 | let pp_monop_name fmt b =
174 | fprintf fmt "%s" (show_monop_name b)
175 |
176 | type cond = (* this list is not complete *)
177 | | Z_ALWAYS (* N = not *)
178 | | Z_E | Z_NE (* E = equal *)
179 | | Z_S | Z_NS (* S = signed *)
180 | | Z_A | Z_NA (* A = above *)
181 | | Z_B | Z_NB (* B = below *)
182 | | Z_L | Z_NL (* L = less *)
183 | | Z_G | Z_NG (* L = greater *)
184 |
185 | let pp_cond fmt c =
186 | fprintf fmt "%s"
187 | (match c with
188 | | Z_ALWAYS -> "mp"
189 | | Z_E -> "e"
190 | | Z_NE -> "ne"
191 | | Z_S -> "s"
192 | | Z_NS -> "ns"
193 | | Z_A -> "a"
194 | | Z_NA -> "na"
195 | | Z_B -> "b"
196 | | Z_NB -> "nb"
197 | | Z_L -> "l"
198 | | Z_NL -> "nl"
199 | | Z_G -> "g"
200 | | Z_NG -> "ng")
201 |
202 | type instruction =
203 | | Zlabel of string
204 | | Zbinop of binop_name * dest_src
205 | | Zmonop of monop_name * rm
206 | (*
207 | | Zcmpxchg of rm * reg
208 | | Zxadd of rm * reg
209 | | Zxchg of rm * reg
210 | *)
211 | (* either reg := reg * rm; or reg := rm * int *)
212 | | Zimul of reg * rm * int64 option
213 | | Zidiv of rm (* RAX := RDX,RAX / rm; RDX := RDX,RAX mod rm *)
214 | | Zlea of dest_src
215 | | Zpop of rm
216 | | Zpush of imm_rm
217 | | Zcall of string
218 | | Zret
219 | | Zleave
220 | | Zcpuid
221 | | Zmov of dest_src
222 | (* | Zmovzx of dest_src *)
223 | (* jcc includes jmp rel, i.e. unconditional relative jumps. *)
224 | | Zjcc of cond * string
225 | | Zjmp of rm (* jmp excludes relative jumps, see jcc. *)
226 | | Zset of cond * byte_reg (* Set operates on a byte of memory or
227 | register. Here, we'll only use registers
228 | *)
229 |
230 | let pp_instruction fmt i =
231 | match i with
232 | | Zlabel s ->
233 | fprintf fmt "%s:" s
234 | | Zbinop (n, ds) ->
235 | fprintf fmt "%a %a"
236 | pp_binop_name n
237 | pp_dest_src ds
238 | | Zmonop (n, rm) ->
239 | fprintf fmt "%a %a"
240 | pp_monop_name n
241 | pp_rm rm
242 | | Zimul (r1, rm2, None) ->
243 | (* r1 := r1 * rm2 *)
244 | fprintf fmt "imul %a, %a"
245 | pp_reg r1
246 | pp_rm rm2
247 | | Zimul (r1, rm2, Some i) ->
248 | (* r1 := rm2 * i *)
249 | fprintf fmt "imul %a, %a, %Ld"
250 | pp_reg r1
251 | pp_rm rm2
252 | i
253 | | Zidiv rm ->
254 | fprintf fmt "idiv %a"
255 | pp_rm rm
256 | | Zlea ds ->
257 | fprintf fmt "lea %a"
258 | pp_dest_src ds
259 | | Zpop rm ->
260 | fprintf fmt "pop %a"
261 | pp_rm rm
262 | | Zpush ir ->
263 | fprintf fmt "push %a"
264 | pp_imm_rm ir
265 | | Zcall lab ->
266 | fprintf fmt "call %s" lab
267 | | Zret ->
268 | fprintf fmt "ret"
269 | | Zleave ->
270 | fprintf fmt "leave"
271 | | Zcpuid ->
272 | fprintf fmt "cpuid"
273 | | Zmov ds ->
274 | fprintf fmt "mov %a"
275 | pp_dest_src ds
276 | | Zjcc (c, lab) ->
277 | fprintf fmt "j%a %s"
278 | pp_cond c
279 | lab
280 | | Zjmp rm ->
281 | fprintf fmt "jmp %a"
282 | pp_rm rm
283 | | Zset (cond, reg) ->
284 | fprintf fmt "set%a %a"
285 | pp_cond cond
286 | pp_byte_reg reg
287 |
288 | let pp_instr_list fmt il =
289 | List.iter
290 | (fun i ->
291 | match i with
292 | | Zlabel _ -> fprintf fmt "%a@\n" pp_instruction i
293 | | _ -> fprintf fmt " %a@\n" pp_instruction i)
294 | il
295 |
--------------------------------------------------------------------------------
/src/liveVarAnalysis.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | open Util
20 | open BlockStructure
21 |
22 | type cfg_annot = { gen : Varset.t; kill : Varset.t; live_exit : Varset.t }
23 |
24 | type cfg = (cfg_entry * cfg_annot) list
25 |
26 | let pp_cfg_annot fmt c =
27 | Format.fprintf fmt "{@[gen=%a;@ kill=%a; live_exit=%a@]}"
28 | Varset.pp c.gen
29 | Varset.pp c.kill
30 | Varset.pp c.live_exit
31 |
32 | let pp_cfg1 fmt (c,a) =
33 | Format.fprintf fmt "(@[%a@ %a@])"
34 | BlockStructure.pp_cfg_entry c
35 | pp_cfg_annot a
36 |
37 | let pp_cfg fmt l = pp_list pp_cfg1 fmt l
38 |
39 | (* Add the identifier in a to the generation set *)
40 | let add_gen (a : atomic_exp) (gen : Varset.t) : Varset.t =
41 | match a with
42 | | Num _ -> gen
43 | | Ident i -> Varset.add i gen
44 |
45 | (* Add the identifiers in aes to the generation set *)
46 | let rec add_gen_list (aes : atomic_exp list) (gen : Varset.t) : Varset.t =
47 | match aes with
48 | | [] -> gen
49 | | a::aes -> add_gen_list aes (add_gen a gen)
50 |
51 | (* Compute the gen and kill sets for a basic block, live_exit is set to empty *)
52 | let analyse_block (b : basic_block) : cfg_annot =
53 | let rec analyse_block gen kill b =
54 | match b with
55 | | [] -> (gen, kill)
56 | | AssignOp (i, a1, _, a2) :: b ->
57 | analyse_block (add_gen a1 (add_gen a2 (Varset.remove i gen)))
58 | (Varset.add i kill)
59 | b
60 | | AssignAtom (i, a) :: b ->
61 | analyse_block (add_gen a (Varset.remove i gen)) (Varset.add i kill) b
62 | | Ld (i, j, a) :: b ->
63 | analyse_block (add_gen a (Varset.add j (Varset.remove i gen)))
64 | (Varset.add i kill)
65 | b
66 | | St (i, a1, a2) :: b ->
67 | analyse_block (add_gen a1 (add_gen a2 (Varset.add i gen)))
68 | kill
69 | b
70 | | Call (None, _, aes) :: b ->
71 | analyse_block (add_gen_list aes gen) kill b
72 | | Call (Some i, _, aes) :: b ->
73 | analyse_block (add_gen_list aes (Varset.remove i gen)) (Varset.add i kill) b
74 | | BoundCheck (a1, a2) :: b ->
75 | analyse_block (add_gen a1 (add_gen a2 gen)) kill b
76 | | NullCheck v :: b ->
77 | analyse_block (Varset.add v gen) kill b
78 | in
79 | let (gen,kill) = analyse_block Varset.empty Varset.empty (List.rev b) in
80 | { gen = gen; kill = kill; live_exit = Varset.empty }
81 |
82 | (* Split the annotated cfg into the predecessors of node n, and the other nodes
83 | *)
84 | let find_preds n cfg =
85 | List.partition
86 | (fun (entry, _) ->
87 | match entry.next with
88 | | Return _ -> false
89 | | Next n' ->
90 | n = n'
91 | | Branch (_, n1, n2) ->
92 | n = n1 || n = n2)
93 | cfg
94 |
95 | let add_test_vars (ae1, _, ae2) vars =
96 | let vars' =
97 | match ae1 with
98 | | Ident v -> Varset.add v vars
99 | | _ -> vars
100 | in
101 | match ae2 with
102 | | Ident v -> Varset.add v vars'
103 | | _ -> vars'
104 |
105 | (* What's live right before we are exit a block: initially if the block
106 | returns, then all the globals are alive, since other parts of the code might
107 | read them. Also the returned value is live. For a branch, any variable used
108 | in the condition is live. *)
109 | let init_live_exit (globals : Varset.t) (a : cfg_annot) (next : next_block)
110 | : cfg_annot =
111 | match next with
112 | | Return None -> { a with live_exit = globals }
113 | | Return (Some i) -> { a with live_exit = Varset.add i globals }
114 | | Next _ -> a
115 | | Branch (i, _, _) ->
116 | { a with live_exit = add_test_vars i Varset.empty }
117 |
118 | (* Do live variable analysis, returning an annotated cfg *)
119 | let lva (globals : Varset.t) (cfg : BlockStructure.cfg) : cfg =
120 | (* Initial annotations for all of the blocks. *)
121 | let init_worklist =
122 | List.map
123 | (fun entry ->
124 | (entry,
125 | init_live_exit globals (analyse_block entry.elems) entry.next))
126 | cfg
127 | in
128 | (* Update one block from the worklist.
129 | The worklist and finished_list partition the whole cfg. That is, they must
130 | contain all of the blocks between them, with no duplication. NB, this is
131 | slightly different than the naive worklist algorithm. Here we add a
132 | node to the worklist only when its live_exit would change based on the current
133 | node's live_entry. *)
134 | let rec do_one (worklist : cfg) (finished_list : cfg) : cfg =
135 | match worklist with
136 | | [] -> finished_list
137 | | ((entry, annot) as node) :: worklist ->
138 | let live_entry =
139 | Varset.union annot.gen (Varset.diff annot.live_exit annot.kill)
140 | in
141 | (* Updates contains node's predecessors from the worklist input,
142 | and worklist contains the remaining non-predecessors. *)
143 | let (updates, worklist) = find_preds entry.bnum worklist in
144 | (* Possible_updates contains node's predecessors from the finished_list,
145 | possibly including itself, and finished contains the remaining
146 | non-predecessors. *)
147 | let (possible_updates, finished) =
148 | find_preds entry.bnum (node::finished_list)
149 | in
150 | (* Finished' contains all possible_updates whose live_exits are supersets
151 | of our live entry. There is no need to update them, because no nodes would
152 | be added to their live_exit. updates' contains those previously finished nodes
153 | whose live_exits will change *)
154 | let (finished', updates') =
155 | List.partition
156 | (fun (_, annot) -> Varset.subset live_entry annot.live_exit)
157 | possible_updates
158 | in
159 | (* Update the live_exits of the nodes needing updated and add them to the
160 | worklist *)
161 | let new_worklist =
162 | List.map
163 | (fun (entry, annot) ->
164 | (entry, { annot with
165 | live_exit = Varset.union annot.live_exit live_entry}))
166 | (updates @ updates')
167 | @
168 | worklist
169 | in
170 | do_one new_worklist (finished' @ finished)
171 | in
172 | do_one (List.rev init_worklist) []
173 |
174 | (* Check that the operator cannot have a side-effect *)
175 | let pure_op op : bool =
176 | match op with
177 | | Tokens.Div -> false (* divide by zero *)
178 | | _ -> true
179 |
180 | (* Remove unused writes from elems, assuming that elems is presented backwards,
181 | and live is the set of variables live on exiting the block *)
182 | let rec local_remove_unused_writes (live : Varset.t) (elems : block_elem list)
183 | : block_elem list =
184 | match elems with
185 | | [] -> []
186 | | AssignOp (i, a1, op, a2) :: b ->
187 | if Varset.mem i live || not (pure_op op) then
188 | AssignOp (i, a1, op, a2) ::
189 | local_remove_unused_writes
190 | (add_gen a1 (add_gen a2 (Varset.remove i live))) b
191 | else
192 | local_remove_unused_writes live b
193 | | AssignAtom (i, a) :: b ->
194 | if Varset.mem i live then
195 | AssignAtom (i, a) ::
196 | local_remove_unused_writes (add_gen a (Varset.remove i live)) b
197 | else
198 | local_remove_unused_writes live b
199 | | Ld (i, j, a) :: b ->
200 | if Varset.mem i live then
201 | Ld (i, j, a) ::
202 | local_remove_unused_writes (add_gen a (Varset.add j (Varset.remove i live))) b
203 | else
204 | local_remove_unused_writes live b
205 | | St (i, a1, a2) :: b ->
206 | St (i, a1, a2) ::
207 | local_remove_unused_writes (add_gen a1 (add_gen a2 (Varset.add i live))) b
208 | | Call (i, f, aes) :: b ->
209 | Call (i, f, aes) ::
210 | let live' =
211 | match i with
212 | | None -> live
213 | | Some v -> Varset.remove v live
214 | in
215 | local_remove_unused_writes (add_gen_list aes live') b
216 | | BoundCheck (a1, a2) :: b ->
217 | BoundCheck (a1, a2) :: local_remove_unused_writes (add_gen a1 (add_gen a2 live)) b
218 | | NullCheck v :: b ->
219 | NullCheck v :: local_remove_unused_writes (Varset.add v live) b
220 |
221 | let remove_unused_writes (cfg : cfg) : cfg =
222 | List.map
223 | (fun (entry, annot) ->
224 | let new_elems =
225 | local_remove_unused_writes annot.live_exit (List.rev entry.elems)
226 | in
227 | ({entry with elems = List.rev new_elems}, annot))
228 | cfg
229 |
--------------------------------------------------------------------------------
/src/astInterp.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* An interpreter for ASTs. This is meta circular in the sense that we use
20 | OCaml refs for mutable variables, and we represent arrays with mutable OCaml
21 | arrays. We also model function returns with exceptions. *)
22 |
23 | open SourceAst
24 | module T = Tokens
25 |
26 | (* For when the interpreter crashed, such as array bounds violations *)
27 | exception Crash of string
28 |
29 | (* For errors that a well-typed program can't have *)
30 | exception TypeError
31 |
32 | (* Values are either integers or n-dimensional arrays of integers.
33 | We keep multi-dimensional arrays in a single dimensional one and include a
34 | list of how big each dimension is.
35 | We represent bools as numbers: true = 1L and false = 0L *)
36 | type val_t =
37 | | Vint of int64
38 | | Varray of int list * int64 array
39 |
40 | let val_t_to_int (v : val_t) : int64 =
41 | match v with
42 | | Vint i -> i
43 | | Varray _ -> raise TypeError
44 |
45 | let val_t_to_array (v : val_t) : int list * int64 array =
46 | match v with
47 | | Vint 0L -> raise (Crash "null pointer exception")
48 | | Vint _ -> raise TypeError
49 | | Varray (dims, a) -> (dims, a)
50 |
51 | (* Given the array's dimensions, work out the slot for a particular set of
52 | indices. Sizes and indices must have the same length. Return None if one
53 | of the indices is out of bounds, i.e. greater than the size. *)
54 | let array_address (sizes : int list) (indices : int list) : int option =
55 | (* acc keeps track of the product of the dimensions seen so far *)
56 | let rec f sizes indices (acc : int) =
57 | match (sizes, indices) with
58 | | ([], []) -> 0
59 | | (s1::sizes, i1::indices) ->
60 | i1 * acc + f sizes indices (s1 * acc)
61 | | _ ->
62 | raise TypeError
63 | in
64 | if List.for_all2 (fun s i -> i < s) sizes indices then
65 | Some (f sizes indices 1)
66 | else
67 | None
68 |
69 | let bool_to_int64 (b : bool) : int64 =
70 | if b then 1L else 0L
71 |
72 | let int64_to_bool (i : int64) : bool =
73 | if Int64.compare i 0L = 0 then false else true
74 |
75 | (* Do a primitive operation, but not && or || *)
76 | let do_op op (n1 : int64) (n2 : int64) : int64 =
77 | match op with
78 | | T.Plus -> Int64.add n1 n2
79 | | T.Minus -> Int64.sub n1 n2
80 | | T.Times -> Int64.mul n1 n2
81 | | T.Div ->
82 | if n2 = 0L then
83 | raise (Crash "Division by 0")
84 | else
85 | Int64.div n1 n2
86 | | T.Lt -> bool_to_int64 (Int64.compare n1 n2 < 0)
87 | | T.Gt -> bool_to_int64 (Int64.compare n1 n2 > 0)
88 | | T.Eq -> bool_to_int64 (Int64.compare n1 n2 = 0)
89 | | T.Lshift ->
90 | let i = Int64.to_int n2 in
91 | (* Int64.shift_left is wacky outside of this interval *)
92 | if 0 <= i && i < 64 then
93 | Int64.shift_left n1 i
94 | else
95 | 0L
96 | | T.BitOr -> Int64.logor n1 n2
97 | | T.BitAnd -> Int64.logand n1 n2
98 | | T.And | T.Or -> assert false
99 |
100 | let do_uop uop (n : int64) : int64 =
101 | match uop with
102 | | T.Not ->
103 | bool_to_int64 (not (int64_to_bool n))
104 |
105 | (* The environment will have the code of all of the functions that we can call,
106 | it will also keep the variable bindings in a map of OCaml references.
107 | Assignments to a variables will be interpreted as assignments to the
108 | corresponding reference. Note that an assignment to a variable (of scalar or
109 | array type) mutates the binding for that variable in the environment,
110 | whereas an assignment to an array element (i.e., an array update), actually
111 | changes the array value without modifying the environment binding for the
112 | array.
113 | *)
114 | type env_t = { funs : func Idmap.t; vars : val_t ref Idmap.t }
115 |
116 | (* To model return control flow *)
117 | exception Return_exn of val_t
118 |
119 | (* Add the function's arguments to the environment env at a call site. Assume
120 | that the two input lists are the same length. This is ensured by the type
121 | checker. We also know there are no duplicate parameter names. *)
122 | let rec add_arguments (params : (id * _) list) (args : val_t list) (env : env_t)
123 | : env_t =
124 | match (params, args) with
125 | | ([], []) -> env
126 | | ((x,_)::params, v::args) ->
127 | (* Allocate a mutable reference to store the parameter's value in. Then the
128 | function's body can assign to the variable by changing the binding. *)
129 | let binding = ref v in
130 | add_arguments params args { env with vars = Idmap.add x binding env.vars }
131 | | (_, _) ->
132 | raise TypeError
133 |
134 | (* Compute the value of an expression *)
135 | let rec interp_exp (env : env_t) (e : exp) : val_t =
136 | match e with
137 | | Ident (i, []) ->
138 | !(Idmap.find i env.vars) (* i will be in the environment in a well-typed
139 | program *)
140 | | Ident (i, iexps) ->
141 | let (sizes, a) = val_t_to_array !(Idmap.find i env.vars) in
142 | let indices =
143 | List.map (fun e -> (Int64.to_int (val_t_to_int (interp_exp env e))))
144 | iexps
145 | in
146 | (match array_address sizes indices with
147 | | Some x -> Vint (Array.get a x)
148 | | None -> raise (Crash "array index out of bounds"))
149 | | Call (f, args) ->
150 | (* f will be in the environment in a well-typed program *)
151 | let func = Idmap.find f env.funs in
152 | let arg_vals = List.map (interp_exp env) args in
153 | let new_env1 = add_arguments func.params arg_vals env in
154 | let new_env2 = interp_var_decs new_env1 func.locals in
155 | (try
156 | List.iter (interp_stmt new_env2) func.body;
157 | (* We know this cannot happen because of the type checker, each path in
158 | each function must have a return. Otherwise, we would have to return
159 | a default value *of the right type* *)
160 | raise TypeError
161 | with Return_exn v -> v)
162 | | Num n -> Vint n
163 | | Bool b -> Vint (bool_to_int64 b)
164 | | Op (e1, T.And, e2) ->
165 | (match interp_exp env e1 with
166 | | Vint n ->
167 | if int64_to_bool n then
168 | interp_exp env e2
169 | else
170 | Vint n
171 | | _ -> raise TypeError)
172 | | Op (e1, T.Or, e2) ->
173 | (match interp_exp env e1 with
174 | | Vint n ->
175 | if int64_to_bool n then
176 | Vint n
177 | else
178 | interp_exp env e2
179 | | _ -> raise TypeError)
180 | | Op (e1, op, e2) ->
181 | (match (interp_exp env e1, interp_exp env e2) with
182 | | (Vint n1, Vint n2) -> Vint (do_op op n1 n2)
183 | | _ -> raise TypeError)
184 | | Uop (uop, e) ->
185 | (match interp_exp env e with
186 | | Vint n -> Vint (do_uop uop n)
187 | | _ -> raise TypeError)
188 | | Array iexps ->
189 | let indices =
190 | List.map (fun e -> (Int64.to_int (val_t_to_int (interp_exp env e))))
191 | iexps
192 | in
193 | Varray (indices,
194 | Array.make (List.fold_right (fun x y -> x * y) indices 1) 0L)
195 |
196 | (* Run the initialisation expressions of the variables, and add bindings to the
197 | resulting values into the environment *)
198 | and interp_var_decs (env : env_t) (decs : var_dec list) : env_t =
199 | (* Start the variables at 0, because they can be referenced before they are
200 | initialised. NB, this allows null pointer exceptions (or crashes) into the
201 | language, since the variable can have array type. This is a consequence
202 | of having each global and local scope be a single recursive scope. *)
203 | let new_env =
204 | List.fold_right
205 | (fun v env ->
206 | let binding = ref (Vint 0L) in
207 | Idmap.add v.var_name binding env)
208 | decs
209 | env.vars
210 | in
211 | let env = { env with vars = new_env } in
212 | (* Now run all of the initialisation expressions *)
213 | List.iter
214 | (fun d ->
215 | let v = interp_exp env d.init in
216 | Idmap.find d.var_name env.vars := v)
217 | decs;
218 | env
219 |
220 | (* Run a statement *)
221 | and interp_stmt (env : env_t) (s : stmt) : unit =
222 | match s with
223 | | Assign (i, [], e) ->
224 | let v = interp_exp env e in
225 | Idmap.find i env.vars := v
226 | | Assign (i, iexps, e) ->
227 | (match !(Idmap.find i env.vars) with
228 | | Varray (sizes, a) ->
229 | (let indices =
230 | List.map (fun e -> (Int64.to_int (val_t_to_int (interp_exp env e))))
231 | iexps
232 | in
233 | match array_address sizes indices with
234 | | None -> raise (Crash "array index out of bounds")
235 | | Some x ->
236 | let v = val_t_to_int (interp_exp env e) in
237 | Array.set a x v)
238 | | Vint 0L -> raise (Crash "null pointer exception")
239 | | Vint _ -> raise TypeError)
240 | | DoWhile (head_s, e, body_s) ->
241 | interp_stmt env head_s;
242 | if int64_to_bool (val_t_to_int (interp_exp env e)) then
243 | (interp_stmt env body_s;
244 | interp_stmt env s)
245 | else
246 | ()
247 | | Ite (e, s1, s2) ->
248 | if int64_to_bool (val_t_to_int (interp_exp env e)) then
249 | interp_stmt env s1
250 | else
251 | interp_stmt env s2
252 | | Stmts sl ->
253 | List.iter (interp_stmt env) sl
254 | | In i ->
255 | Printf.printf "> ";
256 | (try
257 | let n = Int64.of_string (read_line ()) in
258 | Idmap.find i env.vars := Vint n
259 | with Failure _ -> raise (Crash "not a 64-bit integer"))
260 | | Out i ->
261 | (print_string (Int64.to_string (val_t_to_int !(Idmap.find i env.vars)));
262 | print_newline ())
263 | | Return None ->
264 | raise (Return_exn (Vint 0L))
265 | | Return (Some i) ->
266 | raise (Return_exn !(Idmap.find i env.vars))
267 | | Loc (s, _) -> interp_stmt env s
268 |
269 | let interp_prog (p : prog) : unit =
270 | let fun_env =
271 | List.fold_right (fun f env -> Idmap.add f.fun_name f env)
272 | p.funcs Idmap.empty
273 | in
274 | ignore (interp_var_decs { funs = fun_env; vars = Idmap.empty } p.globals)
275 |
--------------------------------------------------------------------------------
/src/constProp.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* Do constant propagation and folding. Later compiler phases assume that no
20 | operation has 2 constant arguments, so this needs to be guaranteed here. *)
21 |
22 | open SourceAst
23 | module T = Tokens
24 |
25 | (* Return n, such that 2^n = i, or None if there is no such *)
26 | let log2 (i : int64) : int option =
27 | (* Linear search for the least significant 1 in the binary represenatation.
28 | A binary search might be faster, but probably not worth the hassle. *)
29 | let rec f (i : int64) (shifted : int) : int option =
30 | if Int64.logand i 0x1L = 0L then
31 | f (Int64.shift_right i 1) (shifted + 1)
32 | else if i = 0x1L then
33 | Some shifted
34 | else
35 | None
36 | in
37 | if i > 0L then
38 | f i 0
39 | else
40 | None
41 |
42 | (* Decides whether evaluating an expression might have a side-effect *)
43 | let rec might_have_effect (e : exp) : bool =
44 | match e with
45 | | Ident (_, []) -> false
46 | | Ident (_, _) -> true (* Array bound check failure *)
47 | | Num _ | Bool _ -> false
48 | | Op (e1, op, e2) ->
49 | if op = T.Div then
50 | true (* Divide by zero error *)
51 | else
52 | might_have_effect e1 || might_have_effect e2
53 | | Uop (_, e) -> might_have_effect e
54 | | Array es -> List.exists might_have_effect es
55 | | Call _ -> true (* The called function could do something *)
56 |
57 | (* Check whether two expressions are equal, and don't have effects. This lets
58 | some operators remove them. Note that we are just comparing the structure of
59 | the expressions, so we won't see that x + (y + z) is equal to (x + y) + z,
60 | etc. *)
61 | let ok_remove_eq_exp (e1 : exp) (e2 : exp) : bool =
62 | e1 = e2 &&
63 | not (might_have_effect e1) &&
64 | not (might_have_effect e2)
65 |
66 | let between_0_63 (i : int64) : bool =
67 | Int64.compare i (-1L) = 1 && Int64.compare i 64L = -1
68 |
69 | (* Statically evaluate an expression according to the identifier values in env,
70 | don't try to follow constants in arrays. Operates in a single bottom-up pass.
71 | Doesn't do anything with associativity or commutativity, so things like
72 | (x + 1) + 2 don't get changed. *)
73 | let rec fold_exp (env : exp Idmap.t) (e : exp) : exp =
74 | match e with
75 | | Ident (i, []) ->
76 | (try Idmap.find i env
77 | with Not_found -> e)
78 | | Ident (i, es) -> Ident (i, List.map (fold_exp env) es)
79 | | Num n -> Num n
80 | | Bool b -> Bool b
81 | | Op (e1, op, e2) ->
82 | let o1 = fold_exp env e1 in
83 | let o2 = fold_exp env e2 in
84 | (match (o1, op, o2) with
85 | (* Plus *)
86 | | (Num n1, T.Plus, Num n2) -> Num (Int64.add n1 n2)
87 | | (Num 0L, T.Plus, e) | (e, T.Plus, Num 0L) -> e
88 |
89 | (* Minus *)
90 | | (Num n1, T.Minus, Num n2) -> Num (Int64.sub n1 n2)
91 | | (e, T.Minus, Num 0L) -> e
92 | | (e1, T.Minus, e2) when ok_remove_eq_exp e1 e2 -> Num 0L
93 |
94 | (* Times *)
95 | | (Num n1, T.Times, Num n2) -> Num (Int64.mul n1 n2)
96 | | (e, T.Times, Num 1L) | (Num 1L, T.Times, e) -> e
97 | | (Num -1L, T.Times, e) | (e, T.Times, Num -1L) ->
98 | Op (Num 0L, T.Minus, e)
99 | (* Can't use or-patterns for the following two, according to compiler warning *)
100 | | (Num 0L, T.Times, e) when not (might_have_effect e) ->
101 | Num 0L
102 | | (e, T.Times, Num 0L) when not (might_have_effect e) ->
103 | Num 0L
104 | | (e, T.Times, Num n) | (Num n, T.Times, e) ->
105 | (match log2 (Int64.abs n) with
106 | | None -> Op (o1, op, o2)
107 | | Some log ->
108 | let shift_op = Op (e, T.Lshift, Num (Int64.of_int log)) in
109 | if n < 0L then
110 | Op (Num 0L, T.Minus, shift_op)
111 | else
112 | shift_op)
113 |
114 | (* Div *)
115 | | (Num n1, T.Div, Num n2) when n2 <> 0L -> Num (Int64.div n1 n2)
116 | | (Num _, T.Div, Num 0L) ->
117 | (* This phase needs to guarantee that no operation has two constant
118 | arguments. It doesn't matter what the numerator is, when the
119 | divisor is 0, so we can just use an arbitrary variable. *)
120 | Op (Ident (Temp ("CP", 0), []), T.Div, Num 0L)
121 | | (e, T.Div, Num 1L) -> e
122 |
123 | (* Less *)
124 | | (Num n1, T.Lt, Num n2) -> Bool (Int64.compare n1 n2 < 0)
125 | | (e1, T.Lt, e2) when ok_remove_eq_exp e1 e2 -> Bool false
126 |
127 | (* Greater *)
128 | | (Num n1, T.Gt, Num n2) -> Bool (Int64.compare n1 n2 > 0)
129 | | (e1, T.Gt, e2) when ok_remove_eq_exp e1 e2 -> Bool false
130 |
131 | (* Equal *)
132 | | (Num n1, T.Eq, Num n2) -> Bool (Int64.compare n1 n2 = 0)
133 | | (e1, T.Eq, e2) when ok_remove_eq_exp e1 e2 -> Bool true
134 |
135 | (* Shift left *)
136 | | (Num n1, T.Lshift, Num n2) ->
137 | (* Ocaml's shift_left is only defined between 0 and 63 inclusive *)
138 | if between_0_63 n2 then
139 | Num (Int64.shift_left n1 (Int64.to_int n2))
140 | else
141 | Num 0L
142 | | (e, T.Lshift, Num 0L) -> e
143 | | (Num 0L, T.Lshift, e) when not (might_have_effect e) ->
144 | Num 0L
145 |
146 | (* Bitwise or *)
147 | | (Num n1, T.BitOr, Num n2) -> Num (Int64.logor n1 n2)
148 | | (Num 0L, T.BitOr, e) | (e, T.BitOr, Num 0L) -> e
149 | (* Can't use or-patterns for the following two, according to compiler warning *)
150 | | (Num 0xFFFFFFFFFFFFFFFFL, T.BitOr, e) when not (might_have_effect e) ->
151 | Num 0xFFFFFFFFFFFFFFFFL
152 | | (e, T.BitOr, Num 0xFFFFFFFFFFFFFFFFL) when not (might_have_effect e) ->
153 | Num 0xFFFFFFFFFFFFFFFFL
154 |
155 | (* Bitwise and *)
156 | | (Num n1, T.BitAnd, Num n2) -> Num (Int64.logand n1 n2)
157 | | (Num 0xFFFFFFFFFFFFFFFFL, T.BitAnd, e)
158 | | (e, T.BitAnd, Num 0xFFFFFFFFFFFFFFFFL) -> e
159 | (* Can't use or-patterns for the following two, according to compiler warning *)
160 | | (Num 0L, T.BitAnd, e) when not (might_have_effect e) -> Num 0L
161 | | (e, T.BitAnd, Num 0L) when not (might_have_effect e) -> Num 0L
162 |
163 | (* And *)
164 | | (Bool true, T.And, e) | (e, T.And, Bool true) -> e
165 | | (Bool false, T.And, _) -> Bool false
166 | | (e, T.And, Bool false) when not (might_have_effect e) -> Bool false
167 | | (e1, T.And, e2) when ok_remove_eq_exp e1 e2 -> e1
168 |
169 | (* Or *)
170 | | (Bool false, T.Or, e) | (e, T.Or, Bool false) -> e
171 | | (Bool true, T.Or, _) -> Bool true
172 | | (e, T.Or, Bool true) when not (might_have_effect e) -> Bool true
173 | | (e1, T.Or, e2) when ok_remove_eq_exp e1 e2 -> e1
174 |
175 | | _ -> Op (o1, op, o2))
176 | | Uop (uop, e) ->
177 | let o = fold_exp env e in
178 | (match (uop, o) with
179 | | (T.Not, Bool b) -> Bool (not b)
180 | | (T.Not, Uop (T.Not, e)) -> e
181 | | _ -> Uop (uop, o))
182 | | Array es ->
183 | Array (List.map (fold_exp env) es)
184 | | Call (f, es) ->
185 | Call (f, List.map (fold_exp env) es)
186 |
187 | let is_const (e : exp) : bool =
188 | match e with
189 | | Num _ | Bool _ -> true
190 | | _ -> false
191 |
192 | let same_const (e1 : exp) (e2 : exp) : bool =
193 | match (e1,e2) with
194 | | (Num n1, Num n2) -> Int64.compare n1 n2 = 0
195 | | (Bool b1, Bool b2) -> b1 = b2
196 | | _ -> false
197 |
198 | (* If v1 and v2 contain the same constant, return it. Otherwise return None *)
199 | let merge_constants (_ : id) (v1 : exp option) (v2 : exp option)
200 | : exp option =
201 | match (v1,v2) with
202 | | (Some e1, Some e2) ->
203 | if same_const e1 e2 then
204 | Some e1
205 | else None
206 | | _ -> None
207 |
208 | (* Do constant propagation. Accumulate an environment of definitely known
209 | constants at the end of stmts, given definitely known constants env at the
210 | start. *)
211 | let rec prop_stmts (env : exp Idmap.t) (stmts : stmt list)
212 | : exp Idmap.t * stmt list =
213 | match stmts with
214 | | [] -> (env,[])
215 | | Assign (x, [], e) :: stmts ->
216 | let o1 = fold_exp env e in
217 | let first_env =
218 | if is_const o1 then Idmap.add x o1 env else Idmap.remove x env
219 | in
220 | let (env',stmts') = prop_stmts first_env stmts in
221 | (env', Assign (x, [], o1) :: stmts')
222 | | Assign (x, es, e) :: stmts ->
223 | let o = fold_exp env e in
224 | let os = List.map (fold_exp env) es in
225 | let (env', stmts') = prop_stmts env stmts in
226 | (env', Assign (x, os, o) :: stmts')
227 | | DoWhile (s0, e, s1) :: stmts ->
228 | (* s0 is always executed *)
229 | let (env_init, t) = prop_stmt env s0 in
230 | let o = fold_exp env_init e in
231 | (match o with
232 | | Bool false ->
233 | let (env', stmts') = prop_stmts env_init stmts in
234 | (env', t::stmts')
235 | | _ ->
236 | (* From this point, the loop might execute s1;s0 0 or more times, so we
237 | calculate env1 which contains the constants that are constant no
238 | matter how many times the loop is executed *)
239 | let env1 = prop_loop_body env_init [s1; s0] in
240 | (* Now re-calculate the constant propagation for the things constant no
241 | matter how many times we iterate the loop *)
242 | let (_, t1) = prop_stmt env1 s1 in
243 | let o = fold_exp env1 e in
244 | let (_, t0) = prop_stmt env1 s0 in
245 | let (env',stmts') = prop_stmts env1 stmts in
246 | (env', DoWhile (t0, o, t1) :: stmts'))
247 | | Ite (e, s1, s2) :: stmts ->
248 | let o1 = fold_exp env e in
249 | (match o1 with
250 | | Bool true ->
251 | let (env1, os1) = prop_stmt env s1 in
252 | let (env', stmts') = prop_stmts env1 stmts in
253 | (env', os1::stmts')
254 | | Bool false ->
255 | let (env2, os2) = prop_stmt env s2 in
256 | let (env', stmts') = prop_stmts env2 stmts in
257 | (env', os2::stmts')
258 | | _ ->
259 | let (env1, os1) = prop_stmt env s1 in
260 | let (env2, os2) = prop_stmt env s2 in
261 | (* Only include constants that are known to be the same at the end of
262 | both branches of the Ite for the subsequent statements. *)
263 | let (env',stmts') =
264 | prop_stmts (Idmap.merge merge_constants env1 env2) stmts
265 | in
266 | (env', Ite (o1, os1, os2) :: stmts'))
267 | | Stmts (stmts1) :: stmts ->
268 | let (env1, os1) = prop_stmts env stmts1 in
269 | let (env', stmts') = prop_stmts env1 stmts in
270 | (env', Stmts (os1) :: stmts')
271 | | In x :: stmts ->
272 | let (env',stmts') = prop_stmts (Idmap.remove x env) stmts in
273 | (env', In x :: stmts')
274 | | Out x :: stmts ->
275 | let (env',stmts') = prop_stmts env stmts in
276 | (env', Out x :: stmts')
277 | | Loc (s, ln) :: stmts ->
278 | let (env1, o1) = prop_stmt env s in
279 | let (env', stmts') = prop_stmts env1 stmts in
280 | (env', Loc (o1, ln) :: stmts')
281 | | Return x :: _ ->
282 | (* We can't carry on past a return *)
283 | (env, [Return x])
284 |
285 | and prop_stmt env (stmt : stmt) =
286 | match prop_stmts env [stmt] with
287 | | (env,[os]) -> (env,os)
288 | | _ -> assert false
289 |
290 | (* Given possibly known constants env at the start, compute the definitely
291 | known constants at the end, assuming that stmts is run in a loop body an
292 | unknown number of times. *)
293 | and prop_loop_body (env : exp Idmap.t) (stmts : stmt list) : exp Idmap.t =
294 | let (env', _) = prop_stmts env stmts in
295 | (* The next approximation of constants at the start *)
296 | let env'' = Idmap.merge merge_constants env env' in
297 | if Idmap.equal same_const env env'' then
298 | (* Same as last time, fixed point reached *)
299 | env''
300 | else
301 | prop_loop_body env'' stmts
302 |
--------------------------------------------------------------------------------
/src/typeCheck.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* The type checker. In addition to checking the types, we build a new AST that
20 | annotates the scope of all of the variables. Each use of a variable either
21 | refers to a global variable, a local variable, or a function parameter. The
22 | type checker keeps track of which sort each variable is and annotates each
23 | use with this information. This allows the register allocator and back end
24 | to treat them differently. *)
25 |
26 | open Util
27 | open SourceAst
28 | module T = Tokens
29 |
30 | (* Types *)
31 | type t =
32 | | Tint
33 | | Tbool
34 | | Tarray of int (* The number of dimensions that the array has *)
35 |
36 | let show_t t =
37 | match t with
38 | | Tint -> "int"
39 | | Tbool -> "bool"
40 | | Tarray n -> "array " ^ string_of_int n
41 |
42 | (* Map functions to their types, and variables to their types and scope *)
43 | type env_t = { funs : (t list * t) Idmap.t; vars : (t * scope) Idmap.t }
44 |
45 | (* Raise a type error *)
46 | let type_error (ln : int option) (msg : string) : 'a =
47 | match ln with
48 | | Some ln ->
49 | raise (BadInput ("Type error on line " ^ string_of_int ln ^ ": " ^ msg))
50 | | None ->
51 | raise (BadInput ("Type error at unknown location: " ^ msg))
52 |
53 | (* Annotate an identifier with its scope *)
54 | let add_scope (id : id) (s : scope) : id =
55 | match id with
56 | | Source (i, None) -> Source (i, Some s)
57 | | Source (_, Some _) ->
58 | raise (InternalError "scoped identifier during type checking")
59 | | Temp (x, y) -> Temp (x, y)
60 |
61 | (* Compute the type of an expression, or raise BadInput if there is a type
62 | error. Also return a new scope-annotated version of the expression. *)
63 | let rec type_exp (ln : int option) (env : env_t) (e : exp) : t * exp =
64 | match e with
65 | | Ident (i, es) ->
66 | let (t, scope) =
67 | try Idmap.find i env.vars
68 | with Not_found ->
69 | type_error ln ("reference to undeclared variable " ^ show_id i)
70 | in
71 | let (ts, es') = List.split (List.map (type_exp ln env) es) in
72 | let l = List.length ts in
73 | let annotated_exp = Ident (add_scope i scope, es') in
74 | (* If there are no indices, then just use the type of the identifier, even
75 | if it is of array type. If there are indices, they must fully index the
76 | array, which always contains ints *)
77 | if l = 0 then
78 | (t, annotated_exp)
79 | else
80 | (match t with
81 | | Tarray num_dims ->
82 | if num_dims = l && List.for_all (fun x -> x = Tint) ts then
83 | (Tint, annotated_exp)
84 | else if num_dims = l then
85 | type_error ln "array index with non-integer type"
86 | else
87 | type_error ln ("array reference with " ^ string_of_int l ^
88 | " indices, expected " ^ string_of_int num_dims)
89 | | _ ->
90 | type_error ln ("attempt to index non-array variable " ^ show_id i))
91 | | Call (f, args) ->
92 | let (param_types, ret_type) =
93 | try Idmap.find f env.funs
94 | with Not_found ->
95 | type_error ln ("call to undefined function " ^ show_id f)
96 | in
97 | let (ts, es') = List.split (List.map (type_exp ln env) args) in
98 | (* A local function to check that the parameter and argument types match
99 | up. Use n to keep track of which position, for the error message. Assume
100 | that the lists are of the same length. *)
101 | let rec check (n : int) (pts : t list) (ats : t list) : unit =
102 | match (pts, ats) with
103 | | ([], []) -> ()
104 | | (pt::pts, at::ats) ->
105 | if pt = at then check (n+1) pts ats else
106 | type_error ln ("function call given argument of type " ^ show_t at ^
107 | " but expected type " ^ show_t pt ^
108 | " in argument number " ^ string_of_int n)
109 | | _ -> assert false
110 | in
111 | if List.length param_types <> List.length args then
112 | type_error ln ("function call expects " ^
113 | string_of_int (List.length param_types) ^
114 | " arguments, given " ^
115 | string_of_int (List.length ts))
116 | else
117 | check 1 param_types ts;
118 | (ret_type, Call (f, es'))
119 | | Num n -> (Tint, Num n)
120 | | Bool b -> (Tbool, Bool b)
121 | | Op (e1, op, e2) ->
122 | let (t1, e1') = type_exp ln env e1 in
123 | let (t2, e2') = type_exp ln env e2 in
124 | let t =
125 | match (t1, op, t2) with
126 | | (Tbool, (T.And | T.Or | T.Eq), Tbool) -> Tbool
127 | | (Tint, (T.Plus | T.Minus | T.Times | T.Div | T.Lshift | T.BitOr |
128 | T.BitAnd), Tint) ->
129 | Tint
130 | | (Tint, (T.Lt | T.Eq | T.Gt), Tint) -> Tbool
131 | | (t1, _, t2) ->
132 | type_error ln ("operator " ^ T.show_op op ^
133 | " applied to expressions of types " ^ show_t t1 ^
134 | " and " ^ show_t t2)
135 | in
136 | (t, Op (e1', op, e2'))
137 | | Uop (uop, e) ->
138 | let (t, e') = type_exp ln env e in
139 | (match (uop, t) with
140 | | (T.Not, Tbool) -> (Tbool, Uop (uop, e'))
141 | | (T.Not, t) ->
142 | type_error ln ("operator " ^ T.show_uop uop ^
143 | " applied to expression of type " ^ show_t t))
144 | | Array es ->
145 | let (ts, es') = List.split (List.map (type_exp ln env) es) in
146 | let l = List.length ts in
147 | if l = 0 then
148 | type_error ln "attempt to allocate array with 0 dimensions"
149 | else if List.for_all (fun x -> x = Tint) ts then
150 | (Tarray l, Array es')
151 | else
152 | type_error ln "array dimension with non-integer type"
153 |
154 | (* Type check an identifier without array indices *)
155 | let type_simple_ident (ln : int option) (env : env_t) (i : id) : t * id =
156 | match type_exp ln env (Ident (i, [])) with
157 | | (t, Ident (i', [])) -> (t, i')
158 | | _ -> assert false (* type_exp does not change the shape of the expression *)
159 |
160 | (* Type check a statement. Raise BadInput if there is an error. return gives
161 | the return type of the enclosing function. ln gives the current line number.
162 | Return a scope annotated version of the statement. Also strip the location
163 | annotations.
164 | *)
165 | let rec type_stmt (ln : int option) (env :env_t) (return : t) (stmt : stmt)
166 | : stmt =
167 | match stmt with
168 | | In i ->
169 | let (t, i') = type_simple_ident ln env i in
170 | if t <> Tint then
171 | type_error ln "input with non-integer type"
172 | else
173 | In i'
174 | | Out i ->
175 | let (t, i') = type_simple_ident ln env i in
176 | if t <> Tint then
177 | type_error ln "output with non-integer type"
178 | else
179 | Out i'
180 | | Return (Some i) ->
181 | let (t, i') = type_simple_ident ln env i in
182 | if t <> return then
183 | type_error ln ("return has type " ^ show_t t ^
184 | " in a function with return type " ^
185 | show_t return)
186 | else
187 | Return (Some i')
188 | | Return None ->
189 | type_error ln "return without value"
190 | | Assign (x, es, e) ->
191 | (match type_exp ln env (Ident (x, es)) with
192 | | (t1, Ident (x', es')) ->
193 | let (t2, e') = type_exp ln env e in
194 | if t1 <> t2 then
195 | type_error ln ("assignment to variable of type " ^ show_t t1 ^ " with type " ^
196 | show_t t2)
197 | else
198 | Assign (x', es', e')
199 | | _ -> assert false)
200 | | DoWhile (s1, e, s2) ->
201 | let s1' = type_stmt ln env return s1 in
202 | (match type_exp ln env e with
203 | | (Tbool, e') ->
204 | let s2' = type_stmt ln env return s2 in
205 | DoWhile (s1', e', s2')
206 | | _ ->
207 | type_error ln "do/while test of non-bool type")
208 | | Ite (e, s1, s2) ->
209 | (match type_exp ln env e with
210 | | (Tbool, e') ->
211 | let s1' = type_stmt ln env return s1 in
212 | let s2' = type_stmt ln env return s2 in
213 | Ite (e', s1', s2')
214 | | _ -> type_error ln "if test of non-bool type")
215 | | Stmts (s_list) ->
216 | Stmts (List.map (type_stmt ln env return) s_list)
217 | | Loc (s, ln') ->
218 | type_stmt (Some ln') env return s
219 |
220 | let source_typ_to_t (t : SourceAst.typ) : t =
221 | match t with
222 | | Int -> Tint
223 | | Bool -> Tbool
224 | | Array n -> Tarray n
225 |
226 | (* Get the declared types of all of the parameters. Raise an exception if a
227 | duplicate name is found, using the location ln. Accumulate the answer in
228 | param_env. *)
229 | let rec get_param_types (ln : int option) (params : (id * typ) list)
230 | (param_env : (t * scope) Idmap.t)
231 | : (t * scope) Idmap.t =
232 | match params with
233 | | [] -> param_env
234 | | (x,t)::params ->
235 | if Idmap.mem x param_env then
236 | type_error ln ("duplicate function parameter " ^ show_id x)
237 | else
238 | get_param_types ln params
239 | (Idmap.add x (source_typ_to_t t, Parameter) param_env)
240 |
241 | (* Get the declared types of all of the variables. Raise an exception if a
242 | duplicate is found. Accumulate the answer in var_env. *)
243 | let rec get_var_types (s : scope) (vars : var_dec list)
244 | (var_env : (t * scope) Idmap.t)
245 | : (t * scope) Idmap.t =
246 | match vars with
247 | | [] -> var_env
248 | | v::vars ->
249 | if Idmap.mem v.var_name var_env then
250 | type_error v.loc ("duplicate variable definition " ^ show_id v.var_name)
251 | else
252 | get_var_types s vars
253 | (Idmap.add v.var_name (source_typ_to_t v.typ, s) var_env)
254 |
255 | (* Check the init expressions on a variable declaration. Return a
256 | scope-annotated version of the declaration. *)
257 | let type_var_dec (s : scope) (env : env_t) (dec : var_dec) : var_dec =
258 | let (t, init') = type_exp dec.loc env dec.init in
259 | if t = source_typ_to_t dec.typ then
260 | { dec with var_name = add_scope dec.var_name s; init = init' }
261 | else
262 | type_error dec.loc ("initialisation of variable typed " ^
263 | show_t (source_typ_to_t dec.typ) ^
264 | " to expression of type " ^ show_t t)
265 |
266 | (* See the documentation for Map.merge in the OCaml standard library *)
267 | let merge_keep_first _ (x : 'a option) (y : 'a option) : 'a option =
268 | match (x,y) with
269 | | (None, None) -> None
270 | | (Some x, Some _) -> Some x
271 | | (Some x, None) -> Some x
272 | | (None, Some y) -> Some y
273 |
274 | (* Determine whether that each control-flow path through the stmts must include
275 | a return. This way we know that a function cannot fall off the end, without
276 | returning a value of the correct type. *)
277 | let rec check_return_paths (stmts : stmt list) : bool =
278 | match stmts with
279 | | [] -> false
280 | | Return _ :: _ -> true
281 | | s::stmts ->
282 | if check_return_paths stmts then
283 | true
284 | else (* The remainder might not return, so the first statement must *)
285 | match s with
286 | | Return _ -> assert false (* already checked for this case *)
287 | | In _ | Out _ | Assign _ -> false
288 | | Loc (s,_) -> check_return_paths [s]
289 | | Ite (_,s1,s2) ->
290 | check_return_paths [s1] && check_return_paths [s2]
291 | | DoWhile (s1, _, _) ->
292 | check_return_paths [s1]
293 | | Stmts s -> check_return_paths s
294 |
295 | (* Check a function. Return a scope-annotated version is there are no errors. *)
296 | let type_function (env : env_t) (f : func) : func =
297 | let param_env = get_param_types f.loc f.params Idmap.empty in
298 | let local_env = get_var_types Local f.locals Idmap.empty in
299 | (* Add the parameter and locals to the env. Ensure that the locals shadow the
300 | parameters and globals, and that the parameters shadow the globals. *)
301 | let env1 = Idmap.merge merge_keep_first param_env env.vars in
302 | let env2 = Idmap.merge merge_keep_first local_env env1 in
303 | let new_env = { env with vars = env2 } in
304 | let locals' = List.map (type_var_dec Local new_env) f.locals in
305 | let body' =
306 | List.map (type_stmt None new_env (source_typ_to_t f.ret)) f.body
307 | in
308 | if not (check_return_paths f.body) then
309 | type_error f.loc "function might not return"
310 | else
311 | { f with
312 | params = List.map (fun (i,t) -> (add_scope i Parameter, t)) f.params;
313 | locals = locals';
314 | body = body' }
315 |
316 | (* Get the declared types of all of the functions. Raise an exception if a
317 | duplicate is found. Accumulate the answer in fun_env. *)
318 | let rec get_function_types (funcs : func list) (fun_env : (t list * t) Idmap.t)
319 | : (t list * t) Idmap.t =
320 | match funcs with
321 | | [] -> fun_env
322 | | f::funcs ->
323 | if Idmap.mem f.fun_name fun_env then
324 | type_error f.loc ("duplicate function definition " ^ show_id f.fun_name)
325 | else
326 | get_function_types funcs
327 | (Idmap.add f.fun_name
328 | (List.map (fun (_,t) -> source_typ_to_t t) f.params,
329 | source_typ_to_t f.ret)
330 | fun_env)
331 |
332 | let type_prog (p : prog) : prog =
333 | (* Get the types of the globals and functions. *)
334 | let env =
335 | { funs = get_function_types p.funcs Idmap.empty;
336 | vars = get_var_types Global p.globals Idmap.empty }
337 | in
338 | (* Check the function bodies and global initialisations in the environment
339 | with types for all of the globals and functions. This means that the
340 | top-level is one recursive scope and each function body and global
341 | initialisation can refer to any of the others (or itself). *)
342 | let globals' = List.map (type_var_dec Global env) p.globals in
343 | let funcs' = List.map (type_function env) p.funcs in
344 | { funcs = funcs'; globals = globals' }
345 |
--------------------------------------------------------------------------------
/src/instrSelX86.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* Convert linearised three-address code to x86-64 *)
20 |
21 | exception TODO of string
22 |
23 | open BlockStructure
24 | open X86
25 | module L = LineariseCfg
26 | module T = Tokens
27 |
28 | let tok_to_binop t =
29 | match t with
30 | | T.Plus -> Zadd
31 | | T.Minus -> Zsub
32 | | T.Lshift -> Zshl
33 | | T.BitOr -> Zor
34 | | T.BitAnd -> Zand
35 | | _ -> assert false
36 |
37 | (* Save RSP and RBP for stack stuff,
38 | save RAX and RDX for scratch, and index RAX by -1
39 | RAX is chosen because division needs it. Alas, it also needs RDX, which is
40 | used in the calling convention, so we keep R10 as scratch instead. *)
41 | let reg_numbers =
42 | [(-1, RAX);
43 | (0, RBX);
44 | (1, RCX);
45 | (2, RDX);
46 | (3, RSI);
47 | (4, RDI);
48 | (5, R8);
49 | (6, R9);
50 | (7, R11);
51 | (8, R12);
52 | (9, R13);
53 | (10, R14);
54 | (11, R15)]
55 | let reg_numbers_flip =
56 | List.map (fun (x,y) -> (y,x)) reg_numbers
57 |
58 | let reg_list = [RDI; RSI; RDX; RCX; R8; R9]
59 | let num_regs = List.length reg_numbers - 1
60 | let argument_reg_numbers =
61 | List.map (fun r -> (List.assoc r reg_numbers_flip)) reg_list
62 |
63 | (* Assume that we have kept RAX and R10 free for scratch space *)
64 | let r_scratch = RAX
65 | let r_scratch2 = R10
66 |
67 | (* Convert a variable, which can be a register, global or stack slot, to a rm *)
68 | let var_to_rm (v : var) : rm =
69 | match v with
70 | | Vreg i -> Zr (List.assoc i reg_numbers)
71 | | Stack i -> Zm (None, Some RBP, Some (Concrete_disp (Int64.of_int (-8 * (i+1)))))
72 | | Global g -> Zm (None, None, Some (Label_disp g))
73 | | n -> raise (Util.InternalError ("named variable in instrSelX86: " ^ show_var n))
74 |
75 | let rm_rm_to_dest_src (dest_rm : rm) (src_rm : rm)
76 | : instruction list * dest_src =
77 | match (dest_rm, src_rm) with
78 | | (Zr r, _) -> ([], Zr_rm (r, src_rm))
79 | | (_, Zr r) -> ([], Zrm_r (dest_rm, r))
80 | | (Zm _, Zm _) ->
81 | ([Zmov (Zr_rm (r_scratch, src_rm))], Zrm_r (dest_rm, r_scratch))
82 |
83 | let rm_ae_to_dest_src (dest_rm : rm) (src_ae : atomic_exp)
84 | : instruction list * dest_src =
85 | match src_ae with
86 | | Num i -> ([], Zrm_i (dest_rm, i))
87 | | Ident src_var -> rm_rm_to_dest_src dest_rm (var_to_rm src_var)
88 |
89 | (* Convert a heap reference, which is a variable to be deferenced, offset by
90 | another variable or immediate *)
91 | let heap_to_rm (base : var) (offset : atomic_exp) : instruction list * rm =
92 | match (base, offset) with
93 | (* Simple addresses *)
94 | | (Vreg b, Num o) ->
95 | ([], Zm (None, Some (List.assoc b reg_numbers), Some (Concrete_disp o)))
96 | | ((Stack _ | Global _ ) as v, Num o) ->
97 | ([Zmov (Zr_rm (r_scratch2, var_to_rm v))],
98 | Zm (None, Some r_scratch2, Some (Concrete_disp o)))
99 | | (Vreg b, Ident (Vreg o)) ->
100 | ([],
101 | Zm (Some (1, List.assoc o reg_numbers),
102 | Some (List.assoc b reg_numbers),
103 | None))
104 | | ((Stack _ | Global _) as v, Ident (Vreg r))
105 | (* 1 indirection *)
106 | | (Vreg r, Ident ((Stack _ | Global _) as v)) ->
107 | ([Zmov (Zr_rm (r_scratch2, var_to_rm v))],
108 | Zm (Some (1, List.assoc r reg_numbers),
109 | Some r_scratch2,
110 | None))
111 | (* 2 indirections *)
112 | | (((Stack _ | Global _) as v1), Ident ((Stack _ | Global _) as v2)) ->
113 | ([Zmov (Zr_rm (r_scratch2, var_to_rm v1));
114 | Zbinop (Zadd, Zr_rm (r_scratch2, var_to_rm v2))],
115 | Zm (None,
116 | Some r_scratch2,
117 | None))
118 | | ((NamedSource _ | NamedTmp _), _) ->
119 | raise (Util.InternalError "Named variables in instrSelX86")
120 | | (_, Ident (NamedSource _ | NamedTmp _)) ->
121 | raise (Util.InternalError "Named variables in instrSelX86")
122 |
123 | (* Build the operation for r := r op ae *)
124 | let build_to_reg_op (op : Tokens.op) (r : reg) (ae : atomic_exp)
125 | : instruction list =
126 | match (op, ae) with
127 | | ((T.Plus | T.Minus | T.Lshift | T.BitOr | T.BitAnd), Num i) ->
128 | [Zbinop (tok_to_binop op, Zrm_i (Zr r, i))]
129 | | ((T.Plus | T.Minus | T.Lshift | T.BitOr | T.BitAnd), Ident v) ->
130 | [Zbinop (tok_to_binop op, Zr_rm (r, var_to_rm v))]
131 | | (T.Times, Num i) ->
132 | [Zimul (r, Zr r, Some i)]
133 | | (T.Times, Ident v) ->
134 | [Zimul (r, var_to_rm v, None)]
135 | | (T.Div, _) ->
136 | [Zmov (Zr_rm (r_scratch2, Zr RDX));
137 | Zmov (Zrm_i (Zr RDX, 0L));
138 | Zmov (Zr_rm (RAX, Zr r));
139 | (match ae with
140 | | Ident v -> Zidiv (var_to_rm v)
141 | | Num _ ->
142 | raise (Util.InternalError "Division by immediate constant"));
143 | Zmov (Zr_rm (r, Zr RAX));
144 | Zmov (Zr_rm (RDX, Zr r_scratch2))]
145 | | ((T.Lt | T.Gt | T.Eq), _) ->
146 | assert false
147 | | ((T.And | T.Or), _) ->
148 | assert false
149 |
150 | let reverse_op op =
151 | match op with
152 | | T.Gt -> T.Lt
153 | | T.Lt -> T.Gt
154 | | T.Eq -> T.Eq
155 | | _ -> assert false
156 |
157 | let op_to_cond op =
158 | match op with
159 | | T.Gt -> Z_G
160 | | T.Lt -> Z_L
161 | | T.Eq -> Z_E
162 | | _ -> assert false
163 |
164 | (* ------------ Calling convention stuff ------------ *)
165 |
166 | (* Don't save RAX since it is our scratch. Remember to do an even number before
167 | a call for alignment, which must be 16-bytes at (external) function calls.
168 | This is why we also push R10, even though it is scratch too. *)
169 | let caller_save =
170 | [Zpush (Zi_rm (Zr RCX));
171 | Zpush (Zi_rm (Zr RDX));
172 | Zpush (Zi_rm (Zr RSI));
173 | Zpush (Zi_rm (Zr RDI));
174 | Zpush (Zi_rm (Zr R8));
175 | Zpush (Zi_rm (Zr R9));
176 | Zpush (Zi_rm (Zr R10));
177 | Zpush (Zi_rm (Zr R11))]
178 |
179 | let caller_restore =
180 | [Zpop (Zr R11);
181 | Zpop (Zr R10);
182 | Zpop (Zr R9);
183 | Zpop (Zr R8);
184 | Zpop (Zr RDI);
185 | Zpop (Zr RSI);
186 | Zpop (Zr RDX);
187 | Zpop (Zr RCX)]
188 |
189 | let callee_save =
190 | [Zpush (Zi_rm (Zr RBX));
191 | Zpush (Zi_rm (Zr R12));
192 | Zpush (Zi_rm (Zr R13));
193 | Zpush (Zi_rm (Zr R14));
194 | Zpush (Zi_rm (Zr R15))]
195 |
196 | let callee_restore =
197 | [Zpop (Zr R15);
198 | Zpop (Zr R14);
199 | Zpop (Zr R13);
200 | Zpop (Zr R12);
201 | Zpop (Zr RBX)]
202 |
203 | (* The order that the calling convention puts arguments into registers *)
204 | let reg_list = [RDI; RSI; RDX; RCX; R8; R9]
205 |
206 | (* Where the saved versions of the argument registers are on the stack. Depends
207 | on the order in caller_save. This is relative to RSP. *)
208 | let stack_reg_to_offset r =
209 | match r with
210 | | RDI -> 32L
211 | | RSI -> 40L
212 | | RDX -> 48L
213 | | RCX -> 56L
214 | | R8 -> 24L
215 | | _ -> assert false
216 |
217 | (* Move the value of ae into the register dest_r for argument passing. If ae
218 | refers to any regs in overwritten_regs, which contains the registers
219 | overwitten already for previous arguments, then use the stored version on
220 | the stack. *)
221 | let setup_arg (overwritten_regs : reg list) (dest_r : reg) (ae : atomic_exp)
222 | : dest_src option =
223 | match ae with
224 | | Num n -> Some (Zrm_i (Zr dest_r, n))
225 | | Ident (Stack i) -> Some (Zr_rm (dest_r, var_to_rm (Stack i)))
226 | | Ident (Vreg src_r) ->
227 | let src_r = List.assoc src_r reg_numbers in
228 | if src_r = dest_r then
229 | None
230 | else if List.mem src_r overwritten_regs then
231 | Some (Zr_rm (dest_r,
232 | Zm (None, Some RSP, Some (Concrete_disp (stack_reg_to_offset src_r)))))
233 | else
234 | Some (Zr_rm (dest_r, Zr src_r))
235 | | Ident (Global i) -> Some (Zr_rm (dest_r, var_to_rm (Global i)))
236 | | Ident ((NamedSource _ | NamedTmp _) as v) ->
237 | raise (Util.InternalError ("Named variable in instrSelX86: " ^ show_var v))
238 |
239 | (* Move the values of aes into the registers and then the stack for argument
240 | passing. If ae refers to any regs in overwritten_regs, which contains the
241 | registers overwitten already for previous arguments, then use the stored
242 | version on the stack. *)
243 | let rec setup_args (aes : atomic_exp list) (remaining_regs : reg list)
244 | (overwritten_regs : reg list) : instruction list =
245 | match (aes, remaining_regs) with
246 | | ([], _) -> []
247 | | (a :: aes, next :: regs) ->
248 | (match setup_arg overwritten_regs next a with
249 | | None -> []
250 | | Some arg -> [Zmov arg])
251 | @
252 | setup_args aes regs (next :: overwritten_regs)
253 | | _ ->
254 | raise (TODO "InstrSelX86 does not support function calls with more than 6 arguments")
255 |
256 | (* --------------- End calling convention ------------- *)
257 |
258 | let op_to_cc b op =
259 | match (b, op) with
260 | | (true, Lt) ->
261 | Z_L
262 | | (false, Lt) ->
263 | Z_NL
264 | | (true, Gt) ->
265 | Z_G
266 | | (false, Gt) ->
267 | Z_NG
268 | | (true, Eq) ->
269 | Z_E
270 | | (false, Eq) ->
271 | Z_NE
272 |
273 | let reverse_op2 op =
274 | match op with
275 | | Gt -> Lt
276 | | Lt -> Gt
277 | | Eq -> Eq
278 |
279 | (* Return a boolean true if the condition needs to be negated *)
280 | let test_to_x86 ae1 op ae2 b (label : string) : instruction list =
281 | match (ae1, ae2) with
282 | | (Ident i, _) ->
283 | let (instrs, dest_src) = rm_ae_to_dest_src (var_to_rm i) ae2 in
284 | instrs @
285 | [Zbinop (Zcmp, dest_src);
286 | Zjcc (op_to_cc b op, label)]
287 | | (_, Ident i) ->
288 | let (instrs, dest_src) = rm_ae_to_dest_src (var_to_rm i) ae1 in
289 | instrs @
290 | [Zbinop (Zcmp, dest_src);
291 | Zjcc (op_to_cc b (reverse_op2 op), label)]
292 | | (Num n1, Num n2) ->
293 | let do_jump =
294 | match op with
295 | | Gt -> (Int64.compare n1 n2 > 0) = b
296 | | Lt -> (Int64.compare n1 n2 < 0) = b
297 | | Eq -> (Int64.compare n1 n2 = 0) = b
298 | in
299 | if do_jump then
300 | [Zjcc (Z_ALWAYS, label)]
301 | else
302 | []
303 |
304 | let rec be_to_x86 safe be : instruction list =
305 | match be with
306 | | AssignOp (v, Num imm, ((T.Lt | T.Gt | T.Eq) as op), ae2) ->
307 | (* constant prop ensures both aren't immediate *)
308 | be_to_x86 safe (AssignOp (v, ae2, reverse_op op, Num imm))
309 | | AssignOp (v, Ident v2, ((T.Lt | T.Gt | T.Eq) as op), ae2) ->
310 | let (instrs, cmp_arg) = rm_ae_to_dest_src (var_to_rm v2) ae2 in
311 | let cmp_instrs = instrs @ [Zbinop (Zcmp, cmp_arg)] in
312 | (match var_to_rm v with
313 | | Zm _ as m ->
314 | cmp_instrs @
315 | [Zset (op_to_cond op, B r_scratch);
316 | Zbinop (Zand, Zrm_i (Zr r_scratch, 1L));
317 | Zmov (Zrm_r (m, r_scratch))]
318 | | Zr r ->
319 | cmp_instrs @
320 | [Zset (op_to_cond op, B r);
321 | Zbinop (Zand, Zrm_i (Zr r, 1L))])
322 | | AssignOp (v, ae1, ((T.Plus | T.Minus | T.Lshift | T.BitOr
323 | | T.BitAnd | T.Times | T.Div) as op), ae2) ->
324 | (match (var_to_rm v, ae1) with
325 | | (Zr r1, Num imm2) ->
326 | (* r1 := imm2 op m/r3 --> mov r1, imm2; op r1, m/r3 *)
327 | Zmov (Zrm_i (Zr r1, imm2)) :: build_to_reg_op op r1 ae2
328 | | (Zr r1, Ident var) ->
329 | (* r1 := m/r2 op m/r3/imm3 --> mov r1, m/r2; op r1, m/r3/imm3 *)
330 | Zmov (Zr_rm (r1, var_to_rm var)) :: build_to_reg_op op r1 ae2
331 | | (Zm _ as m1, Num imm2) ->
332 | (* m1 := imm2 op m/r3 -->
333 | mov r_scratch, imm2; op r_scratch, m/r3; mov m1, r_scratch *)
334 | Zmov (Zrm_i (Zr r_scratch, imm2)) ::
335 | build_to_reg_op op r_scratch ae2 @
336 | [Zmov (Zrm_r (m1, r_scratch))]
337 | | (Zm _ as m1, Ident var) ->
338 | (* m1 := m/r2 op m/r3/imm3 -->
339 | mov r_scratch, m/r2; op r_scratch, m/r3/imm3; mov m1, r_scratch *)
340 | Zmov (Zr_rm (r_scratch, var_to_rm var)) ::
341 | build_to_reg_op op r_scratch ae2 @
342 | [Zmov (Zrm_r (m1, r_scratch))])
343 | | AssignOp (_, _, (T.And | T.Or), _) ->
344 | raise (Util.InternalError "And/Or in instrSelX86")
345 | | AssignAtom (v, ae) ->
346 | let (instrs, mov_arg) = rm_ae_to_dest_src (var_to_rm v) ae in
347 | instrs @ [Zmov mov_arg]
348 | | Ld (v1, v2, ae) ->
349 | let (instrs, src_rm) = heap_to_rm v2 ae in
350 | let (instrs2, dest_src) = rm_rm_to_dest_src (var_to_rm v1) src_rm in
351 | instrs @
352 | instrs2 @
353 | [Zmov dest_src]
354 | | St (v, ae1, ae2) ->
355 | let (instrs, dest_rm) = heap_to_rm v ae1 in
356 | let (instrs2, dest_src) = rm_ae_to_dest_src dest_rm ae2 in
357 | instrs @
358 | instrs2 @
359 | [Zmov dest_src]
360 | | Call (v, f, aes) ->
361 | let alloc_name = f in
362 | caller_save @
363 | setup_args aes reg_list [] @
364 | [Zcall alloc_name] @
365 | caller_restore @
366 | (match v with
367 | | None -> []
368 | | Some v -> [Zmov (Zrm_r (var_to_rm v, r_scratch))])
369 | | BoundCheck (a1, a2) ->
370 | if safe then
371 | test_to_x86 a1 Lt (Num 0L) true "bound_error" @
372 | test_to_x86 a1 Lt a2 false "bound_error"
373 | else
374 | []
375 | | NullCheck v ->
376 | if safe then
377 | test_to_x86 (Ident v) Eq (Num 0L) true "null_error"
378 | else
379 | []
380 |
381 | (* Translate a function body to x86. safe determines whether to do null and
382 | bounds checks *)
383 | let to_x86 (safe : bool) (ll : L.linear list) (num_stack : int)
384 | : instruction list =
385 | (* We have to keep RSP 16 byte aligned, add a qword if necessary *)
386 | let num_stack =
387 | if num_stack mod 2 = 0 then
388 | (* We need to do an odd number of pushes over all, since the call has
389 | pushed rip for us. There are an even number of callee saved registers,
390 | including rbp. *)
391 | num_stack + 1
392 | else
393 | num_stack
394 | in
395 | (* Save the old base pointer *)
396 | [Zpush (Zi_rm (Zr RBP))] @
397 | (* Set a new base pointer *)
398 | [Zmov (Zr_rm (RBP, Zr RSP))] @
399 | (* Extend the stack for the local variables *)
400 | [Zbinop (Zsub, Zrm_i (Zr RSP, Int64.mul (Int64.of_int num_stack) 8L))] @
401 | (* Save other the callee save registers, odd number *)
402 | callee_save @
403 | List.flatten
404 | (List.map
405 | (fun l ->
406 | match l with
407 | | L.Instr be -> be_to_x86 safe be
408 | | L.Return None -> callee_restore @ [Zleave; Zret]
409 | | L.Return (Some v) -> Zmov (Zr_rm (RAX, var_to_rm v)) :: callee_restore @ [Zleave; Zret]
410 | | L.CJump ((ae1, op, ae2), b, s) ->
411 | test_to_x86 ae1 op ae2 b s
412 | | L.Jump s ->
413 | [Zjcc (Z_ALWAYS, s)]
414 | | L.Label s ->
415 | [Zlabel s])
416 | ll)
417 |
--------------------------------------------------------------------------------
/src/blockStructure.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* A control flow graph representation with basic blocks, and the source AST ->
20 | CGF algorithm. Also compiles arrays to loads and stores. *)
21 |
22 | open Util
23 | module S = SourceAst
24 |
25 | type var =
26 | | Vreg of int
27 | | Stack of int
28 | | Global of string
29 | | NamedSource of string * SourceAst.scope
30 | | NamedTmp of string * int
31 |
32 | let var_cmp_index (v : var) : int =
33 | match v with
34 | | Vreg _ -> 0
35 | | Stack _ -> 1
36 | | Global _ -> 2
37 | | NamedSource _ -> 3
38 | | NamedTmp _ -> 4
39 |
40 | let compare_var v1 v2 =
41 | match (v1, v2) with
42 | | (Vreg i1, Vreg i2) -> compare i1 i2
43 | | (Stack i1, Stack i2) -> compare i1 i2
44 | | (Global s1, Global s2) -> String.compare s1 s2
45 | | (NamedSource (s1, scope1), NamedSource (s2, scope2)) ->
46 | let c = SourceAst.compare_scope scope1 scope2 in
47 | if c = 0 then
48 | String.compare s1 s2
49 | else
50 | c
51 | | (NamedTmp (s1, i1), NamedTmp (s2, i2)) ->
52 | let c = String.compare s1 s2 in
53 | if c = 0 then
54 | compare i1 i2
55 | else
56 | c
57 | | _ -> compare (var_cmp_index v1) (var_cmp_index v2)
58 |
59 | let show_var v =
60 | match v with
61 | | Vreg i -> "_r_" ^ string_of_int i
62 | | Stack i -> "_s_" ^ string_of_int i
63 | | Global i -> "_g_" ^ i
64 | | NamedSource (s,_) -> s
65 | | NamedTmp (s, i) -> "_tmp_" ^ s ^ string_of_int i
66 |
67 | let pp_var fmt v =
68 | Format.fprintf fmt "%s" (show_var v)
69 |
70 | module VarCmp = struct
71 | type t = var
72 | let compare = compare_var
73 | end
74 |
75 | module Varset' = Set.Make(VarCmp)
76 |
77 | module Varset = struct
78 | include Varset'
79 | let show s = show_list show_var (elements s)
80 | let pp fmt s = Format.fprintf fmt "%a" (pp_set pp_var) (elements s)
81 | end
82 |
83 | module Varmap = Map.Make(VarCmp)
84 |
85 | (* Atomic expressions *)
86 | type atomic_exp =
87 | | Ident of var
88 | | Num of int64
89 |
90 | let show_atomic_exp ae =
91 | match ae with
92 | | Ident v -> show_var v
93 | | Num i -> Int64.to_string i
94 |
95 | let pp_atomic_exp fmt ae =
96 | Format.fprintf fmt "%s" (show_atomic_exp ae)
97 |
98 | (* A single entry in a basic block *)
99 | type block_elem =
100 | | AssignOp of var * atomic_exp * Tokens.op * atomic_exp
101 | | AssignAtom of var * atomic_exp
102 | (* Ld (x,y,e) represents x := *(y+e) *)
103 | | Ld of var * var * atomic_exp
104 | (* St (x,e1,e2) represents *(x+e1) := e2 *)
105 | | St of var * atomic_exp * atomic_exp
106 | (* Call (x, f, aes) represents x := f(aes) *)
107 | | Call of var option * string * atomic_exp list
108 | (* BoundCheck (a1, a2) represents assert (a1 >= 0 && a1 < a2) *)
109 | | BoundCheck of atomic_exp * atomic_exp
110 | (* NullCheck v represents assert (v <> 0) *)
111 | | NullCheck of var
112 |
113 | let pp_block_elem fmt be =
114 | match be with
115 | | AssignOp (v, ae1, op, ae2) ->
116 | Format.fprintf fmt "%a := %a %s %a"
117 | pp_var v
118 | pp_atomic_exp ae1
119 | (Tokens.show_op op)
120 | pp_atomic_exp ae2
121 | | AssignAtom (v, ae) ->
122 | Format.fprintf fmt "%a := %a"
123 | pp_var v
124 | pp_atomic_exp ae
125 | | Ld (v1, v2, ae) ->
126 | Format.fprintf fmt "%a := *(%a+%a)"
127 | pp_var v1
128 | pp_var v2
129 | pp_atomic_exp ae
130 | | St (v, ae1, ae2) ->
131 | Format.fprintf fmt "*(%a+%a) := %a"
132 | pp_var v
133 | pp_atomic_exp ae1
134 | pp_atomic_exp ae2
135 | | Call (Some v, x, aes) ->
136 | Format.fprintf fmt "%a := %s%a"
137 | pp_var v
138 | x
139 | (pp_list pp_atomic_exp) aes
140 | | Call (None, x, aes) ->
141 | Format.fprintf fmt "%s%a"
142 | x
143 | (pp_list pp_atomic_exp) aes
144 | | BoundCheck (a1, a2) ->
145 | Format.fprintf fmt "assert (%a >= 0 && %a < %a)"
146 | pp_atomic_exp a1
147 | pp_atomic_exp a1
148 | pp_atomic_exp a2
149 | | NullCheck v ->
150 | Format.fprintf fmt "assert (%a <> 0)"
151 | pp_var v
152 |
153 |
154 | let show_block_elem be =
155 | pp_block_elem Format.str_formatter be;
156 | Format.flush_str_formatter ()
157 |
158 | type basic_block = block_elem list
159 |
160 | type test_op =
161 | | Lt
162 | | Gt
163 | | Eq
164 |
165 | let pp_test_op fmt op =
166 | match op with
167 | | Lt -> Format.fprintf fmt "<"
168 | | Gt -> Format.fprintf fmt ">"
169 | | Eq -> Format.fprintf fmt "="
170 |
171 | type test = atomic_exp * test_op * atomic_exp
172 |
173 | let pp_test fmt (ae1, op, ae2) =
174 | Format.fprintf fmt "%a %a %a"
175 | pp_atomic_exp ae1
176 | pp_test_op op
177 | pp_atomic_exp ae2
178 |
179 | (* A basic block is either at the end of the function, returning a var, or
180 | there is an unconditional jump out of it, or a branch out of it to the
181 | blocks indexed by the int. *)
182 | type next_block =
183 | | Return of var option
184 | | Next of int
185 | (* The first int is the block number if the ident is true, and the second if
186 | * it is false *)
187 | | Branch of test * int * int
188 |
189 | let pp_next_block fmt nb =
190 | match nb with
191 | | Return (Some v) -> Format.fprintf fmt "return %a" pp_var v
192 | | Return None -> Format.fprintf fmt "return"
193 | | Next i -> Format.fprintf fmt "B%d" i
194 | | Branch (t,i1,i2) -> Format.fprintf fmt "if@ %a@ then@ B%d@ else@ B%d"
195 | pp_test t
196 | i1
197 | i2
198 |
199 | (* An adjacency list representation for the CFG *)
200 | type cfg_entry =
201 | { bnum : int; elems : block_elem list; next : next_block;
202 | mutable started : bool; mutable finished : bool }
203 |
204 | let pp_cfg_entry fmt e =
205 | Format.fprintf fmt "@[[B%d:@ %a@ %a]@]"
206 | e.bnum
207 | (pp_list pp_block_elem) e.elems
208 | pp_next_block e.next
209 |
210 | type cfg = cfg_entry list
211 |
212 | let pp_cfg fmt es = pp_list pp_cfg_entry fmt es
213 |
214 | let cfg_to_graphviz fmt (cfg : cfg) : unit =
215 | Format.fprintf fmt "digraph {@\nnode[shape=box]@\n%a@\n}"
216 | (fun fmt cfg ->
217 | List.iter
218 | (fun entry ->
219 | Format.fprintf fmt "%d[label=\"%a\"]"
220 | entry.bnum
221 | (fun fmt e ->
222 | if entry.bnum = 0 then
223 | Format.fprintf fmt "ENTRY\\l"
224 | else
225 | ();
226 | List.iter (fun x -> Format.fprintf fmt "%s\\l" (show_block_elem x))
227 | e.elems;
228 | match e.next with
229 | | Branch (t,_,_) -> Format.fprintf fmt "%a\\l" pp_test t
230 | | Return None -> Format.fprintf fmt "RETURN\\l"
231 | | Return (Some i) -> Format.fprintf fmt "RETURN %s\\l" (show_var i)
232 | | _ -> ())
233 | entry;
234 | Format.fprintf fmt "@\n";
235 | match entry.next with
236 | | Return _ -> ()
237 | | Next i ->
238 | Format.fprintf fmt "%d->%d@\n" entry.bnum i
239 | | Branch (_, i1, i2) ->
240 | Format.fprintf fmt "%d->%d[label=1]@\n" entry.bnum i1;
241 | Format.fprintf fmt "%d->%d[label=0]@\n" entry.bnum i2)
242 | cfg)
243 | cfg
244 |
245 | let id_to_var (i : S.id) : var =
246 | match i with
247 | | S.Source (s, None) ->
248 | raise (InternalError ("un-scoped source identifier in blockStructure: " ^
249 | S.show_id (S.Source (s, None))))
250 | | S.Source (s, Some scope) -> NamedSource (s, scope)
251 | | S.Temp (s, i) -> NamedTmp (s, i)
252 |
253 | let bool_to_num b =
254 | if b then Num 1L else Num 0L
255 |
256 | (* Convert an amomic source expression *)
257 | let exp_to_atomic (e : S.exp) : atomic_exp =
258 | match e with
259 | | S.Ident (id, []) -> Ident (id_to_var id)
260 | | S.Num n -> Num n
261 | | S.Bool b -> bool_to_num b
262 | | S.Ident (_, _::_) | S.Op _ | S.Uop _ | S.Array _ | S.Call _ ->
263 | raise (InternalError "non-flat expression in blockStructure")
264 |
265 | let tmp_var = NamedTmp("BS", 0)
266 |
267 | (* Convert x := e into block elements, returned in *reverse* order *)
268 | let flat_e_to_assign (x : S.id) (e : S.exp) : block_elem list =
269 | let v = id_to_var x in
270 | match e with
271 | | S.Ident (id, []) ->
272 | [AssignAtom (v, Ident (id_to_var id))]
273 | | S.Ident (id, [ae]) ->
274 | let ae = exp_to_atomic ae in
275 | (* v := id[ae] --> tmp_var := *id;
276 | assert ae < length_var;
277 | v := *(id + (ae+1) * 8) *)
278 | let get_len = Ld (tmp_var, id_to_var id, Num 0L) in
279 | (match ae with
280 | | Num n ->
281 | [Ld (v, id_to_var id, Num (Int64.shift_left (Int64.add n 1L) 3));
282 | BoundCheck (ae, Ident tmp_var);
283 | get_len;
284 | NullCheck (id_to_var id)]
285 | | _ ->
286 | [Ld (v, id_to_var id, Ident tmp_var);
287 | AssignOp (tmp_var, Ident tmp_var, Tokens.Lshift, Num 3L);
288 | AssignOp (tmp_var, ae, Tokens.Plus, Num 1L);
289 | BoundCheck (ae, Ident tmp_var);
290 | get_len;
291 | NullCheck (id_to_var id)])
292 | | S.Ident (_, _::_::_) ->
293 | raise (InternalError "multi-dimension array index in blockStructure")
294 | | S.Num n -> [AssignAtom (v, Num n)]
295 | | S.Bool b -> [AssignAtom (v, bool_to_num b)]
296 | | S.Op (ae1, op, ae2) ->
297 | [AssignOp (v, exp_to_atomic ae1, op, exp_to_atomic ae2)]
298 | | S.Uop (Tokens.Not, ae) ->
299 | (* !x == (x = false) *)
300 | [AssignOp (v, exp_to_atomic ae, Tokens.Eq, Num 0L)]
301 | | S.Array es ->
302 | [Call (Some v, "allocate" ^ string_of_int (List.length es),
303 | List.map exp_to_atomic es)]
304 | | S.Call (f, es) ->
305 | [Call (Some v, S.show_id f, List.map exp_to_atomic es)]
306 |
307 | let op_to_test_op op =
308 | match op with
309 | | Tokens.Lt -> Lt
310 | | Tokens.Gt -> Gt
311 | | Tokens.Eq -> Eq
312 | | _ -> raise (InternalError "non-test operator in test in blockStructure")
313 |
314 | let flat_exp_to_test (e : S.exp) : test =
315 | match e with
316 | | S.Ident (id, []) ->
317 | (Ident (id_to_var id), Eq, Num 1L)
318 | | S.Ident (_, _::_) ->
319 | raise (InternalError "array index in test position in blockStructure")
320 | | S.Num _ ->
321 | raise (InternalError "number in test position in blockStructure")
322 | | S.Bool b ->
323 | (bool_to_num b, Eq, Num 1L)
324 | | S.Op (ae1, op, ae2) ->
325 | (exp_to_atomic ae1, op_to_test_op op, exp_to_atomic ae2)
326 | | S.Uop (Tokens.Not, ae) ->
327 | (* !x == (x = false) *)
328 | (exp_to_atomic ae, Eq, Num 0L)
329 | | S.Array _ ->
330 | raise (InternalError "array alloc test in blockStructure")
331 | | S.Call _ ->
332 | raise (InternalError "function call test in blockStructure")
333 |
334 | (* Build the control-flow graph *)
335 | let build_cfg (stmts : S.stmt list) : cfg =
336 | (* A counter to get new indices for blocks *)
337 | let next_block_num = ref 0 in
338 | let get_block_num () =
339 | let x = !next_block_num in
340 | next_block_num := 1 + !next_block_num;
341 | x
342 | in
343 |
344 | (* Store the cfg here as we build it *)
345 | let the_cfg = ref [] in
346 | let add_block (num : int) (block : basic_block) (next : next_block) : unit =
347 | the_cfg := { bnum = num; elems = List.rev block; next = next;
348 | started = false; finished = false} :: !the_cfg
349 | in
350 |
351 | (* Convert stmts to basic blocks, and add them to the_cfg.
352 | block_num is the index for the block being created.
353 | block_acc contains the elements seen so far, in *reverse* order.
354 | ret_block is the control flow out of the block being created.
355 |
356 | The AST must be in a restricted form:
357 | - The expressions must all be unnested
358 | (i.e. UnnestExp.is_flat returns true).
359 | - && and || must have been removed.
360 | - Multi-dimensional array indexing must have been removed.
361 | - No statements can follow a return
362 | - All paths must terminate in a return *)
363 | let rec find_blocks (block_num : int) (block_acc : basic_block)
364 | (following_block : next_block) (stmts : S.stmt list) : unit =
365 | match stmts with
366 | | [] -> add_block block_num block_acc following_block
367 | | S.Assign (x, [], e) :: s1 ->
368 | find_blocks block_num (flat_e_to_assign x e @ block_acc) following_block s1
369 | | S.Assign (x, [ae], e) :: s1 ->
370 | let ae = exp_to_atomic ae in
371 | (* x[ae] := e --> tmp_var := *x;
372 | assert ae < length_var;
373 | *(x + (ae+1) * 8) := e *)
374 | let get_len = Ld (tmp_var, id_to_var x, Num 0L) in
375 | let new_block_elems =
376 | (match ae with
377 | | Num n ->
378 | [St (id_to_var x, Num (Int64.shift_left (Int64.add n 1L) 3),
379 | exp_to_atomic e);
380 | BoundCheck (ae, Ident tmp_var);
381 | get_len;
382 | NullCheck (id_to_var x)]
383 | | _ ->
384 | [St (id_to_var x, Ident tmp_var, exp_to_atomic e);
385 | AssignOp (tmp_var, Ident tmp_var, Tokens.Lshift, Num 3L);
386 | AssignOp (tmp_var, ae, Tokens.Plus, Num 1L);
387 | BoundCheck (ae, Ident tmp_var);
388 | get_len;
389 | NullCheck (id_to_var x)])
390 | in
391 | find_blocks block_num (new_block_elems @ block_acc) following_block s1
392 | | S.Assign (_, _::_::_, _) :: _ ->
393 | raise (InternalError "multi-dimension array index in blockStructure")
394 | | S.Stmts s1 :: s2 ->
395 | (* Treat { s1 ... sn } s1' ... sn' as though it were
396 | s1 ... sn s1' ... sn' *)
397 | find_blocks block_num block_acc following_block (s1 @ s2)
398 | | S.DoWhile (s0, e, s1) :: s2 ->
399 | let header_block_n = get_block_num () in
400 | let body_block_n = get_block_num () in
401 | add_block block_num block_acc (Next header_block_n);
402 | find_blocks body_block_n [] (Next header_block_n) [s1];
403 | (match (s2, following_block) with
404 | | ([], Next i) ->
405 | find_blocks header_block_n []
406 | (Branch (flat_exp_to_test e, body_block_n, i))
407 | [s0]
408 | | _ ->
409 | let following_block_n = get_block_num () in
410 | find_blocks header_block_n []
411 | (Branch (flat_exp_to_test e, body_block_n, following_block_n))
412 | [s0];
413 | find_blocks following_block_n [] following_block s2)
414 | | S.Ite (e, s1, s2) :: s3 ->
415 | let true_block_n = get_block_num () in
416 | let false_block_n = get_block_num () in
417 | add_block block_num block_acc
418 | (Branch (flat_exp_to_test e, true_block_n, false_block_n));
419 | if s3 = [] then
420 | (find_blocks true_block_n [] following_block [s1];
421 | find_blocks false_block_n [] following_block [s2])
422 | else
423 | let following_block_n = get_block_num () in
424 | find_blocks true_block_n [] (Next following_block_n) [s1];
425 | find_blocks false_block_n [] (Next following_block_n) [s2];
426 | find_blocks following_block_n [] following_block s3
427 | | S.In x :: s ->
428 | find_blocks block_num (Call (Some (id_to_var x),"input", []) :: block_acc)
429 | following_block s
430 | | S.Out x :: s ->
431 | find_blocks block_num
432 | (Call (None, "output", [Ident (id_to_var x)]) :: block_acc) following_block s
433 | | ((S.Loc (s1, _)) :: s2) ->
434 | find_blocks block_num block_acc following_block (s1::s2)
435 | | [S.Return (Some x)] ->
436 | add_block block_num block_acc (Return (Some (id_to_var x)))
437 | | [S.Return None] ->
438 | add_block block_num block_acc (Return None)
439 | | S.Return _ :: _ ->
440 | raise (InternalError "return followed by statements in blockStructure")
441 |
442 | in
443 |
444 | (* Later on we rely on the starting block being #0 *)
445 | let init_block = get_block_num () in
446 | find_blocks init_block [] (Return None) stmts;
447 | !the_cfg
448 |
--------------------------------------------------------------------------------
/src/sourceAst.ml:
--------------------------------------------------------------------------------
1 | (*
2 | * Example compiler
3 | * Copyright (C) 2015-2017 Scott Owens
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program. If not, see .
17 | *)
18 |
19 | (* The language's AST, and a predictive, recursive descent parser. See the
20 | ../README.md for the grammar. *)
21 |
22 | open Util
23 | module T = Tokens
24 | module F = Format
25 |
26 | (* Type of identifiers. Source ones come from the original program, and Temp
27 | ones come from intermediate compilation stages. This makes it easy to avoid
28 | unintentional conflicts. The string on a Temp should name the stage that
29 | introduced it. The scope indicates where the identifier is bound, if that is
30 | known. We expect the parse to generate unknown scopes and for the type
31 | checker to fill them in. All Temps are considered local in scope. *)
32 | type scope =
33 | | Global
34 | | Parameter
35 | | Local
36 |
37 | type id =
38 | | Source of string * scope option
39 | | Temp of string * int
40 |
41 | let show_scope (s : scope) : string =
42 | match s with
43 | | Global -> "Global"
44 | | Parameter -> "Parameter"
45 | | Local -> "Local"
46 |
47 | let show_id (i : id) : string =
48 | match i with
49 | | Source (s, None) -> s
50 | | Source (s, Some scope) -> s ^ "_" ^ show_scope scope
51 | | Temp (s,i) -> "_tmp_" ^ s ^ string_of_int i
52 |
53 | (* Pretty-print an identifier *)
54 | let pp_id (fmt : Format.formatter) (i : id) : unit =
55 | F.fprintf fmt "%s" (show_id i)
56 |
57 | (* Construct a total order on ids so that we can use them as keys in maps.
58 | OCaml maps are implemented with balanced binary trees *)
59 | let scope_to_int s =
60 | match s with
61 | | Global -> 0
62 | | Parameter -> 1
63 | | Local -> 2
64 |
65 | (* Compare two scopes *)
66 | let compare_scope (s1 : scope) (s2 : scope) : int =
67 | compare (scope_to_int s1) (scope_to_int s2)
68 |
69 | (* Compare two identidiers *)
70 | let compare_id (id1 : id) (id2 : id) : int =
71 | match (id1, id2) with
72 | | (Source (s1, scope1), Source (s2, scope2)) ->
73 | let c =
74 | option_compare compare_scope scope1 scope2
75 | in
76 | if c = 0 then
77 | String.compare s1 s2
78 | else
79 | c
80 | | (Temp _, Source _) -> -1
81 | | (Source _, Temp _) -> 1
82 | | (Temp (s1,i1), Temp (s2,i2)) ->
83 | let c = String.compare s1 s2 in
84 | if c = 0 then
85 | compare i1 i2
86 | else
87 | c
88 |
89 | module Idord = struct
90 | type t = id
91 | let compare = compare_id
92 | end
93 |
94 | (* Build Map module specialised to keys of type id *)
95 | module Idmap = Map.Make(Idord)
96 |
97 | (* AST of expressions *)
98 | type exp =
99 | | Ident of id * exp list
100 | | Call of id * exp list
101 | | Num of int64
102 | | Bool of bool
103 | | Op of exp * T.op * exp
104 | | Uop of T.uop * exp
105 | (* Allocate a new array of given dimensions. Initialise to 0 *)
106 | | Array of exp list
107 |
108 | (* pretty printing for expressions *)
109 |
110 | (* Print a list, where each element is enclosed in [] *)
111 | let rec pp_array_list (f : F.formatter -> 'a -> unit)
112 | (fmt : F.formatter) (l : 'a list)
113 | : unit =
114 | match l with
115 | | [] -> ()
116 | | (h::t) ->
117 | F.fprintf fmt "[@[%a@]]@,%a"
118 | f h
119 | (pp_array_list f) t
120 |
121 | (* Print a list, surrounded by (), separating the items with , *)
122 | let pp_call (f : F.formatter -> 'a -> unit) (fmt : F.formatter)
123 | (l : 'a list)
124 | : unit =
125 | let rec pp fmt l =
126 | match l with
127 | | [] -> ()
128 | | [h] ->
129 | F.fprintf fmt "%a"
130 | f h
131 | | (h::t) ->
132 | F.fprintf fmt "%a,@ %a"
133 | f h
134 | pp t
135 | in
136 | F.fprintf fmt "(@[%a@])"
137 | pp l
138 |
139 | (* Pretty print an expression *)
140 | let rec pp_exp (fmt : F.formatter) (exp : exp) : unit =
141 | match exp with
142 | | Ident (id, []) ->
143 | F.fprintf fmt "%a"
144 | pp_id id
145 | | Ident (id, es) ->
146 | F.fprintf fmt "%a%a"
147 | pp_id id
148 | (pp_array_list pp_exp) es
149 | | Call (id, es) ->
150 | F.fprintf fmt "%a%a"
151 | pp_id id
152 | (pp_call pp_exp) es
153 | | Num n -> F.fprintf fmt "%Ld" n
154 | | Bool true -> F.fprintf fmt "true"
155 | | Bool false -> F.fprintf fmt "false"
156 | | Op (e1, op, e2) ->
157 | F.fprintf fmt "(@[%a@ %s@ %a@])"
158 | pp_exp e1
159 | (Tokens.show_op op)
160 | pp_exp e2
161 | | Uop (uop, e) ->
162 | F.fprintf fmt "@[<2>%s@ %a@]"
163 | (Tokens.show_uop uop)
164 | pp_exp e
165 | | Array es ->
166 | F.fprintf fmt "@[<2>array@ %a@]"
167 | (pp_array_list pp_exp) es
168 |
169 | (* AST of statements *)
170 | type stmt =
171 | | Assign of id * exp list * exp
172 | (* A generalised do/while loop. Always execute the first statement, then
173 | the test, then repeatedly do the 2nd, then first statement and then test
174 | 'while e s' becomes DoWhile (Stmts [], e, s) and 'do s while e' becomes
175 | DoWhile (s, e, Stmts []) *)
176 | | DoWhile of stmt * exp * stmt
177 | | Ite of exp * stmt * stmt
178 | | Stmts of stmt list
179 | | In of id
180 | | Out of id
181 | | Return of id option
182 | | Loc of stmt * int (* annotate a statement with it's source line number *)
183 |
184 | (* Pretty-print a statement *)
185 | let rec pp_stmt (fmt : F.formatter) (stmt : stmt) : unit =
186 | match stmt with
187 | | Assign (id, [], e) ->
188 | F.fprintf fmt "@[<2>%a :=@ %a@]"
189 | pp_id id
190 | pp_exp e
191 | | Assign (id, es, e) ->
192 | F.fprintf fmt "@[<2>%a%a :=@ %a@]"
193 | pp_id id
194 | (pp_array_list pp_exp) es
195 | pp_exp e
196 | | DoWhile (Stmts [], e, s) ->
197 | F.fprintf fmt "@[<2>while@ %a@ %a@]"
198 | pp_exp e
199 | pp_stmt s
200 | | DoWhile (s, e, Stmts []) ->
201 | F.fprintf fmt "@[<2>do@ %a@ while@ %a@]"
202 | pp_stmt s
203 | pp_exp e
204 | | DoWhile (s1, e, s2) ->
205 | F.fprintf fmt "@[<2>do@ %a@ while@ %a@ %a@]"
206 | pp_stmt s1
207 | pp_exp e
208 | pp_stmt s2
209 | | Ite (e, s1, s2) ->
210 | F.fprintf fmt "@[<2>if@ %a@ then@ %a@ else@ %a@]"
211 | pp_exp e
212 | pp_stmt s1
213 | pp_stmt s2
214 | | Stmts slist ->
215 | F.fprintf fmt "{@\n%a}"
216 | pp_stmts slist
217 | | In i ->
218 | F.fprintf fmt "@[<2>input@ %a@]"
219 | pp_id i
220 | | Out i ->
221 | F.fprintf fmt "@[<2>output@ %a@]"
222 | pp_id i
223 | | Return None ->
224 | F.fprintf fmt "return"
225 | | Return (Some i) ->
226 | F.fprintf fmt "@[<2>return@ %a@]"
227 | pp_id i
228 | | Loc (s, _) ->
229 | pp_stmt fmt s
230 |
231 | and pp_stmts (fmt : F.formatter) (stmts : stmt list) : unit =
232 | let rec pp fmt stmts =
233 | match stmts with
234 | | [] -> ()
235 | | stmt::stmts ->
236 | F.fprintf fmt "%a@\n%a"
237 | pp_stmt stmt
238 | pp stmts
239 | in
240 | F.fprintf fmt "@[%a@]"
241 | pp stmts
242 |
243 | (* AST of types *)
244 | type typ =
245 | | Int
246 | | Bool
247 | (* An int array with the given number of dimensions *)
248 | | Array of int
249 |
250 | (* Pretty-print a type *)
251 | let pp_typ (fmt : F.formatter) (t : typ) : unit =
252 | match t with
253 | | Int -> F.fprintf fmt "int"
254 | | Bool -> F.fprintf fmt "bool"
255 | | Array n -> F.fprintf fmt "array@ %d" n
256 |
257 | (* AST of variable and function declarations *)
258 | type var_dec = { var_name : id; typ : typ; init : exp; loc : int option }
259 |
260 | type func = { fun_name : id; params : (id * typ) list; ret : typ;
261 | locals : var_dec list; body : stmt list; loc : int option}
262 |
263 | (* Pretty-print a variable declaration *)
264 | let pp_var_dec (fmt : F.formatter) (d : var_dec) : unit =
265 | F.fprintf fmt "@[<2>let@ %a@ :@ %a@ =@ %a@]"
266 | pp_id d.var_name
267 | pp_typ d.typ
268 | pp_exp d.init
269 |
270 | (* Pretty-print variable declarations, 1 per line *)
271 | let pp_var_decs (fmt : F.formatter) (decs : var_dec list) : unit =
272 | let rec pp fmt decs =
273 | match decs with
274 | | [] -> ()
275 | | var_dec::decs ->
276 | F.fprintf fmt "%a@\n%a"
277 | pp_var_dec var_dec
278 | pp decs
279 | in
280 | F.fprintf fmt "@[%a@]"
281 | pp decs
282 |
283 | (* Pretty-print function parameters *)
284 | let rec pp_params (fmt : F.formatter) (params : (id * typ) list) : unit =
285 | match params with
286 | | [] -> ()
287 | | (n,t)::params ->
288 | F.fprintf fmt "@[(%a@ :@ %a)@]@ %a"
289 | pp_id n
290 | pp_typ t
291 | pp_params params
292 |
293 | (* Pretty-print a function *)
294 | let pp_func (fmt : F.formatter) (func : func) : unit =
295 | F.fprintf fmt "@[<2>function@ %a@ %a@ :@ %a@ {@\n%a%a}@]@\n"
296 | pp_id func.fun_name
297 | pp_params func.params
298 | pp_typ func.ret
299 | pp_var_decs func.locals
300 | pp_stmts func.body
301 |
302 | (* AST of complete programs *)
303 | type prog = { globals : var_dec list; funcs : func list }
304 |
305 | (* Pretty-print a program *)
306 | let pp_program (fmt : F.formatter) (p : prog) : unit =
307 | pp_var_decs fmt p.globals;
308 | List.iter (pp_func fmt) p.funcs
309 |
310 | (* Raise a parse error *)
311 | let parse_error (ln : int) (msg : string) : 'a =
312 | raise (BadInput ("Parse error on line " ^ string_of_int ln ^ ": " ^ msg))
313 |
314 | (* Raise a parse error explaining the expected token and the token actually there *)
315 | let parse_error_expect (ln : int) (given : T.token) (expect : T.token)
316 | (where : string)
317 | : 'a =
318 | raise (BadInput ("Parse error on line " ^ string_of_int ln ^ ": expected " ^
319 | T.show_token expect ^ " in " ^ where ^ " but found " ^
320 | T.show_token given))
321 |
322 |
323 | (* Raise a parse error because the end of file was reached unexpectedly *)
324 | let eof_error (expect : string) : 'a =
325 | raise (BadInput ("Parse error: end of file while parsing " ^ expect))
326 |
327 | (* Convert the first expression in toks into an AST. Return it with the left
328 | over tokens. *)
329 | let rec parse_atomic_exp (toks : T.tok_loc list) : exp * T.tok_loc list =
330 | match toks with
331 | | [] -> eof_error "an expression"
332 | | (T.Ident i, ln) :: (T.Lparen, _) ::toks ->
333 | let (args, toks) = parse_args ln toks in
334 | (Call (Source (i,None), args), toks)
335 | | (T.Ident i, _) :: toks ->
336 | let (indices, toks) = parse_indices toks in
337 | (Ident (Source (i,None), indices), toks)
338 | | (T.Num n, _) :: toks -> (Num n, toks)
339 | | (T.True, _) :: toks -> (Bool true, toks)
340 | | (T.False, _) :: toks -> (Bool false, toks)
341 | | (T.Op T.Minus, _) :: toks ->
342 | let (e, toks) = parse_atomic_exp toks in
343 | (Op (Num 0L, T.Minus, e), toks)
344 | | (T.Uop uop, _) :: toks ->
345 | let (e, toks) = parse_atomic_exp toks in
346 | (Uop (uop, e), toks)
347 | | (T.Array, _) :: toks ->
348 | let (indices, toks) = parse_indices toks in
349 | (Array indices, toks)
350 | | (T.Lparen, _) :: toks ->
351 | (match parse_exp toks with
352 | | (_, []) -> eof_error "a parenthesized expression"
353 | | (e, (T.Rparen, _) :: toks) ->
354 | (e, toks)
355 | | (_, (t, ln)::_) ->
356 | parse_error_expect ln t T.Rparen "a parenthesized expression")
357 | | (t, ln) :: _ ->
358 | parse_error ln ("bad expression, beginning with " ^ T.show_token t)
359 |
360 | and parse_exp (toks : T.tok_loc list) : exp * T.tok_loc list =
361 | match parse_atomic_exp toks with
362 | | (e1, (T.Op o, _) :: toks) ->
363 | let (e2, toks) = parse_atomic_exp toks in
364 | (Op (e1, o, e2), toks)
365 | | (e1, toks) -> (e1, toks)
366 |
367 | (* Parse 0 or more array indices. Return them with the left over tokens. *)
368 | and parse_indices (toks : T.tok_loc list) : exp list * T.tok_loc list =
369 | match toks with
370 | | (T.Lbrac, _) :: toks ->
371 | (match parse_exp toks with
372 | | (_, []) -> eof_error "an array index"
373 | | (e, (T.Rbrac, _) :: toks) ->
374 | let (es,toks) = parse_indices toks in
375 | (e::es, toks)
376 | | (_, (t, ln) :: _) ->
377 | parse_error_expect ln t T.Rbrac "an array index")
378 | | _ -> ([], toks)
379 |
380 | (* Parse 1 or more comma-separated expressions. Return them with the left over
381 | tokens. End on a closing parenthesis *)
382 | and parse_args ln (toks : T.tok_loc list) : exp list * T.tok_loc list =
383 | match parse_exp toks with
384 | | (_, []) -> eof_error "a function call expression"
385 | | (e, (T.Comma, _)::toks) ->
386 | let (es, toks) = parse_args ln toks in
387 | (e::es, toks)
388 | | (e, (T.Rparen, _)::toks) ->
389 | ([e], toks)
390 | | (_, (t, ln) :: _) ->
391 | parse_error ln ("expected " ^ T.show_token T.Comma ^ " or " ^
392 | T.show_token T.Rparen ^
393 | " in a function call expression but found " ^
394 | T.show_token t)
395 |
396 | (* Convert the first statement in toks into an AST. Return it with the left
397 | over tokens *)
398 | let rec parse_stmt (toks : T.tok_loc list) : stmt * T.tok_loc list =
399 | match toks with
400 | | [] -> eof_error "a statement"
401 | | (T.Ident x, ln) :: toks ->
402 | (match parse_indices toks with
403 | | (_, []) -> eof_error "an assignment statement"
404 | | (indices, (T.Assign, _) :: toks) ->
405 | let (e, toks) = parse_exp toks in
406 | (Loc (Assign (Source (x, None), indices, e), ln), toks)
407 | | (_, (t, ln) :: _) ->
408 | parse_error_expect ln t T.Assign "an assignment statement")
409 | | (T.While, ln) :: toks ->
410 | let (e, toks) = parse_exp toks in
411 | let (s, toks) = parse_stmt toks in
412 | (Loc (DoWhile (Stmts [], e, s), ln), toks)
413 | | (T.Do, ln) :: toks ->
414 | (match parse_stmt toks with
415 | | (_, []) -> eof_error "a do statement"
416 | | (s, (T.While, _)::toks) ->
417 | let (e, toks) = parse_exp toks in
418 | (Loc (DoWhile (s, e, Stmts []), ln), toks)
419 | | (_, (t, ln) :: _) ->
420 | parse_error_expect ln t T.While "a do statement")
421 | | (T.If, ln) :: toks ->
422 | (match parse_exp toks with
423 | | (_, []) -> eof_error "an if statement"
424 | | (e, (T.Then, _) :: toks) ->
425 | (match parse_stmt toks with
426 | | (_, []) -> eof_error "an if statement"
427 | | (s1, (T.Else, _) :: toks) ->
428 | let (s2, toks) = parse_stmt toks in
429 | (Loc (Ite (e, s1, s2), ln), toks)
430 | | (_, (t, ln) :: _) ->
431 | parse_error_expect ln t T.Else "an if statement")
432 | | (_, (t, ln) :: _) ->
433 | parse_error_expect ln t T.Then "an if statement")
434 | | (T.Lcurly, ln) :: toks ->
435 | let (s_list, toks) = parse_stmt_list toks in
436 | (Loc (Stmts (s_list), ln), toks)
437 | | (T.Input, ln) :: (T.Ident x, _) :: toks -> (Loc (In (Source (x,None)), ln), toks)
438 | | (T.Output, ln) :: (T.Ident x, _) :: toks -> (Loc (Out (Source (x,None)), ln), toks)
439 | | (T.Return, ln) :: (T.Ident x, _) :: toks -> (Loc (Return (Some (Source (x,None))), ln), toks)
440 | | (t, ln) :: _ ->
441 | parse_error ln ("bad statement, beginning with a " ^ T.show_token t)
442 |
443 | (* Convert all of the statement in toks into an AST, stopping on a }. Return
444 | them with the left over tokens, not including the } *)
445 | and parse_stmt_list (toks : T.tok_loc list) : stmt list * T.tok_loc list =
446 | match toks with
447 | | ((T.Rcurly, _) :: toks) -> ([], toks)
448 | | _ ->
449 | let (s, toks) = parse_stmt toks in
450 | let (s_list, toks) = parse_stmt_list toks in
451 | (s::s_list, toks)
452 |
453 | (* Convert the first typ in toks into an AST. Return it with the left over
454 | tokens. *)
455 | let parse_typ (toks : T.tok_loc list) : typ * T.tok_loc list =
456 | match toks with
457 | | [] -> eof_error "a type"
458 | | (T.Int, _) :: toks -> (Int, toks)
459 | | (T.Bool, _) :: toks -> (Bool, toks)
460 | | (T.Array, _) :: (T.Num n, _) :: toks -> (Array (Int64.to_int n), toks)
461 | | (t,ln) :: _ ->
462 | parse_error ln ("bad type, beginning with a " ^ T.show_token t)
463 |
464 | (* Convert the first function parameter toks into an AST. Return it with the
465 | left over tokens. *)
466 | let parse_param (toks : T.tok_loc list) : (id * typ) * T.tok_loc list =
467 | match toks with
468 | | [] -> eof_error "a function parameter"
469 | | (T.Lparen, _) :: (T.Ident x, _) :: (T.Colon,_) :: toks ->
470 | (match parse_typ toks with
471 | | (_, []) -> eof_error "a function parameter"
472 | | (t, (T.Rparen, _)::toks) ->
473 | ((Source (x,None), t), toks)
474 | | (_, (t, ln) :: _) ->
475 | parse_error_expect ln t T.Rparen "a function parameter")
476 | | (t, ln) :: _ -> parse_error_expect ln t T.Lparen "a function parameter"
477 |
478 | (* Convert a list of function parameters in toks into an AST. Return it with
479 | the left over tokens. Recognise when the list is over by the next token not
480 | being Lparen *)
481 | let rec parse_param_list (toks : T.tok_loc list) : (id * typ) list * T.tok_loc list =
482 | match toks with
483 | | ((T.Lparen, _) :: _) ->
484 | let (v, toks) = parse_param toks in
485 | let (v_list, toks) = parse_param_list toks in
486 | (v::v_list, toks)
487 | | _ -> ([], toks)
488 |
489 |
490 | (* Convert the first variable declaration in toks into an AST. Return it with
491 | the left over tokens. *)
492 | let parse_var_dec (toks : T.tok_loc list) : var_dec * T.tok_loc list =
493 | match toks with
494 | | [] -> eof_error "a variable declaration"
495 | | (T.Let, ln) :: (T.Ident x, _) :: (T.Colon,_) :: toks ->
496 | (match parse_typ toks with
497 | | (_, []) -> eof_error "a variable declaration"
498 | | (t, (T.Op T.Eq, _)::toks) ->
499 | let (e, toks) = parse_exp toks in
500 | ({ var_name = Source (x,None); typ = t; init = e; loc = Some ln }, toks)
501 | | (_, (t, ln) :: _) ->
502 | parse_error_expect ln t (T.Op T.Eq) "a variable declaration")
503 | | (t, ln) :: _ -> parse_error_expect ln t T.Let "a variable declaration"
504 |
505 | (* Convert a list of variable declaration in toks into an AST. Return it with
506 | the left over tokens. Recognise when the list is over by the next token not
507 | being Let *)
508 | let rec parse_var_dec_list (toks : T.tok_loc list) : var_dec list * T.tok_loc list =
509 | match toks with
510 | | (T.Let, _) :: _ ->
511 | let (v, toks) = parse_var_dec toks in
512 | let (v_list, toks) = parse_var_dec_list toks in
513 | (v::v_list, toks)
514 | | _ -> ([], toks)
515 |
516 | (* Convert the first function declaration in toks into an AST. Return it with
517 | the left over tokens. *)
518 | let parse_func (toks : T.tok_loc list) : func * T.tok_loc list =
519 | match toks with
520 | | [] -> eof_error "a function declaration"
521 | | (T.Function, ln) :: (T.Ident x, _) :: toks ->
522 | (match parse_param_list toks with
523 | | (_, []) -> eof_error "a function declaration"
524 | | (params, (T.Colon, _) :: toks) ->
525 | if List.length params <> 0 then
526 | (match parse_typ toks with
527 | | (_, []) -> eof_error "a function declaration"
528 | | (t, (T.Lcurly, _) :: toks) ->
529 | let (var_decs, toks) = parse_var_dec_list toks in
530 | let (stmts, toks) = parse_stmt_list toks in
531 | ({ fun_name = Source (x,None);
532 | params = params;
533 | ret = t;
534 | locals = var_decs;
535 | body = stmts;
536 | loc = Some ln }, toks)
537 | | (_, (t, ln) :: _) ->
538 | parse_error_expect ln t T.Lcurly "a function declaration")
539 | else
540 | parse_error ln
541 | "bad function declaration, functions must have at least 1 parameter"
542 | | (_, (t, ln) :: _) ->
543 | parse_error_expect ln t T.Colon "a function declaration")
544 | | (t, ln) :: _ -> parse_error_expect ln t T.Function "a function declaration"
545 |
546 | (* Parse global variable declarations, and then function declarations until the
547 | end of file *)
548 | let parse_program (toks : T.tok_loc list) : prog =
549 | let (var_decs, toks) = parse_var_dec_list toks in
550 | let rec parse_funs toks =
551 | match toks with
552 | | [] -> []
553 | | _ ->
554 | let (f, toks) = parse_func toks in
555 | f :: parse_funs toks
556 | in
557 | { globals = var_decs; funcs = parse_funs toks }
558 |
559 | let stmts_to_stmt (s : stmt list) : stmt =
560 | match s with
561 | | [s1] -> s1
562 | | _ -> Stmts s
563 |
--------------------------------------------------------------------------------