├── src ├── pprint.mli ├── util.mli ├── parse.mli ├── gen.mli ├── util.ml ├── lex.mli ├── nqcc.ml ├── tok.ml ├── ast.ml ├── context.ml ├── pprint.ml ├── lex.ml ├── parse.ml └── gen.ml ├── tst ├── test_lex.mli ├── test_nqcc.ml ├── test_lex.ml └── test_parse.ml ├── .merlin ├── examples ├── consts │ ├── ex1.c │ ├── ex10.c │ ├── ex2.c │ ├── ex3.c │ ├── ex4.c │ ├── ex5.c │ ├── ex6.c │ ├── ex7.c │ ├── ex8.c │ ├── ex9.c │ └── ex11.c ├── unops │ ├── ex1.c │ ├── ex2.c │ ├── ex3.c │ ├── ex4.c │ ├── ex5.c │ └── ex6.c ├── binops │ ├── ex1.c │ ├── ex10.c │ ├── ex11.c │ ├── ex12.c │ ├── ex2.c │ ├── ex3.c │ ├── ex4.c │ ├── ex5.c │ ├── ex6.c │ ├── ex7.c │ ├── ex8.c │ └── ex9.c ├── bitops │ ├── ex1.c │ ├── ex10.c │ ├── ex11.c │ ├── ex2.c │ ├── ex3.c │ ├── ex4.c │ ├── ex5.c │ ├── ex6.c │ ├── ex7.c │ ├── ex8.c │ ├── ex9.c │ └── note.md ├── boolops │ ├── ex1.c │ ├── ex10.c │ ├── ex11.c │ ├── ex2.c │ ├── ex3.c │ ├── ex4.c │ ├── ex5.c │ ├── ex6.c │ ├── ex7.c │ ├── ex8.c │ ├── ex12.c │ ├── ex13.c │ └── ex9.c ├── cmps │ ├── equals.c │ ├── less_than.c │ ├── greater_equal.c │ ├── greater_than.c │ ├── less_equal.c │ └── not_equals.c ├── funs │ ├── ex5.c │ ├── ex1.c │ ├── ex2.c │ ├── ex3.c │ ├── ex4.c │ └── ex6.c ├── if │ ├── ex1.c │ ├── ex10.c │ ├── ex3.c │ ├── ex8.c │ ├── ex6.c │ ├── ex7.c │ ├── ex13.c │ ├── ex12.c │ ├── ex11.c │ ├── ex4.c │ ├── ex5.c │ ├── ex14.c │ ├── ex2.c │ ├── if_nested.c │ └── ex9.c ├── vars │ ├── ex1.c │ ├── ex3.c │ └── ex2.c └── loops │ ├── for.c │ ├── for_decl.c │ ├── for2.c │ └── for_single_statement.c ├── .gitignore ├── configure ├── test_examples.sh ├── _oasis ├── Makefile ├── nqcc ├── README.md ├── LICENSE └── setup.ml /src/pprint.mli: -------------------------------------------------------------------------------- 1 | val pprint: Ast.prog -> unit -------------------------------------------------------------------------------- /tst/test_lex.mli: -------------------------------------------------------------------------------- 1 | val lex_tests : OUnit2.test list -------------------------------------------------------------------------------- /.merlin: -------------------------------------------------------------------------------- 1 | B _build/* 2 | PKG batteries 3 | PKG oUnit -------------------------------------------------------------------------------- /examples/consts/ex1.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 2; 3 | } -------------------------------------------------------------------------------- /examples/unops/ex1.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return -4; 3 | } -------------------------------------------------------------------------------- /examples/unops/ex2.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return +4; 3 | } -------------------------------------------------------------------------------- /examples/unops/ex3.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return ~4; 3 | } -------------------------------------------------------------------------------- /examples/unops/ex4.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return !4; 3 | } -------------------------------------------------------------------------------- /examples/unops/ex5.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return !0; 3 | } -------------------------------------------------------------------------------- /examples/unops/ex6.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return ~-0; 3 | } -------------------------------------------------------------------------------- /examples/binops/ex1.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 2+2; 3 | } -------------------------------------------------------------------------------- /examples/binops/ex10.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 3- -4; 3 | } -------------------------------------------------------------------------------- /examples/binops/ex11.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 3-+4; 3 | } -------------------------------------------------------------------------------- /examples/binops/ex12.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 3 % 2; 3 | } -------------------------------------------------------------------------------- /examples/binops/ex2.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 3+'a'; 3 | } -------------------------------------------------------------------------------- /examples/binops/ex3.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 3*4; 3 | } -------------------------------------------------------------------------------- /examples/binops/ex4.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 3*4+2; 3 | } -------------------------------------------------------------------------------- /examples/binops/ex5.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 5/4; 3 | } -------------------------------------------------------------------------------- /examples/binops/ex6.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 4*-1; 3 | } -------------------------------------------------------------------------------- /examples/binops/ex7.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return -4/-2; 3 | } -------------------------------------------------------------------------------- /examples/binops/ex8.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 4/3*2; 3 | } -------------------------------------------------------------------------------- /examples/binops/ex9.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 2-3; 3 | } -------------------------------------------------------------------------------- /examples/bitops/ex1.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 1&2; 3 | } -------------------------------------------------------------------------------- /examples/bitops/ex10.c: -------------------------------------------------------------------------------- 1 | int main(){ 2 | return 1&-0; 3 | } -------------------------------------------------------------------------------- /examples/bitops/ex11.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 1^-0; 3 | } -------------------------------------------------------------------------------- /examples/bitops/ex2.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 1|2; 3 | } -------------------------------------------------------------------------------- /examples/bitops/ex3.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 1^2; 3 | } -------------------------------------------------------------------------------- /examples/bitops/ex4.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 1>>2; 3 | } -------------------------------------------------------------------------------- /examples/bitops/ex5.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 100<<2; 3 | } -------------------------------------------------------------------------------- /examples/bitops/ex6.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 1|-0; 3 | } -------------------------------------------------------------------------------- /examples/bitops/ex7.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return -1<<2; 3 | } -------------------------------------------------------------------------------- /examples/bitops/ex8.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return -1>>2; 3 | } -------------------------------------------------------------------------------- /examples/bitops/ex9.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return -0<<3; 3 | } -------------------------------------------------------------------------------- /examples/boolops/ex1.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 1&&0; 3 | } -------------------------------------------------------------------------------- /examples/boolops/ex10.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 2 > 3; 3 | } -------------------------------------------------------------------------------- /examples/boolops/ex11.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 4 < 3; 3 | } -------------------------------------------------------------------------------- /examples/boolops/ex2.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 1 || 0; 3 | } -------------------------------------------------------------------------------- /examples/boolops/ex3.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 1 < 1; 3 | } -------------------------------------------------------------------------------- /examples/boolops/ex4.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 1 <= 1; 3 | } -------------------------------------------------------------------------------- /examples/boolops/ex5.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 1 > 1; 3 | } -------------------------------------------------------------------------------- /examples/boolops/ex6.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 1>=1; 3 | } -------------------------------------------------------------------------------- /examples/boolops/ex7.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 2==2; 3 | } -------------------------------------------------------------------------------- /examples/boolops/ex8.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 2 != 2; 3 | } -------------------------------------------------------------------------------- /examples/cmps/equals.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 2==3; 3 | } -------------------------------------------------------------------------------- /examples/cmps/less_than.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 2<3; 3 | } -------------------------------------------------------------------------------- /examples/consts/ex10.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return ((3)); 3 | } -------------------------------------------------------------------------------- /examples/consts/ex2.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 'a'; 3 | } -------------------------------------------------------------------------------- /examples/consts/ex3.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return '\b'; 3 | } -------------------------------------------------------------------------------- /examples/consts/ex4.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return '\x03'; 3 | } -------------------------------------------------------------------------------- /examples/consts/ex5.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return '\075'; 3 | } -------------------------------------------------------------------------------- /examples/consts/ex6.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return '\''; 3 | } -------------------------------------------------------------------------------- /examples/consts/ex7.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 012; 3 | } -------------------------------------------------------------------------------- /examples/consts/ex8.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 0xf3; 3 | } -------------------------------------------------------------------------------- /examples/consts/ex9.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return '\\'; 3 | } -------------------------------------------------------------------------------- /examples/boolops/ex12.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 4 <= 3; 3 | } -------------------------------------------------------------------------------- /examples/boolops/ex13.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 4 >= 3; 3 | } -------------------------------------------------------------------------------- /examples/cmps/greater_equal.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 2>=3; 3 | } -------------------------------------------------------------------------------- /examples/cmps/greater_than.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 2>3; 3 | } -------------------------------------------------------------------------------- /examples/cmps/less_equal.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 3 <= 3; 3 | } -------------------------------------------------------------------------------- /examples/cmps/not_equals.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 2!=3; 3 | } -------------------------------------------------------------------------------- /examples/consts/ex11.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | ; 3 | return 0; 4 | } -------------------------------------------------------------------------------- /examples/funs/ex5.c: -------------------------------------------------------------------------------- 1 | int main(int argc) { 2 | return argc; 3 | } -------------------------------------------------------------------------------- /examples/if/ex1.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | if (1) return 3; else return 2; 3 | } -------------------------------------------------------------------------------- /examples/vars/ex1.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 4; 3 | return a; 4 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | *.byte 3 | *.s 4 | *.out 5 | _tags 6 | setup.data 7 | setup.log -------------------------------------------------------------------------------- /examples/boolops/ex9.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return (3+4 <= 4 || 1&&2 != 3 > 6); 3 | } -------------------------------------------------------------------------------- /examples/if/ex10.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | if (1) { 3 | 4 | } 5 | 6 | return 0; 7 | } -------------------------------------------------------------------------------- /examples/if/ex3.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | if (0) 3 | return 4; 4 | 5 | return 5; 6 | } -------------------------------------------------------------------------------- /src/util.mli: -------------------------------------------------------------------------------- 1 | (* create unique id prefixed with given string *) 2 | val unique_id: string -> string -------------------------------------------------------------------------------- /examples/if/ex8.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | if (1) { 3 | return 4; 4 | } else return 3; 5 | } -------------------------------------------------------------------------------- /examples/vars/ex3.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 3; 3 | int b = a = 4; 4 | return (a + b); 5 | } -------------------------------------------------------------------------------- /examples/funs/ex1.c: -------------------------------------------------------------------------------- 1 | int foo() { 2 | return 3; 3 | } 4 | 5 | int main() { 6 | return 1 + foo(); 7 | } -------------------------------------------------------------------------------- /examples/if/ex6.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | if (0) { 3 | return 4; 4 | } 5 | 6 | return 5; 7 | } -------------------------------------------------------------------------------- /examples/if/ex7.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | if (1) { 3 | return 4; 4 | } 5 | 6 | return 5; 7 | } -------------------------------------------------------------------------------- /examples/if/ex13.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 3; 3 | int b = a > 3 ? 4 : 1 ? 5 : 1; 4 | return b; 5 | } -------------------------------------------------------------------------------- /src/parse.mli: -------------------------------------------------------------------------------- 1 | (* Convert list of tokens into abstract syntax tree *) 2 | val parse : Tok.token list -> Ast.prog -------------------------------------------------------------------------------- /examples/if/ex12.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 3; 3 | int b = (a > 3 ? 4 : 0) ? 5 : 1; 4 | return b; 5 | } -------------------------------------------------------------------------------- /examples/vars/ex2.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int foo; 3 | foo = 4*2; 4 | int bar = foo+1; 5 | return bar; 6 | } -------------------------------------------------------------------------------- /examples/funs/ex2.c: -------------------------------------------------------------------------------- 1 | int incr(int arg) { 2 | return arg + 1; 3 | } 4 | 5 | int main() { 6 | return incr(3); 7 | } -------------------------------------------------------------------------------- /examples/if/ex11.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 3; 3 | int b = 1; 4 | a = 3 > 5 ? b = 5 : 1; 5 | return b; 6 | } -------------------------------------------------------------------------------- /examples/if/ex4.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | if (3) { 3 | return 1; 4 | } else { 5 | return 2; 6 | } 7 | } -------------------------------------------------------------------------------- /examples/if/ex5.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | if (0) { 3 | return 1; 4 | } else { 5 | return 2; 6 | } 7 | } -------------------------------------------------------------------------------- /examples/if/ex14.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 3; 3 | int b = a > 3 ? 1 < 2 ? 4 : 5 : 3 < 4 ? 6 : 7; 4 | return b; 5 | } -------------------------------------------------------------------------------- /src/gen.mli: -------------------------------------------------------------------------------- 1 | (** Given filename 'file.c' and AST, write assembly to 'file.s' **) 2 | val generate : string -> Ast.prog -> unit -------------------------------------------------------------------------------- /examples/funs/ex3.c: -------------------------------------------------------------------------------- 1 | int incr(int a) { 2 | return a + 1; 3 | } 4 | 5 | int main() { 6 | int b = 2; 7 | incr (b); 8 | return b; 9 | } -------------------------------------------------------------------------------- /examples/loops/for.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | 3 | int i; 4 | 5 | for (i = 0; i < 5; i = i + 1) { 6 | 1; 7 | } 8 | return i; 9 | } -------------------------------------------------------------------------------- /examples/funs/ex4.c: -------------------------------------------------------------------------------- 1 | int incr(int a) { 2 | return a + 1; 3 | } 4 | 5 | int main() { 6 | int a = 2; 7 | a = incr(a+1); 8 | return a; 9 | } -------------------------------------------------------------------------------- /examples/funs/ex6.c: -------------------------------------------------------------------------------- 1 | int incr(int a) { 2 | return a + 1; 3 | } 4 | 5 | int main() { 6 | int a = 2; 7 | a = (1+2) + incr(a+1); 8 | return a; 9 | } -------------------------------------------------------------------------------- /examples/if/ex2.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 3; 3 | 4 | if (!a) 5 | a = 4; 6 | else 7 | a = 5; 8 | 9 | return a; 10 | } -------------------------------------------------------------------------------- /examples/loops/for_decl.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int j = 0; 3 | 4 | for (int i = 0; i < 5; i = i + 1) { 5 | j = j + 2; 6 | } 7 | 8 | return j; 9 | } -------------------------------------------------------------------------------- /src/util.ml: -------------------------------------------------------------------------------- 1 | let counter = ref 0 2 | 3 | let unique_id s = 4 | let id = String.concat "" [s; (BatString.of_int (!counter))] in 5 | incr counter; 6 | id 7 | -------------------------------------------------------------------------------- /examples/loops/for2.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | 3 | int i; 4 | int j = 0; 5 | 6 | for (i = 0; i < 5; i = i + 1) { 7 | j = j + 2; 8 | } 9 | return j; 10 | } -------------------------------------------------------------------------------- /examples/loops/for_single_statement.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 0; 3 | for (int i = 0; i < 5; i = i + 1) 4 | if (i % 2) 5 | a = a + 1; 6 | return a; 7 | } -------------------------------------------------------------------------------- /src/lex.mli: -------------------------------------------------------------------------------- 1 | (* Convert C program into a list of tokens *) 2 | val lex : string -> Tok.token list 3 | 4 | (* Get string representation of a token *) 5 | val tok_to_string: Tok.token -> string 6 | -------------------------------------------------------------------------------- /examples/if/if_nested.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 0; 3 | if (a > 2) { 4 | return 1; 5 | } else if (a >= 0) { 6 | return 0; 7 | } else { 8 | return 5; 9 | } 10 | } -------------------------------------------------------------------------------- /tst/test_nqcc.ml: -------------------------------------------------------------------------------- 1 | open OUnit2 2 | 3 | let suite = 4 | "All" >::: [ 5 | "Lex" >::: Test_lex.lex_tests; 6 | "Parse" >::: Test_parse.parse_tests 7 | ] 8 | 9 | let () = run_test_tt_main suite -------------------------------------------------------------------------------- /examples/bitops/note.md: -------------------------------------------------------------------------------- 1 | The following cases are undefined, so we don't test them: 2 | 3 | * Bit shift count is negative (e.g. `1>>-2`). 4 | * Bit shift count is greater than width of type (e.g. `1>>33`; width of type is 32). -------------------------------------------------------------------------------- /examples/if/ex9.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | 3 | int a = 2; 4 | 5 | if (1) { 6 | a = 5; 7 | } else { 8 | a = 7; 9 | } 10 | 11 | int b = a-5; 12 | 13 | if (b) { 14 | return 4; 15 | } 16 | 17 | return 5; 18 | } -------------------------------------------------------------------------------- /src/nqcc.ml: -------------------------------------------------------------------------------- 1 | open Batteries 2 | 3 | let compile prog_filename = 4 | let source_lines = File.lines_of prog_filename in 5 | let ast = Enum.reduce (fun line1 line2 -> line1^" "^line2) source_lines 6 | |> Lex.lex 7 | |> Parse.parse 8 | in 9 | Gen.generate prog_filename ast 10 | 11 | (* TODO: verify .c file extension *) 12 | let filename = Array.get Sys.argv 1 13 | 14 | let _ = compile filename 15 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # OASIS_START 4 | # DO NOT EDIT (digest: dc86c2ad450f91ca10c931b6045d0499) 5 | set -e 6 | 7 | FST=true 8 | for i in "$@"; do 9 | if $FST; then 10 | set -- 11 | FST=false 12 | fi 13 | 14 | case $i in 15 | --*=*) 16 | ARG=${i%%=*} 17 | VAL=${i##*=} 18 | set -- "$@" "$ARG" "$VAL" 19 | ;; 20 | *) 21 | set -- "$@" "$i" 22 | ;; 23 | esac 24 | done 25 | 26 | ocaml setup.ml -configure "$@" 27 | # OASIS_STOP 28 | -------------------------------------------------------------------------------- /test_examples.sh: -------------------------------------------------------------------------------- 1 | for i in examples/*/*.c 2 | do 3 | gcc -w -m32 $i #compile with gcc 4 | ./a.out #run it 5 | expected=$? #get exit code 6 | ./nqcc $i #compile with nqcc 7 | base="${i%.*}" 8 | $base #run the thing we assembled 9 | actual=$? #get exit code 10 | echo -n "$i: " 11 | if [ "$expected" -ne "$actual" ] 12 | then 13 | echo "FAIL" 14 | else 15 | echo "OK" 16 | fi 17 | rm $base 18 | done 19 | 20 | #cleanup 21 | rm a.out 22 | -------------------------------------------------------------------------------- /_oasis: -------------------------------------------------------------------------------- 1 | Name: nqcc 2 | Version: 0.1 3 | Synopsis: A tiny c compiler 4 | Authors: Nora Sandler 5 | License: MIT 6 | 7 | Description: A simple compiler for a subset of C 8 | 9 | OASISFormat: 0.4 10 | BuildTools: ocamlbuild 11 | Plugins: META (0.4), DevFiles (0.4) 12 | 13 | Executable "nqcc" 14 | Path: src 15 | MainIs: nqcc.ml 16 | BuildDepends: 17 | str, batteries 18 | 19 | #export modules as a lib so we can test them 20 | Library "nqcc_lib" 21 | Path: src 22 | Modules: Lex, Parse, Gen 23 | BuildDepends: 24 | batteries 25 | 26 | Executable "test_nqcc" 27 | Path: tst 28 | MainIs: test_nqcc.ml 29 | Build$: flag(tests) 30 | Install: false 31 | BuildDepends: 32 | oUnit, nqcc_lib 33 | 34 | Test "test_nqcc" 35 | TestTools: test_nqcc 36 | Command: $test_nqcc 37 | WorkingDirectory: tst -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: a3c674b4239234cbbe53afe090018954) 3 | 4 | SETUP = ocaml setup.ml 5 | 6 | build: setup.data 7 | $(SETUP) -build $(BUILDFLAGS) 8 | 9 | doc: setup.data build 10 | $(SETUP) -doc $(DOCFLAGS) 11 | 12 | test: setup.data build 13 | $(SETUP) -test $(TESTFLAGS) 14 | 15 | all: 16 | $(SETUP) -all $(ALLFLAGS) 17 | 18 | install: setup.data 19 | $(SETUP) -install $(INSTALLFLAGS) 20 | 21 | uninstall: setup.data 22 | $(SETUP) -uninstall $(UNINSTALLFLAGS) 23 | 24 | reinstall: setup.data 25 | $(SETUP) -reinstall $(REINSTALLFLAGS) 26 | 27 | clean: 28 | $(SETUP) -clean $(CLEANFLAGS) 29 | 30 | distclean: 31 | $(SETUP) -distclean $(DISTCLEANFLAGS) 32 | 33 | setup.data: 34 | $(SETUP) -configure $(CONFIGUREFLAGS) 35 | 36 | configure: 37 | $(SETUP) -configure $(CONFIGUREFLAGS) 38 | 39 | .PHONY: build doc test all install uninstall reinstall clean distclean configure 40 | 41 | # OASIS_STOP 42 | -------------------------------------------------------------------------------- /nqcc: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | assembly="" 3 | 4 | # output is name of source file w/out extension 5 | out="${1%.*}" 6 | 7 | # unless it's specified with -o 8 | while getopts "ho:" opt; do 9 | case "$opt" in 10 | o ) 11 | out=$OPTARG;; 12 | h ) 13 | echo "USAGE: nqcc [-o output] inputs" 14 | exit 0;; 15 | *) 16 | echo "USAGE: nqcc [-o output] inputs" 17 | exit 1;; 18 | esac 19 | done 20 | shift $(expr $OPTIND - 1 ) 21 | 22 | if [ -z "$1" ] 23 | then 24 | echo "USAGE: nqcc [-o output] inputs" 25 | echo "Must specify at least one input file" 26 | exit 1 27 | fi 28 | 29 | until [ -z "$1" ] 30 | do 31 | `dirname $0`/nqcc.byte $1 #compile to assembly 32 | if [[ $? != 0 ]]; then 33 | exit 1 34 | fi 35 | base="${1%.*}" 36 | assembly=$assembly" "$base".s" 37 | shift 38 | done 39 | 40 | gcc-8 -m32 $assembly -o $out -fno-pie 2>/dev/null 41 | rm $assembly -------------------------------------------------------------------------------- /src/tok.ml: -------------------------------------------------------------------------------- 1 | (* A token in a C progrm *) 2 | type token = 3 | | OpenBrace 4 | | CloseBrace 5 | | OpenParen 6 | | CloseParen 7 | | Comma 8 | | Question 9 | | Semicolon 10 | | Colon 11 | | IntKeyword 12 | | StaticKeyword 13 | | ExternKeyword 14 | | CharKeyword 15 | | ReturnKeyword 16 | | IfKeyword 17 | | ElseKeyword 18 | | ForKeyword 19 | | DoKeyword 20 | | WhileKeyword 21 | | BreakKeyword 22 | | ContinueKeyword 23 | | Bang 24 | | Complement 25 | | Plus 26 | | Minus 27 | | Mult 28 | | Div 29 | | Mod 30 | | Eq (* = *) 31 | | DoubleEq (* == *) 32 | | Neq (* != *) 33 | | Lt (* < *) 34 | | Le (* <= *) 35 | | Gt (* > *) 36 | | Ge (* >= *) 37 | | And (* && *) 38 | | Or (* || *) 39 | | BitAnd (* & *) 40 | | BitOr (* | *) 41 | | Xor (* ^ *) 42 | | ShiftLeft (* << *) 43 | | ShiftRight (* >> *) 44 | | Int of int 45 | | Char of char 46 | | Id of string 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NQCC - a not-quite-C compiler 2 | 3 | **This repo is no longer active. I'm working on a new version of NQCC - I'll link to it here once it's released!** 4 | 5 | A compiler for a tiny (but growing!) subset of C, written in OCaml. 6 | 7 | ## Requirements 8 | * Install OCaml and OPAM 9 | * Configure OPAM 10 | ```` 11 | opam init 12 | ```` 13 | 14 | * Install dependencies: 15 | ``` 16 | opam install oasis 17 | opam install batteries 18 | opam install ounit # only needed for tests 19 | ```` 20 | 21 | ## Usage 22 | * Build it 23 | ``` 24 | make 25 | ```` 26 | 27 | * To run it, invoke the `nqcc` script in the project root 28 | ``` 29 | ./nqcc /path/to/source.c 30 | ``` 31 | The compiled executable will be in the same directory as the source file, and have the same name (e.g. `source` in the example above). 32 | 33 | ## Tests 34 | ``` 35 | ocaml setup.ml -configure --enable-tests # you only need to run this once 36 | make test # run unit tests 37 | ./test_examples.sh # compile sample programs 38 | ```` 39 | 40 | You can also test against [this test suite](https://github.com/nlsandler/write_a_c_compiler). 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Nora Sandler 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /src/ast.ml: -------------------------------------------------------------------------------- 1 | (* Data types representing an abstract syntax tree *) 2 | type const = 3 | | Int of int 4 | | Char of char 5 | | String of string 6 | 7 | type type_def = 8 | | IntType 9 | | CharType 10 | 11 | type binop = 12 | | Add 13 | | Sub 14 | | Mult 15 | | Div 16 | | Mod 17 | | Lt 18 | | Gt 19 | | Le 20 | | Ge 21 | | Neq 22 | | Eq 23 | | And 24 | | Or 25 | | BitAnd 26 | | BitOr 27 | | Xor 28 | | ShiftL 29 | | ShiftR 30 | 31 | type assign_op = 32 | | Equals (* = *) 33 | 34 | type unop = Negate | Pos | Complement | Not 35 | 36 | type id = ID of string 37 | 38 | type exp = 39 | | Const of const 40 | | Var of id 41 | | UnOp of unop * exp 42 | | BinOp of binop * exp * exp 43 | | TernOp of exp * exp * exp 44 | | Assign of assign_op * id * exp 45 | | FunCall of id * exp list 46 | 47 | type storage_class = Static | Extern | Nothing 48 | type declaration = 49 | { var_type: type_def; 50 | var_name: id; 51 | init: exp option; 52 | storage_class: storage_class; 53 | } 54 | 55 | type block_item = 56 | | Statement of statement 57 | | Decl of declaration 58 | 59 | and block = block_item list 60 | 61 | and statement = 62 | | Block of block 63 | | If of {cond: exp; if_body: statement; else_body: statement option} 64 | | Exp of exp option 65 | | For of {init: exp option; cond: exp; post: exp option; body: statement} 66 | | ForDecl of {init: declaration; cond: exp; post: exp option; body: statement} 67 | | While of {cond: exp; body: statement} 68 | | DoWhile of {body: statement; cond: exp} 69 | | ReturnVal of exp (* should we add a return_exp instead? *) 70 | | Break 71 | | Continue 72 | 73 | type fun_param = Param of type_def * id 74 | 75 | type fun_declaration = 76 | { fun_type: type_def; 77 | name: id; 78 | storage_class: storage_class; 79 | params: fun_param list; 80 | body: block option; 81 | } 82 | 83 | type top_level = 84 | | Function of fun_declaration 85 | | GlobalVar of declaration 86 | 87 | type prog = Prog of top_level list 88 | -------------------------------------------------------------------------------- /setup.ml: -------------------------------------------------------------------------------- 1 | (* setup.ml generated for the first time by OASIS v0.4.10 *) 2 | 3 | (* OASIS_START *) 4 | (* DO NOT EDIT (digest: a426e2d026defb34183b787d31fbdcff) *) 5 | (******************************************************************************) 6 | (* OASIS: architecture for building OCaml libraries and applications *) 7 | (* *) 8 | (* Copyright (C) 2011-2016, Sylvain Le Gall *) 9 | (* Copyright (C) 2008-2011, OCamlCore SARL *) 10 | (* *) 11 | (* This library is free software; you can redistribute it and/or modify it *) 12 | (* under the terms of the GNU Lesser General Public License as published by *) 13 | (* the Free Software Foundation; either version 2.1 of the License, or (at *) 14 | (* your option) any later version, with the OCaml static compilation *) 15 | (* exception. *) 16 | (* *) 17 | (* This library is distributed in the hope that it will be useful, but *) 18 | (* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *) 19 | (* or FITNESS FOR A PARTICULAR PURPOSE. See the file COPYING for more *) 20 | (* details. *) 21 | (* *) 22 | (* You should have received a copy of the GNU Lesser General Public License *) 23 | (* along with this library; if not, write to the Free Software Foundation, *) 24 | (* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) 25 | (******************************************************************************) 26 | 27 | let () = 28 | try 29 | Topdirs.dir_directory (Sys.getenv "OCAML_TOPLEVEL_PATH") 30 | with Not_found -> () 31 | ;; 32 | #use "topfind";; 33 | #require "oasis.dynrun";; 34 | open OASISDynRun;; 35 | 36 | let setup_t = BaseCompat.Compat_0_4.adapt_setup_t setup_t 37 | open BaseCompat.Compat_0_4 38 | (* OASIS_STOP *) 39 | let () = setup ();; 40 | -------------------------------------------------------------------------------- /src/context.ml: -------------------------------------------------------------------------------- 1 | open Batteries 2 | 3 | type linkage = 4 | | External 5 | | Internal 6 | | Nothing 7 | 8 | type initial = 9 | | Final of int 10 | | Tentative (* defaults to 0 *) 11 | | NoDef 12 | 13 | type fun_decl = int * linkage * bool 14 | 15 | type symbol = 16 | | Stack of int (* local var - offset on stack *) 17 | | Heap of string (* global or static var - label *) 18 | | Fun of int (* number of params *) 19 | 20 | type decl = 21 | | HeapDecl of linkage * initial 22 | (* number of params, linkage, whether defined yet *) 23 | | FunDecl of int * linkage * bool 24 | 25 | (* A record for all the stuff we have to pass around during code generation *) 26 | type t = { 27 | (* map variable name to address *) 28 | var_map : (string, symbol) Map.t; 29 | (* map memory location label to declaration info for vars on heap *) 30 | var_decl_map : (string, decl) Map.t; 31 | current_scope : string Set.t; 32 | stack_index : int; 33 | break_label : string option; 34 | continue_label : string option 35 | } 36 | 37 | let empty = 38 | { var_decl_map=Map.empty; 39 | var_map=Map.empty; 40 | (* not used at global scope *) 41 | current_scope=Set.empty; 42 | stack_index=0; 43 | break_label=None; 44 | continue_label=None; 45 | } 46 | 47 | (* Add function arguments to initial context with just global variables *) 48 | let init_for_fun { var_map } params = 49 | (* arguments are just below return address, which is just below EBP, so first arg at ebp + 8 50 | *) 51 | let add_param (m, si, sc) (Ast.Param (_, ID id)) = 52 | Map.add id (Stack si) m, si + 4, Set.add id sc 53 | in 54 | let var_map', _, scope = 55 | List.fold_left 56 | add_param 57 | (var_map, 8, Set.empty) 58 | params 59 | in 60 | { var_decl_map=Map.empty; 61 | var_map=var_map'; 62 | current_scope=scope; 63 | (* stack index, i.e. offset of thing after ESP from EBP, is 4 *) 64 | stack_index=(-4); 65 | break_label=None; 66 | continue_label=None 67 | } 68 | 69 | let already_defined { current_scope; } id = Set.mem id current_scope 70 | let reset_scope context = { context with current_scope=Set.empty } 71 | 72 | let get_init on_err = function 73 | | None -> Tentative 74 | | Some (Ast.Const c) -> 75 | begin 76 | match c with 77 | | Ast.Int i -> Final i 78 | | _ -> on_err "non-int initializers not implemented" 79 | end 80 | | _ -> on_err "non-constant initializer" 81 | 82 | (* TODO: prob doesn't belong here since gen.ml uses it *) 83 | let get_var_init on_err Ast.({ init; storage_class; }) = 84 | match (get_init on_err init) with 85 | | Tentative -> 86 | if storage_class = Ast.Extern 87 | then NoDef 88 | else Tentative 89 | | x -> x 90 | 91 | let finalize_init = function 92 | | Tentative -> Final 0 93 | | a -> a 94 | 95 | let add_local_extern_var ({ var_map; current_scope; } as context) id = 96 | let var_label = "_"^id in 97 | { context with 98 | var_map=Map.add id (Heap var_label) var_map; 99 | current_scope=Set.add id current_scope; 100 | } 101 | 102 | let add_global_var ({ var_map; var_decl_map } as context) Ast.({ var_name=ID id }) decl = 103 | let label = "_"^id in 104 | { context with 105 | var_map=Map.add id (Heap label) var_map; 106 | var_decl_map=Map.add label decl var_decl_map 107 | } 108 | 109 | let add_function ({ var_map; var_decl_map } as context) Ast.({ params; name=ID id}) decl = 110 | let label = "_"^id in 111 | { context with 112 | var_map=Map.add id (Fun (List.length params)) var_map; 113 | var_decl_map=Map.add label decl var_decl_map 114 | } 115 | 116 | let add_static_local_var on_err ({ var_map; var_decl_map; current_scope; } as context) id init = 117 | let label = Util.unique_id id in 118 | let init' = finalize_init (get_init on_err init) in 119 | { context with 120 | var_map=Map.add id (Heap label) var_map; 121 | var_decl_map=Map.add label (HeapDecl (Internal, init')) var_decl_map; 122 | current_scope=Set.add id current_scope; 123 | } 124 | 125 | let add_local_var ({ var_map; current_scope; stack_index; } as context) id = 126 | let stack_index', addr = stack_index - 4, Stack stack_index in 127 | { context with 128 | var_map=Map.add id addr var_map; 129 | current_scope=Set.add id current_scope; 130 | stack_index=stack_index'; 131 | } 132 | 133 | let var_lookup err_handler { var_map } var = 134 | try 135 | Map.find var var_map 136 | with 137 | | Not_found -> err_handler (Printf.sprintf "undeclared identifier %s" var) 138 | 139 | let opt_decl_lookup { var_decl_map } (Ast.ID id) = 140 | try 141 | Some (Map.find ("_"^id) var_decl_map) 142 | with 143 | | Not_found -> None 144 | -------------------------------------------------------------------------------- /src/pprint.ml: -------------------------------------------------------------------------------- 1 | open Ast 2 | 3 | let type_to_string fun_type = 4 | match fun_type with 5 | | IntType -> "INT" 6 | | CharType -> "CHAR" 7 | 8 | let id_to_string (ID id) = id 9 | 10 | let param_to_string (Param (param_type, param_id)) = Printf.sprintf "%s %s" (type_to_string param_type) (id_to_string param_id) 11 | 12 | let pprint_function_decl fun_type fun_id = 13 | Printf.printf "FUN %s %s:\n" (type_to_string fun_type) (id_to_string fun_id) 14 | 15 | let pprint_function_params params = 16 | let param_strings = List.map param_to_string params in 17 | let param_list_string = String.concat ", " param_strings in 18 | Printf.printf "\tparams: (%s)\n" param_list_string 19 | 20 | let const_to_string = function 21 | | Int i -> Printf.sprintf "Int<%d>" i 22 | | Char c -> Printf.sprintf "Char<%c>" c 23 | | String s -> Printf.sprintf "String<%s>" s 24 | 25 | let op_to_string = function 26 | | Add -> "+" 27 | | Sub -> "-" 28 | | Mult -> "*" 29 | | Div -> "/" 30 | | Mod -> "%" 31 | | Lt -> "<" 32 | | Le -> "<=" 33 | | Gt -> ">" 34 | | Ge -> ">=" 35 | | Neq -> "!=" 36 | | Eq -> "==" 37 | | And -> "&&" 38 | | Or -> "||" 39 | | BitAnd -> "&" 40 | | BitOr -> "|" 41 | | Xor -> "^" 42 | | ShiftL -> "<<" 43 | | ShiftR -> ">>" 44 | 45 | let unop_to_string = function 46 | | Negate -> "-" 47 | | Pos -> "+" 48 | | Complement -> "~" 49 | | Not -> "!" 50 | 51 | let assign_op_to_string = function 52 | | Equals -> "=" 53 | 54 | let rec exp_to_string = function 55 | | Var (ID v) -> Printf.sprintf "VAR<%s>" v 56 | | Const c -> const_to_string c 57 | | TernOp (e1, e2, e3) -> Printf.sprintf "%s ? %s : %s" (exp_to_string e1) (exp_to_string e2) (exp_to_string e3) 58 | | BinOp (op, e1, e2) -> Printf.sprintf "(%s %s %s)" (exp_to_string e1) (op_to_string op) (exp_to_string e2) 59 | | UnOp (op, e) -> Printf.sprintf "(%s %s)" (unop_to_string op) (exp_to_string e) 60 | | FunCall (fun_id, args) -> Printf.sprintf "%s(%s)" (id_to_string fun_id) (args_to_string args) 61 | | Assign (op, ID var_name, rhs) -> 62 | Printf.sprintf "%s %s %s" var_name (assign_op_to_string op) (exp_to_string rhs) 63 | 64 | and args_to_string args = 65 | let arg_strings = List.map exp_to_string args in 66 | String.concat ", " arg_strings 67 | 68 | let optional_exp_to_string = function 69 | | Some e -> exp_to_string e 70 | | None -> "" 71 | 72 | let decl_to_string { var_type; var_name=ID id; init } = 73 | let decl_str = Printf.sprintf "%s %s" (type_to_string var_type) id in 74 | match init with 75 | | None -> decl_str 76 | | Some e -> Printf.sprintf "%s = %s" decl_str (exp_to_string e) 77 | 78 | let pprint_decl indent decl = Printf.printf "%s%s\n" indent (decl_to_string decl) 79 | 80 | let rec pprint_block_item indent = function 81 | | Decl d -> pprint_decl indent d 82 | | Statement s -> pprint_stmt indent s 83 | 84 | and pprint_stmt indent = function 85 | | ReturnVal e -> Printf.printf "%sRETURN %s\n" indent (exp_to_string e) 86 | | Block block_items -> List.iter (fun item -> pprint_block_item indent item) block_items 87 | | If { cond; if_body; else_body } -> 88 | Printf.printf "%sIF (%s)\n" indent (exp_to_string cond); 89 | pprint_stmt (indent^"\t") if_body; 90 | begin match else_body with 91 | | Some statement -> 92 | Printf.printf "%sELSE\n" indent; 93 | pprint_stmt (indent^"\t") statement; 94 | Printf.printf "\n" 95 | | None -> Printf.printf "\n" 96 | end 97 | | For { init; cond; post; body } -> 98 | Printf.printf "%sFOR (%s ; %s ; %s)\n" indent 99 | (optional_exp_to_string init) 100 | (exp_to_string cond) 101 | (optional_exp_to_string post); 102 | pprint_stmt (indent^"\t") body; 103 | Printf.printf "\n" 104 | | ForDecl { init; cond; post; body } -> 105 | Printf.printf "%sFOR (%s ; %s ; %s)\n" indent 106 | (decl_to_string init) (exp_to_string cond) (optional_exp_to_string post); 107 | pprint_stmt (indent^"\t") body; 108 | Printf.printf "\n" 109 | | Exp e -> Printf.printf "%s%s\n" indent (optional_exp_to_string e) 110 | | While { cond; body } -> 111 | Printf.printf "%sWHILE (%s):\n" (exp_to_string cond) indent; 112 | pprint_stmt (indent^"\t") body; 113 | Printf.printf "\n" 114 | | DoWhile { body; cond } -> 115 | Printf.printf "%sDO:\n" indent; 116 | pprint_stmt (indent^"\t") body; 117 | Printf.printf "%sWHILE (%s)\n" (exp_to_string cond) indent 118 | | Break -> Printf.printf "%sBREAK\n" indent 119 | | Continue -> Printf.printf "%sCONTINUE\n" indent 120 | 121 | let pprint_function_body body = 122 | print_string "\tbody:\n"; 123 | List.map (fun item -> pprint_block_item "\t\t" item) body 124 | 125 | let pprint_function (FunDecl { fun_type; name; params; body }) = 126 | let _ = pprint_function_decl fun_type name in 127 | let _ = pprint_function_params params in 128 | match body with 129 | | Some body -> 130 | begin 131 | pprint_function_body body; 132 | () 133 | end 134 | | None -> () 135 | 136 | let pprint (Prog funs) = List.iter pprint_function funs 137 | -------------------------------------------------------------------------------- /src/lex.ml: -------------------------------------------------------------------------------- 1 | open Batteries 2 | 3 | (* 4 | int_regexp: (-?([0-9]+)|(0x[0-9a-f]+))(\b.* ) 5 | in other words: 6 | -one or more digits, OR 7 | -0x followed by one or more hex digits (0-9, a-f) 8 | -everything else 9 | all case insensitive 10 | *) 11 | let int_regexp = Str.regexp_case_fold "\\(\\([0-9]+\\)\\|\\(0x[0-9a-f]+\\)\\)\\(\\b.*\\)" 12 | (* 13 | id_regexp: ([A-Za-z][A-Za-z0-9_]* )(\b.* ) 14 | *) 15 | let id_regexp = Str.regexp "\\([A-Za-z_][A-Za-z0-9_]*\\)\\(\\b.*\\)" 16 | (* 17 | char_regexp: ('[^\\]|\\([abfenrtv'?]|[0-7]{1,3}|[0-9a-f]{1,3})')(.* ) 18 | in other words: 19 | -a single quote 20 | -one character that isn't a backslash or single quote, OR 21 | -a backslash followed by: 22 | -a, b, f, e, n, r, t, v, backslash, question mark, single quote or double quote (escape sequences), OR 23 | -1, 2, or 3 digits between 0 and 7 (octal representation) 24 | -h, followed by 1, 2, or 3 digits between 0-9, a-f (hex representation) 25 | -a single closing quote 26 | -everything else 27 | *) 28 | let char_regexp = Str.regexp "'\\([^'\\\\]\\|\\\\\\([abfenrtv'\"?\\\\]\\|[0-7]+\\|x[0-9a-fA-F]+\\)\\)'\\(.*\\)" 29 | 30 | let get_char char_token = 31 | match String.length char_token with 32 | | 1 -> String.get char_token 0 (* a single character *) 33 | | 2 -> (* escape sequence *) 34 | begin match char_token with 35 | | "\\\\" -> Char.chr 92 (* backslash *) 36 | | "\\a" -> Char.chr 7 (* bell *) 37 | | "\\b" -> '\b' (* backspace *) 38 | | "\\e" -> Char.chr 27 (* esc *) 39 | | "\\f" -> Char.chr 12 (* form feed *) 40 | | "\\n" -> '\n' (* newline *) 41 | | "\\r" -> '\r' (* carriage return *) 42 | | "\\t" -> '\t' (* tab *) 43 | | "\\v" -> Char.chr 11 (* vertical tab *) 44 | | "\\\'" -> Char.chr 39 (* single quote *) 45 | | "\\\"" -> '"' (* double quote *) 46 | | "\\?" -> '?' (* question mark *) 47 | | _ -> failwith ("Unknown escape sequence "^char_token) 48 | end 49 | | _ -> (* different prefix for hex or octal *) 50 | let prefix = if String.get char_token 1 = 'x' then "0" else "0o" in 51 | let num_str = prefix^(String.slice ~first:1 char_token) in 52 | Char.chr (Int.of_string num_str) 53 | 54 | let get_int int_token = 55 | if (String.get int_token 0 = '0') && 56 | String.length int_token > 1 && 57 | (Char.lowercase (String.get int_token 1) <> 'x') 58 | then int_of_string ("0o"^int_token) (* octal *) 59 | else int_of_string int_token (* hex or decimal *) 60 | 61 | let get_id t = 62 | let open Tok in 63 | match t with 64 | | "return" -> ReturnKeyword 65 | | "int" -> IntKeyword 66 | | "static" -> StaticKeyword 67 | | "extern" -> ExternKeyword 68 | | "char" -> CharKeyword 69 | | "if" -> IfKeyword 70 | | "else" -> ElseKeyword 71 | | "for" -> ForKeyword 72 | | "do" -> DoKeyword 73 | | "while" -> WhileKeyword 74 | | "break" -> BreakKeyword 75 | | "continue" -> ContinueKeyword 76 | | _ -> Id t 77 | 78 | let lex_complex_token input = 79 | if Str.string_match int_regexp input 0 80 | then (* it's an int *) 81 | let int_token = Str.matched_group 1 input in 82 | let int_val = get_int int_token in 83 | if int_val > Int32.to_int Int32.max_int || int_val < Int32.to_int Int32.min_int 84 | then failwith "Invalid int literal" 85 | else 86 | let rest = Str.matched_group 4 input in 87 | (Tok.Int int_val), rest 88 | else if Str.string_match char_regexp input 0 89 | then (* it's a char *) 90 | let char_token = Str.matched_group 1 input in 91 | let rest = Str.matched_group 3 input in 92 | (Tok.Char (get_char char_token)), rest 93 | else if Str.string_match id_regexp input 0 94 | then (* it's an ID, possibly a keyword *) 95 | let id_token_str = Str.matched_group 1 input in 96 | let rest = Str.matched_group 2 input in 97 | let id_token = get_id id_token_str in 98 | id_token, rest 99 | else 100 | failwith ("Syntax error: \""^input^ "\" is not valid.") 101 | 102 | let rec lex_const_or_id input_toks = 103 | let input = String.trim (String.implode input_toks) in 104 | let tok, rest = lex_complex_token input in 105 | tok::(lex_rest (String.explode rest)) 106 | 107 | and lex_rest words = 108 | let open Tok in 109 | match words with 110 | | [] -> [] 111 | | '{'::rest -> OpenBrace::(lex_rest rest) 112 | | '}'::rest -> CloseBrace::(lex_rest rest) 113 | | '('::rest -> OpenParen::(lex_rest rest) 114 | | ')'::rest -> CloseParen::(lex_rest rest) 115 | | ';'::rest -> Semicolon::(lex_rest rest) 116 | | ','::rest -> Comma::(lex_rest rest) 117 | | '+'::rest -> Plus::(lex_rest rest) 118 | | '?'::rest -> Question::(lex_rest rest) 119 | | ':'::rest -> Colon::(lex_rest rest) 120 | | '-'::'-'::rest -> failwith "decrement not yet implemented" 121 | | '<'::'<'::rest -> ShiftLeft::(lex_rest rest) 122 | | '>'::'>'::rest -> ShiftRight::(lex_rest rest) 123 | | '!'::'='::rest -> Neq::(lex_rest rest) 124 | | '<'::'='::rest -> Le::(lex_rest rest) 125 | | '>'::'='::rest -> Ge::(lex_rest rest) 126 | | '<'::rest -> Lt::(lex_rest rest) 127 | | '>'::rest -> Gt::(lex_rest rest) 128 | | '-'::rest -> Minus::(lex_rest rest) 129 | | '*'::rest -> Mult::(lex_rest rest) 130 | | '/'::rest -> Div::(lex_rest rest) 131 | | '%'::rest -> Mod::(lex_rest rest) 132 | | '&'::'&'::rest -> And::(lex_rest rest) 133 | | '&'::rest -> BitAnd::(lex_rest rest) 134 | | '|'::'|'::rest -> Or::(lex_rest rest) 135 | | '|'::rest -> BitOr::(lex_rest rest) 136 | | '^'::rest -> Xor::(lex_rest rest) 137 | | '~'::rest -> Complement::(lex_rest rest) 138 | | '!'::rest -> Bang::(lex_rest rest) 139 | | '='::'='::rest -> DoubleEq::(lex_rest rest) 140 | | '='::rest -> Eq::(lex_rest rest) 141 | | c::rest -> 142 | if Char.is_whitespace c then lex_rest rest else lex_const_or_id words 143 | 144 | let lex input = 145 | let input = String.trim input in 146 | lex_rest (String.explode input) 147 | 148 | let tok_to_string t = 149 | let open Tok in 150 | match t with 151 | | OpenBrace -> "{" 152 | | CloseBrace -> "}" 153 | | OpenParen -> "(" 154 | | CloseParen -> ")" 155 | | Semicolon -> ";" 156 | | Colon -> ":" 157 | | Question -> "?" 158 | | Comma -> "," 159 | | Plus -> "+" 160 | | Minus -> "-" 161 | | Mult -> "*" 162 | | Div -> "/" 163 | | Mod -> "%" 164 | | Complement -> "~" 165 | | Bang -> "!" 166 | | Eq -> "=" 167 | | DoubleEq -> "==" 168 | | Neq -> "!=" 169 | | Le -> "<=" 170 | | Ge -> ">=" 171 | | Lt -> "<" 172 | | Gt -> ">" 173 | | And -> "&&" 174 | | Or -> "||" 175 | | BitAnd -> "&" 176 | | BitOr -> "|" 177 | | Xor -> "^" 178 | | ShiftLeft -> "<<" 179 | | ShiftRight -> ">>" 180 | | IntKeyword -> "INT" 181 | | CharKeyword -> "CHAR" 182 | | ReturnKeyword -> "RETURN" 183 | | IfKeyword -> "IF" 184 | | ElseKeyword -> "ELSE" 185 | | ForKeyword -> "FOR" 186 | | DoKeyword -> "DO" 187 | | WhileKeyword -> "WHILE" 188 | | BreakKeyword -> "BREAK" 189 | | ContinueKeyword -> "CONTINUE" 190 | | Int i -> Printf.sprintf "INT<%d>" i 191 | | Id id -> Printf.sprintf "ID<%s>" id 192 | | Char c -> Printf.sprintf "CHAR<%c" c 193 | -------------------------------------------------------------------------------- /tst/test_lex.ml: -------------------------------------------------------------------------------- 1 | open OUnit2 2 | open Tok 3 | 4 | (* create a test to lex a single token *) 5 | let test_lex_single input expected_tok test_ctxt = 6 | let tokenized = Lex.lex input in 7 | assert_equal 1 (List.length tokenized); 8 | assert_equal (expected_tok) (List.hd tokenized) 9 | 10 | let test_lex_multi input expected_toks test_ctxt = 11 | let tokenized = Lex.lex input in 12 | let rec compare_token_lists a b = 13 | match a with 14 | | [] -> assert_equal b [] 15 | | head::tail -> assert_equal (List.hd b) head; compare_token_lists tail (List.tl b) in 16 | compare_token_lists tokenized expected_toks 17 | 18 | let test_expect_failure input fail_msg test_ctxt = 19 | let f = fun () -> Lex.lex input in 20 | assert_raises (Failure fail_msg) f 21 | 22 | let lex_char_tests = 23 | [ 24 | "test_lex_char" >:: test_lex_single "'a'" (Char 'a'); 25 | "test_lex_char_esc" >:: test_lex_single "'\\n'" (Char '\n'); (* escape sequence - newline *) 26 | "test_lex_char_backslash" >:: test_lex_single "'\\\\'" (Char '\\'); (* escape sequence - backslash *) 27 | "test_lex_char_hex" >:: test_lex_single "'\\x61'" (Char 'a'); (* hex *) 28 | "test_lex_char_octal" >:: test_lex_single "'\\141'" (Char 'a'); (* octal *) 29 | "test_lex_char_too_long" >:: test_expect_failure "'aa'" "Syntax error: \"'aa'\" is not valid."; 30 | ] 31 | 32 | let lex_int_tests = 33 | [ 34 | "test_lex_int" >:: test_lex_single "3" (Int 3); 35 | "test_lex_zero" >:: test_lex_single "0" (Int 0); 36 | "test_lex_int_max" >:: test_lex_single "2147483647" (Int 2147483647); 37 | "test_lex_int_overflow" >:: test_expect_failure "2147483648" "Invalid int literal"; 38 | "test_lex_int_underflow" >:: test_expect_failure "-2147483649" "Invalid int literal"; 39 | "test_lex_int_single_digit_hex" >:: test_lex_single "0xf" (Int 0xf); 40 | "test_lex_int_multi_digit_hex" >:: test_lex_single "0xaf42" (Int 0xaf42); 41 | "test_lex_int_octal" >:: test_lex_single "0666" (Int 0o666); 42 | "test_lex_int_octal_overflow" >:: test_expect_failure "020000000000" "Invalid int literal"; 43 | "test_lex_int_hex_overflow" >:: test_expect_failure "0x80000000" "Invalid int literal"; 44 | ] 45 | 46 | let lex_keyword_tests = 47 | [ 48 | "test_lex_return_keyword" >:: test_lex_single "return" ReturnKeyword; 49 | "test_lex_char_keyword" >:: test_lex_single "char" CharKeyword; 50 | "test_lex_int_keyword" >:: test_lex_single "int" IntKeyword; 51 | "test_lex_if_keyword" >:: test_lex_single "if" IfKeyword; 52 | "test_lex_else_keyword" >:: test_lex_single "else" ElseKeyword; 53 | "test_lex_for_keyword" >:: test_lex_single "for" ForKeyword; 54 | "test_lex_while_keyword" >:: test_lex_single "while" WhileKeyword; 55 | "test_lex_do_keyword" >:: test_lex_single "do" DoKeyword; 56 | ] 57 | 58 | let lex_punctuation_tests = [ 59 | "test_lex_openbrace" >:: test_lex_single "{" OpenBrace; 60 | "test_lex_closebrace" >:: test_lex_single "}" CloseBrace; 61 | "test_lex_openparen" >:: test_lex_single "(" OpenParen; 62 | "test_lex_closeparen" >:: test_lex_single ")" CloseParen; 63 | "test_lex_semicolon" >:: test_lex_single ";" Semicolon; 64 | "test_lex_comma" >:: test_lex_single "," Comma; 65 | "test_lex_plus" >:: test_lex_single "+" Plus; 66 | "test_lex_minus" >:: test_lex_single "-" Minus; 67 | "test_lex_product" >:: test_lex_single "*" Mult; 68 | "test_lex_divide" >:: test_lex_single "/" Div; 69 | "test_lex_modulo" >:: test_lex_single "%" Mod; 70 | "test_lex_complement" >:: test_lex_single "~" Complement; 71 | "test_lex_bang" >:: test_lex_single "!" Bang; 72 | "test_lex_eq" >:: test_lex_single "=" Eq; 73 | "test_lex_double_eq" >:: test_lex_single "==" DoubleEq; 74 | "test_lex_neq" >:: test_lex_single "!=" Neq; 75 | "test_lex_gt" >:: test_lex_single ">" Gt; 76 | "test_lex_ge" >:: test_lex_single ">=" Ge; 77 | "test_lex_lt" >:: test_lex_single "<" Lt; 78 | "test_lex_le" >:: test_lex_single "<=" Le; 79 | "test_lex_and" >:: test_lex_single "&&" And; 80 | "test_lex_or" >:: test_lex_single "||" Or; 81 | "test_lex_bitwise_and" >:: test_lex_single "&" BitAnd; 82 | "test_lex_bitwise_or" >:: test_lex_single "|" BitOr; 83 | "test_lex_xor" >:: test_lex_single "^" Xor; 84 | "test_lex_shiftl" >:: test_lex_single "<<" ShiftLeft; 85 | "test_lex_shiftr" >:: test_lex_single ">>" ShiftRight; 86 | "test_lex_question" >:: test_lex_single "?" Question; 87 | "test_lex_colon" >:: test_lex_single ":" Colon 88 | ] 89 | 90 | let lex_id_tests = [ 91 | "test_lex_id_simple" >:: test_lex_single "hello" (Id "hello"); 92 | "test_lex_id_underscore" >:: test_lex_single "_hell_o" (Id "_hell_o"); 93 | "test_lex_id_uppercase" >:: test_lex_single "HELLO" (Id "HELLO"); 94 | "test_lex_id_uppercase_return" >:: test_lex_single "RETURN" (Id "RETURN"); 95 | "test_lex_id_uppercase_char" >:: test_lex_single "CHAR" (Id "CHAR"); 96 | "test_lex_id_uppercase_int" >:: test_lex_single "INT" (Id "INT"); 97 | "test_lex_id_numeric" >:: test_lex_single "a123" (Id "a123"); 98 | "test_lex_id_numeric_start" >:: test_expect_failure "123a" "Syntax error: \"123a\" is not valid."; 99 | "test_lex_id_at" >:: test_expect_failure "abc@def" "Syntax error: \"@def\" is not valid."; 100 | "test_lex_id_money" >:: test_expect_failure "abc$def" "Syntax error: \"$def\" is not valid."; 101 | "test_lex_id_hash" >:: test_expect_failure "abc#def" "Syntax error: \"#def\" is not valid."; 102 | ] 103 | 104 | (* NOTE: we do NOT test newline handling because all newlines are removed when we read in the input file *) 105 | 106 | let lex_whitespace_tests = [ 107 | "test_leading_whitespace" >:: test_lex_single " foo" (Id "foo"); 108 | "test_leading_tab" >:: test_lex_single "\t foo" (Id "foo"); 109 | "test_trailing_whitespace" >:: test_lex_single "123 " (Int 123); 110 | "test_trailing_tab" >:: test_lex_single "0x81\t" (Int 0x81); 111 | ] 112 | 113 | let lex_multi_tests = [ 114 | "test_lex_negative" >:: test_lex_multi "-1" [Minus; Int 1]; 115 | "test_lex_positive" >:: test_lex_multi "+1" [Plus; Int 1]; 116 | "test_lex_brace_id" >:: test_lex_multi "}foo" [CloseBrace; Id "foo"]; 117 | "test_lex_brace_id_whitespace" >:: test_lex_multi "} bar" [CloseBrace; Id "bar"]; 118 | "test_lex_multi_semicolon" >:: test_lex_multi "bar;34" [Id "bar"; Semicolon; Int 34]; 119 | "test_lex_kw_space" >:: test_lex_multi "return 2" [ReturnKeyword; Int 2]; 120 | "test_lex_kw_nospace" >:: test_lex_multi "return2" [Id "return2"]; 121 | "test_lex_tab" >:: test_lex_multi "int\tmain" [IntKeyword; Id "main"]; 122 | "test_lex_some_spaces" >:: test_lex_multi "under_score main( a char\t ){ return}; ;\tf;oo" 123 | [Id "under_score"; Id "main"; OpenParen; Id "a"; CharKeyword; CloseParen; 124 | OpenBrace; ReturnKeyword; CloseBrace; Semicolon; Semicolon; Id "f"; 125 | Semicolon; Id "oo"]; 126 | "test_lex_addition" >:: test_lex_multi "a+b" [Id "a"; Plus; Id "b"]; 127 | "test_lex_addition_spaces" >:: test_lex_multi "1 + b" [Int 1; Plus; Id "b"]; 128 | "test_lex_addition_parens" >:: test_lex_multi "3 +('c'+ b)" [Int 3; Plus; OpenParen; Char 'c'; Plus; Id "b"; CloseParen]; 129 | "test_lex_subtraction" >:: test_lex_multi "4 - 'a'" [Int 4; Minus; Char 'a']; 130 | "test_lex_multiplication" >:: test_lex_multi "2*2" [Int 2; Mult; Int 2]; 131 | "test_lex_division" >:: test_lex_multi "2/2" [Int 2; Div; Int 2]; 132 | "test_lex_complement" >:: test_lex_multi "~5" [Complement; Int 5]; 133 | "test_lex_compl_var" >:: test_lex_multi "~var" [Complement; Id "var"]; 134 | "test_lex_bang" >:: test_lex_multi "!foo" [Bang; Id "foo"]; 135 | "test_lex_assignment" >:: test_lex_multi "a=5" [Id "a"; Eq; Int 5]; 136 | "test_lex_declaration" >:: test_lex_multi "int somevar;" [IntKeyword; Id "somevar"; Semicolon]; 137 | "test_lex_equals" >:: test_lex_multi "a == 4" [Id "a"; DoubleEq; Int 4]; 138 | "test_lex_not_equal" >:: test_lex_multi "a != 4" [Id "a"; Neq; Int 4]; 139 | ] 140 | 141 | let lex_tests = lex_char_tests@lex_int_tests@lex_keyword_tests@lex_punctuation_tests@lex_id_tests@lex_whitespace_tests@lex_multi_tests 142 | -------------------------------------------------------------------------------- /src/parse.ml: -------------------------------------------------------------------------------- 1 | open Batteries 2 | 3 | let tok_to_const = function 4 | | Tok.Int i -> Ast.(Const (Int i)) 5 | | Tok.Char c -> Ast.(Const (Char c)) 6 | | _ -> failwith "Not a constant" 7 | 8 | (* A map from a token to its corresponding Ast.binop. 9 | * Note that some tokens also correspond to other, non-binop 10 | * AST elements (e.g. Tok.Minus can also be parsed as unary minus), 11 | * so only use this map when we're expecting a binop *) 12 | let op_map = 13 | Map.empty 14 | |> Map.add Tok.Plus Ast.Add 15 | |> Map.add Tok.Minus Ast.Sub 16 | |> Map.add Tok.Mult Ast.Mult 17 | |> Map.add Tok.Div Ast.Div 18 | |> Map.add Tok.Mod Ast.Mod 19 | |> Map.add Tok.Lt Ast.Lt 20 | |> Map.add Tok.Le Ast.Le 21 | |> Map.add Tok.Gt Ast.Gt 22 | |> Map.add Tok.Ge Ast.Ge 23 | |> Map.add Tok.DoubleEq Ast.Eq 24 | |> Map.add Tok.Neq Ast.Neq 25 | |> Map.add Tok.And Ast.And 26 | |> Map.add Tok.Or Ast.Or 27 | |> Map.add Tok.ShiftLeft Ast.ShiftL 28 | |> Map.add Tok.ShiftRight Ast.ShiftR 29 | |> Map.add Tok.BitAnd Ast.BitAnd 30 | |> Map.add Tok.BitOr Ast.BitOr 31 | |> Map.add Tok.Xor Ast.Xor 32 | 33 | (** 34 | * Parse one production rule for a binary operation. 35 | * One production rule in the grammar corresponds to a particular precedence level. 36 | * Parameters: 37 | * - parse_next_level is a function to parse expressions 38 | * at the next highest precedence level. 39 | * - op_toks is the list of tokens that should be parsed at this precedence level. 40 | * - toks is the tokenized source code to parse 41 | * For example, to parse the production rule for addition and subtraction: 42 | * ::= { ("+" | "-") } 43 | * We would invoke this function as follows: 44 | * parse_bin_exp parse_term [Tok.Plus; Tok.Minus] toks 45 | **) 46 | let parse_expression toks = 47 | let parse_bin_exp parse_next_level op_toks toks = 48 | (* call parse_next_level to parse first sub-expression *) 49 | let left, rest = parse_next_level toks in 50 | let rec add_terms left_exp toks = 51 | let hd_tok = List.hd toks in 52 | if (List.mem hd_tok op_toks) then 53 | (* The first token after left_exp is an operator we care about, 54 | * so there must be another sub-expression *) 55 | 56 | (* Parse the next sub-expression *) 57 | let right_exp, rest = parse_next_level (List.tl toks) in 58 | let bin_op = Map.find hd_tok op_map in 59 | (* Construct BinOp AST node BEFORE parsing additional sub-expressions. 60 | * This enforces left-associativity. *) 61 | let left_exp = Ast.BinOp (bin_op, left_exp, right_exp) in 62 | (* Try to parse more sub-expressions *) 63 | add_terms left_exp rest 64 | else 65 | (* No more sub-expressions to add, return current exp *) 66 | left_exp, toks 67 | in 68 | (* try adding more subexpressions *) 69 | add_terms left rest 70 | in 71 | 72 | let rec parse_function_call = function 73 | | Tok.(Id name::OpenParen::arg_tokens) -> 74 | let fun_name = Ast.ID(name) in 75 | let args, rest = parse_function_arguments arg_tokens in 76 | Ast.FunCall(fun_name, args), rest 77 | | _ -> failwith "Shouldn't have called parse_function_call, this isn't a function call" 78 | 79 | and parse_function_arguments = function 80 | | Tok.CloseParen::rest -> [], rest 81 | | toks -> 82 | let arg, rest = parse_exp toks in 83 | let args, rest = 84 | match rest with 85 | | Tok.Comma::more_args -> parse_function_arguments more_args 86 | | Tok.CloseParen::after_fun_call -> [], after_fun_call 87 | | _ -> failwith "Invalid list of function arguments" 88 | in 89 | arg::args, rest 90 | 91 | and parse_factor toks = 92 | let open Tok in 93 | match toks with 94 | | OpenParen::factor -> 95 | let exp, after_exp = parse_exp factor in 96 | begin 97 | match after_exp with 98 | | CloseParen::rest -> (exp, rest) 99 | | _ -> failwith "Syntax error: expected close paren" 100 | end 101 | | Minus::factor -> 102 | let num, rest = parse_factor factor in 103 | Ast.(UnOp (Negate, num)), rest 104 | | Plus::factor -> 105 | let num, rest = parse_factor factor in 106 | Ast.(UnOp (Pos, num)), rest 107 | | Complement::factor -> 108 | let num, rest = parse_factor factor in 109 | Ast.(UnOp (Complement, num)), rest 110 | | Bang::factor -> 111 | let num, rest = parse_factor factor in 112 | Ast.(UnOp (Not, num)), rest 113 | | Id name::OpenParen::rest -> parse_function_call toks 114 | | Id name::rest -> Ast.(Var (ID name)), rest 115 | | Int i::rest -> Ast.(Const (Int i)), rest 116 | | Char c::rest -> Ast.(Const (Char c)), rest 117 | | rest -> failwith "Failed to parse factor" 118 | 119 | and parse_term toks = let open Tok in parse_bin_exp parse_factor [Mult; Div; Mod] toks 120 | 121 | and parse_additive_exp toks = let open Tok in parse_bin_exp parse_term [Plus; Minus] toks 122 | 123 | and parse_shift_exp toks = let open Tok in parse_bin_exp parse_additive_exp [ShiftLeft; ShiftRight] toks 124 | 125 | and parse_relational_exp toks = let open Tok in parse_bin_exp parse_shift_exp [Lt; Le; Gt; Ge] toks 126 | 127 | and parse_equality_exp toks = let open Tok in parse_bin_exp parse_relational_exp [DoubleEq; Neq] toks 128 | 129 | and parse_bitwise_and_exp toks = parse_bin_exp parse_equality_exp [Tok.BitAnd] toks 130 | 131 | and parse_xor_exp toks = parse_bin_exp parse_bitwise_and_exp [Tok.Xor] toks 132 | 133 | and parse_bitwise_or_exp toks = parse_bin_exp parse_xor_exp [Tok.BitOr] toks 134 | 135 | and parse_and_exp toks = parse_bin_exp parse_bitwise_or_exp [Tok.And] toks 136 | 137 | and parse_or_exp toks = parse_bin_exp parse_and_exp [Tok.Or] toks 138 | 139 | and parse_ternary_exp toks = 140 | let exp_1, rest = parse_or_exp toks in 141 | match rest with 142 | | Tok.Question::branch1_tokens -> 143 | let branch1, rest = parse_exp branch1_tokens in 144 | begin 145 | match rest with 146 | | Tok.Colon::branch2_tokens -> 147 | let branch2, rest = parse_ternary_exp branch2_tokens in 148 | Ast.TernOp (exp_1, branch1, branch2), rest 149 | | _ -> failwith "Expected colon after ternary operator" 150 | end 151 | | _ -> exp_1, rest 152 | 153 | and parse_exp = function 154 | | Tok.(Id v::Eq::rest) -> 155 | (* assignment statement *) 156 | let var_id = Ast.ID v in 157 | let exp, rest = parse_exp rest in 158 | Ast.(Assign (Equals, var_id, exp)), rest 159 | | tokens -> parse_ternary_exp tokens 160 | in 161 | parse_exp toks 162 | 163 | let parse_optional_exp next_expected toks = 164 | if (List.hd toks) = next_expected then 165 | None, (List.tl toks) 166 | else 167 | let e, rest = parse_expression toks in 168 | if (List.hd rest = next_expected) then 169 | Some e, (List.tl rest) 170 | else 171 | failwith "Didn't get expected token after exp" 172 | 173 | let parse_rest_of_declaration var_id var_type storage_class tokens = 174 | let init, rest = 175 | match tokens with 176 | | Tok.Semicolon::_ -> None, tokens 177 | | Tok.Eq::rest -> 178 | let exp, rest = parse_expression rest in 179 | Some exp, rest 180 | | _ -> failwith "Invalid initial value for variable" 181 | in 182 | let declaration = Ast.{ 183 | var_type = var_type; 184 | var_name = var_id; 185 | init = init; 186 | storage_class = storage_class; 187 | } 188 | in 189 | begin 190 | match rest with 191 | | Tok.Semicolon::rest -> declaration, rest 192 | | _ -> failwith "Expected semicolon after declaration" 193 | end 194 | 195 | let parse_declaration tokens = 196 | let storage_class, rest = match tokens with 197 | | Tok.StaticKeyword::rest -> Ast.Static, rest 198 | | Tok.ExternKeyword::rest -> Ast.Extern, rest 199 | | _ -> Ast.Nothing, tokens 200 | in 201 | match rest with 202 | | (Tok.IntKeyword::Tok.Id varname::rest) -> parse_rest_of_declaration (Ast.ID varname) Ast.IntType storage_class rest 203 | | _ -> failwith (Printf.sprintf "Unexpected keyword %s" (Lex.tok_to_string (List.hd rest))) 204 | 205 | let parse_function_body toks = 206 | let parse_return_statement stmt = 207 | let exp, rest = parse_expression stmt in 208 | Ast.ReturnVal exp, rest 209 | in 210 | 211 | let rec parse_block = function 212 | | Tok.OpenBrace::more_tokens -> 213 | begin 214 | let block_items, rest = parse_block_item_list more_tokens in 215 | match rest with 216 | | Tok.CloseBrace::rest -> block_items, rest 217 | | _ -> failwith "Expected closing brace at end of block" 218 | end 219 | | _ -> failwith "Expected opening brace at start of block" 220 | 221 | and parse_if_statement = function 222 | | Tok.OpenParen::_ as toks -> 223 | let cond, rest = parse_expression toks in 224 | let if_body, rest = parse_statement rest in 225 | let else_body, rest = 226 | match rest with 227 | | Tok.ElseKeyword::else_tokens -> 228 | let else_body, rest = parse_statement else_tokens in 229 | Some else_body, rest 230 | | _ -> None, rest in 231 | let if_statement = Ast.If { cond; if_body; else_body } in 232 | if_statement, rest 233 | | _ -> failwith "Expected '(' after 'if'" 234 | 235 | and parse_for_components toks = 236 | let cond, rest = parse_optional_exp Tok.Semicolon toks in 237 | let cond = match cond with 238 | (* C11 6.8.5.3 - An omitted expression-2 is replaced by a nonzero constant *) 239 | | None -> Ast.Const (Int 1) 240 | | Some c -> c 241 | in 242 | let post, rest = parse_optional_exp Tok.CloseParen rest in 243 | let body, rest = parse_statement rest in 244 | cond, post, body, rest 245 | 246 | and parse_for_statement = function 247 | | Tok.(OpenParen::((IntKeyword::_) as decl_toks)) -> 248 | (* for loop w/ variable declaration *) 249 | let init, rest = parse_declaration decl_toks in 250 | let cond, post, body, rest = parse_for_components rest in 251 | Ast.ForDecl { init; cond; post; body }, rest 252 | | Tok.OpenParen::toks -> 253 | let init, rest = parse_optional_exp Tok.Semicolon toks in 254 | let cond, post, body, rest = parse_for_components rest in 255 | Ast.For { init; cond; post; body }, rest 256 | | _ -> failwith "PANIC: expected open paren at start of for loop" 257 | 258 | and parse_while_statement toks = 259 | let cond, rest = parse_expression toks in 260 | let body, rest = parse_statement rest in 261 | Ast.While { cond; body }, rest 262 | 263 | and parse_do_while_statement toks = 264 | let body, rest = parse_statement toks in 265 | match rest with 266 | | Tok.WhileKeyword::cond_tokens -> 267 | let cond, rest = parse_expression cond_tokens in 268 | begin 269 | match rest with 270 | | Tok.Semicolon::rest -> Ast.DoWhile { body; cond }, rest 271 | | _ -> failwith "Expected semicolon after do-while" 272 | end 273 | | _ -> failwith "Expected 'while' after body of do-while" 274 | 275 | (* TODO: actually pay attention to types *) 276 | and parse_statement toks = 277 | let open Tok in 278 | match toks with 279 | | OpenBrace::_ -> 280 | let block, rest = parse_block toks in 281 | Block block, rest 282 | | IfKeyword::tokens -> parse_if_statement tokens 283 | | ForKeyword::tokens -> parse_for_statement tokens 284 | | ReturnKeyword::tokens -> 285 | let statement, rest = parse_return_statement tokens in 286 | begin 287 | match rest with 288 | | Semicolon::rest -> statement, rest 289 | | _ -> failwith "Expected semicolon after return statement" 290 | end 291 | | WhileKeyword::tokens -> parse_while_statement tokens 292 | | DoKeyword::tokens -> parse_do_while_statement tokens 293 | | BreakKeyword::Semicolon::rest -> Ast.Break, rest 294 | | BreakKeyword::_ -> failwith "Expected semicolon after break" 295 | | ContinueKeyword::Semicolon::rest -> Ast.Continue, rest 296 | | ContinueKeyword::_ -> failwith "Expected semicolon after continue" 297 | | _ -> 298 | let exp, rest = parse_optional_exp Tok.Semicolon toks in 299 | Ast.Exp exp, rest 300 | 301 | and parse_block_item tokens = 302 | let is_declaration = match tokens with 303 | | Tok.StaticKeyword::_ -> true 304 | | Tok.ExternKeyword::_ -> true 305 | | Tok.IntKeyword::_ -> true 306 | | _ -> false 307 | in 308 | if is_declaration then 309 | let decl, rest = parse_declaration tokens in 310 | Ast.Decl decl, rest 311 | else 312 | let stmt, rest = parse_statement tokens in 313 | Ast.Statement stmt, rest 314 | 315 | and parse_block_item_list tokens = 316 | if (List.hd tokens) == Tok.CloseBrace 317 | then [], tokens 318 | else 319 | let next_statement, rest = parse_block_item tokens in 320 | let statements, rest = parse_block_item_list rest in 321 | next_statement::statements, rest 322 | 323 | in 324 | parse_block toks 325 | 326 | let parse_next_param = function 327 | | Tok.IntKeyword::(Id name)::rest -> 328 | Ast.(Param (IntType, ID name)), rest 329 | | Tok.CharKeyword::(Id name)::rest -> 330 | Ast.(Param (CharType, ID name)), rest 331 | | _ -> failwith "Invalid function parameter" 332 | 333 | let rec parse_fun_params = function 334 | | Tok.CloseParen::rest -> [], rest 335 | | toks -> 336 | let param, rest = parse_next_param toks in 337 | let params, rest = 338 | match rest with 339 | | Tok.Comma::more_params -> parse_fun_params more_params 340 | | CloseParen::after_params -> [], after_params 341 | | _ -> failwith "Invalid list of parameters" 342 | in param::params, rest 343 | 344 | let parse_fun fun_type name storage_class tokens = 345 | (* we've already parsed everything up to open paren *) 346 | let params, rest = parse_fun_params tokens in 347 | let opt_body, rest = 348 | match rest with 349 | | Tok.OpenBrace::_ -> 350 | let body, rest' = parse_function_body rest in 351 | Some body, rest' 352 | | Tok.Semicolon::rest' -> None, rest' 353 | | _ -> failwith "Unexpected token after function declaration" 354 | in 355 | let decl = Ast.Function { fun_type; name; storage_class; params; body=opt_body } in 356 | decl, rest 357 | 358 | let parse_top_level tokens = 359 | let storage_class, after_storage_class = 360 | match tokens with 361 | | Tok.StaticKeyword::toks -> Ast.Static, toks 362 | | Tok.ExternKeyword::toks -> Ast.Extern, toks 363 | | otherwise -> Ast.Nothing, tokens 364 | in 365 | let tl_type, tl_name, after_id = 366 | match after_storage_class with 367 | | Tok.(IntKeyword::Id name::rest) -> Ast.(IntType, ID name, rest) 368 | | Tok.(CharKeyword::Id name::rest) -> Ast.(CharType, ID name, rest) 369 | | _ -> failwith "Parse error in parse_top_level: bad toplevel name or top" 370 | in 371 | let Ast.ID name = tl_name in 372 | match after_id with 373 | | Tok.OpenParen::rest -> parse_fun tl_type tl_name storage_class rest 374 | | other -> 375 | let decl, rest = parse_rest_of_declaration tl_name tl_type storage_class after_id in 376 | (Ast.GlobalVar decl, rest) 377 | 378 | let rec parse_top_levels = function 379 | | [] -> [] (* no functions left to parse *) 380 | | tokens -> 381 | let tl, rest = parse_top_level tokens in 382 | let tls = parse_top_levels rest in 383 | tl::tls 384 | 385 | let parse tokens = Ast.Prog (parse_top_levels tokens) 386 | -------------------------------------------------------------------------------- /src/gen.ml: -------------------------------------------------------------------------------- 1 | open Batteries 2 | 3 | let generate filename prog = 4 | 5 | (* Open assembly file for writing *) 6 | let filename_asm = String.splice filename (-1) 1 "s" in 7 | let chan = open_out filename_asm in 8 | let print_asm = Printf.fprintf chan "%s\n" in 9 | 10 | let handle_error message = begin 11 | Printf.printf "ERROR: %s\n" message; 12 | close_out chan; 13 | Sys.remove filename_asm; 14 | exit 1; 15 | end in 16 | 17 | let var_lookup = Context.var_lookup handle_error in 18 | 19 | let validate_fun_call context fun_name arg_count = 20 | match var_lookup context fun_name with 21 | | Context.Fun i -> 22 | if i <> arg_count then 23 | handle_error "called function with wrong number of arguments" 24 | else () 25 | | _ -> handle_error "tried to call identifier that isn't a function" 26 | in 27 | 28 | let emit_label label = Printf.fprintf chan "%s:\n" label in 29 | 30 | let emit_comparison set_instruction = 31 | begin 32 | print_asm " cmp %ecx, %eax"; 33 | print_asm " movl $0, %eax"; 34 | Printf.fprintf chan " %s %%al\n" set_instruction 35 | end in 36 | 37 | let emit_function_epilogue () = 38 | begin 39 | print_asm " movl %ebp, %esp"; 40 | print_asm " pop %ebp"; 41 | print_asm " ret" 42 | end in 43 | 44 | let emit_bin_op op = 45 | let open Ast in 46 | match op with 47 | | Div -> 48 | (* zero out edx (b/c idivl operand calculates 64-bit value edx:eax / operand) *) 49 | begin 50 | print_asm " xor %edx, %edx"; 51 | print_asm " idivl %ecx"; 52 | end 53 | | Mod -> 54 | (* zero out edx (b/c idivl operand calculates 64-bit value edx:eax / operand) *) 55 | begin 56 | print_asm " xor %edx, %edx"; 57 | print_asm " idivl %ecx"; 58 | (* remainder stored in edx, move it to eax *) 59 | print_asm " movl %edx, %eax" 60 | end 61 | | Sub -> print_asm " subl %ecx, %eax"; 62 | | Add -> print_asm " addl %ecx, %eax"; 63 | | Mult -> print_asm " imul %ecx, %eax"; 64 | | Xor -> print_asm " xor %ecx, %eax"; 65 | | BitAnd -> print_asm " and %ecx, %eax"; 66 | | BitOr -> print_asm " or %ecx, %eax"; 67 | | ShiftL -> print_asm " sall %cl, %eax"; 68 | | ShiftR -> print_asm " sarl %cl, %eax"; 69 | | Eq -> emit_comparison "sete" 70 | | Neq -> emit_comparison "setne" 71 | | Lt -> emit_comparison "setl" 72 | | Le -> emit_comparison "setle" 73 | | Gt -> emit_comparison "setg" 74 | | Ge -> emit_comparison "setge" 75 | | Or -> failwith "Shouldn't handle or here!" 76 | | And -> failwith "Shouldn't handle and here!" 77 | in 78 | (* TODO refactor a lot of these print functions, e.g. align *) 79 | let emit_local_heap_decl lbl = function 80 | | Context.(HeapDecl (_, Final init)) -> 81 | if init = 0 then 82 | begin 83 | print_asm " .bss"; 84 | print_asm " .align 2"; 85 | Printf.fprintf chan "_%s:\n" lbl; 86 | print_asm " .zero 4"; 87 | end 88 | else 89 | begin 90 | print_asm " .data"; 91 | print_asm " .align 2"; 92 | Printf.fprintf chan "_%s:\n" lbl; 93 | Printf.fprintf chan " .long %d\n" init 94 | end 95 | | other -> failwith "tentative definition of local var" 96 | in 97 | 98 | let emit_local_heap_decls = Map.iter emit_local_heap_decl in 99 | 100 | let print_globl_if_extern linkg lbl = 101 | if linkg = Context.External 102 | then Printf.fprintf chan " .globl _%s\n" lbl 103 | else () 104 | in 105 | 106 | let emit_global_heap_decl lbl = function 107 | | Context.HeapDecl (linkg, init) -> 108 | let open Context in 109 | let init' = finalize_init init in 110 | begin 111 | match init' with 112 | | NoDef -> () 113 | | Final 0 -> 114 | if linkg = External then 115 | (* allocate space in .comm - NOTE different on Linux! *) 116 | begin 117 | print_asm " .text"; 118 | Printf.fprintf chan " .comm _%s,4,2\n" lbl 119 | end 120 | else 121 | (* allocate space in bss *) 122 | begin 123 | print_asm " .bss"; 124 | print_asm " .align 2"; 125 | Printf.fprintf chan "_%s:\n" lbl; 126 | print_asm " .zero 4" 127 | end 128 | | Final i -> 129 | begin 130 | print_globl_if_extern linkg lbl; 131 | print_asm " .data"; 132 | print_asm " .align 2"; 133 | Printf.fprintf chan "_%s:\n" lbl; 134 | Printf.fprintf chan " .long %d\n" i 135 | end 136 | | Tentative -> failwith "failed to finalize tentative def" 137 | end 138 | | _ -> () (* function declaration - already handled *) 139 | in 140 | 141 | let emit_global_heap_decls = Map.iter emit_global_heap_decl in 142 | 143 | (* generate code to execute expression and move result into eax *) 144 | let rec generate_exp context = function 145 | | Ast.(Assign (Equals, (ID id), exp)) -> 146 | let _ = generate_exp context exp in 147 | begin 148 | match var_lookup context id with 149 | | Context.Stack var_index -> Printf.fprintf chan " movl %%eax, %d(%%ebp)\n" var_index 150 | | Context.Heap lbl -> Printf.fprintf chan " movl %%eax, _%s\n" lbl 151 | | Context.Fun _ -> handle_error "tried to assign function to variable" 152 | end 153 | 154 | | TernOp (e1, e2, e3) -> 155 | let post_tern_label = Util.unique_id "post_ternary" in 156 | let e3_label = Util.unique_id "second_branch_label" in 157 | begin 158 | (* calculate cond *) 159 | generate_exp context e1; 160 | (* is cond true? *) 161 | print_asm " cmp $0, %eax"; 162 | (* if it's false, jump to 'else' exp *) 163 | Printf.fprintf chan " je %s\n" e3_label; 164 | (* Generate 'if' block *) 165 | generate_exp context e2; 166 | (* after 'if', jump over 'else' *) 167 | Printf.fprintf chan " jmp %s\n" post_tern_label; 168 | (* Label start of 'else' *) 169 | emit_label e3_label; 170 | (* Generate 'else' *) 171 | generate_exp context e3; 172 | (* Label end of ternary operation *) 173 | emit_label post_tern_label 174 | end 175 | | BinOp (And, e1, e2) -> 176 | let clause2_lbl = Util.unique_id "and_clause2" in 177 | let post_expr_lbl = Util.unique_id "post_and" in 178 | begin 179 | generate_exp context e1; 180 | (* compare eax to 0 *) 181 | print_asm " cmp $0, %eax"; 182 | (* if not equal, e1 is true, so jump to e2 *) 183 | Printf.fprintf chan " jne %s\n" clause2_lbl; 184 | (* if we're here, e1 is 0 so just jump to end of expression without computing e2 *) 185 | Printf.fprintf chan " jmp %s\n" post_expr_lbl; 186 | (* handle case where e1 is false - return 1 iff e2 is true *) 187 | emit_label clause2_lbl; 188 | generate_exp context e2; 189 | (* if eax != 0, set eax = 1 *) 190 | print_asm " cmp $0, %eax"; 191 | print_asm " movl $0, %eax"; 192 | print_asm " setne %al"; 193 | emit_label post_expr_lbl; 194 | end 195 | | BinOp (Or, e1, e2) -> 196 | let clause2_lbl = Util.unique_id "or_clause2" in 197 | let post_expr_lbl = Util.unique_id "post_or" in 198 | begin 199 | generate_exp context e1; 200 | print_asm " cmp $0, %eax"; 201 | Printf.fprintf chan " je %s\n" clause2_lbl; 202 | (* if we're here, return 1 *) 203 | print_asm " movl $1, %eax"; 204 | Printf.fprintf chan " jmp %s\n" post_expr_lbl; 205 | (* handle case where e1 is true - return 1 iff e2 is true *) 206 | emit_label clause2_lbl; 207 | generate_exp context e2; 208 | print_asm " cmp $0, %eax"; 209 | print_asm " movl $0, %eax"; 210 | print_asm " setne %al"; 211 | emit_label post_expr_lbl; 212 | end 213 | | BinOp (op, e1, e2) -> 214 | begin 215 | (* calculate e1 and e2 *) 216 | generate_exp context e1; 217 | print_asm " push %eax"; 218 | generate_exp context e2; 219 | (* op s, d computes d = op(d,s), so put e2 in ecx, e1 in eax *) 220 | print_asm " movl %eax, %ecx"; 221 | (* Put e1 in eax (where it needs to be for idiv) *) 222 | print_asm " pop %eax"; 223 | (* perform operation *) 224 | emit_bin_op op 225 | end 226 | | UnOp (op, e) -> 227 | generate_exp context e; 228 | begin 229 | match op with 230 | | Pos -> ()(* No-op for now - eventually handle casting to int if needed *) 231 | | Negate -> print_asm " neg %eax"; 232 | | Complement -> print_asm " not %eax"; 233 | | Not -> 234 | print_asm " cmpl $0, %eax"; (* compare eax to 0 *) 235 | print_asm " movl $0, %eax"; (* set eax to 0 *) 236 | print_asm " sete %al"; (* if eax was zero in earlier comparison, set al to 1 *) 237 | end 238 | | Var (ID id) -> 239 | begin 240 | (* move value of variable to eax *) 241 | match var_lookup context id with 242 | | Context.Stack var_index -> Printf.fprintf chan " movl %d(%%ebp), %%eax\n" var_index 243 | | Context.Heap lbl -> Printf.fprintf chan " movl _%s, %%eax\n" lbl 244 | (* NOTE: this does not actually violate the spec *) 245 | | Context.Fun _ -> handle_error "Trying to reference function as variable" 246 | end 247 | | FunCall (ID id, args) -> 248 | let arg_count = List.length args in 249 | let _ = validate_fun_call context id arg_count in 250 | let _ = 251 | (* edx = (esp - 4*(arg_count + 1)) % 16 *) 252 | (* the + 1 is for saved remainder *) 253 | print_asm " movl %esp, %eax"; 254 | Printf.fprintf chan " subl $%d, %%eax\n" (4*(arg_count + 1)); 255 | print_asm " xorl %edx, %edx"; 256 | print_asm " movl $0x20, %ecx"; 257 | print_asm " idivl %ecx"; 258 | (* edx contains the remainder, i.e. # of bytes to subtract *) 259 | print_asm " subl %edx, %esp"; 260 | print_asm " pushl %edx" (* need it for deallocating *) 261 | in 262 | let _ = put_args_on_stack context args in 263 | begin 264 | (* actually make the call *) 265 | Printf.fprintf chan " call _%s\n" id; 266 | (* deallocate args *) 267 | Printf.fprintf chan " addl $%d, %%esp\n" (arg_count * 4); 268 | (* pop remainder off stack, undo 16-byte alignment *) 269 | print_asm " popl %edx"; 270 | print_asm " addl %edx, %esp" 271 | end 272 | | Const (Int i) -> 273 | Printf.fprintf chan " movl $%d, %%eax\n" i 274 | | Const (Char c) -> 275 | Printf.fprintf chan " movl $%d, %%eax\n" (Char.code c) 276 | | _ -> handle_error "Constant not supported" 277 | 278 | and put_args_on_stack context args = 279 | let push_arg arg = 280 | generate_exp context arg; 281 | print_asm " pushl %eax"; 282 | in 283 | List.iter push_arg (List.rev args) 284 | in 285 | 286 | let generate_optional_exp context = function 287 | | None -> () 288 | | Some e -> generate_exp context e 289 | in 290 | 291 | let generate_local_var context Ast.({ var_name=ID id; storage_class; init }) = 292 | if Context.already_defined context id 293 | then handle_error (Printf.sprintf "Variable %s declared twice in same scope" id) 294 | else 295 | match storage_class with 296 | | Ast.Extern -> 297 | (* TODO check this in context? *) 298 | if init = None then 299 | Context.add_local_extern_var context id 300 | else 301 | handle_error "extern local variable declaration with initializer" 302 | | Ast.Static -> 303 | Context.add_static_local_var handle_error context id init 304 | | Ast.Nothing -> 305 | let _ = match init with 306 | | Some exp -> generate_exp context exp 307 | | None -> () in 308 | (* push value of var onto stack *) 309 | let _ = print_asm " push %eax" in 310 | Context.add_local_var context id 311 | in 312 | 313 | let rec generate_statement context statement = 314 | let context = Context.reset_scope context in 315 | match statement with 316 | | Ast.For _ -> generate_for_loop context statement 317 | | ForDecl _ -> generate_for_decl_loop context statement 318 | | While _ -> generate_while_loop context statement 319 | | DoWhile _ -> generate_do_while_loop context statement 320 | | Block block -> generate_block context block 321 | | If _ -> generate_if context statement 322 | | Break -> generate_break context statement; context.var_decl_map 323 | | Continue -> generate_continue context statement; context.var_decl_map 324 | | Exp e -> generate_optional_exp context e; context.var_decl_map 325 | (* for return statements, variable map/stack index unchanged *) 326 | | ReturnVal exp -> 327 | let _ = generate_exp context exp in 328 | emit_function_epilogue (); 329 | context.var_decl_map 330 | (* 331 | for (i = 0; i < 5; i = i + 1) { 332 | statements 333 | } 334 | 335 | mov 0, i 336 | _loop: 337 | cmp i 5 338 | jmp if false to _post_loop 339 | do statements 340 | do i = i + 1 341 | jmp _loop 342 | _post_loop: 343 | ... 344 | 345 | *) 346 | 347 | and generate_for_loop context Ast.(For { init ; cond ; post ; body }) = 348 | begin 349 | generate_optional_exp context init; 350 | loop_helper context cond post body 351 | end 352 | 353 | and generate_for_decl_loop context Ast.(ForDecl { init ; cond ; post ; body }) = 354 | if init.storage_class = Nothing then 355 | (* add variable - for loop is new scope *) 356 | let context' = generate_local_var context init in 357 | let decl_map = loop_helper context' cond post body in 358 | (* pop declared variable off the stack *) 359 | let _ = print_asm " pop %eax" in 360 | decl_map 361 | else 362 | handle_error "Declared non-local var in for loop" 363 | 364 | and generate_while_loop context Ast.(While { cond ; body }) = 365 | (* while loops don't have post expression *) 366 | loop_helper context cond None body 367 | 368 | and loop_helper context cond post body = 369 | let loop_label = Util.unique_id "loop" in 370 | let post_loop_label = Util.unique_id "post_loop" in 371 | let continue_label = Util.unique_id "loop_continue" in 372 | let _ = 373 | begin 374 | emit_label loop_label; 375 | generate_exp context cond; 376 | (* jump after loop if cond is false *) 377 | print_asm " cmp $0, %eax"; 378 | Printf.fprintf chan " je %s\n" post_loop_label; 379 | end 380 | in 381 | (* evaluate loop body, which is a new scope *) 382 | let decl_map = generate_statement 383 | { context with 384 | break_label=Some post_loop_label; 385 | continue_label=Some continue_label 386 | } 387 | body 388 | in 389 | begin 390 | emit_label continue_label; 391 | (* evaluate post expression *) 392 | generate_optional_exp context post; 393 | (* execute loop again *) 394 | Printf.fprintf chan " jmp %s\n" loop_label; 395 | (* label end of loop *) 396 | emit_label post_loop_label; 397 | decl_map 398 | end 399 | 400 | and generate_do_while_loop context Ast.(DoWhile { body; cond }) = 401 | let loop_label = Util.unique_id "do_while" in 402 | let break_label = Util.unique_id "post_do_while" in 403 | let continue_label = Util.unique_id "continue_do_while" in 404 | let _ = emit_label loop_label in 405 | let decl_map = 406 | (* do-while body *) 407 | generate_statement 408 | { context with break_label=Some break_label; 409 | continue_label=Some continue_label 410 | } 411 | body 412 | in 413 | begin 414 | emit_label continue_label; 415 | (* evaluate condition *) 416 | generate_exp context cond; 417 | (* jump back to loop if cond is true *) 418 | print_asm " cmp $0, %eax"; 419 | Printf.fprintf chan " jne %s\n" loop_label; 420 | emit_label break_label; 421 | decl_map 422 | end 423 | 424 | and generate_break { break_label; } Ast.Break = 425 | match break_label with 426 | | Some label -> Printf.fprintf chan " jmp %s\n" label 427 | | None -> handle_error "Break statement not in loop" 428 | 429 | and generate_continue { continue_label; } Ast.Continue = 430 | match continue_label with 431 | | Some label -> Printf.fprintf chan " jmp %s\n" label 432 | | None -> handle_error "Continue statement not in loop" 433 | 434 | and generate_block context = function 435 | | [] -> 436 | let bytes_to_deallocate = 4 * Set.cardinal context.current_scope in 437 | (* pop any variables declared in this block off the stack *) 438 | let _ = Printf.fprintf chan " addl $%d, %%esp\n" bytes_to_deallocate in 439 | context.var_decl_map 440 | | Ast.Statement s::block_items -> 441 | let decl_map_1 = generate_statement context s in 442 | let decl_map_2 = generate_block context block_items in 443 | Map.union decl_map_1 decl_map_2 444 | | Ast.Decl d::block_items -> 445 | let context' = generate_local_var context d in 446 | generate_block context' block_items 447 | 448 | and generate_if context Ast.(If { cond; if_body; else_body }) = 449 | (* evaluate condition *) 450 | let _ = generate_exp context cond in 451 | let post_if_label = Util.unique_id "post_if" in 452 | let if_decl_map = begin 453 | (* stuff that's the same whether or not there's an else block *) 454 | (* compare cond to false *) 455 | print_asm " cmp $0, %eax"; 456 | (* if cond is false, jump over if body *) 457 | Printf.fprintf chan " je %s\n" post_if_label; 458 | (* generate if body *) 459 | generate_statement context if_body 460 | end in 461 | let else_decl_map = begin 462 | match else_body with 463 | (* handle else, if present *) 464 | | Some else_statement -> 465 | let post_else_label = Util.unique_id "post_else" in 466 | let _ = 467 | begin 468 | (* We're at end of if statement, need to jump over the else statement *) 469 | Printf.fprintf chan " jmp %s\n" post_else_label; 470 | (* now print out label after if statement *) 471 | emit_label post_if_label; 472 | end 473 | in 474 | let decl_map = 475 | (* now generate else statement *) 476 | generate_statement context else_statement; 477 | in 478 | begin 479 | (* now print post-else label *) 480 | emit_label post_else_label; 481 | decl_map 482 | end 483 | | None -> 484 | begin 485 | (* print out label that comes after if statement *) 486 | emit_label post_if_label; 487 | context.var_decl_map 488 | end 489 | end 490 | in 491 | Map.union if_decl_map else_decl_map 492 | in 493 | 494 | let combine_linkages linkg storage_class = 495 | let open Context in 496 | match (linkg, storage_class) with 497 | | Nothing, _ -> failwith "global object has no linkage" 498 | | External, Ast.Static -> handle_error "static declaration after non-static" 499 | | Internal, Nothing -> handle_error "non_static declaration after static" 500 | | _, _ -> linkg 501 | in 502 | 503 | let update_ctx_for_f ctx f = 504 | let open Ast in 505 | let open Context in 506 | let has_body = f.body <> None in 507 | let n_params = List.length f.params in 508 | let storage_class = 509 | (* 6.2.2.5 *) 510 | if f.storage_class = Ast.Nothing 511 | then Extern 512 | else f.storage_class 513 | in 514 | let updated_decl = 515 | match opt_decl_lookup ctx f.name with 516 | | None -> 517 | let linkg = 518 | if storage_class = Static 519 | then Internal 520 | else External 521 | in 522 | FunDecl (n_params, linkg, has_body) 523 | | Some (FunDecl (current_params, current_linkg, current_body)) -> 524 | if current_params <> n_params then 525 | handle_error "Function declared with different signatures" 526 | else if current_body && has_body then 527 | handle_error "multiple function definitions" 528 | else 529 | let linkg = combine_linkages current_linkg storage_class in 530 | FunDecl (n_params, linkg, current_body || has_body) 531 | | Some HeapDecl _ -> handle_error "variable redefined as function" 532 | in 533 | (Context.add_function ctx f updated_decl, updated_decl) 534 | in 535 | 536 | let generate_fun global_ctx f = 537 | let (global_ctx', f_decl) = update_ctx_for_f global_ctx f in 538 | let Context.FunDecl (_, f_linkg, _) = f_decl in 539 | let Ast.ID fun_name = f.name in 540 | begin 541 | match f.body with 542 | | Some body -> 543 | let _ = begin 544 | print_asm " .text"; 545 | print_globl_if_extern f_linkg fun_name; 546 | Printf.fprintf chan "_%s:\n" fun_name; 547 | print_asm " push %ebp"; 548 | print_asm " movl %esp, %ebp" 549 | end 550 | in 551 | let context = Context.init_for_fun global_ctx' f.params in 552 | let heap_decls = generate_block context body in 553 | begin 554 | (* set eax to 0 and generate function epilogue and ret, so function returns 0 even if missing return statement *) 555 | print_asm " movl $0, %eax"; 556 | emit_function_epilogue (); 557 | emit_local_heap_decls heap_decls 558 | end 559 | | None -> () 560 | end; 561 | global_ctx' 562 | in 563 | 564 | let combine_inits prev_init init_val = 565 | let open Context in 566 | match prev_init, init_val with 567 | | NoDef, a -> a 568 | | a, NoDef -> a 569 | | Tentative, a -> a 570 | | a, Tentative -> a 571 | | Final _, Final _ -> handle_error "multiple variable definitions" 572 | in 573 | 574 | let generate_global_var context v = 575 | let open Context in 576 | let init_val = get_var_init handle_error v in 577 | let current_decl = opt_decl_lookup context v.var_name in 578 | let updated_decl = 579 | match current_decl with 580 | | None -> 581 | let linkg = 582 | if v.storage_class = Static 583 | then Internal 584 | else External 585 | in 586 | HeapDecl (linkg, init_val) 587 | | Some (HeapDecl (current_linkage, current_init)) -> 588 | let linkg = combine_linkages current_linkage v.storage_class in 589 | let init = combine_inits current_init init_val in 590 | HeapDecl (linkg, init) 591 | | Some FunDecl _ -> handle_error "function redefined as variable" 592 | in 593 | Context.add_global_var context v updated_decl 594 | in 595 | 596 | let generate_tl global_ctx = function 597 | (* function declaration can add variables to heap but not to global scope *) 598 | | Ast.Function f -> generate_fun global_ctx f 599 | | Ast.GlobalVar gv -> generate_global_var global_ctx gv 600 | in 601 | 602 | let rec generate_tls global_ctx = function 603 | | [] -> Context.(emit_global_heap_decls global_ctx.var_decl_map) 604 | | tl::tls -> 605 | (* TODO: use List.fold *) 606 | let global_ctx' = generate_tl global_ctx tl in 607 | generate_tls global_ctx' tls 608 | in 609 | 610 | match prog with 611 | | Ast.Prog tl_list -> 612 | let global_ctx = Context.empty in 613 | let _ = generate_tls global_ctx tl_list in 614 | close_out chan 615 | -------------------------------------------------------------------------------- /tst/test_parse.ml: -------------------------------------------------------------------------------- 1 | open OUnit2 2 | open Ast 3 | 4 | (* AST COMPARISON *) 5 | let compare_ids (ID expected_id) (ID actual_id) = String.equal expected_id actual_id 6 | 7 | let compare_params (Param (t1, id1)) (Param (t2, id2)) = 8 | (t1 == t2) && (compare_ids id1 id2) 9 | 10 | let compare_consts expected actual = 11 | match expected, actual with 12 | | Int i1, Int i2 -> i1 == i2 13 | | Char c1, Char c2 -> c1 == c2 14 | | _ -> false 15 | 16 | let rec compare_exps expected actual = 17 | match expected, actual with 18 | | Const expected_const, Const actual_const -> compare_consts expected_const actual_const 19 | | Var id1, Var id2 -> compare_ids id1 id2 20 | | BinOp (expected_op, expected_e1, expected_e2), BinOp (actual_op, actual_e1, actual_e2) -> 21 | (expected_op == actual_op) && compare_exps expected_e1 actual_e1 && compare_exps expected_e2 actual_e2 22 | | UnOp (expected_op, expected_exp), UnOp (actual_op, actual_exp) -> 23 | (expected_op == actual_op) && compare_exps expected_exp actual_exp 24 | | FunCall (expected_id, expected_args), FunCall (actual_id, actual_args) -> 25 | compare_ids expected_id actual_id && List.for_all2 compare_exps expected_args actual_args 26 | | Assign (expected_op, expected_id, expected_exp), Assign (actual_op, actual_id, actual_exp) -> 27 | (expected_op == actual_op) && compare_ids expected_id actual_id && compare_exps expected_exp actual_exp 28 | | TernOp (expected_1, expected_2, expected_3), TernOp (actual_1, actual_2, actual_3) -> 29 | compare_exps expected_1 actual_1 && compare_exps expected_2 actual_2 && compare_exps expected_3 actual_3 30 | | _ -> false 31 | 32 | let compare_optional_exps expected actual = 33 | match expected, actual with 34 | | None, None -> true 35 | | Some e1, Some e2 -> compare_exps e1 e2 36 | | _ -> false 37 | 38 | let compare_declarations 39 | { var_type = t1; var_name = id1; init = init1; } 40 | { var_type = t2; var_name = id2; init = init2; } = 41 | t1 == t2 && compare_ids id1 id2 && 42 | match init1, init2 with 43 | | None, None -> true 44 | | Some e1, Some e2 -> compare_exps e1 e2 45 | | _ -> false 46 | 47 | let rec compare_statements expected actual = 48 | match expected, actual with 49 | | ReturnVal v1, ReturnVal v2 -> compare_exps v1 v2 50 | | Block stmts1, Block stmts2 -> List.for_all2 compare_block_items stmts1 stmts2 51 | | If { cond=cond1; if_body=then1; else_body=else1 }, 52 | If { cond=cond2; if_body=then2; else_body=else2 } -> 53 | compare_exps cond1 cond2 && compare_statements then1 then2 && 54 | (match else1, else2 with 55 | | Some body1, Some body2 -> compare_statements body1 body2 56 | | None, None -> true 57 | | _ -> false) 58 | | For { init = init1; cond = cond1; post = post1; body = body1; }, 59 | For { init = init2; cond = cond2; post = post2; body = body2; } -> 60 | compare_optional_exps init1 init2 && compare_exps cond1 cond2 && 61 | compare_optional_exps post1 post2 && compare_statements body1 body2 62 | | ForDecl { init = init1; cond = cond1; post = post1; body = body1; }, 63 | ForDecl { init = init2; cond = cond2; post = post2; body = body2; } -> 64 | compare_declarations init1 init2 && compare_exps cond1 cond2 && 65 | compare_optional_exps post1 post2 && compare_statements body1 body2 66 | | While { cond = cond1; body = body1 }, 67 | While { cond = cond2; body = body2 } -> 68 | compare_exps cond1 cond2 && compare_statements body1 body2 69 | | DoWhile { cond = cond1; body = body1 }, 70 | DoWhile { cond = cond2; body = body2 } -> 71 | compare_exps cond1 cond2 && compare_statements body1 body2 72 | | Exp e1, Exp e2 -> compare_optional_exps e1 e2 73 | | _ -> false 74 | 75 | and compare_block_items expected actual = 76 | match expected, actual with 77 | | Statement s1, Statement s2 -> compare_statements s1 s2 78 | | Decl d1, Decl d2 -> compare_declarations d1 d2 79 | | _ -> false 80 | 81 | let compare_funs expected actual = 82 | match expected, actual with 83 | | FunDecl { fun_type=t1; name=name1; params=params1; body=Some body1 }, 84 | FunDecl { fun_type=t2; name=name2; params=params2; body=Some body2 } -> 85 | t1 == t2 && compare_ids name1 name2 && List.for_all2 compare_params params1 params2 && List.for_all2 compare_block_items body1 body2 86 | | _ -> false 87 | 88 | let compare_asts (Prog fun_list1) (Prog fun_list2) = List.for_all2 compare_funs fun_list1 fun_list2 89 | 90 | (* AST construction utilities *) 91 | let make_int i = Const (Int i) 92 | let make_exp_statement e = Statement (Exp (Some e)) 93 | 94 | (* Test utilities *) 95 | 96 | let print_test_failure expected actual = 97 | let _ = print_string "Expected AST:\n" in 98 | let _ = Pprint.pprint expected in 99 | let _ = print_string "Actual AST:\n" in 100 | let _ = Pprint.pprint actual in 101 | () 102 | 103 | let test_expect_failure input fail_msg test_ctxt = 104 | let f = fun () -> Parse.parse input in 105 | assert_raises (Failure fail_msg) f 106 | 107 | let test_compare_asts tokens expected_ast test_ctxt = 108 | let actual_ast = Parse.parse tokens in 109 | let same = compare_asts expected_ast actual_ast in 110 | let _ = 111 | if (not same) 112 | then (print_test_failure expected_ast actual_ast) 113 | else () in 114 | assert_bool "Mismatched ASTS" same 115 | 116 | (* ast with multiple statements *) 117 | let make_ast params block_items = 118 | let body = block_items in 119 | let name = ID "main" in 120 | let f = FunDecl { fun_type=IntType; name; params; body=Some body } in 121 | Prog([f]) 122 | 123 | let make_simple_ast params exp = 124 | let ret = ReturnVal exp in 125 | make_ast params [Statement ret] 126 | 127 | (* CONSTANTS *) 128 | 129 | let simple_token_list = Lex.lex "int main(){return 2;}" 130 | let simple_ast = make_simple_ast [] (make_int 2) 131 | 132 | (* TODO: this probably belongs in a different test group *) 133 | let fun_arg_token_list = Lex.lex "int main(int argc){return 2;}" 134 | let fun_arg_ast = 135 | let params = [Param (IntType, ID "argc")] in 136 | let return_exp = make_int 2 in 137 | make_simple_ast params return_exp 138 | 139 | let return_char_tokens = Lex.lex "int main(int argc){return 'a';}" 140 | let return_char_ast = 141 | let params = [Param (IntType, ID "argc")] in 142 | let return_exp = Const (Char 'a') in 143 | make_simple_ast params return_exp 144 | 145 | let basic_parse_tests = [ 146 | "test_simple_parse" >:: test_compare_asts simple_token_list simple_ast; 147 | "test_fun_args" >:: test_compare_asts fun_arg_token_list fun_arg_ast; 148 | "test_return_char" >:: test_compare_asts return_char_tokens return_char_ast; 149 | ] 150 | 151 | (* UNARY OPERATORS *) 152 | 153 | let negation_tokens = Lex.lex "int main(){ return -3;}" 154 | let negation_ast = 155 | let unop = UnOp (Negate, make_int 3) in 156 | make_simple_ast [] unop 157 | 158 | let pos_tokens = Lex.lex "int main() { return +3;}" 159 | let pos_ast = 160 | let unop = UnOp (Pos, make_int 3) in 161 | make_simple_ast [] unop 162 | 163 | let complement_tokens = Lex.lex "int main() {return ~3;}" 164 | let complement_ast = 165 | let unop = UnOp (Complement, make_int 3) in 166 | make_simple_ast [] unop 167 | 168 | let not_tokens = Lex.lex "int main() {return !4;}" 169 | let not_ast = 170 | let unop = UnOp (Not, make_int 4) in 171 | make_simple_ast [] unop 172 | 173 | let unop_parse_tests = [ 174 | "test_negation" >:: test_compare_asts negation_tokens negation_ast; 175 | "test_pos" >:: test_compare_asts pos_tokens pos_ast; 176 | "test_complement" >:: test_compare_asts complement_tokens complement_ast; 177 | "test_not" >:: test_compare_asts not_tokens not_ast; 178 | ] 179 | 180 | (* BINARY OPERATORS *) 181 | 182 | let addition_tokens = Lex.lex "int main(){return 1+2;}" 183 | let addition_ast = 184 | let binop = BinOp (Add, make_int 1, make_int 2) in 185 | let params = [] in 186 | make_simple_ast params binop 187 | 188 | let subtraction_tokens = Lex.lex "int main(){return 4-3;}" 189 | let subtraction_ast = 190 | let binop = BinOp (Sub, make_int 4, make_int 3) in 191 | make_simple_ast [] binop 192 | 193 | let subtract_negative_tokens = Lex.lex "int main(){return 4- -3;}" 194 | let subtract_negative_ast = 195 | let unop = UnOp (Negate, make_int 3) in 196 | let binop = BinOp (Sub, make_int 4, unop) in 197 | make_simple_ast [] binop 198 | 199 | let multi_addition_tokens = Lex.lex "int main() {return 1+2+3;}" 200 | let multi_addition_ast = 201 | let inner_binop = BinOp (Add, make_int 1, make_int 2) in 202 | let outer_binop = BinOp (Add, inner_binop, make_int 3) in 203 | let params = [] in 204 | make_simple_ast params outer_binop 205 | 206 | let division_tokens = Lex.lex "int main() {return 4/5;}" 207 | let division_ast = 208 | let binop = BinOp (Div, make_int 4, make_int 5) in 209 | make_simple_ast [] binop 210 | 211 | let nested_addition_tokens = Lex.lex "int main(){return 1+(2+3);}" 212 | let nested_addition_ast = 213 | let inner_binop = BinOp (Add, make_int 2, make_int 3) in 214 | let outer_binop = BinOp (Add, make_int 1, inner_binop) in 215 | make_simple_ast [] outer_binop 216 | 217 | let lots_of_parens_tokens = Lex.lex "int main(){return ((3));}" 218 | let lots_of_parens_ast = make_simple_ast [] (make_int 3) 219 | 220 | let left_nested_addition_tokens = Lex.lex "int main() {return (1+2)+3;}" 221 | let left_nested_addition_ast = 222 | let inner_binop = BinOp (Add, make_int 1, make_int 2) in 223 | let outer_binop = BinOp (Add, inner_binop, make_int 3) in 224 | let params = [] in 225 | make_simple_ast params outer_binop 226 | 227 | let mult_tokens = Lex.lex "int main() {return 4*5;}" 228 | let mult_ast = 229 | let binop = BinOp (Mult, make_int 4, make_int 5) in 230 | make_simple_ast [] binop 231 | 232 | let lots_of_parens_add_tokens = Lex.lex "int main(){return ((1+2));}" 233 | let lots_of_parens_add_ast = 234 | let e1 = make_int 1 in 235 | let e2 = make_int 2 in 236 | let ret_exp = BinOp (Add, e1, e2) in 237 | make_simple_ast [] ret_exp 238 | 239 | let mod_toks = Lex.lex "int main() {return 3%2;}" 240 | let mod_ast = 241 | let e1 = make_int 3 in 242 | let e2 = make_int 2 in 243 | let ret_exp = BinOp (Mod, e1, e2) in 244 | make_simple_ast [] ret_exp 245 | 246 | let precedence_tokens = Lex.lex "int main() {return 1*2+3%2;}" 247 | let precedence_ast = 248 | let e1 = BinOp (Mult, make_int 1, make_int 2) in 249 | let e2 = BinOp (Mod, make_int 3, make_int 2) in 250 | let outer_binop = BinOp (Add, e1, e2) in 251 | make_simple_ast [] outer_binop 252 | 253 | let associativity_tokens = Lex.lex "int main() {return 1/2*3;}" 254 | let associativity_ast = 255 | let inner_binop = BinOp (Div, make_int 1, make_int 2) in 256 | let outer_binop = BinOp (Mult, inner_binop, make_int 3) in 257 | make_simple_ast [] outer_binop 258 | 259 | let binop_parse_tests = [ 260 | "test_addition" >:: test_compare_asts addition_tokens addition_ast; 261 | "test_subtraction" >:: test_compare_asts subtraction_tokens subtraction_ast; 262 | "test_subtract_negative" >:: test_compare_asts subtract_negative_tokens subtract_negative_ast; 263 | "test_multiplication" >:: test_compare_asts mult_tokens mult_ast; 264 | "test_division" >:: test_compare_asts division_tokens division_ast; 265 | "test_mod" >:: test_compare_asts mod_toks mod_ast; 266 | "test_nested_addition" >:: test_compare_asts nested_addition_tokens nested_addition_ast; 267 | "test_lots_of_parens" >:: test_compare_asts lots_of_parens_tokens lots_of_parens_ast; 268 | "test_left_nested_addition" >:: test_compare_asts left_nested_addition_tokens left_nested_addition_ast; 269 | "test_lots_of_parens_add" >:: test_compare_asts lots_of_parens_add_tokens lots_of_parens_add_ast; 270 | "test_precedence" >:: test_compare_asts precedence_tokens precedence_ast; 271 | "test_associativity" >:: test_compare_asts associativity_tokens associativity_ast; 272 | ] 273 | 274 | (* BITWISE BINOPS *) 275 | 276 | let bitwise_and_tokens = Lex.lex "int main() {return 1&2;}" 277 | let bitwise_and_ast = 278 | let binop = BinOp (BitAnd, make_int 1, make_int 2) in 279 | make_simple_ast [] binop 280 | 281 | let bitwise_or_tokens = Lex.lex "int main() {return 1|2;}" 282 | let bitwise_or_ast = 283 | let binop = BinOp (BitOr, make_int 1, make_int 2) in 284 | make_simple_ast [] binop 285 | 286 | let xor_tokens = Lex.lex "int main() {return 1^2;}" 287 | let xor_ast = 288 | let binop = BinOp (Xor, make_int 1, make_int 2) in 289 | make_simple_ast [] binop 290 | 291 | let shiftl_tokens = Lex.lex "int main() {return 1<<2;}" 292 | let shiftl_ast = 293 | let binop = BinOp (ShiftL, make_int 1, make_int 2) in 294 | make_simple_ast [] binop 295 | 296 | let shiftr_tokens = Lex.lex "int main() {return 1>>2;}" 297 | let shiftr_ast = 298 | let binop = BinOp (ShiftR, make_int 1, make_int 2) in 299 | make_simple_ast [] binop 300 | 301 | let bitwise_binops_tests = [ 302 | "test_bitwise_and" >:: test_compare_asts bitwise_and_tokens bitwise_and_ast; 303 | "test_bitwise_or" >:: test_compare_asts bitwise_or_tokens bitwise_or_ast; 304 | "test_xor" >:: test_compare_asts xor_tokens xor_ast; 305 | "test_shiftl" >:: test_compare_asts shiftl_tokens shiftl_ast; 306 | "test_shiftr" >:: test_compare_asts shiftr_tokens shiftr_ast 307 | ] 308 | 309 | (* BOOLEAN BINOPS *) 310 | 311 | let and_tokens = Lex.lex "int main() {return 1&&2;}" 312 | let and_ast = 313 | let binop = BinOp (And, make_int 1, make_int 2) in 314 | make_simple_ast [] binop 315 | 316 | let or_tokens = Lex.lex "int main() {return 1||2;}" 317 | let or_ast = 318 | let binop = BinOp (Or, make_int 1, make_int 2) in 319 | make_simple_ast [] binop 320 | 321 | (* addition is higher precedence than &&, which is higher precedence than || *) 322 | let bool_precedence_tokens = Lex.lex "int main() {return 1 || 2 && 3 + 4;}" 323 | let bool_precedence_ast = 324 | let add_binop = BinOp (Add, make_int 3, make_int 4) in 325 | let and_binop = BinOp (And, make_int 2, add_binop) in 326 | let or_binop = BinOp (Or, make_int 1, and_binop) in 327 | make_simple_ast [] or_binop 328 | 329 | let boolean_binop_tests = [ 330 | "test_and" >:: test_compare_asts and_tokens and_ast; 331 | "test_or" >:: test_compare_asts or_tokens or_ast; 332 | "test_bool_precedence" >:: test_compare_asts bool_precedence_tokens bool_precedence_ast; 333 | ] 334 | 335 | 336 | (* COMPARISON OPERATORS *) 337 | 338 | let compare_eq_tokens = Lex.lex "int main() {return 1==2;}" 339 | let compare_eq_ast = 340 | let binop = BinOp (Eq, make_int 1, make_int 2) in 341 | make_simple_ast [] binop 342 | 343 | let compare_neq_tokens = Lex.lex "int main() {return 1!=2;}" 344 | let compare_neq_ast = 345 | let binop = BinOp (Neq, make_int 1, make_int 2) in 346 | make_simple_ast [] binop 347 | 348 | let compare_gt_tokens = Lex.lex "int main() {return 1 > 2;}" 349 | let compare_gt_ast = 350 | let binop = BinOp (Gt, make_int 1, make_int 2) in 351 | make_simple_ast [] binop 352 | 353 | let compare_ge_tokens = Lex.lex "int main() {return 1 >= 2;}" 354 | let compare_ge_ast = 355 | let binop = BinOp (Ge, make_int 1, make_int 2) in 356 | make_simple_ast [] binop 357 | 358 | let compare_lt_tokens = Lex.lex "int main() {return 1 < 2;}" 359 | let compare_lt_ast = 360 | let binop = BinOp (Lt, make_int 1, make_int 2) in 361 | make_simple_ast [] binop 362 | 363 | let compare_le_tokens = Lex.lex "int main() {return 1 <= 2;}" 364 | let compare_le_ast = 365 | let binop = BinOp (Le, make_int 1, make_int 2) in 366 | make_simple_ast [] binop 367 | 368 | let comp_parse_tests = [ 369 | "test_eq" >:: test_compare_asts compare_eq_tokens compare_eq_ast; 370 | "test_neq" >:: test_compare_asts compare_neq_tokens compare_neq_ast; 371 | "test_lt" >:: test_compare_asts compare_lt_tokens compare_lt_ast; 372 | "test_gt" >:: test_compare_asts compare_gt_tokens compare_gt_ast; 373 | "test_le" >:: test_compare_asts compare_le_tokens compare_le_ast; 374 | "test_ge" >:: test_compare_asts compare_ge_tokens compare_ge_ast; 375 | ] 376 | 377 | (* VARIABLES *) 378 | 379 | let declaration_tokens = Lex.lex "int main(){int a=2; return a;}" 380 | let declaration_ast = 381 | let decl = Decl { var_type = IntType; 382 | var_name = ID "a"; 383 | init = Some (make_int 2); 384 | } 385 | in 386 | let ret = Statement (ReturnVal (Var (ID "a"))) in 387 | let statements = [decl; ret] in 388 | make_ast [] statements 389 | 390 | let assignment_tokens = Lex.lex "int main(){int a; a=2; return 0;}" 391 | let assignment_ast = 392 | let decl = Decl { var_type = IntType; 393 | var_name = ID "a"; 394 | init = None; 395 | } 396 | in 397 | let assign = make_exp_statement (Assign (Equals, ID "a", make_int 2)) in 398 | let ret = Statement (ReturnVal (make_int 0)) in 399 | let block_items = [decl; assign; ret] in 400 | make_ast [] block_items 401 | 402 | let multi_assign_tokens = Lex.lex "int main(){int a; int b = a = 2; return b;}" 403 | let multi_assign_ast = 404 | let decl_1 = (Decl { var_type = IntType; 405 | var_name = ID "a"; 406 | init = None 407 | }) in 408 | let assign = (Assign (Equals, ID "a", make_int 2)) in 409 | let decl_2 = (Decl { var_type = IntType; 410 | var_name = ID "b"; 411 | init = Some assign 412 | }) in 413 | let ret = (Statement (ReturnVal (Var (ID "b")))) in 414 | let statements = [decl_1; decl_2; ret] in 415 | make_ast [] statements 416 | 417 | 418 | let variable_parse_tests = [ 419 | "test_declaration" >:: test_compare_asts declaration_tokens declaration_ast; 420 | "test_assignment" >:: test_compare_asts assignment_tokens assignment_ast; 421 | "test_multi_assign" >:: test_compare_asts multi_assign_tokens multi_assign_ast; 422 | ] 423 | 424 | (* CONDITIONALS *) 425 | 426 | let single_if_tokens = Lex.lex "int main(){if (0) return 1; return 0;}" 427 | let single_if_ast = 428 | let cond = make_int 0 in 429 | let if_body = ReturnVal (make_int 1) in 430 | let if_statement = (Statement (If { cond; if_body; else_body=None })) in 431 | let return = (Statement (ReturnVal (make_int 0))) in 432 | make_ast [] [if_statement; return] 433 | 434 | let single_if_else_tokens = Lex.lex "int main(){if (0) return 1; else return 2;}" 435 | let single_if_else_ast = 436 | let cond = make_int 0 in 437 | let if_body = ReturnVal (make_int 1) in 438 | let else_body = Some (ReturnVal (make_int 2)) in 439 | let if_statement = (Statement (If { cond; if_body; else_body })) in 440 | make_ast [] [if_statement] 441 | 442 | let ternary_tokens = Lex.lex "int main(){ return 3 < 4 ? 5 : 6; }" 443 | let ternary_ast = 444 | let condition = BinOp (Lt, make_int 3, make_int 4) in 445 | let ternop = TernOp (condition, make_int 5, make_int 6) in 446 | make_simple_ast [] ternop 447 | 448 | (* TODO: test nested ternary statements, i'm not totally sure this is right *) 449 | 450 | let conditional_parse_tests = [ 451 | "test_single_if" >:: test_compare_asts single_if_tokens single_if_ast; 452 | "test_single_if_else" >:: test_compare_asts single_if_else_tokens single_if_else_ast; 453 | "test_ternary" >:: test_compare_asts ternary_tokens ternary_ast 454 | ] 455 | 456 | (* FOR LOOPS *) 457 | 458 | let for_tokens = Lex.lex "int main() {for(1; 1; 1) { 1;}}" 459 | let for_ast = 460 | let const = make_int 1 in 461 | let for_loop = (Statement (For { init=Some const; cond=const; post=Some const; body=Block [make_exp_statement const] })) in 462 | make_ast [] [for_loop] 463 | 464 | let for_compound_tokens = Lex.lex "int main() {int a; for(a=0; a<5; a=a+1){ 1+1; if(a<5) {return 3;} } }" 465 | let for_compound_ast = 466 | let decl = Decl { var_type = IntType; var_name = ID "a"; init = None } in 467 | let init = Assign (Equals, ID "a", make_int 0) in 468 | let post = Assign (Equals, ID "a", BinOp (Add, Var(ID "a"), make_int 1)) in 469 | let cond = BinOp (Lt, Var(ID "a"), make_int 5) in 470 | let exp = make_exp_statement (BinOp (Add, make_int 1, make_int 1)) in 471 | let return = Statement (ReturnVal (make_int 3)) in 472 | let if_statement = Statement (If { cond; if_body=Block [return]; else_body=None }) in 473 | let body = Block [exp; if_statement] in 474 | let for_loop = Statement (For { init=Some init; cond; post=Some post; body }) in 475 | make_ast [] [decl; for_loop] 476 | 477 | let for_declaration_tokens = Lex.lex "int main() {for(int a; 1; 1) { 1;}}" 478 | let for_decl_ast = 479 | let const = make_int 1 in 480 | let decl = { var_type = IntType; var_name = ID "a"; init = None} in 481 | let for_loop = Statement (ForDecl { init=decl; cond=const; post=Some const; body=Block [make_exp_statement const] }) in 482 | make_ast [] [for_loop] 483 | 484 | let single_for_tokens = Lex.lex "int main() {for(1; 1; 1) if (1) 1;}" 485 | let single_for_ast = 486 | let const = make_int 1 in 487 | let if_statement = If { cond=const; if_body=Exp (Some const); else_body=None } in 488 | let for_loop = For { init=Some const; cond=const; post=Some const; body=if_statement } in 489 | make_ast [] [Statement for_loop] 490 | 491 | let for_parse_tests = [ 492 | "test_for" >:: test_compare_asts for_tokens for_ast; 493 | "test_for_compound" >:: test_compare_asts for_compound_tokens for_compound_ast; 494 | "test_for_declaration" >:: test_compare_asts for_declaration_tokens for_decl_ast; 495 | "test_single_for" >:: test_compare_asts single_for_tokens single_for_ast 496 | ] 497 | 498 | (* WHILE & DO-WHILE LOOPS *) 499 | let while_tokens = Lex.lex "int main() { while (1) 2; }" 500 | let while_ast = 501 | let cond = make_int 1 in 502 | let body = Exp (Some (make_int 2)) in 503 | let while_statement = Statement (While { cond; body }) in 504 | make_ast [] [while_statement] 505 | 506 | let while_block_tokens = Lex.lex "int main() { while (1) {2;} }" 507 | let while_block_ast = 508 | let cond = make_int 1 in 509 | let body = make_exp_statement (make_int 2) in 510 | let while_statement = Statement (While { cond; body=Block [body] }) in 511 | make_ast [] [while_statement] 512 | 513 | let do_while_tokens = Lex.lex "int main() { do 2; while (1); }" 514 | let do_while_ast = 515 | let cond = make_int 1 in 516 | let body = Exp (Some (make_int 2)) in 517 | let do_while_statement = Statement (DoWhile { body; cond }) in 518 | make_ast [] [do_while_statement] 519 | 520 | let do_while_block_tokens = Lex.lex "int main() { do { 2; } while (1); }" 521 | let do_while_block_ast = 522 | let cond = make_int 1 in 523 | let body = make_exp_statement (make_int 2) in 524 | let do_while_statement = Statement (DoWhile { body=Block [body]; cond }) in 525 | make_ast [] [do_while_statement] 526 | 527 | let while_parse_tests = [ 528 | "test_while" >:: test_compare_asts while_tokens while_ast; 529 | "test_while_block" >:: test_compare_asts while_block_tokens while_block_ast; 530 | "test_do_while" >:: test_compare_asts do_while_tokens do_while_ast; 531 | "test_do_while_block" >:: test_compare_asts do_while_block_tokens do_while_block_ast 532 | ] 533 | 534 | (* FUNCTION CALLS *) 535 | 536 | let make_foo_decl params = FunDecl { fun_type=IntType; name=ID "foo"; params; body=Some [] } 537 | 538 | let make_fun_call_program params args = 539 | let foo_decl = make_foo_decl params in 540 | let fun_call = FunCall (ID "foo", args) in 541 | let main_body = [Statement (ReturnVal fun_call)] in 542 | let main_decl = FunDecl { fun_type=IntType; name=ID "main"; params=[]; body=Some main_body } in 543 | Prog [foo_decl; main_decl] 544 | 545 | let make_param name = Param (IntType, ID name) 546 | 547 | let fun_tokens = Lex.lex "int foo(){} int main() { return foo(); }" 548 | let fun_ast = make_fun_call_program [] [] 549 | 550 | let fun_args_tokens = Lex.lex "int foo(int a){} int main() { return foo(5); }" 551 | let fun_args_ast = 552 | let foo_params = [make_param "a"] in 553 | let foo_args = [make_int 5] in 554 | make_fun_call_program foo_params foo_args 555 | 556 | let fun_arg_exp_tokens = Lex.lex "int foo(int a){} int main() { return foo(a + 5); }" 557 | let fun_arg_exp_ast = 558 | let foo_params = [make_param "a"] in 559 | let arg_exp = BinOp (Add, Var (ID "a"), make_int 5) in 560 | make_fun_call_program foo_params [arg_exp] 561 | 562 | let fun_call_standalone = "int main() {incr (b);}" 563 | 564 | let fun_call_parse_tests = [ 565 | "test_simple_call" >:: test_compare_asts fun_tokens fun_ast; 566 | "test_call_with_args" >:: test_compare_asts fun_args_tokens fun_args_ast; 567 | "test_call_with_complex_arg" >:: test_compare_asts fun_arg_exp_tokens fun_arg_exp_ast; 568 | ] 569 | 570 | (* FAILURE *) 571 | 572 | let bad_token_list = [Tok.IntKeyword] 573 | let missing_semicolon = Lex.lex "int main(){return 2}" 574 | 575 | let incomplete_addition = Lex.lex "int main(){return 2+;}" 576 | let mismatched_parens = Lex.lex "int main() {return ((1);}" 577 | let mismatched_right_parens = Lex.lex "int main() {return (1));}" 578 | let one_paren = Lex.lex "int main() {return (1;}" 579 | let backwards_parens = Lex.lex "int main() {return )1+2;}" 580 | 581 | let failure_parse_tests = [ 582 | "test_parse_fail" >:: test_expect_failure bad_token_list "Parse error in parse_fun: bad function type or name"; 583 | "test_semicolon_required" >:: test_expect_failure missing_semicolon "Expected semicolon after return statement"; 584 | "test_incomplete_addition" >:: test_expect_failure incomplete_addition "Failed to parse factor"; 585 | "test_mismatched_parens" >:: test_expect_failure mismatched_parens "Syntax error: expected close paren"; 586 | "test_mismatched_right_parens" >:: test_expect_failure mismatched_right_parens "Expected semicolon after return statement"; 587 | "test_one_paren" >:: test_expect_failure one_paren "Syntax error: expected close paren"; 588 | "test_backwards_parens" >:: test_expect_failure backwards_parens "Failed to parse factor"; 589 | ] 590 | 591 | let parse_tests = basic_parse_tests@unop_parse_tests@binop_parse_tests 592 | @boolean_binop_tests@bitwise_binops_tests@comp_parse_tests 593 | @variable_parse_tests@conditional_parse_tests@fun_call_parse_tests 594 | @for_parse_tests@while_parse_tests@failure_parse_tests 595 | --------------------------------------------------------------------------------