├── .gitignore ├── dune-project ├── lib ├── parse_tests.mli ├── toplevel.mli ├── kaleidoscope_lib.ml ├── codegen.mli ├── dune ├── lexer.mll ├── ast.mli ├── parse_tests.ml ├── toplevel.ml ├── parser.mly ├── ast.ml └── codegen.ml ├── bin ├── kaleidoscope.mli ├── dune └── kaleidoscope.ml ├── stubs ├── dune └── bindings.c └── example └── mandel.kal /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | .merlin 3 | *.swp 4 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 1.6) 2 | (using menhir 2.0) 3 | -------------------------------------------------------------------------------- /lib/parse_tests.mli: -------------------------------------------------------------------------------- 1 | (*_ mli intentionally blank for test file *) 2 | -------------------------------------------------------------------------------- /bin/kaleidoscope.mli: -------------------------------------------------------------------------------- 1 | (*_ mli intentionally blank for executable *) 2 | -------------------------------------------------------------------------------- /lib/toplevel.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val main : [`Stdin | `File of string] -> unit 4 | -------------------------------------------------------------------------------- /lib/kaleidoscope_lib.ml: -------------------------------------------------------------------------------- 1 | module Codegen = Codegen 2 | module Lexer = Lexer 3 | module Parser = Parser 4 | module Toplevel = Toplevel 5 | -------------------------------------------------------------------------------- /stubs/dune: -------------------------------------------------------------------------------- 1 | (rule 2 | (targets libbindings.so) 3 | (deps bindings.c) 4 | (action (run gcc -shared -fPIC %{deps} -o %{targets}))) 5 | -------------------------------------------------------------------------------- /lib/codegen.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val the_module : Llvm.llmodule 4 | 5 | val codegen_proto : Ast.proto -> Llvm.llvalue 6 | 7 | val codegen_func : [`Function] Llvm.PassManager.t -> Ast.func -> Llvm.llvalue 8 | -------------------------------------------------------------------------------- /bin/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name kaleidoscope) 3 | (libraries 4 | async 5 | core 6 | kaleidoscope_lib) 7 | (link_deps (file ../stubs/libbindings.so)) 8 | (link_flags -cclib -Lstubs -cclib -lbindings) 9 | (preprocess (pps ppx_jane ppx_expect ppx_let))) 10 | -------------------------------------------------------------------------------- /stubs/bindings.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* putchard - putchar that take a double and returns 0. */ 4 | extern double putchard(double X) { 5 | putc((char)X, stderr); 6 | return 0; 7 | } 8 | 9 | /* printd - printf that takes a double prints it as "%f\n", returning 0. */ 10 | extern double printd(double X) { 11 | fprintf(stderr, "%f\n", X); 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /bin/kaleidoscope.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | let () = 4 | let open Command.Let_syntax in 5 | Command.basic ~summary:"Parse and print kaleidoscope" 6 | [%map_open 7 | let file = flag "file" (optional file) ~doc:"FILE read input from file" in 8 | fun () -> Kaleidoscope_lib.Toplevel.main (match file with 9 | | None -> `Stdin 10 | | Some file -> `File file)] 11 | |> Command.run 12 | -------------------------------------------------------------------------------- /lib/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name kaleidoscope_lib) 3 | (libraries 4 | async 5 | core 6 | ctypes.foreign 7 | llvm 8 | llvm.analysis 9 | llvm.executionengine 10 | llvm.target 11 | llvm.scalar_opts 12 | menhirLib) 13 | (preprocess (pps ppx_jane ppx_expect ppx_let)) 14 | (inline_tests)) 15 | 16 | (ocamllex lexer) 17 | 18 | (menhir 19 | (flags --external-tokens Ast --explain --table) 20 | (modules parser)) 21 | -------------------------------------------------------------------------------- /lib/lexer.mll: -------------------------------------------------------------------------------- 1 | { 2 | open Ast 3 | } 4 | 5 | let white = [' ' '\t' '\n' '\r']+ 6 | let newline = '\n' | '\r' | "\r\n" 7 | let id = ['a'-'z' 'A'-'Z'] ['a'-'z' 'A'-'Z' '0'-'9']* 8 | let digit = ['0'-'9'] 9 | let frac = '.' digit* 10 | let float = digit* frac? 11 | 12 | rule read = 13 | parse 14 | (* skip whitespace *) 15 | | white { read lexbuf } 16 | | newline { read lexbuf } 17 | | "def" { DEF } 18 | | "extern" { EXTERN } 19 | | "if" { IF } 20 | | "then" { THEN } 21 | | "else" { ELSE } 22 | | "for" { FOR } 23 | | "in" { IN } 24 | | "binary" { BINARY } 25 | | "unary" { UNARY } 26 | | "var" { VAR } 27 | | id { IDENT (Lexing.lexeme lexbuf) } 28 | | float { NUMBER (float_of_string (Lexing.lexeme lexbuf)) } 29 | | "=" { EQUALS } 30 | | "(" { LEFT_PAREN } 31 | | ")" { RIGHT_PAREN } 32 | | "," { COMMA } 33 | | ";" { SEMICOLON } 34 | (* '#' marks the beginning of a comment *) 35 | | "#" { read_comment lexbuf } 36 | | _ { KWD (Lexing.lexeme_char lexbuf 0) } 37 | | eof { EOF } 38 | 39 | and read_comment = 40 | parse 41 | (* comment continues until newline *) 42 | | newline { read lexbuf } 43 | | _ { read_comment lexbuf } 44 | | eof { EOF } 45 | 46 | -------------------------------------------------------------------------------- /example/mandel.kal: -------------------------------------------------------------------------------- 1 | extern printd(x); 2 | 3 | def binary : 1 (x y) 0; 4 | def unary!(v) if v then 0 else 1; 5 | 6 | def unary-(v) 7 | 0-v; 8 | 9 | def binary> 10 (LHS RHS) 10 | RHS < LHS; 11 | 12 | def binary| 5 (LHS RHS) 13 | if LHS then 14 | 1 15 | else if RHS then 16 | 1 17 | else 18 | 0; 19 | 20 | def binary& 6 (LHS RHS) 21 | if !LHS then 22 | 0 23 | else 24 | !!RHS; 25 | 26 | extern putchard(char); 27 | 28 | def printdensity(d) 29 | if d > 8 then 30 | putchard(32) 31 | else if d > 4 then 32 | putchard(46) 33 | else if d > 2 then 34 | putchard(43) 35 | else 36 | putchard(42); 37 | 38 | def mandelconverger(real imag iters creal cimag) 39 | if iters > 255 | (real*real + imag*imag > 4) then 40 | iters 41 | else 42 | mandelconverger(real*real - imag*imag + creal, 43 | 2*real*imag + cimag, 44 | iters+1, creal, cimag); 45 | 46 | def mandelconverge(real imag) 47 | mandelconverger(real, imag, 0, real, imag); 48 | 49 | def mandelhelp(xmin xmax xstep ymin ymax ystep) 50 | for y = ymin, y < ymax, ystep in ( 51 | (for x = xmin, x < xmax, xstep in 52 | printdensity(mandelconverge(x,y))) 53 | : putchard(10) 54 | ); 55 | 56 | def mandel(realstart imagstart realmag imagmag) 57 | mandelhelp(realstart, realstart+realmag*78, realmag, 58 | imagstart, imagstart+imagmag*40, imagmag); 59 | 60 | mandel(-2.3, -1.3, 0.05, 0.07); 61 | 62 | mandel(-2, -1, 0.02, 0.04); 63 | 64 | mandel(-0.9, -1.4, 0.02, 0.03); 65 | -------------------------------------------------------------------------------- /lib/ast.mli: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | type token = 4 | | DEF 5 | | EXTERN 6 | | IF 7 | | THEN 8 | | ELSE 9 | | FOR 10 | | IN 11 | | BINARY 12 | | UNARY 13 | | VAR 14 | | IDENT of string 15 | | NUMBER of float 16 | | KWD of char 17 | | LEFT_PAREN 18 | | RIGHT_PAREN 19 | | EQUALS 20 | | COMMA 21 | | SEMICOLON 22 | | EOF 23 | [@@deriving sexp] 24 | 25 | type proto = 26 | | Prototype of string * string list 27 | | BinOpPrototype of string * string list * int 28 | [@@deriving sexp] 29 | 30 | module Expr : sig 31 | module No_binop : sig 32 | type t = 33 | | Number of float 34 | | Variable of string 35 | | Unary of char * t 36 | | Bin_list of t * (char * int * t) list 37 | | Call of string * t list 38 | | If of t * t * t 39 | | For of string * t * t * t option * t 40 | | Var of (string * t option) list * t 41 | [@@deriving sexp] 42 | 43 | type func = Function of proto * t [@@deriving sexp] 44 | end 45 | 46 | type t = 47 | | Number of float 48 | | Variable of string 49 | | Unary of char * t 50 | | Binary of char * t * t 51 | | Call of string * t list 52 | | If of t * t * t 53 | | For of string * t * t * t option * t 54 | | Var of (string * t option) list * t 55 | [@@deriving sexp] 56 | 57 | val of_no_binop : No_binop.t -> t 58 | end 59 | 60 | type func = Function of proto * Expr.t [@@deriving sexp] 61 | 62 | val func_of_no_binop_func : Expr.No_binop.func -> func 63 | 64 | val set_func_name : string -> func -> func 65 | 66 | val binop_precedence : (char, int) Hashtbl.t 67 | -------------------------------------------------------------------------------- /lib/parse_tests.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | let print_parsed s = 4 | printf 5 | !"%{sexp: [`Expr of Ast.Expr.No_binop.func | `Extern of Ast.proto | `Def \ 6 | of Ast.Expr.No_binop.func | `Eof ]}" 7 | (Parser.toplevel Lexer.read (Lexing.from_string s)) 8 | 9 | let%expect_test _ = 10 | print_parsed "LHS < RHS;" ; 11 | [%expect 12 | {| 13 | (Expr 14 | (Function (Prototype "" ()) 15 | (Bin_list (Variable LHS) ((< -1 (Variable RHS)))))) |}] ; 16 | print_parsed "LHS < RHS | LHS > RHS;" ; 17 | [%expect 18 | {| 19 | (Expr 20 | (Function (Prototype "" ()) 21 | (Bin_list (Variable LHS) 22 | ((< -1 (Variable RHS)) (| -1 (Variable LHS)) (> -1 (Variable RHS)))))) |}]; 23 | print_parsed "def binary = 9 (LHS RHS) !(LHS < RHS | LHS > RHS);"; 24 | [%expect {| 25 | (Def 26 | (Function (BinOpPrototype binary= (LHS RHS) 9) 27 | (Unary ! 28 | (Bin_list (Variable LHS) 29 | ((< -1 (Variable RHS)) (| -1 (Variable LHS)) (> -1 (Variable RHS))))))) |}]; 30 | print_parsed ""; 31 | [%expect {| Eof |}]; 32 | print_parsed "!a | x;"; 33 | [%expect 34 | {| 35 | (Expr 36 | (Function (Prototype "" ()) 37 | (Bin_list (Unary ! (Variable a)) ((| -1 (Variable x)))))) |}] ; 38 | print_parsed "if x > 0 then 1 else x + 10;"; 39 | [%expect {| 40 | (Expr 41 | (Function (Prototype "" ()) 42 | (If (Bin_list (Variable x) ((> -1 (Number 0)))) (Number 1) 43 | (Bin_list (Variable x) ((+ -1 (Number 10))))))) |}]; 44 | print_parsed "5 + #some comment\n5;"; 45 | [%expect {| (Expr (Function (Prototype "" ()) (Bin_list (Number 5) ((+ -1 (Number 5)))))) |}]; 46 | print_parsed "def test(x) 1+2+x;"; 47 | [%expect {| 48 | (Def 49 | (Function (Prototype test (x)) 50 | (Bin_list (Number 1) ((+ -1 (Number 2)) (+ -1 (Variable x)))))) |}]; 51 | print_parsed "def test(x) (1+2+x)*(x+(1+2));"; 52 | [%expect {| 53 | (Def 54 | (Function (Prototype test (x)) 55 | (Bin_list (Bin_list (Number 1) ((+ -1 (Number 2)) (+ -1 (Variable x)))) 56 | ((* -1 57 | (Bin_list (Variable x) 58 | ((+ -1 (Bin_list (Number 1) ((+ -1 (Number 2)))))))))))) |}]; 59 | print_parsed "foo(2);"; 60 | [%expect {| (Expr (Function (Prototype "" ()) (Call foo ((Number 2))))) |}]; 61 | print_parsed "extern sin(x);"; 62 | [%expect {| (Extern (Prototype sin (x))) |}]; 63 | -------------------------------------------------------------------------------- /lib/toplevel.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | let run_main in_channel ~the_fpm ~the_execution_engine = 4 | let anonymous_func_count = ref 0 in 5 | let supplier = 6 | Parser.MenhirInterpreter.lexer_lexbuf_to_supplier Lexer.read 7 | (Lexing.from_channel in_channel) 8 | in 9 | let rec run_loop the_fpm the_execution_engine supplier = 10 | let incremental = Parser.Incremental.toplevel Lexing.dummy_pos in 11 | printf "\n" ; 12 | printf "ready> " ; 13 | Out_channel.flush stdout ; 14 | ( try 15 | match Parser.MenhirInterpreter.loop supplier incremental with 16 | | `Expr ast -> 17 | printf "parsed a toplevel expression" ; 18 | (* Evaluate a top-level expression into an anonymous function. *) 19 | let func = Ast.func_of_no_binop_func ast in 20 | Out_channel.flush stdout ; 21 | Llvm_executionengine.add_module Codegen.the_module 22 | the_execution_engine ; 23 | anonymous_func_count := !anonymous_func_count + 1 ; 24 | let tmp_name = sprintf "__toplevel%d" !anonymous_func_count in 25 | let tmp_func = Ast.set_func_name tmp_name func in 26 | let the_function = Codegen.codegen_func the_fpm tmp_func in 27 | Llvm.dump_value the_function ; 28 | (* JIT the function, returning a function pointer. *) 29 | let fp = 30 | Llvm_executionengine.get_function_address tmp_name 31 | (Foreign.funptr Ctypes.(void @-> returning double)) 32 | the_execution_engine 33 | in 34 | printf "Evaluated to %f" (fp ()) ; 35 | Llvm_executionengine.remove_module Codegen.the_module 36 | the_execution_engine 37 | | `Extern ext -> 38 | printf "parsed an extern" ; 39 | (* printf !"%{sexp: Ast.proto}\n" ext; *) 40 | Out_channel.flush stdout ; 41 | Llvm.dump_value (Codegen.codegen_proto ext) 42 | | `Def def -> 43 | printf "parsed a definition" ; 44 | let func = Ast.func_of_no_binop_func def in 45 | (* printf !"%{sexp: Ast.func}\n" func; *) 46 | Out_channel.flush stdout ; 47 | Llvm.dump_value (Codegen.codegen_func the_fpm func) 48 | | `Eof -> 49 | printf "\n\n" ; 50 | printf "reached eof\n" ; 51 | printf "module dump:\n" ; 52 | Out_channel.flush Out_channel.stdout ; 53 | (* Print out all the generated code. *) 54 | Llvm.dump_module Codegen.the_module ; 55 | exit 0 56 | with e -> 57 | (* Skip expression for error recovery. *) 58 | printf !"\nencountered an error %{sexp: exn}" e ) ; 59 | Out_channel.flush Out_channel.stdout ; 60 | run_loop the_fpm the_execution_engine supplier 61 | in 62 | run_loop the_fpm the_execution_engine supplier 63 | 64 | let main input = 65 | (* Install standard binary operators. 66 | * 1 is the lowest precedence. *) 67 | Hashtbl.add_exn Ast.binop_precedence ~key:'=' ~data:2 ; 68 | Hashtbl.add_exn Ast.binop_precedence ~key:'<' ~data:10 ; 69 | Hashtbl.add_exn Ast.binop_precedence ~key:'+' ~data:20 ; 70 | Hashtbl.add_exn Ast.binop_precedence ~key:'-' ~data:20 ; 71 | Hashtbl.add_exn Ast.binop_precedence ~key:'*' ~data:40 ; 72 | (* Create the JIT *) 73 | let the_execution_engine = 74 | ( match Llvm_executionengine.initialize () with 75 | | true -> () 76 | | false -> raise_s [%message "failed to initialize"] ) ; 77 | Llvm_executionengine.create Codegen.the_module 78 | in 79 | let the_fpm = Llvm.PassManager.create_function Codegen.the_module in 80 | (* Promote allocas to registers. *) 81 | Llvm_scalar_opts.add_memory_to_register_promotion the_fpm ; 82 | (* Do simple "peephole" optimizations and bit-twiddling optzn. *) 83 | Llvm_scalar_opts.add_instruction_combination the_fpm ; 84 | (* reassociate expressions. *) 85 | Llvm_scalar_opts.add_reassociation the_fpm ; 86 | (* Eliminate Common SubExpressions. *) 87 | Llvm_scalar_opts.add_gvn the_fpm ; 88 | (* Simplify the control flow graph (deleting unreachable blocks, etc). *) 89 | Llvm_scalar_opts.add_cfg_simplification the_fpm ; 90 | Llvm.PassManager.initialize the_fpm |> ignore ; 91 | match input with 92 | | `Stdin -> run_main ~the_execution_engine ~the_fpm In_channel.stdin 93 | | `File file -> 94 | In_channel.with_file file ~f:(run_main ~the_execution_engine ~the_fpm) 95 | -------------------------------------------------------------------------------- /lib/parser.mly: -------------------------------------------------------------------------------- 1 | %token DEF 2 | %token EXTERN 3 | %token IF 4 | %token THEN 5 | %token ELSE 6 | %token FOR 7 | %token IN 8 | %token BINARY 9 | %token UNARY 10 | %token VAR 11 | %token IDENT 12 | %token NUMBER 13 | %token KWD 14 | %token EQUALS 15 | %token LEFT_PAREN 16 | %token RIGHT_PAREN 17 | %token COMMA 18 | %token SEMICOLON 19 | %token EOF 20 | 21 | %{ 22 | open Ast 23 | (* get the precedence of the binary operator token. *) 24 | let precedence c = 25 | match Base.Hashtbl.find binop_precedence c with 26 | | None -> -1 27 | | Some p -> p 28 | %} 29 | 30 | %start < [`Expr of Ast.Expr.No_binop.func 31 | | `Extern of Ast.proto 32 | | `Def of Ast.Expr.No_binop.func 33 | | `Eof ]> toplevel 34 | %% 35 | 36 | (* toplevel 37 | * ::= expr 38 | * ::= extern 39 | * ::= definition *) 40 | toplevel: 41 | | e = expr; SEMICOLON { `Expr (Expr.No_binop.Function (Prototype ("", []), e)) } 42 | | e = extern; SEMICOLON { `Extern e } 43 | | d = definition; SEMICOLON { `Def d } 44 | | EOF { `Eof } 45 | ; 46 | 47 | (* primary 48 | * ::= number 49 | * ::= '(' expr ')' 50 | * ::= identifier '(' expression? (',' expression)* ')' 51 | * ::= identifier *) 52 | primary: 53 | | f = NUMBER 54 | { Expr.No_binop.Number f } 55 | | LEFT_PAREN; e = expr; RIGHT_PAREN { e } 56 | | id = IDENT; args = delimited(LEFT_PAREN, separated_list(COMMA, expr), RIGHT_PAREN) 57 | { Expr.No_binop.Call (id, args) } 58 | | id = IDENT; { Expr.No_binop.Variable id } 59 | ; 60 | 61 | (* block 62 | * ::= 'if' expr 'then' expr 'else' expr 63 | * ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expr 64 | * ::= 'var' identifier ('=' expr)? 65 | * (',' identifier ('=' expr)?)* 'in' expr *) 66 | block: 67 | | IF; c = expr; THEN; t = expr; ELSE; e = expr { Expr.No_binop.If (c, t, e) } 68 | | FOR; id = IDENT; EQUALS; start = expr; COMMA; end_ = expr; 69 | step = option(COMMA; e = expr { e }); IN; body = expr 70 | { Expr.No_binop.For (id, start, end_, step, body) } 71 | | VAR; vars = separated_nonempty_list(COMMA, var); IN; body = expr 72 | { Expr.No_binop.Var (vars, body) } 73 | ; 74 | 75 | (* var 76 | * ::= 'var' identifier ('=' expression)? *) 77 | var: name = IDENT; e = option(EQUALS; e = expr { e }) { (name, e) } 78 | 79 | (* unary 80 | * ::= primary 81 | * ::= op expr_without_rhs *) 82 | unary: 83 | | op = operator; operand = unary { Expr.No_binop.Unary (op, operand) } 84 | | e = primary { e } 85 | ; 86 | 87 | (* right_hand_side 88 | * ::= op unary_op unary 89 | * ::= op primary *) 90 | rhs: 91 | | op = operator; unop = operator; e = unary 92 | { (op, precedence op, Expr.No_binop.Unary (unop, e)) } 93 | | op = operator; e = primary 94 | { (op, precedence op, e ) } 95 | ; 96 | 97 | (* expression 98 | * ::= unary (right_hand_side)* 99 | * ::= block *) 100 | expr: 101 | | lhs = unary; rest = list(rhs) 102 | { match rest with 103 | | [] -> lhs 104 | | _ -> Expr.No_binop.Bin_list (lhs, rest) 105 | } 106 | | e = block { e } 107 | ; 108 | 109 | (* prototype 110 | * ::= identifier '(' identifier* ')' 111 | * ::= binary char number? (id, id) 112 | * ::= unary char number? (id) *) 113 | prototype: 114 | | name = IDENT; args = delimited(LEFT_PAREN, list(IDENT), RIGHT_PAREN) 115 | { Prototype (name, args) } 116 | | kind = operator_kind; op = operator; prec = precedence; 117 | args = delimited(LEFT_PAREN, list(IDENT), RIGHT_PAREN) 118 | { let open Base in 119 | match kind with 120 | | `Binary -> 121 | if Int.(<>) (List.length args) 2 122 | then raise_s 123 | [%message "binary operator should have 2 arguments" (args : string list)] 124 | else 125 | Ast.BinOpPrototype ("binary" ^ String.of_char op, args, prec) 126 | | `Unary -> 127 | if Int.(<>) (List.length args) 1 128 | then raise_s 129 | [%message "unary operator should have 1 argument" (args : string list)] 130 | else 131 | Ast.Prototype ("unary" ^ String.of_char op, args) 132 | } 133 | ; 134 | 135 | operator: 136 | | op = KWD { op } 137 | | EQUALS { '=' } 138 | ; 139 | 140 | operator_kind: 141 | | UNARY { `Unary } 142 | | BINARY { `Binary } 143 | ; 144 | 145 | precedence: 146 | | n = option(NUMBER) { Base.Int.of_float (Base.Option.value n ~default:30.0) } 147 | ; 148 | 149 | (* definition 150 | * ::= 'def' prototype expression *) 151 | definition: 152 | | DEF; proto = prototype; body = expr { Expr.No_binop.Function (proto, body) } 153 | ; 154 | 155 | (* external ::= 'extern' prototype *) 156 | extern: EXTERN; proto = prototype { proto } 157 | 158 | -------------------------------------------------------------------------------- /lib/ast.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | type token = 4 | (* commands *) 5 | | DEF 6 | | EXTERN 7 | (* control *) 8 | | IF 9 | | THEN 10 | | ELSE 11 | | FOR 12 | | IN 13 | (* operators *) 14 | | BINARY 15 | | UNARY 16 | (* var definition *) 17 | | VAR 18 | (* primary *) 19 | | IDENT of string 20 | | NUMBER of float 21 | (* unknown *) 22 | | KWD of char 23 | (* special chars *) 24 | | LEFT_PAREN 25 | | RIGHT_PAREN 26 | | EQUALS 27 | | COMMA 28 | | SEMICOLON 29 | (* end of file *) 30 | | EOF 31 | [@@deriving sexp] 32 | 33 | (* proto - This type represents the "prototype" for a function, which captures 34 | * its name, and its argument names (thus implicitly the number of arguments the 35 | * function takes). *) 36 | type proto = 37 | | Prototype of string * string list 38 | | BinOpPrototype of string * string list * int 39 | [@@deriving sexp] 40 | 41 | module Expr = struct 42 | module No_binop = struct 43 | (* base type for expressions before we've properly associated binops *) 44 | type t = 45 | (* variant for numeric literals like "1.0" *) 46 | | Number of float 47 | (* variant for referencing a variable, like "a". *) 48 | | Variable of string 49 | (* variant for a unary operator. *) 50 | | Unary of char * t 51 | (* variant for a sequence binary operators, they still need to be 52 | * associated based operator precedence. *) 53 | | Bin_list of t * (char * int * t) list 54 | (* variant for function calls. *) 55 | | Call of string * t list 56 | (* variant for if/then/else. *) 57 | | If of t * t * t 58 | (* variant for for/in. *) 59 | | For of string * t * t * t option * t 60 | (* variant for var/in *) 61 | | Var of (string * t option) list * t 62 | [@@deriving sexp] 63 | 64 | (* func - This type represents a function definition itself (still needing 65 | * association of binops). *) 66 | type func = Function of proto * t [@@deriving sexp] 67 | 68 | (* group the two highest terms joined by the higest precedence operator 69 | * into a single term and then recurse until there is one term. *) 70 | let rec reduce first rest = 71 | match rest with 72 | | (first_op, first_prec, _) :: tail -> ( 73 | let index = 74 | (* search for the index of the operator with highest precedence *) 75 | List.foldi tail ~init:(first_op, first_prec, 0) 76 | ~f:(fun new_inx 77 | (highest_op, highest_prec, inx) 78 | (new_op, new_prec, _new_expr) 79 | -> 80 | if Int.( > ) new_prec highest_prec then 81 | (new_op, new_prec, new_inx + 1) 82 | else (highest_op, highest_prec, inx) ) 83 | |> fun (_, _, index) -> index 84 | in 85 | match index with 86 | (* if the first operator has precedence, combine [first] and [rest[0]] 87 | * into new [first] and set [rest] to [tail rest]. *) 88 | | 0 -> 89 | let to_reduce = List.hd_exn rest in 90 | let expr = Bin_list (first, [to_reduce]) in 91 | reduce expr (List.tl_exn rest) 92 | (* if it's index n > 0 then combine the terms at index [n] and [n-1] 93 | * into the new [rest]. *) 94 | | n -> 95 | let to_reduce = List.nth_exn rest n in 96 | let prev_op, prev_prec, prev_expr = List.nth_exn rest (n - 1) in 97 | let new_expr = 98 | (prev_op, prev_prec, Bin_list (prev_expr, [to_reduce])) 99 | in 100 | reduce first 101 | (List.take rest (n - 1) @ (new_expr :: List.drop rest (n + 1))) 102 | ) 103 | (* once there's only one term left we're done *) 104 | | [] -> first 105 | end 106 | 107 | type t = 108 | (* variant for numeric literals like "1.0" *) 109 | | Number of float 110 | (* variant for referencing a variable, like "a". *) 111 | | Variable of string 112 | (* variant for a unary operator. *) 113 | | Unary of char * t 114 | (* variant for a binary operators. *) 115 | | Binary of char * t * t 116 | (* variant for function calls. *) 117 | | Call of string * t list 118 | (* variant for if/then/else. *) 119 | | If of t * t * t 120 | (* variant for for/in. *) 121 | | For of string * t * t * t option * t 122 | (* variant for var/in *) 123 | | Var of (string * t option) list * t 124 | [@@deriving sexp] 125 | 126 | let rec of_no_binop = function 127 | | No_binop.Number f -> Number f 128 | | No_binop.Variable x -> Variable x 129 | | No_binop.Call (f, args) -> Call (f, List.map args ~f:of_no_binop) 130 | | No_binop.If (if_, then_, else_) -> 131 | If (of_no_binop if_, of_no_binop then_, of_no_binop else_) 132 | | No_binop.For (id, start, end_, step, body) -> 133 | For 134 | ( id 135 | , of_no_binop start 136 | , of_no_binop end_ 137 | , Option.map step ~f:of_no_binop 138 | , of_no_binop body ) 139 | | No_binop.Unary (c, t) -> Unary (c, of_no_binop t) 140 | | No_binop.Var (vars, body) -> 141 | Var 142 | ( List.map vars ~f:(fun (name, expr) -> 143 | (name, Option.map expr ~f:of_no_binop) ) 144 | , of_no_binop body ) 145 | | No_binop.Bin_list (first, []) -> of_no_binop first 146 | | No_binop.Bin_list (first, [(op, _prec, second)]) -> 147 | Binary (op, of_no_binop first, of_no_binop second) 148 | | No_binop.Bin_list (first, rest) -> 149 | of_no_binop (No_binop.reduce first rest) 150 | 151 | let%expect_test _ = 152 | let no_binop = 153 | No_binop.Bin_list 154 | ( No_binop.Variable "x" 155 | , [('*', 40, No_binop.Variable "y"); ('+', 20, No_binop.Variable "z")] 156 | ) 157 | in 158 | printf !"%{sexp: t}" (of_no_binop no_binop) ; 159 | [%expect 160 | {| (Binary + (Binary * (Variable x) (Variable y)) (Variable z)) |}] ; 161 | let no_binop = 162 | No_binop.Bin_list 163 | ( No_binop.Variable "x" 164 | , [ ('*', 40, No_binop.Variable "y") 165 | ; ('+', 20, No_binop.Variable "z") 166 | ; ('*', 40, No_binop.Variable "w") ] ) 167 | in 168 | printf !"%{sexp: t}" (of_no_binop no_binop) ; 169 | [%expect 170 | {| 171 | (Binary + (Binary * (Variable x) (Variable y)) 172 | (Binary * (Variable z) (Variable w))) 173 | |}] 174 | end 175 | 176 | (* func - This type represents a function definition itself. *) 177 | type func = Function of proto * Expr.t [@@deriving sexp] 178 | 179 | let func_of_no_binop_func (Expr.No_binop.Function (proto, body)) = 180 | Function (proto, Expr.of_no_binop body) 181 | 182 | let set_func_name name (Function (proto, body)) = 183 | let new_proto = 184 | match proto with 185 | | Prototype ((_name : string), args) -> Prototype (name, args) 186 | | BinOpPrototype ((_name : string), args, prec) -> 187 | BinOpPrototype (name, args, prec) 188 | in 189 | Function (new_proto, body) 190 | 191 | (* this holds the precedence for each binary operator that is defined. It can 192 | * be mutated if new binops are defined *) 193 | let binop_precedence : (char, int) Hashtbl.t = Hashtbl.create (module Char) 194 | -------------------------------------------------------------------------------- /lib/codegen.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | let context = Llvm.global_context () 4 | 5 | let the_module = Llvm.create_module context "my cool jit" 6 | 7 | let builder = Llvm.builder context 8 | 9 | let named_values : (string, Llvm.llvalue) Hashtbl.t = 10 | Hashtbl.create (module String) 11 | 12 | let double_type = Llvm.double_type context 13 | 14 | (* Create an alloca instruction in the entry block of the function. This 15 | * is used for mutable variables etc. *) 16 | let create_entry_block_alloca the_function var_name = 17 | let builder = 18 | Llvm.builder_at context (Llvm.instr_begin (Llvm.entry_block the_function)) 19 | in 20 | Llvm.build_alloca double_type var_name builder 21 | 22 | let rec codegen_expr = function 23 | | Ast.Expr.Var (var_names, body) -> 24 | let old_bindings = ref [] in 25 | let the_function = Llvm.block_parent (Llvm.insertion_block builder) in 26 | (* Register all variables and emit their initializer. *) 27 | List.iter var_names ~f:(fun (var_name, init) -> 28 | (* Emit the initializer before adding the variable to scope, this 29 | * prevents the initializer from referencing the variable itself, and 30 | * permits stuff like this: 31 | * var a = 1 in 32 | * var a = a in ... # refers to outer 'a'. *) 33 | let init_val = 34 | match init with 35 | | Some init -> codegen_expr init 36 | (* If not specified, use 0.0. *) 37 | | None -> Llvm.const_float double_type 0.0 38 | in 39 | let alloca = create_entry_block_alloca the_function var_name in 40 | Llvm.build_store init_val alloca builder |> ignore ; 41 | (* Remember the old variable binding so that we can restore the binding 42 | * when we unrecurse. *) 43 | ( match Hashtbl.find named_values var_name with 44 | | None -> () 45 | | Some old_value -> 46 | old_bindings := (var_name, old_value) :: !old_bindings ) ; 47 | (* Remember this binding. *) 48 | Hashtbl.set named_values ~key:var_name ~data:alloca ) ; 49 | (* Codegen the body, now that all vars are in scope. *) 50 | let body_val = codegen_expr body in 51 | (* Pop all our variables from scope. *) 52 | List.iter !old_bindings ~f:(fun (var_name, old_value) -> 53 | Hashtbl.set named_values ~key:var_name ~data:old_value ) ; 54 | (* Return the body computation. *) 55 | body_val 56 | | Ast.Expr.Number n -> Llvm.const_float double_type n 57 | | Ast.Expr.Variable name -> ( 58 | match Hashtbl.find named_values name with 59 | | None -> raise_s [%message "unkown variable name" (name : string)] 60 | (* Load the value *) 61 | | Some v -> Llvm.build_load v name builder ) 62 | | Ast.Expr.Binary ('=', lhs, rhs) -> 63 | (* Special case '=' because we don't want to emit the LHS as an 64 | * expression. *) 65 | let name = 66 | match lhs with 67 | | Ast.Expr.Variable name -> name 68 | | _ -> raise_s [%message "destination of '=' must be a variable"] 69 | in 70 | (* Codegen the rhs. *) 71 | let val_ = codegen_expr rhs in 72 | (* Lookup the name. *) 73 | let variable = 74 | match Hashtbl.find named_values name with 75 | | None -> raise_s [%message "unknown variable name" (name : string)] 76 | | Some var -> var 77 | in 78 | Llvm.build_store val_ variable builder |> ignore ; 79 | val_ 80 | | Ast.Expr.Binary (op, lhs, rhs) -> ( 81 | let lhs_val = codegen_expr lhs in 82 | let rhs_val = codegen_expr rhs in 83 | match op with 84 | | '+' -> Llvm.build_fadd lhs_val rhs_val "addtmp" builder 85 | | '-' -> Llvm.build_fsub lhs_val rhs_val "subtmp" builder 86 | | '*' -> Llvm.build_fmul lhs_val rhs_val "multmp" builder 87 | | '<' -> 88 | let i = 89 | Llvm.build_fcmp Llvm.Fcmp.Ult lhs_val rhs_val "cmptmp" builder 90 | in 91 | (* Convert bool 0/1 to double 0.0 or 1.0 *) 92 | Llvm.build_uitofp i double_type "booltmp" builder 93 | | _ -> 94 | (* If it wasn't a builtin binary operator, it must be a user defined 95 | * one. Emit a call to it. *) 96 | let callee = "binary" ^ String.make 1 op in 97 | let callee = 98 | match Llvm.lookup_function callee the_module with 99 | | Some callee -> callee 100 | | None -> raise_s [%message "unrecognized binop" (op : char)] 101 | in 102 | Llvm.build_call callee [|lhs_val; rhs_val|] "binop" builder ) 103 | | Ast.Expr.Call (callee_name, args) -> 104 | (* Look up the name in the module table. *) 105 | let callee = 106 | match Llvm.lookup_function callee_name the_module with 107 | | Some callee -> callee 108 | | None -> 109 | raise_s [%message "undefined function" (callee_name : string)] 110 | in 111 | (* If argument mismatch error. *) 112 | if Int.( = ) (Array.length (Llvm.params callee)) (List.length args) then 113 | () 114 | else 115 | raise_s 116 | [%message "incorrect number of arguments" (callee_name : string)] ; 117 | let args = Array.map (Array.of_list args) ~f:codegen_expr in 118 | Llvm.build_call callee args "calltmp" builder 119 | | Ast.Expr.If (condition, then_, else_) -> 120 | let cond = codegen_expr condition in 121 | (* Convert condition to a bool by comparing equal to 0.0 *) 122 | let zero = Llvm.const_float double_type 0.0 in 123 | let cond_val = 124 | Llvm.build_fcmp Llvm.Fcmp.One cond zero "ifcond" builder 125 | in 126 | (* Grab the first block so that we might later add the conditional branch 127 | * to it at the end of the function. *) 128 | let start_bb = Llvm.insertion_block builder in 129 | let the_function = Llvm.block_parent start_bb in 130 | let then_bb = Llvm.append_block context "then" the_function in 131 | (* Emit 'then' value. *) 132 | Llvm.position_at_end then_bb builder ; 133 | let then_val = codegen_expr then_ in 134 | (* Codegen of 'then' can change the current block, update then_bb for the 135 | * phi. We create a new name because one is used for the phi node, and the 136 | * other is used for the conditional branch. *) 137 | let new_then_bb = Llvm.insertion_block builder in 138 | (* Emit 'else' value. *) 139 | let else_bb = Llvm.append_block context "else" the_function in 140 | Llvm.position_at_end else_bb builder ; 141 | let else_val = codegen_expr else_ in 142 | (* Codegen of 'else' can change the current block, update else_bb for the 143 | * phi. *) 144 | let new_else_bb = Llvm.insertion_block builder in 145 | (* Emit merge block. *) 146 | let merge_bb = Llvm.append_block context "ifcont" the_function in 147 | Llvm.position_at_end merge_bb builder ; 148 | let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in 149 | let phi = Llvm.build_phi incoming "iftmp" builder in 150 | (* Return to the start block to add the conditional branch. *) 151 | Llvm.position_at_end start_bb builder ; 152 | Llvm.build_cond_br cond_val then_bb else_bb builder |> ignore ; 153 | (* Set a unconditional branch at the end of the 'then' block and the 154 | * 'else' block to the 'merge' block. *) 155 | Llvm.position_at_end new_then_bb builder ; 156 | Llvm.build_br merge_bb builder |> ignore ; 157 | Llvm.position_at_end new_else_bb builder ; 158 | Llvm.build_br merge_bb builder |> ignore ; 159 | (* Finally, set the builder to the end of the merge block. *) 160 | Llvm.position_at_end merge_bb builder ; 161 | phi 162 | | Ast.Expr.For (var_name, start, end_, step, body) -> 163 | (* Output this as: 164 | * var = alloca double 165 | * ... 166 | * start = startexpr 167 | * store start -> var 168 | * goto loop 169 | * loop: 170 | * ... 171 | * bodyexpr 172 | * ... 173 | * loopend: 174 | * step = stepexpr 175 | * endcond = endexpr 176 | * 177 | * curvar = load var 178 | * nextvar = curvar + step 179 | * store nextvar -> var 180 | * br endcond, loop, endloop 181 | * outloop: *) 182 | let the_function = Llvm.block_parent (Llvm.insertion_block builder) in 183 | (* Create an alloca for the variable in the entry block. *) 184 | let alloca = create_entry_block_alloca the_function var_name in 185 | (* Emit the start code first, without 'variable' in scope. *) 186 | let start_val = codegen_expr start in 187 | (* Store the value into the alloca. *) 188 | Llvm.build_store start_val alloca builder |> ignore ; 189 | (* Make the new basic block for the loop header, inserting after current 190 | * block. *) 191 | let loop_bb = Llvm.append_block context "loop" the_function in 192 | (* Insert an explicit fall through from the current block to the 193 | * loop_bb. *) 194 | Llvm.build_br loop_bb builder |> ignore ; 195 | (* Start insertion in loop_bb. *) 196 | Llvm.position_at_end loop_bb builder ; 197 | (* Within the loop, the variable is defined equal to the PHI node. If it 198 | * shadows an existing variable, we have to restore it, so save it 199 | * now. *) 200 | let old_val = Hashtbl.find named_values var_name in 201 | Hashtbl.set named_values ~key:var_name ~data:alloca ; 202 | (* Emit the body of the loop. This, like any other expr, can change the 203 | * current BB. Note that we ignore the value computed by the body, but 204 | * don't allow an error *) 205 | codegen_expr body |> ignore ; 206 | (* Emit the step value. *) 207 | let step_val = 208 | match step with 209 | | Some step -> codegen_expr step 210 | (* If not specified, use 1.0. *) 211 | | None -> Llvm.const_float double_type 1.0 212 | in 213 | (* Compute the end condition. *) 214 | let end_cond = codegen_expr end_ in 215 | (* Reload, increment, and restore the alloca. This handles the case where 216 | * the body of the loop mutates the variable. *) 217 | let cur_var = Llvm.build_load alloca var_name builder in 218 | let next_var = Llvm.build_fadd cur_var step_val "nextvar" builder in 219 | Llvm.build_store next_var alloca builder |> ignore ; 220 | (* Convert condition to a bool by comparing equal to 0.0. *) 221 | let zero = Llvm.const_float double_type 0.0 in 222 | let end_cond = 223 | Llvm.build_fcmp Llvm.Fcmp.One end_cond zero "loopcond" builder 224 | in 225 | (* Create the "after loop" block and insert it. *) 226 | let after_bb = Llvm.append_block context "afterloop" the_function in 227 | (* Insert the conditional branch into the end of loop_end_bb. *) 228 | Llvm.build_cond_br end_cond loop_bb after_bb builder |> ignore ; 229 | (* Any new code will be inserted in after_bb. *) 230 | Llvm.position_at_end after_bb builder ; 231 | (* Restore the unshadowed variable. *) 232 | ( match old_val with 233 | | Some old_val -> Hashtbl.set named_values ~key:var_name ~data:old_val 234 | | None -> () ) ; 235 | (* for expr always returns 0.0. *) 236 | Llvm.const_null double_type 237 | | Ast.Expr.Unary (op, operand) -> 238 | let operand = codegen_expr operand in 239 | let callee = "unary" ^ String.make 1 op in 240 | let callee = 241 | match Llvm.lookup_function callee the_module with 242 | | Some callee -> callee 243 | | None -> raise_s [%message "unknown unary operator" (op : char)] 244 | in 245 | Llvm.build_call callee [|operand|] "unop" builder 246 | 247 | let codegen_proto_existing = function 248 | | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) -> 249 | (* Make the function type: double(double,double) etc. *) 250 | Hashtbl.clear named_values ; 251 | let doubles = Array.create ~len:(List.length args) double_type in 252 | let ft = Llvm.function_type double_type doubles in 253 | let f, existing = 254 | match Llvm.lookup_function name the_module with 255 | | None -> (Llvm.declare_function name ft the_module, `Existing) 256 | (* If 'f' conflicted, there was already something named 'name'. If it 257 | * has a body, don't allow redefinition or reextern. *) 258 | | Some f -> 259 | (* If 'f' already has a body, reject this. *) 260 | if Int.(Array.length (Llvm.basic_blocks f) = 0) then () 261 | else raise_s [%message "redefinition of function" (name : string)] ; 262 | (* If 'f' took a different number of arguments, reject. *) 263 | if Int.(Array.length (Llvm.params f) = List.length args) then () 264 | else 265 | raise_s 266 | [%message 267 | "redefinition of function with a different number of args" 268 | (name : string)] ; 269 | (f, `Not_existing) 270 | in 271 | (* Set names for all arguments. *) 272 | Array.iteri (Llvm.params f) ~f:(fun i a -> 273 | let name = List.nth_exn args i in 274 | Llvm.set_value_name name a ; 275 | Hashtbl.add_exn named_values ~key:name ~data:a ) ; 276 | (f, existing) 277 | 278 | (* Create an alloca for each argument and register the argument in the symbol 279 | * table so that references to it will succeed. *) 280 | let create_argument_allocas the_function proto = 281 | let args = 282 | match proto with 283 | | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args 284 | in 285 | Array.iteri (Llvm.params the_function) ~f:(fun i ai -> 286 | let var_name = List.nth_exn args i in 287 | (* Create an alloca for this variable. *) 288 | let alloca = create_entry_block_alloca the_function var_name in 289 | (* Store the initial value into the alloca. *) 290 | Llvm.build_store ai alloca builder |> ignore ; 291 | (* Add arguments to variable symbol table. *) 292 | Hashtbl.set named_values ~key:var_name ~data:alloca ) 293 | 294 | let codegen_func the_fpm = function 295 | | Ast.Function (proto, body) -> ( 296 | let the_function, existing = codegen_proto_existing proto in 297 | (* If this is an operator, install it. *) 298 | ( match proto with 299 | | Ast.BinOpPrototype (name, _args, prec) -> 300 | let op = name.[String.length name - 1] in 301 | Hashtbl.add_exn Ast.binop_precedence ~key:op ~data:prec 302 | | _ -> () ) ; 303 | (* Create a new basic block to start insertion into. *) 304 | let bb = Llvm.append_block context "entry" the_function in 305 | Llvm.position_at_end bb builder ; 306 | try 307 | (* Add all arguments to the symbol table and create their allocas. *) 308 | create_argument_allocas the_function proto ; 309 | let return_val = codegen_expr body in 310 | (* Finish off the function. *) 311 | let _ : Llvm.llvalue = Llvm.build_ret return_val builder in 312 | (* Validate the generated code, checking for consistency. *) 313 | ( match Llvm_analysis.verify_function the_function with 314 | | true -> () 315 | | false -> 316 | printf "invalid function generated\n%s\n" 317 | (Llvm.string_of_llvalue the_function) ; 318 | Llvm_analysis.assert_valid_function the_function ) ; 319 | (* Optimize the function. *) 320 | let _ : bool = Llvm.PassManager.run_function the_function the_fpm in 321 | the_function 322 | with e -> 323 | ( match existing with 324 | | `Not_existing -> Llvm.delete_function the_function 325 | | `Existing -> 326 | Array.iter (Llvm.basic_blocks the_function) ~f:Llvm.delete_block ) ; 327 | raise e ) 328 | 329 | let codegen_proto proto = codegen_proto_existing proto |> fst 330 | --------------------------------------------------------------------------------