├── .gitignore ├── .vscode └── tasks.json ├── Cargo.toml ├── LICENSE ├── README.md ├── language ├── ideas.txt ├── play │ ├── 2017-06-22 │ │ ├── problem-1.cf │ │ ├── problem-2.cf │ │ ├── problem-3.cf │ │ └── vector.cf │ ├── 2017-07-11 │ │ └── option.cf │ ├── far_future.cf │ ├── multiple_trait_impls.cf │ └── types.cf ├── scratch.cf ├── spec │ ├── lex.txt │ └── parse.txt ├── tests-fail │ ├── elseless-if-wrong-type.cf │ ├── lexical_scope.cf │ ├── return-bool-from-s32.cf │ └── unknown_type_name.cf ├── tests-succeed │ ├── add_calls.cf │ ├── auto_type_inference.cf │ ├── elseless-if.cf │ ├── fib.cf │ ├── function-taking-non-s32.cf │ ├── function_call_no_bind.cf │ ├── function_returning_bool.cf │ ├── let_if.cf │ ├── locals.cf │ ├── log.cf │ ├── long.cf │ ├── number_lit.cf │ └── trivial.cf └── vs-cafe │ ├── .vscode │ └── launch.json │ ├── .vscodeignore │ ├── CHANGELOG.md │ ├── README.md │ ├── language-configuration.json │ ├── package.json │ ├── syntaxes │ └── cafe.tmLanguage.json │ └── vsc-extension-quickstart.md ├── run_scratch.sh ├── src ├── ast.rs ├── containers.rs ├── main.rs ├── mir │ ├── data.rs │ ├── mod.rs │ ├── runner.rs │ └── ty.rs └── parse │ ├── lexer.rs │ └── mod.rs ├── test.py └── todo.txt /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com/fwlink/?LinkId=733558 3 | // for the documentation about the tasks.json format 4 | "version": "2.0.0", 5 | "tasks": [ 6 | { 7 | "taskName": "build", 8 | "type": "shell", 9 | "command": "cargo", 10 | "args": [ 11 | "build" 12 | ], 13 | "problemMatcher": [ 14 | "$rustc" 15 | ], 16 | "group": { 17 | "kind": "build", 18 | "isDefault": true 19 | }, 20 | "isBackground": true 21 | }, 22 | { 23 | "taskName": "scratch", 24 | "type": "shell", 
25 | "command": "cargo", 26 | "args": [ 27 | "run", 28 | "--", 29 | "--print-ast", 30 | "--print-mir", 31 | "language/scratch.cf" 32 | ], 33 | "problemMatcher": [ 34 | "$rustc" 35 | ] 36 | }, 37 | { 38 | "taskName": "scratch-no-mir", 39 | "type": "shell", 40 | "command": "cargo", 41 | "args": [ 42 | "run", 43 | "--", 44 | "--print-ast", 45 | "language/scratch.cf" 46 | ], 47 | "problemMatcher": [ 48 | "$rustc" 49 | ] 50 | }, 51 | { 52 | "taskName": "test", 53 | "type": "shell", 54 | "command": "python", 55 | "args": [ 56 | "test.py" 57 | ] 58 | } 59 | ] 60 | } -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Nicole Mazzuca "] 3 | name = "cafec" 4 | version = "0.1.0" 5 | 6 | [[bin]] 7 | name = "cafec" 8 | 9 | [dependencies] 10 | clap = "2.26.0" 11 | ucd = "0.1.1" 12 | unicode-normalization = "0.1.5" 13 | unicode-xid = "0.1.0" 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Nicole Mazzuca 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without limitation 7 | the rights to use, copy, modify, merge, publish, distribute, 8 | sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall 13 | be included in all copies or substantial portions of the 14 | Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 17 | KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 18 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 19 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS 20 | OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 21 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | café 2 | --- 3 | A fun language experiment 4 | 5 | This version of the language has been mothballed. See [cafe-ml][cafe-ml] for the current version. 6 | 7 | [cafe-ml]: https://github.com/ubsan/cafe 8 | -------------------------------------------------------------------------------- /language/ideas.txt: -------------------------------------------------------------------------------- 1 | - named typeclass implementations 2 | - anonymous as well for "default" 3 | - http://www.lpw25.net/ml2014.pdf 4 | - effects like F* 5 | - http://www2.imm.dtu.dk/~fnie/Papers/NiNi99tes.pdf 6 | - https://www.fstar-lang.org/papers/dm4free/ 7 | - https://www.fstar-lang.org/papers/mumon/ 8 | - regions/references 9 | - http://209.68.42.137/ucsd-pages/Courses/cse227.w03/handouts/cyclone-regions.pdf 10 | - https://www.microsoft.com/en-us/research/publication/uniqueness-and-reference-immutability-for-safe-parallelism 11 | - http://web.cs.ucla.edu/~palsberg/tba/papers/tofte-talpin-iandc97.pdf 12 | - just cool 13 | - http://www.ccs.neu.edu/home/amal/papers/funtal.pdf 14 | - http://www.ccs.neu.edu/home/amal/papers/linking-types.pdf 15 | - maybe `def` instead of `let` at item-level? 
16 | 17 | - syntax: 18 | 19 | ``` 20 | func id(n: s32): s32 { 21 | n 22 | } 23 | 24 | type optional(t: type) = variant { 25 | some: t, 26 | none, 27 | }; 28 | 29 | type point2(t: type) = struct { 30 | x: t, 31 | y: t, 32 | } 33 | 34 | // idea for type modules? 35 | type point2(t: type <: Add) { 36 | // maybe s/data/self or something? 37 | data = struct { 38 | x: t, 39 | y: t, 40 | }; 41 | 42 | func add(self: data, other: data): data { 43 | data { 44 | x = t.add(self.x, other.x), 45 | y = t.add(self.y, other.y), 46 | } 47 | } 48 | }; 49 | 50 | type point2_add: Add(point2, point2) { 51 | func add(self: point2, other: point2): point2 { 52 | point2.add(self, other) 53 | // or 54 | // self.add(other) 55 | } 56 | }; 57 | 58 | func array(t: type, n: usize): type = [n]t; 59 | 60 | func print(implicit t <: Show, s: t) { 61 | std.io.print(t.show(s)) 62 | } 63 | 64 | print(implicit Show_int, 5); 65 | ``` 66 | -------------------------------------------------------------------------------- /language/play/2017-06-22/problem-1.cf: -------------------------------------------------------------------------------- 1 | using std.io.println; 2 | 3 | main :: fn() { 4 | let mut sum = 0; 5 | for i = 0..1000 { 6 | if i % 3 == 0 || i % 5 == 0 { 7 | sum += i 8 | } 9 | } 10 | println("{}", sum); 11 | } 12 | -------------------------------------------------------------------------------- /language/play/2017-06-22/problem-2.cf: -------------------------------------------------------------------------------- 1 | use std.io.println; 2 | 3 | fib_sum :: fn(n: u64) -> u64 { 4 | let mut sum = 0; 5 | let mut a = 0; 6 | let mut b = 1; 7 | while a < n { 8 | if a % 2 == 0 { 9 | sum += a; 10 | } 11 | let tmp = a; 12 | a += b; 13 | b = tmp; 14 | } 15 | } 16 | 17 | main :: fn() { 18 | println("{}", fib_sum(4 000 000)); 19 | } 20 | -------------------------------------------------------------------------------- /language/play/2017-06-22/problem-3.cf: 
-------------------------------------------------------------------------------- 1 | prime_factors :: (n: u64) -> vector { 2 | let mut factors = vector(); 3 | a: for i in 0..n { 4 | if n % i == 0 { 5 | for j in factors { 6 | if i % j == 0 { 7 | break; 8 | } 9 | } nobreak { 10 | factors.push(i); 11 | } 12 | } 13 | } 14 | } 15 | 16 | main :: fn() { 17 | println("{}", prime_factors(600 851 475 143).last()); 18 | } 19 | -------------------------------------------------------------------------------- /language/play/2017-06-22/vector.cf: -------------------------------------------------------------------------------- 1 | public type vector :: (T) => struct { 2 | ptr: &raw T, 3 | len: usize, 4 | cap: usize, 5 | } 6 | 7 | impl (T) => constructible(vector(T), T...) { 8 | construct :: fn(ts: T...) -> Self { 9 | // TODO(ubsan): figure out what this `sizeof(ts)` expression 10 | // should be 11 | let mut ret = vector::with_capacity(sizeof(ts)); 12 | for t in ts... { 13 | ret.push(t); 14 | } 15 | ret 16 | } 17 | } 18 | 19 | impl (T) => convertible(vector(T), box([]T)) { 20 | convert :: fn(mut self) -> box([T]) { 21 | self.shrink_to_fit(); 22 | let (ptr, len) = (self.ptr, self.len); 23 | std::mem::forget(self); 24 | let dst_ptr = ([]T)::from_raw_parts(ptr, len); 25 | box::from_raw_parts(dst_ptr) 26 | } 27 | } 28 | 29 | impl (T) => vector(T) { 30 | public new :: fn() -> Self { 31 | vector { 32 | ptr: std::ptr::null(), 33 | len: 0, 34 | cap: 0, 35 | } 36 | } 37 | 38 | public with_capacity :: fn(n: usize) -> Self { 39 | vector { 40 | ptr: std::ptr::allocate(n), 41 | len: 0, 42 | cap: n, 43 | } 44 | } 45 | 46 | public push :: fn(mut self, t: T) { 47 | unsafe { 48 | if self.len < self.cap { 49 | self.ptr.offset(self.len).write(t); 50 | self.len += 1; 51 | } else { 52 | self.resize(); 53 | self.ptr.offset(self.len).write(t); 54 | self.len += 1; 55 | } 56 | } 57 | } 58 | 59 | resize :: fn(mut self) { 60 | unsafe { 61 | let cap = if self.capacity == 0 { 62 | 8 63 | } else { 64 | 
self.capacity * 2 65 | }; 66 | let tmp = std::ptr::reallocate(self.ptr, cap, self.cap); 67 | if tmp.is_null() { 68 | // fail - not sure how yet 69 | // should not unwind, I don't think 70 | } 71 | self.cap = cap; 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /language/play/2017-07-11/option.cf: -------------------------------------------------------------------------------- 1 | option(t: type) { 2 | type = variant { 3 | some t, 4 | none (), 5 | }; 6 | } 7 | -------------------------------------------------------------------------------- /language/play/far_future.cf: -------------------------------------------------------------------------------- 1 | zero :: struct { } 2 | succ :: (_: type) => struct { } 3 | 4 | nat :: (_: type) => interface { 5 | value: u64; 6 | } 7 | impl :: nat(zero) { 8 | value = 0; 9 | } 10 | impl :: (T: type) => nat(succ(T)) { 11 | value = nat(T)::value + 1; 12 | } 13 | 14 | plus :: (L <: nat, R) => interface 15 | requires nat(R) 16 | { 17 | type t; 18 | requires nat(t); 19 | // equivalent: 20 | // type t <: nat; 21 | } 22 | 23 | impl :: (Rhs <: nat) => plus(zero, Rhs) { 24 | type t = Rhs; 25 | } 26 | 27 | impl :: (Lhs, Rhs) => plus(succ(Lhs), Rhs) 28 | requires plus(Lhs, Rhs) 29 | { 30 | type t = succ(plus(Lhs, Rhs)::t); 31 | } 32 | /* also valid (and equivalent) 33 | impl :: (Rhs, Lhs <: plus(Rhs)) => plus(succ(Lhs), Rhs) { 34 | type t = succ(plus(Lhs, Rhs)::t); 35 | } 36 | */ 37 | 38 | // 39 | 40 | optional :: (T) => enum { 41 | some: T; 42 | none: (); 43 | } 44 | 45 | // 46 | 47 | is_some :: (T) => fn( 48 | opt: optional(T), 49 | _: to_show_off_multiple_params, 50 | ) -> bool { 51 | // unknown what the match syntax will be yet 52 | // probably similar to Rust 53 | match opt { 54 | some _ => true; 55 | none () => false; 56 | } 57 | } 58 | 59 | // equivalent to 60 | // foo: (T <: nat) -> fn(t: T) -> u64 { T::value } 61 | // perhaps? 
62 | // this will *not* be in an early version of the language 63 | foo :: fn(t: nat) -> u64 { 64 | typeof(t)::value 65 | } 66 | -------------------------------------------------------------------------------- /language/play/multiple_trait_impls.cf: -------------------------------------------------------------------------------- 1 | // using ocaml-y syntax, because this idea is from ocaml 2 | 3 | type ordering :: enum { 4 | less, 5 | equal, 6 | greater, 7 | } 8 | 9 | type :: ordering { 10 | fn reverse(self: ref self) -> self { 11 | match self { 12 | ordering::less => { ordering::greater } 13 | ordering::greater => { ordering::less } 14 | ordering::equal => { ordering::equal } 15 | } 16 | } 17 | } 18 | 19 | type ord(t) :: class { 20 | fn compare(lhs: ref t, rhs: ref t) -> ordering; 21 | } 22 | 23 | // default impl 24 | // same rules as rust, basically, for orphan impls 25 | type :: ord(i32) { 26 | fn compare(lhs: ref i32, rhs: ref i32) -> ordering { 27 | if lhs < rhs { 28 | ordering::less 29 | } else if lhs > rhs { 30 | ordering::greater 31 | } else { 32 | ordering::equal 33 | } 34 | } 35 | } 36 | 37 | // non-default impl 38 | // you can define these anywhere 39 | type backwards( 40 | t <: ord, // by default, uses the default ord for the type 41 | ) :: ord(t) { 42 | fn compare(lhs: ref t, rhs: ref t) -> ordering { 43 | ord(t)::compare(lhs, rhs).reverse() 44 | } 45 | } 46 | 47 | // how to use these: 48 | fn ge(t <: ord) => (lhs: ref t, rhs: ref t) -> bool { 49 | match ord::(t)::compare(lhs, rhs) { 50 | ordering::greater | ordering::equal => { true } 51 | ordering::les => { false } 52 | } 53 | } 54 | 55 | fn main() { 56 | let _ = ge(0, 1); // false 57 | let _ = ge::(i32 <: backwards(ord(i32)))::(0, 1); // true 58 | // or something like this 59 | // the syntax definitely needs work 60 | // but I know I like these semantics 61 | // this would also allow you to use, say, floats as ord, by defining 62 | // your own named ord for floats 63 | } 64 | 
-------------------------------------------------------------------------------- /language/play/types.cf: -------------------------------------------------------------------------------- 1 | // this is fine for now 2 | type option :: variant(t: type) { 3 | } 4 | -------------------------------------------------------------------------------- /language/scratch.cf: -------------------------------------------------------------------------------- 1 | type t = struct { x: s32 }; 2 | 3 | func log_t(v: t) { 4 | // log(v.x); 5 | } 6 | 7 | func main() { 8 | let v = t { x = 1 }; 9 | log_t(v); 10 | } 11 | -------------------------------------------------------------------------------- /language/spec/lex.txt: -------------------------------------------------------------------------------- 1 | NOTES: 2 | - if the lexer discovers a keyword, it is treated as a 3 | keyword, not as an identifier 4 | - XID_* shall be treated as a unicode character class 5 | - token-class* shall be treated as "0 or more 6 | repetitions of token-class" 7 | - token-class+ shall be treated as "1 or more 8 | repetitions of token-class" 9 | - token-class? shall be treated as "0 or 1 10 | repetitions of token-class" 11 | - literal sequences of characters shall be denoted in `` 12 | - literal ` shall be denoted as `\`` 13 | - # shall denote future expansions 14 | - whitespace - a character of class XID_WSpace - is used 15 | to split tokens, but is not a token in and of itself 16 | 17 | comments: 18 | - a comment shall either be a line comment, or a block 19 | comment 20 | - a line comment shall start with a `//`, and shall end 21 | in a new line 22 | - a block comment shall start with a `/*`, end with a 23 | `*/`, and does nest. 
24 | 25 | token = 26 | | keyword 27 | | identifier 28 | | literal 29 | | separator 30 | | operator 31 | 32 | keyword = 33 | | `let` 34 | | `if` 35 | | `else` 36 | | `true` 37 | | `false` 38 | # | `func` 39 | # | `struct` 40 | # | `variant` 41 | # | `type` 42 | # | `raw` 43 | # | `mut` 44 | # | `own` 45 | 46 | identifier = 47 | | XID\_Start XID\_Continue* 48 | 49 | literal = 50 | | number-literal 51 | | `true` 52 | | `false` 53 | | `()` 54 | 55 | number-literal = 56 | | dec-digit-list+ 57 | # | `0x` hex-digit-list+ 58 | # | `0X` hex-digit-list+ 59 | # | `0b` bin-digit-list+ 60 | # | `0B` bin-digit-list+ 61 | # | `0o` oct-digit-list+ 62 | # | `0O` oct-digit-list+ 63 | 64 | dec-digit-list = 65 | | dec-digit ` `? 66 | 67 | dec-digit = 68 | | `0` | `1` | `2` | `3` | `4` 69 | | `5` | `6` | `7` | `8` | `9` 70 | 71 | binop = 72 | | `+` | `<=` | `-` 73 | # | `++` | `=` | 74 | # | `<` | `>` | `>=` | `==` | `!=` 75 | # | `*` | `/` | `%` 76 | # | `+=` | `-=` | `*=` | `/=` | `%=` 77 | # | `^` | `&` | `|` 78 | # | `^=` | `&=` | `|=` 79 | # | `&&` | `||` 80 | 81 | operator = 82 | | `:` | `,` | `=` | binop 83 | # `=>` | `->` 84 | 85 | separator = 86 | | `(` | `)` | `{` | `}` | `;` 87 | # | `[` | `]` 88 | -------------------------------------------------------------------------------- /language/spec/parse.txt: -------------------------------------------------------------------------------- 1 | NOTES: 2 | - a specific token shall be between `` 3 | - the special token empty shall be the absence of a token 4 | 5 | file = 6 | | item-definition* 7 | 8 | item-definition = 9 | | `let` definition 10 | 11 | definition-start = 12 | | ident parameter-list? declared-type? 13 | definition = 14 | | definition-start block 15 | | definition-start `=` expression `;` 16 | 17 | declared-type = 18 | | `:` type 19 | 20 | parameter-list = 21 | | `(` parameter-list-no-parens `)` 22 | parameter-list-no-parens = 23 | | parameter `,`? 
24 | | parameter `,` parameter-list-no-parens 25 | parameter = 26 | | ident declared-type 27 | 28 | type = 29 | | identifier 30 | 31 | block = 32 | | `{` statement* expression `}` 33 | 34 | statement = 35 | | `let` definition 36 | | expression `;` 37 | 38 | expression = 39 | | literal 40 | | expression binop expression 41 | | `if` expression `{` expression `}` 42 | -- NOTE(ubsan): should just take a block 43 | | `-` expression 44 | -------------------------------------------------------------------------------- /language/tests-fail/elseless-if-wrong-type.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | log(if true { 0 }) 3 | } -------------------------------------------------------------------------------- /language/tests-fail/lexical_scope.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | if true { 3 | let x = 0; 4 | } else { }; 5 | let y = x; 6 | } 7 | -------------------------------------------------------------------------------- /language/tests-fail/return-bool-from-s32.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | } 3 | 4 | func foo(): s32 { true } -------------------------------------------------------------------------------- /language/tests-fail/unknown_type_name.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | let x: floop = 0; 3 | } -------------------------------------------------------------------------------- /language/tests-succeed/add_calls.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | log(foo1() + foo2() + foo3() + foo4()) 3 | } 4 | 5 | func foo1(): s32 { 6 | 1 7 | } 8 | 9 | func foo2(): s32 { 10 | 2 11 | } 12 | 13 | func foo3(): s32 { 14 | 3 15 | } 16 | 17 | func foo4(): s32 { 18 | 4 19 | } 20 | 
-------------------------------------------------------------------------------- /language/tests-succeed/auto_type_inference.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | let x = 0; 3 | log(x); 4 | } 5 | -------------------------------------------------------------------------------- /language/tests-succeed/elseless-if.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | log(if true { () }); 3 | } -------------------------------------------------------------------------------- /language/tests-succeed/fib.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | log(fib(15)) 3 | } 4 | 5 | func fib(n: s32): s32 { 6 | let neg_one: s32 = -1; 7 | if n <= 1 { 8 | n 9 | } else { 10 | fib(n + neg_one) + fib(n + -2) 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /language/tests-succeed/function-taking-non-s32.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | log(foo(true, ())) 3 | } 4 | 5 | func foo(x: bool, y: unit): s32 { 6 | if x { 7 | 1 8 | } else { 9 | 0 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /language/tests-succeed/function_call_no_bind.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | foo(); 3 | } 4 | 5 | func foo(): s32 { 6 | 0 7 | } 8 | -------------------------------------------------------------------------------- /language/tests-succeed/function_returning_bool.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | log(fib(15)) 3 | } 4 | 5 | func should_recurse(n: s32): bool { 6 | 2 <= n 7 | } 8 | 9 | func fib(n: s32): s32 { 10 | if should_recurse(n) { 11 | fib(n + -1) + fib(n + -2) 12 | } else { 13 | n 14 | } 15 | } 16 | 
-------------------------------------------------------------------------------- /language/tests-succeed/let_if.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | let x = if true { 3 | 1 4 | } else { 5 | 0 6 | }; 7 | 8 | log(x) 9 | } 10 | -------------------------------------------------------------------------------- /language/tests-succeed/locals.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | let res: s32 = 0; 3 | let bloop: bool = true; 4 | } 5 | -------------------------------------------------------------------------------- /language/tests-succeed/log.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | log(42424242); 3 | log(true); 4 | log(()); 5 | } 6 | -------------------------------------------------------------------------------- /language/tests-succeed/long.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | log(fib(15)); 3 | 4 | let res = debug_s32(bloop(true, ())); 5 | log(42424242); 6 | log(true); 7 | log(()); 8 | let num_lits = 1 2 3 + 321 + 1 1 1; 9 | } 10 | 11 | func debug_s32(n: s32): s32 { 12 | log(n); 13 | n 14 | } 15 | 16 | func should_recurse(n: s32): bool { 17 | 2 <= n 18 | } 19 | func fib(n: s32): s32 { 20 | if should_recurse(n) { 21 | fib(n + -1) + fib(n + -2) 22 | } else { 23 | n 24 | } 25 | } 26 | func bloop(x: bool, y: unit): s32 { 27 | if x { 28 | 1 29 | } else { 30 | 0 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /language/tests-succeed/number_lit.cf: -------------------------------------------------------------------------------- 1 | func main() { 2 | log(1 2 3 + 321 + 1 1 1) 3 | } 4 | -------------------------------------------------------------------------------- /language/tests-succeed/trivial.cf: 
-------------------------------------------------------------------------------- 1 | func main() { 2 | } 3 | -------------------------------------------------------------------------------- /language/vs-cafe/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | // A launch configuration that launches the extension inside a new window 2 | { 3 | "version": "0.1.0", 4 | "configurations": [ 5 | { 6 | "name": "Launch Extension", 7 | "type": "extensionHost", 8 | "request": "launch", 9 | "runtimeExecutable": "${execPath}", 10 | "args": ["--extensionDevelopmentPath=${workspaceRoot}" ] 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /language/vs-cafe/.vscodeignore: -------------------------------------------------------------------------------- 1 | .vscode/** 2 | .vscode-test/** 3 | .gitignore 4 | vsc-extension-quickstart.md 5 | -------------------------------------------------------------------------------- /language/vs-cafe/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to the "vs-cafe" extension will be documented in this file. 3 | 4 | Check [Keep a Changelog](http://keepachangelog.com/) for recommendations on how to structure this file. 5 | 6 | ## [Unreleased] 7 | - Initial release -------------------------------------------------------------------------------- /language/vs-cafe/README.md: -------------------------------------------------------------------------------- 1 | # vs-cafe 2 | 3 | I'm too lazy to document this right now -------------------------------------------------------------------------------- /language/vs-cafe/language-configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "comments": { 3 | // symbol used for single line comment. 
Remove this entry if your language does not support line comments 4 | "lineComment": "//", 5 | // symbols used to start and end a block comment. Remove this entry if your language does not support block comments 6 | "blockComment": [ "/*", "*/" ] 7 | }, 8 | // symbols used as brackets 9 | "brackets": [ 10 | ["{", "}"], 11 | ["[", "]"], 12 | ["(", ")"] 13 | ], 14 | // symbols that are auto closed when typing 15 | "autoClosingPairs": [ 16 | ["{", "}"], 17 | ["[", "]"], 18 | ["(", ")"], 19 | ["\"", "\""], 20 | ["'", "'"] 21 | ], 22 | // symbols that can be used to surround a selection 23 | "surroundingPairs": [ 24 | ["{", "}"], 25 | ["[", "]"], 26 | ["(", ")"], 27 | ["\"", "\""], 28 | ["'", "'"] 29 | ] 30 | } -------------------------------------------------------------------------------- /language/vs-cafe/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "vs-cafe", 3 | "displayName": "vs-cafe", 4 | "description": "cafe support for visual studio code", 5 | "version": "0.0.1", 6 | "publisher": "ubsan", 7 | "engines": { 8 | "vscode": "^1.17.0" 9 | }, 10 | "categories": [ 11 | "Languages" 12 | ], 13 | "contributes": { 14 | "languages": [ 15 | { 16 | "id": "cafe", 17 | "aliases": [ 18 | "cafe", 19 | "cafe" 20 | ], 21 | "extensions": [ 22 | ".cf" 23 | ], 24 | "configuration": "./language-configuration.json" 25 | } 26 | ], 27 | "grammars": [ 28 | { 29 | "language": "cafe", 30 | "scopeName": "source.cafe", 31 | "path": "./syntaxes/cafe.tmLanguage.json" 32 | } 33 | ] 34 | } 35 | } -------------------------------------------------------------------------------- /language/vs-cafe/syntaxes/cafe.tmLanguage.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json", 3 | "name": "cafe", 4 | "foldingStartMarker": "\\{\\s*$", 5 | "foldingStopMarker": "^\\s*$\\}", 6 | "patterns": [ 7 | { 8 
| "include": "#keywords" 9 | }, 10 | { 11 | "include": "#constants" 12 | }, 13 | { 14 | "include": "#comments" 15 | } 16 | ], 17 | "repository": { 18 | "keywords": { 19 | "patterns": [ 20 | { 21 | "name": "keyword.control.cafe", 22 | "match": "\\b(type|struct|let|func|if|else)\\b" 23 | }, 24 | { 25 | "name": "keyword.operator.cafe", 26 | "match": "\\b(log)\\b" 27 | }, 28 | { 29 | "name": "invalid.illegal.cafe", 30 | "match": "\\b(val|data|raw|mut|own)\\b" 31 | } 32 | ] 33 | }, 34 | "constants": { 35 | "patterns": [ 36 | { 37 | "name": "constant.language.cafe", 38 | "match": "\\b(true|false)\\b" 39 | }, 40 | { 41 | "name": "constant.numeric.cafe", 42 | "match": "\\b( ?[0-9])+( ?[a-zA-Z][a-zA-Z0-9]*)?\\b" 43 | }, 44 | { 45 | "name": "string.quoted.double.cafe", 46 | "begin": "\"", 47 | "end": "\"", 48 | "patterns": [ 49 | { 50 | "name": "constant.character.escape.cafe", 51 | "match": "\\\\." 52 | } 53 | ] 54 | } 55 | ] 56 | }, 57 | "comments": { 58 | "patterns": [ 59 | { 60 | "name": "comment.line", 61 | "match": "//.*\n" 62 | }, 63 | { 64 | "name": "comment.block", 65 | "begin": "/\\*", 66 | "end": "\\*/" 67 | } 68 | ] 69 | } 70 | }, 71 | "scopeName": "source.cafe" 72 | } -------------------------------------------------------------------------------- /language/vs-cafe/vsc-extension-quickstart.md: -------------------------------------------------------------------------------- 1 | # Welcome to your VS Code Extension 2 | 3 | ## What's in the folder 4 | * This folder contains all of the files necessary for your extension. 5 | * `package.json` - this is the manifest file in which you declare your language support and define 6 | the location of the grammar file that has been copied into your extension. 7 | * `syntaxes/cafe.tmLanguage.json` - this is the TextMate grammar file that is used for tokenization. 8 | * `language-configuration.json` - this is the language configuration, defining the tokens that are used for 9 | comments and brackets. 
10 | 11 | ## Get up and running straight away 12 | * Make sure the language configuration settings in `language-configuration.json` are accurate. 13 | * Press `F5` to open a new window with your extension loaded. 14 | * Create a new file with a file name suffix matching your language. 15 | * Verify that syntax highlighting works and that the language configuration settings are working. 16 | 17 | ## Make changes 18 | * You can relaunch the extension from the debug toolbar after making changes to the files listed above. 19 | * You can also reload (`Ctrl+R` or `Cmd+R` on Mac) the VS Code window with your extension to load your changes. 20 | 21 | ## Add more language features 22 | * To add features such as intellisense, hovers and validators check out the VS Code extenders documentation at 23 | https://code.visualstudio.com/docs 24 | 25 | ## Install your extension 26 | * To start using your extension with Visual Studio Code copy it into the `.vscode/extensions` folder inside your user home directory and restart Code. 27 | * To share your extension with the world, read on https://code.visualstudio.com/docs about publishing an extension. 28 | -------------------------------------------------------------------------------- /run_scratch.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | cargo run -- --print-ast --print-mir language/scratch.cf || \ 3 | echo "failure to compile : " $? 
&& exit 4 | -------------------------------------------------------------------------------- /src/ast.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::fmt::{self, Display}; 3 | 4 | use containers::Scope; 5 | use mir::{self, Mir, TypeError}; 6 | use parse::{ItemVariant, Parser, ParserError, ParserErrorVariant, Spanned}; 7 | 8 | // user defined types will be strings 9 | #[derive(Clone, Debug)] 10 | pub enum StringlyType { 11 | UserDefinedType(String), 12 | Unit, 13 | } 14 | 15 | #[derive(Debug, Copy, Clone, PartialEq, Eq)] 16 | pub enum BinOp { 17 | Plus, 18 | LessEq, 19 | } 20 | 21 | #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] 22 | pub enum BinOpPrecedence { 23 | Addition, 24 | Comparison, 25 | } 26 | 27 | #[derive(Debug)] 28 | pub enum ExpressionVariant { 29 | UnitLiteral, 30 | IntLiteral { is_negative: bool, value: u64 }, 31 | BoolLiteral(bool), 32 | Variable(String), 33 | Negative(Box), 34 | Block { 35 | statements: Vec, 36 | expr: Box, 37 | }, 38 | IfElse { 39 | cond: Box, 40 | then: Box, 41 | els: Box, 42 | }, 43 | BinOp { 44 | lhs: Box, 45 | rhs: Box, 46 | op: BinOp, 47 | }, 48 | Call { 49 | callee: String, 50 | args: Vec, 51 | }, 52 | Log(Box), 53 | } 54 | pub type Expression = Spanned; 55 | 56 | #[derive(Debug)] 57 | pub enum StatementVariant { 58 | Expr(Expression), 59 | Local { 60 | name: String, 61 | ty: Option, 62 | initializer: Expression, 63 | }, 64 | } 65 | pub type Statement = Spanned; 66 | 67 | #[derive(Debug)] 68 | pub struct FunctionValue { 69 | pub params: Vec<(String, StringlyType)>, 70 | pub ret_ty: StringlyType, 71 | pub expr: Expression, 72 | } 73 | pub type Function = Spanned; 74 | 75 | #[derive(Debug)] 76 | pub struct StructValue { 77 | pub members: Vec<(String, StringlyType)>, 78 | } 79 | pub type Struct = Spanned; 80 | 81 | #[derive(Debug)] 82 | pub enum AstErrorVariant { 83 | Parser(ParserErrorVariant), 84 | MultipleValueDefinitions { 
name: String, original: Spanned<()> }, 85 | } 86 | impl Display for AstErrorVariant { 87 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 88 | use self::AstErrorVariant::*; 89 | match *self { 90 | Parser(ref p) => p.fmt(f), 91 | MultipleValueDefinitions { 92 | ref name, 93 | ref original, 94 | } => write!( 95 | f, 96 | "found multiple definitions for '{}' - original definition at {}", 97 | name, 98 | original.span, 99 | ), 100 | } 101 | } 102 | } 103 | pub type AstError = Spanned; 104 | pub type AstResult = Result; 105 | 106 | #[derive(Debug)] 107 | pub struct Ast { 108 | funcs: HashMap, 109 | } 110 | 111 | struct Types<'ctx> { 112 | inner: HashMap>, 113 | } 114 | 115 | 116 | impl BinOp { 117 | pub fn precedence(self) -> BinOpPrecedence { 118 | match self { 119 | BinOp::Plus => BinOpPrecedence::Addition, 120 | BinOp::LessEq => BinOpPrecedence::Comparison, 121 | } 122 | } 123 | } 124 | 125 | impl Expression { 126 | fn ty<'ctx>( 127 | &self, 128 | tys: &Types<'ctx>, 129 | funcs: &HashMap, 130 | locals: &Scope<(mir::Type<'ctx>, mir::Reference)>, 131 | builder: &mir::FunctionBuilder<'ctx>, 132 | mir: &Mir<'ctx>, 133 | ) -> Result, TypeError<'ctx>> { 134 | match **self { 135 | ExpressionVariant::UnitLiteral => Ok(mir.get_builtin_type(mir::BuiltinType::Unit)), 136 | ExpressionVariant::BoolLiteral(_) => Ok(mir.get_builtin_type(mir::BuiltinType::Bool)), 137 | ExpressionVariant::IntLiteral { .. } => { 138 | Ok(mir.get_builtin_type(mir::BuiltinType::SInt(mir::IntSize::I32))) 139 | } 140 | ExpressionVariant::Variable(ref name) => if let Some(local) = locals.get(name) { 141 | Ok(local.0) 142 | } else { 143 | Err(TypeError::binding_not_found(name.clone(), self.span)) 144 | }, 145 | ExpressionVariant::Negative(ref e) => e.ty(tys, funcs, locals, builder, mir), 146 | ExpressionVariant::Block { ref expr, .. } => expr.ty(tys, funcs, locals, builder, mir), 147 | ExpressionVariant::IfElse { ref then, .. 
} => then.ty(tys, funcs, locals, builder, mir), 148 | ExpressionVariant::BinOp { 149 | op: BinOp::Plus, 150 | ref lhs, 151 | .. 152 | } => lhs.ty(tys, funcs, locals, builder, mir), 153 | ExpressionVariant::BinOp { 154 | op: BinOp::LessEq, .. 155 | } => Ok(mir.get_builtin_type(mir::BuiltinType::Bool)), 156 | ExpressionVariant::Call { ref callee, .. } => if let Some(func) = funcs.get(callee) { 157 | Ok(mir.get_function_type(*func).ret) 158 | } else { 159 | Err(TypeError::binding_not_found(callee.clone(), self.span)) 160 | }, 161 | ExpressionVariant::Log(_) => Ok(mir.get_builtin_type(mir::BuiltinType::Unit)), 162 | } 163 | } 164 | } 165 | 166 | impl Expression { 167 | fn mir_binop(op: BinOp, lhs: mir::Reference, rhs: mir::Reference) -> mir::Value { 168 | match op { 169 | BinOp::Plus => mir::Value::Add(lhs, rhs), 170 | BinOp::LessEq => mir::Value::LessEq(lhs, rhs), 171 | } 172 | } 173 | 174 | fn to_mir<'ctx>( 175 | &self, 176 | dst: mir::Reference, 177 | // TODO(ubsan): this state should probably all be in a struct 178 | tys: &Types<'ctx>, 179 | mir: &mut Mir<'ctx>, 180 | builder: &mut mir::FunctionBuilder<'ctx>, 181 | block: &mut mir::Block, 182 | funcs: &HashMap, 183 | locals: &Scope<(mir::Type<'ctx>, mir::Reference)>, 184 | ) -> Result<(), TypeError<'ctx>> { 185 | let bool = mir.get_builtin_type(mir::BuiltinType::Bool); 186 | let span = self.span; 187 | match **self { 188 | ExpressionVariant::IntLiteral { is_negative, value } => { 189 | let mul = if is_negative { -1 } else { 1 }; 190 | builder.add_stmt( 191 | mir, 192 | *block, 193 | dst, 194 | mir::Value::int_lit((value as i32) * mul), 195 | span, 196 | )? 197 | } 198 | ExpressionVariant::UnitLiteral => { 199 | builder.add_stmt(mir, *block, dst, mir::Value::unit_lit(), span)? 200 | } 201 | ExpressionVariant::BoolLiteral(b) => { 202 | builder.add_stmt(mir, *block, dst, mir::Value::bool_lit(b), span)? 
203 | } 204 | ExpressionVariant::Negative(ref e) => { 205 | let ty = e.ty(tys, funcs, locals, builder, mir)?; 206 | let var = builder.add_anonymous_local(ty); 207 | e.to_mir(var, tys, mir, builder, block, funcs, locals)?; 208 | builder.add_stmt(mir, *block, dst, mir::Value::Negative(var), span)? 209 | } 210 | ExpressionVariant::Variable(ref name) => if let Some(&loc) = locals.get(name) { 211 | builder.add_stmt(mir, *block, dst, mir::Value::Reference(loc.1), span)?; 212 | } else { 213 | return Err(TypeError::binding_not_found(name.clone(), span)); 214 | }, 215 | ExpressionVariant::Block { 216 | ref statements, 217 | ref expr, 218 | } => { 219 | let mut locals = Scope::with_parent(locals); 220 | for stmt in statements { 221 | match **stmt { 222 | StatementVariant::Expr(ref e) => { 223 | let ty = e.ty(tys, funcs, &locals, builder, mir)?; 224 | let tmp = builder.add_anonymous_local(ty); 225 | e.to_mir(tmp, tys, mir, builder, block, funcs, &locals)?; 226 | } 227 | StatementVariant::Local { 228 | ref name, 229 | ref ty, 230 | ref initializer, 231 | } => { 232 | let ty = if let Some(ref ty) = *ty { 233 | match tys.get(ty) { 234 | Some(mir_ty) => mir_ty, 235 | None => { 236 | let str_ty = match *ty { 237 | StringlyType::UserDefinedType(ref s) => s.clone(), 238 | StringlyType::Unit => unreachable!(), 239 | }; 240 | return Err(TypeError::type_not_found(str_ty, stmt.span)); 241 | } 242 | } 243 | } else { 244 | initializer.ty(tys, funcs, &locals, &builder, mir)? 245 | }; 246 | let var = builder.add_local(name.clone(), ty); 247 | initializer.to_mir(var, tys, mir, builder, block, funcs, &locals)?; 248 | locals.insert(name.clone(), (ty, var)); 249 | } 250 | }; 251 | } 252 | expr.to_mir(dst, tys, mir, builder, block, funcs, &locals)? 
253 | } 254 | ExpressionVariant::IfElse { 255 | ref cond, 256 | ref then, 257 | ref els, 258 | } => { 259 | let cond = { 260 | let var = builder.add_anonymous_local(bool); 261 | cond.to_mir(var, tys, mir, builder, block, funcs, locals)?; 262 | var 263 | }; 264 | let (mut then_bb, mut els_bb, final_bb) = builder.term_if_else(*block, cond); 265 | then.to_mir(dst, tys, mir, builder, &mut then_bb, funcs, locals)?; 266 | els.to_mir(dst, tys, mir, builder, &mut els_bb, funcs, locals)?; 267 | *block = final_bb; 268 | } 269 | ExpressionVariant::BinOp { 270 | ref lhs, 271 | ref rhs, 272 | ref op, 273 | } => { 274 | let lhs = { 275 | let ty = lhs.ty(tys, funcs, locals, builder, mir)?; 276 | let var = builder.add_anonymous_local(ty); 277 | lhs.to_mir(var, tys, mir, builder, block, funcs, locals)?; 278 | var 279 | }; 280 | let rhs = { 281 | let ty = rhs.ty(tys, funcs, locals, builder, mir)?; 282 | let var = builder.add_anonymous_local(ty); 283 | rhs.to_mir(var, tys, mir, builder, block, funcs, locals)?; 284 | var 285 | }; 286 | builder.add_stmt(mir, *block, dst, Self::mir_binop(*op, lhs, rhs), span)?; 287 | } 288 | ExpressionVariant::Call { 289 | ref callee, 290 | ref args, 291 | } => { 292 | let args = args.iter() 293 | .map(|v| { 294 | let ty = v.ty(tys, funcs, locals, builder, mir)?; 295 | let var = builder.add_anonymous_local(ty); 296 | v.to_mir(var, tys, mir, builder, block, funcs, locals)?; 297 | Ok(var) 298 | }) 299 | .collect::>()?; 300 | if let Some(&callee) = funcs.get(callee) { 301 | builder.add_stmt(mir, *block, dst, mir::Value::Call { callee, args }, span)? 
302 | } else { 303 | panic!("function `{}` doesn't exist", callee); 304 | } 305 | } 306 | ExpressionVariant::Log(ref arg) => { 307 | let ty = arg.ty(tys, funcs, locals, builder, mir)?; 308 | let var = builder.add_anonymous_local(ty); 309 | arg.to_mir(var, tys, mir, builder, block, funcs, locals)?; 310 | builder.add_stmt(mir, *block, dst, mir::Value::Log(var), span)?; 311 | } 312 | } 313 | 314 | Ok(()) 315 | } 316 | } 317 | 318 | impl Function { 319 | fn build_mir<'ctx>( 320 | &self, 321 | tys: &Types<'ctx>, 322 | decl: mir::FunctionDecl, 323 | funcs: &HashMap, 324 | mir: &mut Mir<'ctx>, 325 | ) -> Result<(), TypeError<'ctx>> { 326 | let mut builder = mir.get_function_builder(decl); 327 | 328 | let mut locals = Scope::new(); 329 | for (i, param) in self.params.iter().enumerate() { 330 | locals.insert( 331 | param.0.clone(), 332 | (tys.get(¶m.1).unwrap(), builder.get_param(i as u32)), 333 | ); 334 | } 335 | 336 | let mut block = builder.entrance(); 337 | let ret = mir::Reference::ret(); 338 | self.expr 339 | .to_mir(ret, tys, mir, &mut builder, &mut block, funcs, &mut locals)?; 340 | Ok(mir.add_function_definition(builder)) 341 | } 342 | } 343 | 344 | impl From for AstError { 345 | fn from(pe: ParserError) -> AstError { 346 | Spanned { 347 | thing: AstErrorVariant::Parser(pe.thing), 348 | span: pe.span, 349 | } 350 | } 351 | } 352 | 353 | impl<'ctx> Types<'ctx> { 354 | fn new() -> Self { 355 | Self { 356 | inner: HashMap::new(), 357 | } 358 | } 359 | 360 | pub fn insert(&mut self, name: String, ty: mir::Type<'ctx>) { 361 | self.inner.insert(name, ty); 362 | } 363 | pub fn get(&self, name: &StringlyType) -> Option> { 364 | match *name { 365 | StringlyType::UserDefinedType(ref name) => self.inner.get(name).map(|x| *x), 366 | StringlyType::Unit => self.inner.get("unit").map(|x| *x), 367 | } 368 | } 369 | } 370 | 371 | impl Ast { 372 | pub fn new(file: &str) -> AstResult { 373 | let mut parse = Parser::new(file); 374 | let mut funcs = HashMap::::new(); 375 | let mut 
types = HashMap::::new(); 376 | loop { 377 | let tmp = parse.next_item(); 378 | if let Err(Spanned { 379 | thing: ParserErrorVariant::ExpectedEof, 380 | .. 381 | }) = tmp 382 | { 383 | break; 384 | } 385 | let (name, Spanned { thing, span }) = tmp?; 386 | match thing { 387 | ItemVariant::Function(thing) => { 388 | if let Some(orig) = funcs.get(&name) { 389 | return Err(Spanned { 390 | thing: AstErrorVariant::MultipleValueDefinitions { 391 | name: name.clone(), 392 | original: Spanned { 393 | thing: (), 394 | span: orig.span, 395 | }, 396 | }, 397 | span, 398 | }); 399 | }; 400 | funcs.insert(name, Spanned { thing, span }); 401 | } 402 | ItemVariant::StructDecl(members) => { 403 | let thing = StructValue { members }; 404 | types.insert(name, Spanned { thing, span }); 405 | } 406 | } 407 | } 408 | Ok(Ast { funcs }) 409 | } 410 | } 411 | 412 | impl Ast { 413 | pub fn build_mir<'ctx>(&mut self, mir: &mut Mir<'ctx>) -> Result<(), TypeError<'ctx>> { 414 | let mut mir_funcs: HashMap = HashMap::new(); 415 | let mut tys = Types::new(); 416 | 417 | Self::prelude_types(mir, &mut tys); 418 | 419 | for (name, func) in &self.funcs { 420 | let params = { 421 | let tmp = func.params 422 | .iter() 423 | // TODO(ubsan): should return a TypeError, not unwrap 424 | .map(|&(_, ref ty)| tys.get(ty).unwrap()) 425 | .collect(); 426 | mir::TypeList::from_existing(tmp) 427 | }; 428 | let ret = tys.get(&func.ret_ty).unwrap(); 429 | 430 | let decl = mir.add_function_decl( 431 | Some(name.to_owned()), 432 | mir::FunctionType { ret, params }, 433 | func.span, 434 | )?; 435 | mir_funcs.insert(name.to_owned(), decl); 436 | } 437 | for (name, func) in &self.funcs { 438 | let decl = mir_funcs[name]; 439 | func.build_mir(&tys, decl, &mir_funcs, mir)?; 440 | } 441 | Ok(()) 442 | } 443 | 444 | fn prelude_types<'ctx>(mir: &Mir<'ctx>, tys: &mut Types<'ctx>) { 445 | use mir::BuiltinType::*; 446 | use mir::IntSize::*; 447 | tys.insert("unit".to_owned(), mir.get_builtin_type(Unit)); 448 | 
tys.insert("bool".to_owned(), mir.get_builtin_type(Bool)); 449 | tys.insert("s32".to_owned(), mir.get_builtin_type(SInt(I32))); 450 | } 451 | } 452 | 453 | impl Display for StringlyType { 454 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 455 | match *self { 456 | StringlyType::UserDefinedType(ref s) => write!(f, "{}", s), 457 | StringlyType::Unit => write!(f, "unit"), 458 | } 459 | } 460 | } 461 | 462 | impl Display for BinOp { 463 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 464 | match *self { 465 | BinOp::Plus => write!(f, "+"), 466 | BinOp::LessEq => write!(f, "<="), 467 | } 468 | } 469 | } 470 | 471 | impl ExpressionVariant { 472 | fn is_nullary(&self) -> bool { 473 | if let ExpressionVariant::UnitLiteral = *self { 474 | true 475 | } else { 476 | false 477 | } 478 | } 479 | 480 | fn fmt_as_block(&self, f: &mut fmt::Formatter, indent: usize) -> fmt::Result { 481 | if let ExpressionVariant::Block { .. } = *self { 482 | self.fmt(f, indent) 483 | } else { 484 | writeln!(f, "{{")?; 485 | ::write_indent(f, indent + 1)?; 486 | self.fmt(f, indent + 1)?; 487 | writeln!(f, "")?; 488 | ::write_indent(f, indent)?; 489 | write!(f, "}}") 490 | } 491 | } 492 | fn fmt(&self, f: &mut fmt::Formatter, indent: usize) -> fmt::Result { 493 | match *self { 494 | ExpressionVariant::IntLiteral { is_negative, value } => if is_negative { 495 | write!(f, "-{}", value) 496 | } else { 497 | write!(f, "{}", value) 498 | }, 499 | ExpressionVariant::BoolLiteral(b) => write!(f, "{}", b), 500 | ExpressionVariant::UnitLiteral => write!(f, "()"), 501 | ExpressionVariant::Variable(ref s) => write!(f, "{}", s), 502 | ExpressionVariant::Negative(ref e) => { 503 | write!(f, "-")?; 504 | e.fmt(f, indent) 505 | } 506 | ExpressionVariant::BinOp { 507 | ref lhs, 508 | ref rhs, 509 | ref op, 510 | } => { 511 | lhs.fmt(f, indent)?; 512 | write!(f, " {} ", op)?; 513 | rhs.fmt(f, indent) 514 | } 515 | ExpressionVariant::Block { 516 | ref statements, 517 | ref expr, 518 | } => { 519 | 
writeln!(f, "{{")?; 520 | for stmt in statements { 521 | ::write_indent(f, indent + 1)?; 522 | stmt.thing.fmt(f, indent + 1)?; 523 | writeln!(f, ";")?; 524 | } 525 | if !expr.is_nullary() { 526 | ::write_indent(f, indent + 1)?; 527 | expr.fmt(f, indent + 1)?; 528 | writeln!(f, "")?; 529 | } 530 | ::write_indent(f, indent)?; 531 | write!(f, "}}") 532 | } 533 | ExpressionVariant::IfElse { 534 | ref cond, 535 | ref then, 536 | ref els, 537 | } => { 538 | write!(f, "if ")?; 539 | cond.fmt(f, indent)?; 540 | write!(f, " ")?; 541 | then.fmt_as_block(f, indent)?; 542 | //if !els.is_nullary() { 543 | write!(f, " else ")?; 544 | els.fmt_as_block(f, indent)?; 545 | //} 546 | Ok(()) 547 | } 548 | ExpressionVariant::Call { 549 | ref callee, 550 | ref args, 551 | } => { 552 | write!(f, "{}(", callee)?; 553 | if !args.is_empty() { 554 | for arg in &args[..args.len() - 1] { 555 | arg.fmt(f, indent)?; 556 | write!(f, ", ")?; 557 | } 558 | args[args.len() - 1].fmt(f, indent)?; 559 | write!(f, ")") 560 | } else { 561 | write!(f, ")") 562 | } 563 | } 564 | ExpressionVariant::Log(ref arg) => { 565 | write!(f, "log(")?; 566 | arg.fmt(f, indent)?; 567 | write!(f, ")") 568 | } 569 | } 570 | } 571 | } 572 | 573 | impl StatementVariant { 574 | fn fmt(&self, f: &mut fmt::Formatter, indent: usize) -> fmt::Result { 575 | match *self { 576 | StatementVariant::Expr(ref e) => e.fmt(f, indent), 577 | StatementVariant::Local { 578 | ref name, 579 | ref ty, 580 | ref initializer, 581 | } => if let Some(ref ty) = *ty { 582 | write!(f, "let {}: {} = ", name, ty)?; 583 | initializer.fmt(f, indent) 584 | } else { 585 | write!(f, "let {} = ", name)?; 586 | initializer.fmt(f, indent) 587 | }, 588 | } 589 | } 590 | } 591 | 592 | impl Display for Ast { 593 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 594 | for (name, func) in &self.funcs { 595 | let ref func = func.thing; 596 | write!(f, "func {}(", name)?; 597 | if !func.params.is_empty() { 598 | for p in &func.params[..func.params.len() - 1] 
{ 599 | let (ref name, ref ty) = *p; 600 | write!(f, "{}: {}, ", name, ty)?; 601 | } 602 | let (ref name, ref ty) = func.params[func.params.len() - 1]; 603 | write!(f, "{}: {}", name, ty)?; 604 | } 605 | write!(f, "): {} ", func.ret_ty)?; 606 | func.expr.fmt_as_block(f, 0)?; 607 | writeln!(f, "")?; 608 | } 609 | Ok(()) 610 | } 611 | } 612 | -------------------------------------------------------------------------------- /src/containers.rs: -------------------------------------------------------------------------------- 1 | use std::ptr; 2 | use std::sync::Mutex; 3 | 4 | use std::ops::Index; 5 | 6 | pub struct Scope<'a, T: 'a> { 7 | parent: Option<&'a Scope<'a, T>>, 8 | current: Vec<(String, T)>, 9 | } 10 | 11 | impl<'a, T> Scope<'a, T> { 12 | pub fn new() -> Self { 13 | Self { 14 | parent: None, 15 | current: Vec::new(), 16 | } 17 | } 18 | 19 | pub fn with_parent(parent: &'a Scope<'a, T>) -> Self { 20 | Self { 21 | parent: Some(parent), 22 | current: Vec::new(), 23 | } 24 | } 25 | 26 | pub fn get(&self, key: &str) -> Option<&T> { 27 | for &(ref s, ref t) in self.current.iter().rev() { 28 | if key == s { 29 | return Some(t); 30 | } 31 | } 32 | if let Some(parent) = self.parent { 33 | parent.get(key) 34 | } else { 35 | None 36 | } 37 | } 38 | 39 | pub fn insert(&mut self, key: String, val: T) { 40 | self.current.push((key, val)); 41 | } 42 | } 43 | 44 | impl<'a, 'b, T> Index<&'b str> for Scope<'a, T> { 45 | type Output = T; 46 | 47 | fn index(&self, key: &'b str) -> &T { 48 | if let Some(ret) = self.get(key) { 49 | ret 50 | } else { 51 | panic!("Didn't find key: {}", key) 52 | } 53 | } 54 | } 55 | 56 | pub struct Arena { 57 | arena: Mutex<(Vec>, *mut Vec)>, 58 | } 59 | 60 | impl Arena { 61 | pub fn new() -> Self { 62 | let mut inner = (vec![Vec::with_capacity(16)], ptr::null_mut()); 63 | inner.1 = &mut inner.0[0]; 64 | Arena { 65 | arena: Mutex::new(inner), 66 | } 67 | } 68 | 69 | pub fn push(&self, t: T) -> &T { 70 | let mut inner = self.arena.lock().unwrap(); 71 | 
unsafe { 72 | let mut v = &mut *inner.1; 73 | let cap = v.capacity(); 74 | if v.len() == cap { 75 | inner 76 | .0 77 | .push(Vec::with_capacity(if cap >= 1024 { cap } else { cap * 2 })); 78 | inner.1 = inner.0.last_mut().unwrap(); 79 | v = &mut *inner.1; 80 | } 81 | v.push(t); 82 | &v[v.len() - 1] 83 | } 84 | } 85 | 86 | // calls the function on subsequent things until it hits Some, which it returns 87 | // hack to deal with the lack of generators 88 | pub fn call_on_all(&self, mut f: F) -> Option 89 | where 90 | F: FnMut(&T) -> Option, 91 | { 92 | let inner = self.arena.lock().unwrap(); 93 | for v in &inner.0 { 94 | for el in v { 95 | if let Some(v) = f(el) { 96 | return Some(v); 97 | } 98 | } 99 | } 100 | None 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate clap; 2 | 3 | mod ast; 4 | mod containers; 5 | mod mir; 6 | mod parse; 7 | 8 | use std::fs::File; 9 | use std::fmt; 10 | 11 | use ast::Ast; 12 | use mir::Mir; 13 | 14 | fn write_indent(f: &mut fmt::Formatter, indent: usize) -> fmt::Result { 15 | const INDENT_SIZE: usize = 2; 16 | for _ in 0..(indent * INDENT_SIZE) { 17 | write!(f, " ")?; 18 | } 19 | Ok(()) 20 | } 21 | 22 | 23 | 24 | macro_rules! 
user_error { 25 | ($($args:expr),* $(,)*) => ({ 26 | eprintln!($($args),*); 27 | ::std::panic::set_hook(Box::new(|_| {})); 28 | panic!(); 29 | }) 30 | } 31 | 32 | fn main() { 33 | use clap::{App, Arg}; 34 | use std::io::Read; 35 | 36 | let matches = App::new("cfc") 37 | .version("0.1.0") 38 | .author("Nicole Mazzuca ") 39 | .about( 40 | "A compiler for the café language.\n\ 41 | Written in Rust.", 42 | ) 43 | .arg(Arg::with_name("input").required(true)) 44 | .arg( 45 | Arg::with_name("print-ast") 46 | .long("print-ast") 47 | .help("print the generated AST"), 48 | ) 49 | .arg( 50 | Arg::with_name("print-mir") 51 | .long("print-mir") 52 | .help("print the generated MIR"), 53 | ) 54 | .arg( 55 | Arg::with_name("no-output") 56 | .long("no-output") 57 | .help("do not print the output of the run"), 58 | ) 59 | .arg( 60 | Arg::with_name("no-run") 61 | .long("no-run") 62 | .help("do not actually run the resulting mir") 63 | .conflicts_with("no-output"), 64 | ) 65 | .get_matches(); 66 | 67 | let name = matches.value_of("input").unwrap(); 68 | let print_ast = matches.is_present("print-ast"); 69 | let print_mir = matches.is_present("print-mir"); 70 | let print_run = !matches.is_present("no-output"); 71 | let do_run = !matches.is_present("no-run"); 72 | 73 | let mut file = Vec::new(); 74 | match File::open(&name) { 75 | Ok(mut o) => { 76 | o.read_to_end(&mut file).unwrap(); 77 | } 78 | Err(e) => { 79 | user_error!("Failure to open file '{}': {}", name, e); 80 | } 81 | } 82 | let file = String::from_utf8(file).unwrap(); 83 | 84 | let ast = match Ast::new(&file) { 85 | Ok(ast) => ast, 86 | Err(e) => user_error!("error: {}", e), 87 | }; 88 | if print_ast { 89 | println!(" === AST === "); 90 | println!("{}", ast); 91 | } 92 | let ctxt = mir::MirCtxt::new(); 93 | let mir = match Mir::new(&ctxt, ast) { 94 | Ok(mir) => mir, 95 | Err(e) => user_error!("error: {}", e), 96 | }; 97 | if print_mir { 98 | println!(" === MIR === "); 99 | println!("{}", mir); 100 | } 101 | 102 | if do_run { 
103 | if print_run { 104 | if print_ast || print_mir { 105 | println!(" === RUN === "); 106 | } 107 | mir.run(); 108 | } else { 109 | mir.run(); 110 | } 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/mir/data.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | use parse::{Span, Spanned}; 4 | 5 | #[derive(Copy, Clone, Debug)] 6 | pub struct Reference(pub(super) u32); 7 | 8 | impl Reference { 9 | pub fn ret() -> Self { 10 | Reference(0) 11 | } 12 | 13 | pub(super) fn param(n: u32) -> Self { 14 | Reference(n + 1) 15 | } 16 | } 17 | 18 | #[derive(Debug)] 19 | pub enum Literal { 20 | Int(i32), 21 | Bool(bool), 22 | Unit, 23 | } 24 | 25 | impl Literal { 26 | fn ty<'ctx>(&self, mir: &Mir<'ctx>) -> Type<'ctx> { 27 | match *self { 28 | Literal::Int(_) => mir.get_builtin_type(BuiltinType::SInt(IntSize::I32)), 29 | Literal::Bool(_) => mir.get_builtin_type(BuiltinType::Bool), 30 | Literal::Unit => mir.get_builtin_type(BuiltinType::Unit), 31 | } 32 | } 33 | } 34 | 35 | #[derive(Debug)] 36 | pub enum Value { 37 | Literal(Literal), 38 | Reference(Reference), 39 | Negative(Reference), 40 | Add(Reference, Reference), 41 | LessEq(Reference, Reference), 42 | Call { 43 | callee: FunctionDecl, 44 | args: Vec, 45 | }, 46 | Log(Reference), 47 | } 48 | 49 | impl Value { 50 | pub fn int_lit(i: i32) -> Self { 51 | Value::Literal(Literal::Int(i)) 52 | } 53 | pub fn bool_lit(b: bool) -> Self { 54 | Value::Literal(Literal::Bool(b)) 55 | } 56 | pub fn unit_lit() -> Self { 57 | Value::Literal(Literal::Unit) 58 | } 59 | 60 | pub fn ty<'ctx>( 61 | &self, 62 | builder: &FunctionBuilder<'ctx>, 63 | mir: &Mir<'ctx>, 64 | span: Span, 65 | ) -> Result, TypeError<'ctx>> { 66 | match *self { 67 | Value::Literal(ref lit) => Ok(lit.ty(mir)), 68 | Value::Negative(ref_) => { 69 | let ty = builder.bindings[ref_.0 as usize].ty; 70 | let s32 = 
mir.get_builtin_type(BuiltinType::SInt(IntSize::I32)); 71 | assert!(ty == s32, "negative must be done on s32"); 72 | Ok(builder.bindings[ref_.0 as usize].ty) 73 | } 74 | Value::Reference(ref_) => Ok(builder.bindings[ref_.0 as usize].ty), 75 | Value::Add(rhs, lhs) => { 76 | let ty_lhs = builder.bindings[lhs.0 as usize].ty; 77 | let ty_rhs = builder.bindings[rhs.0 as usize].ty; 78 | if ty_rhs != ty_lhs { 79 | Err(Spanned { 80 | thing: TypeErrorVariant::Mismatched { 81 | lhs: ty_lhs, 82 | rhs: ty_rhs, 83 | }, 84 | span, 85 | }) 86 | } else { 87 | let s32 = mir.get_builtin_type(BuiltinType::SInt(IntSize::I32)); 88 | assert!(ty_lhs == s32, "addition must be done on s32"); 89 | Ok(ty_lhs) 90 | } 91 | } 92 | Value::LessEq(rhs, lhs) => { 93 | let ty_lhs = builder.bindings[lhs.0 as usize].ty; 94 | let ty_rhs = builder.bindings[rhs.0 as usize].ty; 95 | if ty_rhs != ty_lhs { 96 | Err(Spanned { 97 | thing: TypeErrorVariant::Mismatched { 98 | lhs: ty_lhs, 99 | rhs: ty_rhs, 100 | }, 101 | span, 102 | }) 103 | } else { 104 | Ok(mir.get_builtin_type(BuiltinType::Bool)) 105 | } 106 | } 107 | Value::Call { 108 | callee: ref decl, 109 | ref args, 110 | } => { 111 | let callee = &mir.funcs[decl.0]; 112 | let params = &callee.ty.params; 113 | if args.len() != (params.number_of_types() as usize) { 114 | return Err(Spanned { 115 | thing: TypeErrorVariant::NumberOfArgs { 116 | name: mir.funcs[decl.0] 117 | .name 118 | .clone() 119 | .unwrap_or("".to_owned()), 120 | args_expected: callee.ty.params.number_of_types() as u32, 121 | args_found: args.len() as u32, 122 | }, 123 | span, 124 | }); 125 | } 126 | 127 | for (arg, parm) in args.iter().zip(params) { 128 | let arg_ty = builder.bindings[arg.0 as usize].ty; 129 | if arg_ty != parm { 130 | return Err(Spanned { 131 | thing: TypeErrorVariant::Mismatched { 132 | lhs: parm, 133 | rhs: arg_ty, 134 | }, 135 | span, 136 | }); 137 | } 138 | } 139 | 140 | Ok(callee.ty.ret) 141 | } 142 | Value::Log(_) => Ok(mir.get_builtin_type(BuiltinType::Unit)), 
143 | } 144 | } 145 | } 146 | 147 | #[derive(Copy, Clone, Debug)] 148 | pub(super) enum BindingKind { 149 | Param(u32), 150 | Local(u32), 151 | Return, 152 | } 153 | 154 | #[derive(Debug)] 155 | pub(super) struct Binding<'ctx> { 156 | pub(super) name: Option, 157 | pub(super) ty: Type<'ctx>, 158 | pub(super) kind: BindingKind, 159 | } 160 | 161 | #[derive(Copy, Clone, Debug)] 162 | pub struct Block(pub(super) u32); 163 | 164 | #[derive(Copy, Clone, Debug)] 165 | pub enum Terminator { 166 | IfElse { 167 | cond: Reference, 168 | then: Block, 169 | els: Block, 170 | }, 171 | Goto(Block), 172 | Return, 173 | } 174 | 175 | #[derive(Debug)] 176 | pub(super) struct Statement { 177 | pub(super) lhs: Reference, 178 | pub(super) rhs: Value, 179 | } 180 | 181 | #[derive(Debug)] 182 | pub(super) struct BlockData { 183 | pub(super) stmts: Vec, 184 | pub(super) term: Terminator, 185 | } 186 | -------------------------------------------------------------------------------- /src/mir/mod.rs: -------------------------------------------------------------------------------- 1 | // TODO(ubsan): make sure to start dealing with Spanneds 2 | // whee errors are fun 3 | // TODO(ubsan): typeck should *probably* be done in AST 4 | // the current typeck is pretty hax 5 | // TODO(ubsan): figure out a good way to give params names 6 | // without lots of allocations 7 | 8 | mod runner; 9 | mod ty; 10 | mod data; 11 | 12 | use ast::Ast; 13 | use containers::Arena; 14 | use parse::{Span, Spanned}; 15 | 16 | use self::runner::Runner; 17 | pub use self::ty::*; 18 | pub use self::data::*; 19 | 20 | use std::ops::{Add, Rem, Sub}; 21 | 22 | #[inline(always)] 23 | fn align(x: T, to: T) -> T 24 | where 25 | T: Copy + Add + Sub + Rem + PartialEq, 26 | { 27 | if to == x - x { 28 | x 29 | } else if x % to == x - x { 30 | x 31 | } else { 32 | x + (to - x % to) 33 | } 34 | } 35 | 36 | impl BlockData { 37 | fn new() -> Self { 38 | BlockData { 39 | stmts: vec![], 40 | term: Terminator::Return, 41 | } 42 | } 43 | 
44 | fn with_term(term: Terminator) -> Self { 45 | BlockData { 46 | stmts: vec![], 47 | term, 48 | } 49 | } 50 | } 51 | 52 | #[derive(Debug)] 53 | struct FunctionValue<'ctx> { 54 | // NOTE(ubsan): *this is just for stack locals, not for args* 55 | locals: TypeList<'ctx>, 56 | blks: Vec, 57 | bindings: Vec>, 58 | } 59 | 60 | #[derive(Debug)] 61 | pub struct FunctionBuilder<'ctx> { 62 | decl: FunctionDecl, 63 | locals: TypeList<'ctx>, 64 | blks: Vec, 65 | bindings: Vec>, 66 | } 67 | 68 | // creation and misc 69 | impl<'ctx> FunctionBuilder<'ctx> { 70 | fn new(decl: FunctionDecl, mir: &Mir<'ctx>) -> Self { 71 | let enter_block = BlockData { 72 | stmts: vec![], 73 | term: Terminator::Goto(Block(1)), 74 | }; 75 | let exit_block = BlockData { 76 | stmts: vec![], 77 | term: Terminator::Return, 78 | }; 79 | 80 | let ty = &mir.funcs[decl.0].ty; 81 | let mut bindings = Vec::with_capacity((ty.params.number_of_types() + 1) as usize); 82 | bindings.push(Binding { 83 | name: Some("".to_owned()), 84 | ty: ty.ret, 85 | kind: BindingKind::Return, 86 | }); 87 | for (i, ty) in ty.params.iter().enumerate() { 88 | bindings.push(Binding { 89 | name: None, 90 | ty, 91 | kind: BindingKind::Param(i as u32), 92 | }) 93 | } 94 | FunctionBuilder { 95 | decl, 96 | locals: TypeList::new(), 97 | bindings, 98 | blks: vec![enter_block, exit_block], 99 | } 100 | } 101 | 102 | pub fn entrance(&self) -> Block { 103 | Block(0) 104 | } 105 | 106 | pub fn get_param(&self, n: u32) -> Reference { 107 | Reference::param(n) 108 | } 109 | } 110 | 111 | // modification 112 | impl<'ctx> FunctionBuilder<'ctx> { 113 | pub fn add_stmt( 114 | &mut self, 115 | mir: &Mir<'ctx>, 116 | blk: Block, 117 | lhs: Reference, 118 | rhs: Value, 119 | span: Span, 120 | ) -> Result<(), TypeError<'ctx>> { 121 | { 122 | let ty_lhs = self.bindings[lhs.0 as usize].ty; 123 | let ty_rhs = rhs.ty(self, mir, span)?; 124 | if ty_lhs != ty_rhs { 125 | return Err(Spanned { 126 | thing: TypeErrorVariant::Mismatched { 127 | lhs: ty_lhs, 128 
| rhs: ty_rhs, 129 | }, 130 | span, 131 | }); 132 | } 133 | } 134 | let blk_data = &mut self.blks[blk.0 as usize]; 135 | blk_data.stmts.push(Statement { lhs, rhs }); 136 | Ok(()) 137 | } 138 | 139 | // NOTE(ubsan): the returned blocks initially have 140 | // the same terminator as their parent 141 | pub fn term_if_else(&mut self, blk: Block, cond: Reference) -> (Block, Block, Block) { 142 | let (then, els, final_bb) = { 143 | let term = self.blks[blk.0 as usize].term; 144 | self.blks.push(BlockData::new()); 145 | self.blks.push(BlockData::new()); 146 | self.blks.push(BlockData::with_term(term)); 147 | let len = self.blks.len(); 148 | 149 | let final_bb = Block((len - 1) as u32); 150 | self.blks[len - 3].term = Terminator::Goto(final_bb); 151 | self.blks[len - 2].term = Terminator::Goto(final_bb); 152 | 153 | (Block((len - 3) as u32), Block((len - 2) as u32), final_bb) 154 | }; 155 | 156 | self.blks[blk.0 as usize].term = Terminator::IfElse { cond, then, els }; 157 | (then, els, final_bb) 158 | } 159 | 160 | pub fn add_anonymous_local(&mut self, ty: Type<'ctx>) -> Reference { 161 | self.locals.push(ty); 162 | let kind = BindingKind::Local(self.locals.number_of_types() - 1); 163 | self.bindings.push(Binding { 164 | name: None, 165 | ty, 166 | kind, 167 | }); 168 | Reference((self.bindings.len() - 1) as u32) 169 | } 170 | 171 | pub fn add_local(&mut self, name: String, ty: Type<'ctx>) -> Reference { 172 | self.locals.push(ty); 173 | let kind = BindingKind::Local(self.locals.number_of_types() - 1); 174 | self.bindings.push(Binding { 175 | name: Some(name), 176 | ty, 177 | kind, 178 | }); 179 | Reference((self.bindings.len() - 1) as u32) 180 | } 181 | } 182 | 183 | #[derive(Copy, Clone, Debug)] 184 | pub struct FunctionDecl(usize); 185 | 186 | // NOTE(ubsan): when I get namespacing, I should probably 187 | // use paths instead of names? 
188 | 189 | #[derive(Debug)] 190 | pub struct FunctionType<'ctx> { 191 | pub params: TypeList<'ctx>, 192 | pub ret: Type<'ctx>, 193 | } 194 | 195 | struct Function<'ctx> { 196 | ty: FunctionType<'ctx>, 197 | name: Option, 198 | value: Option>, 199 | } 200 | 201 | pub struct MirCtxt<'a> { 202 | types: Arena>, 203 | } 204 | 205 | impl<'a> MirCtxt<'a> { 206 | pub fn new() -> Self { 207 | MirCtxt { 208 | types: Arena::new(), 209 | } 210 | } 211 | } 212 | 213 | pub struct Mir<'ctx> { 214 | funcs: Vec>, 215 | types: &'ctx Arena>, 216 | builtin_types: BuiltinTypes<'ctx>, 217 | } 218 | 219 | impl<'ctx> ::std::fmt::Display for FunctionType<'ctx> { 220 | fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { 221 | write!(f, "(")?; 222 | if !self.params.is_empty() { 223 | let mut iter = self.params.iter(); 224 | let mut last = iter.next().unwrap(); 225 | while let Some(cur) = iter.next() { 226 | write!(f, "{}, ", last.name())?; 227 | last = cur; 228 | } 229 | write!(f, "{})", last.name())?; 230 | } 231 | write!(f, "-> {}", self.ret.name()) 232 | } 233 | } 234 | 235 | // creation and run 236 | impl<'ctx> Mir<'ctx> { 237 | pub fn new(ctx: &'ctx MirCtxt<'ctx>, mut ast: Ast) -> Result> { 238 | let types = &ctx.types; 239 | let builtin_types = BuiltinTypes { 240 | unit_ty: Type::new(types.push(NamedType::unit())), 241 | bool_ty: Type::new(types.push(NamedType::bool())), 242 | s32_ty: Type::new(types.push(NamedType::s32())), 243 | }; 244 | let mut self_: Mir<'ctx> = Mir { 245 | funcs: vec![], 246 | types, 247 | builtin_types, 248 | }; 249 | 250 | ast.build_mir(&mut self_)?; 251 | 252 | Ok(self_) 253 | } 254 | 255 | pub fn run(&self) { 256 | for (i, &Function { ref name, .. 
}) in self.funcs.iter().enumerate() { 257 | if let Some("main") = name.as_ref().map(|s| &**s) { 258 | return Runner::new(self).run(FunctionDecl(i)); 259 | } 260 | } 261 | panic!("no main function found") 262 | } 263 | } 264 | 265 | // functions 266 | impl<'ctx> Mir<'ctx> { 267 | pub fn add_function_decl( 268 | &mut self, 269 | name: Option, 270 | ty: FunctionType<'ctx>, 271 | span: Span, 272 | ) -> Result> { 273 | if let Some("main") = name.as_ref().map(|x| &**x) { 274 | if ty.ret != self.get_builtin_type(BuiltinType::Unit) { 275 | return Err(Spanned { 276 | thing: TypeErrorVariant::IncorrectlyTypedMain { has: ty }, 277 | span, 278 | }); 279 | } 280 | } 281 | self.funcs.push(Function { 282 | ty, 283 | name, 284 | value: None, 285 | }); 286 | Ok(FunctionDecl(self.funcs.len() - 1)) 287 | } 288 | 289 | pub fn get_function_builder(&self, decl: FunctionDecl) -> FunctionBuilder<'ctx> { 290 | FunctionBuilder::new(decl, self) 291 | } 292 | 293 | pub fn add_function_definition(&mut self, builder: FunctionBuilder<'ctx>) { 294 | let value = FunctionValue { 295 | blks: builder.blks, 296 | locals: builder.locals, 297 | bindings: builder.bindings, 298 | }; 299 | 300 | self.funcs[builder.decl.0].value = Some(value); 301 | } 302 | } 303 | 304 | // types 305 | impl<'ctx> Mir<'ctx> { 306 | /* 307 | pub fn insert_type(&self, ty: TypeVariant<'ctx>) -> Type<'ctx> { 308 | Type(self.types.push(ty)) 309 | } 310 | */ 311 | 312 | pub fn get_function_type(&self, decl: FunctionDecl) -> &FunctionType<'ctx> { 313 | &self.funcs[decl.0].ty 314 | } 315 | 316 | pub fn get_builtin_type(&self, ty: BuiltinType) -> Type<'ctx> { 317 | match ty { 318 | BuiltinType::Unit => self.builtin_types.unit_ty, 319 | BuiltinType::Bool => self.builtin_types.bool_ty, 320 | BuiltinType::SInt(IntSize::I32) => self.builtin_types.s32_ty, 321 | } 322 | } 323 | } 324 | 325 | use std::fmt::{self, Display}; 326 | 327 | // printing 328 | impl<'ctx> Display for Mir<'ctx> { 329 | fn fmt(&self, f: &mut fmt::Formatter) -> 
fmt::Result { 330 | fn binding_name(binding: &Option) -> &str { 331 | binding.as_ref().map(|s| &**s).unwrap_or("") 332 | } 333 | fn write_binding( 334 | f: &mut fmt::Formatter, 335 | bindings: &[Binding], 336 | r: Reference, 337 | ) -> fmt::Result { 338 | let name = binding_name(&bindings[r.0 as usize].name); 339 | write!(f, "{}_{}", name, r.0) 340 | } 341 | 342 | if let Some(err) = self.types.call_on_all(|ty| { 343 | if let Err(e) = write!(f, "type {} = ", ty.name()) { 344 | return Some(e); 345 | }; 346 | match *ty.variant() { 347 | TypeVariant::Builtin(_) => { 348 | if let Err(e) = writeln!(f, ";") { 349 | return Some(e); 350 | }; 351 | } 352 | TypeVariant::__LifetimeHolder(_) => unreachable!(), 353 | } 354 | None 355 | }) { 356 | return Err(err); 357 | }; 358 | for &Function { 359 | ref ty, 360 | ref name, 361 | ref value, 362 | } in &self.funcs 363 | { 364 | let (name, value) = (name.as_ref().unwrap(), value.as_ref().unwrap()); 365 | write!(f, "func {}(", name)?; 366 | if !ty.params.is_empty() { 367 | let mut iter = ty.params.iter(); 368 | let mut last = iter.next().expect("balkjlfkajdsf"); 369 | while let Some(cur) = iter.next() { 370 | write!(f, "{}, ", last.name())?; 371 | last = cur; 372 | } 373 | write!(f, "{}", last.name())?; 374 | } 375 | writeln!(f, "): {} {{", ty.ret.name())?; 376 | 377 | ::write_indent(f, 1)?; 378 | writeln!(f, "locals = {{")?; 379 | for loc_ty in &value.locals { 380 | ::write_indent(f, 2)?; 381 | writeln!(f, "{},", loc_ty.name())?; 382 | } 383 | ::write_indent(f, 1)?; 384 | writeln!(f, "}}")?; 385 | 386 | ::write_indent(f, 1)?; 387 | writeln!(f, "bindings = {{")?; 388 | for (i, binding) in value.bindings.iter().enumerate() { 389 | ::write_indent(f, 2)?; 390 | match binding.kind { 391 | BindingKind::Return => writeln!(f, ": {},", binding.ty.name())?, 392 | BindingKind::Param(p) => { 393 | writeln!( 394 | f, 395 | "{}_{}: {} = [{}],", 396 | binding_name(&binding.name), 397 | i, 398 | binding.ty.name(), 399 | p, 400 | )?; 401 | } 402 | 
BindingKind::Local(loc) => { 403 | writeln!( 404 | f, 405 | "{}_{}: {} = [{}],", 406 | binding_name(&binding.name), 407 | i, 408 | binding.ty.name(), 409 | loc, 410 | )?; 411 | } 412 | } 413 | } 414 | ::write_indent(f, 1)?; 415 | writeln!(f, "}}")?; 416 | 417 | let write_value = |f: &mut fmt::Formatter, val: &Value| match *val { 418 | Value::Literal(ref n) => writeln!(f, "literal {:?};", n), 419 | Value::Negative(r) => { 420 | write!(f, "-")?; 421 | write_binding(f, &value.bindings, r)?; 422 | writeln!(f, ";") 423 | } 424 | Value::Reference(r) => { 425 | write_binding(f, &value.bindings, r)?; 426 | writeln!(f, ";") 427 | } 428 | Value::Add(lhs, rhs) => { 429 | write_binding(f, &value.bindings, lhs)?; 430 | write!(f, " + ")?; 431 | write_binding(f, &value.bindings, rhs)?; 432 | writeln!(f, ";") 433 | } 434 | Value::LessEq(lhs, rhs) => { 435 | write_binding(f, &value.bindings, lhs)?; 436 | write!(f, " <= ")?; 437 | write_binding(f, &value.bindings, rhs)?; 438 | writeln!(f, ";") 439 | } 440 | Value::Call { 441 | ref callee, 442 | ref args, 443 | } => { 444 | let name = match self.funcs[callee.0].name { 445 | Some(ref name) => &**name, 446 | None => "", 447 | }; 448 | write!(f, "{}(", name)?; 449 | if !args.is_empty() { 450 | for arg in &args[..args.len() - 1] { 451 | write_binding(f, &value.bindings, *arg)?; 452 | write!(f, ", ")?; 453 | } 454 | write_binding(f, &value.bindings, args[args.len() - 1])?; 455 | } 456 | writeln!(f, ");") 457 | } 458 | Value::Log(ref arg) => { 459 | write!(f, "log(")?; 460 | write_binding(f, &value.bindings, *arg)?; 461 | writeln!(f, ");") 462 | } 463 | }; 464 | 465 | for (n, bb) in value.blks.iter().enumerate() { 466 | ::write_indent(f, 1)?; 467 | writeln!(f, "bb{} = {{", n)?; 468 | for stmt in &bb.stmts { 469 | let Statement { ref lhs, ref rhs } = *stmt; 470 | if lhs.0 == 0 { 471 | ::write_indent(f, 2)?; 472 | write!(f, " = ")?; 473 | } else { 474 | ::write_indent(f, 2)?; 475 | write_binding(f, &value.bindings, *lhs)?; 476 | write!(f, " 
= ")?; 477 | } 478 | write_value(f, rhs)?; 479 | } 480 | ::write_indent(f, 2)?; 481 | match bb.term { 482 | Terminator::Goto(blk) => { 483 | writeln!(f, "goto bb{};", blk.0)?; 484 | } 485 | Terminator::Return => { 486 | writeln!(f, "return;")?; 487 | } 488 | Terminator::IfElse { cond, then, els } => { 489 | write!(f, "if ")?; 490 | write_binding(f, &value.bindings, cond)?; 491 | writeln!(f, " {{ bb{} }} else {{ bb{} }}", then.0, els.0)?; 492 | } 493 | } 494 | ::write_indent(f, 1)?; 495 | writeln!(f, "}}")?; 496 | } 497 | writeln!(f, "}}")?; 498 | } 499 | Ok(()) 500 | } 501 | } 502 | -------------------------------------------------------------------------------- /src/mir/runner.rs: -------------------------------------------------------------------------------- 1 | use mir::{self, align, Mir}; 2 | 3 | const UNINITIALIZED: u8 = 0x42; 4 | 5 | // meant to be the state of a single function 6 | #[derive(Copy, Clone, Debug)] 7 | struct FunctionState<'mir, 'ctx: 'mir> { 8 | func: &'mir mir::FunctionValue<'ctx>, 9 | func_ty: &'mir mir::FunctionType<'ctx>, 10 | // indices into the stack 11 | // NOTE(ubsan): more information is held than actually 12 | // necessary 13 | // technically, we only need to keep `return_value` and can 14 | // calculate the rest 15 | return_value: usize, 16 | params_start: usize, 17 | locals_start: usize, 18 | } 19 | 20 | // hax 21 | #[derive(Copy, Clone, Debug)] 22 | enum Frame { 23 | Current, 24 | Previous, 25 | } 26 | 27 | pub struct Runner<'mir, 'ctx: 'mir> { 28 | mir: &'mir Mir<'ctx>, 29 | call_stack: Vec>, 30 | stack: Vec, 31 | } 32 | 33 | impl<'mir, 'ctx> Runner<'mir, 'ctx> { 34 | pub fn new(mir: &'mir Mir<'ctx>) -> Self { 35 | Runner { 36 | mir, 37 | call_stack: vec![], 38 | stack: vec![], 39 | } 40 | } 41 | 42 | fn current_state(&self) -> FunctionState<'mir, 'ctx> { 43 | *self.call_stack.last().expect("nothing on the call stack") 44 | } 45 | 46 | fn get_binding( 47 | &mut self, 48 | (frame, ref_): (Frame, mir::Reference), 49 | ) -> (*mut 
u8, mir::Type<'ctx>) { 50 | let base = self.stack.as_mut_ptr(); 51 | let len = self.stack.len(); // for assertions 52 | let frame = match frame { 53 | Frame::Current => self.current_state(), 54 | Frame::Previous => self.call_stack[self.call_stack.len() - 2], 55 | }; 56 | let offset = |off: usize| { 57 | // it's completely valid for zero-sized types to be at the end of address space 58 | if frame.func_ty.ret.size() != 0 { 59 | assert!(off < len, "tried to index out of bounds"); 60 | } 61 | unsafe { base.offset(off as isize) } 62 | }; 63 | match frame.func.bindings[ref_.0 as usize].kind { 64 | mir::BindingKind::Return => { 65 | let off = offset(frame.return_value); 66 | (off, frame.func_ty.ret) 67 | } 68 | mir::BindingKind::Param(i) => { 69 | let off = offset(frame.params_start + frame.func_ty.params.offset_of(i) as usize); 70 | (off, frame.func_ty.params[i]) 71 | } 72 | mir::BindingKind::Local(i) => { 73 | let off = offset(frame.locals_start + frame.func.locals.offset_of(i) as usize); 74 | (off, frame.func.locals[i]) 75 | } 76 | } 77 | } 78 | 79 | unsafe fn write( 80 | &mut self, 81 | dst: (Frame, mir::Reference), 82 | (src, src_ty): (*mut u8, mir::Type<'ctx>), 83 | ) { 84 | let (dst, dst_ty) = self.get_binding(dst); 85 | assert!( 86 | dst_ty == src_ty, 87 | "dst: {}, src: {}", 88 | dst_ty.name(), 89 | src_ty.name(), 90 | ); 91 | ::std::ptr::copy(src, dst, dst_ty.size() as usize); 92 | } 93 | 94 | fn pop_state(&mut self) { 95 | let new_size = self.current_state().return_value; 96 | self.stack.resize(new_size, UNINITIALIZED); 97 | self.call_stack.pop(); 98 | } 99 | 100 | // after this call, the stack will be set up for the call, 101 | // but without arguments 102 | fn push_state(&mut self, func: mir::FunctionDecl) { 103 | let (func_ty, func) = { 104 | let tmp = &self.mir.funcs[func.0]; 105 | let ty = &tmp.ty; 106 | let func = match tmp.value { 107 | Some(ref f) => f, 108 | None => { 109 | panic!( 110 | "Function never defined: {:?} ({:?})", 111 | 
self.mir.funcs[func.0].name, 112 | func, 113 | ); 114 | } 115 | }; 116 | (ty, func) 117 | }; 118 | let return_value = align(self.stack.len(), func_ty.ret.align() as usize); 119 | let return_end = return_value + func_ty.ret.size() as usize; 120 | let params_start = align(return_end, 16); 121 | let locals_start = params_start + func_ty.params.size() as usize; 122 | 123 | self.stack.resize(locals_start, UNINITIALIZED); 124 | 125 | self.call_stack.push(FunctionState { 126 | func, 127 | func_ty, 128 | return_value, 129 | params_start, 130 | locals_start, 131 | }); 132 | } 133 | 134 | unsafe fn read_value(&mut self, src: mir::Reference) -> T { 135 | use std::{mem, ptr}; 136 | let mut tmp = mem::zeroed(); 137 | let src = self.get_binding((Frame::Current, src)); 138 | assert!( 139 | mem::size_of::() == src.1.size() as usize, 140 | "attempted to read value of incorrect size: {} (size needed: {})", 141 | src.1.name(), 142 | mem::size_of::(), 143 | ); 144 | ptr::copy_nonoverlapping(src.0, (&mut tmp) as *mut _ as *mut u8, mem::size_of::()); 145 | tmp 146 | } 147 | 148 | pub fn run(&mut self, func: mir::FunctionDecl) { 149 | self.push_state(func); 150 | self.call(); 151 | self.pop_state(); 152 | } 153 | 154 | fn call(&mut self) { 155 | { 156 | let loc_size = self.current_state().func.locals.size(); 157 | let new_size = self.stack.len() + loc_size as usize; 158 | self.stack.resize(new_size, UNINITIALIZED); 159 | } 160 | let mut cur_blk = 0; 161 | 162 | loop { 163 | for stmt in &self.current_state().func.blks[cur_blk].stmts { 164 | let mir::Statement { lhs, ref rhs } = *stmt; 165 | match *rhs { 166 | mir::Value::Literal(ref lit) => self.stmt_lit(lhs, lit), 167 | mir::Value::Negative(ref_) => { 168 | self.stmt_neg(lhs, ref_); 169 | } 170 | mir::Value::Reference(rhs) => { 171 | self.stmt_ref(lhs, rhs); 172 | } 173 | mir::Value::LessEq(op_lhs, op_rhs) => { 174 | self.stmt_leq(lhs, op_lhs, op_rhs); 175 | } 176 | mir::Value::Add(op_lhs, op_rhs) => { 177 | self.stmt_add(lhs, op_lhs, 
op_rhs); 178 | } 179 | mir::Value::Call { 180 | ref callee, 181 | ref args, 182 | } => { 183 | self.stmt_call(lhs, callee, args); 184 | } 185 | mir::Value::Log(arg) => self.stmt_log(lhs, arg), 186 | } 187 | } 188 | match self.current_state().func.blks[cur_blk].term { 189 | mir::Terminator::IfElse { cond, then, els } => { 190 | let cond = unsafe { 191 | let tmp = self.read_value::(cond); 192 | assert!(tmp == 0 || tmp == 1); 193 | tmp != 0 194 | }; 195 | if cond { 196 | cur_blk = then.0 as usize; 197 | } else { 198 | cur_blk = els.0 as usize; 199 | } 200 | } 201 | mir::Terminator::Goto(blk) => { 202 | cur_blk = blk.0 as usize; 203 | } 204 | mir::Terminator::Return => { 205 | return; 206 | } 207 | } 208 | } 209 | } 210 | } 211 | 212 | impl<'mir, 'ctx> Runner<'mir, 'ctx> { 213 | fn stmt_ref(&mut self, dst: mir::Reference, src: mir::Reference) { 214 | let src = self.get_binding((Frame::Current, src)); 215 | unsafe { 216 | self.write((Frame::Current, dst), src); 217 | } 218 | } 219 | 220 | fn stmt_neg(&mut self, dst: mir::Reference, src: mir::Reference) { 221 | let (src, ty) = self.get_binding((Frame::Current, src)); 222 | unsafe { 223 | let mut src = ::std::mem::transmute::(-Self::get_value::((src, ty))); 224 | self.write((Frame::Current, dst), (src.as_mut_ptr(), ty)); 225 | } 226 | } 227 | 228 | fn stmt_call( 229 | &mut self, 230 | dst: mir::Reference, 231 | callee: &mir::FunctionDecl, 232 | args: &[mir::Reference], 233 | ) { 234 | self.push_state(*callee); 235 | for (i, &arg) in args.iter().enumerate() { 236 | let arg = self.get_binding((Frame::Previous, arg)); 237 | let parm = mir::Reference::param(i as u32); 238 | unsafe { 239 | self.write((Frame::Current, parm), arg); 240 | } 241 | } 242 | self.call(); 243 | 244 | let src = self.get_binding((Frame::Current, mir::Reference::ret())); 245 | unsafe { 246 | self.write((Frame::Previous, dst), src); 247 | } 248 | self.pop_state(); 249 | } 250 | 251 | fn stmt_lit(&mut self, dst: mir::Reference, src: &mir::Literal) { 252 | 
use std::mem::transmute; 253 | use mir::{BuiltinType, IntSize}; 254 | 255 | let mut backing = [0u8; 8]; 256 | let ty = match *src { 257 | mir::Literal::Int(i) => { 258 | let arr = unsafe { transmute::(i) }; 259 | backing[..4].copy_from_slice(&arr); 260 | self.mir.get_builtin_type(BuiltinType::SInt(IntSize::I32)) 261 | } 262 | mir::Literal::Bool(b) => { 263 | backing[0] = b as u8; 264 | self.mir.get_builtin_type(BuiltinType::Bool) 265 | } 266 | mir::Literal::Unit => self.mir.get_builtin_type(BuiltinType::Unit), 267 | }; 268 | unsafe { 269 | self.write((Frame::Current, dst), (backing.as_mut_ptr(), ty)); 270 | } 271 | } 272 | 273 | unsafe fn get_value(src: (*mut u8, mir::Type)) -> T { 274 | assert!(src.1.size() as usize == ::std::mem::size_of::()); 275 | let mut ret = ::std::mem::zeroed(); // zero-initialized (as read_value does) instead of mem::uninitialized() 276 | ::std::ptr::copy( 277 | src.0, 278 | (&mut ret) as *mut T as *mut u8, 279 | ::std::mem::size_of::(), 280 | ); 281 | ret 282 | } 283 | 284 | fn stmt_leq(&mut self, dst: mir::Reference, lhs: mir::Reference, rhs: mir::Reference) { 285 | let lhs = self.get_binding((Frame::Current, lhs)); 286 | let rhs = self.get_binding((Frame::Current, rhs)); 287 | let bool = self.mir.get_builtin_type(mir::BuiltinType::Bool); 288 | unsafe { 289 | let mut src = ::std::mem::transmute::( 290 | Self::get_value::(lhs) <= Self::get_value::(rhs), 291 | ); 292 | self.write((Frame::Current, dst), (src.as_mut_ptr(), bool)); 293 | } 294 | } 295 | 296 | fn stmt_add(&mut self, dst: mir::Reference, lhs: mir::Reference, rhs: mir::Reference) { 297 | let lhs = self.get_binding((Frame::Current, lhs)); 298 | let rhs = self.get_binding((Frame::Current, rhs)); 299 | unsafe { 300 | let mut src = ::std::mem::transmute::( 301 | Self::get_value::(lhs) + Self::get_value::(rhs), 302 | ); 303 | self.write((Frame::Current, dst), (src.as_mut_ptr(), lhs.1)); 304 | } 305 | } 306 | 307 | fn stmt_log(&mut self, _: mir::Reference, thing: mir::Reference) { 308 | let thing = self.get_binding((Frame::Current, thing)); 309 | if
let mir::TypeVariant::Builtin(ref ty) = *thing.1.variant() { 310 | unsafe { 311 | match *ty { 312 | mir::BuiltinType::SInt(mir::IntSize::I32) => { 313 | println!("log: {}", Self::get_value::(thing)); 314 | } 315 | mir::BuiltinType::Bool => { 316 | println!("log: {}", Self::get_value::(thing)); 317 | } 318 | mir::BuiltinType::Unit => { 319 | println!("log: ()"); 320 | } 321 | } 322 | } 323 | } else { 324 | unimplemented!() 325 | } 326 | } 327 | } 328 | -------------------------------------------------------------------------------- /src/mir/ty.rs: -------------------------------------------------------------------------------- 1 | use parse::{Span, Spanned}; 2 | use super::align; 3 | 4 | use std::{iter, slice}; 5 | use std::fmt::{self, Display}; 6 | use std::ops::{Deref, Index}; 7 | 8 | #[derive(Debug)] 9 | pub enum TypeErrorVariant<'ctx> { 10 | TypeNotFound(String), 11 | BindingNotFound(String), 12 | IncorrectlyTypedMain { has: super::FunctionType<'ctx> }, 13 | Mismatched { lhs: Type<'ctx>, rhs: Type<'ctx> }, 14 | NumberOfArgs { 15 | name: String, 16 | args_expected: u32, 17 | args_found: u32, 18 | }, 19 | } 20 | 21 | impl<'ctx> Display for TypeErrorVariant<'ctx> { 22 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 23 | use self::TypeErrorVariant::*; 24 | match *self { 25 | TypeNotFound(ref s) => write!(f, "could not find type '{}'", s), 26 | BindingNotFound(ref s) => write!(f, "could not find name '{}'", s), 27 | IncorrectlyTypedMain { ref has } => { 28 | write!(f, "main must have type () -> unit; it has type {}", has) 29 | } 30 | Mismatched { lhs, rhs } => write!(f, "lhs ({}) != rhs ({})", lhs.name(), rhs.name()), 31 | NumberOfArgs { 32 | ref name, 33 | ref args_expected, 34 | ref args_found, 35 | } => write!( 36 | f, 37 | "function '{}' takes {} arguments, but {} arguments were passed", 38 | name, 39 | args_expected, 40 | args_found 41 | ), 42 | } 43 | } 44 | } 45 | pub type TypeError<'ctx> = Spanned>; 46 | 47 | impl<'ctx> TypeError<'ctx> { 48 | pub fn 
type_not_found(name: String, span: Span) -> Self { 49 | Spanned { 50 | thing: TypeErrorVariant::TypeNotFound(name), 51 | span, 52 | } 53 | } 54 | 55 | pub fn binding_not_found(name: String, span: Span) -> Self { 56 | Spanned { 57 | thing: TypeErrorVariant::BindingNotFound(name), 58 | span, 59 | } 60 | } 61 | } 62 | 63 | #[derive(Copy, Clone, Debug)] 64 | pub enum IntSize { 65 | //I8, 66 | //I16, 67 | I32, 68 | //I64, 69 | // ISize, 70 | // I128, 71 | } 72 | impl IntSize { 73 | fn size(self) -> u32 { 74 | match self { 75 | IntSize::I32 => 4, 76 | } 77 | } 78 | } 79 | #[derive(Debug)] 80 | pub enum BuiltinType { 81 | SInt(IntSize), 82 | //UInt(IntSize), 83 | Bool, 84 | Unit, 85 | } 86 | 87 | impl BuiltinType { 88 | fn size(&self) -> u32 { 89 | match *self { 90 | BuiltinType::SInt(sz) => sz.size(), 91 | BuiltinType::Bool => 1, 92 | BuiltinType::Unit => 0, 93 | } 94 | } 95 | 96 | fn align(&self) -> u32 { 97 | match *self { 98 | BuiltinType::SInt(sz) => sz.size(), 99 | BuiltinType::Bool => 1, 100 | BuiltinType::Unit => 1, 101 | } 102 | } 103 | } 104 | 105 | #[derive(Debug)] 106 | pub enum TypeVariant<'ctx> { 107 | Builtin(BuiltinType), 108 | __LifetimeHolder(::std::marker::PhantomData<&'ctx ()>), 109 | } 110 | 111 | impl<'ctx> TypeVariant<'ctx> { 112 | fn size(&self) -> u32 { 113 | match *self { 114 | TypeVariant::Builtin(ref builtin) => builtin.size(), 115 | TypeVariant::__LifetimeHolder(_) => unreachable!(), 116 | } 117 | } 118 | fn align(&self) -> u32 { 119 | match *self { 120 | TypeVariant::Builtin(ref builtin) => builtin.align(), 121 | TypeVariant::__LifetimeHolder(_) => unreachable!(), 122 | } 123 | } 124 | } 125 | 126 | #[derive(Debug)] 127 | pub struct NamedType<'ctx> { 128 | ty: TypeVariant<'ctx>, 129 | name: String, 130 | } 131 | 132 | impl<'ctx> NamedType<'ctx> { 133 | pub fn s32() -> Self { 134 | Self { 135 | ty: TypeVariant::Builtin(BuiltinType::SInt(IntSize::I32)), 136 | name: "s32".to_owned(), 137 | } 138 | } 139 | 140 | pub fn bool() -> Self { 141 | Self 
{ 142 | ty: TypeVariant::Builtin(BuiltinType::Bool), 143 | name: "bool".to_owned(), 144 | } 145 | } 146 | pub fn unit() -> Self { 147 | Self { 148 | ty: TypeVariant::Builtin(BuiltinType::Unit), 149 | name: "unit".to_owned(), 150 | } 151 | } 152 | 153 | pub fn size(&self) -> u32 { 154 | self.ty.size() 155 | } 156 | pub fn align(&self) -> u32 { 157 | self.ty.align() 158 | } 159 | pub fn name(&self) -> &str { 160 | &self.name 161 | } 162 | pub fn variant(&self) -> &TypeVariant<'ctx> { 163 | &self.ty 164 | } 165 | } 166 | 167 | #[derive(Copy, Clone, Debug)] 168 | pub struct Type<'ctx>(&'ctx NamedType<'ctx>); 169 | impl<'ctx> Type<'ctx> { 170 | pub fn new(inner: &'ctx NamedType<'ctx>) -> Self { 171 | Type(inner) 172 | } 173 | } 174 | impl<'ctx> PartialEq for Type<'ctx> { 175 | fn eq(&self, other: &Self) -> bool { 176 | self.0 as *const _ == other.0 as *const _ 177 | } 178 | } 179 | impl<'ctx> Eq for Type<'ctx> {} 180 | impl<'ctx> Deref for Type<'ctx> { 181 | type Target = NamedType<'ctx>; 182 | fn deref(&self) -> &NamedType<'ctx> { 183 | self.0 184 | } 185 | } 186 | 187 | pub struct BuiltinTypes<'ctx> { 188 | pub unit_ty: Type<'ctx>, 189 | pub bool_ty: Type<'ctx>, 190 | pub s32_ty: Type<'ctx>, 191 | } 192 | 193 | #[derive(Debug)] 194 | pub struct TypeList<'ctx> { 195 | tys: Vec>, 196 | } 197 | 198 | impl<'ctx> TypeList<'ctx> { 199 | pub fn new() -> Self { 200 | TypeList { tys: vec![] } 201 | } 202 | 203 | pub fn push(&mut self, ty: Type<'ctx>) { 204 | self.tys.push(ty); 205 | } 206 | 207 | pub fn from_existing(tys: Vec>) -> Self { 208 | TypeList { tys } 209 | } 210 | 211 | pub fn number_of_types(&self) -> u32 { 212 | self.tys.len() as u32 213 | } 214 | 215 | pub fn is_empty(&self) -> bool { 216 | self.tys.is_empty() 217 | } 218 | 219 | // should really be cached 220 | // aligned to 16 bytes 221 | pub fn size(&self) -> u32 { 222 | let mut offset = 0; 223 | for ty in &self.tys { 224 | let sz = ty.size(); 225 | let aln = ty.align(); 226 | offset = align(offset, aln); 227 | 
offset += sz; 228 | } 229 | align(offset, 16) 230 | } 231 | 232 | pub fn offset_of(&self, idx: u32) -> u32 { 233 | let mut offset = 0; 234 | for ty in &self.tys[..idx as usize] { 235 | let sz = ty.size(); 236 | let aln = ty.align(); 237 | offset = align(offset, aln); 238 | offset += sz; 239 | } 240 | align(offset, self.tys[idx as usize].align()) 241 | } 242 | 243 | pub fn iter<'a>(&'a self) -> iter::Cloned>> { 244 | self.tys.iter().cloned() 245 | } 246 | 247 | pub fn get(&self, idx: u32) -> Option> { 248 | self.tys.get(idx as usize).map(|&x| x) 249 | } 250 | } 251 | 252 | impl<'ctx> Index for TypeList<'ctx> { 253 | type Output = Type<'ctx>; 254 | fn index(&self, idx: u32) -> &Type<'ctx> { 255 | &self.tys[idx as usize] 256 | } 257 | } 258 | 259 | impl<'a, 'ctx> IntoIterator for &'a TypeList<'ctx> { 260 | type Item = Type<'ctx>; 261 | type IntoIter = iter::Cloned>>; 262 | 263 | fn into_iter(self) -> Self::IntoIter { 264 | self.tys.iter().cloned() 265 | } 266 | } 267 | -------------------------------------------------------------------------------- /src/parse/lexer.rs: -------------------------------------------------------------------------------- 1 | extern crate ucd; 2 | extern crate unicode_normalization; 3 | extern crate unicode_xid; 4 | use self::unicode_normalization::{Recompositions, UnicodeNormalization}; 5 | 6 | use parse::Spanned; 7 | use std::str; 8 | use std::fmt::{self, Display}; 9 | 10 | #[derive(Copy, Clone, Debug)] 11 | struct Location { 12 | line: u32, 13 | column: u32, 14 | } 15 | 16 | impl Location { 17 | fn new() -> Self { 18 | Location { line: 1, column: 0 } 19 | } 20 | 21 | fn next_char(self) -> Self { 22 | Location { 23 | column: self.column + 1, 24 | line: self.line, 25 | } 26 | } 27 | 28 | fn next_line(self) -> Self { 29 | Location { 30 | column: 0, 31 | line: self.line + 1, 32 | } 33 | } 34 | } 35 | 36 | impl Display for Location { 37 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 38 | write!(f, "({}:{})", self.line, self.column) 
39 | } 40 | } 41 | 42 | impl Spanned { 43 | fn single(thing: T, start: Location) -> Self { 44 | use super::Span; 45 | let end = start.next_char(); 46 | Spanned { 47 | thing, 48 | span: Span { 49 | start_line: start.line, 50 | start_column: start.column, 51 | end_line: end.line, 52 | end_column: end.column, 53 | }, 54 | } 55 | } 56 | 57 | fn span(thing: T, start: Location, end: Location) -> Self { 58 | use super::Span; 59 | let end = end.next_char(); 60 | Spanned { 61 | thing, 62 | span: Span { 63 | start_line: start.line, 64 | start_column: start.column, 65 | end_line: end.line, 66 | end_column: end.column, 67 | }, 68 | } 69 | } 70 | } 71 | 72 | 73 | pub type LexerResult = Result; 74 | 75 | #[derive(Clone, Debug, PartialEq, Eq)] 76 | pub enum LexerErrorVariant { 77 | IdentAfterIntLiteral, 78 | UnclosedComment, 79 | ReservedToken(&'static str), 80 | UnknownChar(char), 81 | } 82 | 83 | impl Display for LexerErrorVariant { 84 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 85 | use self::LexerErrorVariant::*; 86 | match *self { 87 | IdentAfterIntLiteral => write!( 88 | f, 89 | "found ident after int literal (this is reserved for future extensions)" 90 | ), 91 | UnclosedComment => write!(f, "unclosed comment"), 92 | ReservedToken(ref s) => write!(f, "reserved token: '{}'", s), 93 | UnknownChar(ref c) => write!(f, "unknown character: '{}'", c), 94 | } 95 | } 96 | } 97 | pub type LexerError = Spanned; 98 | 99 | #[derive(Clone, Debug, PartialEq, Eq)] 100 | pub enum TokenVariant { 101 | // Categories 102 | //KeywordRaw, 103 | //KeywordRef, 104 | //KeywordMut, 105 | //KeywordOwn, 106 | 107 | // Braces 108 | OpenBrace, 109 | CloseBrace, 110 | OpenParen, 111 | CloseParen, 112 | 113 | // Expression 114 | Dot, 115 | KeywordTrue, 116 | KeywordFalse, 117 | KeywordIf, 118 | KeywordElse, 119 | KeywordFunc, 120 | KeywordLet, 121 | KeywordType, 122 | KeywordStruct, 123 | //KeywordVal, 124 | Ident(String), 125 | Integer(u64), 126 | 127 | // TODO(ubsan): should be in its own, 
out-of-line enum 128 | Plus, 129 | Minus, 130 | //Star, 131 | //And, 132 | LessEq, 133 | 134 | // Declaration/Types/Assignment 135 | Colon, 136 | Equals, 137 | 138 | // Separators 139 | Semicolon, 140 | Comma, 141 | //Comma, 142 | Eof, 143 | } 144 | pub type Token = Spanned; 145 | 146 | 147 | pub struct Lexer<'src> { 148 | src: Recompositions>, 149 | lookahead: Option, 150 | current_loc: Location, 151 | } 152 | 153 | impl<'src> Lexer<'src> { 154 | pub fn new(src: &str) -> Lexer { 155 | Lexer { 156 | src: src.nfc(), 157 | lookahead: None, 158 | current_loc: Location::new(), 159 | } 160 | } 161 | 162 | #[inline] 163 | fn is_start_of_ident(c: char) -> bool { 164 | unicode_xid::UnicodeXID::is_xid_start(c) || c == '_' 165 | } 166 | 167 | #[inline] 168 | fn is_ident(c: char) -> bool { 169 | unicode_xid::UnicodeXID::is_xid_continue(c) 170 | } 171 | 172 | #[inline] 173 | fn is_dec_digit(c: char) -> bool { 174 | c >= '0' && c <= '9' 175 | } 176 | 177 | #[inline] 178 | fn is_whitespace(c: char) -> bool { 179 | ucd::Codepoint::is_whitespace(c) 180 | } 181 | 182 | fn getc(&mut self) -> Option<(char, Location)> { 183 | if let Some(ret) = self.peekc() { 184 | self.lookahead.take(); 185 | self.current_loc = ret.1; 186 | Some(ret) 187 | } else { 188 | None 189 | } 190 | } 191 | 192 | fn peekc(&mut self) -> Option<(char, Location)> { 193 | if let Some(ch) = self.lookahead { 194 | if ch == '\n' { 195 | Some((ch, self.current_loc.next_line())) 196 | } else { 197 | Some((ch, self.current_loc.next_char())) 198 | } 199 | } else if let Some(ch) = self.src.next() { 200 | self.lookahead = Some(ch); 201 | if ch == '\n' { 202 | Some((ch, self.current_loc.next_line())) 203 | } else { 204 | Some((ch, self.current_loc.next_char())) 205 | } 206 | } else { 207 | None 208 | } 209 | } 210 | 211 | fn eat_whitespace(&mut self) { 212 | loop { 213 | match self.peekc() { 214 | Some((c, _)) if Self::is_whitespace(c) => { 215 | self.getc(); 216 | } 217 | _ => break, 218 | } 219 | } 220 | } 221 | 222 | fn 
block_comment(&mut self, loc: Location) -> LexerResult<()> { 223 | let unclosed_err = Err(Spanned::single(LexerErrorVariant::UnclosedComment, loc)); 224 | loop { 225 | let c = self.getc(); 226 | if let Some(('*', _)) = c { 227 | let c = self.getc(); 228 | if let Some(('/', _)) = c { 229 | return Ok(()); 230 | } else if let None = c { 231 | return unclosed_err; 232 | } 233 | } else if let Some(('/', loc)) = c { 234 | let c = self.getc(); 235 | if let Some(('c', _)) = c { 236 | self.block_comment(loc)? 237 | } else if let None = c { 238 | return unclosed_err; 239 | } 240 | } else if let None = c { 241 | return unclosed_err; 242 | } 243 | } 244 | } 245 | 246 | fn line_comment(&mut self) { 247 | loop { 248 | match self.getc() { 249 | Some(('\n', _)) => { 250 | break; 251 | } 252 | None => break, 253 | Some(_) => {} 254 | } 255 | } 256 | } 257 | 258 | // TODO(ubsan): switch to a more modular thing that 259 | // follows the lexer files more closely 260 | pub fn next_token(&mut self) -> LexerResult { 261 | self.eat_whitespace(); 262 | let (first, loc) = match self.getc() { 263 | Some(c) => c, 264 | None => { 265 | return Ok(Spanned::single(TokenVariant::Eof, self.current_loc)); 266 | } 267 | }; 268 | match first { 269 | '(' => Ok(Spanned::single(TokenVariant::OpenParen, loc)), 270 | ')' => Ok(Spanned::single(TokenVariant::CloseParen, loc)), 271 | '{' => Ok(Spanned::single(TokenVariant::OpenBrace, loc)), 272 | '}' => Ok(Spanned::single(TokenVariant::CloseBrace, loc)), 273 | ';' => Ok(Spanned::single(TokenVariant::Semicolon, loc)), 274 | '.' 
=> match self.peekc() { 275 | Some(('.', end_loc)) => { 276 | self.getc(); 277 | Err(Spanned::span( 278 | LexerErrorVariant::ReservedToken(".."), 279 | loc, 280 | end_loc, 281 | )) 282 | } 283 | _ => Ok(Spanned::single(TokenVariant::Dot, loc)), 284 | }, 285 | ':' => match self.peekc() { 286 | Some((':', end_loc)) => { 287 | self.getc(); 288 | Err(Spanned::span( 289 | LexerErrorVariant::ReservedToken("::"), 290 | loc, 291 | end_loc, 292 | )) 293 | } 294 | Some(('=', end_loc)) => { 295 | self.getc(); 296 | Err(Spanned::span( 297 | LexerErrorVariant::ReservedToken(":="), 298 | loc, 299 | end_loc, 300 | )) 301 | } 302 | _ => Ok(Spanned::single(TokenVariant::Colon, loc)), 303 | }, 304 | ',' => Ok(Spanned::single(TokenVariant::Comma, loc)), 305 | '&' => match self.peekc() { 306 | Some(('&', end_loc)) => Err(Spanned::span( 307 | LexerErrorVariant::ReservedToken("&&"), 308 | loc, 309 | end_loc, 310 | )), 311 | Some(('=', end_loc)) => Err(Spanned::span( 312 | LexerErrorVariant::ReservedToken("&="), 313 | loc, 314 | end_loc, 315 | )), 316 | _ => Err(Spanned::single(LexerErrorVariant::ReservedToken("&"), loc)), 317 | }, 318 | '+' => { 319 | match self.peekc() { 320 | // eventually, concat operator 321 | Some(('+', end_loc)) => Err(Spanned::span( 322 | LexerErrorVariant::ReservedToken("++"), 323 | loc, 324 | end_loc, 325 | )), 326 | Some(('=', end_loc)) => Err(Spanned::span( 327 | LexerErrorVariant::ReservedToken("+="), 328 | loc, 329 | end_loc, 330 | )), 331 | _ => Ok(Spanned::single(TokenVariant::Plus, loc)), 332 | } 333 | } 334 | '-' => match self.peekc() { 335 | Some(('>', end_loc)) => { 336 | self.getc(); 337 | Err(Spanned::span( 338 | LexerErrorVariant::ReservedToken("->"), 339 | loc, 340 | end_loc, 341 | )) 342 | } 343 | Some(('=', end_loc)) => { 344 | self.getc(); 345 | Err(Spanned::span( 346 | LexerErrorVariant::ReservedToken("-="), 347 | loc, 348 | end_loc, 349 | )) 350 | } 351 | _ => Ok(Spanned::single(TokenVariant::Minus, loc)), 352 | }, 353 | '*' => match 
self.peekc() { 354 | Some(('=', end_loc)) => Err(Spanned::span( 355 | LexerErrorVariant::ReservedToken("*="), 356 | loc, 357 | end_loc, 358 | )), 359 | _ => Err(Spanned::single(LexerErrorVariant::ReservedToken("*"), loc)), 360 | }, 361 | '/' => match self.peekc() { 362 | Some(('*', _)) => { 363 | self.getc(); 364 | self.block_comment(loc)?; 365 | self.next_token() 366 | } 367 | Some(('/', _)) => { 368 | self.getc(); 369 | self.line_comment(); 370 | self.next_token() 371 | } 372 | _ => Err(Spanned::single(LexerErrorVariant::ReservedToken("/"), loc)), 373 | }, 374 | 375 | '<' => match self.peekc() { 376 | Some(('=', end_loc)) => { 377 | self.getc(); 378 | Ok(Spanned::span(TokenVariant::LessEq, loc, end_loc)) 379 | } 380 | _ => Err(Spanned::single(LexerErrorVariant::ReservedToken("<"), loc)), 381 | }, 382 | '=' => match self.peekc() { 383 | Some(('=', end_loc)) => { 384 | self.getc(); 385 | Err(Spanned::span( 386 | LexerErrorVariant::ReservedToken("=="), 387 | loc, 388 | end_loc, 389 | )) 390 | } 391 | _ => Ok(Spanned::single(TokenVariant::Equals, loc)), 392 | }, 393 | c if Self::is_start_of_ident(c) => { 394 | // ident 395 | let mut end_loc = loc; 396 | let mut ident = String::new(); 397 | ident.push(c); 398 | loop { 399 | if let Some((c, loc)) = self.peekc() { 400 | if Self::is_ident(c) { 401 | self.getc(); 402 | ident.push(c); 403 | end_loc = loc; 404 | } else { 405 | break; 406 | } 407 | } else { 408 | break; 409 | } 410 | } 411 | let err = |tok| { 412 | Err(Spanned::span( 413 | LexerErrorVariant::ReservedToken(tok), 414 | loc, 415 | end_loc, 416 | )) 417 | }; 418 | // keyword 419 | let tok = if ident == "let" { 420 | TokenVariant::KeywordLet 421 | } else if ident == "func" { 422 | TokenVariant::KeywordFunc 423 | } else if ident == "type" { 424 | TokenVariant::KeywordType 425 | } else if ident == "struct" { 426 | TokenVariant::KeywordStruct 427 | } else if ident == "true" { 428 | TokenVariant::KeywordTrue 429 | } else if ident == "false" { 430 | 
TokenVariant::KeywordFalse 431 | } else if ident == "if" { 432 | TokenVariant::KeywordIf 433 | } else if ident == "else" { 434 | TokenVariant::KeywordElse 435 | } else if ident == "val" { 436 | return err("val"); 437 | } else if ident == "data" { 438 | return err("data"); 439 | } else if ident == "raw" { 440 | return err("raw"); 441 | } else if ident == "ref" { 442 | return err("ref"); 443 | } else if ident == "mut" { 444 | return err("mut"); 445 | } else if ident == "own" { 446 | return err("own"); 447 | } else { 448 | TokenVariant::Ident(ident) 449 | }; 450 | 451 | Ok(Spanned::span(tok, loc, end_loc)) 452 | } 453 | c if Self::is_dec_digit(c) => { 454 | // number-literal 455 | // TODO(ubsan): support non-decimal integer literals 456 | let mut string = String::new(); 457 | string.push(c); 458 | let mut end_loc = loc; 459 | loop { 460 | if let Some((' ', _)) = self.peekc() { 461 | self.getc(); 462 | } 463 | if let Some((c, loc)) = self.peekc() { 464 | if Self::is_dec_digit(c) { 465 | self.getc(); 466 | string.push(c); 467 | end_loc = loc; 468 | } else if Self::is_start_of_ident(c) { 469 | return Err(Spanned::single( 470 | LexerErrorVariant::IdentAfterIntLiteral, 471 | loc, 472 | )); 473 | } else { 474 | break; 475 | } 476 | } else { 477 | break; 478 | } 479 | } 480 | let value = string 481 | .parse::() 482 | .expect("we pushed something which wasn't 0...9 onto a string"); 483 | 484 | Ok(Spanned::span(TokenVariant::Integer(value), loc, end_loc)) 485 | } 486 | 487 | ch => Err(Spanned::single(LexerErrorVariant::UnknownChar(ch), loc)), 488 | } 489 | } 490 | } 491 | -------------------------------------------------------------------------------- /src/parse/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod lexer; 2 | 3 | use self::lexer::{Lexer, LexerError, LexerErrorVariant, Token, TokenVariant}; 4 | use ast::{BinOp, Expression, ExpressionVariant, FunctionValue, Statement, StatementVariant, 5 | StringlyType}; 6 | 7 | use 
/// A value that is one of two alternatives.
///
/// In this module `Left` carries the thing that was parsed and `Right`
/// carries what was found instead (a terminating token, or a statement).
enum Either<T, U> {
    Left(T),
    Right(U),
}
use self::Either::{Left, Right};

/// A region of source text, from a start (line, column) to an end
/// (line, column).
#[derive(Copy, Clone, Debug)]
pub struct Span {
    pub start_line: u32,
    pub start_column: u32,
    pub end_line: u32,
    pub end_column: u32,
}

impl Span {
    /// Returns the span that starts where `self` starts and ends where
    /// `end` ends. The end of `self` and the start of `end` are ignored.
    fn union(self, end: Span) -> Self {
        Span {
            start_line: self.start_line,
            start_column: self.start_column,
            end_line: end.end_line,
            end_column: end.end_column,
        }
    }
}

impl Display for Span {
    /// Formats as `(start_line, start_column), (end_line, end_column)`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "({}, {}), ({}, {})",
            self.start_line,
            self.start_column,
            self.end_line,
            self.end_column
        )
    }
}

/// A value of type `T` together with the source span it came from.
///
/// Dereferences to the wrapped value, so `*spanned` is the `T` itself.
#[derive(Copy, Clone, Debug)]
pub struct Spanned<T> {
    pub thing: T,
    pub span: Span,
}

impl<T> Deref for Spanned<T> {
    type Target = T;
    fn deref(&self) -> &T {
        &self.thing
    }
}

impl<T> DerefMut for Spanned<T> {
    fn deref_mut(&mut self) -> &mut T {
        &mut self.thing
    }
}

impl<T: Display> Display for Spanned<T> {
    /// Formats as `<thing> at <span>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{} at {}", self.thing, self.span)
    }
}
Display for ParserErrorVariant { 111 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 112 | use self::ParserErrorVariant::*; 113 | match *self { 114 | ExpectedEof => write!(f, "expected end of file - not an error..."), 115 | LexerError(ref le) => le.fmt(f), 116 | UnexpectedToken { 117 | ref found, 118 | ref expected, 119 | } => write!( 120 | f, 121 | "unexpected token: expected {:?}, found {:?}", 122 | expected, 123 | found, 124 | ), 125 | } 126 | } 127 | } 128 | pub type ParserError = Spanned; 129 | 130 | impl From for ParserError { 131 | fn from(le: LexerError) -> Self { 132 | Spanned { 133 | thing: ParserErrorVariant::LexerError(le.thing), 134 | span: le.span, 135 | } 136 | } 137 | } 138 | pub type ParserResult = Result; 139 | 140 | pub struct Parser<'src> { 141 | lexer: Lexer<'src>, 142 | lookahead: Option, 143 | } 144 | 145 | macro_rules! unexpected_token { 146 | ( 147 | $tok:expr, 148 | $expected:ident, 149 | $span:expr 150 | $(,)* 151 | ) => ({ 152 | let thing = ParserErrorVariant::UnexpectedToken { 153 | found: $tok, 154 | expected: ExpectedToken::$expected, 155 | }; 156 | Err(Spanned { thing, span: $span }) 157 | }); 158 | ( 159 | $tok:expr, 160 | $expected:ident 161 | $(,)* 162 | ) => (unexpected_token!($tok.thing, $expected, $tok.span)); 163 | } 164 | 165 | macro_rules! allow_eof { 166 | ($tok:expr) => ( 167 | match $tok { 168 | t @ Ok(_) => t, 169 | Err(sp) => { 170 | let Spanned { thing, span } = sp; 171 | match thing { 172 | ParserErrorVariant::UnexpectedToken { 173 | found: TokenVariant::Eof, 174 | .. 175 | } => Err(Spanned { 176 | thing: ParserErrorVariant::ExpectedEof, 177 | span, 178 | }), 179 | thing => Err(Spanned { thing, span }), 180 | } 181 | }, 182 | } 183 | ) 184 | } 185 | 186 | macro_rules! eat_token { 187 | ($slf:expr, $tok:ident) => ({ 188 | match $slf.get_token()? { 189 | s @ Spanned { thing: TokenVariant::$tok, .. 
} => s, 190 | Spanned { thing, span } => return Err(Spanned { 191 | thing: ParserErrorVariant::UnexpectedToken { 192 | found: thing, 193 | expected: ExpectedToken::SpecificToken(TokenVariant::$tok), 194 | }, 195 | span, 196 | }), 197 | } 198 | }); 199 | } 200 | 201 | macro_rules! maybe_eat_token { 202 | ($slf:expr, $tok:ident) => ({ 203 | match $slf.peek_token()? { 204 | &Spanned { thing: TokenVariant::$tok, .. } => Some( 205 | $slf.get_token()? 206 | ), 207 | _ => None, 208 | } 209 | }); 210 | } 211 | 212 | // NOTE(ubsan): once we get internal blocks, we should really 213 | // have one function for parsing both func and let definitions 214 | impl<'src> Parser<'src> { 215 | pub fn new(file: &'src str) -> Self { 216 | Parser { 217 | lexer: Lexer::new(file), 218 | lookahead: None, 219 | } 220 | } 221 | 222 | fn get_token(&mut self) -> ParserResult { 223 | match self.lookahead.take() { 224 | Some(tok) => Ok(tok), 225 | None => Ok(self.lexer.next_token()?), 226 | } 227 | } 228 | fn peek_token(&mut self) -> ParserResult<&Token> { 229 | let tok = match self.lookahead { 230 | Some(ref tok) => return Ok(tok), 231 | None => self.lexer.next_token()?, 232 | }; 233 | self.lookahead = Some(tok); 234 | if let Some(ref tok) = self.lookahead { 235 | Ok(tok) 236 | } else { 237 | unreachable!() 238 | } 239 | } 240 | 241 | fn end_of_expr(s: &Token) -> Option { 242 | match **s { 243 | TokenVariant::Semicolon => Some(Spanned { 244 | thing: TokenVariant::Semicolon, 245 | span: s.span, 246 | }), 247 | TokenVariant::CloseBrace => Some(Spanned { 248 | thing: TokenVariant::CloseBrace, 249 | span: s.span, 250 | }), 251 | TokenVariant::CloseParen => Some(Spanned { 252 | thing: TokenVariant::CloseParen, 253 | span: s.span, 254 | }), 255 | _ => None, 256 | } 257 | } 258 | 259 | fn binop(s: &Token) -> Option { 260 | match **s { 261 | TokenVariant::Plus => Some(BinOp::Plus), 262 | _ => None, 263 | } 264 | } 265 | 266 | fn parse_type(&mut self) -> ParserResult { 267 | let Spanned { thing, span } = 
self.get_token()?; 268 | match thing { 269 | TokenVariant::Ident(s) => Ok(StringlyType::UserDefinedType(s)), 270 | tok => unexpected_token!(tok, Type, span), 271 | } 272 | } 273 | 274 | fn maybe_parse_single_expr(&mut self) -> ParserResult> { 275 | if let Some(tok) = Self::end_of_expr(self.peek_token()?) { 276 | return Ok(Right(tok)); 277 | } 278 | let Spanned { thing, span } = self.get_token()?; 279 | 280 | let mut expr = match thing { 281 | TokenVariant::Integer(u) => Spanned { 282 | thing: ExpressionVariant::IntLiteral { 283 | is_negative: false, 284 | value: u, 285 | }, 286 | span, 287 | }, 288 | TokenVariant::Minus => if let &Spanned { 289 | thing: TokenVariant::Integer(n), 290 | span: end_span, 291 | } = self.peek_token()? 292 | { 293 | self.get_token()?; 294 | Spanned { 295 | thing: ExpressionVariant::IntLiteral { 296 | is_negative: true, 297 | value: n, 298 | }, 299 | span: span.union(end_span), 300 | } 301 | } else { 302 | let expr = self.parse_single_expr()?; 303 | Spanned { 304 | thing: ExpressionVariant::Negative(Box::new(expr)), 305 | span, 306 | } 307 | }, 308 | TokenVariant::OpenParen => { 309 | let end = eat_token!(self, CloseParen).span; 310 | Spanned { 311 | thing: ExpressionVariant::UnitLiteral, 312 | span: span.union(end), 313 | } 314 | } 315 | TokenVariant::Ident(s) => Spanned { 316 | thing: ExpressionVariant::Variable(s), 317 | span, 318 | }, 319 | TokenVariant::KeywordTrue => Spanned { 320 | thing: ExpressionVariant::BoolLiteral(true), 321 | span, 322 | }, 323 | TokenVariant::KeywordFalse => Spanned { 324 | thing: ExpressionVariant::BoolLiteral(false), 325 | span, 326 | }, 327 | TokenVariant::KeywordIf => { 328 | fn parse(this: &mut Parser, span: Span) -> ParserResult { 329 | let cond = this.parse_expr()?; 330 | let then = this.parse_block()?; 331 | if let Some(_) = maybe_eat_token!(this, KeywordElse) { 332 | let els = if let Some(_) = maybe_eat_token!(this, KeywordIf) { 333 | parse(this, span)? 334 | } else { 335 | this.parse_block()? 
336 | }; 337 | let end = els.span; 338 | Ok(Spanned { 339 | thing: ExpressionVariant::IfElse { 340 | cond: Box::new(cond), 341 | then: Box::new(then), 342 | els: Box::new(els), 343 | }, 344 | span: span.union(end), 345 | }) 346 | } else { 347 | let els = Spanned { 348 | thing: ExpressionVariant::UnitLiteral, 349 | span, 350 | }; 351 | Ok(Spanned { 352 | thing: ExpressionVariant::IfElse { 353 | cond: Box::new(cond), 354 | then: Box::new(then), 355 | els: Box::new(els), 356 | }, 357 | span, 358 | }) 359 | } 360 | }; 361 | parse(self, span)? 362 | } 363 | TokenVariant::KeywordElse => { 364 | return unexpected_token!(TokenVariant::KeywordElse, Expr, span); 365 | } 366 | TokenVariant::OpenBrace => self.parse_block_no_open(span)?, 367 | tok => panic!( 368 | "unimplemented expression: {:?}", 369 | Spanned { thing: tok, span }, 370 | ), 371 | }; 372 | 373 | // NOTE(ubsan): should be while let, for multiple function 374 | // calls in a row 375 | if let TokenVariant::OpenParen = **self.peek_token()? { 376 | self.get_token()?; 377 | 378 | let mut args = vec![]; 379 | let end; 380 | loop { 381 | match self.maybe_parse_expr()? 
{ 382 | Left(expr) => { 383 | args.push(expr); 384 | if let None = maybe_eat_token!(self, Comma) { 385 | end = eat_token!(self, CloseParen).span; 386 | break; 387 | } 388 | } 389 | Right(tok) => if let TokenVariant::CloseParen = *tok { 390 | self.get_token()?; 391 | end = tok.span; 392 | break; 393 | } else { 394 | return unexpected_token!(tok, Argument); 395 | }, 396 | } 397 | } 398 | 399 | if let ExpressionVariant::Variable(callee) = expr.thing { 400 | // NOTE(ubsan): special handling for logging 401 | if callee == "log" { 402 | assert!(args.len() == 1); 403 | let arg = args.into_boxed_slice(); 404 | let arg = unsafe { Box::from_raw(Box::into_raw(arg) as *mut Expression) }; 405 | expr = Spanned { 406 | thing: ExpressionVariant::Log(arg), 407 | span: expr.span.union(end), 408 | }; 409 | } else { 410 | expr = Spanned { 411 | thing: ExpressionVariant::Call { callee, args }, 412 | span: expr.span.union(end), 413 | }; 414 | } 415 | } else { 416 | unimplemented!() 417 | } 418 | } 419 | 420 | Ok(Left(expr)) 421 | } 422 | 423 | fn parse_single_expr(&mut self) -> ParserResult { 424 | match self.maybe_parse_single_expr()? { 425 | Right(tok) => unexpected_token!(tok, Expr), 426 | Left(e) => Ok(e), 427 | } 428 | } 429 | 430 | fn parse_binop(&mut self, lhs: Expression, left_op: BinOp) -> ParserResult { 431 | fn op(op: BinOp, lhs: Expression, rhs: Expression) -> Expression { 432 | let start = lhs.span; 433 | let end = rhs.span; 434 | let expr = ExpressionVariant::BinOp { 435 | lhs: Box::new(lhs), 436 | rhs: Box::new(rhs), 437 | op, 438 | }; 439 | Spanned { 440 | thing: expr, 441 | span: start.union(end), 442 | } 443 | } 444 | 445 | let rhs = self.parse_single_expr()?; 446 | 447 | if let Some(right_op) = Self::binop(self.peek_token()?) 
{ 448 | self.get_token()?; 449 | if left_op.precedence() >= right_op.precedence() { 450 | let new_lhs = op(left_op, lhs, rhs); 451 | return self.parse_binop(new_lhs, right_op); 452 | } else { 453 | let new_rhs = self.parse_binop(rhs, right_op)?; 454 | return Ok(op(left_op, lhs, new_rhs)); 455 | } 456 | } else { 457 | Ok(op(left_op, lhs, rhs)) 458 | } 459 | } 460 | 461 | fn maybe_parse_expr(&mut self) -> ParserResult> { 462 | let lhs = match self.maybe_parse_single_expr()? { 463 | Left(expr) => expr, 464 | Right(tok) => return Ok(Right(tok)), 465 | }; 466 | 467 | match self.peek_token()?.thing { 468 | // NOTE(ubsan): should probably be a "is_binop" call 469 | TokenVariant::Plus => { 470 | self.get_token()?; 471 | self.parse_binop(lhs, BinOp::Plus).map(Left) 472 | } 473 | TokenVariant::LessEq => { 474 | self.get_token()?; 475 | self.parse_binop(lhs, BinOp::LessEq).map(Left) 476 | } 477 | _ => Ok(Left(lhs)), 478 | } 479 | } 480 | 481 | fn parse_expr(&mut self) -> ParserResult { 482 | match self.maybe_parse_expr()? { 483 | Left(expr) => Ok(expr), 484 | Right(tok) => unexpected_token!(tok, Expr), 485 | } 486 | } 487 | 488 | fn parse_expr_or_stmt(&mut self) -> ParserResult> { 489 | if let TokenVariant::KeywordLet = **self.peek_token()? { 490 | let start = self.get_token()?.span; 491 | let name = match self.get_token()? { 492 | Token { 493 | thing: TokenVariant::Ident(name), 494 | .. 
495 | } => name, 496 | tok => { 497 | return unexpected_token!(tok, Ident); 498 | } 499 | }; 500 | let ty = match maybe_eat_token!(self, Colon) { 501 | Some(_) => Some(self.parse_type()?), 502 | None => None, 503 | }; 504 | eat_token!(self, Equals); 505 | let initializer = self.parse_expr()?; 506 | let end = eat_token!(self, Semicolon).span; 507 | 508 | return Ok(Right(Spanned { 509 | thing: StatementVariant::Local { 510 | name, 511 | ty, 512 | initializer, 513 | }, 514 | span: start.union(end), 515 | })); 516 | } 517 | let expr = self.parse_expr()?; 518 | 519 | if let TokenVariant::CloseBrace = **self.peek_token()? { 520 | return Ok(Left(expr)); 521 | } 522 | 523 | let end = eat_token!(self, Semicolon).span; 524 | let start = expr.span; 525 | Ok(Right(Spanned { 526 | thing: StatementVariant::Expr(expr), 527 | span: start.union(end), 528 | })) 529 | } 530 | 531 | fn parse_param_list(&mut self) -> ParserResult>> { 532 | let open = eat_token!(self, OpenParen); 533 | 534 | let mut params = vec![]; 535 | loop { 536 | let tok = self.get_token()?; 537 | match tok.thing { 538 | TokenVariant::Ident(name) => { 539 | eat_token!(self, Colon); 540 | let ty = self.parse_type()?; 541 | params.push((name, ty)); 542 | if let None = maybe_eat_token!(self, Comma) { 543 | let end = eat_token!(self, CloseParen).span; 544 | return Ok(Spanned { 545 | thing: params, 546 | span: open.span.union(end), 547 | }); 548 | } 549 | } 550 | TokenVariant::CloseParen => { 551 | return Ok(Spanned { 552 | thing: params, 553 | span: open.span.union(tok.span), 554 | }); 555 | } 556 | _ => { 557 | return unexpected_token!(tok, Parameter); 558 | } 559 | } 560 | } 561 | } 562 | 563 | fn parse_block_no_open(&mut self, start: Span) -> ParserResult { 564 | let mut statements = vec![]; 565 | let expr; 566 | 567 | // NOTE(ubsan): optimization point: we can optimize by not wrapping the 568 | // expression in a block, if there are no statements 569 | loop { 570 | if let Spanned { 571 | thing: 
TokenVariant::CloseBrace, 572 | span, 573 | } = *self.peek_token()? 574 | { 575 | expr = Spanned { 576 | thing: ExpressionVariant::UnitLiteral, 577 | span, 578 | }; 579 | break; 580 | } 581 | match self.parse_expr_or_stmt()? { 582 | Left(e) => { 583 | expr = e; 584 | break; 585 | } 586 | Right(s) => statements.push(s), 587 | } 588 | } 589 | let end = eat_token!(self, CloseBrace).span; 590 | let thing = ExpressionVariant::Block { 591 | statements, 592 | expr: Box::new(expr), 593 | }; 594 | Ok(Spanned { 595 | thing, 596 | span: start.union(end), 597 | }) 598 | } 599 | 600 | fn parse_block(&mut self) -> ParserResult { 601 | let span = eat_token!(self, OpenBrace).span; 602 | self.parse_block_no_open(span) 603 | } 604 | 605 | fn parse_func_decl(&mut self, open_kw_span: Span) -> ParserResult<(String, Item)> { 606 | let Spanned { thing, span } = self.get_token()?; 607 | let span = open_kw_span.union(span); 608 | match thing { 609 | TokenVariant::Ident(name) => { 610 | let params = self.parse_param_list()?; 611 | let ret_ty = { 612 | if let Some(_) = maybe_eat_token!(self, Colon) { 613 | self.parse_type()? 
614 | } else { 615 | StringlyType::Unit 616 | } 617 | }; 618 | let blk = self.parse_block()?; 619 | let expr_span = blk.span; 620 | let thing = ItemVariant::Function(FunctionValue { 621 | params: params.thing, 622 | ret_ty, 623 | expr: blk, 624 | }); 625 | Ok(( 626 | name, 627 | Spanned { 628 | thing, 629 | span: span.union(expr_span), 630 | }, 631 | )) 632 | } 633 | tok => unexpected_token!(tok, Ident, span), 634 | } 635 | } 636 | 637 | fn parse_type_decl(&mut self, open_kw_span: Span) -> ParserResult<(String, Item)> { 638 | let Spanned { thing, span } = self.get_token()?; 639 | let span = open_kw_span.union(span); 640 | match thing { 641 | TokenVariant::Ident(name) => { 642 | eat_token!(self, Equals); 643 | eat_token!(self, KeywordStruct); 644 | eat_token!(self, OpenBrace); 645 | let mut members: Vec<(String, StringlyType)> = Vec::new(); 646 | loop { 647 | let next_tok = self.get_token()?; 648 | if let TokenVariant::CloseBrace = next_tok.thing { 649 | break; 650 | } 651 | if let TokenVariant::Ident(name) = next_tok.thing { 652 | eat_token!(self, Colon); 653 | let ty = self.parse_type()?; 654 | members.push((name, ty)); 655 | } else { 656 | return unexpected_token!(next_tok, Ident); 657 | } 658 | } 659 | let end = eat_token!(self, Semicolon).span; 660 | let span = span.union(end); 661 | let item = Spanned { 662 | thing: ItemVariant::StructDecl(members), 663 | span, 664 | }; 665 | Ok((name, item)) 666 | } 667 | tok => unexpected_token!(tok, Ident, span), 668 | } 669 | } 670 | 671 | fn parse_item_definition(&mut self) -> ParserResult<(String, Item)> { 672 | let Spanned { thing, span } = self.get_token()?; 673 | match thing { 674 | TokenVariant::KeywordFunc => self.parse_func_decl(span), 675 | TokenVariant::KeywordType => self.parse_type_decl(span), 676 | tok => unexpected_token!(tok, ItemKeyword, span), 677 | } 678 | } 679 | 680 | pub fn next_item(&mut self) -> ParserResult<(String, Item)> { 681 | let item = allow_eof!(self.parse_item_definition())?; 682 | Ok(item) 
def main(tests_succeed="./language/tests-succeed/",
         tests_fail="./language/tests-fail/",
         run=None):
    """
    Compile every `.cf` file in the two test directories.

    Files in `tests_succeed` must compile and run with exit code 0;
    otherwise `fail` is called. Files in `tests_fail` are compiled with
    `--no-run` and must NOT exit with 0; otherwise `succeed` is called.
    Any file whose name does not end in `.cf` is reported on stderr and
    skipped.

    tests_succeed -- directory of programs expected to compile
    tests_fail    -- directory of programs expected to be rejected
    run           -- callable taking an argv list and returning an exit
                     code; defaults to `subprocess.call`
    """
    if run is None:
        run = call

    # sorted so that runs are reproducible; os.listdir order is arbitrary
    for file in sorted(os.listdir(path=tests_succeed)):
        # endswith, not re.match(".*\\.cf"): the regex is unanchored at the
        # end and would also accept names such as "foo.cfg" or "a.cf.bak"
        if file.endswith(".cf"):
            print("compiling ", file)
            res = run(["cargo", "run", "-q", "--", os.path.join(tests_succeed, file)])
            if res != 0:
                fail(file, res)
        else:
            print("weird file found in tests-succeed: ", file, file=sys.stderr)

    for file in sorted(os.listdir(path=tests_fail)):
        if file.endswith(".cf"):
            print("compiling ", file, end="\n ")
            res = run(["cargo", "run", "-q", "--", "--no-run", os.path.join(tests_fail, file)])
            if res == 0:
                succeed(file)
        else:
            print("weird file found in tests-fail: ", file, file=sys.stderr)
-------------------------------------------------------------------------------- 1 | notes: 2 | - `-` means "something that will happen at some point" 3 | - `*` means "something that will happen soon" 4 | 5 | - USER EXPERIENCE 6 | - prettier error output 7 | * types not found should point to the type, not to the thing around it 8 | 9 | - COMPILER DESIGN 10 | - do more to modularize mir 11 | - modularize ast 12 | - string cache 13 | 14 | - COMPILED CODE ISSUES 15 | - checking to see if a number is in range 16 | 17 | - LANGUAGE 18 | * reference function parameters 19 | * fully flesh out the basic types 20 | * user defined types! 21 | - parens 22 | - non-addition operators 23 | - non-decimal integer literals 24 | - early return 25 | 26 | - unary operators (+) 27 | 28 | - good typechecking 29 | --------------------------------------------------------------------------------