├── lib.c ├── .gitignore ├── libExt.c ├── README.md ├── exec.go ├── lib.go ├── LICENSE ├── main.go ├── mandel.k ├── test.k ├── nodes.go ├── codegen.go ├── lex.go └── parse.go /lib.c: -------------------------------------------------------------------------------- 1 | 2 | // Want to call a custom C function from kaleidoscope? 3 | // Good news! Here's how: 4 | // 5 | 6 | #include 7 | 8 | double putchard(double x) { 9 | putchar((char)x); 10 | fflush(stdout); 11 | return 0; 12 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | kaleidoscope 25 | -------------------------------------------------------------------------------- /libExt.c: -------------------------------------------------------------------------------- 1 | // +build ignore 2 | 3 | // Not currently in use; if you'd like to add external 4 | // functions here, just follow the instructions below 5 | // to make them visible to the kaleidoscope JIT. 6 | // 7 | // 1. Run: 8 | // clang -dynamiclib libExt.c 9 | // 2. Add to main package: 10 | // err := llvm.LoadLibraryPermanently("./a.out") 11 | // check(err) 12 | // 3. 
Now kaleidoscope can see the a.out dynamic 13 | // library that contains this function: 14 | // > extern putchard(x); putchard(120) 15 | #include 16 | 17 | extern double putchard(double X) { 18 | char a = (char)X; 19 | putchar(a); 20 | fflush(stdout); 21 | return 0; 22 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Kaleidoscope 2 | ============ 3 | 4 | Go port of [LLVM's Kaleidoscope Tutorial](http://llvm.org/docs/tutorial/LangImpl1.html) using the [go-llvm/llvm](http://github.com/go-llvm/llvm) ^{[doc](http://godoc.org/github.com/go-llvm/llvm)} bindings. 5 | 6 | This is a fully functional clone of the completed tutorial. Currently, I'm refactoring the finished code into idiomatic Go. The lexer and parser are now pretty good. The codegen code, error handling and maybe test integration are what's left. After the refactoring is complete, I will break it back up into chapters and port the text of the tutorial as well. 7 | 8 | Other Resources 9 | =============== 10 | 11 | * [LLVM's Official C++ Kaleidoscope Tutorial](http://llvm.org/docs/tutorial/LangImpl1.html) 12 | 13 | * [Rob Pike's *Lexical Scanning in Go*](http://www.youtube.com/watch?v=HxaD_trXwRE) — our lexer is based on the design outlined in this talk. -------------------------------------------------------------------------------- /exec.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/ajsnow/llvm" 8 | ) 9 | 10 | // Exec JIT-compiles the top level statements in the roots chan and, 11 | // if they are expressions, executes them. 
12 | func Exec(roots <-chan node, printLLVMIR bool) { 13 | for n := range roots { 14 | llvmIR := n.codegen() 15 | if llvmIR.IsNil() { 16 | fmt.Fprintln(os.Stderr, "Error: Codegen failed; skipping.") 17 | continue 18 | } 19 | if printLLVMIR { 20 | llvmIR.Dump() 21 | } 22 | if isTopLevelExpr(n) { 23 | returnval := execEngine.RunFunction(llvmIR, []llvm.GenericValue{}) 24 | fmt.Println(returnval.Float(llvm.DoubleType())) 25 | } 26 | } 27 | } 28 | 29 | // isTopLevelExpr determines if the node is a top level expression. 30 | // Top level expressions are function nodes with no name. 31 | func isTopLevelExpr(n node) bool { 32 | return n.Kind() == nodeFunction && n.(*functionNode).proto.(*fnPrototypeNode).name == "" 33 | } 34 | -------------------------------------------------------------------------------- /lib.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // Want to call a Go function from kaleidoscope? 4 | // Good news! Here's how: 5 | // 6 | 7 | // We've separated external Go and C funcs into lib.go/lib.c because of 8 | // a Cgo limitation. Quoting the Go Blog: 9 | // "[I]f your program uses any //export directives, then the C code in 10 | // the comment may only include declarations (extern int f();), not 11 | // definitions (int f() { return 1; }). 
You can use //export directives 12 | // to make Go functions accessible to C code."[^1](http://blog.golang.org/c-go-cgo) 13 | 14 | // #include 15 | import "C" 16 | import "fmt" 17 | 18 | //export cgoputchard 19 | func cgoputchard(x C.double) C.double { 20 | C.putchar(C.int(x)) 21 | C.fflush(C.stdout) 22 | return 0 23 | } 24 | 25 | //export goputchard 26 | func goputchard(x float64) float64 { 27 | fmt.Printf("%c", rune(x)) 28 | return 0 29 | } 30 | 31 | // Helpers: 32 | 33 | //export printd 34 | func printd(x float64) float64 { 35 | fmt.Println(x) 36 | return 0 37 | } 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Andrew Snow 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | var ( 10 | batch = flag.Bool("b", false, "batch (non-interactive) mode") 11 | optimized = flag.Bool("opt", true, "add some optimization passes") 12 | printTokens = flag.Bool("tok", false, "print tokens") 13 | printAst = flag.Bool("ast", false, "print abstract syntax tree") 14 | printLLVMIR = flag.Bool("llvm", false, "print LLVM generated code") 15 | ) 16 | 17 | func main() { 18 | flag.Parse() 19 | if *optimized { 20 | Optimize() 21 | } 22 | 23 | lex := Lex() 24 | tokens := lex.Tokens() 25 | if *printTokens { 26 | tokens = DumpTokens(lex.Tokens()) 27 | } 28 | 29 | // add files for the lexer to lex 30 | go func() { 31 | // command line filenames 32 | for _, fn := range flag.Args() { 33 | f, err := os.Open(fn) 34 | if err != nil { 35 | fmt.Fprintln(os.Stderr, err) 36 | os.Exit(-1) 37 | } 38 | lex.Add(f) 39 | } 40 | 41 | // stdin 42 | if !*batch { 43 | lex.Add(os.Stdin) 44 | } 45 | lex.Done() 46 | }() 47 | 48 | nodes := Parse(tokens) 49 | nodesForExec := nodes 50 | if *printAst { 51 | nodesForExec = DumpTree(nodes) 52 | } 53 | 54 | Exec(nodesForExec, *printLLVMIR) 55 | } 56 | -------------------------------------------------------------------------------- /mandel.k: -------------------------------------------------------------------------------- 1 | # Mandelbrot Set Printer 2 | 3 | def binary : 1 (x y) y; 4 | 5 | def binary> 10 (LHS RHS) RHS < LHS; 6 | 7 | def binary| 5 (LHS RHS) 8 | if LHS then 9 | 1 10 | else if RHS then 11 | 1 12 | else 13 | 0; 14 | def unary-(v) 0-v; 15 | 16 | extern putchard(char) 17 | 18 | def printdensity(d) 19 | if d > 8 then 20 | putchard(32) # ' ' 21 | else if d > 4 then 22 | putchard(46) # '.' 
23 | else if d > 2 then 24 | putchard(43) # '+' 25 | else 26 | putchard(42); # '*' 27 | 28 | # Determine whether the specific location diverges. 29 | # Solve for z = z^2 + c in the complex plane. 30 | def mandleconverger(real imag iters creal cimag) 31 | if iters > 255 | (real*real + imag*imag > 4) then 32 | iters 33 | else 34 | mandleconverger(real*real - imag*imag + creal, 35 | 2*real*imag + cimag, 36 | iters+1, creal, cimag); 37 | 38 | # Return the number of iterations required for the iteration to escape 39 | def mandleconverge(real imag) 40 | mandleconverger(real, imag, 0, real, imag); 41 | 42 | # Compute and plot the mandlebrot set with the specified 2 dimensional range 43 | # info. 44 | def mandelhelp(xmin xmax xstep ymin ymax ystep) 45 | for y = ymin, y < ymax, ystep in ( 46 | (for x = xmin, x < xmax, xstep in 47 | printdensity(mandleconverge(x,y))) 48 | : putchard(10) 49 | ) 50 | 51 | # mandel - This is a convenient helper function for plotting the mandelbrot set 52 | # from the specified position with the specified Magnification. 
53 | def mandel(realstart imagstart realmag imagmag) 54 | mandelhelp(realstart, realstart+realmag*78, realmag, 55 | imagstart, imagstart+imagmag*40, imagmag); 56 | 57 | mandel(-2.3, -1.3, 0.05, 0.07) 58 | -------------------------------------------------------------------------------- /test.k: -------------------------------------------------------------------------------- 1 | # Kaleidoscope Test Routines 2 | 3 | 2 + 2 # Int-like scanning 4 | 3.14 * 13.37 # Float-like scanning 5 | 2 - 1 * (2 - (5 + 5) * 2) / 0.5 # Order of operations 6 | 7 | def foo(a) a # Chaining functions 8 | def double(b) foo(b)*foo(2) 9 | def quad(c) double(c) + double(c) 10 | quad(5) 11 | 12 | extern cos(a); extern sin(a) # External functions 13 | def pi() 3.14159265358979323846 14 | cos(0) 15 | cos(pi()/2) 16 | cos(pi()) 17 | double(cos(3.14)) 18 | cos(sin(5)) 19 | 20 | extern putchard(char) # External func via Cgo C 21 | putchard(67) 22 | extern cgoputchard(char) # External func via Cgo Go (via calls) 23 | cgoputchard(71) 24 | extern goputchard(char) # External func via Cgo Go (via fmt) 25 | goputchard(79) 26 | 27 | def x0(d) 1/2 * (1 + d/1) # Manual Newton's method, for 28 | def x1(d) 1/2 * (x0(d) + d/x0(d)) # before we implement branches. 29 | def x2(d) 1/2 * (x1(d) + d/x1(d)) 30 | def x3(d) 1/2 * (x2(d) + d/x2(d)) 31 | def x4(d) 1/2 * (x3(d) + d/x3(d)) 32 | def x5(d) 1/2 * (x4(d) + d/x4(d)) 33 | def sqrt_naive(d) 1/2 * (x5(d) + d/x5(d)) 34 | def square(e) e*e 35 | def abc1_naive(a, b, c) (b+sqrt_naive(4*a*c-square(b)))/(2*a) 36 | def abc2_naive(a, b, c) (b-sqrt_naive(4*a*c-square(b)))/(2*a) 37 | 38 | extern sqrt(arg); extern pow(base, exp) 39 | def abc1_c(a, b, c) (b+sqrt(4*a*c-pow(b, 2)))/(2*a) 40 | def abc2_c(a, b, c) (b-sqrt(4*a*c-pow(b, 2)))/(2*a) 41 | 42 | abc1_naive(1, 4, 4) # Let's see how ours compares to the 43 | abc1_c(1, 4, 4) # easy version that uses math.h. 
44 | 45 | # If Expr 46 | def fib(x) if x < 3 then 1 else fib(x-1)+fib(x-2) 47 | fib(20) 48 | 49 | # For Loop 50 | def printstar(n) for i = 1, i < n, 1.0 in putchard(42) 51 | printstar(5) 52 | 53 | # User-defined Binary Operators 54 | def binary!(l,r) l * 2 + r / 9 55 | 15 ! 18 56 | def binary?50(l,r) r*r 57 | sqrt(2 ? (15 ! 74 ? sqrt(18))) 58 | def binary∆(l,r) if l < r then r else l 59 | 32 ∆ 2 ∆ 4 ∆ 16 ∆ 96 ∆ 1 60 | 61 | # Mutable Variables 62 | extern printd(x) 63 | def binary:1(x,y) y 64 | def mut(x) 65 | printd(x) : 66 | x = 4 : 67 | printd(x) 68 | 69 | mut(123) 70 | 71 | # Declared Variables 72 | def fibi(x) 73 | var a = 0, b = 1, c in 74 | (for i = 2, i < x in 75 | c = a + b : 76 | a = b : 77 | b = c ) : 78 | b; 79 | fibi(20) 80 | 81 | # Expected output: 82 | # 4 83 | # 41.9818 84 | # 38 85 | # 20 86 | # 1 87 | # 6.123233995736766e-17 88 | # -1 89 | # -1.999997463455079 90 | # 0.574400879193934 91 | # C0 # 'C' printed; 0 returned. 92 | # G0 # 'G' printed; 0 returned. 93 | # O0 # 'O' printed; 0 returned. 94 | # 2.00390625 # Didn't bother confirming this. 95 | # 2 96 | # 6765 97 | # *****0 # "*****" printed; 0 returned. 98 | # 32 99 | # 32 100 | # 96 101 | # 123 102 | # 4 103 | # 0 104 | # 6765 105 | -------------------------------------------------------------------------------- /nodes.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "github.com/ajsnow/llvm" 4 | 5 | // Node Nodes 6 | 7 | type node interface { 8 | Kind() nodeType 9 | // String() string 10 | Position() Pos 11 | codegen() llvm.Value 12 | } 13 | 14 | type nodeType int 15 | 16 | // Pos defines a byte offset from the beginning of the input text. 
17 | type Pos int 18 | 19 | func (p Pos) Position() Pos { 20 | return p 21 | } 22 | 23 | // In text/template/parse/node.go Rob adds an unexported() method to Pos 24 | // I do know why he did that rather than make Pos -> pos 25 | 26 | // Type returns itself, embedding into Nodes 27 | func (t nodeType) Kind() nodeType { 28 | return t 29 | } 30 | 31 | const ( 32 | // literals 33 | nodeNumber nodeType = iota 34 | 35 | // expressions 36 | nodeIf 37 | nodeFor 38 | nodeUnary 39 | nodeBinary 40 | nodeFnCall 41 | nodeVariable 42 | nodeVariableExpr 43 | 44 | // non-expression statements 45 | nodeFnPrototype 46 | nodeFunction 47 | 48 | // other 49 | nodeList 50 | ) 51 | 52 | type numberNode struct { 53 | nodeType 54 | Pos 55 | 56 | val float64 57 | } 58 | 59 | // func NewNumberNode(t token, val float64) *numberNode { 60 | // return &numberNode{ 61 | // nodeType: nodeNumber, 62 | // Pos: t.pos, 63 | // val: val, 64 | // } 65 | // } 66 | 67 | type ifNode struct { 68 | nodeType 69 | Pos 70 | 71 | // psudeo-Hungarian notation as 'if' & 'else' are Go keywords 72 | ifN node 73 | thenN node 74 | elseN node 75 | } 76 | 77 | // func NewIfNode(t token, ifN, thenN, elseN node) *ifNode { 78 | // return &ifNode{ 79 | // nodeType: nodeIf, 80 | // Pos: t.pos, 81 | // ifN: ifN, 82 | // thenN: thenN, 83 | // elseN: elseN, 84 | // } 85 | // } 86 | 87 | type forNode struct { 88 | nodeType 89 | Pos 90 | 91 | counter string 92 | start node 93 | test node 94 | step node 95 | body node 96 | } 97 | 98 | // func NewForNode(t token, counter string, start, test, step, body node) *forNode { 99 | // return &forNode{nodeFor, t.pos, counter, start, test, step, body} 100 | // } 101 | 102 | type unaryNode struct { 103 | nodeType 104 | Pos 105 | 106 | name string 107 | operand node 108 | } 109 | 110 | type binaryNode struct { 111 | nodeType 112 | Pos 113 | 114 | op string 115 | left node 116 | right node 117 | } 118 | 119 | type fnCallNode struct { 120 | nodeType 121 | Pos 122 | 123 | callee string 124 | args 
[](node) 125 | } 126 | 127 | type variableNode struct { 128 | nodeType 129 | Pos 130 | 131 | name string 132 | } 133 | 134 | type variableExprNode struct { 135 | nodeType 136 | Pos 137 | 138 | vars []struct { 139 | name string 140 | node node 141 | } 142 | body node 143 | } 144 | 145 | type fnPrototypeNode struct { 146 | nodeType 147 | Pos 148 | 149 | name string 150 | args []string 151 | isOperator bool 152 | precedence int 153 | } 154 | 155 | type functionNode struct { 156 | nodeType 157 | Pos 158 | 159 | proto node 160 | body node 161 | } 162 | 163 | type listNode struct { 164 | nodeType 165 | Pos 166 | 167 | nodes []node 168 | } 169 | -------------------------------------------------------------------------------- /codegen.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/ajsnow/llvm" 8 | ) 9 | 10 | var ( 11 | rootModule = llvm.NewModule("root") 12 | rootFuncPassMgr = llvm.NewFunctionPassManagerForModule(rootModule) 13 | nativeInitErr = llvm.InitializeNativeTarget() 14 | execEngine, jitInitErr = llvm.NewJITCompiler(rootModule, 0) 15 | builder = llvm.NewBuilder() 16 | namedVals = map[string]llvm.Value{} 17 | ) 18 | 19 | func init() { 20 | if nativeInitErr != nil { 21 | fmt.Fprintln(os.Stderr, nativeInitErr) 22 | os.Exit(-1) 23 | } 24 | if jitInitErr != nil { 25 | fmt.Fprintln(os.Stderr, jitInitErr) 26 | os.Exit(-1) 27 | } 28 | } 29 | 30 | func Optimize() { 31 | rootFuncPassMgr.Add(execEngine.TargetData()) 32 | rootFuncPassMgr.AddPromoteMemoryToRegisterPass() 33 | rootFuncPassMgr.AddInstructionCombiningPass() 34 | rootFuncPassMgr.AddReassociatePass() 35 | rootFuncPassMgr.AddGVNPass() 36 | rootFuncPassMgr.AddCFGSimplificationPass() 37 | rootFuncPassMgr.InitializeFunc() 38 | } 39 | 40 | func createEntryBlockAlloca(f llvm.Value, name string) llvm.Value { 41 | var tmpB = llvm.NewBuilder() 42 | tmpB.SetInsertPoint(f.EntryBasicBlock(), 
f.EntryBasicBlock().FirstInstruction()) 43 | return tmpB.CreateAlloca(llvm.DoubleType(), name) 44 | } 45 | 46 | func (n *fnPrototypeNode) createArgAlloca(f llvm.Value) { 47 | args := f.Params() 48 | for i := range args { 49 | alloca := createEntryBlockAlloca(f, n.args[i]) 50 | builder.CreateStore(args[i], alloca) 51 | namedVals[n.args[i]] = alloca 52 | } 53 | } 54 | 55 | func (n *numberNode) codegen() llvm.Value { 56 | return llvm.ConstFloat(llvm.DoubleType(), n.val) 57 | } 58 | 59 | func (n *variableNode) codegen() llvm.Value { 60 | v := namedVals[n.name] 61 | if v.IsNil() { 62 | return ErrorV("unknown variable name") 63 | } 64 | return builder.CreateLoad(v, n.name) 65 | } 66 | 67 | func (n *ifNode) codegen() llvm.Value { 68 | ifv := n.ifN.codegen() 69 | if ifv.IsNil() { 70 | return ErrorV("code generation failed for if expression") 71 | } 72 | ifv = builder.CreateFCmp(llvm.FloatONE, ifv, llvm.ConstFloat(llvm.DoubleType(), 0), "ifcond") 73 | 74 | parentFunc := builder.GetInsertBlock().Parent() 75 | thenBlk := llvm.AddBasicBlock(parentFunc, "then") 76 | elseBlk := llvm.AddBasicBlock(parentFunc, "else") 77 | mergeBlk := llvm.AddBasicBlock(parentFunc, "merge") 78 | builder.CreateCondBr(ifv, thenBlk, elseBlk) 79 | 80 | // generate 'then' block 81 | builder.SetInsertPointAtEnd(thenBlk) 82 | thenv := n.thenN.codegen() 83 | if thenv.IsNil() { 84 | return ErrorV("code generation failed for then expression") 85 | } 86 | builder.CreateBr(mergeBlk) 87 | // Codegen of 'Then' can change the current block, update ThenBB for the PHI. 
88 | thenBlk = builder.GetInsertBlock() 89 | 90 | // generate 'else' block 91 | // C++ unknown eq: TheFunction->getBasicBlockList().push_back(ElseBB); 92 | builder.SetInsertPointAtEnd(elseBlk) 93 | elsev := n.elseN.codegen() 94 | if elsev.IsNil() { 95 | return ErrorV("code generation failed for else expression") 96 | } 97 | builder.CreateBr(mergeBlk) 98 | elseBlk = builder.GetInsertBlock() 99 | 100 | builder.SetInsertPointAtEnd(mergeBlk) 101 | PhiNode := builder.CreatePHI(llvm.DoubleType(), "iftmp") 102 | PhiNode.AddIncoming([]llvm.Value{thenv}, []llvm.BasicBlock{thenBlk}) 103 | PhiNode.AddIncoming([]llvm.Value{elsev}, []llvm.BasicBlock{elseBlk}) 104 | return PhiNode 105 | } 106 | 107 | func (n *forNode) codegen() llvm.Value { 108 | startVal := n.start.codegen() 109 | if startVal.IsNil() { 110 | return ErrorV("code generation failed for start expression") 111 | } 112 | 113 | parentFunc := builder.GetInsertBlock().Parent() 114 | alloca := createEntryBlockAlloca(parentFunc, n.counter) 115 | builder.CreateStore(startVal, alloca) 116 | loopBlk := llvm.AddBasicBlock(parentFunc, "loop") 117 | 118 | builder.CreateBr(loopBlk) 119 | 120 | builder.SetInsertPointAtEnd(loopBlk) 121 | 122 | // save higher levels' variables if we have the same name 123 | oldVal := namedVals[n.counter] 124 | namedVals[n.counter] = alloca 125 | 126 | if n.body.codegen().IsNil() { 127 | return ErrorV("code generation failed for body expression") 128 | } 129 | 130 | var stepVal llvm.Value 131 | if n.step != nil { 132 | stepVal = n.step.codegen() 133 | if stepVal.IsNil() { 134 | return llvm.ConstNull(llvm.DoubleType()) 135 | } 136 | } else { 137 | stepVal = llvm.ConstFloat(llvm.DoubleType(), 1) 138 | } 139 | 140 | // evaluate end condition before increment 141 | endVal := n.test.codegen() 142 | if endVal.IsNil() { 143 | return endVal 144 | } 145 | 146 | curVar := builder.CreateLoad(alloca, n.counter) 147 | nextVar := builder.CreateFAdd(curVar, stepVal, "nextvar") 148 | builder.CreateStore(nextVar, 
alloca) 149 | 150 | endVal = builder.CreateFCmp(llvm.FloatONE, endVal, llvm.ConstFloat(llvm.DoubleType(), 0), "loopcond") 151 | afterBlk := llvm.AddBasicBlock(parentFunc, "afterloop") 152 | 153 | builder.CreateCondBr(endVal, loopBlk, afterBlk) 154 | 155 | builder.SetInsertPointAtEnd(afterBlk) 156 | 157 | if !oldVal.IsNil() { 158 | namedVals[n.counter] = oldVal 159 | } else { 160 | delete(namedVals, n.counter) 161 | } 162 | 163 | return llvm.ConstFloat(llvm.DoubleType(), 0) 164 | } 165 | 166 | func (n *unaryNode) codegen() llvm.Value { 167 | operandValue := n.operand.codegen() 168 | if operandValue.IsNil() { 169 | return ErrorV("nil operand") 170 | } 171 | 172 | f := rootModule.NamedFunction("unary" + string(n.name)) 173 | if f.IsNil() { 174 | return ErrorV("unknown unary operator") 175 | } 176 | return builder.CreateCall(f, []llvm.Value{operandValue}, "unop") 177 | } 178 | 179 | func (n *variableExprNode) codegen() llvm.Value { 180 | var oldvars = []llvm.Value{} 181 | 182 | f := builder.GetInsertBlock().Parent() 183 | for i := range n.vars { 184 | name := n.vars[i].name 185 | node := n.vars[i].node 186 | 187 | var val llvm.Value 188 | if node != nil { 189 | val = node.codegen() 190 | if val.IsNil() { 191 | return val // nil 192 | } 193 | } else { // if no initialized value set to 0 194 | val = llvm.ConstFloat(llvm.DoubleType(), 0) 195 | } 196 | 197 | alloca := createEntryBlockAlloca(f, name) 198 | builder.CreateStore(val, alloca) 199 | 200 | oldvars = append(oldvars, namedVals[name]) 201 | namedVals[name] = alloca 202 | } 203 | 204 | // evaluate body now that vars are in scope 205 | bodyVal := n.body.codegen() 206 | if bodyVal.IsNil() { 207 | return ErrorV("body returns nil") // nil 208 | } 209 | 210 | // pop old values 211 | for i := range n.vars { 212 | namedVals[n.vars[i].name] = oldvars[i] 213 | } 214 | 215 | return bodyVal 216 | } 217 | 218 | func (n *fnCallNode) codegen() llvm.Value { 219 | callee := rootModule.NamedFunction(n.callee) 220 | if callee.IsNil() 
{ 221 | return ErrorV("unknown function referenced") 222 | } 223 | 224 | if callee.ParamsCount() != len(n.args) { 225 | return ErrorV("incorrect number of arguments passed") 226 | } 227 | 228 | args := []llvm.Value{} 229 | for _, arg := range n.args { 230 | args = append(args, arg.codegen()) 231 | if args[len(args)-1].IsNil() { 232 | return ErrorV("an argument was nil") 233 | } 234 | } 235 | 236 | return builder.CreateCall(callee, args, "calltmp") 237 | } 238 | 239 | func (n *binaryNode) codegen() llvm.Value { 240 | // Special case '=' because we don't emit the LHS as an expression 241 | if n.op == "=" { 242 | l, ok := n.left.(*variableNode) 243 | if !ok { 244 | return ErrorV("destination of '=' must be a variable") 245 | } 246 | 247 | // get value 248 | val := n.right.codegen() 249 | if val.IsNil() { 250 | return ErrorV("cannot assign null value") 251 | } 252 | 253 | // lookup location of variable from name 254 | p := namedVals[l.name] 255 | 256 | // store 257 | builder.CreateStore(val, p) 258 | 259 | return val 260 | } 261 | 262 | l := n.left.codegen() 263 | r := n.right.codegen() 264 | if l.IsNil() || r.IsNil() { 265 | return ErrorV("operand was nil") 266 | } 267 | 268 | switch n.op { 269 | case "+": 270 | return builder.CreateFAdd(l, r, "addtmp") 271 | case "-": 272 | return builder.CreateFSub(l, r, "subtmp") 273 | case "*": 274 | return builder.CreateFMul(l, r, "multmp") 275 | case "/": 276 | return builder.CreateFDiv(l, r, "divtmp") 277 | case "<": 278 | l = builder.CreateFCmp(llvm.FloatOLT, l, r, "cmptmp") 279 | return builder.CreateUIToFP(l, llvm.DoubleType(), "booltmp") 280 | default: 281 | function := rootModule.NamedFunction("binary" + string(n.op)) 282 | if function.IsNil() { 283 | return ErrorV("invalid binary operator") 284 | } 285 | return builder.CreateCall(function, []llvm.Value{l, r}, "binop") 286 | } 287 | } 288 | 289 | func (n *fnPrototypeNode) codegen() llvm.Value { 290 | funcArgs := []llvm.Type{} 291 | for _ = range n.args { 292 | funcArgs = 
append(funcArgs, llvm.DoubleType()) 293 | } 294 | funcType := llvm.FunctionType(llvm.DoubleType(), funcArgs, false) 295 | function := llvm.AddFunction(rootModule, n.name, funcType) 296 | 297 | if function.Name() != n.name { 298 | function.EraseFromParentAsFunction() 299 | function = rootModule.NamedFunction(n.name) 300 | } 301 | 302 | if function.BasicBlocksCount() != 0 { 303 | return ErrorV("redefinition of function: " + n.name) 304 | } 305 | 306 | if function.ParamsCount() != len(n.args) { 307 | return ErrorV("redefinition of function with different number of args") 308 | } 309 | 310 | for i, param := range function.Params() { 311 | param.SetName(n.args[i]) 312 | namedVals[n.args[i]] = param 313 | } 314 | 315 | return function 316 | } 317 | 318 | func (n *functionNode) codegen() llvm.Value { 319 | namedVals = make(map[string]llvm.Value) 320 | p := n.proto.(*fnPrototypeNode) 321 | theFunction := n.proto.codegen() 322 | if theFunction.IsNil() { 323 | return ErrorV("prototype") 324 | } 325 | 326 | // if p.isOperator && len(p.args) == 2 { 327 | // opChar, _ := utf8.DecodeLastRuneInString(p.name) 328 | // binaryOpPrecedence[opChar] = p.precedence 329 | // } 330 | 331 | block := llvm.AddBasicBlock(theFunction, "entry") 332 | builder.SetInsertPointAtEnd(block) 333 | 334 | p.createArgAlloca(theFunction) 335 | 336 | retVal := n.body.codegen() 337 | if retVal.IsNil() { 338 | theFunction.EraseFromParentAsFunction() 339 | return ErrorV("function body") 340 | } 341 | 342 | builder.CreateRet(retVal) 343 | if llvm.VerifyFunction(theFunction, llvm.PrintMessageAction) != nil { 344 | theFunction.EraseFromParentAsFunction() 345 | return ErrorV("function verifiction failed") 346 | } 347 | 348 | rootFuncPassMgr.RunFunc(theFunction) 349 | return theFunction 350 | } 351 | -------------------------------------------------------------------------------- /lex.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 
5 | "fmt" 6 | "os" 7 | "strings" 8 | "unicode" 9 | "unicode/utf8" 10 | 11 | "github.com/davecgh/go-spew/spew" 12 | ) 13 | 14 | // token represents the basic lexicographical units of the language. 15 | type token struct { 16 | kind tokenType // The kind of token with which we're dealing. 17 | pos Pos // The byte offset of the beginning of the token with respect to the beginning of the input. 18 | val string // The token's value. Error message for lexError; otherwise, the token's constituent text. 19 | } 20 | 21 | // Defining the String function satisfies the Stringer interface. 22 | // Satisfying Stringer allows package fmt to pretty-print our tokens. 23 | // func (t *token) String() string { 24 | // switch { 25 | // case t.kind == tokError: 26 | // return t.val 27 | // case t.kind == tokEOF: 28 | // return "EOF" 29 | // case t.kind > tokKeyword: 30 | // return fmt.Sprintf("<%s>", t.val) 31 | // case len(t.val) > 10: 32 | // return fmt.Sprintf("%.10q...", t.val) // Limit the max width for long tokens 33 | // case t.kind == tokSpace: 34 | // return "_" 35 | // default: 36 | // return t.val 37 | // } 38 | // } 39 | 40 | // tokenType identifies the type of a token. 41 | type tokenType int 42 | 43 | // The list of tokenTypes. 
44 | const ( 45 | // special 46 | tokEndOfTokens tokenType = iota // finished tokenizing all input 47 | tokError // error occurred 48 | tokNewFile 49 | tokComment 50 | 51 | // punctuation 52 | tokSpace 53 | tokSemicolon 54 | tokComma 55 | tokLeftParen 56 | tokRightParen 57 | 58 | // literals 59 | tokNumber 60 | 61 | // identifiers 62 | tokIdentifier 63 | 64 | // keywords 65 | tokKeyword // used to delineate keywords 66 | tokDefine 67 | tokExtern 68 | tokIf 69 | tokThen 70 | tokElse 71 | tokFor 72 | tokIn 73 | tokBinary 74 | tokUnary 75 | tokVariable 76 | 77 | // operators 78 | tokUserUnaryOp // additionally used to delineate operators 79 | tokUserBinaryOp 80 | tokEqual 81 | tokPlus 82 | tokMinus 83 | tokStar 84 | tokSlash 85 | tokLessThan 86 | ) 87 | 88 | // key maps keywords strings to their tokenType. 89 | var key = map[string]tokenType{ 90 | "def": tokDefine, 91 | "extern": tokExtern, 92 | "if": tokIf, 93 | "then": tokThen, 94 | "else": tokElse, 95 | "for": tokFor, 96 | "in": tokIn, 97 | "binary": tokBinary, 98 | "unary": tokUnary, 99 | "var": tokVariable, 100 | } 101 | 102 | // op maps built-in operators to tokenTypes 103 | // As this should never be written to, it is safe to share between lexer goroutines. 104 | var op = map[rune]tokenType{ 105 | '=': tokEqual, 106 | '+': tokPlus, 107 | '-': tokMinus, 108 | '*': tokStar, 109 | '/': tokSlash, 110 | '<': tokLessThan, 111 | } 112 | 113 | // userOpType differentiates a user-defined unary, binary or not found operator. 114 | type userOpType int 115 | 116 | const ( 117 | uopNOP userOpType = iota // Signals that the rune is *not* a user operator. 118 | uopUnaryOp 119 | uopBinaryOp 120 | ) 121 | 122 | // stateFn represents the state of the scanner as a function that returns the next state. 123 | type stateFn func(*lexer) stateFn 124 | 125 | // lexer holds the state of the scanner. 
126 | type lexer struct { 127 | files chan *os.File // files to be lexed 128 | scanner *bufio.Scanner // scanner is a buffered interface to the current file 129 | name string // name of current input file; used in error reports 130 | line string // current line being scanned 131 | state stateFn // next lexing function to be called 132 | pos Pos // current position in input 133 | start Pos // beginning position of the current token 134 | width Pos // width of last rune read from input 135 | lineCount int // number of lines seen in the current file 136 | parenDepth int // nested layers of paren expressions 137 | tokens chan token // channel of lexed items 138 | userOperators map[rune]userOpType // userOperators maps user defined operators to number of operands 139 | } 140 | 141 | // Lex creates and runs a new lexer. 142 | func Lex() *lexer { 143 | l := &lexer{ 144 | files: make(chan *os.File, 10), 145 | tokens: make(chan token, 10), 146 | userOperators: map[rune]userOpType{}, 147 | } 148 | go l.run() 149 | return l 150 | } 151 | 152 | // Add adds the given file to the lexer's file queue. 153 | // N.B. Add can block (waiting on the lex's files chan to clear), 154 | // so it should be called in a different goroutine than the ultimate 155 | // consumer of the compiler's pipeline, e.g. Exec. 156 | func (l *lexer) Add(f *os.File) { 157 | l.files <- f 158 | } 159 | 160 | // Done signals that the user is finished Add()ing files 161 | // and that the lexer goroutine should stop once it has 162 | // finished processing all files currently in its queue. 163 | func (l *lexer) Done() { 164 | close(l.files) 165 | } 166 | 167 | // Tokens returns a read-only channel of tokens that can 168 | // be printed or parsed. 169 | func (l *lexer) Tokens() <-chan token { 170 | return l.tokens 171 | } 172 | 173 | // l.next() returns eof to signal end of file to a stateFn. 
174 | const eof = -1 175 | 176 | // word returns the value of the token that would be emitted if 177 | // l.emit() were to be called. 178 | func (l *lexer) word() string { 179 | return l.line[l.start:l.pos] 180 | } 181 | 182 | // next returns the next rune from the input and advances the scan. 183 | // It returns the eof constant (-1) if the scanner is at the end of 184 | // the input. 185 | func (l *lexer) next() rune { 186 | if int(l.pos) >= len(l.line) { 187 | if l.scanner.Scan() { 188 | l.line = l.scanner.Text() + "\n" 189 | l.pos = 0 190 | l.start = 0 191 | l.width = 0 192 | } else { 193 | l.width = 0 194 | return eof 195 | } 196 | } 197 | r, w := utf8.DecodeRuneInString(l.line[l.pos:]) 198 | l.width = Pos(w) 199 | l.pos += l.width 200 | // spew.Printf("Rune: %q", r) 201 | return r 202 | } 203 | 204 | // peek returns the next rune without moving the scan forward. 205 | func (l *lexer) peek() rune { 206 | r := l.next() 207 | l.backup() 208 | return r 209 | } 210 | 211 | // backup moves the scan back one rune. 212 | func (l *lexer) backup() { 213 | l.pos -= l.width 214 | } 215 | 216 | // ignore skips the pending input before this point. 217 | func (l *lexer) ignore() { 218 | l.start = l.pos 219 | } 220 | 221 | // acceptRun consumes a run of runes from valid set. 222 | func (l *lexer) acceptRun(valid string) { 223 | for strings.IndexRune(valid, l.next()) >= 0 { 224 | } 225 | l.backup() 226 | } 227 | 228 | // errorf sending an error token and terminates the scan by passing nil as the next stateFn 229 | func (l *lexer) errorf(format string, args ...interface{}) stateFn { 230 | l.tokens <- token{ 231 | kind: tokError, 232 | pos: l.start, 233 | val: fmt.Sprintf(format, args...)} 234 | return nil 235 | } 236 | 237 | // emit passes the current token. 238 | func (l *lexer) emit(tt tokenType) { 239 | l.tokens <- token{ 240 | kind: tt, 241 | pos: l.start, 242 | val: l.word(), 243 | } 244 | l.start = l.pos 245 | } 246 | 247 | // run runs the state machine for the lexer. 
248 | func (l *lexer) run() { 249 | for { 250 | f, ok := <-l.files 251 | if !ok { 252 | close(l.tokens) // tokEndOfTokens is the zero value of token 253 | return 254 | } 255 | 256 | // reset Lexer for new file. 257 | l.name = f.Name() 258 | l.scanner = bufio.NewScanner(f) 259 | l.line = "" 260 | l.pos = 0 261 | l.start = 0 262 | l.width = 0 263 | l.parenDepth = 0 264 | 265 | // emit a new file token for the parser. 266 | l.tokens <- token{ 267 | kind: tokNewFile, 268 | val: l.name, 269 | } 270 | 271 | // run state machine for the lexer. 272 | for l.state = lexTopLevel; l.state != nil; { 273 | l.state = l.state(l) 274 | // spew.Println("State:", runtime.FuncForPC(reflect.ValueOf(l.state).Pointer()).Name()) 275 | } 276 | 277 | f.Close() // close file handle 278 | } 279 | } 280 | 281 | // State Functions 282 | 283 | // lexTopLevel lexes any top level statement. Because our language is simple, 284 | // our lexer rarely needs to know its prior state and therefore this amounts 285 | // to the giant-switch style of lexing. Nevertheless, the stateFn technique 286 | // allows us to easy extend our lexer to more complex grammars. 287 | func lexTopLevel(l *lexer) stateFn { 288 | // Either whitespace, an empty line, a comment, 289 | // a number, a paren, identifier, or unary operator. 
290 | r := l.next() 291 | switch { 292 | case r == eof: 293 | return nil 294 | case isSpace(r): 295 | l.backup() 296 | return lexSpace 297 | case isEOL(r): 298 | l.start = l.pos 299 | return lexTopLevel 300 | case r == ';': 301 | l.emit(tokSemicolon) 302 | return lexTopLevel 303 | case r == ',': 304 | l.emit(tokComma) 305 | return lexTopLevel 306 | case r == '#': 307 | return lexComment 308 | case r == '(': 309 | l.parenDepth++ 310 | l.emit(tokLeftParen) 311 | return lexTopLevel 312 | case r == ')': 313 | l.parenDepth-- 314 | l.emit(tokRightParen) 315 | if l.parenDepth < 0 { 316 | return l.errorf("unexpected right paren") 317 | } 318 | return lexTopLevel 319 | case '0' <= r && r <= '9', r == '.': 320 | l.backup() 321 | return lexNumber 322 | case isAlphaNumeric(r): 323 | l.backup() 324 | return lexIdentifer 325 | case op[r] > tokUserBinaryOp: 326 | l.emit(op[r]) 327 | return lexTopLevel 328 | case l.userOperators[r] == uopBinaryOp: 329 | l.emit(tokUserBinaryOp) 330 | return lexTopLevel 331 | case l.userOperators[r] == uopUnaryOp: 332 | l.emit(tokUserUnaryOp) 333 | return lexTopLevel 334 | default: 335 | return l.errorf("unrecognized character: %#U", r) 336 | } 337 | } 338 | 339 | // lexSpace globs contiguous whitespace. 340 | func lexSpace(l *lexer) stateFn { 341 | globWhitespace(l) 342 | return lexTopLevel 343 | } 344 | 345 | // globWhitespace globs contiguous whitespace. (Sometimes we 346 | // don't want to return to lexTopLevel after doing this.) 347 | func globWhitespace(l *lexer) { 348 | for isSpace(l.next()) { 349 | } 350 | l.backup() 351 | if l.start != l.pos { 352 | l.emit(tokSpace) 353 | } 354 | } 355 | 356 | // lexComment runs from '#' to the end of line or end of file. 357 | func lexComment(l *lexer) stateFn { 358 | // for !isEOL(l.next()) { 359 | // } 360 | // l.backup() 361 | l.pos = Pos(len(l.line)) 362 | l.emit(tokComment) 363 | return lexTopLevel 364 | } 365 | 366 | // lexNumber globs potential number-like strings. 
We let the parser 367 | // verify that the token is actually a valid number. 368 | // e.g. "3.A.8" could be emitted by this function. 369 | func lexNumber(l *lexer) stateFn { 370 | l.acceptRun("0123456789.xabcdefABCDEF") 371 | // if isAlphaNumeric(l.peek()) { // probably a mistyped identifier 372 | // l.next() 373 | // return l.errorf("bad number syntax: %q", l.word()) 374 | // } 375 | l.emit(tokNumber) 376 | return lexTopLevel 377 | } 378 | 379 | // lexIdentfier globs unicode alpha-numerics, determines if they 380 | // represent a keyword or identifier, and output the appropriate 381 | // token. For the "binary" & "unary" keywords, we need to add their 382 | // associated user-defined operator to our map so that we can 383 | // identify it later. 384 | func lexIdentifer(l *lexer) stateFn { 385 | for { 386 | switch r := l.next(); { 387 | case isAlphaNumeric(r): 388 | // absorb 389 | default: 390 | l.backup() 391 | word := l.word() 392 | if key[word] > tokKeyword { // We already know it's not an operator. 393 | l.emit(key[word]) 394 | switch word { 395 | case "binary": 396 | return lexUserBinaryOp 397 | case "unary": 398 | return lexUserUnaryOp 399 | } 400 | } else { 401 | l.emit(tokIdentifier) 402 | } 403 | return lexTopLevel 404 | } 405 | } 406 | } 407 | 408 | // lexUserBinaryOp checks for spaces and then identifies and maps. 409 | // the newly defined user operator. 410 | func lexUserBinaryOp(l *lexer) stateFn { 411 | globWhitespace(l) 412 | r := l.next() 413 | l.userOperators[r] = uopBinaryOp 414 | l.emit(tokUserBinaryOp) 415 | return lexTopLevel 416 | } 417 | 418 | // lexUserBinaryOp checks for spaces and then identifies and maps. 419 | // the newly defined user operator. 420 | func lexUserUnaryOp(l *lexer) stateFn { 421 | globWhitespace(l) 422 | r := l.next() 423 | l.userOperators[r] = uopUnaryOp 424 | l.emit(tokUserUnaryOp) 425 | return lexTopLevel 426 | } 427 | 428 | // Helper Functions 429 | 430 | // isSpace reports whether r is whitespace. 
431 | func isSpace(r rune) bool { 432 | return r == ' ' || r == '\t' 433 | } 434 | 435 | // isEOL reports whether r is an end-of-line character or an EOF. 436 | func isEOL(r rune) bool { 437 | return r == '\n' || r == '\r' || r == eof 438 | } 439 | 440 | // isValidIdefRune reports if r may be part of an identifier name. 441 | func isAlphaNumeric(r rune) bool { 442 | return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) 443 | } 444 | 445 | // DumpTokens spawns a goroutine to dump incomming tokens and 446 | // re-emit them on the output channel. 447 | func DumpTokens(in <-chan token) <-chan token { 448 | out := make(chan token) 449 | go func() { 450 | for { 451 | t, ok := <-in 452 | if !ok { 453 | close(out) 454 | return 455 | } 456 | spew.Dump(t) 457 | out <- t 458 | } 459 | }() 460 | return out 461 | } 462 | -------------------------------------------------------------------------------- /parse.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strconv" 7 | 8 | "github.com/ajsnow/llvm" 9 | "github.com/davecgh/go-spew/spew" 10 | ) 11 | 12 | // A parser holds the internal state of the AST being constructed. Instead of 13 | // composing top-level statements into branches under the AST root, they are 14 | // send along a node channel that can be codegen'd and executed. This allows 15 | // us to begin code generation and execution before we have finished parsing 16 | // input (and/or allows us to use one parser during interactive mode instead 17 | // of creating a new one for each line). 
18 | type parser struct { 19 | name string // name of current file whose tokens are being recieved; used in error reporting 20 | tokens <-chan token // channel of tokens from the lexer 21 | token token // current token, most reciently recieved 22 | topLevelNodes chan node // channel of parsed top-level statements 23 | binaryOpPrecedence map[string]int // maps binary operators to the precidence determining the order of operations 24 | } 25 | 26 | // Parse creates and runs a new parser, returning a channel of 27 | // top-level AST sub-trees for further processing. 28 | func Parse(tokens <-chan token) <-chan node { 29 | p := &parser{ 30 | tokens: tokens, 31 | topLevelNodes: make(chan node, 100), 32 | binaryOpPrecedence: map[string]int{ 33 | "=": 2, 34 | "<": 10, 35 | "+": 20, 36 | "-": 20, 37 | "*": 40, 38 | "/": 40, 39 | }, 40 | } 41 | go p.parse() 42 | return p.topLevelNodes 43 | } 44 | 45 | // parse is the parsing main loop. It receives tokens and begins 46 | // the recursive decent until a nil or top-level sub-tree is 47 | // returned. Non-nils are sent to the topLevelNode channel; 48 | // nils are discarded (they indicate either errors, semicolons 49 | // or file boundaries). Once the tokens channel is empty & closed, 50 | // it closes its own topLevelNodes channel. 51 | func (p *parser) parse() { 52 | for p.next(); p.token.kind > tokError; { //p.next() { // may want/need to switch this back once i introduce statement delineation 53 | topLevelNode := p.parseTopLevelStmt() 54 | if topLevelNode != nil { 55 | p.topLevelNodes <- topLevelNode 56 | } 57 | } 58 | 59 | if p.token.kind == tokError { 60 | spew.Dump(p.token) 61 | } 62 | close(p.topLevelNodes) 63 | } 64 | 65 | // next advances to the next useful token, discarding tokens 66 | // that the parser doesn't need to handle like whitespace and 67 | // comments. 
68 | func (p *parser) next() token { 69 | for p.token = <-p.tokens; p.token.kind == tokSpace || 70 | p.token.kind == tokComment; p.token = <-p.tokens { 71 | } 72 | return p.token 73 | } 74 | 75 | // parseTopLevelStmt determines if the current token is the 76 | // beginning of a function definition, external declaration or 77 | // a top level expression. Semicolons are ignored; 78 | // file transitions change the parser's file name variable. 79 | // -- 80 | // TODO: don't return nil for non-error, non-done conditions 81 | // TODO: create BadDef, BadExpr, BadExtern nodes 82 | func (p *parser) parseTopLevelStmt() node { 83 | switch p.token.kind { 84 | case tokNewFile: 85 | p.name = p.token.val 86 | p.next() 87 | return nil 88 | case tokSemicolon: 89 | p.next() 90 | return nil 91 | case tokDefine: 92 | return p.parseDefinition() 93 | case tokExtern: 94 | return p.parseExtern() 95 | default: 96 | return p.parseTopLevelExpr() 97 | } 98 | } 99 | 100 | // parseDefinition parses top level function definitions. 101 | func (p *parser) parseDefinition() node { 102 | pos := p.token.pos 103 | p.next() 104 | proto := p.parsePrototype() 105 | if p == nil { 106 | return nil 107 | } 108 | 109 | e := p.parseExpression() 110 | if e == nil { 111 | return nil 112 | } 113 | return &functionNode{nodeFunction, pos, proto, e} 114 | } 115 | 116 | func (p *parser) parseExtern() node { 117 | p.next() 118 | return p.parsePrototype() 119 | } 120 | 121 | // parseTopLevelExpr parses top level expressions by wrapping them 122 | // into unnamed functions. The name "" signals that this statement 123 | // is to be executed directly. 
124 | func (p *parser) parseTopLevelExpr() node { 125 | pos := p.token.pos 126 | e := p.parseExpression() 127 | if e == nil { 128 | return nil 129 | } 130 | proto := &fnPrototypeNode{nodeFnPrototype, pos, "", nil, false, 0} // fnName, ArgNames, kind != idef, precedence} 131 | f := &functionNode{nodeFunction, pos, proto, e} 132 | return f 133 | } 134 | 135 | // parsePrototype parses function prototypes. First it determines if 136 | // the function is named. If the name is "unary" or "binary", then 137 | // the prototype is for a user-defined operator. Binary ops may have 138 | // an optional precedence specified to determine the order of 139 | // operations. 140 | // e.g. name(arg1, arg2, arg3) 141 | // e.g. binary ∆ 50 (lhs rhs) 142 | func (p *parser) parsePrototype() node { 143 | pos := p.token.pos 144 | if p.token.kind != tokIdentifier && 145 | p.token.kind != tokBinary && 146 | p.token.kind != tokUnary { 147 | return Error(p.token, "expected function name in prototype") 148 | } 149 | 150 | fnName := p.token.val 151 | p.next() 152 | 153 | precedence := 30 154 | const ( 155 | idef = iota 156 | unary 157 | binary 158 | ) 159 | kind := idef 160 | 161 | switch fnName { 162 | case "unary": 163 | fnName += p.token.val // unary^ 164 | kind = unary 165 | p.next() 166 | case "binary": 167 | fnName += p.token.val // binary^ 168 | op := p.token.val 169 | kind = binary 170 | p.next() 171 | 172 | if p.token.kind == tokNumber { 173 | var err error 174 | precedence, err = strconv.Atoi(p.token.val) 175 | if err != nil { 176 | return Error(p.token, "\ninvalid precedence") 177 | } 178 | p.next() 179 | } 180 | p.binaryOpPrecedence[op] = precedence // make sure to take this out of codegen later if we're going to keep it here. 
181 | } 182 | 183 | if p.token.kind != tokLeftParen { 184 | return Error(p.token, "expected '(' in prototype") 185 | } 186 | 187 | ArgNames := []string{} 188 | for p.next(); p.token.kind == tokIdentifier || p.token.kind == tokComma; p.next() { 189 | if p.token.kind != tokComma { 190 | ArgNames = append(ArgNames, p.token.val) 191 | } 192 | } 193 | if p.token.kind != tokRightParen { 194 | return Error(p.token, "expected ')' in prototype") 195 | } 196 | 197 | p.next() 198 | if kind != idef && len(ArgNames) != kind { 199 | return Error(p.token, "invalid number of operands for operator") 200 | } 201 | return &fnPrototypeNode{nodeFnPrototype, pos, fnName, ArgNames, kind != idef, precedence} 202 | } 203 | 204 | // parseExpression parses expressions. First, it tries to parse 205 | // the current token as the beginning of a unary expression. If 206 | // the result is non-null, it will parse the rest as the right- 207 | // hand side of a binary expression. 208 | // e.g. !!5 + sin(2 * 4) - 2 -> {!!5} {+ sin(2 * 4) - 2} 209 | func (p *parser) parseExpression() node { 210 | lhs := p.parseUnarty() 211 | if lhs == nil { 212 | return nil 213 | } 214 | 215 | return p.parseBinaryOpRHS(1, lhs) // TODO: check on this value wrt our : = and 0 val for not found instead of tut's -1 216 | } /// also this way of hacking on left to right preference on top of operator precedence can fail if we have more expressions than the difference in the op pref, right? 217 | 218 | // parseUnarty parses unary expressions. If the current token is 219 | // not a unary operator, parse it as a primary expression; otherwise, 220 | // return a unaryNode, parsing the operand of the unary operator as 221 | // another unary expression (so as to allow chaining of unary ops). 
222 | func (p *parser) parseUnarty() node { 223 | pos := p.token.pos 224 | // If we're not an operator, parse as primary {this is correcp.} 225 | if p.token.kind < tokUserUnaryOp { 226 | return p.parsePrimary() 227 | } 228 | 229 | name := p.token.val 230 | p.next() 231 | operand := p.parseUnarty() 232 | if operand != nil { 233 | return &unaryNode{nodeUnary, pos, name, operand} 234 | } 235 | return nil 236 | } 237 | 238 | // parseBinaryOpRHS parses the operator and right-hand side of a 239 | // binary operator expression. 240 | func (p *parser) parseBinaryOpRHS(exprPrec int, lhs node) node { 241 | pos := p.token.pos 242 | for { 243 | if p.token.kind < tokUserUnaryOp { 244 | return lhs // an expression like '5' will get sent back up to parseTopLevelExpr or parseDefinition from here. 245 | } 246 | tokenPrec := p.getTokenPrecedence(p.token.val) 247 | if tokenPrec < exprPrec { 248 | return lhs 249 | } 250 | binOp := p.token.val 251 | p.next() 252 | 253 | rhs := p.parseUnarty() 254 | if rhs == nil { 255 | return nil 256 | } 257 | 258 | nextPrec := p.getTokenPrecedence(p.token.val) 259 | if tokenPrec < nextPrec { 260 | rhs = p.parseBinaryOpRHS(tokenPrec+1, rhs) 261 | if rhs == nil { 262 | return nil 263 | } 264 | } 265 | 266 | lhs = &binaryNode{nodeBinary, pos, binOp, lhs, rhs} 267 | } 268 | } 269 | 270 | // getTokenPrecedence returns a binary operator's precedence 271 | func (p *parser) getTokenPrecedence(token string) int { 272 | return p.binaryOpPrecedence[token] 273 | } 274 | 275 | // parsePrimary parses primary expressions. The parser arrives 276 | // here when operator expressions are gathering their operands. 277 | // (Or when there are no operators at the top level of a given 278 | // sub-expression.) 
279 | func (p *parser) parsePrimary() node { 280 | switch p.token.kind { 281 | case tokIdentifier: 282 | return p.parseIdentifierExpr() 283 | case tokIf: 284 | return p.parseIfExpr() 285 | case tokFor: 286 | return p.parseForExpr() 287 | case tokVariable: 288 | return p.parseVarExpr() 289 | case tokNumber: 290 | return p.parseNumericExpr() 291 | case tokLeftParen: 292 | return p.parseParenExpr() 293 | case tokEndOfTokens: 294 | return nil // this token should not be skipped 295 | default: 296 | oldToken := p.token 297 | p.next() 298 | return Error(oldToken, "unknown token encountered when expecting expression") 299 | } 300 | } 301 | 302 | // parseIdentifierExpr parses user defined identifiers (i.e. variable 303 | // and function names). If it is a function name, parse any arguments 304 | // it may take and emit a function call node. Otherwise, emit the variable. 305 | func (p *parser) parseIdentifierExpr() node { 306 | pos := p.token.pos 307 | name := p.token.val 308 | p.next() 309 | // are we a variable? else function call 310 | if p.token.kind != tokLeftParen { 311 | return &variableNode{nodeVariable, pos, name} 312 | } 313 | args := []node{} 314 | for p.next(); p.token.kind != tokRightParen; { 315 | switch p.token.kind { 316 | case tokComma: 317 | p.next() 318 | default: 319 | arg := p.parseExpression() 320 | if arg == nil { 321 | return nil 322 | } 323 | args = append(args, arg) 324 | } 325 | } 326 | p.next() 327 | return &fnCallNode{nodeFnCall, pos, name, args} 328 | } 329 | 330 | // parseIfExpr, as the name suggest, parses each part of an if expression 331 | // and emits the result. 
332 | func (p *parser) parseIfExpr() node { 333 | pos := p.token.pos 334 | // if 335 | p.next() 336 | ifE := p.parseExpression() 337 | if ifE == nil { 338 | return Error(p.token, "expected condition after 'if'") 339 | } 340 | 341 | if p.token.kind != tokThen { 342 | return Error(p.token, "expected 'then' after if condition") 343 | } 344 | p.next() 345 | thenE := p.parseExpression() 346 | if thenE == nil { 347 | return Error(p.token, "expected expression after 'then'") 348 | } 349 | 350 | if p.token.kind != tokElse { 351 | return Error(p.token, "expected 'else' after then expr") 352 | } 353 | p.next() 354 | elseE := p.parseExpression() 355 | if elseE == nil { 356 | return Error(p.token, "expected expression after 'else'") 357 | } 358 | 359 | return &ifNode{nodeIf, pos, ifE, thenE, elseE} 360 | } 361 | 362 | // parseIfExpr parses each part of a for expression. The increment 363 | // step is optional and defaults to += 1 if unspecified. 364 | func (p *parser) parseForExpr() node { 365 | pos := p.token.pos 366 | p.next() 367 | if p.token.kind != tokIdentifier { 368 | return Error(p.token, "expected identifier after 'for'") 369 | } 370 | counter := p.token.val 371 | 372 | p.next() 373 | if p.token.kind != tokEqual { 374 | return Error(p.token, "expected '=' after 'for "+counter+"'") 375 | } 376 | 377 | p.next() 378 | start := p.parseExpression() 379 | if start == nil { 380 | return Error(p.token, "expected expression after 'for "+counter+" ='") 381 | } 382 | if p.token.kind != tokComma { 383 | return Error(p.token, "expected ',' after 'for' start expression") 384 | } 385 | 386 | p.next() 387 | end := p.parseExpression() 388 | if end == nil { 389 | return Error(p.token, "expected end expression after 'for' start expression") 390 | } 391 | 392 | // optional step 393 | var step node 394 | if p.token.kind == tokComma { 395 | p.next() 396 | if step = p.parseExpression(); step == nil { 397 | return Error(p.token, "invalid step expression after 'for'") 398 | } 399 | } 400 | 
401 | if p.token.kind != tokIn { 402 | return Error(p.token, "expected 'in' after 'for' sub-expression") 403 | } 404 | 405 | p.next() 406 | body := p.parseExpression() 407 | if body == nil { 408 | return Error(p.token, "expected body expression after 'for ... in'") 409 | } 410 | 411 | return &forNode{nodeFor, pos, counter, start, end, step, body} 412 | } 413 | 414 | // parseVarExpr parses an expression declaring (and using) mutable 415 | // variables. 416 | func (p *parser) parseVarExpr() node { 417 | pos := p.token.pos 418 | p.next() 419 | var v = variableExprNode{ 420 | nodeType: nodeVariableExpr, 421 | Pos: pos, 422 | vars: []struct { 423 | name string 424 | node node 425 | }{}, 426 | body: nil, 427 | } 428 | var val node 429 | 430 | // this forloop can be simplified greatly. 431 | if p.token.kind != tokIdentifier { 432 | return Error(p.token, "expected identifier after var") 433 | } 434 | for { 435 | name := p.token.val 436 | p.next() 437 | 438 | // are we initialized? 439 | val = nil 440 | if p.token.kind == tokEqual { 441 | p.next() 442 | val = p.parseExpression() 443 | if val == nil { 444 | return Error(p.token, "initialization failed") 445 | } 446 | } 447 | v.vars = append(v.vars, struct { 448 | name string 449 | node node 450 | }{name, val}) 451 | 452 | if p.token.kind != tokComma { 453 | break 454 | } 455 | p.next() 456 | 457 | if p.token.kind != tokIdentifier { 458 | return Error(p.token, "expected identifier after var") 459 | } 460 | } 461 | 462 | // 'in' 463 | if p.token.kind != tokIn { 464 | return Error(p.token, "expected 'in' after 'var'") 465 | } 466 | p.next() 467 | 468 | v.body = p.parseExpression() 469 | if v.body == nil { 470 | return Error(p.token, "empty body in var expression") 471 | } 472 | return &v 473 | } 474 | 475 | // parseParenExpr parses expressions offset by parens. 
476 | func (p *parser) parseParenExpr() node { 477 | p.next() 478 | v := p.parseExpression() 479 | if v == nil { 480 | return nil 481 | } 482 | if p.token.kind != tokRightParen { 483 | return Error(p.token, "expected ')'") 484 | } 485 | p.next() 486 | return v 487 | } 488 | 489 | // parseNumericExpr parses number literals. 490 | func (p *parser) parseNumericExpr() node { 491 | pos := p.token.pos 492 | val, err := strconv.ParseFloat(p.token.val, 64) 493 | p.next() 494 | if err != nil { 495 | return Error(p.token, "invalid number") 496 | } 497 | return &numberNode{nodeNumber, pos, val} 498 | } 499 | 500 | // Helper Functions 501 | 502 | // Error prints error message and returns a nil node. 503 | func Error(t token, str string) node { 504 | fmt.Fprintf(os.Stderr, "Error at %v: %v\n\tkind: %v\n\tvalue: %v\n", t.pos, str, t.kind, t.val) 505 | // log.Fatalf("Error at %v: %v\n\tkind: %v\n\tvalue: %v\n", p.pos, str, p.kind, p.val) 506 | return nil 507 | } 508 | 509 | // ErrorV prints the error message and returns a nil llvm.Value. 510 | func ErrorV(str string) llvm.Value { 511 | fmt.Fprintf(os.Stderr, "Error: %v\n", str) 512 | return llvm.Value{nil} // TODO: this is wrong; fix it. 513 | } 514 | 515 | // DumpTree spawns a goroutine to dump incoming AST subtrees and 516 | // re-emit them on the output channel. 517 | func DumpTree(in <-chan node) <-chan node { 518 | out := make(chan node) 519 | go func() { 520 | for { 521 | n, ok := <-in 522 | if !ok { 523 | close(out) 524 | return 525 | } 526 | spew.Dump(n) 527 | out <- n 528 | } 529 | }() 530 | return out 531 | } 532 | --------------------------------------------------------------------------------