├── staging └── .gitkeep ├── src ├── cmung.sh ├── miniboot.go.carml ├── cmung.carml ├── minicompiler.c.carml ├── carml.carml ├── newreader.c.carml ├── carmlc.h └── self_tco.c ├── test ├── test-function-if.carml ├── test-match-function.carml ├── test-match-base.carml ├── test-declare.carml ├── test-function-begin.carml ├── test-rewrite-tco.carml ├── test-while.carml ├── test-record.carml ├── test-func-err.frag └── test-type.carml ├── .gitignore ├── example ├── example3.carml ├── example2.carml ├── example1.carml ├── example6.carml ├── example4.carml ├── fib1.carml ├── fib0.carml ├── example5.carml ├── playground.carml ├── bitc2carml.carml ├── example0.carml └── cur.carml ├── attic ├── gen_match.py ├── gen.py ├── stage-mini-sexpr-main.go ├── cmung_nomatch.carml ├── cmung.carml └── hash_table.c ├── labs ├── old.carml ├── optionint.carml ├── rawdeque.carml ├── ch0.carml ├── json.carml ├── unix.carml ├── trie.c.carml ├── rewriter.carml ├── sexpr.carml ├── rewrite_match_bind.carml ├── let_rewrite.carml ├── woodchips.carml ├── mini-walk.carml ├── mini-sexpr.carml └── mini-interp.go ├── docs ├── go-anon-struct-tuple.md ├── README.md ├── haskell_core.txt ├── lldb-session.md ├── 2heresy-let.md ├── NOTES.md ├── 3heresy-storage.md ├── tooling.md ├── cml-README.md ├── 1heresy-types.md ├── 325m2.txt ├── opprec.md ├── 4heresy-modules.md ├── 0heresy-functiontypes.md └── todo.md ├── LICENSE ├── editors └── carml.vim └── README.md /staging/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/cmung.sh: -------------------------------------------------------------------------------- 1 | if [ "$1" != "" ] 2 | then 3 | sed -e 's/\([a-z]\)\-\([a-z]\)/\1_\2/g' -e 's/\?/_p/g' $1 4 | fi 5 | -------------------------------------------------------------------------------- /test/test-function-if.carml: -------------------------------------------------------------------------------- 1 | def fact x : int => int = if (< x 0) then (fact (abs x)) 2 | else if (eq? x 0) then 1 3 | else (mul x (fact (sub x 1))) 4 | -------------------------------------------------------------------------------- /test/test-match-function.carml: -------------------------------------------------------------------------------- 1 | def foo bar:int => string = match bar with 2 | 10 => "ten" 3 | 11 => "eleven" 4 | else => "something else" 5 | end 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.swo 3 | c29 4 | carml 5 | carmlc 6 | carmlc.old 7 | *.dSYM 8 | staging/* 9 | .idea/* 10 | .DS_Store 11 | .ninja_deps 12 | .ninja_log 13 | dump 14 | -------------------------------------------------------------------------------- /test/test-match-base.carml: -------------------------------------------------------------------------------- 1 | var x : int = 10 2 | 3 | match x with 4 | 10 => (printf "ten!\n") 5 | 11 => (printf "uh... 
11?\n") 6 | else => (printf "oh goodness...") 7 | end 8 | -------------------------------------------------------------------------------- /example/example3.carml: -------------------------------------------------------------------------------- 1 | def fact n : int => int = if (< n 0) then 2 | (fact (abs n)) 3 | else if (num= n 0) then 4 | 1 5 | else 6 | (mul n (fact (sub n 1))) 7 | 8 | def foo x : array[int] => int = (reduce fn y : int z : int => int = (sum y z) 0 x) 9 | -------------------------------------------------------------------------------- /example/example2.carml: -------------------------------------------------------------------------------- 1 | import unix 2 | var data = [""] 3 | 4 | def main ac : int al : array[string] => int = { 5 | let fh = (unix.open "./foo.txt" "r") in 6 | let lines = (unix.readlines fh) in { 7 | vector.append data lines 8 | unix.close fh 9 | } 10 | 0 11 | } 12 | -------------------------------------------------------------------------------- /test/test-declare.carml: -------------------------------------------------------------------------------- 1 | declare foo0 int => int 2 | @foo1 int 3 | @foo2 => int 4 | @foo3 Bar[int] 5 | @foo4 array[int] 6 | @foo5 => Bar[int] 7 | @foo6 => Bar 8 | @foo7 => Either[Bar[int] string] 9 | @foo8 Either[Bar[int] string] => int 10 | @foo9 => array[int] 11 | @foo10 int int array[int] Bar => Baz 12 | -------------------------------------------------------------------------------- /test/test-function-begin.carml: -------------------------------------------------------------------------------- 1 | # These should all parse to 2 | # fairly similar results. The 3 | # sole difference should be 4 | # that foo does not have an 5 | # explicit begin. 6 | 7 | def foo x : int => int = (sum x 10) 8 | def bar x : int => int = { sum x 10 } 9 | def baz x : int => int = { 10 | sum x 10 11 | } 12 | -------------------------------------------------------------------------------- /example/example1.carml: -------------------------------------------------------------------------------- 1 | type Option Tx { 2 | Some Tx 3 | Nothing 4 | } 5 | 6 | # would be interesting to implement Cons-cells 7 | # as a generic type here... you'd end up with 8 | # something... different I think. 9 | type List Tx { 10 | Cons Tx List[Tx] 11 | Nil 12 | } 13 | 14 | # poly Deque a = ... 15 | 16 | -------------------------------------------------------------------------------- /attic/gen_match.py: -------------------------------------------------------------------------------- 1 | for x in ["8","16","32","64"]: 2 | print """if(!strncmp(typespec->value, "U{0}", {1})) {{ 3 | strncat(dst, "uint{0}", {2}); 4 | }} 5 | """.format(x, len(x) + 1, len(x) + 4) 6 | 7 | for x in ["8","16","32","64"]: 8 | print """if(!strncmp(typespec->value, "I{0}", {1})) {{ 9 | strncat(dst, "int{0}", {2}); 10 | }} 11 | """.format(x, len(x) + 1, len(x) + 4) 12 | -------------------------------------------------------------------------------- /example/example6.carml: -------------------------------------------------------------------------------- 1 | record Test { 2 | bar : int 3 | baz : int 4 | } 5 | 6 | def main ac : int al : array[string] => int = { 7 | # stack-allocate a struct 8 | var f : Test = (make-struct 11 20) 9 | 10 | # set the "bar" member to 10 11 | set! (. f bar) 10 12 | 13 | printf "%d + %d is %d\n" (. f bar) (. f baz) (sum (. f bar) (. 
f baz)) 14 | 15 | 0 16 | } 17 | -------------------------------------------------------------------------------- /labs/old.carml: -------------------------------------------------------------------------------- 1 | #@(#) a base library for carML collections, meant to mimic what 2 | #@(#) we see in systems like SRFI-1 and SRFI-13/14. It's called 3 | #@(#) "old" because it is not module based, as the replacement 4 | #@(#) should be. So why not just write that first? Because I 5 | #@(#) have yet to define how modules are reified to the various 6 | #@(#) languages I support as output from carML 7 | 8 | 9 | -------------------------------------------------------------------------------- /example/example4.carml: -------------------------------------------------------------------------------- 1 | def unix.open path : string mode : string => int = { 2 | 3 | } 4 | 5 | def unix.close fd : int = { 6 | 7 | } 8 | 9 | def unix.readlines fd : int => array[string] = { 10 | 11 | } 12 | 13 | def main ac : int al : array[string] => int = { 14 | val fh = (unix.open "./foo.txt" "r") 15 | val lines = (unix.readlines fh) 16 | unix.close fh 17 | 0 18 | } 19 | -------------------------------------------------------------------------------- /example/fib1.carml: -------------------------------------------------------------------------------- 1 | # tail-recursive fib 2 | def fib n : int i : int j : int => int = if (< n 1) then i else (fib (- n 1) (+ i j) i) 3 | 4 | def main ac : int al : array[string] => int = { 5 | 6 | when (!= ac 2) do { 7 | printf "fib [number]\n" 8 | return -1 9 | } 10 | 11 | val n : int = (atoi (get al 1)) 12 | val t : int = (fib n 0 1) 13 | printf "(fib %d) == %d\n" n t 14 | 0 15 | } 16 | -------------------------------------------------------------------------------- /example/fib0.carml: -------------------------------------------------------------------------------- 1 | # simple, non-tail-recursive fib example 2 | def fib n : int => int = if (<= n 1) then 1 else (+ (fib (- n 1)) (fib (- n 2))) 3 | 4 | def main ac : int al : array[string] => int = { 5 | 6 | when (!= ac 2) do { 7 | printf "fib [number]\n" 8 | return -1 9 | } 10 | 11 | val n : int = (atoi (get al 1)) 12 | val t : int = (fib n) 13 | printf "(fib %d) == %d\n" n t 14 | 0 15 | } 16 | -------------------------------------------------------------------------------- /docs/go-anon-struct-tuple.md: -------------------------------------------------------------------------------- 1 | Just a quick note for tuples: 2 | 3 | ``` 4 | func bar() struct {string; int; } { 5 | return struct {string; int;}{"test", 10,} 6 | } 7 | ``` 8 | 9 | This also is a huge gotcha in go: 10 | 11 | ``` 12 | // this works 13 | return struct {string 14 | int 15 | }{"test", 10,} 16 | 17 | // this is an error 18 | return struct {string int}{"test", 10} 19 | 20 | // because you need to have semicolons in there 21 | ``` 22 | -------------------------------------------------------------------------------- /test/test-rewrite-tco.carml: -------------------------------------------------------------------------------- 1 | #@(#) a simple tail recursive function that can be rewritten to a 2 | #@(#) while loop. 
Meant to test the self-TCO setup 3 | 4 | def foo x:int y:int => int = if (<= x 0) then y else (foo (- x 1) (+ x y)) 5 | 6 | # this version should be rewritten to a while loop 7 | def fibtco i:int j:int n:int => int = if (<= i 0) then i else (fibtco j (+ i j) (- n 1)) 8 | 9 | # this version should not 10 | def fib n:int => int = if (<= n 2) then 1 else (+ (fib (- n 1)) (fib (- n 2))) 11 | -------------------------------------------------------------------------------- /test/test-while.carml: -------------------------------------------------------------------------------- 1 | def main ac : int al : array[string] => int = { 2 | # whoops; the compiler should warn here... 3 | # TODO: this was originally a `val` but failed in C 4 | var foo : int = 0 5 | while (< foo 10) do { 6 | printf "foo: %d\\n" foo 7 | set! foo (+ foo 1) 8 | } 9 | 0 10 | } 11 | 12 | def bar lim:int => int = { 13 | var foo : int = 0 14 | while (< foo lim) do { 15 | set! foo $ + foo 1 16 | when (> foo 100) do foo 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | This is the documents directory, which houses misc documents that I write whilst thinking about carML. 4 | 5 | - (NOTES.md)[NOTES.md]: misc notes, mostly about lexing. 6 | - (cml-README.md)[cml-README.md]: a previous attempt at something like carML, which I had called cML. 7 | - (haskell_core.txt)[haskell_core.txt]: some quick notes on GHC's Core dialect. 8 | - (todo.md)[todo.md]: my current TODO list for carML. 9 | - (tooling.md)[tooling.md]: ideas surrounding tooling, like static analysis and the like. 10 | -------------------------------------------------------------------------------- /docs/haskell_core.txt: -------------------------------------------------------------------------------- 1 | type CoreExpr = Expr Var 2 | 3 | data Expr b -- "b" for the type of binders, 4 | = Var Id 5 | | Lit Literal 6 | | App (Expr b) (Arg b) 7 | | Lam b (Expr b) 8 | | Let (Bind b) (Expr b) 9 | | Case (Expr b) b Type [Alt b] 10 | | Cast (Expr b) Coercion 11 | | Tick (Tickish Id) (Expr b) 12 | | Type Type 13 | 14 | type Arg b = Expr b 15 | type Alt b = (AltCon, [b], Expr b) 16 | 17 | data AltCon = DataAlt DataCon | LitAlt Literal | DEFAULT 18 | 19 | data Bind b = NonRec b (Expr b) | Rec [(b, (Expr b))] 20 | 21 | -------------------------------------------------------------------------------- /test/test-record.carml: -------------------------------------------------------------------------------- 1 | # a simple test record 2 | # need to add: 3 | # 4 | # - actual tests of helpers & such 5 | # - polymorphic tests 6 | 7 | record Foo { 8 | bar:int # should support simple types 9 | baz:int 10 | blah:float 11 | eh:string # should support complex types 12 | # NOTE (lojikil) I changed the test from 13 | # ref[Foo] to simply Foo, because not all 14 | # targets currently have a ref[any] output 15 | # in Golang that should be easy enough to 16 | # add, but for now, just make sure we can 17 | # pass tests in any target... 18 | next:Foo # should support recursive definition 19 | } 20 | -------------------------------------------------------------------------------- /labs/optionint.carml: -------------------------------------------------------------------------------- 1 | # @(#) a simple test case for working with `type` forms 2 | # @(#) and making sure the C output is correct. 
3 | 4 | type OptionInt { 5 | Some int 6 | None 7 | } 8 | 9 | def main ac:int al:array[string] => int = { 10 | 11 | when (<> ac 2) do { 12 | printf "usage: optionint [number]\n" 13 | return 1 14 | } 15 | 16 | val f : OptionInt = (OptionInt.Some $ atoi $ get al 1) 17 | match f with 18 | (OptionInt.Some y) given (> y 10) => (printf "yes and y is greater than 10: %d\n" x) 19 | (OptionInt.Some x) => (printf "yes: %d\n" x) 20 | (OptionInt.None) => (printf "oh no!\n") 21 | end 22 | 0 23 | } 24 | -------------------------------------------------------------------------------- /example/example5.carml: -------------------------------------------------------------------------------- 1 | record Test { 2 | bar : int 3 | baz : int 4 | } 5 | 6 | def main ac : int al : array[string] => int = { 7 | # var, because we want to assign to it 8 | # and technically should be `ref of Test` 9 | var f : ref[Test] = (malloc (sizeof Test)) 10 | 11 | # I dislike having to use `->` at some level, because 12 | # I feel like that's an implementation detail that the 13 | # compiler should be tracking, but it works for now... 14 | set! (-> f bar) 10 15 | set! (-> f baz) 20 16 | # the \\n is a bug in the compiler's output of C. 17 | # will fix 18 | printf "%d + %d is %d\\n" (-> f bar) (-> f baz) (sum (-> f bar) (-> f baz)) 19 | free f 20 | 0 21 | } 22 | -------------------------------------------------------------------------------- /example/playground.carml: -------------------------------------------------------------------------------- 1 | # just a syntactic playground to see what's what. 2 | 3 | #@arrayIota3 array of Num (Num => Num) => () 4 | def arrayIota3 arr:array[int] fun:function[int int] = { 5 | letrec internalIota = fn idx = { 6 | set-array-index! arr (fun idx) x 7 | internalIota (sum idx 1) 8 | } in 9 | internalIota 0 10 | } 11 | 12 | #@arrayIota4 array of Num (Num => Num) => () 13 | def arrayIota4 arr:array[int] fun:function[int int] { 14 | # should `fn` forms accept a `begin` 15 | # form sans `=` just like `def`? 16 | # looks... so much cleaner 17 | letrec internalIota = fn idx { 18 | Array.set! arr (fun idx) x 19 | internalIota $ sum idx 1 20 | } in 21 | internalIota 0 22 | } 23 | -------------------------------------------------------------------------------- /src/miniboot.go.carml: -------------------------------------------------------------------------------- 1 | #@(#) a miniature boot system for Golang 2 | #@(#) basically, we want to target the Golang 3 | #@(#) ecosystem, and use that as a launching 4 | #@(#) off point for the rest of CarML 5 | #@(#) Why Golang? Because the RTS for Golang is 6 | #@(#) already decent enough, and I don't want to 7 | #@(#) spend more time in the C RTS really. I feel 8 | #@(#) as if I'm spending more time fighting previous 9 | #@(#) choices (like being agnostic to requiring a type) 10 | #@(#) that I have since changed than I am working on 11 | #@(#) things that are interesting to me. 
Since the 12 | #@(#) Golang output is already decent enough, we can 13 | #@(#) start here for producing decent system, and 14 | #@(#) then boot C and the like again from *that* 15 | 16 | 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016-2021, Stefan Edwards 2 | 3 | Permission to use, copy, modify, and/or distribute this software for 4 | any purpose with or without fee is hereby granted, provided that the 5 | above copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 13 | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /example/bitc2carml.carml: -------------------------------------------------------------------------------- 1 | # This is a collection of little programs/functions 2 | # that are directly translated from BitC. carML, like 3 | # cML before it, is pretty heavily influenced by BitC, 4 | # so I naturally wanted to see translations 5 | # could almost be @fact Num of a => a 6 | # @fact int => int 7 | def fact x : int => int = 8 | if (< x 0) then (fact (abs x)) 9 | else if (eq? x 0) then 1 10 | else (mul x (fact (sub x 1))) 11 | 12 | # @fact0 int => int 13 | def fact0 x : int => int = match x with 14 | # hmm... how does this work elsewhere? I like it tho... 15 | #case x, (< x 0) => (fact0 $ abs x) 16 | 0 => 1 17 | y given (< y 0) => (fact0 $ abs y) 18 | else => (mul x $ fact0 $ sub x 1) 19 | end 20 | 21 | # for more info on BitC see: 22 | # https://web.archive.org/web/20130815183427/http://www.bitc-lang.org/docs/bitc/spec.html 23 | -------------------------------------------------------------------------------- /test/test-func-err.frag: -------------------------------------------------------------------------------- 1 | # The following fragments cause an error in carML/C currently: 2 | 3 | - TWHEN => (self-tco? name (-> src $ -> $ get children 1)) 4 | + TWHEN => (self-tco? name (-> src $ get children 1)) 5 | TIF => (any-of 6 | - (self-tco? name (-> src $ -> $ get children 1)) 7 | - (self-tco? name (-> src $ -> $ get children 2))) 8 | + (self-tco? name (-> src $ get children 1)) 9 | + (self-tco? name (-> src $ get children 2))) 10 | 11 | # what I think is happening is that the `->` is a `coperator`, so 12 | # it just assumes that there are two parameters, but one is actually 13 | # nil. note what the `$` expands to: 14 | 15 | (self-tco? 
name (-> src (-> (getchildren 1)))) 16 | 17 | # which then calls `llcwalk` or `llgwalk` on `head->children[1]`, which 18 | # would be nil 19 | 20 | # another test case: 21 | (foo bar 22 | # will cause a sigsegv 23 | -------------------------------------------------------------------------------- /labs/rawdeque.carml: -------------------------------------------------------------------------------- 1 | # @(#) a simple deque backing 2 | # @(#) just meant for testing, possibly for modes where 3 | # @(#) we don't flatten types in C 4 | 5 | record RawDeque { 6 | # really need to be able to say 7 | # U32/U64 here 8 | capacity: int 9 | length: int 10 | data: ref[any] 11 | } 12 | 13 | # have heap and stack-based make-helpers? 14 | # the heap-based helper 15 | # returns a GC-allocated RawDeque reference 16 | def make_RawDeque => ref[RawDeque] = { 17 | var ret: ref[RawDeque] = nil 18 | set! ret $ hmalloc $ sizeof RawDeque 19 | set! (-> ret capacity) 64 20 | set! (-> ret length) 0 21 | set! (-> ret data) $ hmalloc $ * 64 $ sizeof RawDeque 22 | ret 23 | } 24 | 25 | # the stack-based helper 26 | # returns a stack-allocated RawDeque 27 | def make_RawDeque_low => RawDeque = { 28 | var ret: RawDeque = (make-struct 64 0 nil) 29 | set! (. ret data) $ make-array nil 64 30 | ret 31 | } 32 | -------------------------------------------------------------------------------- /docs/lldb-session.md: -------------------------------------------------------------------------------- 1 | # Just a good thing to note about LLDB 2 | 3 | We can introspect AST objects nicely: 4 | 5 | ``` 6 | (lldb) expr (int) printf("%d %s\n", ((AST *)0x0000000100423a20)->tag, ((AST *)0x0000000100423a20)->value); 7 | 0 foreach-other 8 | (int) $3 = 16 9 | (lldb) expr (int) printf("%d %s %d\n", ((AST *)0x0000000100423a20)->tag, ((AST *)0x0000000100423a20)->value, ((AST *)0x0000000100423a20)->lenchildren); 10 | 0 foreach-other 3 11 | (int) $4 = 18 12 | (lldb) expr (int) printf("%d\n", ((AST *)0x0000000100423a20)->children[0]->tag) 13 | 26 14 | (int) $5 = 3 15 | (lldb) expr (int) printf("%d\n", ((AST *)0x0000000100423a20)->children[1]->tag) 16 | 1 17 | (int) $6 = 2 18 | (lldb) expr (int) printf("%d\n", ((AST *)0x0000000100423a20)->children[2]->tag) 19 | error: Execution was interrupted, reason: Attempted to dereference an invalid pointer.. 20 | The process has been returned to the state before expression evaluation. 
21 | (lldb) expr (int) printf("%d\n", ((AST *)0x0000000100423a20)->children[1]->tag) 22 | ``` 23 | -------------------------------------------------------------------------------- /attic/gen.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if len(sys.argv) != 4: 4 | print "usage: gen.py [symbol name] [string] [return type]" 5 | sys.exit(1) 6 | 7 | sym = sys.argv[1] 8 | tar = sys.argv[2] 9 | returntype = sys.argv[3] 10 | idx = 0 11 | 12 | for t in tar: 13 | print "case {0}{1}: ".format(sym, idx) 14 | idx += 1 15 | print """ if(cur == '{0}') {{ 16 | state = {1}{2}; 17 | }} else if(iswhite(cur) || cur == '\\n' || isbrace(cur)) {{ 18 | ungetc(cur, fdin); 19 | buf[idx - 1] = '\\0'; 20 | return TIDENT; 21 | }} else {{ 22 | state = LIDENT0; 23 | }} 24 | break;""".format(t, sym, idx) 25 | 26 | print """ 27 | case {0}{1}: 28 | if(isident(cur)) {{ 29 | substate = LIDENT0; 30 | }}else if(iswhite(cur) || cur == '\\n' || isbrace(cur)) {{ 31 | ungetc(cur, fdin); 32 | return {2}; 33 | }} else {{ 34 | strncpy(buf, "malformed identifier", 512); 35 | return TERROR; 36 | }} 37 | break; 38 | """.format(sym, idx, returntype) 39 | -------------------------------------------------------------------------------- /test/test-type.carml: -------------------------------------------------------------------------------- 1 | # a simple test of ADTs 2 | # Fixed bug: Shouldn't *need* Bar here 3 | # Fixed bug: parameter-list is malformed 4 | # Fixed bug: Left/Right are missing 5 | # Fixed bug: newline is not being read 6 | # Fixed bug: complex types are not collapsed 7 | 8 | # Test: tag with single type 9 | type Baz { 10 | BazOk int 11 | BazErr int 12 | } 13 | 14 | # Test: tags with multiple types 15 | type Blah { 16 | BlahOk int int int 17 | BlahErr int string int 18 | } 19 | 20 | # Test: we can apply constructors to complex types 21 | type Bloo { 22 | BlooOk array[int] 23 | BlooErr int string 24 | BlooWat array[any] int 25 | } 26 | 27 | type Frob { 28 | FrobOk array[Bloo] 29 | FrobErr array[any] Bloo 30 | FrobWot Bloo int 31 | FrobWat int int string array[int] 32 | } 33 | 34 | # Test: tags separated with semi-colons 35 | type Foo { 36 | Left int 37 | Right int 38 | } 39 | 40 | # Test: parameter list 41 | # XXX (lojikil) currently not working 42 | #type Result Success Fail { 43 | # Ok Success 44 | # Err Fail 45 | #} 46 | -------------------------------------------------------------------------------- /attic/stage-mini-sexpr-main.go: -------------------------------------------------------------------------------- 1 | // this is a Go fragment that... calls code compiled by 2 | // the carML compiler to run a simple S-Expression parser 3 | // 4 | // the point here is mostly to show how this would work, 5 | // but it's also useful in my testing, to show that we 6 | // can easily integrate the two 7 | // 8 | // to use this, compile mini-sexpression.carml with the 9 | // Golang flag (`+g`) and append this file to the end of 10 | // the result, adding `package main` and `import "fmt"` 11 | // to the top of the resulting pass. This is literally 12 | // how I've been testing the compiler... 
13 | func main() { 14 | var token Token 15 | src := "(call (identifier +) (integer 10) (integer 20))" 16 | idx := 0 17 | offset := 0 18 | fmt.Printf("src: %s\n", src) 19 | for ; idx < 15; idx++ { 20 | token = next(src, offset) 21 | offset = token.lexeme_offset + len(token.lexeme) 22 | fmt.Printf("token: %s, offset: %d\n", token.lexeme, offset) 23 | } 24 | 25 | result := read_list(src, 0) 26 | 27 | fmt.Printf("result: %T%v\n", result, result) 28 | 29 | roundtrip := sexpression2string(result) 30 | 31 | fmt.Printf("round trip? %s\n", roundtrip) 32 | } 33 | -------------------------------------------------------------------------------- /src/cmung.carml: -------------------------------------------------------------------------------- 1 | # a very simple C-mung implementation 2 | # we have two functions: 3 | # 4 | # - `c_safe_char`, which rewrites characters to ones that are safe for C 5 | # - `cmung`, which simply iterates over one string 6 | # 7 | # I think `c_safe_char` is pretty nice, sans just relying on type inference, 8 | # but `cmung` could be improved: 9 | # 10 | # - a `string.map!` form to handle application 11 | # - generic `map!` form that takes any `Seq` module/type class 12 | # 13 | # or whatever. it's not terrible tho, all things told 14 | 15 | def c_safe_char x:char => char = match x with 16 | '!' => 'B' 17 | '%' => '_' 18 | '$' => '_' 19 | '?' => 'p' 20 | '<' => 'l' 21 | '>' => 'g' 22 | '=' => 'e' 23 | '@' => '_' 24 | '^' => '_' 25 | '&' => '_' 26 | '|' => '_' 27 | '*' => '_' 28 | '.' => '_' 29 | '+' => '_' 30 | '-' => '_' 31 | else => x 32 | end 33 | 34 | def cmung src:string dst:string len:int => string = { 35 | var idx:int = 0 36 | while (< idx len) do { 37 | set! (get dst idx) $ c_safe_char $ get src idx 38 | set! idx $ + idx 1 39 | } 40 | dst 41 | } 42 | -------------------------------------------------------------------------------- /docs/2heresy-let.md: -------------------------------------------------------------------------------- 1 | # `let` signals intent about variable scope 2 | 3 | I was just thinking about the notion of `let` vs `val`: with a `let`, we're signaling that we *do not* want this variable 4 | to live on outside its scope. That intentionality is probably harmless in most cases; for example, if we have a function such as: 5 | 6 | function foo x:int = 7 | let y:int = (+ x 10) in 8 | (printf "y is %d\n" y) 9 | 10 | we don't *actually* care what happens to `y` afterward, as the function terminates. However, what if we have something more nuanced? 11 | 12 | function bar x:int = { 13 | let y:int = (+ x 10) in 14 | (printf "y is %d\n" y) 15 | let y:int = (+ x 20) in 16 | (printf "y is %d\n" y) 17 | } 18 | 19 | here, we're clearly signaling intent that `y` shouldn't outlive the scope of the enclosing `let`. This isn't *really* revolutionary 20 | thinking, it's just interesting to me given that I am supporting both `let` and `val/var` in carML, and that support has an interesting 21 | impact on how we code: the `let` form is clearly trivially rewritten to a fresh symbol, and is easily compsed into SSA via ANF, but a 22 | `val` may have larger impact depending upon its exposure to other programs (and indeed, we may not want to do SSA/ANF for programs we 23 | expect humans to consume...) 24 | -------------------------------------------------------------------------------- /labs/ch0.carml: -------------------------------------------------------------------------------- 1 | # @(#) a simple character frequency counter 2 | # it's interesting... 
3 | # the current state of carML's RTS is such that 4 | # the code I have to write is quite imperative. 5 | # More analysis passes are required to make this 6 | # much more functional... there's so many set!'s 7 | # and the like in the code below because things 8 | # like the let form aren't up and running, or 9 | # there's no foreach/map working within the 10 | # compiler that can be fused... and yet, I'm 11 | # actually quite happy that the code can already 12 | # be used this way. 13 | def main ac:int al:array[string] => int = { 14 | var composition : array[int] = (make-array int 256 0) 15 | var fh : ref[FILE] = NULL 16 | var ch : int = 0 17 | var tmp : int = 0 18 | 19 | set! fh $ fopen (get al 1) "r" 20 | 21 | when (eq? fh NULL) do { 22 | printf "cannot open file %s\n" $ get al 1 23 | return 1 24 | } 25 | 26 | while (not $ feof fh) do { 27 | set! ch $ fgetc fh 28 | when (<> ch 0x0D) do { 29 | set! tmp $ get composition ch 30 | set! (get composition ch) $ + tmp 1 31 | } 32 | } 33 | 34 | set! tmp 32 35 | 36 | while (< tmp 127) do { 37 | printf "%c => %d\n" tmp (get composition tmp) 38 | set! tmp $ + tmp 1 39 | } 40 | 41 | 0 42 | } 43 | -------------------------------------------------------------------------------- /labs/json.carml: -------------------------------------------------------------------------------- 1 | #@(#) a simple JSON wrapper, meant for reading & writing JSON 2 | #@(#) should be fairly standard in the base of carML, but could 3 | #@(#) be interesting vis-a-vis certain allocation techniques. 4 | 5 | type Json { 6 | Error string 7 | Null 8 | Int int 9 | Float float 10 | String string 11 | Bool bool 12 | Array array[Json] 13 | Object array[tuple[string Json]] 14 | } 15 | 16 | def from_string src:string => Json = { 17 | 18 | } 19 | 20 | def tuple_to_string src:tuple[string Json] => string = { 21 | strings.join "," $ make-array string (_1 src) (to_string $ _2 src) 22 | } 23 | 24 | # intersting, hitting a bug in carml/C here... 25 | # so two things happened: 26 | # 1. I forgot to add a #\) to the Json.Object call below, and that caused a crash 27 | # 1. I forgot to add an `end`, which exacerbated it... 28 | # intersting! 29 | def to_string src:Json => string = { 30 | match src with 31 | (Json.Error x) => "10" 32 | (Json.Null) => "null" 33 | (Json.Int x) => "11" 34 | (Json.Float x) => "12" 35 | (Json.String x) => "13" 36 | (Json.Bool x) => "14" 37 | (Json.Array x) => (strings.format "[%s]" $ strings.join "," $ map to_string x) 38 | (Json.Object x) => (strings.format "{%s}" $ strings.join "," $ map tuple_to_string x) 39 | end 40 | } 41 | -------------------------------------------------------------------------------- /labs/unix.carml: -------------------------------------------------------------------------------- 1 | # @(#) a simple Unix/Posix interaction library 2 | # @(#) just a test to see what it would be like. 
3 | 4 | type UnixResult { 5 | Ok int 6 | Err int string # lookup errno 7 | } 8 | 9 | def unix.open path : string mode : int => UnixResult = { 10 | # TODO: let binding rewriting 11 | val res : int = (open path mode) 12 | # TODO: match form with guard clauses 13 | if (< res 0) then 14 | # TODO: finish $ form, would be so much cleaner: 15 | # UnixResult.Err res $ strerror res 16 | (UnixResult.Err res (strerror res)) 17 | else 18 | (UnixResult.Ok res) 19 | } 20 | 21 | # TODO: fix single Complex type in parameter to C generation 22 | def unix.close_res fh : UnixResult => UnixResult = { 23 | var hdl : int = 0 24 | var res : int = 0 25 | # TODO: pattern matching binds 26 | if (eq? (. fh tag) TAG_UnixResult_ERR) then 27 | fh 28 | else { 29 | set! hdl (. fh (. members (. OK_t m_1))) 30 | set! res (close hdl) 31 | if (< res 0) then 32 | (UnixResult.Err res (strerror res)) 33 | else 34 | (UnixResult.Ok res) 35 | } 36 | } 37 | 38 | def unix.close fh : int => UnixResult = { 39 | val res : int = (close fh) 40 | if (< res 0) then 41 | (UnixResult.Err res (strerror res)) 42 | else 43 | (UnixResult.Ok res) 44 | } 45 | -------------------------------------------------------------------------------- /docs/NOTES.md: -------------------------------------------------------------------------------- 1 | # Lexer bugs: 2 | 3 | - Single character identifiers that start with a keyword letter a broken ("b\nb" is seen as one identifier) 4 | 5 | # keywords 6 | 7 | - `lambda` 8 | - `declare` 9 | - `ref` 10 | - `int` 11 | - `string` 12 | - `char` 13 | - `bool` 14 | - `array` 15 | - `float` 16 | - `match` 17 | - `with` 18 | - `use` 19 | - `module` 20 | - `case` 21 | - `of` 22 | - `define-alien`, `letrec`, `val-alien` 23 | 24 | # Parser bugs: 25 | 26 | None currently (save for missing functionality) 27 | 28 | # Types: 29 | 30 | ## records: 31 | 32 | Records are an interesting case... I do think that `val`-style typing should be used: 33 | 34 | record foo = { 35 | bar : int 36 | baz 37 | } 38 | 39 | here, `foo.bar` has an explicit `int` type, whereas `baz` will be inferred from program 40 | usage... The question remains then how we _allocate_ records. I'd prefer to unbox them 41 | at all times, and even go so far as to do what C# does, and flatten records when passed 42 | to functions. 43 | 44 | Also, what is the accessor syntax? Haskell does what SRFI-9 does, and defines an accessor 45 | function: 46 | 47 | data Person = Person { first_name :: String 48 | , last_name :: String 49 | , age :: Int 50 | } deriving (Eq, Ord, Show) 51 | 52 | print_age person = print $ age person 53 | -------------------------------------------------------------------------------- /docs/3heresy-storage.md: -------------------------------------------------------------------------------- 1 | # Variable Storage is a pointless implementation detail (most of the time) 2 | 3 | I have been thinking about this a lot lately: what is the difference between the following: 4 | 5 | def foo x:int => Either[int, string] = { 6 | ... 7 | } 8 | 9 | def bar x:int => ref[Either[int, string]] = { 10 | ... 11 | } 12 | 13 | Assuming they undertake the same options? For most programmers, they do not *really* care that something 14 | is a reference and something else is a value; that is an implementation detail they do not care about. 15 | Eventually, I would like carML to get to the point where most of the time we do *not* care about such things, 16 | until it is needed (for something like C interop). 
I run into it all the time, thinking about types and such, 17 | and whilst the compiler should be able to adapt, there is no need to *have* to specify storage location for 18 | most things (until you know you do). I think the thing that drives me most with this is: 19 | 20 | a low barrier to entry and no ceiling. 21 | 22 | I want carML to be relatively consumable for most folks, but I would *also* like those hardcore few who are 23 | looking to replace C or the like to be able to do so, all with the same language. Rust has done well there, 24 | although, like ATS, the barrier to entry is still quite high (tho clearly nothing has as high a barrier as 25 | ATS does, goodness). 26 | -------------------------------------------------------------------------------- /example/example0.carml: -------------------------------------------------------------------------------- 1 | # cannonical loop example 2 | def arrayIota arr f = begin 3 | def arrayIotaPrime arr fun idx = begin 4 | if (< idx (array-length arr)) then { 5 | set-array-index! arr (fun idx) idx 6 | arrayIotaPrime arr fun (add idx 1) 7 | } else () 8 | end 9 | arrayIotaPrime arr f 0 10 | end 11 | 12 | # same as the above, but here we 13 | # use the closure in arrayIota' to 14 | # capture variables, instead of 15 | # explicitly passing them in 16 | declare arrayIota0 : procedure[array[Num] function[Num => Num]] 17 | def arrayIota0 arr fun = begin 18 | def arrayIotaPrime idx = begin 19 | if (< idx (array-length arr)) then { 20 | set-array-index! arr (fun idx) idx; 21 | arrayIotaPrime (add idx 1); 22 | } else () 23 | end 24 | arrayIotaPrime 0; 25 | end 26 | 27 | # same as the above, but using `foreachIndex` 28 | @arrayIota1: procedure[array[Num] function[Num => Num]] 29 | def arrayIota1 arr fun = { 30 | foreachIndex fn x = (set-array-index! arr (fun x) x) arr 31 | } 32 | 33 | # same as the above, but without the `{}` block 34 | @arrayIota2: procedure[array[Num] function[Num => Num]] 35 | def arrayIota2 arr fun = (foreachIndex fn x = (set-array-index! arr (fun x) x) arr) 36 | 37 | # eventually, `def` forms should work 38 | # like `begin` forms 39 | def foo x = (add x 1) 40 | 41 | val f = (make-array 10) 42 | 43 | (arrayIota f foo) 44 | 45 | (println f) 46 | -------------------------------------------------------------------------------- /docs/tooling.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | I think the most important thing for new languages is not the language itself, 4 | but rather that the **ecosystem** _surounding_ the language is strong. To that 5 | end, there's a few tools I'd like carML to include as we get nearer to release: 6 | 7 | - static analysis 8 | - symbolic execution/abstract interpretation 9 | - package/project configuration & management 10 | - base set of libraries 11 | 12 | # Static Analysis 13 | 14 | carML is meant to be simpler to analyze than C proper, because it is 15 | smaller, with less ambigious syntax, and restricted program space, so 16 | static analysis internally at release would be useful. 17 | 18 | # SE/AI 19 | 20 | This one I'm less set on, but it would be nice to have a Frama-C 21 | work-alike. 22 | 23 | # Packaging 24 | 25 | I think having reasonable package & library management is pretty 26 | essential nowadays. I'd like to add a tool, `carmlize`, to the base 27 | install that basically handles the core setup of projects, libraries 28 | and the like. 
My thinking is that `use` and `import` would check 29 | some set of paths, such as `./lib:~/.carml/lib:/usr/lib/carml:/usr/local/lib/carml:$CARML_LIB` 30 | so that clients can easily bundle, have virtual environments, &c. 31 | all via default install. 32 | 33 | `carmlize [init|install|ginstall|bundle|dep-install...]``` 34 | 35 | # Base Libs 36 | 37 | - HTTP (including WebDAV, CalDAV) 38 | - TLS 39 | - SMTP 40 | - FTP 41 | - tar/zip 42 | - data structures (including png/jpeg/gif/&c.) 43 | - image 44 | - CLI/GUI 45 | -------------------------------------------------------------------------------- /attic/cmung_nomatch.carml: -------------------------------------------------------------------------------- 1 | def find srcset : array of char dstset : array of char cur : char len : int => char = { 2 | var idx : int = 0 3 | while (< idx len) do { 4 | when (eq? (get srcset idx) cur) do (return dst) 5 | set! idx (+ idx 1) 6 | } 7 | cur 8 | } 9 | 10 | # this code is _so_ imperative 11 | # but it gets me where I want to go... 12 | # I need: 13 | # - HOFs, it's really a must 14 | # - combinators like any/every would be useful 15 | # - need to figure out ways to encode variable arguments... 16 | def cmung src : string dst : string len : int => string = { 17 | var idx : int = 0 18 | val srcset : array of char = ['!', '%', '$', '?', '<', '>', '=', '@', '^', '&', '|', '*', '.'] 19 | val dstset : array of char = ['B', '_', '_', 'p', 'l', 'g', 'e', '_', '_', '_', '_', '_', '_'] 20 | 21 | # foreach would be great here 22 | while (< idx len) do { 23 | 24 | set! cur (get dst idx) 25 | 26 | # would be nicer with `any` or `every` 27 | # this is _basically_ an abomination 28 | if (|| (|| (&& (>= cur 'A') (<= cur 'Z')) (&& (>= cur 'a') (<= cur 'z'))) (&& (>= cur '0') (<= cur '9'))) then 29 | (set! cur (get dst idx)) 30 | else 31 | (set! cur (find srcset dstset cur 13)) # would be nicer with dynamic length... 32 | 33 | set! idx (+ idx 1) 34 | 35 | when (eq? (get src idx) '\0') do (return dst) 36 | } 37 | } 38 | 39 | def cmung_alloc src : string len : int => string = { 40 | val dst : string = (malloc (* (sizeof src) len)) 41 | cmung src dst len 42 | } 43 | -------------------------------------------------------------------------------- /docs/cml-README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | A simple, runtime-less, GC-less ML dialect that compiles to Human-Readable C. Syntactically, it is most similar to 4 | ATS & Yeti, and a previous project I had worked on called "Spearhead". 5 | 6 | # Rationale 7 | 8 | I wanted a hyper-minimal ML dialect for system work, but something simpler than ATS. While I can, and do, use 9 | PreDigamma (my restricted Scheme dialect), having a super small ML dialect to throw around isn't bad either. 10 | Additionally, it allows me to experiment with items I find interesting in a GC-less environment (PreDigamma still 11 | requires the use of a GC, for now). 12 | 13 | # Syntactic Forms 14 | 15 | - `{ ... }` a begin/end pair, with statements separates by `;` 16 | - `match ... with ... chtam` a match form, with guard-clauses, binding, and separated by '|' 17 | - `if ... then ... else ...` No single-armed if 18 | - `when ...` Single-armed if 19 | - `val ... = ...` a variable binding 20 | - `let ... in ...` name binding 21 | - `define ... (parameters ...) = ...` function binding 22 | - `fn (parameters ...) = ...` Lambda 23 | - `define-alien ... 
(parameters ...)` : CFFI 24 | - `val-alien ...` extern C variable 25 | - `define-record ... is ... end` record definitions 26 | - `define-variant ... = ...` (polymorphic) variant definitions 27 | 28 | # Default Library 29 | 30 | - Strings: string-length, string-index, string-string, string-substring, string-match, string-glob 31 | - Vectors: vector-length, vector-index, vector-subvector 32 | - GC: region-gc, region-alloc, region-free 33 | 34 | # C Interaction 35 | 36 | # Examples 37 | 38 | -------------------------------------------------------------------------------- /docs/1heresy-types.md: -------------------------------------------------------------------------------- 1 | # Heresy 1: More thoughts on types 2 | 3 | _2018-APR-01_ 4 | 5 | Just to cut the question short first, no this isn't April Fool's, and no, it's not an Easter discussion either, as I'm Orthodox. However, 6 | I've been thinking about types more again today, in light of some linearization work I was doing on the type processing in carML currently, 7 | as well as my [previous heresy on function types](0heresy-functiontypes.md). Currently, complex types don't *know* their parameterized types. 8 | For example: `array` is technically equivalent to `array[Any]`. This leads to some complexity in the parser: because we allow for `array`, we 9 | have to jump through some hoops to parse the type definitions. This is to make things like `array` come out to C as `void *`, but is the 10 | short syntax _really_ worth it? Thinking about it further, it seems like the shortcut isn't really worth the trouble: 11 | 12 | - it increases our ambiguity in parsing, and leads to the large state machine I currently use to decide syntax transforms at automatons 13 | - I think it _decreases_ readability, and is better written as `array[Any]` or `deque[Any]` 14 | 15 | So I think that's the best: I'll make complex types "aware" of their parameterized types, such that an `array` *must* be followed by an 16 | array-literal of types (for example). This will cut down on the type parsing code (yay less code!), and will mean that users must be 17 | more explicit about the types the specify to the demand system (tho for obvious reasons the demand system will deman/infer a type when 18 | one is *not* specified by the user). 19 | -------------------------------------------------------------------------------- /docs/325m2.txt: -------------------------------------------------------------------------------- 1 | [note]: this document is something I wrote I toyed with in ~2006, and then finally 2 | wrote fully in ~2011. It's funny because these ideas became carML to a large extent, 3 | but also the Scala community added Scala-Native with similar features. 4 | 5 | ---------------- 6 | 7 | 325 m^2 is 1/4 a Scala (Cretian unit of measure) 8 | 9 | support: 10 | 11 | object, class, if/else, when, not, and, or, def, val, for, match, case, 12 | int, float, double, List, Array, String, Char 13 | 14 | compile to: 15 | - C: native, minimal run time, should be self-hosting 16 | + should generate legible code, even at expense of some optimizations 17 | - JS: support both legible & illegible outputs (based on aggression of optimization), 18 | with JS support for Canvas, &c. 
19 | 20 | Should support (G)ADTs, Dependent Types 21 | 22 | Examples of what I'm thinking of: 23 | 24 | def main(args: Array[String]) = println("Hello, world"); 25 | 26 | 27 | def main(args: Array[String]) { 28 | for (val a <- args) { 29 | println("a: " + a); 30 | } 31 | } 32 | 33 | def main(args: Array[String]) = for (val a <- args) { println(a); } 34 | 35 | def foo(x) = x + x; /* Type of Number? */ 36 | 37 | def main(args: Array[String]) { 38 | for (val a <- args) { 39 | println(foo(Number.parseNumber(a))); 40 | } 41 | } 42 | 43 | 44 | class AddressBook { 45 | class Person { 46 | val name : String; 47 | val surname : String; 48 | } 49 | } 50 | 51 | def sum(i: Int*) { 52 | if(i.length >= 1) { 53 | i(0) + /* ... */ 54 | } else { 55 | 0 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /docs/opprec.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | CarML has no operator precedence, but something I've been thinking about is how to make it nicer. We often end up with 4 | pipelines like: `somelambda $ foo $ bar (get baz 10) 20` which, whilst not terrible, could be nicer for some operations. 5 | Consider arithmetic: currently in carML, adding three numbers is `(+ 1 $ + 2 3)`. Again, not terrible, but... 6 | 7 | There [was an article on lobste.rs talking about operator precedence](https://lobste.rs/s/jol24u/better_operator_precedence) which 8 | had [an interesting comment](https://lobste.rs/s/jol24u/better_operator_precedence#c_bwd4ij): 9 | 10 | > In Verona, we’re experimenting with no operator precedence. a op1 b op2 c is a parse error. You can chain sequences of the same operator but any pair of different operators needs brackets. You can write (a op1 b op1 c) op2 d or a op1 b op1 (c op2 d) and it’s obvious to the reader what the order of application is. This is a bit more important for us because we allow function names to include symbol characters and allow any function to be used infix, so statically defining rules for application order of a append b £$%£ c would lead to confusion. 11 | 12 | This maps what I've been thinking about (no operator precedence, just use brackets) but does make it much nicer to have arithmetic pipelines: the example 13 | above would just become `(1 + 2 + 3)`. I'd still have to implement *some* form of Shunting Yard for this, but I should do that anyway (and move away 14 | from the current approach, which is just to insert some `call` forms when we see a `$`). This would also help sort out what to do with `$` vs `|>`. 15 | 16 | Thinking about it further tho, this also would be solved by moving to the Scheme-style multi-arity for basic lambdas... 17 | 18 | - `($ foo bar baz)` => `(foo (bar baz))` 19 | - `(* 1 2 3)` => `(* 1 (* 2 3))` 20 | 21 | Esp because I *enjoy* Polish notation 22 | -------------------------------------------------------------------------------- /docs/4heresy-modules.md: -------------------------------------------------------------------------------- 1 | # thoughts on modules 2 | 3 | I've been thinking a lot about what I want to do for modules. I really like how OCaml and SML handle this, but I think 4 | they are quite complex. On the flip side, I think [Mythryl's take on Packages and APIs](https://mythryl.org/my-Packages_and_APIs.html) is 5 | quite fascinating, but not really where I want to go. 
I think F# strikes a happy medium; you can have nested Modules, and they are 6 | quite simple, but you can manipulate the environment to make things relatively clean (from user syntax perspective) as well. 7 | 8 | Part of my thinking here is that carML already has mechanisms to parameterize types: 9 | 10 | ``` 11 | type Foo X { 12 | Bar int 13 | Blah X 14 | } 15 | ``` 16 | 17 | Syntactically we match the semantic namespace of a constructor to it's type: 18 | 19 | ``` 20 | match x with 21 | (Foo.Bar y) => y 22 | else => 10 23 | end 24 | ``` 25 | 26 | And the generated C/Golang code relies on this fact. That's not to say that we cannot break this, of course, but rather I do 27 | like namespacing constructors, and only a few languages do this. So I thought I'd make a proposal to fix these with some syntax. 28 | 29 | Part of _why_ I'm thinking about this now is that I want to add some simple functions to the base system that can be easily 30 | rewritten for the various output types, but are nicely named. For example, whilst I can easily support `string-map!` currently, 31 | it would be nice to define a module `Strings` that includes a `map!` function. 32 | 33 | # Proposal 34 | 35 | - add `::` and `:=` to syntax as operators, like `|>` and `$` 36 | - this proposal doesn't require `:=`, it's just if I do the work on `::` I may as well do `:=` too 37 | - fix `use` to load modules from some set of paths 38 | - support `Foo::bar` which means the `bar` element of module `Foo` 39 | - add `module` and `open-module` as syntactic forms in the F# style 40 | -------------------------------------------------------------------------------- /labs/trie.c.carml: -------------------------------------------------------------------------------- 1 | #@(#) a simple trie implementation, for strings 2 | #@(#) similar to what I did in Digamma, but just 3 | #@(#) testing, really useful mostly for what I'm 4 | #@(#) doing with the compiler in carML 5 | 6 | record Trie { 7 | key:char 8 | data:int # we'll use -1 as a "no data" 9 | # this is a good point: how do we tell 10 | # C that we want a static array here rather 11 | # than a pointer to a pointer? I believe the 12 | # C compiler treats them similarly, but we 13 | # may want it to be different... 14 | children:array[ref[Trie]] 15 | } 16 | 17 | def partial_key_p key:string src:ref[Trie] => int = { 18 | 19 | } 20 | 21 | def has_key_p key:string src:ref[Trie] => bool = { 22 | 23 | } 24 | 25 | def lookup key:string src:ref[Trie] => ref[IntOption] = { 26 | 27 | } 28 | 29 | def insert key:string value:int offset:int src:ref[Trie] => bool = { 30 | val subkey:char = ' ' 31 | when (>= offset (strlen key)) do { 32 | set! (-> src data) value 33 | return true 34 | } 35 | 36 | } 37 | 38 | def get_keys src:ref[Trie] => array[string] = { 39 | 40 | } 41 | 42 | def delete key:string src:ref[Trie] => bool = { 43 | 44 | } 45 | 46 | def make_trie => ref[Trie] = { 47 | # it would be really nice to just do 48 | # `+(make Trie ...)+` here 49 | # and then rely on the compiler to know 50 | # we want a ref (or use something like 51 | # `+view+` to make a reference...) 52 | val ret:ref[Trie] = (hmalloc $ sizeof Trie) 53 | set! (-> ret key) 0 54 | set! (-> ret data) -1 55 | set! (-> ret children) (hmalloc $ * 26 $ sizeof ref[Trie]) 56 | ret 57 | } 58 | 59 | def main => int = { 60 | var root:ref[Trie] = (make_trie) 61 | insert "foo" 10 root 62 | insert "bar" 11 root 63 | insert "baz" 12 root 64 | val keys:array[string] = (get_keys root) 65 | printf "partial key \"ba\"? 
%d\n" $ partial_key "ba" root 66 | printf "keys:\n" 67 | while (< idx 3) do { 68 | printf "key: %s\n" $ get keys idx 69 | set! idx $ + idx 1 70 | } 71 | 0 72 | } 73 | -------------------------------------------------------------------------------- /editors/carml.vim: -------------------------------------------------------------------------------- 1 | " Vim syntax file 2 | " Language: carML 3 | " Maintainer: lojikil 4 | " Latest Revision: 18 June 2016 5 | " add this to your .vimrc: 6 | " au BufRead,BufNewFile *.29 set filetype=xl29 7 | " au BufRead,BufNewFile *.carml set filetype=xl29 8 | 9 | if exists("b:current_syntax") 10 | finish 11 | endif 12 | 13 | syn keyword basicLanguageKeywords val def let letrec in when if then else 14 | syn keyword basicLanguageKeywords do match case with of use record type poly 15 | syn keyword basicLanguageKeywords and var for while import use open load 16 | syn keyword xl29BlockCmd begin end 17 | 18 | " Integer with - + or nothing in front 19 | syn match xl29Number '\s\d\+\s' 20 | syn match xl29Number '\s[-+]\d\+\s' 21 | 22 | " Floating point number with decimal no E or e 23 | syn match xl29Number '\s[-+]\d\+\.\d*\s' 24 | 25 | " Floating point like number with E and no decimal point (+,-) 26 | syn match xl29Number '[-+]\=\d[[:digit:]]*[eE][\-+]\=\d\+' 27 | syn match xl29Number '\d[[:digit:]]*[eE][\-+]\=\d\+' 28 | 29 | " Floating point like number with E and decimal point (+,-) 30 | syn match xl29Number '[-+]\=\d[[:digit:]]*\.\d*[eE][\-+]\=\d\+' 31 | syn match xl29Number '\d[[:digit:]]*\.\d*[eE][\-+]\=\d\+' 32 | 33 | syn region xl29DescBlock start="{" end="}" fold transparent 34 | syn region xl29DescBlock start="begin" end="end" fold transparent 35 | syn keyword xl29Todo contained TODO FIXME XXX NOTE 36 | syn match xl29Comment "#.*$" contains=xl29Todo 37 | 38 | syn keyword xl29Type int char float string bool deque array ref 39 | syn keyword xl29Ops add sum println print div mod divide modulo sub subtract printf 40 | syn keyword xl29Ops / + - * % ^ ! @ $ & \| \|\| && and or not <> < <= > >= != eq? ==? 41 | 42 | let b:current_syntax = "xl29" 43 | 44 | hi def link basicLanguageKeywords Statement 45 | hi def link xl29Todo Todo 46 | hi def link xl29Comment Comment 47 | hi def link xl29BlockCmd Statement 48 | hi def link xl29Type Type 49 | hi def link xl29String Constant 50 | hi def link xl29Desc PreProc 51 | hi def link xl29Number Constant 52 | hi def link xl29Ops Operator 53 | -------------------------------------------------------------------------------- /docs/0heresy-functiontypes.md: -------------------------------------------------------------------------------- 1 | # Heresy 0: function/procedure types 2 | 3 | _2018-MAR-11_ 4 | 5 | I was thinking today about adding code to parse types, and had a bit of an epiphany (or maybe a bit of a "fuck this"): 6 | 7 | Why have special code to parse higher-order function types just to keep syntax similar to function declarations themselves? 8 | 9 | The traditional answer to declaring HOFs in functional languages looks something like: 10 | 11 | name: (type0 type1 type2 ... => returnType) 12 | 13 | where `name` is some binding and we have a list of types. I was thinking about the work I've been doing wrt Scala-style types, 14 | and how much easier that makes parsing of type declarations: most of the `of` forms can disappear, and deciding if something 15 | is a new tag or a type-member is as simple as reading an array literal. 
This got me to thinking about how much code I want to 16 | remove, oh but damn I still need to add `(type...)` parsing for HOFs. Then I had a thought: doesn't Scala and several other 17 | langauges support some notion of a `Function` type? Can't we do something similar? 18 | 19 | ## A Modest ~Proposal~ Heresy 20 | 21 | Functions & procedures are just fancy objects and types about them should be simple. Introducing new parsing code just to 22 | parse HOFs is overkill, as we can introduce two new types, `Function` and `Procedure`, which capture the notion of functions 23 | and procedures (or, functions that do not return a value, Unit functions): 24 | 25 | name: Function[int int] 26 | 27 | `name` is a function from `int` to `int`. For purposes of beauty and perhaps visual delineation, `=>` should work within, but 28 | be ignored by the compiler: 29 | 30 | name: Function[int => int] 31 | 32 | Functions that are only called for their side effects may be easily modeled two ways: 33 | 34 | doSomething: Function[int => ()] 35 | 36 | That is to say, `doSomething` does something on integers and returns no value, or: 37 | 38 | doSomethingElse: Procedure[int] 39 | 40 | Here `doSomethingElse` is a "procedure" (to get into Scheme-style "lambda calculus of procedures") over integers. Nullary 41 | functions that return no values may be introduced with `Function[()]`, and procedures can be introduced simply with 42 | `Procedure`, since the carML type parsing already assumes tagged words without other specification are types. 43 | 44 | I need to masticate on this a bit more, but it's intriguing to me. 45 | -------------------------------------------------------------------------------- /labs/rewriter.carml: -------------------------------------------------------------------------------- 1 | # @(#) a simple rewriter 2 | # @(#) given a needle:ref[AST], replace it with replace:ref[AST] 3 | 4 | def rewrite needle:ref[AST] replace:ref[AST] haystack:ref[AST] => ref[AST] = { 5 | var ret:ref[AST] = NULL 6 | var idx:int = 0 7 | var hlen:int = 0 8 | var htag:int = 0 9 | var hvlen:int = 0 10 | 11 | # if we have a nil haystack, 12 | # just return it 13 | when (eq? haystack NULL) do { 14 | return ret 15 | } 16 | 17 | # decompose some values here 18 | # ideally we would do this in a let/match, 19 | # but we're still too low-level here for that. 20 | # this code will help us do that decomposition... 21 | 22 | set! hlen $ -> haystack lenchildren 23 | set! htag $ -> haystack tag 24 | set! hvlen $ -> haystack lenvalue 25 | 26 | # actually check for the rewrite. 27 | match tag with 28 | # if we have an identifier, simply check if it is the 29 | # needle and, if it is, return the replacement value, 30 | # otherwise return the haystack 31 | TIDENT => if (eq? (strncmp (-> haystack value) (-> needle value) hvlen) 0) then 32 | (return replace) 33 | else 34 | (return haystack) 35 | # here we have something that has children 36 | # iterate over the children, checking if the 37 | # child matches the needle, and calling nested 38 | # rewriter for each if the child is a complex 39 | # AST 40 | x given (> hlen 0) => { 41 | # allocate our copy node. 42 | # TINSTAAFL btw; we either pay 43 | # memory complexity like this, when there is no match 44 | # or we pay algorithmic complexity and iterate over 45 | # potential AST nodes twice, once to scan for matches 46 | # and once to rewrite them. 47 | # the gods of Big O and Big Theta demand sacrifice 48 | set! ret $ hmalloc $ sizeof AST 49 | set! (-> ret lenchildren) hlen 50 | set! 
(-> ret tag) htag 51 | set! (-> ret lenvalue) hvlen 52 | set! (-> ret children) $ hmalloc $ * hlen $ sizeof ref[AST] 53 | while (<= idx hlen) do { 54 | set! (get (-> ret children) idx) $ rewrite needle replace $ get (-> haystack children) idx 55 | set! idx $ + idx 1 56 | } 57 | } 58 | else => (set! ret haystack) 59 | end 60 | return ret 61 | } 62 | -------------------------------------------------------------------------------- /example/cur.carml: -------------------------------------------------------------------------------- 1 | # this file represents what c29 is _currently_ able to compile 2 | 3 | # `declare` needn't be written right next to the 4 | # value being declared about... 5 | # I also like `@` as shorthand for `declare`... 6 | # 7 | # declare blah Num -> Num 8 | # def blah x = (sum x x) 9 | # 10 | # @blaz Num -> Num 11 | # def blaz x = (sum x x) 12 | # 13 | # Further thoughts: 14 | # - function parameters? @blah Num (Num -> Num) -> Num 15 | # - type variables? @blah Num vec of 'a -> 'a 16 | 17 | declare foo:function[int int] 18 | def foo x = (sum x x) 19 | 20 | @bar: function[int int int => int] 21 | def bar x y z = begin 22 | println x; 23 | println y; 24 | println z; 25 | sum (sum x y) z; 26 | end 27 | 28 | # same as the above, but with 29 | # implicit call termination via 30 | # newlines 31 | @blah:function[int int int => int] 32 | def blah x y z = begin 33 | println x 34 | println y 35 | println z 36 | sum (sum x y) z 37 | end 38 | 39 | # same as the above, but with 40 | # {} instead of begin/end 41 | def blaz x:int y:int z:int => int = { 42 | println x 43 | println y 44 | println z 45 | sum (sum x y) z 46 | } 47 | 48 | @baz:function[int int int => int] 49 | def baz x y z = if (>= x y) then z else x 50 | 51 | #declare baz bool any -> unit 52 | @bar:procedure[bool any] 53 | def bar0 x y = when x do y 54 | 55 | # constant 56 | @x:function[=> int] 57 | def x = 32 58 | 59 | val foo = "simple string for doc note?" 60 | 61 | let refc = 340 in (sum refc refc) 62 | 63 | # nested let 64 | let x = 54 in 65 | let y = 67 in 66 | let z = 75 in (sum x (sum y z)) 67 | 68 | # simple `letrec` example 69 | # this is _not_ being parsed correctly... 70 | # ok, there cannot be a TNEWL after the 71 | # TIN, and it must be wrapped in a TCALL... 72 | # parsing is hard, let's go shopping! 73 | # `letrec loop x = {...} in ...` syntactic 74 | # sugar will go a long way here... 75 | letrec loop = fn x = { 76 | println x 77 | if (< x 10) then (loop (sum x 1)) else () 78 | } in (loop 0) 79 | 80 | # lambda definition of a value... 81 | val yar = fn x = (sum x 32) 82 | 83 | # simple record definition 84 | record Foo { 85 | blah : int 86 | bar : int 87 | baz 88 | } 89 | 90 | val xx : int = 45 91 | 92 | # typed vectors: 93 | val yy : array[int] = [1,2,3,4,5] 94 | val zz : array[array[int]] = [[1,2], [3,4]] 95 | val aa : array[array[int]] = [[1,2], [3,4]] 96 | 97 | # should have the same type as the first: 98 | val zz = [1,2,3,4,5] 99 | 100 | # commas are optional in arrays 101 | val jj = [1 2 3 4 5] 102 | -------------------------------------------------------------------------------- /attic/cmung.carml: -------------------------------------------------------------------------------- 1 | def cmung src : string dst : string len : int => string = { 2 | var idx : int = 0 3 | var tmp : char = ' ' 4 | # ideally, this would be great as a 5 | # map or map! form. The below is 6 | # entirely too low level for my tastes, 7 | # esp. when I can see the proper direction. 
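    # what the loop below does, concretely: walk src one character at a
    # time, translate Scheme-ish identifier characters ('!', '?', '<',
    # '>', '=', &c.) into C-safe replacements in dst, and hand dst back
    # early once we hit the terminating ASCII NUL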
8 | while (< idx len) do { 9 | # this would be a wonderful place for the match form 10 | # honestly, instead of having the above arrays, just 11 | # have a simple match here. The match could also detect 12 | # if we have hit end of string (i.e. ASCII NUL). 13 | set! tmp $ get src idx 14 | match tmp with 15 | # refactor out all these set!s... 16 | '!' => (set! (get dst idx) 'B') 17 | '%' => (set! (get dst idx) '_') 18 | '$' => (set! (get dst idx) '_') 19 | '?' => (set! (get dst idx) 'p') 20 | '<' => (set! (get dst idx) 'l') 21 | '>' => (set! (get dst idx) 'g') 22 | '=' => (set! (get dst idx) 'e') 23 | '@' => (set! (get dst idx) '_') 24 | '^' => (set! (get dst idx) '_') 25 | '&' => (set! (get dst idx) '_') 26 | '|' => (set! (get dst idx) '_') 27 | '*' => (set! (get dst idx) '_') 28 | '.' => (set! (get dst idx) '_') 29 | else => (set! (get dst idx) (get src idx)) 30 | end 31 | 32 | # probably should make an `inc!` form 33 | set! idx $ + idx 1 34 | 35 | when (eq? (get src idx) '\0') do 36 | (return dst) 37 | } 38 | } 39 | 40 | def cmung_nicer src:string dst:string len:int => string = { 41 | # this is pretty nice, and it hides much of the 42 | # actual details about how things are stored and 43 | # what not. It's also generally just quite a bit 44 | # smaller. I do wonder if it's *too* minimal tho; 45 | # I think I'll need to get others reading some 46 | # carML at some point soon to see. Probably 47 | # Bruce will see this first tho. XD 48 | map! src dst fn x:char => char = { 49 | match x with 50 | '!' => 'B' 51 | '%' => '_' 52 | '$' => '_' 53 | '?' => 'p' 54 | '<' => 'l' 55 | '>' => 'g' 56 | '=' => 'e' 57 | '@' => '_' 58 | '^' => '_' 59 | '&' => '_' 60 | '|' => '_' 61 | '*' => '_' 62 | '.' => '_' 63 | else => x 64 | end 65 | } 66 | } 67 | 68 | def cmung_alloc src : string len : int => string = { 69 | var dst : string = (malloc (* (sizeof src) len)) 70 | cmung src dst len 71 | } 72 | -------------------------------------------------------------------------------- /src/minicompiler.c.carml: -------------------------------------------------------------------------------- 1 | #@(#) a miniature compiler for carML, written in carML 2 | #@(#) meant to get us over the hump and actually be able 3 | #@(#) to do simple things like check if a function was 4 | #@(#) defined when producing code, and a nicer REPL 5 | #@(#) this also should allow us to do other nicer 6 | #@(#) things in the compiler, like dump bindings for 7 | #@(#) all defined functions; note, this is called 8 | #@(#) a mini-compiler to differentiate it from the 9 | #@(#) full compiler. The delineation there is that 10 | #@(#) this one still relies on what I originally wrote 11 | #@(#) in C itself, whereas the full compiler will not 12 | #@(#) be reliant upon that. As well, the fully compiler 13 | #@(#) will have a properly typed AST, rather than the 14 | #@(#) current array[ref[AST]]-typed one 15 | 16 | # build a spaghetti stack of environment frames 17 | # this would be nicer if we had two things: 18 | # 19 | # . vecdeques 20 | # . 
tries 21 | # 22 | # if we had the first, we wouldn't need to track 23 | # anything more about the object other than the 24 | # vector, and if we had the second we could do 25 | # faster lookups; currently we will have to iterate 26 | # over the entire memberlist to find a match 27 | 28 | record EnvFrame { 29 | curobj:int 30 | curmod:int 31 | lenobjs:int 32 | lenmods:int 33 | object_names:array[string] 34 | object_vals:array[ref[AST]] 35 | module_names:array[string] 36 | module_vals:array[ref[AST]] 37 | parent:EnvFrame 38 | } 39 | 40 | # add an environment frame to the spaghetti stack 41 | def new-env-frame src:ref[EnvFrame] => ref[EnvFrame] = { 42 | var ret:ref[EnvFrame] = (hmalloc $ sizeof EnvFrame) 43 | set! (-> ret parent) src 44 | ret 45 | } 46 | 47 | # add an AST to the correct place 48 | def add-member! src:ref[AST] dst:ref[EnvFrame] => bool = { 49 | # I *reallllllllly* want accessors... 50 | val tag : int = (-> src tag) 51 | # also, I reaaaaaaally need to fix that `match` function 52 | # call bug 53 | # additionally, it will be really nice to get rid of some 54 | # of this imperative code here... a nice monadic interface 55 | # for these would work too... 56 | # lastly, a nice vector dequeue implementation would elide 57 | # a lot of the detail that I'm manually doing in here... 58 | match tag with 59 | TMODULE => { 60 | # we also need to handle the case when the length is 61 | # at capacity, and need to update that... 62 | when (eq? (-> dst curmod) (-> dst lenmods)) do { 63 | return false 64 | } 65 | set! (get (-> dst module_vals) (-> dst curmod)) src 66 | set! (get (-> dst module_names) (-> dst curmod)) (-> src value) 67 | set! (-> dst curmod) $ + 1 $ -> dst curmod 68 | } 69 | else => { 70 | # we also need to handle the case when the length is 71 | # at capacity, and need to update that... 72 | # NOTE we should check here if this is *actually* and object we care 73 | # about... or we can just rely on the compiler to handle that, which 74 | # isn't a terrible idea either... 75 | when (eq? (-> dst curobj) (-> dst lenobjs)) do { 76 | return false 77 | } 78 | set! (get (-> dst object_vals) (-> dst curobj)) src 79 | set! (get (-> dst object_names) (-> dst curobj)) (-> src value) 80 | set! (-> dst curobj) $ + 1 $ -> dst curobj 81 | } 82 | end 83 | true 84 | } 85 | 86 | # XXX Need to fix the carML/C compiler here, it dies when a return type is unit 87 | def foreach-module src:ref[EnvFrame] idx:int = { 88 | when (< idx (-> src lenmods)) do { 89 | (walk (get (-> src module_vals) idx) 0) 90 | (foreach-module src (+ idx 1)) 91 | } 92 | } 93 | 94 | def foreach-other src:ref[EnvFrame] idx:int = { 95 | when (< idx (-> src lenobjs)) do { 96 | (walk (get (-> src object_vals) idx) 0) 97 | (foreach-other src (+ idx 1)) 98 | } 99 | } 100 | 101 | def dump-frame src:ref[EnvFrame] = { 102 | when (eq? src NULL) do (return) 103 | 104 | foreach-module src 0 105 | foreach-other src 0 106 | dump-frame (-> src parent) 107 | } 108 | 109 | def mini-compiler src:ref[AST] env:ref[EnvFrame] = { 110 | 111 | } 112 | -------------------------------------------------------------------------------- /src/carml.carml: -------------------------------------------------------------------------------- 1 | # the carML compiler, written in carML 2 | 3 | use labs/mini-sexpression 4 | 5 | # we can start to stub out all the types of expressions 6 | # that carML has. Probably should just generate this 7 | # from the carML/C system, but... 8 | type CarML { 9 | # I wonder if we can use another type 10 | # here for these... 
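    # a rough, hand-written illustration (pseudo-notation, not produced
    # by any tool here): `def foo x:int => int = (sum x x)` would land in
    # the Def constructor below as something like
    #   Def "foo" [] [("x" IntT)] IntT (Call (Ident "sum") [(Ident "x") (Ident "x")])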
11 | # name types-array [(parameter-ident type)] returnType body 12 | Def string array[CarML] array[tuple[string CarML]] CarML CarML 13 | Fn array[CarML] CarML 14 | ParamDef string CarML 15 | Declare string CarML 16 | # thinking these should be an array[string] 17 | # and an array[CarML] that way we can support 18 | # destructuring binds... 19 | Var string CarML CarML 20 | DestructuringVar array[string] array[CarML] CarML 21 | Val string CarML CarML 22 | DestructuringVal array[string] array[CarML] CarML 23 | Let string CarML CarML 24 | DestructuringLet array[string] array[CarML] CarML 25 | LetRec string CarML CarML 26 | DestructuringLetRec array[string] array[CarML] CarML 27 | Integer string 28 | Octal string 29 | Hex string 30 | Bin string 31 | Float string 32 | String string 33 | Char string 34 | Bool string 35 | ArrayLiteral CarML array[CarML] 36 | Ident string 37 | Tag string 38 | Begin array[CarML] 39 | Call CarML array[CarML] 40 | If CarML CarML CarML 41 | When CarML CarML 42 | Match CarML array[CarML] 43 | Guard CarML CarML 44 | While CarML CarML 45 | For CarML CarML 46 | Type string array[CarML] array[CarML] 47 | Record string array[string] array[CarML] 48 | ComplexType string array[CarML] 49 | Else 50 | IntT 51 | StringT 52 | FloatT 53 | CharT 54 | BoolT 55 | AnyT 56 | Unit 57 | Nil 58 | # NOTE: explicitly not handling things like $, =>, &c 59 | # may have to do that eventually too tho... 60 | # NOTE: actually... on second thought... it would be neat 61 | # if I could use this closer to something like Concrete 62 | # Syntax Trees... 63 | Comment string 64 | FatArrow 65 | PipeArrow 66 | Colon 67 | ColonEqual 68 | DollarSign 69 | } 70 | 71 | record Environment { 72 | # should we separate out constructor 73 | # names & accessors? might make sense... 74 | # in Digamma I made these tries, it might 75 | # make sense to have a tree structure for 76 | # these, but I think often it's not going 77 | # to be many of these at one time... 78 | type_names:array[string] 79 | type_defns:array[CarML] 80 | ctor_names:array[string] 81 | ctor_defns:array[CarML] 82 | accr_names:array[string] 83 | func_names:array[string] 84 | func_defns:array[CarML] 85 | recr_names:array[string] 86 | recr_defns:array[string] 87 | val_names:array[string] 88 | val_defns:array[string] 89 | var_names:array[string] 90 | var_defns:array[string] 91 | parent:ref[Environment] 92 | } 93 | 94 | # NOTE: so the general gist here is that we want to be able to 95 | # round trip code from SExpressions back to CarML. For example, 96 | # we may have several optimization passes that end up eliminating 97 | # things, doing tree shaking, &c, and having a Compiler that can 98 | # handle these round trips is key. However, I'm not currently 99 | # focused on having the carML/carML compiler be able to parse 100 | # carML itself, we can continue to use the carML/C compiler as 101 | # a front end. 
This *does* mean that the carML/C compiler must 102 | # be kept up to date with at least the language, even if the 103 | # simple code generation in the carML/C system cannot handle 104 | # all the cases that it can parse 105 | 106 | def sexpression->ast src:SExpression => CarML = { 107 | make-struct CarML.Nil 108 | } 109 | 110 | def ast->carml src:CarML => string = { 111 | "" 112 | } 113 | 114 | def ast->golang src:CarML => string = { 115 | "" 116 | } 117 | 118 | def ast->c src:CarML => string = { 119 | "" 120 | } 121 | 122 | # NOTE: section: optimizations 123 | # we need to be able to optimize certain situations 124 | # in carML and rewrite them to nicer alternatives. For 125 | # example, whilst `let` is a useful abstraction, it's not 126 | # incredibly useful when getting into actual code; we can 127 | # use something like an alpha rewrite, to transform a `let` 128 | # or a `letrec` into a freshsym `val` or `var` 129 | def let->val src:CarML => CarML = { 130 | make-struct CarML.Nil 131 | } 132 | -------------------------------------------------------------------------------- /src/newreader.c.carml: -------------------------------------------------------------------------------- 1 | @new_reader: function[ref[FILE] ref[AST]] 2 | @read_var_val: function[ref[FILE] ref[AST]] 3 | @read_array_literal: function[ref[FILE] ref[AST]] 4 | @read_def: function[ref[FILE] ref[AST]] 5 | @make_ident: function[array[char] ref[AST]] 6 | @make_int: function[array[char] ref[AST]] 7 | @make_float: function[array[char] ref[AST]] 8 | @make_string: function[array[char] ref[AST]] 9 | @make_eof: function[ref[AST]] 10 | @make_colon: function[ref[AST]] 11 | @make_type: function[int ref[AST]] 12 | @make_complex_type: function[array[ref[AST]] ref[AST]] 13 | 14 | def new_reader fin:ref[FILE] => ref[ASTEither] = { 15 | var buffer:array[char] = (make-array char 512) 16 | var ltype:int = 0 17 | var tmp:ref[AST] = nil 18 | set! ltype (next fdin buffer 512) 19 | match ltype with 20 | TVAR => (read_var_val fdin TVAR) 21 | TVAL => (read_var_val fdin TVAL) 22 | TIDENT => (ASTRight $ make_ident buffer) 23 | TINT => (ASTRight $ make_int buffer) 24 | TFLOAT => (ASTRight $ make_float buffer) 25 | TSTRING => (ASTRight $ make_string buffer) 26 | TEOF => (ASTRight $ make_eof) 27 | TCOLON => (ASTRight $ make_colon) 28 | TINTT => (ASTRight $ make_type TINTT) 29 | TFLOATT => (ASTRight $ make_type TFLOATT) 30 | TSTRT => (ASTRight $ make_type TSTRT) 31 | TBOOLT => (ASTRight $ make_type TBOOLT) 32 | TCHART => (ASTRight $ make_type TCHART) 33 | TARRAY => { 34 | # we need to read in one more element, which should be 35 | # a TARRAYLITERAL 36 | # NOTE I had a good idea: we can make these more 37 | # functional and more stateful by having a set of 38 | # smaller functions, like `read_colon` and `read_array_literal` 39 | # that handle the unwrapping and what not, but also handle 40 | # errors 41 | set! sometmp (new_reader fin) 42 | when (eq? (-> sometmp tag) ASTLEFT) do (return sometmp) 43 | set! tmp (-> sometmp right) 44 | set! tag (-> tmp tag) 45 | match tag with 46 | TARRAYLITERAL => (ASTRight $ make_complex_type TARRAY tmp) 47 | else => (ASTLeft 0 0 "Array as a type *must* be followed be an array-literal of types") 48 | end 49 | } 50 | TDEQUE => { 51 | # same as the above, read once more, and get the vecdeque type 52 | set! sometmp (new_reader fin) 53 | when (eq? (-> sometmp tag) ASTLEFT) do (return sometmp) 54 | set! tmp (-> sometmp right) 55 | set! 
tag (-> tmp tag) 56 | match tag with 57 | TARRAYLITERAL => (ASTRight $ make_complex_type TARRAY tmp) 58 | else => (ASTLeft 0 0 "Array as a type *must* be followed be an array-literal of types") 59 | end 60 | } 61 | else => (ASTLeft 0 0 "something failed") 62 | end 63 | } 64 | 65 | # it would be interesting to do this sort of parsing 66 | # with a monadic state and |> or the like, I think. 67 | # Something like a LexState monad that has the current 68 | # file, buffer, location, &c. It would also help with 69 | # errors, as the monad could contain current offset and 70 | # what not 71 | def read_var_val fdin:ref[FILE] tag:int => ref[ASTEither] = { 72 | var ident:ref[AST] = (make_null) 73 | var vtype:ref[AST] = (make_null) 74 | var value:ref[AST] = (make_null) 75 | var tmp:ref[AST] = (make_null) 76 | var eithertmp:ref[ASTEither] = nil # yes, I hate it too... 77 | var tmptag:int = 0 78 | set! ident (new_reader fdin) 79 | when (<> (-> ident tag) TIDENT) do { 80 | return $ ASTLeft 0 0 "var *must* be followed by an IDENT" 81 | } 82 | 83 | # actually, we need to check if we get an ASTLeft here and 84 | # respond accordingly... 85 | set! eithertmp (new_reader fdin) 86 | if (eq? (-> eithertmp tag) ASTLEFT) then (return eithertmp) else (set! tmp (-> eithertmp right)) 87 | set! tmptag (-> tmp tag) 88 | match (-> tmp tag) with 89 | TCOLON => { 90 | set! eithertmp (new_reader fdin) 91 | if (eq? (-> eithertmp tag) ASTLEFT) then (return eithertmp) else (set! tmp (-> eithertmp right)) 92 | set! tmptag (-> tmp tag) 93 | match (-> tmp tag) with 94 | TTAG => { 95 | # ok, so here we can either get: 96 | # . a TARRAYLITERAL which means we have a complex type 97 | # . a TEQ, which means we should move on 98 | } 99 | _ given (istypeast $ -> tmp tag) => { 100 | 101 | } 102 | else => (return $ ASTLeft 0 0 "type definition must be followed by a type") 103 | end 104 | } 105 | TEQ => () 106 | else => (ASTLeft 0 0 "var's ident *must* be followed by a COLON or an EQUALS SIGN") 107 | end 108 | } 109 | -------------------------------------------------------------------------------- /labs/sexpr.carml: -------------------------------------------------------------------------------- 1 | #@(#) SExpression library for carML 2 | #@(#) mainly aimed at getting the carML compiler out of the C system 3 | #@(#) should be mainly compatible with the SExpression-based IR that 4 | #@(#) carML/C generates. 5 | # 6 | # the shame is, given carML's current 7 | # type system, there are many work-arounds 8 | # required to make the below work: 9 | # - the type system can't tell ref[SExpression] can be flattened to SExpression 10 | # - some things should be automatically passed around as ref[Type] but you have to manually do that 11 | # - lots of ref[Any] could be turned into a `const`... 12 | # 13 | # probably lots of other hacks as I 14 | # go through it. Luckily, once I move 15 | # past where the C compiler is, I can 16 | # start to handle some of the debt I've 17 | # accrued. 
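# a rough, hand-written illustration of the encoding this library is
# aiming for (not actual reader output): the text "(sum 1 2)" should
# come back as roughly
#   Cons (Atom "sum") (Cons (Int 1) (Cons (Int 2) Nil))
# or, in the array-backed form, List [(Atom "sum") (Int 1) (Int 2)]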
18 | 19 | type Num { 20 | # would be really nice if this could 21 | # just be handled by a union[float int] 22 | NInt int 23 | NFloat float 24 | } 25 | 26 | type SExpression { 27 | Nil 28 | Cons SExpression SExpression 29 | Atom string 30 | String string 31 | Int int 32 | Float float 33 | Rational int int 34 | Complex Num Num 35 | Char char 36 | Bool bool 37 | # need to add length generation to 38 | # the C compiler here 39 | List array[SExpression] 40 | Array array[SExpression] 41 | Error string 42 | # Maybe should have an internal type that 43 | # isn't exported, and an external type that 44 | # is, so as to remove the two constructors 45 | # below... 46 | EndList 47 | EndArray 48 | } 49 | 50 | # add a simple reader system below 51 | # should be able to construct & read 52 | # SExpression-based syntax 53 | # 54 | # What will this be used for? Why, 55 | # for bootstrapping a compiler in carML 56 | # itself really. 57 | 58 | def peek_char fh:ref[FILE] => char = { 59 | val ret:int = (fgetc fh) 60 | val res:int = (ungetc ret fh) 61 | ret 62 | } 63 | 64 | def is_whitespace ch:char => bool = { 65 | match ch with 66 | ' ' => true 67 | '\t' => true 68 | '\n' => true 69 | '\v' => true 70 | '\r' => true 71 | '\l' => true 72 | '\b' => true 73 | else => false 74 | end 75 | } 76 | 77 | def take_while_white fh:ref[FILE] => char = { 78 | # take... on... me... 79 | # _softly_ take on me 80 | # take... me... on... 81 | # _softly_ take on me 82 | # I'll be... gone... 83 | # In a day or twoooooooo 84 | # _synths_ 85 | var ch:int = (fgetc fh) 86 | while (is_whitespace ch) do { 87 | set! ch $ fgetc fh 88 | } 89 | ch 90 | } 91 | 92 | def is_numeric ch:char => bool = { 93 | match ch with 94 | x given (&& (>= ch '0') (<= ch '9')) => true 95 | '.' => true 96 | else => false 97 | end 98 | } 99 | 100 | def is_symbolic ch:char => bool = { 101 | match ch with 102 | '(' => false 103 | ')' => false 104 | '[' => false 105 | ']' => false 106 | '"' => false 107 | '\'' => false 108 | y given (is_whitespace ch) => false 109 | else => true 110 | end 111 | } 112 | 113 | def is_null_or_endp obj:SExpression => bool = { 114 | match obj with 115 | (SExpression.EndList) => true 116 | (SExpression.EndArray) => true 117 | (SExpression.EndFile) => true 118 | else => false 119 | end 120 | } 121 | 122 | def sexpression_append_b dst:SExpresssion src:SExpression = { 123 | 124 | } 125 | 126 | def read_list fh:ref[FILE] => SExpression = { 127 | var ret:SExpression = (SExpression_Nil) 128 | var tmp:SExpression = (sexpression_read fh) 129 | while (not $ is_null_or_endp tmp) do { 130 | sexpression_append_b ret tmp 131 | } 132 | ret 133 | } 134 | 135 | def read_array fh:ref[FILE] => SExpression = { 136 | var ret:array[SExpression] = (make-array SExpression 128 $ SExpression.Nil) 137 | var ret_length:int = 128 138 | var tmp:SExpression = (sexpression_read fh) 139 | var idx:int = 0 140 | while (not $ is_null_or_endp tmp) do { 141 | when (< idx ret_length) do { 142 | set! (get ret idx) tmp 143 | set! 
idx $ + idx 1 144 | } 145 | } 146 | ret 147 | } 148 | 149 | def read_char fh:ref[FILE] => SExpression = { 150 | 151 | } 152 | 153 | def read_atom fh:ref[FILE] => SExpression = { 154 | 155 | } 156 | 157 | def read_atom_partial fh:ref[FILE] cur:string => SExpression = { 158 | 159 | } 160 | 161 | # it would be nice to enrich types here 162 | # say that this is an SExpression, but also 163 | # that the only types it will return are 164 | # from Int, Float, Rational, Complex 165 | def read_number fh:ref[FILE] => SExpression = { 166 | 167 | } 168 | 169 | def sexpression_read fh:ref[FILE] => ref[SExpression] = { 170 | val ch:char = (take_while_white fh) 171 | match ch with 172 | '(' => (read_list fh) 173 | '[' => (read_array fh) 174 | '"' => (read_string fh) 175 | '#' => (read_char fh) 176 | ')' => (SExpression.EndList) 177 | ']' => (SExpression.EndArray) 178 | x given (is_whitespace ch) => (sexpression_read fh) 179 | z given (is_numeric ch) => (read_number fh) 180 | a given (is_symbolic ch) => (read_atom fh) 181 | else => (SExpression.Error "unknown starting character") 182 | end 183 | } 184 | -------------------------------------------------------------------------------- /labs/rewrite_match_bind.carml: -------------------------------------------------------------------------------- 1 | # @(#) a match rewriter 2 | # @(#) recreating the AST type from carML/c here 3 | # @(#) as a carML record. Were I to clean-room 4 | # @(#) this properly, I'd obviously use variants. 5 | # @(#) However, this at least allows me to test 6 | # @(#) a rewriter without having to link the 7 | # @(#) entire carML/c code base here... 8 | 9 | record AST { 10 | tag:U32 # FIXME: need to define C-level types... 11 | value: string 12 | children:array[ref[AST]] # FIXME need to fix self-referential records... 13 | lenvalue: U32 14 | lenchildren: U32 15 | } 16 | 17 | def make_AST tag:int lenchildren:int children:array[ref[AST]] lenvalue:int value:string => ref[AST] = { 18 | var res:ref[AST] = (malloc $ sizeof AST) 19 | set! (-> res tag) tag 20 | set! (-> res lenchildren) lenchildren 21 | set! (-> res children) children 22 | if (<> value NULL) then 23 | (set! (-> res value) $ strdup value) 24 | else 25 | (set! (-> res value) NULL) 26 | set! (-> res lenvalue) lenvalue 27 | res 28 | } 29 | 30 | 31 | # make a specific type of AST, specifically a 32 | # ident node. 33 | def make_ident_AST name:string => ref[AST] = (make_AST 0 0 NULL (strlen name) name) 34 | 35 | def make_integer_AST value:string => ref[AST] = (make_AST 1 0 NULL (strlen value) value) 36 | 37 | def make_call_AST children:array[ref[AST]] len:int => ref[AST] = (make_AST 2 len children 0 NULL) 38 | 39 | def make_begin_AST children:array[ref[AST]] len:int => ref[AST] = (make_AST 3 len children 0 NULL) 40 | 41 | # make a shallow copy of an AST 42 | def copy_AST src:ref[AST] => ref[AST] = { 43 | (make_AST 44 | (-> src tag) 45 | (-> src lenchildren) 46 | (-> src children) 47 | (-> src lenvalue) 48 | (-> src value)) 49 | } 50 | 51 | # we could just gather names and return a list... 52 | def gather_names head:ref[AST] => array[ref[AST]] = { 53 | val len : int = (-> head lenchildren) 54 | val ret : array[ref[AST]] = (hmalloc $ * len $ sizeof ref[AST]) 55 | var idx : int = 1 56 | var cur : ref[AST] = NULL 57 | 58 | # again, foreach would be perfect here... 59 | # actually just a map... 60 | # and really, could do a list of tuple[ref[AST], int] 61 | # or the like... 62 | while (< idx len) do { 63 | set! cur $ get (-> head children) idx 64 | when (eq? 
(-> cur tag) TIDENT) do { 65 | if (not $ eq? (-> cur value) "_") then { 66 | set! (get ret idx) cur 67 | } else { 68 | set! (get ret idx) NULL 69 | } 70 | } 71 | set! idx $ + idx 1 72 | } 73 | return ret 74 | } 75 | 76 | # there should only be minimal amount of nodes to 77 | # iterate over, since we'll mainly be using this 78 | # in `given` forms and match-case bodies, but this 79 | # still means we're iterating over all forms to check 80 | # for a rewrite case N^2 or N^3 (if there's both a given 81 | # case guard *AND* a body) 82 | def need_rewritep name:ref[AST] dst:ref[AST] => bool = { 83 | # should combine these two... 84 | when (eq? (-> name tag) (-> dst tag)) do { 85 | when (not (strcmp (-> name value) (-> dst value))) do { 86 | return true 87 | } 88 | } 89 | 90 | when (> (-> dst lenchildren) 0) do { 91 | var idx:int = 0 92 | var rst:bool = false 93 | while (< idx (-> dst lenchildren)) do { 94 | set! rst $ need_rewritep name $ get (-> dst children) idx 95 | when rst do { 96 | return true 97 | } 98 | set! idx $ + idx 1 99 | } 100 | } 101 | 102 | return false 103 | } 104 | 105 | def rewrite_AST name:ref[AST] dst:ref[AST] => ref[AST] = { 106 | NULL 107 | } 108 | 109 | # walk an AST, freeing anything that 110 | # we've allocated with `malloc(3)` and 111 | # friends ("malloc(3) and friends": worst. children's show. ever) 112 | def free_AST head:ref[AST] = { 113 | var idx : int = 0 114 | when (<> NULL $ -> head value) do { 115 | free $ -> head value 116 | } 117 | 118 | when (<> 0 $ -> head lenchildren) do { 119 | # HAAAAAAAAATE while loops like this 120 | # should be a `for` loop, but i haven't 121 | # added support for compiling those yet 122 | while (< idx $ -> head lenchildren) do { 123 | free_AST $ get (-> head children) idx 124 | set! idx $ + idx 1 # probably should just have an inc! form 125 | } 126 | 127 | free $ -> head children 128 | } 129 | 130 | free head 131 | } 132 | 133 | # just for testing, add a main here 134 | # use that for running this whole test... 135 | # would be nice to have some sort of 136 | # conditional compilation language, ala 137 | # SRFI-0 138 | 139 | def main ac:int al:array[string] => int = { 140 | val test:ref[AST] = (make_ident_AST "test") 141 | val newtest:ref[AST] = (copy_AST test) 142 | val fun:ref[AST] = (make_ident_AST "+") 143 | val tmp0:array[ref[AST]] = [fun (make_integer_AST 10) test] 144 | val test0:ref[AST] = (make_call_AST tmp0 3) 145 | val tmp1:array[ref[AST]] = [test0 test] 146 | val test1:ref[AST] = (make_begin_AST tmp1 2) 147 | 148 | # would be nice to make this printf (if...) but the compiler 149 | # would probably generate terrible C for that rn... need to add 150 | # something to detect that 151 | if (<> test newtest) then (printf "they differ!\n") else (printf "wait... what?") 152 | 153 | # clean up our legos 154 | free_AST test 155 | free_AST newtest 156 | 157 | 0 158 | } 159 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overview of carML 2 | 3 | _carML_ (pronounced "car-mul", caramel), previously called _XL/29_ (eXperimental Language No. 29), is meant to take ideas from PowerLogo & Yeti, mixed in with my experiences of working 4 | with [Digamma](http://lojikil.com/p/digamma/). 
It's meant to be: 5 | 6 | - tiny 7 | - low barrier to entry 8 | - no ceiling 9 | - somewhat efficient 10 | - without requiring too much mental cost from the programmer 11 | - "secure" 12 | - implemented and understood quickly 13 | - borrowing ideas from ReasonML, Scala, BitC, OCaml, F#, Digamma, Yeti, PowerLogo, & Project Verona 14 | - I've been doing a *lot* of work in ReasonML of late for work, so it has definitely become a strong muse 15 | - ReasonML: types, general style, Modules 16 | - F#: Simpler Module style, exploration of advanced types (like FStar and Fomega give) 17 | - Scala: type syntax, general style 18 | - Digamma: focus on C output, focus on Unix integration, production readiness 19 | - Yeti: simplicity & minimalism 20 | - PowerLogo: little barrier to entry, no ceiling, and practical focus 21 | - Project Verona: [this comment on operator precedence in Verona got me thinking](https://lobste.rs/s/jol24u/better_operator_precedence#c_bwd4ij) 22 | 23 | 24 | This is at least the 3rd (and probably 5th) time that I've attempted such a thing, so don't mind me at all. This *is* the first time 25 | I've experimented publicly with my PLT designs tho. 26 | 27 | # Name 28 | 29 | I've experimented with multiple names; HotelML (named for "The HotelML" in NJ) & Melomys (a species of mouse) were two contenders. 30 | However, I was grabbing caramel macchiatos, which Starbucks labels as "carml mac", and thus named it after the drink. 31 | 32 | # Syntax 33 | 34 | Syntactically, it's pretty much a garden-variety ML dialect. Minimal keywords like `def` exist to visually break up code, but otherwise 35 | if you're familiar with Yeti, StandardML, OCaml, &c. it should be relatively straightforward. The one place it is *not* is in the 36 | standard base. 37 | 38 | - there are no operators in the standard base. Functions like `(+)` are replaced by `sum`. 39 | - there are no lists or other GC'd data structures in the standard base (this may change based on the implementation of memory). 40 | 41 | Remember, while the language is meant to be understood quickly, it's not meant to be a replacement for everything. Like Logo, I'll probably 42 | eventually add something to "preprocess" source code and have operators (or simply make an `expr` form to handle it). 43 | 44 | _Note_: I've been working on various ML dialects for several years, and carML is the one I've decided to run with. Interestingly, 45 | [Tulip](http://tuliplang.org/) is also the product of several years of tinkering, and has a very similar syntax. It might be interesting 46 | to combine the two (or use carML for the base instead of C), but it's exciting to me that there is movement in the ML space again, esp. 47 | considering that both languages are _not_ derived from existing ML dialects. [Yeti](https://mth.github.io/yeti/) is no longer alone! 48 | 49 | # Semantics 50 | 51 | - typical ML/PowerLogo/Lisp semantics. 52 | - types, row polymorphism, polymorphic variants 53 | - Minimal datatypes available from base. Need to figure out GC-less lists 54 | - Deque & Vector ala Rust 55 | - GC'd and GC-less data structures? 56 | - Lambda lifting not closure conversion (to save GC) 57 | - Regions + localized GC? 58 | - Continuations? How would that operate in a GC-less environment?
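
To make the "no operators in the standard base" point from the Syntax section concrete, here is a small, hedged sketch in the same style as the examples further down (illustrative only, not canonical library code):

    # `sum`/`mul` stand in for `+`/`*`
    def squareplus x : int y : int => int = (sum (mul x x) y)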
59 | 60 | # Output 61 | 62 | - Enyalios-style Human Readable C/Golang 63 | - Let the C/Golang compiler deal with many of the real optimizations 64 | - But we can do some simple ones like inlining, self-TCO, & rewriting HOFs 65 | 66 | # Examples 67 | 68 | # cannonical loop example 69 | def arrayIota arr:array[int] f:function[int => int] = begin 70 | def arrayIota' arr:array[int] fun:function[int => int] idx:int = begin 71 | if (< idx (array-length arr)) then { 72 | set-array-index! arr (fun idx) idx 73 | arrayIota' arr fun (add idx 1) 74 | } else () 75 | end 76 | arrayIota' arr f 0 77 | end 78 | 79 | # note, there is no _real_ reason to use begin 80 | # form here, but it makes it a bit more pretty. 81 | def arrayIota1 arr : array[int] fun : function[int => int] = { 82 | foreach-index! fn x = (set-array-index! arr (fun x) x) arr 83 | } 84 | 85 | # same as the above, but without the `{}` block 86 | def arrayIota2 arr:array[int] fun : function[int => int] = (foreach-index! fn x = (set-array-index! arr (fun x) x) arr) 87 | 88 | # but really, all we're doing is mapping some function in place... 89 | def arrayIota3 arr:array[int] fun:function[int => int] = (map! fun arr) 90 | 91 | def foo x : int => int = (add x 1) 92 | 93 | # constant: 94 | val f : array[int] = (make-array 10) 95 | # variable 96 | var g : array[int] = [0 1 2 3 4 5 6 7 8 9] 97 | 98 | arrayIota f foo; 99 | 100 | println f; 101 | 102 | # add: 103 | # ref, make 104 | 105 | See also the _examples_ directory for more indepth & up-to-date examples. 106 | 107 | # Implementations 108 | 109 | There are two major implementations: 110 | 111 | - `carmlc` which is just a compiler, written in C. 112 | - `carml` which is a self-hosting version. 113 | 114 | Note that, when supplied with no arguments, `carmlc` will actually enter a REPL. However, 115 | this REPL does not evaluate the supplied expressions for _values_, but rather for _code_: 116 | 117 | >>> def foo x = (sum x x) 118 | (define-function foo (x) 119 | (call (identifier sum) (identifier x) (identifier x))) 120 | 121 | The purpose is more to see what the IR/code will look like, rather than what the code does. It 122 | is meant more as a test bed than an actual REPL. 123 | 124 | # File naming conventions 125 | 126 | There are a few different "types" of files in the system 127 | 128 | - `*.c.carml` is a file that is written in carML, but targets C directly 129 | - `*.go.carml` is a file written in carML, but targets Golang directly 130 | - `*.carml` just a plain carML file, which makes no assumptions as to the underlying system 131 | 132 | Eventually I'd like to be able to include files via an SRFI-0-style mechanism, by which we can 133 | simply import which files we'd like to use at compile time based on flags and such 134 | 135 | # License 136 | 137 | See `LICENSE` for details (ISC license). 138 | -------------------------------------------------------------------------------- /src/carmlc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * @(#) the main header file, meant to keep definitions clean 3 | * @(#) and help make integration of generated code smoother 4 | */ 5 | #ifndef __CARMLC_H 6 | #define __CARMLC_H 7 | 8 | #include 9 | 10 | #ifdef DEBUG 11 | #define debugln printf("dying here on line %d?\n", __LINE__); 12 | #define dprintf(x, ...) fprintf(x, __VA_ARGS__) 13 | #define dwalk(x, y, z) walk(x, y, z) 14 | #define pwalk(x, y) walk(stderr, x, y) 15 | #else 16 | #define debugln 17 | #define dprintf(x, ...) 
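/* in non-DEBUG builds the debug helpers above and below expand to
 * nothing, so a call like dprintf(stderr, "tag %d\n", head->tag)
 * compiles away entirely (that call is only an illustration, not
 * something taken from these sources)
 */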
18 | #define dwalk(x, y, z) 19 | #define pwalk(x, y) 20 | #endif 21 | 22 | #define nil NULL 23 | #define nul '\0' 24 | #define YES 1 25 | #define NO 0 26 | 27 | #ifdef DEBUGMEMORY 28 | #define hmalloc malloc 29 | #else 30 | #define hmalloc GC_MALLOC 31 | #endif 32 | 33 | #define cwalk(fd, head, level) llcwalk(fd, head, level, NO) 34 | #define gwalk(fd, head, level) llgwalk(fd, head, level, NO) 35 | #define indent(fd, level) llindent(fd, level, NO) 36 | #define gindent(fd, level) llindent(fd, level, YES) 37 | 38 | /* Lexical analysis states. 39 | * basically, the tokenizer is a 40 | * statemachine, and this enum represents 41 | * all the state machine states. Because 42 | * we use a decision tree (encoded in an SM) 43 | * for parsing out keywords, there are lots 44 | * of state productions here. 45 | */ 46 | typedef enum { 47 | LSTART, LDEF0, LDEF1, LDEF2, LE0, LELSE1, 48 | LELSE2, LELSE3, LVAL0, LVAL1, LVAL2, LT0, 49 | LTHEN0, LTHEN1, LTHEN2, LTYPE0, LTYPE1, 50 | LTYPE2, LBEGIN0, LBEGIN1, LBEGIN2, LBEGIN3, 51 | LBEGIN4, LEQ0, LNUM0, LIDENT0, LEND0, LEND1, 52 | LEND2, LMATCH0, LCOMMENT, LMCOMMENT, LPOLY0, 53 | LPOLY1, LPOLY2, LPOLY3, LRECORD0, LIF0, LIF1, 54 | LRECORD1, LRECORD2, LRECORD3, LRECORD4, LRECORD5, 55 | LMATCH1, LMATCH2, LMATCH3, LMATCH4, LMATCH5, LEOF, 56 | LWHEN0, LWHEN1, LWHEN2, LWHEN3, LNEWL, LDO0, LDO1, 57 | LD0, LTRUE0, LTRUE1, LTRUE3, LTRUE4, LFALSE0, 58 | LFALSE1, LFALSE2, LFALSE3, LFALSE4, LFALSE5, 59 | LFN0, LFN1, LLET0, LLET1, LLET2, LLETREC0, LLETREC1, LLETREC2, 60 | LLETREC3, LLETREC4, LLETREC5, LLETREC6, LL0, LCHAR0, 61 | LCHAR1, LCHAR2, LCHAR3, LSTRT0, LSTRT1, LSTRT2, LSTRT3, 62 | LSTRT4, LSTRT5, LSTRT6, LINTT0, LINTT1, LINTT2, LFLOATT0, 63 | LFLOATT1, LFLOATT2, LFLOATT3, LFLOATT4, LFLOATT5, LINTT3, 64 | LARRAY0, LARRAY1, LARRAY2, LARRAY3, LARRAY4, LARRAY5, 65 | LB0, LI0, LUSE0, LF0, LC0, LR0, LW0, LOF0, LOF1, 66 | LDEQ0, LDEQ1, LDEQ2, LDEQ3, LDEC0, LDEC1, LDEC2, LDEC3, 67 | LDE0, LBOOL0, LBOOL1, LBOOL2, LREF0, LELSE0, LRE0, LTRUE2, 68 | LWITH0, LWITH1, LWITH2, LDEC4, LUSE1, LUSE2, LVAR2, LTAG0, 69 | LTAGIDENT, LWHILE1, LWHILE2, LWHILE3, LWHILE4, LWHILE5, 70 | LFOR0, LFOR1, LFOR2, LWH0, LTUPLE0, LTUPLE1, LTUPLE2, LTUPLE3, 71 | LTUPLE4, LFUNCTIONT0, LFUNCTIONT1, LFUNCTIONT2, LFUNCTIONT3, 72 | LFUNCTIONT4, LFUNCTIONT5, LFUNCTIONT6, LP0, LPROCEDURET0, 73 | LPROCEDURET1, LPROCEDURET2, LPROCEDURET3, LPROCEDURET4, 74 | LPROCEDURET5, LPROCEDURET6, LPROCEDURET7, LANY0, LANY1, 75 | LAND0, LAND1, LAND2, LA0, LAN0, LEXTERN0, LEXTERN1, LEXTERN2, 76 | LEXTERN3, LEXTERN4, LGIVEN0, LGIVEN1, LGIVEN2, LGIVEN3, 77 | LGIVEN4, LLOW0, LLOW1, LU0, LUNION0, LUNION1, LUNION2, 78 | LUNION3, LUNION4, LWALRUS0, LMODNS0, LMODULE0, LMODULE1, 79 | LMODULE2, LMODULE3, LMODULE4, LMODULE5, LM0 80 | } LexStates; 81 | 82 | /* AST tag enum. 
83 | * basically, this is all the AST types, and is 84 | * used both for determining the type of AST 85 | #define gindent(level) llindent(level, YES) 86 | * that is represented within the tree, as well as 87 | * returned from the tokenizer to say what type 88 | * of object it thinks is in buffer 89 | */ 90 | typedef enum { 91 | TDEF, TBEGIN, TEND, TEQUAL, TCOREFORM, // 4 92 | TIDENT, TCALL, TOPAREN, TCPAREN, TMATCH, // 9 93 | TIF, TELSE, TTHEN, TTYPE, TPOLY, TVAL, // 15 94 | TARRAY, TRECORD, TINT, TFLOAT, TSTRING, // 20 95 | TCHAR, TBOOL, TEQ, TSEMI, TEOF, TPARAMLIST, // 26 96 | TTDECL, TWHEN, TNEWL, TDO, TUNIT, TERROR, // 32 97 | TLETREC, TLET, TFN, TCASE, TSTRT, TCHART, // 38 98 | TINTT, TFLOATT, TCOMMENT, TREF, TDEQUET, // 42 99 | TBOOLT, TWITH, TOF, TDECLARE, TFALSE, // 47 100 | TTRUE, TUSE, TIN, TCOLON, TRECDEF, // 52 101 | TCOMPLEXTYPE, TCOMMA, TOARR, TCARR, // 56 102 | TARRAYLITERAL, TBIN, TOCT, THEX, // 60 103 | TARROW, TFATARROW, TCUT, TDOLLAR, // 64 104 | TPIPEARROW, TUSERT, TVAR, TTAG, // 68 105 | TPARAMDEF, TTYPEDEF, TWHILE, TFOR, // 72 106 | TTUPLET, TFUNCTIONT, TPROCEDURET, // 75 107 | TAND, TANY, TGUARD, TEXTERN, TGIVEN, // 80 108 | TLOW, TUNION, TWALRUS, TMODNS, TQIDENT, // 85 109 | TMODULE, // 86 110 | } TypeTag; 111 | 112 | struct _AST { 113 | TypeTag tag; 114 | /* there's going to have to be some 115 | * interpretation here, as we're not 116 | * breaking down the ASTs into discrete 117 | * objects, but rather just grouping them 118 | * into a generic a piece as possible. 119 | * so, for example, and IF block would 120 | * have a lenchildren == 3, no matter what 121 | * whereas a BEGIN block lenchildren == N, 122 | * where N >= 0. In the real thing, would 123 | * probably be best to make this a poly with 124 | * each member broken down, or an SRFI-57-style 125 | * struct. 126 | * in fact... if you have row-polymorphism, there's 127 | * no need for SRFI-57 inheritance, save for to 128 | * create convenience methods... hmm... :thinking_face: 129 | */ 130 | char *value; 131 | uint32_t lenvalue; 132 | uint32_t lenchildren; 133 | struct _AST **children; 134 | }; 135 | 136 | typedef struct _AST AST; 137 | 138 | /* Represent the return type of Readers as 139 | * `data EitherAST = Right (AST) | Left Int Int String`. 140 | * this allows us to return Either an error in the form 141 | * of a line number, error number, and message, *or* an 142 | * actual AST form. 143 | */ 144 | typedef enum _ASTEITHERTAG { ASTLEFT, ASTRIGHT } ASTEitherTag; 145 | 146 | typedef struct _ASTEither { 147 | ASTEitherTag tag; 148 | struct { 149 | int line; 150 | int error; 151 | char *message; 152 | } left; 153 | AST *right; 154 | } ASTEither; 155 | 156 | /* A simple type to hold an ASTEither and an 157 | * offset into the stream as to where we saw 158 | * said ASTEither. 
Originally this was just a 159 | * wrapper, but I decided to linearize it, looking 160 | * forward to what I'm toying wrt SRFI-57-style 161 | * records 162 | */ 163 | 164 | typedef enum _ASTOFFSETTAG { ASTOFFSETLEFT, ASTOFFSETRIGHT } ASTOffsetTag; 165 | 166 | typedef struct _ASTOFFSET { 167 | int offset; 168 | ASTOffsetTag tag; 169 | struct { 170 | int line; 171 | int error; 172 | char *message; 173 | } left; 174 | AST *right; 175 | } ASTOffset; 176 | 177 | // external defs, written in carML 178 | extern bool self_tco_p(const char *, AST *); 179 | extern AST *shadow_ident(AST *); 180 | extern AST *make_set_bang(AST *, AST *); 181 | extern AST *shadow_params(AST *, AST *); 182 | extern char *shadow_name(char *); 183 | extern char *get_parameter_name(AST *, int); 184 | extern AST *get_parameter_ident(AST *, int); 185 | extern AST *get_parameter_type(AST *, int); 186 | extern AST *define_shadow_params(AST *, AST *); 187 | extern AST *rewrite_tco(AST *); 188 | 189 | #endif 190 | -------------------------------------------------------------------------------- /attic/hash_table.c: -------------------------------------------------------------------------------- 1 | /* 2 | * @(#) a simple hash table for dictionaries, basically so I can implement environments in carML 3 | * @(#) uses FNV1a as a hash function, and just stores them linearly... 4 | * @(#) 5 | * @(#) the purpose of this and all the dictionary tests is to determine which of these is faster, 6 | * @(#) and at what point. If you think about it, most users will not actually store that much 7 | * @(#) in the environment frame, so there's little point in making it optimized for storing 8 | * @(#) huge amounts of data. Instead, it's generally easier to optimize this for storing small 9 | * @(#) amounts of data. I can never find it, but I recall reading somewhere that Clojure (used to?) 10 | * @(#) store dictionaries as two linear arrays, because for small dictionaries were more common and 11 | * @(#) that method was Good Enough(TM) for most purposes. 12 | * @(#) 13 | * @(#) we need to test a few different things: 14 | * @(#) 15 | * @(#) - average storage time 16 | * @(#) - average retrieval time 17 | * @(#) - worst-case storage time 18 | * @(#) - worst-case retrieval time 19 | * @(#) - are there collisions? 20 | * @(#) - performance of long names vs short names 21 | * @(#) - repeated stores 22 | * @(#) - repeated lookups 23 | * @(#) 24 | * @(#) and for each, we want to ignore the time taken to generate random names and 25 | * @(#) the like. We also want to keep track of how long it takes for short names vs 26 | * @(#) long names, and at what point does it no longer make sense. For example, at 27 | * @(#) what point does a linear table scan make sense for an evironment, and when 28 | * @(#) should it be upgraded to something meant to hold more data, like a trie? 29 | */ 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | struct _DICT { 36 | uint64_t keys[128]; 37 | int values[128]; 38 | }; 39 | 40 | typedef struct _DICT dict; 41 | 42 | uint64_t fnv1a(char *, uint32_t); 43 | 44 | /* 45 | * all of the dictionary objects should implement the following protocol: 46 | */ 47 | uint8_t store(dict *, char *, int); 48 | uint8_t exists(dict *, char *); 49 | uint8_t retrieve(dict *, char *, int *); 50 | uint8_t clear(dict *); 51 | 52 | /* 53 | * our test harness, also should be the same across versions... 54 | * @randname(buffer, length, uniform?) 
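 * buffer may be nil (a fresh one is malloc'd in that case), length
 * caps the generated name, and uniform? == YES forces full-length
 * names while NO picks a random length below that cap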
55 | */ 56 | char *randname(char *, int, uint8_t); 57 | 58 | /* 59 | * this could (and probably should) be the same across all 60 | * implementations... 61 | */ 62 | int 63 | main(void) { 64 | dict foo; 65 | // should make this configurable, so that we can 66 | // march this upwards until it no longer makes sense... 67 | char *randnames[32] = {0}; 68 | char buf[128] = {0}; 69 | uint32_t idx = 0, val = 0, time_idx = 0; 70 | // store all run times, then calculate average cases 71 | suseconds_t runtimes[256], avg_s_store, avg_r_store; 72 | suseconds_t avg_s_retrieve, avg_r_retrieve; 73 | // hold all our various interstitial timing information 74 | struct timeval stime, etime; 75 | 76 | for(; idx < 32; idx++) { 77 | // first pass, generate uniform names 78 | randnames[idx] = strdup(randname(buf, 128, YES)); 79 | } 80 | 81 | /* 82 | * test storage... 83 | */ 84 | for(idx = 0; idx < 32; idx++) { 85 | // need to time these and record the timings... 86 | val = arc4random(); 87 | gettimeofday(&stime, nil); 88 | store(foo, randnames[idx], val); 89 | gettimeofday(&etime, nil); 90 | runtimes[time_idx] = etime.tv_usec - stime.tv_usec; 91 | time_idx++; 92 | printf("%s:%ld\n", randnames[idx], val); 93 | } 94 | 95 | /* 96 | * test retrieval... 97 | */ 98 | for(idx = 0; idx < 32; idx++) { 99 | // need to time these and record the timings... 100 | gettimeofday(&stime, nil); 101 | retrieve(foo, randnames[idx], &val); 102 | gettimeofday(&etime, nil); 103 | runtimes[time_idx] = etime.tv_usec - stime.tv_usec; 104 | time_idx++; 105 | printf("%s:%ld\n", randnames[idx], val); 106 | } 107 | 108 | /* 109 | * repeatedly look up a value... 110 | */ 111 | val = arc4random_uniform(32); 112 | for(idx = 0; idx < 256; idx++) { 113 | gettimeofday(&stime, nil); 114 | if(exists(foo, randnames[val])) { 115 | printf("yes, it exists...\n"); 116 | } else 117 | printf("no, it doesn't!\n"); 118 | } 119 | gettimeofday(&etime, nil); 120 | runtimes[time_idx] = etime.tv_usec - stime.tv_usec; 121 | time_idx++; 122 | } 123 | 124 | /* 125 | * repeatedly store a value... 126 | */ 127 | tidx = arc4random_uniform(32); 128 | for(idx = 0; idx < 256; idx++) { 129 | // using val here so that I can remove 130 | // arc4random from the timing loop... 131 | val = arc4random(); 132 | gettimeofday(&stime, nil); 133 | store(foo, randnames[tidx], val); 134 | gettimeofday(&etime, nil); 135 | runtimes[time_idx] = etime.tv_usec - stime.tv_usec; 136 | time_idx++; 137 | } 138 | 139 | // ok, now that we've done that, stress test the whole thing... 140 | clear(foo); 141 | 142 | for(idx = 0; idx < 32; idx++) { 143 | free(randnames[idx]); 144 | } 145 | 146 | /* 147 | * larger smoke test... 148 | */ 149 | 150 | for(; idx < 32; idx++) { 151 | // first pass, generate uniform names 152 | randnames[idx] = strdup(randname(buf, 128, NO)); 153 | } 154 | 155 | /* 156 | * test storage... 157 | */ 158 | for(idx = 0; idx < 32; idx++) { 159 | // need to time these and record the timings... 160 | val = arc4random(); 161 | store(foo, randnames[idx], val); 162 | printf("%s:%ld\n", randnames[idx], val); 163 | } 164 | 165 | /* 166 | * test retrieval... 167 | */ 168 | for(idx = 0; idx < 32; idx++) { 169 | // need to time these and record the timings... 170 | retrieve(foo, randnames[idx], &val); 171 | printf("%s:%ld\n", randnames[idx], val); 172 | } 173 | 174 | /* 175 | * repeatedly look up a value... 
176 | */ 177 | val = arc4random_uniform(32); 178 | for(idx = 0; idx < 256; idx++) { 179 | if(exists(foo, randnames[val])) { 180 | printf("yes, it exists...\n"); 181 | } else 182 | printf("no, it doesn't!\n"); 183 | } 184 | } 185 | 186 | /* 187 | * repeatedly store a value... 188 | */ 189 | val = arc4random_uniform(32); 190 | for(idx = 0; idx < 256; idx++) { 191 | store(foo, randnames[val], arc4random()); 192 | } 193 | 194 | clear(dict); 195 | 196 | for(idx = 0; idx < 32; idx++) { 197 | free(randnames[idx]); 198 | } 199 | 200 | return 0; 201 | } 202 | 203 | uint64_t 204 | fnv1a(char *key, uint32_t len) 205 | { 206 | uint64_t hash = 14695981039346656037; 207 | uint32_t idx = 0; 208 | for(; idx < len; idx++) 209 | { 210 | hash ^= key[idx]; 211 | hash *= 1099511628211; 212 | } 213 | return hash; 214 | } 215 | 216 | char * 217 | randname(char *buf, size_t maxlength, uint8_t uniformp) { 218 | char *rc = nil; 219 | uint32_t base = 0, coin_flip = 0; 220 | size_t len = maxlength; 221 | 222 | if(buf != nil) { 223 | rc = buf; 224 | } else { 225 | rc = (char *)malloc(sizeof(char) * maxlength); 226 | if(!rc) { 227 | return nil; 228 | } 229 | } 230 | 231 | if(uniformp == NO) { 232 | len = arc4random_uniform(maxlength); 233 | } 234 | 235 | for(size_t idx = 0; idx < len; idx++) { 236 | base = arc4random_uniform(26); 237 | coin_flip = arc4random_uniform(1); 238 | if(coinflip == 0) { 239 | rc[idx] = 'a' + base; 240 | } else { 241 | rc[idx] = 'A' + base; 242 | } 243 | } 244 | 245 | return rc; 246 | } 247 | -------------------------------------------------------------------------------- /labs/let_rewrite.carml: -------------------------------------------------------------------------------- 1 | #@(#) a simple let/let-rec rewriter 2 | #@(#) rewrite let/let-rec ASTs to 3 | #@(#) simple val nodes 4 | #@(#) Sadly, this is all very imperative 5 | #@(#) at the moment, but we'll get there... 6 | 7 | record NameMap { 8 | len : int # length of names/vals 9 | offset : int # current offset into names/vals 10 | names : ref[array[string]] 11 | vals : ref[array[string]] 12 | } 13 | 14 | # specializing Option here 15 | # myself, because carML's 16 | # allocation strategy hasn't 17 | # *really* been decided yet... 18 | type OptionString { 19 | SomeString string 20 | NoString 21 | } 22 | 23 | def add_rewrite name:string new_name:string rewrites:ref[NameMap] => bool = { 24 | val len : int = (-> rewrites len) 25 | val off : int = (-> rewrites offset) 26 | 27 | when (< off len) do { 28 | set! (get (-> rewrites names) off) name 29 | set! (get (-> rewrites vals) off) new_name 30 | set! (-> ret off) $ + off 1 31 | return true 32 | } 33 | 34 | false 35 | } 36 | 37 | def init_rewrite => ref[NamedMap] = { 38 | var ret : ref[NamedMap] = (hmalloc $ sizeof NamedMap) 39 | val len : int = 64 40 | # again, this could be so much nicer if 41 | # we had decent constructors *or* a monadic 42 | # interface to construction... 43 | set! (-> ret len) len 44 | set! (-> ret offset) 0 45 | # would be interesting to have a "new" semantic for this stuff... 46 | # something like Rust, with it's Vec::new or VecDeque::new 47 | set! (-> ret names) $ hmalloc $ mul len $ sizeof array[string] 48 | set! 
(-> ret vals) $ hmalloc $ mul len $ sizeof array[string] 49 | ret 50 | } 51 | 52 | def fresh_name src:string => string = { 53 | val len : int = (+ 10 $ strlen src) 54 | var f : string = (hmalloc $ mul len $ sizeof char) 55 | snprintf f "%s%d" len src $ arc4random 56 | f 57 | } 58 | 59 | def find_name name:string rewrites:ref[NameMap] => ref[OptionString] = { 60 | var idx : int = 0 61 | 62 | while (< idx (-> rewrites len)) do { 63 | when (not $ strcmp (get (-> rewrites names) idx) name) do { 64 | return (OptionString_SomeString_ref $ get (-> rewrites vals) idx) 65 | } 66 | set! idx $ + idx 1 67 | } 68 | 69 | (OptionString_NoString_ref) 70 | } 71 | 72 | def rewrite_names head:ref[AST] rewrites:NameMap => ref[AST] = { 73 | val v : int = (-> head tag) 74 | var x : int = 0 75 | var idx : int = 0 76 | var name : ref[OptionString] = (SomeString_NoString_ref) 77 | var ret : ref[AST] = nil 78 | match v with 79 | TIDENT => { 80 | set! name $ find_name (-> head value) rewrites 81 | set! x $ -> name tag 82 | match name with 83 | (OptionString.SomeString x) => (make_ident $ -> name $ -> members $ -> SOMESTRING_t m_1) 84 | else => head 85 | end 86 | } 87 | else => { 88 | set! ret $ hmalloc $ sizeof AST 89 | set! (-> ret lenchildren) (-> head lenchildren) 90 | set! (-> ret tag) (-> head tag) 91 | set! (-> ret children) $ hmalloc $ mul (-> ret lenchildren) $ sizeof ref[AST] 92 | # this would be so much cleaner as a map form over 93 | # ASTs 94 | while (< idx $ -> head lenchildren) do { 95 | # need to copy each node in the AST to 96 | # the return list 97 | set! (get (-> ret children) idx) (rewrite_names (get (-> head children) idx) rewrites) 98 | # XXX a low-level `inc!` form would be nice... 99 | set! idx $ + idx 1 100 | } 101 | ret 102 | } 103 | end 104 | } 105 | 106 | def let_ident_name head:ref[AST] => ref[OptionString] = { 107 | val tag : int = (-> head tag) 108 | match tag with 109 | TLET => (OptionString_SomeString_ref $ get (-> head children) 0) #XXX: I hate how this reads... 110 | else => (OptionString_NoString_ref) 111 | end 112 | } 113 | 114 | def let_ident_value head:ref[AST] => ref[AST] = { 115 | val tag : int = (-> head tag) 116 | match tag with 117 | TLET => (get (-> head children) 1) #XXX: I hate how this reads... 118 | else => nil 119 | end 120 | } 121 | 122 | def make_val name:string value:ref[AST] => ref[AST] = { 123 | # this would be so much nicer with a monadic interface 124 | var ret : ref[AST] = (hmalloc $ sizeof AST) 125 | set! (-> ret tag) TVAL 126 | set! (-> ret lenchildren) 2 127 | set! (-> ret children) $ hmalloc $ mul 2 $ sizeof AST 128 | set! (get (-> ret children) 0) name 129 | set! (get (-> ret children) 1) value 130 | ret 131 | } 132 | 133 | def make_ident name:string => ref[AST] = { 134 | var ret : ref[AST] = (hmalloc $ sizeof AST) 135 | set! (-> ret tag) TIDENT 136 | set! (-> ret lenchildren) 0 137 | set! (-> ret value) name 138 | ret 139 | } 140 | 141 | def make_begin_pair binding:ref[AST] body:ref[AST] => ref[AST] = { 142 | var ret : ref[AST] = (hmalloc $ sizeof AST) 143 | val tag : int = (-> body tag) 144 | set! (-> ret tag) TBEGIN 145 | match tag with 146 | TBEGIN => { 147 | val len : int = (+ 1 $ -> body lenchildren) 148 | var idx : int = 0 149 | set! (-> ret lenchildren) len 150 | set! (-> ret children) $ hmalloc $ mul len $ sizeof AST 151 | # another situation where foreach would be 152 | # perfect for... 153 | while (< idx len) do { 154 | set! (get (-> ret children) idx) (get (-> body children) idx) 155 | set! 
idx $ + idx 1 156 | } 157 | } 158 | else => { 159 | set! (-> ret lenchildren) 2 160 | set! (-> ret children) $ hmalloc $ mul 2 $ sizeof AST 161 | set! (get (-> ret children) 0) binding 162 | set! (get (-> ret children) 1) body 163 | } 164 | end 165 | ret 166 | } 167 | 168 | def let_to_val head:ref[AST] rewrites:ref[NameMap] => ref[AST] = { 169 | # the code here is very imperative... getting some handle 170 | # on for/foreach/map/reduce/filter/take vis-a-vis Deques 171 | # and the like will be hugely important for fixing the 172 | # readability here 173 | var ret : ref[AST] = nil 174 | var new_binding : ref[AST] = nil 175 | var new_body : ref[AST] = nil 176 | var idx : int = 0 177 | var new_name : string = nil 178 | val tag : int = (-> head tag) 179 | match tag with 180 | # matching multiple cases with | would 181 | # be nice... 182 | # so would destructuring binds 183 | TLET => { 184 | set! new_name $ fresh_name $ let_ident_name head 185 | # we technically need to do rewrites for here as well.. 186 | set! new_binding $ new_val new_name $ let_ident_value head rewrites 187 | # we add this last because the name may shadow another name 188 | # elsewhere... 189 | add_rewrite (let_ident_name head) new_name rewrites 190 | set! new_body $ rewrite_names head rewrites 191 | make_begin_pair new_binding new_body 192 | } 193 | else => { 194 | # thinking that we could iterate over each 195 | # child and see if it is a TLET 196 | set! ret $ hmalloc $ sizeof AST 197 | set! (-> ret lenchildren) (-> head lenchildren) 198 | set! (-> ret tag) (-> head tag) 199 | set! (-> ret children) $ hmalloc $ mul (-> ret lenchildren) $ sizeof ref[AST] 200 | # this would be so much cleaner as a map form over 201 | # ASTs; need to look at this a bit more, but it seems to make sense... 202 | while (< idx $ -> head lenchildren) do { 203 | # need to copy each node in the AST to 204 | # the return list 205 | set! (get (-> ret children) idx) (let_to_val (get (-> head children) idx) rewrites) 206 | set! idx $ + idx 1 207 | } 208 | ret 209 | } 210 | end 211 | } 212 | 213 | # it would be interesting to have some nano-pass 214 | # signature for these sorts of things that made 215 | # these sorts of optimizations easy to handle 216 | # maybe something like this? 217 | def nanopass_let head:ref[AST] => ref[AST] = { 218 | var holder : ref[NameMap] = (init_rewrite) 219 | val tag : int = (-> head tag) 220 | let_to_val head holder 221 | } 222 | -------------------------------------------------------------------------------- /labs/woodchips.carml: -------------------------------------------------------------------------------- 1 | # @(#) a simple woodchips simulation 2 | # @(#) we have a world, and ants that have 3 | # @(#) simple rules: wander until you see a woodchip 4 | # @(#) - if you have a woodchip, put down your woodchip 5 | # @(#) - if you don't have a woodchip, pick that one up 6 | # 7 | # 8 | # This also raises that I need to *finally* do certain things 9 | # - foreach and array fusion 10 | # - figure out a memory model so that `ref` isn't required 11 | # (basically, allow the compiler to figure out when we want a ref, 12 | # unless we *say explicitly* that we want a ref) 13 | # - get make, make-array, make-string, &c. working 14 | # - Just generally make things more functional, less imperative 15 | # - figure out if I'm really going to keep types and records 16 | # (having both seems redundant, and we could have a smaller language...) 
17 | # - VLAs need to add a $NAME_len param to lambdas 18 | # - need to fix `for` loop as well as `foreach` and co 19 | # - Fix `void` detection for adding a `return` in C 20 | # - Fix the ability of match & co to return a value 21 | # (currently this leads to a lot of code redundancy, 22 | # `set! foo $ match x with ...` 23 | # is so much cleaner than: 24 | # ``` 25 | # match x with 26 | # 0 => set! foo... 27 | # ``` 28 | # ) 29 | 30 | record SimMatrix { 31 | x:int 32 | y:int 33 | length:int 34 | data:ref[char] 35 | } 36 | 37 | record Ant { 38 | x:int 39 | y:int 40 | carrying:bool 41 | direction:int # which "direction" (cardinal) is the ant facing? 42 | } 43 | 44 | def make_SimMatrix x:int y:int => ref[SimMatrix] = { 45 | var ret:ref[SimMatrix] = (hmalloc $ sizeof SimMatrix) 46 | val total:int = (* x y) 47 | set! ret->x x 48 | set! ret->y y 49 | set! ret->length total 50 | set! ret->data $ hmalloc $ * total $ sizeof char 51 | ret 52 | } 53 | 54 | def make_Ant max_x:int max_y:int => ref[Ant] = { 55 | var ret:ref[Ant] = (hmalloc $ sizeof Ant) 56 | var rndx:int = (arc4random_uniform max_x) 57 | var rndy:int = (arc4random_uniform max_y) 58 | set! ret->x rndx 59 | set! ret->y rndy 60 | set! ret->direction $ arc4random_uniform 4 61 | ret 62 | } 63 | 64 | # I *really* need to work on inlining... 65 | # helper methods like this could be easily inlined 66 | def get_SimMatrix x:int y:int mat:ref[SimMatrix] => char = { 67 | # XXX (lojikil): 68 | # this actually raises an interesting problem 69 | # when you say (-> mat x) do you mean the *NAME* x 70 | # or the scoped variable x? There almost has to be 71 | # a distinction of forms that operate on variables and 72 | # those that operate on the values those variables 73 | # hold 74 | # XXX (lojikil): 75 | # when I was originally designing things, I was heading 76 | # towards a SRFI-9/SRFI-57/Haskell style system wherein 77 | # Records have named accessors, like types do. Then those 78 | # are rewritten to C. Right now, I'll use C-style accessors 79 | # similar to OCaml, but I do like the named accessor style 80 | # better. Why? Because it can be treated like a function 81 | # that is rewritten to a low-level accessor based on the 82 | # typing decisions made by the compiler. This also supports 83 | # making `type` and `record` close to equivalent, since there's 84 | # no *real* need to have a distinction there; a record could 85 | # just be a type with no other variants. 86 | 87 | var offset:int = (* x y) 88 | when (<= mat->length offset) do { 89 | set! offset $ - offset mat->length 90 | } 91 | get mat->data offset 92 | } 93 | 94 | def set_SimMatrix x:int y:int mat:ref[SimMatrix] value:char = { 95 | var offset:int = (* x y) 96 | when (<= mat->length offset) do { 97 | set! offset $ - offset mat->length 98 | } 99 | set! (get mat->data offset) value 100 | } 101 | 102 | def calc_offset x:int y:int mat:ref[SimMatrix] => int = { 103 | var offset:int = (* x y) 104 | when (<= mat->length offset) do (set! offset $ - offset mat->length) 105 | offset 106 | } 107 | 108 | # XXX (lojikil) I do need to fix detection of 109 | # `return` in functions that are void and such 110 | # also, make it so that you can call something 111 | # that technically returns (like printf) but ignore 112 | # that return. I dislike `let () = (printf ...)` 113 | # but maybe that isn't a terrible idea either... 114 | def print_SimMatrix mat:ref[SimMatrix] = { 115 | var idx:int = 0 116 | while (< idx mat->length) do { 117 | when (&& (eq? 
0 $ % idx mat->x) (!= idx 0)) do { 118 | printf "\n" 119 | } 120 | printf "%d " $ get mat->data idx 121 | set! idx $ + idx 1 122 | } 123 | printf "\n" 124 | } 125 | 126 | def construct_board mat:ref[SimMatrix] = { 127 | # NOTE we don't want more than 30% of the board 128 | # to be woodchips 129 | val upperbound:int = (/ mat->length 3) 130 | val totalchipcount:int = (arc4random_uniform upperbound) 131 | var idx:int = 0 132 | var chipcount:int = 0 133 | printf "woodchip count: %d\n" totalchipcount 134 | while (< idx mat->length) do { 135 | when (&& (< chipcount totalchipcount) (>= 20 $ arc4random_uniform 100)) do { 136 | # if we haven't exceeded the 30% of the board being woodchips and our 137 | # guess exceeds some threshold, set a woodchip here 138 | set! (get mat->data idx) 2 139 | set! chipcount $ + chipcount 1 140 | } 141 | set! idx $ + idx 1 142 | } 143 | } 144 | 145 | def place_ant ant:ref[Ant] mat:ref[SimMatrix] = { 146 | var offset:int = (* ant->x ant->y) 147 | val total:int = mat->length 148 | var value:char = 0 149 | var res:char = 0 150 | when (> offset total) do { 151 | set! offset $ - offset total 152 | } 153 | set! value $ get mat->data offset 154 | match value with 155 | 0 => (set! res 1) 156 | 1 => (set! res 1) # can there be two ants? 157 | 2 => (set! res 4) 158 | 3 => (set! res 3) # can there be two ants? 159 | 4 => (set! res 4) 160 | end 161 | set! (get mat->data offset) res 162 | } 163 | 164 | # we calculate a value (either x or y) to move 165 | # an object (such as an ant) and support wrapping 166 | # the object back around to max/min 167 | def constrained_offset v:int max_v:int => int = { 168 | mod v max_v 169 | } 170 | 171 | def check_woodchip ant:ref[Ant] mat:ref[SimMatrix] = { 172 | var woodchip_loc:int = 0 173 | match ant->direction with 174 | 0 => (set! woodchip_loc $ mul ant->x $ mod (sub ant->y 1) mat->y) 175 | 1 => (set! woodchip_loc $ mul ant->x $ mod (add ant->y 1) mat->y) 176 | 2 => (set! woodchip_loc $ mul ant->y $ mod (add ant->x 1) mat->x) 177 | 3 => (set! woodchip_loc $ mul ant->y $ mod (sub ant->x 1) mat->x) 178 | end 179 | 180 | # - if we are carrying a woodchip, we need to place the current woodchip 181 | # down and then change to a new direction 182 | # - if we are *not* carrying a woodchip, pick this one up 183 | 184 | } 185 | 186 | def move_ant ant:ref[Ant] mat:ref[SimMatrix] = { 187 | match ant->direction with 188 | 0 => 0 189 | 1 => 1 190 | 2 => 2 191 | 3 => 3 192 | end 193 | } 194 | 195 | # TODO (lojikil): I need to get VLAs working as expected, so that 196 | # we don't have to pass in an explicit length parameter. For now, 197 | # let's just simulate with *one* ant 198 | def run_simulation ant:ref[Ant] mat:ref[SimMatrix] rnds:int = { 199 | var cnt:int = 0 200 | var idx:int = 0 201 | while (< cnt rnds) do { 202 | check_woodchip ant mat 203 | move_ant ant mat 204 | set! cnt $ + cnt 1 205 | } 206 | } 207 | 208 | # we can encode state: 209 | # 0 - nothing here 210 | # 1 - an ant here 211 | # 2 - a woodchip 212 | # 3 - an ant carrying a woodchip 213 | # 4 - an ant *and* a woodchip (the ant just set it down) 214 | # the tough part is, what direction is that ant heading? 
215 | # the update process for each ant's movement could determine the 216 | # direction the ant is moving prior to "looking" for woodchips 217 | # ants can only pick up woodchips in the direction they can "see" 218 | # which is forwards 219 | # 220 | # eventually it would be fun to make this into a "turtles" library 221 | # so that folks can do some simple simulations in carML too, esp 222 | # since carML also uses my work on tatwyd (a PowerLogo-alike) 223 | 224 | def main ac:int al:ref[string] => int = { 225 | (GC_INIT) 226 | var mat:ref[SimMatrix] = (make_SimMatrix 10 10) 227 | var ant:ref[Ant] = (make_Ant 10 10) 228 | printf "x: %d\n" mat->x 229 | printf "y: %d\n" mat->y 230 | printf "length: %d\n" mat->length 231 | print_SimMatrix mat 232 | construct_board mat 233 | place_ant ant mat 234 | print_SimMatrix mat 235 | 0 236 | } 237 | -------------------------------------------------------------------------------- /src/self_tco.c: -------------------------------------------------------------------------------- 1 | /* 2 | * @(#) self-Tail Call Optimization (TCO) code, generated from 3 | * @(#) _src/self_tco.c.carml_ 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | extern char *hstrdup(const char *); 14 | 15 | // NOTE: this code is generated from 16 | // src/self_tco.c.carml, so if we really need to change this, 17 | // we probably should fix that file first... 18 | bool 19 | self_tco_p(const char * name, AST * src){ 20 | bool res = false; 21 | int idx = 0; 22 | int tag = 0; 23 | if(src == NULL) { 24 | return false;; 25 | } else { 26 | tag = src->tag; 27 | } 28 | 29 | if(tag == TDEF) { 30 | return self_tco_p(name, src->children[1]); 31 | } else if(tag == TFN) { 32 | return self_tco_p(name, src->children[1]); 33 | } else if(tag == TCALL) { 34 | return !strcmp(name, src->children[0]->value); 35 | } else if(tag == TBEGIN) { 36 | idx = (src->lenchildren - 1); 37 | return self_tco_p(name, src->children[idx]); 38 | } else if(tag == TWHEN) { 39 | return self_tco_p(name, src->children[1]); 40 | } else if(tag == TIF) { 41 | return (self_tco_p(name, src->children[1])) || (self_tco_p(name, src->children[2])); 42 | } else if(tag == TMATCH) { 43 | idx = 1; 44 | while(idx < src->lenchildren){ 45 | if(self_tco_p(name, src->children[idx])) { 46 | return true;; 47 | } else { 48 | idx = (idx + 2); 49 | } 50 | 51 | } 52 | 53 | return false; 54 | } else { 55 | return false; 56 | } 57 | 58 | } 59 | 60 | AST * 61 | shadow_ident(AST * src){ 62 | AST * ret = hmalloc(sizeof(AST)); 63 | ret->tag = TIDENT; 64 | ret->lenchildren = 0; 65 | ret->value = shadow_name(src->value); 66 | return ret; 67 | } 68 | 69 | AST * 70 | make_set_bang(AST * ident, AST * value){ 71 | AST * ret = hmalloc(sizeof(AST)); 72 | AST * setident = hmalloc(sizeof(AST)); 73 | setident->lenchildren = 0; 74 | setident->tag = TIDENT; 75 | setident->value = hmalloc(5 * sizeof(char)); 76 | stpncpy(setident->value, "set!", 5); 77 | ret->tag = TCALL; 78 | ret->lenchildren = 3; 79 | ret->children = hmalloc(3 * sizeof(AST * )); 80 | ret->children[0] = setident; 81 | ret->children[1] = ident; 82 | ret->children[2] = value; 83 | return ret; 84 | } 85 | 86 | AST * 87 | shadow_params(AST * src, AST * impl){ 88 | AST * ret = hmalloc(sizeof(AST)); 89 | const int clen = (src->lenchildren - 1) * 2; 90 | const int ilen = impl->lenchildren; 91 | int idx = 0; 92 | int sidx = 1; 93 | int base = 0; 94 | AST * shadow = nil; 95 | AST * param = nil; 96 | AST * result = nil; 97 | ret->tag = TBEGIN; 98 | ret->lenchildren = 
clen; 99 | ret->children = hmalloc(clen * sizeof(AST * )); 100 | while(idx < ilen){ 101 | param = get_parameter_ident(impl, idx); 102 | shadow = shadow_ident(param); 103 | result = src->children[sidx]; 104 | ret->children[idx] = make_set_bang(shadow, result); 105 | idx = (idx + 1); 106 | sidx = (sidx + 1); 107 | } 108 | 109 | base = idx; 110 | idx = 0; 111 | while(idx < ilen){ 112 | param = get_parameter_ident(impl, idx); 113 | shadow = shadow_ident(param); 114 | ret->children[idx + base] = make_set_bang(param, shadow); 115 | idx = (idx + 1); 116 | } 117 | 118 | return ret; 119 | } 120 | 121 | char * 122 | shadow_name(char * name){ 123 | char * ret = hmalloc((sizeof(char) * (3 + strlen(name)))); 124 | stpcpy(ret, name); 125 | strcat(ret, "_sh"); 126 | return ret; 127 | } 128 | 129 | char * 130 | get_parameter_name(AST * src, int idx){ 131 | const AST * ret = nil; 132 | if(idx < src->lenchildren) { 133 | ret = src->children[idx]; 134 | ret = ret->children[0]; 135 | return ret->value; 136 | } 137 | 138 | return ""; 139 | } 140 | 141 | AST * 142 | get_parameter_ident(AST * src, int idx){ 143 | const AST * ret = nil; 144 | if(idx < src->lenchildren) { 145 | ret = src->children[idx]; 146 | return ret->children[0]; 147 | } 148 | 149 | return nil; 150 | } 151 | 152 | AST * 153 | get_parameter_type(AST * src, int idx){ 154 | const AST * ret = nil; 155 | if(idx < src->lenchildren) { 156 | ret = src->children[idx]; 157 | return ret->children[1]; 158 | } 159 | 160 | return nil; 161 | } 162 | 163 | AST * 164 | define_shadow_params(AST * src, AST * body){ 165 | AST * ret = hmalloc(sizeof(AST)); 166 | AST * tmp = nil; 167 | AST * * vbuf = (AST * *)hmalloc(sizeof(AST * ) * 64); 168 | int idx = 0; 169 | int cidx = 0; 170 | int capacity = 64; 171 | int length = 0; 172 | ret->tag = TBEGIN; 173 | ret->lenchildren = src->lenchildren; 174 | ret->children = hmalloc(src->lenchildren * sizeof(AST * * )); 175 | while(idx < src->lenchildren){ 176 | tmp = hmalloc(sizeof(AST)); 177 | tmp->tag = TVAR; 178 | tmp->lenchildren = 2; 179 | tmp->value = shadow_name(get_parameter_name(src, idx)); 180 | tmp->children = hmalloc(2 * sizeof(AST * )); 181 | tmp->children[0] = get_parameter_ident(src, idx); 182 | tmp->children[1] = get_parameter_type(src, idx); 183 | vbuf[idx] = tmp; 184 | idx = (idx + 1); 185 | } 186 | 187 | length = idx; 188 | idx = 0; 189 | while(idx < body->lenchildren){ 190 | tmp = body->children[idx]; 191 | if((tmp->tag == TVAR) || (tmp->tag == TVAL)) { 192 | vbuf[length] = tmp; 193 | length = (length + 1); 194 | } 195 | 196 | idx = (idx + 1); 197 | } 198 | 199 | idx = 0; 200 | ret->lenchildren = (length + 1); 201 | ret->children = hmalloc((length + 1) * sizeof(AST * * )); 202 | while(idx < length){ 203 | ret->children[idx] = vbuf[idx]; 204 | idx = (idx + 1); 205 | } 206 | 207 | tmp = hmalloc(sizeof(AST)); 208 | tmp->tag = TWHILE; 209 | tmp->lenchildren = 2; 210 | tmp->children = hmalloc(2 * sizeof(AST * )); 211 | ret->children[length] = tmp; 212 | return ret; 213 | } 214 | 215 | AST * 216 | make_ident(char * src){ 217 | AST * ret = hmalloc(sizeof(AST)); 218 | ret->lenchildren = 0; 219 | ret->tag = TIDENT; 220 | ret->value = hstrdup(src); 221 | return ret; 222 | } 223 | 224 | AST * 225 | make_boolean(int original_value){ 226 | AST * ret = hmalloc(sizeof(AST)); 227 | ret->lenchildren = 0; 228 | ret->children = nil; 229 | if(original_value == 0) { 230 | ret->tag = TFALSE;; 231 | } else { 232 | ret->tag = TTRUE; 233 | } 234 | 235 | return ret; 236 | } 237 | 238 | bool 239 | simple_type_p(int src){ 240 | return 
(src == TIDENT) || (src == TSTRING) || (src == TINT) || (src == TFLOAT) || (src == TARRAYLITERAL) || (src == TCHAR) || (src == TBOOL) || (src == TTRUE) || (src == TFALSE) || (src == THEX) || (src == TOCT) || (src == TBIN); 241 | } 242 | 243 | AST * 244 | copy_body(AST * src, AST * self, uint8_t finalp){ 245 | AST * ret = hmalloc(sizeof(AST)); 246 | AST * tmp = NULL; 247 | AST * * buf = (AST * *)hmalloc(sizeof(AST * ) * 128); 248 | int sidx = 0; 249 | int bidx = 0; 250 | const int srccap = src->lenchildren; 251 | const int srctag = src->tag; 252 | ret->value = hstrdup(src->value); 253 | ret->tag = src->tag; 254 | if(srctag == TCALL) { 255 | if(finalp) { 256 | return shadow_params(src, self); 257 | } else { 258 | while(bidx < srccap){ 259 | buf[bidx] = copy_body(src->children[sidx], self, false); 260 | bidx = (bidx + 1); 261 | sidx = (sidx + 1); 262 | } 263 | } 264 | } else if((simple_type_p(srctag))) { 265 | if(finalp) { 266 | ret->tag = TCALL; 267 | ret->lenchildren = 2; 268 | ret->children = hmalloc(2 * sizeof(AST * )); 269 | buf[0] = make_ident("return"); 270 | buf[1] = make_ident(src->value); 271 | bidx = 2; 272 | } else { 273 | buf[bidx] = src; 274 | bidx = (bidx + 1); 275 | sidx = (sidx + 1); 276 | } 277 | } else if(srctag == TMATCH) { 278 | 1; 279 | } else if(srctag == TIF) { 280 | buf[0] = copy_body(src->children[0], self, false); 281 | buf[1] = copy_body(src->children[1], self, finalp); 282 | buf[2] = copy_body(src->children[2], self, finalp); 283 | bidx = 3; 284 | } else if(srctag == TWHEN) { 285 | ret->tag = TIF; 286 | ret->lenchildren = 3; 287 | ret->children = hmalloc(2 * sizeof(AST * )); 288 | buf[0] = copy_body(src->children[0], self, false); 289 | buf[1] = copy_body(src->children[1], self, finalp); 290 | tmp = hmalloc(sizeof(AST)); 291 | tmp->tag = TCALL; 292 | tmp->lenchildren = 1; 293 | tmp->children = hmalloc(sizeof(AST * )); 294 | tmp->children[0] = make_ident("return"); 295 | buf[2] = tmp; 296 | bidx = 3; 297 | } else if(srctag == TBEGIN) { 298 | while(bidx < srccap){ 299 | if((finalp) && (bidx == (srccap - 1))) { 300 | buf[bidx] = copy_body(src->children[sidx], self, finalp); 301 | } else { 302 | buf[bidx] = copy_body(src->children[sidx], self, false); 303 | } 304 | 305 | bidx = (bidx + 1); 306 | sidx = (sidx + 1); 307 | } 308 | } else { 309 | while(bidx < srccap){ 310 | buf[bidx] = copy_body(src->children[sidx], self, false); 311 | bidx = (bidx + 1); 312 | sidx = (sidx + 1); 313 | } 314 | } 315 | 316 | sidx = 0; 317 | ret->lenchildren = bidx; 318 | ret->children = hmalloc(bidx * sizeof(AST * )); 319 | while(sidx < bidx){ 320 | ret->children[sidx] = buf[sidx]; 321 | sidx = (sidx + 1); 322 | } 323 | 324 | return ret; 325 | } 326 | 327 | AST * 328 | rewrite_tco(AST * src){ 329 | const AST * params = define_shadow_params(src->children[0], src->children[1]); 330 | AST * ret = hmalloc(sizeof(AST)); 331 | AST * body = params->children[params->lenchildren - 1]; 332 | ret->tag = TDEF; 333 | ret->lenchildren = 3; 334 | ret->children = hmalloc(3 * sizeof(AST * * )); 335 | ret->value = src->value; 336 | ret->children[0] = src->children[0]; 337 | ret->children[2] = src->children[2]; 338 | ret->children[1] = params; 339 | body->children[0] = make_boolean(1); 340 | body->children[1] = copy_body(src->children[1], src->children[0], true); 341 | return ret; 342 | } 343 | -------------------------------------------------------------------------------- /labs/mini-walk.carml: -------------------------------------------------------------------------------- 1 | #@(#) a simple walker, 
testing out our SExpression reader 2 | #@(#) me: I'm tired of building a Lisp/Scheme interpreter, I want to build an ML 3 | #@(#) also me: [build meta circular interpreter for ML IR that uses SExprs] 4 | 5 | # just use alists for the env 6 | def muwalk src:SExpression env:SExpression => SExpression = { 7 | 8 | } 9 | 10 | # the more I write in here the more I realize that I really just need 11 | # to bite the bullet and add the simple stdlib that can be erased when 12 | # compiling down to the source language... 13 | # 14 | # here, accessors are definitely the thing that I want to have, for sure 15 | # 16 | # also, generally herein I've been writing more like I've TCO'd things 17 | # by hand, but it'll be nice to move towards normal accessor functions 18 | # 19 | # additionally, because I'm hand unrolling a lot of these rather than 20 | # relying on the TCO to do so, we end up with very imperative looking 21 | # lambdas. There's no real reason for this other than I know what's 22 | # going on under the covers, and so can ignore a lot of the issues 23 | # this can raise, but I really want to move away from this. Here too, 24 | # having named & default parameters would help; for example, the 25 | # `while` form below could be replaced just with a simple tail call 26 | # with increasing parameters. 27 | # 28 | # this also probably should be part of mini-sexpr, and not the 29 | # walker, but it's not terrible here either 30 | def sexpression_eqp s0:SExpression s1:SExpression => bool = { 31 | var idx:int = 0 32 | var llen:int = 0 33 | 34 | # NOTE these are all wrong; I ended up decomposing them 35 | # correctly in mini-interp, but I need that sort of introspection 36 | # in carML as well; generally, other languages force you to 37 | # match on it, and I wonder if I'm just overthinking the 38 | # shortcut case here. In Go, I ended up doing: 39 | # 40 | # [source,go] 41 | # ---- 42 | # if reflect.TypeOf(s0) != reflect.TypeOf(s1) { 43 | # return false 44 | # } 45 | # ---- 46 | # 47 | # which isn't _terrible_, but it would be nice if we could 48 | # do that in a simpler way. I really do think I may be overthinking 49 | # the issue here 50 | 51 | when (<> (of-constructor s0) (of-constructor s1)) do { 52 | # Let's use that here, and note that it isn't defined 53 | # what the result might be, so it's only useful for comparing 54 | # constructors... probably need a `constructor-of-int` and 55 | # `constructor-of-string` to support the various variations here 56 | return false 57 | } 58 | 59 | # Also, I need to use these types better 60 | # when deconstructing an ADT<=>Interface; for example, 61 | # in mini-interp, there are a LOT of places I had to add 62 | # a type cast, because the compiler isn't smart enough to 63 | # use those 64 | # 65 | # Interestingly, Haskell has a module, `Data.Data`, which 66 | # exposes a `toConstr` which can be used for that: 67 | # 68 | # `toConstr (BoolValue True) == toConstr (BoolValue False)` 69 | # 70 | # https://hackage.haskell.org/package/base-4.6.0.1/docs/Data-Data.html 71 | # https://stackoverflow.com/questions/10112733/haskell-simple-constructor-comparison-function 72 | match s0 with 73 | (SExpression.Nil) => true 74 | (SExpression.EndFile) => true 75 | (SExpression.EndList) => true 76 | (SExpression.EndArray) => true 77 | (SExpression.Atom _ _) => (eq? (. s0 m_1) (. s1 m_1)) 78 | (SExpression.Int _ _) => (eq? (. s0 m_1) (. s1 m_1)) 79 | (SExpression.Float _ _) => (eq? (. s0 m_1) (. s1 m_1)) 80 | (SExpression.Char _ _) => (eq? (. s0 m_1) (. 
s1 m_1)) 81 | (SExpression.Bool _ _) => (eq? (. s0 m_1) (. s1 m_1)) 82 | (SExpression.Error _ _) => (eq? (. s0 m_1) (. s1 m_1)) 83 | (SExpression.String _ _) => (eq? (. s0 m_1) (. s1 m_1)) 84 | (SExpression.List _ _) => { 85 | set! llen $ length $ . s0 m_1 86 | while (< idx llen) do { 87 | if (sexpression_eqp (get (. s0 m_1) idx) (get (. s1 m_1) idx)) then { 88 | set! idx $ + idx 1 89 | } else { 90 | return false 91 | } 92 | } 93 | return true 94 | } 95 | end 96 | } 97 | 98 | # ('a, list(('a, 'b))) => 'b 99 | def assoc src:SExpression dst:SExpression => SExpression = { 100 | var idx:int = 0 101 | # same as the above really; we need to check that we 102 | # actually have an SExpression.List here before 103 | # operating, and then cast it 104 | var llen:int = (length (. dst m_1)) 105 | while (< idx llen) do { 106 | when (sexpression_eqp (first (get (. dst m_1) idx)) src) do { 107 | return $ first $ rest $ get (. dst m_1) idx 108 | } 109 | set! idx $ + idx 1 110 | } 111 | # delineating Nil from Null is interesting... 112 | make-struct SExpression.Nil 113 | } 114 | 115 | # ('a, list(('a, 'b))) => bool 116 | def mem_assoc needle:SExpression src:SExpression => bool = { 117 | var idx:int = 0 118 | var llen:int = (length (. dst m_1)) 119 | while (< idx llen) do { 120 | when (sexpression_eqp (first (get (. dst m_1) idx)) src) do { 121 | return true 122 | } 123 | set! idx $ + idx 1 124 | } 125 | false 126 | } 127 | 128 | # ('a, list('a)) => list('a) 129 | def cons hd:SExpression dst:SExpression => SExpression = { 130 | 131 | } 132 | 133 | # list('a) => 'a 134 | def first hd:SExpression => SExpression = { 135 | match hd with 136 | (SExpression.List _ _) => { 137 | get (. hd m_1) 0 138 | } 139 | else => (make-struct SExpression.Error "first can only be applied to lists" 0) 140 | end 141 | } 142 | 143 | # list('a) => list('a) 144 | def rest l:SExpression => SExpression = { 145 | # having array/list primitives here would be great... 146 | # that's one thing I need to implement right away, is the basic 147 | # array/deque/string primitives 148 | var res:array[SExpression] = (make-array SExpression (- (length (. l m_1)) 1) SExpression.Null) 149 | var idx:int = 1 150 | var llen:int = (length (. l m_1)) 151 | while (< idx llen) do { 152 | set! (get res (- idx 1)) (get (. l m_1) idx) 153 | set! idx $ + idx 1 154 | } 155 | make-struct SExpression.List res 0 156 | } 157 | 158 | # (('a => 'a), list('a), list(('a, 'b))) => list('a) 159 | def map f:function[SExpression SExpression] l:SExpression e:SExpression => SExpression = { 160 | 161 | } 162 | 163 | def mapeval l:SExpression e:SExpression => SExpression = { 164 | var res:array[SExpression] = (make-array SExpression (length (. l m_1)) SExpression.Null) 165 | var idx:int = 0 166 | while (<> idx $ length $ . l m_1) do { 167 | set! (get res idx) (mueval (get l idx) e) 168 | set! idx $ + idx 1 169 | } 170 | make-struct SExpression.List res 0 171 | } 172 | 173 | def sexpression_boxint x:int => SExpression = { 174 | (make-struct SExpression.Int x 0) 175 | } 176 | 177 | def sexpression_boxbool b:bool => SExpression = { 178 | (make-struct SExpression.Bool b 0) 179 | } 180 | 181 | def mueval src:SExpression env:SExpression => SExpression = { 182 | match src with 183 | # NOTE (lojikil) 184 | # one of the first things I need to fix 185 | # in any new compiler going forward is 186 | # actualy binding things... and being able to match 187 | # nicely. Look at the pattern below: 188 | # 189 | # `(SExpression.Atom _ _) given (eq? (. 
m_1 hd) "define")` 190 | # 191 | # I need to fix things such that we can easily say: 192 | # 193 | # `SExpression.Atom "define" _) => ...` 194 | # 195 | # for one, it's much clearer what we intend, and for the other 196 | # it's more correct 197 | # 198 | 199 | (SExpression.List _ _) => { 200 | # here, we must unpack what the first member is, and 201 | # if it is something we can eval, we do so, otherwise 202 | # we just apply it 203 | var hd:SExpression = (get (. src m_1) 0) 204 | match hd with 205 | # I had confused the direction of `.` with the accessor methods 206 | # I wish carML already supported accessors... 207 | # Additionally, I ended up doing an inner match on the head 208 | # value in Go, because it required fewer casts, ala: 209 | # 210 | # [source] 211 | # ---- 212 | # match hd with 213 | # (SExpression_Atom _ _) => match (. hd m_1) with 214 | # "define" => ... 215 | # end 216 | # end 217 | # ---- 218 | # 219 | # because now there was just one type cast, `hd.(SExpression_Atom).m_1` 220 | # instead of many. Not really a big deal if we correctly generate code 221 | (SExpression.Atom _ _) given (eq? (. hd m_1) "define") => { 222 | # add the body with that atom to the env 223 | } 224 | (SExpression.Atom _ _) given (eq? (. hd m_1) "define-value") => { 225 | # add the body with that atom to the env 226 | } 227 | (SExpression.Atom _ _) given (eq? (. hd m_1) "define-mutable-value") => { 228 | # add the body with that atom to the env 229 | } 230 | (SExpression.Atom _ _) given (eq? (. hd m_1) "if") => { 231 | # eval the condition and either the THEN or ELSE clause 232 | } 233 | (SExpression.Atom _ _) given (eq? (. hd m_1) "call") => { 234 | muapply src env 235 | } 236 | # there are several things that will come in as a SExpression.List, but 237 | # aren't something we need to iterate over, like `(integer "10")` 238 | # we have two options here: 239 | # 240 | # . just punt this back to `muapply` to handle 241 | # . actually convert them to the correct SExpression type here 242 | # 243 | # TODO: decide on the above 244 | # could be a series of guard clauses like: 245 | # `(SExpression.Atom _ _) given (eq? (. m_1 hd) "integer") => {` 246 | # but for now let's just return the remaining portion 247 | else => { 248 | rest src 249 | } 250 | end 251 | } 252 | # TODO (lojikil): should these just be 253 | # a guard clause that detects simple types? 254 | # yes, yes they should, not even with a guard clause really 255 | else => src 256 | end 257 | } 258 | 259 | def muapply src:SExpression env:SExpression => SExpression = { 260 | # here we have basically two tasks: 261 | # 262 | # . iterate over the list that was passed to us, calling mueval for each 263 | # . looking up the hd of the list, and making sure it is something we can handle 264 | # 265 | # standard McCarthy-style really 266 | var lst:SExpression = (make-struct SExpression.Null) 267 | var hd:SExpression = (make-struct SExpression.Null) 268 | 269 | # so the nice thing about the language we're working with here is 270 | # that even tho we have first class functions and what not, they 271 | # are *always* behind a call or the like, so we don't need to *ever* 272 | # eval the head of the list, we can just: 273 | # 274 | # . check that we have a list 275 | # . map mueval over the body of the list 276 | # . return the function application, if we know it 277 | 278 | # ok, here we need to generate: 279 | # 280 | # . Golang: reflect.TypeOf(src).Name() == "SExpression_List" 281 | # . 
C: src.tag == SEXPRESSION_LIST 282 | # 283 | # basically the C version is equivalent to `of-constructor` 284 | # and the Golang version just adds `.Name()` 285 | when (<> (tag-of-constructor src) "SExpression_List") do { 286 | return src 287 | } 288 | 289 | # why `rest (rest src)`? because remember, our calls 290 | # come in like: `(call (identifier "+") (integer "10") (integer "20"))` 291 | # TODO: should make fused versions of these... 292 | set! lst $ mapeval (rest src) env 293 | set! hd $ first lst 294 | 295 | # let's make a calculator first 296 | # so... in the Go code I wrote, I ended up having 297 | # to do a LOT of type dispatch with the below, because 298 | # we can't just assume they are actually SExpression_Int, for example 299 | match hd with 300 | "+" => { 301 | (sexpression_boxint 302 | (+ 303 | (. (first lst) m_1) 304 | (. (first (rest lst)) m_1))) 305 | } 306 | "-" => { 307 | (sexpression_boxint 308 | (- 309 | (. (first lst) m_1) 310 | (. (first (rest lst)) m_1))) 311 | } 312 | "*" => { 313 | (sexpression_boxint 314 | (* 315 | (. (first lst) m_1) 316 | (. (first (rest lst)) m_1))) 317 | } 318 | "/" => { 319 | (sexpression_boxint 320 | (/ 321 | (. (first lst) m_1) 322 | (. (first (rest lst)) m_1))) 323 | } 324 | "<" => { 325 | (sexpression_boxbool 326 | (< 327 | (. (first lst) m_1) 328 | (. (first (rest lst)) m_1))) 329 | 330 | } 331 | "<=" => { 332 | (sexpression_boxbool 333 | (<= 334 | (. (first lst) m_1) 335 | (. (first (rest lst)) m_1))) 336 | 337 | } 338 | ">" => { 339 | (sexpression_boxbool 340 | (> 341 | (. (first lst) m_1) 342 | (. (first (rest lst)) m_1))) 343 | } 344 | ">=" => { 345 | (sexpression_boxbool 346 | (>= 347 | (. (first lst) m_1) 348 | (. (first (rest lst)) m_1))) 349 | } 350 | "<>" => { 351 | (sexpression_boxbool 352 | (<> 353 | (. (first lst) m_1) 354 | (. (first (rest lst)) m_1))) 355 | } 356 | end 357 | } 358 | -------------------------------------------------------------------------------- /docs/todo.md: -------------------------------------------------------------------------------- 1 | # The Current language TODOs 2 | 3 | A quick list of the current `TODO`s. 4 | 5 | ## Syntax 6 | 7 | 1. Named constructor vars `Foo m:int n:int` 8 | 1. destructuring bind (and matching the same ^^^) 9 | 1. Make rebinding for idents in `match` forms (`rewrite_match_bind.carml`) 10 | 1. Make sure that things like `y given (> y 10)` are rewritten properly as well 11 | 12 | ## Semantics 13 | 14 | 1. Row polymorphism with `row[member1, member2, member3]` 15 | 1. Tuples, and flattening them when used as a return type in Golang 16 | 1. RawDeque (see `labs/rawdeque.carml`) as a test backing for Deques 17 | 1. use RawDeque in specialized form to store a spaghetti stack of environment frames 18 | 1. use the environment frames to support typing (finally) 19 | 1. Figure out a decent backing for Rust-style deques (possibly implemented from records + arrays) 20 | 1. make `|>` work as it would in OCaml/F# 21 | 1. add `.` ala Haskell for "compose"? `f . g` becomes `(f (g x))` 22 | 1. If we are adding those, may as well do full shunting yard and parse things nicely 23 | 1. integrate `$` and `|>` with `$()` 24 | 1. `deque` and memory model 25 | 1. investigate untagged unions (`int | float` as a type) _note_: I like this style: `union[int float]` 26 | 27 | ## Modules 28 | 29 | 1. Atop SML/OCaml style modules, look into Mythryl's `API` types 30 | 1. **WONT** OCaml/SML-style Modules, and their application to higher-kinded types 31 | 1. 
will actually use something more akin to F#'s modules 32 | 1. **DONE** Figure out why this is breaking parameter lists: 33 | 34 | ``` 35 | >>> module Foo { 36 | def bar x:int y:int => int = (+ x (+ y 10)) 37 | def baz x:int => int = (+ x 10) 38 | } 39 | (module Foo 40 | (begin 41 | (define bar (parameter-list ) 42 | (returns (type integer)) 43 | (call (identifier +) (identifier x) (call (identifier +) (identifier y) (integer 10)))) 44 | (define baz (parameter-list ) 45 | (returns (type integer)) 46 | (call (identifier +) (identifier x) (integer 10))))) 47 | ``` 48 | - it seems like any `def` form in a `begin` is breaking parameter lists? 49 | - ah, turning on C output, it looks like it's just broken for SExpression output... 50 | - same in Golang, it looks fine 51 | 52 | ## Compile passes 53 | 54 | 1. Need to replace manual `sed -e 's/\([a-z]\)\-\([a-z]\)/\1_\2/g'` and `sed -e 's/\?/_p/g'` with `cmung` 55 | 1. Figure out both type specialization and generics to compiled form 56 | 1. Fusion for `map`, `map!`, `foreach`, and so on. 57 | 1. Variable Length Array (VLA) style 58 | 1. in the base C compiler & carML self-hosting one, include a defined list of compiler errors with friendly data 59 | 1. add line numbers to errors 60 | 1. add nano-pass: a-normal form (ANF) 61 | 1. add nano-pass: lambda lifting 62 | 1. add nano-pass: ANF => SSA 63 | 1. add nano-pass: rewrite `let`/`letrec` => `val` + temporary binding 64 | 1. add nano-pass: constant folding 65 | 1. add nano-pass: select the correct reified constructor implementation 66 | 1. add nano-pass: demand-driven type inference 67 | 1. Investigate: method of determining effects, and how that could make ANF easier (lift once for two calls) 68 | 1. Tests, both for IR and C 69 | 1. JS, Java, C++ backends, but written in carML itself and using the SExpression output. 70 | 1. WebAssembly backend 71 | 72 | ## Environment 73 | 74 | 1. _TEST_: add more complex type tests 75 | 1. add test runner: parse SExpression output... 76 | 1. C version: convert `(foo [1 2 3 4])` to `let x:array[int] = [1 2 3 4] in (foo x)` 77 | 1. Runtime, which should be pretty minimal. Look at Zig here, it has a smaller runtime than C! 78 | 1. write an actual inclusion algorithm for the compiler to consume `use`d libraries 79 | 1. Friendlier REPL, with keyboard support, and a simple Logo-style `edit` command 80 | 1. Interpreter, either ghci style (compiles in another language) or actual interpreter 81 | 1. Symbolic execution engine atop the same (using that paper "From Definitional Interpreters to Symbolic Executors") 82 | 83 | ## completed items: 84 | 85 | 1. **DONE** Add sized ints/uints/floats (e.g. `uint8`) as types (what about `U8` or `U64`? that works...) 86 | 1. **DONE** change `declare` to match how parameters work `@foo: function[int => int]`, `@bar: int` 87 | 1. **DONE** Add Samurai/Ninja as a build system 88 | 1. **DONE** Fix `match` forms that have a call; memoize the call (but need types, sigh) 89 | 1. **DONE** fix `make-array` (plus the VLA style mentioned below); uses GC by default for now, will fix in self-hosted version 90 | 1. **DONE** Fix type parsing code to be much simpler. 91 | 1. **DONE** Fix (finally) the `match` form for variants/poly 92 | 1. **DONE** Add complex types to `val` 93 | 1. **DONE** Add complex types to records 94 | 1. **DONE** Add complex types to `let` 95 | 1. **DONE** Add `var` form 96 | 1. **DONE** type tags 97 | 1. 
**FIXED** _BUG_: `Foo Bar int` as type constructor is parsed as `(type-constructor Foo (complex-type Bar Int))` when it should be `(type-constructor Foo Bar Int)` (2 members) 98 | 1. **FIXED** _BUG_: look at how the `experiments/sexpr.carml` is being parsed, and note that several constructors are missing 99 | 1. **DONE** (test case for the above: `Foo Bar int`) 100 | 1. **FIXED** _BUG_: `def bar h:Url => Url` is parsed as `(parameter-list (ident h) (tag Url))` when it should be `(parameter-list (parameter-def h (complex-type (tag Url))))` 101 | 1. **FIXED** (test case for the above: `def foo f:Url => Url = f`) 102 | 1. **FIXED** _BUG_: complex return types 103 | 1. **DONE** parse HOFs in declarations 104 | 1. **DONE** Review switch to Scala-style `[]` for types. 105 | 1. **DONE** Parse `@`/`declare` forms 106 | 1. **WONTFIX in C version** Update `val`, `let`, records to use the new `declare` type parser 107 | 1. **WONTFIX** Make function definitions & `let` forms accept `begin` style function calls (i.e. avoid using `()`) 108 | 1. **DONE** `match`/`case` form, with guards. 109 | 1. **DONE**: Partial application syntax: `$()`, including `_` as filler 110 | 1. **WONTFIX** Hoare-logic (pre, post, invariants, &c.): Won't fix because moving towards refinement types 111 | 1. **WONTFIX** make `mung_single_type` for reading a single type, use for `@` forms 112 | 1. **DONE** Compile to C in the style of Enyalios 113 | 1. **DONE** an `extern` or `alien` form for easy FFI 114 | 1. **DONE** Finish the `use` form 115 | 1. **DONE** Parsing of variants 116 | 1. **DONE** Compilation of variants 117 | 1. **DONE** Parsing of polymorphic variants 118 | 1. **DONE** Compilation of polymorphic variants 119 | 1. **DONE** Make sure types are correct for types/polys 120 | 1. **DONE** Syntax updates: `record` 121 | 1. **WONTFIX** Syntax updates: `def` 122 | 1. **WONTFIX** Syntax updates: `match` 123 | 1. **WONTFIX** Syntax updates: `fn` 124 | 1. **WONTFIX in C version** _REVIEW_ make `let` & `var/var` treat items like a call form (as in `val r : int = sum x 10` instead of `val r : int = (sum x 10)`) 125 | 1. **WONTFIX in C version** make the types parsing code more modular; could easily extract that out into a function 126 | 1. **DONE** (not in the most elegant way, mind, but...) Finally fix the frakking lexer to not goof up internal states 127 | 1. **WONTFIX** Csharp-style record parameter unboxing: I think this makes the C interface weird, maybe if we go natively to machine code... 128 | 1. **DONE** Error handling: `Either` 129 | 1. **DONE** Option types 130 | 1. **WONTFIX in C version** Lexer-as-stream 131 | 1. **DONE** Fix type state transition, which fails for Tagged types (`Url`) 132 | 1. **DONE** Fix parsing of single line `begin`: `{sum x 10}` fails to parse properly 133 | 1. **DONE** Fix `float`, `bool`, and `char` parsing 134 | 1. **DONE** Fix edge case: complex type right before `=>` fails `def foo bar : Url => int = ...` fails 135 | 1. **DONE** fix complex type handling in `typespec2c` 136 | 1. **DONE** fix match with `type`s 137 | 1. **DONE** Investigate: currently there is a syntax ambiguity in `begin` forms: is `t` a unary function call, or an identifier? Fix: identifier 138 | 1. 
**DONE** Make `$` work like it does in Haskell 139 | 140 | # Begin-style function calls 141 | 142 | _NOTE_ the fifth point above is the following: 143 | 144 | currently, `def` and `let` forms must enclose function calls with `()`: 145 | 146 | def foo x = (sum x x) 147 | let x = 95 in (foo x) 148 | 149 | Whereas in `begin` blocks, they needn't be: 150 | 151 | def foo x = { 152 | sum x x 153 | } 154 | 155 | I thought about just requiring users to use `begin` forms, but that means simple functions cannot be 156 | succinctly written. things like `foo` above shouldn't require 3 lines of text. Think about simple 157 | helper functions: 158 | 159 | def f x = sum (mul x 5.3) 10 160 | map f someVector 161 | 162 | Additionally, I want the repl to be able to handle those sorts of calls as well: 163 | 164 | >>> println "foo" 165 | foo 166 | _ : Unit 167 | >>> def f x = sum x 10 # secretly returns void 168 | >>> f (f 10) 169 | _ : Int = 30 170 | 171 | **DONE, 16OCT2017** Also, this is broken: 172 | 173 | >>> def foo x = { sum x x } 174 | (define foo (parameter-list (identifier x)) 175 | (begin 176 | (identifier sum) 177 | (identifier x) 178 | (identifier x))) 179 | >>> def foo x = { 180 | sum x x 181 | } 182 | (define foo (parameter-list (identifier x)) 183 | (begin 184 | (call (identifier sum) (identifier x) (identifier x)))) 185 | 186 | **DONE, 13OCT2017** Furthermore, this actually introduces a syntax & semantic ambiguity: 187 | 188 | def foo x : int => int = { 189 | var t : int = 10 190 | set! t (sum t x) 191 | t 192 | } 193 | 194 | Technically, we want the _value_ of `t`, but the current compiler thinks 195 | that `t` is actually an unary function application. Need to chew on how 196 | to fix that... OCaml requires explicit `()` for unary, so I could do 197 | the same... not really sure I like that, but it is one solution. 198 | 199 | # Scala-style types 200 | 201 | It's pretty appealing to me to use Scala-style type declarations... this would mean `@` would be freed from 202 | declaring types to simply adding annotations like in Scala... Furthermore, it would make parsing a bit easier 203 | to boot. This would imply that: 204 | 205 | @declare foo Array of Int => Int 206 | def foo x = ... 207 | 208 | Would become: 209 | 210 | def foo x: Array[Int] = ... 211 | 212 | I also wonder if we can still use `[]` for `indexGet` because the type language & the term language need-not 213 | be 1 to 1... 214 | 215 | # Syntax Updates 216 | 217 | I've been thinking about certain syntax choices I made originally, and thinking about updating them: 218 | 219 | - **DONE** remove the `=` in `record`s: `record foo { x int; y int;}` 220 | - **WONTFIX** allow the use of begin blocks directly following parameter lists: `def foo x { println x; sum x x}` 221 | - **WONTFIX** allow the use of begin blocks directly following `fn` parameter lists: `fn x { println x; sum x x}` 222 | - **N/A** still allow `def foo x = { block stuff ... }` 223 | - **WONTFIX** match forms to just directly use begin blocks: `match x { Some y => ... ; None => ... }` 224 | 225 | # Pre-requisites, Post-requisites, Invariants 226 | 227 | I'd like to embed some notion of the usual Hoare-logic forms into the language as extensions or _refinements_ to 228 | the `declare` or `@` form. I was thinking something similar to: 229 | 230 | @foo x Num y Num -> Num 231 | @/requires foo >= x y 232 | @/returns foo + x y 233 | def foo x y = ... 234 | 235 | Not sure how I want the Hoare-logic to look; `@ /requires` vs `@/requires `... 
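One way these *could* lower (purely illustrative; the compiler does nothing like this today, and the function below is a made-up toy): since `def` forms already compile to plain C functions, a `@/requires` clause could simply become a guard at the top of the generated body:

```
#include <assert.h>

/* hypothetical C output for:
 *   @foo x Num y Num -> Num
 *   @/requires foo >= x y
 *   def foo x y = ...
 * the assert-based strategy and the body here are illustrative only */
int
foo(int x, int y){
    assert(x >= y); /* from @/requires */
    return x + y;
}
```

Whether a check like that stays as a runtime assert or gets discharged statically is really the same question as the refinement-types direction below.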
236 | 237 | _Update_: I've been playing with F\* for a while now, and reading up about its 238 | [KreMLin](https://fstarlang.github.io/general/2016/09/30/introducing-kremlin.html) backend... the idea to have 239 | refinements & pre/post conditions as `{block}` is pretty appealing to me. 240 | 241 | # Csharp style record parameter unboxing 242 | 243 | So C# will actually unbox structs that are passed as args: 244 | 245 | foo(mystruct); 246 | 247 | will actually become 248 | 249 | foo(mystruct.member0, mystruct.member1, ...); 250 | 251 | iff the struct is not modified (iirc). 252 | 253 | # Error handling 254 | 255 | I don't really want to get too fancy with Error handling. I think it should be simple enough to just 256 | use `Either` heavily for all core functions. Like an `os.open` could theoretically look like: 257 | 258 | @os.open string int -> Either a b 259 | def os.open path mode { 260 | let res = (alien "open" path mode) in 261 | if (< res 0) then 262 | (Left (os.errno_lookup res)) 263 | else 264 | (Right res) 265 | } 266 | 267 | Yes, this would mean that the other side will always have to have some sort of `match` form to 268 | test what the result actually is, but that would mean that error handling should be relatively 269 | simple. I _like_ how Digamma implements SRFIs 23 & 34, but I don't know if I want to have that 270 | much "stuff" in carML. 271 | 272 | # Lexer-as-stream 273 | 274 | The idea here is that a simple lexer should consume _all_ tokens from input, regardless of their syntactic 275 | validity, prior to returning to the parser. The parser then can do the usual RDP, but instead of operating 276 | on the file buffer, the RDP then can operate on the _token_ buffer. Thus, the file stream is completely 277 | consumed (there are no dangling tokens the parser will consume once an error has occurred: the entire 278 | stream can be abandoned), and we can implement nicer parser stuffs. 279 | 280 | # **DONE** what in the world... 281 | 282 | Figured it out: parser has a case for 'c', but no productions, so it can't actually parse anything that 283 | starts with a 'c' 284 | 285 | >>> foo [1,2,3,4] 286 | (identifier foo) 287 | >>> (array-literal (integer 1) (integer 2) (integer 3) (integer 4)) 288 | >>> r 289 | (identifier r) 290 | >>> r [1 2 3 4] 291 | (identifier r) 292 | >>> (array-literal (integer 1) (integer 2) (integer 3) (integer 4)) 293 | >>> car [1 2 3 4] 294 | 295 | It never finished reading with the `car`... 296 | 297 | # Partial Application form (really, a specialization form) 298 | 299 | I'm thinking that `$()` will denote partial application. For example: 300 | 301 | def foo x y z = + x (* y z) 302 | 303 | So here, the function `foo` requires 3 parameters, but there may be cases wherein we 304 | do **not** wish to use all three, and this is where `$()` forms would come into play: 305 | 306 | let bar = $(foo 10 _ 12) in 307 | println (bar 11) 308 | 309 | Here, the "call" to `bar` is just rewritten to be `foo 10 11 12`. I've wondered how, or if, it 310 | should support memoizing parameters; for example, consider the following: 311 | 312 | def foo x y = + x y 313 | def baz x = * x 10 314 | let bar = $(foo _ (baz 11)) in 315 | println (bar 12) 316 | 317 | Should we compute and store the value `(baz 11)`, or should we rerun that calculation at each 318 | application of the specialization form? Both have implications, but I'm wondering which is the 319 | less surprising of the two? 
I guess the fact that we're going for an SRFI-26 style here is the 320 | real answer to capturing... 321 | 322 | ## Interaction with thrushing forms 323 | 324 | I **do** wish to support `|>` and the like. I think Haskell's `$` form has less appeal here, because 325 | I'm already avoiding as many parens as possible, but `|>` from the various ML dialects is interesting. 326 | 327 | (|> 328 | foo 329 | $(bar _ 11) 330 | baz 331 | $(blah _ 12)) 332 | 333 | It'll be interesting to have a form that doesn't introduce a lambda capture but _does_ allow for 334 | specialization... 335 | 336 | Thinking about `$` though, that might be interesting to have... 337 | 338 | println (int_to_string (sum 54 100)) 339 | # vs 340 | println $ int_to_string $ sum 54 100 341 | 342 | Could make `$` the only infix form... 343 | 344 | # Variable Length Arrays 345 | 346 | Arrays should be fixed size, deques should handle growth, so that also means anywhere that we have 347 | an array we can also define a `len` variable to capture that length. Calls to `length` on arrays or 348 | strings can be simply rewritten to that variable. 349 | 350 | ``` 351 | val foo : array[int] = [1,2,3,4,5] 352 | # compiler defines a `foo_len_$integer` variable... 353 | # ... 354 | # ... 355 | for x in (range 0 $ length foo) do ... 356 | # the `length foo` is rewritten to `foo_len_$integer` 357 | ``` 358 | 359 | Similarly, when we pass arrays into functions, we should capture the length as a parameter: 360 | 361 | ``` 362 | def foo bar:array[int] ... = ... 363 | # the compiler will add a `bar_len_$integer` parameter implicitly... 364 | ``` 365 | 366 | # Memory Model 367 | 368 | To add to the above, I've been thinking about the memory model of carML... a lot. The problem 369 | I have is that I want to be able to both: 370 | 371 | - allow normal users not to think about how memory works 372 | - allow power users to precisely control how memory works 373 | 374 | So I *think* what I need to do is: 375 | 376 | 1. add the antithesis of `ref`: `flat`, which requires stack allocation 377 | 1. pass items by reference by default (`const *`) 378 | 1. pass items by mutable reference when `ref` is in play (`ref[array[int]]`) 379 | 1. pass items by value when `flat` is in play (`flat[array[int]]`) 380 | 1. structs and the like then would _default_ to const refs unless the user specifies otherwise 381 | 382 | **note** I think we'll use `low` for this, because it's dissimilar to `float` 383 | 384 | ## Decomposition 385 | 386 | I was thinking about this today: 387 | 388 | ``` 389 | # ... 390 | var foo:array[int] = (make-array int 10 0) 391 | # should become: 392 | var foo:array[int] = (model-approriate-allocator int 10) 393 | val foo_len = 10 394 | (memset_s foo (sizeof int) 10 0) 395 | ``` 396 | 397 | Basically, a nano-pass can rewrite arrays to be proper captures of setting up the VLAs, len, setting default values, &c. 398 | -------------------------------------------------------------------------------- /labs/mini-sexpr.carml: -------------------------------------------------------------------------------- 1 | #@(#) A Miniture SExpression library for carML 2 | #@(#) mainly aimed at getting the carML compiler out of the C system 3 | #@(#) should be mainly compatible with the SExpression-based IR that 4 | #@(#) carML/C generates. 
5 | #@(#) Meant to help even FURTHER accelerate the bootstrap process, 6 | #@(#) since it's smaller than a full SExpression library, and works 7 | #@(#) with the Golang output 8 | # 9 | # the shame is, given carML's current 10 | # type system, there are many work-arounds 11 | # required to make the below work: 12 | # - the type system can't tell ref[SExpression] can be flattened to SExpression 13 | # - some things should be automatically passed around as ref[Type] but you have to manually do that 14 | # - lots of ref[Any] could be turned into a `const`... 15 | # 16 | # probably lots of other hacks as I 17 | # go through it. Luckily, once I move 18 | # past where the C compiler is, I can 19 | # start to handle some of the debt I've 20 | # accrued. 21 | 22 | type SExpression { 23 | Nil 24 | Atom string int 25 | String string int 26 | # making these strings for now to make it easier to 27 | # create, since I haven't written standard functions 28 | # to convert string=>int/float yet. 29 | # eventually, those should be gadgets... 30 | # 31 | # additionally, including an `int` here for the 32 | # offset, since I haven't dont destructuring binds 33 | # yet... 34 | Int string int 35 | Float string int 36 | Char string int 37 | Bool bool int 38 | List array[SExpression] int 39 | Error string int 40 | # Maybe should have an internal type that 41 | # isn't exported, and an external type that 42 | # is, so as to remove the two constructors 43 | # below... 44 | Null 45 | EndList int 46 | EndArray int 47 | EndFile 48 | } 49 | 50 | record Token { 51 | lexeme:string 52 | lexeme_offset:int 53 | lexeme_type:int 54 | } 55 | 56 | # add a simple reader system below 57 | # should be able to construct & read 58 | # SExpression-based syntax 59 | # 60 | # What will this be used for? Why, 61 | # for bootstrapping a compiler in carML 62 | # itself really. 63 | 64 | def is_whitespace ch:char => bool = { 65 | match ch with 66 | ' ' => true 67 | '\t' => true 68 | '\n' => true 69 | '\v' => true 70 | '\r' => true 71 | '\b' => true 72 | else => false 73 | end 74 | } 75 | 76 | def take_while_white src:string start:int => int = { 77 | # take... on... me... 78 | # _softly_ take on me 79 | # take... me... on... 80 | # _softly_ take on me 81 | # I'll be... gone... 82 | # In a day or twoooooooo 83 | # _synths_ 84 | var idx:int = start 85 | var ch:char = (get src start) 86 | # I *hate* how low level this is... 87 | when (is_whitespace ch) do { 88 | while (is_whitespace ch) do { 89 | set! ch $ get src idx 90 | set! idx $ + idx 1 91 | } 92 | return $ - idx 1 93 | } 94 | idx 95 | } 96 | 97 | def take_until_break src:string start:int => int = { 98 | var idx:int = start 99 | var ch:char = ' ' 100 | if (> idx $ len src) then idx 101 | else { 102 | set! ch $ get src idx 103 | while (every (< idx $ len src) (is_symbolic ch)) do { 104 | set! idx $ + idx 1 105 | set! 
ch $ get src idx 106 | } 107 | idx 108 | } 109 | } 110 | 111 | def next src:string start_offset:int => Token = { 112 | # NOTE I had an idea about how to fix the offset 113 | # issue: instead of trying to read the string AND 114 | # process lexemes, what we need is to go old-School 115 | # Wirth style with our RDP: have a function, next, 116 | # which returns a Token struct with the next lexeme, 117 | # it's length, and type 118 | 119 | when (>= start_offset $ len src) do (return $ make-struct Token "" start_offset 98) 120 | 121 | val offset:int = (take_while_white src start_offset) 122 | val ch:char = (get src offset) 123 | var idx:int = offset 124 | var state:int = 0 125 | # yet again, I really need to make TCO a thing, since 126 | # this function would be so much more nicely expressed 127 | # as a simple tail recursive function... 128 | match ch with 129 | '(' => (return $ make-struct Token "(" offset 1) 130 | ')' => (return $ make-struct Token ")" offset 2) 131 | '[' => (return $ make-struct Token "[" offset 3) 132 | ']' => (return $ make-struct Token "]" offset 4) 133 | z given (is_numeric ch) => { 134 | while (< idx $ len src) do { 135 | match state with 136 | # integer 137 | 0 => match ch with 138 | '.' => (set! state 1) 139 | _ given (is_numeric ch) => (set! state 0) 140 | _ given (is_symbolic ch) => (set! state 2) 141 | _ given (not $ is_symbolic ch) => (return $ make-struct Token (string_slice src offset idx) offset 5) 142 | else => (set! state 0) 143 | end 144 | # float 145 | 1 => match ch with 146 | '.' => (set! state 2) 147 | _ given (is_numeric ch) => (set! state 1) 148 | _ given (is_symbolic ch) => (set! state 2) 149 | _ given (not $ is_symbolic ch) => (return $ make-struct Token (string_slice src offset idx) offset 6) 150 | else => (set! state 1) 151 | end 152 | # atom 153 | 2 => when (not $ is_symbolic ch) do (return $ make-struct Token (string_slice src offset idx) offset 7) 154 | end 155 | set! idx $ + idx 1 156 | when (< idx $ len src) do (set! ch $ get src idx) 157 | } 158 | } 159 | # atoms 160 | _ given (is_symbolic ch) => { 161 | while (every (< idx $ len src) (is_symbolic ch)) do { 162 | set! ch $ get src idx 163 | set! idx $ + idx 1 164 | } 165 | return $ make-struct Token (string_slice src offset $ - idx 1) offset 7 166 | } 167 | # strings 168 | '"' => { 169 | set! idx $ + idx 1 170 | if (>= idx $ len src) then 171 | (return $ make-struct Token "malformed string" offset 99) 172 | else 173 | (set! ch $ get src idx) 174 | while (every (!= ch '"') (< idx $ len src)) do { 175 | match ch with 176 | '\\' => (set! idx $ + idx 2) 177 | else => (set! idx $ + idx 1) 178 | end 179 | when (< idx $ len src) do (set! ch $ get src idx) 180 | } 181 | return $ make-struct Token (string_slice src offset $ + idx 1) offset 8 182 | } 183 | # characters 184 | '#' => { 185 | var tmp:string = "" 186 | var tidx:int = 0 187 | set! idx $ + idx 1 188 | when (>= (+ idx 1) $ len src) do (return $ make-struct Token "malformed character" offset 99) 189 | when (!= (get src idx) '\\') do (return $ make-struct Token "character must be followed by \\" offset 99) 190 | # NOTE what we need to do is read until we hit some non-symbolic, and then attempt to tell 191 | # if we have a named character or a single one... 192 | # for example, we might have: 193 | # 194 | # - #\n (the letter n) 195 | # - #\newline (the literal ASCII newline) 196 | # we want to support both, but also need to make sure that named characters are actual names we can 197 | # parse... 
198 | # 199 | # XXX GAH I just realized tho that this means we *cannot* rely on the simple offset 200 | # and instead need to do some helper calculations here in the sizing... this could 201 | # get hairy... 202 | # 203 | # You'll notice below that the calculations for offset are odd. This is because 204 | # I'm actually calculating; length of character prefix (2) + length of named character, 205 | # but we subtract one so that the normal offset + len works. so, some examples: 206 | # - `#\n` the letter 'n', should be three, actually two 207 | # - `#\newline` the ASCII newline, '\n', should be nine, actually 8 208 | set! tidx $ take_until_break src $ + idx 1 209 | set! tmp $ string_slice src (+ idx 1) tidx 210 | match tmp with 211 | # if we only have *one* character, don't bother checking... 212 | _ given (eq? 1 $ len tmp) => (return $ make-struct Token tmp (+ offset 2) 9) 213 | # it would be *so* much more reasonable here to return a value into 214 | # a val and then use that to call `return` once... 215 | # 216 | # once the new compiler is working, I need to move to a correct 217 | # "everything is a value" system... 218 | "newline" => (return $ make-struct Token "\n" (+ offset 8) 9) 219 | "tab" => (return $ make-struct Token "\t" (+ offset 4) 9) 220 | "carriage" => (return $ make-struct Token "\r" (+ offset 9) 9) 221 | "bell" => (return $ make-struct Token "\a" (+ offset 5) 9) 222 | "vtab" => (return $ make-struct Token "\v" (+ offset 5) 9) 223 | "backspace" => (return $ make-struct Token "\b" (+ offset 10) 9) 224 | "backslash" => (return $ make-struct Token "\\" (+ offset 10) 9) 225 | else => (return $ make-struct Token "incorrect named character" offset 99) 226 | end 227 | } 228 | else => (return $ make-struct Token " " 1 99) 229 | end 230 | # XXX: AAAAAAAAAAH... so imperative 231 | match state with 232 | 0 => (return $ make-struct Token (string_slice src offset idx) offset 5) 233 | 1 => (return $ make-struct Token (string_slice src offset idx) offset 6) 234 | 2 => (return $ make-struct Token (string_slice src offset idx) offset 7) 235 | end 236 | return $ make-struct Token " " 1 99 237 | } 238 | 239 | def is_numeric ch:char => bool = { 240 | match ch with 241 | _ given (&& (>= ch '0') (<= ch '9')) => true 242 | '.' => true 243 | else => false 244 | end 245 | } 246 | 247 | def is_symbolic ch:char => bool = { 248 | match ch with 249 | '(' => false 250 | ')' => false 251 | '[' => false 252 | ']' => false 253 | '"' => false 254 | '\'' => false 255 | '#' => false 256 | _ given (is_whitespace ch) => false 257 | else => true 258 | end 259 | } 260 | 261 | def is_null_or_endp obj:SExpression => bool = { 262 | match obj with 263 | (SExpression.Null) => true 264 | (SExpression.EndList) => true 265 | (SExpression.EndArray) => true 266 | (SExpression.EndFile) => true 267 | else => false 268 | end 269 | } 270 | 271 | # this should ideally be replaced by a zero-cost abstraction 272 | # gadget that writes things like List.map! or the like to 273 | # simple Go/C functions, but for now, just like string-slice 274 | # above, write this in carML itself 275 | def shrink_array src:array[SExpression] cap:int length:int => array[SExpression] = { 276 | var ret : array[SExpression] = (make-array SExpression length $ SExpression.Nil) 277 | var idx : int = 0 278 | while (< idx length) do { 279 | set! (get ret idx) (get src idx) 280 | set! 
idx $ + idx 1 281 | } 282 | ret 283 | } 284 | 285 | # we really don't need much else beyond this part here 286 | # we could honestly do this with append=>array as well, 287 | # which would basically treat this as a stack, maybe by 288 | # backing with a deque? 289 | def read_list src:string start:int => SExpression = { 290 | # XXX for now, going to use an array, but I really think 291 | # this would be an *ideal* location for deques... 292 | var ret:array[SExpression] = (make-array SExpression 128 $ SExpression.Nil) 293 | var ret_length:int = 128 294 | var tmp:SExpression = (sexpression_read src $ + start 1) 295 | var idx:int = 0 296 | var offset:int = start 297 | while (not $ is_null_or_endp tmp) do { 298 | 299 | when (>= idx ret_length) do { 300 | return $ make-struct SExpression.Error "read_list array length overrun" start 301 | } 302 | 303 | set! (get ret idx) tmp 304 | set! idx $ + idx 1 305 | # actually, this is still wrong... 306 | # because we're just returning the *offset* in most 307 | # of our setups, but in lists we cannot just run a 308 | # string length check. I think here we can probably 309 | # do a check for most items, but for lists we have to 310 | # set the offset to be the offset of the final 311 | # #\) 312 | match tmp with 313 | (SExpression.List _ _) => (set! offset $ . tmp m_2) 314 | # a limit to what we can do here; because we write 315 | # these as an interface{} and what not in Golang, we 316 | # actually need to check each of the types here. Luckily, 317 | # it's probably better because I actually wasn't checking 318 | # an error was returned! 319 | (SExpression.Atom _ _) => (set! offset $ + (. tmp m_2) $ len $ . tmp m_1) 320 | (SExpression.String _ _) => (set! offset $ + (. tmp m_2) $ len $ . tmp m_1) 321 | (SExpression.Int _ _) => (set! offset $ + (. tmp m_2) $ len $ . tmp m_1) 322 | (SExpression.Float _ _) => (set! offset $ + (. tmp m_2) $ len $ . tmp m_1) 323 | (SExpression.Bool _ _) => if (. tmp m_1) then { 324 | (set! offset $ + (. tmp m_2) 4) 325 | } else { 326 | (set! offset $ + (. tmp m_2) 5) 327 | } 328 | (SExpression.Char _ _) => (set! offset $ + (. tmp m_2) $ len $ . tmp m_1) 329 | (SExpression.EndList _) => (set! offset $ + (. tmp m_1) 1) 330 | else => (return tmp) 331 | end 332 | set! tmp $ sexpression_read src offset 333 | } 334 | make-struct SExpression.List (shrink_array ret 128 idx) $ + offset 1 335 | } 336 | 337 | def read_char src:string offset:int => SExpression = { 338 | val tok : Token = (next src offset) 339 | if (eq? (. tok lexeme_type) 9) then 340 | (make-struct SExpression.Char (. tok lexeme) (. tok lexeme_offset)) 341 | else 342 | (make-struct SExpression.Error "expected character" offset) 343 | } 344 | 345 | def read_atom src:string offset:int => SExpression = { 346 | val tok : Token = (next src offset) 347 | if (eq? (. tok lexeme_type) 7) then 348 | (make-struct SExpression.Atom (. tok lexeme) (. tok lexeme_offset)) 349 | else 350 | (make-struct SExpression.Error "expected atom" offset) 351 | } 352 | 353 | def read_string src:string offset:int => SExpression = { 354 | val tok : Token = (next src offset) 355 | if (eq? (. tok lexeme_type) 8) then 356 | (make-struct SExpression.String (. tok lexeme) (. tok lexeme_offset)) 357 | else 358 | (make-struct SExpression.Error "expected string" offset) 359 | } 360 | 361 | # NOTE (lojikil): 362 | # I was originally going to implement a bunch of things in the 363 | # core library, but then I realized I probably should actually 364 | # make a module and have things work there. 
So, for now, I'm 365 | # adding this helper here, and will work on making the rest 366 | # of the module system workable... 367 | def string_slice src:string start:int endpoint:int => string = { 368 | val len:int = (- endpoint start) 369 | var res:array[char] = (make-array char len) 370 | var idx:int = 0 371 | while (< idx len) do { 372 | set! (get res idx) $ get src $ + idx start 373 | set! idx $ + idx 1 374 | } 375 | make-string res 376 | } 377 | 378 | # it would be nice to enrich types here 379 | # say that this is an SExpression, but also 380 | # that the only types it will return are 381 | # from Int, Float, Rational, Complex 382 | # NOTE (lojikil) also, this would be a great place 383 | # for default values: offset & start could be set 384 | # freely... 385 | def read_number src:string offset:int => SExpression = { 386 | val tok:Token = (next src offset) 387 | match (. tok lexeme_type) with 388 | 5 => (make-struct SExpression.Int (. tok lexeme) (. tok lexeme_offset)) 389 | 6 => (make-struct SExpression.Float (. tok lexeme) (. tok lexeme_offset)) 390 | else => (make-struct SExpression.Error "wanted to read a number, but other type returned" offset) 391 | end 392 | } 393 | 394 | def sexpression_read src:string start:int => SExpression = { 395 | # this would be so much nicer as a tuple here 396 | # we need a few things tho: 397 | # 1. unboxing tuple[char int] (for example) in Golang returns 398 | # 1. destructuring bind (`var ch, idx : tuple[char int] = ...`) 399 | # 1. making a `make-tuple` and allowing `,` in CALL 400 | val tok:Token = (next src start) 401 | match (. tok lexeme_type) with 402 | 1 => (read_list src $ . tok lexeme_offset) 403 | 2 => (make-struct SExpression.EndList $ + (. tok lexeme_offset) 1) 404 | #'[' => (read_array fh) 405 | #']' => (make-struct SExpression.EndArray) 406 | 5 => (make-struct SExpression.Int (. tok lexeme) (. tok lexeme_offset)) 407 | 6 => (make-struct SExpression.Float (. tok lexeme) (. tok lexeme_offset)) 408 | 7 => (make-struct SExpression.Atom (. tok lexeme) (. tok lexeme_offset)) 409 | 8 => (make-struct SExpression.String (. tok lexeme) (. tok lexeme_offset)) 410 | 9 => (make-struct SExpression.Char (. tok lexeme) (. tok lexeme_offset)) 411 | 98 => (make-struct SExpression.EndFile) 412 | else => (make-struct SExpression.Error (. tok lexeme) (. tok lexeme_offset)) 413 | end 414 | } 415 | 416 | def sexpression2char src:string => string = { 417 | match (get src 0) with 418 | '\n' => "#\\newline" 419 | '\v' => "#\\vtab" 420 | '\r' => "#\\carriage" 421 | '\t' => "#\\tab" 422 | '\b' => "#\\backspace" 423 | '\a' => "#\\bell" 424 | '\0' => "#\\nul" 425 | else => (String.Concat "#\\" src) 426 | end 427 | } 428 | 429 | # need to mung names so this could be called the proper sexpression->string 430 | def sexpression2string src:SExpression => string = { 431 | # NOTE because I defunctionalized the routines below, we end up 432 | # with a bunch of variable detritus here; really, concat probably 433 | # should be it's own function, and then we just call that in a 434 | # loop, but instead I've rewritten all of the HOFs to be simple 435 | # loops below 436 | 437 | declare result:array[string] 438 | declare flattened:array[char] 439 | var idx:int = 0 440 | var cidx:int = 0 441 | var ridx:int = 0 442 | var totallen:int = 0 443 | match src with 444 | (SExpression.Int _ _) => (. src m_1) 445 | (SExpression.Float _ _) => (. src m_1) 446 | (SExpression.Atom _ _) => (. src m_1) 447 | (SExpression.String _ _) => { 448 | set! flattened $ make-array char $ + 2 $ len $ . 
src m_1 449 | set! idx 1 450 | set! cidx 1 451 | while (< cidx $ - (len $ . src m_1) 1) do { 452 | set! (get flattened cidx) $ get (. src m_1) idx 453 | set! idx $ + idx 1 454 | set! cidx $ + cidx 1 455 | } 456 | set! (get flattened 0) '"' 457 | set! (get flattened idx) '"' 458 | make-string flattened 459 | } 460 | (SExpression.Char _ _) => (sexpression2char $ . src m_1) 461 | (SExpression.List _ _) => { 462 | # break this down a little bit, into something that I know the 463 | # carML Golang compiler can actually deal with in a semi-efficient 464 | # way 465 | # now, we can defunctionalize this manually ourselves... 466 | # NOTE this is all very ugly because I haven't added the necessary 467 | # functions to the code base to actually support things like 468 | # array maps and such. This code is *wild* below, and here be 469 | # dragons five sure 470 | # var result:array[string] = (Array.Map sexpression2string (. src m_1)) 471 | set! result (make-array string (len $ . src m_1) "") 472 | while (< idx $ len result) do { 473 | set! (get result idx) $ sexpression2string (get (. src m_1) idx) 474 | # keep track of the total string length 475 | set! totallen $ + totallen $ len $ get result idx 476 | set! idx $ + idx 1 477 | } 478 | # we now add the number of spaces we need to add... 479 | # plus two spaces for "()" 480 | set! totallen $ + totallen $ + (len result) 2 481 | # var flattened:string = (String.join " " result) 482 | set! flattened $ make-array char totallen 483 | set! (get flattened 0) '(' 484 | set! idx 1 485 | # XXX it almost would have been simpler to 486 | # add really nice TCO elimination in the compiler 487 | # than this spaghetti code here, but this is a hack 488 | # to get us off the ground. 489 | while (< idx $ - (len flattened) 1) do { 490 | set! cidx 0 491 | while (< cidx $ len $ get result ridx) do { 492 | set! (get flattened idx) $ get (get result ridx) cidx 493 | set! idx $ + idx 1 494 | set! cidx $ + cidx 1 495 | } 496 | set! (get flattened idx) ' ' 497 | set! idx $ + idx 1 498 | set! ridx $ + ridx 1 499 | } 500 | set! (get flattened idx) ')' 501 | make-string flattened 502 | } 503 | end 504 | } 505 | -------------------------------------------------------------------------------- /labs/mini-interp.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | /* 4 | * this is the result of two files being smooshed together, plus some 5 | * custom Golang: 6 | * 7 | * . labs/mini-sexpression.carml, for reading in carML/C SExpression output 8 | * . 
labs/mini-walk.carml, for actually executing code 9 | * 10 | * Currently, it actually is just an interpreter for carML itself, but that 11 | * should change eventually 12 | */ 13 | 14 | import ( 15 | "fmt" 16 | "reflect" 17 | "strconv" 18 | ) 19 | 20 | type SExpression interface { 21 | isSExpression() 22 | } 23 | type SExpression_Nil struct { 24 | } 25 | func (SExpression_Nil) isSExpression() {} 26 | type SExpression_Atom struct { 27 | m_1 string 28 | m_2 int 29 | } 30 | func (SExpression_Atom) isSExpression() {} 31 | type SExpression_String struct { 32 | m_1 string 33 | m_2 int 34 | } 35 | func (SExpression_String) isSExpression() {} 36 | type SExpression_Int struct { 37 | m_1 string 38 | m_2 int 39 | } 40 | func (SExpression_Int) isSExpression() {} 41 | type SExpression_Float struct { 42 | m_1 string 43 | m_2 int 44 | } 45 | func (SExpression_Float) isSExpression() {} 46 | type SExpression_Char struct { 47 | m_1 string 48 | m_2 int 49 | } 50 | func (SExpression_Char) isSExpression() {} 51 | type SExpression_Bool struct { 52 | m_1 bool 53 | m_2 int 54 | } 55 | func (SExpression_Bool) isSExpression() {} 56 | type SExpression_List struct { 57 | m_1 []SExpression 58 | m_2 int 59 | } 60 | func (SExpression_List) isSExpression() {} 61 | type SExpression_Error struct { 62 | m_1 string 63 | m_2 int 64 | } 65 | func (SExpression_Error) isSExpression() {} 66 | type SExpression_Null struct { 67 | } 68 | func (SExpression_Null) isSExpression() {} 69 | type SExpression_EndList struct { 70 | m_1 int 71 | } 72 | func (SExpression_EndList) isSExpression() {} 73 | type SExpression_EndArray struct { 74 | m_1 int 75 | } 76 | func (SExpression_EndArray) isSExpression() {} 77 | type SExpression_EndFile struct { 78 | } 79 | func (SExpression_EndFile) isSExpression() {} 80 | 81 | type Token struct { 82 | lexeme string 83 | lexeme_offset int 84 | lexeme_type int 85 | } 86 | func is_whitespace(ch byte) bool { 87 | switch ch { 88 | case ' ': 89 | return true 90 | case '\t': 91 | return true 92 | case '\n': 93 | return true 94 | case '\v': 95 | return true 96 | case '\r': 97 | return true 98 | case '\b': 99 | return true 100 | default: 101 | return false 102 | } 103 | 104 | } 105 | func take_while_white(src string, start int) int { 106 | var idx int = start 107 | var ch byte = src[start] 108 | if is_whitespace(ch) { 109 | for is_whitespace(ch) { 110 | ch = src[idx] 111 | idx = (idx + 1) 112 | } 113 | 114 | return idx - 1 115 | } 116 | 117 | return idx 118 | } 119 | func take_until_break(src string, start int) int { 120 | var idx int = start 121 | var ch byte = ' ' 122 | if idx > len(src) { 123 | return idx 124 | } else { 125 | ch = src[idx] 126 | for (idx < len(src)) && (is_symbolic(ch)) { 127 | idx = (idx + 1) 128 | ch = src[idx] 129 | } 130 | 131 | return idx 132 | 133 | } 134 | 135 | } 136 | func next(src string, start_offset int) Token { 137 | if start_offset >= len(src) { 138 | return Token{ "", start_offset, 98} 139 | } 140 | 141 | offset := take_while_white(src, start_offset) 142 | ch := src[offset] 143 | var idx int = offset 144 | var state int = 0 145 | switch { 146 | case ch == '(': 147 | return Token{ "(", offset, 1} 148 | case ch == ')': 149 | return Token{ ")", offset, 2} 150 | case ch == '[': 151 | return Token{ "[", offset, 3} 152 | case ch == ']': 153 | return Token{ "]", offset, 4} 154 | case (is_numeric(ch)): 155 | for idx < len(src) { 156 | switch state { 157 | case 0: 158 | switch { 159 | case ch == '.': 160 | state = 1 161 | case (is_numeric(ch)): 162 | state = 0 163 | case (is_symbolic(ch)): 
164 | state = 2 165 | case (!is_symbolic(ch)): 166 | return Token{ string_slice(src, offset, idx), offset, 5} 167 | default: 168 | state = 0 169 | } 170 | 171 | case 1: 172 | switch { 173 | case ch == '.': 174 | state = 2 175 | case (is_numeric(ch)): 176 | state = 1 177 | case (is_symbolic(ch)): 178 | state = 2 179 | case (!is_symbolic(ch)): 180 | return Token{ string_slice(src, offset, idx), offset, 6} 181 | default: 182 | state = 1 183 | } 184 | 185 | case 2: 186 | if !is_symbolic(ch) { 187 | return Token{ string_slice(src, offset, idx), offset, 7} 188 | } 189 | 190 | } 191 | 192 | idx = (idx + 1) 193 | if idx < len(src) { 194 | ch = src[idx] 195 | } 196 | 197 | } 198 | 199 | 200 | case (is_symbolic(ch)): 201 | for (idx < len(src)) && (is_symbolic(ch)) { 202 | ch = src[idx] 203 | idx = (idx + 1) 204 | } 205 | 206 | return Token{ string_slice(src, offset, idx - 1), offset, 7} 207 | 208 | case ch == '"': 209 | idx = (idx + 1) 210 | if idx >= len(src) { 211 | return Token{ "malformed string", offset, 99} 212 | } else { 213 | ch = src[idx] 214 | } 215 | 216 | for (ch != '"') && (idx < len(src)) { 217 | switch ch { 218 | case '\\': 219 | idx = (idx + 2) 220 | default: 221 | idx = (idx + 1) 222 | } 223 | 224 | if idx < len(src) { 225 | ch = src[idx] 226 | } 227 | 228 | } 229 | 230 | return Token{ string_slice(src, offset, idx + 1), offset, 8} 231 | 232 | case ch == '#': 233 | var tmp string = "" 234 | var tidx int = 0 235 | idx = (idx + 1) 236 | if (idx + 1) >= len(src) { 237 | return Token{ "malformed character", offset, 99} 238 | } 239 | 240 | if src[idx] != '\\' { 241 | return Token{ "character must be followed by \\", offset, 99} 242 | } 243 | 244 | tidx = take_until_break(src, idx + 1) 245 | tmp = string_slice(src, idx + 1, tidx) 246 | switch { 247 | case (1 == len(tmp)): 248 | return Token{ tmp, offset + 2, 9} 249 | case tmp == "newline": 250 | return Token{ "\n", offset + 8, 9} 251 | case tmp == "tab": 252 | return Token{ "\t", offset + 4, 9} 253 | case tmp == "carriage": 254 | return Token{ "\r", offset + 9, 9} 255 | case tmp == "bell": 256 | return Token{ "\a", offset + 5, 9} 257 | case tmp == "vtab": 258 | return Token{ "\v", offset + 5, 9} 259 | case tmp == "backspace": 260 | return Token{ "\b", offset + 10, 9} 261 | case tmp == "backslash": 262 | return Token{ "\\", offset + 10, 9} 263 | default: 264 | return Token{ "incorrect named character", offset, 99} 265 | } 266 | 267 | 268 | default: 269 | return Token{ " ", 1, 99} 270 | } 271 | 272 | switch state { 273 | case 0: 274 | return Token{ string_slice(src, offset, idx), offset, 5} 275 | case 1: 276 | return Token{ string_slice(src, offset, idx), offset, 6} 277 | case 2: 278 | return Token{ string_slice(src, offset, idx), offset, 7} 279 | } 280 | 281 | return Token{ " ", 1, 99} 282 | } 283 | func is_numeric(ch byte) bool { 284 | switch { 285 | case ((ch >= '0') && (ch <= '9')): 286 | return true 287 | case ch == '.': 288 | return true 289 | default: 290 | return false 291 | } 292 | 293 | } 294 | func is_symbolic(ch byte) bool { 295 | switch { 296 | case ch == '(': 297 | return false 298 | case ch == ')': 299 | return false 300 | case ch == '[': 301 | return false 302 | case ch == ']': 303 | return false 304 | case ch == '"': 305 | return false 306 | case ch == '\'': 307 | return false 308 | case ch == '#': 309 | return false 310 | case (is_whitespace(ch)): 311 | return false 312 | default: 313 | return true 314 | } 315 | 316 | } 317 | func is_null_or_endp(obj SExpression ) bool { 318 | switch obj.(type) { 319 | case 
SExpression_Null: 320 | return true 321 | case SExpression_EndList: 322 | return true 323 | case SExpression_EndArray: 324 | return true 325 | case SExpression_EndFile: 326 | return true 327 | default: 328 | return false 329 | } 330 | 331 | } 332 | func shrink_array(src []SExpression , cap int, length int) []SExpression { 333 | var ret []SExpression = make([]SExpression, length) 334 | var idx int = 0 335 | for idx < length { 336 | ret[idx] = src[idx] 337 | idx = (idx + 1) 338 | } 339 | 340 | return ret 341 | } 342 | func read_list(src string, start int) SExpression { 343 | var ret []SExpression = make([]SExpression, 128) 344 | var ret_length int = 128 345 | var tmp SExpression = sexpression_read(src, start + 1) 346 | var idx int = 0 347 | var offset int = start 348 | for !is_null_or_endp(tmp) { 349 | if idx >= ret_length { 350 | return SExpression_Error{ "read_list array length overrun", start} 351 | } 352 | 353 | ret[idx] = tmp 354 | idx = (idx + 1) 355 | switch tmp := tmp.(type) { 356 | case SExpression_List: 357 | offset = tmp.m_2 358 | case SExpression_Atom: 359 | offset = (tmp.m_2 + len(tmp.m_1)) 360 | case SExpression_String: 361 | offset = (tmp.m_2 + len(tmp.m_1)) 362 | case SExpression_Int: 363 | offset = (tmp.m_2 + len(tmp.m_1)) 364 | case SExpression_Float: 365 | offset = (tmp.m_2 + len(tmp.m_1)) 366 | case SExpression_Bool: 367 | if tmp.m_1 { 368 | offset = (tmp.m_2 + 4) 369 | } else { 370 | offset = (tmp.m_2 + 5) 371 | } 372 | case SExpression_Char: 373 | offset = (tmp.m_2 + len(tmp.m_1)) 374 | case SExpression_EndList: 375 | offset = (tmp.m_1 + 1) 376 | default: 377 | return tmp 378 | } 379 | 380 | tmp = sexpression_read(src, offset) 381 | } 382 | 383 | return SExpression_List{ shrink_array(ret, 128, idx), offset + 1} 384 | } 385 | func read_char(src string, offset int) SExpression { 386 | tok := next(src, offset) 387 | if tok.lexeme_type == 9 { 388 | return SExpression_Char{ tok.lexeme, tok.lexeme_offset} 389 | } else { 390 | return SExpression_Error{ "expected character", offset} 391 | } 392 | 393 | } 394 | func read_atom(src string, offset int) SExpression { 395 | tok := next(src, offset) 396 | if tok.lexeme_type == 7 { 397 | return SExpression_Atom{ tok.lexeme, tok.lexeme_offset} 398 | } else { 399 | return SExpression_Error{ "expected atom", offset} 400 | } 401 | 402 | } 403 | func read_string(src string, offset int) SExpression { 404 | tok := next(src, offset) 405 | if tok.lexeme_type == 8 { 406 | return SExpression_String{ tok.lexeme, tok.lexeme_offset} 407 | } else { 408 | return SExpression_Error{ "expected character", offset} 409 | } 410 | 411 | } 412 | func string_slice(src string, start int, endpoint int) string { 413 | len := endpoint - start 414 | var res []byte = make([]byte, len) 415 | var idx int = 0 416 | for idx < len { 417 | res[idx] = src[idx + start] 418 | idx = (idx + 1) 419 | } 420 | 421 | return string(res) 422 | } 423 | func read_number(src string, offset int) SExpression { 424 | tok := next(src, offset) 425 | l16807 := tok.lexeme_type 426 | switch l16807 { 427 | case 5: 428 | return SExpression_Int{ tok.lexeme, tok.lexeme_offset} 429 | case 6: 430 | return SExpression_Float{ tok.lexeme, tok.lexeme_offset} 431 | default: 432 | return SExpression_Error{ "wanted to read a number, but other type returned", offset} 433 | } 434 | 435 | } 436 | func sexpression_read(src string, start int) SExpression { 437 | tok := next(src, start) 438 | l282475249 := tok.lexeme_type 439 | switch l282475249 { 440 | case 1: 441 | return read_list(src, tok.lexeme_offset) 442 
| case 2: 443 | return SExpression_EndList{ tok.lexeme_offset + 1} 444 | case 5: 445 | return SExpression_Int{ tok.lexeme, tok.lexeme_offset} 446 | case 6: 447 | return SExpression_Float{ tok.lexeme, tok.lexeme_offset} 448 | case 7: 449 | return SExpression_Atom{ tok.lexeme, tok.lexeme_offset} 450 | case 8: 451 | return SExpression_String{ tok.lexeme, tok.lexeme_offset} 452 | case 9: 453 | return SExpression_Char{ tok.lexeme, tok.lexeme_offset} 454 | case 98: 455 | return SExpression_EndFile{ } 456 | default: 457 | return SExpression_Error{ tok.lexeme, tok.lexeme_offset} 458 | } 459 | 460 | } 461 | func sexpression2char(src string) string { 462 | l1622650073 := src[0] 463 | switch l1622650073 { 464 | case '\n': 465 | return "#\\newline" 466 | case '\v': 467 | return "#\\vtab" 468 | case '\r': 469 | return "#\\carriage" 470 | case '\t': 471 | return "#\\tab" 472 | case '\b': 473 | return "#\\backspace" 474 | case '\a': 475 | return "#\\bell" 476 | case '\u0000': 477 | return "#\\nul" 478 | default: 479 | return "#\\" + src 480 | } 481 | 482 | } 483 | func sexpression2string(src SExpression ) string { 484 | var result []string 485 | var flattened []byte 486 | var idx int = 0 487 | var cidx int = 0 488 | var ridx int = 0 489 | var totallen int = 0 490 | switch src := src.(type) { 491 | case SExpression_Int: 492 | return src.m_1 493 | case SExpression_Float: 494 | return src.m_1 495 | case SExpression_Atom: 496 | return src.m_1 497 | case SExpression_String: 498 | flattened = (make([]byte, 2 + len(src.m_1))) 499 | idx = 1 500 | cidx = 1 501 | for cidx < ((len(src.m_1)) - 1) { 502 | flattened[cidx] = src.m_1[idx] 503 | idx = (idx + 1) 504 | cidx = (cidx + 1) 505 | } 506 | 507 | flattened[0] = '"' 508 | flattened[idx] = '"' 509 | return string(flattened) 510 | 511 | case SExpression_Char: 512 | return sexpression2char(src.m_1) 513 | case SExpression_List: 514 | result = (make([]string, len(src.m_1))) 515 | for idx < len(result) { 516 | result[idx] = sexpression2string(src.m_1[idx]) 517 | totallen = (totallen + len(result[idx])) 518 | idx = (idx + 1) 519 | } 520 | 521 | totallen = (totallen + ((len(result)) + 2)) 522 | flattened = (make([]byte, totallen)) 523 | flattened[0] = '(' 524 | idx = 1 525 | for idx < ((len(flattened)) - 1) { 526 | cidx = 0 527 | for cidx < len(result[ridx]) { 528 | flattened[idx] = result[ridx][cidx] 529 | idx = (idx + 1) 530 | cidx = (cidx + 1) 531 | } 532 | 533 | flattened[idx] = ' ' 534 | idx = (idx + 1) 535 | ridx = (ridx + 1) 536 | } 537 | 538 | flattened[idx] = ')' 539 | return string(flattened) 540 | 541 | } 542 | return string(flattened) 543 | 544 | } 545 | 546 | func roundtrip(src string) { 547 | fmt.Printf("src:\n%s\n", src) 548 | g := sexpression_read(src, 0) 549 | fmt.Printf("g: %V\n", g) 550 | h := read_list(src, 0) 551 | fmt.Printf("h: %V\n", h) 552 | f := sexpression2string(g) 553 | fmt.Printf("f: %v\n", f) 554 | d := sexpression2string(h) 555 | fmt.Printf("d: %v\n", d) 556 | } 557 | 558 | func muwalk(src SExpression , env SExpression ) SExpression { 559 | return SExpression_Nil {}; 560 | } 561 | func sexpression_eqp(s0 SExpression , s1 SExpression ) bool { 562 | var idx int = 0 563 | var llen int = 0 564 | if reflect.TypeOf(s0) != reflect.TypeOf(s1) { 565 | return false 566 | } 567 | 568 | switch s0 := s0.(type) { 569 | case SExpression_Nil: 570 | return true 571 | case SExpression_EndFile: 572 | return true 573 | case SExpression_EndList: 574 | return true 575 | case SExpression_EndArray: 576 | return true 577 | case SExpression_Atom: 578 | return s0.m_1 
== s1.(SExpression_Atom).m_1 579 | case SExpression_Int: 580 | return s0.m_1 == s1.(SExpression_Int).m_1 581 | case SExpression_Float: 582 | return s0.m_1 == s1.(SExpression_Float).m_1 583 | case SExpression_Char: 584 | return s0.m_1 == s1.(SExpression_Char).m_1 585 | case SExpression_Bool: 586 | return s0.m_1 == s1.(SExpression_Bool).m_1 587 | case SExpression_Error: 588 | return s0.m_1 == s1.(SExpression_Error).m_1 589 | case SExpression_String: 590 | return s0.m_1 == s1.(SExpression_String).m_1 591 | case SExpression_List: 592 | llen = len(s0.m_1) 593 | for idx < llen { 594 | if sexpression_eqp(s0.m_1[idx], s1.(SExpression_List).m_1[idx]) { 595 | idx = (idx + 1) 596 | 597 | } else { 598 | return false 599 | 600 | } 601 | 602 | } 603 | 604 | return true 605 | 606 | } 607 | return false 608 | } 609 | func assoc(src SExpression , dst SExpression ) SExpression { 610 | var idx int = 0 611 | var llen int = len(dst.(SExpression_List).m_1) 612 | for idx < llen { 613 | if sexpression_eqp(first(dst.(SExpression_List).m_1[idx]), src) { 614 | return first(rest(dst.(SExpression_List).m_1[idx])) 615 | } 616 | 617 | idx = (idx + 1) 618 | } 619 | 620 | return SExpression_Nil{ } 621 | } 622 | func mem_assoc(needle SExpression , src SExpression ) SExpression { 623 | var idx int = 0 624 | var llen int = len(needle.(SExpression_List).m_1) 625 | for idx < llen { 626 | if sexpression_eqp(first(needle.(SExpression_List).m_1[idx]), src) { 627 | return sexpression_boxbool(true) 628 | } 629 | 630 | idx = (idx + 1) 631 | } 632 | 633 | return sexpression_boxbool(false) 634 | } 635 | func cons(hd SExpression , dst SExpression ) SExpression { 636 | return SExpression_Nil{ } 637 | } 638 | func first(hd SExpression ) SExpression { 639 | switch hd := hd.(type) { 640 | case SExpression_List: 641 | return hd.m_1[0] 642 | 643 | default: 644 | return SExpression_Error{ "first can only be applied to lists", 0} 645 | } 646 | 647 | } 648 | func rest(l SExpression ) SExpression { 649 | var res []SExpression = make([]SExpression, (len(l.(SExpression_List).m_1)) - 1) 650 | var idx int = 1 651 | var llen int = len(l.(SExpression_List).m_1) 652 | fmt.Printf("idx: %d, llen: %d\n", idx, llen) 653 | for idx < llen { 654 | fmt.Printf("idx: %d, llen: %d\n", idx, llen) 655 | res[idx - 1] = l.(SExpression_List).m_1[idx] 656 | fmt.Printf("res[%d] = %v\n", idx - 1, res[idx - 1]) 657 | idx = (idx + 1) 658 | } 659 | 660 | return SExpression_List{ res, 0} 661 | } 662 | 663 | func mapfn(f func (SExpression) SExpression, l SExpression , e SExpression ) SExpression { 664 | return SExpression_Nil { } 665 | } 666 | 667 | func mapeval(l SExpression , e SExpression ) SExpression { 668 | var res []SExpression = make([]SExpression, len(l.(SExpression_List).m_1)) 669 | var idx int = 0 670 | for idx != len(l.(SExpression_List).m_1) { 671 | res[idx] = mueval(l.(SExpression_List).m_1[idx], e) 672 | idx = (idx + 1) 673 | } 674 | 675 | return SExpression_List{ res, 0} 676 | } 677 | func sexpression_boxint(x int64) SExpression { 678 | return SExpression_Int{ strconv.FormatInt(x, 10), 0} 679 | } 680 | func sexpression_boxbool(b bool) SExpression { 681 | return SExpression_Bool{ b, 0} 682 | } 683 | func mueval(src SExpression , env SExpression ) SExpression { 684 | switch src := src.(type) { 685 | case SExpression_List: 686 | var hd SExpression = src.m_1[0] 687 | switch hd.(type) { 688 | case SExpression_Atom: 689 | switch hd.(SExpression_Atom).m_1 { 690 | case "define": 691 | 692 | case "define-value": 693 | 694 | case "define-mutable-value": 695 | 696 | case 
"if": 697 | 698 | case "call": 699 | tmp := muapply(src, env) 700 | fmt.Printf("tmp: %v\n", tmp) 701 | return tmp 702 | 703 | default: 704 | return first(rest(src)) 705 | } 706 | default: 707 | return src 708 | 709 | } 710 | 711 | 712 | default: 713 | return src 714 | } 715 | return src 716 | } 717 | func muapply(src SExpression , env SExpression ) SExpression { 718 | var lst SExpression = SExpression_Null{ } 719 | var hd SExpression = SExpression_Null{ } 720 | var tmp0 SExpression = SExpression_Null { } 721 | var tmp1 SExpression = SExpression_Null { } 722 | 723 | switch src.(type) { 724 | case SExpression_List: 725 | src = src.(SExpression_List) 726 | default: 727 | return src 728 | } 729 | 730 | lst = mapeval(rest(src), env) 731 | hd = first(lst) 732 | fmt.Printf("src: %v\n", src) 733 | fmt.Printf("lst: %v\n", lst) 734 | fmt.Printf("hd: %v\n", hd) 735 | fmt.Println("here on 735?") 736 | switch hd.(SExpression_String).m_1 { 737 | case "\"+\"": 738 | fmt.Println("here on 737") 739 | tmp0 = first(rest(lst)) 740 | tmp1 = first(rest(rest(lst))) 741 | fmt.Printf("tmp0: %T%v\n", tmp0, tmp0) 742 | fmt.Printf("tmp1: %T%v\n", tmp1, tmp1) 743 | switch tmp0.(type) { 744 | case SExpression_Int: 745 | fmt.Println("here on 741") 746 | switch tmp1.(type) { 747 | case SExpression_Int: 748 | fmt.Println("here on 743") 749 | n, err := strconv.ParseInt(tmp0.(SExpression_Int).m_1, 10, 64) 750 | if err != nil { 751 | return SExpression_Error{"integer parse error 0", 0} 752 | } 753 | m, err := strconv.ParseInt(tmp1.(SExpression_Int).m_1, 10, 64) 754 | if err != nil { 755 | return SExpression_Error{"integer parse error 1", 0} 756 | } 757 | fmt.Printf("here: %d\n", n + m) 758 | return sexpression_boxint(n + m) 759 | //case SExpression_Float: 760 | default: 761 | return SExpression_Error{"mismatched types 0", 0} 762 | } 763 | case SExpression_Float: 764 | return SExpression_Error{"mismatched types 1", 0} 765 | default: 766 | return SExpression_Error{"mismatched types 2", 0} 767 | } 768 | default: 769 | fmt.Printf("weird, but hd.m_1 is %v\n", hd.(SExpression_String).m_1) 770 | fmt.Printf("weird, but hd.m_1 is %v\n", hd.(SExpression_String).m_1 == "+") 771 | /* 772 | case "-": 773 | return sexpression_boxint(first(lst).m_1 - first(rest(lst)).m_1) 774 | 775 | case "*": 776 | return sexpression_boxint(first(lst).m_1 * first(rest(lst)).m_1) 777 | 778 | case "/": 779 | return sexpression_boxint(first(lst).m_1 / first(rest(lst)).m_1) 780 | case "<": 781 | return sexpression_boxbool(first(lst).m_1 < first(rest(lst)).m_1) 782 | 783 | case "<=": 784 | return sexpression_boxbool(first(lst).m_1 <= first(rest(lst)).m_1) 785 | 786 | case ">": 787 | return sexpression_boxbool(first(lst).m_1 > first(rest(lst)).m_1) 788 | 789 | case ">=": 790 | return sexpression_boxbool(first(lst).m_1 >= first(rest(lst)).m_1) 791 | 792 | case "<>": 793 | return sexpression_boxbool(first(lst).m_1 != first(rest(lst)).m_1) 794 | */ 795 | } 796 | return SExpression_Null { } 797 | } 798 | 799 | func main() { 800 | src := `(define main 801 | 802 | (returns (type integer)) 803 | (begin 804 | (call (identifier printf) (string "this is a test\n")) 805 | (call (identifier printf) (string "test test test\n")) 806 | (integer 0)))` 807 | src1 := `(define main (returns (type integer)) (begin (integer 0)))` 808 | src2 := `(call "+" (integer 10) (call "+" (integer 20) (integer 3)))` 809 | roundtrip(src) 810 | roundtrip(src1) 811 | roundtrip(src2) 812 | v := sexpression_read(src2, 0) 813 | fmt.Printf("%v\n", mueval(v, SExpression_Null { })) 814 | } 815 | 
--------------------------------------------------------------------------------
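A note on the commented-out operator cases at the bottom of muapply in labs/mini-interp.go: as written they could not compile, because SExpression_Int boxes its value as a string (m_1) that has to go through strconv.ParseInt, exactly as the existing "+" case does. Below is a minimal, hedged sketch of how those cases might be factored out; it reuses the SExpression types and the sexpression_boxint/sexpression_boxbool helpers defined in labs/mini-interp.go, and the helper name muapply_int2 is hypothetical, not part of the repository. As with "+", the operator lexeme still carries its surrounding double quotes.

// sketch only: dispatch the remaining integer operators after parsing both operands once
func muapply_int2(op string, a SExpression, b SExpression) SExpression {
	ai, ok := a.(SExpression_Int)
	if !ok {
		return SExpression_Error{"mismatched types 0", 0}
	}
	bi, ok := b.(SExpression_Int)
	if !ok {
		return SExpression_Error{"mismatched types 1", 0}
	}
	// parse both boxed integers, mirroring the "+" case in muapply
	n, err := strconv.ParseInt(ai.m_1, 10, 64)
	if err != nil {
		return SExpression_Error{"integer parse error 0", 0}
	}
	m, err := strconv.ParseInt(bi.m_1, 10, 64)
	if err != nil {
		return SExpression_Error{"integer parse error 1", 0}
	}
	switch op {
	case "\"-\"":
		return sexpression_boxint(n - m)
	case "\"*\"":
		return sexpression_boxint(n * m)
	case "\"/\"":
		if m == 0 {
			return SExpression_Error{"division by zero", 0}
		}
		return sexpression_boxint(n / m)
	case "\"<\"":
		return sexpression_boxbool(n < m)
	case "\"<=\"":
		return sexpression_boxbool(n <= m)
	case "\">\"":
		return sexpression_boxbool(n > m)
	case "\">=\"":
		return sexpression_boxbool(n >= m)
	case "\"<>\"":
		return sexpression_boxbool(n != m)
	default:
		return SExpression_Error{"unknown operator", 0}
	}
}

With a helper along these lines, each remaining branch of muapply would reduce to a single call passing the quoted operator lexeme and the two evaluated operands, in the same shape as the current "+" branch.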
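Similarly, the read_list comments in the carML source above note that the fixed 128-slot buffer plus shrink_array is a stand-in for a growable append (or a deque). In the generated Go, the same loop could lean on the built-in append and drop both shrink_array and the overrun error. This is a sketch only, against the SExpression types and readers defined in labs/mini-interp.go; read_list_append is a hypothetical name, not part of the repository.

// sketch only: read_list with a growable slice instead of a fixed 128-slot array
func read_list_append(src string, start int) SExpression {
	var ret []SExpression
	var offset int = start
	tmp := sexpression_read(src, start+1)
	for !is_null_or_endp(tmp) {
		ret = append(ret, tmp)
		// same offset bookkeeping as read_list above
		switch tmp := tmp.(type) {
		case SExpression_List:
			offset = tmp.m_2
		case SExpression_Atom:
			offset = tmp.m_2 + len(tmp.m_1)
		case SExpression_String:
			offset = tmp.m_2 + len(tmp.m_1)
		case SExpression_Int:
			offset = tmp.m_2 + len(tmp.m_1)
		case SExpression_Float:
			offset = tmp.m_2 + len(tmp.m_1)
		case SExpression_Bool:
			if tmp.m_1 {
				offset = tmp.m_2 + 4
			} else {
				offset = tmp.m_2 + 5
			}
		case SExpression_Char:
			offset = tmp.m_2 + len(tmp.m_1)
		case SExpression_EndList:
			offset = tmp.m_1 + 1
		default:
			return tmp
		}
		tmp = sexpression_read(src, offset)
	}
	return SExpression_List{ret, offset + 1}
}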