├── staging └── .gitkeep ├── src ├── cmung.sh ├── miniboot.go.carml ├── cmung.carml ├── minicompiler.c.carml ├── carml.carml ├── newreader.c.carml ├── carmlc.h └── self_tco.c ├── test ├── test-function-if.carml ├── test-match-function.carml ├── test-match-base.carml ├── test-declare.carml ├── test-function-begin.carml ├── test-rewrite-tco.carml ├── test-while.carml ├── test-record.carml ├── test-func-err.frag └── test-type.carml ├── .gitignore ├── example ├── example3.carml ├── example2.carml ├── example1.carml ├── example6.carml ├── example4.carml ├── fib1.carml ├── fib0.carml ├── example5.carml ├── playground.carml ├── bitc2carml.carml ├── example0.carml └── cur.carml ├── attic ├── gen_match.py ├── gen.py ├── stage-mini-sexpr-main.go ├── cmung_nomatch.carml ├── cmung.carml └── hash_table.c ├── labs ├── old.carml ├── optionint.carml ├── rawdeque.carml ├── ch0.carml ├── json.carml ├── unix.carml ├── trie.c.carml ├── rewriter.carml ├── sexpr.carml ├── rewrite_match_bind.carml ├── let_rewrite.carml ├── woodchips.carml ├── mini-walk.carml ├── mini-sexpr.carml └── mini-interp.go ├── docs ├── go-anon-struct-tuple.md ├── README.md ├── haskell_core.txt ├── lldb-session.md ├── 2heresy-let.md ├── NOTES.md ├── 3heresy-storage.md ├── tooling.md ├── cml-README.md ├── 1heresy-types.md ├── 325m2.txt ├── opprec.md ├── 4heresy-modules.md ├── 0heresy-functiontypes.md └── todo.md ├── LICENSE ├── editors └── carml.vim └── README.md /staging/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/cmung.sh: -------------------------------------------------------------------------------- 1 | if [ "$1" != "" ] 2 | then 3 | sed -e 's/\([a-z]\)\-\([a-z]\)/\1_\2/g' -e 's/\?/_p/g' $1 4 | fi 5 | -------------------------------------------------------------------------------- /test/test-function-if.carml: -------------------------------------------------------------------------------- 1 | def fact x : int => int = if (< x 0) then (fact (abs x)) 2 | else if (eq? x 0) then 1 3 | else (mul x (fact (sub x 1))) 4 | -------------------------------------------------------------------------------- /test/test-match-function.carml: -------------------------------------------------------------------------------- 1 | def foo bar:int => string = match bar with 2 | 10 => "ten" 3 | 11 => "eleven" 4 | else => "something else" 5 | end 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.swo 3 | c29 4 | carml 5 | carmlc 6 | carmlc.old 7 | *.dSYM 8 | staging/* 9 | .idea/* 10 | .DS_Store 11 | .ninja_deps 12 | .ninja_log 13 | dump 14 | -------------------------------------------------------------------------------- /test/test-match-base.carml: -------------------------------------------------------------------------------- 1 | var x : int = 10 2 | 3 | match x with 4 | 10 => (printf "ten!\n") 5 | 11 => (printf "uh... 
11?\n") 6 | else => (printf "oh goodness...") 7 | end 8 | -------------------------------------------------------------------------------- /example/example3.carml: -------------------------------------------------------------------------------- 1 | def fact n : int => int = if (< n 0) then 2 | (fact (abs n)) 3 | else if (num= n 0) then 4 | 1 5 | else 6 | (mul n (fact (sub n 1))) 7 | 8 | def foo x : array[int] => int = (reduce fn y : int z : int => int = (sum y z) 0 x) 9 | -------------------------------------------------------------------------------- /example/example2.carml: -------------------------------------------------------------------------------- 1 | import unix 2 | var data = [""] 3 | 4 | def main ac : int al : array[string] => int = { 5 | let fh = (unix.open "./foo.txt" "r") in 6 | let lines = (unix.readlines fh) in { 7 | vector.append data lines 8 | unix.close fh 9 | } 10 | 0 11 | } 12 | -------------------------------------------------------------------------------- /test/test-declare.carml: -------------------------------------------------------------------------------- 1 | declare foo0 int => int 2 | @foo1 int 3 | @foo2 => int 4 | @foo3 Bar[int] 5 | @foo4 array[int] 6 | @foo5 => Bar[int] 7 | @foo6 => Bar 8 | @foo7 => Either[Bar[int] string] 9 | @foo8 Either[Bar[int] string] => int 10 | @foo9 => array[int] 11 | @foo10 int int array[int] Bar => Baz 12 | -------------------------------------------------------------------------------- /test/test-function-begin.carml: -------------------------------------------------------------------------------- 1 | # These should all parse to 2 | # fairly similar results. The 3 | # sole difference should be 4 | # that foo does not have an 5 | # explicit begin. 6 | 7 | def foo x : int => int = (sum x 10) 8 | def bar x : int => int = { sum x 10 } 9 | def baz x : int => int = { 10 | sum x 10 11 | } 12 | -------------------------------------------------------------------------------- /example/example1.carml: -------------------------------------------------------------------------------- 1 | type Option Tx { 2 | Some Tx 3 | Nothing 4 | } 5 | 6 | # would be interesting to implement Cons-cells 7 | # as a generic type here... you'd end up with 8 | # something... different I think. 9 | type List Tx { 10 | Cons Tx List[Tx] 11 | Nil 12 | } 13 | 14 | # poly Deque a = ... 15 | 16 | -------------------------------------------------------------------------------- /attic/gen_match.py: -------------------------------------------------------------------------------- 1 | for x in ["8","16","32","64"]: 2 | print """if(!strncmp(typespec->value, "U{0}", {1})) {{ 3 | strncat(dst, "uint{0}", {2}); 4 | }} 5 | """.format(x, len(x) + 1, len(x) + 4) 6 | 7 | for x in ["8","16","32","64"]: 8 | print """if(!strncmp(typespec->value, "I{0}", {1})) {{ 9 | strncat(dst, "int{0}", {2}); 10 | }} 11 | """.format(x, len(x) + 1, len(x) + 4) 12 | -------------------------------------------------------------------------------- /example/example6.carml: -------------------------------------------------------------------------------- 1 | record Test { 2 | bar : int 3 | baz : int 4 | } 5 | 6 | def main ac : int al : array[string] => int = { 7 | # stack-allocate a struct 8 | var f : Test = (make-struct 11 20) 9 | 10 | # set the "bar" member to 10 11 | set! (. f bar) 10 12 | 13 | printf "%d + %d is %d\n" (. f bar) (. f baz) (sum (. f bar) (. 
f baz)) 14 | 15 | 0 16 | } 17 | -------------------------------------------------------------------------------- /labs/old.carml: -------------------------------------------------------------------------------- 1 | #@(#) a base library for carML collections, meant to mimic what 2 | #@(#) we see in systems like SRFI-1 and SRFI-13/14. It's called 3 | #@(#) "old" because it is not module based, as the replacement 4 | #@(#) should be. So why not just write that first? Because I 5 | #@(#) have yet to define how modules are reified to the various 6 | #@(#) languages I support as output from carML 7 | 8 | 9 | -------------------------------------------------------------------------------- /example/example4.carml: -------------------------------------------------------------------------------- 1 | def unix.open path : string mode : string => int = { 2 | 3 | } 4 | 5 | def unix.close fd : int = { 6 | 7 | } 8 | 9 | def unix.readlines fd : int => array[string] = { 10 | 11 | } 12 | 13 | def main ac : int al : array[string] => int = { 14 | val fh = (unix.open "./foo.txt" "r") 15 | val lines = (unix.readlines fh) 16 | unix.close fh 17 | 0 18 | } 19 | -------------------------------------------------------------------------------- /example/fib1.carml: -------------------------------------------------------------------------------- 1 | # tail-recursive fib 2 | def fib n : int i : int j : int => int = if (< n 1) then i else (fib (- n 1) (+ i j) i) 3 | 4 | def main ac : int al : array[string] => int = { 5 | 6 | when (!= ac 2) do { 7 | printf "fib [number]\n" 8 | return -1 9 | } 10 | 11 | val n : int = (atoi (get al 1)) 12 | val t : int = (fib n 0 1) 13 | printf "(fib %d) == %d\n" n t 14 | 0 15 | } 16 | -------------------------------------------------------------------------------- /example/fib0.carml: -------------------------------------------------------------------------------- 1 | # simple, non-tail-recursive fib example 2 | def fib n : int => int = if (<= n 1) then 1 else (+ (fib (- n 1)) (fib (- n 2))) 3 | 4 | def main ac : int al : array[string] => int = { 5 | 6 | when (!= ac 2) do { 7 | printf "fib [number]\n" 8 | return -1 9 | } 10 | 11 | val n : int = (atoi (get al 1)) 12 | val t : int = (fib n) 13 | printf "(fib %d) == %d\n" n t 14 | 0 15 | } 16 | -------------------------------------------------------------------------------- /docs/go-anon-struct-tuple.md: -------------------------------------------------------------------------------- 1 | Just a quick note for tuples: 2 | 3 | ``` 4 | func bar() struct {string; int; } { 5 | return struct {string; int;}{"test", 10,} 6 | } 7 | ``` 8 | 9 | This also is a huge gotcha in go: 10 | 11 | ``` 12 | // this works 13 | return struct {string 14 | int 15 | }{"test", 10,} 16 | 17 | // this is an error 18 | return struct {string int}{"test", 10} 19 | 20 | // because you need to have semicolons in there 21 | ``` 22 | -------------------------------------------------------------------------------- /test/test-rewrite-tco.carml: -------------------------------------------------------------------------------- 1 | #@(#) a simple tail recursive function that can be rewritten to a 2 | #@(#) while loop. 
Meant to test the self-TCO setup 3 | 4 | def foo x:int y:int => int = if (<= x 0) then y else (foo (- x 1) (+ x y)) 5 | 6 | # this version should be rewritten to a while loop 7 | def fibtco i:int j:int n:int => int = if (<= i 0) then i else (fibtco j (+ i j) (- n 1)) 8 | 9 | # this version should not 10 | def fib n:int => int = if (<= n 2) then 1 else (+ (fib (- n 1)) (fib (- n 2))) 11 | -------------------------------------------------------------------------------- /test/test-while.carml: -------------------------------------------------------------------------------- 1 | def main ac : int al : array[string] => int = { 2 | # whoops; the compiler should warn here... 3 | # TODO: this was originally a `val` but failed in C 4 | var foo : int = 0 5 | while (< foo 10) do { 6 | printf "foo: %d\\n" foo 7 | set! foo (+ foo 1) 8 | } 9 | 0 10 | } 11 | 12 | def bar lim:int => int = { 13 | var foo : int = 0 14 | while (< foo lim) do { 15 | set! foo $ + foo 1 16 | when (> foo 100) do foo 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | This is the documents directory, which houses misc documents that I write whilst thinking about carML. 4 | 5 | - (NOTES.md)[NOTES.md]: misc notes, mostly about lexing. 6 | - (cml-README.md)[cml-README.md]: a previous attempt at something like carML, which I had called cML. 7 | - (haskell_core.txt)[haskell_core.txt]: some quick notes on GHC's Core dialect. 8 | - (todo.md)[todo.md]: my current TODO list for carML. 9 | - (tooling.md)[tooling.md]: ideas surrounding tooling, like static analysis and the like. 10 | -------------------------------------------------------------------------------- /docs/haskell_core.txt: -------------------------------------------------------------------------------- 1 | type CoreExpr = Expr Var 2 | 3 | data Expr b -- "b" for the type of binders, 4 | = Var Id 5 | | Lit Literal 6 | | App (Expr b) (Arg b) 7 | | Lam b (Expr b) 8 | | Let (Bind b) (Expr b) 9 | | Case (Expr b) b Type [Alt b] 10 | | Cast (Expr b) Coercion 11 | | Tick (Tickish Id) (Expr b) 12 | | Type Type 13 | 14 | type Arg b = Expr b 15 | type Alt b = (AltCon, [b], Expr b) 16 | 17 | data AltCon = DataAlt DataCon | LitAlt Literal | DEFAULT 18 | 19 | data Bind b = NonRec b (Expr b) | Rec [(b, (Expr b))] 20 | 21 | -------------------------------------------------------------------------------- /test/test-record.carml: -------------------------------------------------------------------------------- 1 | # a simple test record 2 | # need to add: 3 | # 4 | # - actual tests of helpers & such 5 | # - polymorphic tests 6 | 7 | record Foo { 8 | bar:int # should support simple types 9 | baz:int 10 | blah:float 11 | eh:string # should support complex types 12 | # NOTE (lojikil) I changed the test from 13 | # ref[Foo] to simply Foo, because not all 14 | # targets currently have a ref[any] output 15 | # in Golang that should be easy enough to 16 | # add, but for now, just make sure we can 17 | # pass tests in any target... 18 | next:Foo # should support recursive definition 19 | } 20 | -------------------------------------------------------------------------------- /labs/optionint.carml: -------------------------------------------------------------------------------- 1 | # @(#) a simple test case for working with `type` forms 2 | # @(#) and making sure the C output is correct. 
3 | 4 | type OptionInt { 5 | Some int 6 | None 7 | } 8 | 9 | def main ac:int al:array[string] => int = { 10 | 11 | when (<> ac 2) do { 12 | printf "usage: optionint [number]\n" 13 | return 1 14 | } 15 | 16 | val f : OptionInt = (OptionInt.Some $ atoi $ get al 1) 17 | match f with 18 | (OptionInt.Some y) given (> y 10) => (printf "yes and y is greater than 10: %d\n" x) 19 | (OptionInt.Some x) => (printf "yes: %d\n" x) 20 | (OptionInt.None) => (printf "oh no!\n") 21 | end 22 | 0 23 | } 24 | -------------------------------------------------------------------------------- /example/example5.carml: -------------------------------------------------------------------------------- 1 | record Test { 2 | bar : int 3 | baz : int 4 | } 5 | 6 | def main ac : int al : array[string] => int = { 7 | # var, because we want to assign to it 8 | # and technically should be `ref of Test` 9 | var f : ref[Test] = (malloc (sizeof Test)) 10 | 11 | # I dislike having to use `->` at some level, because 12 | # I feel like that's an implementation detail that the 13 | # compiler should be tracking, but it works for now... 14 | set! (-> f bar) 10 15 | set! (-> f baz) 20 16 | # the \\n is a bug in the compiler's output of C. 17 | # will fix 18 | printf "%d + %d is %d\\n" (-> f bar) (-> f baz) (sum (-> f bar) (-> f baz)) 19 | free f 20 | 0 21 | } 22 | -------------------------------------------------------------------------------- /example/playground.carml: -------------------------------------------------------------------------------- 1 | # just a syntactic playground to see what's what. 2 | 3 | #@arrayIota3 array of Num (Num => Num) => () 4 | def arrayIota3 arr:array[int] fun:function[int int] = { 5 | letrec internalIota = fn idx = { 6 | set-array-index! arr (fun idx) x 7 | internalIota (sum idx 1) 8 | } in 9 | internalIota 0 10 | } 11 | 12 | #@arrayIota4 array of Num (Num => Num) => () 13 | def arrayIota4 arr:array[int] fun:function[int int] { 14 | # should `fn` forms accept a `begin` 15 | # form sans `=` just like `def`? 16 | # looks... so much cleaner 17 | letrec internalIota = fn idx { 18 | Array.set! arr (fun idx) x 19 | internalIota $ sum idx 1 20 | } in 21 | internalIota 0 22 | } 23 | -------------------------------------------------------------------------------- /src/miniboot.go.carml: -------------------------------------------------------------------------------- 1 | #@(#) a miniature boot system for Golang 2 | #@(#) basically, we want to target the Golang 3 | #@(#) ecosystem, and use that as a launching 4 | #@(#) off point for the rest of CarML 5 | #@(#) Why Golang? Because the RTS for Golang is 6 | #@(#) already decent enough, and I don't want to 7 | #@(#) spend more time in the C RTS really. I feel 8 | #@(#) as if I'm spending more time fighting previous 9 | #@(#) choices (like being agnostic to requiring a type) 10 | #@(#) that I have since changed than I am working on 11 | #@(#) things that are interesting to me. 
Since the 12 | #@(#) Golang output is already decent enough, we can 13 | #@(#) start here for producing decent system, and 14 | #@(#) then boot C and the like again from *that* 15 | 16 | 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016-2021, Stefan Edwards 2 | 3 | Permission to use, copy, modify, and/or distribute this software for 4 | any purpose with or without fee is hereby granted, provided that the 5 | above copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 13 | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /example/bitc2carml.carml: -------------------------------------------------------------------------------- 1 | # This is a collection of little programs/functions 2 | # that are directly translated from BitC. carML, like 3 | # cML before it, is pretty heavily influenced by BitC, 4 | # so I naturally wanted to see translations 5 | # could almost be @fact Num of a => a 6 | # @fact int => int 7 | def fact x : int => int = 8 | if (< x 0) then (fact (abs x)) 9 | else if (eq? x 0) then 1 10 | else (mul x (fact (sub x 1))) 11 | 12 | # @fact0 int => int 13 | def fact0 x : int => int = match x with 14 | # hmm... how does this work elsewhere? I like it tho... 15 | #case x, (< x 0) => (fact0 $ abs x) 16 | 0 => 1 17 | y given (< y 0) => (fact0 $ abs y) 18 | else => (mul x $ fact0 $ sub x 1) 19 | end 20 | 21 | # for more info on BitC see: 22 | # https://web.archive.org/web/20130815183427/http://www.bitc-lang.org/docs/bitc/spec.html 23 | -------------------------------------------------------------------------------- /test/test-func-err.frag: -------------------------------------------------------------------------------- 1 | # The following fragments cause an error in carML/C currently: 2 | 3 | - TWHEN => (self-tco? name (-> src $ -> $ get children 1)) 4 | + TWHEN => (self-tco? name (-> src $ get children 1)) 5 | TIF => (any-of 6 | - (self-tco? name (-> src $ -> $ get children 1)) 7 | - (self-tco? name (-> src $ -> $ get children 2))) 8 | + (self-tco? name (-> src $ get children 1)) 9 | + (self-tco? name (-> src $ get children 2))) 10 | 11 | # what I think is happening is that the `->` is a `coperator`, so 12 | # it just assumes that there are two parameters, but one is actually 13 | # nil. note what the `$` expands to: 14 | 15 | (self-tco? 
name (-> src (-> (getchildren 1)))) 16 | 17 | # which then calls `llcwalk` or `llgwalk` on `head->children[1]`, which 18 | # would be nil 19 | 20 | # another test case: 21 | (foo bar 22 | # will cause a sigsegv 23 | -------------------------------------------------------------------------------- /labs/rawdeque.carml: -------------------------------------------------------------------------------- 1 | # @(#) a simple deque backing 2 | # @(#) just meant for testing, possibly for modes where 3 | # @(#) we don't flatten types in C 4 | 5 | record RawDeque { 6 | # really need to be able to say 7 | # U32/U64 here 8 | capacity: int 9 | length: int 10 | data: ref[any] 11 | } 12 | 13 | # have heap and stack-based make-helpers? 14 | # the heap-based helper 15 | # returns a GC-allocated RawDeque reference 16 | def make_RawDeque => ref[RawDeque] = { 17 | var ret: ref[RawDeque] = nil 18 | set! ret $ hmalloc $ sizeof RawDeque 19 | set! (-> ret capacity) 64 20 | set! (-> ret length) 0 21 | set! (-> ret data) $ hmalloc $ * 64 $ sizeof RawDeque 22 | ret 23 | } 24 | 25 | # the stack-based helper 26 | # returns a stack-allocated RawDeque 27 | def make_RawDeque_low => RawDeque = { 28 | var ret: RawDeque = (make-struct 64 0 nil) 29 | set! (. ret data) $ make-array nil 64 30 | ret 31 | } 32 | -------------------------------------------------------------------------------- /docs/lldb-session.md: -------------------------------------------------------------------------------- 1 | # Just a good thing to note about LLDB 2 | 3 | We can introspect AST objects nicely: 4 | 5 | ``` 6 | (lldb) expr (int) printf("%d %s\n", ((AST *)0x0000000100423a20)->tag, ((AST *)0x0000000100423a20)->value); 7 | 0 foreach-other 8 | (int) $3 = 16 9 | (lldb) expr (int) printf("%d %s %d\n", ((AST *)0x0000000100423a20)->tag, ((AST *)0x0000000100423a20)->value, ((AST *)0x0000000100423a20)->lenchildren); 10 | 0 foreach-other 3 11 | (int) $4 = 18 12 | (lldb) expr (int) printf("%d\n", ((AST *)0x0000000100423a20)->children[0]->tag) 13 | 26 14 | (int) $5 = 3 15 | (lldb) expr (int) printf("%d\n", ((AST *)0x0000000100423a20)->children[1]->tag) 16 | 1 17 | (int) $6 = 2 18 | (lldb) expr (int) printf("%d\n", ((AST *)0x0000000100423a20)->children[2]->tag) 19 | error: Execution was interrupted, reason: Attempted to dereference an invalid pointer.. 20 | The process has been returned to the state before expression evaluation. 
21 | (lldb) expr (int) printf("%d\n", ((AST *)0x0000000100423a20)->children[1]->tag) 22 | ``` 23 | -------------------------------------------------------------------------------- /attic/gen.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if len(sys.argv) != 4: 4 | print "usage: gen.py [symbol name] [string] [return type]" 5 | sys.exit(1) 6 | 7 | sym = sys.argv[1] 8 | tar = sys.argv[2] 9 | returntype = sys.argv[3] 10 | idx = 0 11 | 12 | for t in tar: 13 | print "case {0}{1}: ".format(sym, idx) 14 | idx += 1 15 | print """ if(cur == '{0}') {{ 16 | state = {1}{2}; 17 | }} else if(iswhite(cur) || cur == '\\n' || isbrace(cur)) {{ 18 | ungetc(cur, fdin); 19 | buf[idx - 1] = '\\0'; 20 | return TIDENT; 21 | }} else {{ 22 | state = LIDENT0; 23 | }} 24 | break;""".format(t, sym, idx) 25 | 26 | print """ 27 | case {0}{1}: 28 | if(isident(cur)) {{ 29 | substate = LIDENT0; 30 | }}else if(iswhite(cur) || cur == '\\n' || isbrace(cur)) {{ 31 | ungetc(cur, fdin); 32 | return {2}; 33 | }} else {{ 34 | strncpy(buf, "malformed identifier", 512); 35 | return TERROR; 36 | }} 37 | break; 38 | """.format(sym, idx, returntype) 39 | -------------------------------------------------------------------------------- /test/test-type.carml: -------------------------------------------------------------------------------- 1 | # a simple test of ADTs 2 | # Fixed bug: Shouldn't *need* Bar here 3 | # Fixed bug: parameter-list is malformed 4 | # Fixed bug: Left/Right are missing 5 | # Fixed bug: newline is not being read 6 | # Fixed bug: complex types are not collapsed 7 | 8 | # Test: tag with single type 9 | type Baz { 10 | BazOk int 11 | BazErr int 12 | } 13 | 14 | # Test: tags with multiple types 15 | type Blah { 16 | BlahOk int int int 17 | BlahErr int string int 18 | } 19 | 20 | # Test: we can apply constructors to complex types 21 | type Bloo { 22 | BlooOk array[int] 23 | BlooErr int string 24 | BlooWat array[any] int 25 | } 26 | 27 | type Frob { 28 | FrobOk array[Bloo] 29 | FrobErr array[any] Bloo 30 | FrobWot Bloo int 31 | FrobWat int int string array[int] 32 | } 33 | 34 | # Test: tags separated with semi-colons 35 | type Foo { 36 | Left int 37 | Right int 38 | } 39 | 40 | # Test: parameter list 41 | # XXX (lojikil) currently not working 42 | #type Result Success Fail { 43 | # Ok Success 44 | # Err Fail 45 | #} 46 | -------------------------------------------------------------------------------- /attic/stage-mini-sexpr-main.go: -------------------------------------------------------------------------------- 1 | // this is a Go fragment that... calls code compiled by 2 | // the carML compiler to run a simple S-Expression parser 3 | // 4 | // the point here is mostly to show how this would work, 5 | // but it's also useful in my testing, to show that we 6 | // can easily integrate the two 7 | // 8 | // to use this, compile mini-sexpression.carml with the 9 | // Golang flag (`+g`) and append this file to the end of 10 | // the result, adding `package main` and `import "fmt"` 11 | // to the top of the resulting pass. This is literally 12 | // how I've been testing the compiler... 
13 | func main() { 14 | var token Token 15 | src := "(call (identifier +) (integer 10) (integer 20))" 16 | idx := 0 17 | offset := 0 18 | fmt.Printf("src: %s\n", src) 19 | for ; idx < 15; idx++ { 20 | token = next(src, offset) 21 | offset = token.lexeme_offset + len(token.lexeme) 22 | fmt.Printf("token: %s, offset: %d\n", token.lexeme, offset) 23 | } 24 | 25 | result := read_list(src, 0) 26 | 27 | fmt.Printf("result: %T%v\n", result, result) 28 | 29 | roundtrip := sexpression2string(result) 30 | 31 | fmt.Printf("round trip? %s\n", roundtrip) 32 | } 33 | -------------------------------------------------------------------------------- /src/cmung.carml: -------------------------------------------------------------------------------- 1 | # a very simple C-mung implementation 2 | # we have two functions: 3 | # 4 | # - `c_safe_char`, which rewrites characters to ones that are safe for C 5 | # - `cmung`, which simply iterates over one string 6 | # 7 | # I think `c_safe_char` is pretty nice, sans just relying on type inference, 8 | # but `cmung` could be improved: 9 | # 10 | # - a `string.map!` form to handle application 11 | # - generic `map!` form that takes any `Seq` module/type class 12 | # 13 | # or whatever. it's not terrible tho, all things told 14 | 15 | def c_safe_char x:char => char = match x with 16 | '!' => 'B' 17 | '%' => '_' 18 | '$' => '_' 19 | '?' => 'p' 20 | '<' => 'l' 21 | '>' => 'g' 22 | '=' => 'e' 23 | '@' => '_' 24 | '^' => '_' 25 | '&' => '_' 26 | '|' => '_' 27 | '*' => '_' 28 | '.' => '_' 29 | '+' => '_' 30 | '-' => '_' 31 | else => x 32 | end 33 | 34 | def cmung src:string dst:string len:int => string = { 35 | var idx:int = 0 36 | while (< idx len) do { 37 | set! (get dst idx) $ c_safe_char $ get src idx 38 | set! idx $ + idx 1 39 | } 40 | dst 41 | } 42 | -------------------------------------------------------------------------------- /docs/2heresy-let.md: -------------------------------------------------------------------------------- 1 | # `let` signals intent about variable scope 2 | 3 | I was just thinking about the notion of `let` vs `val`: with a `let`, we're signaling that we *do not* want this variable 4 | to live on outside its scope. That intentionality is probably harmless in most cases; for example, if we have a function such as: 5 | 6 | function foo x:int = 7 | let y:int = (+ x 10) in 8 | (printf "y is %d\n" y) 9 | 10 | we don't *actually* care what happens to `y` afterward, as the function terminates. However, what if we have something more nuanced? 11 | 12 | function bar x:int = { 13 | let y:int = (+ x 10) in 14 | (printf "y is %d\n" y) 15 | let y:int = (+ x 20) in 16 | (printf "y is %d\n" y) 17 | } 18 | 19 | here, we're clearly signaling intent that `y` shouldn't outlive the scope of the enclosing `let`. This isn't *really* revolutionary 20 | thinking, it's just interesting to me given that I am supporting both `let` and `val/var` in carML, and that support has an interesting 21 | impact on how we code: the `let` form is clearly trivially rewritten to a fresh symbol, and is easily compsed into SSA via ANF, but a 22 | `val` may have larger impact depending upon its exposure to other programs (and indeed, we may not want to do SSA/ANF for programs we 23 | expect humans to consume...) 24 | -------------------------------------------------------------------------------- /labs/ch0.carml: -------------------------------------------------------------------------------- 1 | # @(#) a simple character frequency counter 2 | # it's interesting... 
3 | # the current state of carML's RTS is such that 4 | # the code I have to write is quite imperative. 5 | # More analysis passes are required to make this 6 | # much more functional... there's so many set!'s 7 | # and the like in the code below because things 8 | # like the let form aren't up and running, or 9 | # there's no foreach/map working within the 10 | # compiler that can be fused... and yet, I'm 11 | # actually quite happy that the code can already 12 | # be used this way. 13 | def main ac:int al:array[string] => int = { 14 | var composition : array[int] = (make-array int 256 0) 15 | var fh : ref[FILE] = NULL 16 | var ch : int = 0 17 | var tmp : int = 0 18 | 19 | set! fh $ fopen (get al 1) "r" 20 | 21 | when (eq? fh NULL) do { 22 | printf "cannot open file %s\n" $ get al 1 23 | return 1 24 | } 25 | 26 | while (not $ feof fh) do { 27 | set! ch $ fgetc fh 28 | when (<> ch 0x0D) do { 29 | set! tmp $ get composition ch 30 | set! (get composition ch) $ + tmp 1 31 | } 32 | } 33 | 34 | set! tmp 32 35 | 36 | while (< tmp 127) do { 37 | printf "%c => %d\n" tmp (get composition tmp) 38 | set! tmp $ + tmp 1 39 | } 40 | 41 | 0 42 | } 43 | -------------------------------------------------------------------------------- /labs/json.carml: -------------------------------------------------------------------------------- 1 | #@(#) a simple JSON wrapper, meant for reading & writing JSON 2 | #@(#) should be fairly standard in the base of carML, but could 3 | #@(#) be interesting vis-a-vis certain allocation techniques. 4 | 5 | type Json { 6 | Error string 7 | Null 8 | Int int 9 | Float float 10 | String string 11 | Bool bool 12 | Array array[Json] 13 | Object array[tuple[string Json]] 14 | } 15 | 16 | def from_string src:string => Json = { 17 | 18 | } 19 | 20 | def tuple_to_string src:tuple[string Json] => string = { 21 | strings.join "," $ make-array string (_1 src) (to_string $ _2 src) 22 | } 23 | 24 | # intersting, hitting a bug in carml/C here... 25 | # so two things happened: 26 | # 1. I forgot to add a #\) to the Json.Object call below, and that caused a crash 27 | # 1. I forgot to add an `end`, which exacerbated it... 28 | # intersting! 29 | def to_string src:Json => string = { 30 | match src with 31 | (Json.Error x) => "10" 32 | (Json.Null) => "null" 33 | (Json.Int x) => "11" 34 | (Json.Float x) => "12" 35 | (Json.String x) => "13" 36 | (Json.Bool x) => "14" 37 | (Json.Array x) => (strings.format "[%s]" $ strings.join "," $ map to_string x) 38 | (Json.Object x) => (strings.format "{%s}" $ strings.join "," $ map tuple_to_string x) 39 | end 40 | } 41 | -------------------------------------------------------------------------------- /labs/unix.carml: -------------------------------------------------------------------------------- 1 | # @(#) a simple Unix/Posix interaction library 2 | # @(#) just a test to see what it would be like. 
3 | 4 | type UnixResult { 5 | Ok int 6 | Err int string # lookup errno 7 | } 8 | 9 | def unix.open path : string mode : int => UnixResult = { 10 | # TODO: let binding rewriting 11 | val res : int = (open path mode) 12 | # TODO: match form with guard clauses 13 | if (< res 0) then 14 | # TODO: finish $ form, would be so much cleaner: 15 | # UnixResult.Err res $ strerror res 16 | (UnixResult.Err res (strerror res)) 17 | else 18 | (UnixResult.Ok res) 19 | } 20 | 21 | # TODO: fix single Complex type in parameter to C generation 22 | def unix.close_res fh : UnixResult => UnixResult = { 23 | var hdl : int = 0 24 | var res : int = 0 25 | # TODO: pattern matching binds 26 | if (eq? (. fh tag) TAG_UnixResult_ERR) then 27 | fh 28 | else { 29 | set! hdl (. fh (. members (. OK_t m_1))) 30 | set! res (close hdl) 31 | if (< res 0) then 32 | (UnixResult.Err res (strerror res)) 33 | else 34 | (UnixResult.Ok res) 35 | } 36 | } 37 | 38 | def unix.close fh : int => UnixResult = { 39 | val res : int = (close fh) 40 | if (< res 0) then 41 | (UnixResult.Err res (strerror res)) 42 | else 43 | (UnixResult.Ok res) 44 | } 45 | -------------------------------------------------------------------------------- /docs/NOTES.md: -------------------------------------------------------------------------------- 1 | # Lexer bugs: 2 | 3 | - Single character identifiers that start with a keyword letter a broken ("b\nb" is seen as one identifier) 4 | 5 | # keywords 6 | 7 | - `lambda` 8 | - `declare` 9 | - `ref` 10 | - `int` 11 | - `string` 12 | - `char` 13 | - `bool` 14 | - `array` 15 | - `float` 16 | - `match` 17 | - `with` 18 | - `use` 19 | - `module` 20 | - `case` 21 | - `of` 22 | - `define-alien`, `letrec`, `val-alien` 23 | 24 | # Parser bugs: 25 | 26 | None currently (save for missing functionality) 27 | 28 | # Types: 29 | 30 | ## records: 31 | 32 | Records are an interesting case... I do think that `val`-style typing should be used: 33 | 34 | record foo = { 35 | bar : int 36 | baz 37 | } 38 | 39 | here, `foo.bar` has an explicit `int` type, whereas `baz` will be inferred from program 40 | usage... The question remains then how we _allocate_ records. I'd prefer to unbox them 41 | at all times, and even go so far as to do what C# does, and flatten records when passed 42 | to functions. 43 | 44 | Also, what is the accessor syntax? Haskell does what SRFI-9 does, and defines an accessor 45 | function: 46 | 47 | data Person = Person { first_name :: String 48 | , last_name :: String 49 | , age :: Int 50 | } deriving (Eq, Ord, Show) 51 | 52 | print_age person = print $ age person 53 | -------------------------------------------------------------------------------- /docs/3heresy-storage.md: -------------------------------------------------------------------------------- 1 | # Variable Storage is a pointless implementation detail (most of the time) 2 | 3 | I have been thinking about this a lot lately: what is the difference between the following: 4 | 5 | def foo x:int => Either[int, string] = { 6 | ... 7 | } 8 | 9 | def bar x:int => ref[Either[int, string]] = { 10 | ... 11 | } 12 | 13 | Assuming they undertake the same options? For most programmers, they do not *really* care that something 14 | is a reference and something else is a value; that is an implementation detail they do not care about. 15 | Eventually, I would like carML to get to the point where most of the time we do *not* care about such things, 16 | until it is needed (for something like C interop). 
I run into it all the time, thinking about types and such, 17 | and whilst the compiler should be able to adapt, there is no need to *have* to specify storage location for 18 | most things (until you know you do). I think the thing that drives me most with this is: 19 | 20 | a low barrier to entry and no ceiling. 21 | 22 | I want carML to be relatively consumable for most folks, but I would *also* like those hardcore few who are 23 | looking to replace C or the like to be able to do so, all with the same language. Rust has done well there, 24 | although, like ATS, the barrier to entry is still quite high (tho clearly nothing has as high a barrier as 25 | ATS does, goodness). 26 | -------------------------------------------------------------------------------- /example/example0.carml: -------------------------------------------------------------------------------- 1 | # cannonical loop example 2 | def arrayIota arr f = begin 3 | def arrayIotaPrime arr fun idx = begin 4 | if (< idx (array-length arr)) then { 5 | set-array-index! arr (fun idx) idx 6 | arrayIotaPrime arr fun (add idx 1) 7 | } else () 8 | end 9 | arrayIotaPrime arr f 0 10 | end 11 | 12 | # same as the above, but here we 13 | # use the closure in arrayIota' to 14 | # capture variables, instead of 15 | # explicitly passing them in 16 | declare arrayIota0 : procedure[array[Num] function[Num => Num]] 17 | def arrayIota0 arr fun = begin 18 | def arrayIotaPrime idx = begin 19 | if (< idx (array-length arr)) then { 20 | set-array-index! arr (fun idx) idx; 21 | arrayIotaPrime (add idx 1); 22 | } else () 23 | end 24 | arrayIotaPrime 0; 25 | end 26 | 27 | # same as the above, but using `foreachIndex` 28 | @arrayIota1: procedure[array[Num] function[Num => Num]] 29 | def arrayIota1 arr fun = { 30 | foreachIndex fn x = (set-array-index! arr (fun x) x) arr 31 | } 32 | 33 | # same as the above, but without the `{}` block 34 | @arrayIota2: procedure[array[Num] function[Num => Num]] 35 | def arrayIota2 arr fun = (foreachIndex fn x = (set-array-index! arr (fun x) x) arr) 36 | 37 | # eventually, `def` forms should work 38 | # like `begin` forms 39 | def foo x = (add x 1) 40 | 41 | val f = (make-array 10) 42 | 43 | (arrayIota f foo) 44 | 45 | (println f) 46 | -------------------------------------------------------------------------------- /docs/tooling.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | I think the most important thing for new languages is not the language itself, 4 | but rather that the **ecosystem** _surounding_ the language is strong. To that 5 | end, there's a few tools I'd like carML to include as we get nearer to release: 6 | 7 | - static analysis 8 | - symbolic execution/abstract interpretation 9 | - package/project configuration & management 10 | - base set of libraries 11 | 12 | # Static Analysis 13 | 14 | carML is meant to be simpler to analyze than C proper, because it is 15 | smaller, with less ambigious syntax, and restricted program space, so 16 | static analysis internally at release would be useful. 17 | 18 | # SE/AI 19 | 20 | This one I'm less set on, but it would be nice to have a Frama-C 21 | work-alike. 22 | 23 | # Packaging 24 | 25 | I think having reasonable package & library management is pretty 26 | essential nowadays. I'd like to add a tool, `carmlize`, to the base 27 | install that basically handles the core setup of projects, libraries 28 | and the like. 
My thinking is that `use` and `import` would check 29 | some set of paths, such as `./lib:~/.carml/lib:/usr/lib/carml:/usr/local/lib/carml:$CARML_LIB` 30 | so that clients can easily bundle, have virtual environments, &c. 31 | all via default install. 32 | 33 | `carmlize [init|install|ginstall|bundle|dep-install...]``` 34 | 35 | # Base Libs 36 | 37 | - HTTP (including WebDAV, CalDAV) 38 | - TLS 39 | - SMTP 40 | - FTP 41 | - tar/zip 42 | - data structures (including png/jpeg/gif/&c.) 43 | - image 44 | - CLI/GUI 45 | -------------------------------------------------------------------------------- /attic/cmung_nomatch.carml: -------------------------------------------------------------------------------- 1 | def find srcset : array of char dstset : array of char cur : char len : int => char = { 2 | var idx : int = 0 3 | while (< idx len) do { 4 | when (eq? (get srcset idx) cur) do (return dst) 5 | set! idx (+ idx 1) 6 | } 7 | cur 8 | } 9 | 10 | # this code is _so_ imperative 11 | # but it gets me where I want to go... 12 | # I need: 13 | # - HOFs, it's really a must 14 | # - combinators like any/every would be useful 15 | # - need to figure out ways to encode variable arguments... 16 | def cmung src : string dst : string len : int => string = { 17 | var idx : int = 0 18 | val srcset : array of char = ['!', '%', '$', '?', '<', '>', '=', '@', '^', '&', '|', '*', '.'] 19 | val dstset : array of char = ['B', '_', '_', 'p', 'l', 'g', 'e', '_', '_', '_', '_', '_', '_'] 20 | 21 | # foreach would be great here 22 | while (< idx len) do { 23 | 24 | set! cur (get dst idx) 25 | 26 | # would be nicer with `any` or `every` 27 | # this is _basically_ an abomination 28 | if (|| (|| (&& (>= cur 'A') (<= cur 'Z')) (&& (>= cur 'a') (<= cur 'z'))) (&& (>= cur '0') (<= cur '9'))) then 29 | (set! cur (get dst idx)) 30 | else 31 | (set! cur (find srcset dstset cur 13)) # would be nicer with dynamic length... 32 | 33 | set! idx (+ idx 1) 34 | 35 | when (eq? (get src idx) '\0') do (return dst) 36 | } 37 | } 38 | 39 | def cmung_alloc src : string len : int => string = { 40 | val dst : string = (malloc (* (sizeof src) len)) 41 | cmung src dst len 42 | } 43 | -------------------------------------------------------------------------------- /docs/cml-README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | A simple, runtime-less, GC-less ML dialect that compiles to Human-Readable C. Syntactically, it is most similar to 4 | ATS & Yeti, and a previous project I had worked on called "Spearhead". 5 | 6 | # Rationale 7 | 8 | I wanted a hyper-minimal ML dialect for system work, but something simpler than ATS. While I can, and do, use 9 | PreDigamma (my restricted Scheme dialect), having a super small ML dialect to throw around isn't bad either. 10 | Additionally, it allows me to experiment with items I find interesting in a GC-less environment (PreDigamma still 11 | requires the use of a GC, for now). 12 | 13 | # Syntactic Forms 14 | 15 | - `{ ... }` a begin/end pair, with statements separates by `;` 16 | - `match ... with ... chtam` a match form, with guard-clauses, binding, and separated by '|' 17 | - `if ... then ... else ...` No single-armed if 18 | - `when ...` Single-armed if 19 | - `val ... = ...` a variable binding 20 | - `let ... in ...` name binding 21 | - `define ... (parameters ...) = ...` function binding 22 | - `fn (parameters ...) = ...` Lambda 23 | - `define-alien ... 
(parameters ...)` : CFFI 24 | - `val-alien ...` extern C variable 25 | - `define-record ... is ... end` record definitions 26 | - `define-variant ... = ...` (polymorphic) variant definitions 27 | 28 | # Default Library 29 | 30 | - Strings: string-length, string-index, string-string, string-substring, string-match, string-glob 31 | - Vectors: vector-length, vector-index, vector-subvector 32 | - GC: region-gc, region-alloc, region-free 33 | 34 | # C Interaction 35 | 36 | # Examples 37 | 38 | -------------------------------------------------------------------------------- /docs/1heresy-types.md: -------------------------------------------------------------------------------- 1 | # Heresy 1: More thoughts on types 2 | 3 | _2018-APR-01_ 4 | 5 | Just to cut the question short first, no this isn't April Fool's, and no, it's not an Easter discussion either, as I'm Orthodox. However, 6 | I've been thinking about types more again today, in light of some linearization work I was doing on the type processing in carML currently, 7 | as well as my [previous heresy on function types](0heresy-functiontypes.md). Currently, complex types don't *know* their parameterized types. 8 | For example: `array` is technically equivalent to `array[Any]`. This leads to some complexity in the parser: because we allow for `array`, we 9 | have to jump through some hoops to parse the type definitions. This is to make things like `array` come out to C as `void *`, but is the 10 | short syntax _really_ worth it? Thinking about it further, it seems like the shortcut isn't really worth the trouble: 11 | 12 | - it increases our ambiguity in parsing, and leads to the large state machine I currently use to decide syntax transforms at automatons 13 | - I think it _decreases_ readability, and is better written as `array[Any]` or `deque[Any]` 14 | 15 | So I think that's the best: I'll make complex types "aware" of their parameterized types, such that an `array` *must* be followed by an 16 | array-literal of types (for example). This will cut down on the type parsing code (yay less code!), and will mean that users must be 17 | more explicit about the types the specify to the demand system (tho for obvious reasons the demand system will deman/infer a type when 18 | one is *not* specified by the user). 19 | -------------------------------------------------------------------------------- /docs/325m2.txt: -------------------------------------------------------------------------------- 1 | [note]: this document is something I wrote I toyed with in ~2006, and then finally 2 | wrote fully in ~2011. It's funny because these ideas became carML to a large extent, 3 | but also the Scala community added Scala-Native with similar features. 4 | 5 | ---------------- 6 | 7 | 325 m^2 is 1/4 a Scala (Cretian unit of measure) 8 | 9 | support: 10 | 11 | object, class, if/else, when, not, and, or, def, val, for, match, case, 12 | int, float, double, List, Array, String, Char 13 | 14 | compile to: 15 | - C: native, minimal run time, should be self-hosting 16 | + should generate legible code, even at expense of some optimizations 17 | - JS: support both legible & illegible outputs (based on aggression of optimization), 18 | with JS support for Canvas, &c. 
19 | 20 | Should support (G)ADTs, Dependent Types 21 | 22 | Examples of what I'm thinking of: 23 | 24 | def main(args: Array[String]) = println("Hello, world"); 25 | 26 | 27 | def main(args: Array[String]) { 28 | for (val a <- args) { 29 | println("a: " + a); 30 | } 31 | } 32 | 33 | def main(args: Array[String]) = for (val a <- args) { println(a); } 34 | 35 | def foo(x) = x + x; /* Type of Number? */ 36 | 37 | def main(args: Array[String]) { 38 | for (val a <- args) { 39 | println(foo(Number.parseNumber(a))); 40 | } 41 | } 42 | 43 | 44 | class AddressBook { 45 | class Person { 46 | val name : String; 47 | val surname : String; 48 | } 49 | } 50 | 51 | def sum(i: Int*) { 52 | if(i.length >= 1) { 53 | i(0) + /* ... */ 54 | } else { 55 | 0 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /docs/opprec.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | CarML has no operator precedence, but something I've been thinking about is how to make it nicer. We often end up with 4 | pipelines like: `somelambda $ foo $ bar (get baz 10) 20` which, whilst not terrible, could be nicer for some operations. 5 | Consider arithmetic: currently in carML, adding three numbers is `(+ 1 $ + 2 3)`. Again, not terrible, but... 6 | 7 | There [was an article on lobste.rs talking about operator precedence](https://lobste.rs/s/jol24u/better_operator_precedence) which 8 | had [an interesting comment](https://lobste.rs/s/jol24u/better_operator_precedence#c_bwd4ij): 9 | 10 | > In Verona, we’re experimenting with no operator precedence. a op1 b op2 c is a parse error. You can chain sequences of the same operator but any pair of different operators needs brackets. You can write (a op1 b op1 c) op2 d or a op1 b op1 (c op2 d) and it’s obvious to the reader what the order of application is. This is a bit more important for us because we allow function names to include symbol characters and allow any function to be used infix, so statically defining rules for application order of a append b £$%£ c would lead to confusion. 11 | 12 | This maps what I've been thinking about (no operator precedence, just use brackets) but does make it much nicer to have arithmetic pipelines: the example 13 | above would just become `(1 + 2 + 3)`. I'd still have to implement *some* form of Shunting Yard for this, but I should do that anyway (and move away 14 | from the current approach, which is just to insert some `call` forms when we see a `$`). This would also help sort out what to do with `$` vs `|>`. 15 | 16 | Thinking about it further tho, this also would be solved by moving to the Scheme-style multi-arity for basic lambdas... 17 | 18 | - `($ foo bar baz)` => `(foo (bar baz))` 19 | - `(* 1 2 3)` => `(* 1 (* 2 3))` 20 | 21 | Esp because I *enjoy* Polish notation 22 | -------------------------------------------------------------------------------- /docs/4heresy-modules.md: -------------------------------------------------------------------------------- 1 | # thoughts on modules 2 | 3 | I've been thinking a lot about what I want to do for modules. I really like how OCaml and SML handle this, but I think 4 | they are quite complex. On the flip side, I think [Mythryl's take on Packages and APIs](https://mythryl.org/my-Packages_and_APIs.html) is 5 | quite fascinating, but not really where I want to go. 
I think F# strikes a happy medium; you can have nested Modules, and they are 6 | quite simple, but you can manipulate the environment to make things relatively clean (from user syntax perspective) as well. 7 | 8 | Part of my thinking here is that carML already has mechanisms to parameterize types: 9 | 10 | ``` 11 | type Foo X { 12 | Bar int 13 | Blah X 14 | } 15 | ``` 16 | 17 | Syntactically we match the semantic namespace of a constructor to it's type: 18 | 19 | ``` 20 | match x with 21 | (Foo.Bar y) => y 22 | else => 10 23 | end 24 | ``` 25 | 26 | And the generated C/Golang code relies on this fact. That's not to say that we cannot break this, of course, but rather I do 27 | like namespacing constructors, and only a few languages do this. So I thought I'd make a proposal to fix these with some syntax. 28 | 29 | Part of _why_ I'm thinking about this now is that I want to add some simple functions to the base system that can be easily 30 | rewritten for the various output types, but are nicely named. For example, whilst I can easily support `string-map!` currently, 31 | it would be nice to define a module `Strings` that includes a `map!` function. 32 | 33 | # Proposal 34 | 35 | - add `::` and `:=` to syntax as operators, like `|>` and `$` 36 | - this proposal doesn't require `:=`, it's just if I do the work on `::` I may as well do `:=` too 37 | - fix `use` to load modules from some set of paths 38 | - support `Foo::bar` which means the `bar` element of module `Foo` 39 | - add `module` and `open-module` as syntactic forms in the F# style 40 | -------------------------------------------------------------------------------- /labs/trie.c.carml: -------------------------------------------------------------------------------- 1 | #@(#) a simple trie implementation, for strings 2 | #@(#) similar to what I did in Digamma, but just 3 | #@(#) testing, really useful mostly for what I'm 4 | #@(#) doing with the compiler in carML 5 | 6 | record Trie { 7 | key:char 8 | data:int # we'll use -1 as a "no data" 9 | # this is a good point: how do we tell 10 | # C that we want a static array here rather 11 | # than a pointer to a pointer? I believe the 12 | # C compiler treats them similarly, but we 13 | # may want it to be different... 14 | children:array[ref[Trie]] 15 | } 16 | 17 | def partial_key_p key:string src:ref[Trie] => int = { 18 | 19 | } 20 | 21 | def has_key_p key:string src:ref[Trie] => bool = { 22 | 23 | } 24 | 25 | def lookup key:string src:ref[Trie] => ref[IntOption] = { 26 | 27 | } 28 | 29 | def insert key:string value:int offset:int src:ref[Trie] => bool = { 30 | val subkey:char = ' ' 31 | when (>= offset (strlen key)) do { 32 | set! (-> src data) value 33 | return true 34 | } 35 | 36 | } 37 | 38 | def get_keys src:ref[Trie] => array[string] = { 39 | 40 | } 41 | 42 | def delete key:string src:ref[Trie] => bool = { 43 | 44 | } 45 | 46 | def make_trie => ref[Trie] = { 47 | # it would be really nice to just do 48 | # `+(make Trie ...)+` here 49 | # and then rely on the compiler to know 50 | # we want a ref (or use something like 51 | # `+view+` to make a reference...) 52 | val ret:ref[Trie] = (hmalloc $ sizeof Trie) 53 | set! (-> ret key) 0 54 | set! (-> ret data) -1 55 | set! (-> ret children) (hmalloc $ * 26 $ sizeof ref[Trie]) 56 | ret 57 | } 58 | 59 | def main => int = { 60 | var root:ref[Trie] = (make_trie) 61 | insert "foo" 10 root 62 | insert "bar" 11 root 63 | insert "baz" 12 root 64 | val keys:array[string] = (get_keys root) 65 | printf "partial key \"ba\"? 
%d\n" $ partial_key "ba" root 66 | printf "keys:\n" 67 | while (< idx 3) do { 68 | printf "key: %s\n" $ get keys idx 69 | set! idx $ + idx 1 70 | } 71 | 0 72 | } 73 | -------------------------------------------------------------------------------- /editors/carml.vim: -------------------------------------------------------------------------------- 1 | " Vim syntax file 2 | " Language: carML 3 | " Maintainer: lojikil 4 | " Latest Revision: 18 June 2016 5 | " add this to your .vimrc: 6 | " au BufRead,BufNewFile *.29 set filetype=xl29 7 | " au BufRead,BufNewFile *.carml set filetype=xl29 8 | 9 | if exists("b:current_syntax") 10 | finish 11 | endif 12 | 13 | syn keyword basicLanguageKeywords val def let letrec in when if then else 14 | syn keyword basicLanguageKeywords do match case with of use record type poly 15 | syn keyword basicLanguageKeywords and var for while import use open load 16 | syn keyword xl29BlockCmd begin end 17 | 18 | " Integer with - + or nothing in front 19 | syn match xl29Number '\s\d\+\s' 20 | syn match xl29Number '\s[-+]\d\+\s' 21 | 22 | " Floating point number with decimal no E or e 23 | syn match xl29Number '\s[-+]\d\+\.\d*\s' 24 | 25 | " Floating point like number with E and no decimal point (+,-) 26 | syn match xl29Number '[-+]\=\d[[:digit:]]*[eE][\-+]\=\d\+' 27 | syn match xl29Number '\d[[:digit:]]*[eE][\-+]\=\d\+' 28 | 29 | " Floating point like number with E and decimal point (+,-) 30 | syn match xl29Number '[-+]\=\d[[:digit:]]*\.\d*[eE][\-+]\=\d\+' 31 | syn match xl29Number '\d[[:digit:]]*\.\d*[eE][\-+]\=\d\+' 32 | 33 | syn region xl29DescBlock start="{" end="}" fold transparent 34 | syn region xl29DescBlock start="begin" end="end" fold transparent 35 | syn keyword xl29Todo contained TODO FIXME XXX NOTE 36 | syn match xl29Comment "#.*$" contains=xl29Todo 37 | 38 | syn keyword xl29Type int char float string bool deque array ref 39 | syn keyword xl29Ops add sum println print div mod divide modulo sub subtract printf 40 | syn keyword xl29Ops / + - * % ^ ! @ $ & \| \|\| && and or not <> < <= > >= != eq? ==? 41 | 42 | let b:current_syntax = "xl29" 43 | 44 | hi def link basicLanguageKeywords Statement 45 | hi def link xl29Todo Todo 46 | hi def link xl29Comment Comment 47 | hi def link xl29BlockCmd Statement 48 | hi def link xl29Type Type 49 | hi def link xl29String Constant 50 | hi def link xl29Desc PreProc 51 | hi def link xl29Number Constant 52 | hi def link xl29Ops Operator 53 | -------------------------------------------------------------------------------- /docs/0heresy-functiontypes.md: -------------------------------------------------------------------------------- 1 | # Heresy 0: function/procedure types 2 | 3 | _2018-MAR-11_ 4 | 5 | I was thinking today about adding code to parse types, and had a bit of an epiphany (or maybe a bit of a "fuck this"): 6 | 7 | Why have special code to parse higher-order function types just to keep syntax similar to function declarations themselves? 8 | 9 | The traditional answer to declaring HOFs in functional languages looks something like: 10 | 11 | name: (type0 type1 type2 ... => returnType) 12 | 13 | where `name` is some binding and we have a list of types. I was thinking about the work I've been doing wrt Scala-style types, 14 | and how much easier that makes parsing of type declarations: most of the `of` forms can disappear, and deciding if something 15 | is a new tag or a type-member is as simple as reading an array literal. 
This got me to thinking about how much code I want to 16 | remove, oh but damn I still need to add `(type...)` parsing for HOFs. Then I had a thought: doesn't Scala and several other 17 | langauges support some notion of a `Function` type? Can't we do something similar? 18 | 19 | ## A Modest ~Proposal~ Heresy 20 | 21 | Functions & procedures are just fancy objects and types about them should be simple. Introducing new parsing code just to 22 | parse HOFs is overkill, as we can introduce two new types, `Function` and `Procedure`, which capture the notion of functions 23 | and procedures (or, functions that do not return a value, Unit functions): 24 | 25 | name: Function[int int] 26 | 27 | `name` is a function from `int` to `int`. For purposes of beauty and perhaps visual delineation, `=>` should work within, but 28 | be ignored by the compiler: 29 | 30 | name: Function[int => int] 31 | 32 | Functions that are only called for their side effects may be easily modeled two ways: 33 | 34 | doSomething: Function[int => ()] 35 | 36 | That is to say, `doSomething` does something on integers and returns no value, or: 37 | 38 | doSomethingElse: Procedure[int] 39 | 40 | Here `doSomethingElse` is a "procedure" (to get into Scheme-style "lambda calculus of procedures") over integers. Nullary 41 | functions that return no values may be introduced with `Function[()]`, and procedures can be introduced simply with 42 | `Procedure`, since the carML type parsing already assumes tagged words without other specification are types. 43 | 44 | I need to masticate on this a bit more, but it's intriguing to me. 45 | -------------------------------------------------------------------------------- /labs/rewriter.carml: -------------------------------------------------------------------------------- 1 | # @(#) a simple rewriter 2 | # @(#) given a needle:ref[AST], replace it with replace:ref[AST] 3 | 4 | def rewrite needle:ref[AST] replace:ref[AST] haystack:ref[AST] => ref[AST] = { 5 | var ret:ref[AST] = NULL 6 | var idx:int = 0 7 | var hlen:int = 0 8 | var htag:int = 0 9 | var hvlen:int = 0 10 | 11 | # if we have a nil haystack, 12 | # just return it 13 | when (eq? haystack NULL) do { 14 | return ret 15 | } 16 | 17 | # decompose some values here 18 | # ideally we would do this in a let/match, 19 | # but we're still too low-level here for that. 20 | # this code will help us do that decomposition... 21 | 22 | set! hlen $ -> haystack lenchildren 23 | set! htag $ -> haystack tag 24 | set! hvlen $ -> haystack lenvalue 25 | 26 | # actually check for the rewrite. 27 | match tag with 28 | # if we have an identifier, simply check if it is the 29 | # needle and, if it is, return the replacement value, 30 | # otherwise return the haystack 31 | TIDENT => if (eq? (strncmp (-> haystack value) (-> needle value) hvlen) 0) then 32 | (return replace) 33 | else 34 | (return haystack) 35 | # here we have something that has children 36 | # iterate over the children, checking if the 37 | # child matches the needle, and calling nested 38 | # rewriter for each if the child is a complex 39 | # AST 40 | x given (> hlen 0) => { 41 | # allocate our copy node. 42 | # TINSTAAFL btw; we either pay 43 | # memory complexity like this, when there is no match 44 | # or we pay algorithmic complexity and iterate over 45 | # potential AST nodes twice, once to scan for matches 46 | # and once to rewrite them. 47 | # the gods of Big O and Big Theta demand sacrifice 48 | set! ret $ hmalloc $ sizeof AST 49 | set! (-> ret lenchildren) hlen 50 | set! 
(-> ret tag) htag 51 | set! (-> ret lenvalue) hvlen 52 | set! (-> ret children) $ hmalloc $ * hlen $ sizeof ref[AST] 53 | while (<= idx hlen) do { 54 | set! (get (-> ret children) idx) $ rewrite needle replace $ get (-> haystack children) idx 55 | set! idx $ + idx 1 56 | } 57 | } 58 | else => (set! ret haystack) 59 | end 60 | return ret 61 | } 62 | -------------------------------------------------------------------------------- /example/cur.carml: -------------------------------------------------------------------------------- 1 | # this file represents what c29 is _currently_ able to compile 2 | 3 | # `declare` needn't be written right next to the 4 | # value being declared about... 5 | # I also like `@` as shorthand for `declare`... 6 | # 7 | # declare blah Num -> Num 8 | # def blah x = (sum x x) 9 | # 10 | # @blaz Num -> Num 11 | # def blaz x = (sum x x) 12 | # 13 | # Further thoughts: 14 | # - function parameters? @blah Num (Num -> Num) -> Num 15 | # - type variables? @blah Num vec of 'a -> 'a 16 | 17 | declare foo:function[int int] 18 | def foo x = (sum x x) 19 | 20 | @bar: function[int int int => int] 21 | def bar x y z = begin 22 | println x; 23 | println y; 24 | println z; 25 | sum (sum x y) z; 26 | end 27 | 28 | # same as the above, but with 29 | # implicit call termination via 30 | # newlines 31 | @blah:function[int int int => int] 32 | def blah x y z = begin 33 | println x 34 | println y 35 | println z 36 | sum (sum x y) z 37 | end 38 | 39 | # same as the above, but with 40 | # {} instead of begin/end 41 | def blaz x:int y:int z:int => int = { 42 | println x 43 | println y 44 | println z 45 | sum (sum x y) z 46 | } 47 | 48 | @baz:function[int int int => int] 49 | def baz x y z = if (>= x y) then z else x 50 | 51 | #declare baz bool any -> unit 52 | @bar:procedure[bool any] 53 | def bar0 x y = when x do y 54 | 55 | # constant 56 | @x:function[=> int] 57 | def x = 32 58 | 59 | val foo = "simple string for doc note?" 60 | 61 | let refc = 340 in (sum refc refc) 62 | 63 | # nested let 64 | let x = 54 in 65 | let y = 67 in 66 | let z = 75 in (sum x (sum y z)) 67 | 68 | # simple `letrec` example 69 | # this is _not_ being parsed correctly... 70 | # ok, there cannot be a TNEWL after the 71 | # TIN, and it must be wrapped in a TCALL... 72 | # parsing is hard, let's go shopping! 73 | # `letrec loop x = {...} in ...` syntactic 74 | # sugar will go a long way here... 75 | letrec loop = fn x = { 76 | println x 77 | if (< x 10) then (loop (sum x 1)) else () 78 | } in (loop 0) 79 | 80 | # lambda definition of a value... 81 | val yar = fn x = (sum x 32) 82 | 83 | # simple record definition 84 | record Foo { 85 | blah : int 86 | bar : int 87 | baz 88 | } 89 | 90 | val xx : int = 45 91 | 92 | # typed vectors: 93 | val yy : array[int] = [1,2,3,4,5] 94 | val zz : array[array[int]] = [[1,2], [3,4]] 95 | val aa : array[array[int]] = [[1,2], [3,4]] 96 | 97 | # should have the same type as the first: 98 | val zz = [1,2,3,4,5] 99 | 100 | # commas are optional in arrays 101 | val jj = [1 2 3 4 5] 102 | -------------------------------------------------------------------------------- /attic/cmung.carml: -------------------------------------------------------------------------------- 1 | def cmung src : string dst : string len : int => string = { 2 | var idx : int = 0 3 | var tmp : char = ' ' 4 | # ideally, this would be great as a 5 | # map or map! form. The below is 6 | # entirely too low level for my tastes, 7 | # esp. when I can see the proper direction. 
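    # what the loop below does, concretely: walk src one character at a
    # time, translate Scheme-ish identifier characters ('!', '?', '<',
    # '>', '=', &c.) into C-safe replacements in dst, and hand dst back
    # early once we hit the terminating ASCII NUL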
8 | while (< idx len) do { 9 | # this would be a wonderful place for the match form 10 | # honestly, instead of having the above arrays, just 11 | # have a simple match here. The match could also detect 12 | # if we have hit end of string (i.e. ASCII NUL). 13 | set! tmp $ get src idx 14 | match tmp with 15 | # refactor out all these set!s... 16 | '!' => (set! (get dst idx) 'B') 17 | '%' => (set! (get dst idx) '_') 18 | '$' => (set! (get dst idx) '_') 19 | '?' => (set! (get dst idx) 'p') 20 | '<' => (set! (get dst idx) 'l') 21 | '>' => (set! (get dst idx) 'g') 22 | '=' => (set! (get dst idx) 'e') 23 | '@' => (set! (get dst idx) '_') 24 | '^' => (set! (get dst idx) '_') 25 | '&' => (set! (get dst idx) '_') 26 | '|' => (set! (get dst idx) '_') 27 | '*' => (set! (get dst idx) '_') 28 | '.' => (set! (get dst idx) '_') 29 | else => (set! (get dst idx) (get src idx)) 30 | end 31 | 32 | # probably should make an `inc!` form 33 | set! idx $ + idx 1 34 | 35 | when (eq? (get src idx) '\0') do 36 | (return dst) 37 | } 38 | } 39 | 40 | def cmung_nicer src:string dst:string len:int => string = { 41 | # this is pretty nice, and it hides much of the 42 | # actual details about how things are stored and 43 | # what not. It's also generally just quite a bit 44 | # smaller. I do wonder if it's *too* minimal tho; 45 | # I think I'll need to get others reading some 46 | # carML at some point soon to see. Probably 47 | # Bruce will see this first tho. XD 48 | map! src dst fn x:char => char = { 49 | match x with 50 | '!' => 'B' 51 | '%' => '_' 52 | '$' => '_' 53 | '?' => 'p' 54 | '<' => 'l' 55 | '>' => 'g' 56 | '=' => 'e' 57 | '@' => '_' 58 | '^' => '_' 59 | '&' => '_' 60 | '|' => '_' 61 | '*' => '_' 62 | '.' => '_' 63 | else => x 64 | end 65 | } 66 | } 67 | 68 | def cmung_alloc src : string len : int => string = { 69 | var dst : string = (malloc (* (sizeof src) len)) 70 | cmung src dst len 71 | } 72 | -------------------------------------------------------------------------------- /src/minicompiler.c.carml: -------------------------------------------------------------------------------- 1 | #@(#) a miniature compiler for carML, written in carML 2 | #@(#) meant to get us over the hump and actually be able 3 | #@(#) to do simple things like check if a function was 4 | #@(#) defined when producing code, and a nicer REPL 5 | #@(#) this also should allow us to do other nicer 6 | #@(#) things in the compiler, like dump bindings for 7 | #@(#) all defined functions; note, this is called 8 | #@(#) a mini-compiler to differentiate it from the 9 | #@(#) full compiler. The delineation there is that 10 | #@(#) this one still relies on what I originally wrote 11 | #@(#) in C itself, whereas the full compiler will not 12 | #@(#) be reliant upon that. As well, the fully compiler 13 | #@(#) will have a properly typed AST, rather than the 14 | #@(#) current array[ref[AST]]-typed one 15 | 16 | # build a spaghetti stack of environment frames 17 | # this would be nicer if we had two things: 18 | # 19 | # . vecdeques 20 | # . 
tries 21 | # 22 | # if we had the first, we wouldn't need to track 23 | # anything more about the object other than the 24 | # vector, and if we had the second we could do 25 | # faster lookups; currently we will have to iterate 26 | # over the entire memberlist to find a match 27 | 28 | record EnvFrame { 29 | curobj:int 30 | curmod:int 31 | lenobjs:int 32 | lenmods:int 33 | object_names:array[string] 34 | object_vals:array[ref[AST]] 35 | module_names:array[string] 36 | module_vals:array[ref[AST]] 37 | parent:EnvFrame 38 | } 39 | 40 | # add an environment frame to the spaghetti stack 41 | def new-env-frame src:ref[EnvFrame] => ref[EnvFrame] = { 42 | var ret:ref[EnvFrame] = (hmalloc $ sizeof EnvFrame) 43 | set! (-> ret parent) src 44 | ret 45 | } 46 | 47 | # add an AST to the correct place 48 | def add-member! src:ref[AST] dst:ref[EnvFrame] => bool = { 49 | # I *reallllllllly* want accessors... 50 | val tag : int = (-> src tag) 51 | # also, I reaaaaaaally need to fix that `match` function 52 | # call bug 53 | # additionally, it will be really nice to get rid of some 54 | # of this imperative code here... a nice monadic interface 55 | # for these would work too... 56 | # lastly, a nice vector dequeue implementation would elide 57 | # a lot of the detail that I'm manually doing in here... 58 | match tag with 59 | TMODULE => { 60 | # we also need to handle the case when the length is 61 | # at capacity, and need to update that... 62 | when (eq? (-> dst curmod) (-> dst lenmods)) do { 63 | return false 64 | } 65 | set! (get (-> dst module_vals) (-> dst curmod)) src 66 | set! (get (-> dst module_names) (-> dst curmod)) (-> src value) 67 | set! (-> dst curmod) $ + 1 $ -> dst curmod 68 | } 69 | else => { 70 | # we also need to handle the case when the length is 71 | # at capacity, and need to update that... 72 | # NOTE we should check here if this is *actually* and object we care 73 | # about... or we can just rely on the compiler to handle that, which 74 | # isn't a terrible idea either... 75 | when (eq? (-> dst curobj) (-> dst lenobjs)) do { 76 | return false 77 | } 78 | set! (get (-> dst object_vals) (-> dst curobj)) src 79 | set! (get (-> dst object_names) (-> dst curobj)) (-> src value) 80 | set! (-> dst curobj) $ + 1 $ -> dst curobj 81 | } 82 | end 83 | true 84 | } 85 | 86 | # XXX Need to fix the carML/C compiler here, it dies when a return type is unit 87 | def foreach-module src:ref[EnvFrame] idx:int = { 88 | when (< idx (-> src lenmods)) do { 89 | (walk (get (-> src module_vals) idx) 0) 90 | (foreach-module src (+ idx 1)) 91 | } 92 | } 93 | 94 | def foreach-other src:ref[EnvFrame] idx:int = { 95 | when (< idx (-> src lenobjs)) do { 96 | (walk (get (-> src object_vals) idx) 0) 97 | (foreach-other src (+ idx 1)) 98 | } 99 | } 100 | 101 | def dump-frame src:ref[EnvFrame] = { 102 | when (eq? src NULL) do (return) 103 | 104 | foreach-module src 0 105 | foreach-other src 0 106 | dump-frame (-> src parent) 107 | } 108 | 109 | def mini-compiler src:ref[AST] env:ref[EnvFrame] = { 110 | 111 | } 112 | -------------------------------------------------------------------------------- /src/carml.carml: -------------------------------------------------------------------------------- 1 | # the carML compiler, written in carML 2 | 3 | use labs/mini-sexpression 4 | 5 | # we can start to stub out all the types of expressions 6 | # that carML has. Probably should just generate this 7 | # from the carML/C system, but... 8 | type CarML { 9 | # I wonder if we can use another type 10 | # here for these... 
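    # a rough, hand-written illustration (pseudo-notation, not produced
    # by any tool here): `def foo x:int => int = (sum x x)` would land in
    # the Def constructor below as something like
    #   Def "foo" [] [("x" IntT)] IntT (Call (Ident "sum") [(Ident "x") (Ident "x")])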
11 | # name types-array [(parameter-ident type)] returnType body 12 | Def string array[CarML] array[tuple[string CarML]] CarML CarML 13 | Fn array[CarML] CarML 14 | ParamDef string CarML 15 | Declare string CarML 16 | # thinking these should be an array[string] 17 | # and an array[CarML] that way we can support 18 | # destructuring binds... 19 | Var string CarML CarML 20 | DestructuringVar array[string] array[CarML] CarML 21 | Val string CarML CarML 22 | DestructuringVal array[string] array[CarML] CarML 23 | Let string CarML CarML 24 | DestructuringLet array[string] array[CarML] CarML 25 | LetRec string CarML CarML 26 | DestructuringLetRec array[string] array[CarML] CarML 27 | Integer string 28 | Octal string 29 | Hex string 30 | Bin string 31 | Float string 32 | String string 33 | Char string 34 | Bool string 35 | ArrayLiteral CarML array[CarML] 36 | Ident string 37 | Tag string 38 | Begin array[CarML] 39 | Call CarML array[CarML] 40 | If CarML CarML CarML 41 | When CarML CarML 42 | Match CarML array[CarML] 43 | Guard CarML CarML 44 | While CarML CarML 45 | For CarML CarML 46 | Type string array[CarML] array[CarML] 47 | Record string array[string] array[CarML] 48 | ComplexType string array[CarML] 49 | Else 50 | IntT 51 | StringT 52 | FloatT 53 | CharT 54 | BoolT 55 | AnyT 56 | Unit 57 | Nil 58 | # NOTE: explicitly not handling things like $, =>, &c 59 | # may have to do that eventually too tho... 60 | # NOTE: actually... on second thought... it would be neat 61 | # if I could use this closer to something like Concrete 62 | # Syntax Trees... 63 | Comment string 64 | FatArrow 65 | PipeArrow 66 | Colon 67 | ColonEqual 68 | DollarSign 69 | } 70 | 71 | record Environment { 72 | # should we separate out constructor 73 | # names & accessors? might make sense... 74 | # in Digamma I made these tries, it might 75 | # make sense to have a tree structure for 76 | # these, but I think often it's not going 77 | # to be many of these at one time... 78 | type_names:array[string] 79 | type_defns:array[CarML] 80 | ctor_names:array[string] 81 | ctor_defns:array[CarML] 82 | accr_names:array[string] 83 | func_names:array[string] 84 | func_defns:array[CarML] 85 | recr_names:array[string] 86 | recr_defns:array[string] 87 | val_names:array[string] 88 | val_defns:array[string] 89 | var_names:array[string] 90 | var_defns:array[string] 91 | parent:ref[Environment] 92 | } 93 | 94 | # NOTE: so the general gist here is that we want to be able to 95 | # round trip code from SExpressions back to CarML. For example, 96 | # we may have several optimization passes that end up eliminating 97 | # things, doing tree shaking, &c, and having a Compiler that can 98 | # handle these round trips is key. However, I'm not currently 99 | # focused on having the carML/carML compiler be able to parse 100 | # carML itself, we can continue to use the carML/C compiler as 101 | # a front end. 
This *does* mean that the carML/C compiler must 102 | # be kept up to date with at least the language, even if the 103 | # simple code generation in the carML/C system cannot handle 104 | # all the cases that it can parse 105 | 106 | def sexpression->ast src:SExpression => CarML = { 107 | make-struct CarML.Nil 108 | } 109 | 110 | def ast->carml src:CarML => string = { 111 | "" 112 | } 113 | 114 | def ast->golang src:CarML => string = { 115 | "" 116 | } 117 | 118 | def ast->c src:CarML => string = { 119 | "" 120 | } 121 | 122 | # NOTE: section: optimizations 123 | # we need to be able to optimize certain situations 124 | # in carML and rewrite them to nicer alternatives. For 125 | # example, whilst `let` is a useful abstraction, it's not 126 | # incredibly useful when getting into actual code; we can 127 | # use something like an alpha rewrite, to transform a `let` 128 | # or a `letrec` into a freshsym `val` or `var` 129 | def let->val src:CarML => CarML = { 130 | make-struct CarML.Nil 131 | } 132 | -------------------------------------------------------------------------------- /src/newreader.c.carml: -------------------------------------------------------------------------------- 1 | @new_reader: function[ref[FILE] ref[AST]] 2 | @read_var_val: function[ref[FILE] ref[AST]] 3 | @read_array_literal: function[ref[FILE] ref[AST]] 4 | @read_def: function[ref[FILE] ref[AST]] 5 | @make_ident: function[array[char] ref[AST]] 6 | @make_int: function[array[char] ref[AST]] 7 | @make_float: function[array[char] ref[AST]] 8 | @make_string: function[array[char] ref[AST]] 9 | @make_eof: function[ref[AST]] 10 | @make_colon: function[ref[AST]] 11 | @make_type: function[int ref[AST]] 12 | @make_complex_type: function[array[ref[AST]] ref[AST]] 13 | 14 | def new_reader fin:ref[FILE] => ref[ASTEither] = { 15 | var buffer:array[char] = (make-array char 512) 16 | var ltype:int = 0 17 | var tmp:ref[AST] = nil 18 | set! ltype (next fdin buffer 512) 19 | match ltype with 20 | TVAR => (read_var_val fdin TVAR) 21 | TVAL => (read_var_val fdin TVAL) 22 | TIDENT => (ASTRight $ make_ident buffer) 23 | TINT => (ASTRight $ make_int buffer) 24 | TFLOAT => (ASTRight $ make_float buffer) 25 | TSTRING => (ASTRight $ make_string buffer) 26 | TEOF => (ASTRight $ make_eof) 27 | TCOLON => (ASTRight $ make_colon) 28 | TINTT => (ASTRight $ make_type TINTT) 29 | TFLOATT => (ASTRight $ make_type TFLOATT) 30 | TSTRT => (ASTRight $ make_type TSTRT) 31 | TBOOLT => (ASTRight $ make_type TBOOLT) 32 | TCHART => (ASTRight $ make_type TCHART) 33 | TARRAY => { 34 | # we need to read in one more element, which should be 35 | # a TARRAYLITERAL 36 | # NOTE I had a good idea: we can make these more 37 | # functional and more stateful by having a set of 38 | # smaller functions, like `read_colon` and `read_array_literal` 39 | # that handle the unwrapping and what not, but also handle 40 | # errors 41 | set! sometmp (new_reader fin) 42 | when (eq? (-> sometmp tag) ASTLEFT) do (return sometmp) 43 | set! tmp (-> sometmp right) 44 | set! tag (-> tmp tag) 45 | match tag with 46 | TARRAYLITERAL => (ASTRight $ make_complex_type TARRAY tmp) 47 | else => (ASTLeft 0 0 "Array as a type *must* be followed be an array-literal of types") 48 | end 49 | } 50 | TDEQUE => { 51 | # same as the above, read once more, and get the vecdeque type 52 | set! sometmp (new_reader fin) 53 | when (eq? (-> sometmp tag) ASTLEFT) do (return sometmp) 54 | set! tmp (-> sometmp right) 55 | set! 
tag (-> tmp tag) 56 | match tag with 57 | TARRAYLITERAL => (ASTRight $ make_complex_type TARRAY tmp) 58 | else => (ASTLeft 0 0 "Array as a type *must* be followed be an array-literal of types") 59 | end 60 | } 61 | else => (ASTLeft 0 0 "something failed") 62 | end 63 | } 64 | 65 | # it would be interesting to do this sort of parsing 66 | # with a monadic state and |> or the like, I think. 67 | # Something like a LexState monad that has the current 68 | # file, buffer, location, &c. It would also help with 69 | # errors, as the monad could contain current offset and 70 | # what not 71 | def read_var_val fdin:ref[FILE] tag:int => ref[ASTEither] = { 72 | var ident:ref[AST] = (make_null) 73 | var vtype:ref[AST] = (make_null) 74 | var value:ref[AST] = (make_null) 75 | var tmp:ref[AST] = (make_null) 76 | var eithertmp:ref[ASTEither] = nil # yes, I hate it too... 77 | var tmptag:int = 0 78 | set! ident (new_reader fdin) 79 | when (<> (-> ident tag) TIDENT) do { 80 | return $ ASTLeft 0 0 "var *must* be followed by an IDENT" 81 | } 82 | 83 | # actually, we need to check if we get an ASTLeft here and 84 | # respond accordingly... 85 | set! eithertmp (new_reader fdin) 86 | if (eq? (-> eithertmp tag) ASTLEFT) then (return eithertmp) else (set! tmp (-> eithertmp right)) 87 | set! tmptag (-> tmp tag) 88 | match (-> tmp tag) with 89 | TCOLON => { 90 | set! eithertmp (new_reader fdin) 91 | if (eq? (-> eithertmp tag) ASTLEFT) then (return eithertmp) else (set! tmp (-> eithertmp right)) 92 | set! tmptag (-> tmp tag) 93 | match (-> tmp tag) with 94 | TTAG => { 95 | # ok, so here we can either get: 96 | # . a TARRAYLITERAL which means we have a complex type 97 | # . a TEQ, which means we should move on 98 | } 99 | _ given (istypeast $ -> tmp tag) => { 100 | 101 | } 102 | else => (return $ ASTLeft 0 0 "type definition must be followed by a type") 103 | end 104 | } 105 | TEQ => () 106 | else => (ASTLeft 0 0 "var's ident *must* be followed by a COLON or an EQUALS SIGN") 107 | end 108 | } 109 | -------------------------------------------------------------------------------- /labs/sexpr.carml: -------------------------------------------------------------------------------- 1 | #@(#) SExpression library for carML 2 | #@(#) mainly aimed at getting the carML compiler out of the C system 3 | #@(#) should be mainly compatible with the SExpression-based IR that 4 | #@(#) carML/C generates. 5 | # 6 | # the shame is, given carML's current 7 | # type system, there are many work-arounds 8 | # required to make the below work: 9 | # - the type system can't tell ref[SExpression] can be flattened to SExpression 10 | # - some things should be automatically passed around as ref[Type] but you have to manually do that 11 | # - lots of ref[Any] could be turned into a `const`... 12 | # 13 | # probably lots of other hacks as I 14 | # go through it. Luckily, once I move 15 | # past where the C compiler is, I can 16 | # start to handle some of the debt I've 17 | # accrued. 
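# a rough, hand-written illustration of the encoding this library is
# aiming for (not actual reader output): the text "(sum 1 2)" should
# come back as roughly
#   Cons (Atom "sum") (Cons (Int 1) (Cons (Int 2) Nil))
# or, in the array-backed form, List [(Atom "sum") (Int 1) (Int 2)]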
18 | 19 | type Num { 20 | # would be really nice if this could 21 | # just be handled by a union[float int] 22 | NInt int 23 | NFloat float 24 | } 25 | 26 | type SExpression { 27 | Nil 28 | Cons SExpression SExpression 29 | Atom string 30 | String string 31 | Int int 32 | Float float 33 | Rational int int 34 | Complex Num Num 35 | Char char 36 | Bool bool 37 | # need to add length generation to 38 | # the C compiler here 39 | List array[SExpression] 40 | Array array[SExpression] 41 | Error string 42 | # Maybe should have an internal type that 43 | # isn't exported, and an external type that 44 | # is, so as to remove the two constructors 45 | # below... 46 | EndList 47 | EndArray 48 | } 49 | 50 | # add a simple reader system below 51 | # should be able to construct & read 52 | # SExpression-based syntax 53 | # 54 | # What will this be used for? Why, 55 | # for bootstrapping a compiler in carML 56 | # itself really. 57 | 58 | def peek_char fh:ref[FILE] => char = { 59 | val ret:int = (fgetc fh) 60 | val res:int = (ungetc ret fh) 61 | ret 62 | } 63 | 64 | def is_whitespace ch:char => bool = { 65 | match ch with 66 | ' ' => true 67 | '\t' => true 68 | '\n' => true 69 | '\v' => true 70 | '\r' => true 71 | '\l' => true 72 | '\b' => true 73 | else => false 74 | end 75 | } 76 | 77 | def take_while_white fh:ref[FILE] => char = { 78 | # take... on... me... 79 | # _softly_ take on me 80 | # take... me... on... 81 | # _softly_ take on me 82 | # I'll be... gone... 83 | # In a day or twoooooooo 84 | # _synths_ 85 | var ch:int = (fgetc fh) 86 | while (is_whitespace ch) do { 87 | set! ch $ fgetc fh 88 | } 89 | ch 90 | } 91 | 92 | def is_numeric ch:char => bool = { 93 | match ch with 94 | x given (&& (>= ch '0') (<= ch '9')) => true 95 | '.' => true 96 | else => false 97 | end 98 | } 99 | 100 | def is_symbolic ch:char => bool = { 101 | match ch with 102 | '(' => false 103 | ')' => false 104 | '[' => false 105 | ']' => false 106 | '"' => false 107 | '\'' => false 108 | y given (is_whitespace ch) => false 109 | else => true 110 | end 111 | } 112 | 113 | def is_null_or_endp obj:SExpression => bool = { 114 | match obj with 115 | (SExpression.EndList) => true 116 | (SExpression.EndArray) => true 117 | (SExpression.EndFile) => true 118 | else => false 119 | end 120 | } 121 | 122 | def sexpression_append_b dst:SExpresssion src:SExpression = { 123 | 124 | } 125 | 126 | def read_list fh:ref[FILE] => SExpression = { 127 | var ret:SExpression = (SExpression_Nil) 128 | var tmp:SExpression = (sexpression_read fh) 129 | while (not $ is_null_or_endp tmp) do { 130 | sexpression_append_b ret tmp 131 | } 132 | ret 133 | } 134 | 135 | def read_array fh:ref[FILE] => SExpression = { 136 | var ret:array[SExpression] = (make-array SExpression 128 $ SExpression.Nil) 137 | var ret_length:int = 128 138 | var tmp:SExpression = (sexpression_read fh) 139 | var idx:int = 0 140 | while (not $ is_null_or_endp tmp) do { 141 | when (< idx ret_length) do { 142 | set! (get ret idx) tmp 143 | set! 
idx $ + idx 1 144 | } 145 | } 146 | ret 147 | } 148 | 149 | def read_char fh:ref[FILE] => SExpression = { 150 | 151 | } 152 | 153 | def read_atom fh:ref[FILE] => SExpression = { 154 | 155 | } 156 | 157 | def read_atom_partial fh:ref[FILE] cur:string => SExpression = { 158 | 159 | } 160 | 161 | # it would be nice to enrich types here 162 | # say that this is an SExpression, but also 163 | # that the only types it will return are 164 | # from Int, Float, Rational, Complex 165 | def read_number fh:ref[FILE] => SExpression = { 166 | 167 | } 168 | 169 | def sexpression_read fh:ref[FILE] => ref[SExpression] = { 170 | val ch:char = (take_while_white fh) 171 | match ch with 172 | '(' => (read_list fh) 173 | '[' => (read_array fh) 174 | '"' => (read_string fh) 175 | '#' => (read_char fh) 176 | ')' => (SExpression.EndList) 177 | ']' => (SExpression.EndArray) 178 | x given (is_whitespace ch) => (sexpression_read fh) 179 | z given (is_numeric ch) => (read_number fh) 180 | a given (is_symbolic ch) => (read_atom fh) 181 | else => (SExpression.Error "unknown starting character") 182 | end 183 | } 184 | -------------------------------------------------------------------------------- /labs/rewrite_match_bind.carml: -------------------------------------------------------------------------------- 1 | # @(#) a match rewriter 2 | # @(#) recreating the AST type from carML/c here 3 | # @(#) as a carML record. Were I to clean-room 4 | # @(#) this properly, I'd obviously use variants. 5 | # @(#) However, this at least allows me to test 6 | # @(#) a rewriter without having to link the 7 | # @(#) entire carML/c code base here... 8 | 9 | record AST { 10 | tag:U32 # FIXME: need to define C-level types... 11 | value: string 12 | children:array[ref[AST]] # FIXME need to fix self-referential records... 13 | lenvalue: U32 14 | lenchildren: U32 15 | } 16 | 17 | def make_AST tag:int lenchildren:int children:array[ref[AST]] lenvalue:int value:string => ref[AST] = { 18 | var res:ref[AST] = (malloc $ sizeof AST) 19 | set! (-> res tag) tag 20 | set! (-> res lenchildren) lenchildren 21 | set! (-> res children) children 22 | if (<> value NULL) then 23 | (set! (-> res value) $ strdup value) 24 | else 25 | (set! (-> res value) NULL) 26 | set! (-> res lenvalue) lenvalue 27 | res 28 | } 29 | 30 | 31 | # make a specific type of AST, specifically a 32 | # ident node. 33 | def make_ident_AST name:string => ref[AST] = (make_AST 0 0 NULL (strlen name) name) 34 | 35 | def make_integer_AST value:string => ref[AST] = (make_AST 1 0 NULL (strlen value) value) 36 | 37 | def make_call_AST children:array[ref[AST]] len:int => ref[AST] = (make_AST 2 len children 0 NULL) 38 | 39 | def make_begin_AST children:array[ref[AST]] len:int => ref[AST] = (make_AST 3 len children 0 NULL) 40 | 41 | # make a shallow copy of an AST 42 | def copy_AST src:ref[AST] => ref[AST] = { 43 | (make_AST 44 | (-> src tag) 45 | (-> src lenchildren) 46 | (-> src children) 47 | (-> src lenvalue) 48 | (-> src value)) 49 | } 50 | 51 | # we could just gather names and return a list... 52 | def gather_names head:ref[AST] => array[ref[AST]] = { 53 | val len : int = (-> head lenchildren) 54 | val ret : array[ref[AST]] = (hmalloc $ * len $ sizeof ref[AST]) 55 | var idx : int = 1 56 | var cur : ref[AST] = NULL 57 | 58 | # again, foreach would be perfect here... 59 | # actually just a map... 60 | # and really, could do a list of tuple[ref[AST], int] 61 | # or the like... 62 | while (< idx len) do { 63 | set! cur $ get (-> head children) idx 64 | when (eq? 
(-> cur tag) TIDENT) do { 65 | if (not $ eq? (-> cur value) "_") then { 66 | set! (get ret idx) cur 67 | } else { 68 | set! (get ret idx) NULL 69 | } 70 | } 71 | set! idx $ + idx 1 72 | } 73 | return ret 74 | } 75 | 76 | # there should only be minimal amount of nodes to 77 | # iterate over, since we'll mainly be using this 78 | # in `given` forms and match-case bodies, but this 79 | # still means we're iterating over all forms to check 80 | # for a rewrite case N^2 or N^3 (if there's both a given 81 | # case guard *AND* a body) 82 | def need_rewritep name:ref[AST] dst:ref[AST] => bool = { 83 | # should combine these two... 84 | when (eq? (-> name tag) (-> dst tag)) do { 85 | when (not (strcmp (-> name value) (-> dst value))) do { 86 | return true 87 | } 88 | } 89 | 90 | when (> (-> dst lenchildren) 0) do { 91 | var idx:int = 0 92 | var rst:bool = false 93 | while (< idx (-> dst lenchildren)) do { 94 | set! rst $ need_rewritep name $ get (-> dst children) idx 95 | when rst do { 96 | return true 97 | } 98 | set! idx $ + idx 1 99 | } 100 | } 101 | 102 | return false 103 | } 104 | 105 | def rewrite_AST name:ref[AST] dst:ref[AST] => ref[AST] = { 106 | NULL 107 | } 108 | 109 | # walk an AST, freeing anything that 110 | # we've allocated with `malloc(3)` and 111 | # friends ("malloc(3) and friends": worst. children's show. ever) 112 | def free_AST head:ref[AST] = { 113 | var idx : int = 0 114 | when (<> NULL $ -> head value) do { 115 | free $ -> head value 116 | } 117 | 118 | when (<> 0 $ -> head lenchildren) do { 119 | # HAAAAAAAAATE while loops like this 120 | # should be a `for` loop, but i haven't 121 | # added support for compiling those yet 122 | while (< idx $ -> head lenchildren) do { 123 | free_AST $ get (-> head children) idx 124 | set! idx $ + idx 1 # probably should just have an inc! form 125 | } 126 | 127 | free $ -> head children 128 | } 129 | 130 | free head 131 | } 132 | 133 | # just for testing, add a main here 134 | # use that for running this whole test... 135 | # would be nice to have some sort of 136 | # conditional compilation language, ala 137 | # SRFI-0 138 | 139 | def main ac:int al:array[string] => int = { 140 | val test:ref[AST] = (make_ident_AST "test") 141 | val newtest:ref[AST] = (copy_AST test) 142 | val fun:ref[AST] = (make_ident_AST "+") 143 | val tmp0:array[ref[AST]] = [fun (make_integer_AST 10) test] 144 | val test0:ref[AST] = (make_call_AST tmp0 3) 145 | val tmp1:array[ref[AST]] = [test0 test] 146 | val test1:ref[AST] = (make_begin_AST tmp1 2) 147 | 148 | # would be nice to make this printf (if...) but the compiler 149 | # would probably generate terrible C for that rn... need to add 150 | # something to detect that 151 | if (<> test newtest) then (printf "they differ!\n") else (printf "wait... what?") 152 | 153 | # clean up our legos 154 | free_AST test 155 | free_AST newtest 156 | 157 | 0 158 | } 159 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overview of carML 2 | 3 | _carML_ (pronounced "car-mul", caramel), previously called _XL/29_ (eXperimental Language No. 29), is meant to take ideas from PowerLogo & Yeti, mixed in with my experiences of working 4 | with [Digamma](http://lojikil.com/p/digamma/). 
It's meant to be: 5 | 6 | - tiny 7 | - low barrier to entry 8 | - no ceiling 9 | - somewhat efficient 10 | - without requiring too much mental cost from the programmer 11 | - "secure" 12 | - implemented and understood quickly 13 | - borrowing ideas from ReasonML, Scala, BitC, OCaml, F#, Digamma, Yeti, PowerLogo, & Project Verona 14 | - I've been doing a *lot* of work in ReasonML of late for work, so it has definitely become a strong muse 15 | - ReasonML: types, general style, Modules 16 | - F#: Simpler Module style, exploration of advanced types (like FStar and Fomega give) 17 | - Scala: type syntax, general style 18 | - Digamma: focus on C output, focus on Unix integration, production readiness 19 | - Yeti: simplicity & minimalism 20 | - PowerLogo: little barrier to entry, no ceiling, and practical focus 21 | - Project Verona: [this comment on operator precedence in Verona got me thinking](https://lobste.rs/s/jol24u/better_operator_precedence#c_bwd4ij) 22 | 23 | 24 | This is at least the 3rd (and probably 5th) time that I've attempted such a thing, so don't mind me at all. This *is* the first time 25 | I've experimented publicly with my PLT designs tho. 26 | 27 | # Name 28 | 29 | I've experimented with multiple names; HotelML (named for "The HotelML" in NJ) & Melomys (a species of mouse) were two contenders. 30 | However, I was grabbing caramel macchiatos, which Starbucks labels as "carml mac", and thus named it after the drink. 31 | 32 | # Syntax 33 | 34 | Syntactically, it's pretty much a garden-variety ML dialect. Minimal keywords like `def` exist to visually break up code, but otherwise 35 | if you're familiar with Yeti, StandardML, OCaml, &c. it should be relatively straightforward. The one place it is *not* is in the 36 | standard base. 37 | 38 | - there are no operators in the standard base. Functions like `(+)` are replaced by `sum`. 39 | - there are no lists or other GC'd data structures in the standard base (this may change based on the implementation of memory). 40 | 41 | Remember, while the language is meant to be understood quickly, it's not meant to be a replacement for everything. Like Logo, I'll probably 42 | eventually add something to "preprocess" source code and have operators (or simply make an `expr` form to handle it). 43 | 44 | _Note_: I've been working on various ML dialects for several years, and carML is the one I've decided to run with. Interestingly, 45 | [Tulip](http://tuliplang.org/) is also the product of several years of tinkering, and has a very similar syntax. It might be interesting 46 | to combine the two (or use carML for the base instead of C), but it's exciting to me that there is movement in the ML space again, esp. 47 | considering that both languages are _not_ derived from existing ML dialects. [Yeti](https://mth.github.io/yeti/) is no longer alone! 48 | 49 | # Semantics 50 | 51 | - typical ML/PowerLogo/Lisp semantics. 52 | - types, row polymorphism, polymorphic variants 53 | - Minimal datatypes available from base. Need to figure out GC-less lists 54 | - Deque & Vector ala Rust 55 | - GC'd and GC-less data structures? 56 | - Lambda lifting not closure conversion (to save GC) 57 | - Regions + localized GC? 58 | - Continuations? How would that operate in a GC-less environment?
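
To make the "no operators in the standard base" point from the Syntax section concrete, here is a small, hedged sketch in the same style as the examples further down (illustrative only, not canonical library code):

    # `sum`/`mul` stand in for `+`/`*`
    def squareplus x : int y : int => int = (sum (mul x x) y)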
59 | 60 | # Output 61 | 62 | - Enyalios-style Human Readable C/Golang 63 | - Let the C/Golang compiler deal with many of the real optimizations 64 | - But we can do some simple ones like inlining, self-TCO, & rewriting HOFs 65 | 66 | # Examples 67 | 68 | # cannonical loop example 69 | def arrayIota arr:array[int] f:function[int => int] = begin 70 | def arrayIota' arr:array[int] fun:function[int => int] idx:int = begin 71 | if (< idx (array-length arr)) then { 72 | set-array-index! arr (fun idx) idx 73 | arrayIota' arr fun (add idx 1) 74 | } else () 75 | end 76 | arrayIota' arr f 0 77 | end 78 | 79 | # note, there is no _real_ reason to use begin 80 | # form here, but it makes it a bit more pretty. 81 | def arrayIota1 arr : array[int] fun : function[int => int] = { 82 | foreach-index! fn x = (set-array-index! arr (fun x) x) arr 83 | } 84 | 85 | # same as the above, but without the `{}` block 86 | def arrayIota2 arr:array[int] fun : function[int => int] = (foreach-index! fn x = (set-array-index! arr (fun x) x) arr) 87 | 88 | # but really, all we're doing is mapping some function in place... 89 | def arrayIota3 arr:array[int] fun:function[int => int] = (map! fun arr) 90 | 91 | def foo x : int => int = (add x 1) 92 | 93 | # constant: 94 | val f : array[int] = (make-array 10) 95 | # variable 96 | var g : array[int] = [0 1 2 3 4 5 6 7 8 9] 97 | 98 | arrayIota f foo; 99 | 100 | println f; 101 | 102 | # add: 103 | # ref, make 104 | 105 | See also the _examples_ directory for more indepth & up-to-date examples. 106 | 107 | # Implementations 108 | 109 | There are two major implementations: 110 | 111 | - `carmlc` which is just a compiler, written in C. 112 | - `carml` which is a self-hosting version. 113 | 114 | Note that, when supplied with no arguments, `carmlc` will actually enter a REPL. However, 115 | this REPL does not evaluate the supplied expressions for _values_, but rather for _code_: 116 | 117 | >>> def foo x = (sum x x) 118 | (define-function foo (x) 119 | (call (identifier sum) (identifier x) (identifier x))) 120 | 121 | The purpose is more to see what the IR/code will look like, rather than what the code does. It 122 | is meant more as a test bed than an actual REPL. 123 | 124 | # File naming conventions 125 | 126 | There are a few different "types" of files in the system 127 | 128 | - `*.c.carml` is a file that is written in carML, but targets C directly 129 | - `*.go.carml` is a file written in carML, but targets Golang directly 130 | - `*.carml` just a plain carML file, which makes no assumptions as to the underlying system 131 | 132 | Eventually I'd like to be able to include files via an SRFI-0-style mechanism, by which we can 133 | simply import which files we'd like to use at compile time based on flags and such 134 | 135 | # License 136 | 137 | See `LICENSE` for details (ISC license). 138 | -------------------------------------------------------------------------------- /src/carmlc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * @(#) the main header file, meant to keep definitions clean 3 | * @(#) and help make integration of generated code smoother 4 | */ 5 | #ifndef __CARMLC_H 6 | #define __CARMLC_H 7 | 8 | #include 9 | 10 | #ifdef DEBUG 11 | #define debugln printf("dying here on line %d?\n", __LINE__); 12 | #define dprintf(x, ...) fprintf(x, __VA_ARGS__) 13 | #define dwalk(x, y, z) walk(x, y, z) 14 | #define pwalk(x, y) walk(stderr, x, y) 15 | #else 16 | #define debugln 17 | #define dprintf(x, ...) 
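/* in non-DEBUG builds the debug helpers above and below expand to
 * nothing, so a call like dprintf(stderr, "tag %d\n", head->tag)
 * compiles away entirely (that call is only an illustration, not
 * something taken from these sources)
 */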
18 | #define dwalk(x, y, z) 19 | #define pwalk(x, y) 20 | #endif 21 | 22 | #define nil NULL 23 | #define nul '\0' 24 | #define YES 1 25 | #define NO 0 26 | 27 | #ifdef DEBUGMEMORY 28 | #define hmalloc malloc 29 | #else 30 | #define hmalloc GC_MALLOC 31 | #endif 32 | 33 | #define cwalk(fd, head, level) llcwalk(fd, head, level, NO) 34 | #define gwalk(fd, head, level) llgwalk(fd, head, level, NO) 35 | #define indent(fd, level) llindent(fd, level, NO) 36 | #define gindent(fd, level) llindent(fd, level, YES) 37 | 38 | /* Lexical analysis states. 39 | * basically, the tokenizer is a 40 | * statemachine, and this enum represents 41 | * all the state machine states. Because 42 | * we use a decision tree (encoded in an SM) 43 | * for parsing out keywords, there are lots 44 | * of state productions here. 45 | */ 46 | typedef enum { 47 | LSTART, LDEF0, LDEF1, LDEF2, LE0, LELSE1, 48 | LELSE2, LELSE3, LVAL0, LVAL1, LVAL2, LT0, 49 | LTHEN0, LTHEN1, LTHEN2, LTYPE0, LTYPE1, 50 | LTYPE2, LBEGIN0, LBEGIN1, LBEGIN2, LBEGIN3, 51 | LBEGIN4, LEQ0, LNUM0, LIDENT0, LEND0, LEND1, 52 | LEND2, LMATCH0, LCOMMENT, LMCOMMENT, LPOLY0, 53 | LPOLY1, LPOLY2, LPOLY3, LRECORD0, LIF0, LIF1, 54 | LRECORD1, LRECORD2, LRECORD3, LRECORD4, LRECORD5, 55 | LMATCH1, LMATCH2, LMATCH3, LMATCH4, LMATCH5, LEOF, 56 | LWHEN0, LWHEN1, LWHEN2, LWHEN3, LNEWL, LDO0, LDO1, 57 | LD0, LTRUE0, LTRUE1, LTRUE3, LTRUE4, LFALSE0, 58 | LFALSE1, LFALSE2, LFALSE3, LFALSE4, LFALSE5, 59 | LFN0, LFN1, LLET0, LLET1, LLET2, LLETREC0, LLETREC1, LLETREC2, 60 | LLETREC3, LLETREC4, LLETREC5, LLETREC6, LL0, LCHAR0, 61 | LCHAR1, LCHAR2, LCHAR3, LSTRT0, LSTRT1, LSTRT2, LSTRT3, 62 | LSTRT4, LSTRT5, LSTRT6, LINTT0, LINTT1, LINTT2, LFLOATT0, 63 | LFLOATT1, LFLOATT2, LFLOATT3, LFLOATT4, LFLOATT5, LINTT3, 64 | LARRAY0, LARRAY1, LARRAY2, LARRAY3, LARRAY4, LARRAY5, 65 | LB0, LI0, LUSE0, LF0, LC0, LR0, LW0, LOF0, LOF1, 66 | LDEQ0, LDEQ1, LDEQ2, LDEQ3, LDEC0, LDEC1, LDEC2, LDEC3, 67 | LDE0, LBOOL0, LBOOL1, LBOOL2, LREF0, LELSE0, LRE0, LTRUE2, 68 | LWITH0, LWITH1, LWITH2, LDEC4, LUSE1, LUSE2, LVAR2, LTAG0, 69 | LTAGIDENT, LWHILE1, LWHILE2, LWHILE3, LWHILE4, LWHILE5, 70 | LFOR0, LFOR1, LFOR2, LWH0, LTUPLE0, LTUPLE1, LTUPLE2, LTUPLE3, 71 | LTUPLE4, LFUNCTIONT0, LFUNCTIONT1, LFUNCTIONT2, LFUNCTIONT3, 72 | LFUNCTIONT4, LFUNCTIONT5, LFUNCTIONT6, LP0, LPROCEDURET0, 73 | LPROCEDURET1, LPROCEDURET2, LPROCEDURET3, LPROCEDURET4, 74 | LPROCEDURET5, LPROCEDURET6, LPROCEDURET7, LANY0, LANY1, 75 | LAND0, LAND1, LAND2, LA0, LAN0, LEXTERN0, LEXTERN1, LEXTERN2, 76 | LEXTERN3, LEXTERN4, LGIVEN0, LGIVEN1, LGIVEN2, LGIVEN3, 77 | LGIVEN4, LLOW0, LLOW1, LU0, LUNION0, LUNION1, LUNION2, 78 | LUNION3, LUNION4, LWALRUS0, LMODNS0, LMODULE0, LMODULE1, 79 | LMODULE2, LMODULE3, LMODULE4, LMODULE5, LM0 80 | } LexStates; 81 | 82 | /* AST tag enum. 
83 | * basically, this is all the AST types, and is 84 | * used both for determining the type of AST 85 | #define gindent(level) llindent(level, YES) 86 | * that is represented within the tree, as well as 87 | * returned from the tokenizer to say what type 88 | * of object it thinks is in buffer 89 | */ 90 | typedef enum { 91 | TDEF, TBEGIN, TEND, TEQUAL, TCOREFORM, // 4 92 | TIDENT, TCALL, TOPAREN, TCPAREN, TMATCH, // 9 93 | TIF, TELSE, TTHEN, TTYPE, TPOLY, TVAL, // 15 94 | TARRAY, TRECORD, TINT, TFLOAT, TSTRING, // 20 95 | TCHAR, TBOOL, TEQ, TSEMI, TEOF, TPARAMLIST, // 26 96 | TTDECL, TWHEN, TNEWL, TDO, TUNIT, TERROR, // 32 97 | TLETREC, TLET, TFN, TCASE, TSTRT, TCHART, // 38 98 | TINTT, TFLOATT, TCOMMENT, TREF, TDEQUET, // 42 99 | TBOOLT, TWITH, TOF, TDECLARE, TFALSE, // 47 100 | TTRUE, TUSE, TIN, TCOLON, TRECDEF, // 52 101 | TCOMPLEXTYPE, TCOMMA, TOARR, TCARR, // 56 102 | TARRAYLITERAL, TBIN, TOCT, THEX, // 60 103 | TARROW, TFATARROW, TCUT, TDOLLAR, // 64 104 | TPIPEARROW, TUSERT, TVAR, TTAG, // 68 105 | TPARAMDEF, TTYPEDEF, TWHILE, TFOR, // 72 106 | TTUPLET, TFUNCTIONT, TPROCEDURET, // 75 107 | TAND, TANY, TGUARD, TEXTERN, TGIVEN, // 80 108 | TLOW, TUNION, TWALRUS, TMODNS, TQIDENT, // 85 109 | TMODULE, // 86 110 | } TypeTag; 111 | 112 | struct _AST { 113 | TypeTag tag; 114 | /* there's going to have to be some 115 | * interpretation here, as we're not 116 | * breaking down the ASTs into discrete 117 | * objects, but rather just grouping them 118 | * into a generic a piece as possible. 119 | * so, for example, and IF block would 120 | * have a lenchildren == 3, no matter what 121 | * whereas a BEGIN block lenchildren == N, 122 | * where N >= 0. In the real thing, would 123 | * probably be best to make this a poly with 124 | * each member broken down, or an SRFI-57-style 125 | * struct. 126 | * in fact... if you have row-polymorphism, there's 127 | * no need for SRFI-57 inheritance, save for to 128 | * create convenience methods... hmm... :thinking_face: 129 | */ 130 | char *value; 131 | uint32_t lenvalue; 132 | uint32_t lenchildren; 133 | struct _AST **children; 134 | }; 135 | 136 | typedef struct _AST AST; 137 | 138 | /* Represent the return type of Readers as 139 | * `data EitherAST = Right (AST) | Left Int Int String`. 140 | * this allows us to return Either an error in the form 141 | * of a line number, error number, and message, *or* an 142 | * actual AST form. 143 | */ 144 | typedef enum _ASTEITHERTAG { ASTLEFT, ASTRIGHT } ASTEitherTag; 145 | 146 | typedef struct _ASTEither { 147 | ASTEitherTag tag; 148 | struct { 149 | int line; 150 | int error; 151 | char *message; 152 | } left; 153 | AST *right; 154 | } ASTEither; 155 | 156 | /* A simple type to hold an ASTEither and an 157 | * offset into the stream as to where we saw 158 | * said ASTEither. 
Originally this was just a 159 | * wrapper, but I decided to linearize it, looking 160 | * forward to what I'm toying wrt SRFI-57-style 161 | * records 162 | */ 163 | 164 | typedef enum _ASTOFFSETTAG { ASTOFFSETLEFT, ASTOFFSETRIGHT } ASTOffsetTag; 165 | 166 | typedef struct _ASTOFFSET { 167 | int offset; 168 | ASTOffsetTag tag; 169 | struct { 170 | int line; 171 | int error; 172 | char *message; 173 | } left; 174 | AST *right; 175 | } ASTOffset; 176 | 177 | // external defs, written in carML 178 | extern bool self_tco_p(const char *, AST *); 179 | extern AST *shadow_ident(AST *); 180 | extern AST *make_set_bang(AST *, AST *); 181 | extern AST *shadow_params(AST *, AST *); 182 | extern char *shadow_name(char *); 183 | extern char *get_parameter_name(AST *, int); 184 | extern AST *get_parameter_ident(AST *, int); 185 | extern AST *get_parameter_type(AST *, int); 186 | extern AST *define_shadow_params(AST *, AST *); 187 | extern AST *rewrite_tco(AST *); 188 | 189 | #endif 190 | -------------------------------------------------------------------------------- /attic/hash_table.c: -------------------------------------------------------------------------------- 1 | /* 2 | * @(#) a simple hash table for dictionaries, basically so I can implement environments in carML 3 | * @(#) uses FNV1a as a hash function, and just stores them linearly... 4 | * @(#) 5 | * @(#) the purpose of this and all the dictionary tests is to determine which of these is faster, 6 | * @(#) and at what point. If you think about it, most users will not actually store that much 7 | * @(#) in the environment frame, so there's little point in making it optimized for storing 8 | * @(#) huge amounts of data. Instead, it's generally easier to optimize this for storing small 9 | * @(#) amounts of data. I can never find it, but I recall reading somewhere that Clojure (used to?) 10 | * @(#) store dictionaries as two linear arrays, because for small dictionaries were more common and 11 | * @(#) that method was Good Enough(TM) for most purposes. 12 | * @(#) 13 | * @(#) we need to test a few different things: 14 | * @(#) 15 | * @(#) - average storage time 16 | * @(#) - average retrieval time 17 | * @(#) - worst-case storage time 18 | * @(#) - worst-case retrieval time 19 | * @(#) - are there collisions? 20 | * @(#) - performance of long names vs short names 21 | * @(#) - repeated stores 22 | * @(#) - repeated lookups 23 | * @(#) 24 | * @(#) and for each, we want to ignore the time taken to generate random names and 25 | * @(#) the like. We also want to keep track of how long it takes for short names vs 26 | * @(#) long names, and at what point does it no longer make sense. For example, at 27 | * @(#) what point does a linear table scan make sense for an evironment, and when 28 | * @(#) should it be upgraded to something meant to hold more data, like a trie? 29 | */ 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | struct _DICT { 36 | uint64_t keys[128]; 37 | int values[128]; 38 | }; 39 | 40 | typedef struct _DICT dict; 41 | 42 | uint64_t fnv1a(char *, uint32_t); 43 | 44 | /* 45 | * all of the dictionary objects should implement the following protocol: 46 | */ 47 | uint8_t store(dict *, char *, int); 48 | uint8_t exists(dict *, char *); 49 | uint8_t retrieve(dict *, char *, int *); 50 | uint8_t clear(dict *); 51 | 52 | /* 53 | * our test harness, also should be the same across versions... 54 | * @randname(buffer, length, uniform?) 
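 * buffer may be nil (a fresh one is malloc'd in that case), length
 * caps the generated name, and uniform? == YES forces full-length
 * names while NO picks a random length below that cap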
55 | */ 56 | char *randname(char *, int, uint8_t); 57 | 58 | /* 59 | * this could (and probably should) be the same across all 60 | * implementations... 61 | */ 62 | int 63 | main(void) { 64 | dict foo; 65 | // should make this configurable, so that we can 66 | // march this upwards until it no longer makes sense... 67 | char *randnames[32] = {0}; 68 | char buf[128] = {0}; 69 | uint32_t idx = 0, val = 0, time_idx = 0; 70 | // store all run times, then calculate average cases 71 | suseconds_t runtimes[256], avg_s_store, avg_r_store; 72 | suseconds_t avg_s_retrieve, avg_r_retrieve; 73 | // hold all our various interstitial timing information 74 | struct timeval stime, etime; 75 | 76 | for(; idx < 32; idx++) { 77 | // first pass, generate uniform names 78 | randnames[idx] = strdup(randname(buf, 128, YES)); 79 | } 80 | 81 | /* 82 | * test storage... 83 | */ 84 | for(idx = 0; idx < 32; idx++) { 85 | // need to time these and record the timings... 86 | val = arc4random(); 87 | gettimeofday(&stime, nil); 88 | store(foo, randnames[idx], val); 89 | gettimeofday(&etime, nil); 90 | runtimes[time_idx] = etime.tv_usec - stime.tv_usec; 91 | time_idx++; 92 | printf("%s:%ld\n", randnames[idx], val); 93 | } 94 | 95 | /* 96 | * test retrieval... 97 | */ 98 | for(idx = 0; idx < 32; idx++) { 99 | // need to time these and record the timings... 100 | gettimeofday(&stime, nil); 101 | retrieve(foo, randnames[idx], &val); 102 | gettimeofday(&etime, nil); 103 | runtimes[time_idx] = etime.tv_usec - stime.tv_usec; 104 | time_idx++; 105 | printf("%s:%ld\n", randnames[idx], val); 106 | } 107 | 108 | /* 109 | * repeatedly look up a value... 110 | */ 111 | val = arc4random_uniform(32); 112 | for(idx = 0; idx < 256; idx++) { 113 | gettimeofday(&stime, nil); 114 | if(exists(foo, randnames[val])) { 115 | printf("yes, it exists...\n"); 116 | } else 117 | printf("no, it doesn't!\n"); 118 | } 119 | gettimeofday(&etime, nil); 120 | runtimes[time_idx] = etime.tv_usec - stime.tv_usec; 121 | time_idx++; 122 | } 123 | 124 | /* 125 | * repeatedly store a value... 126 | */ 127 | tidx = arc4random_uniform(32); 128 | for(idx = 0; idx < 256; idx++) { 129 | // using val here so that I can remove 130 | // arc4random from the timing loop... 131 | val = arc4random(); 132 | gettimeofday(&stime, nil); 133 | store(foo, randnames[tidx], val); 134 | gettimeofday(&etime, nil); 135 | runtimes[time_idx] = etime.tv_usec - stime.tv_usec; 136 | time_idx++; 137 | } 138 | 139 | // ok, now that we've done that, stress test the whole thing... 140 | clear(foo); 141 | 142 | for(idx = 0; idx < 32; idx++) { 143 | free(randnames[idx]); 144 | } 145 | 146 | /* 147 | * larger smoke test... 148 | */ 149 | 150 | for(; idx < 32; idx++) { 151 | // first pass, generate uniform names 152 | randnames[idx] = strdup(randname(buf, 128, NO)); 153 | } 154 | 155 | /* 156 | * test storage... 157 | */ 158 | for(idx = 0; idx < 32; idx++) { 159 | // need to time these and record the timings... 160 | val = arc4random(); 161 | store(foo, randnames[idx], val); 162 | printf("%s:%ld\n", randnames[idx], val); 163 | } 164 | 165 | /* 166 | * test retrieval... 167 | */ 168 | for(idx = 0; idx < 32; idx++) { 169 | // need to time these and record the timings... 170 | retrieve(foo, randnames[idx], &val); 171 | printf("%s:%ld\n", randnames[idx], val); 172 | } 173 | 174 | /* 175 | * repeatedly look up a value... 
176 | */ 177 | val = arc4random_uniform(32); 178 | for(idx = 0; idx < 256; idx++) { 179 | if(exists(foo, randnames[val])) { 180 | printf("yes, it exists...\n"); 181 | } else 182 | printf("no, it doesn't!\n"); 183 | } 184 | } 185 | 186 | /* 187 | * repeatedly store a value... 188 | */ 189 | val = arc4random_uniform(32); 190 | for(idx = 0; idx < 256; idx++) { 191 | store(foo, randnames[val], arc4random()); 192 | } 193 | 194 | clear(dict); 195 | 196 | for(idx = 0; idx < 32; idx++) { 197 | free(randnames[idx]); 198 | } 199 | 200 | return 0; 201 | } 202 | 203 | uint64_t 204 | fnv1a(char *key, uint32_t len) 205 | { 206 | uint64_t hash = 14695981039346656037; 207 | uint32_t idx = 0; 208 | for(; idx < len; idx++) 209 | { 210 | hash ^= key[idx]; 211 | hash *= 1099511628211; 212 | } 213 | return hash; 214 | } 215 | 216 | char * 217 | randname(char *buf, size_t maxlength, uint8_t uniformp) { 218 | char *rc = nil; 219 | uint32_t base = 0, coin_flip = 0; 220 | size_t len = maxlength; 221 | 222 | if(buf != nil) { 223 | rc = buf; 224 | } else { 225 | rc = (char *)malloc(sizeof(char) * maxlength); 226 | if(!rc) { 227 | return nil; 228 | } 229 | } 230 | 231 | if(uniformp == NO) { 232 | len = arc4random_uniform(maxlength); 233 | } 234 | 235 | for(size_t idx = 0; idx < len; idx++) { 236 | base = arc4random_uniform(26); 237 | coin_flip = arc4random_uniform(1); 238 | if(coinflip == 0) { 239 | rc[idx] = 'a' + base; 240 | } else { 241 | rc[idx] = 'A' + base; 242 | } 243 | } 244 | 245 | return rc; 246 | } 247 | -------------------------------------------------------------------------------- /labs/let_rewrite.carml: -------------------------------------------------------------------------------- 1 | #@(#) a simple let/let-rec rewriter 2 | #@(#) rewrite let/let-rec ASTs to 3 | #@(#) simple val nodes 4 | #@(#) Sadly, this is all very imperative 5 | #@(#) at the moment, but we'll get there... 6 | 7 | record NameMap { 8 | len : int # length of names/vals 9 | offset : int # current offset into names/vals 10 | names : ref[array[string]] 11 | vals : ref[array[string]] 12 | } 13 | 14 | # specializing Option here 15 | # myself, because carML's 16 | # allocation strategy hasn't 17 | # *really* been decided yet... 18 | type OptionString { 19 | SomeString string 20 | NoString 21 | } 22 | 23 | def add_rewrite name:string new_name:string rewrites:ref[NameMap] => bool = { 24 | val len : int = (-> rewrites len) 25 | val off : int = (-> rewrites offset) 26 | 27 | when (< off len) do { 28 | set! (get (-> rewrites names) off) name 29 | set! (get (-> rewrites vals) off) new_name 30 | set! (-> ret off) $ + off 1 31 | return true 32 | } 33 | 34 | false 35 | } 36 | 37 | def init_rewrite => ref[NamedMap] = { 38 | var ret : ref[NamedMap] = (hmalloc $ sizeof NamedMap) 39 | val len : int = 64 40 | # again, this could be so much nicer if 41 | # we had decent constructors *or* a monadic 42 | # interface to construction... 43 | set! (-> ret len) len 44 | set! (-> ret offset) 0 45 | # would be interesting to have a "new" semantic for this stuff... 46 | # something like Rust, with it's Vec::new or VecDeque::new 47 | set! (-> ret names) $ hmalloc $ mul len $ sizeof array[string] 48 | set! 
(-> ret vals) $ hmalloc $ mul len $ sizeof array[string] 49 | ret 50 | } 51 | 52 | def fresh_name src:string => string = { 53 | val len : int = (+ 10 $ strlen src) 54 | var f : string = (hmalloc $ mul len $ sizeof char) 55 | snprintf f "%s%d" len src $ arc4random 56 | f 57 | } 58 | 59 | def find_name name:string rewrites:ref[NameMap] => ref[OptionString] = { 60 | var idx : int = 0 61 | 62 | while (< idx (-> rewrites len)) do { 63 | when (not $ strcmp (get (-> rewrites names) idx) name) do { 64 | return (OptionString_SomeString_ref $ get (-> rewrites vals) idx) 65 | } 66 | set! idx $ + idx 1 67 | } 68 | 69 | (OptionString_NoString_ref) 70 | } 71 | 72 | def rewrite_names head:ref[AST] rewrites:NameMap => ref[AST] = { 73 | val v : int = (-> head tag) 74 | var x : int = 0 75 | var idx : int = 0 76 | var name : ref[OptionString] = (SomeString_NoString_ref) 77 | var ret : ref[AST] = nil 78 | match v with 79 | TIDENT => { 80 | set! name $ find_name (-> head value) rewrites 81 | set! x $ -> name tag 82 | match name with 83 | (OptionString.SomeString x) => (make_ident $ -> name $ -> members $ -> SOMESTRING_t m_1) 84 | else => head 85 | end 86 | } 87 | else => { 88 | set! ret $ hmalloc $ sizeof AST 89 | set! (-> ret lenchildren) (-> head lenchildren) 90 | set! (-> ret tag) (-> head tag) 91 | set! (-> ret children) $ hmalloc $ mul (-> ret lenchildren) $ sizeof ref[AST] 92 | # this would be so much cleaner as a map form over 93 | # ASTs 94 | while (< idx $ -> head lenchildren) do { 95 | # need to copy each node in the AST to 96 | # the return list 97 | set! (get (-> ret children) idx) (rewrite_names (get (-> head children) idx) rewrites) 98 | # XXX a low-level `inc!` form would be nice... 99 | set! idx $ + idx 1 100 | } 101 | ret 102 | } 103 | end 104 | } 105 | 106 | def let_ident_name head:ref[AST] => ref[OptionString] = { 107 | val tag : int = (-> head tag) 108 | match tag with 109 | TLET => (OptionString_SomeString_ref $ get (-> head children) 0) #XXX: I hate how this reads... 110 | else => (OptionString_NoString_ref) 111 | end 112 | } 113 | 114 | def let_ident_value head:ref[AST] => ref[AST] = { 115 | val tag : int = (-> head tag) 116 | match tag with 117 | TLET => (get (-> head children) 1) #XXX: I hate how this reads... 118 | else => nil 119 | end 120 | } 121 | 122 | def make_val name:string value:ref[AST] => ref[AST] = { 123 | # this would be so much nicer with a monadic interface 124 | var ret : ref[AST] = (hmalloc $ sizeof AST) 125 | set! (-> ret tag) TVAL 126 | set! (-> ret lenchildren) 2 127 | set! (-> ret children) $ hmalloc $ mul 2 $ sizeof AST 128 | set! (get (-> ret children) 0) name 129 | set! (get (-> ret children) 1) value 130 | ret 131 | } 132 | 133 | def make_ident name:string => ref[AST] = { 134 | var ret : ref[AST] = (hmalloc $ sizeof AST) 135 | set! (-> ret tag) TIDENT 136 | set! (-> ret lenchildren) 0 137 | set! (-> ret value) name 138 | ret 139 | } 140 | 141 | def make_begin_pair binding:ref[AST] body:ref[AST] => ref[AST] = { 142 | var ret : ref[AST] = (hmalloc $ sizeof AST) 143 | val tag : int = (-> body tag) 144 | set! (-> ret tag) TBEGIN 145 | match tag with 146 | TBEGIN => { 147 | val len : int = (+ 1 $ -> body lenchildren) 148 | var idx : int = 0 149 | set! (-> ret lenchildren) len 150 | set! (-> ret children) $ hmalloc $ mul len $ sizeof AST 151 | # another situation where foreach would be 152 | # perfect for... 153 | while (< idx len) do { 154 | set! (get (-> ret children) idx) (get (-> body children) idx) 155 | set! 
idx $ + idx 1 156 | } 157 | } 158 | else => { 159 | set! (-> ret lenchildren) 2 160 | set! (-> ret children) $ hmalloc $ mul 2 $ sizeof AST 161 | set! (get (-> ret children) 0) binding 162 | set! (get (-> ret children) 1) body 163 | } 164 | end 165 | ret 166 | } 167 | 168 | def let_to_val head:ref[AST] rewrites:ref[NameMap] => ref[AST] = { 169 | # the code here is very imperative... getting some handle 170 | # on for/foreach/map/reduce/filter/take vis-a-vis Deques 171 | # and the like will be hugely important for fixing the 172 | # readability here 173 | var ret : ref[AST] = nil 174 | var new_binding : ref[AST] = nil 175 | var new_body : ref[AST] = nil 176 | var idx : int = 0 177 | var new_name : string = nil 178 | val tag : int = (-> head tag) 179 | match tag with 180 | # matching multiple cases with | would 181 | # be nice... 182 | # so would destructuring binds 183 | TLET => { 184 | set! new_name $ fresh_name $ let_ident_name head 185 | # we technically need to do rewrites for here as well.. 186 | set! new_binding $ new_val new_name $ let_ident_value head rewrites 187 | # we add this last because the name may shadow another name 188 | # elsewhere... 189 | add_rewrite (let_ident_name head) new_name rewrites 190 | set! new_body $ rewrite_names head rewrites 191 | make_begin_pair new_binding new_body 192 | } 193 | else => { 194 | # thinking that we could iterate over each 195 | # child and see if it is a TLET 196 | set! ret $ hmalloc $ sizeof AST 197 | set! (-> ret lenchildren) (-> head lenchildren) 198 | set! (-> ret tag) (-> head tag) 199 | set! (-> ret children) $ hmalloc $ mul (-> ret lenchildren) $ sizeof ref[AST] 200 | # this would be so much cleaner as a map form over 201 | # ASTs; need to look at this a bit more, but it seems to make sense... 202 | while (< idx $ -> head lenchildren) do { 203 | # need to copy each node in the AST to 204 | # the return list 205 | set! (get (-> ret children) idx) (let_to_val (get (-> head children) idx) rewrites) 206 | set! idx $ + idx 1 207 | } 208 | ret 209 | } 210 | end 211 | } 212 | 213 | # it would be interesting to have some nano-pass 214 | # signature for these sorts of things that made 215 | # these sorts of optimizations easy to handle 216 | # maybe something like this? 217 | def nanopass_let head:ref[AST] => ref[AST] = { 218 | var holder : ref[NameMap] = (init_rewrite) 219 | val tag : int = (-> head tag) 220 | let_to_val head holder 221 | } 222 | -------------------------------------------------------------------------------- /labs/woodchips.carml: -------------------------------------------------------------------------------- 1 | # @(#) a simple woodchips simulation 2 | # @(#) we have a world, and ants that have 3 | # @(#) simple rules: wander until you see a woodchip 4 | # @(#) - if you have a woodchip, put down your woodchip 5 | # @(#) - if you don't have a woodchip, pick that one up 6 | # 7 | # 8 | # This also raises that I need to *finally* do certain things 9 | # - foreach and array fusion 10 | # - figure out a memory model so that `ref` isn't required 11 | # (basically, allow the compiler to figure out when we want a ref, 12 | # unless we *say explicitly* that we want a ref) 13 | # - get make, make-array, make-string, &c. working 14 | # - Just generally make things more functional, less imperative 15 | # - figure out if I'm really going to keep types and records 16 | # (having both seems redundant, and we could have a smaller language...) 
17 | # - VLAs need to add a $NAME_len param to lambdas 18 | # - need to fix `for` loop as well as `foreach` and co 19 | # - Fix `void` detection for adding a `return` in C 20 | # - Fix the ability of match & co to return a value 21 | # (currently this leads to a lot of code redundancy, 22 | # `set! foo $ match x with ...` 23 | # is so much cleaner than: 24 | # ``` 25 | # match x with 26 | # 0 => set! foo... 27 | # ``` 28 | # ) 29 | 30 | record SimMatrix { 31 | x:int 32 | y:int 33 | length:int 34 | data:ref[char] 35 | } 36 | 37 | record Ant { 38 | x:int 39 | y:int 40 | carrying:bool 41 | direction:int # which "direction" (cardinal) is the ant facing? 42 | } 43 | 44 | def make_SimMatrix x:int y:int => ref[SimMatrix] = { 45 | var ret:ref[SimMatrix] = (hmalloc $ sizeof SimMatrix) 46 | val total:int = (* x y) 47 | set! ret->x x 48 | set! ret->y y 49 | set! ret->length total 50 | set! ret->data $ hmalloc $ * total $ sizeof char 51 | ret 52 | } 53 | 54 | def make_Ant max_x:int max_y:int => ref[Ant] = { 55 | var ret:ref[Ant] = (hmalloc $ sizeof Ant) 56 | var rndx:int = (arc4random_uniform max_x) 57 | var rndy:int = (arc4random_uniform max_y) 58 | set! ret->x rndx 59 | set! ret->y rndy 60 | set! ret->direction $ arc4random_uniform 4 61 | ret 62 | } 63 | 64 | # I *really* need to work on inlining... 65 | # helper methods like this could be easily inlined 66 | def get_SimMatrix x:int y:int mat:ref[SimMatrix] => char = { 67 | # XXX (lojikil): 68 | # this actually raises an interesting problem 69 | # when you say (-> mat x) do you mean the *NAME* x 70 | # or the scoped variable x? There almost has to be 71 | # a distinction of forms that operate on variables and 72 | # those that operate on the values those variables 73 | # hold 74 | # XXX (lojikil): 75 | # when I was originally designing things, I was heading 76 | # towards a SRFI-9/SRFI-57/Haskell style system wherein 77 | # Records have named accessors, like types do. Then those 78 | # are rewritten to C. Right now, I'll use C-style accessors 79 | # similar to OCaml, but I do like the named accessor style 80 | # better. Why? Because it can be treated like a function 81 | # that is rewritten to a low-level accessor based on the 82 | # typing decisions made by the compiler. This also supports 83 | # making `type` and `record` close to equivalent, since there's 84 | # no *real* need to have a distinction there; a record could 85 | # just be a type with no other variants. 86 | 87 | var offset:int = (* x y) 88 | when (<= mat->length offset) do { 89 | set! offset $ - offset mat->length 90 | } 91 | get mat->data offset 92 | } 93 | 94 | def set_SimMatrix x:int y:int mat:ref[SimMatrix] value:char = { 95 | var offset:int = (* x y) 96 | when (<= mat->length offset) do { 97 | set! offset $ - offset mat->length 98 | } 99 | set! (get mat->data offset) value 100 | } 101 | 102 | def calc_offset x:int y:int mat:ref[SimMatrix] => int = { 103 | var offset:int = (* x y) 104 | when (<= mat->length offset) do (set! offset $ - offset mat->length) 105 | offset 106 | } 107 | 108 | # XXX (lojikil) I do need to fix detection of 109 | # `return` in functions that are void and such 110 | # also, make it so that you can call something 111 | # that technically returns (like printf) but ignore 112 | # that return. I dislike `let () = (printf ...)` 113 | # but maybe that isn't a terrible idea either... 114 | def print_SimMatrix mat:ref[SimMatrix] = { 115 | var idx:int = 0 116 | while (< idx mat->length) do { 117 | when (&& (eq? 
0 $ % idx mat->x) (!= idx 0)) do { 118 | printf "\n" 119 | } 120 | printf "%d " $ get mat->data idx 121 | set! idx $ + idx 1 122 | } 123 | printf "\n" 124 | } 125 | 126 | def construct_board mat:ref[SimMatrix] = { 127 | # NOTE we don't want more than 30% of the board 128 | # to be woodchips 129 | val upperbound:int = (/ mat->length 3) 130 | val totalchipcount:int = (arc4random_uniform upperbound) 131 | var idx:int = 0 132 | var chipcount:int = 0 133 | printf "woodchip count: %d\n" totalchipcount 134 | while (< idx mat->length) do { 135 | when (&& (< chipcount totalchipcount) (>= 20 $ arc4random_uniform 100)) do { 136 | # if we haven't exceeded the 30% of the board being woodchips and our 137 | # guess exceeds some threshold, set a woodchip here 138 | set! (get mat->data idx) 2 139 | set! chipcount $ + chipcount 1 140 | } 141 | set! idx $ + idx 1 142 | } 143 | } 144 | 145 | def place_ant ant:ref[Ant] mat:ref[SimMatrix] = { 146 | var offset:int = (* ant->x ant->y) 147 | val total:int = mat->length 148 | var value:char = 0 149 | var res:char = 0 150 | when (> offset total) do { 151 | set! offset $ - offset total 152 | } 153 | set! value $ get mat->data offset 154 | match value with 155 | 0 => (set! res 1) 156 | 1 => (set! res 1) # can there be two ants? 157 | 2 => (set! res 4) 158 | 3 => (set! res 3) # can there be two ants? 159 | 4 => (set! res 4) 160 | end 161 | set! (get mat->data offset) res 162 | } 163 | 164 | # we calculate a value (either x or y) to move 165 | # an object (such as an ant) and support wrapping 166 | # the object back around to max/min 167 | def constrained_offset v:int max_v:int => int = { 168 | mod v max_v 169 | } 170 | 171 | def check_woodchip ant:ref[Ant] mat:ref[SimMatrix] = { 172 | var woodchip_loc:int = 0 173 | match ant->direction with 174 | 0 => (set! woodchip_loc $ mul ant->x $ mod (sub ant->y 1) mat->y) 175 | 1 => (set! woodchip_loc $ mul ant->x $ mod (add ant->y 1) mat->y) 176 | 2 => (set! woodchip_loc $ mul ant->y $ mod (add ant->x 1) mat->x) 177 | 3 => (set! woodchip_loc $ mul ant->y $ mod (sub ant->x 1) mat->x) 178 | end 179 | 180 | # - if we are carrying a woodchip, we need to place the current woodchip 181 | # down and then change to a new direction 182 | # - if we are *not* carrying a woodchip, pick this one up 183 | 184 | } 185 | 186 | def move_ant ant:ref[Ant] mat:ref[SimMatrix] = { 187 | match ant->direction with 188 | 0 => 0 189 | 1 => 1 190 | 2 => 2 191 | 3 => 3 192 | end 193 | } 194 | 195 | # TODO (lojikil): I need to get VLAs working as expected, so that 196 | # we don't have to pass in an explicit length parameter. For now, 197 | # let's just simulate with *one* ant 198 | def run_simulation ant:ref[Ant] mat:ref[SimMatrix] rnds:int = { 199 | var cnt:int = 0 200 | var idx:int = 0 201 | while (< cnt rnds) do { 202 | check_woodchip ant mat 203 | move_ant ant mat 204 | set! cnt $ + cnt 1 205 | } 206 | } 207 | 208 | # we can encode state: 209 | # 0 - nothing here 210 | # 1 - an ant here 211 | # 2 - a woodchip 212 | # 3 - an ant carrying a woodchip 213 | # 4 - an ant *and* a woodchip (the ant just set it down) 214 | # the tough part is, what direction is that ant heading? 
215 | # the update process for each ant's movement could determine the 216 | # direction the ant is moving prior to "looking" for woodchips 217 | # ants can only pick up woodchips in the direction they can "see" 218 | # which is forwards 219 | # 220 | # eventually it would be fun to make this into a "turtles" library 221 | # so that folks can do some simple simulations in carML too, esp 222 | # since carML also uses my work on tatwyd (a PowerLogo-alike) 223 | 224 | def main ac:int al:ref[string] => int = { 225 | (GC_INIT) 226 | var mat:ref[SimMatrix] = (make_SimMatrix 10 10) 227 | var ant:ref[Ant] = (make_Ant 10 10) 228 | printf "x: %d\n" mat->x 229 | printf "y: %d\n" mat->y 230 | printf "length: %d\n" mat->length 231 | print_SimMatrix mat 232 | construct_board mat 233 | place_ant ant mat 234 | print_SimMatrix mat 235 | 0 236 | } 237 | -------------------------------------------------------------------------------- /src/self_tco.c: -------------------------------------------------------------------------------- 1 | /* 2 | * @(#) self-Tail Call Optimization (TCO) code, generated from 3 | * @(#) _src/self_tco.c.carml_ 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | extern char *hstrdup(const char *); 14 | 15 | // NOTE: this code is generated from 16 | // src/self_tco.c.carml, so if we really need to change this, 17 | // we probably should fix that file first... 18 | bool 19 | self_tco_p(const char * name, AST * src){ 20 | bool res = false; 21 | int idx = 0; 22 | int tag = 0; 23 | if(src == NULL) { 24 | return false;; 25 | } else { 26 | tag = src->tag; 27 | } 28 | 29 | if(tag == TDEF) { 30 | return self_tco_p(name, src->children[1]); 31 | } else if(tag == TFN) { 32 | return self_tco_p(name, src->children[1]); 33 | } else if(tag == TCALL) { 34 | return !strcmp(name, src->children[0]->value); 35 | } else if(tag == TBEGIN) { 36 | idx = (src->lenchildren - 1); 37 | return self_tco_p(name, src->children[idx]); 38 | } else if(tag == TWHEN) { 39 | return self_tco_p(name, src->children[1]); 40 | } else if(tag == TIF) { 41 | return (self_tco_p(name, src->children[1])) || (self_tco_p(name, src->children[2])); 42 | } else if(tag == TMATCH) { 43 | idx = 1; 44 | while(idx < src->lenchildren){ 45 | if(self_tco_p(name, src->children[idx])) { 46 | return true;; 47 | } else { 48 | idx = (idx + 2); 49 | } 50 | 51 | } 52 | 53 | return false; 54 | } else { 55 | return false; 56 | } 57 | 58 | } 59 | 60 | AST * 61 | shadow_ident(AST * src){ 62 | AST * ret = hmalloc(sizeof(AST)); 63 | ret->tag = TIDENT; 64 | ret->lenchildren = 0; 65 | ret->value = shadow_name(src->value); 66 | return ret; 67 | } 68 | 69 | AST * 70 | make_set_bang(AST * ident, AST * value){ 71 | AST * ret = hmalloc(sizeof(AST)); 72 | AST * setident = hmalloc(sizeof(AST)); 73 | setident->lenchildren = 0; 74 | setident->tag = TIDENT; 75 | setident->value = hmalloc(5 * sizeof(char)); 76 | stpncpy(setident->value, "set!", 5); 77 | ret->tag = TCALL; 78 | ret->lenchildren = 3; 79 | ret->children = hmalloc(3 * sizeof(AST * )); 80 | ret->children[0] = setident; 81 | ret->children[1] = ident; 82 | ret->children[2] = value; 83 | return ret; 84 | } 85 | 86 | AST * 87 | shadow_params(AST * src, AST * impl){ 88 | AST * ret = hmalloc(sizeof(AST)); 89 | const int clen = (src->lenchildren - 1) * 2; 90 | const int ilen = impl->lenchildren; 91 | int idx = 0; 92 | int sidx = 1; 93 | int base = 0; 94 | AST * shadow = nil; 95 | AST * param = nil; 96 | AST * result = nil; 97 | ret->tag = TBEGIN; 98 | ret->lenchildren = 
clen; 99 | ret->children = hmalloc(clen * sizeof(AST * )); 100 | while(idx < ilen){ 101 | param = get_parameter_ident(impl, idx); 102 | shadow = shadow_ident(param); 103 | result = src->children[sidx]; 104 | ret->children[idx] = make_set_bang(shadow, result); 105 | idx = (idx + 1); 106 | sidx = (sidx + 1); 107 | } 108 | 109 | base = idx; 110 | idx = 0; 111 | while(idx < ilen){ 112 | param = get_parameter_ident(impl, idx); 113 | shadow = shadow_ident(param); 114 | ret->children[idx + base] = make_set_bang(param, shadow); 115 | idx = (idx + 1); 116 | } 117 | 118 | return ret; 119 | } 120 | 121 | char * 122 | shadow_name(char * name){ 123 | char * ret = hmalloc((sizeof(char) * (3 + strlen(name)))); 124 | stpcpy(ret, name); 125 | strcat(ret, "_sh"); 126 | return ret; 127 | } 128 | 129 | char * 130 | get_parameter_name(AST * src, int idx){ 131 | const AST * ret = nil; 132 | if(idx < src->lenchildren) { 133 | ret = src->children[idx]; 134 | ret = ret->children[0]; 135 | return ret->value; 136 | } 137 | 138 | return ""; 139 | } 140 | 141 | AST * 142 | get_parameter_ident(AST * src, int idx){ 143 | const AST * ret = nil; 144 | if(idx < src->lenchildren) { 145 | ret = src->children[idx]; 146 | return ret->children[0]; 147 | } 148 | 149 | return nil; 150 | } 151 | 152 | AST * 153 | get_parameter_type(AST * src, int idx){ 154 | const AST * ret = nil; 155 | if(idx < src->lenchildren) { 156 | ret = src->children[idx]; 157 | return ret->children[1]; 158 | } 159 | 160 | return nil; 161 | } 162 | 163 | AST * 164 | define_shadow_params(AST * src, AST * body){ 165 | AST * ret = hmalloc(sizeof(AST)); 166 | AST * tmp = nil; 167 | AST * * vbuf = (AST * *)hmalloc(sizeof(AST * ) * 64); 168 | int idx = 0; 169 | int cidx = 0; 170 | int capacity = 64; 171 | int length = 0; 172 | ret->tag = TBEGIN; 173 | ret->lenchildren = src->lenchildren; 174 | ret->children = hmalloc(src->lenchildren * sizeof(AST * * )); 175 | while(idx < src->lenchildren){ 176 | tmp = hmalloc(sizeof(AST)); 177 | tmp->tag = TVAR; 178 | tmp->lenchildren = 2; 179 | tmp->value = shadow_name(get_parameter_name(src, idx)); 180 | tmp->children = hmalloc(2 * sizeof(AST * )); 181 | tmp->children[0] = get_parameter_ident(src, idx); 182 | tmp->children[1] = get_parameter_type(src, idx); 183 | vbuf[idx] = tmp; 184 | idx = (idx + 1); 185 | } 186 | 187 | length = idx; 188 | idx = 0; 189 | while(idx < body->lenchildren){ 190 | tmp = body->children[idx]; 191 | if((tmp->tag == TVAR) || (tmp->tag == TVAL)) { 192 | vbuf[length] = tmp; 193 | length = (length + 1); 194 | } 195 | 196 | idx = (idx + 1); 197 | } 198 | 199 | idx = 0; 200 | ret->lenchildren = (length + 1); 201 | ret->children = hmalloc((length + 1) * sizeof(AST * * )); 202 | while(idx < length){ 203 | ret->children[idx] = vbuf[idx]; 204 | idx = (idx + 1); 205 | } 206 | 207 | tmp = hmalloc(sizeof(AST)); 208 | tmp->tag = TWHILE; 209 | tmp->lenchildren = 2; 210 | tmp->children = hmalloc(2 * sizeof(AST * )); 211 | ret->children[length] = tmp; 212 | return ret; 213 | } 214 | 215 | AST * 216 | make_ident(char * src){ 217 | AST * ret = hmalloc(sizeof(AST)); 218 | ret->lenchildren = 0; 219 | ret->tag = TIDENT; 220 | ret->value = hstrdup(src); 221 | return ret; 222 | } 223 | 224 | AST * 225 | make_boolean(int original_value){ 226 | AST * ret = hmalloc(sizeof(AST)); 227 | ret->lenchildren = 0; 228 | ret->children = nil; 229 | if(original_value == 0) { 230 | ret->tag = TFALSE;; 231 | } else { 232 | ret->tag = TTRUE; 233 | } 234 | 235 | return ret; 236 | } 237 | 238 | bool 239 | simple_type_p(int src){ 240 | return 
(src == TIDENT) || (src == TSTRING) || (src == TINT) || (src == TFLOAT) || (src == TARRAYLITERAL) || (src == TCHAR) || (src == TBOOL) || (src == TTRUE) || (src == TFALSE) || (src == THEX) || (src == TOCT) || (src == TBIN); 241 | } 242 | 243 | AST * 244 | copy_body(AST * src, AST * self, uint8_t finalp){ 245 | AST * ret = hmalloc(sizeof(AST)); 246 | AST * tmp = NULL; 247 | AST * * buf = (AST * *)hmalloc(sizeof(AST * ) * 128); 248 | int sidx = 0; 249 | int bidx = 0; 250 | const int srccap = src->lenchildren; 251 | const int srctag = src->tag; 252 | ret->value = hstrdup(src->value); 253 | ret->tag = src->tag; 254 | if(srctag == TCALL) { 255 | if(finalp) { 256 | return shadow_params(src, self); 257 | } else { 258 | while(bidx < srccap){ 259 | buf[bidx] = copy_body(src->children[sidx], self, false); 260 | bidx = (bidx + 1); 261 | sidx = (sidx + 1); 262 | } 263 | } 264 | } else if((simple_type_p(srctag))) { 265 | if(finalp) { 266 | ret->tag = TCALL; 267 | ret->lenchildren = 2; 268 | ret->children = hmalloc(2 * sizeof(AST * )); 269 | buf[0] = make_ident("return"); 270 | buf[1] = make_ident(src->value); 271 | bidx = 2; 272 | } else { 273 | buf[bidx] = src; 274 | bidx = (bidx + 1); 275 | sidx = (sidx + 1); 276 | } 277 | } else if(srctag == TMATCH) { 278 | 1; 279 | } else if(srctag == TIF) { 280 | buf[0] = copy_body(src->children[0], self, false); 281 | buf[1] = copy_body(src->children[1], self, finalp); 282 | buf[2] = copy_body(src->children[2], self, finalp); 283 | bidx = 3; 284 | } else if(srctag == TWHEN) { 285 | ret->tag = TIF; 286 | ret->lenchildren = 3; 287 | ret->children = hmalloc(2 * sizeof(AST * )); 288 | buf[0] = copy_body(src->children[0], self, false); 289 | buf[1] = copy_body(src->children[1], self, finalp); 290 | tmp = hmalloc(sizeof(AST)); 291 | tmp->tag = TCALL; 292 | tmp->lenchildren = 1; 293 | tmp->children = hmalloc(sizeof(AST * )); 294 | tmp->children[0] = make_ident("return"); 295 | buf[2] = tmp; 296 | bidx = 3; 297 | } else if(srctag == TBEGIN) { 298 | while(bidx < srccap){ 299 | if((finalp) && (bidx == (srccap - 1))) { 300 | buf[bidx] = copy_body(src->children[sidx], self, finalp); 301 | } else { 302 | buf[bidx] = copy_body(src->children[sidx], self, false); 303 | } 304 | 305 | bidx = (bidx + 1); 306 | sidx = (sidx + 1); 307 | } 308 | } else { 309 | while(bidx < srccap){ 310 | buf[bidx] = copy_body(src->children[sidx], self, false); 311 | bidx = (bidx + 1); 312 | sidx = (sidx + 1); 313 | } 314 | } 315 | 316 | sidx = 0; 317 | ret->lenchildren = bidx; 318 | ret->children = hmalloc(bidx * sizeof(AST * )); 319 | while(sidx < bidx){ 320 | ret->children[sidx] = buf[sidx]; 321 | sidx = (sidx + 1); 322 | } 323 | 324 | return ret; 325 | } 326 | 327 | AST * 328 | rewrite_tco(AST * src){ 329 | const AST * params = define_shadow_params(src->children[0], src->children[1]); 330 | AST * ret = hmalloc(sizeof(AST)); 331 | AST * body = params->children[params->lenchildren - 1]; 332 | ret->tag = TDEF; 333 | ret->lenchildren = 3; 334 | ret->children = hmalloc(3 * sizeof(AST * * )); 335 | ret->value = src->value; 336 | ret->children[0] = src->children[0]; 337 | ret->children[2] = src->children[2]; 338 | ret->children[1] = params; 339 | body->children[0] = make_boolean(1); 340 | body->children[1] = copy_body(src->children[1], src->children[0], true); 341 | return ret; 342 | } 343 | -------------------------------------------------------------------------------- /labs/mini-walk.carml: -------------------------------------------------------------------------------- 1 | #@(#) a simple walker, 
testing out our SExpression reader 2 | #@(#) me: I'm tired of building a Lisp/Scheme interpreter, I want to build an ML 3 | #@(#) also me: [build meta circular interpreter for ML IR that uses SExprs] 4 | 5 | # just use alists for the env 6 | def muwalk src:SExpression env:SExpression => SExpression = { 7 | 8 | } 9 | 10 | # the more I write in here the more I realize that I really just need 11 | # to bite the bullet and add the simple stdlib that can be erased when 12 | # compiling down to the source language... 13 | # 14 | # here, accessors are definitely the thing that I want to have, for sure 15 | # 16 | # also, generally herein I've been writing more like I've TCO'd things 17 | # by hand, but it'll be nice to move towards normal accessor functions 18 | # 19 | # additionally, because I'm hand unrolling a lot of these rather than 20 | # relying on the TCO to do so, we end up with very imperative looking 21 | # lambdas. There's no real reason for this other than I know what's 22 | # going on under the covers, and so can ignore a lot of the issues 23 | # this can raise, but I really want to move away from this. Here too, 24 | # having named & default parameters would help; for example, the 25 | # `while` form below could be replaced just with a simple tail call 26 | # with increasing parameters. 27 | # 28 | # this also probably should be part of mini-sexpr, and not the 29 | # walker, but it's not terrible here either 30 | def sexpression_eqp s0:SExpression s1:SExpression => bool = { 31 | var idx:int = 0 32 | var llen:int = 0 33 | 34 | # NOTE these are all wrong; I ended up decomposing them 35 | # correctly in mini-interp, but I need that sort of introspection 36 | # in carML as well; generally, other languages force you to 37 | # match on it, and I wonder if I'm just overthinking the 38 | # shortcut case here. In Go, I ended up doing: 39 | # 40 | # [source,go] 41 | # ---- 42 | # if reflect.TypeOf(s0) != reflect.TypeOf(s1) { 43 | # return false 44 | # } 45 | # ---- 46 | # 47 | # which isn't _terrible_, but it would be nice if we could 48 | # do that in a simpler way. I really do think I may be overthinking 49 | # the issue here 50 | 51 | when (<> (of-constructor s0) (of-constructor s1)) do { 52 | # Let's use that here, and note that it isn't defined 53 | # what the result might be, so it's only useful for comparing 54 | # constructors... probably need a `constructor-of-int` and 55 | # `constructor-of-string` to support the various variations here 56 | return false 57 | } 58 | 59 | # Also, I need to use these types better 60 | # when deconstructing an ADT<=>Interface; for example, 61 | # in mini-interp, there are a LOT of places I had to add 62 | # a type cast, because the compiler isn't smart enough to 63 | # use those 64 | # 65 | # Interestingly, Haskell has a module, `Data.Data`, which 66 | # exposes a `toConstr` which can be used for that: 67 | # 68 | # `toConstr (BoolValue True) == toConstr (BoolValue False)` 69 | # 70 | # https://hackage.haskell.org/package/base-4.6.0.1/docs/Data-Data.html 71 | # https://stackoverflow.com/questions/10112733/haskell-simple-constructor-comparison-function 72 | match s0 with 73 | (SExpression.Nil) => true 74 | (SExpression.EndFile) => true 75 | (SExpression.EndList) => true 76 | (SExpression.EndArray) => true 77 | (SExpression.Atom _ _) => (eq? (. s0 m_1) (. s1 m_1)) 78 | (SExpression.Int _ _) => (eq? (. s0 m_1) (. s1 m_1)) 79 | (SExpression.Float _ _) => (eq? (. s0 m_1) (. s1 m_1)) 80 | (SExpression.Char _ _) => (eq? (. s0 m_1) (. 
s1 m_1)) 81 | (SExpression.Bool _ _) => (eq? (. s0 m_1) (. s1 m_1)) 82 | (SExpression.Error _ _) => (eq? (. s0 m_1) (. s1 m_1)) 83 | (SExpression.String _ _) => (eq? (. s0 m_1) (. s1 m_1)) 84 | (SExpression.List _ _) => { 85 | set! llen $ length $ . s0 m_1 86 | while (< idx llen) do { 87 | if (sexpression_eqp (get (. s0 m_1) idx) (get (. s1 m_1) idx)) then { 88 | set! idx $ + idx 1 89 | } else { 90 | return false 91 | } 92 | } 93 | return true 94 | } 95 | end 96 | } 97 | 98 | # ('a, list(('a, 'b))) => 'b 99 | def assoc src:SExpression dst:SExpression => SExpression = { 100 | var idx:int = 0 101 | # same as the above really; we need to check that we 102 | # actually have an SExpression.List here before 103 | # operating, and then cast it 104 | var llen:int = (length (. dst m_1)) 105 | while (< idx llen) do { 106 | when (sexpression_eqp (first (get (. dst m_1) idx)) src) do { 107 | return $ first $ rest $ get (. dst m_1) idx 108 | } 109 | set! idx $ + idx 1 110 | } 111 | # delineating Nil from Null is interesting... 112 | make-struct SExpression.Nil 113 | } 114 | 115 | # ('a, list(('a, 'b))) => bool 116 | def mem_assoc needle:SExpression src:SExpression => bool = { 117 | var idx:int = 0 118 | var llen:int = (length (. dst m_1)) 119 | while (< idx llen) do { 120 | when (sexpression_eqp (first (get (. dst m_1) idx)) src) do { 121 | return true 122 | } 123 | set! idx $ + idx 1 124 | } 125 | false 126 | } 127 | 128 | # ('a, list('a)) => list('a) 129 | def cons hd:SExpression dst:SExpression => SExpression = { 130 | 131 | } 132 | 133 | # list('a) => 'a 134 | def first hd:SExpression => SExpression = { 135 | match hd with 136 | (SExpression.List _ _) => { 137 | get (. hd m_1) 0 138 | } 139 | else => (make-struct SExpression.Error "first can only be applied to lists" 0) 140 | end 141 | } 142 | 143 | # list('a) => list('a) 144 | def rest l:SExpression => SExpression = { 145 | # having array/list primitives here would be great... 146 | # that's one thing I need to implement right away, is the basic 147 | # array/deque/string primitives 148 | var res:array[SExpression] = (make-array SExpression (- (length (. l m_1)) 1) SExpression.Null) 149 | var idx:int = 1 150 | var llen:int = (length (. l m_1)) 151 | while (< idx llen) do { 152 | set! (get res (- idx 1)) (get (. l m_1) idx) 153 | set! idx $ + idx 1 154 | } 155 | make-struct SExpression.List res 0 156 | } 157 | 158 | # (('a => 'a), list('a), list(('a, 'b))) => list('a) 159 | def map f:function[SExpression SExpression] l:SExpression e:SExpression => SExpression = { 160 | 161 | } 162 | 163 | def mapeval l:SExpression e:SExpression => SExpression = { 164 | var res:array[SExpression] = (make-array SExpression (length (. l m_1)) SExpression.Null) 165 | var idx:int = 0 166 | while (<> idx $ length $ . l m_1) do { 167 | set! (get res idx) (mueval (get l idx) e) 168 | set! idx $ + idx 1 169 | } 170 | make-struct SExpression.List res 0 171 | } 172 | 173 | def sexpression_boxint x:int => SExpression = { 174 | (make-struct SExpression.Int x 0) 175 | } 176 | 177 | def sexpression_boxbool b:bool => SExpression = { 178 | (make-struct SExpression.Bool b 0) 179 | } 180 | 181 | def mueval src:SExpression env:SExpression => SExpression = { 182 | match src with 183 | # NOTE (lojikil) 184 | # one of the first things I need to fix 185 | # in any new compiler going forward is 186 | # actualy binding things... and being able to match 187 | # nicely. Look at the pattern below: 188 | # 189 | # `(SExpression.Atom _ _) given (eq? (. 
m_1 hd) "define")` 190 | # 191 | # I need to fix things such that we can easily say: 192 | # 193 | # `SExpression.Atom "define" _) => ...` 194 | # 195 | # for one, it's much clearer what we intend, and for the other 196 | # it's more correct 197 | # 198 | 199 | (SExpression.List _ _) => { 200 | # here, we must unpack what the first member is, and 201 | # if it is something we can eval, we do so, otherwise 202 | # we just apply it 203 | var hd:SExpression = (get (. src m_1) 0) 204 | match hd with 205 | # I had confused the direction of `.` with the accessor methods 206 | # I wish carML already supported accessors... 207 | # Additionally, I ended up doing an inner match on the head 208 | # value in Go, because it required fewer casts, ala: 209 | # 210 | # [source] 211 | # ---- 212 | # match hd with 213 | # (SExpression_Atom _ _) => match (. hd m_1) with 214 | # "define" => ... 215 | # end 216 | # end 217 | # ---- 218 | # 219 | # because now there was just one type cast, `hd.(SExpression_Atom).m_1` 220 | # instead of many. Not really a big deal if we correctly generate code 221 | (SExpression.Atom _ _) given (eq? (. hd m_1) "define") => { 222 | # add the body with that atom to the env 223 | } 224 | (SExpression.Atom _ _) given (eq? (. hd m_1) "define-value") => { 225 | # add the body with that atom to the env 226 | } 227 | (SExpression.Atom _ _) given (eq? (. hd m_1) "define-mutable-value") => { 228 | # add the body with that atom to the env 229 | } 230 | (SExpression.Atom _ _) given (eq? (. hd m_1) "if") => { 231 | # eval the condition and either the THEN or ELSE clause 232 | } 233 | (SExpression.Atom _ _) given (eq? (. hd m_1) "call") => { 234 | muapply src env 235 | } 236 | # there are several things that will come in as a SExpression.List, but 237 | # aren't something we need to iterate over, like `(integer "10")` 238 | # we have two options here: 239 | # 240 | # . just punt this back to `muapply` to handle 241 | # . actually convert them to the correct SExpression type here 242 | # 243 | # TODO: decide on the above 244 | # could be a series of guard clauses like: 245 | # `(SExpression.Atom _ _) given (eq? (. m_1 hd) "integer") => {` 246 | # but for now let's just return the remaining portion 247 | else => { 248 | rest src 249 | } 250 | end 251 | } 252 | # TODO (lojikil): should these just be 253 | # a guard clause that detects simple types? 254 | # yes, yes they should, not even with a guard clause really 255 | else => src 256 | end 257 | } 258 | 259 | def muapply src:SExpression env:SExpression => SExpression = { 260 | # here we have basically two tasks: 261 | # 262 | # . iterate over the list that was passed to us, calling mueval for each 263 | # . looking up the hd of the list, and making sure it is something we can handle 264 | # 265 | # standard McCarthy-style really 266 | var lst:SExpression = (make-struct SExpression.Null) 267 | var hd:SExpression = (make-struct SExpression.Null) 268 | 269 | # so the nice thing about the language we're working with here is 270 | # that even tho we have first class functions and what not, they 271 | # are *always* behind a call or the like, so we don't need to *ever* 272 | # eval the head of the list, we can just: 273 | # 274 | # . check that we have a list 275 | # . map mueval over the body of the list 276 | # . return the function application, if we know it 277 | 278 | # ok, here we need to generate: 279 | # 280 | # . Golang: reflect.TypeOf(src).Name() == "SExpression_List" 281 | # . 
C: src.tag == SEXPRESSION_LIST 282 | # 283 | # basically the C version is equivalent to `of-constructor` 284 | # and the Golang version just adds `.Name()` 285 | when (<> (tag-of-constructor src) "SExpression_List") do { 286 | return src 287 | } 288 | 289 | # why `rest (rest src)`? because remember, our calls 290 | # come in like: `(call (identifier "+") (integer "10") (integer "20"))` 291 | # TODO: should make fused versions of these... 292 | set! lst $ mapeval (rest src) env 293 | set! hd $ first lst 294 | 295 | # let's make a calculator first 296 | # so... in the Go code I wrote, I ended up having 297 | # to do a LOT of type dispatch with the below, because 298 | # we can't just assume they are actually SExpression_Int, for example 299 | match hd with 300 | "+" => { 301 | (sexpression_boxint 302 | (+ 303 | (. (first lst) m_1) 304 | (. (first (rest lst)) m_1))) 305 | } 306 | "-" => { 307 | (sexpression_boxint 308 | (- 309 | (. (first lst) m_1) 310 | (. (first (rest lst)) m_1))) 311 | } 312 | "*" => { 313 | (sexpression_boxint 314 | (* 315 | (. (first lst) m_1) 316 | (. (first (rest lst)) m_1))) 317 | } 318 | "/" => { 319 | (sexpression_boxint 320 | (/ 321 | (. (first lst) m_1) 322 | (. (first (rest lst)) m_1))) 323 | } 324 | "<" => { 325 | (sexpression_boxbool 326 | (< 327 | (. (first lst) m_1) 328 | (. (first (rest lst)) m_1))) 329 | 330 | } 331 | "<=" => { 332 | (sexpression_boxbool 333 | (<= 334 | (. (first lst) m_1) 335 | (. (first (rest lst)) m_1))) 336 | 337 | } 338 | ">" => { 339 | (sexpression_boxbool 340 | (> 341 | (. (first lst) m_1) 342 | (. (first (rest lst)) m_1))) 343 | } 344 | ">=" => { 345 | (sexpression_boxbool 346 | (>= 347 | (. (first lst) m_1) 348 | (. (first (rest lst)) m_1))) 349 | } 350 | "<>" => { 351 | (sexpression_boxbool 352 | (<> 353 | (. (first lst) m_1) 354 | (. (first (rest lst)) m_1))) 355 | } 356 | end 357 | } 358 | -------------------------------------------------------------------------------- /docs/todo.md: -------------------------------------------------------------------------------- 1 | # The Current language TODOs 2 | 3 | A quick list of the current `TODO`s. 4 | 5 | ## Syntax 6 | 7 | 1. Named constructor vars `Foo m:int n:int` 8 | 1. destructuring bind (and matching the same ^^^) 9 | 1. Make rebinding for idents in `match` forms (`rewrite_match_bind.carml`) 10 | 1. Make sure that things like `y given (> y 10)` are rewritten properly as well 11 | 12 | ## Semantics 13 | 14 | 1. Row polymorphism with `row[member1, member2, member3]` 15 | 1. Tuples, and flattening them when used as a return type in Golang 16 | 1. RawDeque (see `labs/rawdeque.carml`) as a test backing for Deques 17 | 1. use RawDeque in specialized form to store a spaghetti stack of environment frames 18 | 1. use the environment frames to support typing (finally) 19 | 1. Figure out a decent backing for Rust-style deques (possibly implemented from records + arrays) 20 | 1. make `|>` work as it would in OCaml/F# 21 | 1. add `.` ala Haskell for "compose"? `f . g` becomes `(f (g x))` 22 | 1. If we are adding those, may as well do full shunting yard and parse things nicely 23 | 1. integrate `$` and `|>` with `$()` 24 | 1. `deque` and memory model 25 | 1. investigate untagged unions (`int | float` as a type) _note_: I like this style: `union[int float]` 26 | 27 | ## Modules 28 | 29 | 1. Atop SML/OCaml style modules, look into Mythryl's `API` types 30 | 1. **WONT** OCaml/SML-style Modules, and their application to higher-kinded types 31 | 1. 
will actually use something more akin to F#'s modules 32 | 1. **DONE** Figure out why this is breaking parameter lists: 33 | 34 | ``` 35 | >>> module Foo { 36 | def bar x:int y:int => int = (+ x (+ y 10)) 37 | def baz x:int => int = (+ x 10) 38 | } 39 | (module Foo 40 | (begin 41 | (define bar (parameter-list ) 42 | (returns (type integer)) 43 | (call (identifier +) (identifier x) (call (identifier +) (identifier y) (integer 10)))) 44 | (define baz (parameter-list ) 45 | (returns (type integer)) 46 | (call (identifier +) (identifier x) (integer 10))))) 47 | ``` 48 | - it seems like any `def` form in a `begin` is breaking parameter lists? 49 | - ah, turning on C output, it looks like it's just broken for SExpression output... 50 | - same in Golang, it looks fine 51 | 52 | ## Compile passes 53 | 54 | 1. Need to replace manual `sed -e 's/\([a-z]\)\-\([a-z]\)/\1_\2/g'` and `sed -e 's/\?/_p/g'` with `cmung` 55 | 1. Figure out both type specialization and generics to compiled form 56 | 1. Fusion for `map`, `map!`, `foreach`, and so on. 57 | 1. Variable Length Array (VLA) style 58 | 1. in the base C compiler & carML self-hosting one, include a defined list of compiler errors with friendly data 59 | 1. add line numbers to errors 60 | 1. add nano-pass: a-normal form (ANF) 61 | 1. add nano-pass: lambda lifting 62 | 1. add nano-pass: ANF => SSA 63 | 1. add nano-pass: rewrite `let`/`letrec` => `val` + temporary binding 64 | 1. add nano-pass: constant folding 65 | 1. add nano-pass: select the correct reified constructor implementation 66 | 1. add nano-pass: demand-driven type inference 67 | 1. Investigate: method of determining effects, and how that could make ANF easier (lift once for two calls) 68 | 1. Tests, both for IR and C 69 | 1. JS, Java, C++ backends, but written in carML itself and using the SExpression output. 70 | 1. WebAssembly backend 71 | 72 | ## Environment 73 | 74 | 1. _TEST_: add more complex type tests 75 | 1. add test runner: parse SExpression output... 76 | 1. C version: convert `(foo [1 2 3 4])` to `let x:array[int] = [1 2 3 4] in (foo x)` 77 | 1. Runtime, which should be pretty minimal. Look at Zig here, it has a smaller runtime than C! 78 | 1. write an actual inclusion algorithm for the compiler to consume `use`d libraries 79 | 1. Friendlier REPL, with keyboard support, and a simple Logo-style `edit` command 80 | 1. Interpreter, either ghci style (compiles in another language) or actual interpreter 81 | 1. Symbolic execution engine atop the same (using that paper "From Definitional Interpreters to Symbolic Executors") 82 | 83 | ## completed items: 84 | 85 | 1. **DONE** Add sized ints/uints/floats (e.g. `uint8`) as types (what about `U8` or `U64`? that works...) 86 | 1. **DONE** change `declare` to match how parameters work `@foo: function[int => int]`, `@bar: int` 87 | 1. **DONE** Add Samurai/Ninja as a build system 88 | 1. **DONE** Fix `match` forms that have a call; memoize the call (but need types, sigh) 89 | 1. **DONE** fix `make-array` (plus the VLA style mentioned below); uses GC by default for now, will fix in self-hosted version 90 | 1. **DONE** Fix type parsing code to be much simpler. 91 | 1. **DONE** Fix (finally) the `match` form for variants/poly 92 | 1. **DONE** Add complex types to `val` 93 | 1. **DONE** Add complex types to records 94 | 1. **DONE** Add complex types to `let` 95 | 1. **DONE** Add `var` form 96 | 1. **DONE** type tags 97 | 1. 
**FIXED** _BUG_: `Foo Bar int` as type constructor is parsed as `(type-constructor Foo (complex-type Bar Int))` when it should be `(type-constructor Foo Bar Int)` (2 members) 98 | 1. **FIXED** _BUG_: look at how the `experiments/sexpr.carml` is being parsed, and note that several constructors are missing 99 | 1. **DONE** (test case for the above: `Foo Bar int`) 100 | 1. **FIXED** _BUG_: `def bar h:Url => Url` is parsed as `(parameter-list (ident h) (tag Url))` when it should be `(parameter-list (parameter-def h (complex-type (tag Url))))` 101 | 1. **FIXED** (test case for the above: `def foo f:Url => Url = f`) 102 | 1. **FIXED** _BUG_: complex return types 103 | 1. **DONE** parse HOFs in declarations 104 | 1. **DONE** Review switch to Scala-style `[]` for types. 105 | 1. **DONE** Parse `@`/`declare` forms 106 | 1. **WONTFIX in C version** Update `val`, `let`, records to use the new `declare` type parser 107 | 1. **WONTFIX** Make function definitions & `let` forms accept `begin` style function calls (i.e. avoid using `()`) 108 | 1. **DONE** `match`/`case` form, with guards. 109 | 1. **DONE**: Partial application syntax: `$()`, including `_` as filler 110 | 1. **WONTFIX** Hoare-logic (pre, post, invariants, &c.): Won't fix because moving towards refinement types 111 | 1. **WONTFIX** make `mung_single_type` for reading a single type, use for `@` forms 112 | 1. **DONE** Compile to C in the style of Enyalios 113 | 1. **DONE** an `extern` or `alien` form for easy FFI 114 | 1. **DONE** Finish the `use` form 115 | 1. **DONE** Parsing of variants 116 | 1. **DONE** Compilation of variants 117 | 1. **DONE** Parsing of polymorphic variants 118 | 1. **DONE** Compilation of polymorphic variants 119 | 1. **DONE** Make sure types are correct for types/polys 120 | 1. **DONE** Syntax updates: `record` 121 | 1. **WONTFIX** Syntax updates: `def` 122 | 1. **WONTFIX** Syntax updates: `match` 123 | 1. **WONTFIX** Syntax updates: `fn` 124 | 1. **WONTFIX in C version** _REVIEW_ make `let` & `var/var` treat items like a call form (as in `val r : int = sum x 10` instead of `val r : int = (sum x 10)`) 125 | 1. **WONTFIX in C version** make the types parsing code more modular; could easily extract that out into a function 126 | 1. **DONE** (not in the most elegant way, mind, but...) Finally fix the frakking lexer to not goof up internal states 127 | 1. **WONTFIX** Csharp-style record parameter unboxing: I think this makes the C interface weird, maybe if we go natively to machine code... 128 | 1. **DONE** Error handling: `Either` 129 | 1. **DONE** Option types 130 | 1. **WONTFIX in C version** Lexer-as-stream 131 | 1. **DONE** Fix type state transition, which fails for Tagged types (`Url`) 132 | 1. **DONE** Fix parsing of single line `begin`: `{sum x 10}` fails to parse properly 133 | 1. **DONE** Fix `float`, `bool`, and `char` parsing 134 | 1. **DONE** Fix edge case: complex type right before `=>` fails `def foo bar : Url => int = ...` fails 135 | 1. **DONE** fix complex type handling in `typespec2c` 136 | 1. **DONE** fix match with `type`s 137 | 1. **DONE** Investigate: currently there is a syntax ambiguity in `begin` forms: is `t` a unary function call, or an identifier? Fix: identifier 138 | 1. 
**DONE** Make `$` work like it does in Haskell 139 | 140 | # Begin-style function calls 141 | 142 | _NOTE_ the fifth point above is the following: 143 | 144 | currently, `def` and `let` forms must enclose function calls with `()`: 145 | 146 | def foo x = (sum x x) 147 | let x = 95 in (foo x) 148 | 149 | Whereas in `begin` blocks, they needn't be: 150 | 151 | def foo x = { 152 | sum x x 153 | } 154 | 155 | I thought about just requiring users to use `begin` forms, but that means simple functions cannot be 156 | succinctly written. things like `foo` above shouldn't require 3 lines of text. Think about simple 157 | helper functions: 158 | 159 | def f x = sum (mul x 5.3) 10 160 | map f someVector 161 | 162 | Additionally, I want the repl to be able to handle those sorts of calls as well: 163 | 164 | >>> println "foo" 165 | foo 166 | _ : Unit 167 | >>> def f x = sum x 10 # secretly returns void 168 | >>> f (f 10) 169 | _ : Int = 30 170 | 171 | **DONE, 16OCT2017** Also, this is broken: 172 | 173 | >>> def foo x = { sum x x } 174 | (define foo (parameter-list (identifier x)) 175 | (begin 176 | (identifier sum) 177 | (identifier x) 178 | (identifier x))) 179 | >>> def foo x = { 180 | sum x x 181 | } 182 | (define foo (parameter-list (identifier x)) 183 | (begin 184 | (call (identifier sum) (identifier x) (identifier x)))) 185 | 186 | **DONE, 13OCT2017** Furthermore, this actually introduces a syntax & semantic ambiguity: 187 | 188 | def foo x : int => int = { 189 | var t : int = 10 190 | set! t (sum t x) 191 | t 192 | } 193 | 194 | Technically, we want the _value_ of `t`, but the current compiler thinks 195 | that `t` is actually an unary function application. Need to chew on how 196 | to fix that... OCaml requires explicit `()` for unary, so I could do 197 | the same... not really sure I like that, but it is one solution. 198 | 199 | # Scala-style types 200 | 201 | It's pretty appealing to me to use Scala-style type declarations... this would mean `@` would be freed from 202 | declaring types to simply adding annotations like in Scala... Furthermore, it would make parsing a bit easier 203 | to boot. This would imply that: 204 | 205 | @declare foo Array of Int => Int 206 | def foo x = ... 207 | 208 | Would become: 209 | 210 | def foo x: Array[Int] = ... 211 | 212 | I also wonder if we can still use `[]` for `indexGet` because the type language & the term language need-not 213 | be 1 to 1... 214 | 215 | # Syntax Updates 216 | 217 | I've been thinking about certain syntax choices I made originally, and thinking about updating them: 218 | 219 | - **DONE** remove the `=` in `record`s: `record foo { x int; y int;}` 220 | - **WONTFIX** allow the use of begin blocks directly following parameter lists: `def foo x { println x; sum x x}` 221 | - **WONTFIX** allow the use of begin blocks directly following `fn` parameter lists: `fn x { println x; sum x x}` 222 | - **N/A** still allow `def foo x = { block stuff ... }` 223 | - **WONTFIX** match forms to just directly use begin blocks: `match x { Some y => ... ; None => ... }` 224 | 225 | # Pre-requisites, Post-requisites, Invariants 226 | 227 | I'd like to embed some notion of the usual Hoare-logic forms into the language as extensions or _refinements_ to 228 | the `declare` or `@` form. I was thinking something similar to: 229 | 230 | @foo x Num y Num -> Num 231 | @/requires foo >= x y 232 | @/returns foo + x y 233 | def foo x y = ... 234 | 235 | Not sure how I want the Hoare-logic to look; `@ /requires` vs `@/requires `... 
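One way these *could* lower (purely illustrative; the compiler does nothing like this today, and the function below is a made-up toy): since `def` forms already compile to plain C functions, a `@/requires` clause could simply become a guard at the top of the generated body:

```
#include <assert.h>

/* hypothetical C output for:
 *   @foo x Num y Num -> Num
 *   @/requires foo >= x y
 *   def foo x y = ...
 * the assert-based strategy and the body here are illustrative only */
int
foo(int x, int y){
    assert(x >= y); /* from @/requires */
    return x + y;
}
```

Whether a check like that stays as a runtime assert or gets discharged statically is really the same question as the refinement-types direction below.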
236 | 237 | _Update_: I've been playing with F\* for a while now, and reading up about its 238 | [KreMLin](https://fstarlang.github.io/general/2016/09/30/introducing-kremlin.html) backend... the idea to have 239 | refinements & pre/post conditions as `{block}` is pretty appealing to me. 240 | 241 | # Csharp style record parameter unboxing 242 | 243 | So C# will actually unbox structs that are passed as args: 244 | 245 | foo(mystruct); 246 | 247 | will actually become 248 | 249 | foo(mystruct.member0, mystruct.member1, ...); 250 | 251 | iff the struct is not modified (iirc). 252 | 253 | # Error handling 254 | 255 | I don't really want to get too fancy with Error handling. I think it should be simple enough to just 256 | use `Either` heavily for all core functions. Like an `os.open` could theoretically look like: 257 | 258 | @os.open string int -> Either a b 259 | def os.open path mode { 260 | let res = (alien "open" path mode) in 261 | if (< res 0) then 262 | (Left (os.errno_lookup res)) 263 | else 264 | (Right res) 265 | } 266 | 267 | Yes, this would mean that the other side will always have to have some sort of `match` form to 268 | test what the result actually is, but that would mean that error handling should be relatively 269 | simple. I _like_ how Digamma implements SRFIs 23 & 34, but I don't know if I want to have that 270 | much "stuff" in carML. 271 | 272 | # Lexer-as-stream 273 | 274 | The idea here is that a simple lexer should consume _all_ tokens from input, regardless of their syntactic 275 | validity, prior to returning to the parser. The parser then can do the usual RDP, but instead of operating 276 | on the file buffer, the RDP then can operate on the _token_ buffer. Thus, the file stream is completely 277 | consumed (there are no dangling tokens the parser will consume once an error has occurred: the entire 278 | stream can be abandoned), and we can implement nicer parser stuffs. 279 | 280 | # **DONE** what in the world... 281 | 282 | Figured it out: parser has a case for 'c', but no productions, so it can't actually parse anything that 283 | starts with a 'c' 284 | 285 | >>> foo [1,2,3,4] 286 | (identifier foo) 287 | >>> (array-literal (integer 1) (integer 2) (integer 3) (integer 4)) 288 | >>> r 289 | (identifier r) 290 | >>> r [1 2 3 4] 291 | (identifier r) 292 | >>> (array-literal (integer 1) (integer 2) (integer 3) (integer 4)) 293 | >>> car [1 2 3 4] 294 | 295 | It never finished reading with the `car`... 296 | 297 | # Partial Application form (really, a specialization form) 298 | 299 | I'm thinking that `$()` will denote partial application. For example: 300 | 301 | def foo x y z = + x (* y z) 302 | 303 | So here, the function `foo` requires 3 parameters, but there may be cases wherein we 304 | do **not** wish to use all three, and this is where `$()` forms would come into play: 305 | 306 | let bar = $(foo 10 _ 12) in 307 | println (bar 11) 308 | 309 | Here, the "call" to `bar` is just rewritten to be `foo 10 11 12`. I've wondered how, or if, it 310 | should support memoizing parameters; for example, consider the following: 311 | 312 | def foo x y = + x y 313 | def baz x = * x 10 314 | let bar = $(foo _ (baz 11)) in 315 | println (bar 12) 316 | 317 | Should we compute and store the value `(baz 11)`, or should we rerun that calculation at each 318 | application of the specialization form? Both have implications, but I'm wondering which is the 319 | less surprising of the two? 
I guess the fact that we're going for an SRFI-26 style here is the 320 | real answer to capturing... 321 | 322 | ## Interaction with thrushing forms 323 | 324 | I **do** wish to support `|>` and the like. I think Haskell's `$` form has less appeal here, because 325 | I'm already avoiding as many parens as possible, but `|>` from the various ML dialects is interesting. 326 | 327 | (|> 328 | foo 329 | $(bar _ 11) 330 | baz 331 | $(blah _ 12)) 332 | 333 | It'll be interesting to have a form that doesn't introduce a lambda capture but _does_ allow for 334 | specialization... 335 | 336 | Thinking about `$` though, that might be interesting to have... 337 | 338 | println (int_to_string (sum 54 100)) 339 | # vs 340 | println $ int_to_string $ sum 54 100 341 | 342 | Could make `$` the only infix form... 343 | 344 | # Variable Length Arrays 345 | 346 | Arrays should be fixed size, deques should handle growth, so that also means anywhere that we have 347 | an array we can also define a `len` variable to capture that length. Calls to `length` on arrays or 348 | strings can be simply rewritten to that variable. 349 | 350 | ``` 351 | val foo : array[int] = [1,2,3,4,5] 352 | # compiler defines a `foo_len_$integer` variable... 353 | # ... 354 | # ... 355 | for x in (range 0 $ length foo) do ... 356 | # the `length foo` is rewritten to `foo_len_$integer` 357 | ``` 358 | 359 | Similarly, when we pass arrays into functions, we should capture the length as a parameter: 360 | 361 | ``` 362 | def foo bar:array[int] ... = ... 363 | # the compiler will add a `bar_len_$integer` parameter implicitly... 364 | ``` 365 | 366 | # Memory Model 367 | 368 | To add to the above, I've been thinking about the memory model of carML... a lot. The problem 369 | I have is that I want to be able to both: 370 | 371 | - allow normal users not to think about how memory works 372 | - allow power users to precisely control how memory works 373 | 374 | So I *think* what I need to do is: 375 | 376 | 1. add the antithesis of `ref`: `flat`, which requires stack allocation 377 | 1. pass items by reference by default (`const *`) 378 | 1. pass items by mutable reference when `ref` is in play (`ref[array[int]]`) 379 | 1. pass items by value when `flat` is in play (`flat[array[int]]`) 380 | 1. structs and the like then would _default_ to const refs unless the user specifies otherwise 381 | 382 | **note** I think we'll use `low` for this, because it's dissimilar to `float` 383 | 384 | ## Decomposition 385 | 386 | I was thinking about this today: 387 | 388 | ``` 389 | # ... 390 | var foo:array[int] = (make-array int 10 0) 391 | # should become: 392 | var foo:array[int] = (model-approriate-allocator int 10) 393 | val foo_len = 10 394 | (memset_s foo (sizeof int) 10 0) 395 | ``` 396 | 397 | Basically, a nano-pass can rewrite arrays to be proper captures of setting up the VLAs, len, setting default values, &c. 398 | -------------------------------------------------------------------------------- /labs/mini-sexpr.carml: -------------------------------------------------------------------------------- 1 | #@(#) A Miniture SExpression library for carML 2 | #@(#) mainly aimed at getting the carML compiler out of the C system 3 | #@(#) should be mainly compatible with the SExpression-based IR that 4 | #@(#) carML/C generates. 
5 | #@(#) Meant to help even FURTHER accelerate the bootstrap process, 6 | #@(#) since it's smaller than a full SExpression library, and works 7 | #@(#) with the Golang output 8 | # 9 | # the shame is, given carML's current 10 | # type system, there are many work-arounds 11 | # required to make the below work: 12 | # - the type system can't tell ref[SExpression] can be flattened to SExpression 13 | # - some things should be automatically passed around as ref[Type] but you have to manually do that 14 | # - lots of ref[Any] could be turned into a `const`... 15 | # 16 | # probably lots of other hacks as I 17 | # go through it. Luckily, once I move 18 | # past where the C compiler is, I can 19 | # start to handle some of the debt I've 20 | # accrued. 21 | 22 | type SExpression { 23 | Nil 24 | Atom string int 25 | String string int 26 | # making these strings for now to make it easier to 27 | # create, since I haven't written standard functions 28 | # to convert string=>int/float yet. 29 | # eventually, those should be gadgets... 30 | # 31 | # additionally, including an `int` here for the 32 | # offset, since I haven't dont destructuring binds 33 | # yet... 34 | Int string int 35 | Float string int 36 | Char string int 37 | Bool bool int 38 | List array[SExpression] int 39 | Error string int 40 | # Maybe should have an internal type that 41 | # isn't exported, and an external type that 42 | # is, so as to remove the two constructors 43 | # below... 44 | Null 45 | EndList int 46 | EndArray int 47 | EndFile 48 | } 49 | 50 | record Token { 51 | lexeme:string 52 | lexeme_offset:int 53 | lexeme_type:int 54 | } 55 | 56 | # add a simple reader system below 57 | # should be able to construct & read 58 | # SExpression-based syntax 59 | # 60 | # What will this be used for? Why, 61 | # for bootstrapping a compiler in carML 62 | # itself really. 63 | 64 | def is_whitespace ch:char => bool = { 65 | match ch with 66 | ' ' => true 67 | '\t' => true 68 | '\n' => true 69 | '\v' => true 70 | '\r' => true 71 | '\b' => true 72 | else => false 73 | end 74 | } 75 | 76 | def take_while_white src:string start:int => int = { 77 | # take... on... me... 78 | # _softly_ take on me 79 | # take... me... on... 80 | # _softly_ take on me 81 | # I'll be... gone... 82 | # In a day or twoooooooo 83 | # _synths_ 84 | var idx:int = start 85 | var ch:char = (get src start) 86 | # I *hate* how low level this is... 87 | when (is_whitespace ch) do { 88 | while (is_whitespace ch) do { 89 | set! ch $ get src idx 90 | set! idx $ + idx 1 91 | } 92 | return $ - idx 1 93 | } 94 | idx 95 | } 96 | 97 | def take_until_break src:string start:int => int = { 98 | var idx:int = start 99 | var ch:char = ' ' 100 | if (> idx $ len src) then idx 101 | else { 102 | set! ch $ get src idx 103 | while (every (< idx $ len src) (is_symbolic ch)) do { 104 | set! idx $ + idx 1 105 | set! 
ch $ get src idx 106 | } 107 | idx 108 | } 109 | } 110 | 111 | def next src:string start_offset:int => Token = { 112 | # NOTE I had an idea about how to fix the offset 113 | # issue: instead of trying to read the string AND 114 | # process lexemes, what we need is to go old-School 115 | # Wirth style with our RDP: have a function, next, 116 | # which returns a Token struct with the next lexeme, 117 | # it's length, and type 118 | 119 | when (>= start_offset $ len src) do (return $ make-struct Token "" start_offset 98) 120 | 121 | val offset:int = (take_while_white src start_offset) 122 | val ch:char = (get src offset) 123 | var idx:int = offset 124 | var state:int = 0 125 | # yet again, I really need to make TCO a thing, since 126 | # this function would be so much more nicely expressed 127 | # as a simple tail recursive function... 128 | match ch with 129 | '(' => (return $ make-struct Token "(" offset 1) 130 | ')' => (return $ make-struct Token ")" offset 2) 131 | '[' => (return $ make-struct Token "[" offset 3) 132 | ']' => (return $ make-struct Token "]" offset 4) 133 | z given (is_numeric ch) => { 134 | while (< idx $ len src) do { 135 | match state with 136 | # integer 137 | 0 => match ch with 138 | '.' => (set! state 1) 139 | _ given (is_numeric ch) => (set! state 0) 140 | _ given (is_symbolic ch) => (set! state 2) 141 | _ given (not $ is_symbolic ch) => (return $ make-struct Token (string_slice src offset idx) offset 5) 142 | else => (set! state 0) 143 | end 144 | # float 145 | 1 => match ch with 146 | '.' => (set! state 2) 147 | _ given (is_numeric ch) => (set! state 1) 148 | _ given (is_symbolic ch) => (set! state 2) 149 | _ given (not $ is_symbolic ch) => (return $ make-struct Token (string_slice src offset idx) offset 6) 150 | else => (set! state 1) 151 | end 152 | # atom 153 | 2 => when (not $ is_symbolic ch) do (return $ make-struct Token (string_slice src offset idx) offset 7) 154 | end 155 | set! idx $ + idx 1 156 | when (< idx $ len src) do (set! ch $ get src idx) 157 | } 158 | } 159 | # atoms 160 | _ given (is_symbolic ch) => { 161 | while (every (< idx $ len src) (is_symbolic ch)) do { 162 | set! ch $ get src idx 163 | set! idx $ + idx 1 164 | } 165 | return $ make-struct Token (string_slice src offset $ - idx 1) offset 7 166 | } 167 | # strings 168 | '"' => { 169 | set! idx $ + idx 1 170 | if (>= idx $ len src) then 171 | (return $ make-struct Token "malformed string" offset 99) 172 | else 173 | (set! ch $ get src idx) 174 | while (every (!= ch '"') (< idx $ len src)) do { 175 | match ch with 176 | '\\' => (set! idx $ + idx 2) 177 | else => (set! idx $ + idx 1) 178 | end 179 | when (< idx $ len src) do (set! ch $ get src idx) 180 | } 181 | return $ make-struct Token (string_slice src offset $ + idx 1) offset 8 182 | } 183 | # characters 184 | '#' => { 185 | var tmp:string = "" 186 | var tidx:int = 0 187 | set! idx $ + idx 1 188 | when (>= (+ idx 1) $ len src) do (return $ make-struct Token "malformed character" offset 99) 189 | when (!= (get src idx) '\\') do (return $ make-struct Token "character must be followed by \\" offset 99) 190 | # NOTE what we need to do is read until we hit some non-symbolic, and then attempt to tell 191 | # if we have a named character or a single one... 192 | # for example, we might have: 193 | # 194 | # - #\n (the letter n) 195 | # - #\newline (the literal ASCII newline) 196 | # we want to support both, but also need to make sure that named characters are actual names we can 197 | # parse... 
198 | # 199 | # XXX GAH I just realized tho that this means we *cannot* rely on the simple offset 200 | # and instead need to do some helper calculations here in the sizing... this could 201 | # get hairy... 202 | # 203 | # You'll notice below that the calculations for offset are odd. This is because 204 | # I'm actually calculating; length of character prefix (2) + length of named character, 205 | # but we subtract one so that the normal offset + len works. so, some examples: 206 | # - `#\n` the letter 'n', should be three, actually two 207 | # - `#\newline` the ASCII newline, '\n', should be nine, actually 8 208 | set! tidx $ take_until_break src $ + idx 1 209 | set! tmp $ string_slice src (+ idx 1) tidx 210 | match tmp with 211 | # if we only have *one* character, don't bother checking... 212 | _ given (eq? 1 $ len tmp) => (return $ make-struct Token tmp (+ offset 2) 9) 213 | # it would be *so* much more reasonable here to return a value into 214 | # a val and then use that to call `return` once... 215 | # 216 | # once the new compiler is working, I need to move to a correct 217 | # "everything is a value" system... 218 | "newline" => (return $ make-struct Token "\n" (+ offset 8) 9) 219 | "tab" => (return $ make-struct Token "\t" (+ offset 4) 9) 220 | "carriage" => (return $ make-struct Token "\r" (+ offset 9) 9) 221 | "bell" => (return $ make-struct Token "\a" (+ offset 5) 9) 222 | "vtab" => (return $ make-struct Token "\v" (+ offset 5) 9) 223 | "backspace" => (return $ make-struct Token "\b" (+ offset 10) 9) 224 | "backslash" => (return $ make-struct Token "\\" (+ offset 10) 9) 225 | else => (return $ make-struct Token "incorrect named character" offset 99) 226 | end 227 | } 228 | else => (return $ make-struct Token " " 1 99) 229 | end 230 | # XXX: AAAAAAAAAAH... so imperative 231 | match state with 232 | 0 => (return $ make-struct Token (string_slice src offset idx) offset 5) 233 | 1 => (return $ make-struct Token (string_slice src offset idx) offset 6) 234 | 2 => (return $ make-struct Token (string_slice src offset idx) offset 7) 235 | end 236 | return $ make-struct Token " " 1 99 237 | } 238 | 239 | def is_numeric ch:char => bool = { 240 | match ch with 241 | _ given (&& (>= ch '0') (<= ch '9')) => true 242 | '.' => true 243 | else => false 244 | end 245 | } 246 | 247 | def is_symbolic ch:char => bool = { 248 | match ch with 249 | '(' => false 250 | ')' => false 251 | '[' => false 252 | ']' => false 253 | '"' => false 254 | '\'' => false 255 | '#' => false 256 | _ given (is_whitespace ch) => false 257 | else => true 258 | end 259 | } 260 | 261 | def is_null_or_endp obj:SExpression => bool = { 262 | match obj with 263 | (SExpression.Null) => true 264 | (SExpression.EndList) => true 265 | (SExpression.EndArray) => true 266 | (SExpression.EndFile) => true 267 | else => false 268 | end 269 | } 270 | 271 | # this should ideally be replaced by a zero-cost abstraction 272 | # gadget that writes things like List.map! or the like to 273 | # simple Go/C functions, but for now, just like string-slice 274 | # above, write this in carML itself 275 | def shrink_array src:array[SExpression] cap:int length:int => array[SExpression] = { 276 | var ret : array[SExpression] = (make-array SExpression length $ SExpression.Nil) 277 | var idx : int = 0 278 | while (< idx length) do { 279 | set! (get ret idx) (get src idx) 280 | set! 
idx $ + idx 1 281 | } 282 | ret 283 | } 284 | 285 | # we really don't need much else beyond this part here 286 | # we could honestly do this with append=>array as well, 287 | # which would basically treat this as a stack, maybe by 288 | # backing with a deque? 289 | def read_list src:string start:int => SExpression = { 290 | # XXX for now, going to use an array, but I really think 291 | # this would be an *ideal* location for deques... 292 | var ret:array[SExpression] = (make-array SExpression 128 $ SExpression.Nil) 293 | var ret_length:int = 128 294 | var tmp:SExpression = (sexpression_read src $ + start 1) 295 | var idx:int = 0 296 | var offset:int = start 297 | while (not $ is_null_or_endp tmp) do { 298 | 299 | when (>= idx ret_length) do { 300 | return $ make-struct SExpression.Error "read_list array length overrun" start 301 | } 302 | 303 | set! (get ret idx) tmp 304 | set! idx $ + idx 1 305 | # actually, this is still wrong... 306 | # because we're just returning the *offset* in most 307 | # of our setups, but in lists we cannot just run a 308 | # string length check. I think here we can probably 309 | # do a check for most items, but for lists we have to 310 | # set the offset to be the offset of the final 311 | # #\) 312 | match tmp with 313 | (SExpression.List _ _) => (set! offset $ . tmp m_2) 314 | # a limit to what we can do here; because we write 315 | # these as an interface{} and what not in Golang, we 316 | # actually need to check each of the types here. Luckily, 317 | # it's probably better because I actually wasn't checking 318 | # an error was returned! 319 | (SExpression.Atom _ _) => (set! offset $ + (. tmp m_2) $ len $ . tmp m_1) 320 | (SExpression.String _ _) => (set! offset $ + (. tmp m_2) $ len $ . tmp m_1) 321 | (SExpression.Int _ _) => (set! offset $ + (. tmp m_2) $ len $ . tmp m_1) 322 | (SExpression.Float _ _) => (set! offset $ + (. tmp m_2) $ len $ . tmp m_1) 323 | (SExpression.Bool _ _) => if (. tmp m_1) then { 324 | (set! offset $ + (. tmp m_2) 4) 325 | } else { 326 | (set! offset $ + (. tmp m_2) 5) 327 | } 328 | (SExpression.Char _ _) => (set! offset $ + (. tmp m_2) $ len $ . tmp m_1) 329 | (SExpression.EndList _) => (set! offset $ + (. tmp m_1) 1) 330 | else => (return tmp) 331 | end 332 | set! tmp $ sexpression_read src offset 333 | } 334 | make-struct SExpression.List (shrink_array ret 128 idx) $ + offset 1 335 | } 336 | 337 | def read_char src:string offset:int => SExpression = { 338 | val tok : Token = (next src offset) 339 | if (eq? (. tok lexeme_type) 9) then 340 | (make-struct SExpression.Char (. tok lexeme) (. tok lexeme_offset)) 341 | else 342 | (make-struct SExpression.Error "expected character" offset) 343 | } 344 | 345 | def read_atom src:string offset:int => SExpression = { 346 | val tok : Token = (next src offset) 347 | if (eq? (. tok lexeme_type) 7) then 348 | (make-struct SExpression.Atom (. tok lexeme) (. tok lexeme_offset)) 349 | else 350 | (make-struct SExpression.Error "expected atom" offset) 351 | } 352 | 353 | def read_string src:string offset:int => SExpression = { 354 | val tok : Token = (next src offset) 355 | if (eq? (. tok lexeme_type) 8) then 356 | (make-struct SExpression.String (. tok lexeme) (. tok lexeme_offset)) 357 | else 358 | (make-struct SExpression.Error "expected string" offset) 359 | } 360 | 361 | # NOTE (lojikil): 362 | # I was originally going to implement a bunch of things in the 363 | # core library, but then I realized I probably should actually 364 | # make a module and have things work there. 
So, for now, I'm 365 | # adding this helper here, and will work on making the rest 366 | # of the module system workable... 367 | def string_slice src:string start:int endpoint:int => string = { 368 | val len:int = (- endpoint start) 369 | var res:array[char] = (make-array char len) 370 | var idx:int = 0 371 | while (< idx len) do { 372 | set! (get res idx) $ get src $ + idx start 373 | set! idx $ + idx 1 374 | } 375 | make-string res 376 | } 377 | 378 | # it would be nice to enrich types here 379 | # say that this is an SExpression, but also 380 | # that the only types it will return are 381 | # from Int, Float, Rational, Complex 382 | # NOTE (lojikil) also, this would be a great place 383 | # for default values: offset & start could be set 384 | # freely... 385 | def read_number src:string offset:int => SExpression = { 386 | val tok:Token = (next src offset) 387 | match (. tok lexeme_type) with 388 | 5 => (make-struct SExpression.Int (. tok lexeme) (. tok lexeme_offset)) 389 | 6 => (make-struct SExpression.Float (. tok lexeme) (. tok lexeme_offset)) 390 | else => (make-struct SExpression.Error "wanted to read a number, but other type returned" offset) 391 | end 392 | } 393 | 394 | def sexpression_read src:string start:int => SExpression = { 395 | # this would be so much nicer as a tuple here 396 | # we need a few things tho: 397 | # 1. unboxing tuple[char int] (for example) in Golang returns 398 | # 1. destructuring bind (`var ch, idx : tuple[char int] = ...`) 399 | # 1. making a `make-tuple` and allowing `,` in CALL 400 | val tok:Token = (next src start) 401 | match (. tok lexeme_type) with 402 | 1 => (read_list src $ . tok lexeme_offset) 403 | 2 => (make-struct SExpression.EndList $ + (. tok lexeme_offset) 1) 404 | #'[' => (read_array fh) 405 | #']' => (make-struct SExpression.EndArray) 406 | 5 => (make-struct SExpression.Int (. tok lexeme) (. tok lexeme_offset)) 407 | 6 => (make-struct SExpression.Float (. tok lexeme) (. tok lexeme_offset)) 408 | 7 => (make-struct SExpression.Atom (. tok lexeme) (. tok lexeme_offset)) 409 | 8 => (make-struct SExpression.String (. tok lexeme) (. tok lexeme_offset)) 410 | 9 => (make-struct SExpression.Char (. tok lexeme) (. tok lexeme_offset)) 411 | 98 => (make-struct SExpression.EndFile) 412 | else => (make-struct SExpression.Error (. tok lexeme) (. tok lexeme_offset)) 413 | end 414 | } 415 | 416 | def sexpression2char src:string => string = { 417 | match (get src 0) with 418 | '\n' => "#\\newline" 419 | '\v' => "#\\vtab" 420 | '\r' => "#\\carriage" 421 | '\t' => "#\\tab" 422 | '\b' => "#\\backspace" 423 | '\a' => "#\\bell" 424 | '\0' => "#\\nul" 425 | else => (String.Concat "#\\" src) 426 | end 427 | } 428 | 429 | # need to mung names so this could be called the proper sexpression->string 430 | def sexpression2string src:SExpression => string = { 431 | # NOTE because I defunctionalized the routines below, we end up 432 | # with a bunch of variable detritus here; really, concat probably 433 | # should be it's own function, and then we just call that in a 434 | # loop, but instead I've rewritten all of the HOFs to be simple 435 | # loops below 436 | 437 | declare result:array[string] 438 | declare flattened:array[char] 439 | var idx:int = 0 440 | var cidx:int = 0 441 | var ridx:int = 0 442 | var totallen:int = 0 443 | match src with 444 | (SExpression.Int _ _) => (. src m_1) 445 | (SExpression.Float _ _) => (. src m_1) 446 | (SExpression.Atom _ _) => (. src m_1) 447 | (SExpression.String _ _) => { 448 | set! flattened $ make-array char $ + 2 $ len $ . 
src m_1 449 | set! idx 1 450 | set! cidx 1 451 | while (< cidx $ - (len $ . src m_1) 1) do { 452 | set! (get flattened cidx) $ get (. src m_1) idx 453 | set! idx $ + idx 1 454 | set! cidx $ + cidx 1 455 | } 456 | set! (get flattened 0) '"' 457 | set! (get flattened idx) '"' 458 | make-string flattened 459 | } 460 | (SExpression.Char _ _) => (sexpression2char $ . src m_1) 461 | (SExpression.List _ _) => { 462 | # break this down a little bit, into something that I know the 463 | # carML Golang compiler can actually deal with in a semi-efficient 464 | # way 465 | # now, we can defunctionalize this manually ourselves... 466 | # NOTE this is all very ugly because I haven't added the necessary 467 | # functions to the code base to actually support things like 468 | # array maps and such. This code is *wild* below, and here be 469 | # dragons five sure 470 | # var result:array[string] = (Array.Map sexpression2string (. src m_1)) 471 | set! result (make-array string (len $ . src m_1) "") 472 | while (< idx $ len result) do { 473 | set! (get result idx) $ sexpression2string (get (. src m_1) idx) 474 | # keep track of the total string length 475 | set! totallen $ + totallen $ len $ get result idx 476 | set! idx $ + idx 1 477 | } 478 | # we now add the number of spaces we need to add... 479 | # plus two spaces for "()" 480 | set! totallen $ + totallen $ + (len result) 2 481 | # var flattened:string = (String.join " " result) 482 | set! flattened $ make-array char totallen 483 | set! (get flattened 0) '(' 484 | set! idx 1 485 | # XXX it almost would have been simpler to 486 | # add really nice TCO elimination in the compiler 487 | # than this spaghetti code here, but this is a hack 488 | # to get us off the ground. 489 | while (< idx $ - (len flattened) 1) do { 490 | set! cidx 0 491 | while (< cidx $ len $ get result ridx) do { 492 | set! (get flattened idx) $ get (get result ridx) cidx 493 | set! idx $ + idx 1 494 | set! cidx $ + cidx 1 495 | } 496 | set! (get flattened idx) ' ' 497 | set! idx $ + idx 1 498 | set! ridx $ + ridx 1 499 | } 500 | set! (get flattened idx) ')' 501 | make-string flattened 502 | } 503 | end 504 | } 505 | -------------------------------------------------------------------------------- /labs/mini-interp.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | /* 4 | * this is the result of two files being smooshed together, plus some 5 | * custom Golang: 6 | * 7 | * . labs/mini-sexpression.carml, for reading in carML/C SExpression output 8 | * . 
labs/mini-walk.carml, for actually executing code 9 | * 10 | * Currently, it actually is just an interpreter for carML itself, but that 11 | * should change eventually 12 | */ 13 | 14 | import ( 15 | "fmt" 16 | "reflect" 17 | "strconv" 18 | ) 19 | 20 | type SExpression interface { 21 | isSExpression() 22 | } 23 | type SExpression_Nil struct { 24 | } 25 | func (SExpression_Nil) isSExpression() {} 26 | type SExpression_Atom struct { 27 | m_1 string 28 | m_2 int 29 | } 30 | func (SExpression_Atom) isSExpression() {} 31 | type SExpression_String struct { 32 | m_1 string 33 | m_2 int 34 | } 35 | func (SExpression_String) isSExpression() {} 36 | type SExpression_Int struct { 37 | m_1 string 38 | m_2 int 39 | } 40 | func (SExpression_Int) isSExpression() {} 41 | type SExpression_Float struct { 42 | m_1 string 43 | m_2 int 44 | } 45 | func (SExpression_Float) isSExpression() {} 46 | type SExpression_Char struct { 47 | m_1 string 48 | m_2 int 49 | } 50 | func (SExpression_Char) isSExpression() {} 51 | type SExpression_Bool struct { 52 | m_1 bool 53 | m_2 int 54 | } 55 | func (SExpression_Bool) isSExpression() {} 56 | type SExpression_List struct { 57 | m_1 []SExpression 58 | m_2 int 59 | } 60 | func (SExpression_List) isSExpression() {} 61 | type SExpression_Error struct { 62 | m_1 string 63 | m_2 int 64 | } 65 | func (SExpression_Error) isSExpression() {} 66 | type SExpression_Null struct { 67 | } 68 | func (SExpression_Null) isSExpression() {} 69 | type SExpression_EndList struct { 70 | m_1 int 71 | } 72 | func (SExpression_EndList) isSExpression() {} 73 | type SExpression_EndArray struct { 74 | m_1 int 75 | } 76 | func (SExpression_EndArray) isSExpression() {} 77 | type SExpression_EndFile struct { 78 | } 79 | func (SExpression_EndFile) isSExpression() {} 80 | 81 | type Token struct { 82 | lexeme string 83 | lexeme_offset int 84 | lexeme_type int 85 | } 86 | func is_whitespace(ch byte) bool { 87 | switch ch { 88 | case ' ': 89 | return true 90 | case '\t': 91 | return true 92 | case '\n': 93 | return true 94 | case '\v': 95 | return true 96 | case '\r': 97 | return true 98 | case '\b': 99 | return true 100 | default: 101 | return false 102 | } 103 | 104 | } 105 | func take_while_white(src string, start int) int { 106 | var idx int = start 107 | var ch byte = src[start] 108 | if is_whitespace(ch) { 109 | for is_whitespace(ch) { 110 | ch = src[idx] 111 | idx = (idx + 1) 112 | } 113 | 114 | return idx - 1 115 | } 116 | 117 | return idx 118 | } 119 | func take_until_break(src string, start int) int { 120 | var idx int = start 121 | var ch byte = ' ' 122 | if idx > len(src) { 123 | return idx 124 | } else { 125 | ch = src[idx] 126 | for (idx < len(src)) && (is_symbolic(ch)) { 127 | idx = (idx + 1) 128 | ch = src[idx] 129 | } 130 | 131 | return idx 132 | 133 | } 134 | 135 | } 136 | func next(src string, start_offset int) Token { 137 | if start_offset >= len(src) { 138 | return Token{ "", start_offset, 98} 139 | } 140 | 141 | offset := take_while_white(src, start_offset) 142 | ch := src[offset] 143 | var idx int = offset 144 | var state int = 0 145 | switch { 146 | case ch == '(': 147 | return Token{ "(", offset, 1} 148 | case ch == ')': 149 | return Token{ ")", offset, 2} 150 | case ch == '[': 151 | return Token{ "[", offset, 3} 152 | case ch == ']': 153 | return Token{ "]", offset, 4} 154 | case (is_numeric(ch)): 155 | for idx < len(src) { 156 | switch state { 157 | case 0: 158 | switch { 159 | case ch == '.': 160 | state = 1 161 | case (is_numeric(ch)): 162 | state = 0 163 | case (is_symbolic(ch)): 
164 | state = 2 165 | case (!is_symbolic(ch)): 166 | return Token{ string_slice(src, offset, idx), offset, 5} 167 | default: 168 | state = 0 169 | } 170 | 171 | case 1: 172 | switch { 173 | case ch == '.': 174 | state = 2 175 | case (is_numeric(ch)): 176 | state = 1 177 | case (is_symbolic(ch)): 178 | state = 2 179 | case (!is_symbolic(ch)): 180 | return Token{ string_slice(src, offset, idx), offset, 6} 181 | default: 182 | state = 1 183 | } 184 | 185 | case 2: 186 | if !is_symbolic(ch) { 187 | return Token{ string_slice(src, offset, idx), offset, 7} 188 | } 189 | 190 | } 191 | 192 | idx = (idx + 1) 193 | if idx < len(src) { 194 | ch = src[idx] 195 | } 196 | 197 | } 198 | 199 | 200 | case (is_symbolic(ch)): 201 | for (idx < len(src)) && (is_symbolic(ch)) { 202 | ch = src[idx] 203 | idx = (idx + 1) 204 | } 205 | 206 | return Token{ string_slice(src, offset, idx - 1), offset, 7} 207 | 208 | case ch == '"': 209 | idx = (idx + 1) 210 | if idx >= len(src) { 211 | return Token{ "malformed string", offset, 99} 212 | } else { 213 | ch = src[idx] 214 | } 215 | 216 | for (ch != '"') && (idx < len(src)) { 217 | switch ch { 218 | case '\\': 219 | idx = (idx + 2) 220 | default: 221 | idx = (idx + 1) 222 | } 223 | 224 | if idx < len(src) { 225 | ch = src[idx] 226 | } 227 | 228 | } 229 | 230 | return Token{ string_slice(src, offset, idx + 1), offset, 8} 231 | 232 | case ch == '#': 233 | var tmp string = "" 234 | var tidx int = 0 235 | idx = (idx + 1) 236 | if (idx + 1) >= len(src) { 237 | return Token{ "malformed character", offset, 99} 238 | } 239 | 240 | if src[idx] != '\\' { 241 | return Token{ "character must be followed by \\", offset, 99} 242 | } 243 | 244 | tidx = take_until_break(src, idx + 1) 245 | tmp = string_slice(src, idx + 1, tidx) 246 | switch { 247 | case (1 == len(tmp)): 248 | return Token{ tmp, offset + 2, 9} 249 | case tmp == "newline": 250 | return Token{ "\n", offset + 8, 9} 251 | case tmp == "tab": 252 | return Token{ "\t", offset + 4, 9} 253 | case tmp == "carriage": 254 | return Token{ "\r", offset + 9, 9} 255 | case tmp == "bell": 256 | return Token{ "\a", offset + 5, 9} 257 | case tmp == "vtab": 258 | return Token{ "\v", offset + 5, 9} 259 | case tmp == "backspace": 260 | return Token{ "\b", offset + 10, 9} 261 | case tmp == "backslash": 262 | return Token{ "\\", offset + 10, 9} 263 | default: 264 | return Token{ "incorrect named character", offset, 99} 265 | } 266 | 267 | 268 | default: 269 | return Token{ " ", 1, 99} 270 | } 271 | 272 | switch state { 273 | case 0: 274 | return Token{ string_slice(src, offset, idx), offset, 5} 275 | case 1: 276 | return Token{ string_slice(src, offset, idx), offset, 6} 277 | case 2: 278 | return Token{ string_slice(src, offset, idx), offset, 7} 279 | } 280 | 281 | return Token{ " ", 1, 99} 282 | } 283 | func is_numeric(ch byte) bool { 284 | switch { 285 | case ((ch >= '0') && (ch <= '9')): 286 | return true 287 | case ch == '.': 288 | return true 289 | default: 290 | return false 291 | } 292 | 293 | } 294 | func is_symbolic(ch byte) bool { 295 | switch { 296 | case ch == '(': 297 | return false 298 | case ch == ')': 299 | return false 300 | case ch == '[': 301 | return false 302 | case ch == ']': 303 | return false 304 | case ch == '"': 305 | return false 306 | case ch == '\'': 307 | return false 308 | case ch == '#': 309 | return false 310 | case (is_whitespace(ch)): 311 | return false 312 | default: 313 | return true 314 | } 315 | 316 | } 317 | func is_null_or_endp(obj SExpression ) bool { 318 | switch obj.(type) { 319 | case 
SExpression_Null: 320 | return true 321 | case SExpression_EndList: 322 | return true 323 | case SExpression_EndArray: 324 | return true 325 | case SExpression_EndFile: 326 | return true 327 | default: 328 | return false 329 | } 330 | 331 | } 332 | func shrink_array(src []SExpression , cap int, length int) []SExpression { 333 | var ret []SExpression = make([]SExpression, length) 334 | var idx int = 0 335 | for idx < length { 336 | ret[idx] = src[idx] 337 | idx = (idx + 1) 338 | } 339 | 340 | return ret 341 | } 342 | func read_list(src string, start int) SExpression { 343 | var ret []SExpression = make([]SExpression, 128) 344 | var ret_length int = 128 345 | var tmp SExpression = sexpression_read(src, start + 1) 346 | var idx int = 0 347 | var offset int = start 348 | for !is_null_or_endp(tmp) { 349 | if idx >= ret_length { 350 | return SExpression_Error{ "read_list array length overrun", start} 351 | } 352 | 353 | ret[idx] = tmp 354 | idx = (idx + 1) 355 | switch tmp := tmp.(type) { 356 | case SExpression_List: 357 | offset = tmp.m_2 358 | case SExpression_Atom: 359 | offset = (tmp.m_2 + len(tmp.m_1)) 360 | case SExpression_String: 361 | offset = (tmp.m_2 + len(tmp.m_1)) 362 | case SExpression_Int: 363 | offset = (tmp.m_2 + len(tmp.m_1)) 364 | case SExpression_Float: 365 | offset = (tmp.m_2 + len(tmp.m_1)) 366 | case SExpression_Bool: 367 | if tmp.m_1 { 368 | offset = (tmp.m_2 + 4) 369 | } else { 370 | offset = (tmp.m_2 + 5) 371 | } 372 | case SExpression_Char: 373 | offset = (tmp.m_2 + len(tmp.m_1)) 374 | case SExpression_EndList: 375 | offset = (tmp.m_1 + 1) 376 | default: 377 | return tmp 378 | } 379 | 380 | tmp = sexpression_read(src, offset) 381 | } 382 | 383 | return SExpression_List{ shrink_array(ret, 128, idx), offset + 1} 384 | } 385 | func read_char(src string, offset int) SExpression { 386 | tok := next(src, offset) 387 | if tok.lexeme_type == 9 { 388 | return SExpression_Char{ tok.lexeme, tok.lexeme_offset} 389 | } else { 390 | return SExpression_Error{ "expected character", offset} 391 | } 392 | 393 | } 394 | func read_atom(src string, offset int) SExpression { 395 | tok := next(src, offset) 396 | if tok.lexeme_type == 7 { 397 | return SExpression_Atom{ tok.lexeme, tok.lexeme_offset} 398 | } else { 399 | return SExpression_Error{ "expected atom", offset} 400 | } 401 | 402 | } 403 | func read_string(src string, offset int) SExpression { 404 | tok := next(src, offset) 405 | if tok.lexeme_type == 8 { 406 | return SExpression_String{ tok.lexeme, tok.lexeme_offset} 407 | } else { 408 | return SExpression_Error{ "expected character", offset} 409 | } 410 | 411 | } 412 | func string_slice(src string, start int, endpoint int) string { 413 | len := endpoint - start 414 | var res []byte = make([]byte, len) 415 | var idx int = 0 416 | for idx < len { 417 | res[idx] = src[idx + start] 418 | idx = (idx + 1) 419 | } 420 | 421 | return string(res) 422 | } 423 | func read_number(src string, offset int) SExpression { 424 | tok := next(src, offset) 425 | l16807 := tok.lexeme_type 426 | switch l16807 { 427 | case 5: 428 | return SExpression_Int{ tok.lexeme, tok.lexeme_offset} 429 | case 6: 430 | return SExpression_Float{ tok.lexeme, tok.lexeme_offset} 431 | default: 432 | return SExpression_Error{ "wanted to read a number, but other type returned", offset} 433 | } 434 | 435 | } 436 | func sexpression_read(src string, start int) SExpression { 437 | tok := next(src, start) 438 | l282475249 := tok.lexeme_type 439 | switch l282475249 { 440 | case 1: 441 | return read_list(src, tok.lexeme_offset) 442 
| case 2: 443 | return SExpression_EndList{ tok.lexeme_offset + 1} 444 | case 5: 445 | return SExpression_Int{ tok.lexeme, tok.lexeme_offset} 446 | case 6: 447 | return SExpression_Float{ tok.lexeme, tok.lexeme_offset} 448 | case 7: 449 | return SExpression_Atom{ tok.lexeme, tok.lexeme_offset} 450 | case 8: 451 | return SExpression_String{ tok.lexeme, tok.lexeme_offset} 452 | case 9: 453 | return SExpression_Char{ tok.lexeme, tok.lexeme_offset} 454 | case 98: 455 | return SExpression_EndFile{ } 456 | default: 457 | return SExpression_Error{ tok.lexeme, tok.lexeme_offset} 458 | } 459 | 460 | } 461 | func sexpression2char(src string) string { 462 | l1622650073 := src[0] 463 | switch l1622650073 { 464 | case '\n': 465 | return "#\\newline" 466 | case '\v': 467 | return "#\\vtab" 468 | case '\r': 469 | return "#\\carriage" 470 | case '\t': 471 | return "#\\tab" 472 | case '\b': 473 | return "#\\backspace" 474 | case '\a': 475 | return "#\\bell" 476 | case '\u0000': 477 | return "#\\nul" 478 | default: 479 | return "#\\" + src 480 | } 481 | 482 | } 483 | func sexpression2string(src SExpression ) string { 484 | var result []string 485 | var flattened []byte 486 | var idx int = 0 487 | var cidx int = 0 488 | var ridx int = 0 489 | var totallen int = 0 490 | switch src := src.(type) { 491 | case SExpression_Int: 492 | return src.m_1 493 | case SExpression_Float: 494 | return src.m_1 495 | case SExpression_Atom: 496 | return src.m_1 497 | case SExpression_String: 498 | flattened = (make([]byte, 2 + len(src.m_1))) 499 | idx = 1 500 | cidx = 1 501 | for cidx < ((len(src.m_1)) - 1) { 502 | flattened[cidx] = src.m_1[idx] 503 | idx = (idx + 1) 504 | cidx = (cidx + 1) 505 | } 506 | 507 | flattened[0] = '"' 508 | flattened[idx] = '"' 509 | return string(flattened) 510 | 511 | case SExpression_Char: 512 | return sexpression2char(src.m_1) 513 | case SExpression_List: 514 | result = (make([]string, len(src.m_1))) 515 | for idx < len(result) { 516 | result[idx] = sexpression2string(src.m_1[idx]) 517 | totallen = (totallen + len(result[idx])) 518 | idx = (idx + 1) 519 | } 520 | 521 | totallen = (totallen + ((len(result)) + 2)) 522 | flattened = (make([]byte, totallen)) 523 | flattened[0] = '(' 524 | idx = 1 525 | for idx < ((len(flattened)) - 1) { 526 | cidx = 0 527 | for cidx < len(result[ridx]) { 528 | flattened[idx] = result[ridx][cidx] 529 | idx = (idx + 1) 530 | cidx = (cidx + 1) 531 | } 532 | 533 | flattened[idx] = ' ' 534 | idx = (idx + 1) 535 | ridx = (ridx + 1) 536 | } 537 | 538 | flattened[idx] = ')' 539 | return string(flattened) 540 | 541 | } 542 | return string(flattened) 543 | 544 | } 545 | 546 | func roundtrip(src string) { 547 | fmt.Printf("src:\n%s\n", src) 548 | g := sexpression_read(src, 0) 549 | fmt.Printf("g: %V\n", g) 550 | h := read_list(src, 0) 551 | fmt.Printf("h: %V\n", h) 552 | f := sexpression2string(g) 553 | fmt.Printf("f: %v\n", f) 554 | d := sexpression2string(h) 555 | fmt.Printf("d: %v\n", d) 556 | } 557 | 558 | func muwalk(src SExpression , env SExpression ) SExpression { 559 | return SExpression_Nil {}; 560 | } 561 | func sexpression_eqp(s0 SExpression , s1 SExpression ) bool { 562 | var idx int = 0 563 | var llen int = 0 564 | if reflect.TypeOf(s0) != reflect.TypeOf(s1) { 565 | return false 566 | } 567 | 568 | switch s0 := s0.(type) { 569 | case SExpression_Nil: 570 | return true 571 | case SExpression_EndFile: 572 | return true 573 | case SExpression_EndList: 574 | return true 575 | case SExpression_EndArray: 576 | return true 577 | case SExpression_Atom: 578 | return s0.m_1 
== s1.(SExpression_Atom).m_1 579 | case SExpression_Int: 580 | return s0.m_1 == s1.(SExpression_Int).m_1 581 | case SExpression_Float: 582 | return s0.m_1 == s1.(SExpression_Float).m_1 583 | case SExpression_Char: 584 | return s0.m_1 == s1.(SExpression_Char).m_1 585 | case SExpression_Bool: 586 | return s0.m_1 == s1.(SExpression_Bool).m_1 587 | case SExpression_Error: 588 | return s0.m_1 == s1.(SExpression_Error).m_1 589 | case SExpression_String: 590 | return s0.m_1 == s1.(SExpression_String).m_1 591 | case SExpression_List: 592 | llen = len(s0.m_1) 593 | for idx < llen { 594 | if sexpression_eqp(s0.m_1[idx], s1.(SExpression_List).m_1[idx]) { 595 | idx = (idx + 1) 596 | 597 | } else { 598 | return false 599 | 600 | } 601 | 602 | } 603 | 604 | return true 605 | 606 | } 607 | return false 608 | } 609 | func assoc(src SExpression , dst SExpression ) SExpression { 610 | var idx int = 0 611 | var llen int = len(dst.(SExpression_List).m_1) 612 | for idx < llen { 613 | if sexpression_eqp(first(dst.(SExpression_List).m_1[idx]), src) { 614 | return first(rest(dst.(SExpression_List).m_1[idx])) 615 | } 616 | 617 | idx = (idx + 1) 618 | } 619 | 620 | return SExpression_Nil{ } 621 | } 622 | func mem_assoc(needle SExpression , src SExpression ) SExpression { 623 | var idx int = 0 624 | var llen int = len(needle.(SExpression_List).m_1) 625 | for idx < llen { 626 | if sexpression_eqp(first(needle.(SExpression_List).m_1[idx]), src) { 627 | return sexpression_boxbool(true) 628 | } 629 | 630 | idx = (idx + 1) 631 | } 632 | 633 | return sexpression_boxbool(false) 634 | } 635 | func cons(hd SExpression , dst SExpression ) SExpression { 636 | return SExpression_Nil{ } 637 | } 638 | func first(hd SExpression ) SExpression { 639 | switch hd := hd.(type) { 640 | case SExpression_List: 641 | return hd.m_1[0] 642 | 643 | default: 644 | return SExpression_Error{ "first can only be applied to lists", 0} 645 | } 646 | 647 | } 648 | func rest(l SExpression ) SExpression { 649 | var res []SExpression = make([]SExpression, (len(l.(SExpression_List).m_1)) - 1) 650 | var idx int = 1 651 | var llen int = len(l.(SExpression_List).m_1) 652 | fmt.Printf("idx: %d, llen: %d\n", idx, llen) 653 | for idx < llen { 654 | fmt.Printf("idx: %d, llen: %d\n", idx, llen) 655 | res[idx - 1] = l.(SExpression_List).m_1[idx] 656 | fmt.Printf("res[%d] = %v\n", idx - 1, res[idx - 1]) 657 | idx = (idx + 1) 658 | } 659 | 660 | return SExpression_List{ res, 0} 661 | } 662 | 663 | func mapfn(f func (SExpression) SExpression, l SExpression , e SExpression ) SExpression { 664 | return SExpression_Nil { } 665 | } 666 | 667 | func mapeval(l SExpression , e SExpression ) SExpression { 668 | var res []SExpression = make([]SExpression, len(l.(SExpression_List).m_1)) 669 | var idx int = 0 670 | for idx != len(l.(SExpression_List).m_1) { 671 | res[idx] = mueval(l.(SExpression_List).m_1[idx], e) 672 | idx = (idx + 1) 673 | } 674 | 675 | return SExpression_List{ res, 0} 676 | } 677 | func sexpression_boxint(x int64) SExpression { 678 | return SExpression_Int{ strconv.FormatInt(x, 10), 0} 679 | } 680 | func sexpression_boxbool(b bool) SExpression { 681 | return SExpression_Bool{ b, 0} 682 | } 683 | func mueval(src SExpression , env SExpression ) SExpression { 684 | switch src := src.(type) { 685 | case SExpression_List: 686 | var hd SExpression = src.m_1[0] 687 | switch hd.(type) { 688 | case SExpression_Atom: 689 | switch hd.(SExpression_Atom).m_1 { 690 | case "define": 691 | 692 | case "define-value": 693 | 694 | case "define-mutable-value": 695 | 696 | case 
"if": 697 | 698 | case "call": 699 | tmp := muapply(src, env) 700 | fmt.Printf("tmp: %v\n", tmp) 701 | return tmp 702 | 703 | default: 704 | return first(rest(src)) 705 | } 706 | default: 707 | return src 708 | 709 | } 710 | 711 | 712 | default: 713 | return src 714 | } 715 | return src 716 | } 717 | func muapply(src SExpression , env SExpression ) SExpression { 718 | var lst SExpression = SExpression_Null{ } 719 | var hd SExpression = SExpression_Null{ } 720 | var tmp0 SExpression = SExpression_Null { } 721 | var tmp1 SExpression = SExpression_Null { } 722 | 723 | switch src.(type) { 724 | case SExpression_List: 725 | src = src.(SExpression_List) 726 | default: 727 | return src 728 | } 729 | 730 | lst = mapeval(rest(src), env) 731 | hd = first(lst) 732 | fmt.Printf("src: %v\n", src) 733 | fmt.Printf("lst: %v\n", lst) 734 | fmt.Printf("hd: %v\n", hd) 735 | fmt.Println("here on 735?") 736 | switch hd.(SExpression_String).m_1 { 737 | case "\"+\"": 738 | fmt.Println("here on 737") 739 | tmp0 = first(rest(lst)) 740 | tmp1 = first(rest(rest(lst))) 741 | fmt.Printf("tmp0: %T%v\n", tmp0, tmp0) 742 | fmt.Printf("tmp1: %T%v\n", tmp1, tmp1) 743 | switch tmp0.(type) { 744 | case SExpression_Int: 745 | fmt.Println("here on 741") 746 | switch tmp1.(type) { 747 | case SExpression_Int: 748 | fmt.Println("here on 743") 749 | n, err := strconv.ParseInt(tmp0.(SExpression_Int).m_1, 10, 64) 750 | if err != nil { 751 | return SExpression_Error{"integer parse error 0", 0} 752 | } 753 | m, err := strconv.ParseInt(tmp1.(SExpression_Int).m_1, 10, 64) 754 | if err != nil { 755 | return SExpression_Error{"integer parse error 1", 0} 756 | } 757 | fmt.Printf("here: %d\n", n + m) 758 | return sexpression_boxint(n + m) 759 | //case SExpression_Float: 760 | default: 761 | return SExpression_Error{"mismatched types 0", 0} 762 | } 763 | case SExpression_Float: 764 | return SExpression_Error{"mismatched types 1", 0} 765 | default: 766 | return SExpression_Error{"mismatched types 2", 0} 767 | } 768 | default: 769 | fmt.Printf("weird, but hd.m_1 is %v\n", hd.(SExpression_String).m_1) 770 | fmt.Printf("weird, but hd.m_1 is %v\n", hd.(SExpression_String).m_1 == "+") 771 | /* 772 | case "-": 773 | return sexpression_boxint(first(lst).m_1 - first(rest(lst)).m_1) 774 | 775 | case "*": 776 | return sexpression_boxint(first(lst).m_1 * first(rest(lst)).m_1) 777 | 778 | case "/": 779 | return sexpression_boxint(first(lst).m_1 / first(rest(lst)).m_1) 780 | case "<": 781 | return sexpression_boxbool(first(lst).m_1 < first(rest(lst)).m_1) 782 | 783 | case "<=": 784 | return sexpression_boxbool(first(lst).m_1 <= first(rest(lst)).m_1) 785 | 786 | case ">": 787 | return sexpression_boxbool(first(lst).m_1 > first(rest(lst)).m_1) 788 | 789 | case ">=": 790 | return sexpression_boxbool(first(lst).m_1 >= first(rest(lst)).m_1) 791 | 792 | case "<>": 793 | return sexpression_boxbool(first(lst).m_1 != first(rest(lst)).m_1) 794 | */ 795 | } 796 | return SExpression_Null { } 797 | } 798 | 799 | func main() { 800 | src := `(define main 801 | 802 | (returns (type integer)) 803 | (begin 804 | (call (identifier printf) (string "this is a test\n")) 805 | (call (identifier printf) (string "test test test\n")) 806 | (integer 0)))` 807 | src1 := `(define main (returns (type integer)) (begin (integer 0)))` 808 | src2 := `(call "+" (integer 10) (call "+" (integer 20) (integer 3)))` 809 | roundtrip(src) 810 | roundtrip(src1) 811 | roundtrip(src2) 812 | v := sexpression_read(src2, 0) 813 | fmt.Printf("%v\n", mueval(v, SExpression_Null { })) 814 | } 815 | 
--------------------------------------------------------------------------------
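A note on the commented-out operator cases at the bottom of muapply in labs/mini-interp.go: as written they could not compile, because SExpression_Int boxes its value as a string (m_1) that has to go through strconv.ParseInt, exactly as the existing "+" case does. Below is a minimal, hedged sketch of how those cases might be factored out; it reuses the SExpression types and the sexpression_boxint/sexpression_boxbool helpers defined in labs/mini-interp.go, and the helper name muapply_int2 is hypothetical, not part of the repository. As with "+", the operator lexeme still carries its surrounding double quotes.

// sketch only: dispatch the remaining integer operators after parsing both operands once
func muapply_int2(op string, a SExpression, b SExpression) SExpression {
	ai, ok := a.(SExpression_Int)
	if !ok {
		return SExpression_Error{"mismatched types 0", 0}
	}
	bi, ok := b.(SExpression_Int)
	if !ok {
		return SExpression_Error{"mismatched types 1", 0}
	}
	// parse both boxed integers, mirroring the "+" case in muapply
	n, err := strconv.ParseInt(ai.m_1, 10, 64)
	if err != nil {
		return SExpression_Error{"integer parse error 0", 0}
	}
	m, err := strconv.ParseInt(bi.m_1, 10, 64)
	if err != nil {
		return SExpression_Error{"integer parse error 1", 0}
	}
	switch op {
	case "\"-\"":
		return sexpression_boxint(n - m)
	case "\"*\"":
		return sexpression_boxint(n * m)
	case "\"/\"":
		if m == 0 {
			return SExpression_Error{"division by zero", 0}
		}
		return sexpression_boxint(n / m)
	case "\"<\"":
		return sexpression_boxbool(n < m)
	case "\"<=\"":
		return sexpression_boxbool(n <= m)
	case "\">\"":
		return sexpression_boxbool(n > m)
	case "\">=\"":
		return sexpression_boxbool(n >= m)
	case "\"<>\"":
		return sexpression_boxbool(n != m)
	default:
		return SExpression_Error{"unknown operator", 0}
	}
}

With a helper along these lines, each remaining branch of muapply would reduce to a single call passing the quoted operator lexeme and the two evaluated operands, in the same shape as the current "+" branch.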
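Similarly, the read_list comments in the carML source above note that the fixed 128-slot buffer plus shrink_array is a stand-in for a growable append (or a deque). In the generated Go, the same loop could lean on the built-in append and drop both shrink_array and the overrun error. This is a sketch only, against the SExpression types and readers defined in labs/mini-interp.go; read_list_append is a hypothetical name, not part of the repository.

// sketch only: read_list with a growable slice instead of a fixed 128-slot array
func read_list_append(src string, start int) SExpression {
	var ret []SExpression
	var offset int = start
	tmp := sexpression_read(src, start+1)
	for !is_null_or_endp(tmp) {
		ret = append(ret, tmp)
		// same offset bookkeeping as read_list above
		switch tmp := tmp.(type) {
		case SExpression_List:
			offset = tmp.m_2
		case SExpression_Atom:
			offset = tmp.m_2 + len(tmp.m_1)
		case SExpression_String:
			offset = tmp.m_2 + len(tmp.m_1)
		case SExpression_Int:
			offset = tmp.m_2 + len(tmp.m_1)
		case SExpression_Float:
			offset = tmp.m_2 + len(tmp.m_1)
		case SExpression_Bool:
			if tmp.m_1 {
				offset = tmp.m_2 + 4
			} else {
				offset = tmp.m_2 + 5
			}
		case SExpression_Char:
			offset = tmp.m_2 + len(tmp.m_1)
		case SExpression_EndList:
			offset = tmp.m_1 + 1
		default:
			return tmp
		}
		tmp = sexpression_read(src, offset)
	}
	return SExpression_List{ret, offset + 1}
}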