├── .gitignore
├── .hgignore
├── .hgtags
├── HISTORY.markdown
├── LICENSE
├── Makefile
├── README.markdown
├── bin
    ├── hexout
    ├── inhex
    ├── tamsin
    └── wrap
├── c_src
    ├── dict.c
    ├── dict.h
    ├── scanner.c
    ├── scanner.h
    ├── tamsin.c
    ├── tamsin.h
    ├── term.c
    └── term.h
├── doc
    ├── 6502-sketch.tamsin
    ├── Advanced_Features.markdown
    ├── Case_Study.markdown
    ├── Error_Reporting.markdown
    ├── Excessive_Tests.markdown
    ├── Micro-Tamsin.markdown
    ├── Mini-Tamsin.markdown
    ├── Notes.markdown
    ├── Philosophy.markdown
    ├── System_Module.markdown
    ├── TODO.markdown
    ├── Tamsin.markdown
    └── Tested_Examples.markdown
├── eg
    ├── alg-expr1.tamsin
    ├── alg-expr2.tamsin
    ├── alg-expr3.tamsin
    ├── backtrack.tamsin
    ├── bitpair.tamsin
    ├── bits.tamsin
    ├── blerf.tamsin
    ├── change-buffer.tamsin
    ├── csv_extract.tamsin
    ├── csv_parse.tamsin
    ├── escape.tamsin
    ├── eval-bool-expr.tamsin
    ├── exciting-long.tamsin
    ├── exciting.tamsin
    ├── exciting.txt
    ├── expector.tamsin
    ├── foobar.tamsin
    ├── hello-world.tamsin
    ├── list-of-chars.tamsin
    ├── list-sugar2.tamsin
    ├── modules.tamsin
    ├── names.csv
    ├── pipeline.tamsin
    ├── prod-branches.tamsin
    ├── reverse.tamsin
    ├── sexpr-eval.tamsin
    ├── store.tamsin
    ├── zeroes-concat.tamsin
    └── zeroes.tamsin
├── fixture
    ├── bootstrapped.markdown
    ├── compiler.py.markdown
    ├── compiler.tamsin.markdown
    ├── micro-tamsin.markdown
    ├── mini-tamsin.markdown
    └── tamsin.py.markdown
├── lib
    ├── list.tamsin
    ├── tamsin_analyzer.tamsin
    ├── tamsin_parser.tamsin
    └── tamsin_scanner.tamsin
├── mains
    ├── analyzer.tamsin
    ├── compiler.tamsin
    ├── desugarer.tamsin
    ├── grammar.tamsin
    ├── micro-tamsin.tamsin
    ├── mini-tamsin.tamsin
    ├── parser.tamsin
    └── scanner.tamsin
├── src
    └── tamsin
    │   ├── __init__.py
    │   ├── analyzer.py
    │   ├── ast.py
    │   ├── backends
    │       ├── __init__.py
    │       └── c.py
    │   ├── buffer.py
    │   ├── codegen.py
    │   ├── codenode.py
    │   ├── compiler.py
    │   ├── desugarer.py
    │   ├── event.py
    │   ├── interpreter.py
    │   ├── main.py
    │   ├── parser.py
    │   ├── scanner.py
    │   ├── sysmod.py
    │   └── term.py
├── test-codegen.sh
└── test.sh


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.o
3 | *.a
4 | bin/tamsin-*
5 | bin/bootstrapped-*
6 | bin/micro-tamsin
7 | tmp/
8 | 


--------------------------------------------------------------------------------
/.hgignore:
--------------------------------------------------------------------------------
 1 | syntax: glob
 2 | 
 3 | *.pyc
 4 | *.o
 5 | *.a
 6 | 
 7 | bin/tamsin-*
 8 | bin/bootstrapped-*
 9 | bin/micro-tamsin
10 | tmp/
11 | 


--------------------------------------------------------------------------------
/.hgtags:
--------------------------------------------------------------------------------
1 | 8c5c38158bc6d671345851015aa15a71f5cd9aa1 0.1
2 | b89a9c3fc2e841573dae7ce7e51deb81313c8a40 0.2
3 | db0e6c779d74337956106874d1ef91385fe86e7d 0.3
4 | c91de5aea6dea0fb9d609cd76ccc4d153f2a3e5a 0.4
5 | 7597a8c4b1c696a0afb96aa496fcec5e36beeebf 0.5
6 | 


--------------------------------------------------------------------------------
/HISTORY.markdown:
--------------------------------------------------------------------------------
  1 | Tamsin Release History
  2 | ======================
  3 | 
  4 | 0.5-2017.0502
  5 | -------------
  6 | 
  7 | This is an interim release, created because the tests pass here, even
  8 | though not everything planned for the next release has been achieved.
  9 | 
 10 | ### language ###
 11 | 
 12 | *   The RHS of → can be a pattern term.
 13 | *   "Proper quoted" strings.
 14 | 
 15 | ### implementations ###
 16 | 
 17 | *   `mini-tamsin.tamsin` is an interpreter for "Mini-Tamsin", written in Tamsin.
 18 | *   Better error reporting.
 19 | *   Improvements or bugfixes in the C-language implementation of `$:unquote`.
 20 | *   Tamsin programs can handle streams on input and produce streams on output.
 21 | *   Begun work on a better C-emitting backend.
 22 | *   Better scanning; buffers are more sophisticated and track some state themselves.
 23 | 
 24 | 0.5
 25 | ---
 26 | 
 27 | ### language ###
 28 | 
 29 | *   EOF is no longer a special kind of term; it is no longer exposed, as
 30 |     a value, to Tamsin programs.  (`$:eof` returns `''` on success.)
 31 | *   Prolog/Erlang-style list sugar for terms, in patterns as well.
 32 | *   When a new scanner is switched to using `using`, that scanner defaults
 33 |     to the `$:utf8` scanner for *its* scanning.  This prevents the common
 34 |     shooting-self-in-foot error of selecting a production that is not
 35 |     itself `using` another scanner (which would result in an infinite loop
 36 |     of the production scanner trying to use itself as its subsidiary
 37 |     scanner.)
 38 | 
 39 | ### implementation ###
 40 | 
 41 | *   `struct term *`s are (almost) always `const` in compiled Tamsin
 42 |     programs (for better sharing; we don't need to make copies of them)
 43 | *   related: variable-matching is more efficient (directly updates an array
 44 |     of terms, instead of searching for the variable by name)
 45 | *   related: creating new atoms uses hash-consing, so that no new
 46 |     `struct term` for the atom is allocated if one already exists (the
 47 |     existing one is shared.)  This reduces memory usage significantly.
 48 | 
 49 | 0.4
 50 | ---
 51 | 
 52 | ### language ###
 53 | 
 54 | *   Added `@` (work on different implicit buffer.)
 55 | 
 56 | ### modules ###
 57 | 
 58 | *   Added `$:gensym`.
 59 | *   Added `$:hexchar`.
 60 | *   Added `$:format_octal`.
 61 | *   Added `$:length`.
 62 | *   Added `list:append`.
 63 | 
 64 | ### implementations ###
 65 | 
 66 | *   Tamsin-to-C compiler written in Tamsin (`mains/compiler.tamsin`) passes
 67 |     all tests, and can compile itself.
 68 | *   Refactored `$` functions into `tamsin.sysmod` module in Python version.
 69 | 
 70 | 0.3
 71 | ---
 72 | 
 73 | ### language ###
 74 | 
 75 | *   Defined what it means to `reprify` a term.
 76 | *   Clarified some matters as implementation-defined.
 77 | 
 78 | ### modules ###
 79 | 
 80 | *   `$:equal` now does deep equality of arbitrary ground terms.
 81 | *   `$:repr` added.
 82 | *   `$:reverse` added.
 83 | *   Some standard modules ship in the distribution: `list`,
 84 |     `tamsin_scanner`, and `tamsin_parser`.
 85 | 
 86 | ### implementations ###
 87 | 
 88 | *   Support for user-defined modules.
 89 | *   `tamsin` can take more than one source file on command line; this
 90 |     is how external modules are supported (by this implementation.)
 91 | *   Cleaned-up testing framework; Tamsin versions of scanner, grammar,
 92 |     parser, desugarer, analyzer, and compiler found in `mains` subdir.
 93 | *   Most `tamsin` verbs, and their versions in Tamsin, corresponding to
 94 |     intermediate phases, output reprified terms.
 95 | *   `tamsin` significantly re-factored so that the interpreter and
 96 |     compiler are more similar, and generating code for production branches
 97 |     is easier.
 98 | *   Added Tamsin-to-C compiler written in Tamsin, which can pass the first
 99 |     43 or so tests from the spec ("Mini-Tamsin").
100 | 
101 | 0.2
102 | ---
103 | 
104 | ### language ###
105 | 
106 | *   Module-member syntax changed from `.` to `:`.
107 | *   `:` can be used without any module on the LHS to refer to a production
108 |     in the current module.
109 | *   Added "fold" forms, binary `/` and ternary `//`.
110 | 
111 | ### modules ###
112 | 
113 | *   `$:char` scanner dropped.  Instead, there are `$:byte` (which always
114 |     returns 8-bit-clean bytes) and `$:utf8` (which always returns UTF-8
115 |     sequences.)
116 | *   Added `$:equal(L,R)`.
117 | *   `$:unquote(X,L,R)` takes three arguments now.
118 | 
119 | ### implementations ###
120 | 
121 | *   Beginnings of user-defined module support (very rudimentary, not to be
122 |     used.)
123 | *   Code in `libtamsin` is much more robust.  AST-builder written in Tamsin now
124 |     compiles and runs correctly.
125 | *   Added a desugaring phase to `tamsin`, and a desugarer written in Tamsin.
126 | *   Added Micro-Tamsin interpreter, written in Tamsin.  Can pass the first
127 |     30 tests from the spec.
128 | 
129 | 0.1
130 | ---
131 | 
132 | Initial release.
133 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The contents of the Tamsin distribution are distributed under the following
 2 | three licenses.
 3 | 
 4 | The documentation (in the `doc/` subdirectory) is covered by the following
 5 | BSD-compatible license, modelled after the "Report on the Programming
 6 | Language Haskell 98" license:
 7 | 
 8 | -----------------------------------------------------------------------------
 9 | 
10 |   Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
11 | 
12 |   The authors intend this Report to belong to the entire Tamsin
13 |   community, and so we grant permission to copy and distribute it for
14 |   any purpose, provided that it is reproduced in its entirety,
15 |   including this Notice.  Modified versions of this Report may also be
16 |   copied and distributed for any purpose, provided that the modified
17 |   version is clearly presented as such, and that it does not claim to
18 |   be a definition of the Tamsin Programming Language.
19 | 
20 | -----------------------------------------------------------------------------
21 | 
22 | The source code for the reference interpreter and supporting tools (in the
23 | `src` and `c_src` subdirectories) is covered under the following BSD-style
24 | license:
25 | 
26 | -----------------------------------------------------------------------------
27 | 
28 |    Copyright (c)2014, Chris Pressey, Cat's Eye Technologies.
29 |    All rights reserved.
30 |  
31 |    Redistribution and use in source and binary forms, with or without
32 |    modification, are permitted provided that the following conditions
33 |    are met:
34 | 
35 |      Redistributions of source code must retain the above copyright
36 |      notices, this list of conditions and the following disclaimer.
37 | 
38 |      Redistributions in binary form must reproduce the above copyright
39 |      notices, this list of conditions, and the following disclaimer in
40 |      the documentation and/or other materials provided with the
41 |      distribution.
42 | 
43 |      Neither the names of the copyright holders nor the names of their
44 |      contributors may be used to endorse or promote products derived
45 |      from this software without specific prior written permission. 
46 | 
47 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
48 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES INCLUDING, BUT NOT
49 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
50 |    FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
51 |    COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
52 |    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
53 |    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
54 |    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
55 |    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 |    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
57 |    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 |    POSSIBILITY OF SUCH DAMAGE.
59 | 
60 | -----------------------------------------------------------------------------
61 | 
62 | Every example source in the `eg` directory specifies what its own licensing
63 | terms are.  Many of them are in the public domain, in which case the following
64 | UNLICENSE applies to them.  Others may be under other licenses; see the
65 | specific file in question for more information.
66 | 
67 | -----------------------------------------------------------------------------
68 | 
69 | This is free and unencumbered software released into the public domain.
70 | 
71 | Anyone is free to copy, modify, publish, use, compile, sell, or
72 | distribute this software, either in source code form or as a compiled
73 | binary, for any purpose, commercial or non-commercial, and by any
74 | means.
75 | 
76 | In jurisdictions that recognize copyright laws, the author or authors
77 | of this software dedicate any and all copyright interest in the
78 | software to the public domain. We make this dedication for the benefit
79 | of the public at large and to the detriment of our heirs and
80 | successors. We intend this dedication to be an overt act of
81 | relinquishment in perpetuity of all present and future rights to this
82 | software under copyright law.
83 | 
84 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
85 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
86 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
87 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
88 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
89 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
90 | OTHER DEALINGS IN THE SOFTWARE.
91 | 
92 | For more information, please refer to <http://unlicense.org/>
93 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | CC?=gcc
 2 | CFLAGS?=-ansi -g -Ic_src -Lc_src
 3 | 
 4 | # Stricter flags, used by the explicit object rules below.
 5 | LCFLAGS?=-ansi -pedantic -g -Wall -Werror -Ic_src -Lc_src
 6 | 
 7 | ##### libtamsin #####
 8 | 
 9 | OBJECTS=c_src/dict.o c_src/scanner.o c_src/term.o c_src/tamsin.o
10 | 
11 | # Every executable this Makefile can build; `clean` removes all of them.
12 | PROGS=bin/tamsin-compiler bin/bootstrapped-compiler \
13 |       bin/micro-tamsin bin/mini-tamsin
14 | 
15 | # `all` and `clean` name actions, not files.
16 | .PHONY: all clean
17 | 
18 | all: c_src/libtamsin.a
19 | 
20 | # Note: c_src/dict.o has no explicit rule, so make's built-in rule
21 | # (which uses CFLAGS rather than LCFLAGS) builds it.
22 | 
23 | c_src/scanner.o: c_src/tamsin.h c_src/scanner.c
24 | 	$(CC) $(LCFLAGS) -c c_src/scanner.c -o $@
25 | 
26 | c_src/term.o: c_src/tamsin.h c_src/term.c
27 | 	$(CC) $(LCFLAGS) -c c_src/term.c -o $@
28 | 
29 | c_src/tamsin.o: c_src/tamsin.h c_src/tamsin.c
30 | 	$(CC) $(LCFLAGS) -c c_src/tamsin.c -o $@
31 | 
32 | c_src/libtamsin.a: $(OBJECTS)
33 | 	ar -r $@ $(OBJECTS)
34 | 
35 | 
36 | ##### executables #####
37 | 
38 | # Each recipe below stages its generated C in tmp/foo.c.  tmp/ may not
39 | # exist yet, so it is created on demand.
40 | 
41 | TAMSIN_COMPILER_LIBS=lib/list.tamsin lib/tamsin_scanner.tamsin \
42 |                      lib/tamsin_parser.tamsin lib/tamsin_analyzer.tamsin
43 | bin/tamsin-compiler: c_src/libtamsin.a c_src/tamsin.h \
44 |                      $(TAMSIN_COMPILER_LIBS) \
45 |                      mains/compiler.tamsin
46 | 	mkdir -p tmp
47 | 	bin/tamsin compile $(TAMSIN_COMPILER_LIBS) mains/compiler.tamsin > tmp/foo.c
48 | 	$(CC) $(CFLAGS) tmp/foo.c -o $@ -ltamsin
49 | 
50 | 
51 | bin/bootstrapped-compiler: c_src/libtamsin.a c_src/tamsin.h \
52 |                            bin/tamsin-compiler \
53 |                            $(TAMSIN_COMPILER_LIBS) \
54 |                            mains/compiler.tamsin
55 | 	mkdir -p tmp
56 | 	bin/tamsin-compiler $(TAMSIN_COMPILER_LIBS) mains/compiler.tamsin > tmp/foo.c
57 | 	$(CC) $(CFLAGS) tmp/foo.c -o $@ -ltamsin
58 | 
59 | 
60 | MICRO_TAMSIN_LIBS=lib/list.tamsin lib/tamsin_scanner.tamsin \
61 |                   lib/tamsin_parser.tamsin
62 | bin/micro-tamsin: c_src/libtamsin.a c_src/tamsin.h \
63 |                   $(MICRO_TAMSIN_LIBS) \
64 |                   mains/micro-tamsin.tamsin
65 | 	mkdir -p tmp
66 | 	bin/tamsin compile $(MICRO_TAMSIN_LIBS) mains/micro-tamsin.tamsin > tmp/foo.c
67 | 	$(CC) $(CFLAGS) tmp/foo.c -o $@ -ltamsin
68 | 
69 | 
70 | MINI_TAMSIN_LIBS=lib/list.tamsin lib/tamsin_scanner.tamsin \
71 |                  lib/tamsin_parser.tamsin
72 | bin/mini-tamsin: c_src/libtamsin.a c_src/tamsin.h \
73 |                  $(MINI_TAMSIN_LIBS) \
74 |                  mains/mini-tamsin.tamsin
75 | 	mkdir -p tmp
76 | 	bin/tamsin compile $(MINI_TAMSIN_LIBS) mains/mini-tamsin.tamsin > tmp/foo.c
77 | 	$(CC) $(CFLAGS) tmp/foo.c -o $@ -ltamsin
78 | 
79 | clean:
80 | 	rm -f c_src/libtamsin.a c_src/*.o $(PROGS)
81 | 


--------------------------------------------------------------------------------
/README.markdown:
--------------------------------------------------------------------------------
  1 | Tamsin
  2 | ======
  3 | 
  4 | Tamsin is an oddball little language that can't decide if it's a
  5 | [meta-language](doc/Philosophy.markdown#meta-language), a
  6 | [programming language](doc/Philosophy.markdown#programming-language), or a
  7 | [rubbish lister](doc/Philosophy.markdown#rubbish-lister).
  8 | 
  9 | Its primary goal is to allow the rapid development of **parsers**,
 10 | **static analyzers**, **interpreters**, and **compilers**, and to allow them
 11 | to be expressed *compactly*.  Golf your grammar!  (Or write it like a decent
 12 | human being, if you must.)
 13 | 
 14 | The current released version of Tamsin is 0.5-2017.0502.
 15 | As indicated by the 0.x version number, it is a **work in progress**,
 16 | with the usual caveat that things may change rapidly (and that version 0.6 might
 17 | look completely different.)  See [HISTORY](HISTORY.markdown)
 18 | for a list of major changes.
 19 | 
 20 | Code Examples
 21 | -------------
 22 | 
 23 | Make a story more exciting in **1 line of code**:
 24 | 
 25 |     main = ("." & '!' | "?" & '?!' | any)/''.
 26 | 
 27 | Parse an algebraic expression for syntactic correctness in **4 lines of code**:
 28 | 
 29 |     main = (expr0 & eof & 'ok').
 30 |     expr0 = expr1 & {"+" & expr1}.
 31 |     expr1 = term & {"*" & term}.
 32 |     term = "x" | "y" | "z" | "(" & expr0 & ")".
 33 | 
 34 | Translate an algebraic expression to RPN (Reverse Polish Notation) in
 35 | **7 lines of code**:
 36 | 
 37 |     main = expr0 → E & walk(E).
 38 |     expr0 = expr1 → E1 & {"+" & expr1 → E2 & E1 ← add(E1,E2)} & E1.
 39 |     expr1 = term → E1 & {"*" & term → E2 & E1 ← mul(E1,E2)} & E1.
 40 |     term = "x" | "y" | "z" | "(" & expr0 → E & ")" & E.
 41 |     walk(add(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' +'.
 42 |     walk(mul(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' *'.
 43 |     walk(X) = return ' '+X.
 44 | 
 45 | Parse a CSV file (handling quoted commas and quotes correctly) and write
 46 | out the 2nd-last field of each record — in **11 lines of code**:
 47 | 
 48 |     main = line → L & L ← lines(nil, L) &
 49 |            {"\n" & line → M & L ← lines(L, M)} & extract(L) & ''.
 50 |     line = field → F & {"," & field → G & F ← fields(G, F)} & F.
 51 |     field = strings | bare.
 52 |     strings = string → T & {string → S & T ← T + '"' + S} & T.
 53 |     string = "\"" & (!"\"" & any)/'' → T & "\"" & T.
 54 |     bare = (!(","|"\n") & any)/''.
 55 |     extract(lines(Ls, L)) = extract(Ls) & extract_field(L).
 56 |     extract(L) = L.
 57 |     extract_field(fields(L, fields(T, X))) = print T.
 58 |     extract_field(X) = X.
 59 | 
 60 | Evaluate an (admittedly trivial) S-expression based language in
 61 | **15 lines of code**:
 62 | 
 63 |     main = sexp → S using scanner & reverse(S, nil) → SR & eval(SR).
 64 |     scanner = ({" "} & ("(" | ")" | $:alnum/'')) using $:utf8.
 65 |     sexp = $:alnum | list.
 66 |     list = "(" & sexp/nil/pair → L & ")" & L.
 67 |     head(pair(A, B)) = A.
 68 |     tail(pair(A, B)) = B.
 69 |     cons(A, B) = return pair(A, B).
 70 |     eval(pair(head, pair(X, nil))) = eval(X) → R & head(R).
 71 |     eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R).
 72 |     eval(pair(cons, pair(A, pair(B, nil)))) =
 73 |        eval(A) → AE & eval(B) → BE & return pair(AE, BE).
 74 |     eval(X) = X.
 75 |     reverse(pair(H, T), A) = reverse(H, nil) → HR & reverse(T, pair(HR, A)).
 76 |     reverse(nil, A) = A.
 77 |     reverse(X, A) = X.
 78 | 
 79 | Interpret a small subset of Tamsin in
 80 | **[30 lines of code](mains/micro-tamsin.tamsin)**
 81 | (not counting the [included batteries](doc/Philosophy.markdown#batteries-included).)
 82 | 
 83 | Compile Tamsin to C in
 84 | **[563 lines of code](mains/compiler.tamsin)**
 85 | (again, not counting the included batteries.)
 86 | 
 87 | For more information
 88 | --------------------
 89 | 
 90 | If the above has piqued your curiosity, you may want to read the specification,
 91 | which contains many more small examples written to demonstrate (and test) the
 92 | syntax and behavior of Tamsin:
 93 | 
 94 | *   [The Tamsin Language Specification](doc/Tamsin.markdown)
 95 | 
 96 | Note that this is the current development version of the specification, and
 97 | it may differ from the examples in this document.
 98 | 
 99 | Quick Start
100 | -----------
101 | 
102 | The Tamsin reference repository is [hosted on Codeberg](https://codeberg.org/catseye/Tamsin).
103 | 
104 | This repository contains the reference implementation of Tamsin, called
105 | `tamsin`, written in Python 2.7.  It can both interpret a Tamsin program and
106 | compile a program written in Tamsin to C.
107 | 
108 | The distribution also contains a Tamsin-to-C compiler written in Tamsin.  It
109 | passes all the tests, and can compile itself.
110 | 
111 | While the interpreter is fine for prototyping, note that some informal
112 | benchmarking revealed the compiled C programs to be about 30x faster.  **Note**
113 | however that while the compiler passes all the tests, it is still largely
114 | unproven (e.g. its UTF-8 support is not RFC 3629-compliant), so it should be
115 | considered a **proof of concept**.
116 | 
117 | To start using `tamsin`,
118 | 
119 | *   Clone the repository — `git clone https://codeberg.org/catseye/Tamsin`
120 | *   Either:
121 |     *   Put the repo's `bin` directory on your `$PATH`, or
122 |     *   Make a symbolic link to `bin/tamsin` somewhere already on your `$PATH`.
123 | *   Errr... that's it.
124 | 
125 | Then you can run `tamsin` like so:
126 | 
127 | *   `tamsin eg/csv_parse.tamsin < eg/names.csv`
128 | 
129 | To use the compiler, you'll need GNU make and `gcc` installed.  Type
130 | 
131 | *   `make`
132 | 
133 | to build the runtime library.  You can then compile to C and compile the C to
134 | an executable and run the executable all in one step, like so:
135 | 
136 | *   `tamsin loadngo eg/csv_extract.tamsin < eg/names.csv`
137 | 
138 | Design Goals
139 | ------------
140 | 
141 | *   Allow parsers, static analyzers, interpreters, and compilers to be
142 |     quickly prototyped.  (And in the future, processor simulators and VM's
143 |     and such things too.)
144 | *   Allow writing these things very compactly.
145 | *   Allow writing anything using only recursive-descent parsing techniques
146 |     (insofar as this is possible.)
147 | *   Allow writing parsers that look very similar to the grammar of the
148 |     language being parsed, so that the structure of the language can be
149 |     clearly seen.
150 | *   Provide means to solve practical problems.
151 | *   Keep the language simple — the grammar should fit on a page, ideally.
152 | *   Recognize that the preceding two goals are in tension.
153 | *   Have a relatively simple reference implementation (currently less than
154 |     5 KLoC, including everything — debugging support and the C runtime
155 |     used by the compiler and the Tamsin modules and implementations.)
156 | 
157 | License
158 | -------
159 | 
160 | BSD-style license; see the file [LICENSE](LICENSE).
161 | 
162 | Related work
163 | ------------
164 | 
165 | *   [CoCo/R](http://www.scifac.ru.ac.za/coco/) (parser generation)
166 | *   [Parsec](http://www.haskell.org/haskellwiki/Parsec) (parser combinators)
167 | *   [Perl](http://perl.org/) (rubbish listing)
168 | *   [Prolog](https://en.wikipedia.org/wiki/Prolog) (pattern-matching, terms,
169 |     backtracking(-ish...))
170 | *   [K](https://github.com/kevinlawler/kona) (similar feel; Tamsin
171 |     is a _vertical language_)
172 | *   [Cat's Eye Technologies](http://catseye.tc)' esoteric and experimental
173 |     languages:
174 |     *   [Squishy2K](http://catseye.tc/node/Squishy2K)
175 |     *   [Arboretuum](http://catseye.tc/node/Arboretuum)
176 |     *   [Treacle](http://catseye.tc/node/Treacle)
177 | 


--------------------------------------------------------------------------------
/bin/hexout:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # converts raw bytes on input to hex couples on output.
 4 | 
 5 | import sys
 6 | 
 7 | while True:
 8 |     byte = sys.stdin.read(1)
 9 |     if len(byte) < 1:
10 |         sys.exit(0)
11 |     sys.stdout.write('%02x' % ord(byte))
12 | 


--------------------------------------------------------------------------------
/bin/inhex:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # converts hex couples on input to raw bytes on output.
 4 | 
 5 | import sys
 6 | 
 7 | while True:
 8 |     hex = sys.stdin.read(2)
 9 |     if len(hex) < 2:
10 |         sys.exit(0)
11 |     sys.stdout.write(chr(int(hex, 16)))
12 | 


--------------------------------------------------------------------------------
/bin/tamsin:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from os.path import realpath, dirname, join
 4 | import sys
 5 | 
 6 | tamsin_dir = join(dirname(realpath(sys.argv[0])), '..')  # parent of the (symlink-resolved) script's directory
 7 | sys.path.insert(0, join(tamsin_dir, 'src'))  # lets `import tamsin` find <tamsin_dir>/src/tamsin
 8 | 
 9 | from tamsin.main import main  # must follow the sys.path change above
10 | 
11 | 
12 | if __name__ == '__main__':
13 |     main(sys.argv[1:], tamsin_dir=tamsin_dir)  # pass along everything after the program name
14 | 


--------------------------------------------------------------------------------
/bin/wrap:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | 
 5 | WIDTH=120
 6 | 
 7 | for line in sys.stdin:
 8 |     line = line.rstrip('\n')
 9 |     while len(line) > WIDTH:
10 |         print line[:WIDTH]
11 |         line = line[WIDTH:]
12 |     print line
13 | 
14 | # 123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
15 | 


--------------------------------------------------------------------------------
/c_src/dict.c:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | 
  5 | #include "term.h"
  6 | #include "dict.h"
  7 | 
  8 | struct chain {                 /* one link in a bucket's singly-linked list */
  9 |     struct chain *next;        /* following link in the same bucket, or NULL */
 10 |     const struct term *value;  /* stored term; its atom text serves as the key */
 11 | };
 12 | 
 13 | struct dict *dict_new(int num_buckets) {
 14 |     struct dict *table = malloc(sizeof(struct dict));
 15 |     int b;
 16 | 
 17 |     /* Allocate the bucket array, then mark every bucket as empty. */
 18 |     table->num_buckets = num_buckets;
 19 |     table->bucket = malloc(sizeof(struct chain *) * table->num_buckets);
 20 |     for (b = 0; b < table->num_buckets; b++)
 21 |         table->bucket[b] = NULL;
 22 |     /* NOTE(review): neither malloc result is checked for NULL. */
 23 | 
 24 |     return table;
 25 | }
 26 | 
 27 | /*** UTILITIES ***/
 28 | 
 29 | /* Hash function, taken from "Compilers: Principles, Techniques, and Tools"
 30 |  * by Aho, Sethi, & Ullman (a.k.a. "The Dragon Book", 2nd edition.)
 31 |  * Hashes the key_size bytes at key (read as unsigned values, so the result
 32 |  * does not depend on whether plain char is signed) into [0, table_size). */
 33 | static size_t hashpjw(const char *key, size_t key_size, size_t table_size) {
 34 |     size_t i;    /* same type as key_size: no signed/unsigned comparison */
 35 |     unsigned long int h = 0, g;
 36 | 
 37 |     for (i = 0; i < key_size; i++) {
 38 |         h = (h << 4) + (unsigned char)key[i];
 39 |         if ((g = h & 0xf0000000)) {    /* fold bits 28-31 back into the low bits */
 40 |             h = (h ^ (g >> 24)) ^ g;
 41 |         }
 42 |     }
 43 | 
 44 |     return h % table_size;
 45 | }
 46 | 
 47 | /* Allocate a detached chain link holding `value` (its `next` is NULL).
 48 |  * Internal helper, not called directly by client code.
 49 |  */
 50 | static struct chain *
 51 | chain_new(const struct term *value)
 52 | {
 53 |     struct chain *link;
 54 | 
 55 |     link = malloc(sizeof(struct chain));
 56 |     link->value = value;
 57 |     link->next = NULL;
 58 |     return link;
 59 | }
 60 | 
 61 | /*
 62 |  * Store in *b_index the bucket number a particular key hashes to, and in *c
 63 |  * the chain link holding that key (or NULL if it could not be found.)
 64 |  */
 65 | static void
 66 | dict_locate(struct dict *d, const char *key, size_t key_size,
 67 | 	    size_t *b_index, struct chain **c)
 68 | {
 69 |     *b_index = hashpjw(key, key_size, d->num_buckets);
 70 |     for (*c = d->bucket[*b_index]; *c != NULL; *c = (*c)->next) {
 71 |         if ((*c)->value->size == key_size &&    /* lengths must match before bytes are compared */
 72 |             memcmp(key, (*c)->value->atom, key_size) == 0)
 73 |             break;    /* found: *c is left pointing at the matching link */
 74 |     }
 75 | }
 76 | 
 77 | /*** OPERATIONS ***/
 78 | 
 79 | const struct term *
 80 | dict_fetch(struct dict *d, const char *key, size_t key_size)
 81 | {
 82 |     struct chain *link;    /* link holding the key, or NULL */
 83 |     size_t bucket;         /* filled in by dict_locate; not needed here */
 84 | 
 85 |     dict_locate(d, key, key_size, &bucket, &link);
 86 |     if (link == NULL) return NULL;
 87 |     return link->value;
 88 | }
 89 | 
 90 | void
 91 | dict_store(struct dict *d, const struct term *t)
 92 | {   /* insert t, keyed by its atom text (t->atom, t->size) */
 93 |     struct chain *c;
 94 |     size_t i;
 95 |     
 96 |     dict_locate(d, t->atom, t->size, &i, &c);
 97 |     if (c == NULL) {
 98 |         /* Key not present yet: push a new link onto the front of bucket i. */
 99 |         c = chain_new(t);
100 |         c->next = d->bucket[i];
101 |         d->bucket[i] = c;
102 |     } else {
103 |         assert("term already hash consed" == NULL);   /* always false: aborts unless NDEBUG */
104 |         /* Reached only when asserts are compiled out: replace the value. */
105 |         c->value = t;
106 |     }
107 | }
108 | 


--------------------------------------------------------------------------------
/c_src/dict.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
 3 |  * Distributed under a BSD-style license; see LICENSE for more information.
 4 |  */
 5 | 
 6 | #ifndef TAMSIN_DICT_H
 7 | #define TAMSIN_DICT_H
 8 | 
 9 | #include <stdlib.h>
10 | 
11 | struct dict {
12 |     struct chain **bucket;
13 |     size_t num_buckets;
14 | };
15 | 
16 | /*
17 |  * Create a new dictionary.
18 |  * Since this is only used for hash-consing right now, there is only one.
19 |  */
20 | struct dict *dict_new(int);
21 | 
22 | /*
23 |  * Retrieve a value from a dictionary, given its key, or NULL if it's not
24 |  * there.
25 |  */
26 | const struct term *dict_fetch(struct dict *, const char *, size_t);
27 | 
28 | /*
29 |  * Insert a value into a dictionary.
30 |  */
31 | void dict_store(struct dict *, const struct term *);
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/c_src/scanner.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
  3 |  * Distributed under a BSD-style license; see LICENSE for more information.
  4 |  */
  5 | 
  6 | #include "scanner.h"
  7 | #include "term.h"
  8 | #include "tamsin.h"
  9 | 
 10 | struct scanner *scanner_new(const char *buffer, size_t size) {
 11 |     struct scanner *s = malloc(sizeof(struct scanner));
 12 | 
 13 |     /* Scan `size` bytes of `buffer`, starting from the very beginning. */
 14 |     s->buffer = buffer;
 15 |     s->size = size;
 16 |     s->reset_position = 0;
 17 |     s->position = 0;
 18 |     s->engines = NULL;    /* no engine selected yet */
 19 | 
 20 |     return s;
 21 | }
 22 | 
 23 | void scanner_byte_engine(void) {   /* empty body: scan() compares against this function's address to pick byte-at-a-time scanning */
 24 | }
 25 | 
 26 | void scanner_utf8_engine(void) {   /* empty body: scan() compares against this function's address to pick UTF-8 scanning */
 27 | }
 28 | 
 29 | #define UTF_8_LEN_2_MASK  0xe0    /* 0b11100000: lead-byte bits examined for the 2-byte test */
 30 | #define UTF_8_LEN_2_BITS  0xc0    /* 0b11000000: lead byte 110xxxxx, scan() takes 2 bytes */
 31 | 
 32 | #define UTF_8_LEN_3_MASK  0xf0    /* 0b11110000: lead-byte bits examined for the 3-byte test */
 33 | #define UTF_8_LEN_3_BITS  0xe0    /* 0b11100000: lead byte 1110xxxx, scan() takes 3 bytes */
 34 | 
 35 | #define UTF_8_LEN_4_MASK  0xf8    /* 0b11111000: lead-byte bits examined for the 4-byte test */
 36 | #define UTF_8_LEN_4_BITS  0xf0    /* 0b11110000: lead byte 11110xxx, scan() takes 4 bytes */
 37 | 
 38 | /* Return the next token, or &tamsin_EOF at end of input.  The topmost engine
 39 |  * picks the unit: a UTF-8 character (default), a byte, or a production. */
 40 | const struct term *scan(struct scanner *s) {
 41 |     if (s->position < 0 || (size_t)s->position >= s->size) {
 42 |         return &tamsin_EOF;
 43 |     }
 44 |     if (s->engines == NULL || s->engines->production == &scanner_utf8_engine) {
 45 |         unsigned char c = (unsigned char)s->buffer[s->position];
 46 |         size_t remaining = s->size - (size_t)s->position;
 47 |         size_t len = 1;     /* ASCII, and any byte not recognized as a lead byte */
 48 |         const struct term *t;
 49 | 
 50 |         if ((c & UTF_8_LEN_2_MASK) == UTF_8_LEN_2_BITS) {
 51 |             len = 2;
 52 |         } else if ((c & UTF_8_LEN_3_MASK) == UTF_8_LEN_3_BITS) {
 53 |             len = 3;
 54 |         } else if ((c & UTF_8_LEN_4_MASK) == UTF_8_LEN_4_BITS) {
 55 |             len = 4;
 56 |         }
 57 |         if (len > remaining) {
 58 |             len = remaining;    /* truncated sequence: never read past the buffer */
 59 |         }
 60 |         t = term_new_atom(s->buffer + s->position, len);
 61 |         s->position += (int)len;
 62 |         return t;
 63 |     } else if (s->engines->production == &scanner_byte_engine) {
 64 |         return term_new_atom_from_char(s->buffer[s->position++]);
 65 |     } else {
 66 |         /* user-defined scanner: run it with the UTF-8 engine pushed on top */
 67 |         const struct term *save_result = result;
 68 |         int save_reset_position = s->reset_position;
 69 |         void (*production)(void) = s->engines->production;
 70 | 
 71 |         scanner_push_engine(s, &scanner_utf8_engine);
 72 |         production();
 73 |         scanner_pop_engine(s);
 74 |         s->reset_position = save_reset_position;
 75 |         if (!ok) {
 76 |             result = save_result;   /* a failed scan must not clobber `result` */
 77 |             return &tamsin_EOF;
 78 |         }
 79 |         return result;
 80 |     }
 81 | }
 82 | 
 83 | void unscan(struct scanner *s) {
 84 |     s->position = s->reset_position;    /* rewind to the offset saved by the last commit() (0 if none yet) */
 85 | }
 86 | 
 87 | void commit(struct scanner *s) {
 88 |     s->reset_position = s->position;    /* accept what has been scanned: later unscan() calls return here */
 89 | }
 90 | /* Allocate an engine entry for `production`; it is not linked into any stack. */
 91 | struct engine *engine_new(void (*production)(void)) {
 92 |     struct engine *e = malloc(sizeof(struct engine));
 93 |     e->production = production;
 94 |     e->next = NULL;     /* previously left uninitialized */
 95 |     return e;
 96 | }
 97 | 
 98 | /* Make `production` the scanner's current (topmost) engine. */
 99 | void scanner_push_engine(struct scanner *s, void (*production)(void)) {
100 |     struct engine *top = engine_new(production);
101 |     top->next = s->engines;     /* the old top now sits underneath */
102 |     s->engines = top;
103 | }
104 | 
105 | /* Discard the topmost engine; a no-op when the stack is already empty. */
106 | void scanner_pop_engine(struct scanner *s) {
107 |     if (s->engines != NULL) {
108 |         s->engines = s->engines->next;  /* the popped entry is leaked, not freed */
109 |     }
110 | }
111 | 


--------------------------------------------------------------------------------
/c_src/scanner.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
 3 |  * Distributed under a BSD-style license; see LICENSE for more information.
 4 |  */
 5 | 
 6 | #ifndef TAMSIN_SCANNER_H
 7 | #define TAMSIN_SCANNER_H
 8 | 
 9 | #include "term.h"
10 | 
11 | /* -------------------------------------------------------- scanner */
12 | 
 13 | struct engine {
 14 |     void (*production)(void);   /* scanner production; scan() also compares it against the two built-in engines */
 15 |     struct engine *next;        /* the entry beneath this one on the engine stack */
 16 | };
17 | 
 18 | struct scanner {
 19 |     const char *buffer;         /* input being scanned */
 20 |     size_t size;                /* number of bytes in `buffer` */
 21 |     int position;               /* offset of the next unread byte */
 22 |     int reset_position;         /* offset unscan() rewinds to; updated by commit() */
 23 |     struct engine *engines;     /* top of the engine stack; NULL when empty */
 24 | };
25 | 
26 | struct scanner *scanner_new(const char *, size_t);
27 | const struct term *scan(struct scanner *);
28 | void unscan(struct scanner *);
29 | void commit(struct scanner *);
30 | void scanner_push_engine(struct scanner *, void (*)(void));
31 | void scanner_pop_engine(struct scanner *);
32 | void scanner_byte_engine(void);
33 | void scanner_utf8_engine(void);
34 | 
35 | /*
36 |  * This value is never (and should never be) exposed to Tamsin programs!
37 |  * It should not be considered a kind of term, really.  That's just for
38 |  * convenience in this implementation.
39 |  */
40 | extern struct term tamsin_EOF;
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/c_src/tamsin.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
 3 |  * Distributed under a BSD-style license; see LICENSE for more information.
 4 |  */
 5 | 
 6 | #ifndef TAMSIN_TAMSIN_H
 7 | #define TAMSIN_TAMSIN_H
 8 | 
 9 | #include "term.h"
10 | #include "scanner.h"
11 | 
12 | /* -------------------------------------------------------- tamsin */
13 | 
14 | void tamsin_eof(struct scanner *);
15 | void tamsin_any(struct scanner *);
16 | void tamsin_expect(struct scanner *, const struct term *);
17 | void tamsin_alnum(struct scanner *);
18 | void tamsin_upper(struct scanner *);
19 | void tamsin_startswith(struct scanner *, const char *);
20 | const struct term *tamsin_unquote(const struct term *,
21 |                                   const struct term *, const struct term *);
22 | const struct term *tamsin_mkterm(const struct term *, const struct term *);
23 | const struct term *tamsin_equal(const struct term *, const struct term *);
24 | const struct term *tamsin_reverse(const struct term *, const struct term *);
25 | const struct term *tamsin_gensym(const struct term *);
26 | const struct term *tamsin_hexbyte(const struct term *, const struct term *);
27 | const struct term *tamsin_format_octal(const struct term *);
28 | const struct term *tamsin_length(const struct term *);
29 | 
30 | /*
31 |  * Given a possibly non-atom term, return an atom consisting of
32 |  * contents of the given term reprified into an atom.
33 |  *
34 |  * The returned term is NOT always newly allocated.
35 |  */
36 | const struct term *tamsin_repr(const struct term *);
37 | 
38 | int tamsin_isalpha(char);
39 | int tamsin_isupper(char);
40 | int tamsin_isdigit(char);
41 | int tamsin_isalnum(char);
42 | 
43 | /* --------------------------------------------------------------- */
44 | /* global state: result of last action */
45 | 
46 | extern int ok;
47 | extern const struct term *result;
48 | 
49 | #endif
50 | 


--------------------------------------------------------------------------------
/c_src/term.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
  3 |  * Distributed under a BSD-style license; see LICENSE for more information.
  4 |  */
  5 | 
  6 | #include <assert.h>
  7 | #include <string.h>
  8 | #include <stdlib.h>
  9 | #include <stdio.h>
 10 | 
 11 | #include "term.h"
 12 | 
 13 | #include "dict.h"
 14 | 
 15 | /*
 16 |  * this code LEAKS MEMORY all over the place, but that's "ok" because
 17 |  * Tamsin programs "aren't long running".  and it's better than having
 18 |  * buffer overflows.
 19 |  */
 20 | 
 21 | struct dict *hash_conser = NULL;    /* atom table; created on first use by term_new_atom() */
 22 | 
 23 | struct term tamsin_EOF = {"EOF", 3, -1, NULL};    /* sentinel term; scanner.h notes it must never reach Tamsin programs */
 24 | 
 25 | int hits = 0;       /* term_new_atom() calls answered from hash_conser */
 26 | int misses = 0;     /* term_new_atom() calls that had to allocate a new atom */
 27 | 
 28 | struct term *term_single_byte_table = NULL;     /* in this file, referenced only by the commented-out fast path in term_new_atom() */
 29 | char term_single_byte_data[256];                /* backing bytes for that same disabled fast path */
 30 | /*
 31 |  * Return the atom whose text is the `size` bytes at `atom`.  Atoms are
 32 |  * hash-consed: a stored atom is reused, otherwise the text is copied.
 33 |  */
 34 | const struct term *term_new_atom(const char *atom, size_t size) {
 35 |     const struct term *found;
 36 |     struct term *t;
 37 |     char *text;
 38 |     /*
 39 |     if (size == 1) {
 40 |         int i;
 41 |         if (term_single_byte_table == NULL) {
 42 |             term_single_byte_table = malloc(sizeof(struct term) * 256);
 43 |             for (i = 0; i < 256; i++) {
 44 |                 term_single_byte_data[i] = (char)i;
 45 |                 term_single_byte_table[i].atom = term_single_byte_data + i;
 46 |                 term_single_byte_table[i].size = 1;
 47 |                 term_single_byte_table[i].index = -1;
 48 |                 term_single_byte_table[i].subterms = NULL;
 49 |             }
 50 |         }
 51 |         i = ((unsigned char *)atom)[0];
 52 |         return &term_single_byte_table[i];
 53 |     }
 54 |     */
 55 | 
 56 |     if (hash_conser == NULL) {
 57 |         hash_conser = dict_new(2503);   /* table is created lazily */
 58 |     }
 59 |     found = dict_fetch(hash_conser, atom, size);
 60 |     if (found != NULL) {
 61 |         hits++;
 62 |         return found;
 63 |     }
 64 |     t = malloc(sizeof(struct term));    /* miss: build and register a new atom */
 65 |     text = malloc(size > 0 ? size : 1); /* malloc(0) may return NULL, which memcpy must not get */
 66 |     memcpy(text, atom, size);
 67 |     t->atom = text;
 68 |     t->size = size;
 69 |     t->index = -1;
 70 |     t->subterms = NULL;
 71 |     dict_store(hash_conser, t);
 72 |     misses++;
 73 |     return t;
 74 | }
 75 | 
 76 | /* Return the one-byte atom whose text is the character `c`. */
 77 | const struct term *term_new_atom_from_char(char c) {
 78 |     char one[2];    /* NUL-terminated, though only one byte is passed on */
 79 | 
 80 |     one[1] = '\0';
 81 |     one[0] = c;
 82 |     return term_new_atom(one, 1);
 83 | }
 84 | 
 85 | const struct term *term_new_atom_from_cstring(const char *atom) {
 86 |     return term_new_atom(atom, strlen(atom));    /* the terminating NUL is not part of the atom's text */
 87 | }
 88 | 
 89 | /* Build a constructor: a private copy of `tag` plus the caller's `subterms` list. */
 90 | const struct term *term_new_constructor(const char *tag, size_t size,
 91 |                                         struct termlist *subterms)
 92 | {
 93 |     struct term *ctor = malloc(sizeof *ctor);
 94 |     char *tag_copy = malloc(size);
 95 | 
 96 |     ctor->subterms = subterms;      /* the list is stored as given, not copied */
 97 |     ctor->index = -1;               /* -1: not a variable */
 98 |     ctor->size = size;
 99 |     ctor->atom = memcpy(tag_copy, tag, size);
100 | 
101 |     return ctor;
102 | }
103 | 
104 | /* Prepend `term` to the list at `*tl`; `*tl` is updated to the new head. */
105 | void termlist_add_term(struct termlist **tl, const struct term *term) {
106 |     struct termlist *head = malloc(sizeof *head);
107 | 
108 |     head->next = *tl;
109 |     head->term = term;
110 |     *tl = head;
111 | }
112 | 
113 | /* Create a variable term named by a copy of `name`, with variable slot `index`. */
114 | const struct term *term_new_variable(const char *name, size_t size, int index) {
115 |     struct term *var;
116 |     char *name_copy;
117 | 
118 |     assert(index != -1);            /* -1 marks non-variables */
119 |     var = malloc(sizeof *var);
120 |     name_copy = malloc(size);
121 |     var->atom = memcpy(name_copy, name, size);
122 |     var->size = size;
123 |     var->index = index;
124 |     var->subterms = NULL;
125 | 
126 |     return var;
127 | }
128 | 
129 | /* Returns 1 when both terms carry byte-identical atom text, else 0. */
130 | /* Subterms and variable indices are not compared. */
131 | int term_atoms_equal(const struct term *lhs, const struct term *rhs) {
132 |     return lhs->size == rhs->size
133 |         && memcmp(lhs->atom, rhs->atom, lhs->size) == 0;
134 | }
135 | 
136 | /* Returns 1 when the term's atom text equals the C string `string`, else 0. */
137 | int term_atom_cstring_equal(const struct term *lhs, const char *string) {
138 |     size_t wanted = strlen(string);     /* the NUL terminator is not compared */
139 | 
140 |     return lhs->size == wanted && memcmp(lhs->atom, string, wanted) == 0;
141 | }
142 | 
143 | /* Join the text of two atoms (terms without subterms) into a single atom */
144 | /* obtained from term_new_atom(); neither argument is modified. */
145 | const struct term *term_concat(const struct term *lhs, const struct term *rhs) {
146 |     const struct term *t;
147 |     size_t new_size;    /* was int, which narrowed the size_t sum */
148 |     char *new_atom;
149 | 
150 |     assert(lhs->subterms == NULL);
151 |     assert(rhs->subterms == NULL);
152 |     new_size = lhs->size + rhs->size;
153 |     new_atom = malloc(new_size > 0 ? new_size : 1);     /* avoid malloc(0) for '' + '' */
154 |     memcpy(new_atom, lhs->atom, lhs->size);
155 |     memcpy(new_atom + lhs->size, rhs->atom, rhs->size);
156 |     t = term_new_atom(new_atom, new_size);  /* copies any text it keeps, so the scratch buffer can go */
157 |     free(new_atom);
158 |     return t;
159 | }
160 | 
161 | const struct term COMMASPACE = { ", ", 2, -1, NULL };    /* separator atom placed between subterms by term_flatten() */
162 | 
163 | /* Render `t` as one atom: a term without subterms is returned as-is, f(a, b) is spelled out. */
164 | const struct term *term_flatten(const struct term *t) {
165 |     const struct termlist *arg;
166 |     const struct term *text;
167 | 
168 |     if (t->subterms == NULL) {
169 |         return t;                   /* no subterms: nothing to flatten */
170 |     }
171 |     /* start with "tag(", built from an atom copy of the constructor's tag */
172 |     text = term_concat(term_new_atom(t->atom, t->size),
173 |                        term_new_atom_from_char('('));
174 |     arg = t->subterms;
175 |     while (arg != NULL) {
176 |         text = term_concat(text, term_flatten(arg->term));
177 |         if (arg->next != NULL) {
178 |             text = term_concat(text, &COMMASPACE);
179 |         }
180 |         arg = arg->next;
181 |     }
182 |     return term_concat(text, term_new_atom_from_char(')'));
183 | }
184 | 
185 | void term_fput(const struct term *t, FILE *f) {
186 |     const struct term *flat = term_flatten(t);     /* constructors become "tag(arg, ...)" text first */
187 | 
188 |     fwrite(flat->atom, 1, flat->size, f);          /* raw bytes; no newline is appended */
189 | }
190 | 
191 | /*
192 |  * Structural equality of two ground terms.  The tags must have identical
193 |  * text and the subterm lists must have the same length with pairwise-equal
194 |  * elements.  Returns 1 when equal, 0 otherwise.
195 |  */
196 | int term_equal(const struct term *pattern, const struct term *ground)
197 | {
198 |     const struct termlist *p, *g;
199 | 
200 |     assert(pattern->index == -1);   /* neither side may be a variable */
201 |     assert(ground->index == -1);
202 | 
203 |     if (!term_atoms_equal(pattern, ground)) {
204 |         return 0;
205 |     }
206 | 
207 |     /* walk both argument lists in lockstep */
208 |     p = pattern->subterms;
209 |     g = ground->subterms;
210 |     for (; p != NULL && g != NULL; p = p->next, g = g->next) {
211 |         if (!term_equal(p->term, g->term)) {
212 |             return 0;
213 |         }
214 |     }
215 | 
216 |     /* equal only if both lists ran out together (for two atoms: both empty) */
217 |     return p == NULL && g == NULL;
218 | }
219 | 
220 | /*
221 |  * Match `pattern`, which may contain variables, against `ground`.
222 |  * A variable matches any term and stores it in variables[index]; anything
223 |  * else needs identical tag text and pairwise-matching subterm lists.
224 |  * Returns 1 on a match, 0 otherwise.  Bindings written before a mismatch
225 |  * is discovered are not undone.
226 |  */
227 | int term_match_unifier(const struct term *pattern, const struct term *ground,
228 |                        const struct term **variables)
229 | {
230 |     const struct termlist *p, *g;
231 | 
232 |     if (pattern->index >= 0) {
233 |         variables[pattern->index] = ground;     /* bind; a repeated variable is overwritten */
234 |         return 1;
235 |     }
236 |     if (!term_atoms_equal(pattern, ground)) {
237 |         return 0;
238 |     }
239 | 
240 |     /* walk both argument lists in lockstep */
241 |     p = pattern->subterms;
242 |     g = ground->subterms;
243 |     for (; p != NULL && g != NULL; p = p->next, g = g->next) {
244 |         if (!term_match_unifier(p->term, g->term, variables)) {
245 |             return 0;
246 |         }
247 |     }
248 | 
249 |     return p == NULL && g == NULL;  /* the list lengths must agree as well */
250 | }
251 | 


--------------------------------------------------------------------------------
/c_src/term.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
  3 |  * Distributed under a BSD-style license; see LICENSE for more information.
  4 |  */
  5 | 
  6 | #include <stdlib.h>
  7 | #include <stdio.h>
  8 | 
  9 | #ifndef TAMSIN_TERM_H
 10 | #define TAMSIN_TERM_H
 11 | 
 12 | extern int hits;
 13 | extern int misses;
 14 | 
 15 | /*
 16 |  * If `subterms` is NULL and `index` == -1, this is an atom.
 17 |  * 
 18 |  * If `subterms` is non-NULL, this is a constructor.
 19 |  *
 20 |  * If `index` >= 0, this is a variable.
 21 |  *
 22 |  * It is not a legal term if both `subterms` is non-NULL and `index` >= 0.
 23 |  *
 24 |  * In all cases, atom should not be NULL.
 25 |  */
 26 | struct term {
 27 |     const char *atom;           /* name/tag bytes; the term_new_* constructors copy exactly `size` bytes, with no NUL appended */
 28 |     size_t size;                /* number of bytes in `atom` */
 29 |     int index;                  /* -1 unless this term is a variable */
 30 |     struct termlist *subterms;  /* head of the subterm list, or NULL when there are none */
 31 | };
 32 | 
 33 | struct termlist {
 34 |     const struct term *term;    /* the term held by this list cell */
 35 |     struct termlist *next;      /* the following cell, or NULL at the end of the list */
 36 | };
 37 | 
 38 | /*
 39 |  * Creates a new "atom" term from the given character string.
 40 |  * The new term contains a dynamically allocated copy of the given string,
 41 |  *   so the given string may be freed after calling this.
 42 |  * Subterms may be added afterwards to turn it into a "constructor" term.
 43 |  * Segfaults if there is insufficient memory to allocate the term.
 44 |  */
 45 | const struct term *term_new_atom(const char *, size_t);
 46 | const struct term *term_new_atom_from_cstring(const char *);
 47 | const struct term *term_new_atom_from_char(char c);
 48 | 
 49 | const struct term *term_new_constructor(const char *, size_t,
 50 |                                         struct termlist *);
 51 | void termlist_add_term(struct termlist **, const struct term *);
 52 | 
 53 | const struct term *term_new_variable(const char *, size_t, int);
 54 | 
 55 | /*
 56 |  * Returns 1 if the atom portion of both terms is identical, otherwise 0.
 57 |  */
 58 | int term_atoms_equal(const struct term *, const struct term *);
 59 | 
 60 | /*
 61 |  * Returns 1 if the atom portion of term is identical to given C string, else 0.
 62 |  */
 63 | int term_atom_cstring_equal(const struct term *, const char *);
 64 | 
 65 | /*
 66 |  * Given the name of a variable, return the variable term of the
 67 |  * same name that is leftmost, uppermost in the given term.
 68 |  */
 69 | /*
 70 | struct term *term_find_variable(const struct term *, const char *);
 71 | */
 72 | 
 73 | /*
 74 |  * Given two "atom" terms, return a new "atom" term consisting of the
 75 |  * text of the input terms concatenated together.
 76 |  */
 77 | const struct term *term_concat(const struct term *, const struct term *);
 78 | 
 79 | /*
 80 |  * Given a possibly non-atom term, return an atom consisting of
 81 |  * contents of the given term flattened into an atom.
 82 |  *
 83 |  * The returned term is NOT always newly allocated.
 84 |  */
 85 | const struct term *term_flatten(const struct term *);
 86 | 
 87 | void term_fput(const struct term *, FILE *);
 88 | 
 89 | /*
 90 |  * Both terms must be ground.
 91 |  */
 92 | int term_equal(const struct term *, const struct term *);
 93 | 
 94 | /*
 95 |  * The third argument is an array of struct term *'s.  It will
 96 |  * be updated with bindings.
 97 |  */
 98 | int term_match_unifier(const struct term *, const struct term *,
 99 |                        const struct term **);
100 | 
101 | #endif
102 | 


--------------------------------------------------------------------------------
/doc/6502-sketch.tamsin:
--------------------------------------------------------------------------------
 1 | # a sketch of what a Tamsin program to simulate a subset of the 6502
 2 | # might look like.
 3 | 
 4 | # note that the 6502 memory is in the IMPLICIT BUFFER.
 5 | 
 6 | sim6502 = instr(0,0,0) using $:byte.
 7 | 
 8 | instr(A,X,Y) =
 9 |       "\xA9" & any → A               & instr(A,X,Y)    # LDA #
10 |     | "\xC8" & inc(Y) → Y            & instr(A,X,Y)    # INY
11 |     | "\x8A" & A ← X                 & instr(A,X,Y)    # TAX
12 |     | "\x4C" & word → W & $:seek(W)  & instr(A,X,Y)    # JMP
13 |     | etc.
14 | 
15 | word =
16 |    any → Lo & any → Hi & return $:add($:ord(Lo), $:mul($:ord(Hi), 256)).
17 | 
18 | etc.
19 | 
20 | 
21 | # That's the recursive version; compiling it to C currently would not be
22 | # nice to the stack.  Here's an iterative version:
23 | 
24 | 
25 | sim6502 =
26 |     A ← 0 & X ← 0 & Y ← 0 &
27 |     !{instr(A,X,Y) → state(A,X,Y)} using $:byte.
28 | 
29 | instr(A,X,Y) =
30 |        "\xA9" & any → A               & return! state(A,X,Y)    # LDA #
31 |     !| "\xC8" & inc(Y) → Y            & return! state(A,X,Y)    # INY
32 |     !| "\x8A" & A ← X                 & return! state(A,X,Y)    # TAX
33 |     !| "\x4C" & word → W & $:seek(W)  & return! state(A,X,Y)    # JMP
34 |     !| "\x00"                         & return! halted          # BRK
35 |     !| etc.
36 | 
37 | 
38 | # this uses ! (non-backtracking) and return! (immediate return from production)
39 | # (not sure about either of these...)
40 | 


--------------------------------------------------------------------------------
/doc/Advanced_Features.markdown:
--------------------------------------------------------------------------------
  1 | Advanced Features of the Tamsin Language
  2 | ========================================
  3 | 
  4 | This document is a **work in progress**.
  5 | 
  6 | Note that none of these features are in Tamsin version 0.1.  Although the
  7 | reference implementation might support them, or at least the syntax for
  8 | them, they should be regarded as undefined in 0.1.  They may appear in
  9 | 0.2.
 10 | 
 11 |     -> Tests for functionality "Intepret Tamsin program"
 12 | 
 13 | Three good ways to shoot yourself in the foot
 14 | ---------------------------------------------
 15 |     
 16 | 1, forget that Tamsin is still basically a *programming* language, or at
 17 | best an LL(n) grammar, and try to write a left-recursive rule:
 18 |     
 19 |     expr = expr & "+" & expr | expr & "*" & expr | "0" | "1".
 20 | 
 21 | 2, base a `{}` loop around something that always succeeds, like `return` or
 22 | `eof` at the end of the input.
 23 | 
 24 |     expr = {"k" | return l}.
 25 |     
 26 | 3, base a loop around something that doesn't consume any input, like `!`.
 27 | 
 28 |     expr = !"\n" & expr
 29 | 
 30 | Advanced Assignment
 31 | -------------------
 32 | 
 33 | The right-hand side of `→` can actually be more than a variable name;
 34 | it can be a pattern term, just like those used in the arguments, above.
 35 | This can be useful for "deconstructing" a compound return value from a
 36 | production to extract the parts you want.
 37 | 
 38 |     | main = foo → pair(A,B) & return A.
 39 |     | foo = return pair(wellington, trainer).
 40 |     = wellington
 41 | 
 42 |     | main = foo → pair(A,B) & return B.
 43 |     | foo = return pair(wellington, trainer).
 44 |     = trainer
 45 | 
 46 | Even without variables, this can also be useful simply to assert something
 47 | returns some value.
 48 | 
 49 |     | main = foo → b & print 'yes' | print 'no'.
 50 |     | foo = return a.
 51 |     = no
 52 |     = no
 53 | 
 54 |     | main = foo → b & print 'yes' | print 'no'.
 55 |     | foo = return b.
 56 |     = yes
 57 |     = yes
 58 | 
 59 | Advanced Programming
 60 | --------------------
 61 | 
 62 | Before the first production in a program, any number of _pragmas_ may be
 63 | given.  Pragmas may affect how the program following them is parsed.
 64 | Each pragma begins with a `@` followed by a bareword indicating the
 65 | kind of pragma, followed by a number of arguments specific to that kind
 66 | of pragma, followed by a `.`.
 67 | 
 68 |     | @alias zrrk 2 = jersey.
 69 |     | @unalias zrrk.
 70 |     | main = foo.
 71 |     | foo = "b".
 72 |     + b
 73 |     = b
 74 | 
 75 | ### `@alias` ###
 76 | 
 77 | The pragma `@alias` introduces an alias.  Its syntax consists of the
 78 | name of the alias (a bareword), followed by an integer which indicates
 79 | the _arity_, followed by `=`, followed by the contents of the alias
 80 | (i.e., what is being aliased; presently, this must be a non-terminal.)
 81 | 
 82 | This sets up a syntax rule, in the rule context, that, when the alias
 83 | name is encountered, parses as a call to the aliased non-terminal; in
 84 | addition, this syntax rule is special in that it looks for exactly
 85 | _arity_ number of terms following the alias name.  Parentheses are not
 86 | required to delimit these terms.
 87 | 
 88 |     | @alias foo 2 = jersey.
 89 |     | main = jersey(a,b) & foo c d.
 90 |     | jersey(A,B) = «A» & «B».
 91 |     + abcd
 92 |     = d
 93 | 
 94 | The pragma `@unalias` removes a previously-introduced alias.
 95 | 
 96 |     | @alias foo 2 = jersey.
 97 |     | @unalias foo.
 98 |     | main = jersey(a,b) & foo c d.
 99 |     | jersey(A,B) = «A» & «B».
100 |     + abcd
101 |     ? Expected '.' at ' c d
102 | 
103 | It is an error to attempt to unalias an alias that hasn't been established.
104 | 
105 |     | @alias foo 2 = jersey.
106 |     | @unalias bar.
107 |     | main = return ok.
108 |     ? KeyError
109 | 
110 | Note that various of Tamsin's "keywords" are actually built-in aliases for
111 | productions in the `$` module, and they may be unaliased.
112 | 
113 |     | @unalias return.
114 |     | main = return ok.
115 |     ? Expected '.' at ' ok.'
116 | 
117 |     | @unalias return.
118 |     | main = $.return(ok).
119 |     = ok
120 | 
121 | ### Rule Formals ###
122 | 
123 | Then we no longer pattern-match terms.  They're just strings.  So we... we
124 | parse them.  Here's a preview, and we'll get more serious about this further
125 | below.
126 | 
127 | Now that you can create scanners and parsers to your heart's desire, we
128 | return to the reason you would even need to: terms vs. rules in the
129 | "formal arguments" part of a production definition.
130 | 
131 |     | main = ("a" | "b" | "c") → C & donkey('f' + C) → D & return D.
132 |     | donkey["f" & ("a" | "c")] = return yes.
133 |     | donkey["f" & "b"] = return no.
134 |     + a
135 |     = yes
136 | 
137 |     | main = ("a" | "b" | "c") → C & donkey('f' + C) → D & return D.
138 |     | donkey["f" & ("a" | "c")] = return yes.
139 |     | donkey["f" & "b"] = return no.
140 |     + b
141 |     = no
142 | 
143 |     | main = ("a" | "b" | "c") → C & donkey('f' + C) → D & return D.
144 |     | donkey["f" & ("a" | "c")] = return yes.
145 |     | donkey["f" & "b"] = return no.
146 |     + c
147 |     = yes
148 | 
149 | Variables that are set in parse-pattern formals are available to
150 | the production's rule.
151 | 
152 |     | main = donkey(world).
153 |     | donkey[any → E] = return hello(E).
154 |     = hello(w)
155 | 
156 |     | main = donkey(world).
157 |     | donkey[any → E using word] = return hello(E).
158 |     | word = (T ← '' & {$.alnum → S & T ← T + S} & T) using $.char.
159 |     = hello(world)
160 | 
161 | No variables from the caller leak into the called production.
162 | 
163 |     | main = set F = whatever & donkey(world).
164 |     | donkey[any → E] = return hello(F).
165 |     ? KeyError
166 | 
167 | Terms are stringified before being matched.
168 | 
169 |     | main = donkey(a(b(c))).
170 |     | donkey["a" & "(" & "b" & "(" & "c" & ")" & ")"] = return yes.
171 |     = yes
172 | 
173 | Thus, in this sense at least, terms are sugar for strings.
174 | 
175 |     | main = donkey('a(b(c))').
176 |     | donkey["a" & "(" & "b" & "(" & "c" & ")" & ")"] = return yes.
177 |     = yes
178 | 
179 | The rule formals may call on other rules in the program.
180 | 
181 |     | main = donkey('pair(pair(0,1),1)').
182 |     | donkey[pair → T using mini] = return its_a_pair(T).
183 |     | donkey[bit → T using mini] = return its_a_bit(T).
184 |     | thing = pair | bit.
185 |     | pair = "pair" & "(" & thing → A & "," & thing → B & ")" & return pair(A,B).
186 |     | bit = "0" | "1".
187 |     | mini = (bit | "(" | ")" | "," | word) using $.char.
188 |     | word = (T ← '' & {$.alnum → S & T ← T + S} & T).
189 |     = its_a_pair(pair(pair(0, 1), 1))
190 | 
191 | ### Auto-term creation from productions ###
192 | 
193 | An experimental feature.  But Rooibos does it, and it could help make
194 | parser development faster/shorter.  Note that this feature is not fully
195 | implemented; therefore the test is disabled.
196 | 
197 |         | main = expr0.
198 |         | expr0! = expr1 & {"+" & expr1}.
199 |         | expr1! = term & {"*" & term}.
200 |         | term = "x" | "y" | "z" | "(" & expr0 & ")".
201 |         + x+y*(z+x+y)
202 |         = expr0(expr1, +, expr1)
203 | 


--------------------------------------------------------------------------------
/doc/Case_Study.markdown:
--------------------------------------------------------------------------------
  1 | Case Study: Parsing and Evaluating S-Expressions in Tamsin
  2 | ==========================================================
  3 | 
  4 |     -> Tests for functionality "Intepret Tamsin program"
  5 | 
  6 | We now have enough tools at our disposal to parse and evaluate simple
  7 | S-expressions (from Lisp or Scheme).
  8 | 
  9 | Note that we no longer have `$.tamsin`, so these examples don't work.
 10 | They're left here to demonstrate the development process.  For now, see
 11 | `eg/sexpr-eval.tamsin`.
 12 | 
 13 | We can write such a parser with `{}`, but the result is a bit messy.
 14 | 
 15 |     | main = sexp using $.tamsin.
 16 |     | sexp = symbol | list.
 17 |     | list = "(" &
 18 |     |        set L = nil &
 19 |     |        {sexp → S & set L = pair(S, L)} &
 20 |     |        ")" &
 21 |     |        return L.
 22 |     | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
 23 |     + (cons (a (cons b nil)))
 24 |     = pair(pair(pair(nil, pair(b, pair(cons, nil))), pair(a, nil)), pair(cons, nil))
 25 | 
 26 | So let's write it in the less intuitive, recursive way:
 27 | 
 28 |     | main = sexp using $.tamsin.
 29 |     | 
 30 |     | sexp = symbol | list.
 31 |     | list = "(" & listtail(nil).
 32 |     | listtail(L) = sexp → S & listtail(pair(S, L))
 33 |     |             | ")" & return L.
 34 |     | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
 35 |     + (a b)
 36 |     = pair(b, pair(a, nil))
 37 | 
 38 | Nice.  But it returns a term that's backwards.  So we need to write a
 39 | reverser.  In Erlang, this would be
 40 | 
 41 |     reverse([H|T], A) -> reverse(T, [H|A]).
 42 |     reverse([], A) -> A.
 43 | 
 44 | In Tamsin, it's:
 45 | 
 46 |     | main = sexp → S using $.tamsin & reverse(S, nil) → SR & return SR.
 47 |     | 
 48 |     | sexp = symbol | list.
 49 |     | list = "(" & listtail(nil).
 50 |     | listtail(L) = sexp → S & listtail(pair(S, L))
 51 |     |             | ")" & return L.
 52 |     | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
 53 |     | 
 54 |     | reverse(pair(H, T), A) =
 55 |     |   reverse(T, pair(H, A)) → TR &
 56 |     |   return TR.
 57 |     | reverse(nil, A) =
 58 |     |   return A.
 59 |     + (a b)
 60 |     = pair(a, pair(b, nil))
 61 | 
 62 | But it's not deep.  It only reverses the top-level list.
 63 | 
 64 |     | main = sexp → S using $.tamsin & reverse(S, nil) → SR & return SR.
 65 |     | 
 66 |     | sexp = symbol | list.
 67 |     | list = "(" & listtail(nil).
 68 |     | listtail(L) = sexp → S & listtail(pair(S, L))
 69 |     |             | ")" & return L.
 70 |     | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
 71 |     | 
 72 |     | reverse(pair(H, T), A) =
 73 |     |   reverse(T, pair(H, A)) → TR &
 74 |     |   return TR.
 75 |     | reverse(nil, A) =
 76 |     |   return A.
 77 |     + (a (c b) b)
 78 |     = pair(a, pair(pair(b, pair(c, nil)), pair(b, nil)))
 79 | 
 80 | So here's a deep reverser.
 81 | 
 82 |     | main = sexp → S using $.tamsin & reverse(S, nil) → SR & return SR.
 83 |     | 
 84 |     | sexp = symbol | list.
 85 |     | list = "(" & listtail(nil).
 86 |     | listtail(L) = sexp → S & listtail(pair(S, L))
 87 |     |             | ")" & return L.
 88 |     | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
 89 |     | 
 90 |     | reverse(pair(H, T), A) =
 91 |     |   reverse(H, nil) → HR &
 92 |     |   reverse(T, pair(HR, A)) → TR &
 93 |     |   return TR.
 94 |     | reverse(nil, A) =
 95 |     |   return A.
 96 |     | reverse(X, A) =
 97 |     |   return X.
 98 |     + (a (c b) b)
 99 |     = pair(a, pair(pair(c, pair(b, nil)), pair(b, nil)))
100 | 
101 | Finally, a little sexpr evaluator.
102 | 
103 |     | main = sexp → S using $.tamsin & reverse(S, nil) → SR & eval(SR).
104 |     | 
105 |     | sexp = symbol | list.
106 |     | list = "(" & listtail(nil).
107 |     | listtail(L) = sexp → S & listtail(pair(S, L))
108 |     |             | ")" & return L.
109 |     | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
110 |     | 
111 |     | head(pair(A, B)) = return A.
112 |     | tail(pair(A, B)) = return B.
113 |     | cons(A, B) = return pair(A, B).
114 |     | 
115 |     | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R) → P & return P.
116 |     | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R) → P & return P.
117 |     | eval(pair(cons, pair(A, pair(B, nil)))) =
118 |     |    eval(A) → AE & eval(B) → BE & return pair(AE, BE).
119 |     | eval(X) = return X.
120 |     | 
121 |     | reverse(pair(H, T), A) =
122 |     |   reverse(H, nil) → HR &
123 |     |   reverse(T, pair(HR, A)) → TR &
124 |     |   return TR.
125 |     | reverse(nil, A) =
126 |     |   return A.
127 |     | reverse(X, A) =
128 |     |   return X.
129 |     + (cons a b)
130 |     = pair(a, b)
131 | 
132 |     | main = sexp → S using $.tamsin & reverse(S, nil) → SR & eval(SR).
133 |     | 
134 |     | sexp = symbol | list.
135 |     | list = "(" & listtail(nil).
136 |     | listtail(L) = sexp → S & listtail(pair(S, L))
137 |     |             | ")" & return L.
138 |     | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
139 |     | 
140 |     | head(pair(A, B)) = return A.
141 |     | tail(pair(A, B)) = return B.
142 |     | cons(A, B) = return pair(A, B).
143 |     | 
144 |     | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R) → P & return P.
145 |     | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R) → P & return P.
146 |     | eval(pair(cons, pair(A, pair(B, nil)))) =
147 |     |    eval(A) → AE & eval(B) → BE & return pair(AE, BE).
148 |     | eval(X) = return X.
149 |     | 
150 |     | reverse(pair(H, T), A) =
151 |     |   reverse(H, nil) → HR &
152 |     |   reverse(T, pair(HR, A)) → TR &
153 |     |   return TR.
154 |     | reverse(nil, A) =
155 |     |   return A.
156 |     | reverse(X, A) =
157 |     |   return X.
158 |     + (head (cons b a))
159 |     = b
160 | 
161 |     | main = sexp → S using $.tamsin & reverse(S, nil) → SR & eval(SR).
162 |     | 
163 |     | sexp = symbol | list.
164 |     | list = "(" & listtail(nil).
165 |     | listtail(L) = sexp → S & listtail(pair(S, L))
166 |     |             | ")" & return L.
167 |     | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
168 |     | 
169 |     | head(pair(A, B)) = return A.
170 |     | tail(pair(A, B)) = return B.
171 |     | cons(A, B) = return pair(A, B).
172 |     | 
173 |     | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R) → P & return P.
174 |     | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R) → P & return P.
175 |     | eval(pair(cons, pair(A, pair(B, nil)))) =
176 |     |    eval(A) → AE & eval(B) → BE & return pair(AE, BE).
177 |     | eval(X) = return X.
178 |     | 
179 |     | reverse(pair(H, T), A) =
180 |     |   reverse(H, nil) → HR &
181 |     |   reverse(T, pair(HR, A)) → TR &
182 |     |   return TR.
183 |     | reverse(nil, A) =
184 |     |   return A.
185 |     | reverse(X, A) =
186 |     |   return X.
187 |     + (tail (tail (cons b (cons b a))))
188 |     = a
189 | 
190 | In this one, we make the evaluator print out some of the steps it takes.
191 | 
192 |     | main = sexp → S using $.tamsin & reverse(S, nil) → SR & eval(SR).
193 |     | 
194 |     | sexp = symbol | list.
195 |     | list = "(" & listtail(nil).
196 |     | listtail(L) = sexp → S & listtail(pair(S, L))
197 |     |             | ")" & return L.
198 |     | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
199 |     | 
200 |     | head(pair(A, B)) = return A.
201 |     | tail(pair(A, B)) = return B.
202 |     | cons(A, B) = return pair(A, B).
203 |     | 
204 |     | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R) → P & return P.
205 |     | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R) → P & return P.
206 |     | eval(pair(cons, pair(A, pair(B, nil)))) =
207 |     |    eval(A) → AE & eval(B) → BE &
208 |     |    $.print(y(AE, BE)) & cons(AE, BE) → C & return C.
209 |     | eval(X) = return X.
210 |     | 
211 |     | reverse(pair(H, T), A) =
212 |     |   reverse(H, nil) → HR &
213 |     |   reverse(T, pair(HR, A)) → TR &
214 |     |   return TR.
215 |     | reverse(nil, A) =
216 |     |   return A.
217 |     | reverse(X, A) =
218 |     |   return X.
219 |     + (cons (tail (cons b a)) (head (cons b a)))
220 |     = y(b, a)
221 |     = y(b, a)
222 |     = y(a, b)
223 |     = pair(a, b)
224 | 


--------------------------------------------------------------------------------
/doc/Error_Reporting.markdown:
--------------------------------------------------------------------------------
 1 | Error Reporting
 2 | ---------------
 3 | 
 4 | For now, only the Tamsin interpreter is expected to pass these tests.
 5 | 
 6 | Also, these tests expose some details about how Falderal creates temp files.
 7 | Boo!
 8 | 
 9 |     -> Tests for functionality "Intepret Tamsin program"
10 | 
11 | When a scanning error occurs in a Tamsin source, the filename, line number,
12 | and column number are reported.
13 | 
14 |     | hello = "h".
15 |     |     %
16 |     ? expected identifiable character but found '%' at line 2, column 5 in '/tmp/tmp
17 | 
18 | When a parsing error occurs in a Tamsin source, the filename, line number,
19 | and column number are reported.
20 | 
21 |     | slough = "h" & ("o" | "p").
22 |     | maidenhead = "h" & ("o" | "p").
23 |     | reading = "h" ("o" | "p").
24 |     ? expected '.' but found '(' at line 3, column 16 in '/tmp/tmp
25 | 
26 |     | pasta = "h" & «hop() & "p".
27 |     ? expected '>>' but found '&' at line 1, column 22 in '/tmp/tmp
28 | 
29 |     | pasta = "h" & «hop()
30 |     ? expected '>>' but found EOF at line 1, column 22 in '/tmp/tmp
31 | 
32 | When a scanning error occurs in the input to a Tamsin program, the filename,
33 | line number, and column number are reported.
34 | 
35 |     | main = "h" & "o" & "x".
36 |     + hop
37 |     ? expected 'x' but found 'p' at line 1, column 3 in '<stdin>'
38 | 
39 |     | main = "h" & "o" & {"\n"} & "0" & "x".
40 |     + ho
41 |     + 
42 |     + 0p
43 |     ? expected 'x' but found 'p' at line 3, column 2 in '<stdin>'
44 | 
45 |     | main = "h" & "o" & "x".
46 |     + ho
47 |     ? expected 'x' but found EOF at line 1, column 3 in '<stdin>'
48 | 
49 |     | main = "h" & "o" & $:eof.
50 |     + hox
51 |     ? expected EOF but found 'x' at line 1, column 3 in '<stdin>'
52 | 
53 |     | main = "h" & "o" & $:any.
54 |     + ho
55 |     ? expected any token but found EOF at line 1, column 3 in '<stdin>'
56 | 
57 |     | main = "h" & "o" & $:alnum.
58 |     + ho&
59 |     ? expected alphanumeric but found '&' at line 1, column 3 in '<stdin>'
60 | 
61 |     | main = "h" & "o" & $:upper.
62 |     + hod
63 |     ? expected uppercase but found 'd' at line 1, column 3 in '<stdin>'
64 | 
65 |     | main = "h" & "o" & $:startswith('f').
66 |     + hod
67 |     ? expected 'f...' but found 'd' at line 1, column 3 in '<stdin>'
68 | 
69 |     | main = "h" & "o" & (! "n").
70 |     + hon
71 |     ? expected anything else but found 'n' at line 1, column 3 in '<stdin>'
72 | 


--------------------------------------------------------------------------------
/doc/Micro-Tamsin.markdown:
--------------------------------------------------------------------------------
  1 | Micro-Tamsin
  2 | ============
  3 | 
  4 | This is just the "fundaments" part of the spec, and a few other bits,
  5 | that the Micro-Tamsin interpreter (written in Tamsin!) can handle.
  6 | 
  7 |     -> Tests for functionality "Intepret Tamsin program"
  8 | 
  9 | Fundaments
 10 | ----------
 11 | 
 12 | A Tamsin program consists of one or more _productions_.  A production consists
 13 | of a name and a _parsing rule_ (or just "rule" for short).  Among other things,
 14 | a rule may be a _non-terminal_, which is the name of a production, or a
 15 | _terminal_, which is a literal string in double quotes.  (A full grammar for
 16 | Tamsin can be found in Appendix A.)
 17 | 
 18 | When run, a Tamsin program processes its input.  It starts at the production
 19 | named `main`, and evaluates its rule.  A non-terminal in a rule "calls" the
 20 | production of that name in the program.  A terminal in a rule expects a token
 21 | identical to it to be on the input.  If that expectation is met, it evaluates
 22 | to that token.  If not, it raises an error.  The final result of evaluating a
 23 | Tamsin program is sent to its output.
 24 | 
 25 | (If it makes it easier to think about, consider "its input" to mean "stdin",
 26 | and "token" to mean "character"; so the terminal `"x"` is a command that either
 27 | reads the character `x` from stdin and returns it (whence it is printed to
 28 | stdout by the main program), or errors out if it read something else.
 29 | Or, thinking about it from the other angle, we have here the rudiments for
 30 | defining a grammar for parsing a trivial language.)
 31 | 
 32 |     | main = blerf.
 33 |     | blerf = "p".
 34 |     + p
 35 |     = p
 36 | 
 37 |     | main = blerf.
 38 |     | blerf = "p".
 39 |     + k
 40 |     ? expected 'p' found 'k'
 41 | 
 42 | Productions can be written that don't look at the input.  A rule may also
 43 | consist of the keyword `return`, followed by a _term_; this expression simply
 44 | evaluates to that term and returns it.  (More on terms later; for now,
 45 | think of them as strings.)
 46 | 
 47 | So, the following program always outputs `blerp`, no matter what the input is.
 48 | 
 49 |     | main = return blerp.
 50 |     + fadda wadda badda kadda nadda sadda hey
 51 |     = blerp
 52 | 
 53 | Note that in the following, `blerp` refers to the production named "blerp"
 54 | in one place, and in the other place, it refers to the term `blerp`.  Tamsin
 55 | sees the difference because of the context; `return` must be followed by a
 56 | term, while a parsing rule cannot be part of a term.
 57 | 
 58 |     | main = blerp.
 59 |     | blerp = return blerp.
 60 |     + foo
 61 |     + foo
 62 |     + foo 0 0 0 0 0
 63 |     = blerp
 64 | 
 65 | A rule may also consist of the keyword `print` followed by a term, which,
 66 | when evaluated, sends the term to the output, and evaluates to the term.
 67 | (Mostly this is useful for debugging.  In the following, `world` is
 68 | repeated because it is both printed, and the result of the evaluation.)
 69 | 
 70 |     | main = print hello & print world.
 71 |     + ahoshoshohspohdphs
 72 |     = hello
 73 |     = world
 74 |     = world
 75 | 
 76 | A rule may also consist of two subrules joined by the `&` operator.
 77 | The `&` operator processes the left-hand side rule.  If the LHS fails, then
 78 | the `&` expression fails; otherwise, it continues and processes the
 79 | right-hand side rule.  If the RHS fails, the `&` expression fails; otherwise
 80 | it evaluates to what the RHS evaluated to.
 81 | 
 82 |     | main = "a" & "p".
 83 |     + ap
 84 |     = p
 85 | 
 86 |     | main = "a" & "p".
 87 |     + ak
 88 |     ? expected 'p' found 'k'
 89 | 
 90 |     | main = "a" & "p".
 91 |     + ep
 92 |     ? expected 'a' found 'e'
 93 | 
 94 | If you are too used to C or Javascript or the shell, you may use `&&`
 95 | instead of `&`.
 96 | 
 97 |     | main = "a" && "p".
 98 |     + ap
 99 |     = p
100 | 
101 | A rule may also consist of two subrules joined by the `|` operator.
102 | The `|` operator processes the left-hand side rule.  If the LHS succeeds,
103 | then the `|` expression evaluates to what the LHS evaluated to, and the
104 | RHS is ignored.  But if the LHS fails, it processes the RHS; if the RHS
105 | fails, the `|` expression fails, but otherwise it evaluates to what the
106 | RHS evaluated to.
107 | 
108 | For example, this program accepts `0` or `1` but nothing else.
109 | 
110 |     | main = "0" | "1".
111 |     + 0
112 |     = 0
113 | 
114 |     | main = "0" | "1".
115 |     + 1
116 |     = 1
117 | 
118 |     | main = "0" | "1".
119 |     + 2
120 |     ? expected '1' found '2'
121 | 
122 | If you are too used to C or Javascript or the shell, you may use `||`
123 | instead of `|`.
124 | 
125 |     | main = "0" || "1".
126 |     + 1
127 |     = 1
128 | 
129 | Using `return` described above, this program accepts 0 or 1 and evaluates
130 | to the opposite.  (Note here also that `&` has a higher precedence than `|`.)
131 | 
132 |     | main = "0" & return 1 | "1" & return 0.
133 |     + 0
134 |     = 1
135 | 
136 |     | main = "0" & return 1 | "1" & return 0.
137 |     + 1
138 |     = 0
139 | 
140 |     | main = "0" & return 1 | "1" & return 0.
141 |     + 2
142 |     ? expected '1' found '2'
143 | 
144 | Evaluation order can be altered by using parentheses, as per usual.
145 | 
146 |     | main = "0" & ("0" | "1") & "1" & return ok.
147 |     + 011
148 |     = ok
149 | 
150 | Note that if the LHS of `|` fails, the RHS is tried at the position of
151 | the stream that the LHS started on.  This property is called "backtracking".
152 | 
153 |     | ohone = "0" & "1".
154 |     | ohtwo = "0" & "2".
155 |     | main = ohone | ohtwo.
156 |     + 02
157 |     = 2
158 | 
159 | Note that `print` and `return` never fail.  Thus, code like the following
160 | is "useless":
161 | 
162 |     | main = foo & print hi | return useless.
163 |     | foo = return bar | print useless.
164 |     = hi
165 |     = hi
166 | 
167 | Note that `return` does not exit the production immediately — although
168 | this behaviour may be re-considered...
169 | 
170 |     | main = return hello & print not_useless.
171 |     = not_useless
172 |     = not_useless
173 | 
174 | Alternatives can select code to be executed, based on the input.
175 | 
176 |     | main = aorb & print aorb | cord & print cord & return ok.
177 |     | aorb = "a" & print ay | "b" & print bee.
178 |     | cord = "c" & print see | eorf & print eorf.
179 |     | eorf = "e" & print ee | "f" & print eff.
180 |     + e
181 |     = ee
182 |     = eorf
183 |     = cord
184 |     = ok
185 | 
186 | And that's the basics.  With these tools, you can write simple
187 | recursive-descent parsers.  For example, to consume nested parentheses
188 | containing a zero:
189 | 
190 |     | main = parens & "." & return ok.
191 |     | parens = "(" & parens & ")" | "0".
192 |     + 0.
193 |     = ok
194 | 
195 |     | main = parens & "." & return ok.
196 |     | parens = "(" & parens & ")" | "0".
197 |     + (((0))).
198 |     = ok
199 | 
200 | (the error message on this test case is a little weird; it's because of
201 | the backtracking.  It tries to match `(((0)))` against the beginning of
202 | input, and fails, because the last `)` is not present.  So it tries to
203 | match `0` at the beginning instead, and fails that too.)
204 | 
205 |     | main = parens & "." & return ok.
206 |     | parens = "(" & parens & ")" | "0".
207 |     + (((0)).
208 |     ? expected '0' found '('
209 | 
210 | (the error message on this one is much more reasonable...)
211 | 
212 |     | main = parens & "." & return ok.
213 |     | parens = "(" & parens & ")" | "0".
214 |     + ((0))).
215 |     ? expected '.' found ')'
216 | 
217 | To consume a comma-separated list of one or more bits:
218 | 
219 |     | main = bit & {"," & bit} & ".".
220 |     | bit = "0" | "1".
221 |     + 1.
222 |     = .
223 | 
224 |     | main = bit & {"," & bit} & ".".
225 |     | bit = "0" | "1".
226 |     + 0,1,1,0,1,1,1,1,0,0,0,0,1.
227 |     = .
228 | 
229 | (again, backtracking makes the error a little odd)
230 | 
231 |     | main = bit & {"," & bit} & ".".
232 |     | bit = "0" | "1".
233 |     + 0,,1,0.
234 |     ? expected '.' found ','
235 | 
236 |     | main = bit & {"," & bit} & ".".
237 |     | bit = "0" | "1".
238 |     + 0,10,0.
239 |     ? expected '.' found '0'
240 | 
241 | Comments
242 | --------
243 | 
244 | A Tamsin comment is introduced with `#` and continues until the end of the
245 | line.
246 | 
247 |     | # welcome to my Tamsin program!
248 |     | main = # comments may appear anywhere in the syntax
249 |     |        # and a comment may be followed by a comment
250 |     |   "z".
251 |     + z
252 |     = z
253 | 


--------------------------------------------------------------------------------
/doc/Notes.markdown:
--------------------------------------------------------------------------------
  1 | These are now out of context, and kept here for historical purposes.
  2 | 
  3 | ### an aside, written a while back ###
  4 | 
  5 | OK!  So... here is a problem: if you haven't noticed yet,
  6 | 
  7 | *   what a rule consumes, is a string.
  8 | *   what a rule evaluates to, is a term.
  9 | *   the symbol `(` means something different in a rule (where it expresses
 10 |     precedence) than in a term (where it signifies the list of subterms.)
 11 | *   the symbol `foo` means something different in a rule (where it denotes
 12 |     a production) than in a term (where it is an atom.)
 13 | 
 14 | This is probably unacceptable.  Which syntax do we want to change?
 15 | 
 16 |     PRODUCTION = set V = foo & return ⟨atom V production⟩.
 17 | 
 18 | i.e. productions are distinguished from atoms and variables by being
 19 | all-caps.  Lists are distinguished from precedence by being ⟨ ⟩.
 20 | 
 21 |     production = set V = 'foo & return '(atom V production).
 22 | 
 23 | i.e. `'` acts a bit like quote, or rather quasi-quote, as Variables get
 24 | expanded.
 25 | 
 26 |     production = set V = :foo & return :smth(:atom Var :production).
 27 | 
 28 | i.e. atoms are prefixed with `:`, like Ruby, and terms are constructors
 29 | with a leading atom, like real terms and not like lists.
 30 | 
 31 |     production = set V = 「foo」 & return 「(atom Var anotheratom)」.
 32 | 
 33 | A funky, Japanese-influenced version of quote.  Nice, but really not suited
 34 | for this, quite.  Ditto ⟦these⟧.
 35 | 
 36 | Ah, well, it may not be a real problem, unless we want to make `return`
 37 | optional (which we do.)  Maybe, onto weirder stuff first.
 38 | 
 39 | ### stuff about implicit buffer ###
 40 | 
 41 | Here's a "problem": the implicit buffer is a string, and we don't have
 42 | strings in the data domain, we have terms.  This "problem" is easily
 43 | "solvable": we can stringify the term.  This is a terrible "solution",
 44 | but it lets us experiment further.
 45 | 
 46 | This would be nicer if we had a syntax to put arbitrary text in an atom.
 47 | Hey, how about 「this is an atom」?  Hmmm...
 48 | 
 49 | #### A prolix note on implementation ####
 50 | 
 51 | Traditionally, scanners for recursive descent parsers pre-emptively scan
 52 | the next token.  This was done because originally, parsers (for languages
 53 | like Pascal, say,) were distinctly one-pass beasts, reading the source code
 54 | off of a stream from disk (or maybe even from a tape), and you might need
 55 | to refer to the current token several times in the code and you don't want
 56 | to have to read it more than once.
 57 | 
 58 | This setup makes writing a parser with a "hot-swappable" scanner tricky,
 59 | because when we switch scanner, we have to deal with this "cached" token
 60 | somehow.  We could rewind the scanner by the length of the token (plus
 61 | the length of any preceding whitespace and comments), switch the scanner,
 62 | then scan again (by the new rules.)  But this is messy and error-prone.
 63 | 
 64 | Luckily, not many of us are reading files off tape these days, and we have
 65 | plenty of core, so it's no problem reading the whole file into memory.
 66 | In fact, I've seen it argued that the best way to write a scanner nowadays
 67 | is to `mmap()` the file.  We don't do this in the implementation of Tamsin,
 68 | but we do read the entire file into memory.
 69 | 
 70 | This makes the cache-the-next-token method less useful, and so we don't
 71 | do it.  Instead, we look for the next token only when we need it, and we
 72 | have a method `peek()` that returns what the next token would be, and we
 73 | don't cache this value.
 74 | 
 75 | There are a couple of other points about the scanner implementation.
 76 | A scanner only ever has one buffer (the entire string it's scanning); this
 77 | never changes over its lifetime.  It provides methods for saving and
 78 | restoring its state, and it has a stack of "engines" which provide the
 79 | actual scanning logic.  In addition, there is only one interpreter object,
 80 | and it only has one scanner object during its lifetime.
 81 | 
 82 | ### Implementation Notes ###
 83 | 
 84 | Maybe test-driven language design is *not* "for the win" in all cases; it's
 85 | excellent for evolving a design, but not so good for deep debugging.  I had
 86 | to actually write a dedicated test case which directly accessed the internals,
 87 | to find the problem.
 88 | 
 89 | This was only after refactoring the implementation two or three times.  One
 90 | of those times, I removed exceptions, so now the interpreter returns
 91 | `(success, result)` tuples, where `success` is a boolean, and propagates
 92 | parse errors itself.
 93 | 
 94 | We "raise" a parse error only in the `LITERAL` AST node.
 95 | 
 96 | We handle parse errors (backtrack) only in `OR` and `WHILE`, and in the
 97 | ProductionScannerEngine logic (to provide that EOF if the scanning production
 98 | failed.  This can happen even in `peek()` at the end of a string, even after
 99 | we've successfully parsed everything else.)
100 | 
101 | ### aside #2 ###
102 | 
103 | Well this is all very nice, very pretty I'm sure you'll agree, but it doesn't
104 | hang together too well.  Figuration is easier than composition.  The thing is
105 | that we still have these two domains, the domain of strings that we parse
106 | and the domain of terms that we match.  We need to bring them closer together.
107 | This section is just ideas for that.
108 | 
109 | One is that instead of, or alongside terms, we compose strings.
110 | 
111 | First, we put arbitrary text in an atom, with `「this syntax」`.  Then we allow
112 | terms to be concatenated with `•`.  It looks real cool!  But also, it's kind
113 | of annoying.  So we also allow `'this ' + 'syntax.'`.
114 | 
115 | ### ... ###
116 | 
117 | Indeed we can.
118 | 
119 | The next logical step would be to be able to say
120 | 
121 |     main = program using scanner.
122 |     scanner = scan using ☆char.
123 |     scan = {" "} & (...)
124 |     program = "token" & ";" & "token" & ...
125 | 
126 | But we're not there yet.
127 | 
128 | Well, the best way to get there is to make that a test, see it fail, then
129 | improve the implementation so that it passes.  Test-driven language design
130 | for the win!  (But maybe not in all cases.  See my notes below...)
131 | 
132 | ### ... #2 ###
133 | 
134 | Having thought more about it, I think the easiest way to reconcile terms
135 | and strings is to have terms be syntactic sugar for strings.  This is
136 | already the case for ground terms, since `tree(a,b)` stringifies to the
137 | same string as `「tree(a,b)」`.  It's when variables are involved where it
138 | differs.  We would like some kind of quasi-quote such that even though
139 | `「tree(A,b)」` → `tree(A,b)`, `«tree(A,b)»` → `tree(tree(x,y),b)` or
140 | whatever.
141 | 
142 | Although, I still don't know.  The thing about terms is that they are
143 | super-useful for intermediate representations — abstract syntax trees
144 | and the like.  I've been thinking about some kind of compromise.  Which
145 | is, currently, what we sort of have.  A Tamsin term doubles as a string,
146 | for better or worse.  Mainly, we should sort out the properties of terms,
147 | then.  Which we will do.  But first,
148 | 
149 | ### conceptual sugar ###
150 | 
151 | Have
152 | 
153 |     reverse(tree(A,B)) = ...
154 | 
155 | be *conceptually* sugar for
156 | 
157 |     reverse["tree" & "(" & term → A & "," & term → B & ")"] = ...
158 | 
159 | but *actually* we still keep it in terms of terms, for efficiency.
160 | 


--------------------------------------------------------------------------------
/doc/Philosophy.markdown:
--------------------------------------------------------------------------------
  1 | Philosophy of Tamsin
  2 | ====================
  3 | 
  4 | I suppose that's a rather heavy-handed word to use, "philosophy".  But
  5 | this is the document giving the _whys_ of Tamsin rather than the technical
  6 | points.
  7 | 
  8 | Why did you write Tamsin?
  9 | ------------------------
 10 | 
 11 | Basically, every time I see someone use a compiler-compiler like `yacc`
 12 | or a parser combinator library, part of me thinks, "Well why didn't
 13 | you just write a recursive-descent parser?  Recursive-descent parsers
 14 | are easy to write and they make for extremely pretty code!"
 15 | And what does a recursive-descent parser do?  It consumes input.  But
 16 | don't *all* algorithms consume input?  So why not have a language which
 17 | makes it easy to write recursive-descent parsers, and force all programs
 18 | to be written as recursive-descent parsers?  Then *all* code will be pretty!
 19 | (Yeah, sure, OK.)
 20 | 
 21 | Why is it/is it not a...
 22 | ------------------------
 23 | 
 24 | ### Meta-Language ###
 25 | 
 26 | (Also known, in their more practical incarnations, as "compiler-compilers"
 27 | or "parser generators".)
 28 | 
 29 | Tamsin is one, because:
 30 |     
 31 | *   The basic operations all map directly to combinators in BNF (or rather,
 32 |     Wirth's EBNF):
 33 |     *   `&` is sequencing
 34 |     *   `|` is alternation
 35 |     *   `[]` is sugar for alternation with the empty string
 36 |     *   `{}` is asteration
 37 |     *   `"foo"` is a terminal
 38 |     *   `foo` is a non-terminal
 39 | *   Using only these operations produces a sensible program — one which
 40 |     parses its input by the grammar so given.
 41 | 
 42 | Tamsin isn't one, because:
 43 | 
 44 | *   There is no requirement that any input be processed at all.
 45 | 
 46 | ### Programming Language ###
 47 | 
 48 | Tamsin is one, because:
 49 | 
 50 | *   Productions can have local variables.
 51 | *   Productions can call other productions (or themselves, recursively) with
 52 |     arguments, and they return a value:
 53 | 
 54 |         reverse(pair(H, T), A) = reverse(T, pair(H, A)).
 55 |         reverse(nil, A) = A.
 56 | 
 57 | *   It's Turing-complete.
 58 | *   It can be, and in fact has been, bootstrapped.
 59 | 
 60 | Tamsin isn't one, because:
 61 | 
 62 | *   The syntax is really geared to consuming input rather than general
 63 |     programming.
 64 | 
 65 | ### Rubbish Lister ###
 66 | 
 67 | What does this even mean?  Well, there is that
 68 | [one famous rubbish lister](http://perl.org/) that we can use as an example
 69 | for now, until I come up with a better definition here.
 70 | 
 71 | Tamsin is one, because:
 72 |     
 73 | *   There's more than one way to say it.
 74 | *   The same symbol means different things in different contexts
 75 |     (for example, `foo` might be either the name of a production, or an
 76 |     atomic term.)
 77 | *   Implicit this, implicit that.
 78 | *   Optimized (a bit) for problem-solving throwaway one-liners rather than
 79 |     large, engineered systems.
 80 | *   Anyone up for a game of golf?
 81 | 
 82 | Tamsin isn't one, because:
 83 | 
 84 | *   It's possible to express its syntax in a form that humans can understand.
 85 | *   In fact, it's possible to express its syntax in Tamsin.
 86 |     In fact, it's possible to bootstrap Tamsin — a Tamsin-to-C compiler has
 87 |     been written in Tamsin.  This is very un-rubbish-lister-ish.
 88 | 
 89 | Batteries Included
 90 | ------------------
 91 | 
 92 | Are batteries included?  Or rather, _what_ batteries are included?  By strange
 93 | coincidence, the batteries that are included are almost exactly the ones
 94 | you'd expect to be useful in bootstrapping a Tamsin-to-C compiler:
 95 | 
 96 | *   `list` module — `reverse`, `append`, `member`, etc.
 97 | *   `tamsin_scanner` module
 98 | *   `tamsin_parser` module
 99 | *   `tamsin_analyzer` module
100 | 


--------------------------------------------------------------------------------
/doc/System_Module.markdown:
--------------------------------------------------------------------------------
  1 | System Module
  2 | -------------
  3 | 
  4 |     -> Tests for functionality "Intepret Tamsin program"
  5 | 
  6 | The module `$` contains a number of built-in productions which would not
  7 | be possible or practical to implement in Tamsin.  See Appendix C for a list.
  8 | 
  9 | In fact, we have been using the `$` module already!  But our usage of it
 10 | has been hidden under some syntactic sugar.  For example, `"k"` is actually...
 11 | 
 12 |     | main = $:expect(k).
 13 |     + k
 14 |     = k
 15 | 
 16 |     | main = $:expect(k).
 17 |     + l
 18 |     ? expected 'k' but found 'l'
 19 | 
 20 | The section about aliases needs to be written too.
 21 | 
 22 | Here's `$:alnum`, which only consumes tokens where the first character is
 23 | alphanumeric.
 24 | 
 25 |     | main = "(" & {$:alnum → A} & ")" & A.
 26 |     + (abc123deefghi459876jklmnopqRSTUVXYZ0)
 27 |     = 0
 28 | 
 29 |     | main = "(" & {$:alnum → A} & ")" & A.
 30 |     + (abc123deefghi459876!jklmnopqRSTUVXYZ0)
 31 |     ? expected ')' but found '!'
 32 | 
 33 | Here's `$:upper`, which only consumes tokens where the first character is
 34 | uppercase alphabetic.
 35 | 
 36 |     | main = "(" & {$:upper → A} & ")" & A.
 37 |     + (ABCDEFGHIJKLMNOPQRSTUVWXYZ)
 38 |     = Z
 39 | 
 40 |     | main = "(" & {$:upper → A} & ")" & A.
 41 |     + (ABCDEFGHIJKLMNoPQRSTUVWXYZ)
 42 |     ? expected ')' but found 'o'
 43 | 
 44 | Here's `$:startswith`, which only consumes tokens which start with
 45 | the given term.  (For a single-character scanner this isn't very
 46 | impressive.)
 47 | 
 48 |     | main = "(" & {$:startswith('A') → A} & ")" & A.
 49 |     + (AAAA)
 50 |     = A
 51 | 
 52 |     | main = "(" & {$:startswith('A') → A} & ")" & A.
 53 |     + (AAAABAAA)
 54 |     ? expected ')' but found 'B'
 55 | 
 56 | Here's `$:mkterm`, which takes an atom and a list and creates a constructor.
 57 | 
 58 |     | main = $:mkterm(atom, list(a, list(b, list(c, nil)))).
 59 |     = atom(a, b, c)
 60 | 
 61 | Here's `$:unquote`, which takes three terms, X, L and R, where L and R
 62 | must be atoms.  If X begins with L and ends with R then the contents
 63 | in-between will be returned as an atom.  Otherwise fails.
 64 | 
 65 |     | main = $:unquote('"hello"', '"', '"').
 66 |     = hello
 67 | 
 68 |     | main = $:unquote('(hello)', '(', ')').
 69 |     = hello
 70 | 
 71 |     | main = $:unquote('(hello)', '(', '"').
 72 |     ? term '(hello)' is not quoted with '(' and '"'
 73 | 
 74 |     | main = $:unquote('(hello)', '[', ')').
 75 |     ? term '(hello)' is not quoted with '[' and ')'
 76 | 
 77 | The quotes can be Unicode characters.
 78 | 
 79 |     | main = $:unquote('“hello”', '“', '”').
 80 |     = hello
 81 | 
 82 | The quotes can be multiple characters.
 83 | 
 84 |     | main = $:unquote('%-hello-%', '%-', '-%').
 85 |     = hello
 86 | 
 87 | The quotes can even be empty strings.
 88 | 
 89 |     | main = $:unquote('hello', '', '').
 90 |     = hello
 91 | 
 92 | Here's `$:equal`, which takes two terms, L and R.  If L and R are equal,
 93 | succeeds and returns that term which they both are.  Otherwise fails.
 94 | 
 95 | Two atoms are equal if their texts are identical.
 96 | 
 97 |     | main = $:equal('hi', 'hi').
 98 |     = hi
 99 | 
100 |     | main = $:equal('hi', 'lo').
101 |     ? term 'hi' does not equal 'lo'
102 | 
103 | Two constructors are equal if their texts are identical, they have the
104 | same number of subterms, and all of their corresponding subterms are equal.
105 | 
106 |     | main = $:equal(hi(there), hi(there)).
107 |     = hi(there)
108 | 
109 |     | main = $:equal(hi(there), lo(there)).
110 |     ? term 'hi(there)' does not equal 'lo(there)'
111 | 
112 |     | main = $:equal(hi(there), hi(here)).
113 |     ? term 'hi(there)' does not equal 'hi(here)'
114 | 
115 |     | main = $:equal(hi(there), hi(there, there)).
116 |     ? term 'hi(there)' does not equal 'hi(there, there)'
117 | 
118 | Here's `$:emit`, which takes an atom and outputs it.  Unlike `print`, which
119 | is meant for debugging, `$:emit` does not append a newline, and is 8-bit-clean.
120 | 
121 |     | main = $:emit('`') & $:emit('wo') & ''.
122 |     = `wo
123 | 
124 |     -> Tests for functionality "Intepret Tamsin program (pre- & post-processed)"
125 | 
126 | `$:emit` is 8-bit-clean: if the atom contains unprintable characters,
127 | `$:emit` does not try to make them readable by UTF-8 or any other encoding.
128 | (`print` may or may not do this, depending on the implementation.)
129 | 
130 |     | main = $:emit('\x00\x01\x02\xfd\xfe\xff') & ''.
131 |     = 000102fdfeff0a
132 | 
133 |     -> Tests for functionality "Intepret Tamsin program"
134 | 
135 | Here's `$:repr`, which takes a term and results in an atom which is the
136 | result of reprifying that term (see section on Terms, above.)
137 | 
138 |     | main = $:repr(hello).
139 |     = hello
140 | 
141 |     | main = $:repr('016fo_oZZ').
142 |     = 016fo_oZZ
143 | 
144 |     | main = $:repr('016fo$oZZ').
145 |     = '016fo$oZZ'
146 | 
147 |     | main = $:repr('').
148 |     = ''
149 | 
150 |     | main = $:repr(' ').
151 |     = ' '
152 | 
153 |     | main = $:repr('016\n016').
154 |     = '016\x0a016'
155 | 
156 |     | main = $:repr(hello(there, world)).
157 |     = hello(there, world)
158 | 
159 |     | main = V ← '♡' & $:repr('□'(there, V)).
160 |     = '\xe2\x96\xa1'(there, '\xe2\x99\xa1')
161 | 
162 |     | main = $:repr(a(b(c('qu\'are\\')))).
163 |     = a(b(c('qu\'are\\')))
164 | 
165 |     | main = $:repr('\x99').
166 |     = '\x99'
167 | 
168 | Here's `$:reverse`, which takes a term of the form
169 | `X(a, X(b, ... X(z, E)) ... )` and a term E, in that order, and returns
170 | a term of the form `X(z, X(y, ... X(a, E)) ... )`.  The constructor tag X
171 | is often `cons` or `pair` or `list` and E is often `nil`.
172 | 
173 |     | main = $:reverse(list(a, list(b, list(c, nil))), nil).
174 |     = list(c, list(b, list(a, nil)))
175 | 
176 | E need not be an atom.
177 | 
178 |     | main = $:reverse(list(a, list(b, list(c, hello(world)))), hello(world)).
179 |     = list(c, list(b, list(a, hello(world))))
180 | 
181 | If the tail of the list isn't E, an error occurs.
182 | 
183 |     | main = $:reverse(list(a, list(b, list(c, hello(world)))), nil).
184 |     ? malformed list
185 | 
186 | If some list constructor doesn't have two children, an error occurs.
187 | 
188 |     | main = $:reverse(list(a, list(b, list(nil))), nil).
189 |     ? malformed list
190 | 
191 | The constructor tag can be anything.
192 | 
193 |     | main = $:reverse(foo(a, foo(b, foo(c, nil))), nil).
194 |     = foo(c, foo(b, foo(a, nil)))
195 | 
196 | But if there is a different constructor somewhere in the list, well,
197 | 
198 |     | main = $:reverse(foo(a, fooz(b, foo(c, nil))), nil).
199 |     ? malformed list
200 | 
201 | You can reverse an empty list.
202 | 
203 |     | main = $:reverse(nil, nil).
204 |     = nil
205 | 
206 | But of course,
207 | 
208 |     | main = $:reverse(nil, zilch).
209 |     ? malformed list
210 | 
211 | This is a shallow reverse.  Embedded lists are not reversed.
212 | 
213 |     | main = $:reverse(list(a, list(list(1, list(2, nil)), list(c, nil))), nil).
214 |     = list(c, list(list(1, list(2, nil)), list(a, nil)))
215 | 
216 | Here's `$:gensym`, which takes an atom and returns a new atom that begins with it; each call yields a different atom.
217 | 
218 |     | main = $:gensym('foo').
219 |     = foo1
220 | 
221 |     | main = $:gensym('foo') → F & $:gensym('foo') → G & $:equal(F, G).
222 |     ? 'foo1' does not equal 'foo2'
223 | 
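224 | Here's `$:hexbyte`, which takes two atoms, each a hexadecimal digit (high digit first), and returns the single-byte atom with that value.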
225 | 
226 |     | main = $:hexbyte('5', '0').
227 |     = P
228 | 
229 |     | main = $:hexbyte('f', 'f') → C & $:repr(C).
230 |     = '\xff'
231 | 
232 | Here's `$:format_octal`, which makes me feel ill.
233 | 
234 |     | main = $:format_octal('P').
235 |     = 120
236 | 
237 |     | main = $:format_octal('\xff').
238 |     = 377
239 | 
240 | There are never any leading zeroes.
241 | 
242 |     | main = $:format_octal('\n').
243 |     = 12
244 | 
245 | It works on the first byte of the string only.
246 | 
247 |     | main = $:format_octal('«').
248 |     = 302
249 | 
250 | Here's `$:length`, which returns an atom representing the length, in bytes,
251 | of the given term (flattened.)  Note that this is an atom, not an integer,
252 | because Tamsin doesn't even have integers.
253 | 
254 |     | main = $:length(abcde).
255 |     = 5
256 | 
257 |     | main = $:length('').
258 |     = 0
259 | 
260 |     | main = $:length('♥').
261 |     = 3
262 | 
263 |     | main = $:length(a(   b  ,  c  )).
264 |     = 7
265 | 


--------------------------------------------------------------------------------
/doc/TODO.markdown:
--------------------------------------------------------------------------------
  1 | TODO
  2 | ====
  3 | 
  4 | ### C implementation ###
  5 | 
  6 | *   implement buffers in C in libtamsin
  7 | *   implement pattern match in send in C compiler
  8 | 
  9 | ### higher-priority ###
 10 | 
 11 | *   allow switching the kind of buffer that is used when `@` is used:
 12 |     *   `rule @ %stdin` is the default; it is implied when no `@`
 13 |     *   `rule @ %mmap` to use an MmapBuffer
 14 |     *   `rule @ %line` to use a LineEditorBuffer
 15 |     *   `rule @ $:open('file.txt')` ?
 16 | *   `$:add`, `$:sub`, `$:mul`, `$:div`, `$:rem`, for atoms which look like
 17 |     integers: `["-"] & {$:digit}`.
 18 | *   `$:tell` and `$:seek` the implicit buffer — for VMs etc. — although
 19 |     note, this may have scary consequences when combined with backtracking
 20 | *   `(foo → S | ok)` & print S ... should set S to error if foo failed?
 21 |     or `(foo |→ S ok)` ?  This is necessary for the meta-circular
 22 |     interpreter: to implement `A | B` we want to interpret `A` and see
 23 |     if it failed or not.  i.e. We want to be able to reify errors...
 24 | 
 25 | ### medium-priority ###
 26 | 
 27 | *   Starting with knowns about `$` builtins, an analysis to determine, for Rule:
 28 |     - may consume input, never consumes input
 29 |     - may fail, always fails
 30 |     - may succeed, always succeeds... (may_backtrack?)
 31 | *   production values
 32 |     *   `$:fold(^production, nil, cons)`
 33 |     *   `$:fold(^($:alnum & " "), '', ^L+','+R)`
 34 | *   codegen and emitter phases in compiler.  take current compiler phase,
 35 |     make it construct a low-level representation instead (codegen), then
 36 |     have a phase that writes out C code from that low-level repr (emitter)
 37 | *   non-backtracking versions of `|` and `{}`:  `|!` and `{}!`
 38 | 
 39 | ### testing ###
 40 | 
 41 | *   test for `''('')`, `'\x00'('\x00')`
 42 | *   document how prod scanners do EOF
 43 | *   tests that `'V'` is not a variable
 44 | *   tests for failing when utf8 scanner hits badly-encoded utf8
 45 | *   tests for invalid escape codes
 46 | *   test for mismatched # of formals in prod branches
 47 | *   document the modules.  in own document.  plus tests.
 48 | 
 49 | ### lower-priority ###
 50 | 
 51 | *   `ctype` module, with `alpha` and `digit` and etc.
 52 | *   `list` module: `deep_reverse`
 53 | *   use Tamsin repr in error messages
 54 | *   `__str__` should be Tamsin `repr()`?
 55 | *   regex-like shortcuts: `\w` for "word", `\s` for "whitespace", etc.
 56 | *   have compiler replace calls to `list` functions
 57 |     by "more efficient" versions written in C -- if they really are...
 58 | *   and maybe even garbage-collect terms in libtamsin
 59 | *   figure out why reading a 4M file in a compiled program TAKES DOWN UBUNTU
 60 | *   make it possible to recover from more errors using `|` (don't throw
 61 |     exceptions so often)
 62 | *   stronger tests for scanner, parser: dump all falderal testbodies to files
 63 | *   option for ref interp to not output result (or by default, don't)
 64 | *   "mini" interpreter that handles variables (ouch)
 65 | *   error handling: skip to next sentinel and report more errors
 66 | *   module-level updatable variables.  or globals.  or "process dictionary"
 67 |     `$:store()` and `$:fetch()`.  or database.
 68 | *   figure out good way to do aliases with the Tamsin-parser-in-Tamsin
 69 |     (dynamic grammar is really more of a Zz thing...)
 70 | *   should be able to import ("open") other modules into your own namespace.
 71 | *   `@` a la Haskell in pattern-match:
 72 |     *   `walk(T@tree(L,R)) = ...`
 73 | *   maps, implemented as hash tables.
 74 |     *   `Table ← {} & fields → F@fields(H,T) & Table[H] ← T`
 75 | *   pretty-print AST for error messages
 76 | 
 77 | ### symbol fun ###
 78 | 
 79 | *   `~` (Lua) for not and `!` (Prolog) for non-backtracking?
 80 | *   lowercase greek letters are variables too!
 81 | *   use `←` instead of `@`, why not?
 82 | *   I'm always typing `prod() → rule` instead of `=`, so why not?
 83 | *   `A;B` — like `&` except assert (statically) that `A` always succeeds
 84 | *   be generous and allow `"xyz"` in term context position?
 85 | *   denotational semantics sugar!  something like...
 86 |     
 87 |         ⟦add α β⟧ = $:add(⟦α⟧, ⟦β⟧).
 88 |     
 89 |     and/or
 90 | 
 91 |         ⟦add α β⟧(σ) = $:add(⟦α⟧(σ), ⟦β⟧(σ)).
 92 |         ⟦var α⟧(σ) = fetch(σ, α).
 93 | 
 94 |     of course, DS is a bit fast-and-loose about actual parsing...
 95 |     but the syntax looks mighty fine.
 96 | 
 97 | ### wild ideas ###    
 98 | 
 99 | *   term-rewriting library; a la Treacle; should make desugarer almost trivial
100 | *   algebraically cool version of `|`, perhaps as a worked example
101 |     (implement Bakerloo in Tamsin)
102 | *   EOF and nil are the same?  it would make sense... call it `end`? (do we?)
103 | *   productions with names with arbitrary characters in them.
104 | *   something like «foo» but foo is the name of a *non*terminal — symbolic
105 |     production references (like Perl's horrible globs as a cheap substitute
106 |     for actual function references or lambdas.)
107 | *   turn system library back into built-in keywords (esp. if : can be used)
108 | *   Tamsin scanner: more liberal (every non-alphanum+_ symbol scans as itself,
109 |     incl. ones that have no meaning currently like `*` and `?`)
110 | *   auto-generate terms from productions, like Rooibos does
111 | *   token classes... somehow.  (then numeric is just a special token class?)
112 |     a token class is just the "call stack" of productions at the time it
113 |     was scanned
114 | *   «» could be an alias w/right sym (`,,`, `„`)
115 |     (still need to scan it specially though)
116 | *   special form that consumes rest of input from the Tamsin source --
117 |     maybe not such a gimmick since micro-tamsin does this
118 | *   feature-testing: `$.exists(module) | do_without_module`
119 | *   ternary: `foo ? bar : baz` — if foo succeeded, do bar, else do baz.
120 |     I don't think this is very necessary because you can usually just say
121 |     `(foo & bar) | baz` — but only if `bar` always succeeds, which it
122 |     usually does (to return something)
123 | 


--------------------------------------------------------------------------------
/doc/Tested_Examples.markdown:
--------------------------------------------------------------------------------
  1 | Tests that used to be in Tamsin's README
  2 | ========================================
  3 | 
  4 |     -> Tests for functionality "Intepret Tamsin program"
  5 | 
  6 | Hello, world!
  7 | 
  8 |     | main = 'Hello, world!'.
  9 |     = Hello, world!
 10 | 
 11 | Make a story more exciting!
 12 | 
 13 |     | main = ("." & '!' | "?" & '?!' | any)/''.
 14 |     + Chapter 1
 15 |     + ---------
 16 |     + It was raining.  She knocked on the door.  She heard
 17 |     + footsteps inside.  The door opened.  The butler peered
 18 |     + out.  "Hello," she said.  "May I come in?"
 19 |     = Chapter 1
 20 |     = ---------
 21 |     = It was raining!  She knocked on the door!  She heard
 22 |     = footsteps inside!  The door opened!  The butler peered
 23 |     = out!  "Hello," she said!  "May I come in?!"
 24 | 
 25 | Parse an algebraic expression for syntactic correctness.
 26 | 
 27 |     | main = (expr0 & eof & 'ok').
 28 |     | expr0 = expr1 & {"+" & expr1}.
 29 |     | expr1 = term & {"*" & term}.
 30 |     | term = "x" | "y" | "z" | "(" & expr0 & ")".
 31 |     + x+y*(z+x+y)
 32 |     = ok
 33 | 
 34 | Parse an algebraic expression to a syntax tree.
 35 | 
 36 |     | main = expr0.
 37 |     | expr0 = expr1 → E1 & {"+" & expr1 → E2 & E1 ← add(E1,E2)} & E1.
 38 |     | expr1 = term → E1 & {"*" & term → E2 & E1 ← mul(E1,E2)} & E1.
 39 |     | term = "x" | "y" | "z" | "(" & expr0 → E & ")" & E.
 40 |     + x+y*(z+x+y)
 41 |     = add(x, mul(y, add(add(z, x), y)))
 42 | 
 43 | Translate an algebraic expression to RPN (Reverse Polish Notation).
 44 | 
 45 |     | main = expr0 → E & walk(E).
 46 |     | expr0 = expr1 → E1 & {"+" & expr1 → E2 & E1 ← add(E1,E2)} & E1.
 47 |     | expr1 = term → E1 & {"*" & term → E2 & E1 ← mul(E1,E2)} & E1.
 48 |     | term = "x" | "y" | "z" | "(" & expr0 → E & ")" & E.
 49 |     | walk(add(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' +'.
 50 |     | walk(mul(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' *'.
 51 |     | walk(X) = return ' '+X.
 52 |     + x+y*(z+x+y)
 53 |     =  x y z x + y + * +
 54 | 
 55 | Reverse a list.
 56 | 
 57 |     | main = reverse(pair(a, pair(b, pair(c, nil))), nil).
 58 |     | reverse(pair(H, T), A) = reverse(T, pair(H, A)).
 59 |     | reverse(nil, A) = A.
 60 |     = pair(c, pair(b, pair(a, nil)))
 61 | 
 62 | Parse and evaluate a Boolean expression.
 63 | 
 64 |     | main = expr0 → E using scanner & eval(E).
 65 |     | expr0 = expr1 → E1 & ("or" & expr1)/E1/or.
 66 |     | expr1 = term → E1 & ("and" & term)/E1/and.
 67 |     | term = "true" | "false" | "(" & expr0 → E & ")" & E.
 68 |     | eval(and(A, B)) = eval(A) → EA & eval(B) → EB & and(EA, EB).
 69 |     | eval(or(A, B)) = eval(A) → EA & eval(B) → EB & or(EA, EB).
 70 |     | eval(X) = X.
 71 |     | and(true, true) = 'true'.
 72 |     | and(A, B) = 'false'.
 73 |     | or(false, false) = 'false'.
 74 |     | or(A, B) = 'true'.
 75 |     | scanner = scan using $:utf8.
 76 |     | scan = {" "} & ("(" | ")" | token).
 77 |     | token = "f" & "a" & "l" & "s" & "e" & 'false'
 78 |     |       | "t" & "r" & "u" & "e" & 'true'
 79 |     |       | "o" & "r" & 'or'
 80 |     |       | "a" & "n" & "d" & 'and'.
 81 |     + (falseortrue)andtrue
 82 |     = true
 83 | 
 84 | Parse a CSV file and write out the 2nd-last field of each record.  Handles
 85 | commas and double-quotes inside quotes.
 86 | 
 87 |     | main = line → L & L ← lines(nil, L) &
 88 |     |        {"\n" & line → M & L ← lines(L, M)} & extract(L) & ''.
 89 |     | line = field → F & {"," & field → G & F ← fields(G, F)} & F.
 90 |     | field = strings | bare.
 91 |     | strings = string → T & {string → S & T ← T + '"' + S} & T.
 92 |     | string = "\"" & (!"\"" & any)/'' → T & "\"" & T.
 93 |     | bare = (!(","|"\n") & any)/''.
 94 |     | extract(lines(Ls, L)) = extract(Ls) & extract_field(L).
 95 |     | extract(L) = L.
 96 |     | extract_field(fields(L, fields(T, X))) = print T.
 97 |     | extract_field(X) = X.
 98 |     + Harold,1850,"21 Baxter Street",burgundy
 99 |     + Smythe,1833,"31 Little Street, St. James",mauve
100 |     + Jones,1791,"41 ""The Gardens""",crimson
101 |     = 21 Baxter Street
102 |     = 31 Little Street, St. James
103 |     = 41 "The Gardens"
104 | 
105 | Evaluate a trivial S-expression-based language.
106 | 
107 |     | main = sexp → S using scanner & reverse(S, nil) → SR & eval(SR).
108 |     | scanner = ({" "} & ("(" | ")" | $:alnum/'')) using $:utf8.
109 |     | sexp = $:alnum | list.
110 |     | list = "(" & sexp/nil/pair → L & ")" & L.
111 |     | head(pair(A, B)) = A.
112 |     | tail(pair(A, B)) = B.
113 |     | cons(A, B) = return pair(A, B).
114 |     | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R).
115 |     | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R).
116 |     | eval(pair(cons, pair(A, pair(B, nil)))) =
117 |     |    eval(A) → AE & eval(B) → BE & return pair(AE, BE).
118 |     | eval(X) = X.
119 |     | reverse(pair(H, T), A) = reverse(H, nil) → HR & reverse(T, pair(HR, A)).
120 |     | reverse(nil, A) = A.
121 |     | reverse(X, A) = X.
122 |     + (head (tail (cons (cons a nil) (cons b nil))))
123 |     = b
124 | 
125 | Escape characters in a string, for use in a C program source.
126 | 
127 |     | main = escaped('"♥\n«"').
128 |     | escaped(S) = escaped_r @ S.
129 |     | escaped_r = A ← '' &
130 |     |     {
131 |     |         "\\" & A ← A + '\\\\'
132 |     |       | "\"" & A ← A + '\\"'
133 |     |       | "\n" & A ← A + '\\n'
134 |     |       | $:alnum → B & A ← A + B
135 |     |       | any → B & (many_format_octal @ B) → B & A ← A + B
136 |     |     } & A.
137 |     | 
138 |     | many_format_octal =
139 |     |     S ← '' &
140 |     |     {any → B & $:format_octal(B) → B & S ← S + '\\' + B} using $:byte &
141 |     |     S.
142 |     = \"\342\231\245\n\302\253\"
143 | 


--------------------------------------------------------------------------------
/eg/alg-expr1.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
3 | 
4 | main = (expr0 & eof & 'ok').
5 | expr0 = expr1 & {"+" & expr1}.
6 | expr1 = term & {"*" & term}.
7 | term = "x" | "y" | "z" | "(" & expr0 & ")".
8 | 


--------------------------------------------------------------------------------
/eg/alg-expr2.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
3 | 
4 | main = expr0.
5 | expr0 = expr1 -> E1 & {"+" & expr1 -> E2 & E1 <- add(E1,E2)} & E1.
6 | expr1 = term -> E1 & {"*" & term -> E2 & E1 <- mul(E1,E2)} & E1.
7 | term = "x" | "y" | "z" | "(" & expr0 -> E & ")" & E.
8 | 


--------------------------------------------------------------------------------
/eg/alg-expr3.tamsin:
--------------------------------------------------------------------------------
 1 | # This example Tamsin program was written by Chris Pressey, and is
 2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
 3 | 
 4 | main = expr0 → E & walk(E).
 5 | expr0 = expr1 → E1 & {"+" & expr1 → E2 & E1 ← add(E1,E2)} & E1.
 6 | expr1 = term → E1 & {"*" & term → E2 & E1 ← mul(E1,E2)} & E1.
 7 | term = "x" | "y" | "z" | "(" & expr0 → E & ")" & E.
 8 | walk(add(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' +'.
 9 | walk(mul(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' *'.
10 | walk(X) = return ' '+X.
11 | 


--------------------------------------------------------------------------------
/eg/backtrack.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
3 | 
4 | main = set E = original &&
5 |          (set E = changed && "0" && "1" | "0" && "2") &
6 |        return E.
7 | 


--------------------------------------------------------------------------------
/eg/bitpair.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
3 | 
4 | main = bit → A & bit → B & return pair(A,B).
5 | bit = "0" | "1".
6 | 


--------------------------------------------------------------------------------
/eg/bits.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
3 | 
4 | main = bit & {"," & bit} & ".".
5 | bit = "0" | "1".
6 | 


--------------------------------------------------------------------------------
/eg/blerf.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
3 | 
4 | main = blerf(tree(tree(tree(a,b),c),d)).
5 | blerf(tree(L,R)) = blerf(L).
6 | blerf(Other) = return Other.
7 | 


--------------------------------------------------------------------------------
/eg/change-buffer.tamsin:
--------------------------------------------------------------------------------
1 | main = one @ 'I process this string until ! where I digress a bit' & ''.
2 | one = {"!" & {any → C & $:emit(C)} @ 'Here I digress' | any → C & $:emit(C)}.
3 | 


--------------------------------------------------------------------------------
/eg/csv_extract.tamsin:
--------------------------------------------------------------------------------
 1 | # This example Tamsin program was written by Chris Pressey, and is
 2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
 3 | 
 4 | main = line → L & L ← lines(nil, L) &
 5 |        {"\n" & line → M & L ← lines(L, M)} & extract(L) & ''.
 6 | line = field → F & {"," & field → G & F ← fields(G, F)} & F.
 7 | field = strings | bare.
 8 | strings = string → T & {string → S & T ← T + '"' + S} & T.
 9 | string = "\"" & T ← '' & {!"\"" & any → S & T ← T + S} & "\"" & T.
10 | bare = T ← '' & {!(","|"\n") & any → S & T ← T + S} & T.
11 | extract(lines(Lines, Line)) = extract(Lines) & extract_field(Line).
12 | extract(L) = L.
13 | extract_field(fields(Last, fields(This, X))) = print This.
14 | extract_field(X) = return X.


--------------------------------------------------------------------------------
/eg/csv_parse.tamsin:
--------------------------------------------------------------------------------
 1 | # This example Tamsin program was written by Chris Pressey, and is
 2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
 3 | 
 4 | main = line & {"\n" & line} & 'ok'.
 5 | line = field → F & {"," & field → G & F ← fields(G, F)} & F.
 6 | field = strings | bare.
 7 | strings = string → T & {string → S & T ← T + '"' + S} & T.
 8 | string = "\"" & T ← '' & {!"\"" & any → S & T ← T + S} & "\"" & T.
 9 | bare = T ← '' & {!(","|"\n") & any → S & T ← T + S} & T.
10 | 


--------------------------------------------------------------------------------
/eg/escape.tamsin:
--------------------------------------------------------------------------------
1 | main = print
2 |        '\n' +
3 |        '\xa0' +
4 |        'r'.
5 | 


--------------------------------------------------------------------------------
/eg/eval-bool-expr.tamsin:
--------------------------------------------------------------------------------
 1 | # This example Tamsin program was written by Chris Pressey, and is
 2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
 3 | 
 4 | main = expr0 → E using scanner & eval(E).
 5 | expr0 = expr1 → E1 & {"or" & expr1 → E2 & E1 ← or(E1,E2)} & E1.
 6 | expr1 = term → E1 & {"and" & term → E2 & E1 ← and(E1,E2)} & E1.
 7 | term = "true" | "false" | "(" & expr0 → E & ")" & E.
 8 | eval(and(A, B)) = eval(A) → EA & eval(B) → EB & and(EA, EB).
 9 | eval(or(A, B)) = eval(A) → EA & eval(B) → EB & or(EA, EB).
10 | eval(X) = X.
11 | and(true, true) = 'true'.
12 | and(A, B) = 'false'.
13 | or(false, false) = 'false'.
14 | or(A, B) = 'true'.
15 | scanner = scan using $:utf8.
16 | scan = {" "} & ("(" | ")" | token).
17 | token = "f" & "a" & "l" & "s" & "e" & 'false'
18 |       | "t" & "r" & "u" & "e" & 'true'
19 |       | "o" & "r" & 'or'
20 |       | "a" & "n" & "d" & 'and'.
21 | 


--------------------------------------------------------------------------------
/eg/exciting-long.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
3 | 
4 | main = S <- '' & {("." & '!' | "?" & '?!' | any) -> T & S <- S + T} & S.
5 | 


--------------------------------------------------------------------------------
/eg/exciting.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
3 | 
4 | main = ("." & '!' | "?" & '?!' | any)/''.
5 | 


--------------------------------------------------------------------------------
/eg/exciting.txt:
--------------------------------------------------------------------------------
1 | It was raining.  She knocked on the door.  She heard
2 | footsteps inside.  The door opened.  The butler peered
3 | out.  "Hello," she said.  "May I come in?"


--------------------------------------------------------------------------------
/eg/expector.tamsin:
--------------------------------------------------------------------------------
 1 | main = set T = 'foobar' &
 2 |        print T &
 3 |        expect_chars(T).
 4 | 
 5 | # For a single-character string C, return E = call(prodref('$', 'expect'), list(atom(C), nil)).
 6 | # For a longer string, return and(E, R), where E is built as above from the
 7 | # first character and R is the result of expect_chars_r on the rest.
 8 | 
 9 | expect_chars(S) = print S & expect_chars_r @ S.
10 | expect_chars_r = any → C &
11 |   set E = call(prodref('$', 'expect'), list(atom(C), nil)) &
12 |   ((eof & return E) | (expect_chars_r → R & return and(E, R))).
13 | 


--------------------------------------------------------------------------------
/eg/foobar.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
3 | 
4 | main = print_each_char(fo+ob+ar).
5 | print_each_char(X) = print_each_char_r @ X.
6 | print_each_char_r = any → C & print C & print_each_char_r | return 'ok'.
7 | 


--------------------------------------------------------------------------------
/eg/hello-world.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
3 | 
4 | main = 'Hello, world!'.
5 | 


--------------------------------------------------------------------------------
/eg/list-of-chars.tamsin:
--------------------------------------------------------------------------------
1 | main = any/nil/list.
2 | 


--------------------------------------------------------------------------------
/eg/list-sugar2.tamsin:
--------------------------------------------------------------------------------
1 | main = expr([1,2|3]).
2 | expr([1,2|3]) = "f".
3 | 
4 | 


--------------------------------------------------------------------------------
/eg/modules.tamsin:
--------------------------------------------------------------------------------
 1 | # parses "(0)39".
 2 | 
 3 | stuff {
 4 |     junk = "(" & :return & ")".
 5 |     return = "0".
 6 | }
 7 | main = stuff:junk & :return & :eof.
 8 | return = "3".
 9 | eof = "9".
10 | 


--------------------------------------------------------------------------------
/eg/names.csv:
--------------------------------------------------------------------------------
1 | Harold,1850,"21 Baxter Street",burgundy
2 | Smythe,1833,"31 Little Street, St. James",mauve
3 | Jones,1791,"41 ""The Gardens""",crimson
4 | 


--------------------------------------------------------------------------------
/eg/pipeline.tamsin:
--------------------------------------------------------------------------------
1 | # Demonstrate that Tamsin programs can consume a stream of input and
2 | # produce a stream of output.  (This was not true in versions 0.5 and
3 | # prior.)
4 | 
5 | main = {token -> A & whitespace & print A}.
6 | token = S <- '' & {$:alnum -> T & S <- S + T} & return S.
7 | whitespace = {" " | "\n"}.
8 | 


--------------------------------------------------------------------------------
/eg/prod-branches.tamsin:
--------------------------------------------------------------------------------
1 | main = e(1).
2 | e(2) = 'foo'.
3 | e(A) = A.
4 | 
5 | 


--------------------------------------------------------------------------------
/eg/reverse.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
3 | 
4 | main = reverse(pair(a, pair(b, pair(c, nil))), nil).
5 | reverse(pair(H, T), A) = reverse(T, pair(H, A)).
6 | reverse(nil, A) = A.
7 | 


--------------------------------------------------------------------------------
/eg/sexpr-eval.tamsin:
--------------------------------------------------------------------------------
 1 | # This example Tamsin program was written by Chris Pressey, and is
 2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
 3 | 
 4 | main = sexp → S using scanner & reverse(S, nil) → SR & eval(SR).
 5 | scanner = scan using $:utf8.
 6 | scan = {" "} & ("(" | ")" | (T ← '' & {$:alnum → S & T ← T + S} & return T)).
 7 | sexp = $:alnum | list.
 8 | list = "(" & listtail(nil).
 9 | listtail(L) = sexp → S & listtail(pair(S, L)) | ")" & L.
10 | head(pair(A, B)) = return A.
11 | tail(pair(A, B)) = return B.
12 | cons(A, B) = return pair(A, B).
13 | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R).
14 | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R).
15 | eval(pair(cons, pair(A, pair(B, nil)))) =
16 |    eval(A) → AE & eval(B) → BE & return pair(AE, BE).
17 | eval(X) = X.
18 | reverse(pair(H, T), A) = reverse(H, nil) → HR & reverse(T, pair(HR, A)).
19 | reverse(nil, A) = A.
20 | reverse(X, A) = X.
21 | 


--------------------------------------------------------------------------------
/eg/store.tamsin:
--------------------------------------------------------------------------------
 1 | fetch(K1, list(pair(K2, V), T)) = $:equal(K1, K2) & V | fetch(K1, T).
 2 | fetch(K, nil)                   = fail K + ' not found'.
 3 | 
 4 | store(K, V, A)                  = return list(pair(K, V), A).
 5 | 
 6 | main =
 7 |     ST ← nil &
 8 |     store(x, 21, ST) → ST &
 9 |     store(y, 17, ST) → ST &
10 |     store(z, 11, ST) → ST &
11 |     fetch(y, ST).
12 | 
13 | 


--------------------------------------------------------------------------------
/eg/zeroes-concat.tamsin:
--------------------------------------------------------------------------------
1 | main = zeroes.
2 | zeroes = ("0" & zeroes → E & return E + 'Z') | return ''.
3 | 
4 | 


--------------------------------------------------------------------------------
/eg/zeroes.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain.  It comes with NO WARRANTY.
3 | 
4 | main = zeroes.
5 | zeroes = ("0" & zeroes → E & return zero(E)) | return nil.
6 | 


--------------------------------------------------------------------------------
/fixture/bootstrapped.markdown:
--------------------------------------------------------------------------------
 1 | 
 2 |     -> Functionality "Intepret Tamsin program" is implemented by
 3 |     -> shell command
 4 |     -> "bin/bootstrapped-compiler <%(test-body-file) >tmp/foo.c && gcc -Ic_src -Lc_src tmp/foo.c -o tmp/foo -ltamsin && tmp/foo <%(test-input-file)"
 5 | 
 6 |     -> Functionality "Intepret Tamsin program (pre- & post-processed)"
 7 |     -> is implemented by
 8 |     -> shell command "bin/bootstrapped-compiler <%(test-body-file) >tmp/foo.c && gcc -Ic_src -Lc_src tmp/foo.c -o tmp/foo -ltamsin && cat %(test-input-file) | bin/inhex | tmp/foo | bin/hexout"
 9 | 
10 | 


--------------------------------------------------------------------------------
/fixture/compiler.py.markdown:
--------------------------------------------------------------------------------
1 | 
2 |     -> Functionality "Intepret Tamsin program" is implemented by
3 |     -> shell command
4 |     -> "./bin/tamsin loadngo %(test-body-file) < %(test-input-file)"
5 | 
6 |     -> Functionality "Intepret Tamsin program (pre- & post-processed)"
7 |     -> is implemented by
8 |     -> shell command "cat %(test-input-file) | bin/inhex | bin/tamsin loadngo %(test-body-file) | bin/hexout"
9 | 


--------------------------------------------------------------------------------
/fixture/compiler.tamsin.markdown:
--------------------------------------------------------------------------------
 1 | 
 2 |     -> Functionality "Intepret Tamsin program" is implemented by
 3 |     -> shell command
 4 |     -> "bin/tamsin-compiler <%(test-body-file) >tmp/foo.c && gcc -Ic_src -Lc_src tmp/foo.c -o tmp/foo -ltamsin && tmp/foo <%(test-input-file)"
 5 | 
 6 |     -> Functionality "Intepret Tamsin program (pre- & post-processed)"
 7 |     -> is implemented by
 8 |     -> shell command "bin/tamsin-compiler <%(test-body-file) >tmp/foo.c && gcc -Ic_src -Lc_src tmp/foo.c -o tmp/foo -ltamsin && cat %(test-input-file) | bin/inhex | tmp/foo | bin/hexout"
 9 | 
10 | 


--------------------------------------------------------------------------------
/fixture/micro-tamsin.markdown:
--------------------------------------------------------------------------------
1 | 
2 |     -> Functionality "Intepret Tamsin program" is implemented by
3 |     -> shell command
4 |     -> "cat %(test-body-file) > tmp/foz && echo -n '/' >> tmp/foz && cat %(test-input-file) >> tmp/foz && ./bin/micro-tamsin tmp/foz"
5 | 
6 | 


--------------------------------------------------------------------------------
/fixture/mini-tamsin.markdown:
--------------------------------------------------------------------------------
1 | 
2 |     -> Functionality "Intepret Tamsin program" is implemented by
3 |     -> shell command
4 |     -> "cat %(test-body-file) > tmp/foz && echo -n '/' >> tmp/foz && cat %(test-input-file) >> tmp/foz && ./bin/mini-tamsin tmp/foz"
5 | 
6 | 


--------------------------------------------------------------------------------
/fixture/tamsin.py.markdown:
--------------------------------------------------------------------------------
1 | 
2 |     -> Functionality "Intepret Tamsin program" is implemented by
3 |     -> shell command "bin/tamsin %(test-body-file) < %(test-input-file)"
4 | 
5 |     -> Functionality "Intepret Tamsin program (pre- & post-processed)"
6 |     -> is implemented by
7 |     -> shell command "cat %(test-input-file) | bin/inhex | bin/tamsin %(test-body-file) | bin/hexout"
8 | 


--------------------------------------------------------------------------------
/lib/list.tamsin:
--------------------------------------------------------------------------------
 1 | list {   # Utilities over lists built from list(H, T) cells terminated by nil.
 2 |   reverse(list(H, T), A) = reverse(T, list(H, A)).   # moves elements onto accumulator A; call with A = nil to reverse a list
 3 |   reverse(nil, A) = A.
 4 | # member(X, L): returns the first element H of L for which $:equal(X, H) succeeds; fails with 'not a member' if none does.
 5 |   member(X, nil) = fail 'not a member'.
 6 |   member(X, list(H,T)) =
 7 |     $:equal(X, H) & H | member(X, T).
 8 | # add_elem(X, L): returns L unchanged when member(X, L) succeeds, otherwise list(X, L).
 9 |   add_elem(X, L) =
10 |     member(X, L) & L | return list(X, L).
11 | # union(L1, L2): folds each element of L1 into L2 with add_elem and returns the resulting L2.
12 |   union(nil, L2) = L2.
13 |   union(list(H,T), L2) =
14 |     add_elem(H, L2) → L2 &
15 |     union(T, L2).
16 | # append(L1, L): returns the elements of L1, in order, followed by L.
17 |   append(nil, L) = L.
18 |   append(list(H, T), L) =
19 |     append(T, L) → T & return list(H, T).   # T is rebound to the already-appended tail
20 | }
21 | 


--------------------------------------------------------------------------------
/lib/tamsin_analyzer.tamsin:
--------------------------------------------------------------------------------
  1 | # Desugarer and analyzer for Tamsin AST, written in Tamsin.
  2 | # Distributed under a BSD-style license; see LICENSE.
  3 | 
  4 | tamsin_analyzer {
  5 | 
  6 |   desugar_all(list(H,T)) =
  7 |       desugar(H) → DH &
  8 |       desugar_all(T) → DT &
  9 |       return list(DH, DT).
 10 |   desugar_all(nil) = 'nil'.
 11 | 
 12 |   desugar(program(L)) = desugar_all(L) → DL & return program(DL).
 13 |   desugar(module(N, L)) =
 14 |       desugar_all(L) → DL &
 15 |       merge_prod_branches(DL, nil) → DDL &
 16 |       return module(N, DDL).
 17 |   desugar(production(N, PBs)) =
 18 |       desugar_all(PBs) → DPBs &
 19 |       return production(N, DPBs).
 20 |   desugar(prodbranch(Fs, Ls, B)) =
 21 |       desugar_pattern_all(Fs, 0) → Pair &
 22 |       fst(Pair) → DFs &
 23 |       desugar(B) → DB &
 24 |       return prodbranch(DFs, Ls, DB).
 25 |   desugar(call(PR, Args)) = return call(PR, Args).
 26 |   desugar(or(L, R)) = desugar(L) → DL & desugar(R) → DR & return or(DL, DR).
 27 |   desugar(and(L, R)) = desugar(L) → DL & desugar(R) → DR & return and(DL, DR).
 28 |   desugar(not(X)) = desugar(X) → DX & return not(DX).
 29 |   desugar(while(X)) = desugar(X) → DX & return while(DX).
 30 |   desugar(concat(L, R)) = desugar(L) → DL & desugar(R) → DR & return concat(DL, DR).
 31 |   desugar(using(R, P)) = desugar(R) → DR & return using(DR, P).
 32 |   desugar(on(R, T)) = desugar(R) → DR & desugar(T) → DT & return on(DR, DT).
 33 |   desugar(send(R, V)) = desugar(R) → DR & return send(DR, V).
 34 |   desugar(set(V, T)) = desugar(T) → DT & return set(V, DT).
 35 |   desugar(atom(T)) = return atom(T).
 36 |   desugar(constructor(T, Ts)) = return constructor(T, Ts).
 37 |   desugar(variable(N)) = return variable(N).
 38 |   desugar(fold(R, I, C)) =
 39 |       desugar(R) → DR &
 40 |       SET ← set(variable('_1'), I) &
 41 |       SEND ← send(DR, variable('_2')) &
 42 |       CAT ← concat(variable('_1'), variable('_2')) &
 43 |       ACC ← set(variable('_1'), CAT) &
 44 |       ($:equal(C, nil) |
 45 |           get_tag(C) → Tag &
 46 |           ACC ← set(variable('_1'),
 47 |                      constructor(Tag, list(variable('_2'),
 48 |                                         list(variable('_1'), nil))))) &
 49 |       RET ← call(prodref('$', 'return'), list(variable('_1'), nil)) &
 50 |       return and(and(SET, while(and(SEND, ACC))), RET).
 51 | 
 52 |   desugar_pattern_all(list(H,T), I) =
 53 |       desugar_pattern(H, I) → Pair &
 54 |       fst(Pair) → DH &
 55 |       snd(Pair) → I2 &
 56 |       desugar_pattern_all(T, I2) → Pair &
 57 |       fst(Pair) → DT &
 58 |       snd(Pair) → I3 &
 59 |       return pair(list(DH, DT), I3).
 60 |   desugar_pattern_all(nil, I) = return pair(nil, I).
 61 | 
 62 |   desugar_pattern(atom(T), I) = return pair(atom(T), I).
 63 |   desugar_pattern(constructor(T, Ts), I) =
 64 |       desugar_pattern_all(Ts, I) → Pair &
 65 |       fst(Pair) → DTs &
 66 |       snd(Pair) → I2 &
 67 |       return pair(constructor(T, DTs), I2).
 68 |   desugar_pattern(variable(N), I) =
 69 |       next(I) → I2 &
 70 |       return pair(patternvariable(N, I), I2).
 71 | 
 72 |   fst(pair(A,B)) = A.
 73 |   snd(pair(A,B)) = B.
 74 | 
 75 |   next(0) = '1'.
 76 |   next(1) = '2'.
 77 |   next(2) = '3'.
 78 |   next(3) = '4'.
 79 |   next(4) = '5'.
 80 |   next(5) = '6'.
 81 |   next(6) = '7'.
 82 |   next(7) = '8'.
 83 |   next(8) = '9'.
 84 |   next(9) = '10'.
 85 |   next(10) = '11'.
 86 |   next(11) = '12'.
 87 | 
 88 |   get_tag(atom(T)) = T.
 89 | 
 90 |   fetch(K, list(pair(K2, V), T))  = $:equal(K, K2) & V | fetch(K, T).
 91 |   fetch(K, nil)                   = 'nil'.
 92 |   
 93 |   delete(K, list(pair(K2, V), T)) = $:equal(K, K2) & delete(K, T)
 94 |                                   | delete(K, T) → R & return list(pair(K2, V), R).
 95 |   delete(K, nil)                  = 'nil'.
 96 |   
 97 |   store(K, V, A)                  = delete(K, A) → A2 &
 98 |                                     return list(pair(K, V), A2).
 99 | 
100 |   merge_prod_branches(list(production(N, list(B, nil)),T),Map) =
101 |       fetch(N, Map) → Blist &
102 |       Blist ← list(B, Blist) &
103 |       store(N, Blist, Map) → Map &
104 |       merge_prod_branches(T, Map).
105 |   merge_prod_branches(nil,Map) =
106 |       unmap(Map, nil).
107 | 
108 |   unmap(list(pair(K, V), T), A) =
109 |       list:reverse(V, nil) → RV &
110 |       P ← production(K, RV) &
111 |       A ← list(P, A) &
112 |       unmap(T, A).
113 |   unmap(nil, A) = A.
114 | 
115 |   ##### Analyzer
116 |   # CM = current module name; a prodref whose module is '' is resolved to CM.
117 | 
118 |   analyze_all(CM, list(H,T)) =
119 |       analyze(CM, H) → DH &
120 |       analyze_all(CM, T) → DT &
121 |       return list(DH, DT).
122 |   analyze_all(CM, nil) = 'nil'.
123 | 
124 |   analyze(CM, program(L)) =
125 |       analyze_all(CM, L) → DL &
126 |       return program(DL).
127 |   analyze(CM, module(N, L)) =
128 |       analyze_all(N, L) → DL &
129 |       return module(N, DL).
130 |   analyze(CM, production(N, Bs)) =
131 |       analyze_all(CM, Bs) → DBs &
132 |       return production(N, DBs).
133 |   analyze(CM, prodbranch(Fs, Ls, E)) =
134 |       analyze(CM, E) → DE &
135 |       locals(DE, nil) → Ls &
136 |       list:reverse(Ls, nil) → Ls &
137 |       return prodbranch(Fs, Ls, DE).
138 |   analyze(CM, call(PR, As)) =
139 |       analyze(CM, PR) → DPR &
140 |       analyze_all(CM, As) → DAs &
141 |       return call(DPR, DAs).
142 |   analyze(CM, prodref(MN, PN)) =
143 |       $:equal(MN, '') & return prodref(CM, PN)
144 |                       | return prodref(MN, PN).
145 |   analyze(CM, or(L, R)) =
146 |       analyze(CM, L) → DL &
147 |       analyze(CM, R) → DR &
148 |       return or(DL, DR).
149 |   analyze(CM, and(L, R)) =
150 |       analyze(CM, L) → DL &
151 |       analyze(CM, R) → DR &
152 |       return and(DL, DR).
153 |   analyze(CM, not(X)) =
154 |       analyze(CM, X) → DX &
155 |       return not(DX).
156 |   analyze(CM, while(X)) =
157 |       analyze(CM, X) → DX &
158 |       return while(DX).
159 |   analyze(CM, concat(L, R)) =
160 |       analyze(CM, L) → DL &
161 |       analyze(CM, R) → DR &
162 |       return concat(DL, DR).
163 |   analyze(CM, using(R, PR)) =
164 |       analyze(CM, R) → DR &
165 |       analyze(CM, PR) → DPR &
166 |       return using(DR, DPR).
167 |   analyze(CM, on(R, T)) =
168 |       analyze(CM, R) → DR &
169 |       analyze(CM, T) → DT &
170 |       return on(DR, DT).
171 |   analyze(CM, send(R, V)) =
172 |       analyze(CM, R) → DR &
173 |       return send(DR, V).
174 |   analyze(CM, set(V, T)) =
175 |       analyze(CM, T) → DT &
176 |       return set(V, DT).
177 |   analyze(CM, atom(T)) = return atom(T).
178 |   analyze(CM, constructor(T, Ts)) = return constructor(T, Ts).
179 |   analyze(CM, variable(N)) = return variable(N).
180 | 
181 |   ##### locals(AST, Ls)
182 |   # returns Ls extended with the variable names found in AST (call arguments are not examined)
183 | 
184 |   locals(call(PR, As), Ls) =
185 |       Ls.
186 |   locals(or(L, R), Ls) =
187 |       locals(L, Ls) → Ls &
188 |       locals(R, Ls).
189 |   locals(and(L, R), Ls) =
190 |       locals(L, Ls) → Ls &
191 |       locals(R, Ls).
192 |   locals(not(X), Ls) =
193 |       locals(X, Ls).
194 |   locals(while(X), Ls) =
195 |       locals(X, Ls).
196 |   locals(concat(L, R), Ls) =
197 |       locals(L, Ls) → Ls &
198 |       locals(R, Ls).
199 |   locals(using(R, P), Ls) =
200 |       locals(R, Ls).
201 |   locals(on(R, T), Ls) =
202 |       locals(R, Ls) → Ls &
203 |       locals(T, Ls).
204 |   locals(send(R, V), Ls) =
205 |       locals(V, Ls) → Ls &
206 |       locals(R, Ls).
207 |   locals(set(V, T), Ls) =
208 |       locals(V, Ls) → Ls &
209 |       locals(T, Ls).
210 |   locals(atom(T), Ls) = Ls.
211 |   locals(constructor(T, Ts), Ls) =
212 |       locals_all(Ts, Ls).
213 |   locals(variable(N), Ls) =
214 |       list:add_elem(N, Ls).
215 | 
216 |   locals_all(nil, Ls) = Ls.
217 |   locals_all(list(H,T), Ls) =
218 |       locals(H, Ls) → Ls &
219 |       locals_all(T, Ls).
220 | }
221 | 


--------------------------------------------------------------------------------
/lib/tamsin_parser.tamsin:
--------------------------------------------------------------------------------
  1 | # Parse Tamsin source to Tamsin AST, written in Tamsin.
  2 | # Distributed under a BSD-style license; see LICENSE.
  3 | 
  4 | # REQUIRES lib/tamsin_scanner.tamsin
  5 | # REQUIRES lib/list.tamsin
  6 | 
  7 | # Note that this may contain support for some features which are not in
  8 | # the current released or pre-released version.
  9 | 
 10 | tamsin_parser {
 11 |   parse    = grammar using tamsin_scanner:scanner.
 12 |   grammar  = {"@" & pragma & "."} &
 13 |              LM ← nil &
 14 |              LP ← nil &
 15 |              {
 16 |                  production → P & "." & LP ← list(P, LP)
 17 |                  | module → M & LM ← list(M, LM)
 18 |              } &
 19 |              list:reverse(LP, nil) → LP &
 20 |              MM ← module(main, LP) &
 21 |              list:reverse(LM, nil) → LM &
 22 |              ($:equal(LP, nil) | LM ← list(module(main, LP), LM)) &
 23 |              return program(LM).
 24 |   module   = word → N &
 25 |              LP ← nil &
 26 |              "{" &
 27 |              {production → P & "." & LP ← list(P, LP)} &
 28 |              "}" &
 29 |              list:reverse(LP, nil) → LP &
 30 |              return module(N, LP).
 31 |   production = word → N &
 32 |              F ← nil &
 33 |              [formals → F] &
 34 |              "=" &
 35 |              expr0 → E &
 36 |              return production(N, list(prodbranch(F, nil, E), nil)).
 37 |   formals  = L ← nil &
 38 |              "(" &
 39 |              term → T & L ← list(T, L) &
 40 |              {"," & term → T & L ← list(T, L)} &
 41 |              ")" &
 42 |              list:reverse(L, nil) → L &
 43 |              return L
 44 |              | "[" & expr0 & "]".
 45 |   expr0    = expr1 → L & {("|" | "||") & expr1 → R & L ← or(L, R)} & L.
 46 |   expr1    = expr2 → L & {("&" | "&&") & expr2 → R & L ← and(L, R)} & L.
 47 |   expr2    = expr3 → L & ["using" & prodref → P & L ← using(L, P)
 48 |                          | "@" & texpr → T & L ← on(L, T)] & L.
 49 |   expr3    = expr4 → L & [("→" | "->") & variable → V & L ← send(L, V)] & L.
 50 |   expr4    = expr5 → L & ("/" & texpr → T &
 51 |                            ("/" & term → T2 & return fold(L, T, T2)
 52 |                             | return fold(L, T, nil))
 53 |                           | return L).
 54 |   expr5    = "(" & expr0 → E & ")" & E
 55 |            | "[" & expr0 → E & "]" &
 56 |              return or(E, call(prodref('$', return), list(atom(nil), nil)))
 57 |            | "{" & expr0 → E & "}" & return while(E)
 58 |            | "!" & expr5 → E & return not(E)
 59 |            | "set" & variable → V & "=" & texpr → T & return set(V, T)
 60 |            | "return" & texpr → T & return call(prodref('$', return), list(T, nil))
 61 |            | "fail" & texpr → T & return call(prodref('$', fail), list(T, nil))
 62 |            | "print" & texpr → T & return call(prodref('$', print), list(T, nil))
 63 |            | "any" & return call(prodref('$', any), nil)
 64 |            | "eof" & return call(prodref('$', 'eof'), nil)
 65 |            | terminal
 66 |            | variable → V & 
 67 |              (("←" | "<-") & texpr → T & return set(V, T)
 68 |              | return call(prodref('$', return), list(V, nil)))
 69 |            | sq_string → T &
 70 |              $:unquote(T, '\'', '\'') → T &
 71 |              return call(prodref('$', return), list(atom(T), nil))
 72 |            | pq_string → T &
 73 |              $:unquote(T, '“', '”') → T &
 74 |              expect_chars(T) → E &
 75 |              return and(E, call(prodref('$', return), list(atom(T), nil)))
 76 |            | prodref → P &
 77 |              L ← nil &
 78 |              ["(" &
 79 |               texpr → T & L ← list(T, L) &
 80 |               {"," & texpr → T & L ← list(T, L)} &
 81 |               ")"] &
 82 |              list:reverse(L, nil) → L &
 83 |              return call(P, L).
 84 | 
 85 |   texpr    = term → T & {"+" & term → S & T ← concat(T, S)} & T.
 86 |   term     = term0.
 87 |   term0    = variable
 88 |            | "[" & L ← atom(nil) &
 89 |                   [term → T & L ← constructor(list, list(T, list(L, nil))) &
 90 |                   {"," & term → T & L ← constructor(list, list(T, list(L, nil)))}] &
 91 |               Tail ← atom(nil) &
 92 |                   ["|" & term → Tail] &
 93 |               "]" &
 94 |               reverse_c(L, Tail) → L &
 95 |               return L
 96 |            | atom → A & L ← nil & ["(" &
 97 |                                     term0 → T & L ← list(T, L) &
 98 |                                       {"," & term0 → T & L ← list(T, L)} &
 99 |                                     ")"] &
100 |                                     list:reverse(L, nil) → L &
101 |                                     ($:equal(L, nil) & return atom(A)
102 |                                      | return constructor(A, L)).
103 |   atom     = word
104 |            | sq_string → T &
105 |              $:unquote(T, '\'', '\'').
106 | 
107 |   terminal = terminal0 → T & return call(prodref('$', expect), list(T, nil)).
108 |   terminal0 = dq_string → T & $:unquote(T, '"', '"') → T & return atom(T)
109 |            | ("«" | "<<") & texpr → T & ("»" | ">>") & return T.
110 | 
111 |   prodref  = modref → M & ":" & word → P & return prodref(M, P)
112 |            | ":" & word → P & return prodref('', P)
113 |            | word → P & return prodref('', P).
114 |   modref   = "$" | word.
115 |   pragma   = "alias" & word & word & "=" & prodref
116 |            | "unalias" & word.
117 | 
118 |   word = $:alnum.
119 |   variable = $:upper → V & return variable(V).
120 |   sq_string = $:startswith('\'').
121 |   dq_string = $:startswith('"').
122 |   pq_string = $:startswith('“').
123 |   
124 |   ## utility functions on the AST ##
125 | 
126 |   # Given the name of a module and a program AST, return the named
127 |   # module AST found within that program, or fail.
128 | 
129 |   find_module(N, program(Ms)) = find_module(N, Ms).
130 |   find_module(N1, list(module(N2, Ps), T)) =
131 |       $:equal(N1, N2) & return module(N2, Ps) | find_module(N1, T).
132 |   find_module(N, list(H, T)) = find_module(N, T).
133 |   find_module(N, nil) = fail 'no ' + N + ' module'.
134 | 
135 |   # Given the name of a production and a module AST, return the named
136 |   # production AST found within that module, or fail.
137 | 
138 |   find_production(N, module(MN, Ps)) = find_production(N, Ps).
139 |   find_production(N1, list(production(N2, Bs), T)) =
140 |       $:equal(N1, N2) & return production(N2, Bs) | find_production(N1, T).
141 |   find_production(N, list(H, T)) = find_production(N, T).
142 |   find_production(N, nil) = fail 'no ' + N + ' production'.
143 | 
144 |   # Given the name of a module and the name of a production,
145 |   # return the production AST for module:production in the program, or fail.
146 | 
147 |   find_production_global(MN, PN, P) =
148 |       find_module(MN, P) → M & find_production(PN, M).
149 | 
150 |   reverse_c(constructor(list, list(Fst, list(Snd, nil))), Acc) =
151 |       Acc ← constructor(list, list(Fst, list(Acc, nil))) &
152 |       reverse_c(Snd, Acc).
153 |   reverse_c(Other, Acc) = Acc.
154 | 
155 |   # Given a single-character string S, return call(prodref('$', 'expect'),
156 |   # list(atom(S), nil)).  Given a longer string, return and(E, R), where E is
157 |   # that call for the first character and R is expect_chars of the rest.
158 | 
159 |   expect_chars(S) = (expect_chars_r using $:utf8) @ S.
160 |   expect_chars_r = any → C &
161 |     E ← call(prodref('$', 'expect'), list(atom(C), nil)) &
162 |     ((eof & return E) | (expect_chars_r → R & return and(E, R))).
163 | }
164 | 


--------------------------------------------------------------------------------
/lib/tamsin_scanner.tamsin:
--------------------------------------------------------------------------------
 1 | # Scanner for Tamsin tokens, written in Tamsin.
 2 | # Distributed under a BSD-style license; see LICENSE.
 3 | 
 4 | tamsin_scanner {
 5 |   scanner = scan using $:utf8.   # scan, run with $:utf8 as its underlying scanner
 6 |   scan = skippable &   # skip whitespace and comments, then produce exactly one token
 7 |          (symbol | str('\'', '\'') | str('"', '"') | str('“', '”') | word).
 8 |   symbol = "&" & "&" & '&&'   # two-character symbols are listed ahead of the single-character ones
 9 |          | "|" & "|" & '||'
10 |          | "-" & ">" & '->'
11 |          | "<" & "-" & '<-'
12 |          | "<" & "<" & '<<'
13 |          | ">" & ">" & '>>'
14 |          | "=" | "(" | ")" | "[" | "]" | "{" | "}" | "!" | "|" | "&" | ":"
15 |          | "/" | "," | "." | "@" | "+" | "$" | "→" | "←" | "«" | "»".
16 |   str(O, C) = «O» → T & {("\\" & escape | !«C» & any) → S & T ← T + S} & «C» &   # O = opening delimiter, C = closing delimiter
17 |            return T + C.   # the returned token keeps both delimiters; escapes are already replaced
18 |   escape = "n" & '\n'   # matches what follows a backslash inside a string and returns the replacement text
19 |          | "r" & '\r'
20 |          | "t" & '\t'
21 |          | "x" & hexdigit → H & hexdigit → L & $:hexbyte(H, L)   # \xHL: two hex digits handed to $:hexbyte
22 |          | "\\" & '\\'
23 |          | "'" & '\''
24 |          | "\"" & '"'.
25 |   hexdigit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
26 |              "a" | "b" | "c" | "d" | "e" | "f".   # only lowercase a-f are accepted
27 |   word = $:alnum → T & { ($:alnum | "_") → S & T ← T + S } & T.   # starts with $:alnum; may continue with $:alnum or underscore
28 |   skippable = {whitespace | comment}.   # zero or more whitespace characters or comments
29 |   whitespace = " " | "\t" | "\r" | "\n".
30 |   comment = "#" & {!"\n" & any} & ("\n" | eof).   # runs to the end of the line or the end of input
31 | }


--------------------------------------------------------------------------------
/mains/analyzer.tamsin:
--------------------------------------------------------------------------------
1 | main = tamsin_parser:parse → AST & tamsin_scanner:skippable & eof &
2 |        tamsin_analyzer:desugar(AST) → AST &
3 |        tamsin_analyzer:analyze(nil, AST) → AST &
4 |        $:repr(AST).
5 | 


--------------------------------------------------------------------------------
/mains/desugarer.tamsin:
--------------------------------------------------------------------------------
1 | main = tamsin_parser:parse → AST & tamsin_scanner:skippable & eof &
2 |        tamsin_analyzer:desugar(AST) → AST &
3 |        $:repr(AST).
4 | 


--------------------------------------------------------------------------------
/mains/grammar.tamsin:
--------------------------------------------------------------------------------
 1 | # Syntax-correctness parser for Tamsin, written in Tamsin.
 2 | # Distributed under a BSD-style license; see LICENSE.
 3 | 
 4 | # REQUIRES lib/tamsin_scanner.tamsin
 5 | 
 6 | # Note that this does not produce any data as a result beyond "yes, it
 7 | # parsed" or "no, there was a syntax error".  This exists to provide a
 8 | # clean, readable grammar.  For actual use, see lib/tamsin_parser.tamsin,
 9 | # which parses a Tamsin program to an AST.
10 | 
11 | # If there is any discrepancy between the language this grammar accepts,
12 | # and the language lib/tamsin_parser.tamsin accepts, lib/tamsin_parser.tamsin
13 | # takes precedence.
14 | 
15 | # Note that this may contain support for some features which are not in
16 | # the current released or pre-released version.
17 | 
18 | main = grammar using tamsin_scanner:scanner.
19 | 
20 | grammar    = {"@" & pragma & "."} &
21 |              {module | production & "."} & eof & 'ok'.
22 | module     = word & "{" & {production & "."} & "}".
23 | production = word & ["(" & term & {"," & term} & ")"
24 |                     | "[" & expr0 & "]"] & "=" & expr0.
25 | expr0      = expr1 & {("|" | "||") & expr1}.
26 | expr1      = expr2 & {("&" | "&&") & expr2}.
27 | expr2      = expr3 & ["using" & prodref | "@" & texpr].
28 | expr3      = expr4 & [("→" | "->") & variable].
29 | expr4      = expr5 & ["/" & texpr & ["/" & term]].
30 | expr5      = "(" & expr0 & ")"
31 |            | "[" & expr0 & "]"
32 |            | "{" & expr0 & "}"
33 |            | "!" & expr5
34 |            | "set" & variable & "=" & texpr
35 |            | "return" & texpr
36 |            | "fail" & texpr
37 |            | "print" & texpr
38 |            | terminal
39 |            | variable & [("←" | "<-") & texpr]
40 |            | sq_string
41 |            | prodref & ["(" & texpr & {"," & texpr} & ")"].
42 | texpr      = term & {"+" & term}.
43 | term       = atom & ["(" & [term & {"," & term}] & ")"]
44 |            | "[" & [term & {"," & term}] & ["|" & term] & "]"
45 |            | variable.
46 | atom       = word | sq_string.
47 | terminal   = dq_string
48 |            | ("«" | "<<") & texpr & ("»" | ">>").
49 | prodref    = modref & ":" & word
50 |            | ":" & word
51 |            | word.
52 | modref     = "$" | word.
53 | pragma     = "alias" & word & word & "=" & prodref
54 |            | "unalias" & word.
55 | 
56 | word = $:alnum.
57 | variable = $:upper.
58 | sq_string = $:startswith('\'').
59 | dq_string = $:startswith('"').
60 | 


--------------------------------------------------------------------------------
/mains/micro-tamsin.tamsin:
--------------------------------------------------------------------------------
 1 | # Interpreter for "Micro-Tamsin", written in Tamsin.
 2 | # (see doc/Micro-Tamsin.markdown.)
 3 | # Distributed under a BSD-style license; see LICENSE.
 4 | 
 5 | # REQUIRES lib/tamsin_scanner.tamsin
 6 | # REQUIRES lib/tamsin_parser.tamsin
 7 | 
 8 | main = tamsin_parser:parse → AST & tamsin_scanner:skippable & "/" &
 9 |        interpret(AST, AST).
10 | 
11 | interpret(P, program(L)) =
12 |     tamsin_parser:find_production_global('main', 'main', P) → Main &
13 |     interpret(P, Main).
14 | 
15 | interpret(P, production(N, list(prodbranch(Fs, Ls, E), nil))) = interpret(P, E).
16 | 
17 | interpret(P, call(prodref('$', 'return'), list(atom(X), nil))) = return X.
18 | interpret(P, call(prodref('$', 'expect'), list(atom(X), nil))) = «X».
19 | interpret(P, call(prodref('$', 'print'), list(atom(X), nil))) = print X.
20 | interpret(P, call(prodref('', N), A)) =
21 |     interpret(P, call(prodref('main', N), A)).
22 | interpret(P, call(prodref(M, N), A)) =
23 |     tamsin_parser:find_production_global(M, N, P) → Prod &
24 |     interpret(P, Prod).
25 | 
26 | interpret(P, or(L, R)) = interpret(P, L) | interpret(P, R).
27 | interpret(P, and(L, R)) = interpret(P, L) & interpret(P, R).
28 | interpret(P, not(X)) = !interpret(P, X).
29 | interpret(P, while(X)) = {interpret(P, X)}.
30 | 


--------------------------------------------------------------------------------
/mains/mini-tamsin.tamsin:
--------------------------------------------------------------------------------
 1 | # Interpreter for "Mini-Tamsin", written in Tamsin.
 2 | # (see doc/Mini-Tamsin.markdown.)
 3 | # Distributed under a BSD-style license; see LICENSE.
 4 | 
 5 | # REQUIRES lib/tamsin_scanner.tamsin
 6 | # REQUIRES lib/tamsin_parser.tamsin
 7 | 
 8 | main = tamsin_parser:parse → AST & tamsin_scanner:skippable & "/" &
 9 |        new_state → S &
10 |        interpret(AST, S, AST).
11 | 
12 | #
13 | # FIXME there are several rather major shortcomings with this, still!
14 | #
15 | 
16 | new_state = return state().
17 | 
18 | #
19 | # interpret(EntireProgram, State, CurrentProgramPart)
20 | # returns a pair(Result, NewState)
21 | #
22 | interpret(P, S, program(L)) =
23 |     tamsin_parser:find_production_global('main', 'main', P) → Main &
24 |     new_state → S &
25 |     interpret(P, S, Main).
26 | 
27 | interpret(P, S, production(N, list(prodbranch(Fs, Ls, E), nil))) =
28 |     interpret(P, S, E).
29 | 
30 | interpret(P, S, call(prodref('$', 'return'), list(atom(X), nil))) =
31 |     return pair(X, S).
32 | 
33 | interpret(P, S, call(prodref('$', 'expect'), list(atom(X), nil))) =
34 |     «X» → R &          # FIXME this isn't going to work if «X» fails, is it.
35 |     return pair(R, S).
36 | 
37 | interpret(P, S, call(prodref('$', 'print'), list(atom(X), nil))) =
38 |     print X &
39 |     return pair(X, S).
40 | 
41 | interpret(P, S, call(prodref('', N), A)) =
42 |     interpret(P, S, call(prodref('main', N), A)).
43 | 
44 | interpret(P, S, call(prodref(M, N), A)) =
45 |     tamsin_parser:find_production_global(M, N, P) → Prod &
46 |     new_state → S2 &
47 |     interpret(P, S2, Prod).
48 | 
49 | interpret(P, S, or(L, R)) =
50 |     interpret(P, S, L) → pair(Res, S2) &
51 |     (Res & return pair(Res, S2)) | interpret(P, S, R).
52 |     # FIXME what happens to S?  I think this is right though
53 | 
54 | interpret(P, S, and(L, R)) =
55 |     interpret(P, S, L) → pair(Res, S2) &
56 |     interpret(P, S2, R).
57 | 
58 | # interpret(P, S, not(X)) = !interpret(P, S, X).
59 | 
60 | interpret(P, S, while(X)) =
61 |     {interpret(P, S, X) → pair(Res, S2) & set S = S2}.
62 | 


--------------------------------------------------------------------------------
/mains/parser.tamsin:
--------------------------------------------------------------------------------
1 | # REQUIRES lib/tamsin_scanner.tamsin
2 | # REQUIRES lib/tamsin_parser.tamsin
3 | 
4 | main = tamsin_parser:parse → P & tamsin_scanner:skippable & eof & $:repr(P).   # parse, allow only skippable text before eof, then return $:repr of the AST
5 | 


--------------------------------------------------------------------------------
/mains/scanner.tamsin:
--------------------------------------------------------------------------------
1 | # REQUIRES lib/tamsin_scanner.tamsin
2 | 
3 | main = {tamsin_scanner:scan -> T & $:repr(T) -> T & print T} & ''.   # print $:repr of each token while scan succeeds; the result is the empty atom
4 | 


--------------------------------------------------------------------------------
/src/tamsin/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/catseye/Tamsin/cfc9a7270773658a1cddb017aaaf4856939c328c/src/tamsin/__init__.py


--------------------------------------------------------------------------------
/src/tamsin/analyzer.py:
--------------------------------------------------------------------------------
  1 | # encoding: UTF-8
  2 | 
  3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
  4 | # Distributed under a BSD-style license; see LICENSE for more information.
  5 | 
  6 | from tamsin.ast import (
  7 |     Program, Module, Production, ProdBranch,
  8 |     And, Or, Not, While, Call, Send, Set,
  9 |     Using, On, Concat, Prodref,
 10 |     TermNode, VariableNode, PatternVariableNode, AtomNode, ConstructorNode
 11 | )
 12 | from tamsin.term import Term
 13 | from tamsin.event import EventProducer
 14 | 
 15 | 
 16 | class Analyzer(EventProducer):
 17 |     """The Analyzer takes a desugared AST, walks it, and returns a new AST.
 18 |     It is responsible for:
 19 | 
 20 |     * Finding the set of local variable names used in each production branch
 21 |       and sticking that in the locals_ field of the new ProdBranch node.
 22 |     * Resolving any '' modules in Prodrefs to the name of the current
 23 |       module.
 24 | 
 25 |     * Looking for undefined nonterminals and raising an error if such found.
 26 |       (this is done at the end by analyze_prodrefs)
 27 | 
 28 |     TODO: it should also find any locals that are accessed before being set
 29 |     TODO: it should also look for a mismatch in # of formals
 30 |     """
 31 |     def __init__(self, program, listeners=None):
 32 |         self.listeners = listeners
 33 |         self.program = program
 34 |         self.current_module = None
 35 | 
 36 |     def analyze(self, ast):
 37 |         if isinstance(ast, Program):
 38 |             modlist = []
 39 |             for mod in ast.modlist:
 40 |                 mod = self.analyze(mod)
 41 |                 modlist.append(mod)
 42 |             self.program = Program(modlist)
 43 |             self.analyze_prodrefs(self.program)
 44 |             return self.program
 45 |         elif isinstance(ast, Module):
 46 |             self.current_module = ast
 47 |             prodlist = []
 48 |             for prod in ast.prodlist:
 49 |                 prodlist.append(self.analyze(prod))
 50 |             self.current_module = None
 51 |             return Module(ast.name, prodlist)
 52 |         elif isinstance(ast, Production):
 53 |             branches = []
 54 |             for b in ast.branches:
 55 |                 branches.append(self.analyze(b))
 56 |             return Production(ast.name, branches)
 57 |         elif isinstance(ast, ProdBranch):            
 58 |             locals_ = []
 59 |             body = self.analyze(ast.body)
 60 |             self.collect_locals(body, locals_)
 61 |             return ProdBranch(ast.formals, locals_, body)
 62 |         elif isinstance(ast, Or):
 63 |             return Or(self.analyze(ast.lhs), self.analyze(ast.rhs))
 64 |         elif isinstance(ast, And):
 65 |             return And(self.analyze(ast.lhs), self.analyze(ast.rhs))
 66 |         elif isinstance(ast, Using):
 67 |             return Using(self.analyze(ast.rule), self.analyze(ast.prodref))
 68 |         elif isinstance(ast, On):
 69 |             return On(self.analyze(ast.rule), self.analyze(ast.texpr))
 70 |         elif isinstance(ast, Call):
 71 |             return Call(self.analyze(ast.prodref), ast.args)
 72 |         elif isinstance(ast, Send):
 73 |             assert isinstance(ast.pattern, TermNode), ast
 74 |             return Send(self.analyze(ast.rule), self.analyze(ast.pattern))
 75 |         elif isinstance(ast, Set):
 76 |             assert isinstance(ast.variable, VariableNode), ast
 77 |             return Set(ast.variable, self.analyze(ast.texpr))
 78 |         elif isinstance(ast, Not):
 79 |             return Not(self.analyze(ast.rule))
 80 |         elif isinstance(ast, While):
 81 |             return While(self.analyze(ast.rule))
 82 |         elif isinstance(ast, Concat):
 83 |             return Concat(self.analyze(ast.lhs), self.analyze(ast.rhs))
 84 |         elif isinstance(ast, TermNode):
 85 |             return ast
 86 |         elif isinstance(ast, Prodref):
 87 |             module = ast.module
 88 |             if module == '':
 89 |                 module = self.current_module.name
 90 |             new = Prodref(module, ast.name)
 91 |             return new
 92 |         else:
 93 |             raise NotImplementedError(repr(ast))
 94 | 
 95 |     def collect_locals(self, ast, locals_):
 96 |         """locals_ should be a list."""
 97 | 
 98 |         if isinstance(ast, ProdBranch):
 99 |             self.collect_locals(ast.body, locals_)
100 |         elif (isinstance(ast, And) or isinstance(ast, Or) or
101 |               isinstance(ast, Concat)):
102 |             self.collect_locals(ast.lhs, locals_)
103 |             self.collect_locals(ast.rhs, locals_)
104 |         elif isinstance(ast, Using):
105 |             self.collect_locals(ast.rule, locals_)
106 |         elif isinstance(ast, On):
107 |             self.collect_locals(ast.rule, locals_)
108 |             self.collect_locals(ast.texpr, locals_)
109 |         elif isinstance(ast, Call):
110 |             pass
111 |         elif isinstance(ast, Send):
112 |             self.collect_locals(ast.pattern, locals_)
113 |             self.collect_locals(ast.rule, locals_)
114 |         elif isinstance(ast, Set):
115 |             self.collect_locals(ast.variable, locals_)
116 |             self.collect_locals(ast.texpr, locals_)
117 |         elif isinstance(ast, Not) or isinstance(ast, While):
118 |             self.collect_locals(ast.rule, locals_)
119 |         elif isinstance(ast, VariableNode):
120 |             if ast.name not in locals_:
121 |                 locals_.append(ast.name)
122 |         elif isinstance(ast, PatternVariableNode):
123 |             # will probably be needed for Send().  but, not sure.  FIXME
124 |             if ast.name not in locals_:
125 |                 locals_.append(ast.name)
126 |         elif isinstance(ast, ConstructorNode):
127 |             for sub in ast.contents:
128 |                 self.collect_locals(sub, locals_)
129 |         elif isinstance(ast, AtomNode):
130 |             pass
131 |         else:
132 |             raise NotImplementedError(repr(ast))
133 | 
134 |     def analyze_prodrefs(self, ast):
135 |         """does not return anything"""
136 |         if isinstance(ast, Program):
137 |             for mod in ast.modlist:
138 |                 self.analyze_prodrefs(mod)
139 |         elif isinstance(ast, Module):
140 |             for prod in ast.prodlist:
141 |                 self.analyze_prodrefs(prod)
142 |         elif isinstance(ast, Production):
143 |             for b in ast.branches:
144 |                 self.analyze_prodrefs(b)
145 |         elif isinstance(ast, ProdBranch):
146 |             self.analyze_prodrefs(ast.body)
147 |         elif isinstance(ast, Or) or isinstance(ast, And):
148 |             self.analyze_prodrefs(ast.lhs)
149 |             self.analyze_prodrefs(ast.rhs)
150 |         elif isinstance(ast, Using):
151 |             self.analyze_prodrefs(ast.rule)
152 |             self.analyze_prodrefs(ast.prodref)
153 |         elif isinstance(ast, On):
154 |             self.analyze_prodrefs(ast.rule)
155 |         elif isinstance(ast, Call):
156 |             self.analyze_prodrefs(ast.prodref)
157 |         elif isinstance(ast, Send):
158 |             self.analyze_prodrefs(ast.rule)
159 |         elif isinstance(ast, Set):
160 |             pass
161 |         elif isinstance(ast, Not):
162 |             self.analyze_prodrefs(ast.rule)
163 |         elif isinstance(ast, While):
164 |             self.analyze_prodrefs(ast.rule)
165 |         elif isinstance(ast, Concat):
166 |             pass
167 |         elif isinstance(ast, Term):
168 |             pass
169 |         elif isinstance(ast, Prodref):
170 |             assert ast.module != '', repr(ast)
171 |             if ast.module == '$':
172 |                 return # TODO: also check builtins?
173 |             module = self.program.find_module(ast.module)
174 |             if not module:
175 |                 raise KeyError("no '%s' module defined" % ast.module)
176 |             production = module.find_production(ast.name)
177 |             if not production:
178 |                 raise KeyError("no '%s:%s' production defined" %
179 |                     (ast.module, ast.name)
180 |                 )
181 |         else:
182 |             raise NotImplementedError(repr(ast))
183 | 


--------------------------------------------------------------------------------
/src/tamsin/backends/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/catseye/Tamsin/cfc9a7270773658a1cddb017aaaf4856939c328c/src/tamsin/backends/__init__.py


--------------------------------------------------------------------------------
/src/tamsin/buffer.py:
--------------------------------------------------------------------------------
  1 | # encoding: UTF-8
  2 | 
  3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
  4 | # Distributed under a BSD-style license; see LICENSE for more information.
  5 | 
  6 | import sys
  7 | 
  8 | 
  9 | class Buffer(object):
 10 |     """Abstract base class for all Buffer objects.
 11 | 
 12 |     Buffer objects are mutable, but must be capable of saving and restoring
 13 |     their state indefinitely.
 14 | 
 15 |     """
 16 |     def __init__(self, filename='<data>', position=0, line_number=1, column_number=1):
 17 |         """If `position` is given, `line_number` and `column_number` should
 18 |         be given too, to match.
 19 | 
 20 |         """
 21 |         self.filename = filename
 22 |         self.position = position
 23 |         self.line_number = line_number
 24 |         self.column_number = column_number
 25 | 
 26 |     def save_state(self):
 27 |         raise NotImplementedError
 28 | 
 29 |     def restore_state(self):
 30 |         raise NotImplementedError
 31 | 
 32 |     def pop_state(self):
 33 |         raise NotImplementedError
 34 | 
 35 |     def advance(self, inp):
 36 |         """Given a string that we have just consumed from the buffer,
 37 |         return new line_number and column_number.
 38 | 
 39 |         """
 40 |         line_number = self.line_number
 41 |         column_number = self.column_number
 42 |         for char in inp:
 43 |             if char == '\n':
 44 |                 line_number += 1
 45 |                 column_number = 1
 46 |             else:
 47 |                 column_number += 1
 48 |         return (line_number, column_number)
 49 | 
 50 |     def chop(self, amount):
 51 |         """Returns a pair of `amount` characters chopped off the front of
 52 |         the buffer, and a new Buffer object.
 53 | 
 54 |         """
 55 |         raise NotImplementedError
 56 | 
 57 |     def first(self, amount):
 58 |         """Returns a pair of the first `amount` characters in the buffer
 59 |         (without consuming them) and a new Buffer object.
 60 | 
 61 |         """
 62 |         raise NotImplementedError
 63 | 
 64 | 
 65 | class StringBuffer(Buffer):
 66 |     def __init__(self, string, **kwargs):
 67 |         """Create a new StringBuffer object.
 68 | 
 69 |         `string` should be a raw string, not unicode.  If `position` is given,
 70 |         `line_number` and `column_number` should be given too, to match.
 71 | 
 72 |         """
 73 |         assert not isinstance(string, unicode)
 74 |         self.string = string
 75 |         self.stack = []
 76 |         Buffer.__init__(self, **kwargs)
 77 | 
 78 |     def save_state(self):
 79 |         self.stack.append((self.position, self.line_number, self.column_number))
 80 | 
 81 |     def restore_state(self):
 82 |         (self.position, self.line_number, self.column_number) = self.stack.pop()
 83 | 
 84 |     def pop_state(self):
 85 |         self.stack.pop()
 86 | 
 87 |     def __str__(self):
 88 |         return self.string
 89 | 
 90 |     def __repr__(self):
 91 |         return "StringBuffer(%r, filename=%r, position=%r, line_number=%r, column_number=%r)" % (
 92 |             self.string, self.filename, self.position, self.line_number, self.column_number
 93 |         )
 94 | 
 95 |     def chop(self, amount):
 96 |         assert self.position <= len(self.string) - amount, \
 97 |             "attempt made to chop past end of buffer"
 98 |         bytes = self.string[self.position:self.position + amount]
 99 | 
100 |         self.position += amount
101 |         (self.line_number, self.column_number) = self.advance(bytes)
102 | 
103 |         return bytes
104 | 
105 |     def first(self, amount):
106 |         bytes = self.string[self.position:self.position + amount]
107 | 
108 |         return bytes
109 | 
110 | 
class FileBuffer(Buffer):
    """A Buffer that pulls its characters from a file-like object on demand.

    Text that has been read from the file but may still be needed (a saved
    state could bring us back to it) is kept in `pre_buffer`;
    `pre_position` is the buffer position of the first character held
    there.  Saved states live on `stack`.

    """
    def __init__(self, file, **kwargs):
        """Wrap `file`, which only needs a `read(n)` method.

        Remaining keyword arguments go to `Buffer.__init__`.

        NOTE(review): when a non-zero `position` is given, this assumes
        `file` is already positioned at the matching offset -- confirm
        against callers.

        """
        self.file = file
        # stuff we have read out of the file, but need to keep
        self.pre_buffer = ''
        self.stack = []
        Buffer.__init__(self, **kwargs)
        # The (still empty) pre_buffer begins at the starting position.
        # This used to be hard-coded to 0, which made chop() slice
        # pre_buffer at the wrong offset whenever a non-zero `position`
        # was passed in.
        self.pre_position = self.position

    def save_state(self):
        """Push the current position, line and column onto the stack."""
        state = (self.position, self.line_number, self.column_number)
        self.stack.append(state)

    def _truncate_pre_buffer(self):
        """Drop text that can no longer be revisited.

        Only done when no saved state is outstanding, because a saved
        state may point back into the retained text.

        """
        if not self.stack and self.position > self.pre_position:
            self.pre_buffer = self.pre_buffer[self.position - self.pre_position:]
            self.pre_position = self.position

    def restore_state(self):
        """Pop the most recently saved state and make it current."""
        state = self.stack.pop()
        (self.position, self.line_number, self.column_number) = state
        self._truncate_pre_buffer()

    def pop_state(self):
        """Drop the most recently saved state without applying it."""
        self.stack.pop()
        self._truncate_pre_buffer()

    def chop(self, amount):
        """Consume and return the next `amount` characters.

        Fewer characters (possibly none) come back when the file runs out;
        `position` is still moved forward by `amount` in that case.

        """
        start = self.position - self.pre_position
        chunk = self.pre_buffer[start:start + amount]
        missing = amount - len(chunk)
        if missing > 0:
            self.pre_buffer += self.file.read(missing)
            chunk = self.pre_buffer[start:start + amount]
            # len(chunk) may still be short of `amount` at end of file.

        self.position += amount
        (self.line_number, self.column_number) = self.advance(chunk)
        self._truncate_pre_buffer()
        return chunk

    def first(self, amount):
        """Return the next `amount` characters without consuming them.

        Implemented as save / chop / restore, so the characters stay in
        `pre_buffer` for the next read.

        """
        self.save_state()
        chunk = self.chop(amount)
        self.restore_state()
        return chunk
158 | 


--------------------------------------------------------------------------------
/src/tamsin/codegen.py:
--------------------------------------------------------------------------------
  1 | # encoding: UTF-8
  2 | 
  3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
  4 | # Distributed under a BSD-style license; see LICENSE for more information.
  5 | 
  6 | from tamsin import ast as ack
  7 | from tamsin.ast import AtomNode, VariableNode
  8 | from tamsin.codenode import (
  9 |     CodeNode, Program, Prototype, Subroutine,
 10 |     Block, If, While, And, Not, Return, Builtin, Call, Truth, Falsity,
 11 |     DeclareLocal, GetVar, SetVar, Concat, VariableRef,
 12 |     Unifier, PatternMatch, NoMatch, GetMatchedVar,
 13 |     DeclState, SaveState, RestoreState,
 14 |     MkAtom, MkConstructor,
 15 |     ScannerPushEngine, ScannerPopEngine,
 16 | )
 17 | from tamsin.term import Atom, Constructor, Variable
 18 | import tamsin.sysmod
 19 | 
 20 | 
 21 | # TODO: is this module responsible for allocating names, or is the backend?
 22 | # I think it should probably be this module.
 23 | # On the other hand, backend needs to be able to generate temporaries, too.
 24 | 
 25 | 
 26 | class CodeGen(object):
 27 |     def __init__(self, program):
 28 |         self.program = program
 29 |         self.name_index = 0
 30 | 
 31 |     def new_name(self):
 32 |         name = "var%s" % self.name_index
 33 |         self.name_index += 1
 34 |         return name
 35 | 
 36 |     def generate(self):
 37 |         main = self.program.find_production(ack.Prodref('main', 'main'))
 38 |         if not main:
 39 |             raise ValueError("no 'main:main' production defined")
 40 | 
 41 |         program = Program()
 42 |         for module in self.program.modlist:
 43 |             for prod in module.prodlist:
 44 |                 program.append(
 45 |                     Prototype(module=module, prod=prod, formals=prod.branches[0].formals)
 46 |                 )
 47 | 
 48 |         for module in self.program.modlist:
 49 |             for prod in module.prodlist:
 50 |                 program.append(
 51 |                     self.gen_subroutine(module, prod, prod.branches[0].formals)
 52 |                 )
 53 | 
 54 |         return program
 55 | 
 56 |     def gen_subroutine(self, module, prod, formals):
 57 |         children = []
 58 |         s = Subroutine(module, prod, formals, children)
 59 |         children.append(self.gen_unifier(prod, prod.branches[0]))  # becoming so wrong
 60 |         children.append(self.gen_branches(module, prod, prod.branches))            
 61 |         return s
 62 | 
 63 |     def gen_unifier(self, prod, branch):
 64 |         prod.all_pattern_variables = []
 65 | 
 66 |         pat_names = []
 67 |         for fml_num, formal in enumerate(branch.formals):
 68 |             pat_names.append(self.gen_ast(formal))
 69 | 
 70 |             variables = []
 71 |             formal.collect_variables(variables)
 72 |             for variable in variables:
 73 |                if variable not in prod.all_pattern_variables:
 74 |                    prod.all_pattern_variables.append(variable)
 75 | 
 76 |         return Unifier(prod.all_pattern_variables)
 77 | 
 78 |     def gen_branches(self, module, prod, branches):
 79 |         if not branches:
 80 |             return NoMatch(module=module, prod=prod, formals=[])
 81 |         branch = branches[0]
 82 |         branches = branches[1:]
 83 |         test = Truth()
 84 |         for fml_num in xrange(0, len(branch.formals)):
 85 |             p = PatternMatch()
 86 |             #    self.emit("    term_match_unifier(%s, i%s, unifier) &&" %
 87 |             #        (pat_names[fml_num], fml_num)
 88 |             #    )
 89 |             if not test:
 90 |                 test = p
 91 |             else:
 92 |                 test = And(test, p)
 93 |         return If(test,
 94 |             self.gen_branch(module, prod, branch),
 95 |             self.gen_branches(module, prod, branches)
 96 |         )
 97 | 
 98 |     def gen_branch(self, module, prod, branch):
 99 |         b = Block()
100 | 
101 |         # get variables which are found in patterns for this branch
102 |         for var in prod.all_pattern_variables:
103 |             #self.emit('const struct term *%s = unifier[%s];' %
104 |             #    (var.name, var.index)
105 |             #)
106 |             #self.emit('assert(%s != NULL);' % var.name);
107 |             b.append(GetMatchedVar(var))
108 |         
109 |         all_pattern_variable_names = [x.name for x in prod.all_pattern_variables]
110 |         for local in branch.locals_:
111 |             if local not in all_pattern_variable_names:
112 |                 #self.emit("const struct term *%s;" % local)
113 |                 b.append(DeclareLocal(local))
114 | 
115 |         b.append(self.gen_ast(branch.body))
116 |         return b
117 | 
118 |     def gen_ast(self, ast):
119 |         if isinstance(ast, ack.And):
120 |             return Block(
121 |                 self.gen_ast(ast.lhs),
122 |                 If(GetVar('ok'),
123 |                     self.gen_ast(ast.rhs)
124 |                 )
125 |             )
126 |         elif isinstance(ast, ack.Or):
127 |             return Block(
128 |                 DeclState(),
129 |                 SaveState(),
130 |                 self.gen_ast(ast.lhs),
131 |                 If(Not(GetVar('ok')),
132 |                     Block(
133 |                         RestoreState(),
134 |                         self.gen_ast(ast.rhs)
135 |                     )
136 |                 )
137 |             )
138 |         elif isinstance(ast, ack.Call):
139 |             prodref = ast.prodref
140 |             prodmod = prodref.module or 'main'
141 |             name = prodref.name
142 |             args = ast.args
143 |             if prodmod == '$':
144 |                 c = Builtin(name=name)
145 |                 arity = tamsin.sysmod.arity(name)
146 |                 for i in xrange(0, arity):
147 |                     c.append(self.gen_ast(args[i]))
148 |             else:
149 |                 c = Call(module=prodmod, name=name)
150 |                 for a in args:
151 |                     c.append(self.gen_ast(a))
152 |             return c
153 |         elif isinstance(ast, ack.Send):
154 |             return Block(
155 |                 self.gen_ast(ast.rule),
156 |                 # EMIT PATTERN ... which means generalizing the crap that is
157 |                 # currently in the ProdBranch case up there, way up there ^^^
158 |                 SetVar(self.gen_ast(ast.pattern), GetVar('result'))
159 |             )
160 |         elif isinstance(ast, ack.Set):
161 |             return SetVar(VariableRef(ast.variable.name), self.gen_ast(ast.texpr))
162 |         elif isinstance(ast, ack.While):
163 |             return Block(
164 |                 DeclareLocal('srname', MkAtom('nil')),
165 |                 DeclState(),
166 |                 SetVar(VariableRef('ok'), Truth()),
167 |                 While(GetVar('ok'),
168 |                     Block(
169 |                         SaveState(),
170 |                         self.gen_ast(ast.rule),
171 |                         If(GetVar('ok'),
172 |                             SetVar(VariableRef('srname'), GetVar('result'))
173 |                         )
174 |                     )
175 |                 ),
176 |                 RestoreState(),
177 |                 SetVar(VariableRef('result'), GetVar('srname')),
178 |                 SetVar(VariableRef('ok'), Truth())
179 |             )
180 |         elif isinstance(ast, ack.Not):
181 |             return Block(
182 |                 DeclState(),
183 |                 SaveState(),
184 |                 self.gen_ast(ast.rule),
185 |                 RestoreState(),
186 |                 If(GetVar('ok'),
187 |                     Block(
188 |                         SetVar(VariableRef('ok'), Falsity()),
189 |                         SetVar(VariableRef('result'), MkAtom("expected anything else"))
190 |                     ), Block(
191 |                         SetVar(VariableRef('ok'), Truth()),
192 |                         SetVar(VariableRef('result'), MkAtom("nil"))
193 |                     )
194 |                 )
195 |             )
196 |         elif isinstance(ast, ack.Using):
197 |             return Block(
198 |                 ScannerPushEngine(ast.prodref.module, ast.prodref.name),
199 |                 self.gen_ast(ast.rule),
200 |                 ScannerPopEngine(),
201 |             )
202 |         elif isinstance(ast, ack.On):
203 |             return Block(
204 |                 self.gen_ast(ast.texpr),
205 |                 #flat_name = self.new_name()
206 |                 #self.emit("const struct term *%s = term_flatten(%s);" % (flat_name, name))
207 |                 DeclState(),
208 |                 SaveState(),
209 |                 #self.emit("scanner->buffer = %s->atom;" % flat_name);
210 |                 #self.emit("scanner->size = %s->size;" % flat_name);
211 |                 #self.emit("scanner->position = 0;");
212 |                 #self.emit("scanner->reset_position = 0;");
213 |                 self.gen_ast(ast.rule),
214 |                 RestoreState()
215 |             )
216 |         elif isinstance(ast, ack.Concat):
217 |             lhs = self.gen_ast(ast.lhs)
218 |             rhs = self.gen_ast(ast.rhs)
219 |             name = self.new_name()
220 |             return Concat(name, lhs, rhs)
221 |         elif isinstance(ast, ack.AtomNode):
222 |             return MkAtom(ast.text)
223 |         elif isinstance(ast, ack.VariableNode):
224 |             return VariableRef(ast.name)
225 |         elif isinstance(ast, ack.PatternVariableNode):
226 |             return VariableRef(ast.name)
227 |         elif isinstance(ast, ack.ConstructorNode):
228 |             return MkConstructor(ast.text, [])
229 |         else:
230 |             raise NotImplementedError(repr(ast))
231 | 


--------------------------------------------------------------------------------
/src/tamsin/codenode.py:
--------------------------------------------------------------------------------
  1 | # encoding: UTF-8
  2 | 
  3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
  4 | # Distributed under a BSD-style license; see LICENSE for more information.
  5 | 
  6 | 
  7 | # TODO: some of these are definitely hierarchical, and some are definitely
  8 | # not.  make the distinction.  make the latter more like 3-address-code.
  9 | 
 10 | 
 11 | class CodeNode(object):
 12 |     def __init__(self, *args, **kwargs):
 13 |         self.args = list(args)
 14 |         self.kwargs = kwargs
 15 | 
 16 |     def append(self, item):
 17 |         self.args.append(item)
 18 | 
 19 |     def __getitem__(self, key):
 20 |         if key in self.kwargs:
 21 |             return self.kwargs[key]
 22 |         return self.args[key]
 23 | 
 24 |     def __repr__(self):
 25 |         return "%s(%s%s)" % (
 26 |             self.__class__.__name__,
 27 |             (', '.join([repr(a) for a in self.args]) + ', ') if self.args else '',
 28 |             ', '.join('%s=%r' % (key, self.kwargs[key]) for key in self.kwargs) if self.kwargs else ''
 29 |         )
 30 | 
 31 | 
 32 | class Program(CodeNode):
 33 |     """Represents a target program."""
 34 |     pass
 35 | 
 36 | 
 37 | class Prototype(CodeNode):
 38 |     """Represents a prototype for a subroutine in a target program."""
 39 |     pass
 40 | 
 41 | 
 42 | class Subroutine(CodeNode):
 43 |     """Represents a subroutine in a target program."""
 44 |     def __init__(self, module, prod, formals, children):
 45 |         self.module = module
 46 |         self.prod = prod
 47 |         self.formals = formals
 48 |         self.children = children
 49 | 
 50 |     def __repr__(self):
 51 |         return "Subroutine(%r, %r, %r, %r)" % (
 52 |             self.module, self.prod, self.formals, self.children
 53 |         )
 54 | 
 55 | 
 56 | class Block(CodeNode):
 57 |     pass
 58 | 
 59 | 
 60 | class If(CodeNode):
 61 |     pass
 62 | 
 63 | 
 64 | class While(CodeNode):
 65 |     pass
 66 | 
 67 | 
 68 | class And(CodeNode):
 69 |     pass
 70 | 
 71 | 
 72 | class Not(CodeNode):
 73 |     pass
 74 | 
 75 | 
 76 | class DeclareLocal(CodeNode):
 77 |     pass
 78 | 
 79 | 
 80 | class GetVar(CodeNode):
 81 |     """name is the name of the target-language variable."""
 82 |     def __init__(self, name):
 83 |         self.name = name
 84 | 
 85 |     def __repr__(self):
 86 |         return "GetVar(%r)" % (self.name)
 87 | 
 88 | 
 89 | class SetVar(CodeNode):
 90 |     """ref is a VariableRef for the target-language variable.
 91 |     expr is an expression."""
 92 |     def __init__(self, ref, expr):
 93 |         self.ref = ref
 94 |         self.expr = expr
 95 | 
 96 |     def __repr__(self):
 97 |         return "SetVar(%r, %r)" % (self.ref, self.expr)
 98 | 
 99 | 
class Concat(CodeNode):
    """Concatenation of the `lhs` and `rhs` nodes.

    `name` is the name CodeGen allocated for this node with new_name().
    """
    def __init__(self, name, lhs, rhs):
        self.rhs = rhs
        self.lhs = lhs
        self.name = name

    def __repr__(self):
        fields = (self.name, self.lhs, self.rhs)
        return "Concat(%r, %r, %r)" % fields
108 | 
109 | 
class Unifier(CodeNode):
    """CodeGen passes the list of pattern variables collected from a
    production's formals."""


class PatternMatch(CodeNode):
    """One per formal of a branch; CodeGen currently creates it without
    arguments."""


class Return(CodeNode):
    """Imported by CodeGen but not instantiated by it at present."""


class DeclState(CodeNode):
    """Created by CodeGen, without arguments, ahead of SaveState in the
    translations of Or, While, Not and On."""


class SaveState(CodeNode):
    """Created by CodeGen, without arguments, before a rule whose effects
    may have to be undone with RestoreState."""


class RestoreState(CodeNode):
    """Created by CodeGen, without arguments, as the counterpart of an
    earlier SaveState."""


class Builtin(CodeNode):
    """Call to a production of the `$` module.  CodeGen passes the keyword
    argument `name` and appends one node per argument."""


class Call(CodeNode):
    """Call to a production outside the `$` module.  CodeGen passes the
    keyword arguments `module` and `name` and appends one node per
    argument."""


class NoMatch(CodeNode):
    """Created by CodeGen once no branches are left to try, with the
    keyword arguments `module`, `prod` and `formals`."""


class Truth(CodeNode):
    """The true constant; created without arguments."""


class Falsity(CodeNode):
    """The false constant; created without arguments."""


class VariableRef(CodeNode):
    """Reference to a target-language variable; the single positional
    argument is its name."""


class MkAtom(CodeNode):
    """Code in the target program to make an atom; the single positional
    argument is the atom's text."""
160 | 
161 | 
class MkConstructor(CodeNode):
    """Code in the target program to make a constructor.

    `text` is the constructor's tag and `children` is a list of child
    nodes.  CodeNode.__init__ is not called, so instances have no
    `args` / `kwargs`.
    """
    def __init__(self, text, children):
        self.children = children
        self.text = text

    def __repr__(self):
        fields = (self.text, self.children)
        return "MkConstructor(%r, %r)" % fields
172 | 
class ScannerPushEngine(CodeNode):
    """Created by CodeGen at the start of a Using translation, with the
    module and name of the referenced production as positional arguments."""


class ScannerPopEngine(CodeNode):
    """Created by CodeGen, without arguments, at the end of a Using
    translation."""


class GetMatchedVar(CodeNode):
    """Created by CodeGen once per pattern variable at the top of a
    branch; the single positional argument is the variable."""
183 | 


--------------------------------------------------------------------------------
/src/tamsin/desugarer.py:
--------------------------------------------------------------------------------
  1 | # encoding: UTF-8
  2 | 
  3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
  4 | # Distributed under a BSD-style license; see LICENSE for more information.
  5 | 
  6 | from tamsin.ast import (
  7 |     Program, Module, Production, ProdBranch,
  8 |     And, Or, Not, While, Call, Send, Set,
  9 |     Using, On, Concat, Fold, Prodref,
 10 |     TermNode, VariableNode, PatternVariableNode, AtomNode, ConstructorNode
 11 | )
 12 | from tamsin.event import EventProducer
 13 | 
 14 | 
 15 | class Desugarer(EventProducer):
 16 |     """The Desugarer takes an AST, walks it, and returns a new AST.
 17 |     It is responsible for:
 18 | 
 19 |     * Desugaring Fold() nodes.
 20 |     * Turning the list of Production() nodes into a linked list.
 21 |     * Turning VariableNode() nodes into PatternVariableNodes in a pattern.
 22 | 
 23 |     """
 24 |     def __init__(self, program, listeners=None):
 25 |         self.listeners = listeners
 26 |         self.program = program
 27 |         self.pattern = False
 28 |         self.index = 0
 29 | 
 30 |     def desugar(self, ast):
 31 |         if isinstance(ast, Program):
 32 |             return Program(
 33 |                 [self.desugar(m) for m in ast.modlist]
 34 |             )
 35 |         elif isinstance(ast, Module):
 36 |             prodlist = []
 37 |             
 38 |             def find_prod_pos(name):
 39 |                 i = 0
 40 |                 for prod in prodlist:
 41 |                     if prod.name == name:
 42 |                         return i
 43 |                     i += 1
 44 |                 return None
 45 | 
 46 |             for prod in ast.prodlist:
 47 |                 prod = self.desugar(prod)
 48 |                 pos = find_prod_pos(prod.name)
 49 |                 if pos is None:
 50 |                     prodlist.append(prod)
 51 |                 else:
 52 |                     prodlist[pos].branches.extend(prod.branches)
 53 |             
 54 |             return Module(ast.name, prodlist)
 55 |         elif isinstance(ast, Production):
 56 |             return Production(ast.name, [self.desugar(x) for x in ast.branches])
 57 |         elif isinstance(ast, ProdBranch):
 58 |             self.pattern = True
 59 |             self.index = 0
 60 |             formals = [self.desugar(f) for f in ast.formals]
 61 |             self.pattern = False
 62 |             return ProdBranch(formals, [], self.desugar(ast.body))
 63 |         elif isinstance(ast, Or):
 64 |             return Or(self.desugar(ast.lhs), self.desugar(ast.rhs))
 65 |         elif isinstance(ast, And):
 66 |             return And(self.desugar(ast.lhs), self.desugar(ast.rhs))
 67 |         elif isinstance(ast, Using):
 68 |             return Using(self.desugar(ast.rule), ast.prodref)
 69 |         elif isinstance(ast, On):
 70 |             return On(self.desugar(ast.rule), self.desugar(ast.texpr))
 71 |         elif isinstance(ast, Call):
 72 |             return ast
 73 |         elif isinstance(ast, Send):
 74 |             self.pattern = True
 75 |             pattern = self.desugar(ast.pattern)
 76 |             self.pattern = False
 77 |             return Send(self.desugar(ast.rule), pattern)
 78 |         elif isinstance(ast, Set):
 79 |             return Set(ast.variable, self.desugar(ast.texpr))
 80 |         elif isinstance(ast, Not):
 81 |             return Not(self.desugar(ast.rule))
 82 |         elif isinstance(ast, While):
 83 |             return While(self.desugar(ast.rule))
 84 |         elif isinstance(ast, Concat):
 85 |             return Concat(self.desugar(ast.lhs), self.desugar(ast.rhs))
 86 |         elif isinstance(ast, AtomNode):
 87 |             return ast
 88 |         elif isinstance(ast, ConstructorNode):
 89 |             return ConstructorNode(ast.text,
 90 |                                    [self.desugar(x) for x in ast.contents])
 91 |         elif isinstance(ast, VariableNode):
 92 |             if self.pattern:
 93 |                 index = self.index
 94 |                 self.index += 1
 95 |                 return PatternVariableNode(ast.name, index)
 96 |             return ast
 97 |         elif isinstance(ast, Fold):
 98 |             under1 = VariableNode('_1')
 99 |             under2 = VariableNode('_2')
100 |             set_ = Set(under1, ast.initial)
101 |             send_ = Send(self.desugar(ast.rule), under2)
102 |             acc_ = Set(under1, Concat(under1, under2))
103 |             if ast.tag is not None:
104 |                 assert isinstance(ast.tag, AtomNode)
105 |                 acc_ = Set(under1,
106 |                            ConstructorNode(ast.tag.text,
107 |                                            [under2, under1]))
108 |             return_ = Call(Prodref('$', 'return'), [under1])
109 |             return And(And(set_, While(And(send_, acc_))), return_)
110 |         else:
111 |             raise NotImplementedError(repr(ast))
112 | 


--------------------------------------------------------------------------------
/src/tamsin/event.py:
--------------------------------------------------------------------------------
 1 | # encoding: UTF-8
 2 | 
 3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
 4 | # Distributed under a BSD-style license; see LICENSE for more information.
 5 | 
 6 | import sys
 7 | 
 8 | 
 9 | class EventProducer(object):
10 |     def event(self, tag, *data):
11 |         if self.listeners is None:
12 |             self.listeners = []
13 |         for listener in self.listeners:
14 |             listener.announce(tag, *data)
15 | 
16 |     def subscribe(self, listener):
17 |         if self.listeners is None:
18 |             self.listeners = []
19 |         self.listeners.append(listener)
20 | 
21 | 
class DebugEventListener(object):
    """Listener that prints every event it is told about to stdout.

    Output is indented two spaces per level; the level goes up on an
    'enter_interpreter' event and down on a 'leave_interpreter' event.
    """

    def __init__(self):
        self.indent = 0

    def listen_to(self, producer):
        """Subscribe this listener to `producer`."""
        producer.subscribe(self)

    def putstr(self, s):
        """Print `s` at the current indent level and flush stdout."""
        print(self.indent * '  ' + s)
        sys.stdout.flush()

    def announce(self, tag, *data):
        """Print the event, then dump any payload object that can dump itself.

        The indent level is adjusted before printing, so an
        'enter_interpreter' line is already shown one level deeper.
        """
        if tag == 'enter_interpreter':
            self.indent += 1
        if tag == 'leave_interpreter':
            self.indent -= 1

        # Every event is reported.  The per-tag filtering that used to
        # follow an unconditional return here could never run and has
        # been removed.
        self.putstr("%s %r" % (tag, data))
        for d in data:
            if getattr(d, 'dump', None) is not None:
                d.dump(self.indent)
64 | 


--------------------------------------------------------------------------------
/src/tamsin/interpreter.py:
--------------------------------------------------------------------------------
  1 | # encoding: UTF-8
  2 | 
  3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
  4 | # Distributed under a BSD-style license; see LICENSE for more information.
  5 | 
  6 | 
  7 | from tamsin.ast import (
  8 |     Production, And, Or, Not, While, Call, Send, Set, Using, On,
  9 |     Prodref, Concat, TermNode
 10 | )
 11 | from tamsin.buffer import StringBuffer
 12 | from tamsin.term import Term, Atom
 13 | from tamsin.event import EventProducer
 14 | from tamsin.scanner import (
 15 |     ByteScannerEngine, UTF8ScannerEngine, ProductionScannerEngine
 16 | )
 17 | import tamsin.sysmod
 18 | 
 19 | 
 20 | class Context(EventProducer):
 21 |     def __init__(self, listeners=None):
 22 |         self.listeners = listeners
 23 |         self.scopes = []
 24 | 
 25 |     def __repr__(self):
 26 |         return "Context(%r)" % (
 27 |             self.scopes
 28 |         )
 29 | 
 30 |     def push_scope(self, purpose):
 31 |         self.scopes.append({})
 32 |         self.event('push_scope', self)
 33 | 
 34 |     def pop_scope(self, purpose):
 35 |         self.scopes.pop()
 36 |         self.event('pop_scope', self)
 37 | 
 38 |     def clone(self):
 39 |         n = Context(listeners=self.listeners)
 40 |         for scope in self.scopes:
 41 |             n.scopes.append(scope.copy())
 42 |         return n
 43 | 
 44 |     def fetch(self, name):
 45 |         self.event('fetch', name,
 46 |             self.scopes[-1].get(name, 'undefined'), self.scopes[-1]
 47 |         )
 48 |         return self.scopes[-1][name]
 49 | 
 50 |     def store(self, name, value):
 51 |         assert(isinstance(value, Term)), "not a Term: %r" % value
 52 |         self.event('store', name,
 53 |             self.scopes[-1].get(name, 'undefined'), value
 54 |         )
 55 |         self.scopes[-1][name] = value
 56 | 
 57 | 
class Interpreter(EventProducer):
    """Evaluates a Tamsin AST directly against a Scanner.

    Holds the program being run, the scanner that supplies tokens, and a
    Context of variable scopes.  The context is replaced wholesale by a
    saved clone whenever a construct has to backtrack.
    """

    def __init__(self, program, scanner, listeners=None):
        self.listeners = listeners
        self.program = program
        self.scanner = scanner
        self.context = Context(listeners=self.listeners)

    def __repr__(self):
        return "Interpreter(%r, %r, %r)" % (
            self.program, self.scanner, self.context
        )

    ### interpreter proper ---------------------------------- ###

    def interpret_program(self, program):
        """Interpret the 'main:main' production of `program`.

        Raises ValueError if the program defines no such production.
        """
        main = program.find_production(Prodref('main', 'main'))
        if not main:
            raise ValueError("no 'main:main' production defined")
        return self.interpret(main)

    def interpret(self, ast, args=None):
        """Returns a pair (bool, result) where bool is True if it
        succeeded and False if it failed.

        `ast` is dispatched on its node class.  `args` is only consulted
        when `ast` is a Production, where it is the list of actual
        arguments to match against each branch's formals.

        Raises ValueError when no branch of a Production matches or a
        Using names an unknown scanner, and NotImplementedError for a
        node class that is not handled here.

        """
        self.event('interpret_ast', ast)
        if isinstance(ast, Production):
            name = ast.name
            bindings = False
            branch = None
            # Pick the first branch whose formals match the arguments.
            for b in ast.branches:
                formals = [self.interpret(f)[1] for f in b.formals]
                self.event('call_args', formals, args)
                if isinstance(formals, list):
                    bindings = Term.match_all(formals, args)
                    self.event('call_bindings', bindings)
                    if bindings != False:
                        branch = b
                        break
                # else:
                #     self.event('call_newfangled_parsing_args', prod)
                #     # start a new scope.  arg bindings will appear here.
                #     self.context.push_scope(prod.name)
                #     (success, result) = self.interpret_on_buffer(
                #         formals, unicode(args[0])
                #     )
                #     # we do not want to start a new scope here, and we
                #     # interpret the rule directly, not the prod.
                #     if success:
                #         self.event('begin_interpret_rule', prod.body)
                #         (success, result) = self.interpret(prod.body)
                #         self.event('end_interpret_rule', prod.body)
                #         self.context.pop_scope(prod.name)
                #         return (success, result)
                #     else:
                #         self.context.pop_scope(prod.name)
            if branch is None:
                raise ValueError("No '%s' production matched arguments %r" %
                    (name, args)
                )

            # The body runs in a fresh scope seeded with the bindings
            # produced by matching the formals.
            self.context.push_scope(name)
            if bindings != False:
                # NOTE: this loop rebinds `name`; the pop below therefore
                # uses ast.name rather than `name`.
                for name in bindings.keys():
                    self.context.store(name, bindings[name])
            self.event('begin_interpret_rule', branch.body)
            assert branch.body, repr(ast)
            (success, result) = self.interpret(branch.body)
            self.event('end_interpret_rule', branch.body)
            self.context.pop_scope(ast.name)

            return (success, result)
        elif isinstance(ast, And):
            # Sequence: the rhs only runs if the lhs succeeded.
            (success, value_lhs) = self.interpret(ast.lhs)
            if not success:
                return (False, value_lhs)
            (success, value_rhs) = self.interpret(ast.rhs)
            return (success, value_rhs)
        elif isinstance(ast, Or):
            # Alternation: snapshot the context and scanner, try the lhs,
            # and on failure roll both back before trying the rhs.
            saved_context = self.context.clone()
            self.scanner.save_state()
            self.event('begin_or', ast.lhs, ast.rhs, saved_context)
            (succeeded, result) = self.interpret(ast.lhs)
            if succeeded:
                self.event('succeed_or', result)
                # Keep the scanner where the lhs left it; just drop the
                # saved state.
                self.scanner.pop_state()
                return (True, result)
            else:
                self.event('fail_or', self.context, self.scanner, result)
                self.context = saved_context
                self.scanner.restore_state("after or")
                return self.interpret(ast.rhs)
        elif isinstance(ast, Call):
            prodref = ast.prodref
            name = prodref.name
            # Evaluate the argument expressions, then expand them against
            # the current context.
            args = [self.interpret(x)[1] for x in ast.args]
            args = [x.expand(self.context) for x in args]
            for a in args:
                assert isinstance(a, Term)
            # Productions in the '$' module are implemented in Python.
            if prodref.module == '$':
                return tamsin.sysmod.call(name, self, args)
            prod = self.program.find_production(prodref)
            assert prod is not None, "unresolved: " + repr(prodref)
            self.event('call_candidates', prod)
            return self.interpret(prod, args=args)
        elif isinstance(ast, Send):
            # Run the rule, then match its result against the pattern and
            # store the resulting bindings in the current scope.
            (success, result) = self.interpret(ast.rule)
            #(success, variable) = self.interpret(ast.pattern)  # ... ?
            #self.context.store(variable.name, result)
            formals = [self.interpret(f)[1] for f in [ast.pattern]]
            bindings = Term.match_all(formals, [result])
            if bindings == False:
                return (False, Atom('nomatch'))
            for name in bindings.keys():
                self.context.store(name, bindings[name])
            return (success, result)
        elif isinstance(ast, Using):
            # Interpret the sub-rule with a different scanner engine
            # pushed on top of the scanner's engine stack.
            sub = ast.rule
            prodref = ast.prodref
            scanner_name = prodref.name
            if prodref.module == '$' and scanner_name == 'byte':
                new_engine = ByteScannerEngine()
            elif prodref.module == '$' and scanner_name == 'utf8':
                new_engine = UTF8ScannerEngine()
            else:
                # Any other reference names a production of the program
                # that is to be used as the scanner.
                prod = self.program.find_production(prodref)
                if not prod:
                    raise ValueError("No such scanner '%s'" % scanner_name)
                new_engine = ProductionScannerEngine(self, prod)
            self.scanner.push_engine(new_engine)
            self.event('enter_with')
            (succeeded, result) = self.interpret(sub)
            self.event('leave_with', succeeded, result)
            self.scanner.pop_engine()
            return (succeeded, result)
        elif isinstance(ast, On):
            # Interpret the rule against a temporary buffer holding the
            # string form of the evaluated term expression, then put the
            # previous buffer back.
            (success, result) = self.interpret(ast.texpr)
            buffer = str(result.expand(self.context))
            self.event('interpret_on_buffer', buffer)
            previous_buffer = self.scanner.get_buffer()
            self.scanner.install_buffer(StringBuffer(buffer))
            (success, result) = self.interpret(ast.rule)
            self.scanner.install_buffer(previous_buffer)
            return (success, result)
        elif isinstance(ast, Set):
            # Assignment: store the expanded term under the variable's
            # name.  This branch always reports success.
            (success, variable) = self.interpret(ast.variable)
            (success, term) = self.interpret(ast.texpr)
            result = term.expand(self.context)
            self.context.store(variable.name, result)
            return (True, result)
        elif isinstance(ast, Not):
            # Negative lookahead: the context and scanner are rolled back
            # whether or not the rule succeeded, and the outcome is
            # inverted.
            expr = ast.rule
            saved_context = self.context.clone()
            self.scanner.save_state()
            self.event('begin_not', expr, saved_context)
            (succeeded, result) = self.interpret(expr)
            self.context = saved_context
            self.scanner.restore_state("after not")
            if succeeded:
                return (False, Atom(self.scanner.error_message(
                    "anything else", self.scanner.peek()
                )))
            else:
                return (True, Atom('nil'))
        elif isinstance(ast, While):
            # Repeat the rule until it fails.  This branch always reports
            # success; the value is that of the last successful
            # iteration, or Atom('nil') if there was none.
            result = Atom('nil')
            self.event('begin_while')
            succeeded = True
            successful_result = result
            while succeeded:
                saved_context = self.context.clone()
                self.scanner.save_state()
                (succeeded, result) = self.interpret(ast.rule)
                if succeeded:
                    self.scanner.pop_state()
                    successful_result = result
                    self.event('repeating_while', result)
                else:
                    self.scanner.restore_state("after while")
            # saved_context is the snapshot taken just before the failing
            # iteration, so that iteration's changes are discarded.
            self.context = saved_context
            self.event('end_while', result)
            return (True, successful_result)
        elif isinstance(ast, Concat):
            # Concatenate the string forms of both expanded operands into
            # a single Atom.
            (success, lhs) = self.interpret(ast.lhs)
            lhs = str(lhs.expand(self.context))
            (success, rhs) = self.interpret(ast.rhs)
            rhs = str(rhs.expand(self.context))
            return (True, Atom(lhs + rhs))
        elif isinstance(ast, TermNode):
            return (True, ast.to_term())
        else:
            raise NotImplementedError(repr(ast))
250 | 


--------------------------------------------------------------------------------
/src/tamsin/main.py:
--------------------------------------------------------------------------------
  1 | # encoding: UTF-8
  2 | 
  3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
  4 | # Distributed under a BSD-style license; see LICENSE for more information.
  5 | 
  6 | import os
  7 | import subprocess
  8 | import sys
  9 | 
 10 | from tamsin.buffer import FileBuffer, StringBuffer
 11 | from tamsin.event import DebugEventListener
 12 | from tamsin.term import Atom
 13 | from tamsin.scanner import (
 14 |     Scanner, EOF, UTF8ScannerEngine, TamsinScannerEngine
 15 | )
 16 | from tamsin.parser import Parser
 17 | from tamsin.interpreter import Interpreter
 18 | from tamsin.desugarer import Desugarer
 19 | from tamsin.analyzer import Analyzer
 20 | from tamsin.compiler import Compiler  # to be replaced by...
 21 | from tamsin.codegen import CodeGen
 22 | from tamsin.backends.c import Emitter
 23 | 
 24 | 
 25 | def parse(filename):
 26 |     with open(filename, 'r') as f:
 27 |         scanner = Scanner(
 28 |             FileBuffer(f, filename=filename),
 29 |             #StringBuffer(f.read(), filename=filename),
 30 |             engines=(TamsinScannerEngine(),)
 31 |         )
 32 |         parser = Parser(scanner)
 33 |         ast = parser.grammar()
 34 |         desugarer = Desugarer(ast)
 35 |         ast = desugarer.desugar(ast)
 36 |         return ast
 37 | 
 38 | 
 39 | def parse_and_check_args(args):
 40 |     ast = None
 41 |     for arg in args:
 42 |         next_ast = parse(arg)
 43 |         if ast is None:
 44 |             ast = next_ast
 45 |         else:
 46 |             ast.incorporate(next_ast)
 47 |     analyzer = Analyzer(ast)
 48 |     ast = analyzer.analyze(ast)
 49 |     return ast
 50 | 
 51 | 
 52 | def run(ast, listeners=None):
 53 |     scanner = Scanner(
 54 |         FileBuffer(sys.stdin, filename='<stdin>'),
 55 |         #StringBuffer(sys.stdin.read(), filename='<stdin>'),
 56 |         engines=(UTF8ScannerEngine(),),
 57 |         listeners=listeners
 58 |     )
 59 |     interpreter = Interpreter(
 60 |         ast, scanner, listeners=listeners
 61 |     )
 62 |     (succeeded, result) = interpreter.interpret_program(ast)
 63 |     if not succeeded:
 64 |         sys.stderr.write(str(result) + "\n")
 65 |         sys.exit(1)
 66 |     print str(result)
 67 | 
 68 | 
 69 | def main(args, tamsin_dir='.'):
 70 |     listeners = []
 71 |     if args[0] == '--debug':
 72 |         listeners.append(DebugEventListener())
 73 |         args = args[1:]
 74 |     if args[0] == 'scan':
 75 |         with open(args[1], 'r') as f:
 76 |             scanner = Scanner(
 77 |                 FileBuffer(f, filename=args[1]),
 78 |                 engines=(TamsinScannerEngine(),),
 79 |                 listeners=listeners
 80 |             )
 81 |         tok = None
 82 |         while tok is not EOF:
 83 |             tok = scanner.scan()
 84 |             if tok is not EOF:
 85 |                 print Atom(tok).repr()
 86 |         print
 87 |     elif args[0] == 'parse':
 88 |         parser = Parser.for_file(args[1])
 89 |         ast = parser.grammar()
 90 |         print str(ast)
 91 |     elif args[0] == 'desugar':
 92 |         parser = Parser.for_file(args[1])
 93 |         ast = parser.grammar()
 94 |         desugarer = Desugarer(ast)
 95 |         ast = desugarer.desugar(ast)
 96 |         print str(ast)
 97 |     elif args[0] == 'analyze':
 98 |         ast = parse_and_check_args(args[1:])
 99 |         print str(ast)
100 |     elif args[0] == 'compile':
101 |         ast = parse_and_check_args(args[1:])
102 |         compiler = Compiler(ast, sys.stdout)
103 |         compiler.compile()
104 |     elif args[0] == 'codegen':
105 |         ast = parse_and_check_args(args[1:])
106 |         generator = CodeGen(ast)
107 |         result = generator.generate()
108 |         emitter = Emitter(result, sys.stdout)
109 |         emitter.go()        
110 |     elif args[0] == 'doublecompile':
111 |         # http://www.youtube.com/watch?v=6WxJECOFg8w
112 |         ast = parse_and_check_args(args[1:])
113 |         c_filename = 'foo.c'
114 |         exe_filename = './foo'
115 |         with open(c_filename, 'w') as f:
116 |             compiler = Compiler(ast, f)
117 |             compiler.compile()
118 |         c_src_dir = os.path.join(tamsin_dir, 'c_src')
119 |         command = ("gcc", "-g", "-I%s" % c_src_dir, "-L%s" % c_src_dir,
120 |                    c_filename, "-o", exe_filename, "-ltamsin")
121 |         try:
122 |             subprocess.check_call(command)
123 |             exit_code = 0
124 |         except subprocess.CalledProcessError:
125 |             exit_code = 1
126 |         #subprocess.call(('rm', '-f', c_filename))
127 |         sys.exit(exit_code)
128 |     elif args[0] == 'loadngo':
129 |         ast = parse_and_check_args(args[1:])
130 |         c_filename = 'foo.c'
131 |         exe_filename = './foo'
132 |         with open(c_filename, 'w') as f:
133 |             compiler = Compiler(ast, f)
134 |             compiler.compile()
135 |         c_src_dir = os.path.join(tamsin_dir, 'c_src')
136 |         command = ("gcc", "-g", "-I%s" % c_src_dir, "-L%s" % c_src_dir,
137 |                    c_filename, "-o", exe_filename, "-ltamsin")
138 |         try:
139 |             subprocess.check_call(command)
140 |             subprocess.check_call((exe_filename,))
141 |             exit_code = 0
142 |         except subprocess.CalledProcessError:
143 |             exit_code = 1
144 |         subprocess.call(('rm', '-f', c_filename, exe_filename))
145 |         sys.exit(exit_code)
146 |     else:
147 |         ast = parse_and_check_args(args)
148 |         run(ast, listeners=listeners)
149 | 


--------------------------------------------------------------------------------
/src/tamsin/scanner.py:
--------------------------------------------------------------------------------
  1 | # encoding: UTF-8
  2 | 
  3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
  4 | # Distributed under a BSD-style license; see LICENSE for more information.
  5 | 
  6 | from tamsin.buffer import Buffer
  7 | from tamsin.event import EventProducer
  8 | from tamsin.term import Term
  9 | 
 10 | 
# Unique sentinel that scanner engines return in place of a token when
# there is no more input.  It is not a string; test for it with `is`.
EOF = object()
 12 | 
 13 | 
 14 | class Scanner(EventProducer):
 15 |     def __init__(self, buffer, engines=None, listeners=None):
 16 |         """Create a new Scanner object.
 17 | 
 18 |         """
 19 |         self.listeners = listeners
 20 |         self.event('set_buffer', buffer)
 21 |         assert isinstance(buffer, Buffer)
 22 |         self.buffer = buffer
 23 |         self.engines = []
 24 |         if engines is not None:
 25 |             for engine in engines:
 26 |                 self.push_engine(engine)
 27 | 
 28 |     def __repr__(self):
 29 |         return "Scanner(%r, position=%r)" % (
 30 |             self.buffer, self.position
 31 |         )
 32 | 
 33 |     def get_buffer(self):
 34 |         """Returns an object which represents the current Buffer of this
 35 |         Scanner.
 36 | 
 37 |         """
 38 |         return self.buffer
 39 | 
 40 |     def install_buffer(self, state):
 41 |         """Restores the Buffer of this Scanner to that which was saved by
 42 |         a previous call to get_buffer().
 43 | 
 44 |         """
 45 |         self.buffer = state
 46 | 
 47 |     def push_engine(self, engine):
 48 |         self.engines.append(engine)
 49 | 
 50 |     def pop_engine(self):
 51 |         engine = self.engines.pop()
 52 | 
 53 |     def save_state(self):
 54 |         return self.buffer.save_state()
 55 | 
 56 |     def restore_state(self, reason):
 57 |         return self.buffer.restore_state()
 58 | 
 59 |     def pop_state(self):
 60 |         return self.buffer.pop_state()
 61 | 
 62 |     def chop(self, amount):
 63 |         """Returns amount characters from the buffer and advances the
 64 |         scan position by amount.
 65 | 
 66 |         Should only be used by ScannerEngines.
 67 | 
 68 |         """
 69 |         return self.buffer.chop(amount)
 70 | 
 71 |     def first(self, amount):
 72 |         """Returns amount characters from the buffer.  Does not advance the
 73 |         scan position.
 74 | 
 75 |         Should only be used by ScannerEngines, and then only in error
 76 |         reporting.
 77 | 
 78 |         """
 79 |         return self.buffer.first(amount)
 80 | 
 81 |     def is_at_eof(self):
 82 |         """Returns True iff there is no more input to scan.
 83 | 
 84 |         Should only be used by ScannerEngines.  Parsing code should check
 85 |         to see if ... something
 86 | 
 87 |         """
 88 |         return self.first(1) == ''
 89 | 
 90 |     def is_at_utf8(self):
 91 |         """Returns the number of bytes following that comprise a UTF-8
 92 |         character.  Will be 0 for non-UTF-8 characters.
 93 | 
 94 |         Should only be used by ScannerEngines.
 95 | 
 96 |         """
 97 |         k = ord(self.first(1))
 98 |         if k & 0b11100000 == 0b11000000:
 99 |             return 2
100 |         elif k & 0b11110000 == 0b11100000:
101 |             return 3
102 |         elif k & 0b11111000 == 0b11110000:
103 |             return 4
104 |         else:
105 |             return 0
106 | 
107 |     def startswith(self, strings):
108 |         for s in strings:
109 |             if self.first(len(s)) == s:
110 |                 return True
111 |         return False
112 | 
113 |     def isalnum(self):
114 |         return self.first(1).isalnum()
115 | 
116 |     def error_message(self, expected, found):
117 |         if found is EOF:
118 |             found = 'EOF'
119 |         else:
120 |             found = "'%s'" % found
121 |         return (
122 |             "expected %s but found %s at line %s, column %s in '%s'" %
123 |             (expected, found,
124 |              self.buffer.line_number,
125 |              self.buffer.column_number,
126 |              self.buffer.filename)
127 |         )
128 | 
129 |     def error(self, expected, found):
130 |         raise ValueError(self.error_message(expected, found))
131 | 
132 |     def scan(self):
133 |         """Returns the next token from the buffer.
134 | 
135 |         This method consumes the token.  If you want to just see
136 |         what the next token would be, call peek() instead.
137 | 
138 |         The returned token will always be a raw string, possibly
139 |         containing UTF-8 sequences, possibly not.
140 | 
141 |         """
142 |         token = self.engines[-1].scan_impl(self)
143 |         #import sys
144 |         #print >>sys.stderr, token
145 |         assert not isinstance(token, unicode), repr(token)
146 |         self.event('scanned', self, token)
147 |         return token
148 | 
149 |     def peek(self):
150 |         self.buffer.save_state()
151 |         token = self.scan()
152 |         self.buffer.restore_state()
153 |         return token
154 | 
155 |     def consume(self, t):
156 |         if isinstance(t, unicode):
157 |             t = t.encode('UTF-8')
158 |         assert not isinstance(t, unicode)
159 |         self.event('consume', t)
160 |         self.buffer.save_state()
161 |         s = self.scan()
162 |         if s == t:
163 |             self.buffer.pop_state()
164 |             return t
165 |         else:
166 |             self.buffer.restore_state()
167 |             return None
168 | 
169 |     def expect(self, t):
170 |         r = self.consume(t)
171 |         if r is None:
172 |             self.error("'%s'" % t, self.scan())
173 |         return r
174 |     
175 |     def dump(self, indent=1):
176 |         print "==" * indent + "%r" % self
177 |         print "--" * indent + "engines: %r" % repr(self.engines)
178 |         print "--" * indent + "buffer: %r" % self.buffer
179 | 
180 | 
class ScannerEngine(object):
    """Abstract base class for the tokenising strategies used by a Scanner."""

    def scan_impl(self, scanner):
        """Produce the next token from `scanner`.

        Subclasses must override this.  An implementation should always
        return a non-Unicode string.
        """
        raise NotImplementedError()
185 | 
186 | 
# Maps each ASCII opening quote character to the character that closes it.
# TamsinScannerEngine tries every key as a possible start of a quoted token.
CLOSE_QUOTE = {
    '"': '"',
    '\'': '\'',
}

# Maps the character that follows a backslash inside a quoted token to the
# character it stands for.  The `\xHH` form is not listed here; it is
# handled separately in TamsinScannerEngine.consume_quoted.
ESCAPE_SEQUENCE = {
    'r': "\r",
    'n': "\n",
    't': "\t",
    "'": "'",
    '"': '"',
    '\\': '\\',
}
200 | 
201 | 
class TamsinScannerEngine(ScannerEngine):
    """ScannerEngine that tokenises Tamsin source text."""

    def scan_impl(self, scanner):
        """Return the next Tamsin token, or EOF at the end of the input.

        Whitespace and `#` comments are skipped.  Tokens are two-character
        operators, selected UTF-8 punctuation, single-character
        punctuation, quoted strings (returned with their quotes), and
        alphanumeric/underscore words.  Anything else raises ValueError
        through scanner.error().
        """
        # Skip any mixture of whitespace runs and '#'-to-end-of-line
        # comments.
        while not scanner.is_at_eof() and scanner.startswith(('#', ' ', '\t', '\r', '\n')):
            while not scanner.is_at_eof() and scanner.startswith((' ', '\t', '\r', '\n')):
                scanner.chop(1)
            while not scanner.is_at_eof() and scanner.startswith(('#',)):
                while not scanner.is_at_eof() and not scanner.startswith(('\n',)):
                    scanner.chop(1)
                if not scanner.is_at_eof():
                    scanner.chop(1)

        if scanner.is_at_eof():
            return EOF

        if scanner.startswith(('&&', '||', '->', '<-', '<<', '>>')):
            return scanner.chop(2)

        c = scanner.is_at_utf8()
        if c > 0:
            c = scanner.chop(c).decode('UTF-8')
            if c in (u'→', u'←', u'«', u'»'):
                return c.encode('UTF-8')
            elif c == u'“':
                return self.consume_quoted(scanner,
                    u'“'.encode('UTF-8'), u'”'.encode('UTF-8')
                )
            else:
                # Report the character that was actually rejected; it has
                # already been chopped, so scanner.first(1) would name
                # whatever follows it instead.
                scanner.error('identifiable character', c.encode('UTF-8'))

        if scanner.startswith(('=', '(', ')', '[', ']', '{', '}', '!', ':', '/',
                            '|', '&', ',', '.', '@', '+', '$',
                            )):
            return scanner.chop(1)

        for quote in (CLOSE_QUOTE.keys()):
            # startswith() takes a collection of candidate strings, so the
            # quote is wrapped in a tuple.
            if scanner.startswith((quote,)):
                scanner.chop(len(quote))
                return self.consume_quoted(scanner, quote, CLOSE_QUOTE[quote])

        if scanner.isalnum():
            token = ''
            while not scanner.is_at_eof() and (scanner.isalnum() or
                                               scanner.startswith(('_',))):
                token += scanner.chop(1)
            return token

        scanner.error('identifiable character', scanner.first(1))

    def consume_quoted(self, scanner, quote, close_quote):
        """Scan the rest of a quoted token and return it with both quotes.

        Assumes the opening `quote` has already been chopped.  Backslash
        escapes from ESCAPE_SEQUENCE and `\\xHH` are decoded; any other
        escape raises ValueError through scanner.error().  If the input
        ends before `close_quote` is seen, the close quote is appended
        anyway.
        """
        token = quote
        # The close quote must be passed as a 1-tuple.  Given a bare
        # string, startswith() would compare the input against each
        # character of it separately, so a multi-byte close quote such as
        # the UTF-8 encoding of u'”' would match on any one of its bytes
        # and end the token too early.
        while (not scanner.is_at_eof() and
               not scanner.startswith((close_quote,))):
            char = scanner.chop(1)
            if char == '\\':
                char = scanner.chop(1)
                if char in ESCAPE_SEQUENCE:
                    char = ESCAPE_SEQUENCE[char]
                elif char == 'x':
                    char = chr(int(scanner.chop(2), 16))
                else:
                    scanner.error('legal escape sequence', '\\' + char)
            token += char
        scanner.chop(len(close_quote))  # chop ending quote
        # we add the specific close quote we expect, in case it was EOF
        token += close_quote
        return token
269 | 
270 | 
class UTF8ScannerEngine(ScannerEngine):
    """ScannerEngine whose tokens are single UTF-8 encoded characters."""

    def scan_impl(self, scanner):
        """Return the next character as a byte string, or EOF.

        A multi-byte UTF-8 sequence is returned whole; any other byte is
        returned on its own.
        """
        if scanner.is_at_eof():
            return EOF
        width = scanner.is_at_utf8()
        return scanner.chop(width if width > 0 else 1)
279 | 
280 | 
class ByteScannerEngine(ScannerEngine):
    """ScannerEngine whose tokens are single bytes."""

    def scan_impl(self, scanner):
        """Return the next byte of input, or EOF when none is left."""
        return EOF if scanner.is_at_eof() else scanner.chop(1)
286 | 
287 | 
class ProductionScannerEngine(ScannerEngine):
    """A ScannerEngine that uses a production of the Tamsin program to
    scan the input.

    """
    def __init__(self, interpreter, production):
        self.interpreter = interpreter
        self.production = production

    def scan_impl(self, scanner):
        """Interpret the production once and return its result as a token.

        Returns EOF if the input is exhausted or the production fails;
        otherwise returns the string form of the Term it produced.
        `scanner` must be the interpreter's own scanner.
        """
        if scanner.is_at_eof():
            return EOF

        # This will cause the scanner to have another engine pushed onto
        # it.  We rely on that engine to actually get us the token, and it
        # will update the scanner for us.

        assert scanner is self.interpreter.scanner

        # default to this so you don't shoot yourself in the foot
        scanner.push_engine(UTF8ScannerEngine())
        try:
            (success, token) = self.interpreter.interpret(self.production)
        finally:
            # Always undo the push, so an exception raised while
            # interpreting the production cannot leave the temporary
            # engine on the scanner's stack.
            scanner.pop_engine()

        if success:
            self.interpreter.event('production_scan', self.production, token)
            assert isinstance(token, Term), repr(token)
            if token is EOF:
                return token
            return str(token)
        else:
            return EOF
323 | 


--------------------------------------------------------------------------------
/src/tamsin/sysmod.py:
--------------------------------------------------------------------------------
  1 | # encoding: UTF-8
  2 | 
  3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
  4 | # Distributed under a BSD-style license; see LICENSE for more information.
  5 | 
  6 | # Python version of Tamsin's $ module.
  7 | 
  8 | import sys
  9 | 
 10 | from tamsin.term import Atom, Constructor
 11 | from tamsin.scanner import EOF
 12 | 
 13 | 
# Maps a `$` production name to the name of the Python function in this
# module that implements it, for the cases where the two differ.
# Presumably needed because `return` and `print` cannot be used as
# function names in Python 2 -- confirm against the rest of the module.
TRANSLATOR = {'return': 'return_', 'print': 'print_'}
 15 | 
 16 | 
 17 | def call(name, interpreter, args):
 18 |     name = TRANSLATOR.get(name, name)
 19 |     if name not in globals():
 20 |         raise NotImplementedError(name)
 21 |     return globals()[name](interpreter, args)
 22 | 
 23 | 
 24 | def arity(name):
 25 |     name = TRANSLATOR.get(name, name)
 26 |     if name not in globals():
 27 |         raise NotImplementedError(name)
 28 |     return globals()[name].arity
 29 | 
 30 | 
 31 | def return_(self, args):
 32 |     return (True, args[0])
 33 | return_.arity = 1
 34 | 
 35 | 
 36 | def fail(self, args):
 37 |     return (False, args[0])
 38 | fail.arity = 1
 39 | 
 40 | 
 41 | def expect(self, args):
 42 |     upcoming_token = self.scanner.peek()
 43 |     term = args[0]
 44 |     token = str(term)
 45 |     if self.scanner.consume(token):
 46 |         return (True, term)
 47 |     else:
 48 |         return (False,
 49 |             Atom(self.scanner.error_message("'%s'" % token, upcoming_token))
 50 |         )
 51 | expect.arity = 1
 52 | 
 53 | 
 54 | def eof(self, args):
 55 |     if self.scanner.peek() is EOF:
 56 |         return (True, '')
 57 |     else:
 58 |         return (False,
 59 |             Atom(self.scanner.error_message('EOF', self.scanner.peek()))
 60 |         )
 61 | eof.arity = 0
 62 | 
 63 | 
 64 | def any(self, args):
 65 |     if self.scanner.peek() is not EOF:
 66 |         return (True, Atom(self.scanner.scan()))
 67 |     else:
 68 |         return (False,
 69 |             Atom(self.scanner.error_message('any token', EOF))
 70 |         )
 71 | any.arity = 0
 72 | 
 73 | 
 74 | def alnum(self, args):
 75 |     if (self.scanner.peek() is not EOF and
 76 |         self.scanner.peek()[0].isalnum()):
 77 |         return (True, Atom(self.scanner.scan()))
 78 |     else:
 79 |         return (False,
 80 |             Atom(self.scanner.error_message('alphanumeric', self.scanner.peek()))
 81 |         )
 82 | alnum.arity = 0
 83 | 
 84 | 
 85 | def upper(self, args):
 86 |     if (self.scanner.peek() is not EOF and
 87 |         self.scanner.peek()[0].isupper()):
 88 |         return (True, Atom(self.scanner.scan()))
 89 |     else:
 90 |         return (False,
 91 |             Atom(self.scanner.error_message('uppercase', self.scanner.peek()))
 92 |         )
 93 | upper.arity = 0
 94 | 
 95 | 
 96 | def startswith(self, args):
 97 |     if (self.scanner.peek() is not EOF and
 98 |         self.scanner.peek()[0].startswith((str(args[0]),))):
 99 |         return (True, Atom(self.scanner.scan()))
100 |     else:
101 |         return (False,
102 |             Atom(self.scanner.error_message("'%s...'" % args[0], self.scanner.peek()))
103 |         )
104 | startswith.arity = 1
105 | 
106 | 
107 | def equal(self, args):
108 |     if args[0].match(args[1]) != False:
109 |         return (True, args[0])
110 |     else:
111 |         return (False, Atom("term '%s' does not equal '%s'" %
112 |                             (args[0], args[1])))
113 | equal.arity = 2
114 | 
115 | 
116 | def unquote(self, args):
117 |     q = str(args[0])
118 |     l = str(args[1])
119 |     r = str(args[2])
120 |     if (q.startswith(l) and q.endswith(r)):
121 |         if len(r) == 0:
122 |             return (True, Atom(q[len(l):]))
123 |         return (True, Atom(q[len(l):-len(r)]))
124 |     else:
125 |         return (False, Atom("term '%s' is not quoted with '%s' and '%s'" %
126 |                             (q, l, r)))
127 | unquote.arity = 3
128 | 
129 | 
130 | def mkterm(self, args):
131 |     t = args[0]
132 |     l = args[1]
133 |     contents = []
134 |     while isinstance(l, Constructor) and l.tag == 'list':
135 |         contents.append(l.contents[0])
136 |         l = l.contents[1]
137 |     if contents:
138 |         return (True, Constructor(t.text, contents))
139 |     else:
140 |         return (True, t)
141 | mkterm.arity = 2
142 | 
143 | 
144 | def reverse(self, args):
145 |     return (True, args[0].reversed(args[1]))
146 | reverse.arity = 2
147 | 
148 | 
149 | def print_(self, args):
150 |     val = args[0]
151 |     sys.stdout.write(str(val))
152 |     sys.stdout.write("\n")
153 |     return (True, val)
154 | print_.arity = 1
155 | 
156 | 
157 | def emit(self, args):
158 |     val = args[0]
159 |     sys.stdout.write(str(val))
160 |     return (True, val)
161 | emit.arity = 1
162 | 
163 | 
164 | def repr(self, args):
165 |     val = args[0]
166 |     val = Atom(val.repr())
167 |     return (True, val)
168 | repr.arity = 1
169 | 
170 | 
171 | counter = 0
172 | 
173 | def gensym(self, args):
174 |     global counter
175 |     counter += 1
176 |     return (True, Atom(str(args[0]) + str(counter)))
177 | gensym.arity = 1
178 | 
179 | 
180 | def hexbyte(self, args):
181 |     return (True, Atom(chr(int(args[0].text + args[1].text, 16))))
182 | hexbyte.arity = 2
183 | 
184 | 
185 | def format_octal(self, args):
186 |     return (True, Atom("%o" % ord(args[0].text[0])))
187 | format_octal.arity = 1
188 | 
189 | 
190 | def length(self, args):
191 |     return (True, Atom(str(len(str(args[0])))))
192 | length.arity = 1
193 | 


--------------------------------------------------------------------------------
/src/tamsin/term.py:
--------------------------------------------------------------------------------
  1 | # encoding: UTF-8
  2 | 
  3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
  4 | # Distributed under a BSD-style license; see LICENSE for more information.
  5 | 
  6 | # Note that __str__ and __repr__ and repr perform very different tasks:
  7 | # __str__ : flattening operation on Tamsin terms
  8 | # repr: reprifying operation on Tamsin terms
  9 | # __repr__ : make a string that is valid Python code for constructing the Term
 10 | # BAREWORD: the characters of which a term's text may consist and still be
 11 | # reprified without quotes.  PRINTABLE: printable ASCII less "'" and "\".
 12 | BAREWORD = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz'
 13 | PRINTABLE = (' !"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_'
 14 |              '`abcdefghijklmnopqrstuvwxyz{|}~')
 15 | 
 16 | 
 17 | def repr_escape(t):
 18 |     if len(t) == 0:
 19 |         return "''"
 20 |     if all(c in BAREWORD for c in t):
 21 |         return t
 22 |     s = ''
 23 |     for c in t:
 24 |         if c == "'":
 25 |             s += r"\'"
 26 |         elif c == "\\":
 27 |             s += r"\\"
 28 |         elif ord(c) > 31 and ord(c) < 127:
 29 |             s += c
 30 |         else:
 31 |             s += r"\x%02x" % ord(c)
 32 |     return "'%s'" % s
 33 | 
 34 | 
 35 | class Term(object):
 36 |     def expand(self, context):
 37 |         """Expands this term, returning a new term where, for all x, all
 38 |         occurrences of (VAR x) are replaced with the value of x in the
 39 |         given context.
 40 | 
 41 |         """
 42 |         return self
 43 | 
 44 |     def __str__(self):
 45 |         raise NotImplementedError
 46 | 
 47 |     def __repr__(self):
 48 |         raise NotImplementedError
 49 | 
 50 |     def repr(self):
 51 |         raise NotImplementedError
 52 | 
 53 |     @classmethod
 54 |     def match_all(_class, patterns, values):
 55 |         """Returns a dict of bindings if all values match all patterns,
 56 |         or False if there was a mismatch.
 57 | 
 58 |         """
 59 |         i = 0
 60 |         bindings = {}
 61 |         while i < len(patterns):
 62 |             sub = patterns[i].match(values[i])
 63 |             if sub == False:
 64 |                 return False
 65 |             bindings.update(sub)
 66 |             i += 1
 67 |         return bindings
 68 | 
 69 |     def match(self, value):
 70 |         raise NotImplementedError
 71 |         
 72 | 
 73 | class Atom(Term):
 74 |     def __init__(self, text):
 75 |         assert not isinstance(text, unicode)
 76 |         self.text = text
 77 | 
 78 |     def __str__(self):
 79 |         return self.text
 80 | 
 81 |     def __repr__(self):
 82 |         return "Atom(%r)" % (self.text)
 83 | 
 84 |     def repr(self):
 85 |         return repr_escape(self.text)
 86 | 
 87 |     def match(self, value):
 88 |         if not isinstance(value, Atom):
 89 |             return False
 90 |         if self.text == value.text:
 91 |             return {}
 92 |         else:
 93 |             return False
 94 | 
 95 |     def reversed(self, sentinel):
 96 |         if self.match(sentinel) != False:
 97 |             return self
 98 |         raise ValueError("malformed list")
 99 | 
100 | 
101 | class Constructor(Term):
102 |     def __init__(self, tag, contents):
103 |         assert not isinstance(tag, unicode)
104 |         self.tag = tag
105 |         for c in contents:
106 |             assert isinstance(c, Term), repr(c)
107 |         self.contents = contents
108 | 
109 |     def expand(self, context):
110 |         return Constructor(self.tag, [x.expand(context) for x in self.contents])
111 | 
112 |     def __str__(self):
113 |         return "%s(%s)" % (
114 |             self.tag, ', '.join([str(x) for x in self.contents])
115 |         )
116 | 
117 |     def __repr__(self):
118 |         return "Constructor(%r, %r)" % (self.tag, self.contents)
119 | 
120 |     def repr(self):
121 |         return "%s(%s)" % (
122 |             repr_escape(self.tag), ', '.join([x.repr() for x in self.contents])
123 |         )
124 | 
125 |     def match(self, value):
126 |         if not isinstance(value, Constructor):
127 |             return False
128 |         if self.tag != value.tag:
129 |             return False
130 |         if len(self.contents) != len(value.contents):
131 |             return False
132 |         bindings = {}
133 |         i = 0
134 |         while i < len(self.contents):
135 |             b = self.contents[i].match(value.contents[i])
136 |             if b == False:
137 |                 return False
138 |             bindings.update(b)
139 |             i += 1
140 |         return bindings
141 | 
142 |     def reversed(self, sentinel):
143 |         acc = sentinel
144 |         l = self
145 |         tag = self.tag
146 |         while isinstance(l, Constructor) and l.tag == tag:
147 |             acc = Constructor(tag, [l.contents[0], acc])
148 |             if len(l.contents) < 2:
149 |                 break
150 |             l = l.contents[1]
151 |         if l.match(sentinel) == False:
152 |             raise ValueError("malformed list %s" % l.repr())
153 |         return acc
154 | 
155 | 
156 | class Variable(Term):
157 |     def __init__(self, name):
158 |         assert not isinstance(name, unicode)
159 |         assert name[0].isupper() or name[0] == u'_', name
160 |         self.name = name
161 | 
162 |     def expand(self, context):
163 |         return context.fetch(self.name)
164 | 
165 |     def __str__(self):
166 |         return self.name
167 | 
168 |     def __repr__(self):
169 |         return "Variable(%r)" % (self.name)
170 | 
171 |     def repr(self):
172 |         return self.name
173 | 
174 |     def match(self, value):
175 |         return {self.name: value}
176 | 


--------------------------------------------------------------------------------
/test-codegen.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Runs `tamsin codegen` on each file in FILES, stopping at the first failure.
 3 | YES="
 4 | eg/hello-world.tamsin eg/bits.tamsin eg/bitpair.tamsin
 5 | eg/exciting-long.tamsin eg/list-of-chars.tamsin
 6 | eg/modules.tamsin
 7 | "
 8 | # YES (above) and NO (below) are assigned but never read by this script.
 9 | FILES="eg/reverse.tamsin"
10 | 
11 | NO="eg/eval-bool-expr.tamsin"
12 | # Only FILES is iterated; $FILES is left unquoted so it splits on whitespace.
13 | for FILE in $FILES; do
14 |   tamsin codegen $FILE || exit 1
15 | done
16 | 


--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/sh
  2 | # Usage, as parsed below: test.sh [-f FILES] [compiled|interpreted] [TEST]
  3 | FILES="
  4 |     doc/Tamsin.markdown
  5 |     doc/System_Module.markdown
  6 |     doc/Tested_Examples.markdown
  7 | "
  8 | GLOB="eg/*.tamsin lib/*.tamsin mains/*.tamsin"
  9 | # tmp/ receives the generated C file and the outputs that test_it diffs.
 10 | mkdir -p tmp
 11 | # -f FILES replaces the default list of Falderal documents set above.
 12 | if [ x$1 = 'x-f' ]; then
 13 |     shift
 14 |     echo "(Testing on Falderal files '$1' only)"
 15 |     FILES=$1
 16 |     shift
 17 | fi
 18 | # A leading mode word is recorded in MODE and shifted off; default: compiled.
 19 | MODE=compiled
 20 | if [ x$1 = xcompiled -o x$1 = xinterpreted ]; then
 21 |     MODE=$1
 22 |     shift
 23 | fi
 24 | # No test name given: run the default suites, stopping at the first failure.
 25 | if [ x$1 = x ]; then
 26 |    $0 interpreter &&
 27 |    $0 compiler &&
 28 |    $0 tcompiler &&
 29 |    $0 bootstrap &&
 30 |    echo "All tests passed!"
 31 |    exit $?
 32 | fi
 33 | # `tamsin`: the Tamsin-written tools in compiled mode, plus micro and tcompiler.
 34 | if [ x$1 = xtamsin ]; then
 35 |    echo "Testing things written in Tamsin only."
 36 |    $0 compiled scanner &&
 37 |    $0 compiled grammar &&
 38 |    $0 compiled parser &&
 39 |    $0 compiled desugarer &&
 40 |    $0 compiled analyzer &&
 41 |    $0 micro &&
 42 |    $0 tcompiler &&
 43 |    echo "All tests passed!"
 44 |    exit $?
 45 | fi
 46 | # `thorough`: the default suites, micro, and each Tamsin-written tool in both modes.
 47 | if [ x$1 = xthorough ]; then
 48 |    echo "Testing EVERYTHING.  This will take more than 8 minutes.  (On a FAST machine.)"
 49 |    $0 interpreter &&
 50 |    $0 compiler &&
 51 |    $0 interpreted scanner &&
 52 |    $0 interpreted grammar &&
 53 |    $0 interpreted parser &&
 54 |    $0 interpreted desugarer &&
 55 |    $0 interpreted analyzer &&
 56 |    $0 compiled scanner &&
 57 |    $0 compiled grammar &&
 58 |    $0 compiled parser &&
 59 |    $0 compiled desugarer &&
 60 |    $0 compiled analyzer &&
 61 |    $0 micro &&
 62 |    $0 tcompiler &&
 63 |    $0 bootstrap &&
 64 |    echo "All tests passed!"
 65 |    exit $?
 66 | fi
 67 | 
 68 | ok() {  # used as CMD by the grammar test: prints ok whatever its arguments
 69 |     printf '%s\n' ok
 70 | }
 71 | 
 72 | test_it() {
 73 |     # test_it MODE SRC LIBS CMD [BIN]: run SRC (compiled to BIN, or
 74 |     # interpreted) on every file in $GLOB and diff against `CMD file`.
 75 |     MODE=$1
 76 |     SRC=$2
 77 |     LIBS=$3
 78 |     CMD=$4
 79 |     BIN=$5
 80 |     if [ x$BIN = x ]; then
 81 |         BIN=./foo   # needs the ./ : a bare `foo` would be looked up in $PATH when run
 82 |     fi
 83 | 
 84 |     if [ $MODE = "compiled" ]; then
 85 |         make c_src/libtamsin.a || exit 1
 86 |         echo "*** Compiling $SRC (with $LIBS)"
 87 |         echo "*** and testing it against '$CMD'..."
 88 |         bin/tamsin compile $LIBS $SRC > tmp/foo.c && \
 89 |            gcc -g -Ic_src -Lc_src tmp/foo.c -o $BIN -ltamsin || exit 1
 90 |         for EG in $GLOB; do
 91 |             echo $EG
 92 |             $CMD $EG | bin/wrap > tmp/python-cmd.txt
 93 |             $BIN <$EG | bin/wrap > tmp/tamsin-cmd.txt
 94 |             diff -ru tmp/python-cmd.txt tmp/tamsin-cmd.txt > tmp/output.diff
 95 |             diff -ru tmp/python-cmd.txt tmp/tamsin-cmd.txt || exit 1
 96 |         done
 97 |     elif [ $MODE = "interpreted" ]; then
 98 |         echo "*** Interpreting $SRC (with $LIBS)"
 99 |         echo "*** and testing it against '$CMD'..."
100 |         for EG in $GLOB; do
101 |             echo $EG
102 |             $CMD $EG | bin/wrap > tmp/python-cmd.txt
103 |             bin/tamsin $LIBS $SRC <$EG | bin/wrap > tmp/tamsin-cmd.txt
104 |             diff -ru tmp/python-cmd.txt tmp/tamsin-cmd.txt > tmp/output.diff
105 |             diff -ru tmp/python-cmd.txt tmp/tamsin-cmd.txt || exit 1
106 |         done
107 |     else
108 |         echo "BAD MODE"
109 |         exit 1
110 |     fi
111 |     echo "Passed."
112 |     exit 0
113 | }
114 | # Run the single test named by $1; $VERBOSE is not set anywhere in this script.
115 | if [ x$1 = xinterpreter -o x$1 = xi ]; then
116 |     echo "*** Testing Python interpreter..."
117 |     falderal $VERBOSE --substring-error fixture/tamsin.py.markdown $FILES
118 | elif [ x$1 = xerror-reporting ]; then
119 |     echo "*** Testing error reporting in Python interpreter..."
120 |     falderal $VERBOSE --substring-error fixture/tamsin.py.markdown doc/Error_Reporting.markdown
121 | elif [ x$1 = xcompiler ]; then
122 |     make c_src/libtamsin.a || exit 1
123 |     echo "*** Testing compiler..."
124 |     falderal $VERBOSE --substring-error fixture/compiler.py.markdown $FILES
125 | elif [ x$1 = xgrammar ]; then
126 |     test_it $MODE "mains/grammar.tamsin" \
127 |                   "lib/tamsin_scanner.tamsin" \
128 |                   "ok" \
129 |                   "bin/tamsin-grammar"
130 | elif [ x$1 = xscanner ]; then
131 |     test_it $MODE "mains/scanner.tamsin" \
132 |                   "lib/tamsin_scanner.tamsin" \
133 |                   "./bin/tamsin scan" \
134 |                   "bin/tamsin-scanner"
135 | elif [ x$1 = xparser ]; then
136 |     test_it $MODE "mains/parser.tamsin" \
137 |                   "lib/list.tamsin lib/tamsin_scanner.tamsin lib/tamsin_parser.tamsin" \
138 |                   "./bin/tamsin parse" \
139 |                   "bin/tamsin-parser"
140 | elif [ x$1 = xdesugarer ]; then
141 |     test_it $MODE "mains/desugarer.tamsin" \
142 |                   "lib/list.tamsin lib/tamsin_scanner.tamsin lib/tamsin_parser.tamsin lib/tamsin_analyzer.tamsin" \
143 |                   "./bin/tamsin desugar" \
144 |                   "bin/tamsin-desugarer"
145 | elif [ x$1 = xanalyzer ]; then
146 |     # only eg/*.tamsin is analyzed here: the lib/ and mains/ files need libs
147 |     GLOB="eg/*.tamsin"
148 |     test_it $MODE "mains/analyzer.tamsin" \
149 |                   "lib/list.tamsin lib/tamsin_scanner.tamsin lib/tamsin_parser.tamsin lib/tamsin_analyzer.tamsin" \
150 |                   "./bin/tamsin analyze" \
151 |                   "bin/tamsin-analyzer"
152 | elif [ x$1 = xtcompiler ]; then
153 |     make bin/tamsin-compiler || exit 1
154 |     echo "*** Testing Tamsin-in-Tamsin compiler..."
155 |     falderal $VERBOSE --substring-error fixture/compiler.tamsin.markdown $FILES
156 | elif [ x$1 = xbootstrap ]; then
157 |     make bin/bootstrapped-compiler || exit 1
158 |     echo "*** Testing Bootstrapped Tamsin-in-Tamsin compiler..."
159 |     falderal $VERBOSE --substring-error fixture/bootstrapped.markdown $FILES
160 | elif [ x$1 = xmicro ]; then
161 |     make bin/micro-tamsin || exit 1
162 |     echo "*** Testing Micro-Tamsin interpreter..."
163 |     FILES="doc/Micro-Tamsin.markdown"
164 |     falderal $VERBOSE --substring-error fixture/micro-tamsin.markdown $FILES
165 | elif [ x$1 = xmini ]; then
166 |     make bin/mini-tamsin || exit 1
167 |     echo "*** Testing Mini-Tamsin interpreter..."
168 |     FILES="doc/Micro-Tamsin.markdown"   # note: does not use Mini-Tamsin.md yet
169 |     falderal $VERBOSE --substring-error fixture/mini-tamsin.markdown $FILES
170 | else
171 |     echo "Unknown test '$1'."
172 |     exit 1
173 | fi
174 | 


--------------------------------------------------------------------------------