├── .gitignore
├── .hgignore
├── .hgtags
├── HISTORY.markdown
├── LICENSE
├── Makefile
├── README.markdown
├── bin
├── hexout
├── inhex
├── tamsin
└── wrap
├── c_src
├── dict.c
├── dict.h
├── scanner.c
├── scanner.h
├── tamsin.c
├── tamsin.h
├── term.c
└── term.h
├── doc
├── 6502-sketch.tamsin
├── Advanced_Features.markdown
├── Case_Study.markdown
├── Error_Reporting.markdown
├── Excessive_Tests.markdown
├── Micro-Tamsin.markdown
├── Mini-Tamsin.markdown
├── Notes.markdown
├── Philosophy.markdown
├── System_Module.markdown
├── TODO.markdown
├── Tamsin.markdown
└── Tested_Examples.markdown
├── eg
├── alg-expr1.tamsin
├── alg-expr2.tamsin
├── alg-expr3.tamsin
├── backtrack.tamsin
├── bitpair.tamsin
├── bits.tamsin
├── blerf.tamsin
├── change-buffer.tamsin
├── csv_extract.tamsin
├── csv_parse.tamsin
├── escape.tamsin
├── eval-bool-expr.tamsin
├── exciting-long.tamsin
├── exciting.tamsin
├── exciting.txt
├── expector.tamsin
├── foobar.tamsin
├── hello-world.tamsin
├── list-of-chars.tamsin
├── list-sugar2.tamsin
├── modules.tamsin
├── names.csv
├── pipeline.tamsin
├── prod-branches.tamsin
├── reverse.tamsin
├── sexpr-eval.tamsin
├── store.tamsin
├── zeroes-concat.tamsin
└── zeroes.tamsin
├── fixture
├── bootstrapped.markdown
├── compiler.py.markdown
├── compiler.tamsin.markdown
├── micro-tamsin.markdown
├── mini-tamsin.markdown
└── tamsin.py.markdown
├── lib
├── list.tamsin
├── tamsin_analyzer.tamsin
├── tamsin_parser.tamsin
└── tamsin_scanner.tamsin
├── mains
├── analyzer.tamsin
├── compiler.tamsin
├── desugarer.tamsin
├── grammar.tamsin
├── micro-tamsin.tamsin
├── mini-tamsin.tamsin
├── parser.tamsin
└── scanner.tamsin
├── src
└── tamsin
│ ├── __init__.py
│ ├── analyzer.py
│ ├── ast.py
│ ├── backends
│ ├── __init__.py
│ └── c.py
│ ├── buffer.py
│ ├── codegen.py
│ ├── codenode.py
│ ├── compiler.py
│ ├── desugarer.py
│ ├── event.py
│ ├── interpreter.py
│ ├── main.py
│ ├── parser.py
│ ├── scanner.py
│ ├── sysmod.py
│ └── term.py
├── test-codegen.sh
└── test.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.o
3 | *.a
4 | bin/tamsin-*
5 | bin/bootstrapped-*
6 | bin/micro-tamsin
7 | tmp/
8 |
--------------------------------------------------------------------------------
/.hgignore:
--------------------------------------------------------------------------------
1 | syntax: glob
2 |
3 | *.pyc
4 | *.o
5 | *.a
6 |
7 | bin/tamsin-*
8 | bin/bootstrapped-*
9 | bin/micro-tamsin
10 | tmp/
11 |
--------------------------------------------------------------------------------
/.hgtags:
--------------------------------------------------------------------------------
1 | 8c5c38158bc6d671345851015aa15a71f5cd9aa1 0.1
2 | b89a9c3fc2e841573dae7ce7e51deb81313c8a40 0.2
3 | db0e6c779d74337956106874d1ef91385fe86e7d 0.3
4 | c91de5aea6dea0fb9d609cd76ccc4d153f2a3e5a 0.4
5 | 7597a8c4b1c696a0afb96aa496fcec5e36beeebf 0.5
6 |
--------------------------------------------------------------------------------
/HISTORY.markdown:
--------------------------------------------------------------------------------
1 | Tamsin Release History
2 | ======================
3 |
4 | 0.5-2017.0502
5 | -------------
6 |
7 | This is an interim release, created because the tests pass here, even
8 | though not everything planned for the next release has been achieved.
9 |
10 | ### language ###
11 |
12 | * The RHS of → can be a pattern term.
13 | * "Proper quoted" strings.
14 |
15 | ### implementations ###
16 |
17 | * `mini-tamsin.tamsin` is an interpreter for "Mini-Tamsin", written in Tamsin.
18 | * Better error reporting.
19 | * Improvements or bugfixes in the C-language implementation of `$:unquote`.
20 | * Tamsin programs can handle streams on input and produce streams on output.
21 | * Begun work on a better C-emitting backend.
22 | * Better scanning; buffers are more sophisticated and track some state themselves.
23 |
24 | 0.5
25 | ---
26 |
27 | ### language ###
28 |
29 | * EOF is no longer a special kind of term; it is no longer exposed, as
30 | a value, to Tamsin programs. (`$:eof` returns `''` on success.)
31 | * Prolog/Erlang-style list sugar for terms, in patterns as well.
32 | * When a new scanner is switched to using `using`, that scanner defaults
33 | to the `$:utf8` scanner for *its* scanning. This prevents the common
34 | shooting-self-in-foot error of selecting a production that is not
35 | itself `using` another scanner (which would result in an infinite loop
36 | of the production scanner trying to use itself as its subsidiary
37 | scanner.)
38 |
39 | ### implementation ###
40 |
41 | * `struct term *`s are (almost) always `const` in compiled Tamsin
42 | programs (for better sharing; we don't need to make copies of them)
43 | * related: variable-matching is more efficient (directly updates an array
44 | of terms, instead of searching for the variable by name)
45 | * related: creating new atoms uses hash-consing, so that no new
46 | `struct term` for the atom is allocated if one already exists (the
47 | existing one is shared.) This reduces memory usage significantly.
48 |
49 | 0.4
50 | ---
51 |
52 | ### language ###
53 |
54 | * Added `@` (work on different implicit buffer.)
55 |
56 | ### modules ###
57 |
58 | * Added `$:gensym`.
59 | * Added `$:hexchar`.
60 | * Added `$:format_octal`.
61 | * Added `$:length`.
62 | * Added `list:append`.
63 |
64 | ### implementations ###
65 |
66 | * Tamsin-to-C compiler written in Tamsin (`mains/compiler.tamsin`) passes
67 | all tests, and can compile itself.
68 | * Refactored `$` functions into `tamsin.sysmod` module in Python version.
69 |
70 | 0.3
71 | ---
72 |
73 | ### language ###
74 |
75 | * Defined what it means to `reprify` a term.
76 | * Clarified some matters as implementation-defined.
77 |
78 | ### modules ###
79 |
80 | * `$:equal` now does deep equality of arbitrary ground terms.
81 | * `$:repr` added.
82 | * `$:reverse` added.
83 | * Some standard modules ship in the distribution: `list`,
84 | `tamsin_scanner`, and `tamsin_parser`.
85 |
86 | ### implementations ###
87 |
88 | * Support for user-defined modules.
89 | * `tamsin` can take more than one source file on command line; this
90 | is how external modules are supported (by this implementation.)
91 | * Cleaned-up testing framework; Tamsin versions of scanner, grammar,
92 | parser, desugarer, analyzer, and compiler found in `mains` subdir.
93 | * Most `tamsin` verbs, and their versions in Tamsin, corresponding to
94 | intermediate phases, output reprified terms.
95 | * `tamsin` significantly re-factored so that the interpreter and
96 | compiler are more similar, and generating code for production branches
97 | is easier.
98 | * Added Tamsin-to-C compiler written in Tamsin, which can pass the first
99 | 43 or so tests from the spec ("Mini-Tamsin").
100 |
101 | 0.2
102 | ---
103 |
104 | ### language ###
105 |
106 | * Module-member syntax changed from `.` to `:`.
107 | * `:` can be used without any module on the LHS to refer to a production
108 | in the current module.
109 | * Added "fold" forms, binary `/` and ternary `//`.
110 |
111 | ### modules ###
112 |
113 | * `$:char` scanner dropped. Instead, there are `$:byte` (which always
114 | returns 8-bit-clean bytes) and `$:utf8` (which always returns UTF-8
115 | sequences.)
116 | * Added `$:equal(L,R)`.
117 | * `$:unquote(X,L,R)` takes three arguments now.
118 |
119 | ### implementations ###
120 |
121 | * Beginnings of user-defined module support (very rudimentary, not to be
122 | used.)
123 | * Code in `libtamsin` is much more robust. AST-builder written in Tamsin now
124 | compiles and runs correctly.
125 | * Added a desugaring phase to `tamsin`, and a desugarer written in Tamsin.
126 | * Added Micro-Tamsin interpreter, written in Tamsin. Can pass the first
127 | 30 tests from the spec.
128 |
129 | 0.1
130 | ---
131 |
132 | Initial release.
133 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The contents of the Tamsin distribution are distributed under the following
2 | three licenses.
3 |
4 | The documentation (in the `doc/` subdirectory) is covered by the following
5 | BSD-compatible license, modelled after the "Report on the Programming
6 | Language Haskell 98" license:
7 |
8 | -----------------------------------------------------------------------------
9 |
10 | Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
11 |
12 | The authors intend this Report to belong to the entire Tamsin
13 | community, and so we grant permission to copy and distribute it for
14 | any purpose, provided that it is reproduced in its entirety,
15 | including this Notice. Modified versions of this Report may also be
16 | copied and distributed for any purpose, provided that the modified
17 | version is clearly presented as such, and that it does not claim to
18 | be a definition of the Tamsin Programming Language.
19 |
20 | -----------------------------------------------------------------------------
21 |
22 | The source code for the reference interpreter and supporting tools (in the
23 | `src` and `c_src` subdirectories) is covered under the following BSD-style
24 | license:
25 |
26 | -----------------------------------------------------------------------------
27 |
28 | Copyright (c)2014, Chris Pressey, Cat's Eye Technologies.
29 | All rights reserved.
30 |
31 | Redistribution and use in source and binary forms, with or without
32 | modification, are permitted provided that the following conditions
33 | are met:
34 |
35 | Redistributions of source code must retain the above copyright
36 | notices, this list of conditions and the following disclaimer.
37 |
38 | Redistributions in binary form must reproduce the above copyright
39 | notices, this list of conditions, and the following disclaimer in
40 | the documentation and/or other materials provided with the
41 | distribution.
42 |
43 | Neither the names of the copyright holders nor the names of their
44 | contributors may be used to endorse or promote products derived
45 | from this software without specific prior written permission.
46 |
47 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
48 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES INCLUDING, BUT NOT
49 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
50 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
51 | COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
52 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
53 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
54 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
55 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
57 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 | POSSIBILITY OF SUCH DAMAGE.
59 |
60 | -----------------------------------------------------------------------------
61 |
62 | Every example source in the `eg` directory specifies what its own licensing
63 | terms are. Many of them are in the public domain, in which case the following
64 | UNLICENSE applies to them. Others may be under other licenses; see the
65 | specific file in question for more information.
66 |
67 | -----------------------------------------------------------------------------
68 |
69 | This is free and unencumbered software released into the public domain.
70 |
71 | Anyone is free to copy, modify, publish, use, compile, sell, or
72 | distribute this software, either in source code form or as a compiled
73 | binary, for any purpose, commercial or non-commercial, and by any
74 | means.
75 |
76 | In jurisdictions that recognize copyright laws, the author or authors
77 | of this software dedicate any and all copyright interest in the
78 | software to the public domain. We make this dedication for the benefit
79 | of the public at large and to the detriment of our heirs and
80 | successors. We intend this dedication to be an overt act of
81 | relinquishment in perpetuity of all present and future rights to this
82 | software under copyright law.
83 |
84 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
85 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
86 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
87 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
88 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
89 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
90 | OTHER DEALINGS IN THE SOFTWARE.
91 |
92 | For more information, please refer to <http://unlicense.org/>
93 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
CC?=gcc
CFLAGS?=-ansi -g -Ic_src -Lc_src

LCFLAGS?=-ansi -pedantic -g -Wall -Werror -Ic_src -Lc_src

##### libtamsin #####

# Every object is rebuilt when any runtime header changes: several of the
# headers include one another, so per-object header lists would be fragile.
HEADERS=c_src/dict.h c_src/scanner.h c_src/tamsin.h c_src/term.h
OBJECTS=c_src/dict.o c_src/scanner.o c_src/term.o c_src/tamsin.o

# Everything the "executables" section below can build; used by `clean`.
PROGS=bin/tamsin-compiler bin/bootstrapped-compiler \
      bin/micro-tamsin bin/mini-tamsin

.PHONY: all clean

all: c_src/libtamsin.a

# dict.o deliberately has no recipe of its own: it is built by make's
# built-in %.o: %.c rule, which uses $(CFLAGS) rather than the stricter
# $(LCFLAGS).  This line only records its dependencies.
c_src/dict.o: c_src/dict.c $(HEADERS)

c_src/scanner.o: c_src/scanner.c $(HEADERS)
	$(CC) $(LCFLAGS) -c c_src/scanner.c -o $@

c_src/term.o: c_src/term.c $(HEADERS)
	$(CC) $(LCFLAGS) -c c_src/term.c -o $@

c_src/tamsin.o: c_src/tamsin.c $(HEADERS)
	$(CC) $(LCFLAGS) -c c_src/tamsin.c -o $@

c_src/libtamsin.a: $(OBJECTS)
	ar -r $@ $(OBJECTS)


##### executables #####

# Each recipe below writes its generated C to tmp/foo.c.  tmp/ is ignored by
# version control, so it may not exist in a fresh checkout; create it first.

TAMSIN_COMPILER_LIBS=lib/list.tamsin lib/tamsin_scanner.tamsin \
                     lib/tamsin_parser.tamsin lib/tamsin_analyzer.tamsin
bin/tamsin-compiler: c_src/libtamsin.a c_src/tamsin.h \
                     $(TAMSIN_COMPILER_LIBS) \
                     mains/compiler.tamsin
	mkdir -p tmp
	bin/tamsin compile $(TAMSIN_COMPILER_LIBS) mains/compiler.tamsin > tmp/foo.c
	$(CC) $(CFLAGS) tmp/foo.c -o $@ -ltamsin


bin/bootstrapped-compiler: c_src/libtamsin.a c_src/tamsin.h \
                           bin/tamsin-compiler \
                           $(TAMSIN_COMPILER_LIBS) \
                           mains/compiler.tamsin
	mkdir -p tmp
	bin/tamsin-compiler $(TAMSIN_COMPILER_LIBS) mains/compiler.tamsin > tmp/foo.c
	$(CC) $(CFLAGS) tmp/foo.c -o $@ -ltamsin


MICRO_TAMSIN_LIBS=lib/list.tamsin lib/tamsin_scanner.tamsin \
                  lib/tamsin_parser.tamsin
bin/micro-tamsin: c_src/libtamsin.a c_src/tamsin.h \
                  $(MICRO_TAMSIN_LIBS) \
                  mains/micro-tamsin.tamsin
	mkdir -p tmp
	bin/tamsin compile $(MICRO_TAMSIN_LIBS) mains/micro-tamsin.tamsin > tmp/foo.c
	$(CC) $(CFLAGS) tmp/foo.c -o $@ -ltamsin


MINI_TAMSIN_LIBS=lib/list.tamsin lib/tamsin_scanner.tamsin \
                 lib/tamsin_parser.tamsin
bin/mini-tamsin: c_src/libtamsin.a c_src/tamsin.h \
                 $(MINI_TAMSIN_LIBS) \
                 mains/mini-tamsin.tamsin
	mkdir -p tmp
	bin/tamsin compile $(MINI_TAMSIN_LIBS) mains/mini-tamsin.tamsin > tmp/foo.c
	$(CC) $(CFLAGS) tmp/foo.c -o $@ -ltamsin

clean:
	rm -f c_src/libtamsin.a c_src/*.o $(PROGS) tmp/foo.c
64 |
--------------------------------------------------------------------------------
/README.markdown:
--------------------------------------------------------------------------------
1 | Tamsin
2 | ======
3 |
4 | Tamsin is an oddball little language that can't decide if it's a
5 | [meta-language](doc/Philosophy.markdown#meta-language), a
6 | [programming language](doc/Philosophy.markdown#programming-language), or a
7 | [rubbish lister](doc/Philosophy.markdown#rubbish-lister).
8 |
9 | Its primary goal is to allow the rapid development of **parsers**,
10 | **static analyzers**, **interpreters**, and **compilers**, and to allow them
11 | to be expressed *compactly*. Golf your grammar! (Or write it like a decent
12 | human being, if you must.)
13 |
14 | The current released version of Tamsin is 0.5-2017.0502.
15 | As indicated by the 0.x version number, it is a **work in progress**,
16 | with the usual caveat that things may change rapidly (and that version 0.6 might
17 | look completely different.) See [HISTORY](HISTORY.markdown)
18 | for a list of major changes.
19 |
20 | Code Examples
21 | -------------
22 |
23 | Make a story more exciting in **1 line of code**:
24 |
25 | main = ("." & '!' | "?" & '?!' | any)/''.
26 |
27 | Parse an algebraic expression for syntactic correctness in **4 lines of code**:
28 |
29 | main = (expr0 & eof & 'ok').
30 | expr0 = expr1 & {"+" & expr1}.
31 | expr1 = term & {"*" & term}.
32 | term = "x" | "y" | "z" | "(" & expr0 & ")".
33 |
34 | Translate an algebraic expression to RPN (Reverse Polish Notation) in
35 | **7 lines of code**:
36 |
37 | main = expr0 → E & walk(E).
38 | expr0 = expr1 → E1 & {"+" & expr1 → E2 & E1 ← add(E1,E2)} & E1.
39 | expr1 = term → E1 & {"*" & term → E2 & E1 ← mul(E1,E2)} & E1.
40 | term = "x" | "y" | "z" | "(" & expr0 → E & ")" & E.
41 | walk(add(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' +'.
42 | walk(mul(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' *'.
43 | walk(X) = return ' '+X.
44 |
45 | Parse a CSV file (handling quoted commas and quotes correctly) and write
46 | out the 2nd-last field of each record — in **11 lines of code**:
47 |
48 | main = line → L & L ← lines(nil, L) &
49 | {"\n" & line → M & L ← lines(L, M)} & extract(L) & ''.
50 | line = field → F & {"," & field → G & F ← fields(G, F)} & F.
51 | field = strings | bare.
52 | strings = string → T & {string → S & T ← T + '"' + S} & T.
53 | string = "\"" & (!"\"" & any)/'' → T & "\"" & T.
54 | bare = (!(","|"\n") & any)/''.
55 | extract(lines(Ls, L)) = extract(Ls) & extract_field(L).
56 | extract(L) = L.
57 | extract_field(fields(L, fields(T, X))) = print T.
58 | extract_field(X) = X.
59 |
60 | Evaluate an (admittedly trivial) S-expression based language in
61 | **15 lines of code**:
62 |
63 | main = sexp → S using scanner & reverse(S, nil) → SR & eval(SR).
64 | scanner = ({" "} & ("(" | ")" | $:alnum/'')) using $:utf8.
65 | sexp = $:alnum | list.
66 | list = "(" & sexp/nil/pair → L & ")" & L.
67 | head(pair(A, B)) = A.
68 | tail(pair(A, B)) = B.
69 | cons(A, B) = return pair(A, B).
70 | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R).
71 | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R).
72 | eval(pair(cons, pair(A, pair(B, nil)))) =
73 | eval(A) → AE & eval(B) → BE & return pair(AE, BE).
74 | eval(X) = X.
75 | reverse(pair(H, T), A) = reverse(H, nil) → HR & reverse(T, pair(HR, A)).
76 | reverse(nil, A) = A.
77 | reverse(X, A) = X.
78 |
79 | Interpret a small subset of Tamsin in
80 | **[30 lines of code](mains/micro-tamsin.tamsin)**
81 | (not counting the [included batteries](doc/Philosophy.markdown#batteries-included).)
82 |
83 | Compile Tamsin to C in
84 | **[563 lines of code](mains/compiler.tamsin)**
85 | (again, not counting the included batteries.)
86 |
87 | For more information
88 | --------------------
89 |
90 | If the above has piqued your curiosity, you may want to read the specification,
91 | which contains many more small examples written to demonstrate (and test) the
92 | syntax and behavior of Tamsin:
93 |
94 | * [The Tamsin Language Specification](doc/Tamsin.markdown)
95 |
96 | Note that this is the current development version of the specification, and
97 | it may differ from the examples in this document.
98 |
99 | Quick Start
100 | -----------
101 |
102 | The Tamsin reference repository is [hosted on Codeberg](https://codeberg.org/catseye/Tamsin).
103 |
104 | This repository contains the reference implementation of Tamsin, called
105 | `tamsin`, written in Python 2.7. It can both interpret a Tamsin program and
106 | compile a program written in Tamsin to C.
107 |
108 | The distribution also contains a Tamsin-to-C compiler written in Tamsin. It
109 | passes all the tests, and can compile itself.
110 |
111 | While the interpreter is fine for prototyping, note that some informal
112 | benchmarking revealed the compiled C programs to be about 30x faster. **Note**
113 | however that while the compiler passes all the tests, it is still largely
114 | unproven (e.g. its UTF-8 support is not RFC 3629-compliant), so it should be
115 | considered a **proof of concept**.
116 |
117 | To start using `tamsin`,
118 |
119 | * Clone the repository — `git clone https://codeberg.org/catseye/Tamsin`
120 | * Either:
121 | * Put the repo's `bin` directory on your `$PATH`, or
122 | * Make a symbolic link to `bin/tamsin` somewhere already on your `$PATH`.
123 | * Errr... that's it.
124 |
125 | Then you can run `tamsin` like so:
126 |
127 | * `tamsin eg/csv_parse.tamsin < eg/names.csv`
128 |
129 | To use the compiler, you'll need GNU make and `gcc` installed. Type
130 |
131 | * `make`
132 |
133 | to build the runtime library. You can then compile to C and compile the C to
134 | an executable and run the executable all in one step, like so:
135 |
136 | * `tamsin loadngo eg/csv_extract.tamsin < eg/names.csv`
137 |
138 | Design Goals
139 | ------------
140 |
141 | * Allow parsers, static analyzers, interpreters, and compilers to be
142 | quickly prototyped. (And in the future, processor simulators and VMs
143 | and such things too.)
144 | * Allow writing these things very compactly.
145 | * Allow writing anything using only recursive-descent parsing techniques
146 | (insofar as this is possible.)
147 | * Allow writing parsers that look very similar to the grammar of the
148 | language being parsed, so that the structure of the language can be
149 | clearly seen.
150 | * Provide means to solve practical problems.
151 | * Keep the language simple — the grammar should fit on a page, ideally.
152 | * Recognize that the preceding two goals are in tension.
153 | * Have a relatively simple reference implementation (currently less than
154 | 5 KLoC, including everything — debugging support and the C runtime
155 | used by the compiler and the Tamsin modules and implementations.)
156 |
157 | License
158 | -------
159 |
160 | BSD-style license; see the file [LICENSE](LICENSE).
161 |
162 | Related work
163 | ------------
164 |
165 | * [CoCo/R](http://www.scifac.ru.ac.za/coco/) (parser generation)
166 | * [Parsec](http://www.haskell.org/haskellwiki/Parsec) (parser combinators)
167 | * [Perl](http://perl.org/) (rubbish listing)
168 | * [Prolog](https://en.wikipedia.org/wiki/Prolog) (pattern-matching, terms,
169 | backtracking(-ish...))
170 | * [K](https://github.com/kevinlawler/kona) (similar feel; Tamsin
171 | is a _vertical language_)
172 | * [Cat's Eye Technologies](http://catseye.tc)' esoteric and experimental
173 | languages:
174 | * [Squishy2K](http://catseye.tc/node/Squishy2K)
175 | * [Arboretuum](http://catseye.tc/node/Arboretuum)
176 | * [Treacle](http://catseye.tc/node/Treacle)
177 |
--------------------------------------------------------------------------------
/bin/hexout:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# converts raw bytes on input to hex couples on output.

import sys

# Read from the underlying binary stream when there is one (Python 3), so
# that bytes >= 0x80 are not decoded as text before we see them.  Under
# Python 2, sys.stdin has no `buffer` attribute and is used directly.
source = getattr(sys.stdin, 'buffer', sys.stdin)

while True:
    byte = source.read(1)
    if len(byte) < 1:
        # End of input.
        sys.exit(0)
    # bytearray() yields the integer value of the byte whether `byte` is a
    # Python 2 str or a Python 3 bytes object.
    sys.stdout.write('%02x' % bytearray(byte)[0])
12 |
--------------------------------------------------------------------------------
/bin/inhex:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# converts hex couples on input to raw bytes on output.

import sys

# Write to the underlying binary stream when there is one (Python 3), so
# that values >= 0x80 are emitted as single raw bytes rather than being
# re-encoded as text.  Under Python 2, sys.stdout has no `buffer` attribute
# and is used directly.
sink = getattr(sys.stdout, 'buffer', sys.stdout)

while True:
    pair = sys.stdin.read(2)
    if len(pair) < 2:
        # End of input (a trailing odd character, if any, is ignored).
        sys.exit(0)
    # bytes(bytearray([n])) is a one-byte string under both Python 2 and 3.
    sink.write(bytes(bytearray([int(pair, 16)])))
12 |
--------------------------------------------------------------------------------
/bin/tamsin:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# Launcher for the `tamsin` tool: puts the distribution's `src` directory on
# the module search path, then hands the command-line arguments to
# tamsin.main.main.

import sys
from os.path import realpath, dirname, join

# Directory holding this script, located via realpath() so that the
# distribution root is still found when the script is run through a link.
script_dir = dirname(realpath(sys.argv[0]))
tamsin_dir = join(script_dir, '..')
sys.path.insert(0, join(tamsin_dir, 'src'))

from tamsin.main import main


if __name__ == '__main__':
    main(sys.argv[1:], tamsin_dir=tamsin_dir)
14 |
--------------------------------------------------------------------------------
/bin/wrap:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# hard-wraps each line of standard input to at most WIDTH characters.

import sys

WIDTH=120


def split_line(line, width=WIDTH):
    """Yield successive pieces of `line`, each at most `width` characters.

    Every piece except possibly the last is exactly `width` characters long.
    The final piece is always yielded, even when it is empty, so an empty
    input line still produces one (empty) output line.
    """
    while len(line) > width:
        yield line[:width]
        line = line[width:]
    yield line


def main():
    """Copy standard input to standard output, wrapping long lines."""
    for line in sys.stdin:
        line = line.rstrip('\n')
        for piece in split_line(line):
            # sys.stdout.write is used instead of the `print` statement so
            # that the script is valid under both Python 2 and Python 3.
            sys.stdout.write(piece + '\n')


if __name__ == '__main__':
    main()

# 123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
15 |
--------------------------------------------------------------------------------
/c_src/dict.c:
--------------------------------------------------------------------------------
#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "term.h"

#include "dict.h"
7 |
/*
 * One link in a bucket's singly-linked collision chain.  The key is not
 * stored separately: lookups compare against the `atom` and `size` fields
 * of the term held in `value`.
 */
struct chain {
    struct chain *next;         /* next link in this bucket, or NULL */
    const struct term *value;   /* the stored term */
};
12 |
13 | struct dict *dict_new(int num_buckets) {
14 | struct dict *d;
15 | int i;
16 |
17 | d = malloc(sizeof(struct dict));
18 | d->num_buckets = num_buckets;
19 | d->bucket = malloc(sizeof(struct chain *) * d->num_buckets);
20 | for (i = 0; i < d->num_buckets; i++) {
21 | d->bucket[i] = NULL;
22 | }
23 |
24 | return d;
25 | }
26 |
27 | /*** UTILITIES ***/
28 |
/*
 * Hash function, taken from "Compilers: Principles, Techniques, and Tools"
 * by Aho, Sethi, & Ullman (a.k.a. "The Dragon Book", 2nd edition.)
 *
 * Hashes the `key_size` bytes at `key` and reduces the result modulo
 * `table_size`, which must be nonzero.
 *
 * Each byte is read as an unsigned char.  Where plain `char` is signed, a
 * byte >= 0x80 (e.g. any byte of a multi-byte UTF-8 sequence) would
 * otherwise be sign-extended to a huge value when added to `h`, setting
 * high bits that the 0xf0000000 fold below never clears.
 */
static size_t hashpjw(const char *key, size_t key_size, size_t table_size) {
    size_t i;
    unsigned long int h = 0, g;

    for (i = 0; i < key_size; i++) {
        h = (h << 4) + (unsigned char)key[i];
        if ((g = h & 0xf0000000)) {
            /* Fold the top nibble back into the low bits, then clear it. */
            h = (h ^ (g >> 24)) ^ g;
        }
    }

    return h % table_size;
}
46 |
47 | /*
48 | * Create a new chain for a bucket (not called directly by client code.)
49 | */
50 | static struct chain *
51 | chain_new(const struct term *value)
52 | {
53 | struct chain *c = malloc(sizeof(struct chain));
54 |
55 | c->next = NULL;
56 | c->value = value;
57 |
58 | return c;
59 | }
60 |
61 | /*
62 | * Locate the bucket number a particular key would be located in, and the
63 | * chain link itself if such a key exists (or NULL if it could not be found.)
64 | */
65 | static void
66 | dict_locate(struct dict *d, const char *key, size_t key_size,
67 | size_t *b_index, struct chain **c)
68 | {
69 | *b_index = hashpjw(key, key_size, d->num_buckets);
70 | for (*c = d->bucket[*b_index]; *c != NULL; *c = (*c)->next) {
71 | if ((*c)->value->size == key_size &&
72 | memcmp(key, (*c)->value->atom, key_size) == 0)
73 | break;
74 | }
75 | }
76 |
77 | /*** OPERATIONS ***/
78 |
79 | const struct term *
80 | dict_fetch(struct dict *d, const char *key, size_t key_size)
81 | {
82 | struct chain *c;
83 | size_t i;
84 |
85 | dict_locate(d, key, key_size, &i, &c);
86 |
87 | return c != NULL ? c->value : NULL;
88 | }
89 |
90 | void
91 | dict_store(struct dict *d, const struct term *t)
92 | {
93 | struct chain *c;
94 | size_t i;
95 |
96 | dict_locate(d, t->atom, t->size, &i, &c);
97 | if (c == NULL) {
98 | /* Chain does not exist, add a new one. */
99 | c = chain_new(t);
100 | c->next = d->bucket[i];
101 | d->bucket[i] = c;
102 | } else {
103 | assert("term already hash consed" == NULL);
104 | /* Chain already exists, replace the value. */
105 | c->value = t;
106 | }
107 | }
108 |
--------------------------------------------------------------------------------
/c_src/dict.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
3 | * Distributed under a BSD-style license; see LICENSE for more information.
4 | */
5 |
#ifndef TAMSIN_DICT_H
#define TAMSIN_DICT_H

/* For size_t. */
#include <stdlib.h>

/*
 * Forward declarations, so that this header can be included on its own.
 * `struct chain` is private to dict.c.
 */
struct chain;
struct term;

/*
 * A hash table of terms, using separate chaining for collisions.
 */
struct dict {
    struct chain **bucket;      /* array of `num_buckets` chain heads */
    size_t num_buckets;
};

/*
 * Create a new dictionary with the given number of buckets.
 * Since this is only used for hash-consing right now, there is only one.
 */
struct dict *dict_new(int);

/*
 * Retrieve a value from a dictionary, given its key (a pointer to the key
 * bytes and their length), or NULL if it's not there.
 */
const struct term *dict_fetch(struct dict *, const char *, size_t);

/*
 * Insert a value into a dictionary.
 */
void dict_store(struct dict *, const struct term *);

#endif
34 |
--------------------------------------------------------------------------------
/c_src/scanner.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
3 | * Distributed under a BSD-style license; see LICENSE for more information.
4 | */
5 |
6 | #include "scanner.h"
7 | #include "term.h"
8 | #include "tamsin.h"
9 |
10 | struct scanner *scanner_new(const char *buffer, size_t size) {
11 | struct scanner *scanner;
12 |
13 | scanner = malloc(sizeof(struct scanner));
14 | scanner->buffer = buffer;
15 | scanner->size = size;
16 | scanner->position = 0;
17 | scanner->reset_position = 0;
18 | scanner->engines = NULL;
19 |
20 | return scanner;
21 | }
22 |
/*
 * The two built-in scanning engines.  These functions intentionally do
 * nothing: scan() compares an engine's `production` pointer against their
 * addresses to select its byte-at-a-time or UTF-8 code path.
 */

void scanner_byte_engine(void)
{
}

void scanner_utf8_engine(void)
{
}
28 |
29 | #define UTF_8_LEN_2_MASK 0xe0 /* 0b11100000 */
30 | #define UTF_8_LEN_2_BITS 0xc0 /* 0b11000000 */
31 |
32 | #define UTF_8_LEN_3_MASK 0xf0 /* 0b11110000 */
33 | #define UTF_8_LEN_3_BITS 0xe0 /* 0b11100000 */
34 |
35 | #define UTF_8_LEN_4_MASK 0xf8 /* 0b11111000 */
36 | #define UTF_8_LEN_4_BITS 0xf0 /* 0b11110000 */
37 |
38 | const struct term *scan(struct scanner *s) {
39 | if (s->position >= s->size) {
40 | return &tamsin_EOF;
41 | }
42 | if (s->engines == NULL || s->engines->production == &scanner_utf8_engine) {
43 | char c = s->buffer[s->position];
44 | int len = 1;
45 | const struct term *t;
46 |
47 | if ((c & UTF_8_LEN_2_MASK) == UTF_8_LEN_2_BITS) {
48 | len = 2;
49 | } else if ((c & UTF_8_LEN_3_MASK) == UTF_8_LEN_3_BITS) {
50 | len = 3;
51 | } else if ((c & UTF_8_LEN_4_MASK) == UTF_8_LEN_4_BITS) {
52 | len = 4;
53 | }
54 |
55 | t = term_new_atom(s->buffer + s->position, len);
56 | s->position += len;
57 | return t;
58 | } else if (s->engines->production == &scanner_byte_engine) {
59 | char c = s->buffer[s->position];
60 |
61 | s->position++;
62 | return term_new_atom_from_char(c);
63 | } else {
64 | const struct term *save_result = result;
65 | int save_reset_position = s->reset_position;
66 | void (*production)(void) = s->engines->production;
67 |
68 | scanner_push_engine(s, &scanner_utf8_engine);
69 | production();
70 | scanner_pop_engine(s);
71 |
72 | s->reset_position = save_reset_position;
73 |
74 | if (!ok) {
75 | result = save_result;
76 | return &tamsin_EOF;
77 | } else {
78 | return result;
79 | }
80 | }
81 | }
82 |
83 | void unscan(struct scanner *s) {
84 | s->position = s->reset_position;
85 | }
86 |
87 | void commit(struct scanner *s) {
88 | s->reset_position = s->position;
89 | }
90 |
91 | struct engine *engine_new(void (*production)(void)) {
92 | struct engine *e = malloc(sizeof(struct engine));
93 |
94 | e->production = production;
95 | return e;
96 | }
97 |
98 | void scanner_push_engine(struct scanner *s, void (*production)(void)) {
99 | struct engine *e = engine_new(production);
100 |
101 | e->next = s->engines;
102 | s->engines = e;
103 | }
104 |
105 | void scanner_pop_engine(struct scanner *s) {
106 | /* struct engine *e = s->engines; */
107 |
108 | s->engines = s->engines->next;
109 | /* engine_free(e); */
110 | }
111 |
--------------------------------------------------------------------------------
/c_src/scanner.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
3 | * Distributed under a BSD-style license; see LICENSE for more information.
4 | */
5 |
6 | #ifndef TAMSIN_SCANNER_H
7 | #define TAMSIN_SCANNER_H
8 |
9 | #include "term.h"
10 |
11 | /* -------------------------------------------------------- scanner */
12 |
/*
 * One entry in a scanner's stack of scanning engines, kept as a singly
 * linked list.
 */
struct engine {
    void (*production)(void);   /* production that implements this engine */
    struct engine *next;        /* entry beneath this one on the stack */
};

/*
 * Scanner state over an in-memory buffer.
 */
struct scanner {
    const char *buffer;         /* bytes being scanned */
    size_t size;                /* presumably the length of buffer; confirm in scanner_new */
    int position;               /* offset in buffer of the next byte to read */
    int reset_position;         /* offset that unscan() returns to; set by commit() */
    struct engine *engines;     /* head (top) of the engine stack */
};
25 |
26 | struct scanner *scanner_new(const char *, size_t);
27 | const struct term *scan(struct scanner *);
28 | void unscan(struct scanner *);
29 | void commit(struct scanner *);
30 | void scanner_push_engine(struct scanner *, void (*)(void));
31 | void scanner_pop_engine(struct scanner *);
32 | void scanner_byte_engine(void);
33 | void scanner_utf8_engine(void);
34 |
35 | /*
36 | * This value is never (and should never be) exposed to Tamsin programs!
37 | * It should not be considered a kind of term, really. That's just for
38 | * convenience in this implementation.
39 | */
40 | extern struct term tamsin_EOF;
41 |
42 | #endif
43 |
--------------------------------------------------------------------------------
/c_src/tamsin.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
3 | * Distributed under a BSD-style license; see LICENSE for more information.
4 | */
5 |
6 | #ifndef TAMSIN_TAMSIN_H
7 | #define TAMSIN_TAMSIN_H
8 |
9 | #include "term.h"
10 | #include "scanner.h"
11 |
12 | /* -------------------------------------------------------- tamsin */
13 |
14 | void tamsin_eof(struct scanner *);
15 | void tamsin_any(struct scanner *);
16 | void tamsin_expect(struct scanner *, const struct term *);
17 | void tamsin_alnum(struct scanner *);
18 | void tamsin_upper(struct scanner *);
19 | void tamsin_startswith(struct scanner *, const char *);
20 | const struct term *tamsin_unquote(const struct term *,
21 | const struct term *, const struct term *);
22 | const struct term *tamsin_mkterm(const struct term *, const struct term *);
23 | const struct term *tamsin_equal(const struct term *, const struct term *);
24 | const struct term *tamsin_reverse(const struct term *, const struct term *);
25 | const struct term *tamsin_gensym(const struct term *);
26 | const struct term *tamsin_hexbyte(const struct term *, const struct term *);
27 | const struct term *tamsin_format_octal(const struct term *);
28 | const struct term *tamsin_length(const struct term *);
29 |
30 | /*
31 | * Given a possibly non-atom term, return an atom consisting of
32 | * contents of the given term reprified into an atom.
33 | *
34 | * The returned term is NOT always newly allocated.
35 | */
36 | const struct term *tamsin_repr(const struct term *);
37 |
38 | int tamsin_isalpha(char);
39 | int tamsin_isupper(char);
40 | int tamsin_isdigit(char);
41 | int tamsin_isalnum(char);
42 |
43 | /* --------------------------------------------------------------- */
44 | /* global state: result of last action */
45 |
46 | extern int ok;
47 | extern const struct term *result;
48 |
49 | #endif
50 |
--------------------------------------------------------------------------------
/c_src/term.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
3 | * Distributed under a BSD-style license; see LICENSE for more information.
4 | */
5 |
6 | #include
7 | #include
8 | #include
9 | #include
10 |
11 | #include "term.h"
12 |
13 | #include "dict.h"
14 |
15 | /*
16 | * this code LEAKS MEMORY all over the place, but that's "ok" because
17 | * Tamsin programs "aren't long running". and it's better than having
18 | * buffer overflows.
19 | */
20 |
21 | struct dict *hash_conser = NULL;
22 |
23 | struct term tamsin_EOF = {"EOF", 3, -1, NULL};
24 |
25 | int hits = 0;
26 | int misses = 0;
27 |
28 | struct term *term_single_byte_table = NULL;
29 | char term_single_byte_data[256];
30 |
31 | const struct term *term_new_atom(const char *atom, size_t size) {
32 | struct term *t;
33 | char *text;
34 |
35 | /*
36 | if (size == 1) {
37 | int i;
38 | if (term_single_byte_table == NULL) {
39 | term_single_byte_table = malloc(sizeof(struct term) * 256);
40 | for (i = 0; i < 256; i++) {
41 | term_single_byte_data[i] = (char)i;
42 | term_single_byte_table[i].atom = term_single_byte_data + i;
43 | term_single_byte_table[i].size = 1;
44 | term_single_byte_table[i].index = -1;
45 | term_single_byte_table[i].subterms = NULL;
46 | }
47 | }
48 | i = ((unsigned char *)atom)[0];
49 | return &term_single_byte_table[i];
50 | }
51 | */
52 |
53 | if (hash_conser == NULL) {
54 | hash_conser = dict_new(2503);
55 | }
56 | t = (struct term *)dict_fetch(hash_conser, atom, size);
57 | if (t != NULL) {
58 | hits++;
59 | return t;
60 | }
61 |
62 | t = malloc(sizeof(struct term));
63 | text = malloc(size);
64 | memcpy(text, atom, size);
65 | t->atom = text;
66 | t->size = size;
67 | t->index = -1;
68 | t->subterms = NULL;
69 |
70 | dict_store(hash_conser, t);
71 | misses++;
72 |
73 | return t;
74 | }
75 |
/*
 * Return the atom whose text is the single byte `c`.
 */
const struct term *term_new_atom_from_char(char c) {
    /* term_new_atom() reads exactly one byte, so no terminator is needed. */
    return term_new_atom(&c, 1);
}
84 |
/*
 * Return the atom whose text is the NUL-terminated string `atom`
 * (the terminator itself is not part of the atom's text).
 */
const struct term *term_new_atom_from_cstring(const char *atom) {
    size_t length = strlen(atom);

    return term_new_atom(atom, length);
}
88 |
89 | const struct term *term_new_constructor(const char *tag, size_t size,
90 | struct termlist *subterms)
91 | {
92 | struct term *t = malloc(sizeof(struct term));
93 | char *text = malloc(size);
94 |
95 | memcpy(text, tag, size);
96 | t->atom = text;
97 | t->size = size;
98 | t->index = -1;
99 | t->subterms = subterms;
100 |
101 | return t;
102 | }
103 |
104 | void termlist_add_term(struct termlist **tl, const struct term *term) {
105 | struct termlist *new_tl;
106 |
107 | new_tl = malloc(sizeof(struct termlist));
108 | new_tl->term = term;
109 | new_tl->next = *tl;
110 | *tl = new_tl;
111 | }
112 |
113 | const struct term *term_new_variable(const char *name, size_t size, int index) {
114 | struct term *t;
115 | char *text;
116 |
117 | t = malloc(sizeof(struct term));
118 | text = malloc(size);
119 | memcpy(text, name, size);
120 | t->atom = text;
121 | t->size = size;
122 | assert(index != -1);
123 | t->index = index;
124 | t->subterms = NULL;
125 |
126 | return t;
127 | }
128 |
129 | int term_atoms_equal(const struct term *lhs, const struct term *rhs) {
130 | if (lhs->size != rhs->size) {
131 | return 0;
132 | }
133 | return memcmp(lhs->atom, rhs->atom, lhs->size) == 0;
134 | }
135 |
136 | int term_atom_cstring_equal(const struct term *lhs, const char *string) {
137 | if (lhs->size != strlen(string)) {
138 | return 0;
139 | }
140 | return memcmp(lhs->atom, string, lhs->size) == 0;
141 | }
142 |
143 | const struct term *term_concat(const struct term *lhs, const struct term *rhs) {
144 | const struct term *t;
145 | int new_size;
146 | char *new_atom;
147 |
148 | assert(lhs->subterms == NULL);
149 | assert(rhs->subterms == NULL);
150 |
151 | new_size = lhs->size + rhs->size;
152 | new_atom = malloc(new_size);
153 | memcpy(new_atom, lhs->atom, lhs->size);
154 | memcpy(new_atom + lhs->size, rhs->atom, rhs->size);
155 | t = term_new_atom(new_atom, new_size);
156 | free(new_atom);
157 |
158 | return t;
159 | }
160 |
161 | const struct term COMMASPACE = { ", ", 2, -1, NULL };
162 |
163 | const struct term *term_flatten(const struct term *t) {
164 | struct termlist *tl;
165 |
166 | if (t->subterms == NULL) { /* it's an atom */
167 | return t;
168 | } else { /* it's a constructor */
169 | const struct term *n;
170 | /* we clone t here to get an atom from its tag */
171 | n = term_concat(term_new_atom(t->atom, t->size),
172 | term_new_atom_from_char('('));
173 |
174 | for (tl = t->subterms; tl != NULL; tl = tl->next) {
175 | n = term_concat(n, term_flatten(tl->term));
176 | if (tl->next != NULL) {
177 | n = term_concat(n, &COMMASPACE);
178 | }
179 | }
180 | n = term_concat(n, term_new_atom_from_char(')'));
181 | return n;
182 | }
183 | }
184 |
185 | void term_fput(const struct term *t, FILE *f) {
186 | const struct term *flat = term_flatten(t);
187 |
188 | fwrite(flat->atom, 1, flat->size, f);
189 | }
190 |
191 | int term_equal(const struct term *pattern, const struct term *ground)
192 | {
193 | struct termlist *tl1, *tl2;
194 |
195 | assert(pattern->index == -1);
196 | assert(ground->index == -1);
197 |
198 | if (!term_atoms_equal(pattern, ground)) {
199 | return 0;
200 | }
201 | if (pattern->subterms == NULL && ground->subterms == NULL) {
202 | return 1;
203 | }
204 |
205 | tl1 = pattern->subterms;
206 | tl2 = ground->subterms;
207 | while (tl1 != NULL && tl2 != NULL) {
208 | if (!term_equal(tl1->term, tl2->term)) {
209 | return 0;
210 | }
211 | tl1 = tl1->next;
212 | tl2 = tl2->next;
213 | }
214 | if (tl1 != NULL || tl2 != NULL) {
215 | return 0;
216 | }
217 | return 1;
218 | }
219 |
220 | int term_match_unifier(const struct term *pattern, const struct term *ground,
221 | const struct term **variables)
222 | {
223 | struct termlist *tl1, *tl2;
224 |
225 | if (pattern->index >= 0) {
226 | variables[pattern->index] = ground;
227 | return 1;
228 | }
229 | if (!term_atoms_equal(pattern, ground)) {
230 | return 0;
231 | }
232 | if (pattern->subterms == NULL && ground->subterms == NULL) {
233 | return 1;
234 | }
235 |
236 | tl1 = pattern->subterms;
237 | tl2 = ground->subterms;
238 | while (tl1 != NULL && tl2 != NULL) {
239 | if (!term_match_unifier(tl1->term, tl2->term, variables)) {
240 | return 0;
241 | }
242 | tl1 = tl1->next;
243 | tl2 = tl2->next;
244 | }
245 | if (tl1 != NULL || tl2 != NULL) {
246 | return 0;
247 | }
248 |
249 | return 1;
250 | }
251 |
--------------------------------------------------------------------------------
/c_src/term.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
3 | * Distributed under a BSD-style license; see LICENSE for more information.
4 | */
5 |
6 | #include
7 | #include
8 |
9 | #ifndef TAMSIN_TERM_H
10 | #define TAMSIN_TERM_H
11 |
12 | extern int hits;
13 | extern int misses;
14 |
15 | /*
16 | * If `subterms` is NULL and `index` == -1, this is an atom.
17 | *
18 | * If `subterms` is non-NULL, this is a constructor.
19 | *
20 | * If `index` >= 0, this is a variable.
21 | *
22 | * It is not a legal term if both `subterms` is non-NULL and `index` >= 0.
23 | *
24 | * In all cases, atom should not be NULL.
25 | */
struct term {
    const char *atom;           /* text of the atom, tag or name; `size` bytes, not NUL-terminated */
    size_t size;                /* number of bytes at `atom` */
    int index;                  /* -1 for atoms and constructors; >= 0 for a variable */
    struct termlist *subterms;  /* subterm list of a constructor, else NULL */
};

/* A cell in a singly linked list of terms. */
struct termlist {
    const struct term *term;    /* the element held by this cell */
    struct termlist *next;      /* following cell, or NULL at the end */
};
37 |
38 | /*
39 | * Returns an "atom" term for the given character string. Atoms are
40 | * hash-consed: an equal existing atom is returned if present; otherwise the
41 | * new term holds a dynamically allocated copy of the string. Either way the
42 | * given string may be freed afterwards. The result may be shared, so build
43 | * constructors with term_new_constructor. Segfaults if out of memory.
44 | */
45 | const struct term *term_new_atom(const char *, size_t);
46 | const struct term *term_new_atom_from_cstring(const char *);
47 | const struct term *term_new_atom_from_char(char c);
48 |
49 | const struct term *term_new_constructor(const char *, size_t,
50 | struct termlist *);
51 | void termlist_add_term(struct termlist **, const struct term *);
52 |
53 | const struct term *term_new_variable(const char *, size_t, int);
54 |
55 | /*
56 | * Returns 1 if the atom portion of both terms is identical, otherwise 0.
57 | */
58 | int term_atoms_equal(const struct term *, const struct term *);
59 |
60 | /*
61 | * Returns 1 if the atom portion of term is identical to given C string, else 0.
62 | */
63 | int term_atom_cstring_equal(const struct term *, const char *);
64 |
65 | /*
66 | * Given the name of a variable, return the variable term of the
67 | * same name that is leftmost, uppermost in the given term.
68 | */
69 | /*
70 | struct term *term_find_variable(const struct term *, const char *);
71 | */
72 |
73 | /*
74 | * Given two "atom" terms, return a new "atom" term consisting of the
75 | * text of the input terms concatenated together.
76 | */
77 | const struct term *term_concat(const struct term *, const struct term *);
78 |
79 | /*
80 | * Given a possibly non-atom term, return an atom consisting of
81 | * contents of the given term flattened into an atom.
82 | *
83 | * The returned term is NOT always newly allocated.
84 | */
85 | const struct term *term_flatten(const struct term *);
86 |
87 | void term_fput(const struct term *, FILE *);
88 |
89 | /*
90 | * Both terms must be ground.
91 | */
92 | int term_equal(const struct term *, const struct term *);
93 |
94 | /*
95 | * The third argument is an array of struct term *'s. It will
96 | * be updated with bindings.
97 | */
98 | int term_match_unifier(const struct term *, const struct term *,
99 | const struct term **);
100 |
101 | #endif
102 |
--------------------------------------------------------------------------------
/doc/6502-sketch.tamsin:
--------------------------------------------------------------------------------
1 | # a sketch of what a Tamsin program to simulate a subset of the 6502
2 | # might look like.
3 |
4 | # note that the 6502 memory is in the IMPLICIT BUFFER.
5 |
6 | sim6502 = instr(0,0,0) using $:byte.
7 |
8 | instr(A,X,Y) =
9 | "\xA9" & any → A & instr(A,X,Y) # LDA #
10 | | "\xC8" & inc(Y) → Y & instr(A,X,Y) # INY
11 | | "\x8A" & A ← X & instr(A,X,Y) # TAX
12 | | "\x4C" & word → W & $:seek(W) & instr(A,X,Y) # JMP
13 | | etc.
14 |
15 | word =
16 | any → Lo & any → Hi & return $:add($:ord(Lo), $:mul($:ord(Hi), 256)).
17 |
18 | etc.
19 |
20 |
21 | # That's the recursive version; compiling it to C currently would not be
22 | # nice to the stack. Here's an iterative version:
23 |
24 |
25 | sim6502 =
26 | A ← 0 & X ← 0 & Y ← 0 &
27 | !{instr(A,X,Y) → state(A,X,Y)} using $:byte.
28 |
29 | instr(A,X,Y) =
30 | "\xA9" & any → A & return! state(A,X,Y) # LDA #
31 | !| "\xC8" & inc(Y) → Y & return! state(A,X,Y) # INY
32 | !| "\x8A" & A ← X & return! state(A,X,Y) # TAX
33 | !| "\x4C" & word → W & $:seek(W) & return! state(A,X,Y) # JMP
34 | !| "\x00" & return! halted # BRK
35 | !| etc.
36 |
37 |
38 | # this uses ! (non-backtracking) and return! (immediate return from production)
39 | # (not sure about either of these...)
40 |
--------------------------------------------------------------------------------
/doc/Advanced_Features.markdown:
--------------------------------------------------------------------------------
1 | Advanced Features of the Tamsin Language
2 | ========================================
3 |
4 | This document is a **work in progress**.
5 |
6 | Note that none of these features are in Tamsin version 0.1. Although the
7 | reference implementation might support them, or at least the syntax for
8 | them, they should be regarded as undefined in 0.1. They may appear in
9 | 0.2.
10 |
11 | -> Tests for functionality "Intepret Tamsin program"
12 |
13 | Three good ways to shoot yourself in the foot
14 | ---------------------------------------------
15 |
16 | 1, forget that Tamsin is still basically a *programming* language, or at
17 | best an LL(n) grammar, and try to write a left-recursive rule:
18 |
19 | expr = expr & "+" & expr | expr & "*" & expr | "0" | "1".
20 |
21 | 2, base a `{}` loop around something that always succeeds, like `return` or
22 | `eof` at the end of the input.
23 |
24 | expr = {"k" | return l}.
25 |
26 | 3, base a loop around something that doesn't consume any input, like `!`.
27 |
28 | expr = !"\n" & expr
29 |
30 | Advanced Assignment
31 | -------------------
32 |
33 | The right-hand side of `→` can actually be more than a variable name;
34 | it can be a pattern term, just like is used in the arguments, above.
35 | This can be useful for "deconstructing" a compound return value from a
36 | production to extract the parts you want.
37 |
38 | | main = foo → pair(A,B) & return A.
39 | | foo = return pair(wellington, trainer).
40 | = wellington
41 |
42 | | main = foo → pair(A,B) & return B.
43 | | foo = return pair(wellington, trainer).
44 | = trainer
45 |
46 | Even without variables, this can also be useful simply to assert something
47 | returns some value.
48 |
49 | | main = foo → b & print 'yes' | print 'no'.
50 | | foo = return a.
51 | = no
52 | = no
53 |
54 | | main = foo → b & print 'yes' | print 'no'.
55 | | foo = return b.
56 | = yes
57 | = yes
58 |
59 | Advanced Programming
60 | --------------------
61 |
62 | Before the first production in a program, any number of _pragmas_ may be
63 | given. Pragmas may affect how the program following them is parsed.
64 | Each pragma begins with a `@` followed by a bareword indicating the
65 | kind of pragma, followed by a number of arguments specific to that kind
66 | of pragma, followed by a `.`.
67 |
68 | | @alias zrrk 2 = jersey.
69 | | @unalias zrrk.
70 | | main = foo.
71 | | foo = "b".
72 | + b
73 | = b
74 |
75 | ### `@alias` ###
76 |
77 | The pragma `@alias` introduces an alias. Its syntax consists of the
78 | name of the alias (a bareword), followed by an integer which indicates
79 | the _arity_, followed by `=`, followed by the contents of the alias
80 | (i.e., what is being aliased; presently, this must be a non-terminal.)
81 |
82 | This sets up a syntax rule, in the rule context, that, when the alias
83 | name is encountered, parses as a call to the aliased non-terminal; in
84 | addition, this syntax rule is special in that it looks for exactly
85 | _arity_ number of terms following the alias name. Parentheses are not
86 | required to delimit these terms.
87 |
88 | | @alias foo 2 = jersey.
89 | | main = jersey(a,b) & foo c d.
90 | | jersey(A,B) = «A» & «B».
91 | + abcd
92 | = d
93 |
94 | The pragma `@unalias` removes a previously-introduced alias.
95 |
96 | | @alias foo 2 = jersey.
97 | | @unalias foo.
98 | | main = jersey(a,b) & foo c d.
99 | | jersey(A,B) = «A» & «B».
100 | + abcd
101 | ? Expected '.' at ' c d
102 |
103 | It is an error to attempt to unalias an alias that hasn't been established.
104 |
105 | | @alias foo 2 = jersey.
106 | | @unalias bar.
107 | | main = return ok.
108 | ? KeyError
109 |
110 | Note that various of Tamsin's "keywords" are actually built-in aliases for
111 | productions in the `$` module, and they may be unaliased.
112 |
113 | | @unalias return.
114 | | main = return ok.
115 | ? Expected '.' at ' ok.'
116 |
117 | | @unalias return.
118 | | main = $.return(ok).
119 | = ok
120 |
121 | ### Rule Formals ###
122 |
123 | Then we no longer pattern-match terms. They're just strings. So we... we
124 | parse them. Here's a preview, and we'll get more serious about this further
125 | below.
126 |
127 | Now that you can create scanners and parsers to your heart's desire, we
128 | return to the reason you would even need to: terms vs. rules in the
129 | "formal arguments" part of a production definition.
130 |
131 | | main = ("a" | "b" | "c") → C & donkey('f' + C) → D & return D.
132 | | donkey["f" & ("a" | "c")] = return yes.
133 | | donkey["f" & "b"] = return no.
134 | + a
135 | = yes
136 |
137 | | main = ("a" | "b" | "c") → C & donkey('f' + C) → D & return D.
138 | | donkey["f" & ("a" | "c")] = return yes.
139 | | donkey["f" & "b"] = return no.
140 | + b
141 | = no
142 |
143 | | main = ("a" | "b" | "c") → C & donkey('f' + C) → D & return D.
144 | | donkey["f" & ("a" | "c")] = return yes.
145 | | donkey["f" & "b"] = return no.
146 | + c
147 | = yes
148 |
149 | Variables that are set in a parse-pattern formals are available to
150 | the production's rule.
151 |
152 | | main = donkey(world).
153 | | donkey[any → E] = return hello(E).
154 | = hello(w)
155 |
156 | | main = donkey(world).
157 | | donkey[any → E using word] = return hello(E).
158 | | word = (T ← '' & {$.alnum → S & T ← T + S} & T) using $.char.
159 | = hello(world)
160 |
161 | No variables from the caller leak into the called production.
162 |
163 | | main = set F = whatever & donkey(world).
164 | | donkey[any → E] = return hello(F).
165 | ? KeyError
166 |
167 | Terms are stringified before being matched.
168 |
169 | | main = donkey(a(b(c))).
170 | | donkey["a" & "(" & "b" & "(" & "c" & ")" & ")"] = return yes.
171 | = yes
172 |
173 | Thus, in this sense at least, terms are sugar for strings.
174 |
175 | | main = donkey('a(b(c))').
176 | | donkey["a" & "(" & "b" & "(" & "c" & ")" & ")"] = return yes.
177 | = yes
178 |
179 | The rule formals may call on other rules in the program.
180 |
181 | | main = donkey('pair(pair(0,1),1)').
182 | | donkey[pair → T using mini] = return its_a_pair(T).
183 | | donkey[bit → T using mini] = return its_a_bit(T).
184 | | thing = pair | bit.
185 | | pair = "pair" & "(" & thing → A & "," & thing → B & ")" & return pair(A,B).
186 | | bit = "0" | "1".
187 | | mini = (bit | "(" | ")" | "," | word) using $.char.
188 | | word = (T ← '' & {$.alnum → S & T ← T + S} & T).
189 | = its_a_pair(pair(pair(0, 1), 1))
190 |
191 | ### Auto-term creation from productions ###
192 |
193 | An experimental feature. But Rooibos does it, and it could help make
194 | parser development faster/shorter. Note that this feature is not fully
195 | implemented; therefore the test is disabled.
196 |
197 | | main = expr0.
198 | | expr0! = expr1 & {"+" & expr1}.
199 | | expr1! = term & {"*" & term}.
200 | | term = "x" | "y" | "z" | "(" & expr0 & ")".
201 | + x+y*(z+x+y)
202 | = expr0(expr1, +, expr1)
203 |
--------------------------------------------------------------------------------
/doc/Case_Study.markdown:
--------------------------------------------------------------------------------
1 | Case Study: Parsing and Evaluating S-Expressions in Tamsin
2 | ==========================================================
3 |
4 | -> Tests for functionality "Intepret Tamsin program"
5 |
6 | We now have enough tools at our disposal to parse and evaluate simple
7 | S-expressions (from Lisp or Scheme).
8 |
9 | Note that we no longer have `$.tamsin`, so these examples don't work.
10 | They're left here to demonstrate the development process. For now, see
11 | `eg/sexpr-eval.tamsin`.
12 |
13 | We can write such a parser with `{}`, but the result is a bit messy.
14 |
15 | | main = sexp using $.tamsin.
16 | | sexp = symbol | list.
17 | | list = "(" &
18 | | set L = nil &
19 | | {sexp → S & set L = pair(S, L)} &
20 | | ")" &
21 | | return L.
22 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
23 | + (cons (a (cons b nil)))
24 | = pair(pair(pair(nil, pair(b, pair(cons, nil))), pair(a, nil)), pair(cons, nil))
25 |
26 | So let's write it in the less intuitive, recursive way:
27 |
28 | | main = sexp using $.tamsin.
29 | |
30 | | sexp = symbol | list.
31 | | list = "(" & listtail(nil).
32 | | listtail(L) = sexp → S & listtail(pair(S, L))
33 | | | ")" & return L.
34 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
35 | + (a b)
36 | = pair(b, pair(a, nil))
37 |
38 | Nice. But it returns a term that's backwards. So we need to write a
39 | reverser. In Erlang, this would be
40 |
41 | reverse([H|T], A) -> reverse(T, [H|A]).
42 | reverse([], A) -> A.
43 |
44 | In Tamsin, it's:
45 |
46 | | main = sexp → S using $.tamsin & reverse(S, nil) → SR & return SR.
47 | |
48 | | sexp = symbol | list.
49 | | list = "(" & listtail(nil).
50 | | listtail(L) = sexp → S & listtail(pair(S, L))
51 | | | ")" & return L.
52 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
53 | |
54 | | reverse(pair(H, T), A) =
55 | | reverse(T, pair(H, A)) → TR &
56 | | return TR.
57 | | reverse(nil, A) =
58 | | return A.
59 | + (a b)
60 | = pair(a, pair(b, nil))
61 |
62 | But it's not deep. It only reverses the top-level list.
63 |
64 | | main = sexp → S using $.tamsin & reverse(S, nil) → SR & return SR.
65 | |
66 | | sexp = symbol | list.
67 | | list = "(" & listtail(nil).
68 | | listtail(L) = sexp → S & listtail(pair(S, L))
69 | | | ")" & return L.
70 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
71 | |
72 | | reverse(pair(H, T), A) =
73 | | reverse(T, pair(H, A)) → TR &
74 | | return TR.
75 | | reverse(nil, A) =
76 | | return A.
77 | + (a (c b) b)
78 | = pair(a, pair(pair(b, pair(c, nil)), pair(b, nil)))
79 |
80 | So here's a deep reverser.
81 |
82 | | main = sexp → S using $.tamsin & reverse(S, nil) → SR & return SR.
83 | |
84 | | sexp = symbol | list.
85 | | list = "(" & listtail(nil).
86 | | listtail(L) = sexp → S & listtail(pair(S, L))
87 | | | ")" & return L.
88 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
89 | |
90 | | reverse(pair(H, T), A) =
91 | | reverse(H, nil) → HR &
92 | | reverse(T, pair(HR, A)) → TR &
93 | | return TR.
94 | | reverse(nil, A) =
95 | | return A.
96 | | reverse(X, A) =
97 | | return X.
98 | + (a (c b) b)
99 | = pair(a, pair(pair(c, pair(b, nil)), pair(b, nil)))
100 |
101 | Finally, a little sexpr evaluator.
102 |
103 | | main = sexp → S using $.tamsin & reverse(S, nil) → SR & eval(SR).
104 | |
105 | | sexp = symbol | list.
106 | | list = "(" & listtail(nil).
107 | | listtail(L) = sexp → S & listtail(pair(S, L))
108 | | | ")" & return L.
109 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
110 | |
111 | | head(pair(A, B)) = return A.
112 | | tail(pair(A, B)) = return B.
113 | | cons(A, B) = return pair(A, B).
114 | |
115 | | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R) → P & return P.
116 | | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R) → P & return P.
117 | | eval(pair(cons, pair(A, pair(B, nil)))) =
118 | | eval(A) → AE & eval(B) → BE & return pair(AE, BE).
119 | | eval(X) = return X.
120 | |
121 | | reverse(pair(H, T), A) =
122 | | reverse(H, nil) → HR &
123 | | reverse(T, pair(HR, A)) → TR &
124 | | return TR.
125 | | reverse(nil, A) =
126 | | return A.
127 | | reverse(X, A) =
128 | | return X.
129 | + (cons a b)
130 | = pair(a, b)
131 |
132 | | main = sexp → S using $.tamsin & reverse(S, nil) → SR & eval(SR).
133 | |
134 | | sexp = symbol | list.
135 | | list = "(" & listtail(nil).
136 | | listtail(L) = sexp → S & listtail(pair(S, L))
137 | | | ")" & return L.
138 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
139 | |
140 | | head(pair(A, B)) = return A.
141 | | tail(pair(A, B)) = return B.
142 | | cons(A, B) = return pair(A, B).
143 | |
144 | | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R) → P & return P.
145 | | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R) → P & return P.
146 | | eval(pair(cons, pair(A, pair(B, nil)))) =
147 | | eval(A) → AE & eval(B) → BE & return pair(AE, BE).
148 | | eval(X) = return X.
149 | |
150 | | reverse(pair(H, T), A) =
151 | | reverse(H, nil) → HR &
152 | | reverse(T, pair(HR, A)) → TR &
153 | | return TR.
154 | | reverse(nil, A) =
155 | | return A.
156 | | reverse(X, A) =
157 | | return X.
158 | + (head (cons b a))
159 | = b
160 |
161 | | main = sexp → S using $.tamsin & reverse(S, nil) → SR & eval(SR).
162 | |
163 | | sexp = symbol | list.
164 | | list = "(" & listtail(nil).
165 | | listtail(L) = sexp → S & listtail(pair(S, L))
166 | | | ")" & return L.
167 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
168 | |
169 | | head(pair(A, B)) = return A.
170 | | tail(pair(A, B)) = return B.
171 | | cons(A, B) = return pair(A, B).
172 | |
173 | | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R) → P & return P.
174 | | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R) → P & return P.
175 | | eval(pair(cons, pair(A, pair(B, nil)))) =
176 | | eval(A) → AE & eval(B) → BE & return pair(AE, BE).
177 | | eval(X) = return X.
178 | |
179 | | reverse(pair(H, T), A) =
180 | | reverse(H, nil) → HR &
181 | | reverse(T, pair(HR, A)) → TR &
182 | | return TR.
183 | | reverse(nil, A) =
184 | | return A.
185 | | reverse(X, A) =
186 | | return X.
187 | + (tail (tail (cons b (cons b a))))
188 | = a
189 |
190 | In this one, we make the evaluator print out some of the steps it takes.
191 |
192 | | main = sexp → S using $.tamsin & reverse(S, nil) → SR & eval(SR).
193 | |
194 | | sexp = symbol | list.
195 | | list = "(" & listtail(nil).
196 | | listtail(L) = sexp → S & listtail(pair(S, L))
197 | | | ")" & return L.
198 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
199 | |
200 | | head(pair(A, B)) = return A.
201 | | tail(pair(A, B)) = return B.
202 | | cons(A, B) = return pair(A, B).
203 | |
204 | | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R) → P & return P.
205 | | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R) → P & return P.
206 | | eval(pair(cons, pair(A, pair(B, nil)))) =
207 | | eval(A) → AE & eval(B) → BE &
208 | | $.print(y(AE, BE)) & cons(AE, BE) → C & return C.
209 | | eval(X) = return X.
210 | |
211 | | reverse(pair(H, T), A) =
212 | | reverse(H, nil) → HR &
213 | | reverse(T, pair(HR, A)) → TR &
214 | | return TR.
215 | | reverse(nil, A) =
216 | | return A.
217 | | reverse(X, A) =
218 | | return X.
219 | + (cons (tail (cons b a)) (head (cons b a)))
220 | = y(b, a)
221 | = y(b, a)
222 | = y(a, b)
223 | = pair(a, b)
224 |
--------------------------------------------------------------------------------
/doc/Error_Reporting.markdown:
--------------------------------------------------------------------------------
1 | Error Reporting
2 | ---------------
3 |
4 | For now, only the Tamsin interpreter is expected to pass these tests.
5 |
6 | Also, these tests expose some details about how Falderal creates temp files.
7 | Boo!
8 |
9 | -> Tests for functionality "Intepret Tamsin program"
10 |
11 | When a scanning error occurs in a Tamsin source, the filename, line number,
12 | and column number are reported.
13 |
14 | | hello = "h".
15 | | %
16 | ? expected identifiable character but found '%' at line 2, column 5 in '/tmp/tmp
17 |
18 | When a parsing error occurs in a Tamsin source, the filename, line number,
19 | and column number are reported.
20 |
21 | | slough = "h" & ("o" | "p").
22 | | maidenhead = "h" & ("o" | "p").
23 | | reading = "h" ("o" | "p").
24 | ? expected '.' but found '(' at line 3, column 16 in '/tmp/tmp
25 |
26 | | pasta = "h" & «hop() & "p".
27 | ? expected '>>' but found '&' at line 1, column 22 in '/tmp/tmp
28 |
29 | | pasta = "h" & «hop()
30 | ? expected '>>' but found EOF at line 1, column 22 in '/tmp/tmp
31 |
32 | When a scanning error occurs in the input to a Tamsin program, the filename,
33 | line number, and column number are reported.
34 |
35 | | main = "h" & "o" & "x".
36 | + hop
37 | ? expected 'x' but found 'p' at line 1, column 3 in ''
38 |
39 | | main = "h" & "o" & {"\n"} & "0" & "x".
40 | + ho
41 | +
42 | + 0p
43 | ? expected 'x' but found 'p' at line 3, column 2 in ''
44 |
45 | | main = "h" & "o" & "x".
46 | + ho
47 | ? expected 'x' but found EOF at line 1, column 3 in ''
48 |
49 | | main = "h" & "o" & $:eof.
50 | + hox
51 | ? expected EOF but found 'x' at line 1, column 3 in ''
52 |
53 | | main = "h" & "o" & $:any.
54 | + ho
55 | ? expected any token but found EOF at line 1, column 3 in ''
56 |
57 | | main = "h" & "o" & $:alnum.
58 | + ho&
59 | ? expected alphanumeric but found '&' at line 1, column 3 in ''
60 |
61 | | main = "h" & "o" & $:upper.
62 | + hod
63 | ? expected uppercase but found 'd' at line 1, column 3 in ''
64 |
65 | | main = "h" & "o" & $:startswith('f').
66 | + hod
67 | ? expected 'f...' but found 'd' at line 1, column 3 in ''
68 |
69 | | main = "h" & "o" & (! "n").
70 | + hon
71 | ? expected anything else but found 'n' at line 1, column 3 in ''
72 |
--------------------------------------------------------------------------------
/doc/Micro-Tamsin.markdown:
--------------------------------------------------------------------------------
1 | Micro-Tamsin
2 | ============
3 |
4 | This is just the "fundaments" part of the spec, and a few other bits,
5 | that the Micro-Tamsin interpreter (written in Tamsin!) can handle.
6 |
7 | -> Tests for functionality "Intepret Tamsin program"
8 |
9 | Fundaments
10 | ----------
11 |
12 | A Tamsin program consists of one or more _productions_. A production consists
13 | of a name and a _parsing rule_ (or just "rule" for short). Among other things,
14 | a rule may be a _non-terminal_, which is the name of a production, or a
15 | _terminal_, which is a literal string in double quotes. (A full grammar for
16 | Tamsin can be found in Appendix A.)
17 |
18 | When run, a Tamsin program processes its input. It starts at the production
19 | named `main`, and evaluates its rule. A non-terminal in a rule "calls" the
20 | production of that name in the program. A terminal in a rule expects a token
21 | identical to it to be on the input. If that expectation is met, it evaluates
22 | to that token. If not, it raises an error. The final result of evaluating a
23 | Tamsin program is sent to its output.
24 |
25 | (If it makes it easier to think about, consider "its input" to mean "stdin",
26 | and "token" to mean "character"; so the terminal `"x"` is a command that either
27 | reads the character `x` from stdin and returns it (whence it is printed to
28 | stdout by the main program), or errors out if it read something else.
29 | Or, thinking about it from the other angle, we have here the rudiments for
30 | defining a grammar for parsing a trivial language.)
31 |
32 | | main = blerf.
33 | | blerf = "p".
34 | + p
35 | = p
36 |
37 | | main = blerf.
38 | | blerf = "p".
39 | + k
40 | ? expected 'p' found 'k'
41 |
42 | Productions can be written that don't look at the input. A rule may also
43 | consist of the keyword `return`, followed by a _term_; this expression simply
44 | evaluates to that term and returns it. (More on terms later; for now,
45 | think of them as strings.)
46 |
47 | So, the following program always outputs `blerp`, no matter what the input is.
48 |
49 | | main = return blerp.
50 | + fadda wadda badda kadda nadda sadda hey
51 | = blerp
52 |
53 | Note that in the following, `blerp` refers to the production named "blerp"
54 | in one place, and in the other place, it refers to the term `blerp`. Tamsin
55 | sees the difference because of the context; `return` must be followed by a
56 | term, while a parsing rule cannot be part of a term.
57 |
58 | | main = blerp.
59 | | blerp = return blerp.
60 | + foo
61 | + foo
62 | + foo 0 0 0 0 0
63 | = blerp
64 |
65 | A rule may also consist of the keyword `print` followed by a term, which,
66 | when evaluated, sends the term to the output, and evaluates to the term.
67 | (Mostly this is useful for debugging. In the following, `world` is
68 | repeated because it is both printed, and the result of the evaluation.)
69 |
70 | | main = print hello & print world.
71 | + ahoshoshohspohdphs
72 | = hello
73 | = world
74 | = world
75 |
76 | A rule may also consist of two subrules joined by the `&` operator.
77 | The `&` operator processes the left-hand side rule. If the LHS fails, then
78 | the `&` expression fails; otherwise, it continues and processes the
79 | right-hand side rule. If the RHS fails, the `&` expression fails; otherwise
80 | it evaluates to what the RHS evaluated to.
81 |
82 | | main = "a" & "p".
83 | + ap
84 | = p
85 |
86 | | main = "a" & "p".
87 | + ak
88 | ? expected 'p' found 'k'
89 |
90 | | main = "a" & "p".
91 | + ep
92 | ? expected 'a' found 'e'
93 |
94 | If you are too used to C or Javascript or the shell, you may use `&&`
95 | instead of `&`.
96 |
97 | | main = "a" && "p".
98 | + ap
99 | = p
100 |
101 | A rule may also consist of two subrules joined by the `|` operator.
102 | The `|` operator processes the left-hand side rule. If the LHS succeeds,
103 | then the `|` expression evaluates to what the LHS evaluated to, and the
104 | RHS is ignored. But if the LHS fails, it processes the RHS; if the RHS
105 | fails, the `|` expression fails, but otherwise it evaluates to what the
106 | RHS evaluated to.
107 |
108 | For example, this program accepts `0` or `1` but nothing else.
109 |
110 | | main = "0" | "1".
111 | + 0
112 | = 0
113 |
114 | | main = "0" | "1".
115 | + 1
116 | = 1
117 |
118 | | main = "0" | "1".
119 | + 2
120 | ? expected '1' found '2'
121 |
122 | If you are too used to C or Javascript or the shell, you may use `||`
123 | instead of `|`.
124 |
125 | | main = "0" || "1".
126 | + 1
127 | = 1
128 |
129 | Using `return` described above, this program accepts 0 or 1 and evaluates
130 | to the opposite. (Note here also that `&` has a higher precedence than `|`.)
131 |
132 | | main = "0" & return 1 | "1" & return 0.
133 | + 0
134 | = 1
135 |
136 | | main = "0" & return 1 | "1" & return 0.
137 | + 1
138 | = 0
139 |
140 | | main = "0" & return 1 | "1" & return 0.
141 | + 2
142 | ? expected '1' found '2'
143 |
144 | Evaluation order can be altered by using parentheses, as per usual.
145 |
146 | | main = "0" & ("0" | "1") & "1" & return ok.
147 | + 011
148 | = ok
149 |
150 | Note that if the LHS of `|` fails, the RHS is tried at the position of
151 | the stream that the LHS started on. This property is called "backtracking".
152 |
153 | | ohone = "0" & "1".
154 | | ohtwo = "0" & "2".
155 | | main = ohone | ohtwo.
156 | + 02
157 | = 2
158 |
159 | Note that `print` and `return` never fail. Thus, code like the following
160 | is "useless":
161 |
162 | | main = foo & print hi | return useless.
163 | | foo = return bar | print useless.
164 | = hi
165 | = hi
166 |
167 | Note that `return` does not exit the production immediately — although
168 | this behaviour may be re-considered...
169 |
170 | | main = return hello & print not_useless.
171 | = not_useless
172 | = not_useless
173 |
174 | Alternatives can select code to be executed, based on the input.
175 |
176 | | main = aorb & print aorb | cord & print cord & return ok.
177 | | aorb = "a" & print ay | "b" & print bee.
178 | | cord = "c" & print see | eorf & print eorf.
179 | | eorf = "e" & print ee | "f" & print eff.
180 | + e
181 | = ee
182 | = eorf
183 | = cord
184 | = ok
185 |
186 | And that's the basics. With these tools, you can write simple
187 | recursive-descent parsers. For example, to consume nested parentheses
188 | containing a zero:
189 |
190 | | main = parens & "." & return ok.
191 | | parens = "(" & parens & ")" | "0".
192 | + 0.
193 | = ok
194 |
195 | | main = parens & "." & return ok.
196 | | parens = "(" & parens & ")" | "0".
197 | + (((0))).
198 | = ok
199 |
200 | (the error message on this test case is a little weird; it's because of
201 | the backtracking. It tries to match `(((0)))` against the beginning of
202 | input, and fails, because the last `)` is not present. So it tries to
203 | match `0` at the beginning instead, and fails that too.)
204 |
205 | | main = parens & "." & return ok.
206 | | parens = "(" & parens & ")" | "0".
207 | + (((0)).
208 | ? expected '0' found '('
209 |
210 | (the error message on this one is much more reasonable...)
211 |
212 | | main = parens & "." & return ok.
213 | | parens = "(" & parens & ")" | "0".
214 | + ((0))).
215 | ? expected '.' found ')'
216 |
217 | To consume a comma-separated list of one or more bits:
218 |
219 | | main = bit & {"," & bit} & ".".
220 | | bit = "0" | "1".
221 | + 1.
222 | = .
223 |
224 | | main = bit & {"," & bit} & ".".
225 | | bit = "0" | "1".
226 | + 0,1,1,0,1,1,1,1,0,0,0,0,1.
227 | = .
228 |
229 | (again, backtracking makes the error a little odd)
230 |
231 | | main = bit & {"," & bit} & ".".
232 | | bit = "0" | "1".
233 | + 0,,1,0.
234 | ? expected '.' found ','
235 |
236 | | main = bit & {"," & bit} & ".".
237 | | bit = "0" | "1".
238 | + 0,10,0.
239 | ? expected '.' found '0'
240 |
241 | Comments
242 | --------
243 |
244 | A Tamsin comment is introduced with `#` and continues until the end of the
245 | line.
246 |
247 | | # welcome to my Tamsin program!
248 | | main = # comments may appear anywhere in the syntax
249 | | # and a comment may be followed by a comment
250 | | "z".
251 | + z
252 | = z
253 |
--------------------------------------------------------------------------------
/doc/Notes.markdown:
--------------------------------------------------------------------------------
1 | These are now out of context, and kept here for historical purposes.
2 |
3 | ### an aside, written a while back ###
4 |
5 | OK! So... here is a problem: if you haven't noticed yet,
6 |
7 | * what a rule consumes, is a string.
8 | * what a rule evaluates to, is a term.
9 | * the symbol `(` means something different in a rule (where it expresses
10 | precedence) than in a term (where it signifies the list of subterms.)
11 | * the symbol `foo` means something different in a rule (where it denotes
12 | a production) than in a term (where it is an atom.)
13 |
14 | This is probably unacceptable. Which syntax do we want to change?
15 |
16 | PRODUCTION = set V = foo & return ⟨atom V production⟩.
17 |
18 | i.e. productions are distinguished from atoms and variables by being
19 | all-caps. Lists are distinguished from precedence by being ⟨ ⟩.
20 |
21 | production = set V = 'foo & return '(atom V production).
22 |
23 | i.e. `'` acts a bit like quote, or rather quasi-quote, as Variables get
24 | expanded.
25 |
26 | production = set V = :foo & return :smth(:atom Var :production).
27 |
28 | i.e. atoms are prefixed with `:`, like Ruby, and terms are constructors
29 | with a leading atom, like real terms and not like lists.
30 |
31 | production = set V = 「foo」 & return 「(atom Var anotheratom)」.
32 |
33 | A funky, Japanese-influenced version of quote. Nice, but really not suited
34 | for this, quite. Ditto ⟦these⟧.
35 |
36 | Ah, well, it may not be a real problem, unless we want to make `return`
37 | optional (which we do.) Maybe, onto weirder stuff first.
38 |
39 | ### stuff about implicit buffer ###
40 |
41 | Here's a "problem": the implicit buffer is a string, and we don't have
42 | strings in the data domain, we have terms. This "problem" is easily
43 | "solvable": we can stringify the term. This is a terrible "solution",
44 | but it lets us experiment further.
45 |
46 | This would be nicer if we had a syntax to put arbitrary text in an atom.
47 | Hey, how about 「this is an atom」? Hmmm...
48 |
49 | #### A prolix note on implementation ####
50 |
51 | Traditionally, scanners for recursive descent parsers pre-emptively scan
52 | the next token. This was done because originally, parsers (for languages
53 | like Pascal, say,) were distinctly one-pass beasts, reading the source code
54 | off of a stream from disk (or maybe even from a tape), and you might need
55 | to refer to the current token several times in the code and you don't want
56 | to have to read it more than once.
57 |
58 | This setup makes writing a parser with a "hot-swappable" scanner tricky,
59 | because when we switch scanner, we have to deal with this "cached" token
60 | somehow. We could rewind the scanner by the length of the token (plus
61 | the length of any preceding whitespace and comments), switch the scanner,
62 | then scan again (by the new rules.) But this is messy and error-prone.
63 |
64 | Luckily, not many of us are reading files off tape these days, and we have
65 | plenty of core, so it's no problem reading the whole file into memory.
66 | In fact, I've seen it argued that the best way to write a scanner nowadays
67 | is to `mmap()` the file. We don't do this in the implementation of Tamsin,
68 | but we do read the entire file into memory.
69 |
70 | This makes the cache-the-next-token method less useful, and so we don't
71 | do it. Instead, we look for the next token only when we need it, and we
72 | have a method `peek()` that returns what the next token would be, and we
73 | don't cache this value.
74 |
75 | There are a couple of other points about the scanner implementation.
76 | A scanner only ever has one buffer (the entire string it's scanning); this
77 | never changes over its lifetime. It provides methods for saving and
78 | restoring its state, and it has a stack of "engines" which provide the
79 | actual scanning logic. In addition, there is only one interpreter object,
80 | and it only has one scanner object during its lifetime.
81 |
82 | ### Implementation Notes ###
83 |
84 | Maybe test-driven language design is *not* "for the win" in all cases; it's
85 | excellent for evolving a design, but not so good for deep debugging. I had
86 | to actually write a dedicated test case which directly accessed the internals,
87 | to find the problem.
88 |
89 | This was only after refactoring the implementation two or three times. One
90 | of those times, I removed exceptions, so now the interpreter returns
91 | `(success, result)` tuples, where `success` is a boolean, and propagates
92 | parse errors itself.
93 |
94 | We "raise" a parse error only in the `LITERAL` AST node.
95 |
96 | We handle parse errors (backtrack) only in `OR` and `WHILE`, and in the
97 | ProductionScannerEngine logic (to provide that EOF if the scanning production
98 | failed. This can happen even in `peek()` at the end of a string, even after
99 | we've successfully parsed everything else.)
100 |
101 | ### aside #2 ###
102 |
103 | Well this is all very nice, very pretty I'm sure you'll agree, but it doesn't
104 | hang together too well. Figuration is easier than composition. The thing is
105 | that we still have these two domains, the domain of strings that we parse
106 | and the domain of terms that we match. We need to bring them closer together.
107 | This section is just ideas for that.
108 |
109 | One is that instead of, or alongside terms, we compose strings.
110 |
111 | First, we put arbitrary text in an atom, with `「this syntax」`. Then we allow
112 | terms to be concatenated with `•`. It looks real cool! But also, it's kind
113 | of annoying. So we also allow `'this ' + 'syntax.'`.
114 |
115 | ### ... ###
116 |
117 | Indeed we can.
118 |
119 | The next logical step would be to be able to say
120 |
121 | main = program using scanner.
122 | scanner = scan using ☆char.
123 | scan = {" "} & (...)
124 | program = "token" & ";" & "token" & ...
125 |
126 | But we're not there yet.
127 |
128 | Well, the best way to get there is to make that a test, see it fail, then
129 | improve the implementation so that it passes. Test-driven language design
130 | for the win! (But maybe not in all cases. See my notes below...)
131 |
132 | ### ... #2 ###
133 |
134 | Having thought more about it, I think the easiest way to reconcile terms
135 | and strings is to have terms be syntactic sugar for strings. This is
136 | already the case for ground terms, since `tree(a,b)` stringifies to the
137 | same string as `「tree(a,b)」`. It's when variables are involved where it
138 | differs. We would like some kind of quasi-quote such that even though
139 | `「tree(A,b)」` → `tree(A,b)`, `«tree(A,b)»` → `tree(tree(x,y),b)` or
140 | whatever.
141 |
142 | Although, I still don't know. The thing about terms is that they are
143 | super-useful for intermediate representations — abstract syntax trees
144 | and the like. I've been thinking about some kind of compromise. Which
145 | is, currently, what we sort of have. A Tamsin term doubles as a string,
146 | for better or worse. Mainly, we should sort out the properties of terms,
147 | then. Which we will do. But first,
148 |
149 | ### conceptual sugar ###
150 |
151 | Have
152 |
153 | reverse(tree(A,B)) = ...
154 |
155 | be *conceptually* sugar for
156 |
157 | reverse["tree" & "(" & term → A & "," & term → B & ")"] = ...
158 |
159 | but *actually* we still keep it in terms of terms, for efficiency.
160 |
--------------------------------------------------------------------------------
/doc/Philosophy.markdown:
--------------------------------------------------------------------------------
1 | Philosophy of Tamsin
2 | ====================
3 |
4 | I suppose that's a rather heavy-handed word to use, "philosophy". But
5 | this is the document giving the _whys_ of Tamsin rather than the technical
6 | points.
7 |
8 | Why did you write Tamsin?
9 | -------------------------
10 |
11 | Basically, every time I see someone use a compiler-compiler like `yacc`
12 | or a parser combinator library, part of me thinks, "Well why didn't
13 | you just write a recursive-descent parser? Recursive-descent parsers
14 | are easy to write and they make for extremely pretty code!"
15 | And what does a recursive-descent parser do? It consumes input. But
16 | don't *all* algorithms consume input? So why not have a language which
17 | makes it easy to write recursive-descent parsers, and force all programs
18 | to be written as recursive-descent parsers? Then *all* code will be pretty!
19 | (Yeah, sure, OK.)
20 |
21 | Why is it/is it not a...
22 | ------------------------
23 |
24 | ### Meta-Language ###
25 |
26 | (Also known, in their more practical incarnations, as "compiler-compilers"
27 | or "parser generators".)
28 |
29 | Tamsin is one, because:
30 |
31 | * The basic operations all map directly to combinators in BNF (or rather,
32 | Wirth's EBNF):
33 | * `&` is sequencing
34 | * `|` is alternation
35 | * `[]` is sugar for alternation with the empty string
36 | * `{}` is asteration
37 | * `"foo"` is a terminal
38 | * `foo` is a non-terminal
39 | * Using only these operations produces a sensible program — one which
40 | parses its input by the grammar so given.
41 |
42 | Tamsin isn't one, because:
43 |
44 | * There is no requirement that any input be processed at all.
45 |
46 | ### Programming Language ###
47 |
48 | Tamsin is one, because:
49 |
50 | * Productions can have local variables.
51 | * Productions can call other productions (or themselves, recursively) with
52 | arguments, and they return a value:
53 |
54 | reverse(pair(H, T), A) = reverse(T, pair(H, A)).
55 | reverse(nil, A) = A.
56 |
57 | * It's Turing-complete.
58 | * It can be, and in fact has been, bootstrapped.
59 |
60 | Tamsin isn't one, because:
61 |
62 | * The syntax is really geared to consuming input rather than general
63 | programming.
64 |
65 | ### Rubbish Lister ###
66 |
67 | What does this even mean? Well, there is that
68 | [one famous rubbish lister](http://perl.org/) that we can use as an example
69 | for now, until I come up with a better definition here.
70 |
71 | Tamsin is one, because:
72 |
73 | * There's more than one way to say it.
74 | * The same symbol means different things in different contexts
75 | (for example, `foo` might be either the name of a production, or an
76 | atomic term.)
77 | * Implicit this, implicit that.
78 | * Optimized (a bit) for problem-solving throwaway one-liners rather than
79 | large, engineered systems.
80 | * Anyone up for a game of golf?
81 |
82 | Tamsin isn't one, because:
83 |
84 | * It's possible to express its syntax in a form that humans can understand.
85 | * In fact, it's possible to express its syntax in Tamsin.
86 | In fact, it's possible to bootstrap Tamsin — a Tamsin-to-C compiler has
87 | been written in Tamsin. This is very un-rubbish-lister-ish.
88 |
89 | Batteries Included
90 | ------------------
91 |
92 | Are batteries included? Or rather, _what_ batteries are included? By strange
93 | coincidence, the batteries that are included are almost exactly the ones
94 | you'd expect to be useful in bootstrapping a Tamsin-to-C compiler:
95 |
96 | * `list` module — `reverse`, `append`, `member`, etc.
97 | * `tamsin_scanner` module
98 | * `tamsin_parser` module
99 | * `tamsin_analyzer` module
100 |
--------------------------------------------------------------------------------
/doc/System_Module.markdown:
--------------------------------------------------------------------------------
1 | System Module
2 | -------------
3 |
4 | -> Tests for functionality "Intepret Tamsin program"
5 |
6 | The module `$` contains a number of built-in productions which would not
7 | be possible or practical to implement in Tamsin. See Appendix C for a list.
8 |
9 | In fact, we have been using the `$` module already! But our usage of it
10 | has been hidden under some syntactic sugar. For example, `"k"` is actually...
11 |
12 | | main = $:expect(k).
13 | + k
14 | = k
15 |
16 | | main = $:expect(k).
17 | + l
18 | ? expected 'k' but found 'l'
19 |
20 | The section about aliases needs to be written too.
21 |
22 | Here's `$:alnum`, which only consumes tokens where the first character is
23 | alphanumeric.
24 |
25 | | main = "(" & {$:alnum → A} & ")" & A.
26 | + (abc123deefghi459876jklmnopqRSTUVXYZ0)
27 | = 0
28 |
29 | | main = "(" & {$:alnum → A} & ")" & A.
30 | + (abc123deefghi459876!jklmnopqRSTUVXYZ0)
31 | ? expected ')' but found '!'
32 |
33 | Here's `$:upper`, which only consumes tokens where the first character is
34 | uppercase alphabetic.
35 |
36 | | main = "(" & {$:upper → A} & ")" & A.
37 | + (ABCDEFGHIJKLMNOPQRSTUVWXYZ)
38 | = Z
39 |
40 | | main = "(" & {$:upper → A} & ")" & A.
41 | + (ABCDEFGHIJKLMNoPQRSTUVWXYZ)
42 | ? expected ')' but found 'o'
43 |
44 | Here's `$:startswith`, which only consumes tokens which start with
45 | the given term. (For a single-character scanner this isn't very
46 | impressive.)
47 |
48 | | main = "(" & {$:startswith('A') → A} & ")" & A.
49 | + (AAAA)
50 | = A
51 |
52 | | main = "(" & {$:startswith('A') → A} & ")" & A.
53 | + (AAAABAAA)
54 | ? expected ')' but found 'B'
55 |
56 | Here's `$:mkterm`, which takes an atom and a list and creates a constructor.
57 |
58 | | main = $:mkterm(atom, list(a, list(b, list(c, nil)))).
59 | = atom(a, b, c)
60 |
61 | Here's `$:unquote`, which takes three terms, X, L and R, where L and R
62 | must be atoms. If X begins with L and ends with R then the contents
63 | in-between will be returned as an atom. Otherwise fails.
64 |
65 | | main = $:unquote('"hello"', '"', '"').
66 | = hello
67 |
68 | | main = $:unquote('(hello)', '(', ')').
69 | = hello
70 |
71 | | main = $:unquote('(hello)', '(', '"').
72 | ? term '(hello)' is not quoted with '(' and '"'
73 |
74 | | main = $:unquote('(hello)', '[', ')').
75 | ? term '(hello)' is not quoted with '[' and ')'
76 |
77 | The quotes can be Unicode characters.
78 |
79 | | main = $:unquote('“hello”', '“', '”').
80 | = hello
81 |
82 | The quotes can be multiple characters.
83 |
84 | | main = $:unquote('%-hello-%', '%-', '-%').
85 | = hello
86 |
87 | The quotes can even be empty strings.
88 |
89 | | main = $:unquote('hello', '', '').
90 | = hello
91 |
92 | Here's `$:equal`, which takes two terms, L and R. If L and R are equal,
93 | succeeds and returns that term which they both are. Otherwise fails.
94 |
95 | Two atoms are equal if their texts are identical.
96 |
97 | | main = $:equal('hi', 'hi').
98 | = hi
99 |
100 | | main = $:equal('hi', 'lo').
101 | ? term 'hi' does not equal 'lo'
102 |
103 | Two constructors are equal if their texts are identical, they have the
104 | same number of subterms, and all of their corresponding subterms are equal.
105 |
106 | | main = $:equal(hi(there), hi(there)).
107 | = hi(there)
108 |
109 | | main = $:equal(hi(there), lo(there)).
110 | ? term 'hi(there)' does not equal 'lo(there)'
111 |
112 | | main = $:equal(hi(there), hi(here)).
113 | ? term 'hi(there)' does not equal 'hi(here)'
114 |
115 | | main = $:equal(hi(there), hi(there, there)).
116 | ? term 'hi(there)' does not equal 'hi(there, there)'
117 |
118 | Here's `$:emit`, which takes an atom and outputs it. Unlike `print`, which
119 | is meant for debugging, `$:emit` does not append a newline, and is 8-bit-clean.
120 |
121 | | main = $:emit('`') & $:emit('wo') & ''.
122 | = `wo
123 |
124 | -> Tests for functionality "Intepret Tamsin program (pre- & post-processed)"
125 |
126 | `$:emit` is 8-bit-clean: if the atom contains unprintable characters,
127 | `$:emit` does not try to make them readable by UTF-8 or any other encoding.
128 | (`print` may or may not do this, depending on the implementation.)
129 |
130 | | main = $:emit('\x00\x01\x02\xfd\xfe\xff') & ''.
131 | = 000102fdfeff0a
132 |
133 | -> Tests for functionality "Intepret Tamsin program"
134 |
135 | Here's `$:repr`, which takes a term and results in an atom which is the
136 | result of reprifying that term (see section on Terms, above.)
137 |
138 | | main = $:repr(hello).
139 | = hello
140 |
141 | | main = $:repr('016fo_oZZ').
142 | = 016fo_oZZ
143 |
144 | | main = $:repr('016fo$oZZ').
145 | = '016fo$oZZ'
146 |
147 | | main = $:repr('').
148 | = ''
149 |
150 | | main = $:repr(' ').
151 | = ' '
152 |
153 | | main = $:repr('016\n016').
154 | = '016\x0a016'
155 |
156 | | main = $:repr(hello(there, world)).
157 | = hello(there, world)
158 |
159 | | main = V ← '♡' & $:repr('□'(there, V)).
160 | = '\xe2\x96\xa1'(there, '\xe2\x99\xa1')
161 |
162 | | main = $:repr(a(b(c('qu\'are\\')))).
163 | = a(b(c('qu\'are\\')))
164 |
165 | | main = $:repr('\x99').
166 | = '\x99'
167 |
168 | Here's `$:reverse`, which takes a term E, and a term of the form
169 | `X(a, X(b, ... X(z, E)) ... )`, and returns a term of the form
170 | `X(z, X(y, ... X(a, E)) ... )`. The constructor tag X is often `cons`
171 | or `pair` or `list` and E is often `nil`.
172 |
173 | | main = $:reverse(list(a, list(b, list(c, nil))), nil).
174 | = list(c, list(b, list(a, nil)))
175 |
176 | E need not be an atom.
177 |
178 | | main = $:reverse(list(a, list(b, list(c, hello(world)))), hello(world)).
179 | = list(c, list(b, list(a, hello(world))))
180 |
181 | If the tail of the list isn't E, an error occurs.
182 |
183 | | main = $:reverse(list(a, list(b, list(c, hello(world)))), nil).
184 | ? malformed list
185 |
186 | If some list constructor doesn't have two children, an error occurs.
187 |
188 | | main = $:reverse(list(a, list(b, list(nil))), nil).
189 | ? malformed list
190 |
191 | The constructor tag can be anything.
192 |
193 | | main = $:reverse(foo(a, foo(b, foo(c, nil))), nil).
194 | = foo(c, foo(b, foo(a, nil)))
195 |
196 | But if there is a different constructor somewhere in the list, well,
197 |
198 | | main = $:reverse(foo(a, fooz(b, foo(c, nil))), nil).
199 | ? malformed list
200 |
201 | You can reverse an empty list.
202 |
203 | | main = $:reverse(nil, nil).
204 | = nil
205 |
206 | But of course,
207 |
208 | | main = $:reverse(nil, zilch).
209 | ? malformed list
210 |
211 | This is a shallow reverse. Embedded lists are not reversed.
212 |
213 | | main = $:reverse(list(a, list(list(1, list(2, nil)), list(c, nil))), nil).
214 | = list(c, list(list(1, list(2, nil)), list(a, nil)))
215 |
216 | Here's `$:gensym`.
217 |
218 | | main = $:gensym('foo').
219 | = foo1
220 |
221 | | main = $:gensym('foo') → F & $:gensym('foo') → G & $:equal(F, G).
222 | ? 'foo1' does not equal 'foo2'
223 |
224 | Here's `$:hexbyte`.
225 |
226 | | main = $:hexbyte('5', '0').
227 | = P
228 |
229 | | main = $:hexbyte('f', 'f') → C & $:repr(C).
230 | = '\xff'
231 |
232 | Here's `$:format_octal`, which makes me feel ill.
233 |
234 | | main = $:format_octal('P').
235 | = 120
236 |
237 | | main = $:format_octal('\xff').
238 | = 377
239 |
240 | There are never any leading zeroes.
241 |
242 | | main = $:format_octal('\n').
243 | = 12
244 |
245 | It works on the first byte of the string only.
246 |
247 | | main = $:format_octal('«').
248 | = 302
249 |
250 | Here's `$:length`, which returns an atom representing the length, in bytes,
251 | of the given term (flattened.) Note that this is an atom, not an integer,
252 | because Tamsin doesn't even have integers.
253 |
254 | | main = $:length(abcde).
255 | = 5
256 |
257 | | main = $:length('').
258 | = 0
259 |
260 | | main = $:length('♥').
261 | = 3
262 |
263 | | main = $:length(a( b , c )).
264 | = 7
265 |
--------------------------------------------------------------------------------
/doc/TODO.markdown:
--------------------------------------------------------------------------------
1 | TODO
2 | ====
3 |
4 | ### C implementation ###
5 |
6 | * implement buffers in C in libtamsin
7 | * implement pattern match in send in C compiler
8 |
9 | ### higher-priority ###
10 |
11 | * allow switching the kind of buffer that is used when `@` is used:
12 | * `rule @ %stdin` is the default; it is implied when no `@`
13 | * `rule @ %mmap` to use an MmapBuffer
14 | * `rule @ %line` to use a LineEditorBuffer
15 | * `rule @ $:open('file.txt')` ?
16 | * `$:add`, `$:sub`, `$:mul`, `$:div`, `$:rem`, for atoms which look like
17 | integers: `["-"] & {$:digit}`.
18 | * `$:tell` and `$:seek` the implicit buffer — for VM's etc — although
19 | note, this may have scary consequences when combined with backtracking
20 | * `(foo → S | ok)` & print S ... should set S to error if foo failed?
21 | or `(foo |→ S ok)` ? This is necessary for the meta-circular
22 | interpreter: to implement `A | B` we want to interpret `A` and see
23 | if it failed or not. i.e. We want to be able to reify errors...
24 |
25 | ### medium-priority ###
26 |
27 | * Starting with knowns about `$` builtins, an analysis to determine, for Rule:
28 | - may consume input, never consumes input
29 | - may fail, always fails
30 | - may succeed, always succeeds... (may_backtrack?)
31 | * production values
32 | * `$:fold(^production, nil, cons)`
33 | * `$:fold(^($:alnum & " "), '', ^L+','+R)`
34 | * codegen and emitter phases in compiler. take current compiler phase,
35 | make it construct a low-level representation instead (codegen), then
36 | have a phase that writes out C code from that low-level repr (emitter)
37 | * non-backtracking versions of `|` and `{}`: `|!` and `{}!`
38 |
39 | ### testing ###
40 |
41 | * test for `''('')`, `'\x00'('\x00')`
42 | * document how prod scanners do EOF
43 | * tests that `'V'` is not a variable
44 | * tests for failing when utf8 scanner hits badly-encoded utf8
45 | * tests for invalid escape codes
46 | * test for mismatched # of formals in prod branches
47 | * document the modules. in own document. plus tests.
48 |
49 | ### lower-priority ###
50 |
51 | * `ctype` module, with `alpha` and `digit` and etc.
52 | * `list` module: `deep_reverse`
53 | * use Tamsin repr in error messages
54 | * __str__ should be Tamsin repr()?
55 | * regex-like shortcuts: `\w` for "word", `\s` for "whitespace", etc.
56 | * have compiler replace calls to `list` functions
57 | by "more efficient" versions written in C -- if they really are...
58 | * and maybe even garbage-collect terms in libtamsin
59 | * figure out why reading a 4M file in a compiled program TAKES DOWN UBUNTU
60 | * make it possible to recover from more errors using `|` (don't throw
61 | exceptions so often)
62 | * stronger tests for scanner, parser: dump all falderal testbodies to files
63 | * option for ref interp to not output result (or by default, don't)
64 | * "mini" interpreter that handles variables (ouch)
65 | * error handling: skip to next sentinel and report more errors
66 | * module-level updatable variables. or globals. or "process dictionary"
67 | `$:store()` and `$:fetch()`. or database.
68 | * figure out good way to do aliases with the Tamsin-parser-in-Tamsin
69 | (dynamic grammar is really more of a Zz thing...)
70 | * should be able to import ("open") other modules into your own namespace.
71 | * `@` a la Haskell in pattern-match:
72 | * `walk(T@tree(L,R)) = ...`
73 | * maps, implemented as hash tables.
74 | * `Table ← {} & fields → F@fields(H,T) & Table[H] ← T`
75 | * pretty-print AST for error messages
76 |
77 | ### symbol fun ###
78 |
79 | * `~` (Lua) for not and `!` (Prolog) for non-backtracking?
80 | * lowercase greek letters are variables too!
81 | * use `←` instead of `@`, why not?
82 | * I'm always typing `prod() → rule` instead of `=`, so why not?
83 | * `A;B` — like `&` except assert (statically) that `A` always succeeds
84 | * be generous and allow `"xyz"` in term context position?
85 | * denotational semantics sugar! something like...
86 |
87 | ⟦add α β⟧ = $:add(⟦α⟧, ⟦β⟧).
88 |
89 | and/or
90 |
91 | ⟦add α β⟧(σ) = $:add(⟦α⟧(σ), ⟦β⟧(σ)).
92 | ⟦var α⟧(σ) = fetch(σ, α).
93 |
94 | of course, DS is a bit fast-and-loose about actual parsing...
95 | but the syntax looks mighty fine.
96 |
97 | ### wild ideas ###
98 |
99 | * term-rewriting library; a la Treacle; should make desugarer almost trivial
100 | * algebraically cool version of `|`, perhaps as a worked example
101 | (implement Bakerloo in Tamsin)
102 | * EOF and nil are the same? it would make sense... call it `end`? (do we?)
103 | * productions with names with arbitrary characters in them.
104 | * something like «foo» but foo is the name of a *non*terminal — symbolic
105 | production references (like Perl's horrible globs as a cheap substitute
106 | for actual function references or lambdas.)
107 | * turn system library back into built-in keywords (esp. if : can be used)
108 | * Tamsin scanner: more liberal (every non-alphanum+_ symbol scans as itself,
109 | incl. ones that have no meaning currently like `*` and `?`)
110 | * auto-generate terms from productions, like Rooibos does
111 | * token classes... somehow. (then numeric is just a special token class?)
112 | a token class is just the "call stack" of productions at the time it
113 | was scanned
114 | * «» could be an alias w/right sym (`,,`, `„`)
115 | (still need to scan it specially though)
116 | * special form that consumes rest of input from the Tamsin source --
117 | maybe not such a gimmick since micro-tamsin does this
118 | * feature-testing: `$.exists(module) | do_without_module`
119 | * ternary: `foo ? bar : baz` — if foo succeeded, do bar, else do baz.
120 | I don't think this is very necessary because you can usually just say
121 | `(foo & bar) | baz` — but only if `bar` always succeeds, which it
122 | usually does (to return something)
123 |
--------------------------------------------------------------------------------
/doc/Tested_Examples.markdown:
--------------------------------------------------------------------------------
1 | Tests that used to be in Tamsin's README
2 | ========================================
3 |
4 | -> Tests for functionality "Intepret Tamsin program"
5 |
6 | Hello, world!
7 |
8 | | main = 'Hello, world!'.
9 | = Hello, world!
10 |
11 | Make a story more exciting!
12 |
13 | | main = ("." & '!' | "?" & '?!' | any)/''.
14 | + Chapter 1
15 | + ---------
16 | + It was raining. She knocked on the door. She heard
17 | + footsteps inside. The door opened. The butler peered
18 | + out. "Hello," she said. "May I come in?"
19 | = Chapter 1
20 | = ---------
21 | = It was raining! She knocked on the door! She heard
22 | = footsteps inside! The door opened! The butler peered
23 | = out! "Hello," she said! "May I come in?!"
24 |
25 | Parse an algebraic expression for syntactic correctness.
26 |
27 | | main = (expr0 & eof & 'ok').
28 | | expr0 = expr1 & {"+" & expr1}.
29 | | expr1 = term & {"*" & term}.
30 | | term = "x" | "y" | "z" | "(" & expr0 & ")".
31 | + x+y*(z+x+y)
32 | = ok
33 |
34 | Parse an algebraic expression to a syntax tree.
35 |
36 | | main = expr0.
37 | | expr0 = expr1 → E1 & {"+" & expr1 → E2 & E1 ← add(E1,E2)} & E1.
38 | | expr1 = term → E1 & {"*" & term → E2 & E1 ← mul(E1,E2)} & E1.
39 | | term = "x" | "y" | "z" | "(" & expr0 → E & ")" & E.
40 | + x+y*(z+x+y)
41 | = add(x, mul(y, add(add(z, x), y)))
42 |
43 | Translate an algebraic expression to RPN (Reverse Polish Notation).
44 |
45 | | main = expr0 → E & walk(E).
46 | | expr0 = expr1 → E1 & {"+" & expr1 → E2 & E1 ← add(E1,E2)} & E1.
47 | | expr1 = term → E1 & {"*" & term → E2 & E1 ← mul(E1,E2)} & E1.
48 | | term = "x" | "y" | "z" | "(" & expr0 → E & ")" & E.
49 | | walk(add(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' +'.
50 | | walk(mul(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' *'.
51 | | walk(X) = return ' '+X.
52 | + x+y*(z+x+y)
53 | = x y z x + y + * +
54 |
55 | Reverse a list.
56 |
57 | | main = reverse(pair(a, pair(b, pair(c, nil))), nil).
58 | | reverse(pair(H, T), A) = reverse(T, pair(H, A)).
59 | | reverse(nil, A) = A.
60 | = pair(c, pair(b, pair(a, nil)))
61 |
62 | Parse and evaluate a Boolean expression.
63 |
64 | | main = expr0 → E using scanner & eval(E).
65 | | expr0 = expr1 → E1 & ("or" & expr1)/E1/or.
66 | | expr1 = term → E1 & ("and" & term)/E1/and.
67 | | term = "true" | "false" | "(" & expr0 → E & ")" & E.
68 | | eval(and(A, B)) = eval(A) → EA & eval(B) → EB & and(EA, EB).
69 | | eval(or(A, B)) = eval(A) → EA & eval(B) → EB & or(EA, EB).
70 | | eval(X) = X.
71 | | and(true, true) = 'true'.
72 | | and(A, B) = 'false'.
73 | | or(false, false) = 'false'.
74 | | or(A, B) = 'true'.
75 | | scanner = scan using $:utf8.
76 | | scan = {" "} & ("(" | ")" | token).
77 | | token = "f" & "a" & "l" & "s" & "e" & 'false'
78 | | | "t" & "r" & "u" & "e" & 'true'
79 | | | "o" & "r" & 'or'
80 | | | "a" & "n" & "d" & 'and'.
81 | + (falseortrue)andtrue
82 | = true
83 |
84 | Parse a CSV file and write out the 2nd-last field of each record. Handles
85 | commas and double-quotes inside quotes.
86 |
87 | | main = line → L & L ← lines(nil, L) &
88 | | {"\n" & line → M & L ← lines(L, M)} & extract(L) & ''.
89 | | line = field → F & {"," & field → G & F ← fields(G, F)} & F.
90 | | field = strings | bare.
91 | | strings = string → T & {string → S & T ← T + '"' + S} & T.
92 | | string = "\"" & (!"\"" & any)/'' → T & "\"" & T.
93 | | bare = (!(","|"\n") & any)/''.
94 | | extract(lines(Ls, L)) = extract(Ls) & extract_field(L).
95 | | extract(L) = L.
96 | | extract_field(fields(L, fields(T, X))) = print T.
97 | | extract_field(X) = X.
98 | + Harold,1850,"21 Baxter Street",burgundy
99 | + Smythe,1833,"31 Little Street, St. James",mauve
100 | + Jones,1791,"41 ""The Gardens""",crimson
101 | = 21 Baxter Street
102 | = 31 Little Street, St. James
103 | = 41 "The Gardens"
104 |
105 | Evaluate a trivial S-expression-based language.
106 |
107 | | main = sexp → S using scanner & reverse(S, nil) → SR & eval(SR).
108 | | scanner = ({" "} & ("(" | ")" | $:alnum/'')) using $:utf8.
109 | | sexp = $:alnum | list.
110 | | list = "(" & sexp/nil/pair → L & ")" & L.
111 | | head(pair(A, B)) = A.
112 | | tail(pair(A, B)) = B.
113 | | cons(A, B) = return pair(A, B).
114 | | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R).
115 | | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R).
116 | | eval(pair(cons, pair(A, pair(B, nil)))) =
117 | | eval(A) → AE & eval(B) → BE & return pair(AE, BE).
118 | | eval(X) = X.
119 | | reverse(pair(H, T), A) = reverse(H, nil) → HR & reverse(T, pair(HR, A)).
120 | | reverse(nil, A) = A.
121 | | reverse(X, A) = X.
122 | + (head (tail (cons (cons a nil) (cons b nil))))
123 | = b
124 |
125 | Escape characters in a string, for use in a C program source.
126 |
127 | | main = escaped('"♥\n«"').
128 | | escaped(S) = escaped_r @ S.
129 | | escaped_r = A ← '' &
130 | | {
131 | | "\\" & A ← A + '\\\\'
132 | | | "\"" & A ← A + '\\"'
133 | | | "\n" & A ← A + '\\n'
134 | | | $:alnum → B & A ← A + B
135 | | | any → B & (many_format_octal @ B) → B & A ← A + B
136 | | } & A.
137 | |
138 | | many_format_octal =
139 | | S ← '' &
140 | | {any → B & $:format_octal(B) → B & S ← S + '\\' + B} using $:byte &
141 | | S.
142 | = \"\342\231\245\n\302\253\"
143 |
--------------------------------------------------------------------------------
/eg/alg-expr1.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = (expr0 & eof & 'ok').
5 | expr0 = expr1 & {"+" & expr1}.
6 | expr1 = term & {"*" & term}.
7 | term = "x" | "y" | "z" | "(" & expr0 & ")".
8 |
--------------------------------------------------------------------------------
/eg/alg-expr2.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = expr0.
5 | expr0 = expr1 -> E1 & {"+" & expr1 -> E2 & E1 <- add(E1,E2)} & E1.
6 | expr1 = term -> E1 & {"*" & term -> E2 & E1 <- mul(E1,E2)} & E1.
7 | term = "x" | "y" | "z" | "(" & expr0 -> E & ")" & E.
8 |
--------------------------------------------------------------------------------
/eg/alg-expr3.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = expr0 → E & walk(E).
5 | expr0 = expr1 → E1 & {"+" & expr1 → E2 & E1 ← add(E1,E2)} & E1.
6 | expr1 = term → E1 & {"*" & term → E2 & E1 ← mul(E1,E2)} & E1.
7 | term = "x" | "y" | "z" | "(" & expr0 → E & ")" & E.
8 | walk(add(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' +'.
9 | walk(mul(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' *'.
10 | walk(X) = return ' '+X.
11 |
--------------------------------------------------------------------------------
/eg/backtrack.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = set E = original &&
5 | (set E = changed && "0" && "1" | "0" && "2") &
6 | return E.
7 |
--------------------------------------------------------------------------------
/eg/bitpair.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = bit → A & bit → B & return pair(A,B).
5 | bit = "0" | "1".
6 |
--------------------------------------------------------------------------------
/eg/bits.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = bit & {"," & bit} & ".".
5 | bit = "0" | "1".
6 |
--------------------------------------------------------------------------------
/eg/blerf.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = blerf(tree(tree(tree(a,b),c),d)).
5 | blerf(tree(L,R)) = blerf(L).
6 | blerf(Other) = return Other.
7 |
--------------------------------------------------------------------------------
/eg/change-buffer.tamsin:
--------------------------------------------------------------------------------
1 | main = one @ 'I process this string until ! where I digress a bit' & ''.
2 | one = {"!" & {any → C & $:emit(C)} @ 'Here I digress' | any → C & $:emit(C)}.
3 |
--------------------------------------------------------------------------------
/eg/csv_extract.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = line → L & L ← lines(nil, L) &
5 | {"\n" & line → M & L ← lines(L, M)} & extract(L) & ''.
6 | line = field → F & {"," & field → G & F ← fields(G, F)} & F.
7 | field = strings | bare.
8 | strings = string → T & {string → S & T ← T + '"' + S} & T.
9 | string = "\"" & T ← '' & {!"\"" & any → S & T ← T + S} & "\"" & T.
10 | bare = T ← '' & {!(","|"\n") & any → S & T ← T + S} & T.
11 | extract(lines(Lines, Line)) = extract(Lines) & extract_field(Line).
12 | extract(L) = L.
13 | extract_field(fields(Last, fields(This, X))) = print This.
14 | extract_field(X) = return X.
--------------------------------------------------------------------------------
/eg/csv_parse.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = line & {"\n" & line} & 'ok'.
5 | line = field → F & {"," & field → G & F ← fields(G, F)} & F.
6 | field = strings | bare.
7 | strings = string → T & {string → S & T ← T + '"' + S} & T.
8 | string = "\"" & T ← '' & {!"\"" & any → S & T ← T + S} & "\"" & T.
9 | bare = T ← '' & {!(","|"\n") & any → S & T ← T + S} & T.
10 |
--------------------------------------------------------------------------------
/eg/escape.tamsin:
--------------------------------------------------------------------------------
1 | main = print
2 | '\n' +
3 | '\xa0' +
4 | 'r'.
5 |
--------------------------------------------------------------------------------
/eg/eval-bool-expr.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = expr0 → E using scanner & eval(E).
5 | expr0 = expr1 → E1 & {"or" & expr1 → E2 & E1 ← or(E1,E2)} & E1.
6 | expr1 = term → E1 & {"and" & term → E2 & E1 ← and(E1,E2)} & E1.
7 | term = "true" | "false" | "(" & expr0 → E & ")" & E.
8 | eval(and(A, B)) = eval(A) → EA & eval(B) → EB & and(EA, EB).
9 | eval(or(A, B)) = eval(A) → EA & eval(B) → EB & or(EA, EB).
10 | eval(X) = X.
11 | and(true, true) = 'true'.
12 | and(A, B) = 'false'.
13 | or(false, false) = 'false'.
14 | or(A, B) = 'true'.
15 | scanner = scan using $:utf8.   # token scanner: runs `scan` over characters from the built-in $:utf8 scanner
16 | scan = {" "} & ("(" | ")" | token).
17 | token = "f" & "a" & "l" & "s" & "e" & 'false'
18 | | "t" & "r" & "u" & "e" & 'true'
19 | | "o" & "r" & 'or'
20 | | "a" & "n" & "d" & 'and'.
21 |
--------------------------------------------------------------------------------
/eg/exciting-long.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = S <- '' & {("." & '!' | "?" & '?!' | any) -> T & S <- S + T} & S.
5 |
--------------------------------------------------------------------------------
/eg/exciting.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = ("." & '!' | "?" & '?!' | any)/''.
5 |
--------------------------------------------------------------------------------
/eg/exciting.txt:
--------------------------------------------------------------------------------
1 | It was raining. She knocked on the door. She heard
2 | footsteps inside. The door opened. The butler peered
3 | out. "Hello," she said. "May I come in?"
--------------------------------------------------------------------------------
/eg/expector.tamsin:
--------------------------------------------------------------------------------
1 | main = set T = 'foobar' &
2 | print T &
3 | expect_chars(T).
4 |
5 | # Given a single-character string S, return
6 | # call(prodref('$', 'expect'), list(atom(S), nil)). Given a longer string, return
7 | # and(<that call for the first character>, expect_chars(<rest of the string>)).
8 |
9 | expect_chars(S) = print S & expect_chars_r @ S.
10 | expect_chars_r = any → C &
11 | set E = call(prodref('$', 'expect'), list(atom(C), nil)) &
12 | ((eof & return E) | (expect_chars_r → R & return and(E, R))).
13 |
--------------------------------------------------------------------------------
/eg/foobar.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = print_each_char(fo+ob+ar).
5 | print_each_char(X) = print_each_char_r @ X.
6 | print_each_char_r = any → C & print C & print_each_char_r | return 'ok'.
7 |
--------------------------------------------------------------------------------
/eg/hello-world.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = 'Hello, world!'.
5 |
--------------------------------------------------------------------------------
/eg/list-of-chars.tamsin:
--------------------------------------------------------------------------------
1 | main = any/nil/list.
2 |
--------------------------------------------------------------------------------
/eg/list-sugar2.tamsin:
--------------------------------------------------------------------------------
1 | main = expr([1,2|3]).
2 | expr([1,2|3]) = "f".
3 |
4 |
--------------------------------------------------------------------------------
/eg/modules.tamsin:
--------------------------------------------------------------------------------
1 | # parses "(0)39".
2 |
3 | stuff {
4 | junk = "(" & :return & ")".
5 | return = "0".
6 | }
7 | main = stuff:junk & :return & :eof.
8 | return = "3".
9 | eof = "9".
10 |
--------------------------------------------------------------------------------
/eg/names.csv:
--------------------------------------------------------------------------------
1 | Harold,1850,"21 Baxter Street",burgundy
2 | Smythe,1833,"31 Little Street, St. James",mauve
3 | Jones,1791,"41 ""The Gardens""",crimson
4 |
--------------------------------------------------------------------------------
/eg/pipeline.tamsin:
--------------------------------------------------------------------------------
1 | # Demonstrate that Tamsin programs can handle being given a stream on input,
2 | # and producing a stream on output. (This was not true in versions 0.5 and
3 | # prior)
4 |
5 | main = {token -> A & whitespace & print A}.
6 | token = S <- '' & {$:alnum -> T & S <- S + T} & return S.
7 | whitespace = {" " | "\n"}.
8 |
--------------------------------------------------------------------------------
/eg/prod-branches.tamsin:
--------------------------------------------------------------------------------
1 | main = e(1).
2 | e(2) = 'foo'.
3 | e(A) = A.
4 |
5 |
--------------------------------------------------------------------------------
/eg/reverse.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = reverse(pair(a, pair(b, pair(c, nil))), nil).
5 | reverse(pair(H, T), A) = reverse(T, pair(H, A)).
6 | reverse(nil, A) = A.
7 |
--------------------------------------------------------------------------------
/eg/sexpr-eval.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = sexp → S using scanner & reverse(S, nil) → SR & eval(SR).
5 | scanner = scan using $:utf8.
6 | scan = {" "} & ("(" | ")" | (T ← '' & {$:alnum → S & T ← T + S} & return T)).
7 | sexp = $:alnum | list.
8 | list = "(" & listtail(nil).
9 | listtail(L) = sexp → S & listtail(pair(S, L)) | ")" & L.
10 | head(pair(A, B)) = return A.
11 | tail(pair(A, B)) = return B.
12 | cons(A, B) = return pair(A, B).
13 | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R).
14 | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R).
15 | eval(pair(cons, pair(A, pair(B, nil)))) =
16 | eval(A) → AE & eval(B) → BE & return pair(AE, BE).
17 | eval(X) = X.
18 | reverse(pair(H, T), A) = reverse(H, nil) → HR & reverse(T, pair(HR, A)).
19 | reverse(nil, A) = A.
20 | reverse(X, A) = X.
21 |
--------------------------------------------------------------------------------
/eg/store.tamsin:
--------------------------------------------------------------------------------
1 | fetch(K1, list(pair(K2, V), T)) = $:equal(K1, K2) & V | fetch(K1, T).
2 | fetch(K, nil) = fail K + ' not found'.
3 |
4 | store(K, V, A) = return list(pair(K, V), A).
5 |
6 | main =
7 | ST ← nil &
8 | store(x, 21, ST) → ST &
9 | store(y, 17, ST) → ST &
10 | store(z, 11, ST) → ST &
11 | fetch(y, ST).
12 |
13 |
--------------------------------------------------------------------------------
/eg/zeroes-concat.tamsin:
--------------------------------------------------------------------------------
1 | main = zeroes.
2 | zeroes = ("0" & zeroes → E & return E + 'Z') | return ''.
3 |
4 |
--------------------------------------------------------------------------------
/eg/zeroes.tamsin:
--------------------------------------------------------------------------------
1 | # This example Tamsin program was written by Chris Pressey, and is
2 | # hereby placed in the public domain. It comes with NO WARRANTY.
3 |
4 | main = zeroes.
5 | zeroes = ("0" & zeroes → E & return zero(E)) | return nil.
6 |
--------------------------------------------------------------------------------
/fixture/bootstrapped.markdown:
--------------------------------------------------------------------------------
1 |
2 | -> Functionality "Intepret Tamsin program" is implemented by
3 | -> shell command
4 | -> "bin/bootstrapped-compiler <%(test-body-file) >tmp/foo.c && gcc -Ic_src -Lc_src tmp/foo.c -o tmp/foo -ltamsin && tmp/foo <%(test-input-file)"
5 |
6 | -> Functionality "Intepret Tamsin program (pre- & post-processed)"
7 | -> is implemented by
8 | -> shell command "bin/bootstrapped-compiler <%(test-body-file) >tmp/foo.c && gcc -Ic_src -Lc_src tmp/foo.c -o tmp/foo -ltamsin && cat %(test-input-file) | bin/inhex | tmp/foo | bin/hexout"
9 |
10 |
--------------------------------------------------------------------------------
/fixture/compiler.py.markdown:
--------------------------------------------------------------------------------
1 |
2 | -> Functionality "Intepret Tamsin program" is implemented by
3 | -> shell command
4 | -> "./bin/tamsin loadngo %(test-body-file) < %(test-input-file)"
5 |
6 | -> Functionality "Intepret Tamsin program (pre- & post-processed)"
7 | -> is implemented by
8 | -> shell command "cat %(test-input-file) | bin/inhex | bin/tamsin loadngo %(test-body-file) | bin/hexout"
9 |
--------------------------------------------------------------------------------
/fixture/compiler.tamsin.markdown:
--------------------------------------------------------------------------------
1 |
2 | -> Functionality "Intepret Tamsin program" is implemented by
3 | -> shell command
4 | -> "bin/tamsin-compiler <%(test-body-file) >tmp/foo.c && gcc -Ic_src -Lc_src tmp/foo.c -o tmp/foo -ltamsin && tmp/foo <%(test-input-file)"
5 |
6 | -> Functionality "Intepret Tamsin program (pre- & post-processed)"
7 | -> is implemented by
8 | -> shell command "bin/tamsin-compiler <%(test-body-file) >tmp/foo.c && gcc -Ic_src -Lc_src tmp/foo.c -o tmp/foo -ltamsin && cat %(test-input-file) | bin/inhex | tmp/foo | bin/hexout"
9 |
10 |
--------------------------------------------------------------------------------
/fixture/micro-tamsin.markdown:
--------------------------------------------------------------------------------
1 |
2 | -> Functionality "Intepret Tamsin program" is implemented by
3 | -> shell command
4 | -> "cat %(test-body-file) > tmp/foz && echo -n '/' >> tmp/foz && cat %(test-input-file) >> tmp/foz && ./bin/micro-tamsin tmp/foz"
5 |
6 |
--------------------------------------------------------------------------------
/fixture/mini-tamsin.markdown:
--------------------------------------------------------------------------------
1 |
2 | -> Functionality "Intepret Tamsin program" is implemented by
3 | -> shell command
4 | -> "cat %(test-body-file) > tmp/foz && echo -n '/' >> tmp/foz && cat %(test-input-file) >> tmp/foz && ./bin/mini-tamsin tmp/foz"
5 |
6 |
--------------------------------------------------------------------------------
/fixture/tamsin.py.markdown:
--------------------------------------------------------------------------------
1 |
2 | -> Functionality "Intepret Tamsin program" is implemented by
3 | -> shell command "bin/tamsin %(test-body-file) < %(test-input-file)"
4 |
5 | -> Functionality "Intepret Tamsin program (pre- & post-processed)"
6 | -> is implemented by
7 | -> shell command "cat %(test-input-file) | bin/inhex | bin/tamsin %(test-body-file) | bin/hexout"
8 |
--------------------------------------------------------------------------------
/lib/list.tamsin:
--------------------------------------------------------------------------------
1 | list {   # utilities for list(Head, Tail) / nil lists; reverse(Xs, Acc) yields Xs reversed, followed by Acc
2 |   reverse(list(Head, Rest), Acc) = reverse(Rest, list(Head, Acc)).
3 |   reverse(nil, Acc) = return Acc.
4 |   # member(Item, Xs): returns the element of Xs equal to Item, or fails if there is none.
5 |   member(Item, nil) = fail 'not a member'.
6 |   member(Item, list(Head, Rest)) =
7 |       ($:equal(Item, Head) & return Head) | member(Item, Rest).
8 |   # add_elem(Item, Xs): Xs unchanged if Item is already a member, otherwise Item consed onto Xs.
9 |   add_elem(Item, Xs) =
10 |       (member(Item, Xs) & return Xs) | return list(Item, Xs).
11 |   # union(Xs, Ys): Ys extended (via add_elem) with each element of Xs.
12 |   union(nil, Ys) = return Ys.
13 |   union(list(Head, Rest), Ys) =
14 |       add_elem(Head, Ys) → Grown &
15 |       union(Rest, Grown).
16 |   # append(Xs, Ys): the elements of Xs followed by Ys.
17 |   append(nil, Ys) = return Ys.
18 |   append(list(Head, Rest), Ys) =
19 |       append(Rest, Ys) → Joined & return list(Head, Joined).
20 | }
21 |
--------------------------------------------------------------------------------
/lib/tamsin_analyzer.tamsin:
--------------------------------------------------------------------------------
1 | # Desugarer and analyzer for Tamsin AST, written in Tamsin.
2 | # Distributed under a BSD-style license; see LICENSE.
3 |
4 | tamsin_analyzer {
5 | # Desugaring pass: desugar(AST) expands fold(...) nodes, numbers pattern variables, and merges same-named productions within each module.
6 | desugar_all(list(H,T)) =   # desugars every element of a list(...) term, keeping the order
7 | desugar(H) → DH &
8 | desugar_all(T) → DT &
9 | return list(DH, DT).
10 | desugar_all(nil) = 'nil'.
11 |
12 | desugar(program(L)) = desugar_all(L) → DL & return program(DL).
13 | desugar(module(N, L)) =
14 | desugar_all(L) → DL &
15 | merge_prod_branches(DL, nil) → DDL &   # one production per name, holding all of its branches
16 | return module(N, DDL).
17 | desugar(production(N, PBs)) =
18 | desugar_all(PBs) → DPBs &
19 | return production(N, DPBs).
20 | desugar(prodbranch(Fs, Ls, B)) =
21 | desugar_pattern_all(Fs, 0) → Pair &   # pattern-variable numbering starts at 0 for each branch
22 | fst(Pair) → DFs &
23 | desugar(B) → DB &
24 | return prodbranch(DFs, Ls, DB).
25 | desugar(call(PR, Args)) = return call(PR, Args).
26 | desugar(or(L, R)) = desugar(L) → DL & desugar(R) → DR & return or(DL, DR).
27 | desugar(and(L, R)) = desugar(L) → DL & desugar(R) → DR & return and(DL, DR).
28 | desugar(not(X)) = desugar(X) → DX & return not(DX).
29 | desugar(while(X)) = desugar(X) → DX & return while(DX).
30 | desugar(concat(L, R)) = desugar(L) → DL & desugar(R) → DR & return concat(DL, DR).
31 | desugar(using(R, P)) = desugar(R) → DR & return using(DR, P).
32 | desugar(on(R, T)) = desugar(R) → DR & desugar(T) → DT & return on(DR, DT).
33 | desugar(send(R, V)) = desugar(R) → DR & return send(DR, V).
34 | desugar(set(V, T)) = desugar(T) → DT & return set(V, DT).
35 | desugar(atom(T)) = return atom(T).
36 | desugar(constructor(T, Ts)) = return constructor(T, Ts).
37 | desugar(variable(N)) = return variable(N).
38 | desugar(fold(R, I, C)) =   # builds: _1 ← I & {R → _2 & _1 ← <combine _1 and _2>} & return _1
39 | desugar(R) → DR &
40 | SET ← set(variable('_1'), I) &   # accumulator _1 starts as the initial term I
41 | SEND ← send(DR, variable('_2')) &   # each result of the rule goes into _2
42 | CAT ← concat(variable('_1'), variable('_2')) &
43 | ACC ← set(variable('_1'), CAT) &   # default combining step: _1 ← _1 + _2
44 | ($:equal(C, nil) |   # no constructor given: keep the concatenating step
45 | get_tag(C) → Tag &   # otherwise the step becomes _1 ← Tag(_2, _1)
46 | ACC ← set(variable('_1'),
47 | constructor(Tag, list(variable('_2'),
48 | list(variable('_1'), nil))))) &
49 | RET ← call(prodref('$', 'return'), list(variable('_1'), nil)) &
50 | return and(and(SET, while(and(SEND, ACC))), RET).
51 | # desugar_pattern_all(Patterns, I): returns pair(DesugaredPatterns, NextI), where I is the next unused pattern-variable index.
52 | desugar_pattern_all(list(H,T), I) =
53 | desugar_pattern(H, I) → Pair &
54 | fst(Pair) → DH &
55 | snd(Pair) → I2 &
56 | desugar_pattern_all(T, I2) → Pair &
57 | fst(Pair) → DT &
58 | snd(Pair) → I3 &
59 | return pair(list(DH, DT), I3).
60 | desugar_pattern_all(nil, I) = return pair(nil, I).
61 |
62 | desugar_pattern(atom(T), I) = return pair(atom(T), I).
63 | desugar_pattern(constructor(T, Ts), I) =
64 | desugar_pattern_all(Ts, I) → Pair &
65 | fst(Pair) → DTs &
66 | snd(Pair) → I2 &
67 | return pair(constructor(T, DTs), I2).
68 | desugar_pattern(variable(N), I) =   # a variable becomes patternvariable(Name, Index) and uses up index I
69 | next(I) → I2 &
70 | return pair(patternvariable(N, I), I2).
71 | # Accessors for pair(A, B) terms.
72 | fst(pair(A,B)) = A.
73 | snd(pair(A,B)) = B.
74 | # next(I): the index following I; only defined for 0 through 11, so no branch matches any other argument.
75 | next(0) = '1'.
76 | next(1) = '2'.
77 | next(2) = '3'.
78 | next(3) = '4'.
79 | next(4) = '5'.
80 | next(5) = '6'.
81 | next(6) = '7'.
82 | next(7) = '8'.
83 | next(8) = '9'.
84 | next(9) = '10'.
85 | next(10) = '11'.
86 | next(11) = '12'.
87 | # get_tag(atom(T)): returns T.
88 | get_tag(atom(T)) = T.
89 | # Association list of pair(Key, Value) entries: fetch returns the first value stored under K, or 'nil' if there is none.
90 | fetch(K, list(pair(K2, V), T)) = $:equal(K, K2) & V | fetch(K, T).
91 | fetch(K, nil) = 'nil'.
92 | # delete(K, Map): Map without any entry whose key equals K.
93 | delete(K, list(pair(K2, V), T)) = $:equal(K, K2) & delete(K, T)
94 | | delete(K, T) → R & return list(pair(K2, V), R).
95 | delete(K, nil) = 'nil'.
96 | # store(K, V, Map): Map with any old entries for K removed and pair(K, V) placed at the front.
97 | store(K, V, A) = delete(K, A) → A2 &
98 | return list(pair(K, V), A2).
99 | # merge_prod_branches(Productions, Map): groups single-branch productions by name; Map goes from name to that name's branches, latest first.
100 | merge_prod_branches(list(production(N, list(B, nil)),T),Map) =
101 | fetch(N, Map) → Blist &
102 | Blist ← list(B, Blist) &
103 | store(N, Blist, Map) → Map &
104 | merge_prod_branches(T, Map).
105 | merge_prod_branches(nil,Map) =
106 | unmap(Map, nil).
107 | # unmap(Map, A): turns each pair(Name, ReversedBranches) into production(Name, Branches) and conses it onto A.
108 | unmap(list(pair(K, V), T), A) =
109 | list:reverse(V, nil) → RV &
110 | P ← production(K, RV) &
111 | A ← list(P, A) &
112 | unmap(T, A).
113 | unmap(nil, A) = A.
114 |
115 | #####
116 | # Analysis pass: analyze(CM, AST), where CM = current module name. Prodrefs with an empty
117 | # module name are qualified with CM, and each prodbranch's list of locals is recomputed from its body.
118 | analyze_all(CM, list(H,T)) =
119 | analyze(CM, H) → DH &
120 | analyze_all(CM, T) → DT &
121 | return list(DH, DT).
122 | analyze_all(CM, nil) = 'nil'.
123 |
124 | analyze(CM, program(L)) =
125 | analyze_all(CM, L) → DL &
126 | return program(DL).
127 | analyze(CM, module(N, L)) =
128 | analyze_all(N, L) → DL &   # inside a module, its own name N is the current module
129 | return module(N, DL).
130 | analyze(CM, production(N, Bs)) =
131 | analyze_all(CM, Bs) → DBs &
132 | return production(N, DBs).
133 | analyze(CM, prodbranch(Fs, Ls, E)) =
134 | analyze(CM, E) → DE &
135 | locals(DE, nil) → Ls &   # the incoming Ls is discarded and rebuilt from the analyzed body
136 | list:reverse(Ls, nil) → Ls &
137 | return prodbranch(Fs, Ls, DE).
138 | analyze(CM, call(PR, As)) =
139 | analyze(CM, PR) → DPR &
140 | analyze_all(CM, As) → DAs &
141 | return call(DPR, DAs).
142 | analyze(CM, prodref(MN, PN)) =
143 | $:equal(MN, '') & return prodref(CM, PN)   # unqualified reference: use the current module
144 | | return prodref(MN, PN).
145 | analyze(CM, or(L, R)) =
146 | analyze(CM, L) → DL &
147 | analyze(CM, R) → DR &
148 | return or(DL, DR).
149 | analyze(CM, and(L, R)) =
150 | analyze(CM, L) → DL &
151 | analyze(CM, R) → DR &
152 | return and(DL, DR).
153 | analyze(CM, not(X)) =
154 | analyze(CM, X) → DX &
155 | return not(DX).
156 | analyze(CM, while(X)) =
157 | analyze(CM, X) → DX &
158 | return while(DX).
159 | analyze(CM, concat(L, R)) =
160 | analyze(CM, L) → DL &
161 | analyze(CM, R) → DR &
162 | return concat(DL, DR).
163 | analyze(CM, using(R, PR)) =
164 | analyze(CM, R) → DR &
165 | analyze(CM, PR) → DPR &
166 | return using(DR, DPR).
167 | analyze(CM, on(R, T)) =
168 | analyze(CM, R) → DR &
169 | analyze(CM, T) → DT &
170 | return on(DR, DT).
171 | analyze(CM, send(R, V)) =
172 | analyze(CM, R) → DR &
173 | return send(DR, V).
174 | analyze(CM, set(V, T)) =
175 | analyze(CM, T) → DT &
176 | return set(V, DT).
177 | analyze(CM, atom(T)) = return atom(T).
178 | analyze(CM, constructor(T, Ts)) = return constructor(T, Ts).
179 | analyze(CM, variable(N)) = return variable(N).
180 |
181 | #####
182 | # locals(AST, Ls): returns Ls plus the names of the variables found in AST, added with list:add_elem (so no duplicates).
183 | # call(...) nodes and the second argument of using(...) are not searched.
184 | locals(call(PR, As), Ls) =
185 | Ls.
186 | locals(or(L, R), Ls) =
187 | locals(L, Ls) → Ls &
188 | locals(R, Ls).
189 | locals(and(L, R), Ls) =
190 | locals(L, Ls) → Ls &
191 | locals(R, Ls).
192 | locals(not(X), Ls) =
193 | locals(X, Ls).
194 | locals(while(X), Ls) =
195 | locals(X, Ls).
196 | locals(concat(L, R), Ls) =
197 | locals(L, Ls) → Ls &
198 | locals(R, Ls).
199 | locals(using(R, P), Ls) =
200 | locals(R, Ls).
201 | locals(on(R, T), Ls) =
202 | locals(R, Ls) → Ls &
203 | locals(T, Ls).
204 | locals(send(R, V), Ls) =
205 | locals(V, Ls) → Ls &
206 | locals(R, Ls).
207 | locals(set(V, T), Ls) =
208 | locals(V, Ls) → Ls &
209 | locals(T, Ls).
210 | locals(atom(T), Ls) = Ls.
211 | locals(constructor(T, Ts), Ls) =
212 | locals_all(Ts, Ls).
213 | locals(variable(N), Ls) =
214 | list:add_elem(N, Ls).
215 | # locals_all(Terms, Ls): applies locals to every element of a list(...) term, threading Ls through.
216 | locals_all(nil, Ls) = Ls.
217 | locals_all(list(H,T), Ls) =
218 | locals(H, Ls) → Ls &
219 | locals_all(T, Ls).
220 | }
221 |
--------------------------------------------------------------------------------
/lib/tamsin_parser.tamsin:
--------------------------------------------------------------------------------
1 | # Parse Tamsin source to Tamsin AST, written in Tamsin.
2 | # Distributed under a BSD-style license; see LICENSE.
3 |
4 | # REQUIRES lib/tamsin_scanner.tamsin
5 | # REQUIRES lib/list.tamsin
6 |
7 | # Note that this may contain support for some features which are not in
8 | # the current released or pre-released version.
9 |
10 | tamsin_parser {
11 | parse = grammar using tamsin_scanner:scanner.  # entry point: run `grammar` over tokens from tamsin_scanner:scanner
12 | grammar = {"@" & pragma & "."} &  # pragmas are accepted here, but nothing from them is kept
13 | LM ← nil &  # LM: modules collected so far, newest first
14 | LP ← nil &  # LP: top-level productions collected so far, newest first
15 | {
16 | production → P & "." & LP ← list(P, LP)
17 | | module → M & LM ← list(M, LM)
18 | } &
19 | list:reverse(LP, nil) → LP &  # back to source order, assuming list:reverse(L, nil) reverses L
20 | MM ← module(main, LP) &  # NOTE(review): MM is not read again in this production
21 | list:reverse(LM, nil) → LM &
22 | ($:equal(LP, nil) | LM ← list(module(main, LP), LM)) &  # unless LP is nil, prepend the top-level productions as module `main`
23 | return program(LM).
24 | module = word → N &  # Name "{" {production "."} "}"  yields module(Name, Productions)
25 | LP ← nil &
26 | "{" &
27 | {production → P & "." & LP ← list(P, LP)} &
28 | "}" &
29 | list:reverse(LP, nil) → LP &
30 | return module(N, LP).
31 | production = word → N &
32 | F ← nil &  # formals default to nil when the production has none
33 | [formals → F] &
34 | "=" &
35 | expr0 → E &
36 | return production(N, list(prodbranch(F, nil, E), nil)).  # always exactly one prodbranch(formals, nil, body) per parsed production
37 | formals = L ← nil &  # "(" term {"," term} ")" yields the list of terms in source order
38 | "(" &
39 | term → T & L ← list(T, L) &
40 | {"," & term → T & L ← list(T, L)} &
41 | ")" &
42 | list:reverse(L, nil) → L &
43 | return L
44 | | "[" & expr0 & "]".  # alternative bracketed form; no AST is built for it here
45 | expr0 = expr1 → L & {("|" | "||") & expr1 → R & L ← or(L, R)} & L.  # alternation, folded left: or(or(a, b), c)
46 | expr1 = expr2 → L & {("&" | "&&") & expr2 → R & L ← and(L, R)} & L.  # sequencing, folded left; binds tighter than alternation
47 | expr2 = expr3 → L & ["using" & prodref → P & L ← using(L, P)  # optional `using` / `@` suffix (at most one)
48 | | "@" & texpr → T & L ← on(L, T)] & L.
49 | expr3 = expr4 → L & [("→" | "->") & variable → V & L ← send(L, V)] & L.  # optional send of the result to a variable
50 | expr4 = expr5 → L & ("/" & texpr → T &  # optional "/" texpr ["/" term] suffix yields fold(L, T, T2)
51 | ("/" & term → T2 & return fold(L, T, T2)
52 | | return fold(L, T, nil))  # T2 is nil when the second "/" part is absent
53 | | return L).
54 | expr5 = "(" & expr0 → E & ")" & E
55 | | "[" & expr0 → E & "]" &  # [E] is represented as: E | return nil
56 | return or(E, call(prodref('$', return), list(atom(nil), nil)))
57 | | "{" & expr0 → E & "}" & return while(E)
58 | | "!" & expr5 → E & return not(E)
59 | | "set" & variable → V & "=" & texpr → T & return set(V, T)
60 | | "return" & texpr → T & return call(prodref('$', return), list(T, nil))  # keywords become calls into the `$` module
61 | | "fail" & texpr → T & return call(prodref('$', fail), list(T, nil))
62 | | "print" & texpr → T & return call(prodref('$', print), list(T, nil))
63 | | "any" & return call(prodref('$', any), nil)
64 | | "eof" & return call(prodref('$', 'eof'), nil)
65 | | terminal
66 | | variable → V &  # V ← texpr is an assignment; a bare V means `return V`
67 | (("←" | "<-") & texpr → T & return set(V, T)
68 | | return call(prodref('$', return), list(V, nil)))
69 | | sq_string → T &  # a bare 'single-quoted' string means `return` that atom
70 | $:unquote(T, '\'', '\'') → T &
71 | return call(prodref('$', return), list(atom(T), nil))
72 | | pq_string → T &  # a “...” string: expect its characters one by one, then return the whole string
73 | $:unquote(T, '“', '”') → T &
74 | expect_chars(T) → E &
75 | return and(E, call(prodref('$', return), list(atom(T), nil)))
76 | | prodref → P &  # otherwise: a call, with an optional parenthesized argument list
77 | L ← nil &
78 | ["(" &
79 | texpr → T & L ← list(T, L) &
80 | {"," & texpr → T & L ← list(T, L)} &
81 | ")"] &
82 | list:reverse(L, nil) → L &
83 | return call(P, L).
84 | # Term expressions: terms joined by "+" fold left into concat(...) nodes.
85 | texpr = term → T & {"+" & term → S & T ← concat(T, S)} & T.
86 | term = term0.
87 | term0 = variable
88 | | "[" & L ← atom(nil) &  # list sugar: elements are consed newest-first onto L...
89 | [term → T & L ← constructor(list, list(T, list(L, nil))) &
90 | {"," & term → T & L ← constructor(list, list(T, list(L, nil)))}] &
91 | Tail ← atom(nil) &  # the tail is atom(nil) unless "|" term supplies one
92 | ["|" & term → Tail] &
93 | "]" &
94 | reverse_c(L, Tail) → L &  # ...then re-consed in source order onto Tail
95 | return L
96 | | atom → A & L ← nil & ["(" &
97 | term0 → T & L ← list(T, L) &
98 | {"," & term0 → T & L ← list(T, L)} &
99 | ")"] &
100 | list:reverse(L, nil) → L &
101 | ($:equal(L, nil) & return atom(A)  # no subterms: a plain atom
102 | | return constructor(A, L)).
103 | atom = word
104 | | sq_string → T &
105 | $:unquote(T, '\'', '\'').
106 | # Terminals: a "double-quoted" string or a «texpr»; both become a call to $:expect.
107 | terminal = terminal0 → T & return call(prodref('$', expect), list(T, nil)).
108 | terminal0 = dq_string → T & $:unquote(T, '"', '"') → T & return atom(T)
109 | | ("«" | "<<") & texpr → T & ("»" | ">>") & return T.
110 | # Production references: module:name, :name, or name; the last two carry '' as the module.
111 | prodref = modref → M & ":" & word → P & return prodref(M, P)
112 | | ":" & word → P & return prodref('', P)
113 | | word → P & return prodref('', P).
114 | modref = "$" | word.
115 | pragma = "alias" & word & word & "=" & prodref
116 | | "unalias" & word.
117 | # Token classes, tested against the tokens delivered by the scanner.
118 | word = $:alnum.
119 | variable = $:upper → V & return variable(V).
120 | sq_string = $:startswith('\'').
121 | dq_string = $:startswith('"').
122 | pq_string = $:startswith('“').
123 |
124 | ## utility functions on the AST ##
125 |
126 | # Given the name of a module and a program AST, return the named
127 | # module AST found within that program, or fail.
128 |
129 | find_module(N, program(Ms)) = find_module(N, Ms).
130 | find_module(N1, list(module(N2, Ps), T)) =
131 | $:equal(N1, N2) & return module(N2, Ps) | find_module(N1, T).
132 | find_module(N, list(H, T)) = find_module(N, T).  # skip list elements that are not module(...) terms
133 | find_module(N, nil) = fail 'no ' + N + ' module'.
134 |
135 | # Given the name of a production and a module AST, return the named
136 | # production AST found within that module, or fail.
137 |
138 | find_production(N, module(MN, Ps)) = find_production(N, Ps).
139 | find_production(N1, list(production(N2, Bs), T)) =
140 | $:equal(N1, N2) & return production(N2, Bs) | find_production(N1, T).
141 | find_production(N, list(H, T)) = find_production(N, T).  # skip list elements that are not production(...) terms
142 | find_production(N, nil) = fail 'no ' + N + ' production'.
143 |
144 | # Given the name of a module and the name of a production,
145 | # return the production AST for module:production in the program, or fail.
146 |
147 | find_production_global(MN, PN, P) =
148 | find_module(MN, P) → M & find_production(PN, M).
149 | # reverse_c(L, Acc): L is a chain of constructor(list, [Fst, Snd]) nodes; each Fst is consed onto Acc, reversing the chain.
150 | reverse_c(constructor(list, list(Fst, list(Snd, nil))), Acc) =
151 | Acc ← constructor(list, list(Fst, list(Acc, nil))) &
152 | reverse_c(Snd, Acc).
153 | reverse_c(Other, Acc) = Acc.  # anything that is not a two-element list constructor ends the chain
154 |
155 | # Given a single-character string C, return call(prodref('$', 'expect'), list(atom(C), nil)).
156 | # Given a longer string, return and(<that call for the first character>,
157 | # expect_chars(<rest of the string>)).
158 |
159 | expect_chars(S) = (expect_chars_r using $:utf8) @ S.  # scan the string S itself, using the $:utf8 scanner
160 | expect_chars_r = any → C &
161 | E ← call(prodref('$', 'expect'), list(atom(C), nil)) &
162 | ((eof & return E) | (expect_chars_r → R & return and(E, R))).  # last character: just E; otherwise E and the rest
163 | }
164 |
--------------------------------------------------------------------------------
/lib/tamsin_scanner.tamsin:
--------------------------------------------------------------------------------
1 | # Scanner for Tamsin tokens, written in Tamsin.
2 | # Distributed under a BSD-style license; see LICENSE.
3 |
4 | tamsin_scanner {
5 | scanner = scan using $:utf8.  # produce one token, reading the input through the $:utf8 scanner
6 | scan = skippable &  # skip whitespace and comments, then match exactly one token
7 | (symbol | str('\'', '\'') | str('"', '"') | str('“', '”') | word).
8 | symbol = "&" & "&" & '&&'  # two-character symbols are tried before their one-character prefixes
9 | | "|" & "|" & '||'
10 | | "-" & ">" & '->'
11 | | "<" & "-" & '<-'
12 | | "<" & "<" & '<<'
13 | | ">" & ">" & '>>'
14 | | "=" | "(" | ")" | "[" | "]" | "{" | "}" | "!" | "|" | "&" | ":"
15 | | "/" | "," | "." | "@" | "+" | "$" | "→" | "←" | "«" | "»".
16 | str(O, C) = «O» → T & {("\\" & escape | !«C» & any) → S & T ← T + S} & «C» &  # O/C: opening and closing quote; escapes are decoded as they are read
17 | return T + C.  # the returned token keeps both of its quote characters
18 | escape = "n" & '\n'  # the character after a backslash, mapped to the character it stands for
19 | | "r" & '\r'
20 | | "t" & '\t'
21 | | "x" & hexdigit → H & hexdigit → L & $:hexbyte(H, L)
22 | | "\\" & '\\'
23 | | "'" & '\''
24 | | "\"" & '"'.
25 | hexdigit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |  # note: only lowercase a-f are accepted
26 | "a" | "b" | "c" | "d" | "e" | "f".
27 | word = $:alnum → T & { ($:alnum | "_") → S & T ← T + S } & T.  # first character must satisfy $:alnum; "_" is allowed afterwards
28 | skippable = {whitespace | comment}.
29 | whitespace = " " | "\t" | "\r" | "\n".
30 | comment = "#" & {!"\n" & any} & ("\n" | eof).  # from "#" up to and including the newline, or up to end of input
31 | }
--------------------------------------------------------------------------------
/mains/analyzer.tamsin:
--------------------------------------------------------------------------------
1 | main = tamsin_parser:parse → AST & tamsin_scanner:skippable & eof &  # parse, then require only whitespace/comments before end of input
2 | tamsin_analyzer:desugar(AST) → AST &  # each stage rebinds AST with its own result
3 | tamsin_analyzer:analyze(nil, AST) → AST &  # nil is passed as analyze's first argument (named CM in tamsin_analyzer)
4 | $:repr(AST).  # the result of main is $:repr applied to the analyzed AST
5 |
--------------------------------------------------------------------------------
/mains/desugarer.tamsin:
--------------------------------------------------------------------------------
1 | main = tamsin_parser:parse → AST & tamsin_scanner:skippable & eof &  # parse, then require only whitespace/comments before end of input
2 | tamsin_analyzer:desugar(AST) → AST &  # desugar only; the analyze stage is not run here
3 | $:repr(AST).  # the result of main is $:repr applied to the desugared AST
4 |
--------------------------------------------------------------------------------
/mains/grammar.tamsin:
--------------------------------------------------------------------------------
1 | # Syntax-correctness parser for Tamsin, written in Tamsin.
2 | # Distributed under a BSD-style license; see LICENSE.
3 |
4 | # REQUIRES lib/tamsin_scanner.tamsin
5 |
6 | # Note that this does not produce any data as a result beyond "yes, it
7 | # parsed" or "no, there was a syntax error". This exists to provide a
8 | # clean, readable grammar. For actual use, see lib/tamsin_parser.tamsin,
9 | # which parses a Tamsin program to an AST.
10 |
11 | # If there is any discrepancy between the language this grammar accepts,
12 | # and the language lib/tamsin_parser.tamsin accepts, lib/tamsin_parser.tamsin
13 | # takes precedence.
14 |
15 | # Note that this may contain support for some features which are not in
16 | # the current released or pre-released version.
17 |
18 | main = grammar using tamsin_scanner:scanner.  # recognize only: tokens come from tamsin_scanner:scanner
19 |
20 | grammar = {"@" & pragma & "."} &
21 | {module | production & "."} & eof & 'ok'.  # the whole input must be consumed; the result is the atom 'ok'
22 | module = word & "{" & {production & "."} & "}".
23 | production = word & ["(" & term & {"," & term} & ")"  # optional formals: a term list, or a bracketed expression
24 | | "[" & expr0 & "]"] & "=" & expr0.
25 | expr0 = expr1 & {("|" | "||") & expr1}.  # alternation: loosest-binding operator
26 | expr1 = expr2 & {("&" | "&&") & expr2}.  # sequencing
27 | expr2 = expr3 & ["using" & prodref | "@" & texpr].
28 | expr3 = expr4 & [("→" | "->") & variable].
29 | expr4 = expr5 & ["/" & texpr & ["/" & term]].
30 | expr5 = "(" & expr0 & ")"
31 | | "[" & expr0 & "]"
32 | | "{" & expr0 & "}"
33 | | "!" & expr5
34 | | "set" & variable & "=" & texpr
35 | | "return" & texpr
36 | | "fail" & texpr
37 | | "print" & texpr
38 | | terminal
39 | | variable & [("←" | "<-") & texpr]
40 | | sq_string
41 | | prodref & ["(" & texpr & {"," & texpr} & ")"].
42 | texpr = term & {"+" & term}.
43 | term = atom & ["(" & [term & {"," & term}] & ")"]
44 | | "[" & [term & {"," & term}] & ["|" & term] & "]"  # list sugar with an optional "|" tail
45 | | variable.
46 | atom = word | sq_string.
47 | terminal = dq_string
48 | | ("«" | "<<") & texpr & ("»" | ">>").
49 | prodref = modref & ":" & word
50 | | ":" & word
51 | | word.
52 | modref = "$" | word.
53 | pragma = "alias" & word & word & "=" & prodref
54 | | "unalias" & word.
55 | # Token classes, tested against the tokens delivered by the scanner.
56 | word = $:alnum.
57 | variable = $:upper.
58 | sq_string = $:startswith('\'').
59 | dq_string = $:startswith('"').
60 |
--------------------------------------------------------------------------------
/mains/micro-tamsin.tamsin:
--------------------------------------------------------------------------------
1 | # Interpreter for "Micro-Tamsin", written in Tamsin.
2 | # (see doc/Micro-Tamsin.markdown.)
3 | # Distributed under a BSD-style license; see LICENSE.
4 |
5 | # REQUIRES lib/tamsin_scanner.tamsin
6 | # REQUIRES lib/tamsin_parser.tamsin
7 |
8 | main = tamsin_parser:parse → AST & tamsin_scanner:skippable & "/" &  # program text, then a literal "/"; the interpreted program reads what follows
9 | interpret(AST, AST).  # first argument: whole program (for lookups); second: the part being interpreted
10 |
11 | interpret(P, program(L)) =  # P is always the whole program AST, kept for production lookups
12 | tamsin_parser:find_production_global('main', 'main', P) → Main &  # execution starts at main:main
13 | interpret(P, Main).
14 |
15 | interpret(P, production(N, list(prodbranch(Fs, Ls, E), nil))) = interpret(P, E).  # single-branch productions only; formals Fs and locals Ls are ignored
16 |
17 | interpret(P, call(prodref('$', 'return'), list(atom(X), nil))) = return X.  # the three builtins below match only a single atom argument
18 | interpret(P, call(prodref('$', 'expect'), list(atom(X), nil))) = «X».
19 | interpret(P, call(prodref('$', 'print'), list(atom(X), nil))) = print X.
20 | interpret(P, call(prodref('', N), A)) =  # an unqualified name is looked up in module 'main'
21 | interpret(P, call(prodref('main', N), A)).
22 | interpret(P, call(prodref(M, N), A)) =  # any other call: find the production and run its body; arguments A are not used
23 | tamsin_parser:find_production_global(M, N, P) → Prod &
24 | interpret(P, Prod).
25 |
26 | interpret(P, or(L, R)) = interpret(P, L) | interpret(P, R).  # each combinator maps directly onto the host Tamsin construct
27 | interpret(P, and(L, R)) = interpret(P, L) & interpret(P, R).
28 | interpret(P, not(X)) = !interpret(P, X).
29 | interpret(P, while(X)) = {interpret(P, X)}.
30 |
--------------------------------------------------------------------------------
/mains/mini-tamsin.tamsin:
--------------------------------------------------------------------------------
1 | # Interpreter for "Mini-Tamsin", written in Tamsin.
2 | # (see doc/Mini-Tamsin.markdown.)
3 | # Distributed under a BSD-style license; see LICENSE.
4 |
5 | # REQUIRES lib/tamsin_scanner.tamsin
6 | # REQUIRES lib/tamsin_parser.tamsin
7 |
8 | main = tamsin_parser:parse → AST & tamsin_scanner:skippable & "/" &  # program text, then a literal "/"; the interpreted program reads what follows
9 | new_state → S &  # start from the empty state()
10 | interpret(AST, S, AST).  # arguments: whole program, state, part being interpreted
11 |
12 | #
13 | # FIXME there are several rather major shortcomings with this, still!
14 | #
15 |
16 | new_state = return state().  # the initial state is the term state() with no subterms
17 |
18 | #
19 | # interpret(EntireProgram, State, CurrentProgramPart)
20 | # returns a pair(Result, NewState)
21 | #
22 | interpret(P, S, program(L)) =
23 | tamsin_parser:find_production_global('main', 'main', P) → Main &  # execution starts at main:main
24 | new_state → S &  # the incoming S is replaced by a fresh state
25 | interpret(P, S, Main).
26 |
27 | interpret(P, S, production(N, list(prodbranch(Fs, Ls, E), nil))) =  # single-branch productions only; Fs and Ls are ignored
28 | interpret(P, S, E).
29 |
30 | interpret(P, S, call(prodref('$', 'return'), list(atom(X), nil))) =
31 | return pair(X, S).  # result X, state unchanged
32 |
33 | interpret(P, S, call(prodref('$', 'expect'), list(atom(X), nil))) =
34 | «X» → R & # FIXME this isn't going to work if «X» fails, is it.
35 | return pair(R, S).
36 |
37 | interpret(P, S, call(prodref('$', 'print'), list(atom(X), nil))) =
38 | print X &
39 | return pair(X, S).
40 |
41 | interpret(P, S, call(prodref('', N), A)) =  # an unqualified name is looked up in module 'main'
42 | interpret(P, S, call(prodref('main', N), A)).
43 |
44 | interpret(P, S, call(prodref(M, N), A)) =  # arguments A are not used
45 | tamsin_parser:find_production_global(M, N, P) → Prod &
46 | new_state → S2 &  # the callee runs in a fresh state; the caller's S is not passed on
47 | interpret(P, S2, Prod).
48 |
49 | interpret(P, S, or(L, R)) =
50 | interpret(P, S, L) → pair(Res, S2) &  # "&" binds tighter than "|": lines 50-51 up to the "|" form the first alternative
51 | (Res & return pair(Res, S2)) | interpret(P, S, R).
52 | # FIXME what happens to S? I think this is right though
53 |
54 | interpret(P, S, and(L, R)) =
55 | interpret(P, S, L) → pair(Res, S2) &  # the left result Res is discarded; its state S2 feeds the right side
56 | interpret(P, S2, R).
57 |
58 | # interpret(P, S, not(X)) = !interpret(P, S, X).
59 |
60 | interpret(P, S, while(X)) =
61 | {interpret(P, S, X) → pair(Res, S2) & set S = S2}.  # each iteration's state S2 becomes S for the next iteration
62 |
--------------------------------------------------------------------------------
/mains/parser.tamsin:
--------------------------------------------------------------------------------
1 | # REQUIRES lib/tamsin_scanner.tamsin
2 | # REQUIRES lib/tamsin_parser.tamsin
3 |
4 | main = tamsin_parser:parse → P & tamsin_scanner:skippable & eof & $:repr(P).  # parse, require only whitespace/comments before end of input, result is $:repr of the AST
5 |
--------------------------------------------------------------------------------
/mains/scanner.tamsin:
--------------------------------------------------------------------------------
1 | # REQUIRES lib/tamsin_scanner.tamsin
2 |
3 | main = {tamsin_scanner:scan -> T & $:repr(T) -> T & print T} & ''.  # print $:repr of each token until scan fails; main's own result is the empty atom
4 |
--------------------------------------------------------------------------------
/src/tamsin/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/catseye/Tamsin/cfc9a7270773658a1cddb017aaaf4856939c328c/src/tamsin/__init__.py
--------------------------------------------------------------------------------
/src/tamsin/analyzer.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
4 | # Distributed under a BSD-style license; see LICENSE for more information.
5 |
6 | from tamsin.ast import (
7 | Program, Module, Production, ProdBranch,
8 | And, Or, Not, While, Call, Send, Set,
9 | Using, On, Concat, Prodref,
10 | TermNode, VariableNode, PatternVariableNode, AtomNode, ConstructorNode
11 | )
12 | from tamsin.term import Term
13 | from tamsin.event import EventProducer
14 |
15 |
16 | class Analyzer(EventProducer):
17 | """The Analyzer takes a desugared AST, walks it, and returns a new AST.
18 | It is responsible for:
19 |
20 | * Finding the set of local variable names used in each production branch
21 | and sticking that in the locals_ field of the new ProdBranch node.
22 | * Resolving any '' modules in Prodrefs to the name of the current
23 | module.
24 |
25 | * Looking for undefined nonterminals and raising an error if such found.
26 | (this is done at the end by analyze_prodrefs)
27 |
28 | TODO: it should also find any locals that are accessed before being set
29 | TODO: it should also look for a mismatch in # of formals
30 | """
31 | def __init__(self, program, listeners=None):
32 | self.listeners = listeners
33 | self.program = program
34 | self.current_module = None  # set only while a Module is being analyzed
35 | # analyze(ast): dispatch on the node class and return a rebuilt node; TermNodes are returned as-is.
36 | def analyze(self, ast):
37 | if isinstance(ast, Program):
38 | modlist = []
39 | for mod in ast.modlist:
40 | mod = self.analyze(mod)
41 | modlist.append(mod)
42 | self.program = Program(modlist)  # analyze_prodrefs looks modules up through self.program
43 | self.analyze_prodrefs(self.program)
44 | return self.program
45 | elif isinstance(ast, Module):
46 | self.current_module = ast  # lets the Prodref case below resolve '' to this module's name
47 | prodlist = []
48 | for prod in ast.prodlist:
49 | prodlist.append(self.analyze(prod))
50 | self.current_module = None
51 | return Module(ast.name, prodlist)
52 | elif isinstance(ast, Production):
53 | branches = []
54 | for b in ast.branches:
55 | branches.append(self.analyze(b))
56 | return Production(ast.name, branches)
57 | elif isinstance(ast, ProdBranch):
58 | locals_ = []
59 | body = self.analyze(ast.body)
60 | self.collect_locals(body, locals_)  # fills locals_ in place from the analyzed body
61 | return ProdBranch(ast.formals, locals_, body)
62 | elif isinstance(ast, Or):
63 | return Or(self.analyze(ast.lhs), self.analyze(ast.rhs))
64 | elif isinstance(ast, And):
65 | return And(self.analyze(ast.lhs), self.analyze(ast.rhs))
66 | elif isinstance(ast, Using):
67 | return Using(self.analyze(ast.rule), self.analyze(ast.prodref))
68 | elif isinstance(ast, On):
69 | return On(self.analyze(ast.rule), self.analyze(ast.texpr))
70 | elif isinstance(ast, Call):
71 | return Call(self.analyze(ast.prodref), ast.args)  # args are carried over without being analyzed
72 | elif isinstance(ast, Send):
73 | assert isinstance(ast.pattern, TermNode), ast
74 | return Send(self.analyze(ast.rule), self.analyze(ast.pattern))
75 | elif isinstance(ast, Set):
76 | assert isinstance(ast.variable, VariableNode), ast
77 | return Set(ast.variable, self.analyze(ast.texpr))
78 | elif isinstance(ast, Not):
79 | return Not(self.analyze(ast.rule))
80 | elif isinstance(ast, While):
81 | return While(self.analyze(ast.rule))
82 | elif isinstance(ast, Concat):
83 | return Concat(self.analyze(ast.lhs), self.analyze(ast.rhs))
84 | elif isinstance(ast, TermNode):
85 | return ast
86 | elif isinstance(ast, Prodref):
87 | module = ast.module
88 | if module == '':  # unqualified reference: use the name of the module being analyzed
89 | module = self.current_module.name
90 | new = Prodref(module, ast.name)
91 | return new
92 | else:
93 | raise NotImplementedError(repr(ast))
94 | # collect_locals(ast, locals_): append to locals_, in place, each variable name found in ast that is not already listed.
95 | def collect_locals(self, ast, locals_):
96 | """locals_ should be a list."""
97 |
98 | if isinstance(ast, ProdBranch):
99 | self.collect_locals(ast.body, locals_)
100 | elif (isinstance(ast, And) or isinstance(ast, Or) or
101 | isinstance(ast, Concat)):
102 | self.collect_locals(ast.lhs, locals_)
103 | self.collect_locals(ast.rhs, locals_)
104 | elif isinstance(ast, Using):
105 | self.collect_locals(ast.rule, locals_)  # ast.prodref is not searched
106 | elif isinstance(ast, On):
107 | self.collect_locals(ast.rule, locals_)
108 | self.collect_locals(ast.texpr, locals_)
109 | elif isinstance(ast, Call):
110 | pass  # variables appearing in call arguments are not collected
111 | elif isinstance(ast, Send):
112 | self.collect_locals(ast.pattern, locals_)
113 | self.collect_locals(ast.rule, locals_)
114 | elif isinstance(ast, Set):
115 | self.collect_locals(ast.variable, locals_)
116 | self.collect_locals(ast.texpr, locals_)
117 | elif isinstance(ast, Not) or isinstance(ast, While):
118 | self.collect_locals(ast.rule, locals_)
119 | elif isinstance(ast, VariableNode):
120 | if ast.name not in locals_:
121 | locals_.append(ast.name)
122 | elif isinstance(ast, PatternVariableNode):
123 | # will probably be needed for Send(). but, not sure. FIXME
124 | if ast.name not in locals_:
125 | locals_.append(ast.name)
126 | elif isinstance(ast, ConstructorNode):
127 | for sub in ast.contents:
128 | self.collect_locals(sub, locals_)
129 | elif isinstance(ast, AtomNode):
130 | pass
131 | else:
132 | raise NotImplementedError(repr(ast))
133 | # analyze_prodrefs(ast): raise KeyError for any Prodref that names a module or production self.program does not define.
134 | def analyze_prodrefs(self, ast):
135 | """does not return anything"""
136 | if isinstance(ast, Program):
137 | for mod in ast.modlist:
138 | self.analyze_prodrefs(mod)
139 | elif isinstance(ast, Module):
140 | for prod in ast.prodlist:
141 | self.analyze_prodrefs(prod)
142 | elif isinstance(ast, Production):
143 | for b in ast.branches:
144 | self.analyze_prodrefs(b)
145 | elif isinstance(ast, ProdBranch):
146 | self.analyze_prodrefs(ast.body)
147 | elif isinstance(ast, Or) or isinstance(ast, And):
148 | self.analyze_prodrefs(ast.lhs)
149 | self.analyze_prodrefs(ast.rhs)
150 | elif isinstance(ast, Using):
151 | self.analyze_prodrefs(ast.rule)
152 | self.analyze_prodrefs(ast.prodref)
153 | elif isinstance(ast, On):
154 | self.analyze_prodrefs(ast.rule)  # ast.texpr is not visited
155 | elif isinstance(ast, Call):
156 | self.analyze_prodrefs(ast.prodref)  # ast.args are not visited
157 | elif isinstance(ast, Send):
158 | self.analyze_prodrefs(ast.rule)
159 | elif isinstance(ast, Set):
160 | pass
161 | elif isinstance(ast, Not):
162 | self.analyze_prodrefs(ast.rule)
163 | elif isinstance(ast, While):
164 | self.analyze_prodrefs(ast.rule)
165 | elif isinstance(ast, Concat):
166 | pass
167 | elif isinstance(ast, Term):  # NOTE(review): analyze() tests TermNode, not Term — confirm this is the intended class
168 | pass
169 | elif isinstance(ast, Prodref):
170 | assert ast.module != '', repr(ast)  # analyze() must already have resolved '' modules
171 | if ast.module == '$':
172 | return # TODO: also check builtins?
173 | module = self.program.find_module(ast.module)
174 | if not module:
175 | raise KeyError("no '%s' module defined" % ast.module)
176 | production = module.find_production(ast.name)
177 | if not production:
178 | raise KeyError("no '%s:%s' production defined" %
179 | (ast.module, ast.name)
180 | )
181 | else:
182 | raise NotImplementedError(repr(ast))
183 |
--------------------------------------------------------------------------------
/src/tamsin/backends/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/catseye/Tamsin/cfc9a7270773658a1cddb017aaaf4856939c328c/src/tamsin/backends/__init__.py
--------------------------------------------------------------------------------
/src/tamsin/buffer.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
4 | # Distributed under a BSD-style license; see LICENSE for more information.
5 |
6 | import sys
7 |
8 |
9 | class Buffer(object):
10 | """Abstract base class for all Buffer objects.
11 |
12 | Buffer objects are mutable, but must be capable of saving and restoring
13 | their state indefinitely.
14 |
15 | """
16 | def __init__(self, filename='', position=0, line_number=1, column_number=1):
17 | """If `position` is given, `line_number` and `column_number` should
18 | be given too, to match.
19 |
20 | """
21 | self.filename = filename
22 | self.position = position
23 | self.line_number = line_number
24 | self.column_number = column_number
25 | # save_state / restore_state / pop_state: snapshot interface; this base class only raises NotImplementedError.
26 | def save_state(self):
27 | raise NotImplementedError
28 |
29 | def restore_state(self):
30 | raise NotImplementedError
31 |
32 | def pop_state(self):
33 | raise NotImplementedError
34 |
35 | def advance(self, inp):
36 | """Given a string that we have just consumed from the buffer,
37 | return new line_number and column_number.
38 |
39 | """
40 | line_number = self.line_number  # work on copies: self is not modified here
41 | column_number = self.column_number
42 | for char in inp:
43 | if char == '\n':
44 | line_number += 1
45 | column_number = 1  # a newline starts the next line at column 1
46 | else:
47 | column_number += 1
48 | return (line_number, column_number)
49 |
50 | def chop(self, amount):
51 | """Returns `amount` characters chopped off the front of the buffer.
52 | (The subclasses in this file advance in place and return only them.)
53 |
54 | """
55 | raise NotImplementedError
56 |
57 | def first(self, amount):
58 | """Returns the first `amount` characters in the buffer without
59 | consuming them.  (The subclasses in this file return only them.)
60 |
61 | """
62 | raise NotImplementedError
63 |
64 |
65 | class StringBuffer(Buffer):
66 | def __init__(self, string, **kwargs):
67 | """Create a new StringBuffer object.
68 |
69 | `string` should be a raw string, not unicode. If `position` is given,
70 | `line_number` and `column_number` should be given too, to match.
71 |
72 | """
73 | assert not isinstance(string, unicode)  # `unicode` is the Python 2 builtin type
74 | self.string = string
75 | self.stack = []  # saved (position, line_number, column_number) snapshots
76 | Buffer.__init__(self, **kwargs)
77 | # save_state(): push the current (position, line_number, column_number) snapshot.
78 | def save_state(self):
79 | self.stack.append((self.position, self.line_number, self.column_number))
80 | # restore_state(): pop the most recent snapshot and rewind to it.
81 | def restore_state(self):
82 | (self.position, self.line_number, self.column_number) = self.stack.pop()
83 | # pop_state(): discard the most recent snapshot without rewinding.
84 | def pop_state(self):
85 | self.stack.pop()
86 |
87 | def __str__(self):
88 | return self.string  # the whole underlying string, regardless of position
89 |
90 | def __repr__(self):
91 | return "StringBuffer(%r, filename=%r, position=%r, line_number=%r, column_number=%r)" % (
92 | self.string, self.filename, self.position, self.line_number, self.column_number
93 | )
94 | # chop(amount): consume and return the next `amount` characters; asserts that many remain.
95 | def chop(self, amount):
96 | assert self.position <= len(self.string) - amount, \
97 | "attempt made to chop past end of buffer"
98 | bytes = self.string[self.position:self.position + amount]
99 |
100 | self.position += amount
101 | (self.line_number, self.column_number) = self.advance(bytes)  # keep line/column in step with position
102 |
103 | return bytes
104 | # first(amount): peek without consuming; the slice is shorter than `amount` near the end of the string.
105 | def first(self, amount):
106 | bytes = self.string[self.position:self.position + amount]
107 |
108 | return bytes
109 |
110 |
111 | class FileBuffer(Buffer):
112 | def __init__(self, file, **kwargs):
113 | self.file = file
114 | # stuff we have read out of the file, but need to keep
115 | self.pre_buffer = ''
116 | # the position in the file where we started reading into pre_buffer
117 | self.pre_position = 0
118 | self.stack = []  # saved (position, line_number, column_number) snapshots
119 | Buffer.__init__(self, **kwargs)
120 | # save_state(): push a snapshot; while any snapshot is outstanding, pre_buffer is not truncated.
121 | def save_state(self):
122 | state = (self.position, self.line_number, self.column_number)
123 | self.stack.append(state)
124 | # _truncate_pre_buffer(): with no snapshots outstanding, drop the already-consumed front of pre_buffer.
125 | def _truncate_pre_buffer(self):
126 | if not self.stack and self.position > self.pre_position:
127 | self.pre_buffer = self.pre_buffer[self.position - self.pre_position:]
128 | self.pre_position = self.position
129 | # restore_state(): pop the most recent snapshot and rewind to it.
130 | def restore_state(self):
131 | state = self.stack.pop()
132 | (self.position, self.line_number, self.column_number) = state
133 | self._truncate_pre_buffer()
134 | # pop_state(): discard the most recent snapshot without rewinding.
135 | def pop_state(self):
136 | self.stack.pop()
137 | self._truncate_pre_buffer()
138 | # chop(amount): consume and return up to `amount` characters, reading from the file only for what pre_buffer lacks.
139 | def chop(self, amount):
140 | pos = self.position - self.pre_position  # offset of the current position inside pre_buffer
141 | bytes = self.pre_buffer[pos:pos + amount]
142 | bytes_to_read = amount - len(bytes)
143 | if bytes_to_read > 0:  # pre_buffer is short: read the shortfall from the file
144 | self.pre_buffer += self.file.read(bytes_to_read)
145 | bytes = self.pre_buffer[pos:pos + amount]
146 | #assert len(pre_bytes) == amount # no, b/c what about EOF?
147 |
148 | self.position += amount  # NOTE(review): advances by `amount` even when fewer characters were returned — confirm callers expect this
149 | (self.line_number, self.column_number) = self.advance(bytes)
150 | self._truncate_pre_buffer()
151 | return bytes
152 | # first(amount): peek by chopping between save_state and restore_state, so the characters stay in pre_buffer.
153 | def first(self, amount):
154 | self.save_state()
155 | bytes = self.chop(amount)
156 | self.restore_state()
157 | return bytes
158 |
--------------------------------------------------------------------------------
/src/tamsin/codegen.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
4 | # Distributed under a BSD-style license; see LICENSE for more information.
5 |
6 | from tamsin import ast as ack
7 | from tamsin.ast import AtomNode, VariableNode
8 | from tamsin.codenode import (
9 | CodeNode, Program, Prototype, Subroutine,
10 | Block, If, While, And, Not, Return, Builtin, Call, Truth, Falsity,
11 | DeclareLocal, GetVar, SetVar, Concat, VariableRef,
12 | Unifier, PatternMatch, NoMatch, GetMatchedVar,
13 | DeclState, SaveState, RestoreState,
14 | MkAtom, MkConstructor,
15 | ScannerPushEngine, ScannerPopEngine,
16 | )
17 | from tamsin.term import Atom, Constructor, Variable
18 | import tamsin.sysmod
19 |
20 |
21 | # TODO: is this module responsible for allocating names, or is the backend?
22 | # I think it should probably be this module.
23 | # On the other hand, backend needs to be able to generate temporaries, too.
24 |
25 |
26 | class CodeGen(object):
27 | def __init__(self, program):  # program: the AST whose modules and productions generate() walks
28 | self.program = program
29 | self.name_index = 0  # counter behind new_name()
30 |
31 | def new_name(self):  # returns "var0", "var1", ...: a different name on every call
32 | name = "var%s" % self.name_index
33 | self.name_index += 1
34 | return name
35 |
36 | def generate(self):  # returns a codenode Program: every Prototype first, then every Subroutine
37 | main = self.program.find_production(ack.Prodref('main', 'main'))
38 | if not main:
39 | raise ValueError("no 'main:main' production defined")
40 |
41 | program = Program()
42 | for module in self.program.modlist:  # first pass: one Prototype per production
43 | for prod in module.prodlist:
44 | program.append(
45 | Prototype(module=module, prod=prod, formals=prod.branches[0].formals)  # formals come from the first branch only
46 | )
47 |
48 | for module in self.program.modlist:  # second pass: one Subroutine per production
49 | for prod in module.prodlist:
50 | program.append(
51 | self.gen_subroutine(module, prod, prod.branches[0].formals)
52 | )
53 |
54 | return program
55 |
56 | def gen_subroutine(self, module, prod, formals):  # returns a Subroutine holding a Unifier node and the branch-dispatch code
57 | children = []
58 | s = Subroutine(module, prod, formals, children)  # children is appended to below; presumably Subroutine keeps this same list — confirm
59 | children.append(self.gen_unifier(prod, prod.branches[0])) # becoming so wrong
60 | children.append(self.gen_branches(module, prod, prod.branches))
61 | return s
62 |
63 | def gen_unifier(self, prod, branch):  # returns Unifier(prod.all_pattern_variables) for this branch's formals
64 | prod.all_pattern_variables = []  # stored on prod itself; gen_branch reads it later
65 |
66 | pat_names = []
67 | for fml_num, formal in enumerate(branch.formals):
68 | pat_names.append(self.gen_ast(formal))  # NOTE(review): pat_names and fml_num are not used further in this method
69 |
70 | variables = []
71 | formal.collect_variables(variables)
72 | for variable in variables:
73 | if variable not in prod.all_pattern_variables:  # keep each pattern variable once, in first-seen order
74 | prod.all_pattern_variables.append(variable)
75 |
76 | return Unifier(prod.all_pattern_variables)
77 |
78 | def gen_branches(self, module, prod, branches):  # recursive: If(test for first branch, its code, code for the remaining branches)
79 | if not branches:
80 | return NoMatch(module=module, prod=prod, formals=[])  # base case: no branch left to try
81 | branch = branches[0]
82 | branches = branches[1:]
83 | test = Truth()
84 | for fml_num in xrange(0, len(branch.formals)):  # one PatternMatch node per formal
85 | p = PatternMatch()
86 | # self.emit(" term_match_unifier(%s, i%s, unifier) &&" %
87 | # (pat_names[fml_num], fml_num)
88 | # )
89 | if not test:  # NOTE(review): test starts as Truth(); whether this branch can run depends on CodeNode truthiness — confirm
90 | test = p
91 | else:
92 | test = And(test, p)
93 | return If(test,
94 | self.gen_branch(module, prod, branch),
95 | self.gen_branches(module, prod, branches)
96 | )
97 |
98 | def gen_branch(self, module, prod, branch):  # returns a Block: matched-variable fetches, local declarations, then the body
99 | b = Block()
100 |
101 | # get variables which are found in patterns for this branch
102 | for var in prod.all_pattern_variables:  # this list is set on prod by gen_unifier
103 | #self.emit('const struct term *%s = unifier[%s];' %
104 | # (var.name, var.index)
105 | #)
106 | #self.emit('assert(%s != NULL);' % var.name);
107 | b.append(GetMatchedVar(var))
108 |
109 | all_pattern_variable_names = [x.name for x in prod.all_pattern_variables]
110 | for local in branch.locals_:
111 | if local not in all_pattern_variable_names:  # pattern variables already have a GetMatchedVar above, so they get no DeclareLocal
112 | #self.emit("const struct term *%s;" % local)
113 | b.append(DeclareLocal(local))
114 |
115 | b.append(self.gen_ast(branch.body))
116 | return b
117 |
def gen_ast(self, ast):
    """Translate one AST node into the corresponding tree of code nodes.

    Dispatches on the class of `ast` and recurses into its sub-nodes.
    Raises NotImplementedError for any node class that is not handled.
    """
    if isinstance(ast, ack.And):
        # the rhs only runs when the lhs left 'ok' set
        first = self.gen_ast(ast.lhs)
        second = self.gen_ast(ast.rhs)
        return Block(first, If(GetVar('ok'), second))

    if isinstance(ast, ack.Or):
        # try the lhs; when it did not leave 'ok' set, restore and try the rhs
        first = self.gen_ast(ast.lhs)
        fallback = Block(RestoreState(), self.gen_ast(ast.rhs))
        return Block(
            DeclState(),
            SaveState(),
            first,
            If(Not(GetVar('ok')), fallback)
        )

    if isinstance(ast, ack.Call):
        ref = ast.prodref
        target_module = ref.module or 'main'
        if target_module == '$':
            # builtins take exactly as many arguments as their arity
            node = Builtin(name=ref.name)
            for index in xrange(0, tamsin.sysmod.arity(ref.name)):
                node.append(self.gen_ast(ast.args[index]))
        else:
            node = Call(module=target_module, name=ref.name)
            for arg in ast.args:
                node.append(self.gen_ast(arg))
        return node

    if isinstance(ast, ack.Send):
        rule_code = self.gen_ast(ast.rule)
        # TODO: emit a real pattern match here, by generalizing what the
        # production-branch code does
        target = self.gen_ast(ast.pattern)
        return Block(rule_code, SetVar(target, GetVar('result')))

    if isinstance(ast, ack.Set):
        return SetVar(VariableRef(ast.variable.name), self.gen_ast(ast.texpr))

    if isinstance(ast, ack.While):
        # 'srname' holds the result of the last iteration that left 'ok' set
        iteration = Block(
            SaveState(),
            self.gen_ast(ast.rule),
            If(GetVar('ok'),
                SetVar(VariableRef('srname'), GetVar('result'))
            )
        )
        return Block(
            DeclareLocal('srname', MkAtom('nil')),
            DeclState(),
            SetVar(VariableRef('ok'), Truth()),
            While(GetVar('ok'), iteration),
            RestoreState(),
            SetVar(VariableRef('result'), GetVar('srname')),
            SetVar(VariableRef('ok'), Truth())
        )

    if isinstance(ast, ack.Not):
        rule_code = self.gen_ast(ast.rule)
        when_ok = Block(
            SetVar(VariableRef('ok'), Falsity()),
            SetVar(VariableRef('result'), MkAtom("expected anything else"))
        )
        when_not_ok = Block(
            SetVar(VariableRef('ok'), Truth()),
            SetVar(VariableRef('result'), MkAtom("nil"))
        )
        return Block(
            DeclState(),
            SaveState(),
            rule_code,
            RestoreState(),
            If(GetVar('ok'), when_ok, when_not_ok)
        )

    if isinstance(ast, ack.Using):
        push = ScannerPushEngine(ast.prodref.module, ast.prodref.name)
        return Block(push, self.gen_ast(ast.rule), ScannerPopEngine())

    if isinstance(ast, ack.On):
        # TODO: flatten the term and point the scanner's buffer at it
        # (buffer, size, position, reset_position) before running the rule
        texpr_code = self.gen_ast(ast.texpr)
        rule_code = self.gen_ast(ast.rule)
        return Block(
            texpr_code,
            DeclState(),
            SaveState(),
            rule_code,
            RestoreState()
        )

    if isinstance(ast, ack.Concat):
        left = self.gen_ast(ast.lhs)
        right = self.gen_ast(ast.rhs)
        return Concat(self.new_name(), left, right)

    if isinstance(ast, ack.AtomNode):
        return MkAtom(ast.text)

    if isinstance(ast, ack.VariableNode):
        return VariableRef(ast.name)

    if isinstance(ast, ack.PatternVariableNode):
        return VariableRef(ast.name)

    if isinstance(ast, ack.ConstructorNode):
        # NOTE(review): the constructor's contents are not translated;
        # an empty child list is always passed -- confirm this is intended
        return MkConstructor(ast.text, [])

    raise NotImplementedError(repr(ast))
231 |
--------------------------------------------------------------------------------
/src/tamsin/codenode.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
4 | # Distributed under a BSD-style license; see LICENSE for more information.
5 |
6 |
7 | # TODO: some of these are definitely hierarchical, and some are definitely
8 | # not. make the distinction. make the latter more like 3-address-code.
9 |
10 |
class CodeNode(object):
    """Base class for the nodes of a target-program tree.

    Positional constructor arguments are kept, in order, in `self.args`
    (and can be added to with `append`); keyword arguments are kept in
    `self.kwargs`.  Indexing a node looks the key up in the keyword
    arguments first and falls back to the positional arguments.
    """
    def __init__(self, *args, **kwargs):
        self.args = list(args)
        self.kwargs = kwargs

    def append(self, item):
        """Add `item` as the last positional argument of this node."""
        self.args.append(item)

    def __getitem__(self, key):
        """Return the keyword argument named `key` if there is one,
        otherwise the positional argument at index `key`."""
        if key in self.kwargs:
            return self.kwargs[key]
        return self.args[key]

    def __repr__(self):
        # Build one flat list of parts so that no dangling ", " is left
        # behind when there are positional arguments but no keyword
        # arguments.  Keyword arguments are sorted so that the output does
        # not depend on dict ordering.
        parts = [repr(a) for a in self.args]
        parts.extend(
            '%s=%r' % (key, self.kwargs[key]) for key in sorted(self.kwargs)
        )
        return "%s(%s)" % (self.__class__.__name__, ', '.join(parts))
30 |
31 |
class Program(CodeNode):
    """A complete target program."""


class Prototype(CodeNode):
    """The prototype of a subroutine in a target program."""
40 |
41 |
class Subroutine(CodeNode):
    """A subroutine in a target program.

    Stores `module`, `prod`, `formals` and `children` as attributes.
    Note that CodeNode.__init__ is not called, so `args` and `kwargs`
    (and therefore `append` and indexing) are not available here.
    """
    def __init__(self, module, prod, formals, children):
        self.module, self.prod = module, prod
        self.formals, self.children = formals, children

    def __repr__(self):
        fields = (self.module, self.prod, self.formals, self.children)
        return "Subroutine(%r, %r, %r, %r)" % fields
54 |
55 |
class Block(CodeNode):
    """A sequence of code nodes; the code generator builds these both
    from constructor arguments and by calling append()."""
    pass


class If(CodeNode):
    """A conditional.  The code generator constructs it as
    If(test, then) or If(test, then, else)."""
    pass


class While(CodeNode):
    """A loop.  The code generator constructs it as While(test, body)."""
    pass


class And(CodeNode):
    """Conjunction of two tests; constructed as And(lhs, rhs)."""
    pass


class Not(CodeNode):
    """Negation of a test; constructed as Not(expr)."""
    pass


class DeclareLocal(CodeNode):
    """Declaration of a local variable.  The code generator constructs it
    as DeclareLocal(name) or DeclareLocal(name, initial_value)."""
    pass
78 |
79 |
class GetVar(CodeNode):
    """Reads a variable of the target program.

    `name` is the name of the target-language variable.
    """
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return "GetVar(%r)" % self.name
87 |
88 |
class SetVar(CodeNode):
    """Assigns to a variable of the target program.

    `ref` is a VariableRef for the target-language variable and `expr`
    is the expression whose value it receives.
    """
    def __init__(self, ref, expr):
        self.ref, self.expr = ref, expr

    def __repr__(self):
        fields = (self.ref, self.expr)
        return "SetVar(%r, %r)" % fields
98 |
99 |
class Concat(CodeNode):
    """Concatenation of `lhs` and `rhs`; `name` is the name the code
    generator allocated for this node."""
    def __init__(self, name, lhs, rhs):
        self.name = name
        self.lhs, self.rhs = lhs, rhs

    def __repr__(self):
        fields = (self.name, self.lhs, self.rhs)
        return "Concat(%r, %r, %r)" % fields
108 |
109 |
# NOTE(review): the classes below add nothing to CodeNode; each one only
# tags a kind of node.  Where a description is given it is based on how the
# code generator constructs the node; undescribed classes are not
# constructed in the code visible alongside this file.

class Unifier(CodeNode):
    pass


class PatternMatch(CodeNode):
    """A test used in the condition that selects a production branch;
    the code generator currently constructs it with no arguments."""
    pass


class Return(CodeNode):
    pass


class DeclState(CodeNode):
    """Emitted before SaveState() wherever the code generator needs
    state that can later be restored (presumably declares the storage for
    it -- confirm against the backend)."""
    pass


class SaveState(CodeNode):
    """Emitted before code that may need to be undone; paired with
    RestoreState()."""
    pass


class RestoreState(CodeNode):
    """Counterpart of SaveState()."""
    pass


class Builtin(CodeNode):
    """A call to a production of the `$` module.  Constructed as
    Builtin(name=...); the arguments are appended afterwards."""
    pass


class Call(CodeNode):
    """A call to a user-defined production.  Constructed as
    Call(module=..., name=...); the arguments are appended afterwards."""
    pass


class NoMatch(CodeNode):
    pass


class Truth(CodeNode):
    """The true constant (e.g. assigned to the 'ok' variable)."""
    pass


class Falsity(CodeNode):
    """The false constant (e.g. assigned to the 'ok' variable)."""
    pass


class VariableRef(CodeNode):
    """A reference to a variable; constructed as VariableRef(name)."""
    pass


class MkAtom(CodeNode):
    """Code that makes an atom; constructed as MkAtom(text)."""
    pass
160 |
161 |
class MkConstructor(CodeNode):
    """Code in the target program that makes a constructor term with the
    tag `text` and the sub-terms `children`."""
    def __init__(self, text, children):
        self.text, self.children = text, children

    def __repr__(self):
        fields = (self.text, self.children)
        return "MkConstructor(%r, %r)" % fields
172 |
class ScannerPushEngine(CodeNode):
    """Emitted before the rule of a `using`; the code generator constructs
    it as ScannerPushEngine(module, name) from the `using`'s prodref."""
    pass


class ScannerPopEngine(CodeNode):
    """Emitted after the rule of a `using`; counterpart of
    ScannerPushEngine."""
    pass


class GetMatchedVar(CodeNode):
    """Emitted once per pattern variable at the start of a branch;
    constructed as GetMatchedVar(var)."""
    pass
183 |
--------------------------------------------------------------------------------
/src/tamsin/desugarer.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
4 | # Distributed under a BSD-style license; see LICENSE for more information.
5 |
6 | from tamsin.ast import (
7 | Program, Module, Production, ProdBranch,
8 | And, Or, Not, While, Call, Send, Set,
9 | Using, On, Concat, Fold, Prodref,
10 | TermNode, VariableNode, PatternVariableNode, AtomNode, ConstructorNode
11 | )
12 | from tamsin.event import EventProducer
13 |
14 |
class Desugarer(EventProducer):
    """Walks an AST and returns a new, desugared AST.

    The Desugarer:

    * rewrites Fold() nodes in terms of Set, Send, While and Call;
    * merges the branches of same-named productions in a module into a
      single Production();
    * turns VariableNode()s that occur inside a pattern into
      PatternVariableNode()s, numbering them in order of appearance.

    """
    def __init__(self, program, listeners=None):
        self.listeners = listeners
        self.program = program
        # True while the node being desugared is part of a pattern
        self.pattern = False
        # index to give to the next pattern variable encountered
        self.index = 0

    def desugar(self, ast):
        """Return the desugared version of `ast`.

        Raises NotImplementedError for an unrecognized node class.
        """
        if isinstance(ast, Program):
            return Program([self.desugar(mod) for mod in ast.modlist])

        if isinstance(ast, Module):
            merged = []
            for original in ast.prodlist:
                new_prod = self.desugar(original)
                # look for an earlier production with the same name
                for existing in merged:
                    if existing.name == new_prod.name:
                        existing.branches.extend(new_prod.branches)
                        break
                else:
                    merged.append(new_prod)
            return Module(ast.name, merged)

        if isinstance(ast, Production):
            return Production(
                ast.name, [self.desugar(branch) for branch in ast.branches]
            )

        if isinstance(ast, ProdBranch):
            # formals are patterns; numbering restarts for each branch
            self.pattern = True
            self.index = 0
            new_formals = [self.desugar(formal) for formal in ast.formals]
            self.pattern = False
            new_body = self.desugar(ast.body)
            return ProdBranch(new_formals, [], new_body)

        if isinstance(ast, Or):
            left = self.desugar(ast.lhs)
            right = self.desugar(ast.rhs)
            return Or(left, right)

        if isinstance(ast, And):
            left = self.desugar(ast.lhs)
            right = self.desugar(ast.rhs)
            return And(left, right)

        if isinstance(ast, Using):
            return Using(self.desugar(ast.rule), ast.prodref)

        if isinstance(ast, On):
            new_rule = self.desugar(ast.rule)
            new_texpr = self.desugar(ast.texpr)
            return On(new_rule, new_texpr)

        if isinstance(ast, Call):
            return ast

        if isinstance(ast, Send):
            # the target of a send is a pattern
            self.pattern = True
            new_pattern = self.desugar(ast.pattern)
            self.pattern = False
            return Send(self.desugar(ast.rule), new_pattern)

        if isinstance(ast, Set):
            return Set(ast.variable, self.desugar(ast.texpr))

        if isinstance(ast, Not):
            return Not(self.desugar(ast.rule))

        if isinstance(ast, While):
            return While(self.desugar(ast.rule))

        if isinstance(ast, Concat):
            left = self.desugar(ast.lhs)
            right = self.desugar(ast.rhs)
            return Concat(left, right)

        if isinstance(ast, AtomNode):
            return ast

        if isinstance(ast, ConstructorNode):
            return ConstructorNode(
                ast.text, [self.desugar(child) for child in ast.contents]
            )

        if isinstance(ast, VariableNode):
            if not self.pattern:
                return ast
            assigned = self.index
            self.index += 1
            return PatternVariableNode(ast.name, assigned)

        if isinstance(ast, Fold):
            # _1 is the accumulator, _2 receives each result of the rule
            accumulator = VariableNode('_1')
            item = VariableNode('_2')
            initialize = Set(accumulator, ast.initial)
            receive = Send(self.desugar(ast.rule), item)
            if ast.tag is None:
                accumulate = Set(accumulator, Concat(accumulator, item))
            else:
                assert isinstance(ast.tag, AtomNode)
                accumulate = Set(
                    accumulator,
                    ConstructorNode(ast.tag.text, [item, accumulator])
                )
            finish = Call(Prodref('$', 'return'), [accumulator])
            loop = While(And(receive, accumulate))
            return And(And(initialize, loop), finish)

        raise NotImplementedError(repr(ast))
112 |
--------------------------------------------------------------------------------
/src/tamsin/event.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
4 | # Distributed under a BSD-style license; see LICENSE for more information.
5 |
6 | import sys
7 |
8 |
class EventProducer(object):
    """Mixin for objects that announce events to a list of listeners.

    Listeners are kept in `self.listeners`.  The attribute may be None, or
    may never have been assigned at all; in both cases it is replaced with
    a fresh empty list the first time it is needed.
    """
    def _listener_list(self):
        """Return `self.listeners`, creating the list if necessary."""
        # getattr: do not require that an __init__ has set the attribute
        if getattr(self, 'listeners', None) is None:
            self.listeners = []
        return self.listeners

    def event(self, tag, *data):
        """Call announce(tag, *data) on every subscribed listener."""
        for listener in self._listener_list():
            listener.announce(tag, *data)

    def subscribe(self, listener):
        """Add `listener` to the objects that will be told of events."""
        self._listener_list().append(listener)
20 |
21 |
class DebugEventListener(object):
    """Listener that prints every event it is told about to stdout.

    Output is indented by one space per level of interpreter nesting, as
    tracked through 'enter_interpreter' and 'leave_interpreter' events.
    """
    def __init__(self):
        self.indent = 0

    def listen_to(self, producer):
        """Subscribe this listener to `producer`."""
        producer.subscribe(self)

    def putstr(self, s):
        """Print `s` at the current indentation and flush stdout."""
        print(self.indent * ' ' + s)
        sys.stdout.flush()

    def announce(self, tag, *data):
        """Print the event, then dump every datum that has a dump() method.

        The indentation is adjusted before printing, so an 'enter' event is
        shown at the new depth and a 'leave' event at the outer depth.
        """
        if tag == 'enter_interpreter':
            self.indent += 1
        if tag == 'leave_interpreter':
            self.indent -= 1

        # Every event is reported.  (The tag-based filters that used to
        # follow this point could never run, because of the unconditional
        # return, and have been removed.)
        self.putstr("%s %r" % (tag, data))
        for d in data:
            if getattr(d, 'dump', None) is not None:
                d.dump(self.indent)
64 |
--------------------------------------------------------------------------------
/src/tamsin/interpreter.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
4 | # Distributed under a BSD-style license; see LICENSE for more information.
5 |
6 |
7 | from tamsin.ast import (
8 | Production, And, Or, Not, While, Call, Send, Set, Using, On,
9 | Prodref, Concat, TermNode
10 | )
11 | from tamsin.buffer import StringBuffer
12 | from tamsin.term import Term, Atom
13 | from tamsin.event import EventProducer
14 | from tamsin.scanner import (
15 | ByteScannerEngine, UTF8ScannerEngine, ProductionScannerEngine
16 | )
17 | import tamsin.sysmod
18 |
19 |
class Context(EventProducer):
    """A stack of variable scopes, each a dict from name to Term.

    Lookups and stores only ever touch the innermost (most recently
    pushed) scope.
    """
    def __init__(self, listeners=None):
        self.listeners = listeners
        self.scopes = []

    def __repr__(self):
        return "Context(%r)" % (self.scopes,)

    def push_scope(self, purpose):
        """Open a new, empty innermost scope.  `purpose` is not used."""
        self.scopes.append({})
        self.event('push_scope', self)

    def pop_scope(self, purpose):
        """Discard the innermost scope.  `purpose` is not used."""
        self.scopes.pop()
        self.event('pop_scope', self)

    def clone(self):
        """Return a new Context with a copy of each scope dict; the
        listeners are shared with this Context."""
        duplicate = Context(listeners=self.listeners)
        duplicate.scopes.extend(scope.copy() for scope in self.scopes)
        return duplicate

    def fetch(self, name):
        """Return the value bound to `name` in the innermost scope.
        Raises KeyError if `name` is not bound there."""
        innermost = self.scopes[-1]
        self.event('fetch', name, innermost.get(name, 'undefined'), innermost)
        return innermost[name]

    def store(self, name, value):
        """Bind `name` to `value`, which must be a Term, in the innermost
        scope."""
        assert(isinstance(value, Term)), "not a Term: %r" % value
        innermost = self.scopes[-1]
        self.event('store', name, innermost.get(name, 'undefined'), value)
        innermost[name] = value
56 |
57 |
class Interpreter(EventProducer):
    """Tree-walking interpreter for a desugared Tamsin AST.

    Holds the program being run, the scanner that supplies input tokens,
    and a Context holding the variable bindings.
    """
    def __init__(self, program, scanner, listeners=None):
        self.listeners = listeners
        self.program = program
        self.scanner = scanner
        self.context = Context(listeners=self.listeners)

    def __repr__(self):
        return "Interpreter(%r, %r, %r)" % (
            self.program, self.scanner, self.context
        )

    ### interpreter proper ---------------------------------- ###

    def interpret_program(self, program):
        """Interpret the 'main' production of the 'main' module of
        `program`.  Raises ValueError if there is no such production."""
        main = program.find_production(Prodref('main', 'main'))
        if not main:
            raise ValueError("no 'main:main' production defined")
        return self.interpret(main)

    def interpret(self, ast, args=None):
        """Returns a pair (bool, result) where bool is True if it
        succeeded and False if it failed.

        `args` is only used when `ast` is a Production; it is the list of
        argument terms that the formals of each branch are matched against.

        """
        self.event('interpret_ast', ast)
        if isinstance(ast, Production):
            name = ast.name
            bindings = False
            branch = None
            # pick the first branch whose formals match the arguments
            for b in ast.branches:
                formals = [self.interpret(f)[1] for f in b.formals]
                self.event('call_args', formals, args)
                if isinstance(formals, list):
                    bindings = Term.match_all(formals, args)
                    self.event('call_bindings', bindings)
                    if bindings != False:
                        branch = b
                        break
                # else:
                #     self.event('call_newfangled_parsing_args', prod)
                #     # start a new scope.  arg bindings will appear here.
                #     self.context.push_scope(prod.name)
                #     (success, result) = self.interpret_on_buffer(
                #         formals, unicode(args[0])
                #     )
                #     # we do not want to start a new scope here, and we
                #     # interpret the rule directly, not the prod.
                #     if success:
                #         self.event('begin_interpret_rule', prod.body)
                #         (success, result) = self.interpret(prod.body)
                #         self.event('end_interpret_rule', prod.body)
                #         self.context.pop_scope(prod.name)
                #         return (success, result)
                #     else:
                #         self.context.pop_scope(prod.name)
            if branch is None:
                raise ValueError("No '%s' production matched arguments %r" %
                    (name, args)
                )

            # the body runs in a fresh scope holding the matched bindings
            self.context.push_scope(name)
            if bindings != False:
                # note: this loop rebinds the local `name`
                for name in bindings.keys():
                    self.context.store(name, bindings[name])
            self.event('begin_interpret_rule', branch.body)
            assert branch.body, repr(ast)
            (success, result) = self.interpret(branch.body)
            self.event('end_interpret_rule', branch.body)
            self.context.pop_scope(ast.name)

            return (success, result)
        elif isinstance(ast, And):
            # sequence: the rhs is only tried if the lhs succeeded
            (success, value_lhs) = self.interpret(ast.lhs)
            if not success:
                return (False, value_lhs)
            (success, value_rhs) = self.interpret(ast.rhs)
            return (success, value_rhs)
        elif isinstance(ast, Or):
            # alternation: remember the context and scanner state so that
            # they can be put back if the lhs fails
            saved_context = self.context.clone()
            self.scanner.save_state()
            self.event('begin_or', ast.lhs, ast.rhs, saved_context)
            (succeeded, result) = self.interpret(ast.lhs)
            if succeeded:
                self.event('succeed_or', result)
                self.scanner.pop_state()
                return (True, result)
            else:
                self.event('fail_or', self.context, self.scanner, result)
                self.context = saved_context
                self.scanner.restore_state("after or")
                return self.interpret(ast.rhs)
        elif isinstance(ast, Call):
            prodref = ast.prodref
            name = prodref.name
            # evaluate the arguments and expand them in the current context
            args = [self.interpret(x)[1] for x in ast.args]
            args = [x.expand(self.context) for x in args]
            for a in args:
                assert isinstance(a, Term)
            # productions of the '$' module are handled by tamsin.sysmod
            if prodref.module == '$':
                return tamsin.sysmod.call(name, self, args)
            prod = self.program.find_production(prodref)
            assert prod is not None, "unresolved: " + repr(prodref)
            self.event('call_candidates', prod)
            return self.interpret(prod, args=args)
        elif isinstance(ast, Send):
            (success, result) = self.interpret(ast.rule)
            #(success, variable) = self.interpret(ast.pattern)  # ... ?
            #self.context.store(variable.name, result)
            # match the rule's result against the pattern and store the
            # resulting bindings
            # NOTE(review): the match is attempted even when the rule
            # failed -- confirm this is intended
            formals = [self.interpret(f)[1] for f in [ast.pattern]]
            bindings = Term.match_all(formals, [result])
            if bindings == False:
                return (False, Atom('nomatch'))
            for name in bindings.keys():
                self.context.store(name, bindings[name])
            return (success, result)
        elif isinstance(ast, Using):
            sub = ast.rule
            prodref = ast.prodref
            scanner_name = prodref.name
            # $:byte and $:utf8 are built in; any other prodref names a
            # production to use as the scanner
            if prodref.module == '$' and scanner_name == 'byte':
                new_engine = ByteScannerEngine()
            elif prodref.module == '$' and scanner_name == 'utf8':
                new_engine = UTF8ScannerEngine()
            else:
                prod = self.program.find_production(prodref)
                if not prod:
                    raise ValueError("No such scanner '%s'" % scanner_name)
                new_engine = ProductionScannerEngine(self, prod)
            self.scanner.push_engine(new_engine)
            self.event('enter_with')
            (succeeded, result) = self.interpret(sub)
            self.event('leave_with', succeeded, result)
            self.scanner.pop_engine()
            return (succeeded, result)
        elif isinstance(ast, On):
            # interpret the rule with the text of the term installed as
            # the scanner's buffer, then put the previous buffer back
            (success, result) = self.interpret(ast.texpr)
            buffer = str(result.expand(self.context))
            self.event('interpret_on_buffer', buffer)
            previous_buffer = self.scanner.get_buffer()
            self.scanner.install_buffer(StringBuffer(buffer))
            (success, result) = self.interpret(ast.rule)
            self.scanner.install_buffer(previous_buffer)
            return (success, result)
        elif isinstance(ast, Set):
            # always succeeds; the result is the expanded term stored
            (success, variable) = self.interpret(ast.variable)
            (success, term) = self.interpret(ast.texpr)
            result = term.expand(self.context)
            self.context.store(variable.name, result)
            return (True, result)
        elif isinstance(ast, Not):
            expr = ast.rule
            saved_context = self.context.clone()
            self.scanner.save_state()
            self.event('begin_not', expr, saved_context)
            (succeeded, result) = self.interpret(expr)
            # context and scanner state are put back whatever the outcome
            self.context = saved_context
            self.scanner.restore_state("after not")
            if succeeded:
                return (False, Atom(self.scanner.error_message(
                    "anything else", self.scanner.peek()
                )))
            else:
                return (True, Atom('nil'))
        elif isinstance(ast, While):
            result = Atom('nil')
            self.event('begin_while')
            succeeded = True
            # result of the last successful iteration ('nil' if none)
            successful_result = result
            while succeeded:
                saved_context = self.context.clone()
                self.scanner.save_state()
                (succeeded, result) = self.interpret(ast.rule)
                if succeeded:
                    self.scanner.pop_state()
                    successful_result = result
                    self.event('repeating_while', result)
                else:
                    # undo the effects of the failed iteration
                    self.scanner.restore_state("after while")
                    self.context = saved_context
            self.event('end_while', result)
            # a while always succeeds
            return (True, successful_result)
        elif isinstance(ast, Concat):
            # the result is an atom made of the text of both sides
            (success, lhs) = self.interpret(ast.lhs)
            lhs = str(lhs.expand(self.context))
            (success, rhs) = self.interpret(ast.rhs)
            rhs = str(rhs.expand(self.context))
            return (True, Atom(lhs + rhs))
        elif isinstance(ast, TermNode):
            return (True, ast.to_term())
        else:
            raise NotImplementedError(repr(ast))
250 |
--------------------------------------------------------------------------------
/src/tamsin/main.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
4 | # Distributed under a BSD-style license; see LICENSE for more information.
5 |
6 | import os
7 | import subprocess
8 | import sys
9 |
10 | from tamsin.buffer import FileBuffer, StringBuffer
11 | from tamsin.event import DebugEventListener
12 | from tamsin.term import Atom
13 | from tamsin.scanner import (
14 | Scanner, EOF, UTF8ScannerEngine, TamsinScannerEngine
15 | )
16 | from tamsin.parser import Parser
17 | from tamsin.interpreter import Interpreter
18 | from tamsin.desugarer import Desugarer
19 | from tamsin.analyzer import Analyzer
20 | from tamsin.compiler import Compiler # to be replaced by...
21 | from tamsin.codegen import CodeGen
22 | from tamsin.backends.c import Emitter
23 |
24 |
def parse(filename):
    """Parse the Tamsin source file `filename` and return its desugared
    AST."""
    with open(filename, 'r') as source:
        buffer = FileBuffer(source, filename=filename)
        #buffer = StringBuffer(source.read(), filename=filename)
        scanner = Scanner(buffer, engines=(TamsinScannerEngine(),))
        # the grammar is parsed while the file is still open
        raw_ast = Parser(scanner).grammar()
    return Desugarer(raw_ast).desugar(raw_ast)
37 |
38 |
def parse_and_check_args(args):
    """Parse every file named in `args`, fold all of them into the AST of
    the first, and return the result of analyzing that AST."""
    combined = None
    for filename in args:
        parsed = parse(filename)
        if combined is not None:
            combined.incorporate(parsed)
        else:
            combined = parsed
    return Analyzer(combined).analyze(combined)
50 |
51 |
def run(ast, listeners=None):
    """Interpret the program `ast`, taking its input from stdin.

    On success the result is printed to stdout.  On failure the result is
    written to stderr and the process exits with status 1.
    """
    stdin_buffer = FileBuffer(sys.stdin, filename='')
    #stdin_buffer = StringBuffer(sys.stdin.read(), filename='')
    scanner = Scanner(
        stdin_buffer,
        engines=(UTF8ScannerEngine(),),
        listeners=listeners
    )
    interpreter = Interpreter(ast, scanner, listeners=listeners)
    outcome = interpreter.interpret_program(ast)
    (succeeded, result) = outcome
    if succeeded:
        print(str(result))
    else:
        sys.stderr.write(str(result) + "\n")
        sys.exit(1)
67 |
68 |
def _build_executable(ast, tamsin_dir, c_filename, exe_filename):
    """Compile `ast` to C in `c_filename` and build `exe_filename` from it
    with gcc, linking against libtamsin in `tamsin_dir`/c_src.

    Raises subprocess.CalledProcessError if gcc exits with a non-zero
    status.
    """
    with open(c_filename, 'w') as f:
        compiler = Compiler(ast, f)
        compiler.compile()
    c_src_dir = os.path.join(tamsin_dir, 'c_src')
    command = ("gcc", "-g", "-I%s" % c_src_dir, "-L%s" % c_src_dir,
               c_filename, "-o", exe_filename, "-ltamsin")
    subprocess.check_call(command)


def main(args, tamsin_dir='.'):
    """Command-line entry point.

    `args` is the list of command-line arguments, without the program
    name.  An optional leading '--debug' enables event tracing.  The next
    argument may be one of the sub-commands 'scan', 'parse', 'desugar',
    'analyze', 'compile', 'codegen', 'doublecompile' or 'loadngo',
    followed by Tamsin source file name(s); if it is none of these, all
    the arguments are taken to be source files, which are parsed and then
    interpreted.

    `tamsin_dir` is the directory which contains the `c_src` directory
    used when building executables.

    Exits with status 1 when no sub-command or file name is given.
    """
    listeners = []
    if args and args[0] == '--debug':
        listeners.append(DebugEventListener())
        args = args[1:]
    if not args:
        # previously this was an IndexError traceback
        sys.stderr.write(
            "usage: tamsin [--debug] [scan|parse|desugar|analyze|compile|"
            "codegen|doublecompile|loadngo] file.tamsin...\n"
        )
        sys.exit(1)

    if args[0] == 'scan':
        with open(args[1], 'r') as f:
            scanner = Scanner(
                FileBuffer(f, filename=args[1]),
                engines=(TamsinScannerEngine(),),
                listeners=listeners
            )
            tok = None
            while tok is not EOF:
                tok = scanner.scan()
                if tok is not EOF:
                    print(Atom(tok).repr())
            # blank line after the last token
            print('')
    elif args[0] == 'parse':
        parser = Parser.for_file(args[1])
        ast = parser.grammar()
        print(str(ast))
    elif args[0] == 'desugar':
        parser = Parser.for_file(args[1])
        ast = parser.grammar()
        desugarer = Desugarer(ast)
        ast = desugarer.desugar(ast)
        print(str(ast))
    elif args[0] == 'analyze':
        ast = parse_and_check_args(args[1:])
        print(str(ast))
    elif args[0] == 'compile':
        ast = parse_and_check_args(args[1:])
        compiler = Compiler(ast, sys.stdout)
        compiler.compile()
    elif args[0] == 'codegen':
        ast = parse_and_check_args(args[1:])
        generator = CodeGen(ast)
        result = generator.generate()
        emitter = Emitter(result, sys.stdout)
        emitter.go()
    elif args[0] == 'doublecompile':
        # http://www.youtube.com/watch?v=6WxJECOFg8w
        ast = parse_and_check_args(args[1:])
        try:
            _build_executable(ast, tamsin_dir, 'foo.c', './foo')
            exit_code = 0
        except subprocess.CalledProcessError:
            exit_code = 1
        # the C file and the executable are deliberately left in place
        sys.exit(exit_code)
    elif args[0] == 'loadngo':
        ast = parse_and_check_args(args[1:])
        c_filename = 'foo.c'
        exe_filename = './foo'
        try:
            _build_executable(ast, tamsin_dir, c_filename, exe_filename)
            subprocess.check_call((exe_filename,))
            exit_code = 0
        except subprocess.CalledProcessError:
            exit_code = 1
        finally:
            # remove the temporary files even on an unexpected error
            # (e.g. gcc not being installed)
            subprocess.call(('rm', '-f', c_filename, exe_filename))
        sys.exit(exit_code)
    else:
        ast = parse_and_check_args(args)
        run(ast, listeners=listeners)
149 |
--------------------------------------------------------------------------------
/src/tamsin/scanner.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
4 | # Distributed under a BSD-style license; see LICENSE for more information.
5 |
6 | from tamsin.buffer import Buffer
7 | from tamsin.event import EventProducer
8 | from tamsin.term import Term
9 |
10 |
# Unique sentinel that scanner engines return in place of a token when
# there is no more input.  Compare against it by identity (`is EOF`).
EOF = object()
12 |
13 |
class Scanner(EventProducer):
    """Produces tokens from a Buffer, using the most recently pushed
    ScannerEngine to decide what constitutes a token."""

    def __init__(self, buffer, engines=None, listeners=None):
        """Create a new Scanner object.

        `buffer` must be a Buffer.  `engines`, if given, is a sequence of
        ScannerEngines which are pushed in order, so that the last one is
        the one initially in use.

        """
        self.listeners = listeners
        self.event('set_buffer', buffer)
        assert isinstance(buffer, Buffer)
        self.buffer = buffer
        self.engines = []
        if engines is not None:
            for engine in engines:
                self.push_engine(engine)

    def __repr__(self):
        # This class never sets a `position` attribute itself, so only
        # mention one if something else has set it; otherwise formatting
        # a Scanner (e.g. in dump(), or in debug event output) would raise
        # AttributeError.
        position = getattr(self, 'position', None)
        if position is None:
            return "Scanner(%r)" % (self.buffer,)
        return "Scanner(%r, position=%r)" % (self.buffer, position)

    def get_buffer(self):
        """Returns an object which represents the current Buffer of this
        Scanner.

        """
        return self.buffer

    def install_buffer(self, state):
        """Restores the Buffer of this Scanner to that which was saved by
        a previous call to get_buffer().

        """
        self.buffer = state

    def push_engine(self, engine):
        """Make `engine` the ScannerEngine in use."""
        self.engines.append(engine)

    def pop_engine(self):
        """Go back to the engine in use before the last push_engine()."""
        self.engines.pop()

    def save_state(self):
        """Ask the buffer to save its state."""
        return self.buffer.save_state()

    def restore_state(self, reason):
        """Ask the buffer to restore its state.  `reason` is not used."""
        return self.buffer.restore_state()

    def pop_state(self):
        """Ask the buffer to pop its saved state."""
        return self.buffer.pop_state()

    def chop(self, amount):
        """Returns amount characters from the buffer and advances the
        scan position by amount.

        Should only be used by ScannerEngines.

        """
        return self.buffer.chop(amount)

    def first(self, amount):
        """Returns amount characters from the buffer.  Does not advance the
        scan position.

        Should only be used by ScannerEngines, and then only in error
        reporting.

        """
        return self.buffer.first(amount)

    def is_at_eof(self):
        """Returns True iff there is no more input to scan.

        Should only be used by ScannerEngines.

        """
        return self.first(1) == ''

    def is_at_utf8(self):
        """Returns the length in bytes (2, 3 or 4) of the multi-byte UTF-8
        sequence whose lead byte is next in the buffer.  Returns 0 if the
        next byte is not such a lead byte, or if there is no more input.

        Should only be used by ScannerEngines.

        """
        head = self.first(1)
        if not head:
            # nothing left to look at; ord('') would raise TypeError
            return 0
        k = ord(head)
        if k & 0b11100000 == 0b11000000:
            return 2
        elif k & 0b11110000 == 0b11100000:
            return 3
        elif k & 0b11111000 == 0b11110000:
            return 4
        else:
            return 0

    def startswith(self, strings):
        """Returns True iff the buffer starts with any of `strings`, which
        must be a sequence of strings (not a single string)."""
        for s in strings:
            if self.first(len(s)) == s:
                return True
        return False

    def isalnum(self):
        """Returns True iff the next character is alphanumeric."""
        return self.first(1).isalnum()

    def error_message(self, expected, found):
        """Returns a message saying that `expected` was expected but
        `found` (a token, or EOF) was found, with the buffer's line number,
        column number and filename."""
        if found is EOF:
            found = 'EOF'
        else:
            found = "'%s'" % found
        return (
            "expected %s but found %s at line %s, column %s in '%s'" %
            (expected, found,
             self.buffer.line_number,
             self.buffer.column_number,
             self.buffer.filename)
        )

    def error(self, expected, found):
        """Raises ValueError with the message from error_message()."""
        raise ValueError(self.error_message(expected, found))

    def scan(self):
        """Returns the next token from the buffer.

        This method consumes the token.  If you want to just see
        what the next token would be, call peek() instead.

        The returned token will always be a raw string, possibly
        containing UTF-8 sequences, possibly not.

        """
        token = self.engines[-1].scan_impl(self)
        assert not isinstance(token, unicode), repr(token)
        self.event('scanned', self, token)
        return token

    def peek(self):
        """Returns the next token without consuming it."""
        self.buffer.save_state()
        token = self.scan()
        self.buffer.restore_state()
        return token

    def consume(self, t):
        """If the next token is `t`, consumes it and returns it.
        Otherwise leaves the buffer as it was and returns None."""
        if isinstance(t, unicode):
            t = t.encode('UTF-8')
        assert not isinstance(t, unicode)
        self.event('consume', t)
        self.buffer.save_state()
        s = self.scan()
        if s == t:
            self.buffer.pop_state()
            return t
        else:
            self.buffer.restore_state()
            return None

    def expect(self, t):
        """Like consume(), but raises ValueError if the next token is
        not `t`."""
        r = self.consume(t)
        if r is None:
            self.error("'%s'" % t, self.scan())
        return r

    def dump(self, indent=1):
        """Prints this scanner, its engines and its buffer to stdout."""
        print("==" * indent + "%r" % self)
        print("--" * indent + "engines: %r" % repr(self.engines))
        print("--" * indent + "buffer: %r" % self.buffer)
179 |
180 |
class ScannerEngine(object):
    """Base class for the strategies a Scanner uses to extract a token."""

    def scan_impl(self, scanner):
        """Scan one token using `scanner`.  Implementations should always
        return a non-Unicode string."""
        raise NotImplementedError()
185 |
186 |
# Maps each (single-byte) opening quote recognized by TamsinScannerEngine
# to the quote that closes it.
CLOSE_QUOTE = {
    '"': '"',
    '\'': '\'',
}

# Maps the character following a backslash inside a quoted token to the
# character it stands for.  (`\xNN` escapes are handled separately.)
ESCAPE_SEQUENCE = {
    'r': "\r",
    'n': "\n",
    't': "\t",
    "'": "'",
    '"': '"',
    '\\': '\\',
}
200 |
201 |
class TamsinScannerEngine(ScannerEngine):
    """Scanner engine which produces the tokens of Tamsin source code."""

    def scan_impl(self, scanner):
        """Skip whitespace and `#` comments, then return the next token,
        or EOF if there is none.

        Quoted tokens are returned with escape sequences resolved and with
        their opening and closing quotes included.  Raises ValueError (via
        scanner.error) when no token can be recognized.
        """
        # skip any mixture of whitespace and comments
        while not scanner.is_at_eof() and scanner.startswith(('#', ' ', '\t', '\r', '\n')):
            while not scanner.is_at_eof() and scanner.startswith((' ', '\t', '\r', '\n')):
                scanner.chop(1)
            while not scanner.is_at_eof() and scanner.startswith(('#',)):
                # a comment extends to the end of the line
                while not scanner.is_at_eof() and not scanner.startswith(('\n',)):
                    scanner.chop(1)
                if not scanner.is_at_eof():
                    scanner.chop(1)

        if scanner.is_at_eof():
            return EOF

        # two-character symbols must be tried before one-character ones
        if scanner.startswith(('&&', '||', '->', '<-', '<<', '>>')):
            return scanner.chop(2)

        c = scanner.is_at_utf8()
        if c > 0:
            c = scanner.chop(c).decode('UTF-8')
            if c in (u'→', u'←', u'«', u'»'):
                return c.encode('UTF-8')
            elif c == u'“':
                return self.consume_quoted(scanner,
                    u'“'.encode('UTF-8'), u'”'.encode('UTF-8')
                )
            else:
                # NOTE(review): the unrecognized character has already
                # been chopped, so this reports the byte following it
                scanner.error('identifiable character', scanner.first(1))

        if scanner.startswith(('=', '(', ')', '[', ']', '{', '}', '!', ':', '/',
                               '|', '&', ',', '.', '@', '+', '$',
                               )):
            return scanner.chop(1)

        for quote in (CLOSE_QUOTE.keys()):
            # startswith() takes a sequence of candidates, so wrap the
            # quote in a tuple
            if scanner.startswith((quote,)):
                scanner.chop(len(quote))
                return self.consume_quoted(scanner, quote, CLOSE_QUOTE[quote])

        if scanner.isalnum():
            token = ''
            while not scanner.is_at_eof() and (scanner.isalnum() or
                                               scanner.startswith(('_',))):
                token += scanner.chop(1)
            return token

        scanner.error('identifiable character', scanner.first(1))

    def consume_quoted(self, scanner, quote, close_quote):
        """Scan the remainder of a quoted token and return it.

        Assumes the start quote has already been chopped.  The returned
        token starts with `quote` and ends with `close_quote`, with the
        escape sequences between them replaced by the characters they
        stand for.
        """
        token = quote
        # The close quote must be passed as a one-element tuple: passing
        # the bare string makes startswith() test each of its bytes
        # separately, so that any single byte of a multi-byte close quote
        # would end the token.
        while (not scanner.is_at_eof() and
               not scanner.startswith((close_quote,))):
            char = scanner.chop(1)
            if char == '\\':
                char = scanner.chop(1)
                if char in ESCAPE_SEQUENCE:
                    char = ESCAPE_SEQUENCE[char]
                elif char == 'x':
                    # \xNN: a byte given as two hexadecimal digits
                    char = chr(int(scanner.chop(2), 16))
                else:
                    scanner.error('legal escape sequence', '\\' + char)
            token += char
        scanner.chop(len(close_quote))  # chop ending quote
        # we add the specific close quote we expect, in case it was EOF
        token += close_quote
        return token
269 |
270 |
class UTF8ScannerEngine(ScannerEngine):
    """Engine that chops off whatever scanner.is_at_utf8() measures."""

    def scan_impl(self, scanner):
        """Return EOF at end of input, else the next chopped token.

        When scanner.is_at_utf8() reports a positive width, that many
        bytes are chopped; otherwise a single byte is chopped.
        """
        if scanner.is_at_eof():
            return EOF
        width = scanner.is_at_utf8()
        return scanner.chop(width if width > 0 else 1)
279 |
280 |
class ByteScannerEngine(ScannerEngine):
    """Engine whose tokens are whatever scanner.chop(1) yields."""

    def scan_impl(self, scanner):
        """Return EOF at end of input, else the result of chop(1)."""
        return EOF if scanner.is_at_eof() else scanner.chop(1)
286 |
287 |
class ProductionScannerEngine(ScannerEngine):
    """A ScannerEngine that uses a production of the Tamsin program to
    scan the input.

    """
    def __init__(self, interpreter, production):
        self.interpreter = interpreter
        self.production = production

    def scan_impl(self, scanner):
        """Interpret the production and return its result as a token.

        Returns EOF at end of input or when the production fails.  When
        the production succeeds, returns EOF if that is what it
        produced, otherwise str() of the resulting term.
        """
        if scanner.is_at_eof():
            return EOF

        # This will cause the scanner to have another engine pushed onto
        # it.  We rely on that engine to actually get us the token, and it
        # will update the scanner for us.

        assert scanner is self.interpreter.scanner

        # default to this so you don't shoot yourself in the foot
        scanner.push_engine(UTF8ScannerEngine())
        try:
            (success, token) = self.interpreter.interpret(self.production)
        finally:
            # always undo the push above, even if interpreting raised,
            # so the engine stack stays balanced
            scanner.pop_engine()

        if not success:
            return EOF

        self.interpreter.event('production_scan', self.production, token)
        assert isinstance(token, Term), repr(token)
        if token is EOF:
            return token
        return str(token)
323 |
--------------------------------------------------------------------------------
/src/tamsin/sysmod.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
4 | # Distributed under a BSD-style license; see LICENSE for more information.
5 |
6 | # Python version of Tamsin's $ module.
7 |
8 | import sys
9 |
10 | from tamsin.term import Atom, Constructor
11 | from tamsin.scanner import EOF
12 |
13 |
# Maps `$` production names to the differently-spelled Python functions
# in this module that implement them.
TRANSLATOR = {'return': 'return_', 'print': 'print_'}


def _lookup(name):
    """Return the function in this module that implements `$:name`.

    Only callables carrying an `arity` attribute count as
    implementations; other module globals (imports, constants, these
    dispatch helpers) do not.

    Raises NotImplementedError (with the translated name) otherwise.
    """
    name = TRANSLATOR.get(name, name)
    impl = globals().get(name)
    if not callable(impl) or not hasattr(impl, 'arity'):
        raise NotImplementedError(name)
    return impl


def call(name, interpreter, args):
    """Invoke the `$` production `name` with `interpreter` and `args`.

    Returns whatever the implementing function returns.  Raises
    NotImplementedError if there is no such production.
    """
    return _lookup(name)(interpreter, args)


def arity(name):
    """Return the `arity` attribute of the `$` production `name`.

    Raises NotImplementedError if there is no such production.
    """
    return _lookup(name).arity
29 |
30 |
def return_(self, args):
    """Always succeed, yielding the first argument unchanged."""
    value = args[0]
    return (True, value)
return_.arity = 1
34 |
35 |
def fail(self, args):
    """Always fail, using the first argument as the failure value."""
    reason = args[0]
    return (False, reason)
fail.arity = 1
39 |
40 |
def expect(self, args):
    """Consume the token whose text is str(args[0]).

    Returns (True, args[0]) if that token was next in the input;
    otherwise (False, Atom(message)) where the message names the
    expected token and the token that was actually upcoming.
    """
    # Look ahead first so that a failure message can name what was there.
    upcoming_token = self.scanner.peek()
    term = args[0]
    token = str(term)
    # consume() signals failure with None; an empty token that was
    # consumed successfully is falsy but must still count as success.
    if self.scanner.consume(token) is not None:
        return (True, term)
    return (False,
        Atom(self.scanner.error_message("'%s'" % token, upcoming_token))
    )
expect.arity = 1
52 |
53 |
def eof(self, args):
    """Succeed only when the scanner is at the end of the input.

    Returns (True, Atom('')) at end of input, so that the result is a
    term like the result of every other production here; otherwise
    (False, Atom(message)) naming the token found instead.
    """
    upcoming = self.scanner.peek()
    if upcoming is EOF:
        return (True, Atom(''))
    return (False,
        Atom(self.scanner.error_message('EOF', upcoming))
    )
eof.arity = 0
62 |
63 |
def any(self, args):
    """Consume and return the next token, whatever it is.

    Fails with an error-message atom when the input is at EOF.
    """
    if self.scanner.peek() is EOF:
        message = self.scanner.error_message('any token', EOF)
        return (False, Atom(message))
    return (True, Atom(self.scanner.scan()))
any.arity = 0
72 |
73 |
def alnum(self, args):
    """Consume the next token if its first character is alphanumeric.

    Returns (True, Atom(token)) on success.  Fails with an
    error-message atom at EOF, on an empty token, or when the first
    character is not alphanumeric.
    """
    upcoming = self.scanner.peek()
    # [:1] rather than [0] so an empty token fails instead of raising
    if upcoming is not EOF and upcoming[:1].isalnum():
        return (True, Atom(self.scanner.scan()))
    return (False,
        Atom(self.scanner.error_message('alphanumeric', upcoming))
    )
alnum.arity = 0
83 |
84 |
def upper(self, args):
    """Consume the next token if its first character is uppercase.

    Returns (True, Atom(token)) on success.  Fails with an
    error-message atom at EOF, on an empty token, or when the first
    character is not uppercase.
    """
    upcoming = self.scanner.peek()
    # [:1] rather than [0] so an empty token fails instead of raising
    if upcoming is not EOF and upcoming[:1].isupper():
        return (True, Atom(self.scanner.scan()))
    return (False,
        Atom(self.scanner.error_message('uppercase', upcoming))
    )
upper.arity = 0
94 |
95 |
def startswith(self, args):
    """Consume the next token if its first character starts with args[0].

    Only the first character of the upcoming token is compared against
    str(args[0]).  Returns (True, Atom(token)) on success; fails with
    an error-message atom at EOF or on a mismatch.
    """
    upcoming = self.scanner.peek()
    # [:1] rather than [0] so an empty token cannot raise IndexError
    if (upcoming is not EOF and
        upcoming[:1].startswith((str(args[0]),))):
        return (True, Atom(self.scanner.scan()))
    return (False,
        Atom(self.scanner.error_message("'%s...'" % args[0], upcoming))
    )
startswith.arity = 1
105 |
106 |
def equal(self, args):
    """Succeed with args[0] when args[0].match(args[1]) does not fail.

    Otherwise fails with an atom describing the two unequal terms.
    """
    left, right = args[0], args[1]
    if left.match(right) == False:
        return (False, Atom("term '%s' does not equal '%s'" %
                            (left, right)))
    return (True, left)
equal.arity = 2
114 |
115 |
def unquote(self, args):
    """Strip the left quote args[1] and right quote args[2] from args[0].

    All three arguments are flattened with str().  Returns
    (True, Atom(inner)) when the text starts with the left quote, ends
    with the right quote, and is long enough to contain both without
    overlap; otherwise (False, Atom(message)).
    """
    q = str(args[0])
    l = str(args[1])
    r = str(args[2])
    # The length check rejects text such as a lone '"' where a single
    # character would otherwise serve as both the opening and the
    # closing quote.
    if len(q) >= len(l) + len(r) and q.startswith(l) and q.endswith(r):
        # slicing to an explicit end index also covers an empty right quote
        return (True, Atom(q[len(l):len(q) - len(r)]))
    return (False, Atom("term '%s' is not quoted with '%s' and '%s'" %
                        (q, l, r)))
unquote.arity = 3
128 |
129 |
def mkterm(self, args):
    """Build a constructor term from a tag atom and a `list` term.

    args[1] is walked while it is a Constructor tagged 'list',
    collecting each node's first element and moving on to its second.
    With at least one collected element the result is
    Constructor(args[0].text, elements); with none it is args[0] itself.
    """
    head = args[0]
    node = args[1]
    elements = []
    while isinstance(node, Constructor) and node.tag == 'list':
        elements.append(node.contents[0])
        node = node.contents[1]
    if not elements:
        return (True, head)
    return (True, Constructor(head.text, elements))
mkterm.arity = 2
142 |
143 |
def reverse(self, args):
    """Succeed with args[0].reversed(args[1]), args[1] being the sentinel."""
    subject, sentinel = args[0], args[1]
    return (True, subject.reversed(sentinel))
reverse.arity = 2
147 |
148 |
def print_(self, args):
    """Write str(args[0]) and a newline to stdout; succeed with args[0]."""
    val = args[0]
    out = sys.stdout
    out.write(str(val))
    out.write("\n")
    return (True, val)
print_.arity = 1
155 |
156 |
def emit(self, args):
    """Write str(args[0]) to stdout with no newline; succeed with args[0]."""
    val = args[0]
    text = str(val)
    sys.stdout.write(text)
    return (True, val)
emit.arity = 1
162 |
163 |
def repr(self, args):
    """Succeed with an atom whose text is args[0].repr()."""
    reprified = args[0].repr()
    return (True, Atom(reprified))
repr.arity = 1
169 |
170 |
# Module-wide count of gensym() calls; incremented before each use.
counter = 0

def gensym(self, args):
    """Succeed with an atom made of str(args[0]) plus a fresh number.

    The number is the module-level `counter` after incrementing it.
    """
    global counter
    counter = counter + 1
    prefix = str(args[0])
    return (True, Atom(prefix + str(counter)))
gensym.arity = 1
178 |
179 |
def hexbyte(self, args):
    """Succeed with the one-character atom for two hex digits.

    The texts of args[0] and args[1] are concatenated and parsed as a
    base-16 number, which is then turned into a character with chr().
    """
    digits = args[0].text + args[1].text
    code = int(digits, 16)
    return (True, Atom(chr(code)))
hexbyte.arity = 2
183 |
184 |
def format_octal(self, args):
    """Succeed with the octal digits of the first character of args[0].text."""
    code = ord(args[0].text[0])
    return (True, Atom("%o" % code))
format_octal.arity = 1
188 |
189 |
def length(self, args):
    """Succeed with an atom holding the decimal length of str(args[0])."""
    flattened = str(args[0])
    return (True, Atom(str(len(flattened))))
length.arity = 1
193 |
--------------------------------------------------------------------------------
/src/tamsin/term.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
4 | # Distributed under a BSD-style license; see LICENSE for more information.
5 |
6 | # Note that __str__ and __repr__ and repr perform very different tasks:
7 | # __str__ : flattening operation on Tamsin terms
8 | # repr: reprifying operation on Tamsin terms
9 | # __repr__ : make a string that is valid Python code for constructing the Term
10 |
11 |
# Characters an atom's text may consist of and still be written without
# quotes by repr_escape().
BAREWORD = (
    '0123456789'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '_'
    'abcdefghijklmnopqrstuvwxyz'
)
# The space character and visible ASCII characters, excluding the single
# quote and the backslash.
PRINTABLE = (
    ' !"#$%&()*+,-./'
    '0123456789'
    ':;<=>?@'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '[]^_`'
    'abcdefghijklmnopqrstuvwxyz'
    '{|}~'
)
15 |
16 |
def repr_escape(t):
    """Return the reprified form of the text `t`.

    Empty text becomes ''.  Text made only of BAREWORD characters is
    returned unchanged.  Anything else is wrapped in single quotes, with
    single quotes and backslashes backslash-escaped and every character
    outside the 32..126 range written as a \\xNN escape.
    """
    if not t:
        return "''"
    if all(c in BAREWORD for c in t):
        return t
    pieces = []
    for c in t:
        if c == "'":
            pieces.append(r"\'")
        elif c == "\\":
            pieces.append(r"\\")
        elif 31 < ord(c) < 127:
            pieces.append(c)
        else:
            pieces.append(r"\x%02x" % ord(c))
    return "'%s'" % ''.join(pieces)
33 |
34 |
class Term(object):
    """Base class of the term types defined in this module."""

    def expand(self, context):
        """Expands this term, returning a new term where, for all x, all
        occurrences of (VAR x) are replaced with the value of x in the
        given context.

        This base implementation returns the term itself.
        """
        return self

    def __str__(self):
        raise NotImplementedError()

    def __repr__(self):
        raise NotImplementedError()

    def repr(self):
        raise NotImplementedError()

    @classmethod
    def match_all(_class, patterns, values):
        """Match each pattern against the value at the same position.

        Returns a dict of the combined bindings if every pattern
        matches, or False as soon as one does not.
        """
        bindings = {}
        for position, pattern in enumerate(patterns):
            sub = pattern.match(values[position])
            # compare with == so that an empty dict of bindings is not
            # mistaken for a failed match
            if sub == False:
                return False
            bindings.update(sub)
        return bindings

    def match(self, value):
        raise NotImplementedError()
71 |
72 |
class Atom(Term):
    """A term that holds a piece of non-Unicode text."""

    def __init__(self, text):
        assert not isinstance(text, unicode)
        self.text = text

    def __str__(self):
        return self.text

    def __repr__(self):
        return "Atom(%r)" % (self.text)

    def repr(self):
        return repr_escape(self.text)

    def match(self, value):
        """Return {} if `value` is an Atom with the same text, else False."""
        if isinstance(value, Atom) and self.text == value.text:
            return {}
        return False

    def reversed(self, sentinel):
        """Return this atom if it matches `sentinel`.

        Raises ValueError("malformed list") otherwise.
        """
        if self.match(sentinel) == False:
            raise ValueError("malformed list")
        return self
99 |
100 |
class Constructor(Term):
    """A term made of a non-Unicode tag and a list of sub-terms."""

    def __init__(self, tag, contents):
        assert not isinstance(tag, unicode)
        self.tag = tag
        for element in contents:
            assert isinstance(element, Term), repr(element)
        self.contents = contents

    def expand(self, context):
        """Return a new Constructor with every sub-term expanded."""
        expanded = [element.expand(context) for element in self.contents]
        return Constructor(self.tag, expanded)

    def __str__(self):
        flattened = ', '.join(str(element) for element in self.contents)
        return "%s(%s)" % (self.tag, flattened)

    def __repr__(self):
        return "Constructor(%r, %r)" % (self.tag, self.contents)

    def repr(self):
        reprified = ', '.join(element.repr() for element in self.contents)
        return "%s(%s)" % (repr_escape(self.tag), reprified)

    def match(self, value):
        """Match this constructor, as a pattern, against `value`.

        Returns a dict of bindings when `value` is a Constructor with
        the same tag and number of sub-terms and every sub-term matches
        pairwise; returns False otherwise.
        """
        if not isinstance(value, Constructor):
            return False
        if self.tag != value.tag:
            return False
        if len(self.contents) != len(value.contents):
            return False
        # lengths are equal here, so the pairwise matcher applies directly
        return Term.match_all(self.contents, value.contents)

    def reversed(self, sentinel):
        """Return this list-like term with its elements in reverse order.

        Starting from this term, each node carrying this term's tag
        contributes its first sub-term, and the walk continues with its
        second sub-term.  The result is built onto `sentinel`.  Raises
        ValueError if the term the walk stops on does not match
        `sentinel`.
        """
        tag = self.tag
        result = sentinel
        node = self
        while isinstance(node, Constructor) and node.tag == tag:
            result = Constructor(tag, [node.contents[0], result])
            if len(node.contents) < 2:
                # no second sub-term to continue with
                break
            node = node.contents[1]
        if node.match(sentinel) == False:
            raise ValueError("malformed list %s" % node.repr())
        return result
154 |
155 |
class Variable(Term):
    """A named variable; its name starts with an uppercase letter or '_'."""

    def __init__(self, name):
        assert not isinstance(name, unicode)
        initial = name[0]
        assert initial.isupper() or initial == u'_', name
        self.name = name

    def expand(self, context):
        """Return whatever `context` has stored under this variable's name."""
        return context.fetch(self.name)

    def __str__(self):
        return self.name

    def __repr__(self):
        return "Variable(%r)" % (self.name)

    def repr(self):
        return self.name

    def match(self, value):
        """Always match, binding this variable's name to `value`."""
        return {self.name: value}
176 |
--------------------------------------------------------------------------------
/test-codegen.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
# Runs `tamsin codegen` over the files listed in FILES and stops at the
# first one that fails.

# Not referenced anywhere below.
# NOTE(review): presumably the examples that already pass codegen -- confirm.
YES="
eg/hello-world.tamsin eg/bits.tamsin eg/bitpair.tamsin
eg/exciting-long.tamsin eg/list-of-chars.tamsin
eg/modules.tamsin
"

# The files actually run through codegen by the loop below.
FILES="eg/reverse.tamsin"

# Not referenced anywhere below.
# NOTE(review): presumably an example known not to pass yet -- confirm.
NO="eg/eval-bool-expr.tamsin"

# NOTE(review): `tamsin` is resolved via PATH here, whereas test.sh runs
# bin/tamsin -- confirm that this is intended.
for FILE in $FILES; do
    tamsin codegen $FILE || exit 1
done
16 |
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
# Falderal documents used by the interpreter, compiler, tcompiler and
# bootstrap tests (overridable with -f, see below).
FILES="
    doc/Tamsin.markdown
    doc/System_Module.markdown
    doc/Tested_Examples.markdown
"
# Tamsin sources that test_it feeds to the tools under test.
GLOB="eg/*.tamsin lib/*.tamsin mains/*.tamsin"

mkdir -p tmp

# Optional leading "-f FILES": run the Falderal-based tests on FILES only.
# ONLY remembers the value so the grouped runs below, which re-invoke
# this script, can pass the restriction on instead of silently falling
# back to the default FILES.
ONLY=""
if [ "x$1" = 'x-f' ]; then
    shift
    echo "(Testing on Falderal files '$1' only)"
    FILES=$1
    ONLY=$1
    shift
fi

# Optional mode word, consumed by the test_it-based tests.
MODE=compiled
if [ "x$1" = xcompiled -o "x$1" = xinterpreted ]; then
    MODE=$1
    shift
fi

# ${ONLY:+-f "$ONLY"} expands to nothing when -f was not given, and to
# the two words `-f` and the file list when it was.

# No test named: run the default group.
if [ "x$1" = x ]; then
    $0 ${ONLY:+-f "$ONLY"} interpreter &&
    $0 ${ONLY:+-f "$ONLY"} compiler &&
    $0 ${ONLY:+-f "$ONLY"} tcompiler &&
    $0 ${ONLY:+-f "$ONLY"} bootstrap &&
    echo "All tests passed!"
    exit $?
fi

if [ "x$1" = xtamsin ]; then
    echo "Testing things written in Tamsin only."
    $0 compiled scanner &&
    $0 compiled grammar &&
    $0 compiled parser &&
    $0 compiled desugarer &&
    $0 compiled analyzer &&
    $0 micro &&
    $0 ${ONLY:+-f "$ONLY"} tcompiler &&
    echo "All tests passed!"
    exit $?
fi

if [ "x$1" = xthorough ]; then
    echo "Testing EVERYTHING. This will take more than 8 minutes. (On a FAST machine.)"
    $0 ${ONLY:+-f "$ONLY"} interpreter &&
    $0 ${ONLY:+-f "$ONLY"} compiler &&
    $0 interpreted scanner &&
    $0 interpreted grammar &&
    $0 interpreted parser &&
    $0 interpreted desugarer &&
    $0 interpreted analyzer &&
    $0 compiled scanner &&
    $0 compiled grammar &&
    $0 compiled parser &&
    $0 compiled desugarer &&
    $0 compiled analyzer &&
    $0 micro &&
    $0 ${ONLY:+-f "$ONLY"} tcompiler &&
    $0 ${ONLY:+-f "$ONLY"} bootstrap &&
    echo "All tests passed!"
    exit $?
fi
67 |
# Prints "ok" and ignores its arguments; the grammar test passes it to
# test_it as the reference command.
ok() {
    printf '%s\n' 'ok'
}
71 |
# test_it MODE SRC LIBS CMD [BIN]
#
# Checks a tool written in Tamsin (SRC, together with LIBS) against a
# reference command CMD.  For every file matched by $GLOB, the output of
# `$CMD file` and the output of the Tamsin tool reading that file on stdin
# are each piped through bin/wrap and compared with diff.  The first
# difference ends the script with status 1; otherwise "Passed." is printed
# and the script exits with status 0.
#
#   MODE  "compiled": SRC is compiled to tmp/foo.c, built with gcc into
#         BIN, and BIN is run.
#         "interpreted": `bin/tamsin $LIBS $SRC` is run directly.
#         Anything else prints "BAD MODE" and exits with status 1.
#   BIN   name of the binary to build in compiled mode; defaults to "foo".
#
# Note that this function always ends by exiting the whole script.
test_it() {
    MODE=$1
    SRC=$2
    LIBS=$3
    CMD=$4
    BIN=$5
    if [ x$BIN = x ]; then
        BIN=foo
    fi

    if [ $MODE = "compiled" ]; then
        make c_src/libtamsin.a || exit 1
        echo "*** Compiling $SRC (with $LIBS)"
        echo "*** and testing it against '$CMD'..."
        bin/tamsin compile $LIBS $SRC > tmp/foo.c && \
           gcc -g -Ic_src -Lc_src tmp/foo.c -o $BIN -ltamsin || exit 1
        for EG in $GLOB; do
            echo $EG
            $CMD $EG | bin/wrap > tmp/python-cmd.txt
            $BIN <$EG | bin/wrap > tmp/tamsin-cmd.txt
            # first diff keeps a copy in tmp/output.diff; the second shows
            # the difference on stdout and decides whether to stop
            diff -ru tmp/python-cmd.txt tmp/tamsin-cmd.txt > tmp/output.diff
            diff -ru tmp/python-cmd.txt tmp/tamsin-cmd.txt || exit 1
        done
    elif [ $MODE = "interpreted" ]; then
        echo "*** Interpreting $SRC (with $LIBS)"
        echo "*** and testing it against '$CMD'..."
        for EG in $GLOB; do
            echo $EG
            $CMD $EG | bin/wrap > tmp/python-cmd.txt
            bin/tamsin $LIBS $SRC <$EG | bin/wrap > tmp/tamsin-cmd.txt
            # same save-then-check pair of diffs as in compiled mode
            diff -ru tmp/python-cmd.txt tmp/tamsin-cmd.txt > tmp/output.diff
            diff -ru tmp/python-cmd.txt tmp/tamsin-cmd.txt || exit 1
        done
        echo "Passed."
        exit 0
    else
        echo "BAD MODE"
        exit 1
    fi
    # reached only in compiled mode; the other branches exit above
    echo "Passed."
    exit 0
}
114 |
# Run the single test named by the first remaining argument.
case "x$1" in
    xinterpreter|xi)
        echo "*** Testing Python interpreter..."
        falderal $VERBOSE --substring-error fixture/tamsin.py.markdown $FILES
        ;;
    xerror-reporting)
        echo "*** Testing error reporting in Python interpreter..."
        falderal $VERBOSE --substring-error fixture/tamsin.py.markdown doc/Error_Reporting.markdown
        ;;
    xcompiler)
        make c_src/libtamsin.a || exit 1
        echo "*** Testing compiler..."
        falderal $VERBOSE --substring-error fixture/compiler.py.markdown $FILES
        ;;
    xgrammar)
        test_it $MODE "mains/grammar.tamsin" \
                "lib/tamsin_scanner.tamsin" \
                "ok" \
                "bin/tamsin-grammar"
        ;;
    xscanner)
        test_it $MODE "mains/scanner.tamsin" \
                "lib/tamsin_scanner.tamsin" \
                "./bin/tamsin scan" \
                "bin/tamsin-scanner"
        ;;
    xparser)
        test_it $MODE "mains/parser.tamsin" \
                "lib/list.tamsin lib/tamsin_scanner.tamsin lib/tamsin_parser.tamsin" \
                "./bin/tamsin parse" \
                "bin/tamsin-parser"
        ;;
    xdesugarer)
        test_it $MODE "mains/desugarer.tamsin" \
                "lib/list.tamsin lib/tamsin_scanner.tamsin lib/tamsin_parser.tamsin lib/tamsin_analyzer.tamsin" \
                "./bin/tamsin desugar" \
                "bin/tamsin-desugarer"
        ;;
    xanalyzer)
        # libs and mains need libs
        GLOB="eg/*.tamsin"
        test_it $MODE "mains/analyzer.tamsin" \
                "lib/list.tamsin lib/tamsin_scanner.tamsin lib/tamsin_parser.tamsin lib/tamsin_analyzer.tamsin" \
                "./bin/tamsin analyze" \
                "bin/tamsin-analyzer"
        ;;
    xtcompiler)
        make bin/tamsin-compiler || exit 1
        echo "*** Testing Tamsin-in-Tamsin compiler..."
        falderal $VERBOSE --substring-error fixture/compiler.tamsin.markdown $FILES
        ;;
    xbootstrap)
        make bin/bootstrapped-compiler || exit 1
        echo "*** Testing Bootstrapped Tamsin-in-Tamsin compiler..."
        falderal $VERBOSE --substring-error fixture/bootstrapped.markdown $FILES
        ;;
    xmicro)
        make bin/micro-tamsin || exit 1
        echo "*** Testing Micro-Tamsin interpreter..."
        FILES="doc/Micro-Tamsin.markdown"
        falderal $VERBOSE --substring-error fixture/micro-tamsin.markdown $FILES
        ;;
    xmini)
        make bin/mini-tamsin || exit 1
        echo "*** Testing Mini-Tamsin interpreter..."
        FILES="doc/Micro-Tamsin.markdown"  # note: does not use Mini-Tamsin.md yet
        falderal $VERBOSE --substring-error fixture/mini-tamsin.markdown $FILES
        ;;
    *)
        echo "Unknown test '$1'."
        exit 1
        ;;
esac
174 |
--------------------------------------------------------------------------------