├── .gitignore ├── .hgignore ├── .hgtags ├── HISTORY.markdown ├── LICENSE ├── Makefile ├── README.markdown ├── bin ├── hexout ├── inhex ├── tamsin └── wrap ├── c_src ├── dict.c ├── dict.h ├── scanner.c ├── scanner.h ├── tamsin.c ├── tamsin.h ├── term.c └── term.h ├── doc ├── 6502-sketch.tamsin ├── Advanced_Features.markdown ├── Case_Study.markdown ├── Error_Reporting.markdown ├── Excessive_Tests.markdown ├── Micro-Tamsin.markdown ├── Mini-Tamsin.markdown ├── Notes.markdown ├── Philosophy.markdown ├── System_Module.markdown ├── TODO.markdown ├── Tamsin.markdown └── Tested_Examples.markdown ├── eg ├── alg-expr1.tamsin ├── alg-expr2.tamsin ├── alg-expr3.tamsin ├── backtrack.tamsin ├── bitpair.tamsin ├── bits.tamsin ├── blerf.tamsin ├── change-buffer.tamsin ├── csv_extract.tamsin ├── csv_parse.tamsin ├── escape.tamsin ├── eval-bool-expr.tamsin ├── exciting-long.tamsin ├── exciting.tamsin ├── exciting.txt ├── expector.tamsin ├── foobar.tamsin ├── hello-world.tamsin ├── list-of-chars.tamsin ├── list-sugar2.tamsin ├── modules.tamsin ├── names.csv ├── pipeline.tamsin ├── prod-branches.tamsin ├── reverse.tamsin ├── sexpr-eval.tamsin ├── store.tamsin ├── zeroes-concat.tamsin └── zeroes.tamsin ├── fixture ├── bootstrapped.markdown ├── compiler.py.markdown ├── compiler.tamsin.markdown ├── micro-tamsin.markdown ├── mini-tamsin.markdown └── tamsin.py.markdown ├── lib ├── list.tamsin ├── tamsin_analyzer.tamsin ├── tamsin_parser.tamsin └── tamsin_scanner.tamsin ├── mains ├── analyzer.tamsin ├── compiler.tamsin ├── desugarer.tamsin ├── grammar.tamsin ├── micro-tamsin.tamsin ├── mini-tamsin.tamsin ├── parser.tamsin └── scanner.tamsin ├── src └── tamsin │ ├── __init__.py │ ├── analyzer.py │ ├── ast.py │ ├── backends │ ├── __init__.py │ └── c.py │ ├── buffer.py │ ├── codegen.py │ ├── codenode.py │ ├── compiler.py │ ├── desugarer.py │ ├── event.py │ ├── interpreter.py │ ├── main.py │ ├── parser.py │ ├── scanner.py │ ├── sysmod.py │ └── term.py ├── test-codegen.sh └── test.sh 
/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.o 3 | *.a 4 | bin/tamsin-* 5 | bin/bootstrapped-* 6 | bin/micro-tamsin 7 | tmp/ 8 | -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | syntax: glob 2 | 3 | *.pyc 4 | *.o 5 | *.a 6 | 7 | bin/tamsin-* 8 | bin/bootstrapped-* 9 | bin/micro-tamsin 10 | tmp/ 11 | -------------------------------------------------------------------------------- /.hgtags: -------------------------------------------------------------------------------- 1 | 8c5c38158bc6d671345851015aa15a71f5cd9aa1 0.1 2 | b89a9c3fc2e841573dae7ce7e51deb81313c8a40 0.2 3 | db0e6c779d74337956106874d1ef91385fe86e7d 0.3 4 | c91de5aea6dea0fb9d609cd76ccc4d153f2a3e5a 0.4 5 | 7597a8c4b1c696a0afb96aa496fcec5e36beeebf 0.5 6 | -------------------------------------------------------------------------------- /HISTORY.markdown: -------------------------------------------------------------------------------- 1 | Tamsin Release History 2 | ====================== 3 | 4 | 0.5-2017.0502 5 | ------------- 6 | 7 | This is an interim release, created because the tests pass here, even 8 | though not everything aimed for for the next release has been achieved. 9 | 10 | ### language ### 11 | 12 | * The RHS of → can be a pattern term. 13 | * "Proper quoted" strings. 14 | 15 | ### implementations ### 16 | 17 | * `mini-tamsin.tamsin` is an interpreter for "Mini-Tamsin", written in Tamsin. 18 | * Better error reporting. 19 | * Improvements or bugfixes in the C-language implementation of `$:unquote`. 20 | * Tamsin programs can handle streams on input and produce streams on output. 21 | * Begun work on a better C-emitting backend. 22 | * Better scanning; buffers are more sophisticated and track some state themselves. 
23 | 24 | 0.5 25 | --- 26 | 27 | ### language ### 28 | 29 | * EOF is no longer a special kind of term; it is no longer exposed, as 30 | a value, to Tamsin programs. (`$:eof` returns `''` on success.) 31 | * Prolog/Erlang-style list sugar for terms, in patterns as well. 32 | * When a new scanner is switched to using `using`, that scanner defaults 33 | to the `$:utf8` scanner for *its* scanning. This prevents the common 34 | shooting-self-in-foot error of selecting a production that is not 35 | itself `using` another scanner (which would result in an infinite loop 36 | of the production scanner trying to use itself as its subsidiary 37 | scanner.) 38 | 39 | ### implementation ### 40 | 41 | * `struct term *`s are (almost) always `const` in compiled Tamsin 42 | programs (for better sharing; we don't need to make copies of them) 43 | * related: variable-matching is more efficient (directly updates an array 44 | of terms, instead of searching for the variable by name) 45 | * related: creating new atoms uses hash-consing, so that no new 46 | `struct term` for the atom is allocated if one already exists (the 47 | existing one is shared.) This reduces memory usage significantly. 48 | 49 | 0.4 50 | --- 51 | 52 | ### language ### 53 | 54 | * Added `@` (work on different implicit buffer.) 55 | 56 | ### modules ### 57 | 58 | * Added `$:gensym`. 59 | * Added `$:hexchar`. 60 | * Added `$:format_octal`. 61 | * Added `$:length`. 62 | * Added `list:append`. 63 | 64 | ### implementations ### 65 | 66 | * Tamsin-to-C compiler written in Tamsin (`mains/compiler.tamsin`) passes 67 | all tests, and can compile itself. 68 | * Refactored `$` functions into `tamsin.sysmod` module in Python version. 69 | 70 | 0.3 71 | --- 72 | 73 | ### language ### 74 | 75 | * Defined what it means to `reprify` a term. 76 | * Clarified some matters as implementation-defined. 77 | 78 | ### modules ### 79 | 80 | * `$:equal` now does deep equality of arbitrary ground terms. 81 | * `$:repr` added. 
82 | * `$:reverse` added. 83 | * Some standard modules ship in the distribution: `list`, 84 | `tamsin_scanner`, and `tamsin_parser`. 85 | 86 | ### implementations ### 87 | 88 | * Support for user-defined modules. 89 | * `tamsin` can take more than one source file on command line; this 90 | is how external modules are supported (by this implementation.) 91 | * Cleaned-up testing framework; Tamsin versions of scanner, grammar, 92 | parser, desugarer, analyzer, and compiler found in `mains` subdir. 93 | * Most `tamsin` verbs, and their versions in Tamsin, corresponding to 94 | intermediate phases, output reprified terms. 95 | * `tamsin` significantly re-factored so that the interpreter and 96 | compiler are more similar, and generating code for production branches 97 | is easier. 98 | * Added Tamsin-to-C compiler written in Tamsin, which can pass the first 99 | 43 or so tests from the spec ("Mini-Tamsin"). 100 | 101 | 0.2 102 | --- 103 | 104 | ### language ### 105 | 106 | * Module-member syntax changed from `.` to `:`. 107 | * `:` can be used without any module on the LHS to refer to a production 108 | in the current module. 109 | * Added "fold" forms, binary `/` and ternary `//`. 110 | 111 | ### modules ### 112 | 113 | * `$:char` scanner dropped. Instead, there are `$:byte` (which always 114 | returns 8-bit-clean bytes) and `$:utf8` (which always returns UTF-8 115 | sequences.) 116 | * Added `$:equal(L,R)`. 117 | * `$:unquote(X,L,R)` takes three arguments now. 118 | 119 | ### implementations ### 120 | 121 | * Beginnings of user-defined module support (very rudimentary, not to be 122 | used.) 123 | * Code in `libtamsin` is much more robust. AST-builder written in Tamsin now 124 | compiles and runs correctly. 125 | * Added a desugaring phase to `tamsin`, and a desugarer written in Tamsin. 126 | * Added Micro-Tamsin interpreter, written in Tamsin. Can pass the first 127 | 30 tests from the spec. 128 | 129 | 0.1 130 | --- 131 | 132 | Initial release. 
133 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The contents of the Tamsin distribution are distributed under the following 2 | three licenses. 3 | 4 | The documentation (in the `doc/` subdirectory) is covered by the following 5 | BSD-compatible license, modelled after the "Report on the Programming 6 | Language Haskell 98" license: 7 | 8 | ----------------------------------------------------------------------------- 9 | 10 | Copyright (c)2014 Chris Pressey, Cat's Eye Technologies. 11 | 12 | The authors intend this Report to belong to the entire Tamsin 13 | community, and so we grant permission to copy and distribute it for 14 | any purpose, provided that it is reproduced in its entirety, 15 | including this Notice. Modified versions of this Report may also be 16 | copied and distributed for any purpose, provided that the modified 17 | version is clearly presented as such, and that it does not claim to 18 | be a definition of the Tamsin Programming Language. 19 | 20 | ----------------------------------------------------------------------------- 21 | 22 | The source code for the reference interpreter and supporting tools (in the 23 | `src` and `c_src` subdirectories) is covered under the following BSD-style 24 | license: 25 | 26 | ----------------------------------------------------------------------------- 27 | 28 | Copyright (c)2014, Chris Pressey, Cat's Eye Technologies. 29 | All rights reserved. 30 | 31 | Redistribution and use in source and binary forms, with or without 32 | modification, are permitted provided that the following conditions 33 | are met: 34 | 35 | Redistributions of source code must retain the above copyright 36 | notices, this list of conditions and the following disclaimer. 
37 | 38 | Redistributions in binary form must reproduce the above copyright 39 | notices, this list of conditions, and the following disclaimer in 40 | the documentation and/or other materials provided with the 41 | distribution. 42 | 43 | Neither the names of the copyright holders nor the names of their 44 | contributors may be used to endorse or promote products derived 45 | from this software without specific prior written permission. 46 | 47 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 48 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES INCLUDING, BUT NOT 49 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 50 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 51 | COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 52 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 53 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 54 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 55 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 57 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 58 | POSSIBILITY OF SUCH DAMAGE. 59 | 60 | ----------------------------------------------------------------------------- 61 | 62 | Every example source in the `eg` directory specifies what its own licensing 63 | terms are. Many of them are in the public domain, in which case the following 64 | UNLICENSE applies to them. Others may be under other licenses; see the 65 | specific file in question for more information. 66 | 67 | ----------------------------------------------------------------------------- 68 | 69 | This is free and unencumbered software released into the public domain. 
70 | 71 | Anyone is free to copy, modify, publish, use, compile, sell, or 72 | distribute this software, either in source code form or as a compiled 73 | binary, for any purpose, commercial or non-commercial, and by any 74 | means. 75 | 76 | In jurisdictions that recognize copyright laws, the author or authors 77 | of this software dedicate any and all copyright interest in the 78 | software to the public domain. We make this dedication for the benefit 79 | of the public at large and to the detriment of our heirs and 80 | successors. We intend this dedication to be an overt act of 81 | relinquishment in perpetuity of all present and future rights to this 82 | software under copyright law. 83 | 84 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 85 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 86 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 87 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 88 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 89 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 90 | OTHER DEALINGS IN THE SOFTWARE. 
91 | 92 | For more information, please refer to <http://unlicense.org/> 93 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC?=gcc 2 | CFLAGS?=-ansi -g -Ic_src -Lc_src 3 | 4 | LCFLAGS?=-ansi -pedantic -g -Wall -Werror -Ic_src -Lc_src 5 | 6 | ##### libtamsin ##### 7 | 8 | OBJECTS=c_src/dict.o c_src/scanner.o c_src/term.o c_src/tamsin.o 9 | PROGS=bin/tamsin-compiler bin/micro-tamsin 10 | 11 | all: c_src/libtamsin.a 12 | 13 | c_src/scanner.o: c_src/tamsin.h c_src/scanner.c 14 | $(CC) $(LCFLAGS) -c c_src/scanner.c -o $@ 15 | 16 | c_src/term.o: c_src/tamsin.h c_src/term.c 17 | $(CC) $(LCFLAGS) -c c_src/term.c -o $@ 18 | 19 | c_src/tamsin.o: c_src/tamsin.h c_src/tamsin.c 20 | $(CC) $(LCFLAGS) -c c_src/tamsin.c -o $@ 21 | 22 | c_src/libtamsin.a: $(OBJECTS) 23 | ar -r $@ $(OBJECTS) 24 | 25 | 26 | ##### executables ##### 27 | 28 | TAMSIN_COMPILER_LIBS=lib/list.tamsin lib/tamsin_scanner.tamsin \ 29 | lib/tamsin_parser.tamsin lib/tamsin_analyzer.tamsin 30 | bin/tamsin-compiler: c_src/libtamsin.a c_src/tamsin.h \ 31 | $(TAMSIN_COMPILER_LIBS) \ 32 | mains/compiler.tamsin 33 | bin/tamsin compile $(TAMSIN_COMPILER_LIBS) mains/compiler.tamsin > tmp/foo.c 34 | $(CC) $(CFLAGS) tmp/foo.c -o $@ -ltamsin 35 | 36 | 37 | bin/bootstrapped-compiler: c_src/libtamsin.a c_src/tamsin.h \ 38 | bin/tamsin-compiler \ 39 | $(TAMSIN_COMPILER_LIBS) \ 40 | mains/compiler.tamsin 41 | bin/tamsin-compiler $(TAMSIN_COMPILER_LIBS) mains/compiler.tamsin > tmp/foo.c 42 | $(CC) $(CFLAGS) tmp/foo.c -o $@ -ltamsin 43 | 44 | 45 | MICRO_TAMSIN_LIBS=lib/list.tamsin lib/tamsin_scanner.tamsin \ 46 | lib/tamsin_parser.tamsin 47 | bin/micro-tamsin: c_src/libtamsin.a c_src/tamsin.h \ 48 | $(MICRO_TAMSIN_LIBS) \ 49 | mains/micro-tamsin.tamsin 50 | bin/tamsin compile $(MICRO_TAMSIN_LIBS) mains/micro-tamsin.tamsin > tmp/foo.c 51 | $(CC) $(CFLAGS) tmp/foo.c -o $@ -ltamsin 52 | 53 | 54 | 
MINI_TAMSIN_LIBS=lib/list.tamsin lib/tamsin_scanner.tamsin \ 55 | lib/tamsin_parser.tamsin 56 | bin/mini-tamsin: c_src/libtamsin.a c_src/tamsin.h \ 57 | $(MINI_TAMSIN_LIBS) \ 58 | mains/mini-tamsin.tamsin 59 | bin/tamsin compile $(MINI_TAMSIN_LIBS) mains/mini-tamsin.tamsin > tmp/foo.c 60 | $(CC) $(CFLAGS) tmp/foo.c -o $@ -ltamsin 61 | 62 | clean: 63 | rm -f c_src/libtamsin.a c_src/*.o $(PROGS) 64 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | Tamsin 2 | ====== 3 | 4 | Tamsin is an oddball little language that can't decide if it's a 5 | [meta-language](doc/Philosophy.markdown#meta-language), a 6 | [programming language](doc/Philosophy.markdown#programming-language), or a 7 | [rubbish lister](doc/Philosophy.markdown#rubbish-lister). 8 | 9 | Its primary goal is to allow the rapid development of **parsers**, 10 | **static analyzers**, **interpreters**, and **compilers**, and to allow them 11 | to be expressed *compactly*. Golf your grammar! (Or write it like a decent 12 | human being, if you must.) 13 | 14 | The current released version of Tamsin is 0.5-2017.0502. 15 | As indicated by the 0.x version number, it is a **work in progress**, 16 | with the usual caveat that things may change rapidly (and that version 0.6 might 17 | look completely different.) See [HISTORY](HISTORY.markdown) 18 | for a list of major changes. 19 | 20 | Code Examples 21 | ------------- 22 | 23 | Make a story more exciting in **1 line of code**: 24 | 25 | main = ("." & '!' | "?" & '?!' | any)/''. 26 | 27 | Parse an algebraic expression for syntactic correctness in **4 lines of code**: 28 | 29 | main = (expr0 & eof & 'ok'). 30 | expr0 = expr1 & {"+" & expr1}. 31 | expr1 = term & {"*" & term}. 32 | term = "x" | "y" | "z" | "(" & expr0 & ")". 
33 | 34 | Translate an algebraic expression to RPN (Reverse Polish Notation) in 35 | **7 lines of code**: 36 | 37 | main = expr0 → E & walk(E). 38 | expr0 = expr1 → E1 & {"+" & expr1 → E2 & E1 ← add(E1,E2)} & E1. 39 | expr1 = term → E1 & {"*" & term → E2 & E1 ← mul(E1,E2)} & E1. 40 | term = "x" | "y" | "z" | "(" & expr0 → E & ")" & E. 41 | walk(add(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' +'. 42 | walk(mul(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' *'. 43 | walk(X) = return ' '+X. 44 | 45 | Parse a CSV file (handling quoted commas and quotes correctly) and write 46 | out the 2nd-last field of each record — in **11 lines of code**: 47 | 48 | main = line → L & L ← lines(nil, L) & 49 | {"\n" & line → M & L ← lines(L, M)} & extract(L) & ''. 50 | line = field → F & {"," & field → G & F ← fields(G, F)} & F. 51 | field = strings | bare. 52 | strings = string → T & {string → S & T ← T + '"' + S} & T. 53 | string = "\"" & (!"\"" & any)/'' → T & "\"" & T. 54 | bare = (!(","|"\n") & any)/''. 55 | extract(lines(Ls, L)) = extract(Ls) & extract_field(L). 56 | extract(L) = L. 57 | extract_field(fields(L, fields(T, X))) = print T. 58 | extract_field(X) = X. 59 | 60 | Evaluate an (admittedly trivial) S-expression based language in 61 | **15 lines of code**: 62 | 63 | main = sexp → S using scanner & reverse(S, nil) → SR & eval(SR). 64 | scanner = ({" "} & ("(" | ")" | $:alnum/'')) using $:utf8. 65 | sexp = $:alnum | list. 66 | list = "(" & sexp/nil/pair → L & ")" & L. 67 | head(pair(A, B)) = A. 68 | tail(pair(A, B)) = B. 69 | cons(A, B) = return pair(A, B). 70 | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R). 71 | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R). 72 | eval(pair(cons, pair(A, pair(B, nil)))) = 73 | eval(A) → AE & eval(B) → BE & return pair(AE, BE). 74 | eval(X) = X. 75 | reverse(pair(H, T), A) = reverse(H, nil) → HR & reverse(T, pair(HR, A)). 76 | reverse(nil, A) = A. 77 | reverse(X, A) = X. 
78 | 79 | Interpret a small subset of Tamsin in 80 | **[30 lines of code](mains/micro-tamsin.tamsin)** 81 | (not counting the [included batteries](doc/Philosophy.markdown#batteries-included).) 82 | 83 | Compile Tamsin to C in 84 | **[563 lines of code](mains/compiler.tamsin)** 85 | (again, not counting the included batteries.) 86 | 87 | For more information 88 | -------------------- 89 | 90 | If the above has piqued your curiosity, you may want to read the specification, 91 | which contains many more small examples written to demonstrate (and test) the 92 | syntax and behavior of Tamsin: 93 | 94 | * [The Tamsin Language Specification](doc/Tamsin.markdown) 95 | 96 | Note that this is the current development version of the specification, and 97 | it may differ from the examples in this document. 98 | 99 | Quick Start 100 | ----------- 101 | 102 | The Tamsin reference repository is [hosted on Codeberg](https://codeberg.org/catseye/Tamsin). 103 | 104 | This repository contains the reference implementation of Tamsin, called 105 | `tamsin`, written in Python 2.7. It can both interpret a Tamsin program and 106 | compile a program written in Tamsin to C. 107 | 108 | The distribution also contains a Tamsin-to-C compiler written in Tamsin. It 109 | passes all the tests, and can compile itself. 110 | 111 | While the interpreter is fine for prototyping, note that some informal 112 | benchmarking revealed the compiled C programs to be about 30x faster. **Note** 113 | however that while the compiler passes all the tests, it is still largely 114 | unproven (e.g. its UTF-8 support is not RFC 3629-compliant), so it should be 115 | considered a **proof of concept**. 116 | 117 | To start using `tamsin`, 118 | 119 | * Clone the repository — `git clone https://codeberg.org/catseye/Tamsin` 120 | * Either: 121 | * Put the repo's `bin` directory on your `$PATH`, or 122 | * Make a symbolic link to `bin/tamsin` somewhere already on your `$PATH`. 123 | * Errr... that's it. 
124 | 125 | Then you can run `tamsin` like so: 126 | 127 | * `tamsin eg/csv_parse.tamsin < eg/names.csv` 128 | 129 | To use the compiler, you'll need GNU make and `gcc` installed. Type 130 | 131 | * `make` 132 | 133 | to build the runtime library. You can then compile to C and compile the C to 134 | an executable and run the executable all in one step, like so: 135 | 136 | * `tamsin loadngo eg/csv_extract.tamsin < eg/names.csv` 137 | 138 | Design Goals 139 | ------------ 140 | 141 | * Allow parsers, static analyzers, interpreters, and compilers to be 142 | quickly prototyped. (And in the future, processor simulators and VM's 143 | and such things too.) 144 | * Allow writing these things very compactly. 145 | * Allow writing anything using only recursive-descent parsing techniques 146 | (insofar as this is possible.) 147 | * Allow writing parsers that look very similar to the grammar of the 148 | language being parsed, so that the structure of the language can be 149 | clearly seen. 150 | * Provide means to solve practical problems. 151 | * Keep the language simple — the grammar should fit on a page, ideally. 152 | * Recognize that the preceding two goals are in tension. 153 | * Have a relatively simple reference implementation (currently less than 154 | 5 KLoC, including everything — debugging support and the C runtime 155 | used by the compiler and the Tamsin modules and implementations.) 156 | 157 | License 158 | ------- 159 | 160 | BSD-style license; see the file [LICENSE](LICENSE). 
161 | 162 | Related work 163 | ------------ 164 | 165 | * [CoCo/R](http://www.scifac.ru.ac.za/coco/) (parser generation) 166 | * [Parsec](http://www.haskell.org/haskellwiki/Parsec) (parser combinators) 167 | * [Perl](http://perl.org/) (rubbish listing) 168 | * [Prolog](https://en.wikipedia.org/wiki/Prolog) (pattern-matching, terms, 169 | backtracking(-ish...)) 170 | * [K](https://github.com/kevinlawler/kona) (similar feel; Tamsin 171 | is a _vertical language_) 172 | * [Cat's Eye Technologies](http://catseye.tc)' esoteric and experimental 173 | languages: 174 | * [Squishy2K](http://catseye.tc/node/Squishy2K) 175 | * [Arboretuum](http://catseye.tc/node/Arboretuum) 176 | * [Treacle](http://catseye.tc/node/Treacle) 177 | -------------------------------------------------------------------------------- /bin/hexout: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # converts raw bytes on input to hex couples on output. 4 | 5 | import sys 6 | 7 | while True: 8 | byte = sys.stdin.read(1) 9 | if len(byte) < 1: 10 | sys.exit(0) 11 | sys.stdout.write('%02x' % ord(byte)) 12 | -------------------------------------------------------------------------------- /bin/inhex: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # converts hex couples on input to raw bytes on output. 
4 | 5 | import sys 6 | 7 | while True: 8 | hex = sys.stdin.read(2) 9 | if len(hex) < 2: 10 | sys.exit(0) 11 | sys.stdout.write(chr(int(hex, 16))) 12 | -------------------------------------------------------------------------------- /bin/tamsin: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from os.path import realpath, dirname, join 4 | import sys 5 | 6 | tamsin_dir = join(dirname(realpath(sys.argv[0])), '..') 7 | sys.path.insert(0, join(tamsin_dir, 'src')) 8 | 9 | from tamsin.main import main 10 | 11 | 12 | if __name__ == '__main__': 13 | main(sys.argv[1:], tamsin_dir=tamsin_dir) 14 | -------------------------------------------------------------------------------- /bin/wrap: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | 5 | WIDTH=120 6 | 7 | for line in sys.stdin: 8 | line = line.rstrip('\n') 9 | while len(line) > WIDTH: 10 | print line[:WIDTH] 11 | line = line[WIDTH:] 12 | print line 13 | 14 | # 123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890 15 | -------------------------------------------------------------------------------- /c_src/dict.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "term.h" 5 | 6 | #include "dict.h" 7 | 8 | struct chain { 9 | struct chain *next; 10 | const struct term *value; 11 | }; 12 | 13 | struct dict *dict_new(int num_buckets) { 14 | struct dict *d; 15 | int i; 16 | 17 | d = malloc(sizeof(struct dict)); 18 | d->num_buckets = num_buckets; 19 | d->bucket = malloc(sizeof(struct chain *) * d->num_buckets); 20 | for (i = 0; i < d->num_buckets; i++) { 21 | d->bucket[i] = NULL; 22 | } 23 | 24 | return d; 25 | } 26 | 27 | /*** 
UTILITIES ***/ 28 | 29 | /* 30 | * Hash function, taken from "Compilers: Principles, Techniques, and Tools" 31 | * by Aho, Sethi, & Ullman (a.k.a. "The Dragon Book", 2nd edition.) 32 | */ 33 | static size_t hashpjw(const char *key, size_t key_size, size_t table_size) { 34 | int i; 35 | unsigned long int h = 0, g; 36 | 37 | for (i = 0; i < key_size; i++) { 38 | h = (h << 4) + (key[i]); 39 | if ((g = h & 0xf0000000)) { 40 | h = (h ^ (g >> 24)) ^ g; 41 | } 42 | } 43 | 44 | return h % table_size; 45 | } 46 | 47 | /* 48 | * Create a new chain for a bucket (not called directly by client code.) 49 | */ 50 | static struct chain * 51 | chain_new(const struct term *value) 52 | { 53 | struct chain *c = malloc(sizeof(struct chain)); 54 | 55 | c->next = NULL; 56 | c->value = value; 57 | 58 | return c; 59 | } 60 | 61 | /* 62 | * Locate the bucket number a particular key would be located in, and the 63 | * chain link itself if such a key exists (or NULL if it could not be found.) 64 | */ 65 | static void 66 | dict_locate(struct dict *d, const char *key, size_t key_size, 67 | size_t *b_index, struct chain **c) 68 | { 69 | *b_index = hashpjw(key, key_size, d->num_buckets); 70 | for (*c = d->bucket[*b_index]; *c != NULL; *c = (*c)->next) { 71 | if ((*c)->value->size == key_size && 72 | memcmp(key, (*c)->value->atom, key_size) == 0) 73 | break; 74 | } 75 | } 76 | 77 | /*** OPERATIONS ***/ 78 | 79 | const struct term * 80 | dict_fetch(struct dict *d, const char *key, size_t key_size) 81 | { 82 | struct chain *c; 83 | size_t i; 84 | 85 | dict_locate(d, key, key_size, &i, &c); 86 | 87 | return c != NULL ? c->value : NULL; 88 | } 89 | 90 | void 91 | dict_store(struct dict *d, const struct term *t) 92 | { 93 | struct chain *c; 94 | size_t i; 95 | 96 | dict_locate(d, t->atom, t->size, &i, &c); 97 | if (c == NULL) { 98 | /* Chain does not exist, add a new one. 
*/ 99 | c = chain_new(t); 100 | c->next = d->bucket[i]; 101 | d->bucket[i] = c; 102 | } else { 103 | assert("term already hash consed" == NULL); 104 | /* Chain already exists, replace the value. */ 105 | c->value = t; 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /c_src/dict.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies. 3 | * Distributed under a BSD-style license; see LICENSE for more information. 4 | */ 5 | 6 | #ifndef TAMSIN_DICT_H 7 | #define TAMSIN_DICT_H 8 | 9 | #include 10 | 11 | struct dict { 12 | struct chain **bucket; 13 | size_t num_buckets; 14 | }; 15 | 16 | /* 17 | * Create a new dictionary. 18 | * Since this is only used for hash-consing right now, there is only one. 19 | */ 20 | struct dict *dict_new(int); 21 | 22 | /* 23 | * Retrieve a value from a dictionary, given its key, or NULL if it's not 24 | * there. 25 | */ 26 | const struct term *dict_fetch(struct dict *, const char *, size_t); 27 | 28 | /* 29 | * Insert a value into a dictionary. 30 | */ 31 | void dict_store(struct dict *, const struct term *); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /c_src/scanner.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies. 3 | * Distributed under a BSD-style license; see LICENSE for more information. 
4 | */ 5 | 6 | #include "scanner.h" 7 | #include "term.h" 8 | #include "tamsin.h" 9 | 10 | struct scanner *scanner_new(const char *buffer, size_t size) { 11 | struct scanner *scanner; 12 | 13 | scanner = malloc(sizeof(struct scanner)); 14 | scanner->buffer = buffer; 15 | scanner->size = size; 16 | scanner->position = 0; 17 | scanner->reset_position = 0; 18 | scanner->engines = NULL; 19 | 20 | return scanner; 21 | } 22 | 23 | void scanner_byte_engine(void) { 24 | } 25 | 26 | void scanner_utf8_engine(void) { 27 | } 28 | 29 | #define UTF_8_LEN_2_MASK 0xe0 /* 0b11100000 */ 30 | #define UTF_8_LEN_2_BITS 0xc0 /* 0b11000000 */ 31 | 32 | #define UTF_8_LEN_3_MASK 0xf0 /* 0b11110000 */ 33 | #define UTF_8_LEN_3_BITS 0xe0 /* 0b11100000 */ 34 | 35 | #define UTF_8_LEN_4_MASK 0xf8 /* 0b11111000 */ 36 | #define UTF_8_LEN_4_BITS 0xf0 /* 0b11110000 */ 37 | 38 | const struct term *scan(struct scanner *s) { 39 | if (s->position >= s->size) { 40 | return &tamsin_EOF; 41 | } 42 | if (s->engines == NULL || s->engines->production == &scanner_utf8_engine) { 43 | char c = s->buffer[s->position]; 44 | int len = 1; 45 | const struct term *t; 46 | 47 | if ((c & UTF_8_LEN_2_MASK) == UTF_8_LEN_2_BITS) { 48 | len = 2; 49 | } else if ((c & UTF_8_LEN_3_MASK) == UTF_8_LEN_3_BITS) { 50 | len = 3; 51 | } else if ((c & UTF_8_LEN_4_MASK) == UTF_8_LEN_4_BITS) { 52 | len = 4; 53 | } 54 | 55 | t = term_new_atom(s->buffer + s->position, len); 56 | s->position += len; 57 | return t; 58 | } else if (s->engines->production == &scanner_byte_engine) { 59 | char c = s->buffer[s->position]; 60 | 61 | s->position++; 62 | return term_new_atom_from_char(c); 63 | } else { 64 | const struct term *save_result = result; 65 | int save_reset_position = s->reset_position; 66 | void (*production)(void) = s->engines->production; 67 | 68 | scanner_push_engine(s, &scanner_utf8_engine); 69 | production(); 70 | scanner_pop_engine(s); 71 | 72 | s->reset_position = save_reset_position; 73 | 74 | if (!ok) { 75 | result = 
save_result; 76 | return &tamsin_EOF; 77 | } else { 78 | return result; 79 | } 80 | } 81 | } 82 | 83 | void unscan(struct scanner *s) { 84 | s->position = s->reset_position; 85 | } 86 | 87 | void commit(struct scanner *s) { 88 | s->reset_position = s->position; 89 | } 90 | 91 | struct engine *engine_new(void (*production)(void)) { 92 | struct engine *e = malloc(sizeof(struct engine)); 93 | 94 | e->production = production; 95 | return e; 96 | } 97 | 98 | void scanner_push_engine(struct scanner *s, void (*production)(void)) { 99 | struct engine *e = engine_new(production); 100 | 101 | e->next = s->engines; 102 | s->engines = e; 103 | } 104 | 105 | void scanner_pop_engine(struct scanner *s) { 106 | /* struct engine *e = s->engines; */ 107 | 108 | s->engines = s->engines->next; 109 | /* engine_free(e); */ 110 | } 111 | -------------------------------------------------------------------------------- /c_src/scanner.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies. 3 | * Distributed under a BSD-style license; see LICENSE for more information. 
4 | */ 5 | 6 | #ifndef TAMSIN_SCANNER_H 7 | #define TAMSIN_SCANNER_H 8 | 9 | #include "term.h" 10 | 11 | /* -------------------------------------------------------- scanner */ 12 | 13 | struct engine { 14 | void (*production)(void); 15 | struct engine *next; 16 | }; 17 | 18 | struct scanner { 19 | const char *buffer; 20 | size_t size; 21 | int position; 22 | int reset_position; 23 | struct engine *engines; 24 | }; 25 | 26 | struct scanner *scanner_new(const char *, size_t); 27 | const struct term *scan(struct scanner *); 28 | void unscan(struct scanner *); 29 | void commit(struct scanner *); 30 | void scanner_push_engine(struct scanner *, void (*)(void)); 31 | void scanner_pop_engine(struct scanner *); 32 | void scanner_byte_engine(void); 33 | void scanner_utf8_engine(void); 34 | 35 | /* 36 | * This value is never (and should never be) exposed to Tamsin programs! 37 | * It should not be considered a kind of term, really. That's just for 38 | * convenience in this implementation. 39 | */ 40 | extern struct term tamsin_EOF; 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /c_src/tamsin.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies. 3 | * Distributed under a BSD-style license; see LICENSE for more information. 
4 | */ 5 | 6 | #ifndef TAMSIN_TAMSIN_H 7 | #define TAMSIN_TAMSIN_H 8 | 9 | #include "term.h" 10 | #include "scanner.h" 11 | 12 | /* -------------------------------------------------------- tamsin */ 13 | 14 | void tamsin_eof(struct scanner *); 15 | void tamsin_any(struct scanner *); 16 | void tamsin_expect(struct scanner *, const struct term *); 17 | void tamsin_alnum(struct scanner *); 18 | void tamsin_upper(struct scanner *); 19 | void tamsin_startswith(struct scanner *, const char *); 20 | const struct term *tamsin_unquote(const struct term *, 21 | const struct term *, const struct term *); 22 | const struct term *tamsin_mkterm(const struct term *, const struct term *); 23 | const struct term *tamsin_equal(const struct term *, const struct term *); 24 | const struct term *tamsin_reverse(const struct term *, const struct term *); 25 | const struct term *tamsin_gensym(const struct term *); 26 | const struct term *tamsin_hexbyte(const struct term *, const struct term *); 27 | const struct term *tamsin_format_octal(const struct term *); 28 | const struct term *tamsin_length(const struct term *); 29 | 30 | /* 31 | * Given a possibly non-atom term, return an atom consisting of 32 | * contents of the given term reprified into an atom. 33 | * 34 | * The returned term is NOT always newly allocated. 35 | */ 36 | const struct term *tamsin_repr(const struct term *); 37 | 38 | int tamsin_isalpha(char); 39 | int tamsin_isupper(char); 40 | int tamsin_isdigit(char); 41 | int tamsin_isalnum(char); 42 | 43 | /* --------------------------------------------------------------- */ 44 | /* global state: result of last action */ 45 | 46 | extern int ok; 47 | extern const struct term *result; 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /c_src/term.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies. 
3 | * Distributed under a BSD-style license; see LICENSE for more information. 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "term.h" 12 | 13 | #include "dict.h" 14 | 15 | /* 16 | * this code LEAKS MEMORY all over the place, but that's "ok" because 17 | * Tamsin programs "aren't long running". and it's better than having 18 | * buffer overflows. 19 | */ 20 | 21 | struct dict *hash_conser = NULL; 22 | 23 | struct term tamsin_EOF = {"EOF", 3, -1, NULL}; 24 | 25 | int hits = 0; 26 | int misses = 0; 27 | 28 | struct term *term_single_byte_table = NULL; 29 | char term_single_byte_data[256]; 30 | 31 | const struct term *term_new_atom(const char *atom, size_t size) { 32 | struct term *t; 33 | char *text; 34 | 35 | /* 36 | if (size == 1) { 37 | int i; 38 | if (term_single_byte_table == NULL) { 39 | term_single_byte_table = malloc(sizeof(struct term) * 256); 40 | for (i = 0; i < 256; i++) { 41 | term_single_byte_data[i] = (char)i; 42 | term_single_byte_table[i].atom = term_single_byte_data + i; 43 | term_single_byte_table[i].size = 1; 44 | term_single_byte_table[i].index = -1; 45 | term_single_byte_table[i].subterms = NULL; 46 | } 47 | } 48 | i = ((unsigned char *)atom)[0]; 49 | return &term_single_byte_table[i]; 50 | } 51 | */ 52 | 53 | if (hash_conser == NULL) { 54 | hash_conser = dict_new(2503); 55 | } 56 | t = (struct term *)dict_fetch(hash_conser, atom, size); 57 | if (t != NULL) { 58 | hits++; 59 | return t; 60 | } 61 | 62 | t = malloc(sizeof(struct term)); 63 | text = malloc(size); 64 | memcpy(text, atom, size); 65 | t->atom = text; 66 | t->size = size; 67 | t->index = -1; 68 | t->subterms = NULL; 69 | 70 | dict_store(hash_conser, t); 71 | misses++; 72 | 73 | return t; 74 | } 75 | 76 | const struct term *term_new_atom_from_char(char c) { 77 | char s[2]; 78 | 79 | s[0] = c; 80 | s[1] = '\0'; 81 | 82 | return term_new_atom(s, 1); 83 | } 84 | 85 | const struct term *term_new_atom_from_cstring(const char *atom) { 86 | return 
term_new_atom(atom, strlen(atom)); 87 | } 88 | 89 | const struct term *term_new_constructor(const char *tag, size_t size, 90 | struct termlist *subterms) 91 | { 92 | struct term *t = malloc(sizeof(struct term)); 93 | char *text = malloc(size); 94 | 95 | memcpy(text, tag, size); 96 | t->atom = text; 97 | t->size = size; 98 | t->index = -1; 99 | t->subterms = subterms; 100 | 101 | return t; 102 | } 103 | 104 | void termlist_add_term(struct termlist **tl, const struct term *term) { 105 | struct termlist *new_tl; 106 | 107 | new_tl = malloc(sizeof(struct termlist)); 108 | new_tl->term = term; 109 | new_tl->next = *tl; 110 | *tl = new_tl; 111 | } 112 | 113 | const struct term *term_new_variable(const char *name, size_t size, int index) { 114 | struct term *t; 115 | char *text; 116 | 117 | t = malloc(sizeof(struct term)); 118 | text = malloc(size); 119 | memcpy(text, name, size); 120 | t->atom = text; 121 | t->size = size; 122 | assert(index != -1); 123 | t->index = index; 124 | t->subterms = NULL; 125 | 126 | return t; 127 | } 128 | 129 | int term_atoms_equal(const struct term *lhs, const struct term *rhs) { 130 | if (lhs->size != rhs->size) { 131 | return 0; 132 | } 133 | return memcmp(lhs->atom, rhs->atom, lhs->size) == 0; 134 | } 135 | 136 | int term_atom_cstring_equal(const struct term *lhs, const char *string) { 137 | if (lhs->size != strlen(string)) { 138 | return 0; 139 | } 140 | return memcmp(lhs->atom, string, lhs->size) == 0; 141 | } 142 | 143 | const struct term *term_concat(const struct term *lhs, const struct term *rhs) { 144 | const struct term *t; 145 | int new_size; 146 | char *new_atom; 147 | 148 | assert(lhs->subterms == NULL); 149 | assert(rhs->subterms == NULL); 150 | 151 | new_size = lhs->size + rhs->size; 152 | new_atom = malloc(new_size); 153 | memcpy(new_atom, lhs->atom, lhs->size); 154 | memcpy(new_atom + lhs->size, rhs->atom, rhs->size); 155 | t = term_new_atom(new_atom, new_size); 156 | free(new_atom); 157 | 158 | return t; 159 | } 160 | 161 | 
const struct term COMMASPACE = { ", ", 2, -1, NULL }; 162 | 163 | const struct term *term_flatten(const struct term *t) { 164 | struct termlist *tl; 165 | 166 | if (t->subterms == NULL) { /* it's an atom */ 167 | return t; 168 | } else { /* it's a constructor */ 169 | const struct term *n; 170 | /* we clone t here to get an atom from its tag */ 171 | n = term_concat(term_new_atom(t->atom, t->size), 172 | term_new_atom_from_char('(')); 173 | 174 | for (tl = t->subterms; tl != NULL; tl = tl->next) { 175 | n = term_concat(n, term_flatten(tl->term)); 176 | if (tl->next != NULL) { 177 | n = term_concat(n, &COMMASPACE); 178 | } 179 | } 180 | n = term_concat(n, term_new_atom_from_char(')')); 181 | return n; 182 | } 183 | } 184 | 185 | void term_fput(const struct term *t, FILE *f) { 186 | const struct term *flat = term_flatten(t); 187 | 188 | fwrite(flat->atom, 1, flat->size, f); 189 | } 190 | 191 | int term_equal(const struct term *pattern, const struct term *ground) 192 | { 193 | struct termlist *tl1, *tl2; 194 | 195 | assert(pattern->index == -1); 196 | assert(ground->index == -1); 197 | 198 | if (!term_atoms_equal(pattern, ground)) { 199 | return 0; 200 | } 201 | if (pattern->subterms == NULL && ground->subterms == NULL) { 202 | return 1; 203 | } 204 | 205 | tl1 = pattern->subterms; 206 | tl2 = ground->subterms; 207 | while (tl1 != NULL && tl2 != NULL) { 208 | if (!term_equal(tl1->term, tl2->term)) { 209 | return 0; 210 | } 211 | tl1 = tl1->next; 212 | tl2 = tl2->next; 213 | } 214 | if (tl1 != NULL || tl2 != NULL) { 215 | return 0; 216 | } 217 | return 1; 218 | } 219 | 220 | int term_match_unifier(const struct term *pattern, const struct term *ground, 221 | const struct term **variables) 222 | { 223 | struct termlist *tl1, *tl2; 224 | 225 | if (pattern->index >= 0) { 226 | variables[pattern->index] = ground; 227 | return 1; 228 | } 229 | if (!term_atoms_equal(pattern, ground)) { 230 | return 0; 231 | } 232 | if (pattern->subterms == NULL && ground->subterms == NULL) { 
233 | return 1; 234 | } 235 | 236 | tl1 = pattern->subterms; 237 | tl2 = ground->subterms; 238 | while (tl1 != NULL && tl2 != NULL) { 239 | if (!term_match_unifier(tl1->term, tl2->term, variables)) { 240 | return 0; 241 | } 242 | tl1 = tl1->next; 243 | tl2 = tl2->next; 244 | } 245 | if (tl1 != NULL || tl2 != NULL) { 246 | return 0; 247 | } 248 | 249 | return 1; 250 | } 251 | -------------------------------------------------------------------------------- /c_src/term.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c)2014 Chris Pressey, Cat's Eye Technologies. 3 | * Distributed under a BSD-style license; see LICENSE for more information. 4 | */ 5 | 6 | #include 7 | #include 8 | 9 | #ifndef TAMSIN_TERM_H 10 | #define TAMSIN_TERM_H 11 | 12 | extern int hits; 13 | extern int misses; 14 | 15 | /* 16 | * If `subterms` is NULL and `index` == -1, this is an atom. 17 | * 18 | * If `subterms` is non-NULL, this is a constructor. 19 | * 20 | * If `index` >= 0, this is a variable. 21 | * 22 | * It is not a legal term if both `subterms` is non-NULL and `index` >= 0. 23 | * 24 | * In all cases, atom should not be NULL. 25 | */ 26 | struct term { 27 | const char *atom; 28 | size_t size; 29 | int index; 30 | struct termlist *subterms; 31 | }; 32 | 33 | struct termlist { 34 | const struct term *term; 35 | struct termlist *next; 36 | }; 37 | 38 | /* 39 | * Creates a new "atom" term from the given character string. 40 | * The new term contains a dynamically allocated copy of the given string, 41 | * so the given string may be freed after calling this. 42 | * Subterms may be added afterwards to turn it into a "constructor" term. 43 | * Segfaults if there is insufficient memory to allocate the term. 
44 | */ 45 | const struct term *term_new_atom(const char *, size_t); 46 | const struct term *term_new_atom_from_cstring(const char *); 47 | const struct term *term_new_atom_from_char(char c); 48 | 49 | const struct term *term_new_constructor(const char *, size_t, 50 | struct termlist *); 51 | void termlist_add_term(struct termlist **, const struct term *); 52 | 53 | const struct term *term_new_variable(const char *, size_t, int); 54 | 55 | /* 56 | * Returns 1 if the atom portion of both terms is identical, otherwise 0. 57 | */ 58 | int term_atoms_equal(const struct term *, const struct term *); 59 | 60 | /* 61 | * Returns 1 if the atom portion of term is identical to given C string, else 0. 62 | */ 63 | int term_atom_cstring_equal(const struct term *, const char *); 64 | 65 | /* 66 | * Given the name of a variable, return the variable term of the 67 | * same name that is leftmost, uppermost in the given term. 68 | */ 69 | /* 70 | struct term *term_find_variable(const struct term *, const char *); 71 | */ 72 | 73 | /* 74 | * Given two "atom" terms, return a new "atom" term consisting of the 75 | * text of the input terms concatenated together. 76 | */ 77 | const struct term *term_concat(const struct term *, const struct term *); 78 | 79 | /* 80 | * Given a possibly non-atom term, return an atom consisting of 81 | * contents of the given term flattened into an atom. 82 | * 83 | * The returned term is NOT always newly allocated. 84 | */ 85 | const struct term *term_flatten(const struct term *); 86 | 87 | void term_fput(const struct term *, FILE *); 88 | 89 | /* 90 | * Both terms must be ground. 91 | */ 92 | int term_equal(const struct term *, const struct term *); 93 | 94 | /* 95 | * The third argument is an array of struct term *'s. It will 96 | * be updated with bindings. 
97 | */ 98 | int term_match_unifier(const struct term *, const struct term *, 99 | const struct term **); 100 | 101 | #endif 102 | -------------------------------------------------------------------------------- /doc/6502-sketch.tamsin: -------------------------------------------------------------------------------- 1 | # a sketch of what a Tamsin program to simulate a subset of the 6502 2 | # might look like. 3 | 4 | # note that the 6502 memory is in the IMPLICIT BUFFER. 5 | 6 | sim6502 = instr(0,0,0) using $:byte. 7 | 8 | instr(A,X,Y) = 9 | "\xA9" & any → A & instr(A,X,Y) # LDA # 10 | | "\xC8" & inc(Y) → Y & instr(A,X,Y) # INY 11 | | "\x8A" & A ← X & instr(A,X,Y) # TAX 12 | | "\x4C" & word → W & $:seek(W) & instr(A,X,Y) # JMP 13 | | etc. 14 | 15 | word = 16 | any → Lo & any → Hi & return $:add($:ord(Lo), $:mul($:ord(Hi), 256)). 17 | 18 | etc. 19 | 20 | 21 | # That's the recursive version; compiling it to C currently would not be 22 | # nice to the stack. Here's an iterative version: 23 | 24 | 25 | sim6502 = 26 | A ← 0 & X ← 0 & Y ← 0 & 27 | !{instr(A,X,Y) → state(A,X,Y)} using $:byte. 28 | 29 | instr(A,X,Y) = 30 | "\xA9" & any → A & return! state(A,X,Y) # LDA # 31 | !| "\xC8" & inc(Y) → Y & return! state(A,X,Y) # INY 32 | !| "\x8A" & A ← X & return! state(A,X,Y) # TAX 33 | !| "\x4C" & word → W & $:seek(W) & return! state(A,X,Y) # JMP 34 | !| "\x00" & return! halted # BRK 35 | !| etc. 36 | 37 | 38 | # this uses ! (non-backtracking) and return! (immediate return from production) 39 | # (not sure about either of these...) 40 | -------------------------------------------------------------------------------- /doc/Advanced_Features.markdown: -------------------------------------------------------------------------------- 1 | Advanced Features of the Tamsin Language 2 | ======================================== 3 | 4 | This document is a **work in progress**. 
5 | 6 | Note that none of these features are in Tamsin version 0.1 (although the 7 | reference implementation might support them or at least the syntax for 8 | them — they should be regarded as undefined in 0.1. They may appear in 9 | 0.2.) 10 | 11 | -> Tests for functionality "Intepret Tamsin program" 12 | 13 | Three good ways to shoot yourself in the foot 14 | --------------------------------------------- 15 | 16 | 1, forget that Tamsin is still basically a *programming* language, or at 17 | best an LL(n) grammar, and try to write a left-recursive rule: 18 | 19 | expr = expr & "+" & expr | expr & "*" & expr | "0" | "1". 20 | 21 | 2, base a `{}` loop around something that always succeeds, like `return` or 22 | `eof` at the end of the input. 23 | 24 | expr = {"k" | return l}. 25 | 26 | 3, base a loop around something that doesn't consume any input, like `!`. 27 | 28 | expr = !"\n" & expr 29 | 30 | Advanced Assignment 31 | ------------------- 32 | 33 | The right-hand side of `→` can actually be more than a variable name; 34 | it can be a pattern term, just like is used in the arguments, above. 35 | This can be useful for "deconstructing" a compound return value from a 36 | production to extract the parts you want. 37 | 38 | | main = foo → pair(A,B) & return A. 39 | | foo = return pair(wellington, trainer). 40 | = wellington 41 | 42 | | main = foo → pair(A,B) & return B. 43 | | foo = return pair(wellington, trainer). 44 | = trainer 45 | 46 | Even without variables, this can also be useful simply to assert something 47 | returns some value. 48 | 49 | | main = foo → b & print 'yes' | print 'no'. 50 | | foo = return a. 51 | = no 52 | = no 53 | 54 | | main = foo → b & print 'yes' | print 'no'. 55 | | foo = return b. 56 | = yes 57 | = yes 58 | 59 | Advanced Programming 60 | -------------------- 61 | 62 | Before the first production in a program, any number of _pragmas_ may be 63 | given. Pragmas may affect how the program following them is parsed. 
64 | Each pragma begins with a `@` followed by a bareword indicating the 65 | kind of pragma, followed by a number of arguments specific to that kind 66 | of pragma, followed by a `.`. 67 | 68 | | @alias zrrk 2 = jersey. 69 | | @unalias zrrk. 70 | | main = foo. 71 | | foo = "b". 72 | + b 73 | = b 74 | 75 | ### `@alias` ### 76 | 77 | The pragma `@alias` introduces an alias. Its syntax consists of the 78 | name of the alias (a bareword), followed by an integer which indicates 79 | the _arity_, followed by `=`, followed by the contents of the alias 80 | (i.e., what is being aliased; presently, this must be a non-terminal.) 81 | 82 | This sets up a syntax rule, in the rule context, that, when the alias 83 | name is encountered, parses as a call to the aliased non-terminal; in 84 | addition, this syntax rule is special in that it looks for exactly 85 | _arity_ number of terms following the alias name. Parentheses are not 86 | required to delimit these terms. 87 | 88 | | @alias foo 2 = jersey. 89 | | main = jersey(a,b) & foo c d. 90 | | jersey(A,B) = «A» & «B». 91 | + abcd 92 | = d 93 | 94 | The pragma `@unalias` removes a previously-introduced alias. 95 | 96 | | @alias foo 2 = jersey. 97 | | @unalias foo. 98 | | main = jersey(a,b) & foo c d. 99 | | jersey(A,B) = «A» & «B». 100 | + abcd 101 | ? Expected '.' at ' c d 102 | 103 | It is an error to attempt to unalias an alias that hasn't been established. 104 | 105 | | @alias foo 2 = jersey. 106 | | @unalias bar. 107 | | main = return ok. 108 | ? KeyError 109 | 110 | Note that several of Tamsin's "keywords" are actually built-in aliases for 111 | productions in the `$` module, and they may be unaliased. 112 | 113 | | @unalias return. 114 | | main = return ok. 115 | ? Expected '.' at ' ok.' 116 | 117 | | @unalias return. 118 | | main = $.return(ok). 119 | = ok 120 | 121 | ### Rule Formals ### 122 | 123 | Then we no longer pattern-match terms. They're just strings. So we... we 124 | parse them.
Here's a preview, and we'll get more serious about this further 125 | below. 126 | 127 | Now that you can create scanners and parsers to your heart's desire, we 128 | return to the reason you would even need to: terms vs. rules in the 129 | "formal arguments" part of a production definition. 130 | 131 | | main = ("a" | "b" | "c") → C & donkey('f' + C) → D & return D. 132 | | donkey["f" & ("a" | "c")] = return yes. 133 | | donkey["f" & "b"] = return no. 134 | + a 135 | = yes 136 | 137 | | main = ("a" | "b" | "c") → C & donkey('f' + C) → D & return D. 138 | | donkey["f" & ("a" | "c")] = return yes. 139 | | donkey["f" & "b"] = return no. 140 | + b 141 | = no 142 | 143 | | main = ("a" | "b" | "c") → C & donkey('f' + C) → D & return D. 144 | | donkey["f" & ("a" | "c")] = return yes. 145 | | donkey["f" & "b"] = return no. 146 | + c 147 | = yes 148 | 149 | Variables that are set in a parse-pattern formals are available to 150 | the production's rule. 151 | 152 | | main = donkey(world). 153 | | donkey[any → E] = return hello(E). 154 | = hello(w) 155 | 156 | | main = donkey(world). 157 | | donkey[any → E using word] = return hello(E). 158 | | word = (T ← '' & {$.alnum → S & T ← T + S} & T) using $.char. 159 | = hello(world) 160 | 161 | No variables from the caller leak into the called production. 162 | 163 | | main = set F = whatever & donkey(world). 164 | | donkey[any → E] = return hello(F). 165 | ? KeyError 166 | 167 | Terms are stringified before being matched. 168 | 169 | | main = donkey(a(b(c))). 170 | | donkey["a" & "(" & "b" & "(" & "c" & ")" & ")"] = return yes. 171 | = yes 172 | 173 | Thus, in this sense at least, terms are sugar for strings. 174 | 175 | | main = donkey('a(b(c))'). 176 | | donkey["a" & "(" & "b" & "(" & "c" & ")" & ")"] = return yes. 177 | = yes 178 | 179 | The rule formals may call on other rules in the program. 180 | 181 | | main = donkey('pair(pair(0,1),1)'). 182 | | donkey[pair → T using mini] = return its_a_pair(T). 
183 | | donkey[bit → T using mini] = return its_a_bit(T). 184 | | thing = pair | bit. 185 | | pair = "pair" & "(" & thing → A & "," & thing → B & ")" & return pair(A,B). 186 | | bit = "0" | "1". 187 | | mini = (bit | "(" | ")" | "," | word) using $.char. 188 | | word = (T ← '' & {$.alnum → S & T ← T + S} & T). 189 | = its_a_pair(pair(pair(0, 1), 1)) 190 | 191 | ### Auto-term creation from productions ### 192 | 193 | An experimental feature. But Rooibos does it, and it could help make 194 | parser development faster/shorter. Note that this feature is not fully implemented. 195 | Therefore the test below is disabled. 196 | 197 | | main = expr0. 198 | | expr0! = expr1 & {"+" & expr1}. 199 | | expr1! = term & {"*" & term}. 200 | | term = "x" | "y" | "z" | "(" & expr0 & ")". 201 | + x+y*(z+x+y) 202 | = expr0(expr1, +, expr1) 203 | -------------------------------------------------------------------------------- /doc/Case_Study.markdown: -------------------------------------------------------------------------------- 1 | Case Study: Parsing and Evaluating S-Expressions in Tamsin 2 | ========================================================== 3 | 4 | -> Tests for functionality "Intepret Tamsin program" 5 | 6 | We now have enough tools at our disposal to parse and evaluate simple 7 | S-expressions (from Lisp or Scheme). 8 | 9 | Note that we no longer have `$.tamsin`, so these examples don't work. 10 | They're left here to demonstrate the development process. For now, see 11 | `eg/sexpr-eval.tamsin`. 12 | 13 | We can write such a parser with `{}`, but the result is a bit messy. 14 | 15 | | main = sexp using $.tamsin. 16 | | sexp = symbol | list. 17 | | list = "(" & 18 | | set L = nil & 19 | | {sexp → S & set L = pair(S, L)} & 20 | | ")" & 21 | | return L. 22 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c".
23 | + (cons (a (cons b nil))) 24 | = pair(pair(pair(nil, pair(b, pair(cons, nil))), pair(a, nil)), pair(cons, nil)) 25 | 26 | So let's write it in the less intuitive, recursive way: 27 | 28 | | main = sexp using $.tamsin. 29 | | 30 | | sexp = symbol | list. 31 | | list = "(" & listtail(nil). 32 | | listtail(L) = sexp → S & listtail(pair(S, L)) 33 | | | ")" & return L. 34 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c". 35 | + (a b) 36 | = pair(b, pair(a, nil)) 37 | 38 | Nice. But it returns a term that's backwards. So we need to write a 39 | reverser. In Erlang, this would be 40 | 41 | reverse([H|T], A) -> reverse(T, [H|A]). 42 | reverse([], A) -> A. 43 | 44 | In Tamsin, it's: 45 | 46 | | main = sexp → S using $.tamsin & reverse(S, nil) → SR & return SR. 47 | | 48 | | sexp = symbol | list. 49 | | list = "(" & listtail(nil). 50 | | listtail(L) = sexp → S & listtail(pair(S, L)) 51 | | | ")" & return L. 52 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c". 53 | | 54 | | reverse(pair(H, T), A) = 55 | | reverse(T, pair(H, A)) → TR & 56 | | return TR. 57 | | reverse(nil, A) = 58 | | return A. 59 | + (a b) 60 | = pair(a, pair(b, nil)) 61 | 62 | But it's not deep. It only reverses the top-level list. 63 | 64 | | main = sexp → S using $.tamsin & reverse(S, nil) → SR & return SR. 65 | | 66 | | sexp = symbol | list. 67 | | list = "(" & listtail(nil). 68 | | listtail(L) = sexp → S & listtail(pair(S, L)) 69 | | | ")" & return L. 70 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c". 71 | | 72 | | reverse(pair(H, T), A) = 73 | | reverse(T, pair(H, A)) → TR & 74 | | return TR. 75 | | reverse(nil, A) = 76 | | return A. 77 | + (a (c b) b) 78 | = pair(a, pair(pair(b, pair(c, nil)), pair(b, nil))) 79 | 80 | So here's a deep reverser. 81 | 82 | | main = sexp → S using $.tamsin & reverse(S, nil) → SR & return SR. 83 | | 84 | | sexp = symbol | list. 85 | | list = "(" & listtail(nil). 
86 | | listtail(L) = sexp → S & listtail(pair(S, L)) 87 | | | ")" & return L. 88 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c". 89 | | 90 | | reverse(pair(H, T), A) = 91 | | reverse(H, nil) → HR & 92 | | reverse(T, pair(HR, A)) → TR & 93 | | return TR. 94 | | reverse(nil, A) = 95 | | return A. 96 | | reverse(X, A) = 97 | | return X. 98 | + (a (c b) b) 99 | = pair(a, pair(pair(c, pair(b, nil)), pair(b, nil))) 100 | 101 | Finally, a little sexpr evaluator. 102 | 103 | | main = sexp → S using $.tamsin & reverse(S, nil) → SR & eval(SR). 104 | | 105 | | sexp = symbol | list. 106 | | list = "(" & listtail(nil). 107 | | listtail(L) = sexp → S & listtail(pair(S, L)) 108 | | | ")" & return L. 109 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c". 110 | | 111 | | head(pair(A, B)) = return A. 112 | | tail(pair(A, B)) = return B. 113 | | cons(A, B) = return pair(A, B). 114 | | 115 | | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R) → P & return P. 116 | | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R) → P & return P. 117 | | eval(pair(cons, pair(A, pair(B, nil)))) = 118 | | eval(A) → AE & eval(B) → BE & return pair(AE, BE). 119 | | eval(X) = return X. 120 | | 121 | | reverse(pair(H, T), A) = 122 | | reverse(H, nil) → HR & 123 | | reverse(T, pair(HR, A)) → TR & 124 | | return TR. 125 | | reverse(nil, A) = 126 | | return A. 127 | | reverse(X, A) = 128 | | return X. 129 | + (cons a b) 130 | = pair(a, b) 131 | 132 | | main = sexp → S using $.tamsin & reverse(S, nil) → SR & eval(SR). 133 | | 134 | | sexp = symbol | list. 135 | | list = "(" & listtail(nil). 136 | | listtail(L) = sexp → S & listtail(pair(S, L)) 137 | | | ")" & return L. 138 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c". 139 | | 140 | | head(pair(A, B)) = return A. 141 | | tail(pair(A, B)) = return B. 142 | | cons(A, B) = return pair(A, B). 143 | | 144 | | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R) → P & return P. 
145 | | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R) → P & return P. 146 | | eval(pair(cons, pair(A, pair(B, nil)))) = 147 | | eval(A) → AE & eval(B) → BE & return pair(AE, BE). 148 | | eval(X) = return X. 149 | | 150 | | reverse(pair(H, T), A) = 151 | | reverse(H, nil) → HR & 152 | | reverse(T, pair(HR, A)) → TR & 153 | | return TR. 154 | | reverse(nil, A) = 155 | | return A. 156 | | reverse(X, A) = 157 | | return X. 158 | + (head (cons b a)) 159 | = b 160 | 161 | | main = sexp → S using $.tamsin & reverse(S, nil) → SR & eval(SR). 162 | | 163 | | sexp = symbol | list. 164 | | list = "(" & listtail(nil). 165 | | listtail(L) = sexp → S & listtail(pair(S, L)) 166 | | | ")" & return L. 167 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c". 168 | | 169 | | head(pair(A, B)) = return A. 170 | | tail(pair(A, B)) = return B. 171 | | cons(A, B) = return pair(A, B). 172 | | 173 | | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R) → P & return P. 174 | | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R) → P & return P. 175 | | eval(pair(cons, pair(A, pair(B, nil)))) = 176 | | eval(A) → AE & eval(B) → BE & return pair(AE, BE). 177 | | eval(X) = return X. 178 | | 179 | | reverse(pair(H, T), A) = 180 | | reverse(H, nil) → HR & 181 | | reverse(T, pair(HR, A)) → TR & 182 | | return TR. 183 | | reverse(nil, A) = 184 | | return A. 185 | | reverse(X, A) = 186 | | return X. 187 | + (tail (tail (cons b (cons b a)))) 188 | = a 189 | 190 | In this one, we make the evaluator print out some of the steps it takes. 191 | 192 | | main = sexp → S using $.tamsin & reverse(S, nil) → SR & eval(SR). 193 | | 194 | | sexp = symbol | list. 195 | | list = "(" & listtail(nil). 196 | | listtail(L) = sexp → S & listtail(pair(S, L)) 197 | | | ")" & return L. 198 | | symbol = "cons" | "head" | "tail" | "nil" | "a" | "b" | "c". 199 | | 200 | | head(pair(A, B)) = return A. 201 | | tail(pair(A, B)) = return B. 202 | | cons(A, B) = return pair(A, B). 
203 | | 204 | | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R) → P & return P. 205 | | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R) → P & return P. 206 | | eval(pair(cons, pair(A, pair(B, nil)))) = 207 | | eval(A) → AE & eval(B) → BE & 208 | | $.print(y(AE, BE)) & cons(AE, BE) → C & return C. 209 | | eval(X) = return X. 210 | | 211 | | reverse(pair(H, T), A) = 212 | | reverse(H, nil) → HR & 213 | | reverse(T, pair(HR, A)) → TR & 214 | | return TR. 215 | | reverse(nil, A) = 216 | | return A. 217 | | reverse(X, A) = 218 | | return X. 219 | + (cons (tail (cons b a)) (head (cons b a))) 220 | = y(b, a) 221 | = y(b, a) 222 | = y(a, b) 223 | = pair(a, b) 224 | -------------------------------------------------------------------------------- /doc/Error_Reporting.markdown: -------------------------------------------------------------------------------- 1 | Error Reporting 2 | --------------- 3 | 4 | For now, only the Tamsin interpreter is expected to pass these tests. 5 | 6 | Also, these tests expose some details about how Falderal creates temp files. 7 | Boo! 8 | 9 | -> Tests for functionality "Intepret Tamsin program" 10 | 11 | When a scanning error occurs in a Tamsin source, the filename, line number, 12 | and column number are reported. 13 | 14 | | hello = "h". 15 | | % 16 | ? expected identifiable character but found '%' at line 2, column 5 in '/tmp/tmp 17 | 18 | When a parsing error occurs in a Tamsin source, the filename, line number, 19 | and column number are reported. 20 | 21 | | slough = "h" & ("o" | "p"). 22 | | maidenhead = "h" & ("o" | "p"). 23 | | reading = "h" ("o" | "p"). 24 | ? expected '.' but found '(' at line 3, column 16 in '/tmp/tmp 25 | 26 | | pasta = "h" & «hop() & "p". 27 | ? expected '>>' but found '&' at line 1, column 22 in '/tmp/tmp 28 | 29 | | pasta = "h" & «hop() 30 | ? 
expected '>>' but found EOF at line 1, column 22 in '/tmp/tmp 31 | 32 | When a scanning error occurs in the input to a Tamsin program, the filename, 33 | line number, and column number are reported. 34 | 35 | | main = "h" & "o" & "x". 36 | + hop 37 | ? expected 'x' but found 'p' at line 1, column 3 in '' 38 | 39 | | main = "h" & "o" & {"\n"} & "0" & "x". 40 | + ho 41 | + 42 | + 0p 43 | ? expected 'x' but found 'p' at line 3, column 2 in '' 44 | 45 | | main = "h" & "o" & "x". 46 | + ho 47 | ? expected 'x' but found EOF at line 1, column 3 in '' 48 | 49 | | main = "h" & "o" & $:eof. 50 | + hox 51 | ? expected EOF but found 'x' at line 1, column 3 in '' 52 | 53 | | main = "h" & "o" & $:any. 54 | + ho 55 | ? expected any token but found EOF at line 1, column 3 in '' 56 | 57 | | main = "h" & "o" & $:alnum. 58 | + ho& 59 | ? expected alphanumeric but found '&' at line 1, column 3 in '' 60 | 61 | | main = "h" & "o" & $:upper. 62 | + hod 63 | ? expected uppercase but found 'd' at line 1, column 3 in '' 64 | 65 | | main = "h" & "o" & $:startswith('f'). 66 | + hod 67 | ? expected 'f...' but found 'd' at line 1, column 3 in '' 68 | 69 | | main = "h" & "o" & (! "n"). 70 | + hon 71 | ? expected anything else but found 'n' at line 1, column 3 in '' 72 | -------------------------------------------------------------------------------- /doc/Micro-Tamsin.markdown: -------------------------------------------------------------------------------- 1 | Micro-Tamsin 2 | ============ 3 | 4 | This is just the "fundaments" part of the spec, and a few other bits, 5 | that the Micro-Tamsin interpreter (written in Tamsin!) can handle. 6 | 7 | -> Tests for functionality "Intepret Tamsin program" 8 | 9 | Fundaments 10 | ---------- 11 | 12 | A Tamsin program consists of one or more _productions_. A production consists 13 | of a name and a _parsing rule_ (or just "rule" for short). 
Among other things, 14 | a rule may be a _non-terminal_, which is the name of a production, or a 15 | _terminal_, which is a literal string in double quotes. (A full grammar for 16 | Tamsin can be found in Appendix A.) 17 | 18 | When run, a Tamsin program processes its input. It starts at the production 19 | named `main`, and evaluates its rule. A non-terminal in a rule "calls" the 20 | production of that name in the program. A terminal in a rule expects a token 21 | identical to it to be on the input. If that expectation is met, it evaluates 22 | to that token. If not, it raises an error. The final result of evaluating a 23 | Tamsin program is sent to its output. 24 | 25 | (If it makes it easier to think about, consider "its input" to mean "stdin", 26 | and "token" to mean "character"; so the terminal `"x"` is a command that either 27 | reads the character `x` from stdin and returns it (whence it is printed to 28 | stdout by the main program), or errors out if it read something else. 29 | Or, thinking about it from the other angle, we have here the rudiments for 30 | defining a grammar for parsing a trivial language.) 31 | 32 | | main = blerf. 33 | | blerf = "p". 34 | + p 35 | = p 36 | 37 | | main = blerf. 38 | | blerf = "p". 39 | + k 40 | ? expected 'p' found 'k' 41 | 42 | Productions can be written that don't look at the input. A rule may also 43 | consist of the keyword `return`, followed by a _term_; this expression simply 44 | evaluates to that term and returns it. (More on terms later; for now, 45 | think of them as strings.) 46 | 47 | So, the following program always outputs `blerp`, no matter what the input is. 48 | 49 | | main = return blerp. 50 | + fadda wadda badda kadda nadda sadda hey 51 | = blerp 52 | 53 | Note that in the following, `blerp` refers to the production named "blerp" 54 | in one place, and in the other place, it refers to the term `blerp`.
Tamsin 55 | sees the difference because of the context; `return` must be followed by a 56 | term, while a parsing rule cannot be part of a term. 57 | 58 | | main = blerp. 59 | | blerp = return blerp. 60 | + foo 61 | + foo 62 | + foo 0 0 0 0 0 63 | = blerp 64 | 65 | A rule may also consist of the keyword `print` followed by a term, which, 66 | when evaluated, sends the term to the output, and evaluates to the term. 67 | (Mostly this is useful for debugging. In the following, `world` is 68 | repeated because it is both printed, and the result of the evaluation.) 69 | 70 | | main = print hello & print world. 71 | + ahoshoshohspohdphs 72 | = hello 73 | = world 74 | = world 75 | 76 | A rule may also consist of two subrules joined by the `&` operator. 77 | The `&` operator processes the left-hand side rule. If the LHS fails, then 78 | the `&` expression fails; otherwise, it continues and processes the 79 | right-hand side rule. If the RHS fails, the `&` expression fails; otherwise 80 | it evaluates to what the RHS evaluated to. 81 | 82 | | main = "a" & "p". 83 | + ap 84 | = p 85 | 86 | | main = "a" & "p". 87 | + ak 88 | ? expected 'p' found 'k' 89 | 90 | | main = "a" & "p". 91 | + ep 92 | ? expected 'a' found 'e' 93 | 94 | If you are too used to C or Javascript or the shell, you may use `&&` 95 | instead of `&`. 96 | 97 | | main = "a" && "p". 98 | + ap 99 | = p 100 | 101 | A rule may also consist of two subrules joined by the `|` operator. 102 | The `|` operator processes the left-hand side rule. If the LHS succeeds, 103 | then the `|` expression evaluates to what the LHS evaluated to, and the 104 | RHS is ignored. But if the LHS fails, it processes the RHS; if the RHS 105 | fails, the `|` expression fails, but otherwise it evaluates to what the 106 | RHS evaluated to. 107 | 108 | For example, this program accepts `0` or `1` but nothing else. 109 | 110 | | main = "0" | "1". 111 | + 0 112 | = 0 113 | 114 | | main = "0" | "1".
115 | + 1 116 | = 1 117 | 118 | | main = "0" | "1". 119 | + 2 120 | ? expected '1' found '2' 121 | 122 | If you are too used to C or Javascript or the shell, you may use `||` 123 | instead of `|`. 124 | 125 | | main = "0" || "1". 126 | + 1 127 | = 1 128 | 129 | Using `return` described above, this program accepts 0 or 1 and evaluates 130 | to the opposite. (Note here also that `&` has a higher precedence than `|`.) 131 | 132 | | main = "0" & return 1 | "1" & return 0. 133 | + 0 134 | = 1 135 | 136 | | main = "0" & return 1 | "1" & return 0. 137 | + 1 138 | = 0 139 | 140 | | main = "0" & return 1 | "1" & return 0. 141 | + 2 142 | ? expected '1' found '2' 143 | 144 | Evaluation order can be altered by using parentheses, as per usual. 145 | 146 | | main = "0" & ("0" | "1") & "1" & return ok. 147 | + 011 148 | = ok 149 | 150 | Note that if the LHS of `|` fails, the RHS is tried at the position of 151 | the stream that the LHS started on. This property is called "backtracking". 152 | 153 | | ohone = "0" & "1". 154 | | ohtwo = "0" & "2". 155 | | main = ohone | ohtwo. 156 | + 02 157 | = 2 158 | 159 | Note that `print` and `return` never fail. Thus, code like the following 160 | is "useless": 161 | 162 | | main = foo & print hi | return useless. 163 | | foo = return bar | print useless. 164 | = hi 165 | = hi 166 | 167 | Note that `return` does not exit the production immediately — although 168 | this behaviour may be re-considered... 169 | 170 | | main = return hello & print not_useless. 171 | = not_useless 172 | = not_useless 173 | 174 | Alternatives can select code to be executed, based on the input. 175 | 176 | | main = aorb & print aorb | cord & print cord & return ok. 177 | | aorb = "a" & print ay | "b" & print bee. 178 | | cord = "c" & print see | eorf & print eorf. 179 | | eorf = "e" & print ee | "f" & print eff. 180 | + e 181 | = ee 182 | = eorf 183 | = cord 184 | = ok 185 | 186 | And that's the basics. 
With these tools, you can write simple 187 | recursive-descent parsers. For example, to consume nested parentheses 188 | containing a zero: 189 | 190 | | main = parens & "." & return ok. 191 | | parens = "(" & parens & ")" | "0". 192 | + 0. 193 | = ok 194 | 195 | | main = parens & "." & return ok. 196 | | parens = "(" & parens & ")" | "0". 197 | + (((0))). 198 | = ok 199 | 200 | (the error message on this test case is a little weird; it's because of 201 | the backtracking. It tries to match `(((0)))` against the beginning of 202 | input, and fails, because the last `)` is not present. So it tries to 203 | match `0` at the beginning instead, and fails that too.) 204 | 205 | | main = parens & "." & return ok. 206 | | parens = "(" & parens & ")" | "0". 207 | + (((0)). 208 | ? expected '0' found '(' 209 | 210 | (the error message on this one is much more reasonable...) 211 | 212 | | main = parens & "." & return ok. 213 | | parens = "(" & parens & ")" | "0". 214 | + ((0))). 215 | ? expected '.' found ')' 216 | 217 | To consume a comma-separated list of one or more bits: 218 | 219 | | main = bit & {"," & bit} & ".". 220 | | bit = "0" | "1". 221 | + 1. 222 | = . 223 | 224 | | main = bit & {"," & bit} & ".". 225 | | bit = "0" | "1". 226 | + 0,1,1,0,1,1,1,1,0,0,0,0,1. 227 | = . 228 | 229 | (again, backtracking makes the error a little odd) 230 | 231 | | main = bit & {"," & bit} & ".". 232 | | bit = "0" | "1". 233 | + 0,,1,0. 234 | ? expected '.' found ',' 235 | 236 | | main = bit & {"," & bit} & ".". 237 | | bit = "0" | "1". 238 | + 0,10,0. 239 | ? expected '.' found '0' 240 | 241 | Comments 242 | -------- 243 | 244 | A Tamsin comment is introduced with `#` and continues until the end of the 245 | line. 246 | 247 | | # welcome to my Tamsin program! 248 | | main = # comments may appear anywhere in the syntax 249 | | # and a comment may be followed by a comment 250 | | "z".
251 | + z 252 | = z 253 | -------------------------------------------------------------------------------- /doc/Notes.markdown: -------------------------------------------------------------------------------- 1 | These are now out of context, and kept here for historical purposes. 2 | 3 | ### an aside, written a while back ### 4 | 5 | OK! So... here is a problem: if you haven't noticed yet, 6 | 7 | * what a rule consumes, is a string. 8 | * what a rule evaluates to, is a term. 9 | * the symbol `(` means something different in a rule (where it expresses 10 | precedence) than in a term (where it signifies the list of subterms.) 11 | * the symbol `foo` means something different in a rule (where it denotes 12 | a production) than in a term (where it is an atom.) 13 | 14 | This is probably unacceptable. Which syntax do we want to change? 15 | 16 | PRODUCTION = set V = foo & return ⟨atom V production⟩. 17 | 18 | i.e. productions are distinguished from atoms and variables by being 19 | all-caps. Lists are distinguished from precedence by being ⟨ ⟩. 20 | 21 | production = set V = 'foo & return '(atom V production). 22 | 23 | i.e. `'` acts a bit like quote, or rather quasi-quote, as Variables get 24 | expanded. 25 | 26 | production = set V = :foo & return :smth(:atom Var :production). 27 | 28 | i.e. atoms are prefixed with `:`, like Ruby, and terms are constructors 29 | with a leading atom, like real terms and not like lists. 30 | 31 | production = set V = 「foo」 & return 「(atom Var anotheratom)」. 32 | 33 | A funky, Japanese-influenced version of quote. Nice, but really not suited 34 | for this, quite. Ditto ⟦these⟧. 35 | 36 | Ah, well, it may not be a real problem, unless we want to make `return` 37 | optional (which we do.) Maybe, onto weirder stuff first. 38 | 39 | ### stuff about implicit buffer ### 40 | 41 | Here's a "problem": the implicit buffer is a string, and we don't have 42 | strings in the data domain, we have terms.
This "problem" is easily 43 | "solvable": we can stringify the term. This is a terrible "solution", 44 | but it lets us experiment further. 45 | 46 | This would be nicer if we had a syntax to put arbitrary text in an atom. 47 | Hey, how about 「this is an atom」? Hmmm... 48 | 49 | #### A prolix note on implementation #### 50 | 51 | Traditionally, scanners for recursive descent parsers pre-emptively scan 52 | the next token. This was done because originally, parsers (for languages 53 | like Pascal, say,) were distinctly one-pass beasts, reading the source code 54 | off of a stream from disk (or maybe even from a tape), and you might need 55 | to refer to the current token several times in the code and you don't want 56 | to have to read it more than once. 57 | 58 | This setup makes writing a parser with a "hot-swappable" scanner tricky, 59 | because when we switch scanner, we have to deal with this "cached" token 60 | somehow. We could rewind the scanner by the length of the token (plus 61 | the length of any preceding whitespace and comments), switch the scanner, 62 | then scan again (by the new rules.) But this is messy and error-prone. 63 | 64 | Luckily, not many of us are reading files off tape these days, and we have 65 | plenty of core, so it's no problem reading the whole file into memory. 66 | In fact, I've seen it argued that the best way to write a scanner nowadays 67 | is to `mmap()` the file. We don't do this in the implementation of Tamsin, 68 | but we do read the entire file into memory. 69 | 70 | This makes the cache-the-next-token method less useful, and so we don't 71 | do it. Instead, we look for the next token only when we need it, and we 72 | have a method `peek()` that returns what the next token would be, and we 73 | don't cache this value. 74 | 75 | There are a couple of other points about the scanner implementation. 76 | A scanner only ever has one buffer (the entire string it's scanning); this 77 | never changes over its lifetime.
It provides methods for saving and 78 | restoring its state, and it has a stack of "engines" which provide the 79 | actual scanning logic. In addition, there is only one interpreter object, 80 | and it only has one scanner object during its lifetime. 81 | 82 | ### Implementation Notes ### 83 | 84 | Maybe test-driven language design *not* "for the win" in all cases; it's 85 | excellent for evolving a design, but not so good for deep debugging. I had 86 | to actually write a dedicated test case which directly accessed the internals, 87 | to find the problem. 88 | 89 | This was only after refactoring the implementation two or three times. One 90 | of those times, I removed exceptions, so now the interpreter returns 91 | `(success, result)` tuples, where `success` is a boolean, and propagates 92 | parse errors itself. 93 | 94 | We "raise" a parse error only in the `LITERAL` AST node. 95 | 96 | We handle parse errors (backtrack) only in `OR` and `WHILE`, and in the 97 | ProductionScannerEngine logic (to provide that EOF if the scanning production 98 | failed. This can happen even in `peek()` at the end of a string, even after 99 | we've successfully parsed everything else.) 100 | 101 | ### aside #2 ### 102 | 103 | Well this is all very nice, very pretty I'm sure you'll agree, but it doesn't 104 | hang together too well. Figuration is easier than composition. The thing is 105 | that we still have these two domains, the domain of strings that we parse 106 | and the domain of terms that we match. We need to bring them closer together. 107 | This section is just ideas for that. 108 | 109 | One is that instead of, or alongside terms, we compose strings. 110 | 111 | First, we put arbitrary text in an atom, with `「this syntax」`. Then we allow 112 | terms to be concatenated with `•`. It looks real cool! But also, it's kind 113 | of annoying. So we also allow `'this ' + 'syntax.'`. 114 | 115 | ### ... ### 116 | 117 | Indeed we can. 
118 | 119 | The next logical step would be to be able to say 120 | 121 | main = program using scanner. 122 | scanner = scan using ☆char. 123 | scan = {" "} & (...) 124 | program = "token" & ";" & "token" & ... 125 | 126 | But we're not there yet. 127 | 128 | Well, the best way to get there is to make that a test, see it fail, then 129 | improve the implementation so that it passes, Test-driven language design 130 | for the win! (But maybe not in all cases. See my notes below...) 131 | 132 | ### ... #2 ### 133 | 134 | Having thought more about it, I think the easiest way to reconcile terms 135 | and strings is to have terms be syntactic sugar for strings. This is 136 | already the case for ground terms, since `tree(a,b)` stringifies to the 137 | same string as `「tree(a,b)」`. It's when variables are involved where it 138 | differs. We would like some kind of quasi-quote such that even though 139 | `「tree(A,b)」` → `tree(A,n)`, `«tree(A,b)»` → `tree(tree(x,y),b)` or 140 | whatever. 141 | 142 | Although, I still don't know. The thing about terms is that they are 143 | super-useful for intermediate representations — abstract syntax trees 144 | and the like. I've been thinking about some kind of compromise. Which 145 | is, currently, what we sort of have. A Tamsin term doubles as a string, 146 | for better or worse. Mainly, we should sort out the properties of terms, 147 | then. Which we will do. But first, 148 | 149 | ### conceptual sugar ### 150 | 151 | Have 152 | 153 | reverse(tree(A,B)) = ... 154 | 155 | be *conceptually* sugar for 156 | 157 | reverse["tree" & "(" & term → A & "," & term → B & ")"] = ... 158 | 159 | but *actually* we still keep it in terms of terms, for efficiency. 
160 | -------------------------------------------------------------------------------- /doc/Philosophy.markdown: -------------------------------------------------------------------------------- 1 | Philosophy of Tamsin 2 | ==================== 3 | 4 | I suppose that's a rather heavy-handed word to use, "philosophy". But 5 | this is the document giving the _whys_ of Tamsin rather than the technical 6 | points. 7 | 8 | Why did you write Tamsin? 9 | ------------------------- 10 | 11 | Basically, every time I see someone use a compiler-compiler like `yacc` 12 | or a parser combinator library, part of me thinks, "Well why didn't 13 | you just write a recursive-descent parser? Recursive-descent parsers 14 | are easy to write and they make for extremely pretty code!" 15 | And what does a recursive-descent parser do? It consumes input. But 16 | don't *all* algorithms consume input? So why not have a language which 17 | makes it easy to write recursive-descent parsers, and force all programs 18 | to be written as recursive-descent parsers? Then *all* code will be pretty! 19 | (Yeah, sure, OK.) 20 | 21 | Why is it/is it not a... 22 | ------------------------ 23 | 24 | ### Meta-Language ### 25 | 26 | (Also known, in their more practical incarnations, as "compiler-compilers" 27 | or "parser generators".) 28 | 29 | Tamsin is one, because: 30 | 31 | * The basic operations all map directly to combinators in BNF (or rather, 32 | Wirth's EBNF): 33 | * `&` is sequencing 34 | * `|` is alternation 35 | * `[]` is sugar for alternation with the empty string 36 | * `{}` is asteration 37 | * `"foo"` is a terminal 38 | * `foo` is a non-terminal 39 | * Using only these operations produces a sensible program — one which 40 | parses its input by the grammar so given. 41 | 42 | Tamsin isn't one, because: 43 | 44 | * There is no requirement that any input be processed at all. 45 | 46 | ### Programming Language ### 47 | 48 | Tamsin is one, because: 49 | 50 | * Productions can have local variables.
51 | * Productions can call other productions (or themselves, recursively) with 52 | arguments, and they return a value: 53 | 54 | reverse(pair(H, T), A) = reverse(T, pair(H, A)). 55 | reverse(nil, A) = A. 56 | 57 | * It's Turing-complete. 58 | * It can be, and in fact has been, bootstrapped. 59 | 60 | Tamsin isn't one, because: 61 | 62 | * The syntax is really geared to consuming input rather than general 63 | programming. 64 | 65 | ### Rubbish Lister ### 66 | 67 | What does this even mean? Well, there is that 68 | [one famous rubbish lister](http://perl.org/) that we can use as an example 69 | for now, until I come up with a better definition here. 70 | 71 | Tamsin is one, because: 72 | 73 | * There's more than one way to say it. 74 | * The same symbol means different things in different contexts 75 | (for example, `foo` might be either the name of a production, or an 76 | atomic term.) 77 | * Implicit this, implicit that. 78 | * Optomized (a bit) for problem-solving throwaway one-liners rather than 79 | large, engineered systems. 80 | * Anyone up for a game of golf? 81 | 82 | Tamsin isn't one, because: 83 | 84 | * It's possible to express its syntax in a form that humans can understand. 85 | * In fact, it's possible to express its syntax in Tamsin. 86 | In fact, it's possible to bootstrap Tamsin — a Tamsin-to-C compiler has 87 | been written in Tamsin. This is very un-rubbish-lister-ish. 88 | 89 | Batteries Included 90 | ------------------ 91 | 92 | Are batteries included? Or rather, _what_ batteries are included? By strange 93 | coincidence, the batteries that are included are almost exactly the ones 94 | you'd expect to be useful in bootstrapping a Tamsin-to-C compiler: 95 | 96 | * `list` module — `reverse`, `append`, `member`, etc. 
97 | * `tamsin_scanner` module 98 | * `tamsin_parser` module 99 | * `tamsin_analyzer` module 100 | -------------------------------------------------------------------------------- /doc/System_Module.markdown: -------------------------------------------------------------------------------- 1 | System Module 2 | ------------- 3 | 4 | -> Tests for functionality "Intepret Tamsin program" 5 | 6 | The module `$` contains a number of built-in productions which would not 7 | be possible or practical to implement in Tamsin. See Appendix C for a list. 8 | 9 | In fact, we have been using the `$` module already! But our usage of it 10 | has been hidden under some syntactic sugar. For example, `"k"` is actually... 11 | 12 | | main = $:expect(k). 13 | + k 14 | = k 15 | 16 | | main = $:expect(k). 17 | + l 18 | ? expected 'k' but found 'l' 19 | 20 | The section about aliases needs to be written too. 21 | 22 | Here's `$:alnum`, which only consumes tokens where the first character is 23 | alphanumeric. 24 | 25 | | main = "(" & {$:alnum → A} & ")" & A. 26 | + (abc123deefghi459876jklmnopqRSTUVXYZ0) 27 | = 0 28 | 29 | | main = "(" & {$:alnum → A} & ")" & A. 30 | + (abc123deefghi459876!jklmnopqRSTUVXYZ0) 31 | ? expected ')' but found '!' 32 | 33 | Here's `$:upper`, which only consumes tokens where the first character is 34 | uppercase alphabetic. 35 | 36 | | main = "(" & {$:upper → A} & ")" & A. 37 | + (ABCDEFGHIJKLMNOPQRSTUVWXYZ) 38 | = Z 39 | 40 | | main = "(" & {$:upper → A} & ")" & A. 41 | + (ABCDEFGHIJKLMNoPQRSTUVWXYZ) 42 | ? expected ')' but found 'o' 43 | 44 | Here's `$:startswith`, which only consumes tokens which start with 45 | the given term. (For a single-character scanner this isn't very 46 | impressive.) 47 | 48 | | main = "(" & {$:startswith('A') → A} & ")" & A. 49 | + (AAAA) 50 | = A 51 | 52 | | main = "(" & {$:startswith('A') → A} & ")" & A. 53 | + (AAAABAAA) 54 | ? 
expected ')' but found 'B' 55 | 56 | Here's `$:mkterm`, which takes an atom and a list and creates a constructor. 57 | 58 | | main = $:mkterm(atom, list(a, list(b, list(c, nil)))). 59 | = atom(a, b, c) 60 | 61 | Here's `$:unquote`, which takes three terms, X, L and R, where L and R 62 | must be atoms. If X begins with L and ends with R then the contents 63 | in-between will be returned as an atom. Otherwise fails. 64 | 65 | | main = $:unquote('"hello"', '"', '"'). 66 | = hello 67 | 68 | | main = $:unquote('(hello)', '(', ')'). 69 | = hello 70 | 71 | | main = $:unquote('(hello)', '(', '"'). 72 | ? term '(hello)' is not quoted with '(' and '"' 73 | 74 | | main = $:unquote('(hello)', '[', ')'). 75 | ? term '(hello)' is not quoted with '[' and ')' 76 | 77 | The quotes can be Unicode characters. 78 | 79 | | main = $:unquote('“hello”', '“', '”'). 80 | = hello 81 | 82 | The quotes can be multiple characters. 83 | 84 | | main = $:unquote('%-hello-%', '%-', '-%'). 85 | = hello 86 | 87 | The quotes can even be empty strings. 88 | 89 | | main = $:unquote('hello', '', ''). 90 | = hello 91 | 92 | Here's `$:equal`, which takes two terms, L and R. If L and R are equal, 93 | succeeds and returns that term which they both are. Otherwise fails. 94 | 95 | Two atoms are equal if their texts are identical. 96 | 97 | | main = $:equal('hi', 'hi'). 98 | = hi 99 | 100 | | main = $:equal('hi', 'lo'). 101 | ? term 'hi' does not equal 'lo' 102 | 103 | Two constructors are equal if their texts are identical, they have the 104 | same number of subterms, and all of their corresponding subterms are equal. 105 | 106 | | main = $:equal(hi(there), hi(there)). 107 | = hi(there) 108 | 109 | | main = $:equal(hi(there), lo(there)). 110 | ? term 'hi(there)' does not equal 'lo(there)' 111 | 112 | | main = $:equal(hi(there), hi(here)). 113 | ? term 'hi(there)' does not equal 'hi(here)' 114 | 115 | | main = $:equal(hi(there), hi(there, there)). 116 | ? 
term 'hi(there)' does not equal 'hi(there, there)' 117 | 118 | Here's `$:emit`, which takes an atom and outputs it. Unlike `print`, which 119 | is meant for debugging, `$:emit` does not append a newline, and is 8-bit-clean. 120 | 121 | | main = $:emit('`') & $:emit('wo') & ''. 122 | = `wo 123 | 124 | -> Tests for functionality "Intepret Tamsin program (pre- & post-processed)" 125 | 126 | `$:emit` is 8-bit-clean: if the atom contains unprintable characters, 127 | `$:emit` does not try to make them readable by UTF-8 or any other encoding. 128 | (`print` may or may not do this, depending on the implementation.) 129 | 130 | | main = $:emit('\x00\x01\x02\xfd\xfe\xff') & ''. 131 | = 000102fdfeff0a 132 | 133 | -> Tests for functionality "Intepret Tamsin program" 134 | 135 | Here's `$:repr`, which takes a term and results in an atom which is the 136 | result of reprifying that term (see section on Terms, above.) 137 | 138 | | main = $:repr(hello). 139 | = hello 140 | 141 | | main = $:repr('016fo_oZZ'). 142 | = 016fo_oZZ 143 | 144 | | main = $:repr('016fo$oZZ'). 145 | = '016fo$oZZ' 146 | 147 | | main = $:repr(''). 148 | = '' 149 | 150 | | main = $:repr(' '). 151 | = ' ' 152 | 153 | | main = $:repr('016\n016'). 154 | = '016\x0a016' 155 | 156 | | main = $:repr(hello(there, world)). 157 | = hello(there, world) 158 | 159 | | main = V ← '♡' & $:repr('□'(there, V)). 160 | = '\xe2\x96\xa1'(there, '\xe2\x99\xa1') 161 | 162 | | main = $:repr(a(b(c('qu\'are\\')))). 163 | = a(b(c('qu\'are\\'))) 164 | 165 | | main = $:repr('\x99'). 166 | = '\x99' 167 | 168 | Here's `$:reverse`, which takes a term E, and a term of the form 169 | `X(a, X(b, ... X(z, E)) ... )`, and returns a term of the form 170 | `X(z, X(y, ... X(a, E)) ... )`. The constructor tag X is often `cons` 171 | or `pair` or `list` and E is often `nil`. 172 | 173 | | main = $:reverse(list(a, list(b, list(c, nil))), nil). 174 | = list(c, list(b, list(a, nil))) 175 | 176 | E need not be an atom. 
177 | 178 | | main = $:reverse(list(a, list(b, list(c, hello(world)))), hello(world)). 179 | = list(c, list(b, list(a, hello(world)))) 180 | 181 | If the tail of the list isn't E, an error occurs. 182 | 183 | | main = $:reverse(list(a, list(b, list(c, hello(world)))), nil). 184 | ? malformed list 185 | 186 | If some list constructor doesn't have two children, an error occurs. 187 | 188 | | main = $:reverse(list(a, list(b, list(nil))), nil). 189 | ? malformed list 190 | 191 | The constructor tag can be anything. 192 | 193 | | main = $:reverse(foo(a, foo(b, foo(c, nil))), nil). 194 | = foo(c, foo(b, foo(a, nil))) 195 | 196 | But if there is a different constructor somewhere in the list, well, 197 | 198 | | main = $:reverse(foo(a, fooz(b, foo(c, nil))), nil). 199 | ? malformed list 200 | 201 | You can reverse an empty list. 202 | 203 | | main = $:reverse(nil, nil). 204 | = nil 205 | 206 | But of course, 207 | 208 | | main = $:reverse(nil, zilch). 209 | ? malformed list 210 | 211 | This is a shallow reverse. Embedded lists are not reversed. 212 | 213 | | main = $:reverse(list(a, list(list(1, list(2, nil)), list(c, nil))), nil). 214 | = list(c, list(list(1, list(2, nil)), list(a, nil))) 215 | 216 | Here's `$:gensym`. 217 | 218 | | main = $:gensym('foo'). 219 | = foo1 220 | 221 | | main = $:gensym('foo') → F & $:gensym('foo') → G & $:equal(F, G). 222 | ? 'foo1' does not equal 'foo2' 223 | 224 | Here's `$:hexbyte`. 225 | 226 | | main = $:hexbyte('5', '0'). 227 | = P 228 | 229 | | main = $:hexbyte('f', 'f') → C & $:repr(C). 230 | = '\xff' 231 | 232 | Here's `$:format_octal`, which makes me feel ill. 233 | 234 | | main = $:format_octal('P'). 235 | = 120 236 | 237 | | main = $:format_octal('\xff'). 238 | = 377 239 | 240 | There are never any leading zeroes. 241 | 242 | | main = $:format_octal('\n'). 243 | = 12 244 | 245 | It works on the first byte of the string only. 246 | 247 | | main = $:format_octal('«'). 
248 | = 302 249 | 250 | Here's `$:length`, which returns an atom representing the length, in bytes, 251 | of the given term (flattened.) Note that this is an atom, not an integer, 252 | because Tamsin doesn't even have integers. 253 | 254 | | main = $:length(abcde). 255 | = 5 256 | 257 | | main = $:length(''). 258 | = 0 259 | 260 | | main = $:length('♥'). 261 | = 3 262 | 263 | | main = $:length(a( b , c )). 264 | = 7 265 | -------------------------------------------------------------------------------- /doc/TODO.markdown: -------------------------------------------------------------------------------- 1 | TODO 2 | ==== 3 | 4 | ### C implementation ### 5 | 6 | * implement buffers in C in libtamsin 7 | * implement pattern match in send in C compiler 8 | 9 | ### higher-priority ### 10 | 11 | * allow switching the kind of buffer that is used when `@` is used: 12 | * `rule @ %stdin` is the default; it is implied when no `@` 13 | * `rule @ %mmap` to use an MmapBuffer 14 | * `rule @ %line` to use a LineEditorBuffer 15 | * `rule @ $:open('file.txt')` ? 16 | * `$:add`, `$:sub`, `$:mul`, `$:div`, `$:rem`, for atoms which look like 17 | integers: `["-"] & {$:digit}`. 18 | * `$:tell` and `$:seek` the implicit buffer — for VM's etc — although 19 | note, this may have scary consequences when combined with backtracking 20 | * `(foo → S | ok)` & print S ... should set S to error if foo failed? 21 | or `(foo |→ S ok)` ? This is necessary for the meta-circular 22 | interpreter: to implement `A | B` we want to interpret `A` and see 23 | if it failed or not. i.e. We want to be able to reify errors... 24 | 25 | ### medium-priority ### 26 | 27 | * Starting with knowns about `$` builtins, an analysis to determine, for Rule: 28 | - may consume input, never consumes input 29 | - may fail, always fails 30 | - may succeed, always succeeds... (may_backtrack?) 
31 | * production values 32 | * `$:fold(^production, nil, cons)` 33 | * `$:fold(^($:alnum & " "), '', ^L+','+R)` 34 | * codegen and emitter phases in compiler. take current compiler phase, 35 | make it construct a low-level representation instead (codegen), then 36 | have a phase that writes out C code from that low-level repr (emitter) 37 | * non-backtracking versions of `|` and `{}`: `|!` and `{}!` 38 | 39 | ### testing ### 40 | 41 | * test for `''('')`, `'\x00'('\x00')` 42 | * document how prod scanners do EOF 43 | * tests that `'V'` is not a variable 44 | * tests for failing when utf8 scanner hits badly-encoded utf8 45 | * tests for invalid escape codes 46 | * test for mismatched # of formals in prod branches 47 | * document the modules. in own document. plus tests. 48 | 49 | ### lower-priority ### 50 | 51 | * `ctype` module, with `alpha` and `digit` and etc. 52 | * `list` module: `deep_reverse` 53 | * use Tamsin repr in error messages 54 | * __str__ should be Tamsin repr()? 55 | * regex-like shortcuts: `\w` for "word", `\s` for "whitespace", etc. 56 | * have compiler replace calls to `list` functions 57 | by "more efficient" versions written in C -- if they really are... 58 | * and maybe even garbage-collect terms in libtamsin 59 | * figure out why reading a 4M file in a compiled program TAKES DOWN UBUNTU 60 | * make it possible to recover from more errors using `|` (don't throw 61 | exceptions so often) 62 | * stronger tests for scanner, parser: dump all falderal testbodies to files 63 | * option for ref interp to not output result (or by default, don't) 64 | * "mini" interpreter that handles variables (ouch) 65 | * error handling: skip to next sentinel and report more errors 66 | * module-level updatable variables. or globals. or "process dictionary" 67 | `$:store()` and `$:fetch()`. or database. 68 | * figure out good way to do aliases with the Tamsin-parser-in-Tamsin 69 | (dynamic grammar is really more of a Zz thing...) 
70 | * should be able to import ("open") other modules into your own namespace. 71 | * `@` a la Haskell in pattern-match: 72 | * `walk(T@tree(L,R)) = ...` 73 | * maps, implemented as hash tables. 74 | * `Table ← {} & fields → F@fields(H,T) & Table[H] ← T` 75 | * pretty-print AST for error messages 76 | 77 | ### symbol fun ### 78 | 79 | * `~` (Lua) for not and `!` (Prolog) for non-backtracking? 80 | * lowercase greek letters are variables too! 81 | * use `←` instead of `@`, why not? 82 | * I'm always typing `prod() → rule` instead of `=`, so why not? 83 | * `A;B` — like `&` except assert (statically) that `A` always succeeds 84 | * be generous and allow `"xyz"` in term context position? 85 | * denotational semantics sugar! something like... 86 | 87 | ⟦add α β⟧ = $:add(⟦α⟧, ⟦β⟧). 88 | 89 | and/or 90 | 91 | ⟦add α β⟧(σ) = $:add(⟦α⟧(σ), ⟦β⟧(σ)). 92 | ⟦var α⟧(σ) = fetch(σ, α). 93 | 94 | of course, DS is a bit fast-and-loose about actual parsing... 95 | but the syntax looks mighty fine. 96 | 97 | ### wild ideas ### 98 | 99 | * term-rewriting library; a la Treacle; should make desugarer almost trivial 100 | * algebraically cool version of `|`, perhaps as a worked example 101 | (implement Bakerloo in Tamsin) 102 | * EOF and nil are the same? it would make sense... call it `end`? (do we?) 103 | * productions with names with arbitrary characters in them. 104 | * something like «foo» but foo is the name of a *non*terminal — symbolic 105 | production references (like Perl's horrible globs as a cheap substitute 106 | for actual function references or lambdas.) 107 | * turn system library back into built-in keywords (esp. if : can be used) 108 | * Tamsin scanner: more liberal (every non-alphanum+_ symbol scans as itself, 109 | incl. ones that have no meaning currently like `*` and `?`) 110 | * auto-generate terms from productions, like Rooibos does 111 | * token classes... somehow. (then numeric is just a special token class?) 
112 | a token class is just the "call stack" of productions at the time it 113 | was scanned 114 | * «» could be an alias w/right sym (`,,`, `„`) 115 | (still need to scan it specially though) 116 | * special form that consumes rest of input from the Tamsin source -- 117 | maybe not such a gimmick since micro-tamsin does this 118 | * feature-testing: `$.exists(module) | do_without_module` 119 | * ternary: `foo ? bar : baz` — if foo succeeded, do bar, else do baz. 120 | I don't think this is very necessary because you can usually just say 121 | `(foo & bar) | baz` — but only if `bar` always succeeds, which it 122 | usually does (to return something) 123 | -------------------------------------------------------------------------------- /doc/Tested_Examples.markdown: -------------------------------------------------------------------------------- 1 | Tests that used to be in Tamsin's README 2 | ======================================== 3 | 4 | -> Tests for functionality "Intepret Tamsin program" 5 | 6 | Hello, world! 7 | 8 | | main = 'Hello, world!'. 9 | = Hello, world! 10 | 11 | Make a story more exciting! 12 | 13 | | main = ("." & '!' | "?" & '?!' | any)/''. 14 | + Chapter 1 15 | + --------- 16 | + It was raining. She knocked on the door. She heard 17 | + footsteps inside. The door opened. The butler peered 18 | + out. "Hello," she said. "May I come in?" 19 | = Chapter 1 20 | = --------- 21 | = It was raining! She knocked on the door! She heard 22 | = footsteps inside! The door opened! The butler peered 23 | = out! "Hello," she said! "May I come in?!" 24 | 25 | Parse an algebraic expression for syntactic correctness. 26 | 27 | | main = (expr0 & eof & 'ok'). 28 | | expr0 = expr1 & {"+" & expr1}. 29 | | expr1 = term & {"*" & term}. 30 | | term = "x" | "y" | "z" | "(" & expr0 & ")". 31 | + x+y*(z+x+y) 32 | = ok 33 | 34 | Parse an algebraic expression to a syntax tree. 35 | 36 | | main = expr0. 37 | | expr0 = expr1 → E1 & {"+" & expr1 → E2 & E1 ← add(E1,E2)} & E1. 
38 | | expr1 = term → E1 & {"*" & term → E2 & E1 ← mul(E1,E2)} & E1. 39 | | term = "x" | "y" | "z" | "(" & expr0 → E & ")" & E. 40 | + x+y*(z+x+y) 41 | = add(x, mul(y, add(add(z, x), y))) 42 | 43 | Translate an algebraic expression to RPN (Reverse Polish Notation). 44 | 45 | | main = expr0 → E & walk(E). 46 | | expr0 = expr1 → E1 & {"+" & expr1 → E2 & E1 ← add(E1,E2)} & E1. 47 | | expr1 = term → E1 & {"*" & term → E2 & E1 ← mul(E1,E2)} & E1. 48 | | term = "x" | "y" | "z" | "(" & expr0 → E & ")" & E. 49 | | walk(add(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' +'. 50 | | walk(mul(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' *'. 51 | | walk(X) = return ' '+X. 52 | + x+y*(z+x+y) 53 | = x y z x + y + * + 54 | 55 | Reverse a list. 56 | 57 | | main = reverse(pair(a, pair(b, pair(c, nil))), nil). 58 | | reverse(pair(H, T), A) = reverse(T, pair(H, A)). 59 | | reverse(nil, A) = A. 60 | = pair(c, pair(b, pair(a, nil))) 61 | 62 | Parse and evaluate a Boolean expression. 63 | 64 | | main = expr0 → E using scanner & eval(E). 65 | | expr0 = expr1 → E1 & ("or" & expr1)/E1/or. 66 | | expr1 = term → E1 & ("and" & term)/E1/and. 67 | | term = "true" | "false" | "(" & expr0 → E & ")" & E. 68 | | eval(and(A, B)) = eval(A) → EA & eval(B) → EB & and(EA, EB). 69 | | eval(or(A, B)) = eval(A) → EA & eval(B) → EB & or(EA, EB). 70 | | eval(X) = X. 71 | | and(true, true) = 'true'. 72 | | and(A, B) = 'false'. 73 | | or(false, false) = 'false'. 74 | | or(A, B) = 'true'. 75 | | scanner = scan using $:utf8. 76 | | scan = {" "} & ("(" | ")" | token). 77 | | token = "f" & "a" & "l" & "s" & "e" & 'false' 78 | | | "t" & "r" & "u" & "e" & 'true' 79 | | | "o" & "r" & 'or' 80 | | | "a" & "n" & "d" & 'and'. 81 | + (falseortrue)andtrue 82 | = true 83 | 84 | Parse a CSV file and write out the 2nd-last field of each record. Handles 85 | commas and double-quotes inside quotes. 86 | 87 | | main = line → L & L ← lines(nil, L) & 88 | | {"\n" & line → M & L ← lines(L, M)} & extract(L) & ''. 
89 | | line = field → F & {"," & field → G & F ← fields(G, F)} & F. 90 | | field = strings | bare. 91 | | strings = string → T & {string → S & T ← T + '"' + S} & T. 92 | | string = "\"" & (!"\"" & any)/'' → T & "\"" & T. 93 | | bare = (!(","|"\n") & any)/''. 94 | | extract(lines(Ls, L)) = extract(Ls) & extract_field(L). 95 | | extract(L) = L. 96 | | extract_field(fields(L, fields(T, X))) = print T. 97 | | extract_field(X) = X. 98 | + Harold,1850,"21 Baxter Street",burgundy 99 | + Smythe,1833,"31 Little Street, St. James",mauve 100 | + Jones,1791,"41 ""The Gardens""",crimson 101 | = 21 Baxter Street 102 | = 31 Little Street, St. James 103 | = 41 "The Gardens" 104 | 105 | Evaluate a trivial S-expression-based language. 106 | 107 | | main = sexp → S using scanner & reverse(S, nil) → SR & eval(SR). 108 | | scanner = ({" "} & ("(" | ")" | $:alnum/'')) using $:utf8. 109 | | sexp = $:alnum | list. 110 | | list = "(" & sexp/nil/pair → L & ")" & L. 111 | | head(pair(A, B)) = A. 112 | | tail(pair(A, B)) = B. 113 | | cons(A, B) = return pair(A, B). 114 | | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R). 115 | | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R). 116 | | eval(pair(cons, pair(A, pair(B, nil)))) = 117 | | eval(A) → AE & eval(B) → BE & return pair(AE, BE). 118 | | eval(X) = X. 119 | | reverse(pair(H, T), A) = reverse(H, nil) → HR & reverse(T, pair(HR, A)). 120 | | reverse(nil, A) = A. 121 | | reverse(X, A) = X. 122 | + (head (tail (cons (cons a nil) (cons b nil)))) 123 | = b 124 | 125 | Escape characters in a string, for use in a C program source. 126 | 127 | | main = escaped('"♥\n«"'). 128 | | escaped(S) = escaped_r @ S. 129 | | escaped_r = A ← '' & 130 | | { 131 | | "\\" & A ← A + '\\\\' 132 | | | "\"" & A ← A + '\\"' 133 | | | "\n" & A ← A + '\\n' 134 | | | $:alnum → B & A ← A + B 135 | | | any → B & (many_format_octal @ B) → B & A ← A + B 136 | | } & A. 
137 | | 138 | | many_format_octal = 139 | | S ← '' & 140 | | {any → B & $:format_octal(B) → B & S ← S + '\\' + B} using $:byte & 141 | | S. 142 | = \"\342\231\245\n\302\253\" 143 | -------------------------------------------------------------------------------- /eg/alg-expr1.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = (expr0 & eof & 'ok'). 5 | expr0 = expr1 & {"+" & expr1}. 6 | expr1 = term & {"*" & term}. 7 | term = "x" | "y" | "z" | "(" & expr0 & ")". 8 | -------------------------------------------------------------------------------- /eg/alg-expr2.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = expr0. 5 | expr0 = expr1 -> E1 & {"+" & expr1 -> E2 & E1 <- add(E1,E2)} & E1. 6 | expr1 = term -> E1 & {"*" & term -> E2 & E1 <- mul(E1,E2)} & E1. 7 | term = "x" | "y" | "z" | "(" & expr0 -> E & ")" & E. 8 | -------------------------------------------------------------------------------- /eg/alg-expr3.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = expr0 → E & walk(E). 5 | expr0 = expr1 → E1 & {"+" & expr1 → E2 & E1 ← add(E1,E2)} & E1. 6 | expr1 = term → E1 & {"*" & term → E2 & E1 ← mul(E1,E2)} & E1. 7 | term = "x" | "y" | "z" | "(" & expr0 → E & ")" & E. 8 | walk(add(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' +'. 9 | walk(mul(L,R)) = walk(L) → LS & walk(R) → RS & return LS+RS+' *'. 10 | walk(X) = return ' '+X. 
11 | -------------------------------------------------------------------------------- /eg/backtrack.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = set E = original && 5 | (set E = changed && "0" && "1" | "0" && "2") & 6 | return E. 7 | -------------------------------------------------------------------------------- /eg/bitpair.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = bit → A & bit → B & return pair(A,B). 5 | bit = "0" | "1". 6 | -------------------------------------------------------------------------------- /eg/bits.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = bit & {"," & bit} & ".". 5 | bit = "0" | "1". 6 | -------------------------------------------------------------------------------- /eg/blerf.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = blerf(tree(tree(tree(a,b),c),d)). 5 | blerf(tree(L,R)) = blerf(L). 6 | blerf(Other) = return Other. 7 | -------------------------------------------------------------------------------- /eg/change-buffer.tamsin: -------------------------------------------------------------------------------- 1 | main = one @ 'I process this string until ! where I digress a bit' & ''. 2 | one = {"!" 
& {any → C & $:emit(C)} @ 'Here I digress' | any → C & $:emit(C)}. 3 | -------------------------------------------------------------------------------- /eg/csv_extract.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = line → L & L ← lines(nil, L) & 5 | {"\n" & line → M & L ← lines(L, M)} & extract(L) & ''. 6 | line = field → F & {"," & field → G & F ← fields(G, F)} & F. 7 | field = strings | bare. 8 | strings = string → T & {string → S & T ← T + '"' + S} & T. 9 | string = "\"" & T ← '' & {!"\"" & any → S & T ← T + S} & "\"" & T. 10 | bare = T ← '' & {!(","|"\n") & any → S & T ← T + S} & T. 11 | extract(lines(Lines, Line)) = extract(Lines) & extract_field(Line). 12 | extract(L) = L. 13 | extract_field(fields(Last, fields(This, X))) = print This. 14 | extract_field(X) = return X. -------------------------------------------------------------------------------- /eg/csv_parse.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = line & {"\n" & line} & 'ok'. 5 | line = field → F & {"," & field → G & F ← fields(G, F)} & F. 6 | field = strings | bare. 7 | strings = string → T & {string → S & T ← T + '"' + S} & T. 8 | string = "\"" & T ← '' & {!"\"" & any → S & T ← T + S} & "\"" & T. 9 | bare = T ← '' & {!(","|"\n") & any → S & T ← T + S} & T. 10 | -------------------------------------------------------------------------------- /eg/escape.tamsin: -------------------------------------------------------------------------------- 1 | main = print 2 | '\n' + 3 | '\xa0' + 4 | 'r'. 
5 | -------------------------------------------------------------------------------- /eg/eval-bool-expr.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = expr0 → E using scanner & eval(E). 5 | expr0 = expr1 → E1 & {"or" & expr1 → E2 & E1 ← or(E1,E2)} & E1. 6 | expr1 = term → E1 & {"and" & term → E2 & E1 ← and(E1,E2)} & E1. 7 | term = "true" | "false" | "(" & expr0 → E & ")" & E. 8 | eval(and(A, B)) = eval(A) → EA & eval(B) → EB & and(EA, EB). 9 | eval(or(A, B)) = eval(A) → EA & eval(B) → EB & or(EA, EB). 10 | eval(X) = X. 11 | and(true, true) = 'true'. 12 | and(A, B) = 'false'. 13 | or(false, false) = 'false'. 14 | or(A, B) = 'true'. 15 | scanner = scan using $:char. 16 | scan = {" "} & ("(" | ")" | token). 17 | token = "f" & "a" & "l" & "s" & "e" & 'false' 18 | | "t" & "r" & "u" & "e" & 'true' 19 | | "o" & "r" & 'or' 20 | | "a" & "n" & "d" & 'and'. 21 | -------------------------------------------------------------------------------- /eg/exciting-long.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = S <- '' & {("." & '!' | "?" & '?!' | any) -> T & S <- S + T} & S. 5 | -------------------------------------------------------------------------------- /eg/exciting.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = ("." & '!' | "?" & '?!' | any)/''. 
5 | -------------------------------------------------------------------------------- /eg/exciting.txt: -------------------------------------------------------------------------------- 1 | It was raining. She knocked on the door. She heard 2 | footsteps inside. The door opened. The butler peered 3 | out. "Hello," she said. "May I come in?" -------------------------------------------------------------------------------- /eg/expector.tamsin: -------------------------------------------------------------------------------- 1 | main = set T = 'foobar' & 2 | print T & 3 | expect_chars(T). 4 | 5 | # Given a single-character string, return call(prodref('$', 'expect'), S) 6 | # Given a string, return and(call(prodref('$', 'expect'), head(S)), 7 | # expect_chars(tail(S))). 8 | 9 | expect_chars(S) = print S & expect_chars_r @ S. 10 | expect_chars_r = any → C & 11 | set E = call(prodref('$', 'expect'), list(atom(C), nil)) & 12 | ((eof & return E) | (expect_chars_r → R & return and(E, R))). 13 | -------------------------------------------------------------------------------- /eg/foobar.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = print_each_char(fo+ob+ar). 5 | print_each_char(X) = print_each_char_r @ X. 6 | print_each_char_r = any → C & print C & print_each_char_r | return 'ok'. 7 | -------------------------------------------------------------------------------- /eg/hello-world.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = 'Hello, world!'. 
5 | -------------------------------------------------------------------------------- /eg/list-of-chars.tamsin: -------------------------------------------------------------------------------- 1 | main = any/nil/list. 2 | -------------------------------------------------------------------------------- /eg/list-sugar2.tamsin: -------------------------------------------------------------------------------- 1 | main = expr([1,2|3]). 2 | expr([1,2|3]) = "f". 3 | 4 | -------------------------------------------------------------------------------- /eg/modules.tamsin: -------------------------------------------------------------------------------- 1 | # parses "(0)39". 2 | 3 | stuff { 4 | junk = "(" & :return & ")". 5 | return = "0". 6 | } 7 | main = stuff:junk & :return & :eof. 8 | return = "3". 9 | eof = "9". 10 | -------------------------------------------------------------------------------- /eg/names.csv: -------------------------------------------------------------------------------- 1 | Harold,1850,"21 Baxter Street",burgundy 2 | Smythe,1833,"31 Little Street, St. James",mauve 3 | Jones,1791,"41 ""The Gardens""",crimson 4 | -------------------------------------------------------------------------------- /eg/pipeline.tamsin: -------------------------------------------------------------------------------- 1 | # Demonstrate that Tamsin programs can handle being given a stream on input, 2 | # and producing a stream on output. (This was not true in versions 0.5 and 3 | # prior) 4 | 5 | main = {token -> A & whitespace & print A}. 6 | token = S <- '' & {$:alnum -> T & S <- S + T} & return S. 7 | whitespace = {" " | "\n"}. 8 | -------------------------------------------------------------------------------- /eg/prod-branches.tamsin: -------------------------------------------------------------------------------- 1 | main = e(1). 2 | e(2) = 'foo'. 3 | e(A) = A. 
4 | 5 | -------------------------------------------------------------------------------- /eg/reverse.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = reverse(pair(a, pair(b, pair(c, nil))), nil). 5 | reverse(pair(H, T), A) = reverse(T, pair(H, A)). 6 | reverse(nil, A) = A. 7 | -------------------------------------------------------------------------------- /eg/sexpr-eval.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = sexp → S using scanner & reverse(S, nil) → SR & eval(SR). 5 | scanner = scan using $:utf8. 6 | scan = {" "} & ("(" | ")" | (T ← '' & {$:alnum → S & T ← T + S} & return T)). 7 | sexp = $:alnum | list. 8 | list = "(" & listtail(nil). 9 | listtail(L) = sexp → S & listtail(pair(S, L)) | ")" & L. 10 | head(pair(A, B)) = return A. 11 | tail(pair(A, B)) = return B. 12 | cons(A, B) = return pair(A, B). 13 | eval(pair(head, pair(X, nil))) = eval(X) → R & head(R). 14 | eval(pair(tail, pair(X, nil))) = eval(X) → R & tail(R). 15 | eval(pair(cons, pair(A, pair(B, nil)))) = 16 | eval(A) → AE & eval(B) → BE & return pair(AE, BE). 17 | eval(X) = X. 18 | reverse(pair(H, T), A) = reverse(H, nil) → HR & reverse(T, pair(HR, A)). 19 | reverse(nil, A) = A. 20 | reverse(X, A) = X. 21 | -------------------------------------------------------------------------------- /eg/store.tamsin: -------------------------------------------------------------------------------- 1 | fetch(K1, list(pair(K2, V), T)) = $:equal(K1, K2) & V | fetch(K1, T). 2 | fetch(K, nil) = fail K + ' not found'. 3 | 4 | store(K, V, A) = return list(pair(K, V), A). 
5 | 6 | main = 7 | ST ← nil & 8 | store(x, 21, ST) → ST & 9 | store(y, 17, ST) → ST & 10 | store(z, 11, ST) → ST & 11 | fetch(y, ST). 12 | 13 | -------------------------------------------------------------------------------- /eg/zeroes-concat.tamsin: -------------------------------------------------------------------------------- 1 | main = zeroes. 2 | zeroes = ("0" & zeroes → E & return E + 'Z') | return ''. 3 | 4 | -------------------------------------------------------------------------------- /eg/zeroes.tamsin: -------------------------------------------------------------------------------- 1 | # This example Tamsin program was written by Chris Pressey, and is 2 | # hereby placed in the public domain. It comes with NO WARRANTY. 3 | 4 | main = zeroes. 5 | zeroes = ("0" & zeroes → E & return zero(E)) | return nil. 6 | -------------------------------------------------------------------------------- /fixture/bootstrapped.markdown: -------------------------------------------------------------------------------- 1 | 2 | -> Functionality "Intepret Tamsin program" is implemented by 3 | -> shell command 4 | -> "bin/bootstrapped-compiler <%(test-body-file) >tmp/foo.c && gcc -Ic_src -Lc_src tmp/foo.c -o tmp/foo -ltamsin && tmp/foo <%(test-input-file)" 5 | 6 | -> Functionality "Intepret Tamsin program (pre- & post-processed)" 7 | -> is implemented by 8 | -> shell command "bin/bootstrapped-compiler <%(test-body-file) >tmp/foo.c && gcc -Ic_src -Lc_src tmp/foo.c -o tmp/foo -ltamsin && cat %(test-input-file) | bin/inhex | tmp/foo | bin/hexout" 9 | 10 | -------------------------------------------------------------------------------- /fixture/compiler.py.markdown: -------------------------------------------------------------------------------- 1 | 2 | -> Functionality "Intepret Tamsin program" is implemented by 3 | -> shell command 4 | -> "./bin/tamsin loadngo %(test-body-file) < %(test-input-file)" 5 | 6 | -> Functionality "Intepret Tamsin program (pre- & post-processed)" 7 | 
-> is implemented by 8 | -> shell command "cat %(test-input-file) | bin/inhex | bin/tamsin loadngo %(test-body-file) | bin/hexout" 9 | -------------------------------------------------------------------------------- /fixture/compiler.tamsin.markdown: -------------------------------------------------------------------------------- 1 | 2 | -> Functionality "Intepret Tamsin program" is implemented by 3 | -> shell command 4 | -> "bin/tamsin-compiler <%(test-body-file) >tmp/foo.c && gcc -Ic_src -Lc_src tmp/foo.c -o tmp/foo -ltamsin && tmp/foo <%(test-input-file)" 5 | 6 | -> Functionality "Intepret Tamsin program (pre- & post-processed)" 7 | -> is implemented by 8 | -> shell command "bin/tamsin-compiler <%(test-body-file) >tmp/foo.c && gcc -Ic_src -Lc_src tmp/foo.c -o tmp/foo -ltamsin && cat %(test-input-file) | bin/inhex | tmp/foo | bin/hexout" 9 | 10 | -------------------------------------------------------------------------------- /fixture/micro-tamsin.markdown: -------------------------------------------------------------------------------- 1 | 2 | -> Functionality "Intepret Tamsin program" is implemented by 3 | -> shell command 4 | -> "cat %(test-body-file) > tmp/foz && echo -n '/' >> tmp/foz && cat %(test-input-file) >> tmp/foz && ./bin/micro-tamsin tmp/foz" 5 | 6 | -------------------------------------------------------------------------------- /fixture/mini-tamsin.markdown: -------------------------------------------------------------------------------- 1 | 2 | -> Functionality "Intepret Tamsin program" is implemented by 3 | -> shell command 4 | -> "cat %(test-body-file) > tmp/foz && echo -n '/' >> tmp/foz && cat %(test-input-file) >> tmp/foz && ./bin/mini-tamsin tmp/foz" 5 | 6 | -------------------------------------------------------------------------------- /fixture/tamsin.py.markdown: -------------------------------------------------------------------------------- 1 | 2 | -> Functionality "Intepret Tamsin program" is implemented by 3 | -> shell command 
"bin/tamsin %(test-body-file) < %(test-input-file)" 4 | 5 | -> Functionality "Intepret Tamsin program (pre- & post-processed)" 6 | -> is implemented by 7 | -> shell command "cat %(test-input-file) | bin/inhex | bin/tamsin %(test-body-file) | bin/hexout" 8 | -------------------------------------------------------------------------------- /lib/list.tamsin: -------------------------------------------------------------------------------- 1 | list { 2 | reverse(list(H, T), A) = reverse(T, list(H, A)). 3 | reverse(nil, A) = A. 4 | 5 | member(X, nil) = fail 'not a member'. 6 | member(X, list(H,T)) = 7 | $:equal(X, H) & H | member(X, T). 8 | 9 | add_elem(X, L) = 10 | member(X, L) & L | return list(X, L). 11 | 12 | union(nil, L2) = L2. 13 | union(list(H,T), L2) = 14 | add_elem(H, L2) → L2 & 15 | union(T, L2). 16 | 17 | append(nil, L) = L. 18 | append(list(H, T), L) = 19 | append(T, L) → T & return list(H, T). 20 | } 21 | -------------------------------------------------------------------------------- /lib/tamsin_analyzer.tamsin: -------------------------------------------------------------------------------- 1 | # Desugarer for Tamsin AST, written in Tamsin. 2 | # Distributed under a BSD-style license; see LICENSE. 3 | 4 | tamsin_analyzer { 5 | 6 | desugar_all(list(H,T)) = 7 | desugar(H) → DH & 8 | desugar_all(T) → DT & 9 | return list(DH, DT). 10 | desugar_all(nil) = 'nil'. 11 | 12 | desugar(program(L)) = desugar_all(L) → DL & return program(DL). 13 | desugar(module(N, L)) = 14 | desugar_all(L) → DL & 15 | merge_prod_branches(DL, nil) → DDL & 16 | return module(N, DDL). 17 | desugar(production(N, PBs)) = 18 | desugar_all(PBs) → DPBs & 19 | return production(N, DPBs). 20 | desugar(prodbranch(Fs, Ls, B)) = 21 | desugar_pattern_all(Fs, 0) → Pair & 22 | fst(Pair) → DFs & 23 | desugar(B) → DB & 24 | return prodbranch(DFs, Ls, DB). 25 | desugar(call(PR, Args)) = return call(PR, Args). 26 | desugar(or(L, R)) = desugar(L) → DL & desugar(R) → DR & return or(DL, DR). 
27 | desugar(and(L, R)) = desugar(L) → DL & desugar(R) → DR & return and(DL, DR). 28 | desugar(not(X)) = desugar(X) → DX & return not(DX). 29 | desugar(while(X)) = desugar(X) → DX & return while(DX). 30 | desugar(concat(L, R)) = desugar(L) → DL & desugar(R) → DR & return concat(DL, DR). 31 | desugar(using(R, P)) = desugar(R) → DR & return using(DR, P). 32 | desugar(on(R, T)) = desugar(R) → DR & desugar(T) → DT & return on(DR, DT). 33 | desugar(send(R, V)) = desugar(R) → DR & return send(DR, V). 34 | desugar(set(V, T)) = desugar(T) → DT & return set(V, DT). 35 | desugar(atom(T)) = return atom(T). 36 | desugar(constructor(T, Ts)) = return constructor(T, Ts). 37 | desugar(variable(N)) = return variable(N). 38 | desugar(fold(R, I, C)) = 39 | desugar(R) → DR & 40 | SET ← set(variable('_1'), I) & 41 | SEND ← send(DR, variable('_2')) & 42 | CAT ← concat(variable('_1'), variable('_2')) & 43 | ACC ← set(variable('_1'), CAT) & 44 | ($:equal(C, nil) | 45 | get_tag(C) → Tag & 46 | ACC ← set(variable('_1'), 47 | constructor(Tag, list(variable('_2'), 48 | list(variable('_1'), nil))))) & 49 | RET ← call(prodref('$', 'return'), list(variable('_1'), nil)) & 50 | return and(and(SET, while(and(SEND, ACC))), RET). 51 | # desugar_pattern_all(Patterns, I): desugars each pattern in order, threading the counter I through; returns pair(DesugaredList, NextI). 52 | desugar_pattern_all(list(H,T), I) = 53 | desugar_pattern(H, I) → Pair & 54 | fst(Pair) → DH & 55 | snd(Pair) → I2 & 56 | desugar_pattern_all(T, I2) → Pair & 57 | fst(Pair) → DT & 58 | snd(Pair) → I3 & 59 | return pair(list(DH, DT), I3). 60 | desugar_pattern_all(nil, I) = return pair(nil, I). 61 | # desugar_pattern(Pattern, I): returns pair(Desugared, NextI). variable(N) becomes patternvariable(N, I) and advances the counter; atoms pass through unchanged. 62 | desugar_pattern(atom(T), I) = return pair(atom(T), I). 63 | desugar_pattern(constructor(T, Ts), I) = 64 | desugar_pattern_all(Ts, I) → Pair & 65 | fst(Pair) → DTs & 66 | snd(Pair) → I2 & 67 | return pair(constructor(T, DTs), I2). 68 | desugar_pattern(variable(N), I) = 69 | next(I) → I2 & 70 | return pair(patternvariable(N, I), I2). 71 | # fst/snd: first and second component of a pair(A, B) term. 72 | fst(pair(A,B)) = A. 73 | snd(pair(A,B)) = B. 74 | # next(I): successor of I by table lookup; there are clauses only for 0 through 11. 75 | next(0) = '1'. 76 | next(1) = '2'. 77 | next(2) = '3'.
78 | next(3) = '4'. 79 | next(4) = '5'. 80 | next(5) = '6'. 81 | next(6) = '7'. 82 | next(7) = '8'. 83 | next(8) = '9'. 84 | next(9) = '10'. 85 | next(10) = '11'. 86 | next(11) = '12'. 87 | # get_tag(atom(T)): the text T of an atom term; no other term shape has a clause. 88 | get_tag(atom(T)) = T. 89 | # fetch(K, Map): the value paired with K in a list of pair(Key, Value) entries, or nil when K is absent. 90 | fetch(K, list(pair(K2, V), T)) = $:equal(K, K2) & V | fetch(K, T). 91 | fetch(K, nil) = 'nil'. 92 | # delete(K, Map): Map with every entry whose key equals K removed. 93 | delete(K, list(pair(K2, V), T)) = $:equal(K, K2) & delete(K, T) 94 | | delete(K, T) → R & return list(pair(K2, V), R). 95 | delete(K, nil) = 'nil'. 96 | # store(K, V, Map): Map with K bound to V; any earlier entry for K is deleted and the new entry goes at the front. 97 | store(K, V, A) = delete(K, A) → A2 & 98 | return list(pair(K, V), A2). 99 | # merge_prod_branches(Prods, Map): collects the branch of each single-branch production under its name in Map, then rebuilds one production per name via unmap. 100 | merge_prod_branches(list(production(N, list(B, nil)),T),Map) = 101 | fetch(N, Map) → Blist & 102 | Blist ← list(B, Blist) & 103 | store(N, Blist, Map) → Map & 104 | merge_prod_branches(T, Map). 105 | merge_prod_branches(nil,Map) = 106 | unmap(Map, nil). 107 | # unmap(Map, A): turns each pair(Name, Branches) into production(Name, Branches reversed) and prepends it to A. 108 | unmap(list(pair(K, V), T), A) = 109 | list:reverse(V, nil) → RV & 110 | P ← production(K, RV) & 111 | A ← list(P, A) & 112 | unmap(T, A). 113 | unmap(nil, A) = A. 114 | 115 | ##### 116 | # CM = current module name; a prodref whose module name is '' is rewritten to refer to CM 117 | 118 | analyze_all(CM, list(H,T)) = 119 | analyze(CM, H) → DH & 120 | analyze_all(CM, T) → DT & 121 | return list(DH, DT). 122 | analyze_all(CM, nil) = 'nil'. 123 | 124 | analyze(CM, program(L)) = 125 | analyze_all(CM, L) → DL & 126 | return program(DL). 127 | analyze(CM, module(N, L)) = 128 | analyze_all(N, L) → DL & 129 | return module(N, DL). 130 | analyze(CM, production(N, Bs)) = 131 | analyze_all(CM, Bs) → DBs & 132 | return production(N, DBs). 133 | analyze(CM, prodbranch(Fs, Ls, E)) = 134 | analyze(CM, E) → DE & 135 | locals(DE, nil) → Ls & 136 | list:reverse(Ls, nil) → Ls & 137 | return prodbranch(Fs, Ls, DE). 138 | analyze(CM, call(PR, As)) = 139 | analyze(CM, PR) → DPR & 140 | analyze_all(CM, As) → DAs & 141 | return call(DPR, DAs). 142 | analyze(CM, prodref(MN, PN)) = 143 | $:equal(MN, '') & return prodref(CM, PN) 144 | | return prodref(MN, PN).
145 | analyze(CM, or(L, R)) = 146 | analyze(CM, L) → DL & 147 | analyze(CM, R) → DR & 148 | return or(DL, DR). 149 | analyze(CM, and(L, R)) = 150 | analyze(CM, L) → DL & 151 | analyze(CM, R) → DR & 152 | return and(DL, DR). 153 | analyze(CM, not(X)) = 154 | analyze(CM, X) → DX & 155 | return not(DX). 156 | analyze(CM, while(X)) = 157 | analyze(CM, X) → DX & 158 | return while(DX). 159 | analyze(CM, concat(L, R)) = 160 | analyze(CM, L) → DL & 161 | analyze(CM, R) → DR & 162 | return concat(DL, DR). 163 | analyze(CM, using(R, PR)) = 164 | analyze(CM, R) → DR & 165 | analyze(CM, PR) → DPR & 166 | return using(DR, DPR). 167 | analyze(CM, on(R, T)) = 168 | analyze(CM, R) → DR & 169 | analyze(CM, T) → DT & 170 | return on(DR, DT). 171 | analyze(CM, send(R, V)) = 172 | analyze(CM, R) → DR & 173 | return send(DR, V). 174 | analyze(CM, set(V, T)) = 175 | analyze(CM, T) → DT & 176 | return set(V, DT). 177 | analyze(CM, atom(T)) = return atom(T). 178 | analyze(CM, constructor(T, Ts)) = return constructor(T, Ts). 179 | analyze(CM, variable(N)) = return variable(N). 180 | 181 | ##### 182 | # locals(AST, Ls): returns Ls extended (via list:add_elem) with the name of each variable found in AST 183 | # the arguments of a call and the production named by `using` are not searched 184 | locals(call(PR, As), Ls) = 185 | Ls. 186 | locals(or(L, R), Ls) = 187 | locals(L, Ls) → Ls & 188 | locals(R, Ls). 189 | locals(and(L, R), Ls) = 190 | locals(L, Ls) → Ls & 191 | locals(R, Ls). 192 | locals(not(X), Ls) = 193 | locals(X, Ls). 194 | locals(while(X), Ls) = 195 | locals(X, Ls). 196 | locals(concat(L, R), Ls) = 197 | locals(L, Ls) → Ls & 198 | locals(R, Ls). 199 | locals(using(R, P), Ls) = 200 | locals(R, Ls). 201 | locals(on(R, T), Ls) = 202 | locals(R, Ls) → Ls & 203 | locals(T, Ls). 204 | locals(send(R, V), Ls) = 205 | locals(V, Ls) → Ls & 206 | locals(R, Ls). 207 | locals(set(V, T), Ls) = 208 | locals(V, Ls) → Ls & 209 | locals(T, Ls). 210 | locals(atom(T), Ls) = Ls. 211 | locals(constructor(T, Ts), Ls) = 212 | locals_all(Ts, Ls). 213 | locals(variable(N), Ls) = 214 | list:add_elem(N, Ls). 215 | 216 | locals_all(nil, Ls) = Ls.
217 | locals_all(list(H,T), Ls) = 218 | locals(H, Ls) → Ls & 219 | locals_all(T, Ls). 220 | } 221 | -------------------------------------------------------------------------------- /lib/tamsin_parser.tamsin: -------------------------------------------------------------------------------- 1 | # Parse Tamsin source to Tamsin AST, written in Tamsin. 2 | # Distributed under a BSD-style license; see LICENSE. 3 | 4 | # REQUIRES lib/tamsin_scanner.tamsin 5 | # REQUIRES lib/list.tamsin 6 | 7 | # Note that this may contain support for some features which are not in 8 | # the current released or pre-released version. 9 | # parse runs grammar over tokens from tamsin_scanner:scanner. grammar returns program(Modules); top-level productions are gathered into a module named main (MM), placed first, and omitted when there are none. 10 | tamsin_parser { 11 | parse = grammar using tamsin_scanner:scanner. 12 | grammar = {"@" & pragma & "."} & 13 | LM ← nil & 14 | LP ← nil & 15 | { 16 | production → P & "." & LP ← list(P, LP) 17 | | module → M & LM ← list(M, LM) 18 | } & 19 | list:reverse(LP, nil) → LP & 20 | MM ← module(main, LP) & 21 | list:reverse(LM, nil) → LM & 22 | ($:equal(LP, nil) | LM ← list(MM, LM)) & 23 | return program(LM). 24 | module = word → N & 25 | LP ← nil & 26 | "{" & 27 | {production → P & "." & LP ← list(P, LP)} & 28 | "}" & 29 | list:reverse(LP, nil) → LP & 30 | return module(N, LP). 31 | production = word → N & 32 | F ← nil & 33 | [formals → F] & 34 | "=" & 35 | expr0 → E & 36 | return production(N, list(prodbranch(F, nil, E), nil)). 37 | formals = L ← nil & 38 | "(" & 39 | term → T & L ← list(T, L) & 40 | {"," & term → T & L ← list(T, L)} & 41 | ")" & 42 | list:reverse(L, nil) → L & 43 | return L 44 | | "[" & expr0 & "]". 45 | expr0 = expr1 → L & {("|" | "||") & expr1 → R & L ← or(L, R)} & L. 46 | expr1 = expr2 → L & {("&" | "&&") & expr2 → R & L ← and(L, R)} & L. 47 | expr2 = expr3 → L & ["using" & prodref → P & L ← using(L, P) 48 | | "@" & texpr → T & L ← on(L, T)] & L. 49 | expr3 = expr4 → L & [("→" | "->") & variable → V & L ← send(L, V)] & L.
50 | expr4 = expr5 → L & ("/" & texpr → T & 51 | ("/" & term → T2 & return fold(L, T, T2) 52 | | return fold(L, T, nil)) 53 | | return L). 54 | expr5 = "(" & expr0 → E & ")" & E 55 | | "[" & expr0 → E & "]" & 56 | return or(E, call(prodref('$', return), list(atom(nil), nil))) 57 | | "{" & expr0 → E & "}" & return while(E) 58 | | "!" & expr5 → E & return not(E) 59 | | "set" & variable → V & "=" & texpr → T & return set(V, T) 60 | | "return" & texpr → T & return call(prodref('$', return), list(T, nil)) 61 | | "fail" & texpr → T & return call(prodref('$', fail), list(T, nil)) 62 | | "print" & texpr → T & return call(prodref('$', print), list(T, nil)) 63 | | "any" & return call(prodref('$', any), nil) 64 | | "eof" & return call(prodref('$', 'eof'), nil) 65 | | terminal 66 | | variable → V & 67 | (("←" | "<-") & texpr → T & return set(V, T) 68 | | return call(prodref('$', return), list(V, nil))) 69 | | sq_string → T & 70 | $:unquote(T, '\'', '\'') → T & 71 | return call(prodref('$', return), list(atom(T), nil)) 72 | | pq_string → T & 73 | $:unquote(T, '“', '”') → T & 74 | expect_chars(T) → E & 75 | return and(E, call(prodref('$', return), list(atom(T), nil))) 76 | | prodref → P & 77 | L ← nil & 78 | ["(" & 79 | texpr → T & L ← list(T, L) & 80 | {"," & texpr → T & L ← list(T, L)} & 81 | ")"] & 82 | list:reverse(L, nil) → L & 83 | return call(P, L). 84 | 85 | texpr = term → T & {"+" & term → S & T ← concat(T, S)} & T. 86 | term = term0. 87 | term0 = variable 88 | | "[" & L ← atom(nil) & 89 | [term → T & L ← constructor(list, list(T, list(L, nil))) & 90 | {"," & term → T & L ← constructor(list, list(T, list(L, nil)))}] & 91 | Tail ← atom(nil) & 92 | ["|" & term → Tail] & 93 | "]" & 94 | reverse_c(L, Tail) → L & 95 | return L 96 | | atom → A & L ← nil & ["(" & 97 | term0 → T & L ← list(T, L) & 98 | {"," & term0 → T & L ← list(T, L)} & 99 | ")"] & 100 | list:reverse(L, nil) → L & 101 | ($:equal(L, nil) & return atom(A) 102 | | return constructor(A, L)). 
103 | atom = word 104 | | sq_string → T & 105 | $:unquote(T, '\'', '\''). 106 | 107 | terminal = terminal0 → T & return call(prodref('$', expect), list(T, nil)). 108 | terminal0 = dq_string → T & $:unquote(T, '"', '"') → T & return atom(T) 109 | | ("«" | "<<") & texpr → T & ("»" | ">>") & return T. 110 | 111 | prodref = modref → M & ":" & word → P & return prodref(M, P) 112 | | ":" & word → P & return prodref('', P) 113 | | word → P & return prodref('', P). 114 | modref = "$" | word. 115 | pragma = "alias" & word & word & "=" & prodref 116 | | "unalias" & word. 117 | 118 | word = $:alnum. 119 | variable = $:upper → V & return variable(V). 120 | sq_string = $:startswith('\''). 121 | dq_string = $:startswith('"'). 122 | pq_string = $:startswith('“'). 123 | 124 | ## utility functions on the AST ## 125 | 126 | # Given the name of a module and a program AST, return the named 127 | # module AST found within that program, or fail. 128 | 129 | find_module(N, program(Ms)) = find_module(N, Ms). 130 | find_module(N1, list(module(N2, Ps), T)) = 131 | $:equal(N1, N2) & return module(N2, Ps) | find_module(N1, T). 132 | find_module(N, list(H, T)) = find_module(N, T). 133 | find_module(N, nil) = fail 'no ' + N + ' module'. 134 | 135 | # Given the name of a production and a module AST, return the named 136 | # production AST found within that module, or fail. 137 | 138 | find_production(N, module(MN, Ps)) = find_production(N, Ps). 139 | find_production(N1, list(production(N2, Bs), T)) = 140 | $:equal(N1, N2) & return production(N2, Bs) | find_production(N1, T). 141 | find_production(N, list(H, T)) = find_production(N, T). 142 | find_production(N, nil) = fail 'no ' + N + ' production'. 143 | 144 | # Given the name of a module and the name of a production, 145 | # return the production AST for module:production in the program, or fail. 146 | 147 | find_production_global(MN, PN, P) = 148 | find_module(MN, P) → M & find_production(PN, M). 
149 | 150 | reverse_c(constructor(list, list(Fst, list(Snd, nil))), Acc) = 151 | Acc ← constructor(list, list(Fst, list(Acc, nil))) & 152 | reverse_c(Snd, Acc). 153 | reverse_c(Other, Acc) = Acc. 154 | 155 | # Given a single-character string, return call(prodref('$', 'expect'), S) 156 | # Given a string, return and(call(prodref('$', 'expect'), head(S)), 157 | # expect_chars(tail(S))). 158 | 159 | expect_chars(S) = (expect_chars_r using $:utf8) @ S. 160 | expect_chars_r = any → C & 161 | E ← call(prodref('$', 'expect'), list(atom(C), nil)) & 162 | ((eof & return E) | (expect_chars_r → R & return and(E, R))). 163 | } 164 | -------------------------------------------------------------------------------- /lib/tamsin_scanner.tamsin: -------------------------------------------------------------------------------- 1 | # Scanner for Tamsin tokens, written in Tamsin. 2 | # Distributed under a BSD-style license; see LICENSE. 3 | 4 | tamsin_scanner { 5 | scanner = scan using $:utf8. 6 | scan = skippable & 7 | (symbol | str('\'', '\'') | str('"', '"') | str('“', '”') | word). 8 | symbol = "&" & "&" & '&&' 9 | | "|" & "|" & '||' 10 | | "-" & ">" & '->' 11 | | "<" & "-" & '<-' 12 | | "<" & "<" & '<<' 13 | | ">" & ">" & '>>' 14 | | "=" | "(" | ")" | "[" | "]" | "{" | "}" | "!" | "|" | "&" | ":" 15 | | "/" | "," | "." | "@" | "+" | "$" | "→" | "←" | "«" | "»". 16 | str(O, C) = «O» → T & {("\\" & escape | !«C» & any) → S & T ← T + S} & «C» & 17 | return T + C. 18 | escape = "n" & '\n' 19 | | "r" & '\r' 20 | | "t" & '\t' 21 | | "x" & hexdigit → H & hexdigit → L & $:hexbyte(H, L) 22 | | "\\" & '\\' 23 | | "'" & '\'' 24 | | "\"" & '"'. 25 | hexdigit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | 26 | "a" | "b" | "c" | "d" | "e" | "f". 27 | word = $:alnum → T & { ($:alnum | "_") → S & T ← T + S } & T. 28 | skippable = {whitespace | comment}. 29 | whitespace = " " | "\t" | "\r" | "\n". 30 | comment = "#" & {!"\n" & any} & ("\n" | eof). 
31 | } -------------------------------------------------------------------------------- /mains/analyzer.tamsin: -------------------------------------------------------------------------------- 1 | main = tamsin_parser:parse → AST & tamsin_scanner:skippable & eof & 2 | tamsin_analyzer:desugar(AST) → AST & 3 | tamsin_analyzer:analyze(nil, AST) → AST & 4 | $:repr(AST). 5 | -------------------------------------------------------------------------------- /mains/desugarer.tamsin: -------------------------------------------------------------------------------- 1 | main = tamsin_parser:parse → AST & tamsin_scanner:skippable & eof & 2 | tamsin_analyzer:desugar(AST) → AST & 3 | $:repr(AST). 4 | -------------------------------------------------------------------------------- /mains/grammar.tamsin: -------------------------------------------------------------------------------- 1 | # Syntax-correctness parser for Tamsin, written in Tamsin. 2 | # Distributed under a BSD-style license; see LICENSE. 3 | 4 | # REQUIRES lib/tamsin_scanner.tamsin 5 | 6 | # Note that this does not produce any data as a result beyond "yes, it 7 | # parsed" or "no, there was a syntax error". This exists to provide a 8 | # clean, readable grammar. For actual use, see lib/tamsin_parser.tamsin, 9 | # which parses a Tamsin program to an AST. 10 | 11 | # If there is any discrepancy between the language this grammar accepts, 12 | # and the language lib/tamsin_parser.tamsin accepts, lib/tamsin_parser.tamsin 13 | # takes precedence. 14 | 15 | # Note that this may contain support for some features which are not in 16 | # the current released or pre-released version. 17 | 18 | main = grammar using tamsin_scanner:scanner. 19 | 20 | grammar = {"@" & pragma & "."} & 21 | {module | production & "."} & eof & 'ok'. 22 | module = word & "{" & {production & "."} & "}". 23 | production = word & ["(" & term & {"," & term} & ")" 24 | | "[" & expr0 & "]"] & "=" & expr0. 25 | expr0 = expr1 & {("|" | "||") & expr1}. 
# (mains/grammar.tamsin, continued)  Same precedence ladder as
# lib/tamsin_parser.tamsin, but recognition only: no AST is built.
expr1      = expr2 & {("&" | "&&") & expr2}.
expr2      = expr3 & ["using" & prodref | "@" & texpr].
expr3      = expr4 & [("→" | "->") & variable].
expr4      = expr5 & ["/" & texpr & ["/" & term]].
expr5      = "(" & expr0 & ")"
           | "[" & expr0 & "]"
           | "{" & expr0 & "}"
           | "!" & expr5
           | "set" & variable & "=" & texpr
           | "return" & texpr
           | "fail" & texpr
           | "print" & texpr
           | terminal
           | variable & [("←" | "<-") & texpr]
           | sq_string
           | prodref & ["(" & texpr & {"," & texpr} & ")"].
texpr      = term & {"+" & term}.
term       = atom & ["(" & [term & {"," & term}] & ")"]
           | "[" & [term & {"," & term}] & ["|" & term] & "]"
           | variable.
atom       = word | sq_string.
terminal   = dq_string
           | ("«" | "<<") & texpr & ("»" | ">>").
prodref    = modref & ":" & word
           | ":" & word
           | word.
modref     = "$" | word.
pragma     = "alias" & word & word & "=" & prodref
           | "unalias" & word.

word = $:alnum.
variable = $:upper.
sq_string = $:startswith('\'').
dq_string = $:startswith('"').

# ---- mains/micro-tamsin.tamsin ----

# Interpreter for "Micro-Tamsin", written in Tamsin.
# (see doc/Micro-Tamsin.markdown.)
# Distributed under a BSD-style license; see LICENSE.

# REQUIRES lib/tamsin_scanner.tamsin
# REQUIRES lib/tamsin_parser.tamsin

# Parse a program, skip whitespace/comments, require a literal "/", then
# interpret the program.  (Presumably whatever follows the "/" is the input
# consumed by the interpreted program -- confirm against the documentation.)
main = tamsin_parser:parse → AST & tamsin_scanner:skippable & "/" &
       interpret(AST, AST).

# interpret(P, Node): P is always the entire program AST (used for production
# lookup); Node is the part currently being interpreted.

# A program is run by looking up main:main and interpreting it.
interpret(P, program(L)) =
    tamsin_parser:find_production_global('main', 'main', P) → Main &
    interpret(P, Main).

# Only productions with exactly one branch match; formals Fs and locals Ls
# are ignored and just the body E is interpreted.
interpret(P, production(N, list(prodbranch(Fs, Ls, E), nil))) = interpret(P, E).

# $:return with a single atom argument yields that atom's text.
interpret(P, call(prodref('$', 'return'), list(atom(X), nil))) = return X.
# (mains/micro-tamsin.tamsin, continued)

# $:expect and $:print with a single atom argument map directly onto the
# host constructs.
interpret(P, call(prodref('$', 'expect'), list(atom(X), nil))) = «X».
interpret(P, call(prodref('$', 'print'), list(atom(X), nil))) = print X.
# An unqualified call (module '') is retried as a call into module 'main'.
interpret(P, call(prodref('', N), A)) =
    interpret(P, call(prodref('main', N), A)).
# Any other call: look the production up and interpret it.  The argument
# list A is not passed on.
interpret(P, call(prodref(M, N), A)) =
    tamsin_parser:find_production_global(M, N, P) → Prod &
    interpret(P, Prod).

# The combinators map one-to-one onto the host language's own.
interpret(P, or(L, R)) = interpret(P, L) | interpret(P, R).
interpret(P, and(L, R)) = interpret(P, L) & interpret(P, R).
interpret(P, not(X)) = !interpret(P, X).
interpret(P, while(X)) = {interpret(P, X)}.

# ---- mains/mini-tamsin.tamsin ----

# Interpreter for "Mini-Tamsin", written in Tamsin.
# (see doc/Mini-Tamsin.markdown.)
# Distributed under a BSD-style license; see LICENSE.

# REQUIRES lib/tamsin_scanner.tamsin
# REQUIRES lib/tamsin_parser.tamsin

# Same shape as the Micro-Tamsin driver, with a state term threaded through.
main = tamsin_parser:parse → AST & tamsin_scanner:skippable & "/" &
       new_state → S &
       interpret(AST, S, AST).

#
# FIXME there are several rather major shortcomings with this, still!
#

# The initial state is the empty constructor state().
new_state = return state().

#
# interpret(EntireProgram, State, CurrentProgramPart)
# returns a pair(Result, NewState)
#
# The incoming S is replaced by a fresh state before main:main is run.
interpret(P, S, program(L)) =
    tamsin_parser:find_production_global('main', 'main', P) → Main &
    new_state → S &
    interpret(P, S, Main).

# Single-branch productions only; formals and locals are ignored.
interpret(P, S, production(N, list(prodbranch(Fs, Ls, E), nil))) =
    interpret(P, S, E).

interpret(P, S, call(prodref('$', 'return'), list(atom(X), nil))) =
    return pair(X, S).

interpret(P, S, call(prodref('$', 'expect'), list(atom(X), nil))) =
    «X» → R &    # FIXME this isn't going to work if «X» fails, is it.
    return pair(R, S).
# (mains/mini-tamsin.tamsin, continued)

# print yields the printed atom's text as the result; the state is unchanged.
interpret(P, S, call(prodref('$', 'print'), list(atom(X), nil))) =
    print X &
    return pair(X, S).

# An unqualified call (module '') is retried as a call into module 'main'.
interpret(P, S, call(prodref('', N), A)) =
    interpret(P, S, call(prodref('main', N), A)).

# The called production runs with a fresh state S2; the caller's S and the
# argument list A are not passed on.
interpret(P, S, call(prodref(M, N), A)) =
    tamsin_parser:find_production_global(M, N, P) → Prod &
    new_state → S2 &
    interpret(P, S2, Prod).

# The right-hand alternative is interpreted with the original state S.
interpret(P, S, or(L, R)) =
    interpret(P, S, L) → pair(Res, S2) &
    (Res & return pair(Res, S2)) | interpret(P, S, R).
    # FIXME what happens to S?  I think this is right though

# The state produced by the left-hand side is threaded into the right.
interpret(P, S, and(L, R)) =
    interpret(P, S, L) → pair(Res, S2) &
    interpret(P, S2, R).

# interpret(P, S, not(X)) = !interpret(P, S, X).

# Each successful iteration replaces S with the state it produced.
interpret(P, S, while(X)) =
    {interpret(P, S, X) → pair(Res, S2) & set S = S2}.

# ---- mains/parser.tamsin ----

# REQUIRES lib/tamsin_scanner.tamsin
# REQUIRES lib/tamsin_parser.tamsin

# Parse, require that only skippable text remains before eof, and yield the
# repr of the resulting AST.
main = tamsin_parser:parse → P & tamsin_scanner:skippable & eof & $:repr(P).

# ---- mains/scanner.tamsin ----

# REQUIRES lib/tamsin_scanner.tamsin

# Print the repr of every token that tamsin_scanner:scan produces, one per
# iteration, then yield the empty atom.
main = {tamsin_scanner:scan -> T & $:repr(T) -> T & print T} & ''.
# ---- src/tamsin/__init__.py ----
# (no source in this listing; the file is given only by reference:
#  https://raw.githubusercontent.com/catseye/Tamsin/cfc9a7270773658a1cddb017aaaf4856939c328c/src/tamsin/__init__.py)

# ---- src/tamsin/analyzer.py ----
# encoding: UTF-8

# Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
# Distributed under a BSD-style license; see LICENSE for more information.

from tamsin.ast import (
    Program, Module, Production, ProdBranch,
    And, Or, Not, While, Call, Send, Set,
    Using, On, Concat, Prodref,
    TermNode, VariableNode, PatternVariableNode, AtomNode, ConstructorNode
)
from tamsin.term import Term
from tamsin.event import EventProducer


class Analyzer(EventProducer):
    """The Analyzer takes a desugared AST, walks it, and returns a new AST.
    It is responsible for:

    * Finding the set of local variable names used in each production and
      sticking that in the locals_ field of the new Production node.
    * Resolving any '' modules in Prodrefs to the name of the current
      module.

    * Looking for undefined nonterminals and raising an error if such found.
      (this is done at the end by analyze_prodrefs)

    TODO: it should also find any locals that are accessed before being set
    TODO: it should also look for a mismatch in # of formals
    """
    def __init__(self, program, listeners=None):
        """Remember `program` and `listeners`; no module is current yet."""
        self.listeners = listeners
        self.program = program
        self.current_module = None

    def analyze(self, ast):
        """Return a rebuilt copy of `ast` with its children analyzed.

        For a Program the rebuilt program also replaces `self.program` and
        is then checked by `analyze_prodrefs`.  TermNode instances are
        returned as they are.  Raises NotImplementedError for any node type
        not listed below.
        """
        if isinstance(ast, Program):
            modlist = []
            for mod in ast.modlist:
                mod = self.analyze(mod)
                modlist.append(mod)
            self.program = Program(modlist)
            self.analyze_prodrefs(self.program)
            return self.program
        elif isinstance(ast, Module):
            # current_module is only set while this module's productions are
            # being analyzed; the Prodref case below reads it.
            self.current_module = ast
            prodlist = []
            for prod in ast.prodlist:
                prodlist.append(self.analyze(prod))
            self.current_module = None
            return Module(ast.name, prodlist)
        elif isinstance(ast, Production):
            branches = []
            for b in ast.branches:
                branches.append(self.analyze(b))
            return Production(ast.name, branches)
        elif isinstance(ast, ProdBranch):
            # Locals are collected from the already-analyzed body; the
            # formals are passed through unchanged.
            locals_ = []
            body = self.analyze(ast.body)
            self.collect_locals(body, locals_)
            return ProdBranch(ast.formals, locals_, body)
        elif isinstance(ast, Or):
            return Or(self.analyze(ast.lhs), self.analyze(ast.rhs))
        elif isinstance(ast, And):
            return And(self.analyze(ast.lhs), self.analyze(ast.rhs))
        elif isinstance(ast, Using):
            return Using(self.analyze(ast.rule), self.analyze(ast.prodref))
        elif isinstance(ast, On):
            return On(self.analyze(ast.rule), self.analyze(ast.texpr))
        elif isinstance(ast, Call):
            # Only the prodref is analyzed; the argument list is reused as-is.
            return Call(self.analyze(ast.prodref), ast.args)
        elif isinstance(ast, Send):
            assert isinstance(ast.pattern, TermNode), ast
            return Send(self.analyze(ast.rule), self.analyze(ast.pattern))
        elif isinstance(ast, Set):
            assert isinstance(ast.variable, VariableNode), ast
            return Set(ast.variable, self.analyze(ast.texpr))
        elif isinstance(ast, Not):
            return Not(self.analyze(ast.rule))
        elif isinstance(ast, While):
            return While(self.analyze(ast.rule))
        elif isinstance(ast, Concat):
            return Concat(self.analyze(ast.lhs), self.analyze(ast.rhs))
        elif isinstance(ast, TermNode):
            return ast
        elif isinstance(ast, Prodref):
            # An empty module name is resolved to the module being analyzed.
            module = ast.module
            if module == '':
                module = self.current_module.name
            new = Prodref(module, ast.name)
            return new
        else:
            raise NotImplementedError(repr(ast))

    def collect_locals(self, ast, locals_):
        """locals_ should be a list.

        Variable names found in `ast` are appended to `locals_` in place,
        each name at most once.  Nothing is returned.
        """

        if isinstance(ast, ProdBranch):
            self.collect_locals(ast.body, locals_)
        elif (isinstance(ast, And) or isinstance(ast, Or) or
              isinstance(ast, Concat)):
            self.collect_locals(ast.lhs, locals_)
            self.collect_locals(ast.rhs, locals_)
        elif isinstance(ast, Using):
            # Only the rule is searched, not the prodref.
            self.collect_locals(ast.rule, locals_)
        elif isinstance(ast, On):
            self.collect_locals(ast.rule, locals_)
            self.collect_locals(ast.texpr, locals_)
        elif isinstance(ast, Call):
            # Call arguments are not searched.
            pass
        elif isinstance(ast, Send):
            self.collect_locals(ast.pattern, locals_)
            self.collect_locals(ast.rule, locals_)
        elif isinstance(ast, Set):
            self.collect_locals(ast.variable, locals_)
            self.collect_locals(ast.texpr, locals_)
        elif isinstance(ast, Not) or isinstance(ast, While):
            self.collect_locals(ast.rule, locals_)
        elif isinstance(ast, VariableNode):
            if ast.name not in locals_:
                locals_.append(ast.name)
        elif isinstance(ast, PatternVariableNode):
            # will probably be needed for Send().  but, not sure.  FIXME
            if ast.name not in locals_:
                locals_.append(ast.name)
        elif isinstance(ast, ConstructorNode):
            for sub in ast.contents:
                self.collect_locals(sub, locals_)
        elif isinstance(ast, AtomNode):
            pass
        else:
            raise NotImplementedError(repr(ast))

    def analyze_prodrefs(self, ast):
        """does not return anything

        Walks `ast` and, for every Prodref reached whose module is not '$',
        raises KeyError if the module or the production cannot be found in
        `self.program`.
        """
        if isinstance(ast, Program):
            for mod in ast.modlist:
                self.analyze_prodrefs(mod)
        elif isinstance(ast, Module):
            for prod in ast.prodlist:
                self.analyze_prodrefs(prod)
        elif isinstance(ast, Production):
            for b in ast.branches:
                self.analyze_prodrefs(b)
        elif isinstance(ast, ProdBranch):
            self.analyze_prodrefs(ast.body)
        elif isinstance(ast, Or) or isinstance(ast, And):
            self.analyze_prodrefs(ast.lhs)
            self.analyze_prodrefs(ast.rhs)
        elif isinstance(ast, Using):
            self.analyze_prodrefs(ast.rule)
            self.analyze_prodrefs(ast.prodref)
        elif isinstance(ast, On):
            self.analyze_prodrefs(ast.rule)
        elif isinstance(ast, Call):
            self.analyze_prodrefs(ast.prodref)
        elif isinstance(ast, Send):
            self.analyze_prodrefs(ast.rule)
        elif isinstance(ast, Set):
            pass
        elif isinstance(ast, Not):
            self.analyze_prodrefs(ast.rule)
        elif isinstance(ast, While):
            self.analyze_prodrefs(ast.rule)
        elif isinstance(ast, Concat):
            pass
        # NOTE(review): this tests tamsin.term.Term, whereas analyze() and
        # collect_locals() test tamsin.ast.TermNode -- confirm which is meant.
        elif isinstance(ast, Term):
            pass
        elif isinstance(ast, Prodref):
            # analyze() must already have resolved '' to a module name.
            assert ast.module != '', repr(ast)
            if ast.module == '$':
                return  # TODO: also check builtins?
            module = self.program.find_module(ast.module)
            if not module:
                raise KeyError("no '%s' module defined" % ast.module)
            production = module.find_production(ast.name)
            if not production:
                raise KeyError("no '%s:%s' production defined" %
                    (ast.module, ast.name)
                )
        else:
            raise NotImplementedError(repr(ast))

# ---- src/tamsin/backends/__init__.py ----
# (no source in this listing; the file is given only by reference:
#  https://raw.githubusercontent.com/catseye/Tamsin/cfc9a7270773658a1cddb017aaaf4856939c328c/src/tamsin/backends/__init__.py)

# ---- src/tamsin/buffer.py ----
# encoding: UTF-8

# Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
# Distributed under a BSD-style license; see LICENSE for more information.

import sys


class Buffer(object):
    """Abstract base class for all Buffer objects.

    Buffer objects are mutable, but must be capable of saving and restoring
    their state indefinitely.

    """
    def __init__(self, filename='', position=0, line_number=1, column_number=1):
        """If `position` is given, `line_number` and `column_number` should
        be given too, to match.

        """
        self.filename = filename
        self.position = position
        self.line_number = line_number
        self.column_number = column_number

    def save_state(self):
        """Push the current state so it can be restored later (abstract)."""
        raise NotImplementedError

    def restore_state(self):
        """Pop the most recently saved state and return to it (abstract)."""
        raise NotImplementedError

    def pop_state(self):
        """Discard the most recently saved state (abstract)."""
        raise NotImplementedError

    def advance(self, inp):
        """Given a string that we have just consumed from the buffer,
        return new line_number and column_number.

        The buffer itself is not modified; callers assign the returned pair.
        Each newline moves to column 1 of the next line, and every other
        character moves one column to the right.

        """
        line_number = self.line_number
        column_number = self.column_number
        for char in inp:
            if char == '\n':
                line_number += 1
                column_number = 1
            else:
                column_number += 1
        return (line_number, column_number)

    def chop(self, amount):
        """Consume `amount` characters from the front of the buffer.

        In the subclasses in this module, the buffer is advanced in place
        and only the consumed string is returned (not a pair with a new
        Buffer object).

        """
        raise NotImplementedError

    def first(self, amount):
        """Look at the first `amount` characters in the buffer without
        consuming them.

        In the subclasses in this module, only the string is returned (not
        a pair with a new Buffer object).

        """
        raise NotImplementedError


class StringBuffer(Buffer):
    def __init__(self, string, **kwargs):
        """Create a new StringBuffer object.

        `string` should be a raw string, not unicode.  If `position` is given,
        `line_number` and `column_number` should be given too, to match.

        """
        assert not isinstance(string, unicode)
        self.string = string
        # saved (position, line_number, column_number) triples
        self.stack = []
        Buffer.__init__(self, **kwargs)

    def save_state(self):
        self.stack.append((self.position, self.line_number, self.column_number))

    def restore_state(self):
        (self.position, self.line_number, self.column_number) = self.stack.pop()

    def pop_state(self):
        self.stack.pop()

    def __str__(self):
        # The whole underlying string, regardless of the current position.
        return self.string

    def __repr__(self):
        return "StringBuffer(%r, filename=%r, position=%r, line_number=%r, column_number=%r)" % (
            self.string, self.filename, self.position, self.line_number, self.column_number
        )

    def chop(self, amount):
        """Consume and return the next `amount` characters.

        Chopping past the end of the string is an assertion failure.
        """
        assert self.position <= len(self.string) - amount, \
            "attempt made to chop past end of buffer"
        bytes = self.string[self.position:self.position + amount]

        self.position += amount
        (self.line_number, self.column_number) = self.advance(bytes)

        return bytes

    def first(self, amount):
        """Return up to `amount` characters from the current position
        without consuming them; fewer are returned near the end."""
        bytes = self.string[self.position:self.position + amount]

        return bytes


class FileBuffer(Buffer):
    def __init__(self, file, **kwargs):
        """Wrap `file`, an object whose read(n) method is used to fetch
        characters on demand."""
        self.file = file
        # stuff we have read out of the file, but need to keep
        self.pre_buffer = ''
        # the position in the file where we started reading into pre_buffer
        self.pre_position = 0
        # saved (position, line_number, column_number) triples
        self.stack = []
        Buffer.__init__(self, **kwargs)

    def save_state(self):
        state = (self.position, self.line_number, self.column_number)
        self.stack.append(state)

    def _truncate_pre_buffer(self):
        # Text before the current position is dropped only when no saved
        # state is outstanding, i.e. when nothing can rewind onto it.
        if not self.stack and self.position > self.pre_position:
            self.pre_buffer = self.pre_buffer[self.position - self.pre_position:]
            self.pre_position = self.position

    def restore_state(self):
        state = self.stack.pop()
        (self.position, self.line_number, self.column_number) = state
        self._truncate_pre_buffer()

    def pop_state(self):
        self.stack.pop()
        self._truncate_pre_buffer()

    def chop(self, amount):
        """Consume and return up to `amount` characters, reading from the
        file whatever is not already held in pre_buffer.

        Fewer characters are returned if the file runs out, but `position`
        is still advanced by the full `amount`.
        """
        pos = self.position - self.pre_position
        bytes = self.pre_buffer[pos:pos + amount]
        bytes_to_read = amount - len(bytes)
        if bytes_to_read > 0:
            self.pre_buffer += self.file.read(bytes_to_read)
            bytes = self.pre_buffer[pos:pos + amount]
            #assert len(pre_bytes) == amount   # no, b/c what about EOF?

        self.position += amount
        (self.line_number, self.column_number) = self.advance(bytes)
        self._truncate_pre_buffer()
        return bytes

    def first(self, amount):
        """Peek at the next `amount` characters: chop them inside a
        save/restore pair so that they stay in pre_buffer."""
        self.save_state()
        bytes = self.chop(amount)
        self.restore_state()
        return bytes

# ---- src/tamsin/codegen.py ----
# encoding: UTF-8

# Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
# Distributed under a BSD-style license; see LICENSE for more information.

from tamsin import ast as ack
from tamsin.ast import AtomNode, VariableNode
from tamsin.codenode import (
    CodeNode, Program, Prototype, Subroutine,
    Block, If, While, And, Not, Return, Builtin, Call, Truth, Falsity,
    DeclareLocal, GetVar, SetVar, Concat, VariableRef,
    Unifier, PatternMatch, NoMatch, GetMatchedVar,
    DeclState, SaveState, RestoreState,
    MkAtom, MkConstructor,
    ScannerPushEngine, ScannerPopEngine,
)
from tamsin.term import Atom, Constructor, Variable
import tamsin.sysmod


# TODO: is this module responsible for allocating names, or is the backend?
# I think it should probably be this module.
# On the other hand, backend needs to be able to generate temporaries, too.


class CodeGen(object):
    """Translates a Tamsin AST (tamsin.ast, imported here as `ack`) into a
    tree of tamsin.codenode nodes."""

    def __init__(self, program):
        self.program = program
        # counter behind new_name()
        self.name_index = 0

    def new_name(self):
        """Return a fresh name: "var0", "var1", ... in call order."""
        name = "var%s" % self.name_index
        self.name_index += 1
        return name

    def generate(self):
        """Return a codenode Program holding a Prototype for every
        production of every module, followed by a Subroutine for each.

        Raises ValueError when the program has no main:main production.
        """
        main = self.program.find_production(ack.Prodref('main', 'main'))
        if not main:
            raise ValueError("no 'main:main' production defined")

        program = Program()
        # All prototypes are appended before any subroutine.
        for module in self.program.modlist:
            for prod in module.prodlist:
                program.append(
                    Prototype(module=module, prod=prod, formals=prod.branches[0].formals)
                )

        for module in self.program.modlist:
            for prod in module.prodlist:
                program.append(
                    self.gen_subroutine(module, prod, prod.branches[0].formals)
                )

        return program

    def gen_subroutine(self, module, prod, formals):
        """Build the Subroutine for `prod`.  Its children are a Unifier
        (built from the first branch only) and the branch-selection code."""
        children = []
        s = Subroutine(module, prod, formals, children)
        children.append(self.gen_unifier(prod, prod.branches[0]))  # becoming so wrong
        children.append(self.gen_branches(module, prod, prod.branches))
        return s

    def gen_unifier(self, prod, branch):
        """Collect the distinct variables found in `branch`'s formals into
        `prod.all_pattern_variables` (reset on every call) and return a
        Unifier over that list."""
        prod.all_pattern_variables = []

        # pat_names is filled in but not read again in this method.
        pat_names = []
        # NOTE(review): the nesting of the variable-collection statements
        # inside this loop is inferred (the listing this was taken from had
        # lost its indentation) -- confirm against the repository.
        for fml_num, formal in enumerate(branch.formals):
            pat_names.append(self.gen_ast(formal))

            variables = []
            formal.collect_variables(variables)
            for variable in variables:
                if variable not in prod.all_pattern_variables:
                    prod.all_pattern_variables.append(variable)

        return Unifier(prod.all_pattern_variables)

    def gen_branches(self, module, prod, branches):
        """Return a chain of If nodes, one per branch, ending in NoMatch.

        Each test starts as Truth() and gets one (argument-less)
        PatternMatch() and-ed on per formal of the branch.
        """
        if not branches:
            return NoMatch(module=module, prod=prod, formals=[])
        branch = branches[0]
        branches = branches[1:]
        test = Truth()
        for fml_num in xrange(0, len(branch.formals)):
            p = PatternMatch()
            # self.emit("    term_match_unifier(%s, i%s, unifier) &&" %
            #     (pat_names[fml_num], fml_num)
            # )
            if not test:
                test = p
            else:
                test = And(test, p)
        return If(test,
            self.gen_branch(module, prod, branch),
            self.gen_branches(module, prod, branches)
        )

    def gen_branch(self, module, prod, branch):
        """Return a Block for one branch: a GetMatchedVar per pattern
        variable, a DeclareLocal per remaining local, then the body."""
        b = Block()

        # get variables which are found in patterns for this branch
        for var in prod.all_pattern_variables:
            #self.emit('const struct term *%s = unifier[%s];' %
            #    (var.name, var.index)
            #)
            #self.emit('assert(%s != NULL);' % var.name);
            b.append(GetMatchedVar(var))

        all_pattern_variable_names = [x.name for x in prod.all_pattern_variables]
        for local in branch.locals_:
            # locals that are also pattern variables were handled above
            if local not in all_pattern_variable_names:
                #self.emit("const struct term *%s;" % local)
                b.append(DeclareLocal(local))

        b.append(self.gen_ast(branch.body))
        return b

    def gen_ast(self, ast):
        """Return the codenode tree for a single AST node.

        The generated code refers to two target-level variables by name,
        'ok' and 'result'.  Raises NotImplementedError for node types not
        listed below.
        """
        if isinstance(ast, ack.And):
            # the right-hand side only runs if the left-hand side left ok set
            return Block(
                self.gen_ast(ast.lhs),
                If(GetVar('ok'),
                    self.gen_ast(ast.rhs)
                )
            )
        elif isinstance(ast, ack.Or):
            # state is saved first and restored before trying the rhs
            return Block(
                DeclState(),
                SaveState(),
                self.gen_ast(ast.lhs),
                If(Not(GetVar('ok')),
                    Block(
                        RestoreState(),
                        self.gen_ast(ast.rhs)
                    )
                )
            )
        elif isinstance(ast, ack.Call):
            prodref = ast.prodref
            prodmod = prodref.module or 'main'
            name = prodref.name
            args = ast.args
            if prodmod == '$':
                # builtins take exactly the number of arguments that
                # tamsin.sysmod.arity reports; any further args are ignored
                c = Builtin(name=name)
                arity = tamsin.sysmod.arity(name)
                for i in xrange(0, arity):
                    c.append(self.gen_ast(args[i]))
            else:
                c = Call(module=prodmod, name=name)
                for a in args:
                    c.append(self.gen_ast(a))
            return c
        elif isinstance(ast, ack.Send):
            return Block(
                self.gen_ast(ast.rule),
                # EMIT PATTERN ... which means generalizing the crap that is
                # currently in the ProdBranch case up there, way up there ^^^
                SetVar(self.gen_ast(ast.pattern), GetVar('result'))
            )
        elif isinstance(ast, ack.Set):
            return SetVar(VariableRef(ast.variable.name), self.gen_ast(ast.texpr))
        elif isinstance(ast, ack.While):
            # 'srname' holds the result of the last successful iteration
            # (initially the atom nil); after the loop the state is restored
            # and ok is forced to Truth.
            return Block(
                DeclareLocal('srname', MkAtom('nil')),
                DeclState(),
                SetVar(VariableRef('ok'), Truth()),
                While(GetVar('ok'),
                    Block(
                        SaveState(),
                        self.gen_ast(ast.rule),
                        If(GetVar('ok'),
                            SetVar(VariableRef('srname'), GetVar('result'))
                        )
                    )
                ),
                RestoreState(),
                SetVar(VariableRef('result'), GetVar('srname')),
                SetVar(VariableRef('ok'), Truth())
            )
        elif isinstance(ast, ack.Not):
            # the state is always restored; ok is then inverted
            return Block(
                DeclState(),
                SaveState(),
                self.gen_ast(ast.rule),
                RestoreState(),
                If(GetVar('ok'),
                    Block(
                        SetVar(VariableRef('ok'), Falsity()),
                        SetVar(VariableRef('result'), MkAtom("expected anything else"))
                    ), Block(
                        SetVar(VariableRef('ok'), Truth()),
                        SetVar(VariableRef('result'), MkAtom("nil"))
                    )
                )
            )
        elif isinstance(ast, ack.Using):
            return Block(
                ScannerPushEngine(ast.prodref.module, ast.prodref.name),
                self.gen_ast(ast.rule),
                ScannerPopEngine(),
            )
        elif isinstance(ast, ack.On):
            return Block(
                self.gen_ast(ast.texpr),
                #flat_name = self.new_name()
                #self.emit("const struct term *%s = term_flatten(%s);" % (flat_name, name))
                DeclState(),
                SaveState(),
                #self.emit("scanner->buffer = %s->atom;" % flat_name);
                #self.emit("scanner->size = %s->size;" % flat_name);
                #self.emit("scanner->position = 0;");
                #self.emit("scanner->reset_position = 0;");
                self.gen_ast(ast.rule),
                RestoreState()
            )
        elif isinstance(ast, ack.Concat):
            lhs = self.gen_ast(ast.lhs)
            rhs = self.gen_ast(ast.rhs)
            name = self.new_name()
            return Concat(name, lhs, rhs)
        elif isinstance(ast, ack.AtomNode):
            return MkAtom(ast.text)
        elif isinstance(ast, ack.VariableNode):
            return VariableRef(ast.name)
        elif isinstance(ast, ack.PatternVariableNode):
            return VariableRef(ast.name)
        elif isinstance(ast, ack.ConstructorNode):
            # the constructor's sub-terms are not generated: an empty
            # children list is passed
            return MkConstructor(ast.text, [])
        else:
            raise NotImplementedError(repr(ast))

# ---- src/tamsin/codenode.py ----
# encoding: UTF-8

# Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
# Distributed under a BSD-style license; see LICENSE for more information.


# TODO: some of these are definitely hierarchical, and some are definitely
# not.  make the distinction.  make the latter more like 3-address-code.


class CodeNode(object):
    """Generic node of the target-program tree.

    Positional constructor arguments are kept (as a list) in `args`;
    keyword arguments are kept in `kwargs`.

    """
    def __init__(self, *args, **kwargs):
        self.kwargs = kwargs
        self.args = list(args)

    def append(self, item):
        """Add `item` to the end of the positional children."""
        self.args.append(item)

    def __getitem__(self, key):
        """Look `key` up among the keyword arguments first; if it is not
        a keyword, use it to index the positional arguments."""
        try:
            return self.kwargs[key]
        except KeyError:
            return self.args[key]

    def __repr__(self):
        # Every positional argument is followed by ", " (including the
        # last one); keyword arguments are joined without a trailing comma.
        positional = ''
        if self.args:
            positional = ', '.join([repr(a) for a in self.args]) + ', '
        keywords = ''
        if self.kwargs:
            keywords = ', '.join(
                '%s=%r' % (key, self.kwargs[key]) for key in self.kwargs
            )
        return "%s(%s%s)" % (self.__class__.__name__, positional, keywords)


class Program(CodeNode):
    """Represents a target program."""


class Prototype(CodeNode):
    """Represents a prototype for a subroutine in a target program."""


class Subroutine(CodeNode):
    """Represents a subroutine in a target program.

    Note: does not call CodeNode.__init__, so `args`/`kwargs` are not set.

    """
    def __init__(self, module, prod, formals, children):
        self.module = module
        self.prod = prod
        self.formals = formals
        self.children = children

    def __repr__(self):
        fields = (self.module, self.prod, self.formals, self.children)
        return "Subroutine(%r, %r, %r, %r)" % fields


class Block(CodeNode):
    """Code node named Block; stores its arguments as given."""


class If(CodeNode):
    """Code node named If; stores its arguments as given."""


class While(CodeNode):
    """Code node named While; stores its arguments as given."""


class And(CodeNode):
    """Code node named And; stores its arguments as given."""


class Not(CodeNode):
    """Code node named Not; stores its arguments as given."""


class DeclareLocal(CodeNode):
    """Code node named DeclareLocal; stores its arguments as given."""


class GetVar(CodeNode):
    """name is the name of the target-language variable."""
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return "GetVar(%r)" % self.name


class SetVar(CodeNode):
    """ref is a VariableRef for the target-language variable.
    expr is an expression."""
    def __init__(self, ref, expr):
        self.ref = ref
        self.expr = expr

    def __repr__(self):
        fields = (self.ref, self.expr)
        return "SetVar(%r, %r)" % fields


class Concat(CodeNode):
    """Holds a `name` plus the `lhs` and `rhs` operands it was built with."""
    def __init__(self, name, lhs, rhs):
        self.name = name
        self.lhs = lhs
        self.rhs = rhs

    def __repr__(self):
        fields = (self.name, self.lhs, self.rhs)
        return "Concat(%r, %r, %r)" % fields


class Unifier(CodeNode):
    """Code node named Unifier; stores its arguments as given."""


class PatternMatch(CodeNode):
    """Code node named PatternMatch; stores its arguments as given."""


class Return(CodeNode):
    """Code node named Return; stores its arguments as given."""


class DeclState(CodeNode):
    """Code node named DeclState; stores its arguments as given."""


class SaveState(CodeNode):
    """Code node named SaveState; stores its arguments as given."""


class RestoreState(CodeNode):
    """Code node named RestoreState; stores its arguments as given."""


class Builtin(CodeNode):
    """Code node named Builtin; stores its arguments as given."""


class Call(CodeNode):
    """Code node named Call; stores its arguments as given."""


class NoMatch(CodeNode):
    """Code node named NoMatch; stores its arguments as given."""


class Truth(CodeNode):
    """Code node named Truth; stores its arguments as given."""


class Falsity(CodeNode):
    """Code node named Falsity; stores its arguments as given."""


class VariableRef(CodeNode):
    """Code node named VariableRef; stores its arguments as given."""


class MkAtom(CodeNode):
    """Code node named MkAtom; stores its arguments as given."""


class MkConstructor(CodeNode):
    """Represents some code in the target program to make a constructor."""
    def __init__(self, text, children):
        self.text = text
        self.children = children

    def __repr__(self):
        fields = (self.text, self.children)
        return "MkConstructor(%r, %r)" % fields


class ScannerPushEngine(CodeNode):
    """Code node named ScannerPushEngine; stores its arguments as given."""


class ScannerPopEngine(CodeNode):
    """Code node named ScannerPopEngine; stores its arguments as given."""


class GetMatchedVar(CodeNode):
    """Code node named GetMatchedVar; stores its arguments as given."""

# --------------------------------------------------------------------------------
# /src/tamsin/desugarer.py:
# --------------------------------------------------------------------------------
# encoding: UTF-8
#
# Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
# Distributed under a BSD-style license; see LICENSE for more information.

from tamsin.ast import (
    Program, Module, Production, ProdBranch,
    And, Or, Not, While, Call, Send, Set,
    Using, On, Concat, Fold, Prodref,
    TermNode, VariableNode, PatternVariableNode, AtomNode, ConstructorNode
)
from tamsin.event import EventProducer


class Desugarer(EventProducer):
    """Walks an AST and builds a new one.

    Its jobs are:

    *   Rewriting Fold() nodes in terms of Set/Send/While/Call.
    *   Merging the branches of same-named Production() nodes of a module
        into the first production of that name.
    *   Replacing each VariableNode() that occurs inside a pattern with an
        indexed PatternVariableNode().

    """
    def __init__(self, program, listeners=None):
        self.program = program
        self.listeners = listeners
        # True only while the formals of a branch, or the pattern of a
        # Send, are being walked.
        self.pattern = False
        # Index handed to the next PatternVariableNode.
        self.index = 0

    def desugar(self, ast):
        """Return the desugared counterpart of `ast`.

        Raises NotImplementedError for a node type it does not know.

        """
        if isinstance(ast, Program):
            return Program([self.desugar(module) for module in ast.modlist])
        if isinstance(ast, Module):
            return self._desugar_module(ast)
        if isinstance(ast, Production):
            branches = [self.desugar(branch) for branch in ast.branches]
            return Production(ast.name, branches)
        if isinstance(ast, ProdBranch):
            return self._desugar_branch(ast)
        if isinstance(ast, Or):
            return Or(self.desugar(ast.lhs), self.desugar(ast.rhs))
        if isinstance(ast, And):
            return And(self.desugar(ast.lhs), self.desugar(ast.rhs))
        if isinstance(ast, Using):
            return Using(self.desugar(ast.rule), ast.prodref)
        if isinstance(ast, On):
            return On(self.desugar(ast.rule), self.desugar(ast.texpr))
        if isinstance(ast, Call):
            return ast
        if isinstance(ast, Send):
            return self._desugar_send(ast)
        if isinstance(ast, Set):
            return Set(ast.variable, self.desugar(ast.texpr))
        if isinstance(ast, Not):
            return Not(self.desugar(ast.rule))
        if isinstance(ast, While):
            return While(self.desugar(ast.rule))
        if isinstance(ast, Concat):
            return Concat(self.desugar(ast.lhs), self.desugar(ast.rhs))
        if isinstance(ast, AtomNode):
            return ast
        if isinstance(ast, ConstructorNode):
            contents = [self.desugar(child) for child in ast.contents]
            return ConstructorNode(ast.text, contents)
        if isinstance(ast, VariableNode):
            return self._desugar_variable(ast)
        if isinstance(ast, Fold):
            return self._desugar_fold(ast)
        raise NotImplementedError(repr(ast))

    def _desugar_module(self, ast):
        """Desugar each production; fold repeated names into the first."""
        merged = []
        for original in ast.prodlist:
            prod = self.desugar(original)
            existing = None
            for candidate in merged:
                if candidate.name == prod.name:
                    existing = candidate
                    break
            if existing is None:
                merged.append(prod)
            else:
                existing.branches.extend(prod.branches)
        return Module(ast.name, merged)

    def _desugar_branch(self, ast):
        """Desugar the formals in pattern mode (index restarted at 0),
        then the body in normal mode."""
        self.pattern = True
        self.index = 0
        formals = [self.desugar(formal) for formal in ast.formals]
        self.pattern = False
        return ProdBranch(formals, [], self.desugar(ast.body))

    def _desugar_send(self, ast):
        """Desugar the pattern (in pattern mode) before the rule."""
        self.pattern = True
        pattern = self.desugar(ast.pattern)
        self.pattern = False
        return Send(self.desugar(ast.rule), pattern)

    def _desugar_variable(self, ast):
        """In pattern mode, hand out the next index; otherwise pass through."""
        if not self.pattern:
            return ast
        index = self.index
        self.index = index + 1
        return PatternVariableNode(ast.name, index)

    def _desugar_fold(self, ast):
        """Expand a Fold into: set _1 to the initial value, repeatedly send
        the rule's result to _2 and accumulate into _1, then return _1."""
        under1 = VariableNode('_1')
        under2 = VariableNode('_2')
        set_ = Set(under1, ast.initial)
        send_ = Send(self.desugar(ast.rule), under2)
        if ast.tag is None:
            accumulated = Concat(under1, under2)
        else:
            assert isinstance(ast.tag, AtomNode)
            accumulated = ConstructorNode(ast.tag.text, [under2, under1])
        acc_ = Set(under1, accumulated)
        return_ = Call(Prodref('$', 'return'), [under1])
        return And(And(set_, While(And(send_, acc_))), return_)

# --------------------------------------------------------------------------------
# /src/tamsin/event.py:
# --------------------------------------------------------------------------------
# encoding: UTF-8
#
# Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
# Distributed under a BSD-style license; see LICENSE for more information.

import sys


class EventProducer(object):
    """Mixin that forwards tagged events to a list of listeners.

    The listeners are kept in `self.listeners`.  A missing or None
    `listeners` attribute is treated as "no listeners yet" and replaced
    by an empty list on first use.

    """
    def event(self, tag, *data):
        """Announce `tag` and `data` to every subscribed listener."""
        # getattr() so that a producer which never assigned `listeners`
        # does not fail with AttributeError.
        if getattr(self, 'listeners', None) is None:
            self.listeners = []
        for listener in self.listeners:
            listener.announce(tag, *data)

    def subscribe(self, listener):
        """Register `listener`; its announce(tag, *data) will be called."""
        if getattr(self, 'listeners', None) is None:
            self.listeners = []
        self.listeners.append(listener)


class DebugEventListener(object):
    """Listener that prints every event it receives to standard output,
    indented by the current interpreter nesting depth."""
    def __init__(self):
        self.indent = 0

    def listen_to(self, producer):
        """Subscribe this listener to `producer`."""
        producer.subscribe(self)

    def putstr(self, s):
        """Print `s` prefixed by `indent` spaces and flush stdout."""
        print (self.indent * ' ' + s)
        sys.stdout.flush()

    def announce(self, tag, *data):
        """Print the event, then dump any data item that has a `dump`
        attribute.

        'enter_interpreter' increases and 'leave_interpreter' decreases
        the indentation before the event is printed.

        """
        if tag == 'enter_interpreter':
            self.indent += 1
        if tag == 'leave_interpreter':
            self.indent -= 1

        # Every event is reported.  (Tag-based filtering used to follow
        # this section but sat after an unconditional return and so
        # could never run; it has been removed.)
        self.putstr("%s %r" % (tag, data))
        for d in data:
            if getattr(d, 'dump', None) is not None:
                d.dump(self.indent)

# --------------------------------------------------------------------------------
# /src/tamsin/interpreter.py:
# --------------------------------------------------------------------------------
# encoding: UTF-8
#
# Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
# Distributed under a BSD-style license; see LICENSE for more information.


from tamsin.ast import (
    Production, And, Or, Not, While, Call, Send, Set, Using, On,
    Prodref, Concat, TermNode
)
from tamsin.buffer import StringBuffer
from tamsin.term import Term, Atom
from tamsin.event import EventProducer
from tamsin.scanner import (
    ByteScannerEngine, UTF8ScannerEngine, ProductionScannerEngine
)
import tamsin.sysmod


class Context(EventProducer):
    """A stack of variable scopes; each scope is a dict of name -> Term.

    Only the innermost (last) scope is consulted by fetch() and store().

    """
    def __init__(self, listeners=None):
        self.listeners = listeners
        self.scopes = []

    def __repr__(self):
        return "Context(%r)" % (self.scopes,)

    def push_scope(self, purpose):
        """Open a new, empty innermost scope.  `purpose` is not used."""
        self.scopes.append({})
        self.event('push_scope', self)

    def pop_scope(self, purpose):
        """Discard the innermost scope.  `purpose` is not used."""
        self.scopes.pop()
        self.event('pop_scope', self)

    def clone(self):
        """Return a new Context whose scopes are shallow copies of ours."""
        duplicate = Context(listeners=self.listeners)
        duplicate.scopes = [scope.copy() for scope in self.scopes]
        return duplicate

    def fetch(self, name):
        """Return the value bound to `name` in the innermost scope."""
        innermost = self.scopes[-1]
        self.event('fetch', name, innermost.get(name, 'undefined'), innermost)
        return innermost[name]

    def store(self, name, value):
        """Bind `name` to `value` (which must be a Term) in the innermost
        scope."""
        assert(isinstance(value, Term)), "not a Term: %r" % value
        innermost = self.scopes[-1]
        self.event('store', name, innermost.get(name, 'undefined'), value)
        innermost[name] = value


class Interpreter(EventProducer):
    """Evaluates a (desugared) program AST against a Scanner."""
    def __init__(self, program, scanner, listeners=None):
        self.listeners = listeners
        self.program = program
        self.scanner = scanner
        self.context = Context(listeners=self.listeners)

    def __repr__(self):
        fields = (self.program, self.scanner, self.context)
        return "Interpreter(%r, %r, %r)" % fields

    ### interpreter proper ---------------------------------- ###

    def interpret_program(self, program):
        """Interpret the 'main:main' production of `program`.

        Raises ValueError if there is no such production.

        """
        main = program.find_production(Prodref('main', 'main'))
        if not main:
            raise ValueError("no 'main:main' production defined")
        return self.interpret(main)

    def interpret(self, ast, args=None):
        """Returns a pair (bool, result) where bool is True if it
        succeeded and False if it failed.

        """
        self.event('interpret_ast', ast)
        if isinstance(ast, Production):
            return self._interpret_production(ast, args)
        elif isinstance(ast, And):
            return self._interpret_and(ast)
        elif isinstance(ast, Or):
            return self._interpret_or(ast)
        elif isinstance(ast, Call):
            return self._interpret_call(ast)
        elif isinstance(ast, Send):
            return self._interpret_send(ast)
        elif isinstance(ast, Using):
            return self._interpret_using(ast)
        elif isinstance(ast, On):
            return self._interpret_on(ast)
        elif isinstance(ast, Set):
            return self._interpret_set(ast)
        elif isinstance(ast, Not):
            return self._interpret_not(ast)
        elif isinstance(ast, While):
            return self._interpret_while(ast)
        elif isinstance(ast, Concat):
            return self._interpret_concat(ast)
        elif isinstance(ast, TermNode):
            return (True, ast.to_term())
        else:
            raise NotImplementedError(repr(ast))

    def _interpret_production(self, ast, args):
        """Pick the first branch whose formals match `args`, bind the
        matched variables in a fresh scope and interpret its body."""
        prod_name = ast.name
        bindings = False
        branch = None
        for candidate in ast.branches:
            formals = [self.interpret(f)[1] for f in candidate.formals]
            self.event('call_args', formals, args)
            # (An alternative "parse the argument with the formals" path
            # was sketched here in the past; only list formals are
            # handled.)
            if isinstance(formals, list):
                bindings = Term.match_all(formals, args)
                self.event('call_bindings', bindings)
                if bindings != False:
                    branch = candidate
                    break
        if branch is None:
            raise ValueError("No '%s' production matched arguments %r" %
                (prod_name, args)
            )

        self.context.push_scope(prod_name)
        # Compared against False explicitly: an empty dict of bindings
        # still counts as a successful match.
        if bindings != False:
            for var_name in bindings.keys():
                self.context.store(var_name, bindings[var_name])
        self.event('begin_interpret_rule', branch.body)
        assert branch.body, repr(ast)
        outcome = self.interpret(branch.body)
        self.event('end_interpret_rule', branch.body)
        self.context.pop_scope(ast.name)
        (success, result) = outcome
        return (success, result)

    def _interpret_and(self, ast):
        """Interpret lhs; only if it succeeds, interpret rhs."""
        (success, value_lhs) = self.interpret(ast.lhs)
        if not success:
            return (False, value_lhs)
        (success, value_rhs) = self.interpret(ast.rhs)
        return (success, value_rhs)

    def _interpret_or(self, ast):
        """Try lhs; on failure restore context and scanner, then try rhs."""
        saved_context = self.context.clone()
        self.scanner.save_state()
        self.event('begin_or', ast.lhs, ast.rhs, saved_context)
        (succeeded, result) = self.interpret(ast.lhs)
        if not succeeded:
            self.event('fail_or', self.context, self.scanner, result)
            self.context = saved_context
            self.scanner.restore_state("after or")
            return self.interpret(ast.rhs)
        self.event('succeed_or', result)
        self.scanner.pop_state()
        return (True, result)

    def _interpret_call(self, ast):
        """Evaluate and expand the arguments, then call either a `$`
        builtin or a production of the program."""
        prodref = ast.prodref
        # Two separate passes: all arguments are interpreted before any
        # of them is expanded.
        raw_args = [self.interpret(x)[1] for x in ast.args]
        args = [x.expand(self.context) for x in raw_args]
        for a in args:
            assert isinstance(a, Term)
        if prodref.module == '$':
            return tamsin.sysmod.call(prodref.name, self, args)
        prod = self.program.find_production(prodref)
        assert prod is not None, "unresolved: " + repr(prodref)
        self.event('call_candidates', prod)
        return self.interpret(prod, args=args)

    def _interpret_send(self, ast):
        """Interpret the rule and match its result against the pattern,
        storing the resulting bindings in the current scope."""
        (success, result) = self.interpret(ast.rule)
        formals = [self.interpret(f)[1] for f in [ast.pattern]]
        bindings = Term.match_all(formals, [result])
        if bindings == False:
            return (False, Atom('nomatch'))
        for var_name in bindings.keys():
            self.context.store(var_name, bindings[var_name])
        return (success, result)

    def _interpret_using(self, ast):
        """Interpret the rule with a different scanner engine pushed."""
        prodref = ast.prodref
        scanner_name = prodref.name
        is_builtin = prodref.module == '$'
        if is_builtin and scanner_name == 'byte':
            new_engine = ByteScannerEngine()
        elif is_builtin and scanner_name == 'utf8':
            new_engine = UTF8ScannerEngine()
        else:
            prod = self.program.find_production(prodref)
            if not prod:
                raise ValueError("No such scanner '%s'" % scanner_name)
            new_engine = ProductionScannerEngine(self, prod)
        self.scanner.push_engine(new_engine)
        self.event('enter_with')
        (succeeded, result) = self.interpret(ast.rule)
        self.event('leave_with', succeeded, result)
        self.scanner.pop_engine()
        return (succeeded, result)

    def _interpret_on(self, ast):
        """Interpret the rule with the scanner reading from the string
        form of the evaluated term, then reinstall the previous buffer."""
        (success, result) = self.interpret(ast.texpr)
        buffer = str(result.expand(self.context))
        self.event('interpret_on_buffer', buffer)
        previous_buffer = self.scanner.get_buffer()
        self.scanner.install_buffer(StringBuffer(buffer))
        (success, result) = self.interpret(ast.rule)
        self.scanner.install_buffer(previous_buffer)
        return (success, result)

    def _interpret_set(self, ast):
        """Evaluate and expand the term, store it under the variable's
        name; always succeeds."""
        (success, variable) = self.interpret(ast.variable)
        (success, term) = self.interpret(ast.texpr)
        result = term.expand(self.context)
        self.context.store(variable.name, result)
        return (True, result)

    def _interpret_not(self, ast):
        """Succeed (with nil) iff the rule fails; context and scanner are
        restored either way."""
        expr = ast.rule
        saved_context = self.context.clone()
        self.scanner.save_state()
        self.event('begin_not', expr, saved_context)
        (succeeded, result) = self.interpret(expr)
        self.context = saved_context
        self.scanner.restore_state("after not")
        if not succeeded:
            return (True, Atom('nil'))
        message = self.scanner.error_message(
            "anything else", self.scanner.peek()
        )
        return (False, Atom(message))

    def _interpret_while(self, ast):
        """Repeat the rule until it fails; the failing attempt is rolled
        back.  Always succeeds with the last successful result (or nil)."""
        result = Atom('nil')
        self.event('begin_while')
        successful_result = result
        succeeded = True
        while succeeded:
            saved_context = self.context.clone()
            self.scanner.save_state()
            (succeeded, result) = self.interpret(ast.rule)
            if succeeded:
                self.scanner.pop_state()
                successful_result = result
                self.event('repeating_while', result)
            else:
                self.scanner.restore_state("after while")
                self.context = saved_context
        self.event('end_while', result)
        return (True, successful_result)

    def _interpret_concat(self, ast):
        """Concatenate the string forms of both expanded operands."""
        (success, lhs) = self.interpret(ast.lhs)
        lhs_text = str(lhs.expand(self.context))
        (success, rhs) = self.interpret(ast.rhs)
        rhs_text = str(rhs.expand(self.context))
        return (True, Atom(lhs_text + rhs_text))

# --------------------------------------------------------------------------------
# /src/tamsin/main.py:
# --------------------------------------------------------------------------------
# encoding: UTF-8
#
# Copyright (c)2014 Chris Pressey, Cat's Eye Technologies.
# Distributed under a BSD-style license; see LICENSE for more information.
5 | 6 | import os 7 | import subprocess 8 | import sys 9 | 10 | from tamsin.buffer import FileBuffer, StringBuffer 11 | from tamsin.event import DebugEventListener 12 | from tamsin.term import Atom 13 | from tamsin.scanner import ( 14 | Scanner, EOF, UTF8ScannerEngine, TamsinScannerEngine 15 | ) 16 | from tamsin.parser import Parser 17 | from tamsin.interpreter import Interpreter 18 | from tamsin.desugarer import Desugarer 19 | from tamsin.analyzer import Analyzer 20 | from tamsin.compiler import Compiler # to be replaced by... 21 | from tamsin.codegen import CodeGen 22 | from tamsin.backends.c import Emitter 23 | 24 | 25 | def parse(filename): 26 | with open(filename, 'r') as f: 27 | scanner = Scanner( 28 | FileBuffer(f, filename=filename), 29 | #StringBuffer(f.read(), filename=filename), 30 | engines=(TamsinScannerEngine(),) 31 | ) 32 | parser = Parser(scanner) 33 | ast = parser.grammar() 34 | desugarer = Desugarer(ast) 35 | ast = desugarer.desugar(ast) 36 | return ast 37 | 38 | 39 | def parse_and_check_args(args): 40 | ast = None 41 | for arg in args: 42 | next_ast = parse(arg) 43 | if ast is None: 44 | ast = next_ast 45 | else: 46 | ast.incorporate(next_ast) 47 | analyzer = Analyzer(ast) 48 | ast = analyzer.analyze(ast) 49 | return ast 50 | 51 | 52 | def run(ast, listeners=None): 53 | scanner = Scanner( 54 | FileBuffer(sys.stdin, filename=''), 55 | #StringBuffer(sys.stdin.read(), filename=''), 56 | engines=(UTF8ScannerEngine(),), 57 | listeners=listeners 58 | ) 59 | interpreter = Interpreter( 60 | ast, scanner, listeners=listeners 61 | ) 62 | (succeeded, result) = interpreter.interpret_program(ast) 63 | if not succeeded: 64 | sys.stderr.write(str(result) + "\n") 65 | sys.exit(1) 66 | print str(result) 67 | 68 | 69 | def main(args, tamsin_dir='.'): 70 | listeners = [] 71 | if args[0] == '--debug': 72 | listeners.append(DebugEventListener()) 73 | args = args[1:] 74 | if args[0] == 'scan': 75 | with open(args[1], 'r') as f: 76 | scanner = Scanner( 77 | 
FileBuffer(f, filename=args[1]), 78 | engines=(TamsinScannerEngine(),), 79 | listeners=listeners 80 | ) 81 | tok = None 82 | while tok is not EOF: 83 | tok = scanner.scan() 84 | if tok is not EOF: 85 | print Atom(tok).repr() 86 | print 87 | elif args[0] == 'parse': 88 | parser = Parser.for_file(args[1]) 89 | ast = parser.grammar() 90 | print str(ast) 91 | elif args[0] == 'desugar': 92 | parser = Parser.for_file(args[1]) 93 | ast = parser.grammar() 94 | desugarer = Desugarer(ast) 95 | ast = desugarer.desugar(ast) 96 | print str(ast) 97 | elif args[0] == 'analyze': 98 | ast = parse_and_check_args(args[1:]) 99 | print str(ast) 100 | elif args[0] == 'compile': 101 | ast = parse_and_check_args(args[1:]) 102 | compiler = Compiler(ast, sys.stdout) 103 | compiler.compile() 104 | elif args[0] == 'codegen': 105 | ast = parse_and_check_args(args[1:]) 106 | generator = CodeGen(ast) 107 | result = generator.generate() 108 | emitter = Emitter(result, sys.stdout) 109 | emitter.go() 110 | elif args[0] == 'doublecompile': 111 | # http://www.youtube.com/watch?v=6WxJECOFg8w 112 | ast = parse_and_check_args(args[1:]) 113 | c_filename = 'foo.c' 114 | exe_filename = './foo' 115 | with open(c_filename, 'w') as f: 116 | compiler = Compiler(ast, f) 117 | compiler.compile() 118 | c_src_dir = os.path.join(tamsin_dir, 'c_src') 119 | command = ("gcc", "-g", "-I%s" % c_src_dir, "-L%s" % c_src_dir, 120 | c_filename, "-o", exe_filename, "-ltamsin") 121 | try: 122 | subprocess.check_call(command) 123 | exit_code = 0 124 | except subprocess.CalledProcessError: 125 | exit_code = 1 126 | #subprocess.call(('rm', '-f', c_filename)) 127 | sys.exit(exit_code) 128 | elif args[0] == 'loadngo': 129 | ast = parse_and_check_args(args[1:]) 130 | c_filename = 'foo.c' 131 | exe_filename = './foo' 132 | with open(c_filename, 'w') as f: 133 | compiler = Compiler(ast, f) 134 | compiler.compile() 135 | c_src_dir = os.path.join(tamsin_dir, 'c_src') 136 | command = ("gcc", "-g", "-I%s" % c_src_dir, "-L%s" % c_src_dir, 
137 | c_filename, "-o", exe_filename, "-ltamsin") 138 | try: 139 | subprocess.check_call(command) 140 | subprocess.check_call((exe_filename,)) 141 | exit_code = 0 142 | except subprocess.CalledProcessError: 143 | exit_code = 1 144 | subprocess.call(('rm', '-f', c_filename, exe_filename)) 145 | sys.exit(exit_code) 146 | else: 147 | ast = parse_and_check_args(args) 148 | run(ast, listeners=listeners) 149 | -------------------------------------------------------------------------------- /src/tamsin/scanner.py: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | 3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies. 4 | # Distributed under a BSD-style license; see LICENSE for more information. 5 | 6 | from tamsin.buffer import Buffer 7 | from tamsin.event import EventProducer 8 | from tamsin.term import Term 9 | 10 | 11 | EOF = object() 12 | 13 | 14 | class Scanner(EventProducer): 15 | def __init__(self, buffer, engines=None, listeners=None): 16 | """Create a new Scanner object. 17 | 18 | """ 19 | self.listeners = listeners 20 | self.event('set_buffer', buffer) 21 | assert isinstance(buffer, Buffer) 22 | self.buffer = buffer 23 | self.engines = [] 24 | if engines is not None: 25 | for engine in engines: 26 | self.push_engine(engine) 27 | 28 | def __repr__(self): 29 | return "Scanner(%r, position=%r)" % ( 30 | self.buffer, self.position 31 | ) 32 | 33 | def get_buffer(self): 34 | """Returns an object which represents the current Buffer of this 35 | Scanner. 36 | 37 | """ 38 | return self.buffer 39 | 40 | def install_buffer(self, state): 41 | """Restores the Buffer of this Scanner to that which was saved by 42 | a previous call to get_buffer(). 
43 | 44 | """ 45 | self.buffer = state 46 | 47 | def push_engine(self, engine): 48 | self.engines.append(engine) 49 | 50 | def pop_engine(self): 51 | engine = self.engines.pop() 52 | 53 | def save_state(self): 54 | return self.buffer.save_state() 55 | 56 | def restore_state(self, reason): 57 | return self.buffer.restore_state() 58 | 59 | def pop_state(self): 60 | return self.buffer.pop_state() 61 | 62 | def chop(self, amount): 63 | """Returns amount characters from the buffer and advances the 64 | scan position by amount. 65 | 66 | Should only be used by ScannerEngines. 67 | 68 | """ 69 | return self.buffer.chop(amount) 70 | 71 | def first(self, amount): 72 | """Returns amount characters from the buffer. Does not advance the 73 | scan position. 74 | 75 | Should only be used by ScannerEngines, and then only in error 76 | reporting. 77 | 78 | """ 79 | return self.buffer.first(amount) 80 | 81 | def is_at_eof(self): 82 | """Returns True iff there is no more input to scan. 83 | 84 | Should only be used by ScannerEngines. Parsing code should check 85 | to see if ... something 86 | 87 | """ 88 | return self.first(1) == '' 89 | 90 | def is_at_utf8(self): 91 | """Returns the number of bytes following that comprise a UTF-8 92 | character. Will be 0 for non-UTF-8 characters. 93 | 94 | Should only be used by ScannerEngines. 
95 | 96 | """ 97 | k = ord(self.first(1)) 98 | if k & 0b11100000 == 0b11000000: 99 | return 2 100 | elif k & 0b11110000 == 0b11100000: 101 | return 3 102 | elif k & 0b11111000 == 0b11110000: 103 | return 4 104 | else: 105 | return 0 106 | 107 | def startswith(self, strings): 108 | for s in strings: 109 | if self.first(len(s)) == s: 110 | return True 111 | return False 112 | 113 | def isalnum(self): 114 | return self.first(1).isalnum() 115 | 116 | def error_message(self, expected, found): 117 | if found is EOF: 118 | found = 'EOF' 119 | else: 120 | found = "'%s'" % found 121 | return ( 122 | "expected %s but found %s at line %s, column %s in '%s'" % 123 | (expected, found, 124 | self.buffer.line_number, 125 | self.buffer.column_number, 126 | self.buffer.filename) 127 | ) 128 | 129 | def error(self, expected, found): 130 | raise ValueError(self.error_message(expected, found)) 131 | 132 | def scan(self): 133 | """Returns the next token from the buffer. 134 | 135 | This method consumes the token. If you want to just see 136 | what the next token would be, call peek() instead. 137 | 138 | The returned token will always be a raw string, possibly 139 | containing UTF-8 sequences, possibly not. 
140 | 141 | """ 142 | token = self.engines[-1].scan_impl(self) 143 | #import sys 144 | #print >>sys.stderr, token 145 | assert not isinstance(token, unicode), repr(token) 146 | self.event('scanned', self, token) 147 | return token 148 | 149 | def peek(self): 150 | self.buffer.save_state() 151 | token = self.scan() 152 | self.buffer.restore_state() 153 | return token 154 | 155 | def consume(self, t): 156 | if isinstance(t, unicode): 157 | t = t.encode('UTF-8') 158 | assert not isinstance(t, unicode) 159 | self.event('consume', t) 160 | self.buffer.save_state() 161 | s = self.scan() 162 | if s == t: 163 | self.buffer.pop_state() 164 | return t 165 | else: 166 | self.buffer.restore_state() 167 | return None 168 | 169 | def expect(self, t): 170 | r = self.consume(t) 171 | if r is None: 172 | self.error("'%s'" % t, self.scan()) 173 | return r 174 | 175 | def dump(self, indent=1): 176 | print "==" * indent + "%r" % self 177 | print "--" * indent + "engines: %r" % repr(self.engines) 178 | print "--" * indent + "buffer: %r" % self.buffer 179 | 180 | 181 | class ScannerEngine(object): 182 | def scan_impl(self, scanner): 183 | """Should always return a non-Unicode string.""" 184 | raise NotImplementedError 185 | 186 | 187 | CLOSE_QUOTE = { 188 | '"': '"', 189 | '\'': '\'', 190 | } 191 | 192 | ESCAPE_SEQUENCE = { 193 | 'r': "\r", 194 | 'n': "\n", 195 | 't': "\t", 196 | "'": "'", 197 | '"': '"', 198 | '\\': '\\', 199 | } 200 | 201 | 202 | class TamsinScannerEngine(ScannerEngine): 203 | def scan_impl(self, scanner): 204 | while not scanner.is_at_eof() and scanner.startswith(('#', ' ', '\t', '\r', '\n')): 205 | while not scanner.is_at_eof() and scanner.startswith((' ', '\t', '\r', '\n')): 206 | scanner.chop(1) 207 | while not scanner.is_at_eof() and scanner.startswith(('#',)): 208 | while not scanner.is_at_eof() and not scanner.startswith(('\n',)): 209 | scanner.chop(1) 210 | if not scanner.is_at_eof(): 211 | scanner.chop(1) 212 | 213 | if scanner.is_at_eof(): 214 | return EOF 
215 | 216 | if scanner.startswith(('&&', '||', '->', '<-', '<<', '>>')): 217 | return scanner.chop(2) 218 | 219 | c = scanner.is_at_utf8() 220 | if c > 0: 221 | c = scanner.chop(c).decode('UTF-8') 222 | if c in (u'→', u'←', u'«', u'»'): 223 | return c.encode('UTF-8') 224 | elif c == u'“': 225 | return self.consume_quoted(scanner, 226 | u'“'.encode('UTF-8'), u'”'.encode('UTF-8') 227 | ) 228 | else: 229 | scanner.error('identifiable character', scanner.first(1)) 230 | 231 | if scanner.startswith(('=', '(', ')', '[', ']', '{', '}', '!', ':', '/', 232 | '|', '&', ',', '.', '@', '+', '$', 233 | )): 234 | return scanner.chop(1) 235 | 236 | for quote in (CLOSE_QUOTE.keys()): 237 | if scanner.startswith(quote): 238 | scanner.chop(len(quote)) 239 | return self.consume_quoted(scanner, quote, CLOSE_QUOTE[quote]) 240 | 241 | if scanner.isalnum(): 242 | token = '' 243 | while not scanner.is_at_eof() and (scanner.isalnum() or 244 | scanner.startswith(('_',))): 245 | token += scanner.chop(1) 246 | return token 247 | 248 | scanner.error('identifiable character', scanner.first(1)) 249 | 250 | def consume_quoted(self, scanner, quote, close_quote): 251 | # assumes the start quote has already been chopped 252 | token = quote 253 | while (not scanner.is_at_eof() and 254 | not scanner.startswith(close_quote)): 255 | char = scanner.chop(1) 256 | if char == '\\': 257 | char = scanner.chop(1) 258 | if char in ESCAPE_SEQUENCE: 259 | char = ESCAPE_SEQUENCE[char] 260 | elif char == 'x': 261 | char = chr(int(scanner.chop(2), 16)) 262 | else: 263 | scanner.error('legal escape sequence', '\\' + char) 264 | token += char 265 | scanner.chop(len(close_quote)) # chop ending quote 266 | # we add the specific close quote we expect, in case it was EOF 267 | token += close_quote 268 | return token 269 | 270 | 271 | class UTF8ScannerEngine(ScannerEngine): 272 | def scan_impl(self, scanner): 273 | if scanner.is_at_eof(): 274 | return EOF 275 | c = scanner.is_at_utf8() 276 | if c > 0: 277 | return 
scanner.chop(c) 278 | return scanner.chop(1) 279 | 280 | 281 | class ByteScannerEngine(ScannerEngine): 282 | def scan_impl(self, scanner): 283 | if scanner.is_at_eof(): 284 | return EOF 285 | return scanner.chop(1) 286 | 287 | 288 | class ProductionScannerEngine(ScannerEngine): 289 | """A ScannerEngine that uses a production of the Tamsin program to 290 | scan the input. 291 | 292 | """ 293 | def __init__(self, interpreter, production): 294 | self.interpreter = interpreter 295 | self.production = production 296 | 297 | def scan_impl(self, scanner): 298 | if scanner.is_at_eof(): 299 | return EOF 300 | 301 | # This will cause the scanner to have another engine pushed onto 302 | # it. We rely on that engine to actually get us the token, and it 303 | # will update the scanner for us. 304 | 305 | assert scanner is self.interpreter.scanner 306 | 307 | # default to this so you don't shoot yourself in the foot 308 | scanner.push_engine(UTF8ScannerEngine()) 309 | 310 | result = self.interpreter.interpret(self.production) 311 | (success, token) = result 312 | 313 | scanner.pop_engine() 314 | 315 | if success: 316 | self.interpreter.event('production_scan', self.production, token) 317 | assert isinstance(token, Term), repr(token) 318 | if token is EOF: 319 | return token 320 | return str(token) 321 | else: 322 | return EOF 323 | -------------------------------------------------------------------------------- /src/tamsin/sysmod.py: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | 3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies. 4 | # Distributed under a BSD-style license; see LICENSE for more information. 5 | 6 | # Python version of Tamsin's $ module. 

import sys

from tamsin.term import Atom, Constructor
from tamsin.scanner import EOF


# Tamsin names that cannot be used directly as Python function names.
TRANSLATOR = {'return': 'return_', 'print': 'print_'}


def _lookup(name):
    """Return the function implementing the `$` production `name`.

    Only module globals that carry an `arity` attribute count as
    productions; anything else (imported modules, classes, constants,
    the dispatch helpers themselves) raises NotImplementedError, exactly
    as an unknown name does.

    """
    name = TRANSLATOR.get(name, name)
    impl = globals().get(name)
    if getattr(impl, 'arity', None) is None:
        raise NotImplementedError(name)
    return impl


def call(name, interpreter, args):
    """Invoke the `$` production `name` with `args`; the interpreter is
    passed as the implementation's first argument.  Returns the
    implementation's (success, term) pair.

    """
    return _lookup(name)(interpreter, args)


def arity(name):
    """Return the number of arguments the `$` production `name` takes."""
    return _lookup(name).arity


# Every implementation below takes the interpreter as `self` plus a list
# of argument terms, and returns a (success, term) pair.

def return_(self, args):
    """Always succeed, yielding the argument."""
    return (True, args[0])
return_.arity = 1


def fail(self, args):
    """Always fail, yielding the argument as the failure term."""
    return (False, args[0])
fail.arity = 1


def expect(self, args):
    """Consume the token equal to the flattened argument, or fail with
    an error message naming the token that was actually upcoming.

    """
    upcoming_token = self.scanner.peek()
    term = args[0]
    token = str(term)
    if self.scanner.consume(token):
        return (True, term)
    else:
        return (False,
            Atom(self.scanner.error_message("'%s'" % token, upcoming_token))
        )
expect.arity = 1


def eof(self, args):
    """Succeed, yielding the empty atom, only at end of input."""
    token = self.scanner.peek()
    if token is EOF:
        # an Atom rather than a bare '' so that, like every other
        # production here, the result is a Term
        return (True, Atom(''))
    else:
        return (False,
            Atom(self.scanner.error_message('EOF', token))
        )
eof.arity = 0


def any(self, args):
    """Consume and yield the next token, failing only at end of input.

    Note: shadows the `any` builtin within this module.

    """
    if self.scanner.peek() is not EOF:
        return (True, Atom(self.scanner.scan()))
    else:
        return (False,
            Atom(self.scanner.error_message('any token', EOF))
        )
any.arity = 0


def alnum(self, args):
    """Consume the next token if its first character is alphanumeric."""
    token = self.scanner.peek()
    # token[:1] rather than token[0]: an empty token simply fails to match
    if token is not EOF and token[:1].isalnum():
        return (True, Atom(self.scanner.scan()))
    else:
        return (False,
            Atom(self.scanner.error_message('alphanumeric', token))
        )
alnum.arity = 0


def upper(self, args):
    """Consume the next token if its first character is uppercase."""
    token = self.scanner.peek()
    if token is not EOF and token[:1].isupper():
        return (True, Atom(self.scanner.scan()))
    else:
        return (False,
            Atom(self.scanner.error_message('uppercase', token))
        )
upper.arity = 0


def startswith(self, args):
    """Consume the next token if its first character starts with the
    flattened argument.

    """
    token = self.scanner.peek()
    if token is not EOF and token[:1].startswith((str(args[0]),)):
        return (True, Atom(self.scanner.scan()))
    else:
        return (False,
            Atom(self.scanner.error_message("'%s...'" % args[0], token))
        )
startswith.arity = 1


def equal(self, args):
    """Succeed with the first argument if it matches the second."""
    if args[0].match(args[1]) != False:
        return (True, args[0])
    else:
        return (False, Atom("term '%s' does not equal '%s'" %
                            (args[0], args[1])))
equal.arity = 2


def unquote(self, args):
    """Strip the left quote args[1] and right quote args[2] from the
    flattened args[0]; fail if it is not so quoted.

    """
    q = str(args[0])
    l = str(args[1])
    r = str(args[2])
    if (q.startswith(l) and q.endswith(r)):
        # an explicit end index (not -len(r)) also covers an empty
        # right quote
        return (True, Atom(q[len(l):len(q) - len(r)]))
    else:
        return (False, Atom("term '%s' is not quoted with '%s' and '%s'" %
                            (q, l, r)))
unquote.arity = 3


def mkterm(self, args):
    """Build a constructor tagged with the text of args[0] from the
    elements of the `list` term args[1]; with no elements, yield
    args[0] itself.

    """
    t = args[0]
    l = args[1]
    contents = []
    # walk the chain of 'list' constructors, collecting each head
    while isinstance(l, Constructor) and l.tag == 'list':
        contents.append(l.contents[0])
        l = l.contents[1]
    if contents:
        return (True, Constructor(t.text, contents))
    else:
        return (True, t)
mkterm.arity = 2


def reverse(self, args):
    """Yield args[0] reversed, with args[1] as the list sentinel."""
    return (True, args[0].reversed(args[1]))
reverse.arity = 2


def print_(self, args):
    """Write the flattened argument and a newline to stdout; yield it."""
    val = args[0]
    sys.stdout.write(str(val))
    sys.stdout.write("\n")
    return (True, val)
print_.arity = 1


def emit(self, args):
    """Write the flattened argument to stdout (no newline); yield it."""
    val = args[0]
    sys.stdout.write(str(val))
    return (True, val)
emit.arity = 1


def repr(self, args):
    """Yield an atom holding the reprified form of the argument.

    Note: shadows the `repr` builtin within this module.

    """
    val = args[0]
    val = Atom(val.repr())
    return (True, val)
repr.arity = 1


counter = 0
172 | 173 | def gensym(self, args): 174 | global counter 175 | counter += 1 176 | return (True, Atom(str(args[0]) + str(counter))) 177 | gensym.arity = 1 178 | 179 | 180 | def hexbyte(self, args): 181 | return (True, Atom(chr(int(args[0].text + args[1].text, 16)))) 182 | hexbyte.arity = 2 183 | 184 | 185 | def format_octal(self, args): 186 | return (True, Atom("%o" % ord(args[0].text[0]))) 187 | format_octal.arity = 1 188 | 189 | 190 | def length(self, args): 191 | return (True, Atom(str(len(str(args[0]))))) 192 | length.arity = 1 193 | -------------------------------------------------------------------------------- /src/tamsin/term.py: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | 3 | # Copyright (c)2014 Chris Pressey, Cat's Eye Technologies. 4 | # Distributed under a BSD-style license; see LICENSE for more information. 5 | 6 | # Note that __str__ and __repr__ and repr perform very different tasks: 7 | # __str__ : flattening operation on Tamsin terms 8 | # repr: reprifying operation on Tamsin terms 9 | # __repr__ : make a string that is valid Python code for constructing the Term 10 | 11 | 12 | BAREWORD = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz' 13 | PRINTABLE = (' !"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_' 14 | '`abcdefghijklmnopqrstuvwxyz{|}~') 15 | 16 | 17 | def repr_escape(t): 18 | if len(t) == 0: 19 | return "''" 20 | if all(c in BAREWORD for c in t): 21 | return t 22 | s = '' 23 | for c in t: 24 | if c == "'": 25 | s += r"\'" 26 | elif c == "\\": 27 | s += r"\\" 28 | elif ord(c) > 31 and ord(c) < 127: 29 | s += c 30 | else: 31 | s += r"\x%02x" % ord(c) 32 | return "'%s'" % s 33 | 34 | 35 | class Term(object): 36 | def expand(self, context): 37 | """Expands this term, returning a new term where, for all x, all 38 | occurrences of (VAR x) are replaced with the value of x in the 39 | given context. 
40 | 41 | """ 42 | return self 43 | 44 | def __str__(self): 45 | raise NotImplementedError 46 | 47 | def __repr__(self): 48 | raise NotImplementedError 49 | 50 | def repr(self): 51 | raise NotImplementedError 52 | 53 | @classmethod 54 | def match_all(_class, patterns, values): 55 | """Returns a dict of bindings if all values match all patterns, 56 | or False if there was a mismatch. 57 | 58 | """ 59 | i = 0 60 | bindings = {} 61 | while i < len(patterns): 62 | sub = patterns[i].match(values[i]) 63 | if sub == False: 64 | return False 65 | bindings.update(sub) 66 | i += 1 67 | return bindings 68 | 69 | def match(self, value): 70 | raise NotImplementedError 71 | 72 | 73 | class Atom(Term): 74 | def __init__(self, text): 75 | assert not isinstance(text, unicode) 76 | self.text = text 77 | 78 | def __str__(self): 79 | return self.text 80 | 81 | def __repr__(self): 82 | return "Atom(%r)" % (self.text) 83 | 84 | def repr(self): 85 | return repr_escape(self.text) 86 | 87 | def match(self, value): 88 | if not isinstance(value, Atom): 89 | return False 90 | if self.text == value.text: 91 | return {} 92 | else: 93 | return False 94 | 95 | def reversed(self, sentinel): 96 | if self.match(sentinel) != False: 97 | return self 98 | raise ValueError("malformed list") 99 | 100 | 101 | class Constructor(Term): 102 | def __init__(self, tag, contents): 103 | assert not isinstance(tag, unicode) 104 | self.tag = tag 105 | for c in contents: 106 | assert isinstance(c, Term), repr(c) 107 | self.contents = contents 108 | 109 | def expand(self, context): 110 | return Constructor(self.tag, [x.expand(context) for x in self.contents]) 111 | 112 | def __str__(self): 113 | return "%s(%s)" % ( 114 | self.tag, ', '.join([str(x) for x in self.contents]) 115 | ) 116 | 117 | def __repr__(self): 118 | return "Constructor(%r, %r)" % (self.tag, self.contents) 119 | 120 | def repr(self): 121 | return "%s(%s)" % ( 122 | repr_escape(self.tag), ', '.join([x.repr() for x in self.contents]) 123 | ) 124 | 125 
| def match(self, value): 126 | if not isinstance(value, Constructor): 127 | return False 128 | if self.tag != value.tag: 129 | return False 130 | if len(self.contents) != len(value.contents): 131 | return False 132 | bindings = {} 133 | i = 0 134 | while i < len(self.contents): 135 | b = self.contents[i].match(value.contents[i]) 136 | if b == False: 137 | return False 138 | bindings.update(b) 139 | i += 1 140 | return bindings 141 | 142 | def reversed(self, sentinel): 143 | acc = sentinel 144 | l = self 145 | tag = self.tag 146 | while isinstance(l, Constructor) and l.tag == tag: 147 | acc = Constructor(tag, [l.contents[0], acc]) 148 | if len(l.contents) < 2: 149 | break 150 | l = l.contents[1] 151 | if l.match(sentinel) == False: 152 | raise ValueError("malformed list %s" % l.repr()) 153 | return acc 154 | 155 | 156 | class Variable(Term): 157 | def __init__(self, name): 158 | assert not isinstance(name, unicode) 159 | assert name[0].isupper() or name[0] == u'_', name 160 | self.name = name 161 | 162 | def expand(self, context): 163 | return context.fetch(self.name) 164 | 165 | def __str__(self): 166 | return self.name 167 | 168 | def __repr__(self): 169 | return "Variable(%r)" % (self.name) 170 | 171 | def repr(self): 172 | return self.name 173 | 174 | def match(self, value): 175 | return {self.name: value} 176 | -------------------------------------------------------------------------------- /test-codegen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | YES=" 4 | eg/hello-world.tamsin eg/bits.tamsin eg/bitpair.tamsin 5 | eg/exciting-long.tamsin eg/list-of-chars.tamsin 6 | eg/modules.tamsin 7 | " 8 | 9 | FILES="eg/reverse.tamsin" 10 | 11 | NO="eg/eval-bool-expr.tamsin" 12 | 13 | for FILE in $FILES; do 14 | tamsin codegen $FILE || exit 1 15 | done 16 | -------------------------------------------------------------------------------- /test.sh: 
--------------------------------------------------------------------------------
#!/bin/sh

# Test driver.  Usage:
#   test.sh [-f FILE] [compiled|interpreted] [TEST]
# With no TEST, runs the interpreter, compiler, tcompiler and bootstrap
# suites in turn.

FILES="
    doc/Tamsin.markdown
    doc/System_Module.markdown
    doc/Tested_Examples.markdown
"
GLOB="eg/*.tamsin lib/*.tamsin mains/*.tamsin"

mkdir -p tmp

# "$1" is quoted in every test below so that an argument containing
# whitespace cannot break the `[` expression.
if [ "x$1" = 'x-f' ]; then
    shift
    echo "(Testing on Falderal files '$1' only)"
    FILES=$1
    shift
fi

MODE=compiled
if [ "x$1" = xcompiled ] || [ "x$1" = xinterpreted ]; then
    MODE=$1
    shift
fi

if [ "x$1" = x ]; then
    "$0" interpreter &&
      "$0" compiler &&
      "$0" tcompiler &&
      "$0" bootstrap &&
      echo "All tests passed!"
    exit $?
fi

if [ "x$1" = xtamsin ]; then
    echo "Testing things written in Tamsin only."
    "$0" compiled scanner &&
      "$0" compiled grammar &&
      "$0" compiled parser &&
      "$0" compiled desugarer &&
      "$0" compiled analyzer &&
      "$0" micro &&
      "$0" tcompiler &&
      echo "All tests passed!"
    exit $?
fi

if [ "x$1" = xthorough ]; then
    echo "Testing EVERYTHING. This will take more than 8 minutes. (On a FAST machine.)"
    "$0" interpreter &&
      "$0" compiler &&
      "$0" interpreted scanner &&
      "$0" interpreted grammar &&
      "$0" interpreted parser &&
      "$0" interpreted desugarer &&
      "$0" interpreted analyzer &&
      "$0" compiled scanner &&
      "$0" compiled grammar &&
      "$0" compiled parser &&
      "$0" compiled desugarer &&
      "$0" compiled analyzer &&
      "$0" micro &&
      "$0" tcompiler &&
      "$0" bootstrap &&
      echo "All tests passed!"
    exit $?
fi

# Stand-in reference command for tests whose expected output is just 'ok'.
ok() {
    echo 'ok'
}

# Compare the reference output with the candidate output.  The diff is
# kept in tmp/output.diff; if the files differ it is also shown and the
# whole run is aborted.
check_same() {
    if ! diff -ru tmp/python-cmd.txt tmp/tamsin-cmd.txt > tmp/output.diff; then
        cat tmp/output.diff
        exit 1
    fi
}

# test_it MODE SRC LIBS CMD [BIN]
#   Runs the Tamsin program SRC (together with LIBS) over every file
#   matched by $GLOB, either compiled to the executable BIN or interpreted
#   by bin/tamsin, and requires its output to equal that of
#   `CMD <file>`.  Exits the script: 0 when everything matched, 1 otherwise.
test_it() {
    MODE=$1
    SRC=$2
    LIBS=$3
    CMD=$4
    BIN=$5
    if [ "x$BIN" = x ]; then
        # "./" so the default is executed from the current directory and
        # not searched for on PATH
        BIN=./foo
    fi

    # $LIBS, $CMD and $GLOB are deliberately left unquoted below: they hold
    # several words (or glob patterns) that must be split/expanded.
    if [ "$MODE" = "compiled" ]; then
        make c_src/libtamsin.a || exit 1
        echo "*** Compiling $SRC (with $LIBS)"
        echo "*** and testing it against '$CMD'..."
        bin/tamsin compile $LIBS $SRC > tmp/foo.c && \
           gcc -g -Ic_src -Lc_src tmp/foo.c -o "$BIN" -ltamsin || exit 1
        for EG in $GLOB; do
            echo "$EG"
            $CMD "$EG" | bin/wrap > tmp/python-cmd.txt
            "$BIN" < "$EG" | bin/wrap > tmp/tamsin-cmd.txt
            check_same
        done
    elif [ "$MODE" = "interpreted" ]; then
        echo "*** Interpreting $SRC (with $LIBS)"
        echo "*** and testing it against '$CMD'..."
        for EG in $GLOB; do
            echo "$EG"
            $CMD "$EG" | bin/wrap > tmp/python-cmd.txt
            bin/tamsin $LIBS $SRC < "$EG" | bin/wrap > tmp/tamsin-cmd.txt
            check_same
        done
    else
        echo "BAD MODE"
        exit 1
    fi
    echo "Passed."
    exit 0
}

# $VERBOSE and $FILES are deliberately unquoted: either may be empty or
# hold several words.
case "$1" in
    interpreter|i)
        echo "*** Testing Python interpreter..."
        falderal $VERBOSE --substring-error fixture/tamsin.py.markdown $FILES
        ;;
    error-reporting)
        echo "*** Testing error reporting in Python interpreter..."
        falderal $VERBOSE --substring-error fixture/tamsin.py.markdown doc/Error_Reporting.markdown
        ;;
    compiler)
        make c_src/libtamsin.a || exit 1
        echo "*** Testing compiler..."
        falderal $VERBOSE --substring-error fixture/compiler.py.markdown $FILES
        ;;
    grammar)
        test_it "$MODE" "mains/grammar.tamsin" \
                "lib/tamsin_scanner.tamsin" \
                "ok" \
                "bin/tamsin-grammar"
        ;;
    scanner)
        test_it "$MODE" "mains/scanner.tamsin" \
                "lib/tamsin_scanner.tamsin" \
                "./bin/tamsin scan" \
                "bin/tamsin-scanner"
        ;;
    parser)
        test_it "$MODE" "mains/parser.tamsin" \
                "lib/list.tamsin lib/tamsin_scanner.tamsin lib/tamsin_parser.tamsin" \
                "./bin/tamsin parse" \
                "bin/tamsin-parser"
        ;;
    desugarer)
        test_it "$MODE" "mains/desugarer.tamsin" \
                "lib/list.tamsin lib/tamsin_scanner.tamsin lib/tamsin_parser.tamsin lib/tamsin_analyzer.tamsin" \
                "./bin/tamsin desugar" \
                "bin/tamsin-desugarer"
        ;;
    analyzer)
        # libs and mains need libs
        GLOB="eg/*.tamsin"
        test_it "$MODE" "mains/analyzer.tamsin" \
                "lib/list.tamsin lib/tamsin_scanner.tamsin lib/tamsin_parser.tamsin lib/tamsin_analyzer.tamsin" \
                "./bin/tamsin analyze" \
                "bin/tamsin-analyzer"
        ;;
    tcompiler)
        make bin/tamsin-compiler || exit 1
        echo "*** Testing Tamsin-in-Tamsin compiler..."
        falderal $VERBOSE --substring-error fixture/compiler.tamsin.markdown $FILES
        ;;
    bootstrap)
        make bin/bootstrapped-compiler || exit 1
        echo "*** Testing Bootstrapped Tamsin-in-Tamsin compiler..."
        falderal $VERBOSE --substring-error fixture/bootstrapped.markdown $FILES
        ;;
    micro)
        make bin/micro-tamsin || exit 1
        echo "*** Testing Micro-Tamsin interpreter..."
        FILES="doc/Micro-Tamsin.markdown"
        falderal $VERBOSE --substring-error fixture/micro-tamsin.markdown $FILES
        ;;
    mini)
        make bin/mini-tamsin || exit 1
        echo "*** Testing Mini-Tamsin interpreter..."
        FILES="doc/Micro-Tamsin.markdown"  # note: does not use Mini-Tamsin.md yet
        falderal $VERBOSE --substring-error fixture/mini-tamsin.markdown $FILES
        ;;
    *)
        echo "Unknown test '$1'."
        exit 1
        ;;
esac

--------------------------------------------------------------------------------