├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── bin └── modl_cli.py ├── modl.EBNF ├── modl ├── __init__.py ├── expr.py ├── interpreter.py ├── parser.py ├── scanner.py └── tokens.py ├── std.dl ├── test.dl └── tests ├── __init__.py ├── test_interpreter.py ├── test_parser.py └── test_scanner.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .nox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # IPython 77 | profile_default/ 78 | ipython_config.py 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | .dmypy.json 111 | dmypy.json 112 | 113 | # Pyre type checker 114 | .pyre/ 115 | *.swp 116 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Tordek 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | MODL 2 | ==== 3 | 4 | My Own Dumb Language 5 | 6 | Built by following @munificent's [great guide](http://www.craftinginterpreters.com/). 7 | 8 | Run the interpreter, call "use test;" then "fibo 5;" to see the extremely exciting value of the fifth element in the fibonacci sequence! 9 | 10 | Syntax 11 | ------ 12 | 13 | The language has very little syntax because reasons: 14 | 15 | use "filename"; 16 | 17 | just runs everything as if it were run right there. TODO: `use "filename" as namespace` to organize stuff. 18 | 19 | To define variables: 20 | 21 | let name <- value; 22 | 23 | But you can define several at once: 24 | 25 | let name1 <- value1, 26 | name2 <- value2; 27 | 28 | See below for details. 29 | 30 | Expressions are separated by `;`, and the last one is the value returned by the whole exprression (i.e., function). 31 | 32 | Function definition is done with: 33 | 34 | { arguments | 35 | expression1; 36 | expression2; 37 | } 38 | 39 | When run, both are executed in order (ideally, you're a good functional boy and that's useless; however, `!` functions exist). 40 | 41 | `!` functions are kinda special: In order to allow for zero-argument functions like `read!` (for reading input from the user), whenever the first call in an expression is a `!` function, an implicit `!` is passed, but `!` isn't valid explicitly anywhere. 42 | 43 | i.e., 44 | 45 | read!; 46 | 47 | is actually parsed as 48 | 49 | read! !; 50 | 51 | but 52 | 53 | map read! list; 54 | 55 | won't work because read! won't have its `!`. You need a `map!` defined with an `f!` argument. 56 | 57 | Scoping 58 | ------- 59 | 60 | Scope is where the let is. 61 | 62 | Every assignment begins a new scope; values can't see assignments that happen later. However, within a single let, the scope is shared. This is useful for defining mutually-recursive functions, like the extremely useful `odd` and `even` pair in the `test.dl` file. Be careful, however: things are still evaluated in order, so this: 63 | 64 | let a <- add1 5, 65 | add1 <- { x | x + 1; }; 66 | 67 | won't work because `a` tries to evaluate `add1` before it's been defined. You need to do it the other way around: 68 | 69 | let add1 <- { x | x + 1; }, 70 | a <- add1 5; 71 | 72 | but you should probably have done it in two `let`s anyway. 73 | 74 | TODO: 75 | ----- 76 | 77 | - Make the interpreter able to work on partial (and multiple!) input 78 | - Make the scanner and parser not take the whole input in the constructor (ew, state) 79 | - Make `!` not be able to cross function boundaries, so it must be thread through `!` functions. 80 | -------------------------------------------------------------------------------- /bin/modl_cli.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | sys.path.insert(0, os.path.abspath(".")) 5 | 6 | import codecs 7 | import traceback 8 | 9 | from modl.parser import Parser 10 | from modl.scanner import Scanner 11 | from modl import interpreter 12 | 13 | 14 | def main(args): 15 | if len(args) > 2: 16 | print("Usage: {} [script]".format(sys.args[0]), file=sys.stderr) 17 | exit(-1) 18 | elif len(args) == 2: 19 | exit(run_file(args[1])) 20 | else: 21 | run_prompt() 22 | 23 | 24 | def run_file(path): 25 | with codecs.open(path, encoding="utf8") as script: 26 | env = interpreter.get_default_env() 27 | scanner = Scanner(command) 28 | parser = Parser(scanner.scan_tokens()) 29 | 30 | for statement in parser.program(): 31 | (result, env) = interpreter.interpret(statement, env) 32 | 33 | return result 34 | 35 | 36 | def run_prompt(): 37 | env = interpreter.get_default_env() 38 | while True: 39 | try: 40 | command = input("> ") 41 | scanner = Scanner(command) 42 | parser = Parser(scanner.scan_tokens()) 43 | (result, env) = interpreter.interpret(parser.statement(), env) 44 | print(result) 45 | hadError = False 46 | except Exception as e: 47 | print(traceback.format_exc()) 48 | 49 | 50 | def error(line, message): 51 | report(line, "", message) 52 | 53 | 54 | def report(line, where, message): 55 | global hadError 56 | print("[line {}] Error{}: {}".format(line, where, message), file=sys.stderr) 57 | hadError = True 58 | 59 | 60 | if __name__ == "__main__": 61 | main(sys.argv) 62 | -------------------------------------------------------------------------------- /modl.EBNF: -------------------------------------------------------------------------------- 1 | program = statement* ; 2 | statement = stmt SEMICOLON ; 3 | stmt = use 4 | | let 5 | | symchain 6 | ; 7 | 8 | use = USE STRING ; 9 | let = LET let_body (COMMA let_body)* ; 10 | let_body = ( IDENTIFIER | SYMBOLIC ) LEFT_ARROW symchain ; 11 | 12 | symchain = expression (SYMBOLIC symchain)* (COLON type_signature)? ; 13 | 14 | expression = primary (primary)* ; 15 | 16 | type_signature = typename (RIGHT_ARROW type_signature)* ; 17 | 18 | primary = function 19 | | cond 20 | | STRING 21 | | NUMBER 22 | | IDENTIFIER 23 | | BUILTIN 24 | | OPEN_PARENTHESES SYMBOLIC CLOSE_PARENTHESES 25 | | OPEN_PARENTHESES symchain CLOSE_PARENTHESES 26 | ; 27 | 28 | cond = COND (PIPE symchain LEFT_ARROW statement+)+ ; 29 | 30 | function = OPEN_BRACE (BANG | identifier) identifier* PIPE statement+ CLOSE_BRACE ; 31 | -------------------------------------------------------------------------------- /modl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tordek/modl/1cd862479eff44541919c0a4a0cd274963f12d58/modl/__init__.py -------------------------------------------------------------------------------- /modl/expr.py: -------------------------------------------------------------------------------- 1 | class TypedExpression: 2 | types = [] 3 | 4 | 5 | class Use: 6 | def __init__(self, filename): 7 | self.filename = filename 8 | 9 | def __repr__(self): 10 | return "USE " + repr(self.filename) 11 | 12 | 13 | class Let: 14 | def __init__(self, assignments): 15 | self.assignments = assignments 16 | 17 | def __repr__(self): 18 | return ( 19 | "(DEFINE " 20 | + "\n ".join( 21 | repr(name) + " " + repr(value) for (name, value) in self.assignments 22 | ) 23 | + ")" 24 | ) 25 | 26 | 27 | class Symchain(TypedExpression): 28 | def __init__(self, left, op, right): 29 | self.left = left 30 | self.op = op 31 | self.right = right 32 | 33 | def __repr__(self): 34 | return ( 35 | "(" + repr(self.op) + " " + repr(self.left) + " " + repr(self.right) + ")" 36 | ) 37 | 38 | 39 | class Identifier(TypedExpression): 40 | def __init__(self, name): 41 | self.name = name 42 | 43 | def __repr__(self): 44 | return self.name 45 | 46 | 47 | class Expression(TypedExpression): 48 | def __init__(self, call): 49 | self.call = call 50 | 51 | def __repr__(self): 52 | return "(" + " ".join(repr(v) for v in self.call) + ")" 53 | 54 | 55 | class Function(TypedExpression): 56 | def __init__(self, args, body): 57 | self.args = args 58 | self.body = body 59 | 60 | def __repr__(self): 61 | return ( 62 | "(LAMBDA (" 63 | + " ".join(repr(arg) for arg in self.args) 64 | + ") " 65 | + " ".join(repr(st) for st in self.body) 66 | + ")" 67 | ) 68 | 69 | 70 | class Literal(TypedExpression): 71 | def __init__(self, value): 72 | self.value = value 73 | 74 | def __repr__(self): 75 | return repr(self.value) 76 | 77 | 78 | class Builtin(TypedExpression): 79 | def __init__(self, name): 80 | self.name = name 81 | 82 | def __repr__(self): 83 | return "#" + self.name 84 | 85 | 86 | class Conditional(TypedExpression): 87 | def __init__(self, cases): 88 | self.cases = cases 89 | 90 | def __repr__(self): 91 | result = "(COND " 92 | for cond, body in self.cases: 93 | result += "(" 94 | result += repr(cond) 95 | if len(body) == 1: 96 | result += " " 97 | result += repr(body[0]) 98 | else: 99 | result += "(progn" 100 | for st in body: 101 | result += "\n" 102 | result += repr(st) 103 | result += ")" 104 | result += ") " 105 | result += ")" 106 | return result 107 | -------------------------------------------------------------------------------- /modl/interpreter.py: -------------------------------------------------------------------------------- 1 | from collections import ChainMap 2 | from . import expr as expr 3 | from .parser import Parser 4 | from .scanner import Scanner 5 | 6 | BUILTIN = { 7 | "print": lambda x: print(x), 8 | "add": lambda x, y: x + y, 9 | "sub": lambda x, y: x - y, 10 | "mul": lambda x, y: x * y, 11 | "fdiv": lambda x, y: x / y, 12 | "mod": lambda x, y: x % y, 13 | "read": lambda _: input(), 14 | "eq": lambda x, y: x == y, 15 | "gt": lambda x, y: x > y, 16 | "if": lambda c, t, f: t if c else f, 17 | "cons": lambda h, t: (h, t), 18 | "head": lambda l: l[0], 19 | "tail": lambda l: l[1], 20 | "true": True, 21 | "false": False, 22 | "empty": (), 23 | } 24 | 25 | 26 | class Function: 27 | def __init__(self, function, environment): 28 | self.function = function 29 | self.environment = environment 30 | 31 | 32 | class TailCall: 33 | def __init__(self, f, params): 34 | self.f = f 35 | self.params = params 36 | 37 | 38 | def get_default_env(): 39 | env = ChainMap() 40 | env["!"] = "!" 41 | return env 42 | 43 | 44 | def interpret(statement, environment, is_tail_call=False): 45 | if isinstance(statement, expr.Literal): 46 | return (statement.value, environment) 47 | elif isinstance(statement, expr.Identifier): 48 | return (environment[statement.name], environment) 49 | elif isinstance(statement, expr.Function): 50 | return (Function(statement, environment), environment) 51 | elif isinstance(statement, expr.Builtin): 52 | return (BUILTIN[statement.name], environment) 53 | elif isinstance(statement, expr.Use): 54 | with open(statement.filename) as file: 55 | contents = file.read() 56 | scanner = Scanner(contents) 57 | parser = Parser(scanner.scan_tokens()) 58 | for statement in parser.program(): 59 | result, environment = interpret(statement, environment) 60 | return (result, environment) 61 | elif isinstance(statement, expr.Let): 62 | environment = environment.new_child() 63 | for (name, value) in statement.assignments: 64 | (evaluated_value, _) = interpret(value, environment) 65 | environment[name.name] = evaluated_value 66 | return (None, environment) 67 | elif isinstance(statement, expr.Expression): 68 | results = [] 69 | for s in statement.call: 70 | result, _ = interpret(s, environment) 71 | results.append(result) 72 | if is_tail_call: 73 | return TailCall(results[0], results[1:]) 74 | else: 75 | return (do_call(*results), environment) 76 | elif isinstance(statement, expr.Symchain): 77 | (left, _) = interpret(statement.left, environment) 78 | (op, _) = interpret(statement.op, environment) 79 | (right, _) = interpret(statement.right, environment) 80 | if is_tail_call: 81 | return TailCall(op, [left, right]) 82 | else: 83 | return (do_call(op, left, right), environment) 84 | elif isinstance(statement, expr.Conditional): 85 | # Discard the environment from the evaluation 86 | # to disallow `let`s done inside a case 87 | for condition, body in statement.cases: 88 | value, _ = interpret(condition, environment) 89 | if value is True: 90 | env = environment 91 | for st in body[:-1]: 92 | result, env = interpret(st, env) 93 | 94 | if is_tail_call: 95 | return interpret(body[-1], env, True) 96 | else: 97 | result, _ = interpret(nody[-1], env) 98 | return (result, environment) 99 | elif value is False: 100 | continue 101 | else: 102 | raise Exception("Type mismatch, condition must be boolean", value) 103 | return (None, environment) # All conditions were false 104 | else: 105 | raise Exception("Trying to run unknown thing", statement) 106 | 107 | 108 | def do_call(f, *params): 109 | while True: 110 | # Currying, but optimized if there are multiple parameters 111 | if isinstance(f, Function): 112 | f_env = f.environment.new_child() 113 | args = f.function.args 114 | for name, value in zip(args, params): 115 | f_env[name.name] = value 116 | if len(args) > len(params): 117 | return Function( 118 | expr.Function(args[len(params) :], f.function.body), f_env 119 | ) 120 | elif len(args) == len(params): 121 | v = None 122 | for statement in f.function.body[:-1]: 123 | (v, f_env) = interpret(statement, f_env) 124 | result = interpret(f.function.body[-1], f_env, True) 125 | if isinstance(result, TailCall): 126 | f = result.f 127 | params = result.params 128 | continue 129 | else: 130 | return result[0] 131 | else: 132 | raise Exception(f, "Function received too many parameters") 133 | elif callable(f): 134 | return f(*params) 135 | else: 136 | raise Exception(f, "Tried to call a non-function object") 137 | -------------------------------------------------------------------------------- /modl/parser.py: -------------------------------------------------------------------------------- 1 | from . import expr 2 | from .tokens import Token, TokenType 3 | 4 | 5 | class Parser: 6 | def __init__(self, tokens): 7 | self.tokens = tokens 8 | self.current = 0 9 | 10 | def program(self): 11 | statements = [] 12 | while not self.is_at_end(): 13 | statements.append(self.statement()) 14 | return statements 15 | 16 | def statement(self): 17 | stmt = None 18 | if self.match(TokenType.USE): 19 | string = self.consume("Need a filename to import", TokenType.STRING) 20 | stmt = expr.Use(string.literal) 21 | elif self.match(TokenType.LET): 22 | stmt = self.let() 23 | else: 24 | stmt = self.symchain() 25 | self.consume("Expect ';' at end of statement.", TokenType.SEMICOLON) 26 | return stmt 27 | 28 | def let(self): 29 | assignments = [] 30 | while True: 31 | identifier = None 32 | if self.match(TokenType.IDENTIFIER, TokenType.SYMBOLIC): 33 | # We only care about the name 34 | identifier = expr.Identifier(self.previous().lexeme) 35 | else: 36 | raise Exception(self.peek(), "Can't assign to this") 37 | 38 | self.consume("Missing <-", TokenType.LEFT_ARROW) 39 | value = self.symchain() 40 | assignments.append((identifier, value)) 41 | if not self.match(TokenType.COMMA): 42 | break 43 | return expr.Let(assignments) 44 | 45 | def symchain(self): 46 | e = self.expression() 47 | 48 | while self.match(TokenType.SYMBOLIC): 49 | # We only care about the name 50 | symbol = expr.Identifier(self.previous().lexeme) 51 | right = self.symchain() 52 | e = expr.Symchain(e, symbol, right) 53 | 54 | types = [] 55 | if self.match(TokenType.COLON): 56 | t = self.consume("Expected a type after :", TokenType.TYPENAME) 57 | types.append(t.lexeme) 58 | 59 | while self.match(TokenType.RIGHT_ARROW): 60 | t = self.consume("Expected a type after ->", TokenType.TYPENAME) 61 | types.append(t.lexeme) 62 | e.types = types 63 | return e 64 | 65 | def expression(self): 66 | chain = [] 67 | 68 | while True: 69 | e = self.try_primary() 70 | if e is None: 71 | break 72 | chain.append(e) 73 | 74 | head = chain[0] 75 | # Special case for bang functions: Add an implicit ! if they're 76 | # the first in the call 77 | if isinstance(head, expr.Identifier) and head.name[-1] == "!": 78 | chain = [chain[0], expr.Identifier("!")] + chain[1:] 79 | 80 | if len(chain) == 1: 81 | return chain[0] 82 | else: 83 | return expr.Expression(chain) 84 | 85 | def try_primary(self): 86 | if self.match(TokenType.OPEN_BRACKETS): 87 | argument_list = [] 88 | head_arg = self.consume( 89 | "Argument list cannot be empty", TokenType.BANG, TokenType.IDENTIFIER 90 | ) 91 | argument_list.append(expr.Identifier(head_arg.lexeme)) 92 | while self.match(TokenType.IDENTIFIER): 93 | argument_list.append(expr.Identifier(self.previous().lexeme)) 94 | self.consume("Missing argument delimiter", TokenType.PIPE) 95 | body = [self.statement()] 96 | while not self.check(TokenType.CLOSE_BRACKETS): 97 | e = self.statement() 98 | body.append(e) 99 | self.consume("Unclosed function definition", TokenType.CLOSE_BRACKETS) 100 | return expr.Function(argument_list, body) 101 | elif self.match(TokenType.STRING, TokenType.B10_FLOAT, TokenType.B10_INTEGER): 102 | return expr.Literal(self.previous().literal) 103 | elif self.match(TokenType.IDENTIFIER): 104 | return expr.Identifier(self.previous().lexeme) 105 | elif self.match(TokenType.BUILTIN): 106 | return expr.Builtin(self.previous().literal) 107 | elif self.match(TokenType.OPEN_PARENTHESES): 108 | if self.check(TokenType.SYMBOLIC): 109 | symbol = self.match(TokenType.SYMBOLIC) 110 | identifier = expr.Identifier(self.previous().lexeme) 111 | self.consume( 112 | "Symbol expressions can only contain a symbol", 113 | TokenType.CLOSE_PARENTHESES, 114 | ) 115 | return identifier 116 | e = self.symchain() 117 | self.consume("Missing closing parentheses", TokenType.CLOSE_PARENTHESES) 118 | return e 119 | elif self.match(TokenType.COND): 120 | cases = [] 121 | while True: 122 | self.consume("Missing condition delimiter", TokenType.PIPE) 123 | condition = self.symchain() 124 | self.consume("Missing condition delimiter", TokenType.RIGHT_ARROW) 125 | 126 | body = [self.statement()] 127 | while not self.check( 128 | TokenType.PIPE, TokenType.SEMICOLON, TokenType.COLON 129 | ): 130 | body.append(self.statement()) 131 | cases.append((condition, body)) 132 | 133 | if not self.check(TokenType.PIPE): 134 | break 135 | 136 | return expr.Conditional(cases) 137 | else: 138 | return None 139 | 140 | def check(self, *types): 141 | if self.is_at_end(): 142 | return False 143 | return self.peek().token_type in types 144 | 145 | def match(self, *types): 146 | if self.check(*types): 147 | self.advance() 148 | return True 149 | return False 150 | 151 | def consume(self, message, *types): 152 | if self.check(*types): 153 | return self.advance() 154 | 155 | raise Exception(self.peek(), message) 156 | 157 | def advance(self): 158 | if not self.is_at_end(): 159 | self.current += 1 160 | return self.previous() 161 | 162 | def is_at_end(self): 163 | return self.peek().token_type == TokenType.EOF 164 | 165 | def peek(self): 166 | return self.tokens[self.current] 167 | 168 | def previous(self): 169 | return self.tokens[self.current - 1] 170 | -------------------------------------------------------------------------------- /modl/scanner.py: -------------------------------------------------------------------------------- 1 | from .tokens import Token, TokenType 2 | 3 | 4 | class Scanner: 5 | # These tokens may not appear as part of a longer token 6 | unique_tokens = { 7 | ";": TokenType.SEMICOLON, 8 | "{": TokenType.OPEN_BRACKETS, 9 | "}": TokenType.CLOSE_BRACKETS, 10 | "(": TokenType.OPEN_PARENTHESES, 11 | ")": TokenType.CLOSE_PARENTHESES, 12 | ",": TokenType.COMMA, 13 | } 14 | 15 | # These tokens are reserved, but may appear as part of a longer token. 16 | reserved_symbols = { 17 | "->": TokenType.RIGHT_ARROW, 18 | "<-": TokenType.LEFT_ARROW, 19 | "!": TokenType.BANG, 20 | "|": TokenType.PIPE, 21 | ":": TokenType.COLON, 22 | } 23 | 24 | # Reserved words may appear as part of a longer token 25 | reserved_words = { 26 | "use": TokenType.USE, 27 | "let": TokenType.LET, 28 | "cond": TokenType.COND, 29 | } 30 | 31 | def __init__(self, source): 32 | self.source = source 33 | 34 | self.start = 0 35 | self.current = 0 36 | self.line = 1 37 | self.tokens = [] 38 | 39 | def scan_tokens(self): 40 | while not self.is_at_end(): 41 | self.start = self.current 42 | self.scan_token() 43 | 44 | self.tokens.append(Token(TokenType.EOF, "", None, self.line)) 45 | return self.tokens 46 | 47 | def is_at_end(self): 48 | return self.current >= len(self.source) 49 | 50 | def scan_token(self): 51 | c = self.peek() 52 | if self.match("{#"): 53 | self.builtin() 54 | elif self.match("/*"): 55 | self.comment() 56 | elif c in self.unique_tokens: 57 | self.advance() 58 | self.add_token(self.unique_tokens[c]) 59 | elif c in ["-", "."] or c.isnumeric(): 60 | self.number() 61 | elif self.match("\n"): 62 | self.line += 1 63 | elif c.isspace(): 64 | self.advance() 65 | elif self.match('"'): 66 | self.string() 67 | elif c.isupper(): 68 | self.typename() 69 | elif c.isalpha(): 70 | self.identifier() 71 | else: 72 | # All other characters count as symbols 73 | self.symbolic() 74 | 75 | def builtin(self): 76 | while self.valid_in_identifier(self.peek()): 77 | self.advance() 78 | if not self.match("}"): 79 | raise Exception(self.line, "Unterminated builtin literal") 80 | self.add_token(TokenType.BUILTIN, self.current_lexeme()[2:-1]) 81 | 82 | def number(self): 83 | self.match("-") # May match a starting - 84 | self.match(".") # May match .??? or -.??? 85 | 86 | is_number = False 87 | while True: 88 | c = self.peek() 89 | if c is None: 90 | break 91 | elif c.isnumeric(): 92 | self.advance() 93 | is_number = True 94 | elif c == ".": 95 | self.advance() 96 | else: 97 | break 98 | 99 | if not is_number: 100 | return self.symbolic() 101 | 102 | if "." in self.current_lexeme(): 103 | self.add_token(TokenType.B10_FLOAT, float(self.current_lexeme())) 104 | else: 105 | self.add_token(TokenType.B10_INTEGER, int(self.current_lexeme(), 10)) 106 | 107 | def typename(self): 108 | while self.valid_in_identifier(self.peek()): 109 | self.advance() 110 | 111 | self.add_token(TokenType.TYPENAME) 112 | 113 | def identifier(self): 114 | while self.valid_in_identifier(self.peek()): 115 | self.advance() 116 | 117 | lexeme = self.current_lexeme() 118 | if lexeme in self.reserved_words: 119 | if self.match("!"): 120 | raise Exception("Can't use bangs after reserved words.") 121 | self.add_token(self.reserved_words[lexeme]) 122 | else: 123 | self.match("!") 124 | self.add_token(TokenType.IDENTIFIER) 125 | 126 | def valid_in_identifier(self, c): 127 | if c is None: 128 | return False 129 | if c.isalnum(): 130 | return True 131 | if c in "'_": 132 | return True 133 | return False 134 | 135 | def symbolic(self): 136 | while self.valid_as_symbolic(self.peek()): 137 | self.advance() 138 | lexeme = self.current_lexeme() 139 | if lexeme == "*/": 140 | raise Exception(self.line, "Unopened comment.") 141 | elif lexeme in self.reserved_symbols: 142 | self.add_token(self.reserved_symbols[lexeme]) 143 | else: 144 | self.add_token(TokenType.SYMBOLIC) 145 | 146 | def valid_as_symbolic(self, c): 147 | if c is None: 148 | return False 149 | elif c.isspace(): 150 | return False 151 | elif c.isalnum(): 152 | return False 153 | elif c in self.unique_tokens: 154 | return False 155 | else: 156 | return True 157 | 158 | def string(self): 159 | literal = "" 160 | while self.peek() != '"' and not self.is_at_end(): 161 | c = self.peek() # Sure would like to have the walrus op 162 | if c == "\n": 163 | self.line += 1 164 | 165 | if c == "\\": # Escape characters 166 | self.advance() 167 | if self.is_at_end(): 168 | break 169 | e = self.peek() 170 | if e == "n": 171 | literal += "\n" 172 | elif e == "r": 173 | literal += "\r" 174 | elif e == "t": 175 | literal += "\t" 176 | elif e == "\\": 177 | literal += "\\" 178 | elif e == '"': 179 | literal += '"' 180 | elif e == "x": 181 | # fetch 4 characters as hex and decode as unicode 182 | codepoint = self.source[self.current + 1 : self.current + 5] 183 | if len(codepoint) < 4: 184 | raise Exception(self.line, "Unterminated string") 185 | literal += chr(int(codepoint, 16)) 186 | self.current += 4 187 | else: 188 | raise Exception(self.line, "\\" + e + " is not an escape sequence") 189 | else: 190 | literal += c 191 | self.advance() 192 | 193 | if self.is_at_end(): 194 | raise Exception(self.line, "Unterminated string") 195 | 196 | self.advance() # Closing " 197 | 198 | self.add_token(TokenType.STRING, literal) 199 | 200 | def comment(self): 201 | while not self.match("*/"): 202 | if self.is_at_end(): 203 | raise Exception(self.line, "Unterminated comment") 204 | if self.peek() == "\n": 205 | self.line += 1 206 | self.advance() 207 | 208 | # self.add_token(TokenType.COMMENT, self.current_lexeme()) 209 | 210 | def advance(self): 211 | self.current += 1 212 | return self.source[self.current - 1] 213 | 214 | def peek(self, n=1): 215 | if self.current + n > len(self.source): 216 | return None 217 | return self.source[self.current : self.current + n] 218 | 219 | def current_lexeme(self): 220 | return self.source[self.start : self.current] 221 | 222 | def add_token(self, token_type, literal=None): 223 | self.tokens.append(Token(token_type, self.current_lexeme(), literal, self.line)) 224 | 225 | def match(self, c): 226 | if self.is_at_end(): 227 | return False 228 | if self.peek(len(c)) != c: 229 | return False 230 | self.current += len(c) 231 | return True 232 | -------------------------------------------------------------------------------- /modl/tokens.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | 4 | class TokenType(enum.Enum): 5 | # Single-Character tokens 6 | SEMICOLON = enum.auto() 7 | COLON = enum.auto() 8 | PIPE = enum.auto() 9 | BANG = enum.auto() 10 | COMMA = enum.auto() 11 | OPEN_BRACKETS = enum.auto() 12 | CLOSE_BRACKETS = enum.auto() 13 | OPEN_PARENTHESES = enum.auto() 14 | CLOSE_PARENTHESES = enum.auto() 15 | COND = enum.auto() 16 | 17 | # Short tokens 18 | LEFT_ARROW = enum.auto() 19 | RIGHT_ARROW = enum.auto() 20 | COMMENT_START = enum.auto() 21 | COMMENT_END = enum.auto() 22 | EQUALS = enum.auto() 23 | 24 | # Symbol tokens, anything not beginning in isalpha() that isn't above 25 | SYMBOLIC = enum.auto() 26 | 27 | # Keywords 28 | USE = enum.auto() 29 | LET = enum.auto() 30 | 31 | # Literals 32 | IDENTIFIER = enum.auto() # !isupper()[isalpha()'_|]+!? 33 | TYPENAME = enum.auto() # isupper()(isalpha'_|)+ 34 | STRING = enum.auto() # ".*" 35 | B10_INTEGER = enum.auto() 36 | B10_FLOAT = enum.auto() 37 | BUILTIN = enum.auto() 38 | 39 | EOF = enum.auto() 40 | pass 41 | 42 | 43 | class Token: 44 | def __init__(self, token_type, lexeme, literal, line): 45 | self.token_type = token_type 46 | self.lexeme = lexeme 47 | self.literal = literal 48 | self.line = line 49 | 50 | def __repr__(self): 51 | if self.literal: 52 | return "{} '{}'".format(self.token_type, self.lexeme) 53 | else: 54 | return "{} '{}' {}".format(self.token_type, self.lexeme, self.literal) 55 | -------------------------------------------------------------------------------- /std.dl: -------------------------------------------------------------------------------- 1 | /* The "Standard Library" */ 2 | let true <- {#true}; 3 | let false <- {#false}; 4 | let empty <- {#empty}; 5 | let otherwise <- true; 6 | 7 | /* IO */ 8 | let print! <- { ! s | {#print} s; }; 9 | let read! <- { ! | {#read} 0; }; 10 | 11 | /* Boolean. NOT short-circuited. */ 12 | let || <- { x y | 13 | cond 14 | | x -> true; 15 | | otherwise -> y; 16 | ; 17 | }; 18 | 19 | let && <- { x y | 20 | cond 21 | | x -> y; 22 | | otherwise -> false; 23 | ; 24 | }; 25 | 26 | let not <- { x | 27 | cond 28 | | x -> false; 29 | | otherwise -> true; 30 | ; 31 | }; 32 | 33 | /* Arithmetics */ 34 | let + <- { x y | {#add} x y; }; 35 | let - <- { x y | {#sub} x y; }; 36 | let * <- { x y | {#mul} x y; }; 37 | let / <- { x y | {#fdiv} x y; }; 38 | let % <- { x y | {#mod} x y; }; 39 | 40 | /* Comparisons */ 41 | let == <- { x y | {#eq} x y; }; 42 | let != <- { x y | not (x == y); }; 43 | let > <- { x y | {#gt} x y; }; 44 | let >= <- { x y | (x == y) || (x > y); }; 45 | let < <- { x y | not (x >= y); }; 46 | let <= <- { x y | not (x > y); }; 47 | 48 | /* Bang! helpers */ 49 | let forever! <- { ! command! | 50 | command!; 51 | forever! command!; 52 | }; 53 | 54 | let flip <- { f x y | f y x; }; 55 | 56 | /* List functions */ 57 | let :: <- { car cdr | {#cons} car cdr; }; 58 | let head <- { l | {#head} l; }; 59 | let tail <- { l | {#tail} l; }; 60 | 61 | let foldl <- { f a l | 62 | cond 63 | | l == empty -> a; 64 | | otherwise -> foldl f (f a (head l)) (tail l); 65 | ; 66 | }; 67 | 68 | let reverse <- foldl (flip (::)) empty; 69 | 70 | let map_ <- { a f l | 71 | cond 72 | | l == empty -> reverse a; 73 | | otherwise -> map_ (f (head l) :: a) f (tail l); 74 | ; 75 | }, map <- map_ empty; 76 | 77 | let range_ <- { acc from to | 78 | cond 79 | | from == to -> reverse acc; 80 | | otherwise -> range_ (from :: acc) (from + 1) to; 81 | ; 82 | }, range <- range_ empty; 83 | 84 | let filter_ <- { a p l | 85 | cond 86 | | l == empty -> reverse a; 87 | | p (head l) -> filter_ (head l :: a) p (tail l); 88 | | otherwise -> filter_ a p (tail l); 89 | ; 90 | }, filter <- filter_ empty; 91 | 92 | let length <- foldl ((+) 1) 0; 93 | 94 | /* Functional helpers */ 95 | let =>> <- { v f | f v; }; 96 | 97 | let >> <- { f g x | g (f x); }; 98 | 99 | let . <- { f g x | f (g x); }; 100 | 101 | let $ <- { f v | f v; }; 102 | -------------------------------------------------------------------------------- /test.dl: -------------------------------------------------------------------------------- 1 | use "std.dl"; 2 | 3 | /* Standard recursive fibonacci definition */ 4 | let fibo <- { x | 5 | cond 6 | | x == 0 -> 1; 7 | | x == 1 -> 1; 8 | | otherwise -> fibo (x - 1) + fibo (x - 2); 9 | ; 10 | }; 11 | 12 | /* Prettier, less deeply recursive fibonacci */ 13 | let fib <- { a b x | 14 | cond 15 | | x == 0 -> a; 16 | | otherwise -> fib b (a + b) (x - 1); 17 | ; 18 | }; 19 | 20 | /* Currying is available */ 21 | let flatfibo <- fib 1 1; 22 | 23 | let tracefibo <- { a b x | 24 | cond 25 | | x == 0 -> print! "exiting"; 26 | ; 27 | cond 28 | | x == 0 -> a; 29 | | otherwise -> 30 | print! "going deeper"; 31 | tracefibo b (a + b) (x - 1); 32 | ; 33 | }; 34 | 35 | /* Test for scope */ 36 | let a <- "outer"; 37 | let f! <- { ! | 38 | let showA! <- { ! | print! a; }; 39 | showA!; 40 | let a <- "inner"; 41 | showA!; 42 | }; 43 | 44 | /* Mutual recursion */ 45 | let odd <- { x | 46 | cond 47 | | x == 0 -> false; 48 | | otherwise -> even (x - 1); 49 | ; 50 | }, 51 | even <- { x | 52 | cond 53 | | x == 0 -> true; 54 | | otherwise -> odd (x - 1); 55 | ; 56 | }; 57 | 58 | /* As useful as expected, but proves TCE works. */ 59 | let loop_forever! <- { ! | loop_forever!; }; 60 | 61 | let hello! <- { ! | forever! { ! | print! "Hello!"; }; }; 62 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tordek/modl/1cd862479eff44541919c0a4a0cd274963f12d58/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_interpreter.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from modl import interpreter 4 | from modl.scanner import Scanner 5 | from modl.parser import Parser 6 | 7 | 8 | class InterpretAllTheThings(unittest.TestCase): 9 | def test_use(self): 10 | scanner = Scanner('use "std.dl";') 11 | parser = Parser(scanner.scan_tokens()) 12 | env = interpreter.get_default_env() 13 | result, env = interpreter.interpret(parser.statement(), env) 14 | self.assertEqual(result, None) 15 | self.assertIn("print!", env) 16 | 17 | def test_fibo(self): 18 | scanner = Scanner('use "test.dl"; fibo 5;') 19 | parser = Parser(scanner.scan_tokens()) 20 | env = interpreter.get_default_env() 21 | for statement in parser.program(): 22 | result, env = interpreter.interpret(statement, env) 23 | self.assertEqual(result, 8) 24 | 25 | def test_deep(self): 26 | scanner = Scanner('use "test.dl"; flatfibo 500;') 27 | parser = Parser(scanner.scan_tokens()) 28 | env = interpreter.get_default_env() 29 | for statement in parser.program(): 30 | result, env = interpreter.interpret(statement, env) 31 | 32 | self.assertIsNotNone(result) 33 | 34 | def test_multideep(self): 35 | scanner = Scanner('use "test.dl"; tracefibo 1 1 5;') 36 | parser = Parser(scanner.scan_tokens()) 37 | env = interpreter.get_default_env() 38 | for statement in parser.program(): 39 | result, env = interpreter.interpret(statement, env) 40 | self.assertEqual(result, 8) 41 | -------------------------------------------------------------------------------- /tests/test_parser.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from modl.parser import Parser 3 | from modl.scanner import Scanner 4 | from modl import expr 5 | 6 | 7 | class PrimaryTests(unittest.TestCase): 8 | literals = {"5;": 5, '"five";': "five", "5.0;": 5.0} 9 | 10 | def test_literals(self): 11 | for string, literal in self.literals.items(): 12 | with self.subTest(i=string): 13 | scanner = Scanner(string) 14 | parser = Parser(scanner.scan_tokens()) 15 | statement = parser.statement() 16 | self.assertIsInstance(statement, expr.Literal) 17 | self.assertEqual(statement.value, literal) 18 | 19 | def test_builtin(self): 20 | scanner = Scanner("{#print};") 21 | parser = Parser(scanner.scan_tokens()) 22 | statement = parser.statement() 23 | self.assertIsInstance(statement, expr.Builtin) 24 | self.assertEqual(statement.name, "print") 25 | 26 | def test_identifier(self): 27 | scanner = Scanner("ident;") 28 | parser = Parser(scanner.scan_tokens()) 29 | statement = parser.statement() 30 | self.assertIsInstance(statement, expr.Identifier) 31 | self.assertEqual(statement.name, "ident") 32 | 33 | def test_symbols_cant_be_used_alone(self): 34 | scanner = Scanner("+;") 35 | parser = Parser(scanner.scan_tokens()) 36 | with self.assertRaises(Exception): 37 | statement = parser.statement() 38 | 39 | def test_symbols_can_be_wrapped_in_parentheses(self): 40 | scanner = Scanner("(+);") 41 | parser = Parser(scanner.scan_tokens()) 42 | statement = parser.statement() 43 | self.assertIsInstance(statement, expr.Identifier) 44 | self.assertEqual(statement.name, "+") 45 | 46 | def test_symbols_can_be_part_of_an_expression(self): 47 | scanner = Scanner("1 + 2;") 48 | parser = Parser(scanner.scan_tokens()) 49 | statement = parser.statement() 50 | self.assertIsInstance(statement, expr.Symchain) 51 | self.assertIsInstance(statement.op, expr.Identifier) 52 | self.assertEqual(statement.op.name, "+") 53 | self.assertIsInstance(statement.left, expr.Literal) 54 | self.assertEqual(statement.left.value, 1) 55 | self.assertIsInstance(statement.right, expr.Literal) 56 | self.assertEqual(statement.right.value, 2) 57 | 58 | def test_parentheses_are_invisible(self): 59 | scanner = Scanner("(ident);") 60 | parser = Parser(scanner.scan_tokens()) 61 | statement = parser.statement() 62 | self.assertIsInstance(statement, expr.Identifier) 63 | self.assertEqual(statement.name, "ident") 64 | 65 | def test_lambda(self): 66 | scanner = Scanner("{ x | x; };") 67 | parser = Parser(scanner.scan_tokens()) 68 | statement = parser.statement() 69 | self.assertIsInstance(statement, expr.Function) 70 | self.assertEqual(len(statement.args), 1) 71 | self.assertEqual(len(statement.body), 1) 72 | 73 | def test_lambda_longer(self): 74 | scanner = Scanner("{ x y | x y; y x; x y;};") 75 | parser = Parser(scanner.scan_tokens()) 76 | statement = parser.statement() 77 | self.assertIsInstance(statement, expr.Function) 78 | self.assertEqual(len(statement.args), 2) 79 | self.assertEqual(len(statement.body), 3) 80 | 81 | def test_lambda_cant_have_empty_parameter_list(self): 82 | scanner = Scanner("{ | 1; };") 83 | parser = Parser(scanner.scan_tokens()) 84 | with self.assertRaises(Exception): 85 | statement = parser.statement() 86 | 87 | def test_lambda_cant_have_empty_body(self): 88 | scanner = Scanner("{ x | };") 89 | parser = Parser(scanner.scan_tokens()) 90 | with self.assertRaises(Exception): 91 | statement = parser.statement() 92 | 93 | def test_cond(self): 94 | scanner = Scanner("cond | 1 -> 1; ;") 95 | parser = Parser(scanner.scan_tokens()) 96 | statement = parser.statement() 97 | self.assertIsInstance(statement, expr.Conditional) 98 | 99 | def test_cond_with_many_actions(self): 100 | scanner = Scanner("cond | x -> f x; 3; ;") 101 | parser = Parser(scanner.scan_tokens()) 102 | statement = parser.statement() 103 | self.assertIsInstance(statement, expr.Conditional) 104 | 105 | def test_cond_cant_be_empty(self): 106 | scanner = Scanner("cond ;") 107 | parser = Parser(scanner.scan_tokens()) 108 | with self.assertRaises(Exception): 109 | statement = parser.statement() 110 | 111 | def test_cond_condition_cant_be_empty(self): 112 | scanner = Scanner("cond | -> x; ;") 113 | parser = Parser(scanner.scan_tokens()) 114 | with self.assertRaises(Exception): 115 | statement = parser.statement() 116 | 117 | def test_cond_action_cant_be_empty(self): 118 | scanner = Scanner("cond | x -> ;") 119 | parser = Parser(scanner.scan_tokens()) 120 | with self.assertRaises(Exception): 121 | statement = parser.statement() 122 | 123 | def test_bangs_are_special(self): 124 | scanner = Scanner("!;") 125 | parser = Parser(scanner.scan_tokens()) 126 | with self.assertRaises(Exception): 127 | statement = parser.statement() 128 | print(statement) 129 | 130 | def test_things_can_have_types(self): 131 | scanner = Scanner("ident : Integer;") 132 | parser = Parser(scanner.scan_tokens()) 133 | statement = parser.statement() 134 | self.assertEqual(len(statement.types), 1) 135 | self.assertEqual(statement.types[0], "Integer") 136 | 137 | def test_lambdas_can_have_types(self): 138 | scanner = Scanner("{ x | x; } : Integer -> Integer;") 139 | parser = Parser(scanner.scan_tokens()) 140 | statement = parser.statement() 141 | self.assertEqual(len(statement.types), 2) 142 | self.assertEqual(statement.types[0], "Integer") 143 | self.assertEqual(statement.types[1], "Integer") 144 | 145 | def things_can_have_types_in_the_middle_of_other_things(self): 146 | scanner = Scanner("ident : Integer + 5;") 147 | parser = Parser(scanner.scan_tokens()) 148 | statement = parser.statement() 149 | self.assertIsInstance(statement, expr.Expression) 150 | self.assertEqual(statement[0].name, "ident") 151 | self.assertEqual(len(statement[0].types), 1) 152 | self.assertEqual(statement[0].types[0], "Integer") 153 | 154 | 155 | class FuncallTests(unittest.TestCase): 156 | def test_simple_call(self): 157 | scanner = Scanner("call par1 2;") 158 | parser = Parser(scanner.scan_tokens()) 159 | statement = parser.statement() 160 | 161 | self.assertIsInstance(statement, expr.Expression) 162 | self.assertEqual(len(statement.call), 3) 163 | 164 | self.assertIsInstance(statement.call[0], expr.Identifier) 165 | self.assertEqual(statement.call[0].name, "call") 166 | 167 | self.assertIsInstance(statement.call[1], expr.Identifier) 168 | self.assertEqual(statement.call[1].name, "par1") 169 | 170 | self.assertIsInstance(statement.call[2], expr.Literal) 171 | self.assertEqual(statement.call[2].value, 2) 172 | 173 | def test_bang_words_are_magic(self): 174 | scanner = Scanner("print!;") 175 | parser = Parser(scanner.scan_tokens()) 176 | statement = parser.statement() 177 | 178 | self.assertIsInstance(statement, expr.Expression) 179 | self.assertEqual(len(statement.call), 2) 180 | 181 | self.assertIsInstance(statement.call[0], expr.Identifier) 182 | self.assertEqual(statement.call[0].name, "print!") 183 | 184 | self.assertIsInstance(statement.call[1], expr.Identifier) 185 | self.assertEqual(statement.call[1].name, "!") 186 | 187 | def test_complete_your_function(self): 188 | scanner = Scanner("{ x | x; ") 189 | parser = Parser(scanner.scan_tokens()) 190 | with self.assertRaises(Exception): 191 | statement = parser.statement() 192 | 193 | def test_simplest_program(self): 194 | scanner = Scanner("1;") 195 | parser = Parser(scanner.scan_tokens()) 196 | program = parser.program() 197 | self.assertEqual(len(program), 1) 198 | 199 | statement = program[0] 200 | self.assertIsInstance(statement, expr.Literal) 201 | self.assertEqual(statement.value, 1) 202 | 203 | 204 | class StatementTests(unittest.TestCase): 205 | def test_use(self): 206 | scanner = Scanner('use "potato";') 207 | parser = Parser(scanner.scan_tokens()) 208 | statement = parser.statement() 209 | self.assertIsInstance(statement, expr.Use) 210 | self.assertEqual(statement.filename, "potato") 211 | 212 | def test_single(self): 213 | scanner = Scanner("let a <- 1;") 214 | parser = Parser(scanner.scan_tokens()) 215 | statement = parser.statement() 216 | self.assertIsInstance(statement, expr.Let) 217 | self.assertEqual(len(statement.assignments), 1) 218 | name, value = statement.assignments[0] 219 | self.assertIsInstance(name, expr.Identifier) 220 | self.assertIsInstance(value, expr.Literal) 221 | 222 | def test_multiple(self): 223 | scanner = Scanner("let a <- 1, + <- { x y | x y; };") 224 | parser = Parser(scanner.scan_tokens()) 225 | statement = parser.statement() 226 | self.assertIsInstance(statement, expr.Let) 227 | self.assertEqual(len(statement.assignments), 2) 228 | name, value = statement.assignments[0] 229 | self.assertIsInstance(name, expr.Identifier) 230 | self.assertIsInstance(value, expr.Literal) 231 | name, value = statement.assignments[1] 232 | self.assertIsInstance(name, expr.Identifier) 233 | self.assertIsInstance(value, expr.Function) 234 | 235 | def test_cant_assign_to_a_function(self): 236 | scanner = Scanner("let { x | x; }; <- 1;") 237 | parser = Parser(scanner.scan_tokens()) 238 | with self.assertRaises(Exception): 239 | statement = parser.statement() 240 | 241 | 242 | class GreenspunTest(unittest.TestCase): 243 | program = """ 244 | use "std.dl"; 245 | let x <- 1 : Integer, f <- { a | {#add} x + a; }; 246 | f (+) x; 247 | cond 248 | | a -> f x 1; 249 | | b -> print! "B"; 250 | (+); 251 | ; 252 | """ 253 | output = """USE 'std.dl' 254 | (DEFINE x 1 255 | f (LAMBDA (a) (+ (#add x) a))) 256 | (f + x) 257 | (COND (a (f x 1)) (b(progn 258 | (print! ! 'B') 259 | +)) )""" 260 | 261 | def test_hey_this_looks_like_lisp_lol(self): 262 | scanner = Scanner(self.program) 263 | parser = Parser(scanner.scan_tokens()) 264 | program = parser.program() 265 | self.assertEqual("\n".join(repr(st) for st in program), self.output) 266 | -------------------------------------------------------------------------------- /tests/test_scanner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import unittest 4 | from modl.scanner import Scanner 5 | from modl.tokens import TokenType 6 | 7 | 8 | class TestScannerTokens(unittest.TestCase): 9 | valid_integers = { 10 | "0": 0, 11 | "1": 1, 12 | "1234567890": 1234567890, 13 | "12345678901234567890": 12345678901234567890, 14 | "-1": -1, 15 | "-0": 0, # Pointless, but valid 16 | "000123": 123, 17 | "-000123": -123, 18 | } 19 | 20 | valid_floats = {"0.0": 0.0, "0.1": 0.1, "1.0": 1.0, ".5": 0.5, "-.5": -0.5} 21 | 22 | valid_strings = { 23 | r'""': "", 24 | r'"a"': "a", 25 | r'"this is a long string"': "this is a long string", 26 | '"this string\nis multiline"': "this string\nis multiline", 27 | r'"this string\nis also multiline"': "this string\nis also multiline", 28 | r'"A whole bunch of escape sequences \r\n\t\\\"\x0123"': 'A whole bunch of escape sequences \r\n\t\\"\u0123', 29 | } 30 | 31 | reserved = { 32 | "use": TokenType.USE, 33 | "let": TokenType.LET, 34 | "cond": TokenType.COND, 35 | "<-": TokenType.LEFT_ARROW, 36 | "->": TokenType.RIGHT_ARROW, 37 | "|": TokenType.PIPE, 38 | ":": TokenType.COLON, 39 | "!": TokenType.BANG, 40 | ";": TokenType.SEMICOLON, 41 | "{": TokenType.OPEN_BRACKETS, 42 | "}": TokenType.CLOSE_BRACKETS, 43 | "(": TokenType.OPEN_PARENTHESES, 44 | ")": TokenType.CLOSE_PARENTHESES, 45 | ",": TokenType.COMMA, 46 | } 47 | 48 | valid_symbols = [ 49 | "-->", 50 | "<--", 51 | "?", 52 | "!!", 53 | "::", 54 | "||", 55 | "&&", 56 | "->>", 57 | "=*>>", 58 | "-", 59 | "-.", 60 | ".", 61 | "..", 62 | "-..", 63 | "--", 64 | "/", 65 | "//", 66 | "' ", 67 | ] 68 | valid_identifiers = ["foo", "bar_baz", "quux'", "read!", "add4", "plus5!"] 69 | 70 | # Sequences containing unique characters that may not be part of a symbol 71 | invalid_symbols = ["-{", "/a", "-)", "+,+"] 72 | 73 | def test_valid_integers(self): 74 | for string, literal in self.valid_integers.items(): 75 | with self.subTest(i=string): 76 | scanner = Scanner(string) 77 | tokens = scanner.scan_tokens() 78 | self.assertEqual(len(tokens), 2) # Parsed symbol, plus EOF 79 | token = tokens[0] 80 | self.assertIs(token.token_type, TokenType.B10_INTEGER) 81 | self.assertEqual(token.literal, literal) 82 | 83 | def test_valid_floats(self): 84 | for string, literal in self.valid_floats.items(): 85 | with self.subTest(i=string): 86 | scanner = Scanner(string) 87 | tokens = scanner.scan_tokens() 88 | self.assertEqual(len(tokens), 2) # Parsed symbol, plus EOF 89 | token = tokens[0] 90 | self.assertIs(token.token_type, TokenType.B10_FLOAT) 91 | self.assertEqual(token.literal, literal) 92 | 93 | def test_valid_strings(self): 94 | for string, literal in self.valid_strings.items(): 95 | with self.subTest(i=string): 96 | scanner = Scanner(string) 97 | tokens = scanner.scan_tokens() 98 | self.assertEqual(len(tokens), 2) # Parsed symbol, plus EOF 99 | token = tokens[0] 100 | self.assertIs(token.token_type, TokenType.STRING) 101 | self.assertEqual(token.literal, literal) 102 | 103 | def test_reserved_sequences(self): 104 | for string, token_type in self.reserved.items(): 105 | with self.subTest(i=string): 106 | scanner = Scanner(string) 107 | tokens = scanner.scan_tokens() 108 | self.assertEqual(len(tokens), 2) # Parsed symbol, plus EOF 109 | token = tokens[0] 110 | self.assertIs(token.token_type, token_type) 111 | 112 | def test_valid_symbol(self): 113 | for string in self.valid_symbols: 114 | with self.subTest(i=string): 115 | scanner = Scanner(string) 116 | tokens = scanner.scan_tokens() 117 | self.assertEqual(len(tokens), 2) # Parsed symbol, plus EOF 118 | token = tokens[0] 119 | self.assertIs(token.token_type, TokenType.SYMBOLIC) 120 | 121 | def test_invalid_symbol(self): 122 | for string in self.invalid_symbols: 123 | with self.subTest(i=string): 124 | scanner = Scanner(string) 125 | tokens = scanner.scan_tokens() 126 | # Whatever this matched is OK as long as it's not a single symbol 127 | self.assertNotEqual(len(tokens), 2) 128 | 129 | def test_valid_identifier(self): 130 | for string in self.valid_identifiers: 131 | with self.subTest(i=string): 132 | scanner = Scanner(string) 133 | tokens = scanner.scan_tokens() 134 | self.assertEqual(len(tokens), 2) # Parsed symbol, plus EOF 135 | token = tokens[0] 136 | self.assertIs(token.token_type, TokenType.IDENTIFIER) 137 | 138 | def test_single_line(self): 139 | scanner = Scanner("foobar") 140 | scanner.scan_tokens() # Discard 141 | self.assertEqual(scanner.line, 1) 142 | 143 | def test_two_lines(self): 144 | scanner = Scanner("foo\nbar") 145 | scanner.scan_tokens() # Discard 146 | self.assertEqual(scanner.line, 2) 147 | 148 | def test_multiple_lines(self): 149 | scanner = Scanner( 150 | r"""this is a "series of 151 | tokens" 152 | /* including multiline comments 153 | spread 154 | among several */ 155 | lines, also "includes an \n escaped linebreak character" that should be ignored""" 156 | ) 157 | scanner.scan_tokens() # Discard 158 | self.assertEqual(scanner.line, 6) 159 | 160 | def test_builtin(self): 161 | scanner = Scanner("{#builtin_name}") 162 | tokens = scanner.scan_tokens() 163 | self.assertEqual(len(tokens), 2) # Parsed symbol, plus EOF 164 | token = tokens[0] 165 | self.assertIs(token.token_type, TokenType.BUILTIN) 166 | self.assertEqual(token.literal, "builtin_name") 167 | 168 | def test_typename(self): 169 | scanner = Scanner("Type") 170 | tokens = scanner.scan_tokens() 171 | self.assertEqual(len(tokens), 2) # Parsed symbol, plus EOF 172 | token = tokens[0] 173 | self.assertIs(token.token_type, TokenType.TYPENAME) 174 | self.assertEqual(token.lexeme, "Type") 175 | 176 | 177 | class InvalidTokensTest(unittest.TestCase): 178 | def test_unclosed_comment(self): 179 | scanner = Scanner("/* unclosed comment") 180 | with self.assertRaises(Exception): 181 | scanner.scan_tokens() 182 | 183 | def test_unopened_comment(self): 184 | scanner = Scanner("forgot to remove uncomment */") 185 | with self.assertRaises(Exception): 186 | scanner.scan_tokens() 187 | 188 | def test_unclosed_string(self): 189 | scanner = Scanner('"a string with no close') 190 | with self.assertRaises(Exception): 191 | scanner.scan_tokens() 192 | 193 | def test_incomplete_string_escape(self): 194 | scanner = Scanner('"unclosed escape\\') 195 | with self.assertRaises(Exception): 196 | scanner.scan_tokens() 197 | 198 | def test_invalid_escape_sequence(self): 199 | scanner = Scanner(r'"invalid escape \q code"') 200 | with self.assertRaises(Exception): 201 | scanner.scan_tokens() 202 | 203 | def test_invalid_unicode_escape_sequence(self): 204 | scanner = Scanner(r'"invalid escape \x15') 205 | with self.assertRaises(Exception): 206 | scanner.scan_tokens() 207 | 208 | def test_unclosed_builtin(self): 209 | scanner = Scanner(r"{#broken_builtin") 210 | with self.assertRaises(Exception): 211 | scanner.scan_tokens() 212 | 213 | def test_bang_after_keyword(self): 214 | scanner = Scanner(r"let!") 215 | with self.assertRaises(Exception): 216 | scanner.scan_tokens() 217 | 218 | 219 | class ExprTest(unittest.TestCase): 220 | def test_literal_expression(self): 221 | scanner = Scanner("{#builtin_name}") 222 | tokens = scanner.scan_tokens() 223 | self.assertEqual(repr(tokens[0]), "TokenType.BUILTIN '{#builtin_name}'") 224 | 225 | def test_other_expression(self): 226 | scanner = Scanner("+") 227 | tokens = scanner.scan_tokens() 228 | self.assertEqual(repr(tokens[0]), "TokenType.SYMBOLIC '+' None") 229 | --------------------------------------------------------------------------------