├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── bin
    └── modl_cli.py
├── modl.EBNF
├── modl
    ├── __init__.py
    ├── expr.py
    ├── interpreter.py
    ├── parser.py
    ├── scanner.py
    └── tokens.py
├── std.dl
├── test.dl
└── tests
    ├── __init__.py
    ├── test_interpreter.py
    ├── test_parser.py
    └── test_scanner.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .nox/
 42 | .coverage
 43 | .coverage.*
 44 | .cache
 45 | nosetests.xml
 46 | coverage.xml
 47 | *.cover
 48 | .hypothesis/
 49 | .pytest_cache/
 50 | 
 51 | # Translations
 52 | *.mo
 53 | *.pot
 54 | 
 55 | # Django stuff:
 56 | *.log
 57 | local_settings.py
 58 | db.sqlite3
 59 | 
 60 | # Flask stuff:
 61 | instance/
 62 | .webassets-cache
 63 | 
 64 | # Scrapy stuff:
 65 | .scrapy
 66 | 
 67 | # Sphinx documentation
 68 | docs/_build/
 69 | 
 70 | # PyBuilder
 71 | target/
 72 | 
 73 | # Jupyter Notebook
 74 | .ipynb_checkpoints
 75 | 
 76 | # IPython
 77 | profile_default/
 78 | ipython_config.py
 79 | 
 80 | # pyenv
 81 | .python-version
 82 | 
 83 | # celery beat schedule file
 84 | celerybeat-schedule
 85 | 
 86 | # SageMath parsed files
 87 | *.sage.py
 88 | 
 89 | # Environments
 90 | .env
 91 | .venv
 92 | env/
 93 | venv/
 94 | ENV/
 95 | env.bak/
 96 | venv.bak/
 97 | 
 98 | # Spyder project settings
 99 | .spyderproject
100 | .spyproject
101 | 
102 | # Rope project settings
103 | .ropeproject
104 | 
105 | # mkdocs documentation
106 | /site
107 | 
108 | # mypy
109 | .mypy_cache/
110 | .dmypy.json
111 | dmypy.json
112 | 
113 | # Pyre type checker
114 | .pyre/
115 | *.swp
116 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Tordek
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | MODL
 2 | ====
 3 | 
 4 | My Own Dumb Language
 5 | 
 6 | Built by following @munificent's [great guide](http://www.craftinginterpreters.com/).
 7 | 
 8 | Run the interpreter, call "use test;" then "fibo 5;" to see the extremely exciting value of the fifth element in the fibonacci sequence!
 9 | 
10 | Syntax
11 | ------
12 | 
13 | The language has very little syntax because reasons:
14 | 
15 |     use "filename";
16 | 
17 | just runs everything as if it were run right there. TODO: `use "filename" as namespace` to organize stuff.
18 | 
19 | To define variables:
20 | 
21 |     let name <- value;
22 | 
23 | But you can define several at once:
24 | 
25 |     let name1 <- value1,
26 |         name2 <- value2;
27 | 
28 | See below for details.
29 | 
30 | Expressions are separated by `;`, and the last one is the value returned by the whole exprression (i.e., function).
31 | 
32 | Function definition is done with:
33 | 
34 |     { arguments |
35 |       expression1;
36 |       expression2;
37 |     }
38 | 
39 | When run, both are executed in order (ideally, you're a good functional boy and that's useless; however, `!` functions exist).
40 | 
41 | `!` functions are kinda special: In order to allow for zero-argument functions like `read!` (for reading input from the user), whenever the first call in an expression is a `!` function, an implicit `!` is passed, but `!` isn't valid explicitly anywhere.
42 | 
43 | i.e.,
44 | 
45 |     read!;
46 | 
47 | is actually parsed as
48 | 
49 |     read! !;
50 | 
51 | but
52 | 
53 |     map read! list;
54 | 
55 | won't work because read! won't have its `!`. You need a `map!` defined with an `f!` argument.
56 | 
57 | Scoping
58 | -------
59 | 
60 | Scope is where the let is.
61 | 
62 | Every assignment begins a new scope; values can't see assignments that happen later. However, within a single let, the scope is shared. This is useful for defining mutually-recursive functions, like the extremely useful `odd` and `even` pair in the `test.dl` file. Be careful, however: things are still evaluated in order, so this:
63 | 
64 |     let a <- add1 5,
65 |         add1 <- { x | x + 1; };
66 | 
67 | won't work because `a` tries to evaluate `add1` before it's been defined. You need to do it the other way around:
68 | 
69 |      let add1 <- { x | x + 1; },
70 |          a <- add1 5;
71 | 
72 | but you should probably have done it in two `let`s anyway.
73 | 
74 | TODO:
75 | -----
76 | 
77 | - Make the interpreter able to work on partial (and multiple!) input
78 | - Make the scanner and parser not take the whole input in the constructor (ew, state)
79 | - Make `!` not be able to cross function boundaries, so it must be thread through `!` functions.
80 | 


--------------------------------------------------------------------------------
/bin/modl_cli.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | 
 4 | sys.path.insert(0, os.path.abspath("."))
 5 | 
 6 | import codecs
 7 | import traceback
 8 | 
 9 | from modl.parser import Parser
10 | from modl.scanner import Scanner
11 | from modl import interpreter
12 | 
13 | 
14 | def main(args):
15 |     if len(args) > 2:
16 |         print("Usage: {} [script]".format(sys.args[0]), file=sys.stderr)
17 |         exit(-1)
18 |     elif len(args) == 2:
19 |         exit(run_file(args[1]))
20 |     else:
21 |         run_prompt()
22 | 
23 | 
24 | def run_file(path):
25 |     with codecs.open(path, encoding="utf8") as script:
26 |         env = interpreter.get_default_env()
27 |         scanner = Scanner(command)
28 |         parser = Parser(scanner.scan_tokens())
29 | 
30 |         for statement in parser.program():
31 |             (result, env) = interpreter.interpret(statement, env)
32 | 
33 |         return result
34 | 
35 | 
36 | def run_prompt():
37 |     env = interpreter.get_default_env()
38 |     while True:
39 |         try:
40 |             command = input("> ")
41 |             scanner = Scanner(command)
42 |             parser = Parser(scanner.scan_tokens())
43 |             (result, env) = interpreter.interpret(parser.statement(), env)
44 |             print(result)
45 |             hadError = False
46 |         except Exception as e:
47 |             print(traceback.format_exc())
48 | 
49 | 
50 | def error(line, message):
51 |     report(line, "", message)
52 | 
53 | 
54 | def report(line, where, message):
55 |     global hadError
56 |     print("[line {}] Error{}: {}".format(line, where, message), file=sys.stderr)
57 |     hadError = True
58 | 
59 | 
60 | if __name__ == "__main__":
61 |     main(sys.argv)
62 | 


--------------------------------------------------------------------------------
/modl.EBNF:
--------------------------------------------------------------------------------
 1 | program = statement* ;
 2 | statement = stmt SEMICOLON ;
 3 | stmt = use
 4 |      | let
 5 |      | symchain
 6 |      ;
 7 | 
 8 | use = USE STRING ;
 9 | let = LET let_body (COMMA let_body)* ;
10 | let_body = ( IDENTIFIER | SYMBOLIC ) LEFT_ARROW symchain ;
11 | 
12 | symchain = expression (SYMBOLIC symchain)* (COLON type_signature)? ;
13 | 
14 | expression = primary (primary)* ;
15 | 
16 | type_signature = typename (RIGHT_ARROW type_signature)* ;
17 | 
18 | primary = function
19 |         | cond
20 |         | STRING
21 |         | NUMBER
22 |         | IDENTIFIER
23 |         | BUILTIN
24 |         | OPEN_PARENTHESES SYMBOLIC CLOSE_PARENTHESES
25 |         | OPEN_PARENTHESES symchain CLOSE_PARENTHESES
26 |         ;
27 | 
28 | cond = COND (PIPE symchain LEFT_ARROW statement+)+ ;
29 | 
30 | function = OPEN_BRACE (BANG | identifier) identifier* PIPE statement+ CLOSE_BRACE ;
31 | 


--------------------------------------------------------------------------------
/modl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tordek/modl/1cd862479eff44541919c0a4a0cd274963f12d58/modl/__init__.py


--------------------------------------------------------------------------------
/modl/expr.py:
--------------------------------------------------------------------------------
  1 | class TypedExpression:
  2 |     types = []
  3 | 
  4 | 
  5 | class Use:
  6 |     def __init__(self, filename):
  7 |         self.filename = filename
  8 | 
  9 |     def __repr__(self):
 10 |         return "USE " + repr(self.filename)
 11 | 
 12 | 
 13 | class Let:
 14 |     def __init__(self, assignments):
 15 |         self.assignments = assignments
 16 | 
 17 |     def __repr__(self):
 18 |         return (
 19 |             "(DEFINE "
 20 |             + "\n        ".join(
 21 |                 repr(name) + " " + repr(value) for (name, value) in self.assignments
 22 |             )
 23 |             + ")"
 24 |         )
 25 | 
 26 | 
 27 | class Symchain(TypedExpression):
 28 |     def __init__(self, left, op, right):
 29 |         self.left = left
 30 |         self.op = op
 31 |         self.right = right
 32 | 
 33 |     def __repr__(self):
 34 |         return (
 35 |             "(" + repr(self.op) + " " + repr(self.left) + " " + repr(self.right) + ")"
 36 |         )
 37 | 
 38 | 
 39 | class Identifier(TypedExpression):
 40 |     def __init__(self, name):
 41 |         self.name = name
 42 | 
 43 |     def __repr__(self):
 44 |         return self.name
 45 | 
 46 | 
 47 | class Expression(TypedExpression):
 48 |     def __init__(self, call):
 49 |         self.call = call
 50 | 
 51 |     def __repr__(self):
 52 |         return "(" + " ".join(repr(v) for v in self.call) + ")"
 53 | 
 54 | 
 55 | class Function(TypedExpression):
 56 |     def __init__(self, args, body):
 57 |         self.args = args
 58 |         self.body = body
 59 | 
 60 |     def __repr__(self):
 61 |         return (
 62 |             "(LAMBDA ("
 63 |             + " ".join(repr(arg) for arg in self.args)
 64 |             + ") "
 65 |             + " ".join(repr(st) for st in self.body)
 66 |             + ")"
 67 |         )
 68 | 
 69 | 
 70 | class Literal(TypedExpression):
 71 |     def __init__(self, value):
 72 |         self.value = value
 73 | 
 74 |     def __repr__(self):
 75 |         return repr(self.value)
 76 | 
 77 | 
 78 | class Builtin(TypedExpression):
 79 |     def __init__(self, name):
 80 |         self.name = name
 81 | 
 82 |     def __repr__(self):
 83 |         return "#" + self.name
 84 | 
 85 | 
 86 | class Conditional(TypedExpression):
 87 |     def __init__(self, cases):
 88 |         self.cases = cases
 89 | 
 90 |     def __repr__(self):
 91 |         result = "(COND "
 92 |         for cond, body in self.cases:
 93 |             result += "("
 94 |             result += repr(cond)
 95 |             if len(body) == 1:
 96 |                 result += " "
 97 |                 result += repr(body[0])
 98 |             else:
 99 |                 result += "(progn"
100 |                 for st in body:
101 |                     result += "\n"
102 |                     result += repr(st)
103 |                 result += ")"
104 |             result += ") "
105 |         result += ")"
106 |         return result
107 | 


--------------------------------------------------------------------------------
/modl/interpreter.py:
--------------------------------------------------------------------------------
  1 | from collections import ChainMap
  2 | from . import expr as expr
  3 | from .parser import Parser
  4 | from .scanner import Scanner
  5 | 
  6 | BUILTIN = {
  7 |     "print": lambda x: print(x),
  8 |     "add": lambda x, y: x + y,
  9 |     "sub": lambda x, y: x - y,
 10 |     "mul": lambda x, y: x * y,
 11 |     "fdiv": lambda x, y: x / y,
 12 |     "mod": lambda x, y: x % y,
 13 |     "read": lambda _: input(),
 14 |     "eq": lambda x, y: x == y,
 15 |     "gt": lambda x, y: x > y,
 16 |     "if": lambda c, t, f: t if c else f,
 17 |     "cons": lambda h, t: (h, t),
 18 |     "head": lambda l: l[0],
 19 |     "tail": lambda l: l[1],
 20 |     "true": True,
 21 |     "false": False,
 22 |     "empty": (),
 23 | }
 24 | 
 25 | 
 26 | class Function:
 27 |     def __init__(self, function, environment):
 28 |         self.function = function
 29 |         self.environment = environment
 30 | 
 31 | 
 32 | class TailCall:
 33 |     def __init__(self, f, params):
 34 |         self.f = f
 35 |         self.params = params
 36 | 
 37 | 
 38 | def get_default_env():
 39 |     env = ChainMap()
 40 |     env["!"] = "!"
 41 |     return env
 42 | 
 43 | 
 44 | def interpret(statement, environment, is_tail_call=False):
 45 |     if isinstance(statement, expr.Literal):
 46 |         return (statement.value, environment)
 47 |     elif isinstance(statement, expr.Identifier):
 48 |         return (environment[statement.name], environment)
 49 |     elif isinstance(statement, expr.Function):
 50 |         return (Function(statement, environment), environment)
 51 |     elif isinstance(statement, expr.Builtin):
 52 |         return (BUILTIN[statement.name], environment)
 53 |     elif isinstance(statement, expr.Use):
 54 |         with open(statement.filename) as file:
 55 |             contents = file.read()
 56 |             scanner = Scanner(contents)
 57 |             parser = Parser(scanner.scan_tokens())
 58 |             for statement in parser.program():
 59 |                 result, environment = interpret(statement, environment)
 60 |             return (result, environment)
 61 |     elif isinstance(statement, expr.Let):
 62 |         environment = environment.new_child()
 63 |         for (name, value) in statement.assignments:
 64 |             (evaluated_value, _) = interpret(value, environment)
 65 |             environment[name.name] = evaluated_value
 66 |         return (None, environment)
 67 |     elif isinstance(statement, expr.Expression):
 68 |         results = []
 69 |         for s in statement.call:
 70 |             result, _ = interpret(s, environment)
 71 |             results.append(result)
 72 |         if is_tail_call:
 73 |             return TailCall(results[0], results[1:])
 74 |         else:
 75 |             return (do_call(*results), environment)
 76 |     elif isinstance(statement, expr.Symchain):
 77 |         (left, _) = interpret(statement.left, environment)
 78 |         (op, _) = interpret(statement.op, environment)
 79 |         (right, _) = interpret(statement.right, environment)
 80 |         if is_tail_call:
 81 |             return TailCall(op, [left, right])
 82 |         else:
 83 |             return (do_call(op, left, right), environment)
 84 |     elif isinstance(statement, expr.Conditional):
 85 |         # Discard the environment from the evaluation
 86 |         # to disallow `let`s done inside a case
 87 |         for condition, body in statement.cases:
 88 |             value, _ = interpret(condition, environment)
 89 |             if value is True:
 90 |                 env = environment
 91 |                 for st in body[:-1]:
 92 |                     result, env = interpret(st, env)
 93 | 
 94 |                 if is_tail_call:
 95 |                     return interpret(body[-1], env, True)
 96 |                 else:
 97 |                     result, _ = interpret(nody[-1], env)
 98 |                     return (result, environment)
 99 |             elif value is False:
100 |                 continue
101 |             else:
102 |                 raise Exception("Type mismatch, condition must be boolean", value)
103 |         return (None, environment)  # All conditions were false
104 |     else:
105 |         raise Exception("Trying to run unknown thing", statement)
106 | 
107 | 
108 | def do_call(f, *params):
109 |     while True:
110 |         # Currying, but optimized if there are multiple parameters
111 |         if isinstance(f, Function):
112 |             f_env = f.environment.new_child()
113 |             args = f.function.args
114 |             for name, value in zip(args, params):
115 |                 f_env[name.name] = value
116 |             if len(args) > len(params):
117 |                 return Function(
118 |                     expr.Function(args[len(params) :], f.function.body), f_env
119 |                 )
120 |             elif len(args) == len(params):
121 |                 v = None
122 |                 for statement in f.function.body[:-1]:
123 |                     (v, f_env) = interpret(statement, f_env)
124 |                 result = interpret(f.function.body[-1], f_env, True)
125 |                 if isinstance(result, TailCall):
126 |                     f = result.f
127 |                     params = result.params
128 |                     continue
129 |                 else:
130 |                     return result[0]
131 |             else:
132 |                 raise Exception(f, "Function received too many parameters")
133 |         elif callable(f):
134 |             return f(*params)
135 |         else:
136 |             raise Exception(f, "Tried to call a non-function object")
137 | 


--------------------------------------------------------------------------------
/modl/parser.py:
--------------------------------------------------------------------------------
  1 | from . import expr
  2 | from .tokens import Token, TokenType
  3 | 
  4 | 
  5 | class Parser:
  6 |     def __init__(self, tokens):
  7 |         self.tokens = tokens
  8 |         self.current = 0
  9 | 
 10 |     def program(self):
 11 |         statements = []
 12 |         while not self.is_at_end():
 13 |             statements.append(self.statement())
 14 |         return statements
 15 | 
 16 |     def statement(self):
 17 |         stmt = None
 18 |         if self.match(TokenType.USE):
 19 |             string = self.consume("Need a filename to import", TokenType.STRING)
 20 |             stmt = expr.Use(string.literal)
 21 |         elif self.match(TokenType.LET):
 22 |             stmt = self.let()
 23 |         else:
 24 |             stmt = self.symchain()
 25 |         self.consume("Expect ';' at end of statement.", TokenType.SEMICOLON)
 26 |         return stmt
 27 | 
 28 |     def let(self):
 29 |         assignments = []
 30 |         while True:
 31 |             identifier = None
 32 |             if self.match(TokenType.IDENTIFIER, TokenType.SYMBOLIC):
 33 |                 # We only care about the name
 34 |                 identifier = expr.Identifier(self.previous().lexeme)
 35 |             else:
 36 |                 raise Exception(self.peek(), "Can't assign to this")
 37 | 
 38 |             self.consume("Missing <-", TokenType.LEFT_ARROW)
 39 |             value = self.symchain()
 40 |             assignments.append((identifier, value))
 41 |             if not self.match(TokenType.COMMA):
 42 |                 break
 43 |         return expr.Let(assignments)
 44 | 
 45 |     def symchain(self):
 46 |         e = self.expression()
 47 | 
 48 |         while self.match(TokenType.SYMBOLIC):
 49 |             # We only care about the name
 50 |             symbol = expr.Identifier(self.previous().lexeme)
 51 |             right = self.symchain()
 52 |             e = expr.Symchain(e, symbol, right)
 53 | 
 54 |         types = []
 55 |         if self.match(TokenType.COLON):
 56 |             t = self.consume("Expected a type after :", TokenType.TYPENAME)
 57 |             types.append(t.lexeme)
 58 | 
 59 |             while self.match(TokenType.RIGHT_ARROW):
 60 |                 t = self.consume("Expected a type after ->", TokenType.TYPENAME)
 61 |                 types.append(t.lexeme)
 62 |         e.types = types
 63 |         return e
 64 | 
 65 |     def expression(self):
 66 |         chain = []
 67 | 
 68 |         while True:
 69 |             e = self.try_primary()
 70 |             if e is None:
 71 |                 break
 72 |             chain.append(e)
 73 | 
 74 |         head = chain[0]
 75 |         # Special case for bang functions: Add an implicit ! if they're
 76 |         # the first in the call
 77 |         if isinstance(head, expr.Identifier) and head.name[-1] == "!":
 78 |             chain = [chain[0], expr.Identifier("!")] + chain[1:]
 79 | 
 80 |         if len(chain) == 1:
 81 |             return chain[0]
 82 |         else:
 83 |             return expr.Expression(chain)
 84 | 
 85 |     def try_primary(self):
 86 |         if self.match(TokenType.OPEN_BRACKETS):
 87 |             argument_list = []
 88 |             head_arg = self.consume(
 89 |                 "Argument list cannot be empty", TokenType.BANG, TokenType.IDENTIFIER
 90 |             )
 91 |             argument_list.append(expr.Identifier(head_arg.lexeme))
 92 |             while self.match(TokenType.IDENTIFIER):
 93 |                 argument_list.append(expr.Identifier(self.previous().lexeme))
 94 |             self.consume("Missing argument delimiter", TokenType.PIPE)
 95 |             body = [self.statement()]
 96 |             while not self.check(TokenType.CLOSE_BRACKETS):
 97 |                 e = self.statement()
 98 |                 body.append(e)
 99 |             self.consume("Unclosed function definition", TokenType.CLOSE_BRACKETS)
100 |             return expr.Function(argument_list, body)
101 |         elif self.match(TokenType.STRING, TokenType.B10_FLOAT, TokenType.B10_INTEGER):
102 |             return expr.Literal(self.previous().literal)
103 |         elif self.match(TokenType.IDENTIFIER):
104 |             return expr.Identifier(self.previous().lexeme)
105 |         elif self.match(TokenType.BUILTIN):
106 |             return expr.Builtin(self.previous().literal)
107 |         elif self.match(TokenType.OPEN_PARENTHESES):
108 |             if self.check(TokenType.SYMBOLIC):
109 |                 symbol = self.match(TokenType.SYMBOLIC)
110 |                 identifier = expr.Identifier(self.previous().lexeme)
111 |                 self.consume(
112 |                     "Symbol expressions can only contain a symbol",
113 |                     TokenType.CLOSE_PARENTHESES,
114 |                 )
115 |                 return identifier
116 |             e = self.symchain()
117 |             self.consume("Missing closing parentheses", TokenType.CLOSE_PARENTHESES)
118 |             return e
119 |         elif self.match(TokenType.COND):
120 |             cases = []
121 |             while True:
122 |                 self.consume("Missing condition delimiter", TokenType.PIPE)
123 |                 condition = self.symchain()
124 |                 self.consume("Missing condition delimiter", TokenType.RIGHT_ARROW)
125 | 
126 |                 body = [self.statement()]
127 |                 while not self.check(
128 |                     TokenType.PIPE, TokenType.SEMICOLON, TokenType.COLON
129 |                 ):
130 |                     body.append(self.statement())
131 |                 cases.append((condition, body))
132 | 
133 |                 if not self.check(TokenType.PIPE):
134 |                     break
135 | 
136 |             return expr.Conditional(cases)
137 |         else:
138 |             return None
139 | 
140 |     def check(self, *types):
141 |         if self.is_at_end():
142 |             return False
143 |         return self.peek().token_type in types
144 | 
145 |     def match(self, *types):
146 |         if self.check(*types):
147 |             self.advance()
148 |             return True
149 |         return False
150 | 
151 |     def consume(self, message, *types):
152 |         if self.check(*types):
153 |             return self.advance()
154 | 
155 |         raise Exception(self.peek(), message)
156 | 
157 |     def advance(self):
158 |         if not self.is_at_end():
159 |             self.current += 1
160 |         return self.previous()
161 | 
162 |     def is_at_end(self):
163 |         return self.peek().token_type == TokenType.EOF
164 | 
165 |     def peek(self):
166 |         return self.tokens[self.current]
167 | 
168 |     def previous(self):
169 |         return self.tokens[self.current - 1]
170 | 


--------------------------------------------------------------------------------
/modl/scanner.py:
--------------------------------------------------------------------------------
  1 | from .tokens import Token, TokenType
  2 | 
  3 | 
  4 | class Scanner:
  5 |     # These tokens may not appear as part of a longer token
  6 |     unique_tokens = {
  7 |         ";": TokenType.SEMICOLON,
  8 |         "{": TokenType.OPEN_BRACKETS,
  9 |         "}": TokenType.CLOSE_BRACKETS,
 10 |         "(": TokenType.OPEN_PARENTHESES,
 11 |         ")": TokenType.CLOSE_PARENTHESES,
 12 |         ",": TokenType.COMMA,
 13 |     }
 14 | 
 15 |     # These tokens are reserved, but may appear as part of a longer token.
 16 |     reserved_symbols = {
 17 |         "->": TokenType.RIGHT_ARROW,
 18 |         "<-": TokenType.LEFT_ARROW,
 19 |         "!": TokenType.BANG,
 20 |         "|": TokenType.PIPE,
 21 |         ":": TokenType.COLON,
 22 |     }
 23 | 
 24 |     # Reserved words may appear as part of a longer token
 25 |     reserved_words = {
 26 |         "use": TokenType.USE,
 27 |         "let": TokenType.LET,
 28 |         "cond": TokenType.COND,
 29 |     }
 30 | 
 31 |     def __init__(self, source):
 32 |         self.source = source
 33 | 
 34 |         self.start = 0
 35 |         self.current = 0
 36 |         self.line = 1
 37 |         self.tokens = []
 38 | 
 39 |     def scan_tokens(self):
 40 |         while not self.is_at_end():
 41 |             self.start = self.current
 42 |             self.scan_token()
 43 | 
 44 |         self.tokens.append(Token(TokenType.EOF, "", None, self.line))
 45 |         return self.tokens
 46 | 
 47 |     def is_at_end(self):
 48 |         return self.current >= len(self.source)
 49 | 
 50 |     def scan_token(self):
 51 |         c = self.peek()
 52 |         if self.match("{#"):
 53 |             self.builtin()
 54 |         elif self.match("/*"):
 55 |             self.comment()
 56 |         elif c in self.unique_tokens:
 57 |             self.advance()
 58 |             self.add_token(self.unique_tokens[c])
 59 |         elif c in ["-", "."] or c.isnumeric():
 60 |             self.number()
 61 |         elif self.match("\n"):
 62 |             self.line += 1
 63 |         elif c.isspace():
 64 |             self.advance()
 65 |         elif self.match('"'):
 66 |             self.string()
 67 |         elif c.isupper():
 68 |             self.typename()
 69 |         elif c.isalpha():
 70 |             self.identifier()
 71 |         else:
 72 |             # All other characters count as symbols
 73 |             self.symbolic()
 74 | 
 75 |     def builtin(self):
 76 |         while self.valid_in_identifier(self.peek()):
 77 |             self.advance()
 78 |         if not self.match("}"):
 79 |             raise Exception(self.line, "Unterminated builtin literal")
 80 |         self.add_token(TokenType.BUILTIN, self.current_lexeme()[2:-1])
 81 | 
 82 |     def number(self):
 83 |         self.match("-")  # May match a starting -
 84 |         self.match(".")  # May match .??? or -.???
 85 | 
 86 |         is_number = False
 87 |         while True:
 88 |             c = self.peek()
 89 |             if c is None:
 90 |                 break
 91 |             elif c.isnumeric():
 92 |                 self.advance()
 93 |                 is_number = True
 94 |             elif c == ".":
 95 |                 self.advance()
 96 |             else:
 97 |                 break
 98 | 
 99 |         if not is_number:
100 |             return self.symbolic()
101 | 
102 |         if "." in self.current_lexeme():
103 |             self.add_token(TokenType.B10_FLOAT, float(self.current_lexeme()))
104 |         else:
105 |             self.add_token(TokenType.B10_INTEGER, int(self.current_lexeme(), 10))
106 | 
107 |     def typename(self):
108 |         while self.valid_in_identifier(self.peek()):
109 |             self.advance()
110 | 
111 |         self.add_token(TokenType.TYPENAME)
112 | 
113 |     def identifier(self):
114 |         while self.valid_in_identifier(self.peek()):
115 |             self.advance()
116 | 
117 |         lexeme = self.current_lexeme()
118 |         if lexeme in self.reserved_words:
119 |             if self.match("!"):
120 |                 raise Exception("Can't use bangs after reserved words.")
121 |             self.add_token(self.reserved_words[lexeme])
122 |         else:
123 |             self.match("!")
124 |             self.add_token(TokenType.IDENTIFIER)
125 | 
126 |     def valid_in_identifier(self, c):
127 |         if c is None:
128 |             return False
129 |         if c.isalnum():
130 |             return True
131 |         if c in "'_":
132 |             return True
133 |         return False
134 | 
135 |     def symbolic(self):
136 |         while self.valid_as_symbolic(self.peek()):
137 |             self.advance()
138 |         lexeme = self.current_lexeme()
139 |         if lexeme == "*/":
140 |             raise Exception(self.line, "Unopened comment.")
141 |         elif lexeme in self.reserved_symbols:
142 |             self.add_token(self.reserved_symbols[lexeme])
143 |         else:
144 |             self.add_token(TokenType.SYMBOLIC)
145 | 
146 |     def valid_as_symbolic(self, c):
147 |         if c is None:
148 |             return False
149 |         elif c.isspace():
150 |             return False
151 |         elif c.isalnum():
152 |             return False
153 |         elif c in self.unique_tokens:
154 |             return False
155 |         else:
156 |             return True
157 | 
158 |     def string(self):
159 |         literal = ""
160 |         while self.peek() != '"' and not self.is_at_end():
161 |             c = self.peek()  # Sure would like to have the walrus op
162 |             if c == "\n":
163 |                 self.line += 1
164 | 
165 |             if c == "\\":  # Escape characters
166 |                 self.advance()
167 |                 if self.is_at_end():
168 |                     break
169 |                 e = self.peek()
170 |                 if e == "n":
171 |                     literal += "\n"
172 |                 elif e == "r":
173 |                     literal += "\r"
174 |                 elif e == "t":
175 |                     literal += "\t"
176 |                 elif e == "\\":
177 |                     literal += "\\"
178 |                 elif e == '"':
179 |                     literal += '"'
180 |                 elif e == "x":
181 |                     # fetch 4 characters as hex and decode as unicode
182 |                     codepoint = self.source[self.current + 1 : self.current + 5]
183 |                     if len(codepoint) < 4:
184 |                         raise Exception(self.line, "Unterminated string")
185 |                     literal += chr(int(codepoint, 16))
186 |                     self.current += 4
187 |                 else:
188 |                     raise Exception(self.line, "\\" + e + " is not an escape sequence")
189 |             else:
190 |                 literal += c
191 |             self.advance()
192 | 
193 |         if self.is_at_end():
194 |             raise Exception(self.line, "Unterminated string")
195 | 
196 |         self.advance()  # Closing "
197 | 
198 |         self.add_token(TokenType.STRING, literal)
199 | 
200 |     def comment(self):
201 |         while not self.match("*/"):
202 |             if self.is_at_end():
203 |                 raise Exception(self.line, "Unterminated comment")
204 |             if self.peek() == "\n":
205 |                 self.line += 1
206 |             self.advance()
207 | 
208 |         # self.add_token(TokenType.COMMENT, self.current_lexeme())
209 | 
210 |     def advance(self):
211 |         self.current += 1
212 |         return self.source[self.current - 1]
213 | 
214 |     def peek(self, n=1):
215 |         if self.current + n > len(self.source):
216 |             return None
217 |         return self.source[self.current : self.current + n]
218 | 
219 |     def current_lexeme(self):
220 |         return self.source[self.start : self.current]
221 | 
222 |     def add_token(self, token_type, literal=None):
223 |         self.tokens.append(Token(token_type, self.current_lexeme(), literal, self.line))
224 | 
225 |     def match(self, c):
226 |         if self.is_at_end():
227 |             return False
228 |         if self.peek(len(c)) != c:
229 |             return False
230 |         self.current += len(c)
231 |         return True
232 | 


--------------------------------------------------------------------------------
/modl/tokens.py:
--------------------------------------------------------------------------------
 1 | import enum
 2 | 
 3 | 
 4 | class TokenType(enum.Enum):
 5 |     # Single-Character tokens
 6 |     SEMICOLON = enum.auto()
 7 |     COLON = enum.auto()
 8 |     PIPE = enum.auto()
 9 |     BANG = enum.auto()
10 |     COMMA = enum.auto()
11 |     OPEN_BRACKETS = enum.auto()
12 |     CLOSE_BRACKETS = enum.auto()
13 |     OPEN_PARENTHESES = enum.auto()
14 |     CLOSE_PARENTHESES = enum.auto()
15 |     COND = enum.auto()
16 | 
17 |     # Short tokens
18 |     LEFT_ARROW = enum.auto()
19 |     RIGHT_ARROW = enum.auto()
20 |     COMMENT_START = enum.auto()
21 |     COMMENT_END = enum.auto()
22 |     EQUALS = enum.auto()
23 | 
24 |     # Symbol tokens, anything not beginning in isalpha() that isn't above
25 |     SYMBOLIC = enum.auto()
26 | 
27 |     # Keywords
28 |     USE = enum.auto()
29 |     LET = enum.auto()
30 | 
31 |     # Literals
32 |     IDENTIFIER = enum.auto()  # !isupper()[isalpha()'_|]+!?
33 |     TYPENAME = enum.auto()  # isupper()(isalpha'_|)+
34 |     STRING = enum.auto()  # ".*"
35 |     B10_INTEGER = enum.auto()
36 |     B10_FLOAT = enum.auto()
37 |     BUILTIN = enum.auto()
38 | 
39 |     EOF = enum.auto()
40 |     pass
41 | 
42 | 
43 | class Token:
44 |     def __init__(self, token_type, lexeme, literal, line):
45 |         self.token_type = token_type
46 |         self.lexeme = lexeme
47 |         self.literal = literal
48 |         self.line = line
49 | 
50 |     def __repr__(self):
51 |         if self.literal:
52 |             return "{} '{}'".format(self.token_type, self.lexeme)
53 |         else:
54 |             return "{} '{}' {}".format(self.token_type, self.lexeme, self.literal)
55 | 


--------------------------------------------------------------------------------
/std.dl:
--------------------------------------------------------------------------------
  1 | /* The "Standard Library" */
  2 | let true <- {#true};
  3 | let false <- {#false};
  4 | let empty <- {#empty};
  5 | let otherwise <- true;
  6 | 
  7 | /* IO */
  8 | let print! <- { ! s | {#print} s; };
  9 | let read! <- { ! | {#read} 0; };
 10 | 
 11 | /* Boolean. NOT short-circuited. */
 12 | let || <- { x y |
 13 |     cond
 14 |     | x -> true;
 15 |     | otherwise -> y;
 16 |     ;
 17 | };
 18 | 
 19 | let && <- { x y |
 20 |     cond
 21 |     | x -> y;
 22 |     | otherwise -> false;
 23 |     ;
 24 | };
 25 | 
 26 | let not <- { x |
 27 |     cond
 28 |     | x -> false;
 29 |     | otherwise -> true;
 30 |     ;
 31 | };
 32 | 
 33 | /* Arithmetics */
 34 | let + <- { x y | {#add} x y; };
 35 | let - <- { x y | {#sub} x y; };
 36 | let * <- { x y | {#mul} x y; };
 37 | let / <- { x y | {#fdiv} x y; };
 38 | let % <- { x y | {#mod} x y; };
 39 | 
 40 | /* Comparisons */
 41 | let == <- { x y | {#eq} x y; };
 42 | let != <- { x y | not (x == y); };
 43 | let > <- { x y | {#gt} x y; };
 44 | let >= <- { x y | (x == y) || (x > y); };
 45 | let < <- { x y | not (x >= y); };
 46 | let <= <- { x y | not (x > y); };
 47 | 
 48 | /* Bang! helpers */
 49 | let forever! <- { ! command! |
 50 |     command!;
 51 |     forever! command!;
 52 | };
 53 | 
 54 | let flip <- { f x y | f y x; };
 55 | 
 56 | /* List functions */
 57 | let :: <- { car cdr | {#cons} car cdr; };
 58 | let head <- { l | {#head} l; };
 59 | let tail <- { l | {#tail} l; };
 60 | 
 61 | let foldl <- { f a l |
 62 |     cond
 63 |     | l == empty -> a;
 64 |     | otherwise -> foldl f (f a (head l)) (tail l);
 65 |     ;
 66 | };
 67 | 
 68 | let reverse <- foldl (flip (::)) empty;
 69 | 
 70 | let map_ <- { a f l |
 71 |     cond
 72 |     | l == empty -> reverse a;
 73 |     | otherwise -> map_ (f (head l) :: a) f (tail l);
 74 |     ;
 75 | }, map <- map_ empty;
 76 | 
 77 | let range_ <- { acc from to |
 78 |     cond
 79 |     | from == to -> reverse acc;
 80 |     | otherwise -> range_ (from :: acc) (from + 1) to;
 81 |     ;
 82 | }, range <- range_ empty;
 83 | 
 84 | let filter_ <- { a p l |
 85 |     cond
 86 |     | l == empty -> reverse a;
 87 |     | p (head l) -> filter_ (head l :: a) p (tail l);
 88 |     | otherwise -> filter_ a p (tail l);
 89 |     ;
 90 | }, filter <- filter_ empty;
 91 | 
 92 | let length <- foldl ((+) 1) 0;
 93 | 
 94 | /* Functional helpers */
 95 | let =>> <- { v f | f v; };
 96 | 
 97 | let >> <- { f g x | g (f x); };
 98 | 
 99 | let . <- { f g x | f (g x); };
100 | 
101 | let $ <- { f v | f v; };
102 | 


--------------------------------------------------------------------------------
/test.dl:
--------------------------------------------------------------------------------
 1 | use "std.dl";
 2 | 
 3 | /* Standard recursive fibonacci definition */
 4 | let fibo <- { x |
 5 |     cond
 6 |     | x == 0 -> 1;
 7 |     | x == 1 -> 1;
 8 |     | otherwise -> fibo (x - 1) + fibo (x - 2);
 9 |     ;
10 | };
11 | 
12 | /* Prettier, less deeply recursive fibonacci */
13 | let fib <- { a b x |
14 |   cond
15 |   | x == 0 -> a;
16 |   | otherwise -> fib b (a + b) (x - 1);
17 |   ;
18 | };
19 | 
20 | /* Currying is available */
21 | let flatfibo <- fib 1 1;
22 | 
23 | let tracefibo <- { a b x |
24 |   cond
25 |   | x == 0 -> print! "exiting";
26 |   ;
27 |   cond
28 |   | x == 0 -> a;
29 |   | otherwise ->
30 |     print! "going deeper";
31 |     tracefibo b (a + b) (x - 1);
32 |   ;
33 | };
34 | 
35 | /* Test for scope */
36 | let a <- "outer";
37 | let f! <- { ! |
38 |   let showA! <- { ! | print! a; };
39 |   showA!;
40 |   let a <- "inner";
41 |   showA!;
42 | };
43 | 
44 | /* Mutual recursion */
45 | let odd <- { x |
46 |   cond
47 |   | x == 0 -> false;
48 |   | otherwise -> even (x - 1);
49 |   ;
50 | },
51 | even <- { x |
52 |   cond
53 |   | x == 0 -> true;
54 |   | otherwise -> odd (x - 1);
55 |   ;
56 | };
57 | 
58 | /* As useful as expected, but proves TCE works. */
59 | let loop_forever! <- { ! | loop_forever!; };
60 | 
61 | let hello! <- { ! | forever! { ! | print! "Hello!"; }; };
62 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tordek/modl/1cd862479eff44541919c0a4a0cd274963f12d58/tests/__init__.py


--------------------------------------------------------------------------------
/tests/test_interpreter.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from modl import interpreter
 4 | from modl.scanner import Scanner
 5 | from modl.parser import Parser
 6 | 
 7 | 
 8 | class InterpretAllTheThings(unittest.TestCase):
 9 |     def test_use(self):
10 |         scanner = Scanner('use "std.dl";')
11 |         parser = Parser(scanner.scan_tokens())
12 |         env = interpreter.get_default_env()
13 |         result, env = interpreter.interpret(parser.statement(), env)
14 |         self.assertEqual(result, None)
15 |         self.assertIn("print!", env)
16 | 
17 |     def test_fibo(self):
18 |         scanner = Scanner('use "test.dl"; fibo 5;')
19 |         parser = Parser(scanner.scan_tokens())
20 |         env = interpreter.get_default_env()
21 |         for statement in parser.program():
22 |             result, env = interpreter.interpret(statement, env)
23 |         self.assertEqual(result, 8)
24 | 
25 |     def test_deep(self):
26 |         scanner = Scanner('use "test.dl"; flatfibo 500;')
27 |         parser = Parser(scanner.scan_tokens())
28 |         env = interpreter.get_default_env()
29 |         for statement in parser.program():
30 |             result, env = interpreter.interpret(statement, env)
31 | 
32 |         self.assertIsNotNone(result)
33 | 
34 |     def test_multideep(self):
35 |         scanner = Scanner('use "test.dl"; tracefibo 1 1 5;')
36 |         parser = Parser(scanner.scan_tokens())
37 |         env = interpreter.get_default_env()
38 |         for statement in parser.program():
39 |             result, env = interpreter.interpret(statement, env)
40 |         self.assertEqual(result, 8)
41 | 


--------------------------------------------------------------------------------
/tests/test_parser.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from modl.parser import Parser
  3 | from modl.scanner import Scanner
  4 | from modl import expr
  5 | 
  6 | 
  7 | class PrimaryTests(unittest.TestCase):
  8 |     literals = {"5;": 5, '"five";': "five", "5.0;": 5.0}
  9 | 
 10 |     def test_literals(self):
 11 |         for string, literal in self.literals.items():
 12 |             with self.subTest(i=string):
 13 |                 scanner = Scanner(string)
 14 |                 parser = Parser(scanner.scan_tokens())
 15 |                 statement = parser.statement()
 16 |                 self.assertIsInstance(statement, expr.Literal)
 17 |                 self.assertEqual(statement.value, literal)
 18 | 
 19 |     def test_builtin(self):
 20 |         scanner = Scanner("{#print};")
 21 |         parser = Parser(scanner.scan_tokens())
 22 |         statement = parser.statement()
 23 |         self.assertIsInstance(statement, expr.Builtin)
 24 |         self.assertEqual(statement.name, "print")
 25 | 
 26 |     def test_identifier(self):
 27 |         scanner = Scanner("ident;")
 28 |         parser = Parser(scanner.scan_tokens())
 29 |         statement = parser.statement()
 30 |         self.assertIsInstance(statement, expr.Identifier)
 31 |         self.assertEqual(statement.name, "ident")
 32 | 
 33 |     def test_symbols_cant_be_used_alone(self):
 34 |         scanner = Scanner("+;")
 35 |         parser = Parser(scanner.scan_tokens())
 36 |         with self.assertRaises(Exception):
 37 |             statement = parser.statement()
 38 | 
 39 |     def test_symbols_can_be_wrapped_in_parentheses(self):
 40 |         scanner = Scanner("(+);")
 41 |         parser = Parser(scanner.scan_tokens())
 42 |         statement = parser.statement()
 43 |         self.assertIsInstance(statement, expr.Identifier)
 44 |         self.assertEqual(statement.name, "+")
 45 | 
 46 |     def test_symbols_can_be_part_of_an_expression(self):
 47 |         scanner = Scanner("1 + 2;")
 48 |         parser = Parser(scanner.scan_tokens())
 49 |         statement = parser.statement()
 50 |         self.assertIsInstance(statement, expr.Symchain)
 51 |         self.assertIsInstance(statement.op, expr.Identifier)
 52 |         self.assertEqual(statement.op.name, "+")
 53 |         self.assertIsInstance(statement.left, expr.Literal)
 54 |         self.assertEqual(statement.left.value, 1)
 55 |         self.assertIsInstance(statement.right, expr.Literal)
 56 |         self.assertEqual(statement.right.value, 2)
 57 | 
 58 |     def test_parentheses_are_invisible(self):
 59 |         scanner = Scanner("(ident);")
 60 |         parser = Parser(scanner.scan_tokens())
 61 |         statement = parser.statement()
 62 |         self.assertIsInstance(statement, expr.Identifier)
 63 |         self.assertEqual(statement.name, "ident")
 64 | 
 65 |     def test_lambda(self):
 66 |         scanner = Scanner("{ x | x; };")
 67 |         parser = Parser(scanner.scan_tokens())
 68 |         statement = parser.statement()
 69 |         self.assertIsInstance(statement, expr.Function)
 70 |         self.assertEqual(len(statement.args), 1)
 71 |         self.assertEqual(len(statement.body), 1)
 72 | 
 73 |     def test_lambda_longer(self):
 74 |         scanner = Scanner("{ x y | x y; y x; x y;};")
 75 |         parser = Parser(scanner.scan_tokens())
 76 |         statement = parser.statement()
 77 |         self.assertIsInstance(statement, expr.Function)
 78 |         self.assertEqual(len(statement.args), 2)
 79 |         self.assertEqual(len(statement.body), 3)
 80 | 
 81 |     def test_lambda_cant_have_empty_parameter_list(self):
 82 |         scanner = Scanner("{ | 1; };")
 83 |         parser = Parser(scanner.scan_tokens())
 84 |         with self.assertRaises(Exception):
 85 |             statement = parser.statement()
 86 | 
 87 |     def test_lambda_cant_have_empty_body(self):
 88 |         scanner = Scanner("{ x | };")
 89 |         parser = Parser(scanner.scan_tokens())
 90 |         with self.assertRaises(Exception):
 91 |             statement = parser.statement()
 92 | 
 93 |     def test_cond(self):
 94 |         scanner = Scanner("cond | 1 -> 1; ;")
 95 |         parser = Parser(scanner.scan_tokens())
 96 |         statement = parser.statement()
 97 |         self.assertIsInstance(statement, expr.Conditional)
 98 | 
 99 |     def test_cond_with_many_actions(self):
100 |         scanner = Scanner("cond | x -> f x; 3; ;")
101 |         parser = Parser(scanner.scan_tokens())
102 |         statement = parser.statement()
103 |         self.assertIsInstance(statement, expr.Conditional)
104 | 
105 |     def test_cond_cant_be_empty(self):
106 |         scanner = Scanner("cond ;")
107 |         parser = Parser(scanner.scan_tokens())
108 |         with self.assertRaises(Exception):
109 |             statement = parser.statement()
110 | 
111 |     def test_cond_condition_cant_be_empty(self):
112 |         scanner = Scanner("cond | -> x; ;")
113 |         parser = Parser(scanner.scan_tokens())
114 |         with self.assertRaises(Exception):
115 |             statement = parser.statement()
116 | 
117 |     def test_cond_action_cant_be_empty(self):
118 |         scanner = Scanner("cond | x -> ;")
119 |         parser = Parser(scanner.scan_tokens())
120 |         with self.assertRaises(Exception):
121 |             statement = parser.statement()
122 | 
123 |     def test_bangs_are_special(self):
124 |         scanner = Scanner("!;")
125 |         parser = Parser(scanner.scan_tokens())
126 |         with self.assertRaises(Exception):
127 |             statement = parser.statement()
128 |             print(statement)
129 | 
130 |     def test_things_can_have_types(self):
131 |         scanner = Scanner("ident : Integer;")
132 |         parser = Parser(scanner.scan_tokens())
133 |         statement = parser.statement()
134 |         self.assertEqual(len(statement.types), 1)
135 |         self.assertEqual(statement.types[0], "Integer")
136 | 
137 |     def test_lambdas_can_have_types(self):
138 |         scanner = Scanner("{ x | x; } : Integer -> Integer;")
139 |         parser = Parser(scanner.scan_tokens())
140 |         statement = parser.statement()
141 |         self.assertEqual(len(statement.types), 2)
142 |         self.assertEqual(statement.types[0], "Integer")
143 |         self.assertEqual(statement.types[1], "Integer")
144 | 
145 |     def things_can_have_types_in_the_middle_of_other_things(self):
146 |         scanner = Scanner("ident : Integer + 5;")
147 |         parser = Parser(scanner.scan_tokens())
148 |         statement = parser.statement()
149 |         self.assertIsInstance(statement, expr.Expression)
150 |         self.assertEqual(statement[0].name, "ident")
151 |         self.assertEqual(len(statement[0].types), 1)
152 |         self.assertEqual(statement[0].types[0], "Integer")
153 | 
154 | 
155 | class FuncallTests(unittest.TestCase):
156 |     def test_simple_call(self):
157 |         scanner = Scanner("call par1 2;")
158 |         parser = Parser(scanner.scan_tokens())
159 |         statement = parser.statement()
160 | 
161 |         self.assertIsInstance(statement, expr.Expression)
162 |         self.assertEqual(len(statement.call), 3)
163 | 
164 |         self.assertIsInstance(statement.call[0], expr.Identifier)
165 |         self.assertEqual(statement.call[0].name, "call")
166 | 
167 |         self.assertIsInstance(statement.call[1], expr.Identifier)
168 |         self.assertEqual(statement.call[1].name, "par1")
169 | 
170 |         self.assertIsInstance(statement.call[2], expr.Literal)
171 |         self.assertEqual(statement.call[2].value, 2)
172 | 
173 |     def test_bang_words_are_magic(self):
174 |         scanner = Scanner("print!;")
175 |         parser = Parser(scanner.scan_tokens())
176 |         statement = parser.statement()
177 | 
178 |         self.assertIsInstance(statement, expr.Expression)
179 |         self.assertEqual(len(statement.call), 2)
180 | 
181 |         self.assertIsInstance(statement.call[0], expr.Identifier)
182 |         self.assertEqual(statement.call[0].name, "print!")
183 | 
184 |         self.assertIsInstance(statement.call[1], expr.Identifier)
185 |         self.assertEqual(statement.call[1].name, "!")
186 | 
187 |     def test_complete_your_function(self):
188 |         scanner = Scanner("{ x | x; ")
189 |         parser = Parser(scanner.scan_tokens())
190 |         with self.assertRaises(Exception):
191 |             statement = parser.statement()
192 | 
193 |     def test_simplest_program(self):
194 |         scanner = Scanner("1;")
195 |         parser = Parser(scanner.scan_tokens())
196 |         program = parser.program()
197 |         self.assertEqual(len(program), 1)
198 | 
199 |         statement = program[0]
200 |         self.assertIsInstance(statement, expr.Literal)
201 |         self.assertEqual(statement.value, 1)
202 | 
203 | 
204 | class StatementTests(unittest.TestCase):
205 |     def test_use(self):
206 |         scanner = Scanner('use "potato";')
207 |         parser = Parser(scanner.scan_tokens())
208 |         statement = parser.statement()
209 |         self.assertIsInstance(statement, expr.Use)
210 |         self.assertEqual(statement.filename, "potato")
211 | 
212 |     def test_single(self):
213 |         scanner = Scanner("let a <- 1;")
214 |         parser = Parser(scanner.scan_tokens())
215 |         statement = parser.statement()
216 |         self.assertIsInstance(statement, expr.Let)
217 |         self.assertEqual(len(statement.assignments), 1)
218 |         name, value = statement.assignments[0]
219 |         self.assertIsInstance(name, expr.Identifier)
220 |         self.assertIsInstance(value, expr.Literal)
221 | 
222 |     def test_multiple(self):
223 |         scanner = Scanner("let a <- 1, + <- { x y | x y; };")
224 |         parser = Parser(scanner.scan_tokens())
225 |         statement = parser.statement()
226 |         self.assertIsInstance(statement, expr.Let)
227 |         self.assertEqual(len(statement.assignments), 2)
228 |         name, value = statement.assignments[0]
229 |         self.assertIsInstance(name, expr.Identifier)
230 |         self.assertIsInstance(value, expr.Literal)
231 |         name, value = statement.assignments[1]
232 |         self.assertIsInstance(name, expr.Identifier)
233 |         self.assertIsInstance(value, expr.Function)
234 | 
235 |     def test_cant_assign_to_a_function(self):
236 |         scanner = Scanner("let { x | x; }; <- 1;")
237 |         parser = Parser(scanner.scan_tokens())
238 |         with self.assertRaises(Exception):
239 |             statement = parser.statement()
240 | 
241 | 
242 | class GreenspunTest(unittest.TestCase):
243 |     program = """
244 |     use "std.dl";
245 |     let x <- 1 : Integer, f <- { a | {#add} x + a; };
246 |     f (+) x;
247 |     cond
248 |     | a -> f x 1;
249 |     | b -> print! "B";
250 |            (+);
251 |     ;
252 |     """
253 |     output = """USE 'std.dl'
254 | (DEFINE x 1
255 |         f (LAMBDA (a) (+ (#add x) a)))
256 | (f + x)
257 | (COND (a (f x 1)) (b(progn
258 | (print! ! 'B')
259 | +)) )"""
260 | 
261 |     def test_hey_this_looks_like_lisp_lol(self):
262 |         scanner = Scanner(self.program)
263 |         parser = Parser(scanner.scan_tokens())
264 |         program = parser.program()
265 |         self.assertEqual("\n".join(repr(st) for st in program), self.output)
266 | 


--------------------------------------------------------------------------------
/tests/test_scanner.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import unittest
  4 | from modl.scanner import Scanner
  5 | from modl.tokens import TokenType
  6 | 
  7 | 
  8 | class TestScannerTokens(unittest.TestCase):
  9 |     valid_integers = {
 10 |         "0": 0,
 11 |         "1": 1,
 12 |         "1234567890": 1234567890,
 13 |         "12345678901234567890": 12345678901234567890,
 14 |         "-1": -1,
 15 |         "-0": 0,  # Pointless, but valid
 16 |         "000123": 123,
 17 |         "-000123": -123,
 18 |     }
 19 | 
 20 |     valid_floats = {"0.0": 0.0, "0.1": 0.1, "1.0": 1.0, ".5": 0.5, "-.5": -0.5}
 21 | 
 22 |     valid_strings = {
 23 |         r'""': "",
 24 |         r'"a"': "a",
 25 |         r'"this is a long string"': "this is a long string",
 26 |         '"this string\nis multiline"': "this string\nis multiline",
 27 |         r'"this string\nis also multiline"': "this string\nis also multiline",
 28 |         r'"A whole bunch of escape sequences \r\n\t\\\"\x0123"': 'A whole bunch of escape sequences \r\n\t\\"\u0123',
 29 |     }
 30 | 
 31 |     reserved = {
 32 |         "use": TokenType.USE,
 33 |         "let": TokenType.LET,
 34 |         "cond": TokenType.COND,
 35 |         "<-": TokenType.LEFT_ARROW,
 36 |         "->": TokenType.RIGHT_ARROW,
 37 |         "|": TokenType.PIPE,
 38 |         ":": TokenType.COLON,
 39 |         "!": TokenType.BANG,
 40 |         ";": TokenType.SEMICOLON,
 41 |         "{": TokenType.OPEN_BRACKETS,
 42 |         "}": TokenType.CLOSE_BRACKETS,
 43 |         "(": TokenType.OPEN_PARENTHESES,
 44 |         ")": TokenType.CLOSE_PARENTHESES,
 45 |         ",": TokenType.COMMA,
 46 |     }
 47 | 
 48 |     valid_symbols = [
 49 |         "-->",
 50 |         "<--",
 51 |         "?",
 52 |         "!!",
 53 |         "::",
 54 |         "||",
 55 |         "&&",
 56 |         "->>",
 57 |         "=*>>",
 58 |         "-",
 59 |         "-.",
 60 |         ".",
 61 |         "..",
 62 |         "-..",
 63 |         "--",
 64 |         "/",
 65 |         "//",
 66 |         "' ",
 67 |     ]
 68 |     valid_identifiers = ["foo", "bar_baz", "quux'", "read!", "add4", "plus5!"]
 69 | 
 70 |     # Sequences containing unique characters that may not be part of a symbol
 71 |     invalid_symbols = ["-{", "/a", "-)", "+,+"]
 72 | 
 73 |     def test_valid_integers(self):
 74 |         for string, literal in self.valid_integers.items():
 75 |             with self.subTest(i=string):
 76 |                 scanner = Scanner(string)
 77 |                 tokens = scanner.scan_tokens()
 78 |                 self.assertEqual(len(tokens), 2)  # Parsed symbol, plus EOF
 79 |                 token = tokens[0]
 80 |                 self.assertIs(token.token_type, TokenType.B10_INTEGER)
 81 |                 self.assertEqual(token.literal, literal)
 82 | 
 83 |     def test_valid_floats(self):
 84 |         for string, literal in self.valid_floats.items():
 85 |             with self.subTest(i=string):
 86 |                 scanner = Scanner(string)
 87 |                 tokens = scanner.scan_tokens()
 88 |                 self.assertEqual(len(tokens), 2)  # Parsed symbol, plus EOF
 89 |                 token = tokens[0]
 90 |                 self.assertIs(token.token_type, TokenType.B10_FLOAT)
 91 |                 self.assertEqual(token.literal, literal)
 92 | 
 93 |     def test_valid_strings(self):
 94 |         for string, literal in self.valid_strings.items():
 95 |             with self.subTest(i=string):
 96 |                 scanner = Scanner(string)
 97 |                 tokens = scanner.scan_tokens()
 98 |                 self.assertEqual(len(tokens), 2)  # Parsed symbol, plus EOF
 99 |                 token = tokens[0]
100 |                 self.assertIs(token.token_type, TokenType.STRING)
101 |                 self.assertEqual(token.literal, literal)
102 | 
103 |     def test_reserved_sequences(self):
104 |         for string, token_type in self.reserved.items():
105 |             with self.subTest(i=string):
106 |                 scanner = Scanner(string)
107 |                 tokens = scanner.scan_tokens()
108 |                 self.assertEqual(len(tokens), 2)  # Parsed symbol, plus EOF
109 |                 token = tokens[0]
110 |                 self.assertIs(token.token_type, token_type)
111 | 
112 |     def test_valid_symbol(self):
113 |         for string in self.valid_symbols:
114 |             with self.subTest(i=string):
115 |                 scanner = Scanner(string)
116 |                 tokens = scanner.scan_tokens()
117 |                 self.assertEqual(len(tokens), 2)  # Parsed symbol, plus EOF
118 |                 token = tokens[0]
119 |                 self.assertIs(token.token_type, TokenType.SYMBOLIC)
120 | 
121 |     def test_invalid_symbol(self):
122 |         for string in self.invalid_symbols:
123 |             with self.subTest(i=string):
124 |                 scanner = Scanner(string)
125 |                 tokens = scanner.scan_tokens()
126 |                 # Whatever this matched is OK as long as it's not a single symbol
127 |                 self.assertNotEqual(len(tokens), 2)
128 | 
129 |     def test_valid_identifier(self):
130 |         for string in self.valid_identifiers:
131 |             with self.subTest(i=string):
132 |                 scanner = Scanner(string)
133 |                 tokens = scanner.scan_tokens()
134 |                 self.assertEqual(len(tokens), 2)  # Parsed symbol, plus EOF
135 |                 token = tokens[0]
136 |                 self.assertIs(token.token_type, TokenType.IDENTIFIER)
137 | 
138 |     def test_single_line(self):
139 |         scanner = Scanner("foobar")
140 |         scanner.scan_tokens()  # Discard
141 |         self.assertEqual(scanner.line, 1)
142 | 
143 |     def test_two_lines(self):
144 |         scanner = Scanner("foo\nbar")
145 |         scanner.scan_tokens()  # Discard
146 |         self.assertEqual(scanner.line, 2)
147 | 
148 |     def test_multiple_lines(self):
149 |         scanner = Scanner(
150 |             r"""this is a "series of
151 |         tokens"
152 |         /* including multiline comments
153 |         spread
154 |         among several */
155 |         lines, also "includes an \n escaped linebreak character" that should be ignored"""
156 |         )
157 |         scanner.scan_tokens()  # Discard
158 |         self.assertEqual(scanner.line, 6)
159 | 
160 |     def test_builtin(self):
161 |         scanner = Scanner("{#builtin_name}")
162 |         tokens = scanner.scan_tokens()
163 |         self.assertEqual(len(tokens), 2)  # Parsed symbol, plus EOF
164 |         token = tokens[0]
165 |         self.assertIs(token.token_type, TokenType.BUILTIN)
166 |         self.assertEqual(token.literal, "builtin_name")
167 | 
168 |     def test_typename(self):
169 |         scanner = Scanner("Type")
170 |         tokens = scanner.scan_tokens()
171 |         self.assertEqual(len(tokens), 2)  # Parsed symbol, plus EOF
172 |         token = tokens[0]
173 |         self.assertIs(token.token_type, TokenType.TYPENAME)
174 |         self.assertEqual(token.lexeme, "Type")
175 | 
176 | 
177 | class InvalidTokensTest(unittest.TestCase):
178 |     def test_unclosed_comment(self):
179 |         scanner = Scanner("/* unclosed comment")
180 |         with self.assertRaises(Exception):
181 |             scanner.scan_tokens()
182 | 
183 |     def test_unopened_comment(self):
184 |         scanner = Scanner("forgot to remove uncomment */")
185 |         with self.assertRaises(Exception):
186 |             scanner.scan_tokens()
187 | 
188 |     def test_unclosed_string(self):
189 |         scanner = Scanner('"a string with no close')
190 |         with self.assertRaises(Exception):
191 |             scanner.scan_tokens()
192 | 
193 |     def test_incomplete_string_escape(self):
194 |         scanner = Scanner('"unclosed escape\\')
195 |         with self.assertRaises(Exception):
196 |             scanner.scan_tokens()
197 | 
198 |     def test_invalid_escape_sequence(self):
199 |         scanner = Scanner(r'"invalid escape \q code"')
200 |         with self.assertRaises(Exception):
201 |             scanner.scan_tokens()
202 | 
203 |     def test_invalid_unicode_escape_sequence(self):
204 |         scanner = Scanner(r'"invalid escape \x15')
205 |         with self.assertRaises(Exception):
206 |             scanner.scan_tokens()
207 | 
208 |     def test_unclosed_builtin(self):
209 |         scanner = Scanner(r"{#broken_builtin")
210 |         with self.assertRaises(Exception):
211 |             scanner.scan_tokens()
212 | 
213 |     def test_bang_after_keyword(self):
214 |         scanner = Scanner(r"let!")
215 |         with self.assertRaises(Exception):
216 |             scanner.scan_tokens()
217 | 
218 | 
219 | class ExprTest(unittest.TestCase):
220 |     def test_literal_expression(self):
221 |         scanner = Scanner("{#builtin_name}")
222 |         tokens = scanner.scan_tokens()
223 |         self.assertEqual(repr(tokens[0]), "TokenType.BUILTIN '{#builtin_name}'")
224 | 
225 |     def test_other_expression(self):
226 |         scanner = Scanner("+")
227 |         tokens = scanner.scan_tokens()
228 |         self.assertEqual(repr(tokens[0]), "TokenType.SYMBOLIC '+' None")
229 | 


--------------------------------------------------------------------------------