├── .gitignore ├── Makefile ├── README.md ├── compiler.py ├── luatopy ├── __init__.py ├── ast.py ├── builtins.py ├── evaluator.py ├── lexer.py ├── obj.py ├── parser.py └── token.py ├── repl.py ├── requirements.txt └── tests ├── __init__.py ├── test_evaluator.py ├── test_lexer.py └── test_parser.py /.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | .mypy_cache 3 | .pytest_cache 4 | __pycache__ 5 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | fixcode: 2 | source venv/bin/activate && black luatopy --line-length 80 3 | 4 | test: 5 | source venv/bin/activate && pytest 6 | 7 | lint: 8 | source venv/bin/activate && mypy luatopy 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Lua interpreter in Python 2 | 3 | This is my second take on writing a Lua-To-Python compiler, it includes: 4 | 5 | - [x] Lexer 6 | - [x] Parser 7 | - [x] A internal AST representation 8 | - [x] Repl 9 | - [x] Interpeter 10 | 11 | 12 | ## Running repl 13 | 14 | - `python repl.py` 15 | 16 | 17 | ## TODO 18 | - [x] Introduce `;` as a separator 19 | - [x] Named functions 20 | - [x] Not defined variables should return `nil` 21 | - [x] Modulo operator 22 | - [x] `and` operator 23 | - [x] `or` operator 24 | - [ ] `elseif` statement 25 | - [x] Variables with numbers in name 26 | - [ ] Iterator for Table using `pairs`/`ipairs` 27 | - [ ] `_G` for globals access 28 | - [ ] `for` loop 29 | - [ ] `while` loop 30 | - [ ] `repeat` loop 31 | - [ ] Short circuit / tenary operator 32 | - [ ] Dot property syntax in Table for string keys 33 | - [ ] Numbers beginning with `.` (Ex `.5`) 34 | - [ ] Handle global vs local variables in lua style 35 | - [ ] Function calls with single params should not 
require parens 36 | - [ ] Metatable support for tables 37 | 38 | 39 | ## Supports 40 | - Single and multiline comments 41 | - Variable assignments 42 | - Numbers 43 | - Strings 44 | - Tables 45 | - Addition, multiplication and division 46 | - If statements 47 | - Comparison operators (`==`, `>=`, `>`, `<`, `<≠`, `~=`) 48 | - String concat `..` 49 | - `return` 50 | - `function` declarations (both named and anymous with closures) 51 | - `not` logical operator 52 | - Negative values 53 | - Table indexing 54 | - Table count with `#` 55 | - Non existing identifiers return nil 56 | - Modulo operator 57 | 58 | 59 | ## References 60 | - A lot of the work here is based on the book [Writing A Compiler In Go](https://compilerbook.com/) 61 | - [My first take](https://github.com/marteinn/Lua-To-Python) 62 | - [A Python Interpreter Written in Python](https://www.aosabook.org/en/500L/a-python-interpreter-written-in-python.html) 63 | - [Let’s Build A Simple Interpreter. Part 7: Abstract Syntax Trees](https://ruslanspivak.com/lsbasi-part7/) 64 | -------------------------------------------------------------------------------- /compiler.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | 4 | @click.command() 5 | def run(): 6 | print("HI") 7 | 8 | 9 | if __name__ == '__main__': 10 | run() 11 | -------------------------------------------------------------------------------- /luatopy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marteinn/Lua-Interpreter-In-Python/a3a78f9ce6923eaa6cabd6e325ed27d0626f168c/luatopy/__init__.py -------------------------------------------------------------------------------- /luatopy/ast.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import List, Optional, Dict, Tuple 3 | 4 | from .token import Token 5 | 6 | 7 | class 
Program: 8 | def __init__(self, statements): 9 | self.statements = statements 10 | 11 | def to_code(self) -> str: 12 | out = [x.to_code() for x in self.statements] 13 | return "\n".join(out) 14 | 15 | 16 | @dataclass 17 | class Node: 18 | token: Token 19 | 20 | def to_code(self) -> str: 21 | pass 22 | 23 | 24 | @dataclass 25 | class Identifier(Node): 26 | value: str 27 | 28 | def to_code(self) -> str: 29 | return self.value 30 | 31 | 32 | class Statement(Node): 33 | pass 34 | 35 | 36 | class Expression(Node): 37 | pass 38 | 39 | 40 | @dataclass 41 | class ReturnStatement(Statement): 42 | value: Expression 43 | 44 | def to_code(self) -> str: 45 | return "return {0}".format(self.value.to_code()) 46 | 47 | 48 | @dataclass 49 | class Boolean(Node): 50 | value: bool 51 | 52 | def to_code(self) -> str: 53 | return "true" if self.value else "false" 54 | 55 | 56 | @dataclass 57 | class AssignStatement(Statement): 58 | name: Identifier 59 | value: Node 60 | 61 | def to_code(self) -> str: 62 | return "{0} = {1}".format(self.name.value, self.value.to_code()) 63 | 64 | 65 | @dataclass 66 | class ExpressionStatement(Statement): 67 | expression: Expression 68 | 69 | def to_code(self) -> str: 70 | if not self.expression: 71 | return "" 72 | return self.expression.to_code() 73 | 74 | 75 | @dataclass 76 | class IntegerLiteral(Node): 77 | value: int 78 | 79 | def to_code(self) -> str: 80 | return str(self.value) 81 | 82 | 83 | @dataclass 84 | class StringLiteral(Node): 85 | value: str 86 | 87 | def to_code(self) -> str: 88 | return '"{0}"'.format(self.value) 89 | 90 | 91 | @dataclass 92 | class PrefixExpression(Expression): 93 | right: Node 94 | operator: str 95 | 96 | def to_code(self) -> str: 97 | if len(self.operator) > 1: 98 | return "({0} {1})".format(self.operator, self.right.to_code()) 99 | return "({0}{1})".format(self.operator, self.right.to_code()) 100 | 101 | 102 | @dataclass 103 | class InfixExpression(Expression): 104 | left: Node 105 | operator: str 106 | right: Node 
107 | 108 | def to_code(self) -> str: 109 | return "({0} {1} {2})".format( 110 | self.left.to_code(), self.operator, self.right.to_code() 111 | ) 112 | 113 | 114 | @dataclass 115 | class BlockStatement(Statement): 116 | statements: List[Node] = field(default_factory=list) 117 | 118 | def to_code(self) -> str: 119 | out = [x.to_code() for x in self.statements] 120 | return "\n".join(out) 121 | 122 | 123 | @dataclass 124 | class IfExpression(Expression): 125 | condition: Expression 126 | consequence: BlockStatement 127 | alternative: BlockStatement 128 | 129 | def to_code(self) -> str: 130 | out = "if {0} then ".format(self.condition.to_code()) 131 | out = out + self.consequence.to_code() 132 | if self.alternative: 133 | out = out + " else " 134 | out = out + self.alternative.to_code() 135 | out = out + " end" 136 | return out 137 | 138 | 139 | @dataclass 140 | class FunctionLiteral(Node): 141 | body: BlockStatement 142 | parameters: List[Identifier] = field(default_factory=list) 143 | name: Optional[Identifier] = None 144 | 145 | def to_code(self) -> str: 146 | signature = ", ".join([x.value for x in self.parameters]) 147 | 148 | if self.name: 149 | out = "function {0} ({1}) ".format(self.name.value, signature) 150 | else: 151 | out = "function ({0}) ".format(signature) 152 | 153 | body_code = self.body.to_code().strip() 154 | if body_code: 155 | out = out + body_code + " " 156 | return out + "end" 157 | 158 | 159 | @dataclass 160 | class CallExpression(Expression): 161 | function: Node 162 | arguments: List[Expression] 163 | 164 | def to_code(self) -> str: 165 | out = "{0}({1})".format( 166 | self.function.to_code(), 167 | ", ".join([x.to_code() for x in self.arguments]), 168 | ) 169 | return out 170 | 171 | 172 | @dataclass 173 | class TableLiteral(Expression): 174 | elements: List[Tuple[Expression, Expression]] 175 | 176 | def to_code(self) -> str: 177 | out = "{" 178 | 179 | items = [] 180 | for key, value in self.elements: 181 | key_code = key.to_code() 182 | 
items.append(f"{key_code} = {value.to_code()}") 183 | 184 | out = out + ", ".join(x for x in items) 185 | out = out + "}" 186 | return out 187 | 188 | 189 | @dataclass 190 | class IndexExpression(Expression): 191 | left: Expression 192 | index: Expression 193 | 194 | def to_code(self) -> str: 195 | return "({0}[{1}])".format(self.left.to_code(), self.index.to_code()) 196 | -------------------------------------------------------------------------------- /luatopy/builtins.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, List 2 | 3 | from luatopy import obj 4 | from luatopy.obj import TRUE, FALSE, NULL 5 | 6 | 7 | builtins: Dict[str, Any] = {} 8 | 9 | 10 | def register(store, name, fn): 11 | store[name] = obj.Builtin(fn=fn) 12 | return store 13 | 14 | 15 | def builtin_type(*args: obj.Obj) -> obj.Obj: 16 | if len(args) == 0: 17 | return obj.Error.create("Missing arguments") 18 | 19 | value = args[0] 20 | value_type: Optional[str] = None 21 | if type(value) == obj.String: 22 | value_type = "string" 23 | if type(value) == obj.Integer: 24 | value_type = "number" 25 | if type(value) == obj.Boolean: 26 | value_type = "boolean" 27 | if type(value) == obj.Table: 28 | value_type = "table" 29 | if type(value) == obj.Function: 30 | value_type = "function" 31 | 32 | if not value_type: 33 | return NULL 34 | return obj.String(value=value_type) 35 | 36 | 37 | builtins = register(builtins, "type", builtin_type) 38 | 39 | 40 | def builtin_print(*args: obj.Obj) -> obj.Obj: 41 | out: List[str] = [x.inspect() for x in args] 42 | print(" ".join(out)) 43 | return NULL 44 | 45 | 46 | builtins = register(builtins, "print", builtin_print) 47 | -------------------------------------------------------------------------------- /luatopy/evaluator.py: -------------------------------------------------------------------------------- 1 | from typing import cast, Optional, List, Tuple, Dict 2 | 3 | from . 
import ast 4 | from . import obj 5 | from luatopy.builtins import builtins 6 | 7 | from .obj import TRUE, FALSE, NULL 8 | 9 | 10 | def evaluate(node: ast.Node, env: obj.Environment): 11 | klass = type(node) 12 | 13 | if klass == ast.Program: 14 | program: ast.Program = cast(ast.Program, node) 15 | return evaluate_program(program, env) 16 | 17 | if klass == ast.ExpressionStatement: 18 | exp: ast.ExpressionStatement = cast(ast.ExpressionStatement, node) 19 | return evaluate(exp.expression, env) 20 | 21 | if klass == ast.IntegerLiteral: 22 | integer_literal: ast.IntegerLiteral = cast(ast.IntegerLiteral, node) 23 | return obj.Integer(value=integer_literal.value) 24 | 25 | if klass == ast.StringLiteral: 26 | string_literal: ast.StringLiteral = cast(ast.StringLiteral, node) 27 | return obj.String(value=string_literal.value) 28 | 29 | if klass == ast.Boolean: 30 | boolean: ast.Boolean = cast(ast.Boolean, node) 31 | return native_bool_to_bool_obj(boolean.value) 32 | 33 | if klass == ast.PrefixExpression: 34 | prefix_exp: ast.PrefixExpression = cast(ast.PrefixExpression, node) 35 | prefix_right: obj.Obj = evaluate(prefix_exp.right, env) 36 | 37 | if is_error(prefix_right): 38 | return prefix_right 39 | 40 | return evaluate_prefix_expression(prefix_exp.operator, prefix_right) 41 | 42 | if klass == ast.InfixExpression: 43 | infix_exp: ast.InfixExpression = cast(ast.InfixExpression, node) 44 | 45 | infix_left: obj.Obj = evaluate(infix_exp.left, env) 46 | if is_error(infix_left): 47 | return infix_left 48 | 49 | infix_right: obj.Obj = evaluate(infix_exp.right, env) 50 | if is_error(infix_right): 51 | return infix_right 52 | 53 | return evaluate_infix_expression( 54 | infix_exp.operator, infix_left, infix_right 55 | ) 56 | 57 | if klass == ast.BlockStatement: 58 | block_statement: ast.BlockStatement = cast(ast.BlockStatement, node) 59 | return evaluate_block_statement(block_statement, env) 60 | 61 | if klass == ast.IfExpression: 62 | if_exp: ast.IfExpression = 
cast(ast.IfExpression, node) 63 | return eval_if_expression(if_exp, env) 64 | 65 | if klass == ast.ReturnStatement: 66 | return_statement: ast.ReturnStatement = cast(ast.ReturnStatement, node) 67 | return_value: obj.Obj = evaluate(return_statement.value, env) 68 | if is_error(return_value): 69 | return return_value 70 | return obj.ReturnValue(return_value) 71 | 72 | if klass == ast.AssignStatement: 73 | assignment: ast.AssignStatement = cast(ast.AssignStatement, node) 74 | assignment_value: obj.Obj = evaluate(assignment.value, env) 75 | 76 | if is_error(assignment_value): 77 | return assignment_value 78 | env.set(assignment.name.value, assignment_value) 79 | return None 80 | 81 | if klass == ast.Identifier: 82 | identifier: ast.Identifier = cast(ast.Identifier, node) 83 | return evaluate_identifier(identifier, env) 84 | 85 | if klass == ast.FunctionLiteral: 86 | fn_literal: ast.FunctionLiteral = cast(ast.FunctionLiteral, node) 87 | 88 | if fn_literal.name: 89 | funct_assignment = obj.Function( 90 | body=fn_literal.body, parameters=fn_literal.parameters, env=env 91 | ) 92 | env.set(fn_literal.name.value, funct_assignment) 93 | return None 94 | 95 | return obj.Function( 96 | body=fn_literal.body, parameters=fn_literal.parameters, env=env 97 | ) 98 | 99 | if klass == ast.CallExpression: 100 | call_exp: ast.CallExpression = cast(ast.CallExpression, node) 101 | fn_obj: obj.Obj = evaluate(call_exp.function, env) 102 | 103 | if is_error(fn_obj): 104 | return fn_obj 105 | 106 | fn: obj.Function = cast(obj.Function, fn_obj) 107 | args: List[obj.Obj] = evaluate_expressions(call_exp.arguments, env) 108 | if len(args) > 1 and is_error(args[0]): 109 | return args[0] 110 | 111 | return apply_function(fn, args, env) 112 | 113 | if klass == ast.TableLiteral: 114 | table_literal: ast.TableLiteral = cast(ast.TableLiteral, node) 115 | elements = evaluate_expression_pairs(table_literal.elements, env) 116 | # if len(elements) == 1 and is_error(elements): 117 | # return paris[0] 118 | 
119 | return obj.Table(elements=elements) 120 | 121 | if klass == ast.IndexExpression: 122 | index_expression: ast.IndexExpression = cast(ast.IndexExpression, node) 123 | left: obj.Obj = evaluate(index_expression.left, env) 124 | if is_error(left): 125 | return left 126 | 127 | index: obj.Obj = evaluate(index_expression.index, env) 128 | if is_error(index): 129 | return index 130 | 131 | return evaluate_index_expression(left, index) 132 | 133 | return None 134 | 135 | 136 | def evaluate_index_expression(left: obj.Obj, index: obj.Obj) -> obj.Obj: 137 | if left.type() == obj.ObjType.TABLE and index.type() == obj.ObjType.INTEGER: 138 | return evaluate_table_index_expression( 139 | cast(obj.Table, left), cast(obj.Integer, index) 140 | ) 141 | if left.type() == obj.ObjType.TABLE and index.type() == obj.ObjType.STRING: 142 | return evaluate_table_key_expression( 143 | cast(obj.Table, left), cast(obj.String, index) 144 | ) 145 | 146 | return obj.Error.create("Index operation not supported") 147 | 148 | 149 | def evaluate_table_index_expression( 150 | table: obj.Table, index: obj.Integer 151 | ) -> obj.Obj: 152 | try: 153 | return table.elements[index] 154 | except: 155 | return NULL 156 | 157 | 158 | def evaluate_table_key_expression( 159 | table: obj.Table, index: obj.String 160 | ) -> obj.Obj: 161 | index_value: str = index.value 162 | try: 163 | return table.elements[index] 164 | except: 165 | return NULL 166 | 167 | 168 | def apply_function( 169 | fn: obj.Obj, args: List[obj.Obj], env: obj.Environment 170 | ) -> obj.Obj: 171 | if type(fn) == obj.Function: 172 | fn_fn = cast(obj.Function, fn) 173 | extended_env = extend_function_env(fn_fn, args) 174 | evaluated = evaluate(fn_fn.body, extended_env) 175 | return unwrap_return_value(evaluated) 176 | 177 | if type(fn) == obj.Builtin: 178 | builtin_fn = cast(obj.Builtin, fn) 179 | return builtin_fn.fn(*args) 180 | 181 | return obj.Error.create("Not a function {0}", fn.type()) 182 | 183 | 184 | def unwrap_return_value(value: 
obj.Obj) -> obj.Obj: 185 | if type(value) == obj.ReturnValue: 186 | return_value = cast(obj.ReturnValue, value) 187 | return return_value.value 188 | return value 189 | 190 | 191 | def extend_function_env( 192 | fn: obj.Function, args: List[obj.Obj] 193 | ) -> obj.Environment: 194 | enclosed_env = obj.Environment.create_enclosed(fn.env) 195 | 196 | param: ast.Identifier 197 | for index, param in enumerate(fn.parameters): 198 | enclosed_env.set(param.value, args[index]) 199 | return enclosed_env 200 | 201 | 202 | def evaluate_expressions( 203 | expressions: List[ast.Expression], env: obj.Environment 204 | ) -> List[obj.Obj]: 205 | result: List[obj.Obj] = [] 206 | 207 | for exp in expressions: 208 | evaluated: obj.Obj = evaluate(exp, env) 209 | if is_error(evaluated): 210 | return [evaluated] 211 | 212 | result.append(evaluated) 213 | 214 | return result 215 | 216 | 217 | def evaluate_expression_pairs( 218 | expressions: List[Tuple[ast.Expression, ast.Expression]], 219 | env: obj.Environment, 220 | ) -> Dict[obj.Obj, obj.Obj]: 221 | out: Dict[obj.Obj, obj.Obj] = {} 222 | for key_exp, val_exp in expressions: 223 | key: obj.Obj = evaluate(key_exp, env) 224 | value: obj.Obj = evaluate(val_exp, env) 225 | out[key] = value 226 | 227 | return out 228 | 229 | 230 | def evaluate_identifier( 231 | identifier: ast.Identifier, env: obj.Environment 232 | ) -> obj.Obj: 233 | val, found = env.get(identifier.value, NULL) 234 | if found: 235 | return val 236 | 237 | if identifier.value in builtins: 238 | return builtins[identifier.value] 239 | 240 | return NULL 241 | 242 | 243 | def evaluate_program(program: ast.Program, env: obj.Environment): 244 | result = None 245 | for statement in program.statements: 246 | result = evaluate(statement, env) 247 | 248 | if type(result) == obj.ReturnValue: 249 | return_value: obj.ReturnValue = cast(obj.ReturnValue, result) 250 | return return_value.value 251 | if type(result) == obj.Error: 252 | return result 253 | 254 | return result 255 | 256 | 
257 | def evaluate_block_statement( 258 | block_statement: ast.BlockStatement, env: obj.Environment 259 | ): 260 | result = None 261 | for statement in block_statement.statements: 262 | result = evaluate(statement, env) 263 | if result != None: 264 | if result.type() in [obj.ObjType.RETURN, obj.ObjType.ERROR]: 265 | return result 266 | 267 | return result 268 | 269 | 270 | def eval_if_expression(if_exp: ast.IfExpression, env: obj.Environment): 271 | condition = evaluate(if_exp.condition, env) 272 | 273 | if is_error(condition): 274 | return condition 275 | 276 | if is_truthy(condition): 277 | return evaluate(if_exp.consequence, env) 278 | elif if_exp.alternative: 279 | return evaluate(if_exp.alternative, env) 280 | 281 | return NULL 282 | 283 | 284 | def is_truthy(obj: obj.Obj) -> bool: 285 | if obj == NULL: 286 | return False 287 | if obj == TRUE: 288 | return True 289 | if obj == FALSE: 290 | return False 291 | return True 292 | 293 | 294 | def evaluate_statements(statements, env: obj.Environment): 295 | result = None 296 | for statement in statements: 297 | result = evaluate(statement, env) 298 | 299 | if type(result) == obj.ReturnValue: 300 | return_value: obj.ReturnValue = cast(obj.ReturnValue, result) 301 | return return_value.value 302 | 303 | return result 304 | 305 | 306 | def evaluate_prefix_expression(operator: str, right: obj.Obj) -> obj.Obj: 307 | if operator == "not": 308 | return evaluate_not_operator_expression(right) 309 | if operator == "-": 310 | return evaluate_minus_operator_expression(right) 311 | if operator == "#": 312 | return evaluate_length_operator_expression(right) 313 | 314 | return obj.Error.create( 315 | "Unknown operator {0}{0}", operator, right.inspect() 316 | ) 317 | 318 | 319 | def evaluate_not_operator_expression(right: obj.Obj) -> obj.Boolean: 320 | if right == TRUE: 321 | return FALSE 322 | if right == FALSE: 323 | return TRUE 324 | if right == NULL: 325 | return TRUE 326 | return FALSE 327 | 328 | 329 | def 
evaluate_minus_operator_expression(right: obj.Obj) -> obj.Obj: 330 | if right.type() == obj.ObjType.BOOLEAN: 331 | return obj.Error.create( 332 | "Attempt to perform arithmetic on a boolean value" 333 | ) 334 | 335 | if type(right) != obj.Integer: 336 | return NULL 337 | 338 | obj_int = cast(obj.Integer, right) 339 | return obj.Integer(value=0 - obj_int.value) 340 | 341 | 342 | def evaluate_length_operator_expression(right: obj.Obj) -> obj.Obj: 343 | if right.type() == obj.ObjType.STRING: 344 | return obj.Integer(len(right.value)) 345 | if right.type() == obj.ObjType.TABLE: 346 | length: int = 1 347 | while True: 348 | try: 349 | right.elements[obj.Integer(value=length)] 350 | length = length + 1 351 | except: 352 | break 353 | 354 | return obj.Integer(length - 1) 355 | return NULL 356 | 357 | 358 | def evaluate_infix_expression( 359 | operator: str, left: obj.Obj, right: obj.Obj 360 | ) -> obj.Obj: 361 | if type(left) == obj.Integer and type(right) == obj.Integer: 362 | left_val = cast(obj.Integer, left) 363 | right_val = cast(obj.Integer, right) 364 | return evaluate_infix_integer_expression(operator, left_val, right_val) 365 | 366 | if type(left) == obj.String and type(right) == obj.String: 367 | left_str_val = cast(obj.String, left) 368 | right_str_val = cast(obj.String, right) 369 | return evaluate_infix_string_expression( 370 | operator, left_str_val, right_str_val 371 | ) 372 | 373 | if obj.ObjType.BOOLEAN in [left.type(), right.type()] and operator in [ 374 | "+", 375 | "-", 376 | "*", 377 | "/", 378 | ]: 379 | return obj.Error.create( 380 | "Attempt to perform arithmetic on a boolean value" 381 | ) 382 | 383 | if operator == "==": 384 | return native_bool_to_bool_obj(left == right) 385 | 386 | if operator == "~=": 387 | return native_bool_to_bool_obj(left != right) 388 | 389 | if operator == "and": 390 | return native_bool_to_bool_obj(left.value and right.value) 391 | 392 | if operator == "or": 393 | return native_bool_to_bool_obj(left.value or 
right.value) 394 | 395 | return obj.Error.create("Unknown infix operator {0}", operator) 396 | 397 | 398 | def evaluate_infix_string_expression( 399 | operator, left: obj.String, right: obj.String 400 | ) -> obj.Obj: 401 | if operator == "..": 402 | return obj.String(left.value + right.value) 403 | return NULL 404 | 405 | 406 | def evaluate_infix_integer_expression( 407 | operator, left: obj.Integer, right: obj.Integer 408 | ) -> obj.Obj: 409 | if operator == "+": 410 | return obj.Integer(left.value + right.value) 411 | 412 | if operator == "-": 413 | return obj.Integer(left.value - right.value) 414 | 415 | if operator == "*": 416 | return obj.Integer(left.value * right.value) 417 | 418 | if operator == "/": 419 | return obj.Float(left.value / right.value) 420 | 421 | if operator == "%": 422 | return obj.Float(left.value % right.value) 423 | 424 | if operator == ">": 425 | return native_bool_to_bool_obj(left.value > right.value) 426 | 427 | if operator == ">=": 428 | return native_bool_to_bool_obj(left.value >= right.value) 429 | 430 | if operator == "<": 431 | return native_bool_to_bool_obj(left.value < right.value) 432 | 433 | if operator == "<=": 434 | return native_bool_to_bool_obj(left.value <= right.value) 435 | 436 | if operator == "==": 437 | return native_bool_to_bool_obj(left.value == right.value) 438 | 439 | if operator == "~=": 440 | return native_bool_to_bool_obj(left.value != right.value) 441 | 442 | return NULL 443 | 444 | 445 | def native_bool_to_bool_obj(value: bool) -> obj.Boolean: 446 | return TRUE if value else FALSE 447 | 448 | 449 | def is_error(instance: obj.Obj) -> bool: 450 | if instance == None: 451 | return False 452 | 453 | return instance.type() == obj.ObjType.ERROR 454 | -------------------------------------------------------------------------------- /luatopy/lexer.py: -------------------------------------------------------------------------------- 1 | import re 2 | from io import StringIO 3 | from typing import Optional, Iterator 4 | 
5 | from .token import TokenType, Token 6 | 7 | 8 | EOF_MARKER: str = "<>" 9 | 10 | 11 | class Lexer: 12 | def __init__(self, source: StringIO) -> None: 13 | self.source: str = source.getvalue() 14 | self.pos: int = 0 15 | self.read_pos: int = 0 16 | self.ch: str = "" 17 | 18 | self.read_char() 19 | 20 | def read_char(self) -> None: 21 | if self.read_pos >= len(self.source): 22 | self.ch = EOF_MARKER 23 | else: 24 | self.ch = self.source[self.read_pos] 25 | 26 | self.pos = self.read_pos 27 | self.read_pos = self.read_pos + 1 28 | 29 | def peek_ahead(self, steps: int = 0) -> str: 30 | if self.read_pos + steps >= len(self.source): 31 | return EOF_MARKER 32 | return self.source[self.read_pos + steps] 33 | 34 | def peek_behind(self, steps: int = 0) -> str: 35 | if self.read_pos - steps >= len(self.source): 36 | return EOF_MARKER 37 | return self.source[self.read_pos - steps] 38 | 39 | def skip_whitespace(self) -> None: 40 | while self.ch == " ": 41 | self.read_char() 42 | 43 | def tokens(self) -> Iterator[Token]: 44 | while True: 45 | token = self.next_token() 46 | yield token 47 | 48 | if token.token_type == TokenType.EOF: 49 | break 50 | 51 | def next_token(self) -> Token: 52 | self.skip_whitespace() 53 | 54 | if self.ch == EOF_MARKER: 55 | tok = Token(token_type=TokenType.EOF, literal=self.ch) 56 | self.read_char() 57 | return tok 58 | 59 | if self.ch == "\n": 60 | tok = Token(token_type=TokenType.NEWLINE, literal=self.ch) 61 | self.read_char() 62 | return tok 63 | 64 | if self.ch == ";": 65 | tok = Token(token_type=TokenType.SEMICOLON, literal=self.ch) 66 | self.read_char() 67 | return tok 68 | 69 | if self.ch == "%": 70 | tok = Token(token_type=TokenType.PERCENT, literal=self.ch) 71 | self.read_char() 72 | return tok 73 | 74 | if self.ch == "#": 75 | tok = Token(token_type=TokenType.HASH, literal=self.ch) 76 | self.read_char() 77 | return tok 78 | 79 | if self.ch == "(": 80 | tok = Token(token_type=TokenType.LPAREN, literal=self.ch) 81 | self.read_char() 82 | 
return tok 83 | 84 | if self.ch == ")": 85 | tok = Token(token_type=TokenType.RPAREN, literal=self.ch) 86 | self.read_char() 87 | return tok 88 | 89 | if self.ch == "{": 90 | tok = Token(token_type=TokenType.LBRACE, literal=self.ch) 91 | self.read_char() 92 | return tok 93 | 94 | if self.ch == "}": 95 | tok = Token(token_type=TokenType.RBRACE, literal=self.ch) 96 | self.read_char() 97 | return tok 98 | 99 | if self.ch == "[": 100 | tok = Token(token_type=TokenType.LBRACKET, literal=self.ch) 101 | self.read_char() 102 | return tok 103 | 104 | if self.ch == "]": 105 | tok = Token(token_type=TokenType.RBRACKET, literal=self.ch) 106 | self.read_char() 107 | return tok 108 | 109 | if self.ch == ",": 110 | tok = Token(token_type=TokenType.COMMA, literal=self.ch) 111 | self.read_char() 112 | return tok 113 | 114 | if self.ch == "+": 115 | tok = Token(token_type=TokenType.PLUS, literal=self.ch) 116 | self.read_char() 117 | return tok 118 | 119 | if self.ch == "*": 120 | tok = Token(token_type=TokenType.ASTERISK, literal=self.ch) 121 | self.read_char() 122 | return tok 123 | 124 | if self.ch == "/": 125 | tok = Token(token_type=TokenType.SLASH, literal=self.ch) 126 | self.read_char() 127 | return tok 128 | 129 | if self.ch == "=": 130 | if self.peek_ahead() == "=": 131 | literal = self.ch 132 | self.read_char() 133 | literal = literal + self.ch 134 | self.read_char() 135 | tok = Token(token_type=TokenType.EQ, literal=literal) 136 | return tok 137 | 138 | tok = Token(token_type=TokenType.ASSIGN, literal=self.ch) 139 | self.read_char() 140 | return tok 141 | 142 | if self.ch == ".": 143 | if self.peek_ahead(0) == ".": 144 | literal = self.ch 145 | self.read_char() 146 | literal = literal + self.ch 147 | self.read_char() 148 | tok = Token(token_type=TokenType.CONCAT, literal=literal) 149 | return tok 150 | 151 | if self.ch == "~": 152 | if self.peek_ahead(0) == "=": 153 | literal = self.ch 154 | self.read_char() 155 | literal = literal + self.ch 156 | self.read_char() 157 | 
tok = Token(token_type=TokenType.NOT_EQ, literal=literal) 158 | return tok 159 | 160 | if self.ch == ">": 161 | if self.peek_ahead(0) == "=": 162 | literal = self.ch 163 | self.read_char() 164 | literal = literal + self.ch 165 | self.read_char() 166 | tok = Token(token_type=TokenType.GTE, literal=literal) 167 | return tok 168 | 169 | literal = self.ch 170 | self.read_char() 171 | tok = Token(token_type=TokenType.GT, literal=literal) 172 | return tok 173 | 174 | if self.ch == "<": 175 | if self.peek_ahead(0) == "=": 176 | literal = self.ch 177 | self.read_char() 178 | literal = literal + self.ch 179 | self.read_char() 180 | tok = Token(token_type=TokenType.LTE, literal=literal) 181 | return tok 182 | 183 | literal = self.ch 184 | self.read_char() 185 | tok = Token(token_type=TokenType.LT, literal=literal) 186 | return tok 187 | 188 | if self.ch == "-": 189 | if ( 190 | self.peek_ahead(0) == "-" 191 | and self.peek_ahead(1) == "[" 192 | and self.peek_ahead(2) == "[" 193 | ): 194 | comment = self.read_multiline_comment() 195 | tok = Token(token_type=TokenType.COMMENT, literal=comment) 196 | return tok 197 | 198 | if self.peek_ahead(0) == "-": 199 | comment = self.read_comment() 200 | tok = Token(token_type=TokenType.COMMENT, literal=comment) 201 | return tok 202 | 203 | tok = Token(token_type=TokenType.MINUS, literal=self.ch) 204 | self.read_char() 205 | return tok 206 | 207 | if is_letter(self.ch): 208 | identifier = self.read_identifier() 209 | 210 | if identifier == "nil": 211 | return Token(token_type=TokenType.NIL, literal=identifier) 212 | 213 | if identifier == "and": 214 | return Token(token_type=TokenType.AND, literal=identifier) 215 | 216 | if identifier == "or": 217 | return Token(token_type=TokenType.OR, literal=identifier) 218 | 219 | if identifier == "not": 220 | return Token(token_type=TokenType.NOT, literal=identifier) 221 | 222 | if identifier == "true": 223 | return Token(token_type=TokenType.TRUE, literal=identifier) 224 | 225 | if identifier == 
"false": 226 | return Token(token_type=TokenType.FALSE, literal=identifier) 227 | 228 | # TODO: Add is_keyword 229 | if identifier == "if": 230 | return Token(token_type=TokenType.IF, literal=identifier) 231 | 232 | if identifier == "then": 233 | return Token(token_type=TokenType.THEN, literal=identifier) 234 | 235 | if identifier == "else": 236 | return Token(token_type=TokenType.ELSE, literal=identifier) 237 | 238 | if identifier == "end": 239 | return Token(token_type=TokenType.END, literal=identifier) 240 | 241 | if identifier == "function": 242 | return Token(token_type=TokenType.FUNCTION, literal=identifier) 243 | 244 | if identifier == "return": 245 | return Token(token_type=TokenType.RETURN, literal=identifier) 246 | 247 | return Token(token_type=TokenType.IDENTIFIER, literal=identifier) 248 | 249 | if is_digit(self.ch): 250 | value = self.read_number() 251 | return Token(token_type=TokenType.INT, literal=value) 252 | 253 | if self.ch == '"': 254 | value = self.read_string('"') 255 | return Token(token_type=TokenType.STR, literal=value) 256 | 257 | if self.ch == "'": 258 | value = self.read_string("'") 259 | return Token(token_type=TokenType.STR, literal=value) 260 | 261 | tok = Token(token_type=TokenType.ILLEGAL, literal=self.ch) 262 | self.read_char() 263 | return tok 264 | 265 | def read_identifier(self) -> str: 266 | start_position = self.pos 267 | while self.ch != EOF_MARKER and (is_letter( 268 | self.ch) or is_digit(self.ch) 269 | ): 270 | self.read_char() 271 | return self.source[start_position : self.pos] 272 | 273 | def read_number(self) -> str: 274 | start_position = self.pos 275 | while self.ch != EOF_MARKER and is_digit(self.ch): 276 | self.read_char() 277 | return self.source[start_position : self.pos] 278 | 279 | def read_string(self, indicator: str = '"') -> str: 280 | self.read_char() 281 | 282 | start_position = self.pos 283 | out: str = self.ch 284 | 285 | while True: 286 | self.read_char() 287 | 288 | if self.ch == "\\" and 
self.peek_ahead(0) == indicator: 289 | self.read_char() 290 | out += indicator 291 | continue 292 | 293 | if self.ch == indicator or self.ch == EOF_MARKER: 294 | break 295 | 296 | out = out + self.ch 297 | 298 | self.read_char() 299 | return out 300 | 301 | def read_comment(self) -> str: 302 | start_position = self.pos 303 | while self.ch != "\n": 304 | self.read_char() 305 | return self.source[start_position + 2 : self.pos] 306 | 307 | def read_multiline_comment(self) -> str: 308 | start_position = self.pos 309 | while not ( 310 | self.ch == "]" 311 | and self.peek_ahead(0) == "]" 312 | and self.peek_ahead(1) == "-" 313 | and self.peek_ahead(2) == "-" 314 | ): 315 | self.read_char() 316 | 317 | for _ in range(0, 4): 318 | self.read_char() 319 | 320 | return self.source[start_position + 4 : self.pos - 4] 321 | 322 | 323 | def is_letter(char) -> bool: 324 | return bool(re.search(r"[a-zA-Z]|_", char)) 325 | 326 | 327 | def is_digit(char) -> bool: 328 | return bool(re.search(r"[0-9]", char)) 329 | -------------------------------------------------------------------------------- /luatopy/obj.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Any, Dict, Tuple, List, Optional, Callable 3 | from mypy_extensions import VarArg 4 | from enum import Enum, auto 5 | 6 | from luatopy import ast 7 | 8 | 9 | class ObjType(Enum): 10 | INTEGER = auto() 11 | FLOAT = auto() 12 | BOOLEAN = auto() 13 | NULL = auto() 14 | RETURN = auto() 15 | ERROR = auto() 16 | FUNCTION = auto() 17 | STRING = auto() 18 | BUILTIN = auto() 19 | TABLE = auto() 20 | 21 | 22 | class Obj: 23 | def type(self) -> ObjType: 24 | pass 25 | 26 | def inspect(self) -> str: 27 | pass 28 | 29 | 30 | class Environment: 31 | def __init__(self, outer: Optional["Environment"] = None): 32 | self.store: Dict[str, Obj] = {} 33 | self.outer: Optional["Environment"] = outer 34 | 35 | def get(self, name: str, default: Obj) -> 
@dataclass
class Float(Obj):
    """Runtime floating point value.

    Bug fix: ``@dataclass`` generates ``__eq__`` and therefore sets
    ``__hash__`` to None, which made Float the only value type that could
    not be used as a ``Table`` key (``Table.elements`` is ``Dict[Obj, Obj]``).
    Define ``__hash__`` explicitly, mirroring Integer and String.
    """

    value: float = 0.0

    def type(self) -> ObjType:
        return ObjType.FLOAT

    def inspect(self) -> str:
        # Lua-facing representation, e.g. "2.0".
        return str(self.value)

    def __hash__(self):
        return hash(self.value)
str: 129 | return "ERROR: {}".format(self.message) 130 | 131 | 132 | @dataclass 133 | class Function(Obj): 134 | body: ast.BlockStatement 135 | env: Environment 136 | parameters: List[ast.Identifier] = field(default_factory=list) 137 | 138 | def type(self) -> ObjType: 139 | return ObjType.FUNCTION 140 | 141 | def inspect(self) -> str: 142 | out: str = "" 143 | signature = ", ".join([x.value for x in self.parameters]) 144 | 145 | out = "function ({0})\n".format(signature) 146 | out = out + self.body.to_code() 147 | out = out + "\nend" 148 | return out 149 | 150 | 151 | @dataclass 152 | class String(Obj): 153 | value: str = "" 154 | 155 | def type(self) -> ObjType: 156 | return ObjType.STRING 157 | 158 | def inspect(self) -> str: 159 | return self.value 160 | 161 | def __hash__(self): 162 | return hash(self.value) 163 | 164 | # def __eq__(self, other): 165 | # if not other: 166 | # return False 167 | # return self.value == other.value 168 | 169 | 170 | @dataclass 171 | class Builtin(Obj): 172 | fn: Callable[[VarArg(Obj)], Obj] 173 | 174 | def type(self) -> ObjType: 175 | return ObjType.BUILTIN 176 | 177 | def inspect(self) -> str: 178 | return "Builtin function" 179 | 180 | 181 | @dataclass 182 | class Table(Obj): 183 | elements: Dict[Obj, Obj] 184 | 185 | def type(self) -> ObjType: 186 | return ObjType.TABLE 187 | 188 | def inspect(self) -> str: 189 | pairs_signature = ", ".join( 190 | [ 191 | f"{x[0].inspect()} = {x[1].inspect()}" 192 | for x in self.elements.items() 193 | ] 194 | ) 195 | 196 | out: str = "{" 197 | out = out + pairs_signature 198 | out = out + "}" 199 | return out 200 | 201 | 202 | TRUE = Boolean(value=True) 203 | FALSE = Boolean(value=False) 204 | NULL = Null() 205 | -------------------------------------------------------------------------------- /luatopy/parser.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Dict, Callable, List, cast, Tuple 2 | from enum import IntEnum, auto 3 
class Precedence(IntEnum):
    """Operator binding power, weakest (LOWEST) to strongest (INDEX).

    Ordering follows the Lua reference manual:
        and or
        < > <= >= ~= ==
        ..
        + -
        * /
        not - (unary)
        ^
    """

    LOWEST = 0
    EQUALS = auto()       # == ~= and or
    LESSGREATER = auto()  # < > <= >=
    CONCAT = auto()       # ..
    SUM = auto()          # + -
    PRODUCT = auto()      # * / %
    PREFIX = auto()       # not, unary -, #
    CALL = auto()         # function call
    INDEX = auto()        # table[index]
74 | self.infix_parse_fns: Dict[ 75 | TokenType, Callable[[ast.Node], ast.Node] 76 | ] = { 77 | TokenType.PLUS: self.parse_infix_expression, 78 | TokenType.MINUS: self.parse_infix_expression, 79 | TokenType.ASTERISK: self.parse_infix_expression, 80 | TokenType.PERCENT: self.parse_infix_expression, 81 | TokenType.SLASH: self.parse_infix_expression, 82 | TokenType.EQ: self.parse_infix_expression, 83 | TokenType.NOT_EQ: self.parse_infix_expression, 84 | TokenType.AND: self.parse_infix_expression, 85 | TokenType.OR: self.parse_infix_expression, 86 | TokenType.GT: self.parse_infix_expression, 87 | TokenType.GTE: self.parse_infix_expression, 88 | TokenType.LT: self.parse_infix_expression, 89 | TokenType.LTE: self.parse_infix_expression, 90 | TokenType.LPAREN: self.parse_call_expression, 91 | TokenType.CONCAT: self.parse_infix_expression, 92 | TokenType.LBRACKET: self.parse_index_expression, 93 | } 94 | 95 | self.table_prefix_fns = { 96 | TokenType.IDENTIFIER: self.parse_table_identifier_pair, 97 | TokenType.LBRACKET: self.parse_table_expression_pair, 98 | } 99 | 100 | self.cur_token: Token = self.lexer.next_token() 101 | self.peek_token: Token = self.lexer.next_token() 102 | 103 | def next_token(self) -> None: 104 | self.cur_token = self.peek_token 105 | self.peek_token = self.lexer.next_token() 106 | 107 | def parse_program(self) -> ast.Program: 108 | statements = [] 109 | while self.cur_token.token_type != TokenType.EOF: 110 | if self.cur_token.token_type in [ 111 | TokenType.NEWLINE, 112 | TokenType.SEMICOLON, 113 | ]: 114 | self.next_token() 115 | continue 116 | 117 | statement = self.parse_statement() 118 | statements.append(statement) 119 | self.next_token() 120 | 121 | return ast.Program(statements) 122 | 123 | def parse_statement(self) -> ast.Node: 124 | if ( 125 | self.cur_token.token_type == TokenType.IDENTIFIER 126 | and self.peek_token.token_type == TokenType.ASSIGN 127 | ): 128 | return self.parse_assignment_statement() 129 | 130 | if 
self.cur_token.token_type == TokenType.RETURN: 131 | return self.parse_return_statement() 132 | 133 | return self.parse_expression_statement() 134 | 135 | def parse_assignment_statement(self): 136 | token = self.cur_token 137 | 138 | self.next_token() 139 | self.next_token() # We already know the next statement is = 140 | 141 | value = self.parse_expression(Precedence.LOWEST) 142 | 143 | if self.peek_token.token_type in [ 144 | TokenType.NEWLINE, 145 | TokenType.SEMICOLON, 146 | ]: 147 | self.next_token() 148 | 149 | statement = ast.AssignStatement( 150 | token=token, 151 | name=ast.Identifier(token=token, value=token.literal), 152 | value=value, 153 | ) 154 | return statement 155 | 156 | def parse_return_statement(self) -> ast.ReturnStatement: 157 | token = self.cur_token 158 | self.next_token() 159 | 160 | value = self.parse_expression(Precedence.LOWEST) 161 | return ast.ReturnStatement(token=token, value=value) 162 | 163 | def parse_if_expression(self): 164 | token = self.cur_token 165 | 166 | self.next_token() 167 | condition = self.parse_expression(Precedence.LOWEST) 168 | 169 | if not self.expect_peek(TokenType.THEN): 170 | return None 171 | 172 | consequence = self.parse_block_statement() 173 | alternative = None 174 | 175 | if self.cur_token.token_type == TokenType.ELSE: 176 | alternative = self.parse_block_statement() 177 | 178 | return ast.IfExpression( 179 | token=token, 180 | condition=condition, 181 | consequence=consequence, 182 | alternative=alternative, 183 | ) 184 | 185 | def parse_block_statement(self): 186 | token = self.cur_token 187 | statements: List[ast.Statement] = [] 188 | 189 | self.next_token() 190 | 191 | while ( 192 | self.cur_token.token_type != TokenType.END 193 | and self.cur_token.token_type != TokenType.ELSE 194 | and self.cur_token.token_type != TokenType.EOF 195 | ): 196 | statement = self.parse_statement() 197 | 198 | if statement: 199 | statements.append(statement) 200 | 201 | self.next_token() 202 | 203 | return 
ast.BlockStatement(token=token, statements=statements) 204 | 205 | def parse_expression_statement(self) -> ast.ExpressionStatement: 206 | expression = self.parse_expression(Precedence.LOWEST) 207 | 208 | return ast.ExpressionStatement( 209 | token=self.cur_token, expression=expression 210 | ) 211 | 212 | def parse_expression(self, precedence: Precedence): 213 | prefix_fn = self.prefix_parse_fns.get(self.cur_token.token_type, None) 214 | 215 | if not prefix_fn: 216 | self.errors.append( 217 | "No prefix fn found for {0}".format(self.cur_token.token_type) 218 | ) 219 | # Add error reporting 220 | return None 221 | 222 | left_expression = prefix_fn() 223 | 224 | while ( 225 | self.peek_token.token_type 226 | not in [TokenType.NEWLINE, TokenType.SEMICOLON] 227 | and precedence < self.peek_precedence() 228 | ): 229 | infix_fn = self.infix_parse_fns.get( 230 | self.peek_token.token_type, None 231 | ) 232 | if not infix_fn: 233 | return left_expression 234 | 235 | self.next_token() 236 | left_expression = infix_fn(left_expression) 237 | 238 | return left_expression 239 | 240 | def parse_identifier(self): 241 | value = self.cur_token.literal 242 | return ast.Identifier(token=self.cur_token, value=value) 243 | 244 | def parse_integer_literal(self) -> ast.IntegerLiteral: 245 | literal = self.cur_token.literal 246 | value = int(literal) 247 | return ast.IntegerLiteral(token=self.cur_token, value=value) 248 | 249 | def parse_string_literal(self) -> ast.StringLiteral: 250 | literal = self.cur_token.literal 251 | value = literal 252 | return ast.StringLiteral(token=self.cur_token, value=value) 253 | 254 | def parse_boolean_literal(self) -> ast.Boolean: 255 | literal = self.cur_token.literal 256 | value = literal == "true" 257 | return ast.Boolean(token=self.cur_token, value=value) 258 | 259 | def parse_function_literal(self): 260 | token = self.cur_token 261 | name = None 262 | 263 | if self.peek_token.token_type == TokenType.IDENTIFIER: 264 | self.next_token() 265 | name = 
ast.Identifier( 266 | token=self.cur_token, value=self.cur_token.literal 267 | ) 268 | 269 | if not self.expect_peek(TokenType.LPAREN): 270 | return None 271 | 272 | parameters = self.parse_function_parameters() 273 | 274 | body = self.parse_block_statement() 275 | return ast.FunctionLiteral( 276 | token=token, parameters=parameters, body=body, name=name 277 | ) 278 | 279 | def parse_function_parameters(self): 280 | identifiers: List[Identifier] = [] 281 | 282 | if self.peek_token.token_type == TokenType.RPAREN: 283 | self.next_token() 284 | return identifiers 285 | 286 | self.next_token() 287 | identifier = ast.Identifier( 288 | token=self.cur_token, value=self.cur_token.literal 289 | ) 290 | identifiers.append(identifier) 291 | 292 | while self.peek_token.token_type == TokenType.COMMA: 293 | self.next_token() 294 | self.next_token() 295 | 296 | identifier = ast.Identifier( 297 | token=self.cur_token, value=self.cur_token.literal 298 | ) 299 | identifiers.append(identifier) 300 | 301 | if not self.expect_peek(TokenType.RPAREN): 302 | return None 303 | 304 | return identifiers 305 | 306 | def cur_precedence(self) -> Precedence: 307 | return precedences.get(self.cur_token.token_type, Precedence.LOWEST) 308 | 309 | def peek_precedence(self) -> Precedence: 310 | return precedences.get(self.peek_token.token_type, Precedence.LOWEST) 311 | 312 | def parse_prefix_expression(self) -> ast.PrefixExpression: 313 | token = self.cur_token 314 | 315 | self.next_token() 316 | right = self.parse_expression(Precedence.PREFIX) 317 | 318 | return ast.PrefixExpression( 319 | token=token, right=right, operator=token.literal 320 | ) 321 | 322 | def parse_infix_expression(self, left: ast.Node) -> ast.InfixExpression: 323 | token = self.cur_token 324 | 325 | precedence = self.cur_precedence() 326 | 327 | self.next_token() 328 | right = self.parse_expression(precedence) 329 | 330 | return ast.InfixExpression( 331 | token=token, left=left, operator=token.literal, right=right 332 | ) 333 | 
334 | def parse_call_expression(self, function: ast.Node) -> ast.CallExpression: 335 | token = self.cur_token 336 | arguments = self.parse_call_arguments() 337 | 338 | return ast.CallExpression( 339 | token=token, function=function, arguments=arguments 340 | ) 341 | 342 | def parse_call_arguments(self): 343 | arguments: List[ast.Expression] = [] 344 | 345 | if self.peek_token.token_type == TokenType.RPAREN: 346 | self.next_token() 347 | return arguments 348 | 349 | self.next_token() 350 | arguments.append(self.parse_expression(Precedence.LOWEST)) 351 | 352 | while self.peek_token.token_type == TokenType.COMMA: 353 | self.next_token() 354 | self.next_token() 355 | 356 | identifier = ast.Identifier( 357 | token=self.cur_token, value=self.cur_token.literal 358 | ) 359 | 360 | arguments.append(self.parse_expression(Precedence.LOWEST)) 361 | 362 | if not self.expect_peek(TokenType.RPAREN): 363 | return None 364 | 365 | return arguments 366 | 367 | def parse_grouped_expression(self): 368 | self.next_token() 369 | 370 | expression = self.parse_expression(Precedence.LOWEST) 371 | 372 | if not self.expect_peek(TokenType.RPAREN): 373 | return None 374 | 375 | return expression 376 | 377 | def expect_peek(self, token_type: TokenType) -> bool: 378 | if self.peek_token.token_type != token_type: 379 | self.errors.append( 380 | "Expected next token to be {0}, got {1}".format( 381 | token_type, self.peek_token.token_type 382 | ) 383 | ) 384 | return False 385 | 386 | self.next_token() 387 | return True 388 | 389 | def parse_table_literal(self) -> ast.TableLiteral: 390 | token = self.cur_token 391 | elements = self.parse_table_expression_list() 392 | return ast.TableLiteral(token=token, elements=elements) 393 | 394 | def parse_table_expression_list(self): 395 | elements: List[Tuple[ast.Expression, ast.Expression]] = [] 396 | 397 | if self.peek_token.token_type == TokenType.RBRACE: 398 | self.next_token() 399 | return [] 400 | 401 | self.next_token() 402 | 403 | index: int = 1 404 | 
405 | while True: 406 | parse_fn = self.table_prefix_fns.get( 407 | self.cur_token.token_type, self.parse_table_expression_value 408 | ) 409 | element, pair = parse_fn() 410 | 411 | if element: 412 | elements.append( 413 | (ast.IntegerLiteral(token=None, value=index), element) 414 | ) 415 | index = index + 1 416 | elif pair: 417 | elements.append(pair) 418 | 419 | if self.peek_token.token_type == TokenType.RBRACE: 420 | break 421 | 422 | if not self.expect_peek(TokenType.COMMA): 423 | return None 424 | 425 | self.next_token() # Bypass comma 426 | 427 | if not self.expect_peek(TokenType.RBRACE): 428 | return None 429 | 430 | return elements 431 | 432 | def parse_table_identifier_pair(self): 433 | key_token: Token = self.cur_token 434 | 435 | self.next_token() 436 | self.next_token() 437 | 438 | expression = self.parse_expression(Precedence.LOWEST) 439 | return ( 440 | None, 441 | ( 442 | ast.StringLiteral(token=key_token, value=key_token.literal), 443 | expression, 444 | ), 445 | ) 446 | 447 | def parse_table_expression_value(self): 448 | expression = self.parse_expression(Precedence.LOWEST) 449 | return (expression, None) 450 | 451 | def parse_table_expression_pair(self): 452 | self.next_token() 453 | 454 | key_expression = self.parse_expression(Precedence.LOWEST) 455 | 456 | self.next_token() 457 | self.next_token() 458 | self.next_token() 459 | 460 | value_expression = self.parse_expression(Precedence.LOWEST) 461 | 462 | return (None, (key_expression, value_expression)) 463 | 464 | def parse_index_expression(self, left: ast.Node): 465 | left_expression = cast(ast.Expression, left) 466 | token = self.cur_token 467 | 468 | self.next_token() 469 | index = self.parse_expression(Precedence.LOWEST) 470 | 471 | if not self.expect_peek(TokenType.RBRACKET): 472 | return None 473 | 474 | return ast.IndexExpression( 475 | token=token, left=left_expression, index=index 476 | ) 477 | -------------------------------------------------------------------------------- 
class TokenType(Enum):
    """Every category of lexeme the lexer can emit.

    Member order is preserved exactly; auto() values are only compared
    for identity/equality, never serialized.
    """

    # Stream control markers
    ILLEGAL = auto()
    EOF = auto()
    NEWLINE = auto()

    # Literals and names
    IDENTIFIER = auto()
    INT = auto()
    STR = auto()
    NIL = auto()
    TRUE = auto()
    FALSE = auto()

    COMMENT = auto()
    HASH = auto()

    # Assignment and arithmetic
    ASSIGN = auto()
    PLUS = auto()
    MINUS = auto()
    ASTERISK = auto()
    SLASH = auto()
    SEMICOLON = auto()
    PERCENT = auto()

    # Equality
    EQ = auto()
    NOT_EQ = auto()

    # Comparison
    GT = auto()
    GTE = auto()
    LT = auto()
    LTE = auto()

    # Logical operators
    AND = auto()
    OR = auto()
    NOT = auto()

    # Grouping and brackets
    LPAREN = auto()
    RPAREN = auto()

    LBRACKET = auto()
    RBRACKET = auto()

    LBRACE = auto()
    RBRACE = auto()

    COMMA = auto()
    CONCAT = auto()

    # Keywords
    FUNCTION = auto()
    IF = auto()
    ELSE = auto()
    THEN = auto()
    END = auto()
    RETURN = auto()


@dataclass
class Token:
    """A single lexeme: its category plus the exact source text."""

    token_type: TokenType
    literal: str
Lexer(StringIO(source)) 26 | if tokens: 27 | print(list(lexer.tokens())) 28 | 29 | parser = Parser(lexer) 30 | program = parser.parse_program() 31 | 32 | if parser.errors: 33 | for err in parser.errors: 34 | print("ERROR: {0}".format(err)) 35 | 36 | if ast_code: 37 | print(program.to_code()) 38 | 39 | evaluated = evaluator.evaluate(program, env) 40 | if evaluated: 41 | print(evaluated.inspect()) 42 | 43 | 44 | if __name__ == '__main__': 45 | run() 46 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | click 3 | mypy 4 | 5 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marteinn/Lua-Interpreter-In-Python/a3a78f9ce6923eaa6cabd6e325ed27d0626f168c/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_evaluator.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | import unittest 3 | 4 | from luatopy.lexer import Lexer 5 | from luatopy.parser import Parser 6 | from luatopy import obj 7 | from luatopy import evaluator 8 | 9 | 10 | class EvaluatorTest(unittest.TestCase): 11 | def test_integer_expressions(self): 12 | tests = [ 13 | ("1", 1), 14 | ("10", 10), 15 | ("-1", -1), 16 | ("-10", -10), 17 | ("5 + 5 + 3 + 7", 20), 18 | ("5 - 5", 0), 19 | ("1 * 5 * 5", 25), 20 | ("-10 * 5 + 5 * (2 + 2)", -30), 21 | ] 22 | 23 | for source, expected in tests: 24 | evaluated = source_to_eval(source) 25 | 26 | self.assertEqual(type(evaluated), obj.Integer) 27 | self.assertEqual(evaluated.value, expected) 28 | 29 | def test_float_expressions(self): 30 | tests = [ 31 | ("4 / 2", 2.0), 32 | ("5 % 10", 5.0), 33 | ] 34 | 35 | for source, expected in tests: 36 | 
evaluated = source_to_eval(source) 37 | 38 | self.assertEqual(type(evaluated), obj.Float) 39 | self.assertEqual(evaluated.value, expected) 40 | 41 | def test_string_concat(self): 42 | tests = [ 43 | ('"hello" .. "world"', "helloworld"), 44 | ('"hello" .. "-" .. "world"', "hello-world"), 45 | ] 46 | 47 | for source, expected in tests: 48 | evaluated = source_to_eval(source) 49 | 50 | self.assertEqual(type(evaluated), obj.String) 51 | self.assertEqual(evaluated.value, expected) 52 | 53 | def test_boolean_expressions(self): 54 | tests = [ 55 | ("false", False), 56 | ("true", True), 57 | ("1 > 2", False), 58 | ("1 >= 2", False), 59 | ("1 < 2", True), 60 | ("1 <= 2", True), 61 | ("1 == 1", True), 62 | ("1 ~= 1", False), 63 | ("1 == 2", False), 64 | ("1 ~= 2", True), 65 | ("(2 > 1) == true", True), 66 | ("(2 < 1) == false", True), 67 | ] 68 | 69 | for source, expected in tests: 70 | evaluated = source_to_eval(source) 71 | 72 | self.assertEqual(type(evaluated), obj.Boolean) 73 | self.assertEqual(evaluated.value, expected) 74 | 75 | def test_not_prefix_operator(self): 76 | tests = [ 77 | ("not false", True), 78 | ("not 5", False), 79 | ("not 0", False), 80 | ("not not 0", True), 81 | ("not not true", True), 82 | ] 83 | 84 | for source, expected in tests: 85 | evaluated = source_to_eval(source) 86 | 87 | self.assertEqual(type(evaluated), obj.Boolean) 88 | self.assertEqual(evaluated.value, expected) 89 | 90 | def test_bool_infix_operations(self): 91 | tests = [ 92 | ("true and true", True), 93 | ("false and true", False), 94 | ("a = false; b = true; a and b", False), 95 | ("function a () return true end; a() and true", True), 96 | ("false or true", True), 97 | ("false or false", False), 98 | ("function a () return false end; a() or false", False), 99 | ("(true and false) or false", False), 100 | ] 101 | 102 | for source, expected in tests: 103 | evaluated = source_to_eval(source) 104 | 105 | self.assertEqual(type(evaluated), obj.Boolean) 106 | 
self.assertEqual(evaluated.value, expected) 107 | 108 | def test_length_prefix_operator(self): 109 | tests = [ 110 | ('#"hello"', 5), 111 | ('#{10, 20, 30}', 3), 112 | ('#{[1] = 1, [(1+1)] = 2, 3}', 2), 113 | ] 114 | 115 | for source, expected in tests: 116 | evaluated = source_to_eval(source) 117 | 118 | self.assertEqual(type(evaluated), obj.Integer) 119 | self.assertEqual(evaluated.value, expected) 120 | 121 | def test_if_else_expressions(self): 122 | tests = [ 123 | ("if true then 10 end", 10), 124 | ("if 1 then 10 end", 10), 125 | ("if false then 10 end", evaluator.NULL), 126 | ("if 10 > 5 then 10 end", 10), 127 | ("if 5 < 2 then 10 end", evaluator.NULL), 128 | ("if false then 10 else 5 end", 5), 129 | ("if true then 10 else 5 end", 10), 130 | ] 131 | 132 | for source, expected in tests: 133 | evaluated = source_to_eval(source) 134 | 135 | if evaluated == evaluator.NULL: 136 | self.assertEqual(evaluated, expected) 137 | else: 138 | self.assertEqual(evaluated.value, expected) 139 | 140 | def test_return_statements(self): 141 | tests = [ 142 | ("return 5", 5), 143 | ( 144 | """return 5 145 | 10 146 | """, 147 | 5, 148 | ), 149 | ("return 5*5", 25), 150 | ( 151 | """10 152 | return 5 153 | """, 154 | 5, 155 | ), 156 | ( 157 | """ 158 | if true then 159 | if true then 160 | return 10 161 | end 162 | return 5 163 | end 164 | """, 165 | 10, 166 | ), 167 | ] 168 | 169 | for source, expected in tests: 170 | evaluated = source_to_eval(source) 171 | 172 | self.assertEqual(type(evaluated), obj.Integer) 173 | self.assertEqual(evaluated.value, expected) 174 | 175 | def test_error_handling(self): 176 | tests = [ 177 | ("1 + true", "Attempt to perform arithmetic on a boolean value"), 178 | ("-true", "Attempt to perform arithmetic on a boolean value"), 179 | ( 180 | """1 + true 181 | 5 182 | """, 183 | "Attempt to perform arithmetic on a boolean value", 184 | ), 185 | ("true + false", "Attempt to perform arithmetic on a boolean value"), 186 | ( 187 | "if true then true + false 
end", 188 | "Attempt to perform arithmetic on a boolean value", 189 | ), 190 | ( 191 | "if true then if true then true + false end end", 192 | "Attempt to perform arithmetic on a boolean value", 193 | ), 194 | ( 195 | """5 196 | true + false 197 | 6 198 | """, 199 | "Attempt to perform arithmetic on a boolean value", 200 | ), 201 | ( 202 | "if true then if true then return true + false end return 5 end", 203 | "Attempt to perform arithmetic on a boolean value", 204 | ), 205 | ( 206 | "if true + false then 1 else 2 end", 207 | "Attempt to perform arithmetic on a boolean value", 208 | ), 209 | ] 210 | 211 | for source, expected in tests: 212 | evaluated = source_to_eval(source) 213 | 214 | self.assertEqual(type(evaluated), obj.Error) 215 | self.assertEqual(evaluated.message, expected) 216 | 217 | def test_that_non_existing_identifiers_returns_nil(self): 218 | tests = [ 219 | ("a", "nil"), 220 | ] 221 | 222 | for source, expected in tests: 223 | evaluated = source_to_eval(source) 224 | 225 | self.assertEqual(type(evaluated), obj.Null) 226 | 227 | def test_assignments(self): 228 | tests = [ 229 | ("""a = 5; a""", 5), 230 | ("""a = 5 * 5; a""", 25), 231 | ("""a = 5; b = a; b""", 5), 232 | ("""a = 5; b = a; c = a + b; c""", 10), 233 | ] 234 | 235 | for source, expected in tests: 236 | evaluated = source_to_eval(source) 237 | 238 | self.assertEqual(type(evaluated), obj.Integer) 239 | self.assertEqual(evaluated.value, expected) 240 | 241 | def test_function_declaration(self): 242 | tests = [ 243 | ( 244 | "function (a) a = a + 1; return a end", 245 | "function (a)\na = (a + 1)\nreturn a\nend", 246 | ) 247 | ] 248 | 249 | for source, expected in tests: 250 | evaluated = source_to_eval(source) 251 | 252 | self.assertEqual(type(evaluated), obj.Function) 253 | self.assertEqual(evaluated.inspect(), expected) 254 | 255 | def test_named_function_declaration(self): 256 | tests = [ 257 | ("function f (a) a + 1 end; f(1)", 2), 258 | ("function mycat (name) return name end; 
mycat('sniff')", 'sniff'), 259 | ] 260 | 261 | for source, expected in tests: 262 | evaluated = source_to_eval(source) 263 | self.assertEqual(evaluated.value, expected) 264 | 265 | def test_function_call(self): 266 | tests = [ 267 | ("f = function (a) a + 1 end; f(1)", 2), 268 | ("f = function (a) return a end; f(1)", 1), 269 | ("f = function (a, b) return a+b end; f(1, 2)", 3), 270 | ("(function (x) return x end)(5)", 5), 271 | ] 272 | 273 | for source, expected in tests: 274 | evaluated = source_to_eval(source) 275 | self.assertEqual(evaluated.value, expected) 276 | 277 | def test_function_closure(self): 278 | source = """ 279 | add = function (x) function (y) x + y end end 280 | add_two = add(2) 281 | add_two(3) 282 | """ 283 | 284 | evaluated = source_to_eval(source) 285 | self.assertEqual(evaluated.value, 5) 286 | 287 | def test_string_expressions(self): 288 | tests = [('"hello world"', "hello world")] 289 | 290 | for source, expected in tests: 291 | evaluated = source_to_eval(source) 292 | self.assertEqual(evaluated.value, expected) 293 | 294 | def test_builints(self): 295 | tests = [ 296 | ('type("string")', "string"), 297 | ("type(1)", "number"), 298 | ("type(true)", "boolean"), 299 | ("type({})", "table"), 300 | ("type(function (a) a = a + 1; return a end)", "function"), 301 | ] 302 | 303 | for source, expected in tests: 304 | evaluated = source_to_eval(source) 305 | self.assertEqual(evaluated.value, expected) 306 | 307 | def test_table_expressions(self): 308 | tests = [ 309 | ("{}", "{}"), 310 | ("{1, 2, (1 + 2)}", "{1 = 1, 2 = 2, 3 = 3}"), 311 | ("a = {1, 2, 3}; a", "{1 = 1, 2 = 2, 3 = 3}"), 312 | ('{1, "random", 3}', "{1 = 1, 2 = random, 3 = 3}"), 313 | ("{key = 1}", "{key = 1}"), 314 | ('{key = 1, ["morekey"] = 2}', "{key = 1, morekey = 2}"), 315 | ('a = "hello"; {[a] = 1}', "{hello = 1}"), 316 | ('{[1] = 1, [2] = 2}', '{1 = 1, 2 = 2}'), 317 | ] 318 | 319 | for source, expected in tests: 320 | evaluated = source_to_eval(source) 321 | 
self.assertEqual(evaluated.inspect(), expected) 322 | 323 | def test_table_index_expressions(self): 324 | tests = [ 325 | ("{1, 2, 3}[1]", "1"), 326 | ("{1, 2, 3}[99]", "nil"), 327 | ("{1, 2, 3}[1+2]", "3"), 328 | ("a = {1, 2, 3}; a[2]", "2"), 329 | ("a = {1, 2}; a[1] + a[2]", "3"), 330 | ('{["hello"] = 2}["hello"]', "2"), 331 | ] 332 | 333 | for source, expected in tests: 334 | evaluated = source_to_eval(source) 335 | self.assertEqual(evaluated.inspect(), expected) 336 | 337 | 338 | def source_to_eval(source) -> obj.Obj: 339 | lexer = Lexer(StringIO(source)) 340 | parser = Parser(lexer) 341 | program = parser.parse_program() 342 | env = obj.Environment() 343 | return evaluator.evaluate(program, env) 344 | -------------------------------------------------------------------------------- /tests/test_lexer.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | import unittest 3 | 4 | from luatopy.lexer import Lexer 5 | from luatopy.token import TokenType 6 | 7 | 8 | class LexerTest(unittest.TestCase): 9 | def test_binary_op(self): 10 | source = """1 + 2 11 | 2-1 12 | """ 13 | lexer = Lexer(StringIO(source)) 14 | 15 | tokens = [ 16 | (TokenType.INT, "1"), 17 | (TokenType.PLUS, "+"), 18 | (TokenType.INT, "2"), 19 | (TokenType.NEWLINE, "\n"), 20 | (TokenType.INT, "2"), 21 | (TokenType.MINUS, "-"), 22 | (TokenType.INT, "1"), 23 | (TokenType.NEWLINE, "\n"), 24 | (TokenType.EOF, "<>"), 25 | ] 26 | 27 | for expected_token in tokens: 28 | token = lexer.next_token() 29 | 30 | self.assertEqual(expected_token[0], token.token_type) 31 | self.assertEqual(expected_token[1], token.literal) 32 | 33 | def test_that_eof_gets_retruned(self): 34 | source = "" 35 | lexer = Lexer(StringIO(source)) 36 | 37 | tokens = [ 38 | (TokenType.EOF, "<>"), 39 | ] 40 | 41 | for expected_token in tokens: 42 | token = lexer.next_token() 43 | 44 | self.assertEqual(expected_token[0], token.token_type) 45 | self.assertEqual(expected_token[1], 
token.literal) 46 | 47 | def test_that_tokens_are_created_according_to_source(self): 48 | source = """num = 42 49 | cat = 5 50 | -- Hello 51 | --[[ 52 | Goodbye 53 | ]]-- 54 | cookie == 9 55 | a = nil 56 | a and b 57 | a or b 58 | a not b 59 | myfun() 60 | a,b 61 | a ~= b 62 | stra .. strb 63 | 1 > 2 64 | 2 >= 2 65 | 1 < 2 66 | 2 <= 2 67 | 2 * 2 68 | """ 69 | 70 | lexer = Lexer(StringIO(source)) 71 | 72 | tokens = [ 73 | (TokenType.IDENTIFIER, "num"), (TokenType.ASSIGN, "="), (TokenType.INT, "42"), 74 | (TokenType.NEWLINE, "\n"), 75 | (TokenType.IDENTIFIER, "cat"), (TokenType.ASSIGN, "="), (TokenType.INT, "5"), 76 | (TokenType.NEWLINE, "\n"), 77 | (TokenType.COMMENT, " Hello"), 78 | (TokenType.NEWLINE, "\n"), 79 | (TokenType.COMMENT, "\nGoodbye\n"), 80 | (TokenType.NEWLINE, "\n"), 81 | (TokenType.IDENTIFIER, "cookie"), (TokenType.EQ, "=="), (TokenType.INT, "9"), 82 | (TokenType.NEWLINE, "\n"), 83 | (TokenType.IDENTIFIER, "a"), (TokenType.ASSIGN, "="), (TokenType.NIL, "nil"), 84 | (TokenType.NEWLINE, "\n"), 85 | (TokenType.IDENTIFIER, "a"), (TokenType.AND, "and"), (TokenType.IDENTIFIER, "b"), 86 | (TokenType.NEWLINE, "\n"), 87 | (TokenType.IDENTIFIER, "a"), (TokenType.OR, "or"), (TokenType.IDENTIFIER, "b"), 88 | (TokenType.NEWLINE, "\n"), 89 | (TokenType.IDENTIFIER, "a"), (TokenType.NOT, "not"), (TokenType.IDENTIFIER, "b"), 90 | (TokenType.NEWLINE, "\n"), 91 | (TokenType.IDENTIFIER, "myfun"), (TokenType.LPAREN, "("), (TokenType.RPAREN, ")"), 92 | (TokenType.NEWLINE, "\n"), 93 | (TokenType.IDENTIFIER, "a"), (TokenType.COMMA, ","), (TokenType.IDENTIFIER, "b"), 94 | (TokenType.NEWLINE, "\n"), 95 | (TokenType.IDENTIFIER, "a"), (TokenType.NOT_EQ, "~="), (TokenType.IDENTIFIER, "b"), 96 | (TokenType.NEWLINE, "\n"), 97 | (TokenType.IDENTIFIER, "stra"), (TokenType.CONCAT, ".."), (TokenType.IDENTIFIER, "strb"), 98 | (TokenType.NEWLINE, "\n"), 99 | (TokenType.INT, "1"), (TokenType.GT, ">"), (TokenType.INT, "2"), 100 | (TokenType.NEWLINE, "\n"), 101 | (TokenType.INT, "2"), 
(TokenType.GTE, ">="), (TokenType.INT, "2"), 102 | (TokenType.NEWLINE, "\n"), 103 | (TokenType.INT, "1"), (TokenType.LT, "<"), (TokenType.INT, "2"), 104 | (TokenType.NEWLINE, "\n"), 105 | (TokenType.INT, "2"), (TokenType.LTE, "<="), (TokenType.INT, "2"), 106 | (TokenType.NEWLINE, "\n"), 107 | (TokenType.INT, "2"), (TokenType.ASTERISK, "*"), (TokenType.INT, "2"), 108 | (TokenType.NEWLINE, "\n"), 109 | (TokenType.EOF, "<>"), 110 | ] 111 | 112 | for expected_token in tokens: 113 | token = lexer.next_token() 114 | 115 | self.assertEqual(expected_token[0], token.token_type) 116 | self.assertEqual(expected_token[1], token.literal) 117 | 118 | def test_bool_tokens(self): 119 | source = """true 120 | a = false 121 | """ 122 | 123 | lexer = Lexer(StringIO(source)) 124 | 125 | tokens = [ 126 | (TokenType.TRUE, "true"), 127 | (TokenType.NEWLINE, "\n"), 128 | 129 | (TokenType.IDENTIFIER, "a"), 130 | (TokenType.ASSIGN, "="), 131 | (TokenType.FALSE, "false"), 132 | (TokenType.NEWLINE, "\n"), 133 | 134 | (TokenType.EOF, "<>"), 135 | ] 136 | 137 | for expected_token in tokens: 138 | token = lexer.next_token() 139 | 140 | self.assertEqual(expected_token[0], token.token_type) 141 | self.assertEqual(expected_token[1], token.literal) 142 | 143 | def test_if_statement_keywords(self): 144 | source = "if true then 1 else 2 end" 145 | 146 | lexer = Lexer(StringIO(source)) 147 | 148 | tokens = [ 149 | (TokenType.IF, "if"), 150 | (TokenType.TRUE, "true"), 151 | (TokenType.THEN, "then"), 152 | (TokenType.INT, "1"), 153 | (TokenType.ELSE, "else"), 154 | (TokenType.INT, "2"), 155 | (TokenType.END, "end"), 156 | (TokenType.EOF, "<>"), 157 | ] 158 | 159 | for expected_token in tokens: 160 | token = lexer.next_token() 161 | 162 | self.assertEqual(expected_token[0], token.token_type) 163 | self.assertEqual(expected_token[1], token.literal) 164 | 165 | def test_function_tokens(self): 166 | source = "function fib(n) return 1 end" 167 | lexer = Lexer(StringIO(source)) 168 | 169 | tokens = [ 170 | 
(TokenType.FUNCTION, "function"), 171 | (TokenType.IDENTIFIER, "fib"), 172 | (TokenType.LPAREN, "("), 173 | (TokenType.IDENTIFIER, "n"), 174 | (TokenType.RPAREN, ")"), 175 | (TokenType.RETURN, "return"), 176 | (TokenType.INT, "1"), 177 | (TokenType.END, "end"), 178 | (TokenType.EOF, "<>"), 179 | ] 180 | 181 | for expected_token in tokens: 182 | token = lexer.next_token() 183 | 184 | self.assertEqual(expected_token[0], token.token_type) 185 | self.assertEqual(expected_token[1], token.literal) 186 | 187 | def test_call_tokens(self): 188 | source = "abc(n, 1+2, 3)" 189 | lexer = Lexer(StringIO(source)) 190 | 191 | tokens = [ 192 | (TokenType.IDENTIFIER, "abc"), 193 | (TokenType.LPAREN, "("), 194 | (TokenType.IDENTIFIER, "n"), 195 | (TokenType.COMMA, ","), 196 | (TokenType.INT, "1"), 197 | (TokenType.PLUS, "+"), 198 | (TokenType.INT, "2"), 199 | (TokenType.COMMA, ","), 200 | (TokenType.INT, "3"), 201 | (TokenType.RPAREN, ")"), 202 | (TokenType.EOF, "<>"), 203 | ] 204 | 205 | for expected_token in tokens: 206 | token = lexer.next_token() 207 | 208 | self.assertEqual(expected_token[0], token.token_type) 209 | self.assertEqual(expected_token[1], token.literal) 210 | 211 | def test_semicolon_delimiter(self): 212 | source = "1; 2" 213 | lexer = Lexer(StringIO(source)) 214 | 215 | tokens = [ 216 | (TokenType.INT, "1"), 217 | (TokenType.SEMICOLON, ";"), 218 | (TokenType.INT, "2"), 219 | (TokenType.EOF, "<>"), 220 | ] 221 | 222 | for expected_token in tokens: 223 | token = lexer.next_token() 224 | 225 | self.assertEqual(expected_token[0], token.token_type) 226 | self.assertEqual(expected_token[1], token.literal) 227 | 228 | def test_string_type(self): 229 | source = """"a random string" 230 | "escape\\" value" 231 | 'another string' 232 | 'with escaped\\' indicator' 233 | """ 234 | lexer = Lexer(StringIO(source)) 235 | 236 | tokens = [ 237 | (TokenType.STR, "a random string"), 238 | (TokenType.NEWLINE, "\n"), 239 | (TokenType.STR, 'escape" value'), 240 | (TokenType.NEWLINE, 
"\n"), 241 | (TokenType.STR, "another string"), 242 | (TokenType.NEWLINE, "\n"), 243 | (TokenType.STR, "with escaped' indicator"), 244 | (TokenType.NEWLINE, "\n"), 245 | (TokenType.EOF, "<>"), 246 | ] 247 | 248 | for expected_token in tokens: 249 | token = lexer.next_token() 250 | 251 | self.assertEqual(expected_token[0], token.token_type) 252 | self.assertEqual(expected_token[1], token.literal) 253 | 254 | def test_table_list_declaration(self): 255 | source = "{1, 2}" 256 | 257 | lexer = Lexer(StringIO(source)) 258 | 259 | tokens = [ 260 | (TokenType.LBRACE, "{"), 261 | (TokenType.INT, "1"), 262 | (TokenType.COMMA, ","), 263 | (TokenType.INT, "2"), 264 | (TokenType.RBRACE, "}"), 265 | (TokenType.EOF, "<>"), 266 | ] 267 | 268 | for expected_token in tokens: 269 | token = lexer.next_token() 270 | 271 | self.assertEqual(expected_token[0], token.token_type) 272 | self.assertEqual(expected_token[1], token.literal) 273 | 274 | def test_identifier_with_num_in_name(self): 275 | source = "hello_1 = 1" 276 | 277 | lexer = Lexer(StringIO(source)) 278 | 279 | tokens = [ 280 | (TokenType.IDENTIFIER, "hello_1"), 281 | (TokenType.ASSIGN, "="), 282 | (TokenType.INT, "1"), 283 | (TokenType.EOF, "<>"), 284 | ] 285 | 286 | for expected_token in tokens: 287 | token = lexer.next_token() 288 | 289 | self.assertEqual(expected_token[0], token.token_type) 290 | self.assertEqual(expected_token[1], token.literal) 291 | 292 | def test_table_hashmap_declaration(self): 293 | source = "{random = 2}" 294 | 295 | lexer = Lexer(StringIO(source)) 296 | 297 | tokens = [ 298 | (TokenType.LBRACE, "{"), 299 | (TokenType.IDENTIFIER, "random"), 300 | (TokenType.ASSIGN, "="), 301 | (TokenType.INT, "2"), 302 | (TokenType.RBRACE, "}"), 303 | (TokenType.EOF, "<>"), 304 | ] 305 | 306 | for expected_token in tokens: 307 | token = lexer.next_token() 308 | 309 | self.assertEqual(expected_token[0], token.token_type) 310 | self.assertEqual(expected_token[1], token.literal) 311 | 312 | def 
test_table_length_hash_char(self): 313 | source = "#{1, 2}" 314 | 315 | lexer = Lexer(StringIO(source)) 316 | 317 | tokens = [ 318 | (TokenType.HASH, "#"), 319 | (TokenType.LBRACE, "{"), 320 | (TokenType.INT, "1"), 321 | (TokenType.COMMA, ","), 322 | (TokenType.INT, "2"), 323 | (TokenType.RBRACE, "}"), 324 | (TokenType.EOF, "<>"), 325 | ] 326 | 327 | for expected_token in tokens: 328 | token = lexer.next_token() 329 | 330 | self.assertEqual(expected_token[0], token.token_type) 331 | self.assertEqual(expected_token[1], token.literal) 332 | 333 | def test_modulo_operator(self): 334 | source = "5 % 10" 335 | 336 | lexer = Lexer(StringIO(source)) 337 | 338 | tokens = [ 339 | (TokenType.INT, "5"), 340 | (TokenType.PERCENT, "%"), 341 | (TokenType.INT, "10"), 342 | (TokenType.EOF, "<>"), 343 | ] 344 | 345 | for expected_token in tokens: 346 | token = lexer.next_token() 347 | 348 | self.assertEqual(expected_token[0], token.token_type) 349 | self.assertEqual(expected_token[1], token.literal) 350 | -------------------------------------------------------------------------------- /tests/test_parser.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | import unittest 3 | 4 | from luatopy.lexer import Lexer 5 | from luatopy.parser import Parser 6 | from luatopy import ast 7 | 8 | 9 | class ParserTest(unittest.TestCase): 10 | def test_prefix_parsing(self): 11 | tests = ( 12 | ("-1", "(-1)"), ("not 1", "(not 1)"), ("not not 1", "(not (not 1))"), 13 | ('#{1, 2}', '(#{1 = 1, 2 = 2})'), 14 | ('#"hello"', '(#"hello")'), 15 | ) 16 | 17 | for source, expected in tests: 18 | self.assertEqual(program_from_source(source).to_code(), expected) 19 | 20 | def test_influx_parsing(self): 21 | tests = ( 22 | ("1 + 2", "(1 + 2)"), 23 | ("2-3", "(2 - 3)"), 24 | ("5 * 5", "(5 * 5)"), 25 | ("5 / 5", "(5 / 5)"), 26 | ("5 == 2", "(5 == 2)"), 27 | ("5 ~= 2", "(5 ~= 2)"), 28 | ("5 > 2", "(5 > 2)"), 29 | ("5 < 2", "(5 < 2)"), 30 | ("5 >= 2", 
"(5 >= 2)"), 31 | ("5 <= 2", "(5 <= 2)"), 32 | ("5 % 10", "(5 % 10)"), 33 | ("true and false", "(true and false)"), 34 | ("false or true", "(false or true)"), 35 | ) 36 | 37 | for source, expected in tests: 38 | self.assertEqual(program_from_source(source).to_code(), expected) 39 | 40 | def test_string_influx_parsing(self): 41 | tests = (('"hello " .. "world"', '("hello " .. "world")'),) 42 | 43 | for source, expected in tests: 44 | self.assertEqual(program_from_source(source).to_code(), expected) 45 | 46 | def test_operator_precedence(self): 47 | tests = ( 48 | ("1 + 2", "(1 + 2)"), 49 | ("1 + 2 + 3", "((1 + 2) + 3)"), 50 | ("-1 + 2", "((-1) + 2)"), 51 | ("1 + 2 * 3", "(1 + (2 * 3))"), 52 | ("1 + 2 / 3", "(1 + (2 / 3))"), 53 | ("1 * 2 + 3 / 4", "((1 * 2) + (3 / 4))"), 54 | ("a + b", "(a + b)"), 55 | ("a + b * c + d / e - f", "(((a + (b * c)) + (d / e)) - f)"), 56 | ("a == b", "(a == b)"), 57 | ("a == true", "(a == true)"), 58 | ("b ~= true", "(b ~= true)"), 59 | ("a+b(c*d)+e", "((a + b((c * d))) + e)"), 60 | ("add(a + b * c + d / e - f)", "add((((a + (b * c)) + (d / e)) - f))"), 61 | ("a * {1, 2}[b * c] * d", "((a * ({1 = 1, 2 = 2}[(b * c)])) * d)"), 62 | ) 63 | 64 | for source, expected in tests: 65 | self.assertEqual(program_from_source(source).to_code(), expected) 66 | 67 | def test_operator_precedence_groups(self): 68 | tests = ( 69 | ("-(1 + 2)", "(-(1 + 2))"), 70 | ("1 + (2 + 3)", "(1 + (2 + 3))"), 71 | ("1 + (2 + (1 - 3))", "(1 + (2 + (1 - 3)))"), 72 | ) 73 | 74 | for source, expected in tests: 75 | self.assertEqual(program_from_source(source).to_code(), expected) 76 | 77 | def test_semicolon_delimiter(self): 78 | tests = (("1; b", 2), ("a = 1; b = 2;", 2), ("1 + 2; 3 + 3;d = 5; 5 * 5", 4)) 79 | 80 | for source, expected in tests: 81 | program = program_from_source(source) 82 | self.assertEqual(len(program.statements), expected) 83 | 84 | def test_integer_literal(self): 85 | program = program_from_source("1") 86 | 87 | statement = program.statements[0] 88 | 
self.assertIs(type(statement), ast.ExpressionStatement) 89 | self.assertIs(type(statement.expression), ast.IntegerLiteral) 90 | 91 | def test_identifier(self): 92 | program = program_from_source("a") 93 | 94 | statement = program.statements[0] 95 | self.assertIs(type(statement), ast.ExpressionStatement) 96 | self.assertIs(type(statement.expression), ast.Identifier) 97 | 98 | def test_booleans(self): 99 | program = program_from_source("true") 100 | 101 | statement = program.statements[0] 102 | self.assertIs(type(statement), ast.ExpressionStatement) 103 | self.assertIs(type(statement.expression), ast.Boolean) 104 | self.assertIs(statement.expression.value, True) 105 | 106 | def test_variable_assign(self): 107 | self.assertEqual(program_from_source("a = 1").to_code(), "a = 1") 108 | self.assertEqual(program_from_source("a = b").to_code(), "a = b") 109 | self.assertEqual(program_from_source("a = false").to_code(), "a = false") 110 | 111 | def test_multiple_variable_assign(self): 112 | source = """a = 1 113 | b = 2 114 | c = true 115 | """ 116 | 117 | program = program_from_source(source) 118 | self.assertEqual(len(program.statements), 3) 119 | self.assertEqual(program.statements[0].to_code(), "a = 1") 120 | self.assertEqual(program.statements[1].to_code(), "b = 2") 121 | self.assertEqual(program.statements[2].to_code(), "c = true") 122 | 123 | def test_if_statements(self): 124 | tests = ( 125 | ("if 1 > 2 then 1 end", "if (1 > 2) then 1 end"), 126 | ("if 1 > 2 then 1 else 5 end", "if (1 > 2) then 1 else 5 end"), 127 | ( 128 | "if 1 > 2 then if true then 1 end end", 129 | "if (1 > 2) then if true then 1 end end", 130 | ), 131 | ) 132 | 133 | for source, expected in tests: 134 | self.assertEqual(program_from_source(source).to_code(), expected) 135 | 136 | def test_function_statements(self): 137 | tests = ( 138 | ("function (x, y) x*y end", "function (x, y) (x * y) end"), 139 | ("function () 1 end", "function () 1 end"), 140 | ("function (x, y, z) 1 end", "function (x, y, 
z) 1 end"), 141 | ("function () end", "function () end"), 142 | ("function () return 1 end", "function () return 1 end"), 143 | ("function foo () return 1 end", "function foo () return 1 end"), 144 | ) 145 | 146 | for source, expected in tests: 147 | self.assertEqual(program_from_source(source).to_code(), expected) 148 | 149 | def test_named_function_statements(self): 150 | tests = ( 151 | ("function bar () 1 end", "function bar () 1 end"), 152 | ) 153 | 154 | for source, expected in tests: 155 | self.assertEqual(program_from_source(source).to_code(), expected) 156 | 157 | def test_function_calls(self): 158 | tests = ( 159 | ("abc(1, 2)", "abc(1, 2)"), 160 | ("random(m, 1+1*2)", "random(m, (1 + (1 * 2)))"), 161 | ) 162 | 163 | for source, expected in tests: 164 | self.assertEqual(program_from_source(source).to_code(), expected) 165 | 166 | def test_return_statements(self): 167 | tests = ( 168 | ("return 1", "return 1"), 169 | ("return 1+b", "return (1 + b)"), 170 | ("return false", "return false"), 171 | ) 172 | 173 | for source, expected in tests: 174 | self.assertEqual(program_from_source(source).to_code(), expected) 175 | 176 | def test_string_literal(self): 177 | program = program_from_source('"hello world"') 178 | 179 | statement = program.statements[0] 180 | self.assertIs(type(statement), ast.ExpressionStatement) 181 | self.assertIs(type(statement.expression), ast.StringLiteral) 182 | 183 | def test_table_literal(self): 184 | program = program_from_source('{1, 2, "hello"}') 185 | 186 | statement = program.statements[0] 187 | self.assertIs(type(statement), ast.ExpressionStatement) 188 | self.assertIs(type(statement.expression), ast.TableLiteral) 189 | 190 | def test_table_key_value(self): 191 | tests = [ 192 | ('{key = 1, ["key2"] = 2}', '{"key" = 1, "key2" = 2}'), 193 | ("{1, key = 2}", '{1 = 1, "key" = 2}'), 194 | ("{[a + b] = 2}", "{(a + b) = 2}"), 195 | ("{[a] = 2}", "{a = 2}"), 196 | ("{a = {1, 2}}", '{"a" = {1 = 1, 2 = 2}}'), 197 | ("{[1] = 2}", "{1 = 
2}"), 198 | ('{"hello", "goodbye"}', '{1 = "hello", 2 = "goodbye"}'), 199 | ] 200 | 201 | for source, expected in tests: 202 | self.assertEqual(program_from_source(source).to_code(), expected) 203 | 204 | def test_parsing_index_expressions(self): 205 | program = program_from_source("values[1]") 206 | 207 | statement = program.statements[0] 208 | self.assertIs(type(statement), ast.ExpressionStatement) 209 | self.assertIs(type(statement.expression), ast.IndexExpression) 210 | self.assertIs(statement.expression.index.value, 1) 211 | 212 | 213 | def program_from_source(source): 214 | lexer = Lexer(StringIO(source)) 215 | parser = Parser(lexer) 216 | program = parser.parse_program() 217 | 218 | if len(parser.errors) > 0: 219 | print(parser.errors[0]) 220 | 221 | return program 222 | --------------------------------------------------------------------------------