├── .editorconfig ├── .gitignore ├── README.md ├── compile.py ├── core ├── __init__.py ├── for_utils.py └── table.py ├── examples ├── and_or.lua ├── assignments.lua ├── bools.lua ├── for.lua ├── functions.lua ├── globals.lua ├── length.lua ├── modulo.lua ├── numbers.lua ├── pairs.lua ├── tables.lua └── test.lua ├── lexer.py ├── parser.py ├── py_parser.py ├── requirements.txt └── tools ├── ast_to_py.py ├── py_to_ast.py └── py_to_lexer_sample.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: https://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | # Unix-style newlines with a newline ending every file 7 | [*] 8 | end_of_line = lf 9 | insert_final_newline = true 10 | 11 | # Matches multiple files with brace expansion notation 12 | # Set default charset 13 | [*.{js,py}] 14 | charset = utf-8 15 | 16 | # 4 space indentation 17 | [*.py] 18 | indent_style = space 19 | indent_size = 4 20 | 21 | # Tab indentation (no size specified) 22 | [Makefile] 23 | indent_style = tab 24 | 25 | # Indentation override for all JS under lib directory 26 | [*.js] 27 | indent_style = space 28 | indent_size = 2 29 | 30 | # Matches the exact files either package.json or .travis.yml 31 | [{package.json,.travis.yml}] 32 | indent_style = space 33 | indent_size = 2 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node 2 | venv 3 | *.tmp 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Lua to Python 2 | 3 | This is a project where I attempt to convert Lua to Python, by transforming Lua into Python AST and then running it. 4 | 5 | 6 | ## Current status 7 | - Variable assignments, basic datatypes and comparisons, if statements, while loops and functions are working. 8 | 9 | 10 | ## Getting started 11 | 12 | - `pip install -r requirements.txt` 13 | 14 | ``` 15 | Usage: compile.py [OPTIONS] SOURCE_FILE 16 | 17 | Options: 18 | --strip_comments INTEGER Remove comments from tokens 19 | --tokens INTEGER Show tokens generated by lexer 20 | --ast INTEGER Show the internal AST (later transformed to Python 21 | AST 22 | --py_ast INTEGER Show Python AST 23 | --py_code INTEGER Show generated Python code 24 | --help Show this message and exit. 
25 | ``` 26 | 27 | Example: `./compile.py --strip_comments=1 examples/functions.lua` 28 | 29 | 30 | ## Roadmap 31 | - [x] Single line comments 32 | - [x] Multiline comments 33 | - [x] Numbers 34 | - [x] Strings 35 | - [x] Nil types 36 | - [x] Variable assignments 37 | - [x] Addition 38 | - [x] Multiplication 39 | - [x] If statements 40 | - [x] Nested if statements 41 | - [x] `~=` operator 42 | - [x] `==` operator 43 | - [x] `while` keyword 44 | - [x] Concat strings with `..` 45 | - [x] Subtract values 46 | - [x] `>=` operator 47 | - [x] `<=` operator 48 | - [x] Boolean types 49 | - [x] `function` declarations 50 | - [x] `return` 51 | - [x] `not` logical operator 52 | - [x] `bool` expression in comparison 53 | - [x] `%` operator 54 | - [x] `/` operator 55 | - [x] `or` logical operator 56 | - [x] `and` logical operator 57 | - [x] Assign function return to variable 58 | - [x] Double number support 59 | - [x] Negative values 60 | - [x] Anonymous functions 61 | - [x] Variables with numbers in name 62 | - [x] Table datatype 63 | - [x] Support for accessing Table properties 64 | - [x] Support for literal notation (`['@!#']`) in Table 65 | - [x] List as argument to Table constructor 66 | - [x] `#` operator for retrieving Table/String length 67 | - [x] Iterator for Table using `pairs`/`ipairs` 68 | - [x] `_G` for globals access 69 | - [x] `for` keyword 70 | - [x] Add multiple assignment support (`x, y, z = 1, 2, 3`) 71 | - [x] Add multiple assignment support in for loop target+iterator 72 | - [x] Add multiline support to anonymous functions 73 | - [ ] `repeat` keyword 74 | - [ ] Short-circuit / ternary operator 75 | - [ ] `local` variables 76 | - [ ] Numbers beginning with `.` (ex `.123`) 77 | - [ ] Undefined variables should return nil 78 | - [ ] Dot property syntax in Table for string keys 79 | - [ ] Function calls with single params should not require parens 80 | - [ ] Metatable support for tables 81 | - [ ] BUG: Function cannot call itself 82 | - [ ] BUG: Function declaration in nested Table 83 | - [ ] BUG: Nested attribute retrieval with `["random"]["random2"]` 84 | - [ ] BUG: Decimal type key in Table 85 | - [ ] BUG: Table construct in function (ex: `pairs({a=1, b=2, c=3})`) 86 | 87 | 88 | ## References 89 | - https://drew.ltd/blog/posts/2020-7-18.html - Many thanks to Drew for his excellent articles on how to build a programming language 90 | - https://greentreesnakes.readthedocs.io/en/latest/ 91 | - https://github.com/python/cpython/blob/master/Lib/ast.py 92 | - https://learnxinyminutes.com/docs/lua/ 93 | -------------------------------------------------------------------------------- /compile.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import ast 3 | from pprint import pprint 4 | 5 | import click 6 | import astunparse 7 | 8 | import lexer 9 | import parser 10 | import py_parser 11 | 12 | 13 | @click.command() 14 | @click.argument('source_file') 15 | @click.option('--strip_comments', default=0, help="Remove comments from tokens") 16 | @click.option('--tokens', default=0, help="Show tokens generated by lexer") 17 | @click.option('--ast', default=0, help="Show the internal AST (later transformed to Python AST)") 18 | @click.option('--py_ast', default=0, help="Show Python AST") 19 | @click.option('--py_code', default=0, help="Show generated Python code") 20 | def run(source_file, **kwargs): 21 | file_handler = open(source_file, 'r') 22 | source = file_handler.read() 23 | 24 | tokens = lexer.lexer(source) 25 | 26 | if
kwargs["strip_comments"]: 27 | tokens = list(filter(lambda x: x["type"] != "COMMENT", tokens)) 28 | tokens = list(filter(lambda x: x["type"] != "MULTI-COMMENT", tokens))  # token type as emitted by lexer.py 29 | 30 | if kwargs["tokens"]: 31 | pprint(tokens) 32 | return 33 | 34 | ast_ = parser.parse(tokens) 35 | 36 | if kwargs["ast"]: 37 | pprint(ast_) 38 | return 39 | 40 | py_ast = py_parser.ast_to_py_ast(ast_) 41 | 42 | if kwargs["py_ast"]: 43 | print(ast.dump(py_ast)) 44 | return 45 | 46 | if kwargs["py_code"]: 47 | print(astunparse.unparse(py_ast)) 48 | return 49 | 50 | exec(compile(py_ast, filename="", mode="exec")) 51 | 52 | 53 | if __name__ == '__main__': 54 | run() 55 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | from .table import * 2 | from .for_utils import * 3 | -------------------------------------------------------------------------------- /core/for_utils.py: -------------------------------------------------------------------------------- 1 | def get_for_range(start, end, step=1): 2 | if step > 0: 3 | comp = lambda x, y: x <= y 4 | else: 5 | comp = lambda x, y: x >= y 6 | 7 | index = start 8 | while comp(index, end): 9 | yield index 10 | index = index + step 11 | -------------------------------------------------------------------------------- /core/table.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections import OrderedDict 3 | 4 | 5 | def pairs(table): 6 | return table.__pairs__() 7 | 8 | 9 | def ipairs(table): 10 | return table.__ipairs__() 11 | 12 | 13 | def is_castable_int(val): 14 | return re.search(r'^[0-9]+$', val) 15 | 16 | 17 | class Table(object): 18 | def __init__(self, *args, **kwargs): 19 | self.val_list = [None, *args] 20 | self.val_dict = OrderedDict() 21 | 22 | for key, val in kwargs.items(): 23 | if key[:2] == "__": 24 | key = key[2:] 25 | 26 | if is_castable_int(key): 27 | key = int(key) 28 | 29 | if isinstance(key, int): 30 | # Numbers passed in constructor cannot override existing values 31 | if key == len(self.val_list): 32 | self.val_list.append(val) 33 | continue 34 | 35 | self.val_dict[key] = val 36 | 37 | def __len__(self): 38 | return len(self.val_list) - 1 39 | 40 | def __getitem__(self, key): 41 | if isinstance(key, str) and is_castable_int(key): 42 | key = int(key) 43 | 44 | if isinstance(key, int) and key < len(self.val_list): 45 | return self.val_list[key] 46 | 47 | return self.val_dict[key] 48 | 49 | def __setitem__(self, key, value): 50 | if isinstance(key, str) and is_castable_int(key): 51 | key = int(key) 52 | 53 | if isinstance(key, int) and key < len(self.val_list): 54 | self.val_list[key] = value 55 | return 56 | 57 | self.val_dict[key] = value 58 | return None 59 | 60 | def __getattr__(self, key): 61 | return self.val_dict[key] 62 | 63 | def __pairs__(self): 64 | from itertools import chain 65 | 66 | return chain(self.__ipairs__(), self.val_dict.items()) 67 | 68 | def __ipairs__(self): 69 | for key, val in enumerate(self.val_list): 70 | if key == 0 and val is None: 71 | continue 72 | yield key, val 73 | 74 | def __str__(self): 75 | list_rep = str(self.val_list[1:]) 76 | list_rep = list_rep[1:-1] 77 | 78 | dict_rep = str(self.val_dict) 79 | dict_rep = dict_rep[12:-2] 80 | if not dict_rep: 81 | return "[{0}]".format(list_rep) 82 | 83 | if not list_rep: 84 | return "[{0}]".format(dict_rep) 85 | 86 | return "[{0}, {1}]".format(list_rep, dict_rep) 87 | 88 | """ 89 | t = Table(1, 2, 3, 4,
__1=5, __99="99 val", __word="word val") 90 | print(len(t)) # 4 91 | 92 | tt = Table(1) 93 | print(len(tt)) # 1 94 | 95 | print(t["1"]) # 1 96 | print(t["99"]) # 99 val 97 | print(t["martin"]) # word val 98 | print(t.martin) # word val 99 | 100 | t["test"] = "ing" 101 | t["2"] = "ing" 102 | print(t["test"]) 103 | print(t["2"]) 104 | 105 | print(t) 106 | """ 107 | -------------------------------------------------------------------------------- /examples/and_or.lua: -------------------------------------------------------------------------------- 1 | a = true 2 | b = true 3 | 4 | if a or b then print("Both a and b are true") end 5 | -------------------------------------------------------------------------------- /examples/assignments.lua: -------------------------------------------------------------------------------- 1 | a = 1 2 | x, y = 1, 2 3 | 4 | print(a) 5 | print(x, y) 6 | 7 | function bar() do 8 | return 1, 2, 3, 4 9 | end 10 | 11 | x, y, z, h = bar() 12 | -------------------------------------------------------------------------------- /examples/bools.lua: -------------------------------------------------------------------------------- 1 | val = true 2 | 3 | aBoolValue = true 4 | 5 | if not aBoolValue then print('false') end 6 | if aBoolValue then print('true') end 7 | 8 | v = not aBoolValue 9 | print(v) 10 | -------------------------------------------------------------------------------- /examples/for.lua: -------------------------------------------------------------------------------- 1 | print("-- 1") 2 | for i = 1, 5 do -- 1, 2, 3, 4, 5 3 | print(i) 4 | end 5 | 6 | print("-- 1") 7 | for i = 1, 6, 2 do -- 1, 3, 5 8 | print(i) 9 | end 10 | 11 | print("-- 3") 12 | for i = 5, 1, -2 do -- 5, 3, 1 13 | print(i) 14 | end 15 | 16 | print("-- 4") 17 | v = {a=1, b=2} 18 | for key, val in pairs(v) do -- 1, 2, 3 19 | print(key) 20 | end 21 | 22 | print("-- 5") 23 | v = {1, 2, 3, 4} 24 | for key, val in pairs(v) do -- 1, 2, 3 25 | print(key) 26 | end 27 | -------------------------------------------------------------------------------- /examples/functions.lua: -------------------------------------------------------------------------------- 1 | function bar(a, b, c) 2 | print(a, b, c) 3 | return (1 + 2) 4 | end 5 | 6 | v = bar(1, 2, 3) 7 | print(v) 8 | 9 | a = function (x, y) return x + y end 10 | print(a(1, 2)) 11 | 12 | function fib(n) 13 | if n < 2 then return 1 end 14 | return fib(5) 15 | end 16 | 17 | -- Closures and anonymous functions are ok: 18 | function adder(x) 19 | -- The returned function is created when adder is 20 | -- called, and remembers the value of x: 21 | return function (y) return x + y end 22 | end 23 | 24 | 25 | v = adder(5) 26 | print(v(2)) 27 | 28 | a1 = adder(9) 29 | a2 = adder(36) 30 | print(a1(16)) --> 25 31 | print(a2(64)) --> 100 32 | -------------------------------------------------------------------------------- /examples/globals.lua: -------------------------------------------------------------------------------- 1 | hello = {5 = 1} 2 | 3 | print(_G["ast"]) 4 | --_G["hello"] = 1 -- Currently not working 5 | print(_G) 6 | -------------------------------------------------------------------------------- /examples/length.lua: -------------------------------------------------------------------------------- 1 | u = {1, 2, [3] = "three"} 2 | v = {1, 2, [3] = "three", [4] = "four", random = 1} 3 | 4 | print(#u) -- 3 5 | print(#v) -- 4 6 | -------------------------------------------------------------------------------- /examples/modulo.lua: 
-------------------------------------------------------------------------------- 1 | a = 1 % 2 2 | 3 | print(a) 4 | -------------------------------------------------------------------------------- /examples/numbers.lua: -------------------------------------------------------------------------------- 1 | a = 5 2 | b = 5.5 3 | c = -5.5 4 | -- c = .6 -- Not yet working 5 | 6 | print(a) 7 | print(b) 8 | print(c) 9 | -------------------------------------------------------------------------------- /examples/pairs.lua: -------------------------------------------------------------------------------- 1 | a = {1, 2, 3, 4, 5, hello=1} 2 | print(pairs(a)) 3 | -------------------------------------------------------------------------------- /examples/tables.lua: -------------------------------------------------------------------------------- 1 | t = {['@!#'] = 1, [2] = 3, key1 = 'value1', key2 = false, key3 = { subkey = 5 }} 2 | u = {1, 2, [3] = "three"} 3 | v = {[1] = 'qbert', [6.28] = 'tau'} 4 | 5 | --print(t["key1"]) 6 | --t.key1 7 | -- 8 | print(t["@!#"]) -- 1 9 | print(t["key3"]) -- 10 | print(t) 11 | 12 | print(u[1]) -- 3 13 | print(u[3]) -- three 14 | print(u) 15 | 16 | --print(t["key3"]) 17 | --print(t["1"]) 18 | print(v[1]) 19 | print(v) 20 | print(v) 21 | 22 | 23 | print({a = 1}) 24 | -------------------------------------------------------------------------------- /examples/test.lua: -------------------------------------------------------------------------------- 1 | -- From: https://learnxinyminutes.com/docs/lua/ 2 | -- Two dashes start a one-line comment. 3 | 4 | --[[ 5 | Adding two ['s and ]'s makes it a 6 | multi-line comment. 7 | --]] 8 | 9 | ---------------------------------------------------- 10 | -- 1. Variables and flow control. 11 | ---------------------------------------------------- 12 | 13 | --num = 12 -- All numbers are doubles. 14 | --numa = 5 -- All numbers are doubles. 15 | -- Don't freak out, 64-bit doubles have 52 bits for 16 | -- storing exact int values; machine precision is 17 | -- not a problem for ints that need < 52 bits. 18 | -- 19 | --s = 'walternate' 20 | --t = "double-quotes are also fine" 21 | --t = nil -- Undefines t; Lua has garbage collection. 22 | 23 | num = 42 24 | --t = 2 25 | --numa = 5 26 | 27 | aBoolValue = false 28 | 29 | while num >= 10 do 30 | num = num - 1 -- No ++ or += type operators. 31 | print(num) 32 | end 33 | 34 | print('Winter is coming, ' .. 
'Hello') 35 | -------------------------------------------------------------------------------- /lexer.py: -------------------------------------------------------------------------------- 1 | # Convert source to tokens 2 | 3 | import re 4 | 5 | 6 | def lexer(source): 7 | chars = list(source) 8 | tokens = [] 9 | 10 | while len(chars): 11 | char = chars[0] 12 | 13 | if char == "\n": 14 | char = chars.pop(0) 15 | tokens.append({"type": "NL"}) 16 | continue 17 | 18 | if chars[0:4] == ["-", "-", "[", "["]: 19 | comment = extract_multiline_comment(chars) 20 | tokens.append({"type": "MULTI-COMMENT", "value": comment}) 21 | continue 22 | 23 | if chars[0:3] == ['n', 'i', 'l']: 24 | tokens.append({"type": "NIL", "value": 'nil'}) 25 | del chars[0:3] 26 | continue 27 | 28 | if chars[0:2] == ["-", "-"]: 29 | comment = extract_comment(chars) 30 | tokens.append({"type": "COMMENT", "value": comment}) 31 | continue 32 | 33 | if chars[0:3] == ["a", "n", "d"]: 34 | tokens.append({"type": "OP", "value": "and"}) 35 | del chars[0:3] 36 | continue 37 | 38 | if chars[0:2] == ["o", "r"]: 39 | tokens.append({"type": "OP", "value": "or"}) 40 | del chars[0:2] 41 | continue 42 | 43 | if chars[0:3] == ["n", "o", "t"]: 44 | tokens.append({"type": "OP", "value": "not"}) 45 | del chars[0:3] 46 | continue 47 | 48 | if chars[0:2] == ["~", "="]: 49 | tokens.append({"type": "OP", "value": "~="}) 50 | del chars[0:2] 51 | continue 52 | 53 | if chars[0:2] == [".", "."]: 54 | tokens.append({"type": "OP", "value": ".."}) 55 | del chars[0:2] 56 | continue 57 | 58 | if chars[0:2] == [">", "="]: 59 | tokens.append({"type": "OP", "value": ">="}) 60 | del chars[0:2] 61 | continue 62 | 63 | if chars[0:2] == ["<", "="]: 64 | tokens.append({"type": "OP", "value": "<="}) 65 | del chars[0:2] 66 | continue 67 | 68 | if len(chars) >= 2 and char == "-" and is_num(chars[1]): 69 | del chars[0:1] 70 | num = "-"+extract_num(chars) 71 | tokens.append({"type": "NUMBER", "value": num}) 72 | continue 73 | 74 | if is_num(char): 75 | num = extract_num(chars) 76 | tokens.append({"type": "NUMBER", "value": num}) 77 | continue 78 | 79 | if is_operator(char): 80 | operator = extract_operator(chars) 81 | tokens.append({"type": "OP", "value": operator}) 82 | continue 83 | 84 | if char == "'": 85 | string = extract_str("'", chars) 86 | tokens.append({"type": "STRING", "value": string}) 87 | continue 88 | 89 | if char == '"': 90 | string = extract_str('"', chars) 91 | tokens.append({"type": "STRING", "value": string}) 92 | continue 93 | 94 | if chars[0:2] == ["[", "["]: 95 | comment = extract_multiline_str(chars) 96 | tokens.append({"type": "STRING", "value": comment}) 97 | continue 98 | 99 | if is_letter(char): 100 | word = extract_word(chars) 101 | 102 | if is_keyword(word): 103 | tokens.append({"type": "KEYWORD", "value": word}) 104 | continue 105 | 106 | if word in ["true", "false"]: 107 | tokens.append({"type": "BOOLEAN", "value": word}) 108 | continue 109 | 110 | tokens.append({"type": "NAME", "value": word}) 111 | continue 112 | 113 | chars.pop(0) 114 | 115 | return tokens 116 | 117 | 118 | def is_operator(char): 119 | return char in [ 120 | "~=", "=", "==", "(", ")", "<", "+", "-", "*", ">", "<", "not", "%", 121 | "/", "{", "}", ",", "[", "]", "#" 122 | ] 123 | 124 | 125 | KEYWORDS = [ 126 | "while", "do", "end", "if", "elseif", "else", "then", "function", "return", 127 | "for", "in", 128 | ] 129 | 130 | 131 | def is_keyword(word): 132 | return word in KEYWORDS 133 | 134 | 135 | def is_letter(char): 136 | return re.search(r'[a-zA-Z]|_', char) 137 | 138 | 
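# Illustrative note (not part of the original lexer.py): for a source line such as
# `a = 1 -- note`, the lexer() loop above yields token dicts roughly like
#   {"type": "NAME", "value": "a"}, {"type": "OP", "value": "="},
#   {"type": "NUMBER", "value": "1"}, {"type": "COMMENT", "value": " note"},
# followed by {"type": "NL"} for the trailing newline.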
139 | def is_num(char): 140 | return re.search(r'[0-9]', char) 141 | 142 | 143 | def extract_operator(chars): 144 | op = "" 145 | for letter in chars: 146 | if not is_operator(op+letter): 147 | break 148 | 149 | op = op+letter 150 | del chars[0:len(op)] 151 | return op 152 | 153 | 154 | def extract_num(chars): 155 | num = "" 156 | 157 | for letter in chars: 158 | if not is_num(letter) and letter != ".": 159 | break 160 | 161 | num = num+letter 162 | del chars[0:len(num)] 163 | return num 164 | 165 | 166 | def extract_str(indicator, chars): 167 | out = "" 168 | for letter in chars[1:]: 169 | if letter == indicator: 170 | break 171 | 172 | out = out+letter 173 | del chars[0:len(out)+2] 174 | return out 175 | 176 | 177 | def extract_word(chars): 178 | word = "" 179 | for letter in chars: 180 | if not is_letter(letter) and not re.search(r'([0-9]|_)', letter): 181 | break 182 | 183 | word = word+letter 184 | del chars[0:len(word)] 185 | return word 186 | 187 | 188 | def extract_multiline_comment(chars): 189 | string_chars = "".join(chars) 190 | end_index = string_chars.index("--]]") 191 | 192 | val = string_chars[0:end_index] 193 | del chars[0:end_index+4] 194 | return val 195 | 196 | 197 | def extract_comment(chars): 198 | string_chars = "".join(chars) 199 | end_index = string_chars.index("\n") 200 | 201 | val = string_chars[2:end_index] 202 | del chars[0:end_index] 203 | return val 204 | 205 | 206 | def extract_multiline_str(chars): 207 | string_chars = "".join(chars) 208 | end_index = string_chars.index("]]") 209 | 210 | val = string_chars[2:end_index] 211 | del chars[0:end_index+2] 212 | return val 213 | -------------------------------------------------------------------------------- /parser.py: -------------------------------------------------------------------------------- 1 | # Covert tokens to AST 2 | from pprint import pprint 3 | 4 | OPERATORS = [ 5 | "+", "-", "=", "*", ">", "<", "~=", "==", "..", ">=", "<=", "%", "/", "and", "or", 6 | ] 7 | 8 | fn_name_index = 0 9 | 10 | 11 | def generate_function_name(): 12 | global fn_name_index 13 | 14 | fn_name_index = fn_name_index + 1 15 | return "__fn{0}".format(fn_name_index) 16 | 17 | 18 | def parse_tokens(tokens, in_body=0, in_table_construct=0, in_fn_arguments=0): 19 | 20 | out = [] 21 | 22 | while len(tokens) > 0: 23 | token = tokens.pop(0) 24 | 25 | # Make sure we do not construct tuple when comma list is passed 26 | # as function arguments/table constructor 27 | if len(tokens) \ 28 | and is_op(tokens[0], ",") \ 29 | and in_fn_arguments == 0 \ 30 | and in_table_construct == 0: 31 | 32 | tuple_tokens = [token] 33 | tokens.pop(0) 34 | 35 | # Continue through comma list until the end 36 | while len(tokens) > 0: 37 | tuple_tokens.append(tokens.pop(0)) 38 | 39 | if len(tokens) == 0: 40 | break 41 | 42 | if not is_op(tokens[0], ","): 43 | break 44 | 45 | tokens.pop(0) 46 | 47 | out.append({"type": "tuple", "value": parse_tokens(tuple_tokens)}) 48 | continue 49 | 50 | if token["type"] == "NUMBER": 51 | out.append({"type": "number", "value": token["value"]}) 52 | continue 53 | 54 | if token["type"] == "STRING": 55 | out.append({"type": "string", "value": token["value"]}) 56 | continue 57 | 58 | if token["type"] == "BOOLEAN": 59 | out.append({"type": "boolean", "value": token["value"]}) 60 | continue 61 | 62 | if token["type"] == "NIL": 63 | out.append({"type": "nil", "value": None}) 64 | continue 65 | 66 | if token["type"] == "OP" and token["value"] == "#": 67 | assignments = extract_assignments(tokens) 68 | out.append({ 69 | "type": "call", 70 
| "name": "#", 71 | "args": parse_tokens(assignments), 72 | }) 73 | continue 74 | 75 | if token["type"] == "OP" and token["value"] == "{": 76 | table_tokens = extract_table(tokens) 77 | table_tokens = extract_assignments_by_comma(table_tokens) 78 | 79 | nodes = map( 80 | lambda x: parse_tokens(x, in_table_construct=1), 81 | table_tokens 82 | ) 83 | 84 | nodes = [x[0] for x in nodes] 85 | 86 | # print(table_tokens) 87 | out.append({ 88 | "type": "table", 89 | "value": nodes, 90 | }) 91 | continue 92 | 93 | if token["type"] == "OP" and token["value"] == "not": 94 | assignments = extract_assignments(tokens) 95 | out.append({ 96 | "type": "call", 97 | "name": token["value"], 98 | "args": parse_tokens(assignments), 99 | }) 100 | continue 101 | 102 | # Ignore [ if beeing used as constructor in table 103 | if in_table_construct == 1 and is_op(token, "["): 104 | continue 105 | 106 | # [ is beeing used as a accessor for table 107 | if in_table_construct == 0 and is_op(token, "["): 108 | key_tokens = extract_until_end_op(tokens, "]") 109 | 110 | expression = { 111 | "type": "call", 112 | "name": "[", 113 | "args": [out.pop(), parse_tokens(key_tokens)], 114 | } 115 | 116 | if in_body: # Do not wrap expression if already running in one 117 | out.append({ 118 | "type": "expr", 119 | "value": [expression], 120 | }) 121 | else: 122 | out.append(expression) 123 | continue 124 | 125 | if token["type"] == "OP" and token["value"] in OPERATORS: 126 | assignments = extract_assignments(tokens) 127 | 128 | # Move function outside assignment and declare it in above scope 129 | # with keyword ref 130 | if token["value"] == "=" and is_keyword(assignments[0], "function"): 131 | assignments = inline_anonymous_function(assignments, out) 132 | 133 | out.append({ 134 | "type": "call", 135 | "name": token["value"], 136 | "args": [ 137 | out.pop(), 138 | parse_tokens( 139 | assignments, 140 | in_table_construct=in_table_construct, 141 | ) 142 | ], 143 | }) 144 | continue 145 | 146 | if token["type"] == "NAME" and len(tokens) and is_op(tokens[0], "("): 147 | args = extract_args(tokens) 148 | 149 | expression = { 150 | "type": "call", 151 | "name": token["value"], 152 | "args": parse_tokens(args, in_fn_arguments=1) 153 | } 154 | 155 | if in_body: # Do not wrap expression if already running in one 156 | out.append({ 157 | "type": "expr", 158 | "value": [expression], 159 | }) 160 | else: 161 | out.append(expression) 162 | 163 | continue 164 | 165 | if token["type"] == "KEYWORD" and token["value"] == "else": 166 | body = extract_if_body(tokens) 167 | 168 | out.append({ 169 | "type": "else", 170 | "body": parse_tokens(body, in_body=1), 171 | }) 172 | continue 173 | 174 | if token["type"] == "KEYWORD" and token["value"] in ["if", "elseif"]: 175 | if_nodes = extract_scope_body(tokens) 176 | 177 | test_nodes = extract_to_keyword(if_nodes, "then") 178 | body = extract_if_body(if_nodes) 179 | 180 | out.append({ 181 | "type": "if", 182 | "test": parse_tokens(test_nodes), 183 | "body": parse_tokens(body, in_body=1), 184 | "else": parse_tokens(if_nodes), 185 | }) 186 | continue 187 | 188 | if token["type"] == "KEYWORD" and token["value"] == "for": 189 | 190 | 191 | body_tokens = extract_scope_body(tokens) 192 | iteration_tokens = extract_to_keyword(body_tokens, "do") 193 | if contains_op(iteration_tokens, "="): 194 | target_tokens = extract_to_op(iteration_tokens, "=") 195 | else: 196 | target_tokens = extract_to_keyword(iteration_tokens, "in") 197 | 198 | out.append({ 199 | "type": "for", 200 | "target": parse_tokens(target_tokens), 
201 | "iteration": parse_tokens(iteration_tokens), 202 | "body": parse_tokens(body_tokens, in_body=1), 203 | }) 204 | continue 205 | 206 | if token["type"] == "KEYWORD" and token["value"] == "while": 207 | while_tokens = extract_scope_body(tokens) 208 | test_tokens = extract_to_keyword(while_tokens, "do") 209 | 210 | out.append({ 211 | "type": "while", 212 | "test": parse_tokens(test_tokens), 213 | "body": parse_tokens(while_tokens, in_body=1), 214 | }) 215 | continue 216 | 217 | if token["type"] == "KEYWORD" and token["value"] == "return": 218 | assignments = extract_assignments(tokens) 219 | 220 | if is_keyword(assignments[0], "function"): 221 | assignments = inline_anonymous_function(assignments, out) 222 | 223 | out.append({ 224 | "type": "return", 225 | "value": parse_tokens(assignments), 226 | }) 227 | continue 228 | 229 | if token["type"] == "KEYWORD" and token["value"] == "function": 230 | function_tokens = extract_scope_body(tokens) 231 | signature_tokens = extract_fn_signature(function_tokens) 232 | function_name = "" 233 | 234 | if signature_tokens[0]["type"] == "NAME": 235 | name_token = signature_tokens.pop(0) 236 | function_name = name_token["value"] 237 | else: 238 | function_name = None 239 | 240 | parameter_tokens = signature_tokens[1:-1] 241 | # Only accept name as argument 242 | parameter_tokens = filter( 243 | lambda x: x["type"] == "NAME", 244 | parameter_tokens 245 | ) 246 | 247 | parameter_tokens = map( 248 | lambda x: {"type": "argument", "name": x["value"]}, 249 | parameter_tokens 250 | ) 251 | 252 | out.append({ 253 | "type": "function", 254 | "name": function_name, 255 | "args": list(parameter_tokens), 256 | "body": parse_tokens(function_tokens, in_body=1), 257 | }) 258 | continue 259 | 260 | if token["type"] == "NAME": 261 | out.append({ 262 | "type": "name", 263 | "name": token["value"], 264 | }) 265 | continue 266 | 267 | return out 268 | 269 | 270 | def is_op(token, op): 271 | return token.get("type", None) == "OP" and token["value"] == op 272 | 273 | 274 | def is_keyword(token, keyword): 275 | return token["type"] == "KEYWORD" and token["value"] == keyword 276 | 277 | 278 | def extract_table(tokens): 279 | out = [] 280 | depth = 0 281 | 282 | while len(tokens) > 0: 283 | token = tokens.pop(0) 284 | out.append(token) 285 | 286 | if depth > 0 and is_op(token, "}"): 287 | depth = depth +1 288 | continue 289 | 290 | if is_op(token, "}"): 291 | break 292 | 293 | return out 294 | 295 | 296 | def extract_fn_signature(tokens): 297 | out = [] 298 | 299 | while len(tokens) > 0: 300 | token = tokens.pop(0) 301 | out.append(token) 302 | if is_op(token, ")"): 303 | break 304 | return out 305 | 306 | 307 | def extract_scope_body(tokens): 308 | out = [] 309 | 310 | depth = 0 311 | 312 | while len(tokens) > 0: 313 | token = tokens.pop(0) 314 | out.append(token) 315 | 316 | if token["type"] == "KEYWORD" and token["value"] in ["if", "function"]: 317 | depth = depth + 1 318 | continue 319 | 320 | if depth > 0 and token["type"] == "KEYWORD" and token["value"] in ["if", "end"]: 321 | depth = depth - 1 322 | continue 323 | 324 | if depth == 0 and token["type"] == "KEYWORD" and token["value"] == "end": 325 | break 326 | 327 | return out 328 | 329 | 330 | def extract_if_body(tokens): 331 | out = [] 332 | depth = 0 333 | 334 | while len(tokens) > 0: 335 | token = tokens[0] 336 | 337 | if is_keyword(token, "if"): 338 | out.append(token) 339 | tokens.pop(0) 340 | depth = depth + 1 341 | continue 342 | 343 | if depth > 0 and is_keyword(token, "end"): 344 | out.append(token) 345 | 
tokens.pop(0) 346 | depth = depth - 1 347 | continue 348 | 349 | if depth == 0 and is_keyword(token, "elseif"): 350 | break 351 | 352 | if depth == 0 and is_keyword(token, "else"): 353 | break 354 | 355 | if depth == 0 and is_keyword(token, "end"): 356 | break 357 | 358 | out.append(token) 359 | tokens.pop(0) 360 | 361 | return out 362 | 363 | def extract_until_end_op(tokens, exit_op="]"): 364 | out = [] 365 | 366 | while len(tokens) > 0: 367 | token = tokens.pop(0) 368 | 369 | if is_op(token, exit_op): 370 | break 371 | 372 | out.append(token) 373 | 374 | return out 375 | 376 | def extract_until_end_op(tokens, exit_op="]"): 377 | out = [] 378 | 379 | while len(tokens) > 0: 380 | token = tokens.pop(0) 381 | 382 | if is_op(token, exit_op): 383 | break 384 | 385 | out.append(token) 386 | 387 | return out 388 | 389 | 390 | def extract_to_op(tokens, exit_op="="): 391 | out = [] 392 | 393 | while len(tokens) > 0: 394 | token = tokens.pop(0) 395 | 396 | if is_op(token, exit_op): 397 | break 398 | 399 | out.append(token) 400 | 401 | return out 402 | 403 | def extract_to_keyword(tokens, exit_keyword="then"): 404 | out = [] 405 | 406 | while len(tokens) > 0: 407 | token = tokens.pop(0) 408 | 409 | if is_keyword(token, exit_keyword): 410 | break 411 | 412 | out.append(token) 413 | 414 | return out 415 | 416 | 417 | def extract_assignments(tokens): 418 | out = [] 419 | depth = 0 420 | 421 | while len(tokens) > 0: 422 | token = tokens.pop(0) 423 | 424 | if is_keyword(token, "function"): 425 | out.append(token) 426 | depth = depth + 1 427 | continue 428 | 429 | if is_op(token, "("): 430 | out.append(token) 431 | depth = depth + 1 432 | continue 433 | 434 | if is_op(token, "{"): 435 | out.append(token) 436 | depth = depth + 1 437 | continue 438 | 439 | if is_op(token, ")"): 440 | out.append(token) 441 | depth = depth - 1 442 | continue 443 | 444 | if is_keyword(token, "end"): 445 | out.append(token) 446 | depth = depth - 1 447 | continue 448 | 449 | if is_op(token, "}"): 450 | out.append(token) 451 | depth = depth - 1 452 | continue 453 | 454 | if token["type"] == "NL" and depth == 0: 455 | break 456 | 457 | out.append(token) 458 | 459 | return out 460 | 461 | def inline_anonymous_function(tokens, out): 462 | fn_name = generate_function_name() 463 | 464 | fn_tokens = parse_tokens(tokens) 465 | fn_tokens[0]["name"] = fn_name 466 | out.insert(-1, fn_tokens[0]) 467 | assignments = [{"type": "NAME", "value": fn_name}] 468 | return assignments 469 | 470 | 471 | def extract_args(tokens): 472 | depth = 1 473 | args = [] 474 | 475 | tokens.pop(0) # Drop ( 476 | 477 | while depth != 0: 478 | token = tokens.pop(0) 479 | 480 | if is_op(token, "("): 481 | depth = depth+1 482 | 483 | if is_op(token, ")"): 484 | depth = depth-1 485 | 486 | args.append(token) 487 | 488 | args = args[:-1] # Drop ) 489 | return args 490 | 491 | 492 | def extract_assignments_by_comma(tokens): 493 | pairs = [[]] 494 | depth = 0 495 | 496 | while len(tokens) > 0: 497 | token = tokens.pop(0) 498 | 499 | if is_op(token, "{"): 500 | depth = depth + 1 501 | 502 | if is_op(token, "("): 503 | depth = depth + 1 504 | 505 | if is_op(token, "}"): 506 | depth = depth - 1 507 | 508 | if is_op(token, ")"): 509 | depth = depth - 1 510 | 511 | if depth == 0 and is_op(token, ","): 512 | pairs.append([]) 513 | continue 514 | 515 | pairs[-1].append(token) 516 | 517 | return pairs 518 | 519 | 520 | def contains_op(tokens, op): 521 | for token in tokens: 522 | if is_op(token, op): 523 | return True 524 | return False 525 | 526 | 527 | def parse(tokens): 528 
| ast_ = parse_tokens(tokens, in_body=1) 529 | return ast_ 530 | -------------------------------------------------------------------------------- /py_parser.py: -------------------------------------------------------------------------------- 1 | # Covert AST to Python AST 2 | 3 | import ast 4 | 5 | 6 | def ast_to_py_ast(nodes): 7 | ast_ = parse_nodes(nodes) 8 | 9 | bootstrap = [ 10 | ast.ImportFrom( 11 | module='core', names=[ 12 | ast.alias(name='*', asname=None) 13 | ], 14 | level=0 15 | ) 16 | ] 17 | 18 | ast_ = bootstrap + ast_ 19 | 20 | tree = ast.Module(ast_) 21 | tree = ast.fix_missing_locations(tree) 22 | 23 | return tree 24 | 25 | 26 | def parse_nodes(nodes, ctx_klass=ast.Load): 27 | out = [] 28 | while len(nodes) > 0: 29 | node = nodes.pop(0) 30 | 31 | if node["type"] == "name" and node["name"] == "_G": 32 | out.append( 33 | ast.Call( 34 | func=ast.Name(id='globals', ctx=ast.Load()), 35 | args=[], 36 | keywords=[], 37 | ) 38 | ) 39 | continue 40 | 41 | if node["type"] == "tuple": 42 | expressions = parse_nodes(node["value"], ctx_klass=ctx_klass) 43 | 44 | out.append( 45 | ast.Tuple( 46 | elts=expressions, 47 | ctx=ctx_klass(), 48 | ) 49 | ) 50 | continue 51 | 52 | if node["type"] == "table": 53 | argument_nodes = [] 54 | keyword_nodes = [] 55 | 56 | for x in node["value"]: 57 | if not (x["type"] == "call" and x["name"] == "="): 58 | argument_nodes.append(x) 59 | continue 60 | 61 | keyword_nodes.append(x) 62 | 63 | key_nodes = [x["args"][0] for x in keyword_nodes] 64 | # Convert name references to strings 65 | key_nodes = [ 66 | {"type": "string", "value": x["name"]} 67 | if x["type"] == "name" else x 68 | for x in key_nodes 69 | ] 70 | 71 | value_nodes = [x["args"][1] for x in keyword_nodes] 72 | value_nodes = [x[0] for x in value_nodes] 73 | value_nodes = parse_nodes(value_nodes) 74 | 75 | keywords = [] 76 | for x in (zip(key_nodes, value_nodes)): 77 | name_node, value_node = x 78 | name = name_node["value"] 79 | 80 | # Apply __ to make sure its casted in Table 81 | if name_node["type"] == "number": 82 | name = "__{0}".format(name) 83 | 84 | keywords.append( 85 | ast.keyword(arg=name, value=value_node) 86 | ) 87 | 88 | out.append( 89 | ast.Call( 90 | func=ast.Name(id='Table', ctx=ast.Load()), 91 | args=parse_nodes(argument_nodes), 92 | keywords=keywords, 93 | ) 94 | ) 95 | continue 96 | 97 | if node["type"] == "string": 98 | out.append(ast.Str(s=node["value"])) 99 | continue 100 | 101 | if node["type"] == "boolean": 102 | value = node["value"] 103 | value = True if value == "true" else value 104 | value = False if value == "false" else value 105 | out.append(ast.NameConstant(value=value)) 106 | continue 107 | 108 | if node["type"] == "number": 109 | value = node["value"] 110 | value = float(value) if "." 
in value else int(value) 111 | 112 | out.append(ast.Num(n=value)) 113 | continue 114 | 115 | if node["type"] == "nil": 116 | out.append(ast.NameConstant(value=None)) 117 | continue 118 | 119 | if node["type"] == "return": 120 | out.append( 121 | ast.Return(value=parse_nodes(node["value"])[0]) 122 | ) 123 | continue 124 | 125 | if node["type"] == "assign": 126 | out.append( 127 | ast.Assign( 128 | targets=[ 129 | ast.Name(id=node["name"], ctx=ast.Store()) 130 | ], 131 | value=parse_nodes(node["value"])[0], 132 | ) 133 | ) 134 | continue 135 | 136 | if node["type"] == "name": 137 | out.append( 138 | ast.Name(id=node["name"], ctx=ctx_klass()), 139 | ) 140 | continue 141 | 142 | if node["type"] == "expr": 143 | out.append( 144 | ast.Expr( 145 | value=parse_nodes(node["value"])[0] 146 | ) 147 | ) 148 | continue 149 | 150 | if node["type"] == "function": 151 | body_nodes = parse_nodes(node["body"]) 152 | out.append( 153 | ast.FunctionDef( 154 | name=node["name"], 155 | args=ast.arguments( 156 | args=[ 157 | ast.arg( 158 | arg=x["name"], 159 | annotation=None, 160 | ) for x in node["args"] 161 | ], 162 | vararg=None, 163 | kwonlyargs=[], 164 | kw_defaults=[], 165 | kwarg=None, 166 | defaults=[] 167 | ), 168 | body=body_nodes, 169 | decorator_list=[], 170 | ) 171 | ) 172 | continue 173 | 174 | if node["type"] == "if": 175 | test_nodes = parse_nodes(node["test"]) 176 | body_nodes = parse_nodes(node["body"]) 177 | else_nodes = parse_nodes(node["else"]) 178 | 179 | out.append( 180 | ast.If( 181 | test=test_nodes[0], 182 | body=body_nodes, 183 | orelse=else_nodes, 184 | ) 185 | ) 186 | continue 187 | 188 | if node["type"] == "for": 189 | target_expr = parse_nodes(node["target"], ctx_klass=ast.Store) 190 | body_expr = parse_nodes(node["body"]) 191 | 192 | iteration_nodes = node["iteration"] 193 | 194 | # Apply range constructor 195 | if iteration_nodes[0]["type"] == "tuple": 196 | iteration_expr = [ 197 | ast.Call( 198 | func=ast.Name(id='get_for_range', ctx=ast.Load()), 199 | args=parse_nodes(iteration_nodes[0]["value"]), 200 | keywords=[], 201 | ) 202 | ] 203 | 204 | else: 205 | iteration_expr = parse_nodes(iteration_nodes) 206 | 207 | out.append( 208 | ast.For( 209 | target=target_expr[0], 210 | iter=iteration_expr[0], 211 | body=body_expr, 212 | orelse=[] 213 | ) 214 | ) 215 | continue 216 | 217 | if node["type"] == "while": 218 | test_nodes = parse_nodes(node["test"]) 219 | body_nodes = parse_nodes(node["body"]) 220 | 221 | out.append( 222 | ast.While( 223 | test=test_nodes[0], 224 | body=body_nodes, 225 | orelse=[], 226 | ) 227 | ) 228 | 229 | if node["type"] == "else": 230 | body_nodes = parse_nodes(node["body"]) 231 | out = out + body_nodes 232 | continue 233 | 234 | if node["type"] == "call": 235 | if node["name"] == "#": 236 | out.append( 237 | ast.Call( 238 | func=ast.Name(id='len', ctx=ast.Load()), 239 | args=parse_nodes(node["args"]), 240 | keywords=[], 241 | ) 242 | ) 243 | continue 244 | 245 | if node["name"] == "[": 246 | value_node = node["args"][0] 247 | value_expression = parse_nodes([value_node])[0] 248 | 249 | out.append( 250 | ast.Subscript( 251 | value=value_expression, 252 | slice=ast.Index( 253 | value=parse_nodes(node["args"][1])[0] 254 | ), 255 | ctx=ast.Load(), 256 | ) 257 | ) 258 | 259 | continue 260 | 261 | if node["name"] == "=": 262 | name_arg = node["args"][0] 263 | value_arg = node["args"][1] 264 | 265 | target_expr = parse_nodes([name_arg], ctx_klass=ast.Store) 266 | value_expr = parse_nodes(value_arg) 267 | 268 | out.append( 269 | ast.Assign( 270 | 
targets=target_expr, 271 | value=value_expr[0], 272 | ) 273 | ) 274 | continue 275 | 276 | if node["name"] in ["-", "%", "+", "..", "*", "/"]: 277 | ops = node["name"] 278 | 279 | arg_left = parse_nodes([node["args"][0]]) 280 | arg_right = parse_nodes(node["args"][1]) 281 | 282 | ops_ref = { 283 | "-": ast.Sub, 284 | "%": ast.Mod, 285 | "+": ast.Add, 286 | "..": ast.Add, 287 | "*": ast.Mult, 288 | "/": ast.Div, 289 | } 290 | 291 | out.append( 292 | ast.BinOp( 293 | left=arg_left[0], 294 | op=ops_ref[ops](), 295 | right=arg_right[0], 296 | ) 297 | ) 298 | continue 299 | 300 | if node["name"] in ["and", "or"]: 301 | ops = node["name"] 302 | 303 | arg_left = parse_nodes([node["args"][0]]) 304 | arg_right = parse_nodes(node["args"][1]) 305 | 306 | ops_ref = { 307 | "and": ast.And, 308 | "or": ast.Or, 309 | } 310 | 311 | out.append( 312 | ast.BoolOp( 313 | op=ops_ref[ops](), 314 | values=[ 315 | arg_left[0], 316 | arg_right[0], 317 | ] 318 | ) 319 | ) 320 | continue 321 | 322 | if node["name"] in [">", "<", "~=", "==", "<=", ">="]: 323 | ops = node["name"] 324 | 325 | arg_left = parse_nodes([node["args"][0]]) 326 | arg_right = parse_nodes(node["args"][1]) 327 | 328 | ops_ref = { 329 | ">": ast.Gt, 330 | ">=": ast.GtE, 331 | "<": ast.Lt, 332 | "<=": ast.LtE, 333 | "~=": ast.NotEq, 334 | "==": ast.Eq, 335 | } 336 | 337 | out.append( 338 | ast.Compare( 339 | left=arg_left[0], 340 | ops=[ops_ref[ops]()], 341 | comparators=arg_right, 342 | ) 343 | ) 344 | continue 345 | 346 | if node["name"] == "not": 347 | out.append( 348 | ast.UnaryOp( 349 | op=ast.Not(), 350 | operand=parse_nodes(node["args"])[0] 351 | ) 352 | ) 353 | continue 354 | 355 | out.append( 356 | ast.Call( 357 | func=ast.Name(id=node["name"], ctx=ast.Load()), 358 | args=parse_nodes(node["args"], ctx_klass=ast.Load), 359 | keywords=[] 360 | ) 361 | ) 362 | continue 363 | 364 | return out 365 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | click 2 | astunparse 3 | -------------------------------------------------------------------------------- /tools/ast_to_py.py: -------------------------------------------------------------------------------- 1 | # Evaluate AST 2 | from ast import Module, Str, Print, fix_missing_locations 3 | 4 | 5 | tree = Module([Print(None, [Str("PyCon2010!")], True)]) 6 | tree = fix_missing_locations(tree) 7 | 8 | exec(compile(tree, filename="", mode="exec")) 9 | -------------------------------------------------------------------------------- /tools/py_to_ast.py: -------------------------------------------------------------------------------- 1 | # A simple file for converting python to AST 2 | import ast 3 | import pprint 4 | 5 | 6 | tree = ast.parse( 7 | """ 8 | globals()["a"] = 1 9 | """ 10 | ) 11 | pprint.pprint(ast.dump(tree)) 12 | 13 | -------------------------------------------------------------------------------- /tools/py_to_lexer_sample.py: -------------------------------------------------------------------------------- 1 | # A simple tool for cnverting python code to tokens 2 | 3 | from tokenize import generate_tokens 4 | from io import StringIO 5 | from pprint import pprint 6 | 7 | 8 | code = """ 9 | # My comment 10 | myvar = 1 11 | 12 | def foo(a, b=10): 13 | return a + b 14 | """ 15 | 16 | d = list(generate_tokens(StringIO(code).readline)) 17 | pprint(d) 18 | --------------------------------------------------------------------------------
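
As a closing illustration (not a file in this repository), the same pipeline that `compile.py` wires to the command line can be driven from a few lines of Python. The sketch below is hypothetical: it only reuses functions shown above (`lexer.lexer`, `parser.parse`, `py_parser.ast_to_py_ast`), assumes it is run from the repository root so that `core` and the local `parser` module are importable, and assumes a Python version compatible with the `ast` constructors used in `py_parser.py` (roughly 3.6–3.7, before `ast.Module` gained a required `type_ignores` field).

```python
# Hypothetical end-to-end sketch -- not part of the repository.
import ast

import lexer        # local lexer.py
import parser       # local parser.py (shadows the stdlib module when run from the repo root)
import py_parser

LUA_SOURCE = """
x = 1
while x <= 3 do
    print(x)
    x = x + 1
end
"""

tokens = lexer.lexer(LUA_SOURCE)          # Lua source -> list of token dicts
tree = parser.parse(tokens)               # token dicts -> internal AST (nested dicts)
py_tree = py_parser.ast_to_py_ast(tree)   # internal AST -> Python ast.Module

print(ast.dump(py_tree))                  # same output as `compile.py --py_ast=1`
exec(compile(py_tree, filename="<lua>", mode="exec"))  # prints 1, 2, 3
```

This mirrors the body of `run()` in `compile.py` with the Click plumbing stripped away, which is also a convenient shape for exercising the lexer, parser, and AST translation stages individually.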