├── syntatic_diagram.png ├── src ├── token.py ├── id.py ├── input3.txt ├── input2.txt ├── value.py ├── assembly.py ├── main.py ├── input.txt ├── symbolTable.py ├── .gitignore ├── tokenizer.py ├── node.py └── parser.py └── README.md /syntatic_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marcelogdeandrade/PascalToyCompiler/HEAD/syntatic_diagram.png -------------------------------------------------------------------------------- /src/token.py: -------------------------------------------------------------------------------- 1 | class Token(): 2 | def __init__(self, token_type, value): 3 | self.type = token_type 4 | self.value = value 5 | -------------------------------------------------------------------------------- /src/id.py: -------------------------------------------------------------------------------- 1 | # My module 2 | class Id(object): 3 | cur_id = 0 4 | 5 | @staticmethod 6 | def getNewId(): 7 | Id.cur_id += 1 8 | return Id.cur_id 9 | -------------------------------------------------------------------------------- /src/input3.txt: -------------------------------------------------------------------------------- 1 | program teste; 2 | var i, n, f: int; 3 | begin 4 | n := 5; 5 | i := 2; 6 | f := 1; 7 | while i < n + 1 do 8 | begin 9 | f := f * i; 10 | i := i + 1 11 | end; 12 | print(f) 13 | end. -------------------------------------------------------------------------------- /src/input2.txt: -------------------------------------------------------------------------------- 1 | program teste; 2 | var a, b: int; 3 | x: boolean; 4 | 5 | function fib(n: int): int; 6 | begin 7 | if n = 0 then 8 | fib := 0 9 | else 10 | if n = 1 then 11 | fib := 1 12 | else 13 | fib := fib(n - 1) + fib(n - 2) 14 | end 15 | 16 | begin 17 | print(fib(8)) 18 | end. -------------------------------------------------------------------------------- /src/value.py: -------------------------------------------------------------------------------- 1 | class Value(): 2 | def __init__(self, var_type): 3 | self.type = var_type 4 | self.value = None 5 | 6 | def setValue(self, value): 7 | self.value = value 8 | 9 | def getType(self): 10 | return self.type 11 | 12 | def getValue(self): 13 | return self.value 14 | 15 | -------------------------------------------------------------------------------- /src/assembly.py: -------------------------------------------------------------------------------- 1 | # My module 2 | class AssemblyCode(object): 3 | assembly_code = "" 4 | 5 | @staticmethod 6 | def addLine(text): 7 | AssemblyCode.assembly_code += "\n" 8 | AssemblyCode.assembly_code += text 9 | 10 | @staticmethod 11 | def writeFile(filename): 12 | with open(filename, 'w') as the_file: 13 | the_file.write(AssemblyCode.assembly_code) 14 | -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | from parser import Parser 2 | from symbolTable import SymbolTable 3 | from assembly import AssemblyCode 4 | 5 | 6 | def percorrer_arvore(raiz): 7 | print(raiz.value) 8 | for i in raiz.children: 9 | percorrer_arvore(i) 10 | 11 | 12 | def read_file(file_name): 13 | with open(file_name) as file: 14 | data = file.read() 15 | return data 16 | 17 | 18 | def main(): 19 | test = read_file("input3.txt") 20 | try: 21 | parser = Parser(test) 22 | symbolTable = SymbolTable(None) 23 | result = parser.parseProgram() 24 | # percorrer_arvore(result) 25 | result.Evaluate(symbolTable) 26 | print(AssemblyCode.assembly_code) 27 | AssemblyCode.writeFile("teste.asm") 28 | except ValueError as err: 29 | print(err) 30 | 31 | 32 | main() 33 | -------------------------------------------------------------------------------- /src/input.txt: -------------------------------------------------------------------------------- 1 | program teste; 2 | var a, b: int; 3 | x: boolean; 4 | 5 | function fat(n: int): int; 6 | var i: int; 7 | 8 | function inc(x: int): int; 9 | var i: int; 10 | begin 11 | i := x + 1; 12 | inc := i 13 | end 14 | 15 | begin 16 | fat := 1; 17 | i := 2; 18 | while i < n + 1 then 19 | begin 20 | fat := fat * i; 21 | i := inc(i) 22 | end 23 | end 24 | 25 | function soma(x, y: int): int; 26 | var i: int; 27 | begin 28 | i := x + y; 29 | soma := i 30 | end 31 | 32 | function fib(n: int): int; 33 | begin 34 | if n = 0 then 35 | fib := 0 36 | else 37 | if n = 1 then 38 | fib := 1 39 | else 40 | fib := fib(n - 1) + fin(n - 2) 41 | end 42 | 43 | begin 44 | x := true; 45 | a := 3; 46 | a := soma(a, 4); 47 | 48 | print(a); 49 | b := fat(5); 50 | print(fat(2) + b) 51 | end. -------------------------------------------------------------------------------- /src/symbolTable.py: -------------------------------------------------------------------------------- 1 | from value import Value 2 | from id import Id 3 | 4 | 5 | class SymbolTable(): 6 | def __init__(self, parent): 7 | self.id = Id.getNewId() 8 | self.symbols = {} 9 | self.parent = parent 10 | 11 | def getSymbol(self, symbol, symbol_type=None): 12 | if symbol in self.symbols.keys(): 13 | value = self.symbols[symbol] 14 | if symbol_type is not None and symbol_type != value.getType(): 15 | value = self.parent.getSymbol(symbol, symbol_type) 16 | return value 17 | else: 18 | return value 19 | else: 20 | if self.parent is not None: 21 | symbol = self.parent.getSymbol(symbol) 22 | return symbol 23 | else: 24 | raise ValueError("Variable {} not declared \ 25 | ".format(symbol)) 26 | 27 | def setSymbol(self, symbol, value): 28 | if symbol in self.symbols.keys(): 29 | var = self.symbols[symbol] 30 | var.setValue(value) 31 | else: 32 | if self.parent is not None: 33 | self.parent.setSymbol(symbol, value) 34 | else: 35 | raise ValueError("Variable {} not declared \ 36 | ".format(symbol)) 37 | 38 | def createSymbol(self, symbol, var_type): 39 | self.symbols[symbol] = Value(var_type) 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CompilerLogComp 2 | 3 | Pascal simplified compiler written in python without external dependencies 4 | 5 | # Features 6 | 7 | To-do 8 | 9 | # EBNF 10 | 11 | ``` 12 | program = "program", identifier, ";", block, "."; 13 | functions = {"function", identifier, "(", var_dec, ")", ";", block}; 14 | block = [functions], ["var", var_dec], [statements]; 15 | var_dec = (identifier, {,",", identifier}, ":", type, ";")+ 16 | statements = "begin", statement, {";", statement}, "end"; 17 | statement = attribution | statements | print | if | while; 18 | attribution = identifier, ":=", (expression | read); 19 | print = "print", "(", expression, ")"; 20 | read = "read", "(", ")"; 21 | if = "if", rel_expression, "then", statement, ["else" statement"]; 22 | while = "while", rel_expression, "do", statement; 23 | rel_expression = expression, {comp, expression}; 24 | expression = term, { ("+"|"-"|"or"), term, }; 25 | term = factor, { ("*" | "/" | "and"), factor }; 26 | factor = ("+" | "-" | "not"), (factor | number | boolean | ("(" expression ")") | identifier | func_call); 27 | func_call = identifier, "(", [expression, {",", expression}], ")"; 28 | identifier = letter, {letter | digit | "_" }; 29 | comp = ">" | "<" | "=" | "!="; 30 | number = digit+; 31 | boolean = "true" | "false"; 32 | type = "int" | "boolean"; 33 | letter = [a-zA-Z]; 34 | digit = [0-9]; 35 | ``` 36 | 37 | # SD (Syntatic Diagram) 38 | 39 | ![SD](https://raw.githubusercontent.com/marcelogdeandrade/CompilerLogComp/master/syntatic_diagram.png) 40 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ -------------------------------------------------------------------------------- /src/tokenizer.py: -------------------------------------------------------------------------------- 1 | from token import Token 2 | 3 | PRINT, BEGIN, END, IF, THEN, ELSE, \ 4 | WHILE, OR, AND, NOT, READ, PROGRAM, \ 5 | VAR, INT, BOOLEAN, \ 6 | TRUE, FALSE, DO, \ 7 | FUNCTION = ('print', 'begin', 8 | 'end', 'if', 'then', 9 | 'else', 'while', 10 | 'or', 'and', 'not', 'read', 11 | 'program', 'var', 'int', 12 | 'boolean', 'true', 'false', 'function', 'do') 13 | 14 | ALPHABET = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '-', 15 | '*', '/', '(', ')', ':', '=', ';', '<', '>', '!', '.', ','] 16 | KEYWORDS = [PRINT, BEGIN, END, IF, THEN, ELSE, WHILE, 17 | OR, AND, NOT, READ, PROGRAM, VAR, 18 | INT, BOOLEAN, TRUE, FALSE, FUNCTION, DO] 19 | 20 | TYPES = [INT, BOOLEAN] 21 | 22 | BOOL = [TRUE, FALSE] 23 | 24 | 25 | class Tokenizer(): 26 | def __init__(self, origin): 27 | self.origin = origin 28 | self.position = 0 29 | self.actual = None 30 | self.alphabet = ALPHABET 31 | 32 | def selectNext(self): 33 | # Final do arquivo 34 | if self.position >= len(self.origin): 35 | self.actual = None 36 | return None 37 | char = self.origin[self.position] 38 | # Comentarios 39 | if char == '{': 40 | while char != '}': 41 | self.position += 1 42 | if self.position == len(self.origin): 43 | return None 44 | char = self.origin[self.position] 45 | self.position += 1 46 | char = self.origin[self.position] 47 | # Espacos,enter e tabs 48 | while char.isspace() and self.position: 49 | self.position += 1 50 | if self.position == len(self.origin): 51 | return None 52 | char = self.origin[self.position] 53 | # Identificador 54 | if char.isalpha(): 55 | identifier = char 56 | while True: 57 | self.position += 1 58 | if (self.position >= len(self.origin)): 59 | break 60 | char = self.origin[self.position] 61 | if not char.isalpha() and not char.isdigit() and char != '_': 62 | break 63 | else: 64 | identifier += char 65 | if identifier in TYPES: 66 | self.actual = Token('TYPE', identifier) 67 | elif identifier in BOOL: 68 | self.actual = Token('boolean', identifier) 69 | elif identifier in KEYWORDS: 70 | self.actual = Token(identifier, None) 71 | else: 72 | self.actual = Token('IDE', identifier) 73 | # Caracter invalido 74 | elif char not in self.alphabet: 75 | raise ValueError("Invalid Char") 76 | # Digitos 77 | elif char.isdigit(): 78 | number = char 79 | while True: 80 | self.position += 1 81 | if (self.position >= len(self.origin)): 82 | break 83 | char = self.origin[self.position] 84 | if not char.isdigit(): 85 | break 86 | else: 87 | number += char 88 | self.actual = Token('int', int(number)) 89 | # Operacoes e parenteses 90 | else: 91 | if (char == '+'): 92 | self.actual = Token('PLUS', None) 93 | elif (char == '-'): 94 | self.actual = Token('MINUS', None) 95 | elif (char == '*'): 96 | self.actual = Token('MULT', None) 97 | elif (char == '/'): 98 | self.actual = Token('DIV', None) 99 | elif (char == '('): 100 | self.actual = Token('OPEN_PAR', None) 101 | elif (char == ')'): 102 | self.actual = Token('CLOSE_PAR', None) 103 | elif (char == ';'): 104 | self.actual = Token('SEMI_COLON', None) 105 | elif (char == ':'): 106 | if (self.position + 1 < len(self.origin)): 107 | char = self.origin[self.position + 1] 108 | if (char == '='): 109 | self.actual = Token('ATRIBUTE', None) 110 | self.position += 1 111 | else: 112 | self.actual = Token('VAR_DECLARATION', None) 113 | elif (char == '>'): 114 | self.actual = Token('COMP', ">") 115 | elif (char == '<'): 116 | self.actual = Token('COMP', "<") 117 | elif (char == '='): 118 | self.actual = Token('COMP', "=") 119 | elif (char == '!'): 120 | self.position += 1 121 | char = self.origin[self.position] 122 | if (char == '='): 123 | self.actual = Token('COMP', "!=") 124 | else: 125 | raise ValueError("Invalid Char") 126 | elif (char == '.'): 127 | self.actual = Token('END_PROGRAM', None) 128 | elif (char == ','): 129 | self.actual = Token('COMMA', None) 130 | self.position += 1 131 | return self.actual 132 | -------------------------------------------------------------------------------- /src/node.py: -------------------------------------------------------------------------------- 1 | from value import Value 2 | from symbolTable import SymbolTable as SymbolTableClass 3 | from id import Id 4 | from assembly import AssemblyCode 5 | 6 | 7 | class Node(): 8 | def __init__(self, value, children): 9 | self.id = Id.getNewId() 10 | self.value = value 11 | self.children = children 12 | 13 | def Evaluate(self, SymbolTable, whileFlag=0): 14 | pass 15 | 16 | def generateAsm(self, SymbolTable, whileFlag): 17 | pass 18 | 19 | 20 | class BinOp(Node): 21 | def same_type(self, value1, value2): 22 | if (value1.type == value2.type): 23 | return True 24 | else: 25 | return False 26 | 27 | def Evaluate(self, SymbolTable, whileFlag=0, nodeId=None): 28 | value1_obj = self.children[0].Evaluate(SymbolTable, whileFlag) 29 | self.generateAsm(SymbolTable, "push", whileFlag) 30 | value2_obj = self.children[1].Evaluate(SymbolTable, whileFlag) 31 | self.generateAsm(SymbolTable, "pop", whileFlag) 32 | value1 = value1_obj.getValue() 33 | value2 = value2_obj.getValue() 34 | if (self.value == "+"): 35 | if not self.same_type(value1_obj, value2_obj): 36 | raise ValueError("Operands must be the same type") 37 | value_sum = value1 + value2 38 | self.generateAsm(SymbolTable, "ADD", whileFlag) 39 | result = Value("int") 40 | result.setValue(value_sum) 41 | return result 42 | elif (self.value == "-"): 43 | if not self.same_type(value1_obj, value2_obj): 44 | raise ValueError("Operands must be the same type") 45 | value_sub = value1 - value2 46 | self.generateAsm(SymbolTable, "SUB", whileFlag) 47 | result = Value("int") 48 | result.setValue(value_sub) 49 | return result 50 | elif (self.value == "or"): 51 | if not self.same_type(value1_obj, value2_obj): 52 | raise ValueError("Operands must be the same type") 53 | value_or = value1 or value2 54 | self.generateAsm(SymbolTable, "OR", whileFlag) 55 | result = Value("boolean") 56 | result.setValue(value_or) 57 | return result 58 | elif (self.value == "*"): 59 | if not self.same_type(value1_obj, value2_obj): 60 | raise ValueError("Operands must be the same type") 61 | value_mult = value1 * value2 62 | self.generateAsm(SymbolTable, "IMUL", whileFlag) 63 | result = Value("int") 64 | result.setValue(value_mult) 65 | return result 66 | elif (self.value == "/"): 67 | if not self.same_type(value1_obj, value2_obj): 68 | raise ValueError("Operands must be the same type") 69 | value_div = value1 // value2 70 | result = Value("int") 71 | self.generateAsm(SymbolTable, "DIV", whileFlag) 72 | result.setValue(value_div) 73 | return result 74 | elif (self.value == "and"): 75 | if not self.same_type(value1_obj, value2_obj): 76 | raise ValueError("Operands must be the same type") 77 | value_and = value1 and value2 78 | self.generateAsm(SymbolTable, "AND", whileFlag) 79 | result = Value("boolean") 80 | result.setValue(value_and) 81 | return result 82 | elif (self.value == ">"): 83 | if not self.same_type(value1_obj, value2_obj): 84 | raise ValueError("Operands must be the same type") 85 | self.generateAsm(SymbolTable, "jg", whileFlag, nodeId) 86 | value_bigger = value1 > value2 87 | result = Value("boolean") 88 | result.setValue(value_bigger) 89 | return result 90 | elif (self.value == "<"): 91 | if not self.same_type(value1_obj, value2_obj): 92 | raise ValueError("Operands must be the same type") 93 | self.generateAsm(SymbolTable, "jl", whileFlag, nodeId) 94 | value_smaller = value1 < value2 95 | result = Value("boolean") 96 | result.setValue(value_smaller) 97 | return result 98 | elif (self.value == "="): 99 | if not self.same_type(value1_obj, value2_obj): 100 | raise ValueError("Operands must be the same type") 101 | self.generateAsm(SymbolTable, "je", whileFlag, nodeId) 102 | value_equal = value1 == value2 103 | result = Value("boolean") 104 | result.setValue(value_equal) 105 | return result 106 | elif (self.value == "!="): 107 | if not self.same_type(value1_obj, value2_obj): 108 | raise ValueError("Operands must be the same type") 109 | value_diff = value1 != value2 110 | result = Value("boolean") 111 | result.setValue(value_diff) 112 | return result 113 | else: 114 | return 115 | 116 | def generateAsm(self, SymbolTable, op, whileFlag, nodeId=None): 117 | if not whileFlag: 118 | asm = "" 119 | if op == "push": 120 | asm += "PUSH EBX \n" 121 | elif op == "pop": 122 | asm += "POP EAX \n" 123 | elif op == "jl": 124 | asm += "CMP EAX, EBX \n" 125 | asm += "CALL binop_jl \n" 126 | asm += "CMP EBX, False \n" 127 | asm += "JE EXIT_{0} \n".format(nodeId) 128 | elif op == "jg": 129 | asm += "CMP EAX, EBX \n" 130 | asm += "CALL binop_jg \n" 131 | asm += "CMP EBX, False \n" 132 | asm += "JE EXIT_{0} \n".format(nodeId) 133 | elif op == "je": 134 | asm += "CMP EAX, EBX \n" 135 | asm += "CALL binop_je \n" 136 | asm += "CMP EBX, False \n" 137 | asm += "JE EXIT_{0} \n".format(nodeId) 138 | elif op == "IMUL": 139 | asm += "IMUL EBX \n" 140 | asm += "MOV EBX, EAX \n" 141 | else: 142 | asm += "{0} EBX, EAX \n".format(op) 143 | AssemblyCode.assembly_code += asm 144 | 145 | 146 | class Assignment(Node): 147 | def Evaluate(self, SymbolTable, whileFlag=0): 148 | name = self.children[0].Evaluate(SymbolTable, whileFlag).getValue() 149 | value = self.children[1].Evaluate(SymbolTable, whileFlag).getValue() 150 | self.generateAsm(SymbolTable, whileFlag) 151 | SymbolTable.setSymbol(name, value) 152 | 153 | def generateAsm(self, SymbolTable, whileFlag): 154 | if not whileFlag: 155 | value1 = self.children[0].Evaluate( 156 | SymbolTable, whileFlag).getValue() 157 | value2 = SymbolTable.id 158 | asm = "MOV [{0}_{1}], EBX \n".format(value1, value2) 159 | AssemblyCode.assembly_code += asm 160 | 161 | 162 | class UnOp(Node): 163 | def Evaluate(self, SymbolTable, whileFlag=0): 164 | value_obj = self.children[0].Evaluate(SymbolTable, whileFlag) 165 | value = value_obj.getValue() 166 | if (self.value == "-"): 167 | result = Value("int") 168 | result.setValue(value * -1) 169 | return result 170 | elif (self.value == "not"): 171 | if value_obj.type == "boolean": 172 | result = Value("boolean") 173 | result.setValue(not value) 174 | return result 175 | else: 176 | raise ValueError("Operand must be a boolean") 177 | else: 178 | return 179 | 180 | 181 | class StrVal(Node): 182 | def Evaluate(self, SymbolTable, whileFlag=0): 183 | value = Value("string") 184 | value.setValue(self.value) 185 | return value 186 | 187 | 188 | class IntVal(Node): 189 | def Evaluate(self, SymbolTable, whileFlag=0): 190 | self.generateAsm(SymbolTable, whileFlag) 191 | value = Value("int") 192 | value.setValue(self.value) 193 | return value 194 | 195 | def generateAsm(self, SymbolTable, whileFlag): 196 | if not whileFlag: 197 | asm = "MOV EBX, {0} \n".format(self.value) 198 | AssemblyCode.assembly_code += asm 199 | 200 | 201 | class BoolVal(Node): 202 | def Evaluate(self, SymbolTable, whileFlag=0): 203 | self.generateAsm(SymbolTable, whileFlag) 204 | value = Value("boolean") 205 | value.setValue(self.value) 206 | return value 207 | 208 | def generateAsm(self, SymbolTable, whileFlag): 209 | if not whileFlag: 210 | asm = "MOV EBX, ${0} \n".format(self.value) 211 | AssemblyCode.assembly_code += asm 212 | 213 | 214 | class Identifier(Node): 215 | def Evaluate(self, SymbolTable, whileFlag=0): 216 | self.generateAsm(SymbolTable, whileFlag) 217 | value = SymbolTable.getSymbol(self.value) 218 | return value 219 | 220 | def generateAsm(self, SymbolTable, whileFlag): 221 | if not whileFlag: 222 | asm = "MOV EBX, [{0}_{1}] \n".format(self.value, SymbolTable.id) 223 | AssemblyCode.assembly_code += asm 224 | 225 | 226 | class NoOp(Node): 227 | def Evaluate(self, SymbolTable, whileFlag=0): 228 | return None 229 | 230 | 231 | class Statements(Node): 232 | def Evaluate(self, SymbolTable, whileFlag=0): 233 | for child in self.children: 234 | child.Evaluate(SymbolTable, whileFlag) 235 | 236 | 237 | class Print(Node): 238 | def Evaluate(self, SymbolTable, whileFlag=0): 239 | value = self.children[0].Evaluate(SymbolTable, whileFlag) 240 | self.generateAsm(SymbolTable, whileFlag) 241 | print(value.getValue()) 242 | 243 | def generateAsm(self, SymbolTable, whileFlag): 244 | if not whileFlag: 245 | asm = "PUSH EBX \n" 246 | asm += "CALL print \n" 247 | AssemblyCode.assembly_code += asm 248 | 249 | 250 | class Read(Node): 251 | def Evaluate(self, SymbolTable, whileFlag=0): 252 | result = input() 253 | value = Value("int") 254 | value.setValue(int(result)) 255 | return value 256 | 257 | 258 | class If(Node): 259 | def Evaluate(self, SymbolTable, whileFlag=0): 260 | comp = self.children[0].Evaluate(SymbolTable, whileFlag, self.id) 261 | if (comp.value): 262 | self.children[1].Evaluate(SymbolTable, whileFlag) 263 | else: 264 | self.children[2].Evaluate(SymbolTable, whileFlag) 265 | 266 | def generateAsm(self, SymbolTable): 267 | pass 268 | 269 | 270 | class While(Node): 271 | def Evaluate(self, SymbolTable, whileFlag=0): 272 | comp = self.children[0] 273 | self.generateAsm(SymbolTable, "LOOP", whileFlag, "declare") 274 | flag = 0 275 | while (comp.Evaluate(SymbolTable, flag, self.id).getValue()): 276 | self.children[1].Evaluate(SymbolTable, flag) 277 | flag = 1 278 | self.generateAsm(SymbolTable, "JMP LOOP", whileFlag, "jump") 279 | self.generateAsm(SymbolTable, "EXIT", whileFlag, "declare") 280 | 281 | def generateAsm(self, SymbolTable, label, whileFlag, op): 282 | if not whileFlag: 283 | if op == "declare": 284 | asm = "{0}_{1}: \n".format(label, self.id) 285 | AssemblyCode.assembly_code += asm 286 | else: 287 | asm = "{0}_{1} \n".format(label, self.id) 288 | AssemblyCode.assembly_code += asm 289 | 290 | 291 | class Program(Node): 292 | def Evaluate(self, SymbolTable, whileFlag=0): 293 | self.generateAssemblyConstants() 294 | SymbolTable.createSymbol(self.value, None) 295 | for i in range(len(self.children)): 296 | if i == 2: # Statements 297 | self.generateAsm(SymbolTable, whileFlag) 298 | self.children[i].Evaluate(SymbolTable, whileFlag) 299 | self.generateEndInterruption() 300 | 301 | def generateAsm(self, SymbolTable, whileFlag): 302 | if not whileFlag: 303 | asm = """ 304 | section .text 305 | global _start 306 | 307 | print: ; subrotina print 308 | POP EBX 309 | POP EAX 310 | PUSH EBX 311 | XOR ESI, ESI 312 | 313 | print_dec: 314 | MOV EDX, 0 315 | MOV EBX, 0x000A 316 | DIV EBX 317 | ADD EDX, '0' 318 | PUSH EDX 319 | INC ESI 320 | CMP EAX, 0 321 | JZ print_next 322 | JMP print_dec 323 | 324 | print_next: 325 | CMP ESI, 0 326 | JZ print_exit 327 | DEC ESI 328 | 329 | MOV EAX, SYS_WRITE 330 | MOV EBX, STDOUT 331 | 332 | POP ECX 333 | MOV [res], ECX 334 | MOV ECX, res 335 | 336 | MOV EDX, 1 337 | INT 0x80 338 | JMP print_next 339 | 340 | print_exit: 341 | RET 342 | 343 | ; subrotinas if/while 344 | binop_je: 345 | JE binop_true 346 | JMP binop_false 347 | 348 | binop_jg: 349 | JG binop_true 350 | JMP binop_false 351 | 352 | binop_jl: 353 | JL binop_true 354 | JMP binop_false 355 | 356 | binop_false: 357 | MOV EBX, False 358 | JMP binop_exit 359 | binop_true: 360 | MOV EBX, True 361 | binop_exit: 362 | RET 363 | 364 | """ 365 | asm += "_start: \n" 366 | AssemblyCode.assembly_code += asm 367 | 368 | def generateAssemblyConstants(self): 369 | asm = "" 370 | asm += "; constantes \n" 371 | asm += "SYS_EXIT equ 1 \n" 372 | asm += "SYS_READ equ 3 \n" 373 | asm += "SYS_WRITE equ 4 \n" 374 | asm += "STDIN equ 0 \n" 375 | asm += "STDOUT equ 1 \n" 376 | asm += "True equ 1 \n" 377 | asm += "False equ 0 \n" 378 | asm += "segment .data \n" 379 | AssemblyCode.assembly_code += asm 380 | 381 | def generateEndInterruption(self): 382 | asm = "MOV EAX, 1 \n" 383 | asm += "INT 0x80" 384 | AssemblyCode.assembly_code += asm 385 | 386 | 387 | class VarDec(Node): 388 | def Evaluate(self, SymbolTable, whileFlag=0): 389 | value1_obj = self.children[0].Evaluate(SymbolTable, whileFlag) 390 | value2_obj = self.children[1].Evaluate(SymbolTable, whileFlag) 391 | value1 = value1_obj.getValue() 392 | value2 = value2_obj.getValue() 393 | SymbolTable.createSymbol(value1, value2) 394 | self.generateAsm(SymbolTable, whileFlag) 395 | 396 | def generateAsm(self, SymbolTable, whileFlag): 397 | if not whileFlag: 398 | value1 = self.children[0].Evaluate( 399 | SymbolTable, whileFlag).getValue() 400 | asm = "{0}_{1} RESD 1 \n".format(value1, SymbolTable.id) 401 | AssemblyCode.assembly_code += asm 402 | 403 | 404 | class MultiVarDec(Node): 405 | def Evaluate(self, SymbolTable, whileFlag=0): 406 | self.generateAsm(SymbolTable, whileFlag, "init") 407 | for child in self.children: 408 | child.Evaluate(SymbolTable, whileFlag) 409 | self.generateAsm(SymbolTable, whileFlag, "res") 410 | 411 | def generateAsm(self, SymbolTable, whileFlag, op): 412 | if not whileFlag: 413 | if op == "init": 414 | asm = "segment .bss ; variaveis \n" 415 | AssemblyCode.assembly_code += asm 416 | else: 417 | asm = "res RESB 1" 418 | AssemblyCode.assembly_code += asm 419 | 420 | 421 | class FuncDec(Node): 422 | def Evaluate(self, SymbolTable, whileFlag=0): 423 | SymbolTable.createSymbol(self.value, "func") 424 | SymbolTable.setSymbol(self.value, self) 425 | 426 | 427 | class Funcs(Node): 428 | def Evaluate(self, SymbolTable, whileFlag=0): 429 | for func in self.children: 430 | func.Evaluate(SymbolTable, whileFlag) 431 | 432 | 433 | class FuncCall(Node): 434 | def Evaluate(self, SymbolTable, whileFlag=0): 435 | func_name = self.value 436 | func_node = SymbolTable.getSymbol(func_name, "func").getValue() 437 | funcSymbolTable = SymbolTableClass(SymbolTable) 438 | var_dec = func_node.children[0] 439 | args = [x.children[0] for x in var_dec.children] 440 | func_node.children[0].Evaluate(funcSymbolTable, whileFlag) 441 | if (len(args) != len(self.children)): 442 | raise ValueError("Number of arguments must \ 443 | be the same as declaration") 444 | for i in range(len(args)): 445 | symbol = args[i].Evaluate(funcSymbolTable, whileFlag).getValue() 446 | symbol_type = funcSymbolTable.getSymbol(symbol).getType() 447 | value_obj = self.children[i].Evaluate(SymbolTable, whileFlag) 448 | if (symbol_type != value_obj.getType()): 449 | raise ValueError("Function argument must be \ 450 | the same as declared") 451 | value = value_obj.getValue() 452 | funcSymbolTable.setSymbol(symbol, value) 453 | for i in range(1, len(func_node.children)): 454 | func_node.children[i].Evaluate(funcSymbolTable, whileFlag) 455 | result = funcSymbolTable.getSymbol(func_name) 456 | return result 457 | -------------------------------------------------------------------------------- /src/parser.py: -------------------------------------------------------------------------------- 1 | from tokenizer import Tokenizer 2 | from node import (BinOp, UnOp, IntVal, NoOp, Print, 3 | Identifier, Statements, StrVal, If, 4 | While, Read, Program, MultiVarDec, BoolVal, 5 | FuncDec, Funcs, FuncCall, VarDec, Assignment) 6 | 7 | 8 | class Parser(): 9 | def __init__(self, origin): 10 | self.tokens = Tokenizer(origin) 11 | self.tokens.selectNext() 12 | 13 | def parseProgram(self): 14 | token = self.tokens.actual 15 | if token.type == "program": 16 | token = self.tokens.selectNext() 17 | if token.type == "IDE": 18 | name_program = token.value 19 | token = self.tokens.selectNext() 20 | if token.type == "SEMI_COLON": 21 | self.tokens.selectNext() 22 | variables = self.parseVariables() 23 | functions = self.parseFunctions() 24 | statements = self.parseStatements() 25 | result = Program(name_program, 26 | [variables, functions, statements]) 27 | token = self.tokens.actual 28 | if token.type == "END_PROGRAM": 29 | pass 30 | else: 31 | raise ValueError("Invalid token, expecting a . on position \ 32 | {}".format(self.tokens.position)) 33 | else: 34 | raise ValueError("Invalid token, expecting a semi colon \ 35 | or a end on position {}".format(self.tokens.position)) 36 | else: 37 | raise ValueError("Invalid token, expecting a program on \ 38 | position {}".format(self.tokens.position)) 39 | return result 40 | 41 | def parseFunctionCall(self): 42 | pass 43 | 44 | def parseFunctions(self): 45 | token = self.tokens.actual 46 | result = Funcs(None, []) 47 | while True: 48 | if token.type == "function": 49 | token = self.tokens.selectNext() 50 | if token.type == "IDE": 51 | function_name = token.value 52 | func = FuncDec(function_name, []) 53 | self.tokens.selectNext() 54 | arguments = self.parseArgumentsFunction(function_name) 55 | self.tokens.selectNext() 56 | variables = self.parseVariables() 57 | functions = self.parseFunctions() 58 | statements = self.parseStatements() 59 | func.children.append(arguments) 60 | func.children.append(variables) 61 | func.children.append(functions) 62 | func.children.append(statements) 63 | result.children.append(func) 64 | token = self.tokens.actual 65 | else: 66 | raise ValueError("Invalid token, expecting a identifier on position \ 67 | {}".format(self.tokens.position)) 68 | elif token.type == "begin": 69 | return result 70 | else: 71 | raise ValueError("Invalid token, expecting a function on position \ 72 | {}".format(self.tokens.position)) 73 | 74 | def parseArgumentsFunction(self, function_name): 75 | token = self.tokens.actual 76 | if token.type == "OPEN_PAR": 77 | list_arguments = [] 78 | while True: 79 | token = self.tokens.selectNext() 80 | if token.type == "IDE": 81 | list_arguments.append(token.value) 82 | token = self.tokens.selectNext() 83 | if token.type == "VAR_DECLARATION": 84 | break 85 | elif token.type == "COMMA": 86 | pass 87 | else: 88 | raise ValueError("Invalid token, expecting a : or , on position \ 89 | {}".format(self.tokens.position)) 90 | else: 91 | raise ValueError("Invalid token, expecting a identifier on position \ 92 | {}".format(self.tokens.position)) 93 | token = self.tokens.selectNext() 94 | if token.type == "TYPE": 95 | arguments = MultiVarDec(None, []) 96 | for var_name in list_arguments: 97 | var_name = StrVal(var_name, []) 98 | value = StrVal(token.value, []) 99 | variable = BinOp(":", [var_name, value]) 100 | arguments.children.append(variable) 101 | token = self.tokens.selectNext() 102 | if token.type == "CLOSE_PAR": 103 | token = self.tokens.selectNext() 104 | if token.type == "VAR_DECLARATION": 105 | token = self.tokens.selectNext() 106 | if token.type == "TYPE": 107 | return_var_name = StrVal(function_name, []) 108 | return_type = StrVal(token.value, []) 109 | variable = BinOp(":", [return_var_name, 110 | return_type]) 111 | arguments.children.append(variable) 112 | token = self.tokens.selectNext() 113 | if token.type == "SEMI_COLON": 114 | return arguments 115 | else: 116 | raise ValueError("Invalid token, expecting a ; on position \ 117 | {}".format(self.tokens.position)) 118 | else: 119 | raise ValueError("Invalid token, expecting a type on position \ 120 | {}".format(self.tokens.position)) 121 | else: 122 | raise ValueError("Invalid token, expecting a : on position \ 123 | {}".format(self.tokens.position)) 124 | else: 125 | raise ValueError("Invalid token, expecting a ) on position \ 126 | {}".format(self.tokens.position)) 127 | else: 128 | raise ValueError("Invalid token, expecting a type on position \ 129 | {}".format(self.tokens.position)) 130 | else: 131 | raise ValueError("Invalid token, expecting a ( on position \ 132 | {}".format(self.tokens.position)) 133 | 134 | def parseVariables(self): 135 | token = self.tokens.actual 136 | result = MultiVarDec(None, []) 137 | if token.type != "begin": 138 | if token.type == "var": 139 | token = self.tokens.selectNext() 140 | while True: 141 | list_vars = [] 142 | while True: 143 | if token.type == "IDE": 144 | list_vars.append(token.value) 145 | token = self.tokens.selectNext() 146 | if token.type == "COMMA": 147 | token = self.tokens.selectNext() 148 | elif token.type == "VAR_DECLARATION": 149 | break 150 | else: 151 | raise ValueError("Invalid token, expecting a , or : on position \ 152 | {}".format(self.tokens.position)) 153 | else: 154 | raise ValueError("Invalid token, expecting a identifier on position \ 155 | {}".format(self.tokens.position)) 156 | token = self.tokens.selectNext() 157 | if token.type == "TYPE": 158 | for var_name in list_vars: 159 | var_name = StrVal(var_name, []) 160 | value = StrVal(token.value, []) 161 | variable = VarDec(None, [var_name, value]) 162 | result.children.append(variable) 163 | token = self.tokens.selectNext() 164 | if token.type == "SEMI_COLON": 165 | token = self.tokens.selectNext() 166 | if token.type == "begin": 167 | break 168 | elif token.type == "function": 169 | break 170 | elif token.type == "IDE": 171 | pass 172 | else: 173 | raise ValueError("Invalid token, expecting a begin \ 174 | or identifier on position {}" 175 | .format(self.tokens.position)) 176 | else: 177 | raise ValueError("Invalid token, expecting a ; on position \ 178 | {}".format(self.tokens.position)) 179 | else: 180 | raise ValueError("Invalid token, expecting a type on position \ 181 | {}".format(self.tokens.position)) 182 | else: 183 | raise ValueError("Invalid token, expecting a var on position \ 184 | {}".format(self.tokens.position)) 185 | return result 186 | 187 | def parseStatements(self): 188 | token = self.tokens.actual 189 | if token.type == "begin": 190 | result = Statements(None, []) 191 | while True: 192 | self.tokens.selectNext() 193 | result.children.append(self.parseStatement()) 194 | token = self.tokens.actual 195 | if token.type == "SEMI_COLON": 196 | pass 197 | elif token.type == "end": 198 | break 199 | if self.tokens.actual.type == "end": 200 | self.tokens.selectNext() 201 | pass 202 | else: 203 | raise ValueError("Invalid token, expecting a end on \ 204 | position {}".format(self.tokens.position)) 205 | else: 206 | raise ValueError("Invalid token, expecting a begin on \ 207 | position {}".format(self.tokens.position)) 208 | return result 209 | 210 | def parseStatement(self): 211 | token = self.tokens.actual 212 | if token.type == "begin": 213 | result = self.parseStatements() 214 | elif token.type == "IDE": 215 | result = self.parseAtribution() 216 | elif token.type == "print": 217 | result = self.parsePrint() 218 | elif token.type == "if": 219 | result = self.parseIf() 220 | elif token.type == "while": 221 | result = self.parseWhile() 222 | else: 223 | raise ValueError("Invalid token, expecting a begin,identifier, print, if or while \ 224 | on position {}".format(self.tokens.position)) 225 | return result 226 | 227 | def parseAtribution(self): 228 | value1 = StrVal(self.tokens.actual.value, []) 229 | token = self.tokens.selectNext() 230 | if (token.type == "ATRIBUTE"): 231 | token = self.tokens.selectNext() 232 | if (token.type == "read"): 233 | value2 = self.parseRead() 234 | else: 235 | value2 = self.parseExpression() 236 | result = Assignment(None, [value1, value2]) 237 | else: 238 | raise ValueError("Invalid token, expecting a := on position {}" 239 | .format(self.tokens.position)) 240 | return result 241 | 242 | def parsePrint(self): 243 | token = self.tokens.selectNext() 244 | if token.type == "OPEN_PAR": 245 | self.tokens.selectNext() 246 | value = self.parseExpression() 247 | token = self.tokens.actual 248 | if token.type == "CLOSE_PAR": 249 | result = Print(value, [value]) 250 | self.tokens.selectNext() 251 | else: 252 | raise ValueError("Invalid token, expecting a ) on position {}" 253 | .format(self.tokens.position)) 254 | else: 255 | raise ValueError("Invalid token, expecting a ( on position {}" 256 | .format(self.tokens.position)) 257 | return result 258 | 259 | def parseRelExpression(self): 260 | self.tokens.selectNext() 261 | value1 = self.parseExpression() 262 | token = self.tokens.actual 263 | if token.type == 'COMP': 264 | self.tokens.selectNext() 265 | value2 = self.parseExpression() 266 | result = BinOp(token.value, [value1, value2]) 267 | else: 268 | raise ValueError("Invalid token, expecting a <, >, = or != \ 269 | on position {}".format(self.tokens.position)) 270 | return result 271 | 272 | def parseIf(self): 273 | comp = self.parseRelExpression() 274 | token = self.tokens.actual 275 | if (token.type == "then"): 276 | self.tokens.selectNext() 277 | statement1 = self.parseStatement() 278 | token = self.tokens.actual 279 | if (token.type == "else"): 280 | self.tokens.selectNext() 281 | statement2 = self.parseStatement() 282 | else: 283 | statement2 = NoOp(None, []) 284 | result = If(None, [comp, statement1, statement2]) 285 | else: 286 | raise ValueError("Invalid token, expecting a then on \ 287 | position {}".format(self.tokens.position)) 288 | return result 289 | 290 | def parseRead(self): 291 | token = self.tokens.selectNext() 292 | if token.type == "OPEN_PAR": 293 | self.tokens.selectNext() 294 | token = self.tokens.actual 295 | if token.type == "CLOSE_PAR": 296 | result = Read(None, []) 297 | self.tokens.selectNext() 298 | else: 299 | raise ValueError("Invalid token, expecting a ) on position {}" 300 | .format(self.tokens.position)) 301 | else: 302 | raise ValueError("Invalid token, expecting a ( on position {}" 303 | .format(self.tokens.position)) 304 | return result 305 | 306 | def parseWhile(self): 307 | comp = self.parseRelExpression() 308 | token = self.tokens.actual 309 | if (token.type == "do"): 310 | self.tokens.selectNext() 311 | statement1 = self.parseStatement() 312 | token = self.tokens.actual 313 | result = While(None, [comp, statement1]) 314 | else: 315 | raise ValueError("Invalid token, expecting a do on \ 316 | position {}".format(self.tokens.position)) 317 | return result 318 | 319 | def parseExpression(self): 320 | result = self.parseTerm() 321 | while True: 322 | token = self.tokens.actual 323 | if token is None: 324 | break 325 | if token.type == "PLUS": 326 | self.tokens.selectNext() 327 | second_value = self.parseTerm() 328 | result = BinOp("+", [result, second_value]) 329 | elif token.type == "MINUS": 330 | self.tokens.selectNext() 331 | second_value = self.parseTerm() 332 | result = BinOp("-", [result, second_value]) 333 | elif token.type == "or": 334 | self.tokens.selectNext() 335 | second_value = self.parseTerm() 336 | result = BinOp("or", [result, second_value]) 337 | else: 338 | break 339 | return result 340 | 341 | def parseTerm(self): 342 | result = self.parseFactor() 343 | while True: 344 | token = self.tokens.actual 345 | if token is None: 346 | break 347 | elif token.type == "MULT": 348 | self.tokens.selectNext() 349 | second_value = self.parseFactor() 350 | result = BinOp("*", [result, second_value]) 351 | elif token.type == "DIV": 352 | self.tokens.selectNext() 353 | second_value = self.parseFactor() 354 | result = BinOp("/", [result, second_value]) 355 | elif token.type == "and": 356 | self.tokens.selectNext() 357 | second_value = self.parseFactor() 358 | result = BinOp("and", [result, second_value]) 359 | else: 360 | break 361 | return result 362 | 363 | def parseFactor(self): 364 | token = self.tokens.actual 365 | if token is None: 366 | raise ValueError("Invalid token, expecting a number or opening parentesis on \ 367 | position {}, got NULL".format(self.tokens.position)) 368 | if token.type == "int": 369 | result = IntVal(token.value, []) 370 | self.tokens.selectNext() 371 | elif token.type == "boolean": 372 | result = BoolVal(token.value, []) 373 | self.tokens.selectNext() 374 | elif token.type == "OPEN_PAR": 375 | self.tokens.selectNext() 376 | result = self.parseExpression() 377 | token = self.tokens.actual 378 | if token.type != "CLOSE_PAR": 379 | raise ValueError("Invalid token, missing parentesis close on \ 380 | position {}".format(self.tokens.position)) 381 | elif token.type == "MINUS": 382 | self.tokens.selectNext() 383 | result = self.parseFactor() 384 | result = UnOp("-", [result]) 385 | elif token.type == "not": 386 | self.tokens.selectNext() 387 | result = self.parseFactor() 388 | result = UnOp("not", [result]) 389 | elif token.type == "PLUS": 390 | self.tokens.selectNext() 391 | result = self.parseFactor() 392 | elif token.type == "IDE": 393 | identifier = token.value 394 | token = self.tokens.selectNext() 395 | if token.type == "OPEN_PAR": 396 | token = self.tokens.selectNext() 397 | args = [] 398 | while True: 399 | if token.type == "CLOSE_PAR": 400 | break 401 | else: 402 | arg = self.parseExpression() 403 | args.append(arg) 404 | token = self.tokens.actual 405 | if token.type == "COMMA": 406 | self.tokens.selectNext() 407 | pass 408 | elif token.type == "CLOSE_PAR": 409 | break 410 | else: 411 | raise ValueError("Invalid token, expecting a , or ) on \ 412 | position {}".format(self.tokens.position)) 413 | none_value = IntVal(None, []) 414 | args.append(none_value) 415 | result = FuncCall(identifier, args) 416 | self.tokens.selectNext() 417 | else: 418 | result = Identifier(identifier, []) 419 | else: 420 | raise ValueError("Invalid token, expecting number or opening parentesis on \ 421 | position {}".format(self.tokens.position)) 422 | return result 423 | --------------------------------------------------------------------------------