├── lib ├── __init__.py ├── errors.py ├── datatypes.py ├── scanner.py ├── codegenerator.py └── parser.py ├── tests ├── simpleadd_good.src ├── scopetest_good.src ├── recursiontest_good.src ├── globaltest_good.src ├── looptest_good.src ├── bigtest_good.src ├── codegen_good.src ├── bigtest_bad.src └── runtime_good.src ├── compiler.py └── README.md /lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/simpleadd_good.src: -------------------------------------------------------------------------------- 1 | program simpleadd is 2 | 3 | integer int1; 4 | integer int2; 5 | integer result; 6 | 7 | begin 8 | 9 | // Get the first input 10 | putString("Enter 1st Integer:"); 11 | getInteger(int1); 12 | 13 | // Get the second input 14 | putString("Enter 2nd Integer:"); 15 | getInteger(int2); 16 | 17 | // Calculate the sum 18 | result := int1 + int2; 19 | 20 | putString("Result:"); 21 | putInteger(result); 22 | 23 | end program 24 | -------------------------------------------------------------------------------- /tests/scopetest_good.src: -------------------------------------------------------------------------------- 1 | // SCOPE TEST PROGRAM 2 | program scope_test is 3 | integer add1; 4 | integer add2; 5 | integer sum; 6 | 7 | procedure add(integer add1 in, integer add2 in, integer sum out) 8 | begin 9 | sum := add1 + add2; 10 | return; 11 | end procedure; 12 | 13 | begin 14 | 15 | add1 := 1; 16 | add2 := 5; 17 | 18 | add(add1, add2, sum); 19 | 20 | if (sum == 6) then 21 | putString("SUCCESS"); 22 | else 23 | putString("FAILURE"); 24 | end if; 25 | 26 | return; 27 | 28 | end program 29 | -------------------------------------------------------------------------------- /tests/recursiontest_good.src: -------------------------------------------------------------------------------- 1 | program recursiontest is 2 | 3 | integer start_val; 4 | global 
integer end_val; 5 | 6 | procedure count_to_ten(integer current_val in) 7 | begin 8 | if (current_val < 10) then 9 | count_to_ten(current_val + 1); 10 | else 11 | end_val := current_val; 12 | end if; 13 | end procedure; 14 | 15 | begin 16 | 17 | start_val := 1; 18 | end_val := 0; 19 | 20 | count_to_ten(start_val); 21 | 22 | if (end_val == 10) then 23 | putString("SUCCESS"); 24 | else 25 | putString("FAILURE"); 26 | end if; 27 | 28 | end program 29 | -------------------------------------------------------------------------------- /tests/globaltest_good.src: -------------------------------------------------------------------------------- 1 | // SCOPE TEST PROGRAM 2 | program scope_test is 3 | global integer my_global_int; 4 | global integer result; 5 | 6 | global procedure increment_global() 7 | begin 8 | result := my_global_int + 1; 9 | end procedure; 10 | 11 | procedure calls_increment_global() 12 | begin 13 | // See if we can call a global procedure from this scope 14 | increment_global(); 15 | end procedure; 16 | 17 | begin 18 | my_global_int := 9; 19 | result := 0; 20 | 21 | calls_increment_global(); 22 | 23 | if (result == 10) then 24 | putString("SUCCESS"); 25 | else 26 | putString("FAILURE"); 27 | end if; 28 | 29 | return; 30 | 31 | end program 32 | -------------------------------------------------------------------------------- /tests/looptest_good.src: -------------------------------------------------------------------------------- 1 | program looptest is 2 | 3 | integer result; 4 | global integer num_loops; 5 | 6 | procedure do_loops(integer result out) 7 | integer counter; 8 | begin 9 | counter := 0; 10 | 11 | putString("Number of Loops:"); 12 | putInteger(num_loops); 13 | 14 | for (counter := counter + 1; counter <= num_loops) 15 | putString("Current Counter:"); 16 | putInteger(counter); 17 | result := counter; 18 | end for; 19 | end procedure; 20 | 21 | begin 22 | 23 | result := 0; 24 | num_loops := 10; 25 | 26 | do_loops(result); 27 | 28 | 
class ParserError(Exception):
    """Root of the parser's exception hierarchy.

    Every other parsing error derives from this class, so catching it at a
    resync point handles all of them.
    """


class ParserSyntaxError(ParserError):
    """Raised when the parser encounters a syntax error."""


class ParserNameError(ParserError):
    """Raised when the parser encounters a name error."""


class ParserTypeError(ParserError):
    """Raised when the parser encounters a type error."""


class ParserRuntimeError(ParserError):
    """Raised when the parser encounters a runtime error."""
7 | 8 | program test_program is 9 | 10 | integer i; 11 | global integer one_int; 12 | integer two_int; 13 | integer red_int; 14 | integer blue_int; 15 | integer mult_of_two; 16 | integer test_array[15]; 17 | 18 | string test_string; 19 | 20 | bool isGreaterThan; 21 | 22 | procedure test_proc(integer my_int in) 23 | string two_param; 24 | integer assigned_int; 25 | begin 26 | two_param := "Hello, World 2"; 27 | assigned_int := my_int; 28 | 29 | return; 30 | end procedure; 31 | 32 | begin 33 | 34 | test_string := "Hello, World 1"; 35 | 36 | one_int := 1; 37 | two_int := 2; 38 | red_int := 3 + 4.; 39 | mult_of_two := 1; 40 | 41 | test_proc(red_int); 42 | 43 | // Test some expressions 44 | blue_int := (((((one_int * two_int[one_int]) + red_int)))); 45 | 46 | isGreaterThan := false; 47 | 48 | // Testing 'if' statements 49 | if ((blue_int * 2) >= -1) then 50 | isGreaterThan := true; 51 | end if; 52 | 53 | // Testing 'for' statements 54 | for (i := 0; i <= blue_int) 55 | i := i + 1.; 56 | mult_of_two := i * 2; 57 | end for; 58 | 59 | return; 60 | 61 | end program 62 | -------------------------------------------------------------------------------- /tests/codegen_good.src: -------------------------------------------------------------------------------- 1 | program codegen_test is 2 | 3 | integer count; 4 | 5 | integer add1; 6 | integer add2; 7 | 8 | float float1; 9 | float float2; 10 | 11 | float result1; 12 | 13 | string hello; 14 | 15 | integer sum[2]; 16 | integer result; 17 | 18 | // Test a procedure declaration. 
Will be called later 19 | procedure increment (integer val in, integer result out) 20 | integer tmp1; 21 | begin 22 | tmp1 := 1; 23 | result := tmp1 + val; 24 | end procedure; 25 | 26 | begin 27 | 28 | result := 0; 29 | increment(4 + 7 + 3, result); 30 | 31 | if (4 + 7 + 3 + 1 == result) then 32 | putString("SUCCESS"); 33 | else 34 | putString("FAILURE"); 35 | end if; 36 | 37 | add1 := 1; 38 | add2 := 5; 39 | 40 | float1 := 1.1; 41 | float2 := 2.; 42 | 43 | result1 := float1 + float2; 44 | result1 := float1 * add1; 45 | 46 | sum[0] := 1 + 5; 47 | sum[1] := add1 + add2; 48 | 49 | if (sum[0] == sum[1]) then 50 | result := 1; 51 | else 52 | result := 0; 53 | end if; 54 | 55 | if (result == 1) then 56 | putString("SUCCESS"); 57 | else 58 | putString("FAILURE"); 59 | end if; 60 | 61 | // Set 'count' to 10 the long way 62 | count := 0; 63 | for (count := count; count < 10) 64 | count := count + 1; 65 | end for; 66 | 67 | if (count == 10) then 68 | putString("SUCCESS"); 69 | else 70 | putString("FAILURE"); 71 | end if; 72 | end program 73 | -------------------------------------------------------------------------------- /tests/bigtest_bad.src: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // File: bigtest_bad.src 3 | // Author: Evan Sneath 4 | // Description: This is a test of the parsing component of the compiler. 5 | // Errors: 4 errors should be raised in this program. 
6 | ////////////////////////////////////////////////////////////////////////////// 7 | 8 | program test_program is 9 | 10 | integer i; 11 | global integer one_int; 12 | integer two_int; 13 | integer red_int; 14 | integer blue_int; 15 | integer mult_of_two[1]; 16 | integer test_array[15]; 17 | 18 | // ERROR 1: Multiple definitions of two_int 19 | integer two_int; 20 | 21 | string test_string; 22 | 23 | bool isGreaterThan; 24 | 25 | procedure test_proc(integer my_int in) 26 | string two_param; 27 | integer assigned_int; 28 | begin 29 | two_param := "Hello, World"; 30 | assigned_int := my_int; 31 | 32 | return; 33 | end procedure; 34 | 35 | begin 36 | 37 | test_string := "Hello, World"; 38 | 39 | // ERROR 2: Variable used for function call 40 | one_int(); 41 | 42 | one_int := 1; 43 | two_int := 2; 44 | red_int := 3 + 4.; 45 | mult_of_two := 1; 46 | 47 | test_proc(red_int); 48 | 49 | // ERROR 3: Variable this_doesnt_exist never declared 50 | this_doesnt_exist := 42; 51 | 52 | // ERROR 4: Assigning to a procedure, not a variable 53 | test_proc := "This is not ok"; 54 | 55 | // Test some expressions 56 | blue_int := (((((one_int * two_int[one_int]) + red_int)))); 57 | 58 | isGreaterThan := false; 59 | 60 | // Testing 'if' statements 61 | if ((blue_int * 2) >= -1) then 62 | isGreaterThan := true; 63 | end if; 64 | 65 | // Testing 'for' statements 66 | for (i := 0; i <= blue_int) 67 | i := i + 1.; 68 | mult_of_two := i * 2; 69 | end for; 70 | 71 | return; 72 | 73 | end program 74 | -------------------------------------------------------------------------------- /tests/runtime_good.src: -------------------------------------------------------------------------------- 1 | program codegen_test is 2 | 3 | integer testint; 4 | 5 | string teststring; 6 | string teststring2; 7 | string teststring3; 8 | 9 | bool testbool; 10 | float testfloat; 11 | 12 | procedure increment (integer val in, integer result out) 13 | begin 14 | result := val + 1; 15 | end procedure; 16 | 17 | begin 18 
| 19 | //////////////////////////////////////////// 20 | // INTEGER TEST 21 | 22 | //testint := 0; 23 | //increment(41, testint); 24 | 25 | //putString("41 plus 1 is..."); 26 | //putInteger(testint); 27 | 28 | //putString("Enter an integer"); 29 | //getInteger(testint); 30 | //putString("You entered..."); 31 | //putInteger(testint); 32 | 33 | //////////////////////////////////////////// 34 | // BOOL TEST 35 | 36 | //putString("Enter a boolean value 0 or 1"); 37 | //getBool(testbool); 38 | //putString("You entered..."); 39 | //putBool(testbool); 40 | 41 | //////////////////////////////////////////// 42 | // FLOAT TEST 43 | 44 | //testfloat := 4.5; 45 | //putFloat(testfloat); 46 | //putFloat(1414.1414); 47 | 48 | //putString("Enter a float"); 49 | //getFloat(testfloat); 50 | //putString("You entered..."); 51 | //putFloat(testfloat); 52 | 53 | //////////////////////////////////////////// 54 | // STRING TEST 55 | 56 | putString("Enter a string"); 57 | getString(teststring); 58 | putString("You entered..."); 59 | putString(teststring); 60 | 61 | putString("Enter a string"); 62 | getString(teststring2); 63 | putString("You entered..."); 64 | putString(teststring2); 65 | 66 | putString("Enter a string"); 67 | getString(teststring3); 68 | putString("You entered..."); 69 | putString(teststring3); 70 | 71 | putString("This is the first string"); 72 | putString(teststring); 73 | 74 | putString("This is the second string"); 75 | putString(teststring2); 76 | 77 | putString("This is the third string"); 78 | putString(teststring3); 79 | end program 80 | -------------------------------------------------------------------------------- /compiler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """Compiler module 4 | 5 | Acts as the command line interface to the compiler components. When given a 6 | source file, the compilation process will be executed. 
def parse_arguments():
    """Parse Arguments

    Parses all command line arguments for the compiler program. The
    arguments are read from the process command line by argparse.

    Returns:
        An argparse namespace with the following attributes:
            source: The source file to compile (required positional).
            out: Target path for the compiled code. (Default: 'a.out')
            debug: True if '-d'/'--debug' was given. (Default: False)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--debug',
                        help='print comments in generated code',
                        action='store_true')
    parser.add_argument('source',
                        help='source file to compile')
    parser.add_argument('-o', '--out',
                        help='target path for the compiled code',
                        action='store',
                        default='a.out')

    return parser.parse_args()


def run_compiler(source, target, debug=False):
    """Run Compiler

    Executes the compilation process given a source file path: the source
    is parsed into an intermediate C file, which is then compiled with gcc.

    Arguments:
        source: The source file to compile.
        target: The destination binary executable file.
        debug: Passed through to the Parser. The command line describes
            this flag as printing comments in the generated code.
            (Default: False)

    Returns:
        True on success, False otherwise. Failures (parse errors, a
        non-zero gcc exit status, or gcc not being runnable at all) are
        reported on standard output.
    """
    # Location of the intermediate C code. The file is written to the
    # current working directory and is not removed afterwards.
    tmp_code_file = './ir.c'

    # Create a Parser object to parse the inputted source file
    parser = Parser(debug)

    # Parse the source file to the temporary code file
    if not parser.parse(source, tmp_code_file):
        print('Error while parsing "%s"' % source)
        return False

    # Set up gcc compilation command
    gcc_cmd = ['gcc', '-m32', '-o', target, tmp_code_file]

    # Compile the temporary file with gcc. Output to the target location
    try:
        status = subprocess.call(gcc_cmd)
    except OSError as err:
        # gcc is not installed or not executable: report this like any
        # other compilation failure rather than crashing with a traceback
        print('Error while compiling "%s"' % target)
        print('    Could not run gcc: %s' % err)
        return False

    if status != 0:
        print('Error while compiling "%s"' % target)
        return False

    return True


if __name__ == '__main__':
    # Parse compiler arguments
    args = parse_arguments()

    # Run compilation process
    result = run_compiler(args.source, args.out, debug=args.debug)

    # Terminate program: exit status 0 on success, 1 on failure
    sys.exit(0 if result else 1)
class IdentifierTable(list):
    """IdentifierTable class

    Extends the List built-in type with all methods necessary for identifier
    table management during compilation. Each element of the list is one
    scope: a dictionary mapping identifier names to Identifier objects.
    Element 0 is the global scope and the last element is the current scope.

    Methods:
        push_scope: Adds a new scope.
        pop_scope: Removes the highest scope.
        add: Adds a new identifier to the current or global scope.
        find: Determines if an identifier is in the current or global scope.
        get_id_location: Determines where the identifier exists in the scope.
        is_global: Determines if an identifier exists in the global scope.
        is_param: Determines if an identifier is a parameter of the scope.
        get_param_direction: Gets the direction of the parameter in the scope.
        get_current_scope_owner: Gets the program or procedure identifier
            from which the current scope was created.
    """
    def __init__(self):
        super().__init__()

        # Create the global scope
        self.append({})

        # Create a list of scope parent names (the owner of the scope).
        # The global scope has no owning identifier, so the sentinel name
        # 'global' marks it.
        self._owner_ids = ['global']

    def push_scope(self, owner_id):
        """Push New Identifier Scope

        Creates a new, empty scope on top of the identifiers table and
        records which identifier owns it.

        Arguments:
            owner_id: The name of the identifier which has created this scope.
        """
        # Create a brand new scope for the identifiers table
        self.append({})

        # Save the owner of this scope for future lookup
        self._owner_ids.append(owner_id)

    def pop_scope(self):
        """Pop Highest Identifier Scope

        Disposes of the current scope in the identifiers table along with
        its recorded owner.
        """
        # Remove this entire scope from the identifiers table
        self.pop()

        # Remove the identifier from the owner list
        self._owner_ids.pop()

    def add(self, identifier, is_global=False):
        """Add Identifier to Scope

        Adds a new identifier to either the current scope or global.

        Arguments:
            identifier: An Identifier named tuple object describing the new
                identifier to add to the table.
            is_global: Determines whether the identifier should be added to
                the current scope or the global scope. (Default: False)

        Raises:
            ParserNameError if the identifier has been declared at this scope,
            or if a global is declared deeper than the program scope.
        """
        name = identifier.name

        if not is_global:
            if name in self[-1]:
                raise ParserNameError('name already declared at this scope')

            self[-1][name] = identifier
            return

        # Globals may only be declared while the table holds at most the
        # global scope and one scope above it (the program scope)
        if len(self) > 2:
            raise ParserNameError('global name must be defined in program scope')

        # A global name may clash with neither the global scope nor the
        # scope directly above it
        if name in self[0] or (len(self) > 1 and name in self[1]):
            raise ParserNameError('name already declared at this scope')

        self[0][name] = identifier

    def find(self, name):
        """Find Identifier in Scope

        Searches for the given identifier in the current and global scope.
        The current scope takes precedence over the global scope; scopes in
        between are not searched.

        Arguments:
            name: The identifier name for which to search.

        Returns:
            The Identifier named tuple stored under the name in the current
            or global scope.

        Raises:
            ParserNameError if the given identifier is not found in any valid
            scope.
        """
        if name in self[-1]:
            return self[-1][name]

        if name in self[0]:
            return self[0][name]

        raise ParserNameError()

    def get_id_location(self, name):
        """Get Identifier Location

        Determines the location of the identifier in the stack based on the
        identifier's place in the id table. The global scope is checked
        before the parameters of the current scope.

        Arguments:
            name: The identifier name for which to search.

        Returns:
            A string value for the location of the identifier in the stack.
            This may be 'global', 'param', or 'local'.
        """
        if self.is_global(name):
            return 'global'

        if self.is_param(name):
            return 'param'

        return 'local'

    def is_global(self, name):
        """Identifier is Global

        Determines if an identifier exists in the global scope.

        Arguments:
            name: The identifier name for which to search.

        Returns:
            True if the identifier exists in the global scope. False otherwise.
        """
        return name in self[0]

    def _find_param(self, name):
        """Find Parameter of Current Scope (Protected)

        Looks up a parameter of the owner of the current scope by name.

        Arguments:
            name: The identifier name for which to search.

        Returns:
            The matching Parameter object. None if the current scope is the
            global scope, its owner has no parameters, or no parameter has
            the given name.
        """
        owner = self.get_current_scope_owner()

        # The global scope has no owner, and owners that are not procedures
        # may carry no parameter list at all
        if owner is None or not owner.params:
            return None

        for param in owner.params:
            if name == param.id.name:
                return param

        return None

    def is_param(self, name):
        """Identifier is Parameter

        Determines if an identifier is a parameter in the current scope.

        Arguments:
            name: The identifier name for which to search.

        Returns:
            True if the identifier is a scope parameter. False otherwise.
        """
        return self._find_param(name) is not None

    def get_param_direction(self, name):
        """Get Parameter Direction

        If the name given is a valid parameter of the scope, the direction
        ('in' or 'out') will be returned.

        Arguments:
            name: The identifier name for which to search.

        Returns:
            'in' or 'out' depending on the parameter direction. None if the
            name given is not a valid parameter of the current scope.
        """
        param = self._find_param(name)

        return param.direction if param is not None else None

    def get_current_scope_owner(self):
        """Get Current Scope Owner

        Returns the Identifier object of the owner of the current scope. The
        owner is looked up by name in the current scope itself, so it must
        have been added there after the scope was pushed.

        Returns:
            The Identifier object of the owner of the current scope. None if
            the current scope is the global scope.
        """
        owner = self._owner_ids[-1]

        # If this is the global scope, return no owner
        if owner == 'global':
            return None

        return self[-1][owner]
class Scanner:
    """Scanner class

    This class implements a scanner object to scan a source code file in the
    compilation process. This class is designed to be inherited to be used
    during the parsing stage of the compiler.

    The attached source is stored as a list of lines and a (line, column)
    cursor walks over it. End of line and end of file are detected from the
    cursor position itself, so a last line without a trailing newline, a
    comment on the last line, and repeated calls after the end of the file
    are all handled without raising.

    Attributes:
        keywords: A list of valid keywords in the language.
        symbols: A list of valid symbols in the language.

    Methods:
        attach_source: Binds a source file to the scanner to begin scanning.
        next_token: Returns the next token of the attached file. This token
            will be of the Token named tuple class.
    """
    # Define all language keywords
    keywords = [
        'string', 'integer', 'bool', 'float', 'global', 'is', 'in', 'out',
        'if', 'then', 'else', 'for', 'and', 'or', 'not', 'program',
        'procedure', 'begin', 'return', 'end', 'true', 'false',
    ]

    # Define all language symbols
    symbols = [
        ':', ';', ',', '+', '-', '*', '/', '(', ')', '<', '<=', '>', '>=',
        '!', '!=', '=', '==', ':=', '[', ']', '&', '|',
    ]

    def __init__(self):
        # Cooperative call: this class is meant to be combined with other
        # base classes through multiple inheritance.
        super().__init__()

        # Holds the file path of the attached source file
        self._src_path = ''

        # Holds all source file data (code) to be scanned, one entry per line
        self._src = ''

        # Holds the location of the next character to scan in the source file
        self._line_pos = 0
        self._char_pos = 0

    def attach_source(self, src_path):
        """Attach Source

        Attach a source file to the scanner and prepare for token collection.
        The scan cursor is moved back to the start of the file so that a
        scanner may be re-attached to a new source.

        Arguments:
            src_path: The path to the source file to scan.

        Returns:
            True on success, False otherwise.
        """
        # Make sure the inputted file is a actual file
        if not isfile(src_path):
            print('Error: "%s"' % src_path)
            print(' Inputted path is not a file')
            return False

        # Try to read all data from the file and split by line. Undecodable
        # content is reported the same way as an unreadable file.
        try:
            with open(src_path) as f:
                lines = f.read().splitlines(True)
        except (IOError, UnicodeDecodeError):
            print('Error: "%s"' % src_path)
            print(' Could not read inputted file')
            return False

        # The file was attached and read successfully, store it and rewind
        self._src = lines
        self._src_path = src_path
        self._line_pos = 0
        self._char_pos = 0

        return True

    def next_token(self):
        """Scan For Next Token

        Scans the source code for the next token. The next token is then
        returned for parsing. Invalid characters produce a warning and are
        skipped; comments are skipped silently. Once the end of the file is
        reached every further call returns an 'eof' token.

        Returns:
            The next token object in the source code.
        """
        # A loop (rather than recursion) keeps long runs of comments or
        # invalid characters from exhausting the interpreter call stack.
        while True:
            # Get the first character, narrow down the type possibilities
            char = self._next_word()

            if char is None:
                return Token('eof', None, self._line_pos)

            # Use the first character to choose the token type to expect
            if char == '"':
                value, token_type = self._expect_string()
            elif char.isdigit():
                value, token_type = self._expect_number(char)
            elif char.isalpha():
                value, token_type = self._expect_identifier(char)
            elif char in self.symbols:
                value, token_type = self._expect_symbol(char)
            else:
                # We've run across a character that shouldn't be here
                msg = 'Invalid character \'%s\' encountered' % char
                self._scan_warning(msg, hl=self._char_pos-1)
                continue

            if token_type == 'comment':
                # Skip the rest of the line. If this was the last line,
                # _next_word reports the end of file on the next pass.
                self._next_line()
                continue

            # Build the new token object (line numbers are 1-based)
            return Token(token_type, value, self._line_pos+1)

    def _get_line(self, line_number):
        """Get Line (Protected)

        Returns a line stripped of leading and trailing whitespace given a
        line number.

        Arguments:
            line_number: The 1-based line number of the attached source file.

        Returns:
            The requested line number from the source, None on invalid line.
        """
        if 0 < line_number <= len(self._src):
            return self._src[line_number-1].strip()

        return None

    def _scan_warning(self, msg, hl=-1):
        """Print Scanner Warning Message (Protected)

        Prints a formatted warning message for the line under the cursor.

        Arguments:
            msg: The warning message to display
            hl: If not -1, there will be an pointer (^) under a
                character in the line to be highlighted. (Default: -1)
        """
        line = self._src[self._line_pos]

        # Drop only the line terminator; a final line may not have one
        if line.endswith('\n'):
            line = line[:-1]

        print('Warning: "', self._src_path, '", ', sep='', end='')
        print('line ', self._line_pos+1, sep='')
        print(' ', msg, '\n ', line.strip(), sep='')

        if hl != -1:
            # The line is printed stripped, so shift the pointer left by the
            # amount of leading whitespace that was removed.
            left_spaces = len(line) - len(line.lstrip())
            print(' %s^' % (' '*(abs(hl)-left_spaces)))

    def _char_at_cursor(self):
        """Character Under Cursor (Protected)

        Returns:
            The character under the cursor, '\\n' if the cursor sits at or
            past the end of the current line, None if the cursor is past the
            last line (or no source is attached).
        """
        if self._line_pos >= len(self._src):
            return None

        line = self._src[self._line_pos]

        if self._char_pos >= len(line):
            return '\n'

        return line[self._char_pos]

    def _next_word(self):
        """Get Next Word Character (Protected)

        Move the cursor to the start of the next non-space character in the
        file.

        Returns:
            The first non-space character encountered. None if the end of
            file was reached.
        """
        while True:
            char = self._char_at_cursor()

            if char is None:
                return None

            # React according to spaces and newlines
            if char == '\n':
                if not self._next_line():
                    return None
            elif char in ' \t':
                self._char_pos += 1
            else:
                break

        # Increment to the next character
        self._char_pos += 1
        return char

    def _next_line(self):
        """Travel to Next Line (Protected)

        Move the cursor to the start of the next line safely.

        Returns:
            True on success, False if end of file is encountered
        """
        self._line_pos += 1
        self._char_pos = 0

        # Check to make sure this isn't the end of file. The position is
        # clamped so repeated calls keep reporting the same end location.
        if self._line_pos >= len(self._src):
            self._line_pos = len(self._src)
            return False

        return True

    def _next_char(self, peek=False):
        """Get Next Character (Protected)

        Move the cursor to the next character in the file.

        Arguments:
            peek: If True, the character position pointer will not be
                incremented. Set by default to False.

        Returns:
            The next character encountered. None if the end of line
            (or end of file) was reached.
        """
        char = self._char_at_cursor()

        # Return None if we hit a line ending
        if char is None or char == '\n':
            return None

        # Increment to the next character
        if not peek:
            self._char_pos += 1

        return char

    def _expect_string(self):
        """Expect String Token (Protected)

        Parses the following characters in hope of a valid string. If an
        invalid string is encountered, all attempts are made to make it valid:
        a missing closing quotation is assumed at the end of the line and
        illegal characters are replaced by spaces, each with a warning.

        Returns:
            (value, token_type) - A tuple describing the final parsed token.
            The resulting token type will be 'string'.
        """
        line = self._src[self._line_pos]

        # We know this is a string. Find the next quotation and return it
        string_end = line.find('"', self._char_pos)
        hanging_quote = string_end == -1

        # If we have a hanging quotation, assume quote ends at end of line
        if hanging_quote:
            string_end = len(line) - 1 if line.endswith('\n') else len(line)
            self._scan_warning('No closing quotation in string', hl=string_end)

        raw = line[self._char_pos:string_end]
        chars = list(raw)

        # Check for illegal characters, send a warning if encountered
        for i, char in enumerate(raw):
            if not char.isalnum() and char not in ' _,;:.\'':
                chars[i] = ' '
                msg = 'Invalid character \'%s\' in string' % char
                self._scan_warning(msg, hl=self._char_pos+i)

        value = ''.join(chars)

        # Step over the string body and, when present, the closing quotation
        self._char_pos += len(value)
        if not hanging_quote:
            self._char_pos += 1

        return value, 'string'

    def _expect_number(self, char):
        """Expect Number Token (Protected)

        Parses the following characters in hope of a valid integer or float.

        Arguments:
            char: The first character already picked for the value.

        Returns:
            (value, token_type) - A tuple describing the final parsed token.
            The resulting token type will either be 'integer' indicating a
            valid integer or 'float' indicating a valid floating point value.
        """
        value = char
        token_type = 'integer'

        is_float = False

        while True:
            char = self._next_char(peek=True)

            if char is None:
                break
            elif char == '.' and not is_float:
                # We found a decimal point. Move to float mode
                is_float = True
                token_type = 'float'
            elif not char.isdigit() and char != '_':
                break

            value += char
            self._char_pos += 1

        # Remove all underscores in the int/float. These serve no purpose
        value = value.replace('_', '')

        # If nothing was given after the decimal point assume 0
        if is_float and value.split('.')[-1] == '':
            value += '0'

        return value, token_type

    def _expect_identifier(self, char):
        """Expect Identifier Token (Protected)

        Parses the following characters in hope of a valid identifier.

        Arguments:
            char: The first character already picked for the value.

        Returns:
            (value, token_type) - A tuple describing the final parsed token.
            The resulting token type will either be 'identifier' indicating a
            valid identifier or 'keyword' indicating a valid keyword.
        """
        value = char

        while True:
            char = self._next_char(peek=True)

            if char is None:
                break
            elif not char.isalnum() and char != '_':
                break

            value += char
            self._char_pos += 1

        token_type = 'keyword' if value in self.keywords else 'identifier'

        return value, token_type

    def _expect_symbol(self, char):
        """Expect Symbol Token (Protected)

        Parses the following characters in hope of a valid symbol. The symbol
        is extended for as long as the result is still a known symbol.

        Arguments:
            char: The first character already picked for the value.

        Returns:
            (value, token_type) - A tuple describing the final parsed token.
            The resulting token type will either be 'symbol' indicating a
            valid symbol or 'comment' (with a value of None) indicating a
            comment until line end.
        """
        value = char

        while True:
            char = self._next_char(peek=True)

            if char is None:
                break
            elif value + char == '//':
                return None, 'comment'
            elif value + char not in self.symbols:
                break

            value += char
            self._char_pos += 1

        return value, 'symbol'
| Component | Status |
|-----------|--------|
| Scanning | Completed |
| Parsing | Completed |
| Type Checking | Completed |
| Code Generation | Completed |
| Runtime | Completed |
31 | 32 | ## Usage 33 | ``` 34 | usage: compiler.py [-h] [-d] [-o OUT] source 35 | 36 | positional arguments: 37 | source source file to compile 38 | 39 | optional arguments: 40 | -h, --help show this help message and exit 41 | -d, --debug print comments in generated code 42 | -o OUT, --out OUT target path for the compiled code 43 | ``` 44 | 45 | The compiler will scan the source file for all valid tokens and 46 | parse the language grammar. All scanner, parser, and type errors will be 47 | outputted as they are encountered. Generated code is then outputted to `ir.c` 48 | where it is then run through the `gcc` compiler. The default output file 49 | generated by the compiler is `a.out` in the working directory. The `-o` 50 | argument may be used to modify the output file name. 51 | 52 | The `tests/` directory contains test source files which have several examples 53 | of token scanning with error/warning handling, grammar parsing, code 54 | generation, and runtime libraries. 55 | 56 | ## Implementation Details 57 | 58 | ### Software 59 | 60 | In determining the implementation language, robustness was chosen over speed as 61 | the deciding factor for the compiler. Python 3 was selected because of its ease of 62 | use, access to simple dictionary and table libraries, and my own familiarity 63 | with the language. 64 | 65 | As I progressed through the parser stage of the compiler, it became clear that 66 | the simple exception raising and handling would be useful for displaying 67 | compiler errors and trapping at resync points to continue syntax parsing. 68 | 69 | ### Structure 70 | 71 | For the sake of modularity and ease of debugging, the program is structured in 72 | a hierarchical fashion. 73 | 74 | `compiler.py` acts as the engine responsible for parsing of 75 | command-line arguments, calling the code parser, and completing the build using 76 | the `gcc` compiler with the appropriate arguments once the intermediate C code 77 | is generated.
78 | 79 | `parser.py` and the `Parser` class are the entry point for the action of 80 | compiling the valid input file. In order to do this, `Parser` inherits the 81 | `Scanner` class (in `scanner.py`) and the `CodeGenerator` class 82 | (in `codegenerator.py`) to allow for simple access to their class methods and 83 | objects. The `datatypes.py` and `errors.py` source files contain several 84 | data types and exception classes respectively which are used in the various 85 | components of the compiler. 86 | 87 | ### Scanning 88 | 89 | The implementation of the language scanner first tackles the problem of source 90 | code parsing by splitting the source code into a list of distinct lines. Not 91 | only does this allow for easier ways to determine end of line and end of file, 92 | but also makes the operation of retrieving line numbers simple for purposes of 93 | warning and error messages. 94 | 95 | At the start of each non-whitespace character, the first character is used to 96 | determine the type of the token to expect. The token is returned if the type is 97 | matched without issue. Otherwise, a scanner warning is thrown. 98 | 99 | The scanner warnings are never fatal, though syntactically the tokens returned 100 | may cause a parser error. My methodology behind the scanner was to try to 101 | correct as many lexical errors as possible. For instance, if a string literal 102 | has no end quote a warning will be thrown and a quote will be assumed at the 103 | end of the line. 104 | 105 | ### Parsing 106 | 107 | In order to eliminate loops caused by recursive grammar, any left-recursion in 108 | the language grammar was rewritten. 109 | 110 | Type-checking is performed in expressions by returning the types from the 111 | expression tree functions and evaluating types for compatibility if an 112 | operation is performed. There are many other locations where type-checking is 113 | performed in the compiler other than expressions.
114 | 115 | Parser resync points are used throughout the compiler to continue parsing if 116 | an error is encountered without propagating spurious error messages. Exception 117 | handling in Python is used to elegantly handle resyncing. Once a parser error 118 | is encountered in a statement or declaration, an exception is raised. This 119 | exception is then handled at the starting point of statement or declaration 120 | parsing and the parsing will continue to the next statement or declaration. 121 | 122 | Note that once a fatal error of any kind is encountered, code will no longer 123 | be generated. 124 | 125 | ### Code Generation 126 | 127 | Memory and registers for the operation of the program are defined and used as 128 | 32-bit integer arrays. This allows for simple addressing of memory and register 129 | space. All non-integer types present in the program are cast as integers for 130 | storage in the memory spaces. In the case of string storage, memory spaces hold 131 | a 32-bit pointer to the start of the string in either the heap (this will be 132 | covered later) or a literal value. To ensure that pointers are 32-bit and may 133 | be cast to integer without issue, the `gcc` compiler flag `-m32` is used. 134 | 135 | A fixed number of available register locations are allocated for use. These are 136 | used incrementally and are not reused or reallocated. For this reason, a large 137 | number of registers are required so that register space is always available. 138 | Future improvements could be made to "push back" register allocation to the 139 | first register (`R[0]`) at the end of each scope. At the end of a scope, it can 140 | be assumed that the same register will not be referenced again. 141 | 142 | The main memory structure of the program is divided into the stack and heap. 143 | The stack begins at the high memory address and is maintained using both a 144 | stack and frame pointer.
The frame pointer (pointing to the scope's return 145 | address) provides a way to easily smash local stack variables when leaving the 146 | scope. All global variables may only be declared in the program scope and are 147 | referenced using the offset from the top of main memory. 148 | 149 | The heap in main memory is used only to allocate space for strings during 150 | runtime. This is accomplished using a heap pointer pointing to the next unused 151 | memory location in the heap. As the `getString()` procedure is called, the 152 | string retrieved from `stdin` is moved to the heap and the variable 153 | referencing that string is modified to point to the newly allocated heap 154 | location. 155 | 156 | Memory is arranged in the following manner: 157 | 158 | ``` 159 | MAIN MEMORY 160 | -- .-------------------. 161 | P | RETURN ADDR | <== MM_END (MM_SIZE - 1) 162 | R | ----------------- | 163 | O | LOCAL/GLOBAL VARS | 164 | G | . | 165 | R | . | 166 | A | . | 167 | M | . | 168 | -- | ----------------- | 169 | P | PARAMS | 170 | R | . | 171 | O | . | 172 | C | . | 173 | E | ----------------- | 174 | D | CALLER FP | 175 | U | ----------------- | 176 | R | RETURN ADDR .. | <== FP 177 | E | ----------------- | 178 | | LOCAL VARS | 179 | | . | 180 | | . | 181 | | . | <== SP 182 | `---v--v--v--v--v---` 183 | . 184 | . 185 | . 186 | .---^--^--^--^--^---. 187 | | . | 188 | | . | 189 | | . | 190 | | HEAP | <== MM_START (0) 191 | `-------------------` 192 | ``` 193 | 194 | When entering a scope, the caller pushes all params onto the stack in reverse 195 | order. This allows for easy addressing by their indexes. The caller then stores 196 | its current FP onto the stack and the return address. At this point the called 197 | scope is responsible for maintaining the stack and adding its local variables. 198 | 199 | When leaving a scope, the SP is moved to the FP location and the return address 200 | is called. 
The caller scope then is responsible for restoring the caller FP and 201 | ensuring that all outbound params are written back to their appropriate 202 | locations. 203 | 204 | All procedure calls are made using C labels and the `goto` statement. This 205 | ensures that the program code remains in the `main` function and no outside 206 | function calls are required. The technique of using 207 | [labels as values](http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html) 208 | was used to store the location of the return labels on the stack. 209 | 210 | Loop and conditional statements also make use of the `goto` statement to 211 | determine program flow. After the conditional expression is resolved to a 212 | boolean form, the register used for the expression is tested. If the expression 213 | resolved to `false`, then the code portion is skipped. 214 | 215 | For example: 216 | 217 | ``` 218 | R[0] = <expression>; 219 | if (!R[0]) goto else_label; 220 | <then statements> 221 | goto end_if_label; 222 | else_label: 223 | <else statements> 224 | end_if_label: 225 | ``` 226 | 227 | ### Runtime Environment 228 | Initially, I had created a separate C library to implement the runtime 229 | functions necessary. I determined that these functions were simple enough to be 230 | handwritten directly inline with the generated code as I progressed through 231 | development. The runtime functions use the same principles of stack memory 232 | referencing as other procedures and are populated in the identifiers table 233 | manually at the start of parsing.
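234 | 235 | ## Language Specifications 236 | 237 | ### Syntax 238 | ``` 239 | <program> ::= 240 | <program_header> <program_body> 241 | 242 | <program_header> ::= 243 | 'program' <identifier> 'is' 244 | 245 | <program_body> ::= 246 | ( <declaration> ';' )* 247 | 'begin' 248 | ( <statement> ';' )* 249 | 'end' 'program' 250 | 251 | <declaration> ::= 252 | [ 'global' ] <procedure_declaration> | 253 | [ 'global' ] <variable_declaration> 254 | 255 | <variable_declaration> ::= 256 | <type_mark> <identifier> [ '[' <number> ']' ] 257 | 258 | <type_mark> ::= 259 | 'integer' | 260 | 'float' | 261 | 'bool' | 262 | 'string' 263 | 264 | <procedure_declaration> ::= 265 | <procedure_header> <procedure_body> 266 | 267 | <procedure_header> ::= 268 | 'procedure' <identifier> '(' [ <parameter_list> ] ')' 269 | 270 | <procedure_body> ::= 271 | ( <declaration> ';' )* 272 | 'begin' 273 | ( <statement> ';' )* 274 | 'end' 'procedure' 275 | 276 | <parameter_list> ::= 277 | <parameter> ',' <parameter_list> | 278 | <parameter> 279 | 280 | <parameter> ::= 281 | <variable_declaration> ( 'in' | 'out' ) 282 | 283 | <statement> ::= 284 | <assignment_statement> | 285 | <if_statement> | 286 | <loop_statement> | 287 | <return_statement> | 288 | <procedure_call> 289 | 290 | <assignment_statement> ::= 291 | <destination> ':=' <expression> 292 | 293 | <if_statement> ::= 294 | 'if' '(' <expression> ')' 'then' ( <statement> ';' )+ 295 | [ 'else' ( <statement> ';' )+ ] 296 | 'end' 'if' 297 | 298 | <loop_statement> ::= 299 | 'for' '(' <assignment_statement> ';' <expression> ')' 300 | ( <statement> ';' )* 301 | 'end' 'for' 302 | 303 | <procedure_call> ::= 304 | <identifier> '(' [ <argument_list> ] ')' 305 | 306 | <argument_list> ::= 307 | <expression> ',' <argument_list> | 308 | <expression> 309 | 310 | <destination> ::= 311 | <identifier> [ '[' <expression> ']' ] 312 | 313 | <expression> ::= 314 | <expression> '&' <arith_op> | 315 | <expression> '|' <arith_op> | 316 | [ 'not' ] <arith_op> 317 | 318 | <arith_op> ::= 319 | <arith_op> '+' <relation> | 320 | <arith_op> '-' <relation> | 321 | <relation> 322 | 323 | <relation> ::= 324 | <relation> '<' <term> | 325 | <relation> '>' <term> | 326 | <relation> '>=' <term> | 327 | <relation> '<=' <term> | 328 | <relation> '==' <term> | 329 | <relation> '!=' <term> | 330 | <term> 331 | 332 | <term> ::= 333 | <term> '*' <factor> | 334 | <term> '/' <factor> | 335 | <factor> 336 | 337 | <factor> ::= 338 | '(' <expression> ')' | 339 | [ '-' ] <name> | 340 | [ '-' ] <number> | 341 | <string> | 342 | 'true' | 343 | 'false' 344 | 345 | <name> ::= 346 | <identifier> [ '[' <expression> ']' ] 347 | 348 | <identifier> ::= 349 | [a-zA-Z][a-zA-Z0-9_]* 350 | 351 | <number> ::= 352 | [0-9][0-9_]*[.[0-9_]*]? 353 | 354 | <string> ::= 355 | "[a-zA-Z0-9 _,;:.']*" 356 | ``` 357 | 358 | ### Semantics 359 | * Procedure parameters are transmitted by value. Recursion is supported. 360 | * Non-local variables and functions are not visible except for those variables 361 | and functions in the outermost scope prefixed with the global reserved word. 362 | Functions currently being defined are visible in the statement set of the 363 | function itself (so that recursive calls are possible). 364 | * No forward references are permitted or supported. 365 | * Expressions are strongly typed and types must match.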
However, there is 366 | automatic conversion in the arithmetic operators to allow any mixing between 367 | integers and floats. Furthermore, the relational operators can compare 368 | boolean with integer tokens (boolean tokens are converted to integers as 369 | `false = 0`, `true = 1`). 370 | * The type signatures of a procedure's arguments must match exactly their 371 | parameter declaration. 372 | * Arithmetic operations (`+`, `-`, `*`, `/` `&` `|`) are defined for integers 373 | and floats only. The bitwise AND (`&`), bitwise OR (`|`) and bitwise NOT 374 | (`not`) operators are valid only on variables of type integer. 375 | * Relational operations are defined for integer and boolean tokens. Only 376 | comparisons between the compatible types is possible. Relational operations 377 | return a boolean result. 378 | -------------------------------------------------------------------------------- /lib/codegenerator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """CodeGenerator module 4 | 5 | Provides functionality for code output to a attached destination file. 6 | 7 | Author: Evan Sneath 8 | License: Open Software License v3.0 9 | 10 | Classes: 11 | CodeGenerator: A code generator interface for destination file outputting. 12 | """ 13 | 14 | 15 | class CodeGenerator: 16 | """CodeGenerator class 17 | 18 | This class implements code generator function calls to easily attach a 19 | destination file, input code to generate, and commit the destination 20 | file upon successful compilation. This class is designed to be inherited 21 | the be used during the parsing stage of the compiler. 22 | 23 | Attributes: 24 | runtime_functions: Details of each runtime function and its params. 25 | 26 | Methods: 27 | attach_destination: Binds a destination file to the code generator. 28 | generate_header: Generates overhead code (memory allocation, etc). 29 | generate_footer: Generates finishing overhead code. 
30 | generate: Formats and stores a given string of code for later output. 31 | comment: Adds a comment to the generated code with appropriate tabbing. 32 | tab_push: Increases the tab depth by 1 tab (4 spaces). 33 | tab_pop: Decreases the tab depth by 1 tab (4 spaces). 34 | commit: Commits all code generation and writes to the destination file. 35 | get_mm: Provides a free memory space for global or local variables. 36 | reset_local_ptr: Resets the value for the local pointer to default. 37 | reset_param_ptr: Resets the value for the param pointer to default. 38 | get_reg: Provides a free register for intermediate variable use. 39 | get_label_id: Returns a unique identifier for the procedure call. 40 | get_unique_call_id: Returns a unique identifier for multiple calls. 41 | generate_program_entry: Generates all code associated with setting up 42 | the program entry and exit point. 43 | generate_procedure_call: Generates all code associated with managing 44 | the memory stack during a procedure call. 45 | generate_procedure_call_end: Generates code to clean up a procedure 46 | call. This finalizes the call by popping the SP to local stack. 47 | generate_name: Generates all code associated with name reference. 48 | generate_assignment: Generates all code associated with id assignment. 49 | generate_param_push: Generates code to push a param onto the stack. 50 | generate_param_pop: Generates code to pop a param off the stack. 51 | generate_param_store: Generates code to save an outgoing parameter 52 | to an identifier located in main memory. 53 | generate_number: Generates the code for a number reference. 54 | generate_return: Generates the code for the 'return' operation. 55 | generate_operation: Generates operation code given an operation. 
56 | """ 57 | def __init__(self): 58 | super().__init__() 59 | 60 | # Holds the file path of the attached destination file 61 | self._dest_path = '' 62 | 63 | # Holds all generated code to be written to the file destination 64 | self._generated_code = '' 65 | 66 | # Holds allocated size of main memory and num registers 67 | self._mm_size = 65536 68 | self._reg_size = 2048 69 | self._buf_size = 256 70 | 71 | # Holds stack pointer, frame pointer, and heap pointer registers 72 | self._SP = 1 73 | self._FP = 2 74 | self._HP = 3 75 | 76 | # Holds the pointer to the lowest unused register for allocation 77 | self._reg = 4 78 | 79 | # Holds the local memory pointer which determines the offset from the 80 | # frame pointer in the current scope. 81 | self._local_ptr = 0 82 | self.reset_local_ptr() 83 | 84 | # Holds the param memory pointer which determines the offset from the 85 | # frame pointer in the current scope. 86 | self._param_ptr = 0 87 | self.reset_param_ptr() 88 | 89 | # Holds the tab count of the code. tab_push, tab_pop manipulate this 90 | self._tab_count = 0 91 | 92 | # Holds an integer used for unique label generation for if/loop 93 | self._label_id = 0 94 | 95 | # Holds an integer to distinguish multiple calls of a function 96 | self._unique_id = 0 97 | 98 | # Holds the details of the runtime functions 99 | self.runtime_functions = { 100 | 'getString': [('my_string', 'string', 'out')], 101 | 'putString': [('my_string', 'string', 'in')], 102 | 'getBool': [('my_bool', 'bool', 'out')], 103 | 'putBool': [('my_bool', 'bool', 'in')], 104 | 'getInteger': [('my_integer', 'integer', 'out')], 105 | 'putInteger': [('my_integer', 'integer', 'in')], 106 | 'getFloat': [('my_float', 'float', 'out')], 107 | 'putFloat': [('my_float', 'float', 'in')], 108 | } 109 | 110 | return 111 | 112 | def attach_destination(self, dest_path): 113 | """Attach Destination 114 | 115 | Attaches a destination file to the code generator and prepares the 116 | file for writing. 
def generate_header(self):
    """Generate Code Header

    Adds all header code to the generated code buffer: the C includes, the
    size and register-index defines, the opening of `main`, and the
    declarations of main memory, registers, float registers and the string
    buffer.

    The sizes and register indexes are read from the instance
    (`_mm_size`, `_reg_size`, `_buf_size`, `_SP`, `_FP`, `_HP`). The whole
    header is emitted through `generate` with a tab depth of 0.
    """
    code = [
        # <stdio.h> and <string.h> declare the C library functions the
        # generated runtime code calls (printf/scanf/fgets, strlen/memcpy).
        '#include <stdio.h>',
        '#include <string.h>',
        '',
        '#define MM_SIZE %d' % self._mm_size,
        '#define R_SIZE %d' % self._reg_size,
        '#define BUF_SIZE %d' % self._buf_size,
        '',
        '// Define register locations of stack/frame ptr',
        '#define SP %d' % self._SP,
        '#define FP %d' % self._FP,
        '#define HP %d' % self._HP,
        '',
        'int main(void)',
        '{',
        '// Allocate main memory and register space',
        'int MM[MM_SIZE];',
        'int R[R_SIZE];',
        '',
        '// SP and FP start at the top of MM',
        'R[SP] = MM_SIZE - 1;',
        'R[FP] = MM_SIZE - 1;',
        '',
        '// HP starts at the bottom of MM',
        'R[HP] = 0;',
        '',
        '// Allocate float registers',
        'float R_FLOAT_1;',
        'float R_FLOAT_2;',
        '',
        '// Allocate space for a string buffer',
        'char STR_BUF[BUF_SIZE];',
        '',
        '////////////////////////////////////////////////////',
        '// PROGRAM START',
        '',
    ]

    self.generate('\n'.join(code), tabs=0)

    return
def generate(self, code, tabs=-1):
    """Generate Code

    Adds the given code to the generated code and automatically formats
    it with the appropriate tabs and ending newline.

    Arguments:
        code: The code to add to the generated code buffer.
        tabs: A manual override to determine the number of tabs to place
            in this line of code. If -1, then the number of tabs used will
            correspond to the tab location from tab_push() and tab_pop()
            methods. (Default: -1)
    """
    depth = self._tab_count if tabs == -1 else tabs

    # One tab level is 4 spaces, as documented for tab_push/tab_pop. Only
    # the start of `code` is indented; embedded newlines are left as given.
    self._generated_code += ('    ' * depth) + code + '\n'

    return
def get_mm(self, id_size, is_param=False):
    """Get Memory Space

    Reserves `id_size` memory slots and returns the offset of the first
    one. Parameters and local/global variables are counted by two separate
    pointers (`_param_ptr` and `_local_ptr`); the pointer that is used is
    advanced past the reserved slots.

    Arguments:
        id_size: The number of slots to reserve (used for arrays). None
            reserves a single slot; any other value is converted with int().
        is_param: True if the identifier is a parameter, False if local or
            global variable. (Default: False)

    Returns:
        An integer offset: the value the selected pointer held before this
        reservation was made.
    """
    # A missing size means a plain (non-array) identifier: one slot
    slots = 1 if id_size is None else int(id_size)

    if is_param:
        # Reserve in the parameter space
        offset = self._param_ptr
        self._param_ptr = offset + slots
        return offset

    # Reserve in the local variable space
    offset = self._local_ptr
    self._local_ptr = offset + slots
    return offset
def generate_program_entry(self, program_name, program_num, debug):
    """Generate Program Entry

    Generates the code for the program entry point: the program return
    address is stored at the frame pointer, a jump to the program's begin
    label is made, and the program exit section is written.

    Arguments:
        program_name: The name of the program.
        program_num: The label id of the program.
        debug: Determines if comments should be written to the code.
    """
    self.comment('Setting program return address', debug)

    # All program labels share the same '<name>_<id>' stem
    label = '%s_%d' % (program_name, program_num)

    # Store the return address, then jump to the entry point
    self.generate('MM[R[FP]] = (int)&&%s_end;' % label)
    self.generate('goto %s_begin;' % label)
    self.generate('')

    # The exit label simply returns from the generated program
    self.comment('Creating the program exit point', debug)
    self.generate('%s_end:' % label)

    self.tab_push()
    self.generate('return 0;')
    self.tab_pop()

    self.generate('')

    return
Set next FP to return address 459 | self.comment('Setting caller FP', debug) 460 | self.generate('R[SP] = R[SP] - 1;') 461 | self.generate('MM[R[SP]] = R[FP];') 462 | self.comment('Setting return address (current FP)', debug) 463 | self.generate('R[SP] = R[SP] - 1;') 464 | self.generate('R[FP] = R[SP];') 465 | 466 | # Generate a new call number so multiple calls do not cause collisions 467 | call_number = self.get_unique_call_id() 468 | 469 | # Push the return address onto the stack 470 | self.generate('MM[R[SP]] = (int)&&%s_%d_%d;' % 471 | (procedure_name, procedure_num, call_number)) 472 | 473 | # Make the jump to the function call 474 | self.generate('goto %s_%d;' % (procedure_name, procedure_num)) 475 | 476 | # Generate the return label 477 | self.generate('%s_%d_%d:' % (procedure_name, procedure_num, call_number)) 478 | 479 | # The SP now points to the return address. Restore the old FP 480 | self.comment('Restore caller FP', debug) 481 | self.generate('R[SP] = R[SP] + 1;') 482 | self.generate('R[FP] = MM[R[SP]];') 483 | 484 | return 485 | 486 | def generate_procedure_call_end(self, debug): 487 | """Generate Procedure Call End 488 | 489 | Generates code to leave the procedure on the stack by pushing the 490 | stack to the lower scope's local stack. 491 | 492 | Arguments: 493 | debug: Determines if comments are to be written in generated code. 494 | """ 495 | self.comment('Move to caller local stack', debug) 496 | 497 | # Finalize the function call. Move the SP off the param list 498 | self.generate('R[SP] = R[SP] + 1;') 499 | 500 | return 501 | 502 | def _generate_get_id_in_mm(self, id_obj, id_location, idx_reg, debug): 503 | """Generate Get Identifier in Main Memory (Protected) 504 | 505 | Knowing the location in the stack and the offset (mm_ptr) value of 506 | a given index, code is generated to calculate the exact location of 507 | the identifier in main memory. 508 | 509 | If identifier is param, offset is the parameter offset. 
510 | If identifier is local, offset is the local offset. 511 | If identifier is global, offset is the local offset of program scope. 512 | 513 | Arguments: 514 | id_obj: The Identifier class object containing id data. 515 | id_location: Either 'global', 'param', or 'local' depending on the 516 | location in the stack where the identifier resides. 517 | idx_reg: The register number of the index expression. 518 | debug: Determines if comments are to be written in generated code. 519 | 520 | Returns: 521 | The register number of the calculated address of the identifier. 522 | """ 523 | # Get a new register to calculate the main memory address of this id 524 | id_reg = self.get_reg() 525 | 526 | self.generate('R[%d] = %d;' % (id_reg, id_obj.mm_ptr)) 527 | 528 | if id_obj.size is not None and idx_reg is not None: 529 | self.generate('R[%d] = R[%d] + R[%d];' % 530 | (id_reg, id_reg, idx_reg)) 531 | 532 | if id_location == 'param': 533 | self.comment('Param referenced', debug) 534 | self.generate('R[%d] = R[FP] + 1 + R[%d];' % (id_reg, id_reg)) 535 | elif id_location == 'global': 536 | self.comment('Global var referenced', debug) 537 | self.generate('R[%d] = MM_SIZE - 1 - R[%d];' % (id_reg, id_reg)) 538 | else: 539 | self.comment('Local var referenced', debug) 540 | self.generate('R[%d] = R[FP] - R[%d];' % (id_reg, id_reg)) 541 | 542 | return id_reg 543 | 544 | def generate_name(self, id_obj, id_location, idx_reg, debug): 545 | """Generate Name 546 | 547 | Generates all code necessary to place the contents of the memory 548 | location of a given identifier into a new register for computation. 549 | 550 | Arguments: 551 | id_obj: The Identifier class object containing id data. 552 | id_location: Either 'global', 'param', or 'local' depending on the 553 | location in the stack where the identifier resides. 554 | idx_reg: The register number of the index expression. 555 | debug: Determines if comments are to be written in generated code. 
def generate_assignment(self, id_obj, id_location, idx_reg, expr_reg,
                        debug):
    """Generate Assignment

    Generates the code which stores the outcome of an expression in the
    main memory location of the given identifier.

    Arguments:
        id_obj: The Identifier class object containing id data.
        id_location: Either 'global', 'param', or 'local' depending on the
            location in the stack where the identifier resides.
        idx_reg: The register number of the index expression.
        expr_reg: The register number of the expression outcome.
        debug: Determines if comments are to be written in generated code.
    """
    # Work out where the identifier lives in main memory
    dest_reg = self._generate_get_id_in_mm(id_obj, id_location, idx_reg,
                                           debug)

    # Write the expression register to that memory location
    store = 'MM[R[%d]] = R[%d];' % (dest_reg, expr_reg)
    self.generate(store)

    return
def generate_param_store(self, id_obj, id_location, debug):
    """Generate Param Store

    Calculates the memory location of the destination identifier and
    places the value found at the current SP in that location.

    Arguments:
        id_obj: The Identifier class object containing id data.
        id_location: Either 'global', 'param', or 'local' depending on the
            location in the stack where the identifier resides.
        debug: Determines if comments are to be written in generated code.
    """
    # No index register is supplied when resolving the destination
    dest_reg = self._generate_get_id_in_mm(id_obj, id_location, None, debug)

    # Copy the value at the stack pointer into the destination
    copy_back = 'MM[R[%d]] = MM[R[SP]];' % dest_reg
    self.generate(copy_back)

    return
def generate_operation(self, reg1, type1, reg2, type2, operation):
    """Generate Operation

    Generates the code which applies an operation to two operand
    registers. If neither operand is a float the operation is performed
    directly on the registers; otherwise both operands are loaded into the
    float buffers, the operation is performed there, and the result is
    copied back into the result register.

    Arguments:
        reg1: The register of the first operand.
        type1: The type of the first operand.
        reg2: The register of the second operand.
        type2: The type of the second operand.
        operation: The operation symbol to perform.

    Returns:
        The register number where the result of the operation
        is stored.
    """
    dest = self.get_reg()

    # Non-float operands are combined directly in the registers
    if not (type1 == 'float' or type2 == 'float'):
        self.generate('R[%d] = R[%d] %s R[%d];' %
                      (dest, reg1, operation, reg2))
        return dest

    # Each operand is either assigned to its float buffer (non-float) or
    # has its register contents copied in with memcpy (float)
    loads = (
        (reg1, type1, 'R_FLOAT_1 = R[%d];',
         'memcpy(&R_FLOAT_1, &R[%d], sizeof(float));'),
        (reg2, type2, 'R_FLOAT_2 = R[%d];',
         'memcpy(&R_FLOAT_2, &R[%d], sizeof(float));'),
    )
    for reg, operand_type, assign, copy_bits in loads:
        template = copy_bits if operand_type == 'float' else assign
        self.generate(template % reg)

    # Operate in the float buffers, then copy the result to the register
    self.generate('R_FLOAT_1 = R_FLOAT_1 %s R_FLOAT_2;' % operation)
    self.generate('memcpy(&R[%d], &R_FLOAT_1, sizeof(float));' % dest)

    return dest
def parse(self, src_path, dest_path):
    """Begin Parsing

    Begins the parse of the inputted source file.

    Arguments:
        src_path: The input source file to parse.
        dest_path: The output target file to write.

    Returns:
        True on success, False otherwise. Failure covers a source or
        destination which could not be attached, a syntax error which
        aborted the parse, any error recorded while parsing, and a
        generated code file which could not be written.
    """
    # Attach the source file for reading
    if not self.attach_source(src_path):
        return False

    # Attach the destination file for writing
    if not self.attach_destination(dest_path):
        return False

    # Advance the tokens twice to populate both current and future tokens
    self._advance_token()
    self._advance_token()

    # Add all runtime functions
    self._add_runtime()

    # Generate the compiled code header to handle runtime overhead
    self.generate_header()

    # Begin parsing the root language structure
    try:
        self._parse_program()
    except ParserSyntaxError:
        return False

    # Generate the compiled code footer
    self.generate_footer()

    # Make sure there's no junk after the end of program. The warning
    # needs a numeric line (it is formatted with %d), so report the line
    # of the unexpected token rather than an empty string.
    if not self._check('eof'):
        self._warning('Expected end of file after "end program"',
                      self._current.line)

    # If errors were encountered, don't write code
    if self._has_errors:
        return False

    # Commit the code buffer to the output code file. A failed write must
    # be reported to the caller, so the result of the commit is returned.
    return self.commit()
114 | """ 115 | # The runtime_functions list is defined in the CodeGenerator class 116 | for func_name in self.runtime_functions: 117 | # Get all parameters for these functions 118 | param_ids = [] 119 | param_list = self.runtime_functions[func_name] 120 | for index, param in enumerate(param_list): 121 | # Build up each param, add it to the list 122 | id_obj = Identifier(name=param[0], type=param[1], size=None, 123 | params=None, mm_ptr=(index+1)) 124 | p_obj = Parameter(id=id_obj, direction=param[2]) 125 | param_ids.append(p_obj) 126 | 127 | # Build the function's identifier 128 | func_id = Identifier(name=func_name, type='procedure', size=None, 129 | params=param_ids, mm_ptr=1) 130 | 131 | # Add the function to the global scope of the identifier table 132 | self._ids.add(func_id, is_global=True) 133 | 134 | return 135 | 136 | def _warning(self, msg, line, prefix='Warning'): 137 | """Print Parser Warning Message (Protected) 138 | 139 | Prints a parser warning message with details about the expected token 140 | and the current token being parsed. 141 | 142 | Arguments: 143 | msg: The warning message to display. 144 | line: The line where the warning has occurred. 145 | prefix: A string value to be printed at the start of the warning. 146 | Overwritten for error messages. (Default: 'Warning') 147 | """ 148 | print('%s: "%s", line %d' % (prefix, self._src_path, line)) 149 | print(' %s' % msg) 150 | print(' %s' % self._get_line(line)) 151 | 152 | return 153 | 154 | def _syntax_error(self, expected): 155 | """Print Syntax Error Message (Protected) 156 | 157 | Prints a syntax error message with details about the expected token 158 | and the current token being parsed. After error printing, an exception 159 | is raised to be caught and resolved by parent nodes. 160 | 161 | Arguments: 162 | expected: A string containing the expected token type/value. 
163 | 164 | Raises: 165 | ParserSyntaxError: If this method is being called, an error has been 166 | encountered during parsing. 167 | """ 168 | token = self._current 169 | 170 | # Print the error message 171 | msg = ('Expected %s, encountered "%s" (%s)' % 172 | (expected, token.value, token.type)) 173 | self._warning(msg, token.line, prefix='Error') 174 | 175 | self._has_errors = True 176 | raise ParserSyntaxError() 177 | 178 | def _name_error(self, msg, name, line): 179 | """Print Name Error Message (Protected) 180 | 181 | Prints a name error message with details about the encountered 182 | identifier which caused the error. 183 | 184 | Arguments: 185 | msg: The reason for the error. 186 | name: The name of the identifier where the name error occurred. 187 | line: The line where the name error occurred. 188 | """ 189 | msg = '%s: %s' % (name, msg) 190 | self._warning(msg, line, prefix='Error') 191 | 192 | self._has_errors = True 193 | return 194 | 195 | def _type_error(self, expected, encountered, line): 196 | """Print Type Error Message (Protected) 197 | 198 | Prints a type error message with details about the expected type an 199 | the type that was encountered. 200 | 201 | Arguments: 202 | expected: A string containing the expected token type. 203 | encountered: A string containing the type encountered. 204 | line: The line on which the type error occurred. 205 | """ 206 | msg = 'Expected %s type, encountered %s' % (expected, encountered) 207 | self._warning(msg, line, prefix='Error') 208 | 209 | self._has_errors = True 210 | return 211 | 212 | def _runtime_error(self, msg, line): 213 | """Print Runtime Error Message (Protected) 214 | 215 | Prints a runtime error message with details about the runtime error. 216 | 217 | Arguments: 218 | msg: The reason for the error. 219 | line: The line where the runtime error occurred. 
220 | """ 221 | self._warning(msg, line, prefix='Error') 222 | 223 | self._has_errors = True 224 | return 225 | 226 | def _advance_token(self): 227 | """Advance Tokens (Protected) 228 | 229 | Populates the 'current' token with the 'future' token and populates 230 | the 'future' token with the next token in the source file. 231 | """ 232 | self._previous = self._current 233 | self._current = self._future 234 | 235 | if self._future is None or self._future.type != 'eof': 236 | self._future = self.next_token() 237 | 238 | return 239 | 240 | def _check(self, expected_type, expected_value=None, check_future=False): 241 | """Check Token (Protected) 242 | 243 | Peeks at the token to see if the current token matches the given 244 | type and value. If it doesn't, don't make a big deal about it. 245 | 246 | Arguments: 247 | expected_type: The expected type of the token. 248 | expected_value: The expected value of the token. (Default: None) 249 | check_future: If True, the future token is checked (Default: False) 250 | 251 | Returns: 252 | True if the token matches the expected value, False otherwise. 253 | """ 254 | token = self._current 255 | 256 | if check_future: 257 | token = self._future 258 | 259 | return (token.type == expected_type and 260 | (token.value == expected_value or expected_value is None)) 261 | 262 | def _accept(self, expected_type, expected_value=None): 263 | """Accept Token (Protected) 264 | 265 | Compares the token to an expected type and value. If it matches, then 266 | consume the token. If not, don't make a big deal about it. 267 | 268 | Arguments: 269 | expected_type: The expected type of the token. 270 | expected_value: The expected value of the token. (Default: None) 271 | 272 | Returns: 273 | True if the token matches the expected value, False otherwise. 
274 | """ 275 | if self._check(expected_type, expected_value): 276 | self._advance_token() 277 | return True 278 | 279 | return False 280 | 281 | def _match(self, expected_type, expected_value=None): 282 | """Match Token (Protected) 283 | 284 | Compares the token to an expected type and value. If it matches, then 285 | consume the token. If not, then throw an error and panic. 286 | 287 | Arguments: 288 | expected_type: The expected type of the token. 289 | expected_value: The expected value of the token. (Default: None) 290 | 291 | Returns: 292 | The matched Token class object if successful. 293 | """ 294 | # Check the id_type, if we specified debug, print everything matched 295 | if self._accept(expected_type, expected_value): 296 | return self._previous 297 | 298 | # Something different than expected was encountered 299 | if expected_value is not None: 300 | self._syntax_error('"'+expected_value+'" ('+expected_type+')') 301 | else: 302 | self._syntax_error(expected_type) 303 | 304 | def _resync_at_token(self, token_type, token_value=None): 305 | """Resync at Token 306 | 307 | Finds the next token of the given type and value and moves the 308 | current token to that point. Code parsing can continue from there. 309 | 310 | Arguments: 311 | token_type: The id_type of the token to resync. 312 | token_value: The value of the token to resync. (Default: None) 313 | """ 314 | while not self._check(token_type, token_value): 315 | self._advance_token() 316 | 317 | return 318 | 319 | def _parse_program(self): 320 | """ (Protected) 321 | 322 | Parses the language structure. 323 | 324 | ::= 325 | 326 | """ 327 | id_obj = self._parse_program_header() 328 | self._parse_program_body(id_obj) 329 | 330 | return 331 | 332 | def _parse_program_header(self): 333 | """ (Protected) 334 | 335 | Parses the language structure. 336 | 337 | ::= 338 | 'program' 'is' 339 | 340 | Returns: 341 | The id object with information about the procedure identifier. 
342 | """ 343 | self._match('keyword', 'program') 344 | 345 | id_name = self._current.value 346 | self._match('identifier') 347 | 348 | # Generate procedure label. This will be stored with the identifier 349 | # in place of the mm_ptr attribute since it will not be used 350 | label_id = self.get_label_id() 351 | 352 | # Add the new identifier to the global table 353 | id_obj = Identifier(id_name, 'program', None, None, label_id) 354 | self._ids.add(id_obj, is_global=True) 355 | 356 | self._match('keyword', 'is') 357 | 358 | # Generate the program entry point code 359 | self.generate_program_entry(id_obj.name, id_obj.mm_ptr, self.debug) 360 | 361 | # Push the scope to the program body level 362 | self._ids.push_scope(id_obj.name) 363 | 364 | # Add the program to the base scope so it can be resolved as owner 365 | self._ids.add(id_obj) 366 | 367 | return id_obj 368 | 369 | def _parse_program_body(self, program_id): 370 | """ (Protected) 371 | 372 | Parses the language structure. 373 | 374 | ::= 375 | ( ';' )* 376 | 'begin' 377 | ( ';' )* 378 | 'end' 'program' 379 | 380 | Arguments: 381 | program_id: The identifier object for the program. 
382 | """ 383 | local_var_size = 0 384 | 385 | while not self._accept('keyword', 'begin'): 386 | try: 387 | size = self._parse_declaration() 388 | 389 | if size is not None: 390 | local_var_size += int(size) 391 | except ParserError: 392 | self._resync_at_token('symbol', ';') 393 | 394 | self._match('symbol', ';') 395 | 396 | # Label the entry point for the program 397 | self.generate('%s_%d_begin:' % (program_id.name, program_id.mm_ptr)) 398 | self.tab_push() 399 | 400 | if local_var_size != 0: 401 | self.comment('Allocating space for local variables', self.debug) 402 | self.generate('R[SP] = R[SP] - %d;' % local_var_size) 403 | 404 | while not self._accept('keyword', 'end'): 405 | try: 406 | self._parse_statement() 407 | except ParserError: 408 | self._resync_at_token('symbol', ';') 409 | 410 | self._match('symbol', ';') 411 | 412 | self._match('keyword', 'program') 413 | 414 | # Pop out of the program body scope 415 | self._ids.pop_scope() 416 | self.tab_pop() 417 | 418 | return 419 | 420 | def _parse_declaration(self): 421 | """ (Protected) 422 | 423 | Parses the language structure. 424 | 425 | ::= 426 | [ 'global' ] 427 | [ 'global' ] 428 | 429 | Returns: 430 | The size of any variable declared. None if procedure. 431 | """ 432 | is_global = False 433 | 434 | id_obj = None 435 | size = None 436 | 437 | if self._accept('keyword', 'global'): 438 | is_global = True 439 | 440 | if self._first_procedure_declaration(): 441 | self._parse_procedure_declaration(is_global=is_global) 442 | elif self._first_variable_declaration(): 443 | id_obj = self._parse_variable_declaration(is_global=is_global) 444 | else: 445 | self._syntax_error('procedure or variable declaration') 446 | 447 | if id_obj is not None: 448 | size = id_obj.size if id_obj.size is not None else 1 449 | 450 | return size 451 | 452 | def _first_variable_declaration(self): 453 | """first() (Protected) 454 | 455 | Determines if current token matches the first terminals. 
def _parse_variable_declaration(self, is_global=False, is_param=False):
    """<variable_declaration> (Protected)

    Parses the <variable_declaration> language structure.

    <variable_declaration> ::=
        <type_mark> <identifier> [ '[' <number> ']' ]

    Arguments:
        is_global: Denotes if the variable is to be globally scoped.
            (Default: False)
        is_param: Denotes if the variable is a procedure parameter. A
            parameter gets its memory from the parameter space and is not
            added to the identifier table here. (Default: False)

    Returns:
        The Identifier class object of the variable encountered.

    Raises:
        ParserTypeError: If an array size is given which is not an
            integer.
    """
    declared_type = self._parse_type_mark()

    # Formally match the token to an identifier type
    name_token = self._match('identifier')

    # Stays None unless an array size follows the identifier
    array_size = None

    if self._accept('symbol', '['):
        bound_type = self._parse_number(generate_code=False)

        # Read the array size and its line from the previous token
        bound_token = self._previous
        array_size = bound_token.value

        # Memory can only be allocated for an integer number of elements
        if bound_type != 'integer':
            self._type_error('integer', bound_type, bound_token.line)
            raise ParserTypeError()

        self._match('symbol', ']')

    # Get the memory space pointer for this variable
    mm_ptr = self.get_mm(array_size, is_param=is_param)

    id_obj = Identifier(name_token.value, declared_type, array_size, None,
                        mm_ptr)

    # Parameters are handed back without touching the identifier table
    if is_param:
        return id_obj

    try:
        self._ids.add(id_obj, is_global=is_global)
    except ParserNameError as e:
        self._name_error(str(e), name_token.value, name_token.line)

    return id_obj
584 | 585 | ::= 586 | 'procedure' '(' [ ] ')' 587 | 588 | Arguments: 589 | is_global: Denotes if the procedure is to be globally scoped. 590 | """ 591 | self._match('keyword', 'procedure') 592 | 593 | id_name = self._current.value 594 | id_line = self._current.line 595 | 596 | self._match('identifier') 597 | self._match('symbol', '(') 598 | 599 | params = [] 600 | 601 | if not self._check('symbol', ')'): 602 | params = self._parse_parameter_list(params) 603 | 604 | self._match('symbol', ')') 605 | 606 | # Generate procedure label. This will be stored with the identifier 607 | # in place of the mm_ptr attribute since it will not be used 608 | label_id = self.get_label_id() 609 | 610 | id_obj = Identifier(id_name, 'procedure', None, params, label_id) 611 | 612 | try: 613 | # Add the procedure identifier to the parent and its own table 614 | self._ids.add(id_obj, is_global=is_global) 615 | self._ids.push_scope(id_obj.name) 616 | self._ids.add(id_obj) 617 | except ParserNameError: 618 | self._name_error('name already declared at this scope', id_name, 619 | id_line) 620 | 621 | # Attempt to add each encountered param at the procedure scope 622 | for param in params: 623 | try: 624 | self._ids.add(param.id, is_global=False) 625 | except ParserNameError: 626 | self._name_error('name already declared at global scope', 627 | param.id.name, id_line) 628 | 629 | # Define the entry point for the function w/ unique identifier 630 | self.generate('%s_%d:' % (id_obj.name, id_obj.mm_ptr)) 631 | self.tab_push() 632 | 633 | # Define the beginning of the function body 634 | self.generate('goto %s_%d_begin;' % (id_obj.name, id_obj.mm_ptr)) 635 | self.generate('') 636 | 637 | return id_obj 638 | 639 | def _parse_procedure_body(self, procedure_id): 640 | """ (Protected) 641 | 642 | Parses the language structure. 643 | 644 | ::= 645 | ( ';' )* 646 | 'begin' 647 | ( ';' )* 648 | 'end' 'procedure' 649 | 650 | Arguments: 651 | procedure_id: The identifier object for the procedure. 
652 | """ 653 | local_var_size = 0 654 | 655 | # Reset the local pointer for the local variables. 656 | self.reset_local_ptr() 657 | self.reset_param_ptr() 658 | 659 | # Accept any declarations 660 | while not self._accept('keyword', 'begin'): 661 | try: 662 | size = self._parse_declaration() 663 | 664 | # If this was a local var, allocate space for it 665 | if size is not None: 666 | local_var_size += size 667 | except ParserError: 668 | self._resync_at_token('symbol', ';') 669 | 670 | self._match('symbol', ';') 671 | 672 | # Define the function begin point 673 | self.generate('%s_%d_begin:' % 674 | (procedure_id.name, procedure_id.mm_ptr)) 675 | 676 | self.tab_push() 677 | 678 | if local_var_size != 0: 679 | self.comment('Allocating space for local variables', self.debug) 680 | self.generate('R[SP] = R[SP] - %d;' % local_var_size) 681 | 682 | # Accept any statements 683 | while not self._accept('keyword', 'end'): 684 | try: 685 | self._parse_statement() 686 | except ParserError: 687 | self._resync_at_token('symbol', ';') 688 | 689 | self._match('symbol', ';') 690 | 691 | self._match('keyword', 'procedure') 692 | 693 | # Generate code to jump back to the caller scope 694 | self.generate_return(self.debug) 695 | self.generate('') 696 | 697 | self.tab_pop() 698 | self._ids.pop_scope() 699 | self.tab_pop() 700 | 701 | return 702 | 703 | def _parse_parameter_list(self, params): 704 | """ (Protected) 705 | 706 | Parse the language structure. 707 | 708 | ::= 709 | ',' | 710 | 711 | 712 | Arguments: 713 | params: A list of Parameter named tuples associated with the 714 | procedure. 715 | 716 | Returns: 717 | An completed list of all Parameter named tuples associated 718 | with the procedure. 
719 | """ 720 | # Get one parameter 721 | param = self._parse_parameter() 722 | params.append(param) 723 | 724 | # Get all following parameters 725 | if self._accept('symbol', ','): 726 | params = self._parse_parameter_list(params) 727 | 728 | # All parameters found will be returned in the list 729 | return params 730 | 731 | def _parse_parameter(self): 732 | """ (Protected) 733 | 734 | Parse the language structure. 735 | 736 | ::= 737 | ( 'in' | 'out' ) 738 | """ 739 | # Return the id object, but don't add it to the identifier table 740 | # yet or get a memory location for it. This will be done when the 741 | # procedure is called 742 | id_obj = self._parse_variable_declaration(is_param=True) 743 | 744 | direction = None 745 | 746 | if self._accept('keyword', 'in'): 747 | direction = 'in' 748 | elif self._accept('keyword', 'out'): 749 | direction = 'out' 750 | else: 751 | self._syntax_error('"in" or "out"') 752 | 753 | return Parameter(id_obj, direction) 754 | 755 | def _parse_statement(self): 756 | """ (Protected) 757 | 758 | Parse the language structure. 759 | 760 | ::= 761 | | 762 | | 763 | | 764 | | 765 | 766 | """ 767 | if self._accept('keyword', 'return'): 768 | # Go to the return label to exit the procedure/program 769 | self.generate_return(self.debug) 770 | elif self._first_if_statement(): 771 | self._parse_if_statement() 772 | elif self._first_loop_statement(): 773 | self._parse_loop_statement() 774 | elif self._first_procedure_call(): 775 | self._parse_procedure_call() 776 | elif self._first_assignment_statement(): 777 | self._parse_assignment_statement() 778 | else: 779 | self._syntax_error('statement') 780 | 781 | return 782 | 783 | def _first_assignment_statement(self): 784 | """first() (Protected) 785 | 786 | Determines if current token matches the first terminals. 787 | 788 | first() ::= 789 | 790 | 791 | Returns: 792 | True if current token matches a first terminal, False otherwise. 
793 | """ 794 | return self._check('identifier') 795 | 796 | def _parse_assignment_statement(self): 797 | """ (Protected) 798 | 799 | Parses the language structure. 800 | 801 | ::= 802 | ':=' 803 | """ 804 | id_name = self._current.value 805 | id_line = self._current.line 806 | 807 | dest_type = self._parse_destination() 808 | 809 | # Grab the last register used in case this variable is an array 810 | index_reg = self.get_reg(inc=False) 811 | 812 | # Check to make sure this is a valid identifier 813 | id_obj = self._ids.find(id_name) 814 | 815 | self._match('symbol', ':=') 816 | 817 | expr_type = self._parse_expression() 818 | 819 | # Get the register used for the last expression 820 | expr_reg = self.get_reg(inc=False) 821 | 822 | if dest_type != expr_type: 823 | self._type_error(dest_type, expr_type, id_line) 824 | 825 | # Determine the location of the identifier in the stack 826 | id_location = self._ids.get_id_location(id_name) 827 | 828 | # Verify the direction of the id if it is a param 829 | if id_location == 'param': 830 | direction = self._ids.get_param_direction(id_name) 831 | if direction != 'out': 832 | self._type_error('\'out\' param', 833 | '\'%s\' param' % direction, id_line) 834 | raise ParserTypeError() 835 | 836 | # Generate all code associated with retrieving this value 837 | self.generate_assignment(id_obj, id_location, index_reg, expr_reg, 838 | self.debug) 839 | 840 | return 841 | 842 | def _first_if_statement(self): 843 | """first() (Protected) 844 | 845 | Determines if current token matches the first terminals. 846 | 847 | first() ::= 848 | 'if' 849 | 850 | Returns: 851 | True if current token matches a first terminal, False otherwise. 852 | """ 853 | return self._check('keyword', 'if') 854 | 855 | def _parse_if_statement(self): 856 | """ (Protected) 857 | 858 | Parses the language structure. 
859 | 860 | ::= 861 | 'if' '(' ')' 'then' ( ';' )+ 862 | [ 'else' ( ';' )+ ] 863 | 'end' 'if' 864 | """ 865 | self._match('keyword', 'if') 866 | self._match('symbol', '(') 867 | self._parse_expression() 868 | self._match('symbol', ')') 869 | self._match('keyword', 'then') 870 | 871 | label_id = self.get_label_id() 872 | expr_reg = self.get_reg(inc=False) 873 | 874 | self.generate('if (!R[%d]) goto else_%d;' % (expr_reg, label_id)) 875 | self.tab_push() 876 | 877 | while True: 878 | try: 879 | self._parse_statement() 880 | except ParserError: 881 | self._resync_at_token('symbol', ';') 882 | 883 | self._match('symbol', ';') 884 | 885 | if self._check('keyword', 'else') or self._check('keyword', 'end'): 886 | break 887 | 888 | self.generate('goto endif_%d;' % label_id) 889 | 890 | self.tab_pop() 891 | self.generate('else_%d:' % label_id) 892 | self.tab_push() 893 | 894 | if self._accept('keyword', 'else'): 895 | while True: 896 | try: 897 | self._parse_statement() 898 | except ParserError: 899 | self._resync_at_token('symbol', ';') 900 | 901 | self._match('symbol', ';') 902 | 903 | if self._check('keyword', 'end'): 904 | break 905 | 906 | self._match('keyword', 'end') 907 | self._match('keyword', 'if') 908 | 909 | self.tab_pop() 910 | self.generate('endif_%d:' % label_id) 911 | 912 | return 913 | 914 | def _first_loop_statement(self): 915 | """first() (Protected) 916 | 917 | Determines if current token matches the first terminals. 918 | 919 | first() ::= 920 | 'for' 921 | 922 | Returns: 923 | True if current token matches a first terminal, False otherwise. 924 | """ 925 | return self._check('keyword', 'for') 926 | 927 | def _parse_loop_statement(self): 928 | """ (Protected) 929 | 930 | Parses the language structure. 
931 | 932 | ::= 933 | 'for' '(' ';' ')' 934 | ( ';' )* 935 | 'end' 'for' 936 | """ 937 | self._match('keyword', 'for') 938 | self._match('symbol', '(') 939 | 940 | label_id = self.get_label_id() 941 | self.generate('loop_%d:' % label_id) 942 | self.tab_push() 943 | 944 | try: 945 | self._parse_assignment_statement() 946 | except ParserError: 947 | self._resync_at_token('symbol', ';') 948 | 949 | self._match('symbol', ';') 950 | 951 | self._parse_expression() 952 | self._match('symbol', ')') 953 | 954 | expr_reg = self.get_reg(inc=False) 955 | self.generate('if (!R[%d]) goto endloop_%d;' % (expr_reg, label_id)) 956 | 957 | while not self._accept('keyword', 'end'): 958 | try: 959 | self._parse_statement() 960 | except ParserError: 961 | self._resync_at_token('symbol', ';') 962 | 963 | self._match('symbol', ';') 964 | 965 | self._match('keyword', 'for') 966 | 967 | self.generate('goto loop_%d;' % label_id) 968 | self.tab_pop() 969 | self.generate('endloop_%d:' % label_id) 970 | 971 | return 972 | 973 | def _first_procedure_call(self): 974 | """first() (Protected) 975 | 976 | Determines if current token matches the first terminals. The second 977 | terminal is checked using the future token in this case to distinguish 978 | the first() from first(). 979 | 980 | first() ::= 981 | '(' 982 | 983 | Returns: 984 | True if current token matches a first terminal, False otherwise. 985 | """ 986 | return self._check('symbol', '(', check_future=True) 987 | 988 | def _parse_procedure_call(self): 989 | """ (Protected) 990 | 991 | Parses the language structure. 
992 | 993 | ::= 994 | '(' [ ] ')' 995 | """ 996 | # Match an identifier, check to make sure the identifier is procedure 997 | id_name = self._current.value 998 | id_line = self._current.line 999 | 1000 | self._match('identifier') 1001 | 1002 | try: 1003 | id_obj = self._ids.find(id_name) 1004 | except ParserNameError as e: 1005 | self._name_error('procedure has not been declared', id_name, 1006 | id_line) 1007 | raise e 1008 | 1009 | if id_obj.type != 'procedure': 1010 | self._type_error('procedure', id_obj.type, id_line) 1011 | raise ParserTypeError() 1012 | 1013 | self._match('symbol', '(') 1014 | 1015 | out_names = [] 1016 | 1017 | if not self._check('symbol', ')'): 1018 | num_args, out_names = self._parse_argument_list( 1019 | id_obj.params, 1020 | out_names, 1021 | index=0) 1022 | 1023 | # Make sure that too few arguments are not used 1024 | if num_args < len(id_obj.params): 1025 | self._runtime_error( 1026 | 'procedure call accepts %d argument(s), %d given' % 1027 | (len(id_obj.params), num_args), id_line) 1028 | 1029 | raise ParserRuntimeError() 1030 | 1031 | self._match('symbol', ')') 1032 | 1033 | # Generate all procedure call code 1034 | self.generate_procedure_call(id_obj.name, id_obj.mm_ptr, self.debug) 1035 | 1036 | # Pop parameters off the stack 1037 | for index, param in enumerate(id_obj.params): 1038 | out_name = out_names[index] 1039 | 1040 | self.generate_param_pop(param.id.name, self.debug) 1041 | 1042 | # If this is an outbound parameter, we must write it to its 1043 | # memory location 1044 | if param.direction == 'out': 1045 | # Get the identifier object of the destination 1046 | out_id = self._ids.find(out_name) 1047 | 1048 | # Determine where on the stack this identifier exists 1049 | out_location = self._ids.get_id_location(out_name) 1050 | 1051 | # Store the parameter in the appropriate location 1052 | self.generate_param_store(out_id, out_location, self.debug) 1053 | 1054 | # Finish the procedure call 1055 | 
self.generate_procedure_call_end(self.debug) 1056 | 1057 | return 1058 | 1059 | def _parse_argument_list(self, params, out_names, index=0): 1060 | """ (Protected) 1061 | 1062 | Parses language structure. 1063 | 1064 | ::= 1065 | ',' | 1066 | 1067 | 1068 | Arguments: 1069 | params: A list of Parameter namedtuple objects allowed in the 1070 | procedure call. 1071 | out_names: A list of identifier names that are being used in this 1072 | procedure call and must be written back. 1073 | index: The index in params with which to match the found param. 1074 | (Default: 0) 1075 | 1076 | Returns: 1077 | A tuple (index, out_names) consisting of the number of arguments 1078 | encountered and a list of the identifiers used to write back. 1079 | """ 1080 | arg_line = self._current.line 1081 | arg_type = None 1082 | 1083 | # Make sure that too many arguments are not used 1084 | if index > len(params) - 1: 1085 | self._runtime_error('procedure call accepts only %d argument(s)' % 1086 | len(params), arg_line) 1087 | raise ParserRuntimeError() 1088 | 1089 | # Get the parameter information for this position in the arg list 1090 | param = params[index] 1091 | 1092 | if param.direction == 'out': 1093 | # We may only parse a single identifier if the direction is 'out' 1094 | arg_name = self._current.value 1095 | arg_type = self._parse_name() 1096 | 1097 | out_names.append(arg_name) 1098 | elif param.direction == 'in': 1099 | # This is a 'in' parameter with only one element (not array) 1100 | arg_type = self._parse_expression() 1101 | 1102 | out_names.append(None) 1103 | 1104 | # Get the last reg assignment in the expr. 
This is argument's register 1105 | expr_reg = self.get_reg(inc=False) 1106 | 1107 | if arg_type != param.id.type: 1108 | self._type_error(param.id.type, arg_type, arg_line) 1109 | 1110 | index += 1 1111 | 1112 | if self._accept('symbol', ','): 1113 | index, out_names = self._parse_argument_list( 1114 | params, 1115 | out_names, 1116 | index=index) 1117 | 1118 | # Push the parameters onto the stack in reverse order. The last param 1119 | # will reach this point first 1120 | self.generate_param_push(expr_reg, self.debug) 1121 | 1122 | return index, out_names 1123 | 1124 | def _parse_destination(self): 1125 | """ (Protected) 1126 | 1127 | Parses the language structure. 1128 | 1129 | ::= 1130 | [ '[' ']' ] 1131 | 1132 | Returns: 1133 | Type of the destination identifier as a string. 1134 | """ 1135 | id_name = self._current.value 1136 | id_line = self._current.line 1137 | 1138 | self._match('identifier') 1139 | 1140 | # Make sure that identifier is valid for the scope 1141 | try: 1142 | id_obj = self._ids.find(id_name) 1143 | except ParserNameError as e: 1144 | self._name_error('not declared in this scope', id_name, id_line) 1145 | raise e 1146 | 1147 | # Check type to make sure it's a variable 1148 | if not id_obj.type in ['integer', 'float', 'bool', 'string']: 1149 | self._type_error('variable', id_obj.type, id_line) 1150 | raise ParserTypeError() 1151 | 1152 | id_type = id_obj.type 1153 | 1154 | if self._accept('symbol', '['): 1155 | expr_line = self._current.line 1156 | expr_type = self._parse_expression() 1157 | 1158 | if expr_type != 'integer': 1159 | self._type_error('integer', expr_type, expr_line) 1160 | 1161 | self._accept('symbol', ']') 1162 | elif id_obj.size is not None: 1163 | self._runtime_error('%s: array requires index' % id_name, id_line) 1164 | 1165 | return id_type 1166 | 1167 | def _parse_expression(self): 1168 | """ (Protected) 1169 | 1170 | Parses language structure. 
1171 | 1172 | ::= 1173 | '&' | 1174 | '|' | 1175 | [ 'not' ] 1176 | 1177 | Returns: 1178 | The type value of the expression. 1179 | """ 1180 | self.comment('Parsing expression', self.debug) 1181 | 1182 | negate = False 1183 | 1184 | if self._accept('keyword', 'not'): 1185 | negate = True 1186 | 1187 | line = self._current.line 1188 | id_type = self._parse_arith_op() 1189 | 1190 | if negate and id_type not in ['integer', 'bool']: 1191 | self._type_error('integer or bool', id_type, line) 1192 | raise ParserTypeError() 1193 | 1194 | while True: 1195 | operand1 = self.get_reg(inc=False) 1196 | 1197 | if self._accept('symbol', '&'): 1198 | operation = '&' 1199 | elif self._accept('symbol', '|'): 1200 | operation = '|' 1201 | else: 1202 | break 1203 | 1204 | if id_type not in ['integer', 'bool']: 1205 | self._type_error('integer or bool', id_type, line) 1206 | raise ParserTypeError() 1207 | 1208 | next_type = self._parse_arith_op() 1209 | 1210 | operand2 = self.get_reg(inc=False) 1211 | 1212 | if next_type not in ['integer', 'bool']: 1213 | self._type_error('integer or bool', next_type, line) 1214 | raise ParserTypeError() 1215 | 1216 | result = self.generate_operation(operand1, id_type, operand2, 1217 | next_type, operation) 1218 | 1219 | if negate: 1220 | self.generate('R[%d] = ~R[%d];' % (result, result)) 1221 | 1222 | return id_type 1223 | 1224 | def _parse_arith_op(self): 1225 | """ (Protected) 1226 | 1227 | Parses language structure. 1228 | 1229 | ::= 1230 | '+' | 1231 | '-' | 1232 | 1233 | 1234 | Returns: 1235 | The type value of the expression. 
1236 | """ 1237 | line = self._current.line 1238 | id_type = self._parse_relation() 1239 | 1240 | while True: 1241 | operand1 = self.get_reg(inc=False) 1242 | 1243 | if self._accept('symbol', '+'): 1244 | operation = '+' 1245 | elif self._accept('symbol', '-'): 1246 | operation = '-' 1247 | else: 1248 | break 1249 | 1250 | if id_type not in ['integer', 'float']: 1251 | self._type_error('integer or float', id_type, line) 1252 | raise ParserTypeError() 1253 | 1254 | next_type = self._parse_relation() 1255 | 1256 | operand2 = self.get_reg(inc=False) 1257 | 1258 | if next_type not in ['integer', 'float']: 1259 | self._type_error('integer or float', next_type, line) 1260 | raise ParserTypeError() 1261 | 1262 | self.generate_operation(operand1, id_type, operand2, next_type, 1263 | operation) 1264 | 1265 | return id_type 1266 | 1267 | def _parse_relation(self): 1268 | """ (Protected) 1269 | 1270 | Parses language structure. 1271 | 1272 | ::= 1273 | '<' | 1274 | '>' | 1275 | '>=' | 1276 | '<=' | 1277 | '==' | 1278 | '!=' | 1279 | 1280 | 1281 | Returns: 1282 | The type value of the expression. 1283 | """ 1284 | line = self._current.line 1285 | id_type = self._parse_term() 1286 | 1287 | # Check for relational operators. 
Note that relational operators 1288 | # are only valid for integer or boolean tokens 1289 | while True: 1290 | operand1 = self.get_reg(inc=False) 1291 | 1292 | if self._accept('symbol', '<'): 1293 | operation = '<' 1294 | elif self._accept('symbol', '>'): 1295 | operation = '>' 1296 | elif self._accept('symbol', '<='): 1297 | operation = '<=' 1298 | elif self._accept('symbol', '>='): 1299 | operation = '>=' 1300 | elif self._accept('symbol', '=='): 1301 | operation = '==' 1302 | elif self._accept('symbol', '!='): 1303 | operation = '!=' 1304 | else: 1305 | break 1306 | 1307 | if id_type not in ['integer', 'bool']: 1308 | self._type_error('integer or bool', id_type, line) 1309 | raise ParserTypeError() 1310 | 1311 | next_type = self._parse_term() 1312 | 1313 | operand2 = self.get_reg(inc=False) 1314 | 1315 | if next_type not in ['integer', 'bool']: 1316 | self._type_error('integer or bool', next_type, line) 1317 | raise ParserTypeError() 1318 | 1319 | self.generate_operation(operand1, id_type, operand2, next_type, 1320 | operation) 1321 | 1322 | return id_type 1323 | 1324 | def _parse_term(self): 1325 | """ (Protected) 1326 | 1327 | Parses language structure. 1328 | 1329 | ::= 1330 | '*' | 1331 | '/' | 1332 | 1333 | 1334 | Returns: 1335 | The type value of the expression. 1336 | """ 1337 | line = self._current.line 1338 | id_type = self._parse_factor() 1339 | 1340 | # Check for multiplication or division operators. 
Note that these 1341 | # operators are only valid for integer or float values 1342 | while True: 1343 | operand1 = self.get_reg(inc=False) 1344 | 1345 | if self._accept('symbol', '*'): 1346 | operation = '*' 1347 | elif self._accept('symbol', '/'): 1348 | operation = '/' 1349 | else: 1350 | break 1351 | 1352 | if id_type not in ['integer', 'float']: 1353 | self._type_error('integer or float', id_type, line) 1354 | raise ParserTypeError() 1355 | 1356 | line = self._current.line 1357 | next_type = self._parse_factor() 1358 | 1359 | operand2 = self.get_reg(inc=False) 1360 | 1361 | if next_type not in ['integer', 'float']: 1362 | self._type_error('integer or float', next_type, line) 1363 | raise ParserTypeError() 1364 | 1365 | self.generate_operation(operand1, id_type, operand2, next_type, 1366 | operation) 1367 | 1368 | return id_type 1369 | 1370 | def _parse_factor(self): 1371 | """ (Protected) 1372 | 1373 | Parses language structure. 1374 | 1375 | ::= 1376 | '(' ')' | 1377 | [ '-' ] | 1378 | [ '-' ] | 1379 | | 1380 | 'true' | 1381 | 'false' 1382 | 1383 | Returns: 1384 | The type value of the expression. 
1385 | """ 1386 | id_type = None 1387 | 1388 | if self._accept('symbol', '('): 1389 | id_type = self._parse_expression() 1390 | self._match('symbol', ')') 1391 | elif self._accept('string'): 1392 | id_type = 'string' 1393 | str_val = self._previous.value 1394 | 1395 | self.generate('R[%d] = (int)"%s";' % (self.get_reg(), str_val)) 1396 | elif self._accept('keyword', 'true'): 1397 | id_type = 'bool' 1398 | 1399 | self.generate('R[%d] = 1;' % (self.get_reg())) 1400 | elif self._accept('keyword', 'false'): 1401 | id_type = 'bool' 1402 | 1403 | self.generate('R[%d] = 0;' % (self.get_reg())) 1404 | elif self._accept('symbol', '-'): 1405 | if self._first_name(): 1406 | id_type = self._parse_name() 1407 | elif self._check('integer') or self._check('float'): 1408 | id_type = self._parse_number(negate=True) 1409 | else: 1410 | self._syntax_error('variable name, integer, or float') 1411 | elif self._first_name(): 1412 | id_type = self._parse_name() 1413 | elif self._check('integer') or self._check('float'): 1414 | id_type = self._parse_number(negate=False) 1415 | else: 1416 | self._syntax_error('factor') 1417 | 1418 | return id_type 1419 | 1420 | def _first_name(self): 1421 | """first() (Protected) 1422 | 1423 | Determines if current token matches the first terminals. 1424 | 1425 | first() ::= 1426 | 1427 | 1428 | Returns: 1429 | True if current token matches a first terminal, False otherwise. 1430 | """ 1431 | return self._check('identifier') 1432 | 1433 | def _parse_name(self): 1434 | """ (Protected) 1435 | 1436 | Parses language structure. 
1437 | 1438 | ::= 1439 | [ '[' ']' ] 1440 | """ 1441 | id_name = self._current.value 1442 | id_line = self._current.line 1443 | 1444 | self._match('identifier') 1445 | 1446 | # Make sure that identifier is valid for the scope 1447 | try: 1448 | id_obj = self._ids.find(id_name) 1449 | id_type = id_obj.type 1450 | except ParserNameError as e: 1451 | self._name_error('not declared in this scope', id_name, id_line) 1452 | raise e 1453 | 1454 | # Check type to make sure it's a variable 1455 | if not id_type in ['integer', 'float', 'bool', 'string']: 1456 | self._type_error('variable', id_type, id_line) 1457 | raise ParserTypeError() 1458 | 1459 | if self._accept('symbol', '['): 1460 | index_type = self._parse_expression() 1461 | 1462 | if not index_type == 'integer': 1463 | self._type_error('integer', index_type, id_line) 1464 | raise ParserTypeError() 1465 | 1466 | self._match('symbol', ']') 1467 | elif id_obj.size is not None: 1468 | self._runtime_error('%s: array requires index' % id_name, id_line) 1469 | 1470 | # Get the last register allocated. The index will be here if it's used 1471 | index_reg = self.get_reg(inc=False) 1472 | 1473 | # Determine the location of the identifier in the stack 1474 | id_location = self._ids.get_id_location(id_name) 1475 | 1476 | # Verify the direction of the id if it is a param 1477 | if id_location == 'param': 1478 | direction = self._ids.get_param_direction(id_name) 1479 | if direction != 'in': 1480 | self._type_error('\'in\' param', 1481 | '\'%s\' param' % direction, id_line) 1482 | raise ParserTypeError() 1483 | 1484 | # Generate all code associated with retrieving this value 1485 | self.generate_name(id_obj, id_location, index_reg, self.debug) 1486 | 1487 | return id_type 1488 | 1489 | def _parse_number(self, negate=False, generate_code=True): 1490 | """Parse Number (Protected) 1491 | 1492 | Parses the language structure. 
1493 | 1494 | ::= 1495 | [0-9][0-9_]*[.[0-9_]*] 1496 | 1497 | Arguments: 1498 | negate: Determines if the number should be negated or not. 1499 | generate_code: Determines if code should be generated for the 1500 | parsed number or not. 1501 | 1502 | Returns: 1503 | The type of the parsed number. 1504 | """ 1505 | number = self._current.value 1506 | id_type = self._current.type 1507 | 1508 | # Parse the number (either float or integer type) 1509 | if not self._accept('integer') and not self._accept('float'): 1510 | self._syntax_error('number') 1511 | 1512 | # Generate the code for this number if desired 1513 | if generate_code: 1514 | self.generate_number(number, id_type, negate) 1515 | 1516 | return id_type 1517 | --------------------------------------------------------------------------------