├── lib
├── __init__.py
├── errors.py
├── datatypes.py
├── scanner.py
├── codegenerator.py
└── parser.py
├── tests
├── simpleadd_good.src
├── scopetest_good.src
├── recursiontest_good.src
├── globaltest_good.src
├── looptest_good.src
├── bigtest_good.src
├── codegen_good.src
├── bigtest_bad.src
└── runtime_good.src
├── compiler.py
└── README.md
/lib/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/simpleadd_good.src:
--------------------------------------------------------------------------------
1 | program simpleadd is
2 |
3 | integer int1;
4 | integer int2;
5 | integer result;
6 |
7 | begin
8 |
9 | // Get the first input
10 | putString("Enter 1st Integer:");
11 | getInteger(int1);
12 |
13 | // Get the second input
14 | putString("Enter 2nd Integer:");
15 | getInteger(int2);
16 |
17 | // Calculate the sum
18 | result := int1 + int2;
19 |
20 | putString("Result:");
21 | putInteger(result);
22 |
23 | end program
24 |
--------------------------------------------------------------------------------
/tests/scopetest_good.src:
--------------------------------------------------------------------------------
1 | // SCOPE TEST PROGRAM
2 | program scope_test is
3 | integer add1;
4 | integer add2;
5 | integer sum;
6 |
7 | procedure add(integer add1 in, integer add2 in, integer sum out)
8 | begin
9 | sum := add1 + add2;
10 | return;
11 | end procedure;
12 |
13 | begin
14 |
15 | add1 := 1;
16 | add2 := 5;
17 |
18 | add(add1, add2, sum);
19 |
20 | if (sum == 6) then
21 | putString("SUCCESS");
22 | else
23 | putString("FAILURE");
24 | end if;
25 |
26 | return;
27 |
28 | end program
29 |
--------------------------------------------------------------------------------
/tests/recursiontest_good.src:
--------------------------------------------------------------------------------
1 | program recursiontest is
2 |
3 | integer start_val;
4 | global integer end_val;
5 |
6 | procedure count_to_ten(integer current_val in)
7 | begin
8 | if (current_val < 10) then
9 | count_to_ten(current_val + 1);
10 | else
11 | end_val := current_val;
12 | end if;
13 | end procedure;
14 |
15 | begin
16 |
17 | start_val := 1;
18 | end_val := 0;
19 |
20 | count_to_ten(start_val);
21 |
22 | if (end_val == 10) then
23 | putString("SUCCESS");
24 | else
25 | putString("FAILURE");
26 | end if;
27 |
28 | end program
29 |
--------------------------------------------------------------------------------
/tests/globaltest_good.src:
--------------------------------------------------------------------------------
1 | // SCOPE TEST PROGRAM
2 | program scope_test is
3 | global integer my_global_int;
4 | global integer result;
5 |
6 | global procedure increment_global()
7 | begin
8 | result := my_global_int + 1;
9 | end procedure;
10 |
11 | procedure calls_increment_global()
12 | begin
13 | // See if we can call a global procedure from this scope
14 | increment_global();
15 | end procedure;
16 |
17 | begin
18 | my_global_int := 9;
19 | result := 0;
20 |
21 | calls_increment_global();
22 |
23 | if (result == 10) then
24 | putString("SUCCESS");
25 | else
26 | putString("FAILURE");
27 | end if;
28 |
29 | return;
30 |
31 | end program
32 |
--------------------------------------------------------------------------------
/tests/looptest_good.src:
--------------------------------------------------------------------------------
1 | program looptest is
2 |
3 | integer result;
4 | global integer num_loops;
5 |
6 | procedure do_loops(integer result out)
7 | integer counter;
8 | begin
9 | counter := 0;
10 |
11 | putString("Number of Loops:");
12 | putInteger(num_loops);
13 |
14 | for (counter := counter + 1; counter <= num_loops)
15 | putString("Current Counter:");
16 | putInteger(counter);
17 | result := counter;
18 | end for;
19 | end procedure;
20 |
21 | begin
22 |
23 | result := 0;
24 | num_loops := 10;
25 |
26 | do_loops(result);
27 |
28 | putString("Expect 10");
29 |
30 | if (result == 10) then
31 | putString("SUCCESS");
32 | else
33 | putString("FAILURE");
34 | end if;
35 |
36 | end program
37 |
--------------------------------------------------------------------------------
/lib/errors.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 |
class ParserError(Exception):
    """Root of the parser's exception hierarchy.

    All other parsing errors derive from this class, so it is the type to
    catch at resync points.
    """
11 |
12 |
class ParserSyntaxError(ParserError):
    """Parser error raised when a syntax error is encountered."""
19 |
20 |
class ParserNameError(ParserError):
    """Parser error raised when a name error is encountered."""
27 |
28 |
class ParserTypeError(ParserError):
    """Parser error raised when a type error is encountered."""
35 |
36 |
class ParserRuntimeError(ParserError):
    """Parser error raised when a runtime error is encountered."""
43 |
--------------------------------------------------------------------------------
/tests/bigtest_good.src:
--------------------------------------------------------------------------------
1 | //////////////////////////////////////////////////////////////////////////////
2 | // File: bigtest_good.src
3 | // Author: Evan Sneath
4 | //////////////////////////////////////////////////////////////////////////////
5 |
6 | // This is a test of the parsing component of the compiler.
7 |
8 | program test_program is
9 |
10 | integer i;
11 | global integer one_int;
12 | integer two_int;
13 | integer red_int;
14 | integer blue_int;
15 | integer mult_of_two;
16 | integer test_array[15];
17 |
18 | string test_string;
19 |
20 | bool isGreaterThan;
21 |
22 | procedure test_proc(integer my_int in)
23 | string two_param;
24 | integer assigned_int;
25 | begin
26 | two_param := "Hello, World 2";
27 | assigned_int := my_int;
28 |
29 | return;
30 | end procedure;
31 |
32 | begin
33 |
34 | test_string := "Hello, World 1";
35 |
36 | one_int := 1;
37 | two_int := 2;
38 | red_int := 3 + 4.;
39 | mult_of_two := 1;
40 |
41 | test_proc(red_int);
42 |
43 | // Test some expressions
44 | blue_int := (((((one_int * two_int[one_int]) + red_int))));
45 |
46 | isGreaterThan := false;
47 |
48 | // Testing 'if' statements
49 | if ((blue_int * 2) >= -1) then
50 | isGreaterThan := true;
51 | end if;
52 |
53 | // Testing 'for' statements
54 | for (i := 0; i <= blue_int)
55 | i := i + 1.;
56 | mult_of_two := i * 2;
57 | end for;
58 |
59 | return;
60 |
61 | end program
62 |
--------------------------------------------------------------------------------
/tests/codegen_good.src:
--------------------------------------------------------------------------------
1 | program codegen_test is
2 |
3 | integer count;
4 |
5 | integer add1;
6 | integer add2;
7 |
8 | float float1;
9 | float float2;
10 |
11 | float result1;
12 |
13 | string hello;
14 |
15 | integer sum[2];
16 | integer result;
17 |
18 | // Test a procedure declaration. Will be called later
19 | procedure increment (integer val in, integer result out)
20 | integer tmp1;
21 | begin
22 | tmp1 := 1;
23 | result := tmp1 + val;
24 | end procedure;
25 |
26 | begin
27 |
28 | result := 0;
29 | increment(4 + 7 + 3, result);
30 |
31 | if (4 + 7 + 3 + 1 == result) then
32 | putString("SUCCESS");
33 | else
34 | putString("FAILURE");
35 | end if;
36 |
37 | add1 := 1;
38 | add2 := 5;
39 |
40 | float1 := 1.1;
41 | float2 := 2.;
42 |
43 | result1 := float1 + float2;
44 | result1 := float1 * add1;
45 |
46 | sum[0] := 1 + 5;
47 | sum[1] := add1 + add2;
48 |
49 | if (sum[0] == sum[1]) then
50 | result := 1;
51 | else
52 | result := 0;
53 | end if;
54 |
55 | if (result == 1) then
56 | putString("SUCCESS");
57 | else
58 | putString("FAILURE");
59 | end if;
60 |
61 | // Set 'count' to 10 the long way
62 | count := 0;
63 | for (count := count; count < 10)
64 | count := count + 1;
65 | end for;
66 |
67 | if (count == 10) then
68 | putString("SUCCESS");
69 | else
70 | putString("FAILURE");
71 | end if;
72 | end program
73 |
--------------------------------------------------------------------------------
/tests/bigtest_bad.src:
--------------------------------------------------------------------------------
1 | //////////////////////////////////////////////////////////////////////////////
2 | // File: bigtest_bad.src
3 | // Author: Evan Sneath
4 | // Description: This is a test of the parsing component of the compiler.
5 | // Errors: 4 errors should be raised in this program.
6 | //////////////////////////////////////////////////////////////////////////////
7 |
8 | program test_program is
9 |
10 | integer i;
11 | global integer one_int;
12 | integer two_int;
13 | integer red_int;
14 | integer blue_int;
15 | integer mult_of_two[1];
16 | integer test_array[15];
17 |
18 | // ERROR 1: Multiple definitions of two_int
19 | integer two_int;
20 |
21 | string test_string;
22 |
23 | bool isGreaterThan;
24 |
25 | procedure test_proc(integer my_int in)
26 | string two_param;
27 | integer assigned_int;
28 | begin
29 | two_param := "Hello, World";
30 | assigned_int := my_int;
31 |
32 | return;
33 | end procedure;
34 |
35 | begin
36 |
37 | test_string := "Hello, World";
38 |
39 | // ERROR 2: Variable used for function call
40 | one_int();
41 |
42 | one_int := 1;
43 | two_int := 2;
44 | red_int := 3 + 4.;
45 | mult_of_two := 1;
46 |
47 | test_proc(red_int);
48 |
49 | // ERROR 3: Variable this_doesnt_exist never declared
50 | this_doesnt_exist := 42;
51 |
52 | // ERROR 4: Assigning to a procedure, not a variable
53 | test_proc := "This is not ok";
54 |
55 | // Test some expressions
56 | blue_int := (((((one_int * two_int[one_int]) + red_int))));
57 |
58 | isGreaterThan := false;
59 |
60 | // Testing 'if' statements
61 | if ((blue_int * 2) >= -1) then
62 | isGreaterThan := true;
63 | end if;
64 |
65 | // Testing 'for' statements
66 | for (i := 0; i <= blue_int)
67 | i := i + 1.;
68 | mult_of_two := i * 2;
69 | end for;
70 |
71 | return;
72 |
73 | end program
74 |
--------------------------------------------------------------------------------
/tests/runtime_good.src:
--------------------------------------------------------------------------------
1 | program codegen_test is
2 |
3 | integer testint;
4 |
5 | string teststring;
6 | string teststring2;
7 | string teststring3;
8 |
9 | bool testbool;
10 | float testfloat;
11 |
12 | procedure increment (integer val in, integer result out)
13 | begin
14 | result := val + 1;
15 | end procedure;
16 |
17 | begin
18 |
19 | ////////////////////////////////////////////
20 | // INTEGER TEST
21 |
22 | //testint := 0;
23 | //increment(41, testint);
24 |
25 | //putString("41 plus 1 is...");
26 | //putInteger(testint);
27 |
28 | //putString("Enter an integer");
29 | //getInteger(testint);
30 | //putString("You entered...");
31 | //putInteger(testint);
32 |
33 | ////////////////////////////////////////////
34 | // BOOL TEST
35 |
36 | //putString("Enter a boolean value 0 or 1");
37 | //getBool(testbool);
38 | //putString("You entered...");
39 | //putBool(testbool);
40 |
41 | ////////////////////////////////////////////
42 | // FLOAT TEST
43 |
44 | //testfloat := 4.5;
45 | //putFloat(testfloat);
46 | //putFloat(1414.1414);
47 |
48 | //putString("Enter a float");
49 | //getFloat(testfloat);
50 | //putString("You entered...");
51 | //putFloat(testfloat);
52 |
53 | ////////////////////////////////////////////
54 | // STRING TEST
55 |
56 | putString("Enter a string");
57 | getString(teststring);
58 | putString("You entered...");
59 | putString(teststring);
60 |
61 | putString("Enter a string");
62 | getString(teststring2);
63 | putString("You entered...");
64 | putString(teststring2);
65 |
66 | putString("Enter a string");
67 | getString(teststring3);
68 | putString("You entered...");
69 | putString(teststring3);
70 |
71 | putString("This is the first string");
72 | putString(teststring);
73 |
74 | putString("This is the second string");
75 | putString(teststring2);
76 |
77 | putString("This is the third string");
78 | putString(teststring3);
79 | end program
80 |
--------------------------------------------------------------------------------
/compiler.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """Compiler module
4 |
5 | Acts as the command line interface to the compiler components. When given a
6 | source file, the compilation process will be executed.
7 |
8 | Author: Evan Sneath
9 | License: Open Software License v3.0
10 |
11 | Functions:
12 | parse_arguments: Parses incoming command line arguments.
13 | run_compiler: Executes the complete compilation process.
14 | """
15 |
16 | # Import standard libraries
17 | import argparse
18 | import subprocess
19 | import sys
20 |
21 | # Import custom compiler libraries
22 | from lib.parser import Parser
23 |
24 |
def parse_arguments():
    """Parse Arguments

    Builds the command line interface of the compiler and parses the
    arguments given to the program.

    Returns:
        The parsed argument namespace with the 'debug', 'source' and 'out'
        attributes.
    """
    cli = argparse.ArgumentParser()

    # Optional switch controlling comments in the generated code
    cli.add_argument(
        '-d', '--debug',
        action='store_true',
        help='print comments in generated code',
    )

    # Required positional argument: the file to compile
    cli.add_argument('source', help='source file to compile')

    # Optional output location, 'a.out' when not given
    cli.add_argument(
        '-o', '--out',
        action='store',
        default='a.out',
        help='target path for the compiled code',
    )

    return cli.parse_args()
47 |
48 |
def run_compiler(source, target, debug=False):
    """Run Compiler

    Executes the compilation process given a source file path. The source
    is first parsed into an intermediate C file, which is then compiled to
    the target with gcc.

    Arguments:
        source: The source file to compile.
        target: The destination binary executable file.
        debug: Passed through to the Parser. (Default: False)

    Returns:
        True on success, False otherwise. A gcc that cannot be launched at
        all (for example because it is not installed) counts as a failure.
    """
    # Define a temporary location for the intermediate C code
    TMP_CODE_FILE = './ir.c'

    # Create a Parser object to parse the inputted source file
    parser = Parser(debug)

    # Parse the source file to the temporary code file
    if not parser.parse(source, TMP_CODE_FILE):
        print('Error while parsing "%s"' % source)
        return False

    # Set up gcc compilation command
    gcc_cmd = ['gcc', '-m32', '-o', target, TMP_CODE_FILE]

    # Compile the temporary file with gcc. Output to the target location
    try:
        gcc_status = subprocess.call(gcc_cmd)
    except OSError as err:
        # subprocess raises OSError when the executable cannot be started;
        # report it like any other compilation failure instead of crashing
        print('Error while compiling "%s"' % target)
        print(' Could not run gcc: %s' % err)
        return False

    if gcc_status != 0:
        print('Error while compiling "%s"' % target)
        return False

    return True
82 |
83 |
if __name__ == '__main__':
    # Collect the command line options and hand them to the compiler
    cli_args = parse_arguments()
    succeeded = run_compiler(cli_args.source, cli_args.out,
                             debug=cli_args.debug)

    # Exit with status 0 after a successful run, 1 after a failed one
    sys.exit(not succeeded)
93 |
--------------------------------------------------------------------------------
/lib/datatypes.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """Types module
4 |
5 | Provides data structures necessary for identifier tracking and handling in the
6 | compilation process as well as tokenizing.
7 |
8 | Author: Evan Sneath
9 | License: Open Software License v3.0
10 |
11 | Classes:
12 | Token: A named tuple object containing token information.
13 | Identifier: A named tuple object containing identifier information.
14 | Parameter: A named tuple object containing procedure param information.
15 | IdentifierTable: Extends the list type to provide ID table functionality.
16 | """
17 |
18 | from lib.errors import ParserNameError
19 | from collections import namedtuple
20 |
21 |
# Token
#
# Named tuple type holding the information of one token.
#
# Fields:
#     type: The data type of the token to be stored.
#     value: The value of the token being stored.
#     line: The line number on which the token was encountered.
Token = namedtuple('Token', 'type value line')


# Identifier
#
# Named tuple type holding the information of one identifier.
#
# Fields:
#     name: The identifier name. This acts as the dictionary key.
#     type: The data type of the identifier.
#     size: The number of elements of the identifier if a variable.
#         If procedure, program, or non-array type, None is expected.
#     params: A list of Parameter objects describing procedure params.
#     mm_ptr: A pointer to the location of the identifier in main memory.
Identifier = namedtuple('Identifier', 'name type size params mm_ptr')


# Parameter
#
# Named tuple type holding the information of one procedure parameter.
#
# Fields:
#     id: The Identifier named tuple of the parameter.
#     direction: The direction ('in' or 'out') of the parameter.
Parameter = namedtuple('Parameter', 'id direction')
59 |
60 |
class IdentifierTable(list):
    """IdentifierTable class

    Extends the list built-in type with all methods necessary for identifier
    table management during compilation. Each element of the list is one
    scope: a dictionary mapping identifier names to identifier objects.
    Element 0 is the global scope and the last element is the current scope.

    Methods:
        push_scope: Adds a new scope.
        pop_scope: Removes the highest scope.
        add: Adds a new identifier to the current or global scope.
        find: Looks up an identifier in the current or global scope.
        get_id_location: Determines where the identifier exists in the scope.
        is_global: Determines if an identifier exists in the global scope.
        is_param: Determines if an identifier is a parameter of the scope.
        get_param_direction: Gets the direction of the parameter in the scope.
        get_current_scope_owner: Gets the identifier of the program or
            procedure from which the current scope was created.
    """
    def __init__(self):
        super().__init__()

        # Create the global scope
        self.append({})

        # Names of the identifiers owning each scope, parallel to the
        # scopes themselves. The global scope has the placeholder 'global'.
        self._owner_ids = ['global']

    def push_scope(self, owner_id):
        """Push New Identifier Scope

        Creates a new, empty scope on top of the identifiers table.

        Arguments:
            owner_id: The name of the identifier which has created this scope.
        """
        # Create a brand new scope for the identifiers table
        self.append({})

        # Save the owner of this scope for future lookup
        self._owner_ids.append(owner_id)

    def pop_scope(self):
        """Pop Highest Identifier Scope

        Disposes of the current scope in the identifiers table together with
        the record of its owner.
        """
        # Remove this entire scope from the identifiers table
        self.pop(-1)

        # Remove the identifier from the owner list
        self._owner_ids.pop()

    def add(self, identifier, is_global=False):
        """Add Identifier to Scope

        Adds a new identifier to either the current scope or the global scope.

        Arguments:
            identifier: An Identifier named tuple object describing the new
                identifier to add to the table.
            is_global: Determines whether the identifier should be added to
                the current scope or the global scope. (Default: False)

        Raises:
            ParserNameError if a global identifier is added while more than
            two scopes exist, or if the name has already been declared at
            the scope it is being added to.
        """
        scope = 0 if is_global else -1

        if is_global:
            # Globals may only be declared while at most two scopes exist
            # (presumably the global scope and the program scope)
            if len(self) > 2:
                raise ParserNameError('global name must be defined in program scope')

            # A global name must not clash with the global scope or with
            # the scope directly above it
            if identifier.name in self[0] or (len(self) > 1 and
                                              identifier.name in self[1]):
                raise ParserNameError('name already declared at this scope')
        elif identifier.name in self[-1]:
            raise ParserNameError('name already declared at this scope')

        self[scope][identifier.name] = identifier

    def find(self, name):
        """Find Identifier in Scope

        Searches for the given identifier in the current scope first and in
        the global scope second. Scopes in between are not searched.

        Arguments:
            name: The identifier name for which to search.

        Returns:
            The identifier object stored under the name in the current or
            global scope.

        Raises:
            ParserNameError if the given identifier is not found in the
            current or global scope.
        """
        if name in self[-1]:
            return self[-1][name]

        if name in self[0]:
            return self[0][name]

        raise ParserNameError()

    def get_id_location(self, name):
        """Get Identifier Location

        Determines the location of the identifier based on the identifier's
        place in the id table.

        Arguments:
            name: The identifier name for which to search.

        Returns:
            A string value for the location of the identifier. This may be
            'global', 'param', or 'local'.
        """
        if self.is_global(name):
            return 'global'

        if self.is_param(name):
            return 'param'

        return 'local'

    def is_global(self, name):
        """Identifier is Global

        Determines if an identifier exists in the global scope.

        Arguments:
            name: The identifier name for which to search.

        Returns:
            True if the identifier exists in the global scope. False otherwise.
        """
        return name in self[0]

    def is_param(self, name):
        """Identifier is Parameter

        Determines if an identifier is a parameter in the current scope.

        Arguments:
            name: The identifier name for which to search.

        Returns:
            True if the identifier is a scope parameter. False otherwise,
            including when the current scope is the global scope or its
            owner has no parameters.
        """
        owner = self.get_current_scope_owner()

        # The global scope has no owner (None), and an owner may carry no
        # parameters at all; neither can contain the name
        if owner is None or not owner.params:
            return False

        return any(name == param.id.name for param in owner.params)

    def get_param_direction(self, name):
        """Get Parameter Direction

        If the name given is a valid parameter of the scope, the direction
        stored for that parameter will be returned.

        Arguments:
            name: The identifier name for which to search.

        Returns:
            The direction of the matching parameter. None if the name given
            is not a parameter of the current scope, if the current scope is
            the global scope, or if its owner has no parameters.
        """
        owner = self.get_current_scope_owner()

        # Same guard as is_param: no owner or no parameters means no match
        if owner is None or not owner.params:
            return None

        for param in owner.params:
            if name == param.id.name:
                return param.direction

        return None

    def get_current_scope_owner(self):
        """Get Current Scope Owner

        Returns the identifier object of the owner of the current scope. The
        owner is looked up by its name inside the current scope itself, so it
        must have been added to that scope beforehand (a KeyError is raised
        otherwise).

        Returns:
            The identifier object of the owner of the current scope. None if
            the current scope is the global scope.
        """
        owner = self._owner_ids[-1]

        # If this is the global scope, return no owner
        if owner == 'global':
            return None

        return self[-1][owner]
268 |
--------------------------------------------------------------------------------
/lib/scanner.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """Scanner module
4 |
5 | With any attached file, the Scanner class will scan the file token-by-token
6 | until an end-of-file is encountered.
7 |
8 | Author: Evan Sneath
9 | License: Open Software License v3.0
10 |
11 | Classes:
12 | Scanner: An implementation of a scanner for the source language.
13 | """
14 |
15 | from os.path import isfile
16 |
17 | from lib.datatypes import Token
18 |
19 |
20 | class Scanner:
21 | """Scanner class
22 |
23 | This class implements a scanner object to scan a source code file in the
24 | compilation process. This class is designed to be inherited to be used
25 | during the parsing stage of the compiler.
26 |
27 | Attributes:
28 | keywords: A list of valid keywords in the language.
29 | symbols: A list of valid symbols in the language.
30 |
31 | Methods:
32 | attach_source: Binds a source file to the scanner to begin scanning.
33 | next_token: Returns the next token of the attached file. This token
34 | will be of the Token named tuple class.
35 | """
36 | # Define all language keywords
37 | keywords = [
38 | 'string', 'integer', 'bool', 'float', 'global', 'is', 'in', 'out',
39 | 'if', 'then', 'else', 'for', 'and', 'or', 'not', 'program',
40 | 'procedure', 'begin', 'return', 'end', 'true', 'false',
41 | ]
42 |
43 | # Define all language symbols
44 | symbols = [
45 | ':', ';', ',', '+', '-', '*', '/', '(', ')', '<', '<=', '>', '>=',
46 | '!', '!=', '=', '==', ':=', '[', ']', '&', '|',
47 | ]
48 |
49 | def __init__(self):
50 | super().__init__()
51 |
52 | # Holds the file path of the attached source file
53 | self._src_path = ''
54 |
55 | # Holds all source file data (code) to be scanned
56 | self._src = ''
57 |
58 | # Holds the location of the next character to scan in the source file
59 | self._line_pos = 0
60 | self._char_pos = 0
61 |
62 | return
63 |
64 | def attach_source(self, src_path):
65 | """Attach Source
66 |
67 | Attach a source file to the scanner and prepare for token collection.
68 |
69 | Arguments:
70 | src_path: The path to the source file to scan.
71 |
72 | Returns:
73 | True on success, False otherwise.
74 | """
75 | # Make sure the inputted file is a actual file
76 | if not isfile(src_path):
77 | print('Error: "%s"' % src_path)
78 | print(' Inputted path is not a file')
79 | return False
80 |
81 | # Try to read all data from the file and split by line
82 | try:
83 | with open(src_path) as f:
84 | keepends = True
85 | self._src = f.read().splitlines(keepends)
86 | except IOError:
87 | print('Error: "%s"' % src_path)
88 | print(' Could not read inputted file')
89 | return False
90 |
91 | # The file was attached and read successfully, store the path
92 | self._src_path = src_path
93 |
94 | return True
95 |
96 | def next_token(self):
97 | """Scan For Next Token
98 |
99 | Scans the source code for the next token. The next token is then
100 | returned for parsing.
101 |
102 | Returns:
103 | The next token object in the source code.
104 | """
105 | # Get the first character, narrow down the data type possibilities
106 | char = self._next_word()
107 |
108 | if char is None:
109 | return Token('eof', None, self._line_pos)
110 |
111 | # Use the first character to choose the token type to expect
112 | if char == '"':
113 | value, token_type = self._expect_string()
114 | elif char.isdigit():
115 | value, token_type = self._expect_number(char)
116 | elif char.isalpha():
117 | value, token_type = self._expect_identifier(char)
118 | elif char in self.symbols:
119 | value, token_type = self._expect_symbol(char)
120 | else:
121 | # We've run across a character that shouldn't be here
122 | msg = 'Invalid character \'%s\' encountered' % char
123 | self._scan_warning(msg, hl=self._char_pos-1)
124 |
125 | # Run this function again until we find something good
126 | return self.next_token()
127 |
128 | if token_type == 'comment':
129 | # If we find a comment, get a token on the next line
130 | self._next_line()
131 | return self.next_token()
132 |
133 | # Build the new token object
134 | new_token = Token(token_type, value, self._line_pos+1)
135 |
136 | return new_token
137 |
138 | def _get_line(self, line_number):
139 | """Get Line (Protected)
140 |
141 | Returns a line stripped of leading and trailing whitespace given a
142 | line number.
143 |
144 | Arguments:
145 | line_number: The line number of the attached source file to print.
146 |
147 | Returns:
148 | The requested line number from the source, None on invalid line.
149 | """
150 | if 0 < line_number <= len(self._src):
151 | return self._src[line_number-1].strip()
152 |
153 | def _scan_warning(self, msg, hl=-1):
154 | """Print Scanner Warning Message (Protected)
155 |
156 | Prints a formatted warning message.
157 |
158 | Arguments:
159 | msg: The warning message to display
160 | hl: If not -1, there will be an pointer (^) under a
161 | character in the line to be highlighted. (Default: -1)
162 | """
163 | line = self._src[self._line_pos][0:-1]
164 |
165 | print('Warning: "', self._src_path, '", ', sep='', end='')
166 | print('line ', self._line_pos+1, sep='')
167 | print(' ', msg, '\n ', line.strip(), sep='')
168 |
169 | if hl != -1:
170 | left_spaces = line.find(line.strip()[0])
171 | print(' %s^' % (' '*(abs(hl)-left_spaces)))
172 |
173 | return
174 |
175 | def _next_word(self):
176 | """Get Next Word Character (Protected)
177 |
178 | Move the cursor to the start of the next non-space character in the
179 | file.
180 |
181 | Returns:
182 | The first non-space character encountered. None if the end of
183 | file was reached.
184 | """
185 | char = ''
186 |
187 | while True:
188 | char = self._src[self._line_pos][self._char_pos]
189 |
190 | # React according to spaces and newlines
191 | if char == '\n':
192 | if not self._next_line():
193 | return None
194 | elif char in ' \t':
195 | self._char_pos += 1
196 | else:
197 | break
198 |
199 | # Increment to the next character
200 | self._char_pos += 1
201 | return char
202 |
203 | def _next_line(self):
204 | """Travel to Next Line (Protected)
205 |
206 | Move the cursor to the start of the next line safely.
207 |
208 | Returns:
209 | True on success, False if end of file is encountered
210 | """
211 | self._line_pos += 1
212 | self._char_pos = 0
213 |
214 | # Check to make sure this isn't the end of file
215 | if self._line_pos == len(self._src):
216 | return False
217 |
218 | return True
219 |
220 | def _next_char(self, peek=False):
221 | """Get Next Character (Protected)
222 |
223 | Move the cursor to the next character in the file.
224 |
225 | Arguments:
226 | peek: If True, the character position pointer will not be
227 | incremented. Set by default to False.
228 |
229 | Returns:
230 | The next character encountered. None if the end of line
231 | was reached.
232 | """
233 | # Get the next pointed character
234 | char = self._src[self._line_pos][self._char_pos]
235 |
236 | # Return None if we hit a line ending
237 | if char == '\n':
238 | return None
239 |
240 | # Increment to the next character
241 | if not peek:
242 | self._char_pos += 1
243 |
244 | return char
245 |
def _expect_string(self):
    """Expect String Token (Protected)

    Scans a string literal whose opening quotation has already been
    consumed. The literal runs up to the next quotation on the current
    line. If there is none, a warning is issued and the literal is taken
    to run up to the final character of the line. Every character that is
    not alphanumeric and not one of ``_,;:.'`` or a space is replaced by a
    space, with one warning per replaced character.

    Returns:
        (value, token_type) - A tuple describing the final parsed token.
        The resulting token type will be 'string'.
    """
    line = self._src[self._line_pos]

    # Locate the closing quotation, falling back to the end of the line
    closing = line.find('"', self._char_pos)
    terminated = closing != -1

    if not terminated:
        closing = len(line) - 1
        self._scan_warning('No closing quotation in string', hl=closing)

    raw = line[self._char_pos:closing]

    # Rebuild the literal one character at a time, blanking illegal ones
    cleaned = []
    for offset, symbol in enumerate(raw):
        if symbol.isalnum() or symbol in ' _,;:.\'':
            cleaned.append(symbol)
            continue

        cleaned.append(' ')
        self._scan_warning('Invalid character \'%s\' in string' % symbol,
                           hl=self._char_pos+offset)

    value = ''.join(cleaned)

    # Step over the literal, and over the closing quotation if present
    self._char_pos += len(value)
    if terminated:
        self._char_pos += 1

    return value, 'string'
281 |
def _expect_number(self, char):
    """Expect Number Token (Protected)

    Scans the remainder of a numeric literal. Digits and underscores are
    consumed, along with at most one decimal point. Underscores are
    dropped from the result, and a literal ending in a decimal point gets
    a trailing '0'.

    Arguments:
        char: The first character already picked for the value.

    Returns:
        (value, token_type) - A tuple describing the final parsed token.
        The resulting token type will either be 'integer' indicating a
        whole number or 'float' indicating a value with a decimal point.
    """
    pieces = [char]
    seen_point = False

    while True:
        upcoming = self._next_char(peek=True)

        if upcoming is None:
            break

        if upcoming == '.' and not seen_point:
            # First decimal point: the literal is a float from here on
            seen_point = True
        elif not (upcoming.isdigit() or upcoming == '_'):
            break

        pieces.append(upcoming)
        self._char_pos += 1

    # Underscores are only visual separators and carry no value
    value = ''.join(pieces).replace('_', '')

    # Nothing after the decimal point means a fractional part of 0
    if seen_point and value.endswith('.'):
        value += '0'

    return value, ('float' if seen_point else 'integer')
323 |
def _expect_identifier(self, char):
    """Expect Identifier Token (Protected)

    Scans the remainder of an identifier, consuming alphanumeric
    characters and underscores, then checks the result against the
    keyword list.

    Arguments:
        char: The first character already picked for the value.

    Returns:
        (value, token_type) - A tuple describing the final parsed token.
        The resulting token type will be 'keyword' if the value is found
        in self.keywords and 'identifier' otherwise.
    """
    pieces = [char]

    # Peek before consuming so the first foreign character is left behind
    upcoming = self._next_char(peek=True)

    while upcoming is not None and (upcoming.isalnum() or upcoming == '_'):
        pieces.append(upcoming)
        self._char_pos += 1
        upcoming = self._next_char(peek=True)

    value = ''.join(pieces)

    return value, ('keyword' if value in self.keywords else 'identifier')
355 |
def _expect_symbol(self, char):
    """Expect Symbol Token (Protected)

    Scans the remainder of a symbol, extending it one character at a time
    for as long as the extended text is still found in self.symbols. A
    '//' sequence is reported as a comment instead.

    Arguments:
        char: The first character already picked for the value.

    Returns:
        (value, token_type) - A tuple describing the final parsed token.
        Either (symbol text, 'symbol'), or (None, 'comment') when '//'
        is encountered.
    """
    symbol = char
    upcoming = self._next_char(peek=True)

    while upcoming is not None:
        candidate = symbol + upcoming

        # A double slash starts a comment; the second slash is not consumed
        if candidate == '//':
            return None, 'comment'

        # Stop once one more character no longer forms a known symbol
        if candidate not in self.symbols:
            break

        symbol = candidate
        self._char_pos += 1
        upcoming = self._next_char(peek=True)

    return symbol, 'symbol'
385 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Compiler
2 | ========
3 |
4 | ## Description
5 | A single-pass, recursive descent `LL(1)` compiler written by hand for a made-up
6 | language. This compiler is written entirely in Python 3 and uses the `gcc`
7 | compiler to finish compilation of the generated intermediate C representation.
8 |
9 | ## Author
10 | Created by [Evan Sneath](http://github.com/evansneath).
11 |
12 | ## License
13 | This software licensed under the
14 | [Open Software License v3.0](http://www.opensource.org/licenses/OSL-3.0).
15 |
16 | ## Dependencies
17 | In order to run, this software requires the following dependencies:
18 |
19 | * [Python 3](http://python.org/download/releases/3.3.2/)
20 |
21 | ## Progress
22 |
23 |
24 | | Component | Status |
25 | | Scanning | Completed |
26 | | Parsing | Completed |
27 | | Type Checking | Completed |
28 | | Code Generation | Completed |
29 | | Runtime | Completed |
30 |
31 |
32 | ## Usage
33 | ```
34 | usage: compiler.py [-h] [-d] [-o OUT] source
35 |
36 | positional arguments:
37 | source source file to compile
38 |
39 | optional arguments:
40 | -h, --help show this help message and exit
41 | -d, --debug print comments in generated code
42 | -o OUT, --out OUT target path for the compiled code
43 | ```
44 |
45 | The compiler will scan the source file for all valid tokens and
46 | parse the language grammar. All scanner, parser, and type errors will be
47 | outputted as they are encountered. Generated code is then outputted to `ir.c`
48 | where it is then run through the `gcc` compiler. The default output file
49 | generated by the compiler is `a.out` in the working directory. The `-o`
50 | argument may be used to modify the output file name.
51 |
52 | The `tests/` directory contains test source files which have several examples
53 | of token scanning with error/warning handling, grammar parsing, code
54 | generation, and runtime libraries.
55 |
56 | ## Implementation Details
57 |
58 | ### Software
59 |
60 | In determining the implementation language, robustness was chosen over speed as
61 | the deciding factor for the compiler. Python 3 was selected because of its ease of
62 | use, access to simple dictionary and table libraries, and my own familiarity
63 | with the language.
64 |
65 | As I progressed through the parser stage of the compiler, it became clear that
66 | the simple exception raising and handling would be useful for displaying
67 | compiler errors and trapping at resync points to continue syntax parsing.
68 |
69 | ### Structure
70 |
71 | For the sake of modularity and ease of debugging, the program is structured in
72 | a hierarchical fashion.
73 |
74 | `compiler.py` acts as the engine responsible for parsing of
75 | command-line arguments, calling the code parser, and completing the build using
76 | the `gcc` compiler with the appropriate arguments once the intermediate C code
77 | is generated.
78 |
79 | `parser.py` and the `Parser` class are the entry point for the action of
80 | compiling the valid input file. In order to do this, `Parser` inherits the
81 | `Scanner` class (in `scanner.py`) and the `CodeGenerator` class
82 | (in `codegenerator.py`) to allow for simple access to their class methods and
83 | objects. The `datatypes.py` and `errors.py` source files contain several
84 | data types and exception classes respectively which are used in the various
85 | components of the compiler.
86 |
87 | ### Scanning
88 |
89 | The implementation of the language scanner first tackles the problem of source
90 | code parsing by splitting the source code into a list of distinct lines. Not
91 | only does this allow for easier ways to determine end of line and end of file,
92 | but also makes the operation of retrieving line numbers simple for purposes of
93 | warning and error messages.
94 |
95 | At the start of each non-whitespace character, the first character is used to
96 | determine the type of the token to expect. The token is returned if the type is
97 | matched without issue. Otherwise, a scanner warning is thrown.
98 |
99 | The scanner warnings are never fatal, though syntactically the tokens returned
100 | may cause a parser error. My methodology behind the scanner was to try to
101 | correct as many lexical errors as possible. For instance, if a string literal
102 | has no end quote a warning will be thrown and a quote will be assumed at the
103 | end of the line.
104 |
105 | ### Parsing
106 |
107 | In order to eliminate loops caused by recursive grammar, any left-recursion in
108 | the language grammar was rewritten.
109 |
110 | Type-checking is performed in expressions by returning the types from the
111 | expression tree functions and evaluating types for compatibility if an
112 | operation is performed. There are many other locations where type-checking is
113 | performed in the compiler other than expressions.
114 |
115 | Parser resync points are used throughout the compiler to continue parsing if
116 | an error is encountered without propagating spurious error messages. Exception
117 | handling in Python is used to elegantly handle resyncing. Once a parser error
118 | is encountered in a statement or declaration, an exception is raised. This
119 | exception is then handled at the starting point of statement or declaration
120 | parsing and the parsing will continue to the next statement or declaration.
121 |
122 | Note that once a fatal error of any kind is encountered, code will no longer
123 | be generated.
124 |
125 | ### Code Generation
126 |
127 | Memory and registers for the operation of the program are defined and used as
128 | 32-bit integer arrays. This allows for simple addressing of memory and register
129 | space. All non-integer types present in the program are cast as integers for
130 | storage in the memory spaces. In the case of string storage, memory spaces hold
131 | a 32-bit pointer to the start of the string in either the heap (this will be
132 | covered later) or a literal value. To ensure that pointers are 32-bit and may
133 | be cast to integer without issue, the `gcc` compiler flag `-m32` is used.
134 |
135 | A fixed number of available register locations are allocated for use. These are
136 | used incrementally and are not reused or reallocated. For this reason, a large
137 | number of registers are required so that register space is always available.
138 | Future improvements could be made to "push back" register allocation to the
139 | first register (`R[0]`) at the end of each scope. At the end of a scope, it can
140 | be assumed that the same register will not be referenced again.
141 |
142 | The main memory structure of the program is divided into the stack and heap.
143 | The stack begins at the high memory address and is maintained using both a
144 | stack and frame pointer. The frame pointer (pointing to the scope's return
145 | address) provides a way to easily smash local stack variables when leaving the
146 | scope. All global variables may only be declared in the program scope and are
147 | referenced using the offset from the top of main memory.
148 |
149 | The heap in main memory is used only to allocate space for strings during
150 | runtime. This is accomplished using a heap pointer pointing to the next unused
151 | memory location in the heap. As the `getString()` procedure is called, the
152 | string retrieved from `stdin` is moved to the heap and the variable
153 | referencing that string is modified to point to the newly allocated heap
154 | location.
155 |
156 | Memory is arranged in the following manner:
157 |
158 | ```
159 | MAIN MEMORY
160 | -- .-------------------.
161 | P | RETURN ADDR | <== MM_END (MM_SIZE - 1)
162 | R | ----------------- |
163 | O | LOCAL/GLOBAL VARS |
164 | G | . |
165 | R | . |
166 | A | . |
167 | M | . |
168 | -- | ----------------- |
169 | P | PARAMS |
170 | R | . |
171 | O | . |
172 | C | . |
173 | E | ----------------- |
174 | D | CALLER FP |
175 | U | ----------------- |
176 | R | RETURN ADDR .. | <== FP
177 | E | ----------------- |
178 | | LOCAL VARS |
179 | | . |
180 | | . |
181 | | . | <== SP
182 | `---v--v--v--v--v---`
183 | .
184 | .
185 | .
186 | .---^--^--^--^--^---.
187 | | . |
188 | | . |
189 | | . |
190 | | HEAP | <== MM_START (0)
191 | `-------------------`
192 | ```
193 |
194 | When entering a scope, the caller pushes all params onto the stack in reverse
195 | order. This allows for easy addressing by their indexes. The caller then stores
196 | its current FP onto the stack and the return address. At this point the called
197 | scope is responsible for maintaining the stack and adding its local variables.
198 |
199 | When leaving a scope, the SP is moved to the FP location and the return address
200 | is called. The caller scope then is responsible for restoring the caller FP and
201 | ensuring that all outbound params are written back to their appropriate
202 | locations.
203 |
204 | All procedure calls are made using C labels and the `goto` statement. This
205 | ensures that the program code remains in the `main` function and no outside
206 | function calls are required. The technique of using
207 | [labels as values](http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html)
208 | was used to store the location of the return labels on the stack.
209 |
210 | Loop and conditional statements also make use of the `goto` statement to
211 | determine program flow. After the conditional expression is resolved to a
212 | boolean form, the register used for the expression is tested. If the expression
213 | resolved to `false`, then the code portion is skipped.
214 |
215 | For example:
216 |
217 | ```
218 | R[0] = <expression>;
219 | if (!R[0]) goto else_label;
220 | <then_statements>
221 | goto end_if_label;
222 | else_label:
223 | <else_statements>
224 | end_if_label:
225 | ```
226 |
227 | ### Runtime Environment
228 | Initially, I had created a separate C library to implement the runtime
229 | functions necessary. I determined that these functions were simple enough to be
230 | handwritten directly inline with the generated code as I progressed through
231 | development. The runtime functions use the same principles of stack memory
232 | referencing as other procedures and are populated in the identifiers table
233 | manually at the start of parsing.
234 |
235 | ## Language Specifications
236 |
237 | ### Syntax
238 | ```
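239 | <program> ::=
240 |     <program_header> <program_body>
241 |
242 | <program_header> ::=
243 |     'program' <identifier> 'is'
244 |
245 | <program_body> ::=
246 |     ( <declaration> ';' )*
247 |     'begin'
248 |     ( <statement> ';' )*
249 |     'end' 'program'
250 |
251 | <declaration> ::=
252 |     [ 'global' ] <procedure_declaration> |
253 |     [ 'global' ] <variable_declaration>
254 |
255 | <variable_declaration> ::=
256 |     <type_mark> <identifier> [ '[' <number> ']' ]
257 |
258 | <type_mark> ::=
259 |     'integer' |
260 |     'float' |
261 |     'bool' |
262 |     'string'
263 |
264 | <procedure_declaration> ::=
265 |     <procedure_header> <procedure_body>
266 |
267 | <procedure_header> ::=
268 |     'procedure' <identifier> '(' [ <parameter_list> ] ')'
269 |
270 | <procedure_body> ::=
271 |     ( <declaration> ';' )*
272 |     'begin'
273 |     ( <statement> ';' )*
274 |     'end' 'procedure'
275 |
276 | <parameter_list> ::=
277 |     <parameter> ',' <parameter_list> |
278 |     <parameter>
279 |
280 | <parameter> ::=
281 |     <variable_declaration> ( 'in' | 'out' )
282 |
283 | <statement> ::=
284 |     <assignment_statement> |
285 |     <if_statement> |
286 |     <loop_statement> |
287 |     <return_statement> |
288 |     <procedure_call>
289 |
290 | <assignment_statement> ::=
291 |     <destination> ':=' <expression>
292 |
293 | <if_statement> ::=
294 |     'if' '(' <expression> ')' 'then' ( <statement> ';' )+
295 |     [ 'else' ( <statement> ';' )+ ]
296 |     'end' 'if'
297 |
298 | <loop_statement> ::=
299 |     'for' '(' <assignment_statement> ';' <expression> ')'
300 |     ( <statement> ';' )*
301 |     'end' 'for'
302 |
303 | <procedure_call> ::=
304 |     <identifier> '(' [ <argument_list> ] ')'
305 |
306 | <argument_list> ::=
307 |     <expression> ',' <argument_list> |
308 |     <expression>
309 |
310 | <destination> ::=
311 |     <identifier> [ '[' <expression> ']' ]
312 |
313 | <expression> ::=
314 |     <expression> '&' <arith_op> |
315 |     <expression> '|' <arith_op> |
316 |     [ 'not' ] <arith_op>
317 |
318 | <arith_op> ::=
319 |     <arith_op> '+' <relation> |
320 |     <arith_op> '-' <relation> |
321 |     <relation>
322 |
323 | <relation> ::=
324 |     <relation> '<' <term> |
325 |     <relation> '>' <term> |
326 |     <relation> '>=' <term> |
327 |     <relation> '<=' <term> |
328 |     <relation> '==' <term> |
329 |     <relation> '!=' <term> |
330 |     <term>
331 |
332 | <term> ::=
333 |     <term> '*' <factor> |
334 |     <term> '/' <factor> |
335 |     <factor>
336 |
337 | <factor> ::=
338 |     '(' <expression> ')' |
339 |     [ '-' ] <name> |
340 |     [ '-' ] <number> |
341 |     <string> |
342 |     'true' |
343 |     'false'
344 |
345 | <name> ::=
346 |     <identifier> [ '[' <expression> ']' ]
347 |
348 | <identifier> ::=
349 |     [a-zA-Z][a-zA-Z0-9_]*
350 |
351 | <number> ::=
352 |     [0-9][0-9_]*[.[0-9_]*]?
353 |
354 | <string> ::=
355 |     "[a-zA-Z0-9 _,;:.']*"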
356 | ```
357 |
358 | ### Semantics
359 | * Procedure parameters are transmitted by value. Recursion is supported.
360 | * Non-local variables and functions are not visible except for those variables
361 | and functions in the outermost scope prefixed with the global reserved word.
362 | Functions currently being defined are visible in the statement set of the
363 | function itself (so that recursive calls are possible).
364 | * No forward references are permitted or supported.
365 | * Expressions are strongly typed and types must match. However, there is
366 | automatic conversion in the arithmetic operators to allow any mixing between
367 | integers and floats. Furthermore, the relational operators can compare
368 | boolean with integer tokens (boolean tokens are converted to integers as
369 | `false = 0`, `true = 1`).
370 | * The type signatures of a procedure's arguments must match exactly their
371 | parameter declaration.
372 | * Arithmetic operations (`+`, `-`, `*`, `/`, `&`, `|`) are defined for integers
373 | and floats only. The bitwise AND (`&`), bitwise OR (`|`) and bitwise NOT
374 | (`not`) operators are valid only on variables of type integer.
375 | * Relational operations are defined for integer and boolean tokens. Only
376 | comparisons between the compatible types are possible. Relational operations
377 | return a boolean result.
378 |
--------------------------------------------------------------------------------
/lib/codegenerator.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """CodeGenerator module
4 |
5 | Provides functionality for code output to an attached destination file.
6 |
7 | Author: Evan Sneath
8 | License: Open Software License v3.0
9 |
10 | Classes:
11 | CodeGenerator: A code generator interface for destination file outputting.
12 | """
13 |
14 |
15 | class CodeGenerator:
16 | """CodeGenerator class
17 |
18 | This class implements code generator function calls to easily attach a
19 | destination file, input code to generate, and commit the destination
20 | file upon successful compilation. This class is designed to be inherited
21 | and used during the parsing stage of the compiler.
22 |
23 | Attributes:
24 | runtime_functions: Details of each runtime function and its params.
25 |
26 | Methods:
27 | attach_destination: Binds a destination file to the code generator.
28 | generate_header: Generates overhead code (memory allocation, etc).
29 | generate_footer: Generates finishing overhead code.
30 | generate: Formats and stores a given string of code for later output.
31 | comment: Adds a comment to the generated code with appropriate tabbing.
32 | tab_push: Increases the tab depth by 1 tab (4 spaces).
33 | tab_pop: Decreases the tab depth by 1 tab (4 spaces).
34 | commit: Commits all code generation and writes to the destination file.
35 | get_mm: Provides a free memory space for global or local variables.
36 | reset_local_ptr: Resets the value for the local pointer to default.
37 | reset_param_ptr: Resets the value for the param pointer to default.
38 | get_reg: Provides a free register for intermediate variable use.
39 | get_label_id: Returns a unique identifier for the procedure call.
40 | get_unique_call_id: Returns a unique identifier for multiple calls.
41 | generate_program_entry: Generates all code associated with setting up
42 | the program entry and exit point.
43 | generate_procedure_call: Generates all code associated with managing
44 | the memory stack during a procedure call.
45 | generate_procedure_call_end: Generates code to clean up a procedure
46 | call. This finalizes the call by popping the SP to local stack.
47 | generate_name: Generates all code associated with name reference.
48 | generate_assignment: Generates all code associated with id assignment.
49 | generate_param_push: Generates code to push a param onto the stack.
50 | generate_param_pop: Generates code to pop a param off the stack.
51 | generate_param_store: Generates code to save an outgoing parameter
52 | to an identifier located in main memory.
53 | generate_number: Generates the code for a number reference.
54 | generate_return: Generates the code for the 'return' operation.
55 | generate_operation: Generates operation code given an operation.
56 | """
57 | def __init__(self):
58 | super().__init__()
59 |
60 | # Holds the file path of the attached destination file
61 | self._dest_path = ''
62 |
63 | # Holds all generated code to be written to the file destination
64 | self._generated_code = ''
65 |
66 | # Holds allocated size of main memory and num registers
67 | self._mm_size = 65536
68 | self._reg_size = 2048
69 | self._buf_size = 256
70 |
71 | # Holds stack pointer, frame pointer, and heap pointer registers
72 | self._SP = 1
73 | self._FP = 2
74 | self._HP = 3
75 |
76 | # Holds the pointer to the lowest unused register for allocation
77 | self._reg = 4
78 |
79 | # Holds the local memory pointer which determines the offset from the
80 | # frame pointer in the current scope.
81 | self._local_ptr = 0
82 | self.reset_local_ptr()
83 |
84 | # Holds the param memory pointer which determines the offset from the
85 | # frame pointer in the current scope.
86 | self._param_ptr = 0
87 | self.reset_param_ptr()
88 |
89 | # Holds the tab count of the code. tab_push, tab_pop manipulate this
90 | self._tab_count = 0
91 |
92 | # Holds an integer used for unique label generation for if/loop
93 | self._label_id = 0
94 |
95 | # Holds an integer to distinguish multiple calls of a function
96 | self._unique_id = 0
97 |
98 | # Holds the details of the runtime functions
99 | self.runtime_functions = {
100 | 'getString': [('my_string', 'string', 'out')],
101 | 'putString': [('my_string', 'string', 'in')],
102 | 'getBool': [('my_bool', 'bool', 'out')],
103 | 'putBool': [('my_bool', 'bool', 'in')],
104 | 'getInteger': [('my_integer', 'integer', 'out')],
105 | 'putInteger': [('my_integer', 'integer', 'in')],
106 | 'getFloat': [('my_float', 'float', 'out')],
107 | 'putFloat': [('my_float', 'float', 'in')],
108 | }
109 |
110 | return
111 |
def attach_destination(self, dest_path):
    """Attach Destination

    Records the path of the destination file. Nothing is opened or
    written here; the path is only stored for commit() to use.

    Arguments:
        dest_path: The path to the destination file to write.

    Returns:
        Always True.
    """
    # Remember where the generated code should end up
    self._dest_path = dest_path

    return True
128 |
def generate_header(self):
    """Generate Code Header

    Adds all header code to the generated code buffer: the C includes,
    the size and pointer-register defines, the opening of main(), the
    memory, register, float and string buffer declarations, and the
    initial values of the stack, frame and heap pointers.
    """
    code = [
        # The runtime functions emitted by generate_footer use printf,
        # scanf, fgets and stdin (stdio.h) and strlen, memcpy (string.h)
        '#include <stdio.h>',
        '#include <string.h>',
        '',
        '#define MM_SIZE %d' % self._mm_size,
        '#define R_SIZE %d' % self._reg_size,
        '#define BUF_SIZE %d' % self._buf_size,
        '',
        '// Define register locations of stack/frame ptr',
        '#define SP %d' % self._SP,
        '#define FP %d' % self._FP,
        '#define HP %d' % self._HP,
        '',
        'int main(void)',
        '{',
        '// Allocate main memory and register space',
        'int MM[MM_SIZE];',
        'int R[R_SIZE];',
        '',
        '// SP and FP start at the top of MM',
        'R[SP] = MM_SIZE - 1;',
        'R[FP] = MM_SIZE - 1;',
        '',
        '// HP starts at the bottom of MM',
        'R[HP] = 0;',
        '',
        '// Allocate float registers',
        'float R_FLOAT_1;',
        'float R_FLOAT_2;',
        '',
        '// Allocate space for a string buffer',
        'char STR_BUF[BUF_SIZE];',
        '',
        '////////////////////////////////////////////////////',
        '// PROGRAM START',
        '',
    ]

    # The header is emitted as one pre-formatted chunk with no extra tabs
    self.generate('\n'.join(code), tabs=0)

    return
175 |
def generate_footer(self):
    """Generate Code Footer

    Adds all footer code to the generated code buffer: the jump to the
    program exit, the eight runtime functions (get/put for string, bool,
    integer and float) and the closing brace of main().
    """
    # Every runtime function ends the same way: fetch the return address
    # stored at the frame pointer and jump to it.
    return_to_caller = [
        ' R[0] = MM[R[FP]];',
        ' goto *(void*)R[0];',
    ]

    # Label and function-specific statements of each runtime function
    routines = [
        ('putString_1', [
            ' R[0] = MM[R[FP]+2];',
            ' printf("%s\\n", (char*)R[0]);',
        ]),
        ('getString_1', [
            ' fgets(STR_BUF, BUF_SIZE, stdin);',
            ' R[0] = strlen(STR_BUF) + 1;',
            ' memcpy(&MM[R[HP]], &STR_BUF, R[0]);',
            ' MM[R[FP]+2] = (int)((char*)&MM[R[HP]]);',
            ' R[HP] = R[HP] + R[0];',
        ]),
        ('putBool_1', [
            ' R[0] = MM[R[FP]+2];',
            ' printf("%s\\n", R[0] ? "true" : "false");',
        ]),
        ('getBool_1', [
            ' scanf("%d", &R[0]);',
            ' R[0] = R[0] ? 1 : 0;',
            ' MM[R[FP]+2] = R[0];',
        ]),
        ('putInteger_1', [
            ' R[0] = MM[R[FP]+2];',
            ' printf("%d\\n", R[0]);',
        ]),
        ('getInteger_1', [
            ' scanf("%d", &R[0]);',
            ' MM[R[FP]+2] = R[0];',
        ]),
        ('putFloat_1', [
            ' R[0] = MM[R[FP]+2];',
            ' memcpy(&R_FLOAT_1, &R[0], sizeof(float));',
            ' printf("%g\\n", R_FLOAT_1);',
        ]),
        ('getFloat_1', [
            ' scanf("%f", &R_FLOAT_1);',
            ' memcpy(&R[0], &R_FLOAT_1, sizeof(float));',
            ' MM[R[FP]+2] = R[0];',
        ]),
    ]

    footer = [
        '',
        ' // Jump to the program exit',
        ' goto *(void*)MM[R[FP]];',
        '',
        '////////////////////////////////////////////////////',
        '// RUNTIME FUNCTIONS',
    ]

    # Each function is preceded by a blank line and closed by the shared
    # return sequence
    for label, statements in routines:
        footer.append('')
        footer.append(label + ':')
        footer.extend(statements)
        footer.extend(return_to_caller)

    # Close main()
    footer.append('}')

    self.generate('\n'.join(footer), tabs=0)
248 |
def generate(self, code, tabs=-1):
    """Generate Code

    Adds the given code to the generated code and automatically formats
    it with the appropriate tabs and ending newline. Each tab level is
    rendered as four spaces, as documented for tab_push() and tab_pop().

    Arguments:
        code: The code to add to the generated code buffer.
        tabs: A manual override to determine the number of tabs to place
            in this line of code. If -1, then the number of tabs used will
            correspond to the tab location from tab_push() and tab_pop()
            methods. (Default: -1)
    """
    # -1 is the sentinel for "use the current tab depth"
    depth = tabs if tabs != -1 else self._tab_count

    # One tab level is four spaces wide
    indent = '    ' * depth

    self._generated_code += indent + code + '\n'

    return
266 |
def comment(self, text, is_displayed=False):
    """Generate Comment

    Emits the text as a '//' comment line through generate(), but only
    when asked to display it.

    Arguments:
        text: The text to display in the comment.
        is_displayed: If True, the comment is written to the generated
            code. (Default: False)
    """
    # Comments are opt-in; do nothing unless display was requested
    if not is_displayed:
        return

    self.generate('// %s' % text)
281 |
def tab_push(self):
    """Tab Push

    Moves one tab level deeper, so that following lines of generated
    code are indented further.
    """
    self._tab_count = self._tab_count + 1
290 |
def tab_pop(self):
    """Tab Pop

    Moves one tab level shallower, so that following lines of generated
    code are indented less. A depth of zero is left untouched.
    """
    # Never step below the outermost level
    if self._tab_count != 0:
        self._tab_count -= 1
299 |
def commit(self):
    """Commit Code Generation

    Writes the whole generated code buffer to the attached destination
    file, replacing any previous content. If the file cannot be written,
    the failure is printed rather than raised.

    Returns:
        True if file is successfully written, False otherwise.
    """
    written = False

    try:
        with open(self._dest_path, 'w+') as dest_file:
            dest_file.write(self._generated_code)

        written = True
    except IOError as err:
        # Report the failure and let the caller decide how to proceed
        print('Error: "%s"' % self._dest_path)
        print(' Could not write to destination file: %s' % err.strerror)

    return written
318 |
def get_mm(self, id_size, is_param=False):
    """Get Memory Space

    Reserves memory for an identifier by handing out the current offset
    of either the parameter pointer or the local pointer, then moving
    that pointer past the reserved space.

    Arguments:
        id_size: The number of memory spaces to reserve (used for
            arrays). None reserves a single space.
        is_param: True if the identifier is a parameter, False if local or
            global variable. (Default: False)

    Returns:
        An integer offset: the value the parameter pointer held before
        the reservation if is_param is True, otherwise the value the
        local pointer held.
    """
    # A missing size means a single memory space
    slots = 1 if id_size is None else int(id_size)

    if is_param:
        offset = self._param_ptr
        self._param_ptr = offset + slots
        return offset

    # Not a parameter: allocate in the local variable space
    offset = self._local_ptr
    self._local_ptr = offset + slots
    return offset
349 |
def reset_local_ptr(self):
    """Reset Local Pointer

    Puts the local variable pointer back to its starting offset of 1, so
    that local variable allocation starts over for a new scope.
    """
    self._local_ptr = 1
359 |
def reset_param_ptr(self):
    """Reset Param Pointer

    Puts the parameter pointer back to its starting offset of 1, so that
    parameter allocation starts over for a new scope.
    """
    self._param_ptr = 1
369 |
def get_reg(self, inc=True):
    """Get Register

    Hands out a register number from the register list.

    Arguments:
        inc: If True, a new register will be returned. If False, the last
            register allocated will be returned.

    Returns:
        An integer denoting the register number. The register may then be
        referenced as follows: R[<register number>]
    """
    # Only move on to a fresh register when one was asked for
    if inc:
        self._reg += 1

    return self._reg
387 |
def get_label_id(self):
    """Get Label Id

    Produces the next label id, so that no conflicts occur between
    procedures with the same name in different scopes.

    Returns:
        A label id to append to the procedure label.
    """
    next_id = self._label_id + 1
    self._label_id = next_id

    return next_id
400 |
def get_unique_call_id(self):
    """Get Unique Call Id

    Produces the next call id, so that no conflicts occur between return
    labels for procedures with multiple calls.

    Returns:
        A unique id to append to the procedure return label.
    """
    next_id = self._unique_id + 1
    self._unique_id = next_id

    return next_id
413 |
def generate_program_entry(self, program_name, program_num, debug):
    """Generate Program Entry

    Generates the code around the entry point of the program: the
    program's return address is stored at the frame pointer, control
    jumps to the program's begin label, and the program's end label is
    emitted with a 'return 0;' under it.

    Arguments:
        program_name: The name of the program.
        program_num: The label id of the program.
        debug: Determines if comments should be written to the code.
    """
    self.comment('Setting program return address', debug)

    # All program labels share the '<name>_<num>' prefix
    prefix = '%s_%d' % (program_name, program_num)

    # Store the return address, then jump to the entry point
    for line in ('MM[R[FP]] = (int)&&%s_end;' % prefix,
                 'goto %s_begin;' % prefix,
                 ''):
        self.generate(line)

    # The exit point: an end label with an indented return under it
    self.comment('Creating the program exit point', debug)
    self.generate('%s_end:' % prefix)

    self.tab_push()
    self.generate('return 0;')
    self.tab_pop()

    self.generate('')
445 |
def generate_procedure_call(self, procedure_name, procedure_num, debug):
    """Generate Procedure Call

    Generates the code that manages the stack around a procedure call:
    the caller's frame pointer is pushed, a return address is pushed and
    becomes the new frame pointer target, control jumps to the procedure,
    and a return label follows under which the caller's frame pointer is
    restored. Pushing and popping of params is not included.

    Arguments:
        procedure_name: The name of the procedure to call.
        procedure_num: The label id of the procedure to call.
        debug: Determines if comments should be written to the code.
    """
    grow_stack = 'R[SP] = R[SP] - 1;'

    # Push the caller's FP
    self.comment('Setting caller FP', debug)
    self.generate(grow_stack)
    self.generate('MM[R[SP]] = R[FP];')

    # Make room for the return address and point the FP at it
    self.comment('Setting return address (current FP)', debug)
    self.generate(grow_stack)
    self.generate('R[FP] = R[SP];')

    # A fresh call number keeps the return labels of repeated calls apart
    call_number = self.get_unique_call_id()

    target = '%s_%d' % (procedure_name, procedure_num)
    return_label = '%s_%d' % (target, call_number)

    # Store the return address, jump to the procedure, and mark the spot
    # where execution resumes
    self.generate('MM[R[SP]] = (int)&&%s;' % return_label)
    self.generate('goto %s;' % target)
    self.generate('%s:' % return_label)

    # Step off the return address and reload the caller's FP
    self.comment('Restore caller FP', debug)
    self.generate('R[SP] = R[SP] + 1;')
    self.generate('R[FP] = MM[R[SP]];')
485 |
def generate_procedure_call_end(self, debug):
    """Generate Procedure Call End

    Emits the final step of a procedure call: the SP is advanced by one
    slot so that it leaves the call's stack area and points back into
    the caller's local stack.

    Arguments:
        debug: Determines if comments are to be written in generated code.
    """
    self.comment('Move to caller local stack', debug)

    # A single increment finishes the call sequence
    advance_sp = 'R[SP] = R[SP] + 1;'
    self.generate(advance_sp)
501 |
def _generate_get_id_in_mm(self, id_obj, id_location, idx_reg, debug):
    """Generate Get Identifier in Main Memory (Protected)

    Emits code that leaves the main memory address of an identifier in a
    newly requested register. The register is first loaded with the
    identifier's offset (mm_ptr), the index register is added when an
    indexed array element is referenced, and the offset is then turned
    into an address according to where the identifier lives:

        param:  R[FP] + 1 + offset
        global: MM_SIZE - 1 - offset
        local:  R[FP] - offset (also used for any other location value)

    Arguments:
        id_obj: The Identifier class object containing id data.
        id_location: Either 'global', 'param', or 'local' depending on the
            location in the stack where the identifier resides.
        idx_reg: The register number of the index expression.
        debug: Determines if comments are to be written in generated code.

    Returns:
        The register number of the calculated address of the identifier.
    """
    emit = self.generate
    addr_reg = self.get_reg()

    emit('R[%d] = %d;' % (addr_reg, id_obj.mm_ptr))

    # The index only matters for identifiers that carry a size (arrays)
    is_indexed = not (id_obj.size is None or idx_reg is None)
    if is_indexed:
        emit('R[%d] = R[%d] + R[%d];' % (addr_reg, addr_reg, idx_reg))

    # Comment text and address formula for each non-local location
    addressing = {
        'param': ('Param referenced', 'R[%d] = R[FP] + 1 + R[%d];'),
        'global': ('Global var referenced', 'R[%d] = MM_SIZE - 1 - R[%d];'),
    }
    local = ('Local var referenced', 'R[%d] = R[FP] - R[%d];')

    note, formula = addressing.get(id_location, local)
    self.comment(note, debug)
    emit(formula % (addr_reg, addr_reg))

    return addr_reg
543 |
def generate_name(self, id_obj, id_location, idx_reg, debug):
    """Generate Name

    Emits the code that loads the value stored at an identifier's memory
    location. The address is computed into a register first and that
    same register is then overwritten with the value found there.

    Arguments:
        id_obj: The Identifier class object containing id data.
        id_location: Either 'global', 'param', or 'local' depending on the
            location in the stack where the identifier resides.
        idx_reg: The register number of the index expression.
        debug: Determines if comments are to be written in generated code.
    """
    # Register holding the identifier's address in main memory
    addr_reg = self._generate_get_id_in_mm(
        id_obj, id_location, idx_reg, debug)

    # Replace the address with the contents of that address
    load = 'R[%d] = MM[R[%d]];' % (addr_reg, addr_reg)
    self.generate(load)
565 |
def generate_assignment(self, id_obj, id_location, idx_reg, expr_reg,
                        debug):
    """Generate Assignment

    Emits the code that stores the outcome of an expression into the
    main memory location of the given identifier.

    Arguments:
        id_obj: The Identifier class object containing id data.
        id_location: Either 'global', 'param', or 'local' depending on the
            location in the stack where the identifier resides.
        idx_reg: The register number of the index expression.
        expr_reg: The register number of the expression outcome.
        debug: Determines if comments are to be written in generated code.
    """
    # Register holding the destination address in main memory
    dest_reg = self._generate_get_id_in_mm(
        id_obj, id_location, idx_reg, debug)

    # Copy the expression register into the destination
    store = 'MM[R[%d]] = R[%d];' % (dest_reg, expr_reg)
    self.generate(store)
589 |
def generate_param_push(self, expr_reg, debug):
    """Generate Param Push

    Emits the code that pushes one argument onto the stack: the SP is
    moved down one slot and the expression register is written there.

    Arguments:
        expr_reg: The register number of the expression outcome.
        debug: Determines if comments are to be written in generated code.
    """
    self.comment('Pushing argument onto the stack', debug)

    push_sequence = (
        'R[SP] = R[SP] - 1;',
        'MM[R[SP]] = R[%d];' % expr_reg,
    )
    for line in push_sequence:
        self.generate(line)
605 |
def generate_param_pop(self, param_name, debug):
    """Generate Param Pop

    Emits the code that pops one parameter off the stack by moving the
    SP up one slot, preceded by a comment naming the parameter.

    Arguments:
        param_name: The parameter name to display.
        debug: Determines if comments are to be written in generated code.
    """
    note = 'Popping "%s" param off the stack' % param_name
    self.comment(note, debug)

    # Step over the slot that held the parameter
    self.generate('R[SP] = R[SP] + 1;')
622 |
def generate_param_store(self, id_obj, id_location, debug):
    """Generate Param Store

    Emits the code that copies the value at the current SP (the popped
    parameter) into the memory location of the destination identifier.
    No index register is used when the destination address is computed.

    Arguments:
        id_obj: The Identifier class object containing id data.
        id_location: Either 'global', 'param', or 'local' depending on the
            location in the stack where the identifier resides.
        debug: Determines if comments are to be written in generated code.
    """
    # Register holding the destination address in main memory
    dest_reg = self._generate_get_id_in_mm(id_obj, id_location, None, debug)

    # Memory-to-memory copy from the top of the stack
    copy = 'MM[R[%d]] = MM[R[SP]];' % dest_reg
    self.generate(copy)
642 |
def generate_number(self, number, token_type, negate):
    """Generate Number

    Emits the code that places a parsed number literal in a new register.
    Integers are assigned directly; any other type is treated as a float,
    written to the R_FLOAT_1 buffer and copied into the register with
    memcpy.

    Arguments:
        number: The parsed number value (this is a string representation).
        token_type: The type of the number (either 'integer' or 'float')
        negate: A boolean to determine whether or not to negate the value.
    """
    target = self.get_reg()

    if token_type == 'integer':
        # Integers go straight into the register
        assign = 'R[%d] = -%s;' if negate else 'R[%d] = %s;'
        self.generate(assign % (target, number))
        return

    # Floats pass through the float buffer before landing in the register
    assign = 'R_FLOAT_1 = -%s;' if negate else 'R_FLOAT_1 = %s;'
    self.generate(assign % number)
    self.generate('memcpy(&R[%d], &R_FLOAT_1, sizeof(float));' % target)
672 |
def generate_return(self, debug):
    """Generate Return Statement

    Emits the code that leaves the current scope: the SP is reset to the
    FP, which discards the local stack, and control jumps to the address
    stored in the slot the FP points to.

    Arguments:
        debug: Determines if comments should be displayed or not.
    """
    steps = (
        ('Moving SP to FP (return address)', 'R[SP] = R[FP];'),
        ('Return to calling function', 'goto *(void*)MM[R[FP]];'),
    )

    for note, line in steps:
        self.comment(note, debug)
        self.generate(line)
691 |
def generate_operation(self, reg1, type1, reg2, type2, operation):
    """Generate Operation

    Emits the code for a binary operation on two operand registers. When
    neither operand is a float the operation is performed directly on the
    registers. Otherwise both operands are brought into the float buffers
    (a non-float operand by assignment, a float operand by memcpy), the
    operation is applied to the buffers and the outcome is copied into
    the result register with memcpy.

    Arguments:
        reg1: The register of the first operand.
        type1: The type of the first operand.
        reg2: The register of the second operand.
        type2: The type of the second operand.
        operation: The operation symbol to perform.

    Returns:
        The register number where the result of the operation
        is stored.
    """
    dest = self.get_reg()

    if 'float' not in (type1, type2):
        self.generate('R[%d] = R[%d] %s R[%d];' %
                      (dest, reg1, operation, reg2))
        return dest

    # (type, register, load for non-float, load for float) per operand
    operands = (
        (type1, reg1, 'R_FLOAT_1 = R[%d];',
         'memcpy(&R_FLOAT_1, &R[%d], sizeof(float));'),
        (type2, reg2, 'R_FLOAT_2 = R[%d];',
         'memcpy(&R_FLOAT_2, &R[%d], sizeof(float));'),
    )

    for operand_type, operand_reg, assign, copy_bits in operands:
        load = copy_bits if operand_type == 'float' else assign
        self.generate(load % operand_reg)

    self.generate('R_FLOAT_1 = R_FLOAT_1 %s R_FLOAT_2;' % operation)
    self.generate('memcpy(&R[%d], &R_FLOAT_1, sizeof(float));' % dest)

    return dest
731 |
--------------------------------------------------------------------------------
/lib/parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """Parser module
4 |
5 | Inherits the Scanner module and parses the attached file's tokens as they are
6 | encountered with the target grammar. Code is then generated and written to the
7 | given destination file.
8 |
9 | Author: Evan Sneath
10 | License: Open Software License v3.0
11 |
12 | Classes:
13 | Parser: An implementation of a parser for the source language.
14 | """
15 |
16 | from lib.errors import *
17 | from lib.datatypes import Identifier, Parameter, IdentifierTable
18 |
19 | from lib.scanner import Scanner
20 | from lib.codegenerator import CodeGenerator
21 |
22 |
23 | class Parser(Scanner, CodeGenerator):
24 | """Parser class
25 |
26 | Parses the given source file using the defined language structure.
27 |
28 | Inherits:
29 | Scanner: The lexer component of the compiler.
30 | CodeGenerator: The class responsible for output file abstraction.
31 |
32 | Attributes:
33 | debug: Boolean attribute denoting if successfully parsed tokens should
34 | be displayed as they are encountered and parsed.
35 |
36 | Methods:
37 | parse: Parses the given file until a terminal error is encountered or
38 | the end-of-file token is reached.
39 | """
def __init__(self, debug=False):
    """Initialize the parser state.

    Runs the parent initializers, then sets up the token holders, an
    empty identifier table and the error flag.

    Arguments:
        debug: Stored as the public 'debug' attribute. (Default: False)
    """
    super().__init__()

    # Public class attributes
    self.debug = debug

    # No token has been read yet, so all three holders start out empty
    self._previous = self._current = self._future = None

    # Set by the error reporting methods once any error is printed
    self._has_errors = False

    # Holds all var/program/procedure names
    self._ids = IdentifierTable()
57 |
def parse(self, src_path, dest_path):
    """Begin Parsing

    Begins the parse of the inputted source file. The generated code is
    only committed to the destination when no error was recorded.

    Arguments:
        src_path: The input source file to parse.
        dest_path: The output target file to write.

    Returns:
        True on success, False otherwise.
    """
    # Attach the source file for reading
    if not self.attach_source(src_path):
        return False

    # Attach the destination file for writing
    if not self.attach_destination(dest_path):
        return False

    # Advance the tokens twice to populate both current and future tokens
    self._advance_token()
    self._advance_token()

    # Add all runtime functions
    self._add_runtime()

    # Generate the compiled code header to handle runtime overhead
    self.generate_header()

    # Begin parsing the root language structure
    try:
        self._parse_program()
    except ParserSyntaxError:
        return False

    # Generate the compiled code footer
    self.generate_footer()

    # Make sure there's no junk after the end of program. The warning
    # needs a real line number: it is formatted with %d and used to look
    # up the offending source line.
    if not self._check('eof'):
        token = self._current
        msg = ('Expected end of file, encountered "%s" (%s)' %
               (token.value, token.type))
        self._warning(msg, token.line)

    # If errors were encountered, don't write code
    if self._has_errors:
        return False

    # Commit the code buffer to the output code file
    self.commit()

    return True
109 |
def _add_runtime(self):
    """Add Runtime Functions

    Registers every runtime function as a global procedure identifier.
    Each entry of a function's parameter list supplies the parameter
    name, type and direction at positions 0, 1 and 2; parameters are
    given the offsets 1, 2, ... in list order.
    """
    # The runtime_functions collection is defined in the CodeGenerator class
    for func_name in self.runtime_functions:
        specs = self.runtime_functions[func_name]

        # One Parameter per entry, numbered from 1
        param_ids = [
            Parameter(id=Identifier(name=spec[0], type=spec[1], size=None,
                                    params=None, mm_ptr=position),
                      direction=spec[2])
            for position, spec in enumerate(specs, start=1)
        ]

        procedure = Identifier(name=func_name, type='procedure', size=None,
                               params=param_ids, mm_ptr=1)

        # Runtime functions live in the global scope of the table
        self._ids.add(procedure, is_global=True)
135 |
def _warning(self, msg, line, prefix='Warning'):
    """Print Parser Warning Message (Protected)

    Prints three lines: a header with the prefix, the source path and the
    line number, then the message, then the text of the source line.

    Arguments:
        msg: The warning message to display.
        line: The line where the warning has occurred.
        prefix: A string value to be printed at the start of the warning.
            Overwritten for error messages. (Default: 'Warning')
    """
    header = '%s: "%s", line %d' % (prefix, self._src_path, line)
    print(header)

    print(' %s' % msg)

    # Show the offending source line beneath the message
    source_text = self._get_line(line)
    print(' %s' % source_text)
153 |
def _syntax_error(self, expected):
    """Print Syntax Error Message (Protected)

    Reports that the current token is not what the grammar expected,
    records that an error occurred and raises so that a calling parse
    method can catch the exception and recover.

    Arguments:
        expected: A string containing the expected token type/value.

    Raises:
        ParserSyntaxError: Always raised after the message is printed.
    """
    found = self._current

    description = ('Expected %s, encountered "%s" (%s)' %
                   (expected, found.value, found.type))
    self._warning(description, found.line, prefix='Error')

    self._has_errors = True
    raise ParserSyntaxError()
177 |
def _name_error(self, msg, name, line):
    """Print Name Error Message (Protected)

    Reports a problem with an identifier name as '<name>: <reason>' and
    records that an error occurred. No exception is raised.

    Arguments:
        msg: The reason for the error.
        name: The name of the identifier where the name error occurred.
        line: The line where the name error occurred.
    """
    report = '%s: %s' % (name, msg)
    self._warning(report, line, prefix='Error')

    self._has_errors = True
194 |
def _type_error(self, expected, encountered, line):
    """Print Type Error Message (Protected)

    Reports a mismatch between the expected type and the type that was
    encountered, and records that an error occurred. No exception is
    raised.

    Arguments:
        expected: A string containing the expected token type.
        encountered: A string containing the type encountered.
        line: The line on which the type error occurred.
    """
    report = 'Expected %s type, encountered %s' % (expected, encountered)
    self._warning(report, line, prefix='Error')

    self._has_errors = True
211 |
def _runtime_error(self, msg, line):
    """Print Runtime Error Message (Protected)

    Reports a runtime error with the given message and records that an
    error occurred. No exception is raised.

    Arguments:
        msg: The reason for the error.
        line: The line where the runtime error occurred.
    """
    error_prefix = 'Error'
    self._warning(msg, line, prefix=error_prefix)

    self._has_errors = True
225 |
def _advance_token(self):
    """Advance Tokens (Protected)

    Shifts the token window by one: 'previous' takes the old 'current'
    and 'current' takes the old 'future'. A new 'future' token is read
    from the source unless the future token is already the eof token.
    """
    self._previous, self._current = self._current, self._future

    upcoming = self._future
    reached_eof = upcoming is not None and upcoming.type == 'eof'

    # Once eof sits in the future slot there is nothing more to read
    if not reached_eof:
        self._future = self.next_token()
239 |
def _check(self, expected_type, expected_value=None, check_future=False):
    """Check Token (Protected)

    Peeks at a token and compares it with the given type and value
    without consuming it. A mismatch is not treated as an error.

    Arguments:
        expected_type: The expected type of the token.
        expected_value: The expected value of the token. When None, only
            the type is compared. (Default: None)
        check_future: If True, the future token is checked instead of the
            current one. (Default: False)

    Returns:
        True if the token matches the expected value, False otherwise.
    """
    token = self._future if check_future else self._current

    if token.type != expected_type:
        return False

    # With no value requested, a matching type is enough
    if expected_value is None:
        return True

    return token.value == expected_value
261 |
def _accept(self, expected_type, expected_value=None):
    """Accept Token (Protected)

    Consumes the current token when it matches the expected type and
    value. A mismatch leaves the token untouched and is not an error.

    Arguments:
        expected_type: The expected type of the token.
        expected_value: The expected value of the token. (Default: None)

    Returns:
        True if the token matches the expected value, False otherwise.
    """
    if not self._check(expected_type, expected_value):
        return False

    # The token matched: consume it
    self._advance_token()
    return True
280 |
def _match(self, expected_type, expected_value=None):
    """Match Token (Protected)

    Consumes the current token when it matches the expected type and
    value. Unlike _accept, a mismatch is reported as a syntax error.

    Arguments:
        expected_type: The expected type of the token.
        expected_value: The expected value of the token. (Default: None)

    Returns:
        The matched Token class object if successful.
    """
    if self._accept(expected_type, expected_value):
        # The accepted token has just moved into the 'previous' slot
        return self._previous

    # Describe what was expected, quoting the value when one was given
    if expected_value is None:
        description = expected_type
    else:
        description = '"'+expected_value+'" ('+expected_type+')'

    self._syntax_error(description)
303 |
def _resync_at_token(self, token_type, token_value=None):
    """Resync at Token

    Skips tokens until the current token has the given type and value so
    that parsing can continue from there. If the end of the file is
    reached first, skipping stops at the eof token: the current token no
    longer changes once eof is reached, so continuing to advance would
    never terminate.

    Arguments:
        token_type: The id_type of the token to resync.
        token_value: The value of the token to resync. (Default: None)
    """
    while not self._check(token_type, token_value):
        # Nothing left to skip; leave the eof token for the caller to
        # report instead of spinning on it forever
        if self._check('eof'):
            break

        self._advance_token()
318 |
def _parse_program(self):
    """<program> (Protected)

    Parses the <program> language structure.

    <program> ::=
        <program_header> <program_body>
    """
    # The header yields the program identifier that the body needs
    program_id = self._parse_program_header()

    self._parse_program_body(program_id)
331 |
def _parse_program_header(self):
    """<program_header> (Protected)

    Parses the <program_header> language structure, registers the program
    identifier, emits the program entry code and opens the program scope.

    <program_header> ::=
        'program' <identifier> 'is'

    Returns:
        The id object with information about the program identifier.
    """
    self._match('keyword', 'program')

    # Remember the name before the identifier token is consumed
    program_name = self._current.value
    self._match('identifier')

    # The label id is stored in the mm_ptr slot of the identifier
    id_obj = Identifier(program_name, 'program', None, None,
                        self.get_label_id())

    # Register the program in the global table
    self._ids.add(id_obj, is_global=True)

    self._match('keyword', 'is')

    # Emit the code for the program entry point
    self.generate_program_entry(id_obj.name, id_obj.mm_ptr, self.debug)

    # Open the program body scope and add the program to it as well so
    # that it can be resolved as the owner of that scope
    self._ids.push_scope(id_obj.name)
    self._ids.add(id_obj)

    return id_obj
368 |
def _parse_program_body(self, program_id):
    """<program_body> (Protected)

    Parses the <program_body> language structure. A declaration or
    statement that raises a parser error is skipped up to the next ';'
    so that parsing can continue.

    <program_body> ::=
        ( <declaration> ';' )*
        'begin'
        ( <statement> ';' )*
        'end' 'program'

    Arguments:
        program_id: The identifier object for the program.
    """
    # Running total of the space needed by the declared variables
    frame_size = 0

    while not self._accept('keyword', 'begin'):
        try:
            declared = self._parse_declaration()
        except ParserError:
            self._resync_at_token('symbol', ';')
        else:
            # Procedure declarations report no size
            if declared is not None:
                frame_size += int(declared)

        self._match('symbol', ';')

    # Label the entry point for the program
    entry_label = '%s_%d_begin:' % (program_id.name, program_id.mm_ptr)
    self.generate(entry_label)
    self.tab_push()

    if frame_size:
        self.comment('Allocating space for local variables', self.debug)
        self.generate('R[SP] = R[SP] - %d;' % frame_size)

    while not self._accept('keyword', 'end'):
        try:
            self._parse_statement()
        except ParserError:
            self._resync_at_token('symbol', ';')

        self._match('symbol', ';')

    self._match('keyword', 'program')

    # Leave the program body scope
    self._ids.pop_scope()
    self.tab_pop()
419 |
def _parse_declaration(self):
    """<declaration> (Protected)

    Parses the <declaration> language structure.

    <declaration> ::=
        [ 'global' ] <procedure_declaration> |
        [ 'global' ] <variable_declaration>

    Returns:
        The size of any variable declared, always as an integer: the
        array size for arrays and 1 for scalars. None if procedure.
    """
    is_global = bool(self._accept('keyword', 'global'))

    if self._first_procedure_declaration():
        self._parse_procedure_declaration(is_global=is_global)
        return None

    if not self._first_variable_declaration():
        self._syntax_error('procedure or variable declaration')
        return None

    id_obj = self._parse_variable_declaration(is_global=is_global)

    if id_obj is None:
        return None

    if id_obj.size is None:
        # Scalars occupy a single memory slot
        return 1

    # The array size is stored as it was read from the size token, so
    # normalize it here to let callers add sizes up without converting
    return int(id_obj.size)
451 |
def _first_variable_declaration(self):
    """first(<variable_declaration>) (Protected)

    Determines if current token matches the first terminals.

    first(<variable_declaration>) ::=
        integer | float | bool | string

    Returns:
        True if current token matches a first terminal, False otherwise.
    """
    type_keywords = ('integer', 'float', 'bool', 'string')

    # Stops at the first keyword that matches the current token
    return any(self._check('keyword', word) for word in type_keywords)
467 |
def _parse_variable_declaration(self, is_global=False, is_param=False):
    """<variable_declaration> (Protected)

    Parses the <variable_declaration> language structure.

    <variable_declaration> ::=
        <type_mark> <identifier> [ '[' <number> ']' ]

    Arguments:
        is_global: Denotes if the variable is to be globally scoped.
            (Default: False)
        is_param: Denotes if the variable is a procedure parameter. A
            parameter is not added to the identifier table here.
            (Default: False)

    Returns:
        The Identifier class object of the variable encountered.

    Raises:
        ParserTypeError: If the array size is not of integer type.
    """
    var_type = self._parse_type_mark()

    # Formally match the token to an identifier type
    name_token = self._match('identifier')

    # Stays None unless an array size follows the name
    array_size = None

    if self._accept('symbol', '['):
        size_type = self._parse_number(generate_code=False)

        # The size is read from the token that was consumed last
        size_token = self._previous
        array_size = size_token.value

        # Memory can only be allocated for an integer number of elements
        if size_type != 'integer':
            self._type_error('integer', size_type, size_token.line)
            raise ParserTypeError()

        self._match('symbol', ']')

    # Get the memory space pointer for this variable
    offset = self.get_mm(array_size, is_param=is_param)

    id_obj = Identifier(name_token.value, var_type, array_size, None,
                        offset)

    if is_param:
        return id_obj

    # The declaration was valid, add the identifier to the table
    try:
        self._ids.add(id_obj, is_global=is_global)
    except ParserNameError as err:
        self._name_error(str(err), name_token.value, name_token.line)

    return id_obj
521 |
def _parse_type_mark(self):
    """<type_mark> (Protected)

    Parses the <type_mark> language structure.

    <type_mark> ::=
        'integer' |
        'float' |
        'bool' |
        'string'

    Returns:
        Type (as string) of the variable being declared.
    """
    # Tried in order; the first keyword that is accepted is the type
    for type_name in ('integer', 'float', 'bool', 'string'):
        if self._accept('keyword', type_name):
            return type_name

    self._syntax_error('variable type')

    return None
550 |
def _first_procedure_declaration(self):
    """first(<procedure_declaration>) (Protected)

    Determines if current token matches the first terminals.

    first(<procedure_declaration>) ::=
        'procedure'

    Returns:
        True if current token matches a first terminal, False otherwise.
    """
    first_terminal = ('keyword', 'procedure')

    return self._check(*first_terminal)
563 |
def _parse_procedure_declaration(self, is_global):
    """<procedure_declaration> (Protected)

    Parses the <procedure_declaration> language structure.

    <procedure_declaration> ::=
        <procedure_header> <procedure_body>

    Arguments:
        is_global: Denotes if the procedure is to be globally scoped.
    """
    # The header yields the procedure identifier that the body needs
    procedure_id = self._parse_procedure_header(is_global=is_global)

    self._parse_procedure_body(procedure_id)
579 |
def _parse_procedure_header(self, is_global):
    """<procedure_header> (Protected)

    Parses the <procedure_header> language structure, registers the
    procedure and its parameters in the identifier table, opens the
    procedure scope and emits the procedure entry label.

    <procedure_header> ::=
        'procedure' <identifier> '(' [ <parameter_list> ] ')'

    Arguments:
        is_global: Denotes if the procedure is to be globally scoped.

    Returns:
        The id object with information about the procedure identifier.
    """
    self._match('keyword', 'procedure')

    id_name = self._current.value
    id_line = self._current.line

    self._match('identifier')
    self._match('symbol', '(')

    params = []

    if not self._check('symbol', ')'):
        params = self._parse_parameter_list(params)

    self._match('symbol', ')')

    # Generate procedure label. This will be stored with the identifier
    # in place of the mm_ptr attribute since it will not be used
    label_id = self.get_label_id()

    id_obj = Identifier(id_name, 'procedure', None, params, label_id)

    # Add the procedure identifier to the parent scope
    try:
        self._ids.add(id_obj, is_global=is_global)
    except ParserNameError:
        self._name_error('name already declared at this scope', id_name,
                         id_line)

    # The procedure scope is opened even when the name was rejected: the
    # procedure body always pops one scope when it ends, so skipping the
    # push would remove the parent scope instead
    self._ids.push_scope(id_obj.name)

    # Add the procedure to its own scope as well
    try:
        self._ids.add(id_obj)
    except ParserNameError:
        self._name_error('name already declared at this scope', id_name,
                         id_line)

    # Attempt to add each encountered param at the procedure scope
    for param in params:
        try:
            self._ids.add(param.id, is_global=False)
        except ParserNameError:
            self._name_error('name already declared at global scope',
                             param.id.name, id_line)

    # Define the entry point for the function w/ unique identifier
    self.generate('%s_%d:' % (id_obj.name, id_obj.mm_ptr))
    self.tab_push()

    # The entry point jumps straight to the begin label of the body
    self.generate('goto %s_%d_begin;' % (id_obj.name, id_obj.mm_ptr))
    self.generate('')

    return id_obj
638 |
def _parse_procedure_body(self, procedure_id):
    """<procedure_body> (Protected)

    Parses the <procedure_body> language structure. A declaration or
    statement that raises a parser error is skipped up to the next ';'
    so that parsing can continue. Code to return to the caller is always
    emitted at the end of the body.

    <procedure_body> ::=
        ( <declaration> ';' )*
        'begin'
        ( <statement> ';' )*
        'end' 'procedure'

    Arguments:
        procedure_id: The identifier object for the procedure.
    """
    local_var_size = 0

    # Reset the pointers used for the local variables and the params
    self.reset_local_ptr()
    self.reset_param_ptr()

    # Accept any declarations
    while not self._accept('keyword', 'begin'):
        try:
            size = self._parse_declaration()
        except ParserError:
            self._resync_at_token('symbol', ';')
        else:
            # If this was a local var, allocate space for it. The size
            # of an array is taken from its size token and may not be an
            # int yet, so convert it the same way the program body does
            if size is not None:
                local_var_size += int(size)

        self._match('symbol', ';')

    # Define the function begin point
    self.generate('%s_%d_begin:' %
                  (procedure_id.name, procedure_id.mm_ptr))

    self.tab_push()

    if local_var_size != 0:
        self.comment('Allocating space for local variables', self.debug)
        self.generate('R[SP] = R[SP] - %d;' % local_var_size)

    # Accept any statements
    while not self._accept('keyword', 'end'):
        try:
            self._parse_statement()
        except ParserError:
            self._resync_at_token('symbol', ';')

        self._match('symbol', ';')

    self._match('keyword', 'procedure')

    # Generate code to jump back to the caller scope
    self.generate_return(self.debug)
    self.generate('')

    # Undo the indentation of the body and of the entry label, and
    # leave the procedure scope
    self.tab_pop()
    self._ids.pop_scope()
    self.tab_pop()
702 |
def _parse_parameter_list(self, params):
    """<parameter_list> (Protected)

    Parses the <parameter_list> language structure.

    <parameter_list> ::=
        <parameter> ',' <parameter_list> |
        <parameter>

    Arguments:
        params: A list of Parameter named tuples associated with the
            procedure. Parsed parameters are appended to this list.

    Returns:
        The same list, completed with all Parameter named tuples found.
    """
    while True:
        # There is always at least one parameter
        params.append(self._parse_parameter())

        # A comma announces another parameter; anything else ends the list
        if not self._accept('symbol', ','):
            return params
730 |
def _parse_parameter(self):
    """<parameter> (Protected)

    Parses the <parameter> language structure.

    <parameter> ::=
        <variable_declaration> ( 'in' | 'out' )

    Returns:
        A Parameter built from the declared identifier and its direction
        ('in' or 'out').
    """
    # Declared with is_param=True, so the identifier is handed back
    # without being added to the identifier table at this point
    id_obj = self._parse_variable_declaration(is_param=True)

    direction = None

    for keyword in ('in', 'out'):
        if self._accept('keyword', keyword):
            direction = keyword
            break
    else:
        # Neither direction keyword followed the declaration
        self._syntax_error('"in" or "out"')

    return Parameter(id_obj, direction)
754 |
def _parse_statement(self):
    """<statement> (Protected)

    Parses the <statement> language structure. The alternatives are tried
    in the order listed; the first one whose first terminal matches the
    current token is parsed.

    <statement> ::=
        'return' |
        <if_statement> |
        <loop_statement> |
        <procedure_call> |
        <assignment_statement>
    """
    if self._accept('keyword', 'return'):
        # Go to the return label to exit the procedure/program
        self.generate_return(self.debug)
        return

    # (first() test, parse method) for each remaining alternative
    alternatives = (
        (self._first_if_statement, self._parse_if_statement),
        (self._first_loop_statement, self._parse_loop_statement),
        (self._first_procedure_call, self._parse_procedure_call),
        (self._first_assignment_statement, self._parse_assignment_statement),
    )

    for starts_here, parse in alternatives:
        if starts_here():
            parse()
            return

    self._syntax_error('statement')
782 |
def _first_assignment_statement(self):
    """first(<assignment_statement>) (Protected)

    Determines if the current token matches the first terminals.

        first(<assignment_statement>) ::=
            <identifier>

    Returns:
        The result of checking the current token for an identifier.
    """
    starts_with_identifier = self._check('identifier')
    return starts_with_identifier
795 |
def _parse_assignment_statement(self):
    """<assignment_statement> (Protected)

    Parses the <assignment_statement> language structure.

        <assignment_statement> ::=
            <destination> ':=' <expression>

    Raises:
        ParserTypeError: if the destination is a parameter whose
            direction is not 'out'.
    """
    # Remember the destination token before the parser moves past it
    target = self._current
    id_name, id_line = target.value, target.line

    dest_type = self._parse_destination()

    # Most recently used register; relevant when the destination was
    # indexed as an array element
    index_reg = self.get_reg(inc=False)

    # Look the identifier up in the identifier table
    id_obj = self._ids.find(id_name)

    self._match('symbol', ':=')

    expr_type = self._parse_expression()

    # Register left behind by the expression that was just parsed
    expr_reg = self.get_reg(inc=False)

    # A mismatch is reported, but parsing carries on
    if dest_type != expr_type:
        self._type_error(dest_type, expr_type, id_line)

    # Where the identifier lives
    id_location = self._ids.get_id_location(id_name)

    # Parameters may only be assigned to when declared 'out'
    if id_location == 'param':
        direction = self._ids.get_param_direction(id_name)
        is_writable = direction == 'out'
        if not is_writable:
            self._type_error('\'out\' param',
                             '\'%s\' param' % direction, id_line)
            raise ParserTypeError()

    # Emit the code that stores the expression value in the destination
    self.generate_assignment(id_obj, id_location, index_reg, expr_reg,
                             self.debug)

    return
841 |
def _first_if_statement(self):
    """first(<if_statement>) (Protected)

    Determines if the current token matches the first terminals.

        first(<if_statement>) ::=
            'if'

    Returns:
        The result of checking the current token for the 'if' keyword.
    """
    starts_with_if = self._check('keyword', 'if')
    return starts_with_if
854 |
def _parse_if_statement(self):
    """<if_statement> (Protected)

    Parses the <if_statement> language structure.

        <if_statement> ::=
            'if' '(' <expression> ')' 'then' ( <statement> ';' )+
            [ 'else' ( <statement> ';' )+ ]
            'end' 'if'
    """
    def parse_terminated_statement():
        # One '<statement> ;' pair. A ParserError raised by the
        # statement is handed to _resync_at_token() before the ';' is
        # matched, so the remainder of the block is still parsed.
        try:
            self._parse_statement()
        except ParserError:
            self._resync_at_token('symbol', ';')

        self._match('symbol', ';')

    self._match('keyword', 'if')
    self._match('symbol', '(')
    self._parse_expression()
    self._match('symbol', ')')
    self._match('keyword', 'then')

    label = self.get_label_id()
    cond_reg = self.get_reg(inc=False)

    # A false condition jumps over the 'then' block
    self.generate('if (!R[%d]) goto else_%d;' % (cond_reg, label))
    self.tab_push()

    # 'then' block: at least one statement, ended by 'else' or 'end'
    parse_terminated_statement()
    while not (self._check('keyword', 'else') or
               self._check('keyword', 'end')):
        parse_terminated_statement()

    # The 'then' block skips whatever follows the else label
    self.generate('goto endif_%d;' % label)

    # The else label is emitted whether or not an 'else' block exists
    self.tab_pop()
    self.generate('else_%d:' % label)
    self.tab_push()

    # Optional 'else' block: at least one statement, ended by 'end'
    if self._accept('keyword', 'else'):
        parse_terminated_statement()
        while not self._check('keyword', 'end'):
            parse_terminated_statement()

    self._match('keyword', 'end')
    self._match('keyword', 'if')

    self.tab_pop()
    self.generate('endif_%d:' % label)

    return
913 |
def _first_loop_statement(self):
    """first(<loop_statement>) (Protected)

    Determines if the current token matches the first terminals.

        first(<loop_statement>) ::=
            'for'

    Returns:
        The result of checking the current token for the 'for' keyword.
    """
    starts_with_for = self._check('keyword', 'for')
    return starts_with_for
926 |
def _parse_loop_statement(self):
    """<loop_statement> (Protected)

    Parses the <loop_statement> language structure.

        <loop_statement> ::=
            'for' '(' <assignment_statement> ';' <expression> ')'
            ( <statement> ';' )*
            'end' 'for'
    """
    self._match('keyword', 'for')
    self._match('symbol', '(')

    # The loop label precedes the assignment, so the assignment is part
    # of what the closing 'goto loop_N' jumps back to
    label = self.get_label_id()
    self.generate('loop_%d:' % label)
    self.tab_push()

    try:
        self._parse_assignment_statement()
    except ParserError:
        self._resync_at_token('symbol', ';')

    self._match('symbol', ';')

    self._parse_expression()
    self._match('symbol', ')')

    # Leave the loop as soon as the condition register is false
    cond_reg = self.get_reg(inc=False)
    self.generate('if (!R[%d]) goto endloop_%d;' % (cond_reg, label))

    # Loop body: zero or more '<statement> ;' pairs up to 'end'
    reached_end = self._accept('keyword', 'end')
    while not reached_end:
        try:
            self._parse_statement()
        except ParserError:
            self._resync_at_token('symbol', ';')

        self._match('symbol', ';')

        reached_end = self._accept('keyword', 'end')

    self._match('keyword', 'for')

    # Jump back for the next iteration, then place the exit label
    self.generate('goto loop_%d;' % label)
    self.tab_pop()
    self.generate('endloop_%d:' % label)

    return
972 |
def _first_procedure_call(self):
    """first(<procedure_call>) (Protected)

    Determines if the current token matches the first terminals. Both a
    procedure call and an assignment start with an identifier, so the
    check is made against the future token (check_future=True): only a
    procedure call continues with '('.

        first(<procedure_call>) ::=
            <identifier> '('

    Returns:
        The result of checking the future token for the '(' symbol.
    """
    followed_by_paren = self._check('symbol', '(', check_future=True)
    return followed_by_paren
987 |
def _parse_procedure_call(self):
    """<procedure_call> (Protected)

    Parses the <procedure_call> language structure.

        <procedure_call> ::=
            <identifier> '(' [ <argument_list> ] ')'

    Raises:
        ParserNameError: if the identifier cannot be found; the error
            is reported through _name_error() and then re-raised.
        ParserTypeError: if the identifier is not a procedure.
        ParserRuntimeError: if fewer arguments are given than the
            procedure declares, including a call with empty
            parentheses.
    """
    # Remember the identifier token before the parser moves past it
    id_name = self._current.value
    id_line = self._current.line

    self._match('identifier')

    try:
        id_obj = self._ids.find(id_name)
    except ParserNameError:
        self._name_error('procedure has not been declared', id_name,
                         id_line)
        raise

    if id_obj.type != 'procedure':
        self._type_error('procedure', id_obj.type, id_line)
        raise ParserTypeError()

    self._match('symbol', '(')

    # Start from "no arguments" so that a call written with empty
    # parentheses is validated like any other call
    num_args = 0
    out_names = []

    if not self._check('symbol', ')'):
        num_args, out_names = self._parse_argument_list(
            id_obj.params,
            out_names,
            index=0)

    # Make sure that too few arguments are not used. This runs whether
    # or not an argument list was present.
    if num_args < len(id_obj.params):
        self._runtime_error(
            'procedure call accepts %d argument(s), %d given' %
            (len(id_obj.params), num_args), id_line)

        raise ParserRuntimeError()

    self._match('symbol', ')')

    # Generate all procedure call code
    self.generate_procedure_call(id_obj.name, id_obj.mm_ptr, self.debug)

    # Pop parameters off the stack
    for index, param in enumerate(id_obj.params):
        out_name = out_names[index]

        self.generate_param_pop(param.id.name, self.debug)

        # An outbound parameter must be written back to the identifier
        # that was passed in its position
        if param.direction == 'out':
            # Identifier object of the write-back destination
            out_id = self._ids.find(out_name)

            # Where that identifier lives
            out_location = self._ids.get_id_location(out_name)

            # Store the parameter in the appropriate location
            self.generate_param_store(out_id, out_location, self.debug)

    # Finish the procedure call
    self.generate_procedure_call_end(self.debug)

    return
1058 |
def _parse_argument_list(self, params, out_names, index=0):
    """<argument_list> (Protected)

    Parses the <argument_list> language structure.

        <argument_list> ::=
            <expression> ',' <argument_list> |
            <expression>

    Arguments:
        params: A list of Parameter namedtuple objects allowed in the
            procedure call.
        out_names: A list that receives, per argument, the identifier
            name to write back for an 'out' parameter or None for an
            'in' parameter. It is extended in place.
        index: The index in params with which to match the first
            argument found. (Default: 0)

    Returns:
        A tuple (index, out_names) consisting of the number of arguments
        encountered and the list of write-back identifier names.

    Raises:
        ParserRuntimeError: if more arguments are given than params has
            entries.
    """
    # Registers holding each argument value, in source order
    arg_regs = []

    while True:
        arg_line = self._current.line
        arg_type = None

        # Make sure that too many arguments are not used
        if index >= len(params):
            self._runtime_error(
                'procedure call accepts only %d argument(s)' %
                len(params), arg_line)
            raise ParserRuntimeError()

        # Parameter declared for this position in the argument list
        param = params[index]

        if param.direction == 'out':
            # Only a name is parsed for an 'out' parameter; remember it
            # for the write-back
            arg_name = self._current.value
            arg_type = self._parse_name()

            out_names.append(arg_name)
        elif param.direction == 'in':
            # An 'in' parameter may be any expression; nothing to write
            # back
            arg_type = self._parse_expression()

            out_names.append(None)

        # The last register used by the argument holds its value
        arg_regs.append(self.get_reg(inc=False))

        # A mismatch is reported, but parsing carries on
        if arg_type != param.id.type:
            self._type_error(param.id.type, arg_type, arg_line)

        index += 1

        if not self._accept('symbol', ','):
            break

    # Push the arguments in reverse order: the last argument goes first
    for reg in reversed(arg_regs):
        self.generate_param_push(reg, self.debug)

    return index, out_names
1123 |
def _parse_destination(self):
    """<destination> (Protected)

    Parses the <destination> language structure.

        <destination> ::=
            <identifier> [ '[' <expression> ']' ]

    Returns:
        Type of the destination identifier as a string.

    Raises:
        ParserNameError: if the identifier cannot be found; the error
            is reported through _name_error() and then re-raised.
        ParserTypeError: if the identifier is not an integer, float,
            bool or string variable.
    """
    # Remember the identifier token before the parser moves past it
    id_name = self._current.value
    id_line = self._current.line

    self._match('identifier')

    # Make sure that identifier is valid for the scope
    try:
        id_obj = self._ids.find(id_name)
    except ParserNameError:
        self._name_error('not declared in this scope', id_name, id_line)
        raise

    # Check type to make sure it's a variable
    if id_obj.type not in ('integer', 'float', 'bool', 'string'):
        self._type_error('variable', id_obj.type, id_line)
        raise ParserTypeError()

    id_type = id_obj.type

    if self._accept('symbol', '['):
        expr_line = self._current.line
        expr_type = self._parse_expression()

        # A non-integer index is reported, but parsing carries on
        if expr_type != 'integer':
            self._type_error('integer', expr_type, expr_line)

        # The closing bracket is mandatory in the grammar, so it is
        # matched rather than optionally accepted; a missing ']' must
        # not pass unnoticed
        self._match('symbol', ']')
    elif id_obj.size is not None:
        # An identifier with a size was used without an index
        self._runtime_error('%s: array requires index' % id_name, id_line)

    return id_type
1166 |
def _parse_expression(self):
    """<expression> (Protected)

    Parses the <expression> language structure.

        <expression> ::=
            <expression> '&' <arith_op> |
            <expression> '|' <arith_op> |
            [ 'not' ] <arith_op>

    Returns:
        The type value of the expression. This is the type of the first
        <arith_op>; later operands do not change it.

    Raises:
        ParserTypeError: if 'not', '&' or '|' is applied to an operand
            that is neither 'integer' nor 'bool'.
    """
    self.comment('Parsing expression', self.debug)

    negate = False

    if self._accept('keyword', 'not'):
        negate = True

    line = self._current.line
    id_type = self._parse_arith_op()

    if negate and id_type not in ['integer', 'bool']:
        self._type_error('integer or bool', id_type, line)
        raise ParserTypeError()

    # Register holding the result of the last '&'/'|' operation; stays
    # None when the expression is a single operand
    result = None

    while True:
        # Register holding the value computed so far
        operand1 = self.get_reg(inc=False)

        if self._accept('symbol', '&'):
            operation = '&'
        elif self._accept('symbol', '|'):
            operation = '|'
        else:
            break

        if id_type not in ['integer', 'bool']:
            self._type_error('integer or bool', id_type, line)
            raise ParserTypeError()

        next_type = self._parse_arith_op()

        operand2 = self.get_reg(inc=False)

        if next_type not in ['integer', 'bool']:
            self._type_error('integer or bool', next_type, line)
            raise ParserTypeError()

        result = self.generate_operation(operand1, id_type, operand2,
                                         next_type, operation)

    if negate:
        # With no '&'/'|' operation there is no operation result to
        # negate; the value then sits in the register read at the top of
        # the final loop pass
        if result is None:
            result = operand1

        self.generate('R[%d] = ~R[%d];' % (result, result))

    return id_type
1223 |
def _parse_arith_op(self):
    """<arith_op> (Protected)

    Parses the <arith_op> language structure.

        <arith_op> ::=
            <arith_op> '+' <relation> |
            <arith_op> '-' <relation> |
            <relation>

    Returns:
        The type value of the expression. This is the type of the first
        <relation>; later operands do not change it.

    Raises:
        ParserTypeError: if '+' or '-' is applied to an operand that is
            neither 'integer' nor 'float'.
    """
    numeric_types = ['integer', 'float']
    operators = ('+', '-')

    line = self._current.line
    id_type = self._parse_relation()

    while True:
        # Register holding the value computed so far
        operand1 = self.get_reg(inc=False)

        # First operator symbol accepted at the current token, if any
        operation = next(
            (op for op in operators if self._accept('symbol', op)), None)

        if operation is None:
            break

        if id_type not in numeric_types:
            self._type_error('integer or float', id_type, line)
            raise ParserTypeError()

        next_type = self._parse_relation()

        # Register holding the right-hand operand
        operand2 = self.get_reg(inc=False)

        if next_type not in numeric_types:
            self._type_error('integer or float', next_type, line)
            raise ParserTypeError()

        self.generate_operation(operand1, id_type, operand2, next_type,
                                operation)

    return id_type
1266 |
def _parse_relation(self):
    """<relation> (Protected)

    Parses the <relation> language structure.

        <relation> ::=
            <relation> '<' <term> |
            <relation> '>' <term> |
            <relation> '>=' <term> |
            <relation> '<=' <term> |
            <relation> '==' <term> |
            <relation> '!=' <term> |
            <term>

    Returns:
        The type value of the expression. This is the type of the first
        <term>; later operands do not change it.

    Raises:
        ParserTypeError: if a relational operator is applied to an
            operand that is neither 'integer' nor 'bool'.
    """
    # Relational operators are only valid for integer or bool operands
    comparable_types = ['integer', 'bool']

    # Tried in this order against the current token
    operators = ('<', '>', '<=', '>=', '==', '!=')

    line = self._current.line
    id_type = self._parse_term()

    while True:
        # Register holding the value computed so far
        operand1 = self.get_reg(inc=False)

        # First operator symbol accepted at the current token, if any
        operation = next(
            (op for op in operators if self._accept('symbol', op)), None)

        if operation is None:
            break

        if id_type not in comparable_types:
            self._type_error('integer or bool', id_type, line)
            raise ParserTypeError()

        next_type = self._parse_term()

        # Register holding the right-hand operand
        operand2 = self.get_reg(inc=False)

        if next_type not in comparable_types:
            self._type_error('integer or bool', next_type, line)
            raise ParserTypeError()

        self.generate_operation(operand1, id_type, operand2, next_type,
                                operation)

    return id_type
1323 |
1324 | def _parse_term(self):
1325 | """ (Protected)
1326 |
1327 | Parses language structure.
1328 |
1329 | ::=
1330 | '*' |
1331 |