├── lib
    ├── __init__.py
    ├── errors.py
    ├── datatypes.py
    ├── scanner.py
    ├── codegenerator.py
    └── parser.py
├── tests
    ├── simpleadd_good.src
    ├── scopetest_good.src
    ├── recursiontest_good.src
    ├── globaltest_good.src
    ├── looptest_good.src
    ├── bigtest_good.src
    ├── codegen_good.src
    ├── bigtest_bad.src
    └── runtime_good.src
├── compiler.py
└── README.md


/lib/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/simpleadd_good.src:
--------------------------------------------------------------------------------
 1 | program simpleadd is
 2 | 
 3 |     integer int1;
 4 |     integer int2;
 5 |     integer result;
 6 | 
 7 | begin
 8 | 
 9 |     // Get the first input
10 |     putString("Enter 1st Integer:");
11 |     getInteger(int1);
12 | 
13 |     // Get the second input
14 |     putString("Enter 2nd Integer:");
15 |     getInteger(int2);
16 | 
17 |     // Calculate the sum
18 |     result := int1 + int2;
19 | 
20 |     putString("Result:");
21 |     putInteger(result);
22 | 
23 | end program
24 | 


--------------------------------------------------------------------------------
/tests/scopetest_good.src:
--------------------------------------------------------------------------------
 1 | // SCOPE TEST PROGRAM
 2 | program scope_test is
 3 |     integer add1;
 4 |     integer add2;
 5 |     integer sum;
 6 | 
 7 |     procedure add(integer add1 in, integer add2 in, integer sum out)
 8 |     begin
 9 |         sum := add1 + add2;
10 |         return;
11 |     end procedure;
12 | 
13 | begin
14 | 
15 |     add1 := 1;
16 |     add2 := 5; 
17 | 
18 |     add(add1, add2, sum);
19 | 
20 |     if (sum == 6) then
21 |         putString("SUCCESS");
22 |     else
23 |         putString("FAILURE");
24 |     end if;
25 | 
26 |     return;
27 | 
28 | end program
29 | 


--------------------------------------------------------------------------------
/tests/recursiontest_good.src:
--------------------------------------------------------------------------------
 1 | program recursiontest is
 2 | 
 3 |     integer start_val;
 4 |     global integer end_val;
 5 | 
 6 |     procedure count_to_ten(integer current_val in)
 7 |     begin
 8 |         if (current_val < 10) then
 9 |             count_to_ten(current_val + 1);
10 |         else
11 |             end_val := current_val;
12 |         end if;
13 |     end procedure;
14 | 
15 | begin
16 | 
17 |     start_val := 1;
18 |     end_val := 0;
19 | 
20 |     count_to_ten(start_val);
21 | 
22 |     if (end_val == 10) then
23 |         putString("SUCCESS");
24 |     else
25 |         putString("FAILURE");
26 |     end if;
27 | 
28 | end program
29 | 


--------------------------------------------------------------------------------
/tests/globaltest_good.src:
--------------------------------------------------------------------------------
 1 | // GLOBAL TEST PROGRAM
 2 | program scope_test is
 3 |     global integer my_global_int;
 4 |     global integer result;
 5 | 
 6 |     global procedure increment_global()
 7 |     begin
 8 |         result := my_global_int + 1;
 9 |     end procedure;
10 | 
11 |     procedure calls_increment_global()
12 |     begin
13 |         // See if we can call a global procedure from this scope
14 |         increment_global();
15 |     end procedure;
16 | 
17 | begin
18 |     my_global_int := 9;
19 |     result := 0;
20 | 
21 |     calls_increment_global();
22 | 
23 |     if (result == 10) then
24 |         putString("SUCCESS");
25 |     else
26 |         putString("FAILURE");
27 |     end if;
28 | 
29 |     return;
30 | 
31 | end program
32 | 


--------------------------------------------------------------------------------
/tests/looptest_good.src:
--------------------------------------------------------------------------------
 1 | program looptest is
 2 | 
 3 |     integer result;
 4 |     global integer num_loops;
 5 | 
 6 |     procedure do_loops(integer result out)
 7 |         integer counter;
 8 |     begin
 9 |         counter := 0;
10 | 
11 |         putString("Number of Loops:");
12 |         putInteger(num_loops);
13 | 
14 |         for (counter := counter + 1; counter <= num_loops)
15 |             putString("Current Counter:");
16 |             putInteger(counter);
17 |             result := counter;
18 |         end for;
19 |      end procedure;
20 | 
21 | begin
22 | 
23 |     result := 0;
24 |     num_loops := 10;
25 | 
26 |     do_loops(result);
27 | 
28 |     putString("Expect 10");
29 | 
30 |     if (result == 10) then
31 |         putString("SUCCESS");
32 |     else
33 |         putString("FAILURE");
34 |     end if;
35 | 
36 | end program
37 | 


--------------------------------------------------------------------------------
/lib/errors.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | 
 4 | class ParserError(Exception):
 5 |     """ParserError class
 6 | 
 7 |     The base error class for all other parsing errors. This should be caught
 8 |     at resync points.
 9 |     """
10 |     pass
11 | 
12 | 
13 | class ParserSyntaxError(ParserError):
14 |     """ParserSyntaxError class
15 | 
16 |     Thrown when a syntax error occurs in the parser.
17 |     """
18 |     pass
19 | 
20 | 
21 | class ParserNameError(ParserError):
22 |     """ParserNameError class
23 | 
24 |     Thrown when a name error occurs in the parser.
25 |     """
26 |     pass
27 | 
28 | 
29 | class ParserTypeError(ParserError):
30 |     """ParserTypeError class
31 | 
32 |     Thrown when a type error occurs in the parser.
33 |     """
34 |     pass
35 | 
36 | 
37 | class ParserRuntimeError(ParserError):
38 |     """ParserRuntimeError class
39 | 
40 |     Thrown when a runtime error occurs in the parser.
41 |     """
42 |     pass
43 | 


--------------------------------------------------------------------------------
/tests/bigtest_good.src:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////////////////////////////////////////////////
 2 | // File:   bigtest_good.src
 3 | // Author: Evan Sneath
 4 | //////////////////////////////////////////////////////////////////////////////
 5 | 
 6 | // This is a test of the parsing component of the compiler.
 7 | 
 8 | program test_program is
 9 | 
10 |     integer i;
11 |     global integer one_int;
12 |     integer two_int;
13 |     integer red_int;
14 |     integer blue_int;
15 |     integer mult_of_two;
16 |     integer test_array[15];
17 | 
18 |     string test_string;
19 | 
20 |     bool isGreaterThan;
21 | 
22 |     procedure test_proc(integer my_int in)
23 |         string two_param;
24 |         integer assigned_int;
25 |     begin
26 |         two_param := "Hello, World 2";
27 |         assigned_int := my_int;
28 | 
29 |         return;
30 |     end procedure;
31 | 
32 | begin
33 | 
34 |     test_string := "Hello, World 1";
35 | 
36 |     one_int := 1;
37 |     two_int := 2;
38 |     red_int := 3 + 4.;
39 |     mult_of_two := 1;
40 | 
41 |     test_proc(red_int);
42 | 
43 |     // Test some expressions
44 |     blue_int := (((((one_int * two_int[one_int]) + red_int))));
45 | 
46 |     isGreaterThan := false;
47 | 
48 |     // Testing 'if' statements
49 |     if ((blue_int * 2) >= -1) then
50 |         isGreaterThan := true;
51 |     end if;
52 | 
53 |     // Testing 'for' statements
54 |     for (i := 0; i <= blue_int)
55 |         i := i + 1.;
56 |         mult_of_two := i * 2;
57 |     end for;
58 | 
59 |     return;
60 | 
61 | end program
62 | 


--------------------------------------------------------------------------------
/tests/codegen_good.src:
--------------------------------------------------------------------------------
 1 | program codegen_test is
 2 | 
 3 |     integer count;
 4 | 
 5 |     integer add1;
 6 |     integer add2;
 7 | 
 8 |     float float1;
 9 |     float float2;
10 | 
11 |     float result1;
12 | 
13 |     string hello;
14 | 
15 |     integer sum[2];
16 |     integer result;
17 | 
18 |     // Test a procedure declaration. Will be called later
19 |     procedure increment (integer val in, integer result out)
20 |         integer tmp1;
21 |     begin
22 |         tmp1 := 1;
23 |         result := tmp1 + val;
24 |     end procedure;
25 | 
26 | begin
27 | 
28 |     result := 0;
29 |     increment(4 + 7 + 3, result);
30 | 
31 |     if (4 + 7 + 3 + 1 == result) then
32 |         putString("SUCCESS");
33 |     else
34 |         putString("FAILURE");
35 |     end if;
36 | 
37 |     add1 := 1;
38 |     add2 := 5; 
39 | 
40 |     float1 := 1.1;
41 |     float2 := 2.;
42 | 
43 |     result1 := float1 + float2;
44 |     result1 := float1 * add1;
45 |     
46 |     sum[0] := 1 + 5;
47 |     sum[1] := add1 + add2;
48 | 
49 |     if (sum[0] == sum[1]) then
50 |         result := 1;
51 |     else
52 |         result := 0;
53 |     end if;
54 | 
55 |     if (result == 1) then
56 |         putString("SUCCESS");
57 |     else
58 |         putString("FAILURE");
59 |     end if;
60 | 
61 |     // Set 'count' to 10 the long way
62 |     count := 0;
63 |     for (count := count; count < 10)
64 |         count := count + 1;
65 |     end for;
66 | 
67 |     if (count == 10) then
68 |         putString("SUCCESS");
69 |     else
70 |         putString("FAILURE");
71 |     end if;
72 | end program
73 | 


--------------------------------------------------------------------------------
/tests/bigtest_bad.src:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////////////////////////////////////////////////
 2 | // File:   bigtest_bad.src
 3 | // Author: Evan Sneath
 4 | // Description: This is a test of the parsing component of the compiler.
 5 | // Errors: 4 errors should be raised in this program.
 6 | //////////////////////////////////////////////////////////////////////////////
 7 | 
 8 | program test_program is
 9 | 
10 |     integer i;
11 |     global integer one_int;
12 |     integer two_int;
13 |     integer red_int;
14 |     integer blue_int;
15 |     integer mult_of_two[1];
16 |     integer test_array[15];
17 | 
18 |     // ERROR 1: Multiple definitions of two_int
19 |     integer two_int;
20 | 
21 |     string test_string;
22 | 
23 |     bool isGreaterThan;
24 | 
25 |     procedure test_proc(integer my_int in)
26 |         string two_param;
27 |         integer assigned_int;
28 |     begin
29 |         two_param := "Hello, World";
30 |         assigned_int := my_int;
31 | 
32 |         return;
33 |     end procedure;
34 | 
35 | begin
36 | 
37 |     test_string := "Hello, World";
38 | 
39 |     // ERROR 2: Variable used for function call
40 |     one_int();
41 | 
42 |     one_int := 1;
43 |     two_int := 2;
44 |     red_int := 3 + 4.;
45 |     mult_of_two := 1;
46 | 
47 |     test_proc(red_int);
48 | 
49 |     // ERROR 3: Variable this_doesnt_exist never declared
50 |     this_doesnt_exist := 42;
51 | 
52 |     // ERROR 4: Assigning to a procedure, not a variable
53 |     test_proc := "This is not ok";
54 | 
55 |     // Test some expressions
56 |     blue_int := (((((one_int * two_int[one_int]) + red_int))));
57 | 
58 |     isGreaterThan := false;
59 | 
60 |     // Testing 'if' statements
61 |     if ((blue_int * 2) >= -1) then
62 |         isGreaterThan := true;
63 |     end if;
64 | 
65 |     // Testing 'for' statements
66 |     for (i := 0; i <= blue_int)
67 |         i := i + 1.;
68 |         mult_of_two := i * 2;
69 |     end for;
70 | 
71 |     return;
72 | 
73 | end program
74 | 


--------------------------------------------------------------------------------
/tests/runtime_good.src:
--------------------------------------------------------------------------------
 1 | program codegen_test is
 2 | 
 3 |     integer testint;
 4 | 
 5 |     string teststring;
 6 |     string teststring2;
 7 |     string teststring3;
 8 | 
 9 |     bool testbool;
10 |     float testfloat;
11 | 
12 |     procedure increment (integer val in, integer result out)
13 |     begin
14 |         result := val + 1;
15 |     end procedure;
16 | 
17 | begin
18 | 
19 |     ////////////////////////////////////////////
20 |     // INTEGER TEST
21 | 
22 |     //testint := 0;
23 |     //increment(41, testint);
24 | 
25 |     //putString("41 plus 1 is...");
26 |     //putInteger(testint);
27 | 
28 |     //putString("Enter an integer");
29 |     //getInteger(testint);
30 |     //putString("You entered...");
31 |     //putInteger(testint);
32 | 
33 |     ////////////////////////////////////////////
34 |     // BOOL TEST
35 | 
36 |     //putString("Enter a boolean value 0 or 1");
37 |     //getBool(testbool);
38 |     //putString("You entered...");
39 |     //putBool(testbool);
40 | 
41 |     ////////////////////////////////////////////
42 |     // FLOAT TEST
43 | 
44 |     //testfloat := 4.5;
45 |     //putFloat(testfloat);
46 |     //putFloat(1414.1414);
47 | 
48 |     //putString("Enter a float");
49 |     //getFloat(testfloat);
50 |     //putString("You entered...");
51 |     //putFloat(testfloat);
52 | 
53 |     ////////////////////////////////////////////
54 |     // STRING TEST
55 | 
56 |     putString("Enter a string");
57 |     getString(teststring);
58 |     putString("You entered...");
59 |     putString(teststring);
60 | 
61 |     putString("Enter a string");
62 |     getString(teststring2);
63 |     putString("You entered...");
64 |     putString(teststring2);
65 | 
66 |     putString("Enter a string");
67 |     getString(teststring3);
68 |     putString("You entered...");
69 |     putString(teststring3);
70 | 
71 |     putString("This is the first string");
72 |     putString(teststring);
73 | 
74 |     putString("This is the second string");
75 |     putString(teststring2);
76 | 
77 |     putString("This is the third string");
78 |     putString(teststring3);
79 | end program
80 | 


--------------------------------------------------------------------------------
/compiler.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | """Compiler module
 4 | 
 5 | Acts as the command line interface to the compiler components. When given a
 6 | source file, the compilation process will be executed.
 7 | 
 8 | Author: Evan Sneath
 9 | License: Open Software License v3.0
10 | 
11 | Functions:
12 |     parse_arguments: Parses incoming command line arguments.
13 |     run_compiler: Executes the complete compilation process.
14 | """
15 | 
16 | # Import standard libraries
17 | import argparse
18 | import subprocess
19 | import sys
20 | 
21 | # Import custom compiler libraries
22 | from lib.parser import Parser
23 | 
24 | 
25 | def parse_arguments():
26 |     """Parse Arguments
27 | 
28 |     Parses all command line arguments for the compiler program.
29 | 
30 |     Returns:
31 |         An object containing all expected command line arguments.
32 |     """
33 |     # Parse the command line arguments
34 |     parser = argparse.ArgumentParser()
35 |     parser.add_argument('-d', '--debug',
36 |                         help='print comments in generated code',
37 |                         action='store_true')
38 |     parser.add_argument('source',
39 |                         help='source file to compile')
40 |     parser.add_argument('-o', '--out',
41 |                         help='target path for the compiled code',
42 |                         action='store',
43 |                         default='a.out')
44 |     args = parser.parse_args()
45 | 
46 |     return args
47 | 
48 | 
49 | def run_compiler(source, target, debug=False):
50 |     """Run Compiler
51 | 
52 |     Executes the compilation process given a source file path.
53 | 
54 |     Arguments:
55 |         source: The source file to compile.
56 |         target: The destination binary executable file.
57 |         debug: If True, verbose parsing details are shown. (Default: False)
58 | 
59 |     Returns:
60 |         True on success, False otherwise.
61 |     """
62 |     # Define a temporary location for the intermediate C code
63 |     TMP_CODE_FILE = './ir.c'
64 | 
65 |     # Create a Parser object to parse the inputted source file
66 |     parser = Parser(debug)
67 | 
68 |     # Parse the source file to the temporary code file
69 |     if not parser.parse(source, TMP_CODE_FILE):
70 |         print('Error while parsing "%s"' % source)
71 |         return False
72 | 
73 |     # Set up gcc compilation command
74 |     gcc_cmd = ['gcc', '-m32', '-o', target, TMP_CODE_FILE]
75 | 
76 |     # Compile the temporary file with gcc. Output to the target location
77 |     if subprocess.call(gcc_cmd) != 0:
78 |         print('Error while compiling "%s"' % target)
79 |         return False
80 | 
81 |     return True
82 | 
83 | 
84 | if __name__ == '__main__':
85 |     # Parse compiler arguments
86 |     args = parse_arguments()
87 | 
88 |     # Run compilation process
89 |     result = run_compiler(args.source, args.out, debug=args.debug)
90 | 
91 |     # Terminate program
92 |     sys.exit(not result)
93 | 


--------------------------------------------------------------------------------
/lib/datatypes.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | """Types module
  4 | 
  5 | Provides data structures necessary for identifier tracking and handling in the
  6 | compilation process as well as tokenizing.
  7 | 
  8 | Author: Evan Sneath
  9 | License: Open Software License v3.0
 10 | 
 11 | Classes:
 12 |     Token: A named tuple object containing token information.
 13 |     Identifier: A named tuple object containing identifier information.
 14 |     Parameter: A named tuple object containing procedure param information.
 15 |     IdentifierTable: Extends the list type to provide ID table functionality.
 16 | """
 17 | 
 18 | from lib.errors import ParserNameError
 19 | from collections import namedtuple
 20 | 
 21 | 
 22 | """Token class
 23 | 
 24 | A named tuple object factory containing token information.
 25 | 
 26 | Attributes:
 27 |     type: The data type of the token to be stored.
 28 |     value: The value of the token being stored.
 29 |     line: The line number on which the token was encountered.
 30 | """
 31 | Token = namedtuple('Token', ['type', 'value', 'line'])
 32 | 
 33 | 
 34 | """Identifier class
 35 | 
 36 | A named tuple object factory containing identifier information.
 37 | 
 38 | Attributes:
 39 |     name: The identifier name. This acts as the dictionary key.
 40 |     type: The data type of the identifier.
 41 |     size: The number of elements of the identifier if a variable.
 42 |         If procedure, program, or non-array type, None is expected.
 43 |     params: A list of Parameter class objects describing procedure params.
 44 |     mm_ptr: A pointer to the location of the identifier in main memory.
 45 | """
 46 | Identifier = namedtuple('Identifier',
 47 |         ['name', 'type', 'size', 'params', 'mm_ptr'])
 48 | 
 49 | 
 50 | """Parameter class
 51 | 
 52 | A named tuple object factory containing procedure parameter information.
 53 | 
 54 | Attributes:
 55 |     id: The Identifier named tuple of the parameter.
 56 |     direction: The direction ('in' or 'out') of the parameter.
 57 | """
 58 | Parameter = namedtuple('Parameter', ['id', 'direction'])
 59 | 
 60 | 
 61 | class IdentifierTable(list):
 62 |     """IdentifierTable class
 63 | 
 64 |     Extends the List built-in type with all methods necessary for identifier
 65 |     table management during compilation.
 66 | 
 67 |     Methods:
 68 |         push_scope: Adds a new scope.
 69 |         pop_scope: Removes the highest scope.
 70 |         add: Adds a new identifier to the current or global scope.
 71 |         find: Determines if an identifier is in the current of global scope.
 72 |         get_id_location: Determines where the identifier exists in the scope.
 73 |         is_global: Determines if an identifier exists in the global scope.
 74 |         is_param: Determines if an identifier is a parameter of the scope.
 75 |         get_param_direction: Gets the direction of the parameter in the scope.
 76 |         get_current_scope_owner: Gets the program or procedure name from which
 77 |             the current scope was created.
 78 |     """
 79 |     def __init__(self):
 80 |         super().__init__()
 81 | 
 82 |         # Create the global scope
 83 |         self.append({})
 84 | 
 85 |         # Create a list of scope parent names (the owner of the scope)
 86 |         self._owner_ids = ['global']
 87 | 
 88 |         return
 89 | 
 90 |     def push_scope(self, owner_id):
 91 |         """Push New Identifier Scope
 92 | 
 93 |         Creates a new scope on the identifiers table and increases the global
 94 |         current scope counter.
 95 | 
 96 |         Arguments:
 97 |             owner_id: The name of the identifier which has created this scope.
 98 |         """
 99 |         # Create a brand new scope for the identifiers table
100 |         self.append({})
101 | 
102 |         # Save the owner of this scope for future lookup
103 |         self._owner_ids.append(owner_id)
104 | 
105 |         return
106 | 
107 |     def pop_scope(self):
108 |         """Pop Highest Identifier Scope
109 | 
110 |         Disposes of the current scope in the identifiers table and decrements
111 |         the global current scope counter.
112 |         """
113 |         # Remove this entire scope from the identifiers table
114 |         self.pop(-1)
115 | 
116 |         # Remove the identifier from the owner list
117 |         self._owner_ids.pop()
118 | 
119 |         return
120 | 
121 |     def add(self, identifier, is_global=False):
122 |         """Add Identifier to Scope
123 | 
124 |         Adds a new identifier to either the current scope of global.
125 | 
126 |         Arguments:
127 |             identifier: An Identifier named tuple object describing the new
128 |                 identifier to add to the table.
129 |             is_global: Determines whether the identifier should be added to
130 |                 the current scope or the global scope. (Default: False)
131 | 
132 |         Raises:
133 |             ParserNameError if the identifier has been declared at this scope.
134 |         """
135 |         scope = -1 if not is_global else 0
136 | 
137 |         if is_global and len(self) > 2:
138 |             raise ParserNameError('global name must be defined in program scope')
139 | 
140 |         if is_global and (identifier.name in self[0] or (len(self) > 1 and
141 |                           identifier.name in self[1])):
142 |             raise ParserNameError('name already declared at this scope')
143 | 
144 |         if not is_global and identifier.name in self[-1]:
145 |             raise ParserNameError('name already declared at this scope')
146 | 
147 |         self[scope][identifier.name] = identifier
148 | 
149 |         return
150 | 
151 |     def find(self, name):
152 |         """Find Identifier in Scope
153 | 
154 |         Searches for the given identifier in the current and global scope.
155 | 
156 |         Arguments:
157 |             name: The identifier name for which to search.
158 | 
159 |         Returns:
160 |             An Identifier named tuple containing identifier name, type and size
161 |             information if found in the current or global scopes.
162 | 
163 |         Raises:
164 |             ParserNameError if the given identifier is not found in any valid scope.
165 |         """
166 |         if name in self[-1]:
167 |             identifier = self[-1][name]
168 |         elif name in self[0]:
169 |             identifier = self[0][name]
170 |         else:
171 |             raise ParserNameError()
172 | 
173 |         return identifier
174 | 
175 |     def get_id_location(self, name):
176 |         """Get Identifier Location
177 | 
178 |         Determines the location of the identifier in the stack based on the
179 |         identifier's place in the id table.
180 | 
181 |         Arguments:
182 |             name: The identifier name for which to search.
183 | 
184 |         Returns:
185 |             A string value for the location of the identifier in the stack.
186 |             This may be 'global', 'param', or 'local'.
187 |         """
188 |         if self.is_global(name):
189 |             return 'global'
190 |         elif self.is_param(name):
191 |             return 'param'
192 | 
193 |         return 'local'
194 | 
195 |     def is_global(self, name):
196 |         """Identifier is Global
197 | 
198 |         Determines if an identifier exists in the global scope.
199 | 
200 |         Arguments:
201 |             name: The identifier name for which to search.
202 | 
203 |         Returns:
204 |             True if the identifier exists in the global scope. False otherwise.
205 |         """
206 |         return name in self[0]
207 | 
208 |     def is_param(self, name):
209 |         """Identifier is Parameter
210 | 
211 |         Determines if an identifier is a parameter in the current scope.
212 | 
213 |         Arguments:
214 |             name: The identifier name for which to search.
215 | 
216 |         Returns:
217 |             True if the identifier is a scope parameter. False otherwise.
218 |         """
219 |         owner = self.get_current_scope_owner()
220 | 
221 |         if owner == 'global' or not owner.params:
222 |             return False
223 | 
224 |         for param in owner.params:
225 |             if name == param.id.name:
226 |                 return True
227 | 
228 |         return False
229 | 
230 |     def get_param_direction(self, name):
231 |         """Get Parameter Direction
232 | 
233 |         If the name given is a valid parameter of the scope, the direction
234 |         ('in' or 'out') will be returned.
235 | 
236 |         Arguments:
237 |             name: The identifier name for which to search.
238 | 
239 |         Returns:
240 |             'in' or 'out' depending on the parameter direction. None if the
241 |             name given is not a valid parameter of the current scope.
242 |         """
243 |         owner = self.get_current_scope_owner()
244 |         
245 |         if owner == 'global':
246 |             return None
247 | 
248 |         for param in owner.params:
249 |             if name == param.id.name:
250 |                 return param.direction
251 | 
252 |         return None
253 | 
254 |     def get_current_scope_owner(self):
255 |         """Get Current Scope Owner
256 | 
257 |         Returns the Identifier object of the owner of the current scope. This
258 |         owner will either be a 'program' or 'procedure' type.
259 | 
260 |         Returns:
261 |             The Identifier object of the owner of the current scope. None if
262 |             the current scope is the global scope.
263 |         """
264 |         owner = self._owner_ids[-1]
265 | 
266 |         # If this is the global scope, return no owner
267 |         return self[-1][self._owner_ids[-1]] if owner != 'global' else None
268 | 


--------------------------------------------------------------------------------
/lib/scanner.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | """Scanner module
  4 | 
  5 | With any attached file, the Scanner class will scan the file token-by-token
  6 | until an end-of-file is encountered.
  7 | 
  8 | Author: Evan Sneath
  9 | License: Open Software License v3.0
 10 | 
 11 | Classes:
 12 |     Scanner: An implementation of a scanner for the source language.
 13 | """
 14 | 
 15 | from os.path import isfile
 16 | 
 17 | from lib.datatypes import Token
 18 | 
 19 | 
 20 | class Scanner:
 21 |     """Scanner class
 22 | 
 23 |     This class implements a scanner object to scan a source code file in the
 24 |     compilation process. It is designed to be inherited from so that it can
 25 |     be used during the parsing stage of the compiler.
 26 | 
 27 |     Attributes:
 28 |         keywords: A list of valid keywords in the language.
 29 |         symbols: A list of valid symbols in the language.
 30 | 
 31 |     Methods:
 32 |         attach_source: Binds a source file to the scanner to begin scanning.
 33 |         next_token: Returns the next token of the attached file. This token
 34 |             will be of the Token named tuple class.
 35 |     """
 36 |     # Define all language keywords
 37 |     keywords = [
 38 |         'string', 'integer', 'bool', 'float', 'global', 'is', 'in', 'out',
 39 |         'if', 'then', 'else', 'for', 'and', 'or', 'not', 'program',
 40 |         'procedure', 'begin', 'return', 'end', 'true', 'false',
 41 |     ]
 42 | 
 43 |     # Define all language symbols
 44 |     symbols = [
 45 |         ':', ';', ',', '+', '-', '*', '/', '(', ')', '<', '<=', '>', '>=',
 46 |         '!', '!=', '=', '==', ':=', '[', ']', '&', '|',
 47 |     ]
 48 | 
 49 |     def __init__(self):
 50 |         super().__init__()
 51 | 
 52 |         # Holds the file path of the attached source file
 53 |         self._src_path = ''
 54 | 
 55 |         # Holds all source file data (code) to be scanned
 56 |         self._src = ''
 57 | 
 58 |         # Holds the location of the next character to scan in the source file
 59 |         self._line_pos = 0
 60 |         self._char_pos = 0
 61 | 
 62 |         return
 63 | 
 64 |     def attach_source(self, src_path):
 65 |         """Attach Source 
 66 | 
 67 |         Attach a source file to the scanner and prepare for token collection.
 68 | 
 69 |         Arguments:
 70 |             src_path: The path to the source file to scan.
 71 | 
 72 |         Returns:
 73 |             True on success, False otherwise.
 74 |         """
 75 |         # Make sure the inputted file is a actual file
 76 |         if not isfile(src_path):
 77 |             print('Error: "%s"' % src_path)
 78 |             print('    Inputted path is not a file')
 79 |             return False
 80 | 
 81 |         # Try to read all data from the file and split by line
 82 |         try:
 83 |             with open(src_path) as f:
 84 |                 keepends = True
 85 |                 self._src = f.read().splitlines(keepends)
 86 |         except IOError:
 87 |             print('Error: "%s"' % src_path)
 88 |             print('    Could not read inputted file')
 89 |             return False
 90 | 
 91 |         # The file was attached and read successfully, store the path
 92 |         self._src_path = src_path
 93 | 
 94 |         return True
 95 | 
 96 |     def next_token(self):
 97 |         """Scan For Next Token
 98 | 
 99 |         Scans the source code for the next token. The next token is then
100 |         returned for parsing.
101 | 
102 |         Returns:
103 |             The next token object in the source code.
104 |         """
105 |         # Get the first character, narrow down the data type possibilities
106 |         char = self._next_word()
107 | 
108 |         if char is None:
109 |             return Token('eof', None, self._line_pos)
110 | 
111 |         # Use the first character to choose the token type to expect
112 |         if char == '"':
113 |             value, token_type = self._expect_string()
114 |         elif char.isdigit():
115 |             value, token_type = self._expect_number(char)
116 |         elif char.isalpha():
117 |             value, token_type = self._expect_identifier(char)
118 |         elif char in self.symbols:
119 |             value, token_type = self._expect_symbol(char)
120 |         else:
121 |             # We've run across a character that shouldn't be here
122 |             msg = 'Invalid character \'%s\' encountered' % char
123 |             self._scan_warning(msg, hl=self._char_pos-1)
124 | 
125 |             # Run this function again until we find something good
126 |             return self.next_token()
127 | 
128 |         if token_type == 'comment':
129 |             # If we find a comment, get a token on the next line
130 |             self._next_line()
131 |             return self.next_token()
132 | 
133 |         # Build the new token object
134 |         new_token = Token(token_type, value, self._line_pos+1)
135 | 
136 |         return new_token
137 | 
138 |     def _get_line(self, line_number):
139 |         """Get Line (Protected)
140 | 
141 |         Returns a line stripped of leading and trailing whitespace given a
142 |         line number.
143 | 
144 |         Arguments:
145 |             line_number: The line number of the attached source file to print.
146 | 
147 |         Returns:
148 |             The requested line number from the source, None on invalid line.
149 |         """
150 |         if 0 < line_number <= len(self._src):
151 |             return self._src[line_number-1].strip()
152 | 
153 |     def _scan_warning(self, msg, hl=-1):
154 |         """Print Scanner Warning Message (Protected)
155 | 
156 |         Prints a formatted warning message.
157 | 
158 |         Arguments:
159 |             msg: The warning message to display
160 |             hl: If not -1, there will be an pointer (^) under a
161 |                 character in the line to be highlighted. (Default: -1)
162 |         """
163 |         line = self._src[self._line_pos][0:-1]
164 | 
165 |         print('Warning: "', self._src_path, '", ', sep='', end='')
166 |         print('line ', self._line_pos+1, sep='')
167 |         print('    ', msg, '\n    ', line.strip(), sep='')
168 | 
169 |         if hl != -1:
170 |             left_spaces = line.find(line.strip()[0])
171 |             print('    %s^' % (' '*(abs(hl)-left_spaces)))
172 | 
173 |         return
174 | 
175 |     def _next_word(self):
176 |         """Get Next Word Character (Protected)
177 | 
178 |         Move the cursor to the start of the next non-space character in the
179 |         file.
180 | 
181 |         Returns:
182 |             The first non-space character encountered. None if the end of
183 |             file was reached.
184 |         """
185 |         char = ''
186 | 
187 |         while True:
188 |             char = self._src[self._line_pos][self._char_pos]
189 | 
190 |             # React according to spaces and newlines
191 |             if char == '\n':
192 |                 if not self._next_line():
193 |                     return None
194 |             elif char in ' \t':
195 |                 self._char_pos += 1
196 |             else:
197 |                 break
198 | 
199 |         # Increment to the next character
200 |         self._char_pos += 1
201 |         return char
202 | 
203 |     def _next_line(self):
204 |         """Travel to Next Line (Protected)
205 | 
206 |         Move the cursor to the start of the next line safely.
207 | 
208 |         Returns:
209 |             True on success, False if end of file is encountered
210 |         """
211 |         self._line_pos += 1
212 |         self._char_pos = 0
213 | 
214 |         # Check to make sure this isn't the end of file
215 |         if self._line_pos == len(self._src):
216 |             return False
217 | 
218 |         return True
219 | 
220 |     def _next_char(self, peek=False):
221 |         """Get Next Character (Protected)
222 | 
223 |         Move the cursor to the next character in the file.
224 | 
225 |         Arguments:
226 |             peek: If True, the character position pointer will not be
227 |                 incremented. Set by default to False.
228 | 
229 |         Returns:
230 |             The next character encountered. None if the end of line
231 |             was reached.
232 |         """
233 |         # Get the next pointed character
234 |         char = self._src[self._line_pos][self._char_pos]
235 | 
236 |         # Return None if we hit a line ending
237 |         if char == '\n':
238 |             return None
239 | 
240 |         # Increment to the next character
241 |         if not peek:
242 |             self._char_pos += 1
243 | 
244 |         return char
245 | 
246 |     def _expect_string(self):
247 |         """Expect String Token (Protected)
248 | 
249 |         Parses the following characters in hope of a valid string. If an
250 |         invalid string is encountered, all attempts are made to make it valid.
251 | 
252 |         Returns:
253 |             (value, token_type) - A tuple describing the final parsed token.
254 |             The resulting token type will be 'string'.
255 |         """
256 |         hanging_quote = False
257 | 
258 |         # We know this is a string. Find the next quotation and return it
259 |         string_end = self._src[self._line_pos].find('"', self._char_pos)
260 | 
261 |         # If we have a hanging quotation, assume quote ends at end of line
262 |         if string_end == -1:
263 |             hanging_quote = True
264 |             string_end = len(self._src[self._line_pos]) - 1
265 |             self._scan_warning('No closing quotation in string', hl=string_end)
266 | 
267 |         value = self._src[self._line_pos][self._char_pos:string_end]
268 | 
269 |         # Check for illegal characters, send a warning if encountered
270 |         for i, char in enumerate(value):
271 |             if not char.isalnum() and char not in ' _,;:.\'':
272 |                 value = value.replace(char, ' ', 1)
273 |                 msg = 'Invalid character \'%s\' in string' % char
274 |                 self._scan_warning(msg, hl=self._char_pos+i)
275 | 
276 |         self._char_pos += len(value)
277 |         if not hanging_quote:
278 |             self._char_pos += 1
279 | 
280 |         return value, 'string'
281 | 
282 |     def _expect_number(self, char):
283 |         """Expect Number Token (Protected)
284 | 
285 |         Parses the following characters in hope of a valid integer or float.
286 | 
287 |         Arguments:
288 |             char: The first character already picked for the value.
289 | 
290 |         Returns:
291 |             (value, token_type) - A tuple describing the final parsed token.
292 |             The resulting token type will either be 'int' indicating a valid
293 |             integer or 'float' indicating a valid floating point value.
294 |         """
295 |         value = '' + char
296 |         token_type = 'integer'
297 | 
298 |         is_float = False
299 | 
300 |         while True:
301 |             char = self._next_char(peek=True)
302 | 
303 |             if char is None:
304 |                 break
305 |             elif char == '.' and not is_float:
306 |                 # We found a decimal point. Move to float mode
307 |                 is_float = True
308 |                 token_type = 'float'
309 |             elif not char.isdigit() and char != '_':
310 |                 break
311 | 
312 |             value += char
313 |             self._char_pos += 1
314 | 
315 |         # Remove all underscores in the int/float. These serve no purpose
316 |         value = value.replace('_', '')
317 | 
318 |         # If nothing was given after the decimal point assume 0
319 |         if is_float and value.split('.')[-1] == '':
320 |             value += '0'
321 | 
322 |         return value, token_type
323 | 
324 |     def _expect_identifier(self, char):
325 |         """Expect Identifier Token (Protected)
326 | 
327 |         Parses the following characters in hope of a valid identifier.
328 | 
329 |         Arguments:
330 |             char: The first character already picked for the value.
331 | 
332 |         Returns:
333 |             (value, token_type) - A tuple describing the final parsed token.
334 |             The resulting token type will either be 'identifier' indicating a
335 |             valid identifier or 'keyword' indicating a valid keyword.
336 |         """
337 |         value = '' + char
338 |         token_type = 'identifier'
339 | 
340 |         while True:
341 |             char = self._next_char(peek=True)
342 | 
343 |             if char is None:
344 |                 break
345 |             elif not char.isalnum() and char != '_':
346 |                 break
347 | 
348 |             value += char
349 |             self._char_pos += 1
350 | 
351 |         if value in self.keywords:
352 |             token_type = 'keyword'
353 | 
354 |         return value, token_type
355 | 
356 |     def _expect_symbol(self, char):
357 |         """Expect Symbol Token (Protected)
358 | 
359 |         Parses the following characters in hope of a valid symbol.
360 | 
361 |         Arguments:
362 |             char: The first character already picked for the value.
363 | 
364 |         Returns:
365 |             (value, token_type) - A tuple describing the final parsed token.
366 |             The resulting token type will either be 'symbol' indicating a
367 |             valid identifier or 'comment' indicating a comment until line end.
368 |         """
369 |         value = '' + char
370 | 
371 |         while True:
372 |             char = self._next_char(peek=True)
373 | 
374 |             if char is None:
375 |                 break
376 |             elif value + str(char) == '//':
377 |                 return None, 'comment'
378 |             elif value + str(char) not in self.symbols:
379 |                 break
380 | 
381 |             value += char
382 |             self._char_pos += 1
383 | 
384 |         return value, 'symbol'
385 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Compiler
  2 | ========
  3 | 
  4 | ## Description
  5 | A single-pass, recursive descent `LL(1)` compiler written by hand for a made-up
  6 | language. This compiler is written entirely in Python 3 and uses the `gcc`
  7 | compiler to finish compilation of the generated intermediate C representation.
  8 | 
  9 | ## Author
 10 | Created by [Evan Sneath](http://github.com/evansneath).
 11 | 
 12 | ## License
 13 | This software is licensed under the
 14 | [Open Software License v3.0](http://www.opensource.org/licenses/OSL-3.0).
 15 | 
 16 | ## Dependencies
 17 | In order to run, this software requires the following dependencies:
 18 | 
 19 | * [Python 3](http://python.org/download/releases/3.3.2/)
 20 | 
 21 | ## Progress
 22 | 
 23 | <table>
 24 | <tr><td><b>Component</b></td><td><b>Status</b></td></tr>
 25 | <tr><td>Scanning</td><td>Completed</td></tr>
 26 | <tr><td>Parsing</td><td>Completed</td></tr>
 27 | <tr><td>Type Checking</td><td>Completed</td></tr>
 28 | <tr><td>Code Generation</td><td>Completed</td></tr>
 29 | <tr><td>Runtime</td><td>Completed</td></tr>
 30 | </table>
 31 | 
 32 | ## Usage
 33 | ```
 34 | usage: compiler.py [-h] [-d] [-o OUT] source
 35 | 
 36 | positional arguments:
 37 |   source             source file to compile
 38 | 
 39 | optional arguments:
 40 |   -h, --help         show this help message and exit
 41 |   -d, --debug        print comments in generated code
 42 |   -o OUT, --out OUT  target path for the compiled code
 43 | ```
 44 | 
 45 | The compiler will scan the source file for all valid tokens and 
 46 | parse the language grammar. All scanner, parser, and type errors will be 
 47 | outputted as they are encountered. Generated code is then outputted to `ir.c`
 48 | where it is then run through the `gcc` compiler. The default output file
 49 | generated by the compiler is `a.out` in the working directory. The `-o`
 50 | argument may be used to modify the output file name.
 51 | 
 52 | The `tests/` directory contains test source files which have several examples 
 53 | of token scanning with error/warning handling, grammar parsing, code
 54 | generation, and runtime libraries.
 55 | 
 56 | ## Implementation Details
 57 | 
 58 | ### Software
 59 | 
 60 | In determining the implementation language, robustness was chosen over speed as
 61 | the deciding factor for the compiler. Python 3 was selected because of its ease
 62 | of use, access to simple dictionary and table libraries, and my own familiarity
 63 | with the language.
 64 | 
 65 | As I progressed through the parser stage of the compiler, it became clear that
 66 | the simple exception raising and handling would be useful for displaying
 67 | compiler errors and trapping at resync points to continue syntax parsing.
 68 | 
 69 | ### Structure
 70 | 
 71 | For the sake of modularity and ease of debugging, the program is structured in
 72 | a hierarchical fashion.
 73 | 
 74 | `compiler.py` acts as the engine responsible for parsing of
 75 | command-line arguments, calling the code parser, and completing the build using
 76 | the `gcc` compiler with the appropriate arguments once the intermediate C code
 77 | is generated.
 78 | 
 79 | `parser.py` and its `Parser` class are the entry point for the action of
 80 | compiling the valid input file. In order to do this, `Parser` inherits the
 81 | `Scanner` class (in `scanner.py`) and the `CodeGenerator` class
 82 | (in `codegenerator.py`) to allow for simple access to their class methods and
 83 | objects. The `datatypes.py` and `errors.py` source files contain several
 84 | data types and exception classes respectively which are used in the various
 85 | components of the compiler.
 86 | 
 87 | ### Scanning
 88 | 
 89 | The implementation of the language scanner first tackles the problem of source
 90 | code parsing by splitting the source code into a list of distinct lines. Not
 91 | only does this allow for easier ways to determine end of line and end of file,
 92 | but also makes the operation of retrieving line numbers simple for purposes of
 93 | warning and error messages.
 94 | 
 95 | At the start of each non-whitespace character, the first character is used to
 96 | determine the type of the token to expect. The token is returned if the type is
 97 | matched without issue. Otherwise, a scanner warning is thrown.
 98 | 
 99 | The scanner warnings are never fatal, though syntactically the tokens returned
100 | may cause a parser error. My methodology behind the scanner was to try to
101 | correct as many lexical errors as possible. For instance, if a string literal
102 | has no end quote a warning will be thrown and a quote will be assumed at the
103 | end of the line.
104 | 
105 | ### Parsing
106 | 
107 | In order to eliminate loops caused by recursive grammar, any left-recursion in
108 | the language grammar was rewritten.
109 | 
110 | Type-checking is performed in expressions by returning the types from the
111 | expression tree functions and evaluating types for compatibility if an
112 | operation is performed. There are many other locations where type-checking is
113 | performed in the compiler other than expressions.
114 | 
115 | Parser resync points are used throughout the compiler to continue parsing if
116 | an error is encountered without propagating spurious error messages. Exception
117 | handling in Python is used to elegantly handle resyncing. Once a parser error
118 | is encountered in a statement or declaration, an exception is raised. This
119 | exception is then handled at the starting point of statement or declaration
120 | parsing and the parsing will continue to the next statement or declaration.
121 | 
122 | Note that once a fatal error of any kind is encountered, code will no longer
123 | be generated.
124 | 
125 | ### Code Generation
126 | 
127 | Memory and registers for the operation of the program are defined and used as
128 | 32-bit integer arrays. This allows for simple addressing of memory and register
129 | space. All non-integer types present in the program are cast as integers for
130 | storage in the memory spaces. In the case of string storage, memory spaces hold
131 | a 32-bit pointer to the start of the string in either the heap (this will be
132 | covered later) or a literal value. To ensure that pointers are 32-bit and may
133 | be cast to integer without issue, the `gcc` compiler flag `-m32` is used.
134 | 
135 | A fixed number of available register locations are allocated for use. These are
136 | used incrementally and are not reused or reallocated. For this reason, a large
137 | number of registers are required so that register space is always available.
138 | Future improvements could be made to "push back" register allocation to the
139 | first register (`R[0]`) at the end of each scope. At the end of a scope, it can
140 | be assumed that the same register will not be referenced again.
141 | 
142 | The main memory structure of the program is divided into the stack and heap.
143 | The stack begins at the high memory address and is maintained using both a
144 | stack and frame pointer. The frame pointer (pointing to the scope's return
145 | address) provides a way to easily smash local stack variables when leaving the
146 | scope. All global variables may only be declared in the program scope and are
147 | referenced using the offset from the top of main memory.
148 | 
149 | The heap in main memory is used only to allocate space for strings during
150 | runtime. This is accomplished using a heap pointer pointing to the next unused
151 | memory location in the heap. As the `getString()` procedure is called, the
152 | string retrieved from `stdin` is moved to the heap and the variable
153 | referencing that string is modified to point to the newly allocated heap
154 | location.
155 | 
156 | Memory is arranged in the following manner:
157 | 
158 | ```
159 |          MAIN MEMORY
160 | --  .-------------------.
161 | P   | RETURN ADDR       | <== MM_END (MM_SIZE - 1)
162 | R   | ----------------- |
163 | O   | LOCAL/GLOBAL VARS |
164 | G   |         .         |
165 | R   |         .         |
166 | A   |         .         |
167 | M   |         .         |
168 | --  | ----------------- |
169 | P   | PARAMS            | 
170 | R   |         .         |
171 | O   |         .         |
172 | C   |         .         |
173 | E   | ----------------- |
174 | D   | CALLER FP         |
175 | U   | ----------------- |
176 | R   | RETURN ADDR ..    | <== FP
177 | E   | ----------------- |
178 |     | LOCAL VARS        |
179 |     |         .         |
180 |     |         .         |
181 |     |         .         | <== SP
182 |     `---v--v--v--v--v---`
183 |               .
184 |               .
185 |               .
186 |     .---^--^--^--^--^---.
187 |     |         .         |
188 |     |         .         |
189 |     |         .         |
190 |     | HEAP              | <== MM_START (0)
191 |     `-------------------`
192 | ```
193 | 
194 | When entering a scope, the caller pushes all params onto the stack in reverse
195 | order. This allows for easy addressing by their indexes. The caller then stores
196 | its current FP onto the stack and the return address. At this point the called
197 | scope is responsible for maintaining the stack and adding its local variables.
198 | 
199 | When leaving a scope, the SP is moved to the FP location and the return address
200 | is called. The caller scope then is responsible for restoring the caller FP and
201 | ensuring that all outbound params are written back to their appropriate
202 | locations.
203 | 
204 | All procedure calls are made using C labels and the `goto` statement. This
205 | ensures that the program code remains in the `main` function and no outside
206 | function calls are required. The technique of using
207 | [labels as values](http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html)
208 | was used to store the location of the return labels on the stack.
209 | 
210 | Loop and conditional statements also make use of the `goto` statement to
211 | determine program flow. After the conditional expression is resolved to a
212 | boolean form, the register used for the expression is tested. If the expression
213 | resolved to `false`, then the code portion is skipped.
214 | 
215 | For example:
216 | 
217 | ```
218 | R[0] = <expression_outcome>;
219 | if (!R[0]) goto else_label;
220 |     <do_if>
221 |     goto end_if_label;
222 | else_label:
223 |     <do_else>
224 | end_if_label:
225 | ```
226 | 
227 | ### Runtime Environment
228 | Initially, I had created a separate C library to implement the runtime
229 | functions necessary. I determined that these functions were simple enough to be
230 | handwritten directly inline with the generated code as I progressed through
231 | development. The runtime functions use the same principles of stack memory
232 | referencing as other procedures and are populated in the identifiers table
233 | manually at the start of parsing.
234 | 
235 | ## Language Specifications
236 | 
237 | ### Syntax
238 | ```
239 | <program> ::=
240 |     <program_header> <program_body>
241 | 
242 | <program_header> ::=
243 |     'program' <identifier> 'is'
244 | 
245 | <program_body> ::=
246 |         ( <declaration> ';' )*
247 |     'begin'
248 |         ( <statement> ';' )*
249 |     'end' 'program'
250 | 
251 | <declaration> ::=
252 |     [ 'global' ] <procedure_declaration> |
253 |     [ 'global' ] <variable_declaration>
254 | 
255 | <variable_declaration> ::=
256 |     <type_mark> <identifier> [ '[' <array_size> ']' ]
257 | 
258 | <type_mark> ::=
259 |     'integer' |
260 |     'float' |
261 |     'bool' |
262 |     'string'
263 | 
264 | <procedure_declaration> ::=
265 |     <procedure_header> <procedure_body>
266 | 
267 | <procedure_header> ::=
268 |     'procedure' <identifier> '(' [ <parameter_list> ] ')'
269 | 
270 | <procedure_body> ::=
271 |         ( <declaration> ';' )*
272 |     'begin'
273 |         ( <statement> ';' )*
274 |     'end' 'procedure'
275 | 
276 | <parameter_list> ::=
277 |     <parameter> ',' <parameter_list> |
278 |     <parameter>
279 | 
280 | <parameter> ::=
281 |     <variable_declaration> ( 'in' | 'out' )
282 | 
283 | <statement> ::=
284 |     <assignment_statement> |
285 |     <if_statement> |
286 |     <loop_statement> |
287 |     <return_statement> |
288 |     <procedure_call>
289 | 
290 | <assignment_statement> ::=
291 |     <destination> ':=' <expression>
292 | 
293 | <if_statement> ::=
294 |     'if' '(' <expression> ')' 'then' ( <statement> ';' )+
295 |     [ 'else' ( <statement> ';' )+ ]
296 |     'end' 'if'
297 | 
298 | <loop_statement> ::=
299 |     'for' '(' <assignment_statement> ';' <expression> ')'
300 |         ( <statement> ';' )*
301 |     'end' 'for'
302 | 
303 | <procedure_call> ::=
304 |     <identifier> '(' [ <argument_list> ] ')'
305 | 
306 | <argument_list> ::=
307 |     <expression> ',' <argument_list> |
308 |     <expression>
309 | 
310 | <destination> ::=
311 |     <identifier> [ '[' <expression> ']' ]
312 | 
313 | <expression> ::=
314 |     <expression> '&' <arith_op> |
315 |     <expression> '|' <arith_op> |
316 |     [ 'not' ] <arith_op>
317 | 
318 | <arith_op> ::=
319 |     <arith_op> '+' <relation> |
320 |     <arith_op> '-' <relation> |
321 |     <relation>
322 | 
323 | <relation> ::=
324 |     <relation> '<' <term> |
325 |     <relation> '>' <term> |
326 |     <relation> '>=' <term> |
327 |     <relation> '<=' <term> |
328 |     <relation> '==' <term> |
329 |     <relation> '!=' <term> |
330 |     <term>
331 | 
332 | <term> ::=
333 |     <term> '*' <factor> |
334 |     <term> '/' <factor> |
335 |     <factor>
336 | 
337 | <factor> ::=
338 |     '(' <expression> ')' |
339 |     [ '-' ] <name> |
340 |     [ '-' ] <number> |
341 |     <string> |
342 |     'true' |
343 |     'false'
344 | 
345 | <name> ::=
346 |     <identifier> [ '[' <expression> ']' ]
347 | 
348 | <identifier> ::=
349 |     [a-zA-Z][a-zA-Z0-9_]*
350 | 
351 | <number> ::=
352 |     [0-9][0-9_]*[.[0-9_]*]?
353 | 
354 | <string> ::=
355 |     "[a-zA-Z0-9 _,;:.']*"
356 | ```
357 | 
358 | ### Semantics
359 | * Procedure parameters are transmitted by value. Recursion is supported.
360 | * Non-local variables and functions are not visible except for those variables
361 |    and functions in the outermost scope prefixed with the global reserved word.
362 |    Functions currently being defined are visible in the statement set of the
363 |    function itself (so that recursive calls are possible).
364 | * No forward references are permitted or supported.
365 | * Expressions are strongly typed and types must match. However, there is
366 |    automatic conversion in the arithmetic operators to allow any mixing between
367 |    integers and floats. Furthermore, the relational operators can compare
368 |    boolean with integer tokens (boolean tokens are converted to integers as
369 |     `false = 0`, `true = 1`).
370 | * The type signatures of a procedure's arguments must match exactly their
371 |    parameter declaration.
372 | * Arithmetic operations (`+`, `-`, `*`, `/`, `&`, `|`) are defined for integers
373 |    and floats only. The bitwise AND (`&`), bitwise OR (`|`) and bitwise NOT
374 |    (`not`) operators are valid only on variables of type integer.
375 | * Relational operations are defined for integer and boolean tokens. Only
376 |    comparisons between compatible types are possible. Relational operations
377 |    return a boolean result.
378 | 


--------------------------------------------------------------------------------
/lib/codegenerator.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | """CodeGenerator module
  4 | 
  5 | Provides functionality for code output to an attached destination file.
  6 | 
  7 | Author: Evan Sneath
  8 | License: Open Software License v3.0
  9 | 
 10 | Classes:
 11 |     CodeGenerator: A code generator interface for destination file outputting.
 12 | """
 13 | 
 14 | 
 15 | class CodeGenerator:
 16 |     """CodeGenerator class
 17 | 
 18 |     This class implements code generator function calls to easily attach a
 19 |     destination file, input code to generate, and commit the destination
 20 |     file upon successful compilation. This class is designed to be inherited
 21 |     and then used during the parsing stage of the compiler.
 22 | 
 23 |     Attributes:
 24 |         runtime_functions: Details of each runtime function and its params.
 25 | 
 26 |     Methods:
 27 |         attach_destination: Binds a destination file to the code generator.
 28 |         generate_header: Generates overhead code (memory allocation, etc).
 29 |         generate_footer: Generates finishing overhead code.
 30 |         generate: Formats and stores a given string of code for later output.
 31 |         comment: Adds a comment to the generated code with appropriate tabbing.
 32 |         tab_push: Increases the tab depth by 1 tab (4 spaces).
 33 |         tab_pop: Decreases the tab depth by 1 tab (4 spaces).
 34 |         commit: Commits all code generation and writes to the destination file.
 35 |         get_mm: Provides a free memory space for global or local variables.
 36 |         reset_local_ptr: Resets the value for the local pointer to default.
 37 |         reset_param_ptr: Resets the value for the param pointer to default.
 38 |         get_reg: Provides a free register for intermediate variable use.
 39 |         get_label_id: Returns a unique identifier for the procedure call.
 40 |         get_unique_call_id: Returns a unique identifier for multiple calls.
 41 |         generate_program_entry: Generates all code associated with setting up
 42 |             the program entry and exit point.
 43 |         generate_procedure_call: Generates all code associated with managing
 44 |             the memory stack during a procedure call.
 45 |         generate_procedure_call_end: Generates code to clean up a procedure
 46 |             call. This finalizes the call by popping the SP to local stack.
 47 |         generate_name: Generates all code associated with name reference.
 48 |         generate_assignment: Generates all code associated with id assignment.
 49 |         generate_param_push: Generates code to push a param onto the stack.
 50 |         generate_param_pop: Generates code to pop a param off the stack.
 51 |         generate_param_store: Generates code to save an outgoing parameter
 52 |             to an identifier located in main memory.
 53 |         generate_number: Generates the code for a number reference.
 54 |         generate_return: Generates the code for the 'return' operation.
 55 |         generate_operation: Generates operation code given an operation.
 56 |     """
 57 |     def __init__(self):
 58 |         super().__init__()
 59 | 
 60 |         # Holds the file path of the attached destination file
 61 |         self._dest_path = ''
 62 | 
 63 |         # Holds all generated code to be written to the file destination
 64 |         self._generated_code = ''
 65 | 
 66 |         # Holds allocated size of main memory and num registers
 67 |         self._mm_size = 65536
 68 |         self._reg_size = 2048
 69 |         self._buf_size = 256
 70 | 
 71 |         # Holds stack pointer, frame pointer, and heap pointer registers
 72 |         self._SP = 1
 73 |         self._FP = 2
 74 |         self._HP = 3
 75 | 
 76 |         # Holds the pointer to the lowest unused register for allocation
 77 |         self._reg = 4
 78 | 
 79 |         # Holds the local memory pointer which determines the offset from the
 80 |         # frame pointer in the current scope.
 81 |         self._local_ptr = 0
 82 |         self.reset_local_ptr()
 83 | 
 84 |         # Holds the param memory pointer which determines the offset from the
 85 |         # frame pointer in the current scope.
 86 |         self._param_ptr = 0
 87 |         self.reset_param_ptr()
 88 | 
 89 |         # Holds the tab count of the code. tab_push, tab_pop manipulate this
 90 |         self._tab_count = 0
 91 | 
 92 |         # Holds an integer used for unique label generation for if/loop
 93 |         self._label_id = 0
 94 | 
 95 |         # Holds an integer to distinguish multiple calls of a function
 96 |         self._unique_id = 0
 97 | 
 98 |         # Holds the details of the runtime functions
 99 |         self.runtime_functions = {
100 |             'getString': [('my_string', 'string', 'out')],
101 |             'putString': [('my_string', 'string', 'in')],
102 |             'getBool': [('my_bool', 'bool', 'out')],
103 |             'putBool': [('my_bool', 'bool', 'in')],
104 |             'getInteger': [('my_integer', 'integer', 'out')],
105 |             'putInteger': [('my_integer', 'integer', 'in')],
106 |             'getFloat': [('my_float', 'float', 'out')],
107 |             'putFloat': [('my_float', 'float', 'in')],
108 |         }
109 | 
110 |         return
111 | 
112 |     def attach_destination(self, dest_path):
113 |         """Attach Destination
114 | 
115 |         Attaches a destination file to the code generator and prepares the
116 |         file for writing.
117 | 
118 |         Arguments:
119 |             dest_path: The path to the destination file to write.
120 | 
121 |         Returns:
122 |             True on success, False otherwise.
123 |         """
124 |         # The target file was attached, store the path
125 |         self._dest_path = dest_path
126 | 
127 |         return True
128 | 
129 |     def generate_header(self):
130 |         """Generate Code Header
131 | 
132 |         Adds all header code to the generated code buffer.
133 |         """
134 |         code = [
135 |             '#include <stdio.h>',
136 |             '#include <string.h>',
137 |             '',
138 |             '#define MM_SIZE  %d' % self._mm_size,
139 |             '#define R_SIZE   %d' % self._reg_size,
140 |             '#define BUF_SIZE %d' % self._buf_size,
141 |             '',
142 |             '// Define register locations of stack/frame ptr',
143 |             '#define SP       %d' % self._SP,
144 |             '#define FP       %d' % self._FP,
145 |             '#define HP       %d' % self._HP,
146 |             '',
147 |             'int main(void)',
148 |             '{',
149 |             '// Allocate main memory and register space',
150 |             'int MM[MM_SIZE];',
151 |             'int R[R_SIZE];',
152 |             '',
153 |             '// SP and FP start at the top of MM',
154 |             'R[SP] = MM_SIZE - 1;',
155 |             'R[FP] = MM_SIZE - 1;',
156 |             '',
157 |             '// HP starts at the bottom of MM',
158 |             'R[HP] = 0;',
159 |             '',
160 |             '// Allocate float registers',
161 |             'float R_FLOAT_1;',
162 |             'float R_FLOAT_2;',
163 |             '',
164 |             '// Allocate space for a string buffer',
165 |             'char STR_BUF[BUF_SIZE];',
166 |             '',
167 |             '////////////////////////////////////////////////////',
168 |             '// PROGRAM START',
169 |             '',
170 |         ]
171 | 
172 |         self.generate('\n'.join(code), tabs=0)
173 | 
174 |         return
175 | 
176 |     def generate_footer(self):
177 |         """Generate Code Footer
178 | 
179 |         Adds all footer code to the generated code buffer.
180 |         """
181 |         code = [
182 |             '',
183 |             '    // Jump to the program exit',
184 |             '    goto *(void*)MM[R[FP]];',
185 |             '',
186 |             '////////////////////////////////////////////////////',
187 |             '// RUNTIME FUNCTIONS',
188 |             '',
189 |             'putString_1:',
190 |             '    R[0] = MM[R[FP]+2];',
191 |             '    printf("%s\\n", (char*)R[0]);',
192 |             '    R[0] = MM[R[FP]];',
193 |             '    goto *(void*)R[0];',
194 |             '',
195 |             'getString_1:',
196 |             '    fgets(STR_BUF, BUF_SIZE, stdin);',
197 |             '    R[0] = strlen(STR_BUF) + 1;',
198 |             '    memcpy(&MM[R[HP]], &STR_BUF, R[0]);',
199 |             '    MM[R[FP]+2] = (int)((char*)&MM[R[HP]]);',
200 |             '    R[HP] = R[HP] + R[0];',
201 |             '    R[0] = MM[R[FP]];',
202 |             '    goto *(void*)R[0];',
203 |             '',
204 |             'putBool_1:',
205 |             '    R[0] = MM[R[FP]+2];',
206 |             '    printf("%s\\n", R[0] ? "true" : "false");',
207 |             '    R[0] = MM[R[FP]];',
208 |             '    goto *(void*)R[0];',
209 |             '',
210 |             'getBool_1:',
211 |             '    scanf("%d", &R[0]);',
212 |             '    R[0] = R[0] ? 1 : 0;',
213 |             '    MM[R[FP]+2] = R[0];',
214 |             '    R[0] = MM[R[FP]];',
215 |             '    goto *(void*)R[0];',
216 |             '',
217 |             'putInteger_1:',
218 |             '    R[0] = MM[R[FP]+2];',
219 |             '    printf("%d\\n", R[0]);',
220 |             '    R[0] = MM[R[FP]];',
221 |             '    goto *(void*)R[0];',
222 |             '',
223 |             'getInteger_1:',
224 |             '    scanf("%d", &R[0]);',
225 |             '    MM[R[FP]+2] = R[0];',
226 |             '    R[0] = MM[R[FP]];',
227 |             '    goto *(void*)R[0];',
228 |             '',
229 |             'putFloat_1:',
230 |             '    R[0] = MM[R[FP]+2];',
231 |             '    memcpy(&R_FLOAT_1, &R[0], sizeof(float));',
232 |             '    printf("%g\\n", R_FLOAT_1);',
233 |             '    R[0] = MM[R[FP]];',
234 |             '    goto *(void*)R[0];',
235 |             '',
236 |             'getFloat_1:',
237 |             '    scanf("%f", &R_FLOAT_1);',
238 |             '    memcpy(&R[0], &R_FLOAT_1, sizeof(float));',
239 |             '    MM[R[FP]+2] = R[0];',
240 |             '    R[0] = MM[R[FP]];',
241 |             '    goto *(void*)R[0];',
242 |             '}',
243 |         ]
244 | 
245 |         self.generate('\n'.join(code), tabs=0)
246 | 
247 |         return
248 | 
249 |     def generate(self, code, tabs=-1):
250 |         """Generate Code
251 |         
252 |         Adds the given code to the generated code and automatically formats
253 |         it with the appropriate tabs and ending newline.
254 | 
255 |         Arguments:
256 |             code: The code to add to the generated code buffer.
257 |             tabs: A manual override to determine the number of tabs to place
258 |                 in this line of code. If -1, then the number of tabs used will
259 |                 correspond to the tab location from tab_push() and tab_pop()
260 |                 methods. (Default: -1)
261 |         """
262 |         tabs = tabs if tabs != -1 else self._tab_count
263 |         self._generated_code += ('    ' * tabs) + code + '\n'
264 | 
265 |         return
266 | 
267 |     def comment(self, text, is_displayed=False):
268 |         """Generate Comment
269 | 
270 |         Adds a comment to the generated code.
271 | 
272 |         Arguments:
273 |             text: The text to display in the comment.
274 |             is_displayed: If True, the comment is written to the generated
275 |                 code. (Default: False)
276 |         """
277 |         if is_displayed:
278 |             self.generate('// %s' % text)
279 | 
280 |         return
281 | 
282 |     def tab_push(self):
283 |         """Tab Push
284 | 
285 |         Pushes the tab (increases the indentation by 4 spaces) for pretty
286 |         code output.
287 |         """
288 |         self._tab_count += 1
289 |         return
290 | 
291 |     def tab_pop(self):
292 |         """Tab Pop
293 | 
294 |         Pops the tab (decreases the indentation by 4 spaces) for pretty code
295 |         output.
296 |         """
297 |         self._tab_count -= 1 if self._tab_count != 0 else 0
298 |         return
299 | 
300 |     def commit(self):
301 |         """Commit Code Generation
302 | 
303 |         Writes the generated code to the destination output file for
304 |         intermediate code if the source is parsed without fatal errors.
305 | 
306 |         Returns:
307 |             True if file is successfully written, False otherwise.
308 |         """
309 |         try:
310 |             with open(self._dest_path, 'w+') as f:
311 |                 f.write(self._generated_code)
312 |         except IOError as e:
313 |             print('Error: "%s"' % self._dest_path)
314 |             print('    Could not write to destination file: %s' % e.strerror)
315 |             return False
316 | 
317 |         return True
318 | 
319 |     def get_mm(self, id_size, is_param=False):
320 |         """Get Memory Space
321 | 
322 |         Gets a space in memory appropriately depending on if the variable is
323 |         a local variable or a parameter to the scope.
324 | 
325 |         Arguments:
326 |             id_size: The size of the parameter to allocate (used for arrays).
327 |             is_param: True if the identifier is a parameter, False if local or
328 |                 global variable. (Default: False)
329 | 
330 |         Returns:
331 |             An integer denoting the offset corresponding to a stack landmark
332 |             depending on the type of variable. For example, local variables
333 |             and params are offset by the current FP in different directions
334 |             while global variables are offset by the top of main memory.
335 |             See the documentation in README for stack details.
336 |         """
337 |         # Determine size of the identifier
338 |         mem_needed = int(id_size) if id_size is not None else 1
339 |         
340 |         if is_param:
341 |             var_loc = self._param_ptr
342 |             self._param_ptr += mem_needed
343 |         else:
344 |             # Allocate memory in the local variable space
345 |             var_loc = self._local_ptr
346 |             self._local_ptr += mem_needed
347 | 
348 |         return var_loc
349 | 
350 |     def reset_local_ptr(self):
351 |         """Reset Local Pointer
352 | 
353 |         Resets the pointer to the current scope's local variable portion of
354 |         the stack. This is used to properly allocate space for the local
355 |         variables at the start of the scope.
356 |         """
357 |         self._local_ptr = 1
358 |         return
359 | 
360 |     def reset_param_ptr(self):
361 |         """Reset Param Pointer
362 | 
363 |         Resets the pointer to the current scope's parameter portion of the
364 |         stack. This is necessary to properly allocate space for the parameters
365 |         as they are being pushed onto the stack.
366 |         """
367 |         self._param_ptr = 1
368 |         return
369 | 
370 |     def get_reg(self, inc=True):
371 |         """Get Register
372 | 
373 |         Gets new, unused register from the register list.
374 | 
375 |         Arguments:
376 |             inc: If True, a new register will be returned. If False, the last
377 |                 register allocated will be returned.
378 | 
379 |         Returns:
380 |             An integer denoting the register number. The register may then be
381 |             referenced as follows: R[<reg_num>]
382 |         """
383 |         # Increment the register if we're getting a brand new one
384 |         self._reg += 1 if inc else 0
385 | 
386 |         return self._reg
387 | 
388 |     def get_label_id(self):
389 |         """Get Label Id
390 | 
391 |         Gets a label id so that no conflicts occur between procedures with
392 |         the same name in difference scopes.
393 | 
394 |         Returns:
395 |             A label id to append to the procedure label.
396 |         """
397 |         self._label_id += 1
398 | 
399 |         return self._label_id
400 | 
401 |     def get_unique_call_id(self):
402 |         """Get Unique Call Id
403 | 
404 |         Gets a unique call id so that no conflicts occur between return
405 |         labels for procedures with multiple calls.
406 | 
407 |         Returns:
408 |             A unique id to append to the procedure return label.
409 |         """
410 |         self._unique_id += 1
411 | 
412 |         return self._unique_id
413 | 
414 |     def generate_program_entry(self, program_name, program_num, debug):
415 |         """Generate Program Entry
416 | 
417 |         Generates the code associated with managing the entry point for the
418 |         program. This involves pushing the program return address onto the
419 |         stack, jumping to the entry point, and creating the program exit
420 |         section.
421 | 
422 |         Arguments:
423 |             program_name: The name of the program.
424 |             program_num: The label id of the program.
425 |             debug: Determines if comments should be written to the code.
426 |         """
427 |         # Push the return address onto the stack
428 |         self.comment('Setting program return address', debug)
429 |         self.generate('MM[R[FP]] = (int)&&%s_%d_end;' %
430 |                       (program_name, program_num))
431 | 
432 |         # Make the jump to the entry point
433 |         self.generate('goto %s_%d_begin;' % (program_name, program_num))
434 | 
435 |         # Make the main program return
436 |         self.generate('')
437 |         self.comment('Creating the program exit point', debug)
438 |         self.generate('%s_%d_end:' % (program_name, program_num))
439 |         self.tab_push()
440 |         self.generate('return 0;')
441 |         self.tab_pop()
442 |         self.generate('')
443 | 
444 |         return
445 | 
446 |     def generate_procedure_call(self, procedure_name, procedure_num, debug):
447 |         """Generate Procedure Call
448 | 
449 |         Generates the code associated with managing the stack before and
450 |         after a procedure call. Note that this does not include param
451 |         pushing and popping operations.
452 | 
453 |         Arguments:
454 |             procedure_name: The name of the procedure to call.
455 |             procedure_num: The label id of the procedure to call.
456 |             debug: Determines if comments should be written to the code.
457 |         """
458 |         # Save the FP to the stack. Set next FP to return address
459 |         self.comment('Setting caller FP', debug)
460 |         self.generate('R[SP] = R[SP] - 1;')
461 |         self.generate('MM[R[SP]] = R[FP];')
462 |         self.comment('Setting return address (current FP)', debug)
463 |         self.generate('R[SP] = R[SP] - 1;')
464 |         self.generate('R[FP] = R[SP];')
465 | 
466 |         # Generate a new call number so multiple calls do not cause collisions
467 |         call_number = self.get_unique_call_id()
468 | 
469 |         # Push the return address onto the stack
470 |         self.generate('MM[R[SP]] = (int)&&%s_%d_%d;' %
471 |                 (procedure_name, procedure_num, call_number))
472 |                 
473 |         # Make the jump to the function call
474 |         self.generate('goto %s_%d;' % (procedure_name, procedure_num))
475 | 
476 |         # Generate the return label
477 |         self.generate('%s_%d_%d:' % (procedure_name, procedure_num, call_number))
478 | 
479 |         # The SP now points to the return address. Restore the old FP
480 |         self.comment('Restore caller FP', debug)
481 |         self.generate('R[SP] = R[SP] + 1;')
482 |         self.generate('R[FP] = MM[R[SP]];')
483 | 
484 |         return
485 | 
486 |     def generate_procedure_call_end(self, debug):
487 |         """Generate Procedure Call End
488 | 
489 |         Generates code to leave the procedure on the stack by pushing the
490 |         stack to the lower scope's local stack.
491 | 
492 |         Arguments:
493 |             debug: Determines if comments are to be written in generated code.
494 |         """
495 |         self.comment('Move to caller local stack', debug)
496 | 
497 |         # Finalize the function call. Move the SP off the param list
498 |         self.generate('R[SP] = R[SP] + 1;')
499 | 
500 |         return
501 | 
502 |     def _generate_get_id_in_mm(self, id_obj, id_location, idx_reg, debug):
503 |         """Generate Get Identifier in Main Memory (Protected)
504 | 
505 |         Knowing the location in the stack and the offset (mm_ptr) value of
506 |         a given index, code is generated to calculate the exact location of
507 |         the identifier in main memory.
508 | 
509 |         If identifier is param, offset is the parameter offset.
510 |         If identifier is local, offset is the local offset.
511 |         If identifier is global, offset is the local offset of program scope.
512 | 
513 |         Arguments:
514 |             id_obj: The Identifier class object containing id data.
515 |             id_location: Either 'global', 'param', or 'local' depending on the
516 |                 location in the stack where the identifier resides.
517 |             idx_reg: The register number of the index expression.
518 |             debug: Determines if comments are to be written in generated code.
519 | 
520 |         Returns:
521 |             The register number of the calculated address of the identifier.
522 |         """
523 |         # Get a new register to calculate the main memory address of this id
524 |         id_reg = self.get_reg()
525 | 
526 |         self.generate('R[%d] = %d;' % (id_reg, id_obj.mm_ptr))
527 | 
528 |         if id_obj.size is not None and idx_reg is not None:
529 |             self.generate('R[%d] = R[%d] + R[%d];' %
530 |                     (id_reg, id_reg, idx_reg))
531 | 
532 |         if id_location == 'param':
533 |             self.comment('Param referenced', debug)
534 |             self.generate('R[%d] = R[FP] + 1 + R[%d];' % (id_reg, id_reg))
535 |         elif id_location == 'global':
536 |             self.comment('Global var referenced', debug)
537 |             self.generate('R[%d] = MM_SIZE - 1 - R[%d];' % (id_reg, id_reg))
538 |         else:
539 |             self.comment('Local var referenced', debug)
540 |             self.generate('R[%d] = R[FP] - R[%d];' % (id_reg, id_reg))
541 | 
542 |         return id_reg
543 | 
544 |     def generate_name(self, id_obj, id_location, idx_reg, debug):
545 |         """Generate Name
546 | 
547 |         Generates all code necessary to place the contents of the memory
548 |         location of a given identifier into a new register for computation.
549 | 
550 |         Arguments:
551 |             id_obj: The Identifier class object containing id data.
552 |             id_location: Either 'global', 'param', or 'local' depending on the
553 |                 location in the stack where the identifier resides.
554 |             idx_reg: The register number of the index expression.
555 |             debug: Determines if comments are to be written in generated code.
556 |         """
557 |         # Calculate the position of the identifier in main memory
558 |         id_reg = self._generate_get_id_in_mm(id_obj, id_location, idx_reg,
559 |                                              debug)
560 | 
561 |         # Retrieve the main memory location and place it in the last register
562 |         self.generate('R[%d] = MM[R[%d]];' % (id_reg, id_reg))
563 | 
564 |         return
565 | 
566 |     def generate_assignment(self, id_obj, id_location, idx_reg, expr_reg,
567 |                             debug):
568 |         """Generate Assignment
569 | 
570 |         Generates all code necessary to place the outcome of an expression
571 |         into the proper location of the identifier in main memory.
572 | 
573 |         Arguments:
574 |             id_obj: The Identifier class object containing id data.
575 |             id_location: Either 'global', 'param', or 'local' depending on the
576 |                 location in the stack where the identifier resides.
577 |             idx_reg: The register number of the index expression.
578 |             expr_reg: The register number of the expression outcome.
579 |             debug: Determines if comments are to be written in generated code.
580 |         """
581 |         # Calculate the position of the identifier in main memory
582 |         id_reg = self._generate_get_id_in_mm(id_obj, id_location, idx_reg,
583 |                                              debug)
584 | 
585 |         # Set the main memory value to the value in the expression register
586 |         self.generate('MM[R[%d]] = R[%d];' % (id_reg, expr_reg))
587 | 
588 |         return
589 | 
590 |     def generate_param_push(self, expr_reg, debug):
591 |         """Generate Param Push
592 | 
593 |         Generates code to push a parameter onto the procedure stack given
594 |         a register containing the expression outcome.
595 | 
596 |         Arguments:
597 |             expr_reg: The register number of the expression outcome.
598 |             debug: Determines if comments are to be written in generated code.
599 |         """
600 |         self.comment('Pushing argument onto the stack', debug)
601 |         self.generate('R[SP] = R[SP] - 1;')
602 |         self.generate('MM[R[SP]] = R[%d];' % expr_reg)
603 | 
604 |         return
605 | 
606 |     def generate_param_pop(self, param_name, debug):
607 |         """Generate Param Pop
608 | 
609 |         Pops a parameter off of the stack (moves the SP) and prints a
610 |         comment stating which parameter this is.
611 | 
612 |         Arguments:
613 |             param_name: The parameter name to display.
614 |             debug: Determines if comments are to be written in generated code.
615 |         """
616 |         self.comment('Popping "%s" param off the stack' % param_name, debug)
617 |                 
618 |         # Move to the next memory space
619 |         self.generate('R[SP] = R[SP] + 1;')
620 | 
621 |         return
622 | 
623 |     def generate_param_store(self, id_obj, id_location, debug):
624 |         """Generate Param Store
625 | 
626 |         Calculates the memory location of the destination and placed the
627 |         value of the popped parameter (at current SP) in that location.
628 | 
629 |         Arguments:
630 |             id_obj: The Identifier class object containing id data.
631 |             id_location: Either 'global', 'param', or 'local' depending on the
632 |                 location in the stack where the identifier resides.
633 |             debug: Determines if comments are to be written in generated code.
634 |         """
635 |         # Calculate the position of the parameter output location in main mem
636 |         id_reg = self._generate_get_id_in_mm(id_obj, id_location, None, debug)
637 | 
638 |         # Store the parameter in the position pointed to by the SP
639 |         self.generate('MM[R[%d]] = MM[R[SP]];' % id_reg)
640 | 
641 |         return
642 | 
643 |     def generate_number(self, number, token_type, negate):
644 |         """Generate Number
645 | 
646 |         Generates the code to store a parsed number in a new register.
647 | 
648 |         Arguments:
649 |             number: The parsed number value (this is a string representation).
650 |             token_type: The type of the number (either 'integer' or 'float')
651 |             negate: A boolean to determine whether or not to negate the value.
652 |         """
653 |         reg = self.get_reg()
654 | 
655 |         if token_type == 'integer':
656 |             # This is an integer value, set it to the register
657 |             if negate:
658 |                 self.generate('R[%d] = -%s;' % (reg, number))
659 |             else:
660 |                 self.generate('R[%d] = %s;' % (reg, number))
661 |         else:
662 |             # This is a float value, place it in the float buffer and copy it
663 |             # to the register
664 |             if negate:
665 |                 self.generate('R_FLOAT_1 = -%s;' % number)
666 |             else:
667 |                 self.generate('R_FLOAT_1 = %s;' % number)
668 | 
669 |             self.generate('memcpy(&R[%d], &R_FLOAT_1, sizeof(float));' % reg)
670 | 
671 |         return
672 | 
673 |     def generate_return(self, debug):
674 |         """Generate Return Statement
675 | 
676 |         Generates code for all operations needed to move to the scope return
677 |         address and execute the jump to the caller scope.
678 | 
679 |         Arguments:
680 |             debug: Determines if comments should be displayed or not.
681 |         """
682 |         # Smash the local stack
683 |         self.comment('Moving SP to FP (return address)', debug)
684 |         self.generate('R[SP] = R[FP];')
685 | 
686 |         # Go to the return label to exit the procedure
687 |         self.comment('Return to calling function', debug)
688 |         self.generate('goto *(void*)MM[R[FP]];')
689 | 
690 |         return
691 | 
692 |     def generate_operation(self, reg1, type1, reg2, type2, operation):
693 |         """Generate Operation
694 | 
695 |         Given an operation and operand registers with their types, code is
696 |         generated to perform these operations.
697 | 
698 |         Arguments:
699 |             reg1: The register of the first operand.
700 |             type1: The type of the first operand.
701 |             reg2: The register of the second operand.
702 |             type2: The type of the second operand.
703 |             operation: The operation symbol to perform.
704 | 
705 |         Returns:
706 |             The register number where the result of the operation
707 |             is stored.
708 |         """
709 |         # Get a register to hold the operation result
710 |         result = self.get_reg()
711 | 
712 |         if type1 != 'float' and type2 != 'float':
713 |             self.generate('R[%d] = R[%d] %s R[%d];' %
714 |                           (result, reg1, operation, reg2))
715 |             return result
716 | 
717 |         if type1 != 'float':
718 |             self.generate('R_FLOAT_1 = R[%d];' % reg1)
719 |         else:
720 |             self.generate('memcpy(&R_FLOAT_1, &R[%d], sizeof(float));' % reg1)
721 | 
722 |         if type2 != 'float':
723 |             self.generate('R_FLOAT_2 = R[%d];' % reg2)
724 |         else:
725 |             self.generate('memcpy(&R_FLOAT_2, &R[%d], sizeof(float));' % reg2)
726 | 
727 |         self.generate('R_FLOAT_1 = R_FLOAT_1 %s R_FLOAT_2;' % operation)
728 |         self.generate('memcpy(&R[%d], &R_FLOAT_1, sizeof(float));' % result)
729 |         
730 |         return result
731 | 


--------------------------------------------------------------------------------
/lib/parser.py:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/env python3
   2 | 
   3 | """Parser module
   4 | 
   5 | Inherits the Scanner module and parses the attached file's tokens as they are
   6 | encountered with the target grammar. Code is then generated and written to the
   7 | given destination file.
   8 | 
   9 | Author: Evan Sneath
  10 | License: Open Software License v3.0
  11 | 
  12 | Classes:
  13 |     Parser: An implementation of a parser for the source language.
  14 | """
  15 | 
  16 | from lib.errors import *
  17 | from lib.datatypes import Identifier, Parameter, IdentifierTable
  18 | 
  19 | from lib.scanner import Scanner
  20 | from lib.codegenerator import CodeGenerator
  21 | 
  22 | 
  23 | class Parser(Scanner, CodeGenerator):
  24 |     """Parser class
  25 | 
  26 |     Parses the given source file using the defined language structure.
  27 | 
  28 |     Inherits:
  29 |         Scanner: The lexer component of the compiler.
  30 |         CodeGenerator: The class responsible for output file abstraction.
  31 | 
  32 |     Attributes:
  33 |         debug: Boolean attribute denoting if successfully parsed tokens should
  34 |             be displayed as they are encountered and parsed.
  35 | 
  36 |     Methods:
  37 |         parse: Parses the given file until a terminal error is encountered or
  38 |             the end-of-file token is reached.
  39 |     """
  40 |     def __init__(self, debug=False):
  41 |         super().__init__()
  42 | 
  43 |         # Public class attributes
  44 |         self.debug = debug
  45 | 
  46 |         # Define the previous, current, and future token holder
  47 |         self._previous = None
  48 |         self._current = None
  49 |         self._future = None
  50 | 
  51 |         # Define the identifier table to hold all var/program/procedure names
  52 |         self._ids = IdentifierTable()
  53 | 
  54 |         self._has_errors = False
  55 | 
  56 |         return
  57 | 
  58 |     def parse(self, src_path, dest_path):
  59 |         """Begin Parsing
  60 | 
  61 |         Begins the parse of the inputted source file.
  62 | 
  63 |         Arguments:
  64 |             src_path: The input source file to parse.
  65 |             dest_path: The output target file to write.
  66 | 
  67 |         Returns:
  68 |             True on success, False otherwise.
  69 |         """
  70 |         # Attach the source file for reading
  71 |         if not self.attach_source(src_path):
  72 |             return False
  73 | 
  74 |         # Attach the destination file for writing
  75 |         if not self.attach_destination(dest_path):
  76 |             return False
  77 | 
  78 |         # Advance the tokens twice to populate both current and future tokens
  79 |         self._advance_token()
  80 |         self._advance_token()
  81 | 
  82 |         # Add all runtime functions
  83 |         self._add_runtime()
  84 | 
  85 |         # Generate the compiled code header to handle runtime overhead
  86 |         self.generate_header()
  87 | 
  88 |         # Begin parsing the root <program> language structure
  89 |         try:
  90 |             self._parse_program()
  91 |         except ParserSyntaxError:
  92 |             return False
  93 | 
  94 |         # Generate the compiled code footer
  95 |         self.generate_footer()
  96 | 
  97 |         # Make sure there's no junk after the end of program
  98 |         if not self._check('eof'):
  99 |             self._warning('eof', '')
 100 | 
 101 |         # If errors were encountered, don't write code
 102 |         if self._has_errors:
 103 |             return False
 104 | 
 105 |         # Commit the code buffer to the output code file
 106 |         self.commit()
 107 | 
 108 |         return True
 109 | 
 110 |     def _add_runtime(self):
 111 |         """Add Runtime Functions
 112 | 
 113 |         Adds each runtime function to the list of global functions.
 114 |         """
 115 |         # The runtime_functions list is defined in the CodeGenerator class
 116 |         for func_name in self.runtime_functions:
 117 |             # Get all parameters for these functions
 118 |             param_ids = []
 119 |             param_list = self.runtime_functions[func_name]
 120 |             for index, param in enumerate(param_list):
 121 |                 # Build up each param, add it to the list
 122 |                 id_obj = Identifier(name=param[0], type=param[1], size=None,
 123 |                                     params=None, mm_ptr=(index+1))
 124 |                 p_obj = Parameter(id=id_obj, direction=param[2])
 125 |                 param_ids.append(p_obj)
 126 | 
 127 |             # Build the function's identifier
 128 |             func_id = Identifier(name=func_name, type='procedure', size=None, 
 129 |                                  params=param_ids, mm_ptr=1)
 130 | 
 131 |             # Add the function to the global scope of the identifier table
 132 |             self._ids.add(func_id, is_global=True)
 133 | 
 134 |         return
 135 | 
 136 |     def _warning(self, msg, line, prefix='Warning'):
 137 |         """Print Parser Warning Message (Protected)
 138 | 
 139 |         Prints a parser warning message with details about the expected token
 140 |         and the current token being parsed.
 141 | 
 142 |         Arguments:
 143 |             msg: The warning message to display.
 144 |             line: The line where the warning has occurred.
 145 |             prefix: A string value to be printed at the start of the warning.
 146 |                 Overwritten for error messages. (Default: 'Warning')
 147 |         """
 148 |         print('%s: "%s", line %d' % (prefix, self._src_path, line))
 149 |         print('    %s' % msg)
 150 |         print('    %s' % self._get_line(line))
 151 | 
 152 |         return
 153 | 
 154 |     def _syntax_error(self, expected):
 155 |         """Print Syntax Error Message (Protected)
 156 | 
 157 |         Prints a syntax error message with details about the expected token
 158 |         and the current token being parsed. After error printing, an exception
 159 |         is raised to be caught and resolved by parent nodes.
 160 | 
 161 |         Arguments:
 162 |             expected: A string containing the expected token type/value.
 163 | 
 164 |         Raises:
 165 |             ParserSyntaxError: If this method is being called, an error has been
 166 |                 encountered during parsing.
 167 |         """
 168 |         token = self._current
 169 | 
 170 |         # Print the error message
 171 |         msg = ('Expected %s, encountered "%s" (%s)' %
 172 |                (expected, token.value, token.type))
 173 |         self._warning(msg, token.line, prefix='Error')
 174 | 
 175 |         self._has_errors = True
 176 |         raise ParserSyntaxError()
 177 | 
 178 |     def _name_error(self, msg, name, line):
 179 |         """Print Name Error Message (Protected)
 180 | 
 181 |         Prints a name error message with details about the encountered
 182 |         identifier which caused the error.
 183 | 
 184 |         Arguments:
 185 |             msg: The reason for the error.
 186 |             name: The name of the identifier where the name error occurred.
 187 |             line: The line where the name error occurred.
 188 |         """
 189 |         msg = '%s: %s' % (name, msg)
 190 |         self._warning(msg, line, prefix='Error')
 191 | 
 192 |         self._has_errors = True
 193 |         return
 194 | 
 195 |     def _type_error(self, expected, encountered, line):
 196 |         """Print Type Error Message (Protected)
 197 | 
 198 |         Prints a type error message with details about the expected type an
 199 |         the type that was encountered.
 200 | 
 201 |         Arguments:
 202 |             expected: A string containing the expected token type.
 203 |             encountered: A string containing the type encountered.
 204 |             line: The line on which the type error occurred.
 205 |         """
 206 |         msg = 'Expected %s type, encountered %s' % (expected, encountered)
 207 |         self._warning(msg, line, prefix='Error')
 208 | 
 209 |         self._has_errors = True
 210 |         return
 211 | 
 212 |     def _runtime_error(self, msg, line):
 213 |         """Print Runtime Error Message (Protected)
 214 | 
 215 |         Prints a runtime error message with details about the runtime error.
 216 | 
 217 |         Arguments:
 218 |             msg: The reason for the error.
 219 |             line: The line where the runtime error occurred.
 220 |         """
 221 |         self._warning(msg, line, prefix='Error')
 222 | 
 223 |         self._has_errors = True
 224 |         return
 225 | 
 226 |     def _advance_token(self):
 227 |         """Advance Tokens (Protected)
 228 | 
 229 |         Populates the 'current' token with the 'future' token and populates
 230 |         the 'future' token with the next token in the source file.
 231 |         """
 232 |         self._previous = self._current
 233 |         self._current = self._future
 234 | 
 235 |         if self._future is None or self._future.type != 'eof':
 236 |             self._future = self.next_token()
 237 | 
 238 |         return
 239 | 
 240 |     def _check(self, expected_type, expected_value=None, check_future=False):
 241 |         """Check Token (Protected)
 242 | 
 243 |         Peeks at the token to see if the current token matches the given
 244 |         type and value. If it doesn't, don't make a big deal about it.
 245 | 
 246 |         Arguments:
 247 |             expected_type: The expected type of the token.
 248 |             expected_value: The expected value of the token. (Default: None)
 249 |             check_future: If True, the future token is checked (Default: False)
 250 | 
 251 |         Returns:
 252 |             True if the token matches the expected value, False otherwise.
 253 |         """
 254 |         token = self._current
 255 | 
 256 |         if check_future:
 257 |             token = self._future
 258 | 
 259 |         return (token.type == expected_type and
 260 |                (token.value == expected_value or expected_value is None))
 261 | 
 262 |     def _accept(self, expected_type, expected_value=None):
 263 |         """Accept Token (Protected)
 264 | 
 265 |         Compares the token to an expected type and value. If it matches, then
 266 |         consume the token. If not, don't make a big deal about it.
 267 | 
 268 |         Arguments:
 269 |             expected_type: The expected type of the token.
 270 |             expected_value: The expected value of the token. (Default: None)
 271 | 
 272 |         Returns:
 273 |             True if the token matches the expected value, False otherwise.
 274 |         """
 275 |         if self._check(expected_type, expected_value):
 276 |             self._advance_token()
 277 |             return True
 278 | 
 279 |         return False
 280 | 
 281 |     def _match(self, expected_type, expected_value=None):
 282 |         """Match Token (Protected)
 283 | 
 284 |         Compares the token to an expected type and value. If it matches, then
 285 |         consume the token. If not, then throw an error and panic.
 286 | 
 287 |         Arguments:
 288 |             expected_type: The expected type of the token.
 289 |             expected_value: The expected value of the token. (Default: None)
 290 | 
 291 |         Returns:
 292 |             The matched Token class object if successful.
 293 |         """
 294 |         # Check the id_type, if we specified debug, print everything matched
 295 |         if self._accept(expected_type, expected_value):
 296 |             return self._previous
 297 | 
 298 |         # Something different than expected was encountered
 299 |         if expected_value is not None:
 300 |             self._syntax_error('"'+expected_value+'" ('+expected_type+')')
 301 |         else:
 302 |             self._syntax_error(expected_type)
 303 | 
 304 |     def _resync_at_token(self, token_type, token_value=None):
 305 |         """Resync at Token
 306 | 
 307 |         Finds the next token of the given type and value and moves the
 308 |         current token to that point. Code parsing can continue from there.
 309 | 
 310 |         Arguments:
 311 |             token_type: The id_type of the token to resync.
 312 |             token_value: The value of the token to resync. (Default: None)
 313 |         """
 314 |         while not self._check(token_type, token_value):
 315 |             self._advance_token()
 316 | 
 317 |         return
 318 | 
 319 |     def _parse_program(self):
 320 |         """<program> (Protected)
 321 | 
 322 |         Parses the <program> language structure.
 323 | 
 324 |             <program> ::=
 325 |                 <program_header> <program_body>
 326 |         """
 327 |         id_obj = self._parse_program_header()
 328 |         self._parse_program_body(id_obj)
 329 | 
 330 |         return
 331 | 
 332 |     def _parse_program_header(self):
 333 |         """<program_header> (Protected)
 334 | 
 335 |         Parses the <program_header> language structure.
 336 | 
 337 |             <program_header> ::=
 338 |                 'program' <identifier> 'is'
 339 | 
 340 |         Returns:
 341 |             The id object with information about the procedure identifier.
 342 |         """
 343 |         self._match('keyword', 'program')
 344 | 
 345 |         id_name = self._current.value
 346 |         self._match('identifier')
 347 | 
 348 |         # Generate procedure label. This will be stored with the identifier
 349 |         # in place of the mm_ptr attribute since it will not be used
 350 |         label_id = self.get_label_id()
 351 | 
 352 |         # Add the new identifier to the global table
 353 |         id_obj = Identifier(id_name, 'program', None, None, label_id)
 354 |         self._ids.add(id_obj, is_global=True)
 355 | 
 356 |         self._match('keyword', 'is')
 357 | 
 358 |         # Generate the program entry point code
 359 |         self.generate_program_entry(id_obj.name, id_obj.mm_ptr, self.debug)
 360 | 
 361 |         # Push the scope to the program body level
 362 |         self._ids.push_scope(id_obj.name)
 363 | 
 364 |         # Add the program to the base scope so it can be resolved as owner
 365 |         self._ids.add(id_obj)
 366 | 
 367 |         return id_obj
 368 | 
 369 |     def _parse_program_body(self, program_id):
 370 |         """<program_body> (Protected)
 371 | 
 372 |         Parses the <program_body> language structure.
 373 | 
 374 |             <program_body> ::=
 375 |                     ( <declaration> ';' )*
 376 |                 'begin'
 377 |                     ( <statement> ';' )*
 378 |                 'end' 'program'
 379 | 
 380 |         Arguments:
 381 |             program_id: The identifier object for the program.
 382 |         """
 383 |         local_var_size = 0
 384 | 
 385 |         while not self._accept('keyword', 'begin'):
 386 |             try:
 387 |                 size = self._parse_declaration()
 388 | 
 389 |                 if size is not None:
 390 |                     local_var_size += int(size)
 391 |             except ParserError:
 392 |                 self._resync_at_token('symbol', ';')
 393 | 
 394 |             self._match('symbol', ';')
 395 | 
 396 |         # Label the entry point for the program
 397 |         self.generate('%s_%d_begin:' % (program_id.name, program_id.mm_ptr))
 398 |         self.tab_push()
 399 | 
 400 |         if local_var_size != 0:
 401 |             self.comment('Allocating space for local variables', self.debug)
 402 |             self.generate('R[SP] = R[SP] - %d;' % local_var_size)
 403 | 
 404 |         while not self._accept('keyword', 'end'):
 405 |             try:
 406 |                 self._parse_statement()
 407 |             except ParserError:
 408 |                 self._resync_at_token('symbol', ';')
 409 | 
 410 |             self._match('symbol', ';')
 411 | 
 412 |         self._match('keyword', 'program')
 413 | 
 414 |         # Pop out of the program body scope
 415 |         self._ids.pop_scope()
 416 |         self.tab_pop()
 417 | 
 418 |         return
 419 | 
 420 |     def _parse_declaration(self):
 421 |         """<declaration> (Protected)
 422 | 
 423 |         Parses the <declaration> language structure.
 424 | 
 425 |             <declaration> ::=
 426 |                 [ 'global' ] <procedure_declaration>
 427 |                 [ 'global' ] <variable_declaration>
 428 | 
 429 |         Returns:
 430 |             The size of any variable declared. None if procedure.
 431 |         """
 432 |         is_global = False
 433 | 
 434 |         id_obj = None
 435 |         size = None
 436 | 
 437 |         if self._accept('keyword', 'global'):
 438 |             is_global = True
 439 | 
 440 |         if self._first_procedure_declaration():
 441 |             self._parse_procedure_declaration(is_global=is_global)
 442 |         elif self._first_variable_declaration():
 443 |             id_obj = self._parse_variable_declaration(is_global=is_global)
 444 |         else:
 445 |             self._syntax_error('procedure or variable declaration')
 446 | 
 447 |         if id_obj is not None:
 448 |             size = id_obj.size if id_obj.size is not None else 1
 449 | 
 450 |         return size
 451 | 
 452 |     def _first_variable_declaration(self):
 453 |         """first(<variable_declaration>) (Protected)
 454 | 
 455 |         Determines if current token matches the first terminals.
 456 | 
 457 |             first(<variable_declaration>) ::=
 458 |                 integer | float | bool | string
 459 | 
 460 |         Returns:
 461 |             True if current token matches a first terminal, False otherwise.
 462 |         """
 463 |         return (self._check('keyword', 'integer') or
 464 |                 self._check('keyword', 'float') or
 465 |                 self._check('keyword', 'bool') or
 466 |                 self._check('keyword', 'string'))
 467 | 
 468 |     def _parse_variable_declaration(self, is_global=False, is_param=False):
 469 |         """<variable_declaration> (Protected)
 470 | 
 471 |         Parses the <variable_declaration> language structure.
 472 | 
 473 |             <variable_declaration> ::=
 474 |                 <type_mark> <identifier> [ '[' <array_size> ']' ]
 475 | 
 476 |         Arguments:
 477 |             is_global: Denotes if the variable is to be globally scoped.
 478 |                 (Default: False)
 479 |             id_table_add: Denotes if the variable is to be added to the
 480 |                 identifier table.
 481 | 
 482 |         Returns:
 483 |             The Identifier class object of the variable encountered.
 484 |         """
 485 |         id_type = self._parse_type_mark()
 486 | 
 487 |         # Stores the array size of the variable
 488 |         var_size = None
 489 | 
 490 |         # Formally match the token to an identifier type
 491 |         var_token = self._match('identifier')
 492 | 
 493 |         if self._accept('symbol', '['):
 494 |             index_type = self._parse_number(generate_code=False)
 495 | 
 496 |             var_size = self._previous.value
 497 |             index_line = self._previous.line
 498 | 
 499 |             # Check the type to make sure this is an integer so that we can
 500 |             # allocate memory appropriately
 501 |             if  index_type != 'integer':
 502 |                 self._type_error('integer', index_type, index_line)
 503 |                 raise ParserTypeError()
 504 | 
 505 |             self._match('symbol', ']')
 506 | 
 507 |         # Get the memory space pointer for this variable.
 508 |         mm_ptr = self.get_mm(var_size, is_param=is_param)
 509 | 
 510 |         # The declaration was valid, add the identifier to the table
 511 |         id_obj = Identifier(var_token.value, id_type, var_size, None, mm_ptr)
 512 | 
 513 |         if not is_param:
 514 |             try:
 515 |                 self._ids.add(id_obj, is_global=is_global)
 516 |             except ParserNameError as e:
 517 |                 self._name_error(str(e),
 518 |                                  var_token.value, var_token.line)
 519 | 
 520 |         return id_obj
 521 | 
 522 |     def _parse_type_mark(self):
 523 |         """<type_mark> (Protected)
 524 | 
 525 |         Parses <type_mark> language structure.
 526 | 
 527 |             <type_mark> ::=
 528 |                 'integer' |
 529 |                 'float' |
 530 |                 'bool' |
 531 |                 'string'
 532 | 
 533 |         Returns:
 534 |             Type (as string) of the variable being declared.
 535 |         """
 536 |         id_type = None
 537 | 
 538 |         if self._accept('keyword', 'integer'):
 539 |             id_type = 'integer'
 540 |         elif self._accept('keyword', 'float'):
 541 |             id_type = 'float'
 542 |         elif self._accept('keyword', 'bool'):
 543 |             id_type = 'bool'
 544 |         elif self._accept('keyword', 'string'):
 545 |             id_type = 'string'
 546 |         else:
 547 |             self._syntax_error('variable type')
 548 | 
 549 |         return id_type
 550 | 
 551 |     def _first_procedure_declaration(self):
 552 |         """first(<procedure_declarations>) (Protected)
 553 | 
 554 |         Determines if current token matches the first terminals.
 555 | 
 556 |             first(<procedure_declaration>) ::=
 557 |                 'procedure'
 558 | 
 559 |         Returns:
 560 |             True if current token matches a first terminal, False otherwise.
 561 |         """
 562 |         return self._check('keyword', 'procedure')
 563 | 
 564 |     def _parse_procedure_declaration(self, is_global):
 565 |         """<procedure_declaration> (Protected)
 566 | 
 567 |         Parses the <procedure_declaration> language structure.
 568 | 
 569 |             <procedure_declaration> ::=
 570 |                 <procedure_header> <procedure_body>
 571 | 
 572 |         Arguments:
 573 |             is_global: Denotes if the procedure is to be globally scoped.
 574 |         """
 575 |         id_obj = self._parse_procedure_header(is_global=is_global)
 576 |         self._parse_procedure_body(id_obj)
 577 | 
 578 |         return
 579 | 
 580 |     def _parse_procedure_header(self, is_global):
 581 |         """<procedure_header> (Protected)
 582 | 
 583 |         Parses the <procedure_header> language structure.
 584 | 
 585 |             <procedure_header> ::=
 586 |                 'procedure' <identifier> '(' [ <parameter_list> ] ')'
 587 | 
 588 |         Arguments:
 589 |             is_global: Denotes if the procedure is to be globally scoped.
 590 |         """
 591 |         self._match('keyword', 'procedure')
 592 | 
 593 |         id_name = self._current.value
 594 |         id_line = self._current.line
 595 | 
 596 |         self._match('identifier')
 597 |         self._match('symbol', '(')
 598 | 
 599 |         params = []
 600 | 
 601 |         if not self._check('symbol', ')'):
 602 |             params = self._parse_parameter_list(params)
 603 | 
 604 |         self._match('symbol', ')')
 605 | 
 606 |         # Generate procedure label. This will be stored with the identifier
 607 |         # in place of the mm_ptr attribute since it will not be used
 608 |         label_id = self.get_label_id()
 609 | 
 610 |         id_obj = Identifier(id_name, 'procedure', None, params, label_id)
 611 | 
 612 |         try:
 613 |             # Add the procedure identifier to the parent and its own table
 614 |             self._ids.add(id_obj, is_global=is_global)
 615 |             self._ids.push_scope(id_obj.name)
 616 |             self._ids.add(id_obj)
 617 |         except ParserNameError:
 618 |             self._name_error('name already declared at this scope', id_name,
 619 |                              id_line)
 620 | 
 621 |         # Attempt to add each encountered param at the procedure scope
 622 |         for param in params:
 623 |             try:
 624 |                 self._ids.add(param.id, is_global=False)
 625 |             except ParserNameError:
 626 |                 self._name_error('name already declared at global scope',
 627 |                                  param.id.name, id_line)
 628 | 
 629 |         # Define the entry point for the function w/ unique identifier
 630 |         self.generate('%s_%d:' % (id_obj.name, id_obj.mm_ptr))
 631 |         self.tab_push()
 632 | 
 633 |         # Define the beginning of the function body
 634 |         self.generate('goto %s_%d_begin;' % (id_obj.name, id_obj.mm_ptr))
 635 |         self.generate('')
 636 | 
 637 |         return id_obj
 638 | 
 639 |     def _parse_procedure_body(self, procedure_id):
 640 |         """<procedure_body> (Protected)
 641 | 
 642 |         Parses the <procedure_body> language structure.
 643 | 
 644 |             <procedure_body> ::=
 645 |                     ( <declaration> ';' )*
 646 |                 'begin'
 647 |                     ( <statement> ';' )*
 648 |                 'end' 'procedure'
 649 | 
 650 |         Arguments:
 651 |             procedure_id: The identifier object for the procedure.
 652 |         """
 653 |         local_var_size = 0
 654 | 
 655 |         # Reset the local pointer for the local variables.
 656 |         self.reset_local_ptr()
 657 |         self.reset_param_ptr()
 658 | 
 659 |         # Accept any declarations
 660 |         while not self._accept('keyword', 'begin'):
 661 |             try:
 662 |                 size = self._parse_declaration()
 663 | 
 664 |                 # If this was a local var, allocate space for it
 665 |                 if size is not None:
 666 |                     local_var_size += size
 667 |             except ParserError:
 668 |                 self._resync_at_token('symbol', ';')
 669 | 
 670 |             self._match('symbol', ';')
 671 | 
 672 |         # Define the function begin point
 673 |         self.generate('%s_%d_begin:' %
 674 |                       (procedure_id.name, procedure_id.mm_ptr))
 675 | 
 676 |         self.tab_push()
 677 | 
 678 |         if local_var_size != 0:
 679 |             self.comment('Allocating space for local variables', self.debug)
 680 |             self.generate('R[SP] = R[SP] - %d;' % local_var_size)
 681 | 
 682 |         # Accept any statements
 683 |         while not self._accept('keyword', 'end'):
 684 |             try:
 685 |                 self._parse_statement()
 686 |             except ParserError:
 687 |                 self._resync_at_token('symbol', ';')
 688 | 
 689 |             self._match('symbol', ';')
 690 | 
 691 |         self._match('keyword', 'procedure')
 692 | 
 693 |         # Generate code to jump back to the caller scope
 694 |         self.generate_return(self.debug)
 695 |         self.generate('')
 696 | 
 697 |         self.tab_pop()
 698 |         self._ids.pop_scope()
 699 |         self.tab_pop()
 700 | 
 701 |         return
 702 | 
 703 |     def _parse_parameter_list(self, params):
 704 |         """<parameter_list> (Protected)
 705 | 
 706 |         Parse the <parameter_list> language structure.
 707 | 
 708 |             <parameter_list> ::=
 709 |                 <parameter> ',' <parameter_list> |
 710 |                 <parameter>
 711 | 
 712 |         Arguments:
 713 |             params: A list of Parameter named tuples associated with the
 714 |                 procedure.
 715 | 
 716 |         Returns:
 717 |             An completed list of all Parameter named tuples associated
 718 |             with the procedure.
 719 |         """
 720 |         # Get one parameter
 721 |         param = self._parse_parameter()
 722 |         params.append(param)
 723 | 
 724 |         # Get all following parameters
 725 |         if self._accept('symbol', ','):
 726 |             params = self._parse_parameter_list(params)
 727 | 
 728 |         # All parameters found will be returned in the list
 729 |         return params
 730 | 
 731 |     def _parse_parameter(self):
 732 |         """<parameter> (Protected)
 733 | 
 734 |         Parse the <parameter> language structure.
 735 | 
 736 |             <parameter> ::=
 737 |                 <variable_declaration> ( 'in' | 'out' )
 738 |         """
 739 |         # Return the id object, but don't add it to the identifier table
 740 |         # yet or get a memory location for it. This will be done when the
 741 |         # procedure is called
 742 |         id_obj = self._parse_variable_declaration(is_param=True)
 743 | 
 744 |         direction = None
 745 | 
 746 |         if self._accept('keyword', 'in'):
 747 |             direction = 'in'
 748 |         elif self._accept('keyword', 'out'):
 749 |             direction = 'out'
 750 |         else:
 751 |             self._syntax_error('"in" or "out"')
 752 | 
 753 |         return Parameter(id_obj, direction)
 754 | 
 755 |     def _parse_statement(self):
 756 |         """<statement> (Protected)
 757 | 
 758 |         Parse the <statement> language structure.
 759 | 
 760 |             <statement> ::=
 761 |                 <assignment_statement> |
 762 |                 <if_statement> |
 763 |                 <loop_statement> |
 764 |                 <return_statement> |
 765 |                 <procedure_call>
 766 |         """
 767 |         if self._accept('keyword', 'return'):
 768 |             # Go to the return label to exit the procedure/program
 769 |             self.generate_return(self.debug)
 770 |         elif self._first_if_statement():
 771 |             self._parse_if_statement()
 772 |         elif self._first_loop_statement():
 773 |             self._parse_loop_statement()
 774 |         elif self._first_procedure_call():
 775 |             self._parse_procedure_call()
 776 |         elif self._first_assignment_statement():
 777 |             self._parse_assignment_statement()
 778 |         else:
 779 |             self._syntax_error('statement')
 780 | 
 781 |         return
 782 | 
 783 |     def _first_assignment_statement(self):
 784 |         """first(<assignment_statement>) (Protected)
 785 | 
 786 |         Determines if current token matches the first terminals.
 787 | 
 788 |             first(<assignment_statement>) ::=
 789 |                 <identifier>
 790 | 
 791 |         Returns:
 792 |             True if current token matches a first terminal, False otherwise.
 793 |         """
 794 |         return self._check('identifier')
 795 | 
 796 |     def _parse_assignment_statement(self):
 797 |         """<assignment_statement> (Protected)
 798 | 
 799 |         Parses the <assignment_statement> language structure.
 800 | 
 801 |             <assignment_statement> ::=
 802 |                 <destination> ':=' <expression>
 803 |         """
 804 |         id_name = self._current.value
 805 |         id_line = self._current.line
 806 | 
 807 |         dest_type = self._parse_destination()
 808 | 
 809 |         # Grab the last register used in case this variable is an array
 810 |         index_reg = self.get_reg(inc=False)
 811 | 
 812 |         # Check to make sure this is a valid identifier
 813 |         id_obj = self._ids.find(id_name)
 814 | 
 815 |         self._match('symbol', ':=')
 816 | 
 817 |         expr_type = self._parse_expression()
 818 | 
 819 |         # Get the register used for the last expression
 820 |         expr_reg = self.get_reg(inc=False)
 821 | 
 822 |         if dest_type != expr_type:
 823 |             self._type_error(dest_type, expr_type, id_line)
 824 | 
 825 |         # Determine the location of the identifier in the stack
 826 |         id_location = self._ids.get_id_location(id_name)
 827 | 
 828 |         # Verify the direction of the id if it is a param
 829 |         if id_location == 'param':
 830 |             direction = self._ids.get_param_direction(id_name)
 831 |             if direction != 'out':
 832 |                 self._type_error('\'out\' param',
 833 |                                  '\'%s\' param' % direction, id_line)
 834 |                 raise ParserTypeError()
 835 | 
 836 |         # Generate all code associated with retrieving this value
 837 |         self.generate_assignment(id_obj, id_location, index_reg, expr_reg,
 838 |                 self.debug)
 839 | 
 840 |         return
 841 | 
 842 |     def _first_if_statement(self):
 843 |         """first(<if_statement>) (Protected)
 844 | 
 845 |         Determines if current token matches the first terminals.
 846 | 
 847 |             first(<if_statement>) ::=
 848 |                 'if'
 849 | 
 850 |         Returns:
 851 |             True if current token matches a first terminal, False otherwise.
 852 |         """
 853 |         return self._check('keyword', 'if')
 854 | 
 855 |     def _parse_if_statement(self):
 856 |         """<if_statement> (Protected)
 857 | 
 858 |         Parses the <if_statement> language structure.
 859 | 
 860 |             <if_statement> ::=
 861 |                 'if' '(' <expression> ')' 'then' ( <statement> ';' )+
 862 |                 [ 'else' ( <statement> ';' )+ ]
 863 |                 'end' 'if'
 864 |         """
 865 |         self._match('keyword', 'if')
 866 |         self._match('symbol', '(')
 867 |         self._parse_expression()
 868 |         self._match('symbol', ')')
 869 |         self._match('keyword', 'then')
 870 | 
 871 |         label_id = self.get_label_id()
 872 |         expr_reg = self.get_reg(inc=False)
 873 | 
 874 |         self.generate('if (!R[%d]) goto else_%d;' % (expr_reg, label_id))
 875 |         self.tab_push()
 876 | 
 877 |         while True:
 878 |             try:
 879 |                 self._parse_statement()
 880 |             except ParserError:
 881 |                 self._resync_at_token('symbol', ';')
 882 | 
 883 |             self._match('symbol', ';')
 884 | 
 885 |             if self._check('keyword', 'else') or self._check('keyword', 'end'):
 886 |                 break
 887 | 
 888 |         self.generate('goto endif_%d;' % label_id)
 889 | 
 890 |         self.tab_pop()
 891 |         self.generate('else_%d:' % label_id)
 892 |         self.tab_push()
 893 | 
 894 |         if self._accept('keyword', 'else'):
 895 |             while True:
 896 |                 try:
 897 |                     self._parse_statement()
 898 |                 except ParserError:
 899 |                     self._resync_at_token('symbol', ';')
 900 | 
 901 |                 self._match('symbol', ';')
 902 | 
 903 |                 if self._check('keyword', 'end'):
 904 |                     break
 905 | 
 906 |         self._match('keyword', 'end')
 907 |         self._match('keyword', 'if')
 908 | 
 909 |         self.tab_pop()
 910 |         self.generate('endif_%d:' % label_id)
 911 | 
 912 |         return
 913 | 
 914 |     def _first_loop_statement(self):
 915 |         """first(<loop_statement>) (Protected)
 916 | 
 917 |         Determines if current token matches the first terminals.
 918 | 
 919 |             first(<loop_statement>) ::=
 920 |                 'for'
 921 | 
 922 |         Returns:
 923 |             True if current token matches a first terminal, False otherwise.
 924 |         """
 925 |         return self._check('keyword', 'for')
 926 | 
 927 |     def _parse_loop_statement(self):
 928 |         """<loop_statement> (Protected)
 929 | 
 930 |         Parses the <loop_statement> language structure.
 931 | 
 932 |             <loop_statement> ::=
 933 |                 'for' '(' <assignment_statement> ';' <expression> ')'
 934 |                     ( <statement> ';' )*
 935 |                 'end' 'for'
 936 |         """
 937 |         self._match('keyword', 'for')
 938 |         self._match('symbol', '(')
 939 | 
 940 |         label_id = self.get_label_id()
 941 |         self.generate('loop_%d:' % label_id)
 942 |         self.tab_push()
 943 | 
 944 |         try:
 945 |             self._parse_assignment_statement()
 946 |         except ParserError:
 947 |             self._resync_at_token('symbol', ';')
 948 | 
 949 |         self._match('symbol', ';')
 950 | 
 951 |         self._parse_expression()
 952 |         self._match('symbol', ')')
 953 | 
 954 |         expr_reg = self.get_reg(inc=False)
 955 |         self.generate('if (!R[%d]) goto endloop_%d;' % (expr_reg, label_id))
 956 | 
 957 |         while not self._accept('keyword', 'end'):
 958 |             try:
 959 |                 self._parse_statement()
 960 |             except ParserError:
 961 |                 self._resync_at_token('symbol', ';')
 962 | 
 963 |             self._match('symbol', ';')
 964 | 
 965 |         self._match('keyword', 'for')
 966 | 
 967 |         self.generate('goto loop_%d;' % label_id)
 968 |         self.tab_pop()
 969 |         self.generate('endloop_%d:' % label_id)
 970 | 
 971 |         return
 972 | 
 973 |     def _first_procedure_call(self):
 974 |         """first(<procedure_call>) (Protected)
 975 | 
 976 |         Determines if current token matches the first terminals. The second
 977 |         terminal is checked using the future token in this case to distinguish
 978 |         the first(<procedure_call>) from first(<assignment_statement>).
 979 | 
 980 |             first(<procedure_call>) ::=
 981 |                 '('
 982 | 
 983 |         Returns:
 984 |             True if current token matches a first terminal, False otherwise.
 985 |         """
 986 |         return self._check('symbol', '(', check_future=True)
 987 | 
 988 |     def _parse_procedure_call(self):
 989 |         """<procedure_call> (Protected)
 990 | 
 991 |         Parses the <procedure_call> language structure.
 992 | 
 993 |             <procedure_call> ::=
 994 |                 <identifier> '(' [ <argument_list> ] ')'
 995 |         """
 996 |         # Match an identifier, check to make sure the identifier is procedure
 997 |         id_name = self._current.value
 998 |         id_line = self._current.line
 999 | 
1000 |         self._match('identifier')
1001 | 
1002 |         try:
1003 |             id_obj = self._ids.find(id_name)
1004 |         except ParserNameError as e:
1005 |             self._name_error('procedure has not been declared', id_name,
1006 |                              id_line)
1007 |             raise e
1008 | 
1009 |         if id_obj.type != 'procedure':
1010 |             self._type_error('procedure', id_obj.type, id_line)
1011 |             raise ParserTypeError()
1012 | 
1013 |         self._match('symbol', '(')
1014 | 
1015 |         out_names = []
1016 | 
1017 |         if not self._check('symbol', ')'):
1018 |             num_args, out_names = self._parse_argument_list(
1019 |                 id_obj.params,
1020 |                 out_names,
1021 |                 index=0)
1022 | 
1023 |             # Make sure that too few arguments are not used
1024 |             if num_args < len(id_obj.params):
1025 |                 self._runtime_error(
1026 |                     'procedure call accepts %d argument(s), %d given' %
1027 |                     (len(id_obj.params), num_args), id_line)
1028 | 
1029 |                 raise ParserRuntimeError()
1030 | 
1031 |         self._match('symbol', ')')
1032 | 
1033 |         # Generate all procedure call code
1034 |         self.generate_procedure_call(id_obj.name, id_obj.mm_ptr, self.debug)
1035 | 
1036 |         # Pop parameters off the stack
1037 |         for index, param in enumerate(id_obj.params):
1038 |             out_name = out_names[index]
1039 | 
1040 |             self.generate_param_pop(param.id.name, self.debug)
1041 | 
1042 |             # If this is an outbound parameter, we must write it to its
1043 |             # memory location
1044 |             if param.direction == 'out':
1045 |                 # Get the identifier object of the destination
1046 |                 out_id = self._ids.find(out_name)
1047 | 
1048 |                 # Determine where on the stack this identifier exists
1049 |                 out_location = self._ids.get_id_location(out_name)
1050 | 
1051 |                 # Store the parameter in the appropriate location
1052 |                 self.generate_param_store(out_id, out_location, self.debug)
1053 | 
1054 |         # Finish the procedure call
1055 |         self.generate_procedure_call_end(self.debug)
1056 | 
1057 |         return
1058 | 
1059 |     def _parse_argument_list(self, params, out_names, index=0):
1060 |         """<argument_list> (Protected)
1061 | 
1062 |         Parses <argument_list> language structure.
1063 | 
1064 |             <argument_list> ::=
1065 |                 <expression> ',' <argument_list> |
1066 |                 <expression>
1067 | 
1068 |         Arguments:
1069 |             params: A list of Parameter namedtuple objects allowed in the
1070 |                 procedure call.
1071 |             out_names: A list of identifier names that are being used in this
1072 |                 procedure call and must be written back.
1073 |             index: The index in params with which to match the found param.
1074 |                 (Default: 0)
1075 | 
1076 |         Returns:
1077 |             A tuple (index, out_names) consisting of the number of arguments
1078 |             encountered and a list of the identifiers used to write back.
1079 |         """
1080 |         arg_line = self._current.line
1081 |         arg_type = None
1082 | 
1083 |         # Make sure that too many arguments are not used
1084 |         if index > len(params) - 1:
1085 |             self._runtime_error('procedure call accepts only %d argument(s)' %
1086 |                                 len(params), arg_line)
1087 |             raise ParserRuntimeError()
1088 | 
1089 |         # Get the parameter information for this position in the arg list
1090 |         param = params[index]
1091 | 
1092 |         if param.direction == 'out':
1093 |             # We may only parse a single identifier if the direction is 'out'
1094 |             arg_name = self._current.value
1095 |             arg_type = self._parse_name()
1096 | 
1097 |             out_names.append(arg_name)
1098 |         elif param.direction == 'in':
1099 |             # This is a 'in' parameter with only one element (not array)
1100 |             arg_type = self._parse_expression()
1101 | 
1102 |             out_names.append(None)
1103 | 
1104 |         # Get the last reg assignment in the expr. This is argument's register
1105 |         expr_reg = self.get_reg(inc=False)
1106 | 
1107 |         if arg_type != param.id.type:
1108 |             self._type_error(param.id.type, arg_type, arg_line)
1109 | 
1110 |         index += 1
1111 | 
1112 |         if self._accept('symbol', ','):
1113 |             index, out_names = self._parse_argument_list(
1114 |                 params,
1115 |                 out_names,
1116 |                 index=index)
1117 | 
1118 |         # Push the parameters onto the stack in reverse order. The last param
1119 |         # will reach this point first
1120 |         self.generate_param_push(expr_reg, self.debug)
1121 | 
1122 |         return index, out_names
1123 | 
1124 |     def _parse_destination(self):
1125 |         """<destination> (Protected)
1126 | 
1127 |         Parses the <destination> language structure.
1128 | 
1129 |             <destination> ::=
1130 |                 <identifier> [ '[' <expression> ']' ]
1131 | 
1132 |         Returns:
1133 |             Type of the destination identifier as a string.
1134 |         """
1135 |         id_name = self._current.value
1136 |         id_line = self._current.line
1137 | 
1138 |         self._match('identifier')
1139 | 
1140 |         # Make sure that identifier is valid for the scope
1141 |         try:
1142 |             id_obj = self._ids.find(id_name)
1143 |         except ParserNameError as e:
1144 |             self._name_error('not declared in this scope', id_name, id_line)
1145 |             raise e
1146 | 
1147 |         # Check type to make sure it's a variable
1148 |         if not id_obj.type in ['integer', 'float', 'bool', 'string']:
1149 |             self._type_error('variable', id_obj.type, id_line)
1150 |             raise ParserTypeError()
1151 | 
1152 |         id_type = id_obj.type
1153 | 
1154 |         if self._accept('symbol', '['):
1155 |             expr_line = self._current.line
1156 |             expr_type = self._parse_expression()
1157 | 
1158 |             if expr_type != 'integer':
1159 |                 self._type_error('integer', expr_type, expr_line)
1160 | 
1161 |             self._accept('symbol', ']')
1162 |         elif id_obj.size is not None:
1163 |             self._runtime_error('%s: array requires index' % id_name, id_line)
1164 | 
1165 |         return id_type
1166 | 
1167 |     def _parse_expression(self):
1168 |         """<expression> (Protected)
1169 | 
1170 |         Parses <expression> language structure.
1171 | 
1172 |             <expression> ::=
1173 |                 <expression> '&' <arith_op> |
1174 |                 <expression> '|' <arith_op> |
1175 |                 [ 'not' ] <arith_op>
1176 | 
1177 |         Returns:
1178 |             The type value of the expression.
1179 |         """
1180 |         self.comment('Parsing expression', self.debug)
1181 | 
1182 |         negate = False
1183 | 
1184 |         if self._accept('keyword', 'not'):
1185 |             negate = True
1186 | 
1187 |         line = self._current.line
1188 |         id_type = self._parse_arith_op()
1189 | 
1190 |         if negate and id_type not in ['integer', 'bool']:
1191 |             self._type_error('integer or bool', id_type, line)
1192 |             raise ParserTypeError()
1193 | 
1194 |         while True:
1195 |             operand1 = self.get_reg(inc=False)
1196 | 
1197 |             if self._accept('symbol', '&'):
1198 |                 operation = '&'
1199 |             elif self._accept('symbol', '|'):
1200 |                 operation = '|'
1201 |             else:
1202 |                 break
1203 | 
1204 |             if id_type not in ['integer', 'bool']:
1205 |                 self._type_error('integer or bool', id_type, line)
1206 |                 raise ParserTypeError()
1207 | 
1208 |             next_type = self._parse_arith_op()
1209 | 
1210 |             operand2 = self.get_reg(inc=False)
1211 | 
1212 |             if next_type not in ['integer', 'bool']:
1213 |                 self._type_error('integer or bool', next_type, line)
1214 |                 raise ParserTypeError()
1215 | 
1216 |             result = self.generate_operation(operand1, id_type, operand2,
1217 |                                              next_type, operation)
1218 | 
1219 |             if negate:
1220 |                 self.generate('R[%d] = ~R[%d];' % (result, result))
1221 | 
1222 |         return id_type
1223 | 
1224 |     def _parse_arith_op(self):
1225 |         """<arith_op> (Protected)
1226 | 
1227 |         Parses <arith_op> language structure.
1228 | 
1229 |             <arith_op> ::=
1230 |                 <arith_op> '+' <relation> |
1231 |                 <arith_op> '-' <relation> |
1232 |                 <relation>
1233 | 
1234 |         Returns:
1235 |             The type value of the expression.
1236 |         """
1237 |         line = self._current.line
1238 |         id_type = self._parse_relation()
1239 | 
1240 |         while True:
1241 |             operand1 = self.get_reg(inc=False)
1242 | 
1243 |             if self._accept('symbol', '+'):
1244 |                 operation = '+'
1245 |             elif self._accept('symbol', '-'):
1246 |                 operation = '-'
1247 |             else:
1248 |                 break
1249 | 
1250 |             if id_type not in ['integer', 'float']:
1251 |                 self._type_error('integer or float', id_type, line)
1252 |                 raise ParserTypeError()
1253 | 
1254 |             next_type = self._parse_relation()
1255 | 
1256 |             operand2 = self.get_reg(inc=False)
1257 |             
1258 |             if next_type not in ['integer', 'float']:
1259 |                 self._type_error('integer or float', next_type, line)
1260 |                 raise ParserTypeError()
1261 | 
1262 |             self.generate_operation(operand1, id_type, operand2, next_type,
1263 |                                     operation)
1264 | 
1265 |         return id_type
1266 | 
1267 |     def _parse_relation(self):
1268 |         """<relation> (Protected)
1269 | 
1270 |         Parses <relation> language structure.
1271 | 
1272 |             <relation> ::=
1273 |                 <relation> '<' <term> |
1274 |                 <relation> '>' <term> |
1275 |                 <relation> '>=' <term> |
1276 |                 <relation> '<=' <term> |
1277 |                 <relation> '==' <term> |
1278 |                 <relation> '!=' <term> |
1279 |                 <term>
1280 | 
1281 |         Returns:
1282 |             The type value of the expression.
1283 |         """
1284 |         line = self._current.line
1285 |         id_type = self._parse_term()
1286 | 
1287 |         # Check for relational operators. Note that relational operators
1288 |         # are only valid for integer or boolean tokens
1289 |         while True:
1290 |             operand1 = self.get_reg(inc=False)
1291 | 
1292 |             if self._accept('symbol', '<'):
1293 |                 operation = '<'
1294 |             elif self._accept('symbol', '>'):
1295 |                 operation = '>'
1296 |             elif self._accept('symbol', '<='):
1297 |                 operation = '<='
1298 |             elif self._accept('symbol', '>='):
1299 |                 operation = '>='
1300 |             elif self._accept('symbol', '=='):
1301 |                 operation = '=='
1302 |             elif self._accept('symbol', '!='):
1303 |                 operation = '!='
1304 |             else:
1305 |                 break
1306 | 
1307 |             if id_type not in ['integer', 'bool']:
1308 |                 self._type_error('integer or bool', id_type, line)
1309 |                 raise ParserTypeError()
1310 | 
1311 |             next_type = self._parse_term()
1312 | 
1313 |             operand2 = self.get_reg(inc=False)
1314 | 
1315 |             if next_type not in ['integer', 'bool']:
1316 |                 self._type_error('integer or bool', next_type, line)
1317 |                 raise ParserTypeError()
1318 | 
1319 |             self.generate_operation(operand1, id_type, operand2, next_type,
1320 |                                     operation)
1321 | 
1322 |         return id_type
1323 | 
1324 |     def _parse_term(self):
1325 |         """<term> (Protected)
1326 | 
1327 |         Parses <term> language structure.
1328 | 
1329 |             <term> ::=
1330 |                 <term> '*' <factor> |
1331 |                 <term> '/' <factor> |
1332 |                 <factor>
1333 | 
1334 |         Returns:
1335 |             The type value of the expression.
1336 |         """
1337 |         line = self._current.line
1338 |         id_type = self._parse_factor()
1339 | 
1340 |         # Check for multiplication or division operators. Note that these
1341 |         # operators are only valid for integer or float values
1342 |         while True:
1343 |             operand1 = self.get_reg(inc=False)
1344 | 
1345 |             if self._accept('symbol', '*'):
1346 |                 operation = '*'
1347 |             elif self._accept('symbol', '/'):
1348 |                 operation = '/'
1349 |             else:
1350 |                 break
1351 | 
1352 |             if id_type not in ['integer', 'float']:
1353 |                 self._type_error('integer or float', id_type, line)
1354 |                 raise ParserTypeError()
1355 | 
1356 |             line = self._current.line
1357 |             next_type = self._parse_factor()
1358 | 
1359 |             operand2 = self.get_reg(inc=False)
1360 | 
1361 |             if next_type not in ['integer', 'float']:
1362 |                 self._type_error('integer or float', next_type, line)
1363 |                 raise ParserTypeError()
1364 | 
1365 |             self.generate_operation(operand1, id_type, operand2, next_type,
1366 |                                     operation)
1367 | 
1368 |         return id_type
1369 | 
1370 |     def _parse_factor(self):
1371 |         """<factor> (Protected)
1372 | 
1373 |         Parses <factor> language structure.
1374 | 
1375 |             <factor> ::=
1376 |                 '(' <expression> ')' |
1377 |                 [ '-' ] <name> |
1378 |                 [ '-' ] <number> |
1379 |                 <string> |
1380 |                 'true' |
1381 |                 'false'
1382 | 
1383 |         Returns:
1384 |             The type value of the expression.
1385 |         """
1386 |         id_type = None
1387 | 
1388 |         if self._accept('symbol', '('):
1389 |             id_type = self._parse_expression()
1390 |             self._match('symbol', ')')
1391 |         elif self._accept('string'):
1392 |             id_type = 'string'
1393 |             str_val = self._previous.value
1394 | 
1395 |             self.generate('R[%d] = (int)"%s";' % (self.get_reg(), str_val))
1396 |         elif self._accept('keyword', 'true'):
1397 |             id_type = 'bool'
1398 | 
1399 |             self.generate('R[%d] = 1;' % (self.get_reg()))
1400 |         elif self._accept('keyword', 'false'):
1401 |             id_type = 'bool'
1402 | 
1403 |             self.generate('R[%d] = 0;' % (self.get_reg()))
1404 |         elif self._accept('symbol', '-'):
1405 |             if self._first_name():
1406 |                 id_type = self._parse_name()
1407 |             elif self._check('integer') or self._check('float'):
1408 |                 id_type = self._parse_number(negate=True)
1409 |             else:
1410 |                 self._syntax_error('variable name, integer, or float')
1411 |         elif self._first_name():
1412 |             id_type = self._parse_name()
1413 |         elif self._check('integer') or self._check('float'):
1414 |             id_type = self._parse_number(negate=False)
1415 |         else:
1416 |             self._syntax_error('factor')
1417 | 
1418 |         return id_type
1419 | 
1420 |     def _first_name(self):
1421 |         """first(<name>) (Protected)
1422 | 
1423 |         Determines if current token matches the first terminals.
1424 | 
1425 |             first(<name>) ::=
1426 |                 <identifier>
1427 | 
1428 |         Returns:
1429 |             True if current token matches a first terminal, False otherwise.
1430 |         """
1431 |         return self._check('identifier')
1432 | 
1433 |     def _parse_name(self):
1434 |         """<name> (Protected)
1435 | 
1436 |         Parses <name> language structure.
1437 | 
1438 |             <name> ::=
1439 |                 <identifier> [ '[' <expression> ']' ]
1440 |         """
1441 |         id_name = self._current.value
1442 |         id_line = self._current.line
1443 | 
1444 |         self._match('identifier')
1445 | 
1446 |         # Make sure that identifier is valid for the scope
1447 |         try:
1448 |             id_obj = self._ids.find(id_name)
1449 |             id_type = id_obj.type
1450 |         except ParserNameError as e:
1451 |             self._name_error('not declared in this scope', id_name, id_line)
1452 |             raise e
1453 | 
1454 |         # Check type to make sure it's a variable
1455 |         if not id_type in ['integer', 'float', 'bool', 'string']:
1456 |             self._type_error('variable', id_type, id_line)
1457 |             raise ParserTypeError()
1458 | 
1459 |         if self._accept('symbol', '['):
1460 |             index_type = self._parse_expression()
1461 | 
1462 |             if not index_type == 'integer':
1463 |                 self._type_error('integer', index_type, id_line)
1464 |                 raise ParserTypeError()
1465 | 
1466 |             self._match('symbol', ']')
1467 |         elif id_obj.size is not None:
1468 |             self._runtime_error('%s: array requires index' % id_name, id_line)
1469 | 
1470 |         # Get the last register allocated. The index will be here if it's used
1471 |         index_reg = self.get_reg(inc=False)
1472 | 
1473 |         # Determine the location of the identifier in the stack
1474 |         id_location = self._ids.get_id_location(id_name)
1475 | 
1476 |         # Verify the direction of the id if it is a param
1477 |         if id_location == 'param':
1478 |             direction = self._ids.get_param_direction(id_name)
1479 |             if direction != 'in':
1480 |                 self._type_error('\'in\' param',
1481 |                                  '\'%s\' param' % direction, id_line)
1482 |                 raise ParserTypeError()
1483 | 
1484 |         # Generate all code associated with retrieving this value
1485 |         self.generate_name(id_obj, id_location, index_reg, self.debug)
1486 | 
1487 |         return id_type
1488 | 
1489 |     def _parse_number(self, negate=False, generate_code=True):
1490 |         """Parse Number (Protected)
1491 | 
1492 |         Parses the <number> language structure.
1493 | 
1494 |             <number> ::=
1495 |                 [0-9][0-9_]*[.[0-9_]*]
1496 | 
1497 |         Arguments:
1498 |             negate: Determines if the number should be negated or not.
1499 |             generate_code: Determines if code should be generated for the
1500 |                 parsed number or not.
1501 | 
1502 |         Returns:
1503 |             The type of the parsed number.
1504 |         """
1505 |         number = self._current.value
1506 |         id_type = self._current.type
1507 | 
1508 |         # Parse the number (either float or integer type)
1509 |         if not self._accept('integer') and not self._accept('float'):
1510 |             self._syntax_error('number')
1511 | 
1512 |         # Generate the code for this number if desired
1513 |         if generate_code:
1514 |             self.generate_number(number, id_type, negate)
1515 | 
1516 |         return id_type
1517 | 


--------------------------------------------------------------------------------