├── .gitignore
├── .vscode
├── .browse.VC.db
├── launch.json
├── settings.json
└── tasks.json
├── ANSI C grammar (Yacc).html
├── Class notes in Chinese.txt
├── LICENSE
├── Presentation scripts
├── README.md
├── ZCC.bnf
├── bnf.tmp
├── generation
├── __init__.py
├── data.py
├── generation.py
└── utility.py
├── main.py
├── public
├── ZCCglobal.py
├── __init__.py
└── const.py
├── symbol
├── .gitignore
├── __init__.py
└── symtab.py
├── test
├── a.s
├── array.c
├── basic.c
├── basic.i
├── basic1.c
├── basic2.c
├── basic3.c
├── errorID.c
├── error_info.c
├── error_pos.c
├── missRightCurly.c
├── missSEMI.c
├── multi_int.c
├── out.txt
├── pointer.c
├── source_code_optimization.c
├── stdio.h
├── struct.c
├── test1.c
├── test1.s
├── test4.c
├── test4.s
├── test4_2.c
├── test4_2.s
├── test9.c
└── test9.s
└── yyparse
├── .gitignore
├── ZCClex.py
├── ZCCparser.py
├── __init__.py
├── missRightCurly.c
├── missSEMI.c
├── ply
├── __init__.py
├── cpp.py
├── ctokens.py
├── lex.py
├── yacc.py
└── ygen.py
├── test1.c
├── test2.c
└── testChar.c
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 | *.c
9 | *.exe
10 | test/*.s
11 | test/*.out
12 |
13 | # Distribution / packaging
14 | .Python
15 | env/
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *,cover
50 | .hypothesis/
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 |
59 | # Sphinx documentation
60 | docs/_build/
61 |
62 | # PyBuilder
63 | target/
64 |
65 | # IPython Notebook
66 | .ipynb_checkpoints
67 |
68 | #PyCharm
69 |
70 | !/test/*.c
71 | .idea/
72 |
--------------------------------------------------------------------------------
/.vscode/.browse.VC.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hlFu/ZCC/811bd987c9d7a3754f7bc9c9d986359ee3df7327/.vscode/.browse.VC.db
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "0.2.0",
3 | "configurations": [
4 | {
5 | "name": "Python",
6 | "type": "python",
7 | "request": "launch",
8 | "stopOnEntry": true,
9 | "program": "${file}",
10 | "debugOptions": [
11 | "WaitOnAbnormalExit",
12 | "WaitOnNormalExit",
13 | "RedirectOutput"
14 | ]
15 | },
16 | {
17 | "name": "Python Console App",
18 | "type": "python",
19 | "request": "launch",
20 | "stopOnEntry": true,
21 | "program": "${file}",
22 | "externalConsole": true,
23 | "debugOptions": [
24 | "WaitOnAbnormalExit",
25 | "WaitOnNormalExit"
26 | ]
27 | },
28 | {
29 | "name": "Django",
30 | "type": "python",
31 | "request": "launch",
32 | "stopOnEntry": true,
33 | "program": "${workspaceRoot}/manage.py",
34 | "args": [
35 | "runserver",
36 | "--noreload"
37 | ],
38 | "debugOptions": [
39 | "WaitOnAbnormalExit",
40 | "WaitOnNormalExit",
41 | "RedirectOutput",
42 | "DjangoDebugging"
43 | ]
44 | },
45 | {
46 | "name": "Watson",
47 | "type": "python",
48 | "request": "launch",
49 | "stopOnEntry": true,
50 | "program": "${workspaceRoot}/console.py",
51 | "args": [
52 | "dev",
53 | "runserver",
54 | "--noreload=True"
55 | ],
56 | "debugOptions": [
57 | "WaitOnAbnormalExit",
58 | "WaitOnNormalExit",
59 | "RedirectOutput"
60 | ]
61 | }
62 | ]
63 | }
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | // 将设置放入此文件中以覆盖默认值和用户设置。
2 | {
3 | }
--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | // {
2 | // // See https://go.microsoft.com/fwlink/?LinkId=733558
3 | // // for the documentation about the tasks.json format
4 | // "version": "0.1.0",
5 | // "command": "tsc",
6 | // "isShellCommand": true,
7 | // "args": ["-p", "."],
8 | // "showOutput": "silent",
9 | // "problemMatcher": "$tsc"
10 | // }
11 | {
12 | "version": "0.1.0",
13 |
14 | // The command is python.exe (the Python interpreter must be resolvable on the PATH).
15 | "command": "python.exe",
16 |
17 | // The command is a shell script
18 | "isShellCommand": true,
19 |
20 | // Always show the output window, whether or not errors occur.
21 | "showOutput": "always",
22 |
23 | // args is the file currently open in the editor, passed to the interpreter to run.
24 | "args": ["${file}"]
25 | }
--------------------------------------------------------------------------------
/ANSI C grammar (Yacc).html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | ANSI C grammar (Yacc)
5 |
6 |
20 |
21 | ANSI C Yacc grammar
22 |
23 | In 1985, Jeff Lee published his Yacc grammar (which is
24 | accompanied by a matching Lex specification)
25 | for the April 30, 1985 draft version of the
26 | ANSI C standard. Tom Stockfisch reposted
27 | it to net.sources in 1987; that original, as mentioned in
28 | the answer to question 17.25
29 | of the comp.lang.c FAQ, can be ftp'ed from ftp.uu.net, file
30 | usenet/net.sources/ansi.c.grammar.Z.
31 |
32 | Jutta Degener, 1995
33 |
34 |
35 | %token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
36 | %token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
37 | %token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
38 | %token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
39 | %token XOR_ASSIGN OR_ASSIGN TYPE_NAME
40 |
41 | %token TYPEDEF EXTERN STATIC AUTO REGISTER
42 | %token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
43 | %token STRUCT UNION ENUM ELLIPSIS
44 |
45 | %token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
46 |
47 | %start translation_unit
48 | %%
49 |
50 | IDENTIFIER
51 | : IDENTIFIER_NAME
52 |
53 | INTEGER
54 | : ORIGINAL_CODE
55 |
56 | DOUBLE
57 | : ORIGINAL_CODE
58 |
59 | STRING
60 | : ORIGINAL_CODE
61 |
62 | primary_expression
63 | : IDENTIFIER
64 | | INTEGER
65 | | DOUBLE
66 | | STRING
67 | | '(' expression ')'
68 | ;
69 |
70 | postfix_expression
71 | : primary_expression
72 | | postfix_expression '[' expression ']'
73 | | postfix_expression '(' ')'
74 | | postfix_expression '(' argument_expression_list ')'
75 | | postfix_expression '.' IDENTIFIER
76 | | postfix_expression PTR_OP IDENTIFIER
77 | | postfix_expression INC_OP
78 | | postfix_expression DEC_OP
79 | ;
80 |
81 | argument_expression_list
82 | : assignment_expression
83 | | argument_expression_list ',' assignment_expression
84 | ;
85 |
86 | unary_expression
87 | : postfix_expression
88 | | INC_OP unary_expression
89 | | DEC_OP unary_expression
90 | | unary_operator cast_expression
91 | | SIZEOF unary_expression
92 | | SIZEOF '(' type_name ')'
93 | ;
94 |
95 | unary_operator
96 | : '&'
97 | | '*'
98 | | '+'
99 | | '-'
100 | | '~'
101 | | '!'
102 | ;
103 |
104 | cast_expression
105 | : unary_expression
106 | | '(' type_name ')' cast_expression
107 | ;
108 |
109 | multiplicative_expression
110 | : cast_expression
111 | | multiplicative_expression '*' cast_expression
112 | | multiplicative_expression '/' cast_expression
113 | | multiplicative_expression '%' cast_expression
114 | ;
115 |
116 | additive_expression
117 | : multiplicative_expression
118 | | additive_expression '+' multiplicative_expression
119 | | additive_expression '-' multiplicative_expression
120 | ;
121 |
122 | shift_expression
123 | : additive_expression
124 | | shift_expression LEFT_OP additive_expression
125 | | shift_expression RIGHT_OP additive_expression
126 | ;
127 |
128 | relational_expression
129 | : shift_expression
130 | | relational_expression '<' shift_expression
131 | | relational_expression '>' shift_expression
132 | | relational_expression LE_OP shift_expression
133 | | relational_expression GE_OP shift_expression
134 | ;
135 |
136 | equality_expression
137 | : relational_expression
138 | | equality_expression EQ_OP relational_expression
139 | | equality_expression NE_OP relational_expression
140 | ;
141 |
142 | and_expression
143 | : equality_expression
144 | | and_expression '&' equality_expression
145 | ;
146 |
147 | exclusive_or_expression
148 | : and_expression
149 | | exclusive_or_expression '^' and_expression
150 | ;
151 |
152 | inclusive_or_expression
153 | : exclusive_or_expression
154 | | inclusive_or_expression '|' exclusive_or_expression
155 | ;
156 |
157 | logical_and_expression
158 | : inclusive_or_expression
159 | | logical_and_expression AND_OP inclusive_or_expression
160 | ;
161 |
162 | logical_or_expression
163 | : logical_and_expression
164 | | logical_or_expression OR_OP logical_and_expression
165 | ;
166 |
167 | conditional_expression
168 | : logical_or_expression
169 | | logical_or_expression '?' expression ':' conditional_expression
170 | ;
171 |
172 | assignment_expression
173 | : conditional_expression
174 | | unary_expression assignment_operator assignment_expression
175 | ;
176 |
177 | assignment_operator
178 | : '='
179 | | MUL_ASSIGN
180 | | DIV_ASSIGN
181 | | MOD_ASSIGN
182 | | ADD_ASSIGN
183 | | SUB_ASSIGN
184 | | LEFT_ASSIGN
185 | | RIGHT_ASSIGN
186 | | AND_ASSIGN
187 | | XOR_ASSIGN
188 | | OR_ASSIGN
189 | ;
190 |
191 | expression
192 | : assignment_expression
193 | | expression ',' assignment_expression
194 | ;
195 |
196 | constant_expression
197 | : conditional_expression
198 | ;
199 |
200 | declaration
201 | : declaration_specifiers ';'
202 | | declaration_specifiers init_declarator_list ';'
203 | ;
204 |
205 | declaration_specifiers
206 | : type_specifier //int
207 | | type_specifier type_qualifier //int const
208 | | type_qualifier type_specifier //const int
209 | | storage_class_specifier type_specifier //static int
210 | | storage_class_specifier type_specifier type_qualifier //static int const
211 | | storage_class_specifier type_qualifier type_specifier //static const int
212 | ;
213 |
214 | init_declarator_list
215 | : init_declarator
216 | | init_declarator_list ',' init_declarator
217 | ;
218 |
219 | init_declarator
220 | : declarator
221 | | declarator '=' initializer
222 | ;
223 |
224 | storage_class_specifier
225 | : TYPEDEF
226 | | EXTERN
227 | | STATIC
228 | ;
229 |
230 | type_specifier
231 | : VOID
232 | | FLOAT
233 | | DOUBLE
234 | | integer_type
235 | | struct_or_union_specifier
236 | | enum_specifier
237 | | TYPE_NAME
238 | ;
239 |
240 | integer_type
241 | : CHAR
242 | | SHORT
243 | | INT
244 | | LONG
245 | | SIGNED integer_type
246 | | UNSIGNED integer_type
247 | | SHORT integer_type
248 | | LONG integer_type
249 | ;
250 | struct_or_union_specifier
251 | : struct_or_union IDENTIFIER '{' struct_declaration_list '}'
252 | | struct_or_union '{' struct_declaration_list '}'
253 | | struct_or_union IDENTIFIER
254 | ;
255 |
256 | struct_or_union
257 | : STRUCT
258 | | UNION
259 | ;
260 |
261 | struct_declaration_list
262 | : struct_declaration
263 | | struct_declaration_list struct_declaration
264 | ;
265 |
266 | struct_declaration
267 | : specifier_qualifier_list struct_declarator_list ';'
268 | ;
269 |
270 | specifier_qualifier_list
271 | : type_specifier
272 | | type_specifier type_qualifier
273 | | type_qualifier type_specifier
274 | ;
275 |
276 | struct_declarator_list
277 | : declarator
278 | | struct_declarator_list ',' declarator
279 | ;
280 |
287 | enum_specifier
288 | : ENUM '{' enumerator_list '}'
289 | | ENUM IDENTIFIER '{' enumerator_list '}'
290 | | ENUM IDENTIFIER
291 | ;
292 |
293 | enumerator_list
294 | : enumerator
295 | | enumerator_list ',' enumerator
296 | ;
297 |
298 | enumerator
299 | : IDENTIFIER
300 | | IDENTIFIER '=' constant_expression
301 | ;
302 |
303 | type_qualifier
304 | : CONST
305 | ;
306 |
307 | declarator
308 | : pointer direct_declarator
309 | | direct_declarator
310 | ;
311 |
312 | direct_declarator
313 | : IDENTIFIER
314 | | '(' declarator ')'
315 | | direct_declarator '[' constant_expression ']'
316 | | direct_declarator '[' ']'
317 | | direct_declarator '(' parameter_type_list ')'
318 | | direct_declarator '(' ')'
319 | ;
320 |
321 | pointer
322 | : '*'
323 | | '*' CONST
324 | | pointer '*'
325 | | pointer '*' CONST
326 | ;
327 |
328 | type_qualifier_list
329 | : type_qualifier
330 | | type_qualifier_list type_qualifier
331 | ;
332 |
333 |
334 | parameter_type_list
335 | : parameter_list
336 | | parameter_list ',' ELLIPSIS
337 | ;
338 |
339 | parameter_list
340 | : parameter_declaration
341 | | parameter_list ',' parameter_declaration
342 | ;
343 |
344 | parameter_declaration
345 | : declaration_specifiers declarator
346 | | declaration_specifiers abstract_declarator
347 | | declaration_specifiers
348 | ;
349 |
350 | type_name
351 | : specifier_qualifier_list
352 | | specifier_qualifier_list abstract_declarator
353 | ;
354 |
355 | abstract_declarator
356 | : pointer
357 | | direct_abstract_declarator
358 | | pointer direct_abstract_declarator
359 | ;
360 |
361 | direct_abstract_declarator
362 | : '(' abstract_declarator ')'
363 | | '[' ']'
364 | | '[' constant_expression ']'
365 | | direct_abstract_declarator '[' ']'
366 | | direct_abstract_declarator '[' constant_expression ']'
367 | | '(' ')'
368 | | '(' parameter_type_list ')'
369 | | direct_abstract_declarator '(' ')'
370 | | direct_abstract_declarator '(' parameter_type_list ')'
371 | ;
372 |
373 | initializer
374 | : assignment_expression
375 | | '{' initializer_list '}'
376 | | '{' initializer_list ',' '}'
377 | ;
378 |
379 | initializer_list
380 | : initializer
381 | | initializer_list ',' initializer
382 | ;
383 |
384 | statement
385 | : labeled_statement
386 | | compound_statement
387 | | expression_statement
388 | | selection_statement
389 | | iteration_statement
390 | | jump_statement
391 | ;
392 |
393 | labeled_statement
394 | : CASE constant_expression ':' statement
395 | | DEFAULT ':' statement
396 | ;
397 |
398 | compound_statement
399 | : '{' '}'
400 | | '{' statement_list '}'
401 | | '{' declaration_list '}'
402 | | '{' declaration_list statement_list '}'
403 | ;
404 |
405 | declaration_list
406 | : declaration
407 | | declaration_list declaration
408 | ;
409 |
410 | statement_list
411 | : statement
412 | | statement_list statement
413 | ;
414 |
415 | expression_statement
416 | : ';'
417 | | expression ';'
418 | ;
419 |
420 | selection_statement
421 | : IF '(' expression ')' statement
422 | | IF '(' expression ')' statement ELSE statement
423 | | SWITCH '(' expression ')' statement
424 | ;
425 |
426 | iteration_statement
427 | : WHILE '(' expression ')' statement
428 | | DO statement WHILE '(' expression ')' ';'
429 | | FOR '(' expression_statement expression_statement ')' statement
430 | | FOR '(' expression_statement expression_statement expression ')' statement
431 | ;
432 |
433 | jump_statement
434 | : CONTINUE ';'
435 | | BREAK ';'
436 | | RETURN ';'
437 | | RETURN expression ';'
438 | ;
439 |
440 | translation_unit
441 | : external_declaration
442 | | translation_unit external_declaration
443 | ;
444 |
445 | external_declaration
446 | : function_definition
447 | | declaration
448 | ;
449 |
450 | function_definition
451 | : declaration_specifiers declarator compound_statement
452 | ;
453 |
454 | %%
455 | #include <stdio.h>
456 |
457 | extern char yytext[];
458 | extern int column;
459 |
460 | yyerror(s)
461 | char *s;
462 | {
463 | fflush(stdout);
464 | printf("\n%*s\n%*s\n", column, "^", column, s);
465 | }
466 |
467 |
468 |
469 |
470 |
471 |
472 |
--------------------------------------------------------------------------------
/Class notes in Chinese.txt:
--------------------------------------------------------------------------------
1 | 1. 优化代码size和time
2 | 2. 避免编译器过于复杂
3 | 3. 去除冗余操作(if(0)去掉)
4 | 4. 常量传播、预处理
5 | 5. 函数替换、替换尾递归
6 | 6. 分析比例
7 | 7. 流图 点是block 边是跳转
8 | 8. (x+1)(x+1) -> (x+1)^2
9 | 9. error type :
10 | mismatch undefined const redeclaration
11 |
12 | 报告:
13 | 技术
14 | 测试
15 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | {description}
294 | Copyright (C) {year} {fullname}
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 |
--------------------------------------------------------------------------------
/Presentation scripts:
--------------------------------------------------------------------------------
1 | Error recovery
2 |
3 | Adding error rules to our BNF
4 | Adding EOF token to handle the last missing right curly bracket
5 |
6 | So that we can discover most common mistakes and do error recovery (still build the right parsing tree)
7 |
8 | can handle:
9 | 1. missing semicolon
10 | 2. missing right curly bracket
11 | 3. some error identifier (不符合C标识符命名规则的)
12 | 4. error token after operator
13 | 以上4条均能进行error recovery,返回正确的语法树
14 | 5. 不符合 ANSI C 的各种语法 (error_pos.c)
15 |
16 | test files:
17 | test1.c all test
18 | test2.c missing semicolon
19 | missSEMI.c all errors token after operator + missing semicolon + errorID + missing right curly
20 | errorID.c errorID
21 | missRightCurly.c missing right curly
22 |
23 |
24 | Syntax error finder:
25 | 1. 函数声明与函数定义的参数列表不一致
26 | 2. 变量重复定义
27 | 3. 赋值时 类型不匹配
28 | 4. 表达式中,操作数的类型与规定的类型不一致
29 | 5. typo,打字错误。会从符号表中找出最接近的标识符,给出提示
30 | 6. 函数调用时参数表不符合函数定义
31 | 7. 函数实际返回值类型 不符合 函数定义中的函数返回值类型
32 |
33 | //1. 函数定义声明不一致
34 | int f(int i,...);
35 | int f(int j){
36 | return 0;
37 | }
38 |
39 | int g(int i){
40 | return 0;
41 | }
42 | typedef struct{
43 | int n;
44 | } A;
45 | int main(int argc, char const *argv[])
46 | {
47 |
48 | //2. 重复定义
49 | int k;
50 | int k;
51 | //缺少分号
52 | int i
53 | int count;
54 | //3. 类型不匹配
55 | A a;
56 | a = 5;
57 | //4. 未定义变量
58 | var = 3;
59 | //5. 操作数类型错误
60 | 1.0 >> 4;
61 | //打字错误
62 | cont = 4;
63 | g(1.0);
64 | //6. 参数表不匹配
65 | g(a);
66 | //7. 返回值不匹配
67 | return a;
68 | }
69 |
70 | 1. 基本功能:
71 | 计算add sub mul div 等
72 | 逻辑and or not
73 | 跳转jmp je jg jl
74 | 移位sal sar
75 | 函数call ret
76 | 堆栈push pop
77 | 全局数据 常量浮点数、字符串、global、static变量
78 | 浮点数运算fld fstp fadd fsub fmul fdiv
79 |
80 | 2. 优化
81 | 前端:
82 | constant folding
83 | 死代码消除
84 | 后端优化:
85 | 寄存器优化:
86 | 将ebx,ecx,edx作为临时变量的暂存区域
87 | 将esi edi作为eax的交换区
88 | 指令优化:
89 | *2 / 4 / 8。。。 ->sal
90 | lea 2*eax+offset -> reg
91 |
92 | 3. 支持特性:
93 | 看样例
94 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ZCC
2 | ZJU standard C Compiler
3 |
4 |
17 |
18 | ## Code Organization
19 | * Lex and Yacc related code is in the folder *yyparse*.
20 | * Symbol table and type checking code is in the folder *symbol*.
21 | * Machine code generation is in the folder *generation*.
22 | * Code for handling special variables is in the folder *public*.
23 | * Optimization code is stored separately in each folder.
24 |
25 |
50 |
51 |
69 |
70 |
71 |
84 | ## Parsing Tree Sample:
85 |
86 | ```
87 | declaration
88 | declaration_specifiers
89 | storage_class_specifier
90 | typedef
91 | declaration_specifiers
92 | type_specifier
93 | struct_or_union_specifier
94 | struct_or_union
95 | struct
96 | {
97 | struct_declaration_list
98 | struct_declaration_list
99 | struct_declaration
100 | specifier_qualifier_list
101 | type_specifier
102 | int
103 | struct_declarator_list
104 | struct_declarator
105 | declarator
106 | direct_declarator
107 | a
108 | ;
109 | struct_declaration
110 | specifier_qualifier_list
111 | type_specifier
112 | double
113 | struct_declarator_list
114 | struct_declarator
115 | declarator
116 | direct_declarator
117 | c
118 | ;
119 | }
120 | init_declarator_list
121 | init_declarator
122 | declarator
123 | direct_declarator
124 | mytype
125 | ;
126 | ```
127 | ## Code Generation
128 | ### Miscellaneous
129 |
130 | Call functions in *self.tools* to translate.
131 |
132 | Detailed comments and examples can be found in *generation.generate*.
133 |
134 | ### Basic X86 supports
135 | * Calculation: add, sub, mul, div.
136 | * Logic: and, or, not.
137 | * Jump: jmp, je, jg, jl.
138 | * Shift: sal, sar.
139 | * Function: call, ret.
140 | * Stack: push, pop.
141 | * Float number operation: fld, fstp, fadd, fsub, fmul, fdiv.
142 | * Global/Static variables, Constant float number, String
143 |
144 |
150 |
151 | ## Code Optimization
152 | ### The optimization types supported
153 | http://www.compileroptimizations.com/index.html
154 | ### Constant propagation
155 | http://people.eecs.berkeley.edu/~bodik/cs264/lectures/4-chaotic-notes.pdf
156 |
--------------------------------------------------------------------------------
/ZCC.bnf:
--------------------------------------------------------------------------------
1 | %token int_const char_const float_const id string enumeration_const
2 | %%
3 |
4 | translation_unit : external_decl
5 | | translation_unit external_decl
6 | ;
7 | external_decl : function_definition
8 | | decl
9 | ;
10 | function_definition : decl_specs declarator decl_list compound_stat
11 | | declarator decl_list compound_stat
12 | | decl_specs declarator compound_stat
13 | | declarator compound_stat
14 | ;
15 | decl : decl_specs init_declarator_list ';'
16 | | decl_specs ';'
17 | ;
18 | decl_list : decl
19 | | decl_list decl
20 | ;
21 | decl_specs : storage_class_spec decl_specs
22 | | storage_class_spec
23 | | type_spec decl_specs
24 | | type_spec
25 | | type_qualifier decl_specs
26 | | type_qualifier
27 | ;
28 | storage_class_spec : 'auto' | 'register' | 'static' | 'extern' | 'typedef'
29 | ;
30 | type_spec : 'void' | 'char' | 'short' | 'int' | 'long' | 'float'
31 | | 'double' | 'signed' | 'unsigned'
32 | | struct_or_union_spec
33 | | enum_spec
34 | | typedef_name
35 | ;
36 | type_qualifier : 'const' | 'volatile'
37 | ;
38 | struct_or_union_spec : struct_or_union id '{' struct_decl_list '}'
39 | | struct_or_union '{' struct_decl_list '}'
40 | | struct_or_union id
41 | ;
42 | struct_or_union : 'struct' | 'union'
43 | ;
44 | struct_decl_list : struct_decl
45 | | struct_decl_list struct_decl
46 | ;
47 | init_declarator_list : init_declarator
48 | | init_declarator_list ',' init_declarator
49 | ;
50 | init_declarator : declarator
51 | | declarator '=' initializer
52 | ;
53 | struct_decl : spec_qualifier_list struct_declarator_list ';'
54 | ;
55 | spec_qualifier_list : type_spec spec_qualifier_list
56 | | type_spec
57 | | type_qualifier spec_qualifier_list
58 | | type_qualifier
59 | ;
60 | struct_declarator_list : struct_declarator
61 | | struct_declarator_list ',' struct_declarator
62 | ;
63 | struct_declarator : declarator
64 | | declarator ':' const_exp
65 | | ':' const_exp
66 | ;
67 | enum_spec : 'enum' id '{' enumerator_list '}'
68 | | 'enum' '{' enumerator_list '}'
69 | | 'enum' id
70 | ;
71 | enumerator_list : enumerator
72 | | enumerator_list ',' enumerator
73 | ;
74 | enumerator : id
75 | | id '=' const_exp
76 | ;
77 | declarator : pointer direct_declarator
78 | | direct_declarator
79 | ;
80 | direct_declarator : id
81 | | '(' declarator ')'
82 | | direct_declarator '[' const_exp ']'
83 | | direct_declarator '[' ']'
84 | | direct_declarator '(' param_type_list ')'
85 | | direct_declarator '(' id_list ')'
86 | | direct_declarator '(' ')'
87 | ;
88 | pointer : '*' type_qualifier_list
89 | | '*'
90 | | '*' type_qualifier_list pointer
91 | | '*' pointer
92 | ;
93 | type_qualifier_list : type_qualifier
94 | | type_qualifier_list type_qualifier
95 | ;
96 | param_type_list : param_list
97 | | param_list ',' '...'
98 | ;
99 | param_list : param_decl
100 | | param_list ',' param_decl
101 | ;
102 | param_decl : decl_specs declarator
103 | | decl_specs abstract_declarator
104 | | decl_specs
105 | ;
106 | id_list : id
107 | | id_list ',' id
108 | ;
109 | initializer : assignment_exp
110 | | '{' initializer_list '}'
111 | | '{' initializer_list ',' '}'
112 | ;
113 | initializer_list : initializer
114 | | initializer_list ',' initializer
115 | ;
116 | type_name : spec_qualifier_list abstract_declarator
117 | | spec_qualifier_list
118 | ;
119 | abstract_declarator : pointer
120 | | pointer direct_abstract_declarator
121 | | direct_abstract_declarator
122 | ;
123 | direct_abstract_declarator: '(' abstract_declarator ')'
124 | | direct_abstract_declarator '[' const_exp ']'
125 | | '[' const_exp ']'
126 | | direct_abstract_declarator '[' ']'
127 | | '[' ']'
128 | | direct_abstract_declarator '(' param_type_list ')'
129 | | '(' param_type_list ')'
130 | | direct_abstract_declarator '(' ')'
131 | | '(' ')'
132 | ;
133 | typedef_name : id
134 | ;
135 | stat : labeled_stat
136 | | exp_stat
137 | | compound_stat
138 | | selection_stat
139 | | iteration_stat
140 | | jump_stat
141 | ;
142 | labeled_stat : id ':' stat
143 | | 'case' const_exp ':' stat
144 | | 'default' ':' stat
145 | ;
146 | exp_stat : exp ';'
147 | | ';'
148 | ;
149 | compound_stat : '{' decl_list stat_list '}'
150 | | '{' stat_list '}'
151 | | '{' decl_list '}'
152 | | '{' '}'
153 | ;
154 | stat_list : stat
155 | | stat_list stat
156 | ;
157 | selection_stat : 'if' '(' exp ')' stat
158 | | 'if' '(' exp ')' stat 'else' stat
159 | | 'switch' '(' exp ')' stat
160 | ;
161 | iteration_stat : 'while' '(' exp ')' stat
162 | | 'do' stat 'while' '(' exp ')' ';'
163 | | 'for' '(' exp ';' exp ';' exp ')' stat
164 | | 'for' '(' exp ';' exp ';' ')' stat
165 | | 'for' '(' exp ';' ';' exp ')' stat
166 | | 'for' '(' exp ';' ';' ')' stat
167 | | 'for' '(' ';' exp ';' exp ')' stat
168 | | 'for' '(' ';' exp ';' ')' stat
169 | | 'for' '(' ';' ';' exp ')' stat
170 | | 'for' '(' ';' ';' ')' stat
171 | ;
172 | jump_stat : 'goto' id ';'
173 | | 'continue' ';'
174 | | 'break' ';'
175 | | 'return' exp ';'
176 | | 'return' ';'
177 | ;
178 | exp : assignment_exp
179 | | exp ',' assignment_exp
180 | ;
181 | assignment_exp : conditional_exp
182 | | unary_exp assignment_operator assignment_exp
183 | ;
184 | assignment_operator : '=' | '*=' | '/=' | '%=' | '+=' | '-=' | '<<='
185 | | '>>=' | '&=' | '^=' | '|='
186 | ;
187 | conditional_exp : logical_or_exp
188 | | logical_or_exp '?' exp ':' conditional_exp
189 | ;
190 | const_exp : conditional_exp
191 | ;
192 | logical_or_exp : logical_and_exp
193 | | logical_or_exp '||' logical_and_exp
194 | ;
195 | logical_and_exp : inclusive_or_exp
196 | | logical_and_exp '&&' inclusive_or_exp
197 | ;
198 | inclusive_or_exp : exclusive_or_exp
199 | | inclusive_or_exp '|' exclusive_or_exp
200 | ;
201 | exclusive_or_exp : and_exp
202 | | exclusive_or_exp '^' and_exp
203 | ;
204 | and_exp : equality_exp
205 | | and_exp '&' equality_exp
206 | ;
207 | equality_exp : relational_exp
208 | | equality_exp '==' relational_exp
209 | | equality_exp '!=' relational_exp
210 | ;
211 | relational_exp : shift_expression
212 | | relational_exp '<' shift_expression
213 | | relational_exp '>' shift_expression
214 | | relational_exp '<=' shift_expression
215 | | relational_exp '>=' shift_expression
216 | ;
217 | shift_expression : additive_exp
218 | | shift_expression '<<' additive_exp
219 | | shift_expression '>>' additive_exp
220 | ;
221 | additive_exp : mult_exp
222 | | additive_exp '+' mult_exp
223 | | additive_exp '-' mult_exp
224 | ;
225 | mult_exp : cast_exp
226 | | mult_exp '*' cast_exp
227 | | mult_exp '/' cast_exp
228 | | mult_exp '%' cast_exp
229 | ;
230 | cast_exp : unary_exp
231 | | '(' type_name ')' cast_exp
232 | ;
233 | unary_exp : postfix_exp
234 | | '++' unary_exp
235 | | '--' unary_exp
236 | | unary_operator cast_exp
237 | | 'sizeof' unary_exp
238 | | 'sizeof' '(' type_name ')'
239 | ;
240 | unary_operator : '&' | '*' | '+' | '-' | '~' | '!'
241 | ;
242 | postfix_exp : primary_exp
243 | | postfix_exp '[' exp ']'
244 | | postfix_exp '(' argument_exp_list ')'
245 | | postfix_exp '(' ')'
246 | | postfix_exp '.' id
247 | | postfix_exp '->' id
248 | | postfix_exp '++'
249 | | postfix_exp '--'
250 | ;
251 | primary_exp : id
252 | | const
253 | | string
254 | | '(' exp ')'
255 | ;
256 | argument_exp_list : assignment_exp
257 | | argument_exp_list ',' assignment_exp
258 | ;
259 | const : int_const
260 | | char_const
261 | | float_const
262 | | enumeration_const
263 | ;
--------------------------------------------------------------------------------
/bnf.tmp:
--------------------------------------------------------------------------------
1 | primary_expression
2 | : IDENTIFIER
3 | | CONSTANT
4 | | STRING_LITERAL
5 | | '(' expression ')'
6 | ;
7 |
8 | postfix_expression
9 | : primary_expression
10 | | postfix_expression '[' expression ']'
11 | | postfix_expression '(' ')'
12 | | postfix_expression '(' argument_expression_list ')'
13 | | postfix_expression '.' IDENTIFIER
14 | | postfix_expression PTR_OP IDENTIFIER
15 | | postfix_expression INC_OP
16 | | postfix_expression DEC_OP
17 | ;
18 |
19 | argument_expression_list
20 | : assignment_expression
21 | | argument_expression_list ',' assignment_expression
22 | ;
23 |
24 | unary_expression
25 | : postfix_expression
26 | | INC_OP unary_expression
27 | | DEC_OP unary_expression
28 | | unary_operator cast_expression
29 | | SIZEOF unary_expression
30 | | SIZEOF '(' type_name ')'
31 | ;
32 |
33 | unary_operator
34 | : '&'
35 | | '*'
36 | | '+'
37 | | '-'
38 | | '~'
39 | | '!'
40 | ;
41 |
42 | cast_expression
43 | : unary_expression
44 | | '(' type_name ')' cast_expression
45 | ;
46 |
47 | multiplicative_expression
48 | : cast_expression
49 | | multiplicative_expression '*' cast_expression
50 | | multiplicative_expression '/' cast_expression
51 | | multiplicative_expression '%' cast_expression
52 | ;
53 |
54 | additive_expression
55 | : multiplicative_expression
56 | | additive_expression '+' multiplicative_expression
57 | | additive_expression '-' multiplicative_expression
58 | ;
59 |
60 | shift_expression
61 | : additive_expression
62 | | shift_expression LEFT_OP additive_expression
63 | | shift_expression RIGHT_OP additive_expression
64 | ;
65 |
66 | relational_expression
67 | : shift_expression
68 | | relational_expression '<' shift_expression
69 | | relational_expression '>' shift_expression
70 | | relational_expression LE_OP shift_expression
71 | | relational_expression GE_OP shift_expression
72 | ;
73 |
74 | equality_expression
75 | : relational_expression
76 | | equality_expression EQ_OP relational_expression
77 | | equality_expression NE_OP relational_expression
78 | ;
79 |
80 | and_expression
81 | : equality_expression
82 | | and_expression '&' equality_expression
83 | ;
84 |
85 | exclusive_or_expression
86 | : and_expression
87 | | exclusive_or_expression '^' and_expression
88 | ;
89 |
90 | inclusive_or_expression
91 | : exclusive_or_expression
92 | | inclusive_or_expression '|' exclusive_or_expression
93 | ;
94 |
95 | logical_and_expression
96 | : inclusive_or_expression
97 | | logical_and_expression AND_OP inclusive_or_expression
98 | ;
99 |
100 | logical_or_expression
101 | : logical_and_expression
102 | | logical_or_expression OR_OP logical_and_expression
103 | ;
104 |
105 | conditional_expression
106 | : logical_or_expression
107 | | logical_or_expression '?' expression ':' conditional_expression
108 | ;
109 |
110 | assignment_expression
111 | : conditional_expression
112 | | unary_expression assignment_operator assignment_expression
113 | ;
114 |
115 | assignment_operator
116 | : '='
117 | | MUL_ASSIGN
118 | | DIV_ASSIGN
119 | | MOD_ASSIGN
120 | | ADD_ASSIGN
121 | | SUB_ASSIGN
122 | | LEFT_ASSIGN
123 | | RIGHT_ASSIGN
124 | | AND_ASSIGN
125 | | XOR_ASSIGN
126 | | OR_ASSIGN
127 | ;
128 |
129 | expression
130 | : assignment_expression
131 | | expression ',' assignment_expression
132 | ;
133 |
134 | constant_expression
135 | : conditional_expression
136 | ;
137 |
138 | declaration
139 | : declaration_specifiers ';'
140 | | declaration_specifiers init_declarator_list ';'
141 | ;
142 |
143 | declaration_specifiers
144 | : storage_class_specifier
145 | | storage_class_specifier declaration_specifiers
146 | | type_specifier
147 | | type_specifier declaration_specifiers
148 | | type_qualifier
149 | | type_qualifier declaration_specifiers
150 | ;
151 |
152 | init_declarator_list
153 | : init_declarator
154 | | init_declarator_list ',' init_declarator
155 | ;
156 |
157 | init_declarator
158 | : declarator
159 | | declarator '=' initializer
160 | ;
161 |
162 | storage_class_specifier
163 | : TYPEDEF
164 | | EXTERN
165 | | STATIC
166 | | AUTO
167 | | REGISTER
168 | ;
169 |
170 | type_specifier
171 | : VOID
172 | | CHAR
173 | | SHORT
174 | | INT
175 | | LONG
176 | | FLOAT
177 | | DOUBLE
178 | | SIGNED
179 | | UNSIGNED
180 | | struct_or_union_specifier
181 | | enum_specifier
182 | | TYPE_NAME
183 | ;
184 |
185 | struct_or_union_specifier
186 | : struct_or_union IDENTIFIER '{' struct_declaration_list '}'
187 | | struct_or_union '{' struct_declaration_list '}'
188 | | struct_or_union IDENTIFIER
189 | ;
190 |
191 | struct_or_union
192 | : STRUCT
193 | | UNION
194 | ;
195 |
196 | struct_declaration_list
197 | : struct_declaration
198 | | struct_declaration_list struct_declaration
199 | ;
200 |
201 | struct_declaration
202 | : specifier_qualifier_list struct_declarator_list ';'
203 | ;
204 |
205 | specifier_qualifier_list
206 | : type_specifier specifier_qualifier_list
207 | | type_specifier
208 | | type_qualifier specifier_qualifier_list
209 | | type_qualifier
210 | ;
211 |
212 | struct_declarator_list
213 | : struct_declarator
214 | | struct_declarator_list ',' struct_declarator
215 | ;
216 |
217 | struct_declarator
218 | : declarator
219 | | ':' constant_expression
220 | | declarator ':' constant_expression
221 | ;
222 |
223 | enum_specifier
224 | : ENUM '{' enumerator_list '}'
225 | | ENUM IDENTIFIER '{' enumerator_list '}'
226 | | ENUM IDENTIFIER
227 | ;
228 |
229 | enumerator_list
230 | : enumerator
231 | | enumerator_list ',' enumerator
232 | ;
233 |
234 | enumerator
235 | : IDENTIFIER
236 | | IDENTIFIER '=' constant_expression
237 | ;
238 |
239 | type_qualifier
240 | : CONST
241 | | VOLATILE
242 | ;
243 |
244 | declarator
245 | : pointer direct_declarator
246 | | direct_declarator
247 | ;
248 |
249 | direct_declarator
250 | : IDENTIFIER
251 | | '(' declarator ')'
252 | | direct_declarator '[' constant_expression ']'
253 | | direct_declarator '[' ']'
254 | | direct_declarator '(' parameter_type_list ')'
255 | | direct_declarator '(' identifier_list ')'
256 | | direct_declarator '(' ')'
257 | ;
258 |
259 | pointer
260 | : '*'
261 | | '*' type_qualifier_list
262 | | '*' pointer
263 | | '*' type_qualifier_list pointer
264 | ;
265 |
266 | type_qualifier_list
267 | : type_qualifier
268 | | type_qualifier_list type_qualifier
269 | ;
270 |
271 |
272 | parameter_type_list
273 | : parameter_list
274 | | parameter_list ',' ELLIPSIS
275 | ;
276 |
277 | parameter_list
278 | : parameter_declaration
279 | | parameter_list ',' parameter_declaration
280 | ;
281 |
282 | parameter_declaration
283 | : declaration_specifiers declarator
284 | | declaration_specifiers abstract_declarator
285 | | declaration_specifiers
286 | ;
287 |
288 | identifier_list
289 | : IDENTIFIER
290 | | identifier_list ',' IDENTIFIER
291 | ;
292 |
293 | type_name
294 | : specifier_qualifier_list
295 | | specifier_qualifier_list abstract_declarator
296 | ;
297 |
298 | abstract_declarator
299 | : pointer
300 | | direct_abstract_declarator
301 | | pointer direct_abstract_declarator
302 | ;
303 |
304 | direct_abstract_declarator
305 | : '(' abstract_declarator ')'
306 | | '[' ']'
307 | | '[' constant_expression ']'
308 | | direct_abstract_declarator '[' ']'
309 | | direct_abstract_declarator '[' constant_expression ']'
310 | | '(' ')'
311 | | '(' parameter_type_list ')'
312 | | direct_abstract_declarator '(' ')'
313 | | direct_abstract_declarator '(' parameter_type_list ')'
314 | ;
315 |
316 | initializer
317 | : assignment_expression
318 | | '{' initializer_list '}'
319 | | '{' initializer_list ',' '}'
320 | ;
321 |
322 | initializer_list
323 | : initializer
324 | | initializer_list ',' initializer
325 | ;
326 |
327 | statement
328 | : labeled_statement
329 | | compound_statement
330 | | expression_statement
331 | | selection_statement
332 | | iteration_statement
333 | | jump_statement
334 | ;
335 |
336 | labeled_statement
337 | : IDENTIFIER ':' statement
338 | | CASE constant_expression ':' statement
339 | | DEFAULT ':' statement
340 | ;
341 |
342 | compound_statement
343 | : '{' '}'
344 | | '{' statement_list '}'
345 | | '{' declaration_list '}'
346 | | '{' declaration_list statement_list '}'
347 | ;
348 |
349 | declaration_list
350 | : declaration
351 | | declaration_list declaration
352 | ;
353 |
354 | statement_list
355 | : statement
356 | | statement_list statement
357 | ;
358 |
359 | expression_statement
360 | : ';'
361 | | expression ';'
362 | ;
363 |
364 | selection_statement
365 | : IF '(' expression ')' statement
366 | | IF '(' expression ')' statement ELSE statement
367 | | SWITCH '(' expression ')' statement
368 | ;
369 |
370 | iteration_statement
371 | : WHILE '(' expression ')' statement
372 | | DO statement WHILE '(' expression ')' ';'
373 | | FOR '(' expression_statement expression_statement ')' statement
374 | | FOR '(' expression_statement expression_statement expression ')' statement
375 | ;
376 |
377 | jump_statement
378 | : GOTO IDENTIFIER ';'
379 | | CONTINUE ';'
380 | | BREAK ';'
381 | | RETURN ';'
382 | | RETURN expression ';'
383 | ;
384 |
385 | translation_unit
386 | : external_declaration
387 | | translation_unit external_declaration
388 | ;
389 |
390 | external_declaration
391 | : function_definition
392 | | declaration
393 | ;
394 |
395 | function_definition
396 | : declaration_specifiers declarator declaration_list compound_statement
397 | | declaration_specifiers declarator compound_statement
398 | | declarator declaration_list compound_statement
399 | | declarator compound_statement
--------------------------------------------------------------------------------
/generation/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
--------------------------------------------------------------------------------
/generation/data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding=utf-8
3 | from public.ZCCglobal import *
4 |
class Data(object):
    """Plain record describing one value handled by the code generator.

    The three constructor arguments are stored unchanged as attributes of
    the same name.
    """

    def __init__(self,name,offset,type):
        """
        :type name:str
        :type offset:bool
        :type type:CType
        """
        # ``type`` shadows the builtin only inside this constructor; the
        # parameter name is kept so keyword callers keep working.
        self.name, self.offset, self.type = name, offset, type
15 |
--------------------------------------------------------------------------------
/generation/generation.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # produce machine code
3 | import sys
4 | sys.path.append('c:\\zcc\\zcc')
5 | from public.ZCCglobal import *
6 | from utility import utility
7 | from copy import deepcopy
8 | from data import Data
9 |
10 |
11 | class generator:
12 |
def __init__(self):
    """Create the output buffer, the helper toolkit and the dispatch table
    that maps expression rule names to their ``gen_*`` methods."""
    # asm output list
    self.asm = []
    self.tools = utility(self)
    # Powers of two: 2**0 .. 2**31.
    self.exp2 = [1 << shift for shift in range(32)]
    # Every expression rule ``<rule>`` is handled by the method ``gen_<rule>``.
    rule_names = (
        'primary_expression',
        'postfix_expression',
        'unary_expression',
        'cast_expression',
        'multiplicative_expression',
        'additive_expression',
        'shift_expression',
        'relational_expression',
        'equality_expression',
        'and_expression',
        'exclusive_or_expression',
        'inclusive_or_expression',
        'logical_and_expression',
        'logical_or_expression',
        'conditional_expression',
        'assignment_expression',
        'expression',
    )
    self.expression_handler = dict(
        (rule, getattr(self, 'gen_' + rule)) for rule in rule_names)
37 |
def generate(self):
    """Emit code for every function in the global context that has a body.

    Global initialisation is emitted first, then each defined function is
    wrapped in ``newFunc`` / ``endFunc``, and ``end`` closes the output.
    """
    self.tools.globalInitialize()
    for funcName in global_context.local:
        entry = global_context.local[funcName]
        if not (entry.type == 'function'):
            continue
        body = entry.compound_statement
        if body is None:
            # No compound statement: nothing to emit for this entry.
            continue
        self.tools.newFunc(funcName)
        self.gen_compound_statement(body, body.context)
        self.tools.endFunc()
    self.tools.end()
50 |
def output(self, fileName):
    """Write the collected assembly lines to ``fileName``.

    The entries of ``self.asm`` are written back to back, without any
    separator being added.
    """
    with open(fileName, 'w') as out:
        out.writelines(self.asm)
55 |
def gen_statement_list(self, node,context):
    """
    Generate code for each ``statement`` child of a statement list.

    Children that are not tree nodes, or whose rule name is not
    ``statement``, are ignored.

    :type node:TreeNode
    :type context:Context
    """
    for child in node[1:]:
        if not isinstance(child, TreeNode):
            continue
        if child[0] == "statement":
            self.gen_statement(child,context)
65 |
66 |
def gen_statement(self, node,context):
    """
    Dispatch a ``statement`` node to the generator for its concrete kind.

    A nested compound statement is generated inside its own scope and with
    its own context; the other supported kinds use the caller's context.
    Unrecognised children are ignored.

    :type node:TreeNode
    :type context:Context
    """
    # Kinds whose handler is simply ``gen_<kind>(child, context)``.
    direct_kinds = ("expression_statement", "selection_statement",
                    "jump_statement", "iteration_statement")
    for child in node[1:]:
        if not isinstance(child, TreeNode):
            continue
        kind = child[0]
        if kind == "compound_statement":
            self.tools.newScope(child.context)
            self.gen_compound_statement(child,child.context)
            self.tools.endScope()
        elif kind in direct_kinds:
            getattr(self, "gen_" + kind)(child,context)
86 |
def gen_expression_statement(self, node,context):
    """
    Generate an expression statement and return the expression's result.

    When the statement has no expression node, the value of
    ``self.tools.getTrue()`` is returned instead.

    :type node:TreeNode
    :type context:Context
    :rtype :str
    """
    expr = node[1]
    if not isinstance(expr, TreeNode):
        return self.tools.getTrue()
    return self.expression_handler[expr[0]](expr,context)
98 |
def gen_compound_statement(self, node,context):
    """
    Generate code for the statement lists of a compound statement.

    Only ``statement_list`` children are processed here; every other child
    is skipped.

    :type node:TreeNode
    :type context:Context
    """
    for child in node[1:]:
        if not isinstance(child, TreeNode):
            continue
        if child[0] == "statement_list":
            self.gen_statement_list(child,context)
108 |
def gen_selection_statement(self, node,context):
    """
    Generate code for ``if`` and ``if``/``else`` statements.

    Node layout: ``node[3]`` is the condition, ``node[5]`` the statement run
    when it holds and ``node[7]`` (only for length 8) the ``else`` statement.
    Nodes whose keyword is not ``if``, or whose length is neither 6 nor 8,
    only have their condition evaluated (for ``if``) or are ignored.

    :type node:TreeNode
    :type context:Context
    """
    if node[1] != "if":
        return
    condition = self.expression_handler[node[3][0]](node[3],context)
    size = len(node)
    if size != 6 and size != 8:
        return
    has_else = size == 8

    # Labels are allocated before the comparison is emitted.
    skip_label = self.tools.allocateLabel()
    end_label = self.tools.allocateLabel() if has_else else None

    # Jump past the "then" part when the condition equals the false value.
    self.tools.cmp(condition,self.tools.getFalse())
    self.tools.je(skip_label)
    self.gen_statement(node[5],context)
    if has_else:
        self.tools.jmp(end_label)
    self.tools.markLabel(skip_label)
    if has_else:
        self.gen_statement(node[7],context)
        self.tools.markLabel(end_label)
135 |
def gen_jump_statement(self,node,context):
    """
    Generate code for a jump statement.

    When ``node[2]`` is an expression node, its value is moved into the
    register returned by ``self.tools.getEax()``; a ``ret`` is then emitted.

    :type node:TreeNode
    :type context:Context
    """
    value_node = node[2]
    if isinstance(value_node, TreeNode):
        result = self.expression_handler[value_node[0]](value_node,context)
        self.tools.mov(self.tools.getEax(),result)
    self.tools.ret()
145 |
def gen_iteration_statement(self,node,context):
    """
    Generate code for ``for`` and ``while`` loops.

    ``for`` node layout: ``node[3]`` is the init expression statement and
    ``node[4]`` the condition expression statement. With a step expression,
    ``node[5]`` is that expression and ``node[7]`` the body; without one,
    ``node[6]`` is the body. ``while`` node layout: ``node[3]`` is the
    condition and ``node[5]`` the body. Other keywords are ignored.

    :type node:TreeNode
    :type context:Context
    """
    if node[1]=="for":
        if isinstance(node[5],TreeNode):
            # for (init; cond; step) body
            label1=self.tools.allocateLabel()   # step
            label2=self.tools.allocateLabel()   # condition
            label3=self.tools.allocateLabel()   # exit
            self.gen_expression_statement(node[3],context)
            # The first pass skips the step and goes straight to the test.
            self.tools.jmp(label2)
            self.tools.markLabel(label1)
            self.expression_handler[node[5][0]](node[5],context)
            self.tools.markLabel(label2)
            ret=self.gen_expression_statement(node[4],context)
            self.tools.cmp(ret,self.tools.getFalse())
            self.tools.je(label3)
            self.gen_statement(node[7],context)
            self.tools.jmp(label1)
            self.tools.markLabel(label3)
        else:
            # for (init; cond; ) body
            label1=self.tools.allocateLabel()   # condition
            label2=self.tools.allocateLabel()   # exit
            self.gen_expression_statement(node[3],context)
            self.tools.markLabel(label1)
            ret=self.gen_expression_statement(node[4],context)
            self.tools.cmp(ret,self.tools.getFalse())
            # Leave the loop when the condition is false. Without this jump
            # the comparison had no effect and the loop could never exit.
            self.tools.je(label2)
            self.gen_statement(node[6],context)
            self.tools.jmp(label1)
            self.tools.markLabel(label2)
    elif node[1]=="while":
        label1=self.tools.allocateLabel()   # condition
        label2=self.tools.allocateLabel()   # exit
        self.tools.markLabel(label1)
        ret=self.expression_handler[node[3][0]](node[3],context)
        self.tools.cmp(ret,self.tools.getFalse())
        self.tools.je(label2)
        self.gen_statement(node[5],context)
        self.tools.jmp(label1)
        self.tools.markLabel(label2)
187 |
188 |
189 |
def gen_additive_expression(self, node, context):
    """
    Emit code for a binary ``+`` or ``-``.

    The left operand (node[1]) is copied into a locked scratch register
    before the right operand (node[3]) is evaluated; node[2] selects
    ``tools.add`` for "+" and ``tools.sub`` otherwise.

    :type node: TreeNode
    :type context: Context
    :return: whatever ``tools.add`` / ``tools.sub`` returns
    """
    tools = self.tools
    left = self.expression_handler[node[1][0]](node[1], context)
    scratch = tools.allocateNewReg(left)
    tools.lock(scratch)
    tools.mov(scratch, left)
    right = self.expression_handler[node[3][0]](node[3], context)
    combine = tools.add if node[2] == "+" else tools.sub
    result = combine(scratch, right)
    tools.unLock(scratch)
    return result
207 |
def gen_primary_expression(self, node, context):
    """
    Turn a leaf of the expression tree into an operand.

    An IDENTIFIER leaf becomes a ``Data`` built from the name, an offset
    flag of False and a deep copy of the type registered in *context*.
    INTEGER, DOUBLE and STRING leaves are converted with ``int``,
    ``float`` and ``str``. Anything else yields None.

    :type node: TreeNode
    :type context: Context
    :rtype: Data
    """
    leaf = node[1]
    if not isinstance(leaf, TreeNode):
        return None
    kind = leaf[0]
    if kind == "IDENTIFIER":
        identifier = leaf[1]
        # Copy the type so later edits to the operand leave the symbol
        # table entry untouched.
        ctype = deepcopy(context.get_type_by_id(identifier))
        return Data(identifier, False, ctype)
    if kind == "INTEGER":
        return int(leaf[1])
    if kind == "DOUBLE":
        return float(leaf[1])
    if kind == "STRING":
        return str(leaf[1])
    return None
227 |
228 |
def gen_postfix_expression(self, node, context):
    """
    Emit code for array indexing, calls and member access.

    node[1] is the operand expression and node[2] the operator token:
    "[" (index in node[3]), "(" (optional argument list in node[3]),
    "." or "->" (member leaf in node[3]).

    :type node: TreeNode
    :type context: Context
    :return: the updated operand for "[", "." and "->"; the result of
        ``tools.call`` for "("; None for any other token
    """
    operand = self.expression_handler[node[1][0]](node[1], context)
    if node[2] == "[":
        if operand.offset == False:
            self.tools.mov(self.tools.getEax(), 0)
        index = self.expression_handler[node[3][0]](node[3], context)
        # NOTE(review): the result of mul is not used here - confirm that
        # tools.mul accumulates the scaled index where it is needed.
        self.tools.mul(index, operand.type.member_type.Size())
        operand.offset = True
        operand.type = operand.type.member_type
        return operand
    elif node[2] == "(":
        # Entries are (kind, value, saved): kind 0 is a result that was
        # in eax and now sits in a locked register, kind 1 an operand
        # whose offset (eax) was parked in `saved`, kind 2 anything else.
        # The list exists before the argument check so a call without
        # arguments still reaches tools.call.
        pending_args = []
        if isinstance(node[3], TreeNode):
            for argument_expression in node[3][1:]:
                if not isinstance(argument_expression, TreeNode):
                    continue
                argument = self.expression_handler[argument_expression[0]](
                    argument_expression, context)
                if argument == self.tools.getEax():
                    tmp = self.tools.allocateNewReg(self.tools.getEax())
                    self.tools.lock(tmp)
                    self.tools.mov(tmp, self.tools.getEax())
                    pending_args.append((0, tmp, None))
                elif isinstance(argument, Data) and argument.offset:
                    tmp = self.tools.allocateNewReg(self.tools.getEax())
                    self.tools.lock(tmp)
                    self.tools.mov(tmp, self.tools.getEax())
                    pending_args.append((1, argument, tmp))
                else:
                    pending_args.append((2, argument, None))
        for kind, value, saved in pending_args:
            if kind == 1:
                # Restore the parked offset before passing the operand.
                self.tools.mov(self.tools.getEax(), saved)
            self.tools.passPara(value)
            if kind == 0:
                self.tools.unLock(value)
            if kind == 1:
                self.tools.unLock(saved)
        return self.tools.call(operand)
    elif node[2] == ".":
        if operand.offset == False:
            self.tools.mov(self.tools.getEax(), 0)
        member = node[3][1]
        self.tools.add(self.tools.getEax(), operand.type.offset[member])
        operand.type = operand.type.members[member]
        operand.offset = True
        return operand
    elif node[2] == "->":
        self.tools.mov(self.tools.getEax(), operand)
        member = node[3][1]
        self.tools.add(self.tools.getEax(), operand.type.offset[member])
        operand.name = self.tools.getNull()
        operand.type = operand.type.members[member]
        operand.offset = True
        return operand
289 |
290 |
def gen_unary_expression(self, node, context):
    """
    Emit code for address-of, dereference and prefix ``++`` / ``--``.

    node[2] is the operand expression. node[1] is either a unary-operator
    TreeNode (only "&" and "*" are handled) or the literal token "++" or
    "--".

    :type node: TreeNode
    :type context: Context
    :return: the result of ``tools.lea`` for "&"; the updated operand for
        "*", "++" and "--"; None for anything not handled
    """
    operand = self.expression_handler[node[2][0]](node[2], context)
    if isinstance(node[1], TreeNode):
        operator = self.gen_unary_operator(node[1], context)
        if operator == "&":
            if isinstance(operand, Data):
                ret = self.tools.lea(operand)
                # One more entry in is_const is one more pointer level.
                operand.type.is_const.append(False)
                return ret
        elif operator == "*":
            if isinstance(operand, Data):
                self.tools.mov(self.tools.getEax(), operand)
                operand.name = self.tools.getNull()
                operand.offset = True
                # Dropping an is_const entry removes one pointer level.
                operand.type.is_const.pop()
                return operand
    else:
        if node[1] == "++":
            ret = self.tools.add(operand, 1)
            self.tools.mov(operand, ret)
            return operand
        elif node[1] == "--":
            # Write the result back, as "++" does; it used to be dropped.
            ret = self.tools.sub(operand, 1)
            self.tools.mov(operand, ret)
            return operand
320 |
321 |
def gen_cast_expression(self, node, context):
    """
    Placeholder for cast expressions: nothing is emitted yet.

    :type node: TreeNode
    :type context: Context
    :return: always None
    """
    return None
329 |
def gen_multiplicative_expression(self, node, context):
    """
    Emit code for a binary ``*`` or ``/``.

    The left operand (node[1]) is copied into a locked scratch register
    before the right operand (node[3]) is evaluated. When the right
    operand is a string holding an integer listed in ``self.exp2``, the
    operation is emitted as a shift by that value's index in ``self.exp2``
    (``sal`` for "*", ``sar`` for "/"); otherwise ``mul`` / ``div`` is
    used.

    :type node: TreeNode
    :type context: Context
    :return: whatever the chosen ``tools`` operation returns
    :raises NotImplementedError: for an operator other than "*" or "/"
    """
    op1 = self.expression_handler[node[1][0]](node[1], context)
    tmp = self.tools.allocateNewReg(op1)
    self.tools.lock(tmp)
    self.tools.mov(tmp, op1)
    op2 = self.expression_handler[node[3][0]](node[3], context)

    operator = node[2]
    if operator == "*":
        shift, fallback = self.tools.sal, self.tools.mul
    elif operator == "/":
        # NOTE(review): an arithmetic right shift rounds negative values
        # differently from C division - confirm this is acceptable.
        shift, fallback = self.tools.sar, self.tools.div
    else:
        # Used to end in an unbound `ret` with the register still locked.
        self.tools.unLock(tmp)
        raise NotImplementedError(
            "unsupported multiplicative operator: %r" % (operator,))

    shift_amount = None
    if isinstance(op2, str):
        try:
            num = int(op2)
        except ValueError:
            # Not a literal integer (e.g. a register name): no shortcut.
            num = None
        if num is not None and num in self.exp2:
            shift_amount = self.exp2.index(num)

    if shift_amount is not None:
        ret = shift(tmp, str(shift_amount))
    else:
        ret = fallback(tmp, op2)
    self.tools.unLock(tmp)
    return ret
367 |
368 |
def gen_shift_expression(self, node, context):
    """
    Placeholder for ``<<`` / ``>>`` expressions: nothing is emitted yet.

    :type node: TreeNode
    :type context: Context
    :return: always None
    """
    return None
376 |
def gen_relational_expression(self, node, context):
    """
    Emit code that leaves 1 or 0 in eax for ``<``, ``<=``, ``>``, ``>=``.

    The left operand (node[1]) is copied into a locked scratch register,
    compared with the right operand (node[3]), and a conditional jump
    picked by node[2] selects which constant is loaded into eax.

    :type node: TreeNode
    :type context: Context
    :return: the value of ``tools.getEax()``
    """
    true_label = self.tools.allocateLabel()
    end_label = self.tools.allocateLabel()
    op1 = self.expression_handler[node[1][0]](node[1], context)
    tmp = self.tools.allocateNewReg(op1)
    self.tools.lock(tmp)
    self.tools.mov(tmp, op1)
    op2 = self.expression_handler[node[3][0]](node[3], context)
    self.tools.cmp(tmp, op2)
    # The scratch register is done after the comparison; it was never
    # released before.
    self.tools.unLock(tmp)
    jump_for = {"<": "jl", "<=": "jle", ">": "jg", ">=": "jge"}
    jump_name = jump_for.get(node[2])
    if jump_name is not None:
        getattr(self.tools, jump_name)(true_label)
    self.tools.mov(self.tools.getEax(), 0)
    self.tools.jmp(end_label)
    self.tools.markLabel(true_label)
    self.tools.mov(self.tools.getEax(), 1)
    # Mark the target of the jmp above; markLabel was called with no label.
    self.tools.markLabel(end_label)
    return self.tools.getEax()
405 |
def gen_equality_expression(self, node, context):
    """
    Emit code that leaves 1 or 0 in eax for ``==`` and ``!=``.

    The left operand (node[1]) is copied into a locked scratch register,
    compared with the right operand (node[3]), and a conditional jump
    picked by node[2] selects which constant is loaded into eax.

    :type node: TreeNode
    :type context: Context
    :return: the value of ``tools.getEax()``
    """
    true_label = self.tools.allocateLabel()
    end_label = self.tools.allocateLabel()
    op1 = self.expression_handler[node[1][0]](node[1], context)
    tmp = self.tools.allocateNewReg(op1)
    self.tools.lock(tmp)
    self.tools.mov(tmp, op1)
    op2 = self.expression_handler[node[3][0]](node[3], context)
    self.tools.cmp(tmp, op2)
    # The scratch register is done after the comparison; it was never
    # released before.
    self.tools.unLock(tmp)
    if node[2] == "==":
        self.tools.je(true_label)
    elif node[2] == "!=":
        self.tools.jne(true_label)
    self.tools.mov(self.tools.getEax(), 0)
    self.tools.jmp(end_label)
    self.tools.markLabel(true_label)
    self.tools.mov(self.tools.getEax(), 1)
    # Mark the target of the jmp above; markLabel was called with no label.
    self.tools.markLabel(end_label)
    return self.tools.getEax()
430 |
431 |
def gen_and_expression(self, node, context):
    """
    Emit code for a bitwise ``&`` of node[1] and node[3].

    The left operand is parked in a locked scratch register while the
    right operand is evaluated.

    :type node: TreeNode
    :type context: Context
    :return: whatever ``tools.And`` returns
    """
    tools = self.tools
    left = self.expression_handler[node[1][0]](node[1], context)
    scratch = tools.allocateNewReg(left)
    tools.lock(scratch)
    tools.mov(scratch, left)
    right = self.expression_handler[node[3][0]](node[3], context)
    result = tools.And(scratch, right)
    tools.unLock(scratch)
    return result
446 |
def gen_exclusive_or_expression(self, node, context):
    """
    Placeholder for bitwise ``^`` expressions: nothing is emitted yet.

    :type node: TreeNode
    :type context: Context
    :return: always None
    """
    # A draft following the gen_and_expression pattern (scratch register,
    # then tools.xor) exists but is disabled.
    return None
462 |
def gen_inclusive_or_expression(self, node, context):
    """
    Emit code for a bitwise ``|`` of node[1] and node[3].

    The left operand is parked in a locked scratch register while the
    right operand is evaluated.

    :type node: TreeNode
    :type context: Context
    :return: whatever ``tools.Or`` returns
    """
    tools = self.tools
    left = self.expression_handler[node[1][0]](node[1], context)
    scratch = tools.allocateNewReg(left)
    tools.lock(scratch)
    tools.mov(scratch, left)
    right = self.expression_handler[node[3][0]](node[3], context)
    result = tools.Or(scratch, right)
    tools.unLock(scratch)
    return result
477 |
def gen_logical_and_expression(self, node, context):
    """
    Emit short-circuit code for ``&&``, leaving 1 or 0 in eax.

    node[1] and node[3] are evaluated in order; as soon as one of them
    compares equal to the false value, control jumps to the code that
    loads 0.

    :type node: TreeNode
    :type context: Context
    :return: the value of ``tools.getEax()``
    """
    tools = self.tools
    false_label = tools.allocateLabel()
    end_label = tools.allocateLabel()
    for position in (1, 3):
        operand_node = node[position]
        value = self.expression_handler[operand_node[0]](operand_node, context)
        tools.cmp(value, tools.getFalse())
        tools.je(false_label)
    tools.mov(tools.getEax(), 1)
    tools.jmp(end_label)
    tools.markLabel(false_label)
    tools.mov(tools.getEax(), 0)
    tools.markLabel(end_label)
    return tools.getEax()
498 |
def gen_logical_or_expression(self, node, context):
    """
    Emit short-circuit code for ``||``, leaving 1 or 0 in eax.

    node[1] and node[3] are evaluated in order; as soon as one of them
    differs from the false value, control jumps to the code that loads 1.

    :type node: TreeNode
    :type context: Context
    :return: the value of ``tools.getEax()``
    """
    tools = self.tools
    true_label = tools.allocateLabel()
    end_label = tools.allocateLabel()
    for position in (1, 3):
        operand_node = node[position]
        value = self.expression_handler[operand_node[0]](operand_node, context)
        tools.cmp(value, tools.getFalse())
        tools.jne(true_label)
    tools.mov(tools.getEax(), 0)
    tools.jmp(end_label)
    tools.markLabel(true_label)
    tools.mov(tools.getEax(), 1)
    tools.markLabel(end_label)
    return tools.getEax()
519 |
520 |
def gen_conditional_expression(self, node, context):
    """
    Placeholder for ``?:`` expressions: nothing is emitted yet.

    :type node: TreeNode
    :type context: Context
    :return: always None
    """
    return None
528 |
def gen_assignment_expression(self, node, context):
    """
    Emit code for an assignment.

    The right-hand side (node[3]) is evaluated first and parked in a
    locked scratch register; then the target (node[1]) is evaluated.
    Only the plain "=" operator stores the value; any other operator
    leaves the target untouched.

    :type node: TreeNode
    :type context: Context
    :return: the evaluated left-hand operand
    """
    tools = self.tools
    operator = self.gen_assignment_operator(node[2], context)
    value = self.expression_handler[node[3][0]](node[3], context)
    scratch = tools.allocateNewReg(value)
    tools.lock(scratch)
    tools.mov(scratch, value)
    target = self.expression_handler[node[1][0]](node[1], context)
    if operator == "=":
        tools.mov(target, scratch)
    tools.unLock(scratch)
    return target
546 |
def gen_expression(self, node, context):
    """
    Placeholder for comma expressions: nothing is emitted yet.

    :type node: TreeNode
    :type context: Context
    :return: always None
    """
    return None
554 |
def gen_assignment_operator(self, node, context):
    """
    Return the operator token stored in an assignment-operator node.

    :type node: TreeNode
    :type context: Context
    :return: node[1]
    """
    operator_token = node[1]
    return operator_token
562 |
def gen_unary_operator(self, node, context):
    """
    Return the operator token stored in a unary-operator node.

    :type node: TreeNode
    :type context: Context
    :return: node[1]
    """
    operator_token = node[1]
    return operator_token
570 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | from yyparse.ZCCparser import parser, printAST
3 | from yyparse.ZCClex import lexer as ZCClexer
4 | from symbol.symtab import c_types
5 | from public.ZCCglobal import global_context, FuncType, error, Context
6 | from generation.generation import generator
7 | import os
8 | import sys
9 |
10 |
def preprocess(source):
    """
    Run the C preprocessor (``gcc -E``) on a file and return its output.

    :type source: str
    :param source: path of the C source file
    :rtype: str
    :return: the text gcc wrote to standard output
    :raises OSError: when the gcc executable cannot be started
    """
    # Function-scope import keeps this function self-contained.
    import subprocess

    # An argument list (no shell) keeps paths that contain spaces or
    # shell metacharacters from being split or interpreted.
    process = subprocess.Popen(["gcc", "-E", source],
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    # communicate() drains and closes the pipe and reaps the child.
    output, _ = process.communicate()
    return output
14 |
15 |
if __name__ == '__main__':
    # argv[1]: name of a C source file, looked up under the test/ directory.
    # argv[2]: passed to generator.output().
    if len(sys.argv) < 3:
        print("Usage: python main.py <test_file> <output_file>\n"
              "Environment: Python2.7, Linux.")
        sys.exit(1)
    File = sys.argv[1]
    codes = preprocess(os.path.abspath(os.path.join("test", File)))
    pt = parser.parse(codes, lexer=ZCClexer)
    printAST(pt)
    # Code is generated only when no error was recorded in error[0].
    if not error[0]:
        gen = generator()
        gen.generate()
        gen.output(sys.argv[2])
33 |
--------------------------------------------------------------------------------
/public/ZCCglobal.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 |
4 |
class CType(object):
    """
    Description of a C type.

    ``is_const`` holds one flag per indirection level: index 0 belongs to
    the base type and every further entry stands for one pointer level,
    so the pointer depth is ``len(is_const) - 1``.
    """

    # Base type names treated as integers by is_integer() / is_number().
    _INTEGER_TYPES = (
        'char', 'short', 'int', 'long', 'long long',
        'signed char', 'signed short', 'signed int', 'signed long',
        'signed long long',
        'unsigned char', 'unsigned short', 'unsigned int', 'unsigned long',
        'unsigned long long',
    )
    # Additional base type names accepted by is_number().
    _FLOATING_TYPES = ('float', 'double')

    def __init__(self, type_name, size=0, **kwargs):
        """
        :type type_name: str
        :param type_name: e.g. "int", "char", "double", "float", "long",
            "short", "void", "struct", "union", "enum", "function",
            "array" or 'Incomplete'
        :type size: int
        :param size: size of the base type (ignores pointer levels)
        :type kwargs: dict
        :param kwargs: extra attributes copied onto the instance
        :return: None
        """
        self.type = type_name  # type: str
        # sizeof the base type; use Size() to account for pointers
        self.size = size  # type: int
        self.is_const = [False]  # type: list[bool]
        # "static", "extern" or None
        self.storage_class = None  # type: str

        for key, value in kwargs.items():
            setattr(self, key, value)

    def pointer_count(self):
        """
        :return: number of pointer levels
        :rtype: int
        """
        return len(self.is_const) - 1

    def Size(self):
        """
        Must get size by this function!!!

        :return: 4 for any pointer, otherwise the base type size
        :rtype: int
        """
        if self.pointer_count() == 0:
            return self.size
        return 4

    def __repr__(self):
        return self.__add_star__(self.type)

    def __add_star__(self, base_type_repr):
        """
        Decorate *base_type_repr* with the storage class, one " *" per
        pointer level and " const" wherever the level is const.

        :type base_type_repr: str
        :rtype: str
        """
        rval = base_type_repr
        if self.storage_class:
            rval = self.storage_class + " " + rval
        for level, const in enumerate(self.is_const):
            if level > 0:
                rval += " *"
            if const:
                rval += " const"
        return rval

    def __eq__(self, other):
        """
        Types are equal when the pointer depth and base type name match.

        :type other: CType
        :rtype: bool
        """
        if not isinstance(other, CType):
            # e.g. the None returned for an unknown identifier
            return False
        if self.pointer_count() != other.pointer_count():
            return False
        return self.type == other.type

    def __ne__(self, other):
        """
        Keep ``!=`` consistent with ``==`` (Python 2 does not derive it).

        :rtype: bool
        """
        return not self == other

    def is_integer(self):
        """
        :return: True for pointers and integer base types
        :rtype: bool
        """
        return self.pointer_count() > 0 or self.type in self._INTEGER_TYPES

    def is_number(self):
        """
        :return: True for pointers, integer and floating base types
        :rtype: bool
        """
        return self.pointer_count() > 0 \
            or self.type in self._INTEGER_TYPES \
            or self.type in self._FLOATING_TYPES
90 |
91 |
class StructType(CType):
    """A C struct: member types plus the byte offset of every member."""

    def __init__(self, members=None):
        """
        :type members: list[(str,CType)]
        :param members: (name, type) pairs in declaration order
        :return:
        """
        CType.__init__(self, 'struct')
        self.members = {}  # type: dict[str,CType]
        self.offset = {}  # type: dict[str,int]
        self.size = 0
        for name, member_type in (members or ()):
            self.members[name] = member_type
            self.offset[name] = self.size
            # Size() rather than .size so a pointer member counts as a
            # pointer, not as its base type.
            self.size += member_type.Size()
        # Round the total up to a multiple of 4; // keeps this an integer
        # under Python 3 as well.
        self.size = ((self.size - 1) // 4 + 1) * 4

    def __repr__(self):
        return self.__add_star__('struct ' + repr(self.members))

    def __eq__(self, other):
        return CType.__eq__(self, other) and has_same_members(self, other)
113 |
114 |
class UnionType(CType):
    """A C union: member types, sized by the largest member."""

    def __init__(self, members=None):
        """
        :type members: list[(str,CType)]
        :param members: (name, type) pairs
        :return:
        """
        CType.__init__(self, 'union')
        self.members = {}  # type: dict[str,CType]
        self.size = 0  # type: int
        for name, member_type in (members or ()):
            self.members[name] = member_type
            # Size() rather than .size so a pointer member counts as a
            # pointer, not as its base type.
            self.size = max(self.size, member_type.Size())

    def __repr__(self):
        return self.__add_star__('union ' + repr(self.members))

    def __eq__(self, other):
        return CType.__eq__(self, other) and has_same_members(self, other)
135 |
136 |
class EnumType(CType):
    """A C enum with a fixed size of 4; comparison is not supported."""

    def __init__(self, values):
        """
        :type values: dict[(str,int)]
        :param values: the enumerators of this enum
        :return:
        """
        CType.__init__(self, 'enum', size=4)
        self.values = values

    def __repr__(self):
        description = 'enum ' + repr(self.values)
        return self.__add_star__(description)

    def __eq__(self, other):
        # Comparing enum types is deliberately rejected.
        raise Exception('Not support enum')
152 |
153 |
class FuncType(CType):
    """A C function type: return type, parameters and optional body."""

    def __init__(self, return_type,
                 parameter_list=None,
                 parameter_list_is_extendable=False,
                 compound_statement=None):
        """
        The storage class is moved from *return_type* onto the function
        type itself.

        :type return_type: CType
        :type parameter_list: list[(str,CType)]
        :param parameter_list: (name, type) pairs; a fresh empty list when
            omitted, so instances never share one default list
        :type parameter_list_is_extendable: bool
        :param parameter_list_is_extendable: True when the list ends
            with "..."
        :type compound_statement: TreeNode
        """
        CType.__init__(self, 'function')
        self.return_type = return_type  # type: CType
        self.storage_class = return_type.storage_class
        return_type.storage_class = None
        if parameter_list is None:
            parameter_list = []
        self.parameter_list = parameter_list  # type: list[(str,CType)]
        self.parameter_list_is_extendable = \
            parameter_list_is_extendable  # type: bool
        self.compound_statement = compound_statement  # type: TreeNode

    def __repr__(self):
        rval = repr(self.return_type) + " function("
        for parameter in self.parameter_list:
            rval += repr(parameter[1]) + ' ' + parameter[0] + ','
        if self.parameter_list_is_extendable:
            rval += '...'
        rval += ')'
        if self.compound_statement is not None:
            rval += repr(self.compound_statement.context)
        return self.__add_star__(rval)

    def __eq__(self, other):
        """
        Compare return type, variadic flag and parameter types (names are
        ignored). Pointer depth only has to match when the two depths add
        up to more than one.

        :type other: FuncType
        :rtype: bool
        """
        if self.type != other.type:
            return False
        if self.pointer_count() + other.pointer_count() > 1:
            if self.pointer_count() != other.pointer_count():
                return False
        if not self.return_type == other.return_type:
            return False
        if not self.parameter_list_is_extendable == \
                other.parameter_list_is_extendable:
            return False
        if not len(self.parameter_list) == len(other.parameter_list):
            return False
        for mine, theirs in zip(self.parameter_list, other.parameter_list):
            if not mine[1] == theirs[1]:
                return False
        return True
205 |
206 |
class ArrayType(CType):
    """A fixed-length C array of one element type."""

    def __init__(self, c_type, length):
        """
        The storage class is moved from the element type onto the array.

        :type c_type: CType
        :param c_type: element type
        :type length: int
        :param length: number of elements
        :return:
        """
        total_size = length * c_type.Size()
        CType.__init__(self, 'array', size=total_size)
        self.length = length
        self.member_type = c_type
        self.storage_class = c_type.storage_class
        c_type.storage_class = None

    def __repr__(self):
        element_repr = repr(self.member_type)
        return self.__add_star__(element_repr + "[%d]" % self.length)

    def __eq__(self, other):
        """
        Arrays are equal when the base comparison, the length and the
        element type all agree.

        :type other: ArrayType
        :rtype: bool
        """
        if CType.__eq__(self, other):
            return self.length == other.length and \
                self.member_type == other.member_type
        return False
232 |
233 |
class LiteralType(CType):
    """Type of a literal, derived from the Python value that holds it."""

    # (python type, C type name, size, is_const flags), checked in order.
    _KINDS = (
        (str, 'char', 1, (True, False)),
        (int, 'int', 4, (True,)),
        (float, 'double', 8, (True,)),
    )

    def __init__(self, val):
        """
        :param val: the literal value; a str becomes a const char pointer,
            an int a const int, a float a const double. Any other value
            keeps the empty type name.
        :return:
        """
        CType.__init__(self, '')
        self.val = val
        for python_type, type_name, size, const_flags in self._KINDS:
            if isinstance(val, python_type):
                self.type = type_name
                self.size = size
                # Fresh list per instance.
                self.is_const = list(const_flags)
                break
254 |
255 |
class Context:
    """
    One scope of declarations, linked to the enclosing scope.

    ``local`` maps identifiers to their types, ``func_type`` is set for a
    function scope and ``outer_context`` is None for the outermost scope.
    """
    outer_context = None  # type: Context
    func_type = None  # type: FuncType
    local = None  # type: dict[str,CType]

    def __init__(self, outer_context=None, func_type=None):
        self.local = {}
        self.func_type = func_type  # type: FuncType
        self.outer_context = outer_context  # type: Context

    def __repr__(self):
        return " local: %s" % (repr(self.local),)

    def get_return_type(self):
        """
        Return type of the nearest enclosing function, or None when no
        enclosing scope has a function type.

        :rtype: CType
        """
        if self.func_type is not None:
            return self.func_type.return_type
        if self.outer_context is not None:
            return self.outer_context.get_return_type()
        return None  # global_context has no return type

    def get_type_by_id(self, identifier):
        """
        Look an identifier up in this scope's locals, then in the function
        parameters, then in the enclosing scopes.

        :type identifier: str
        :rtype: CType
        :return: the type, or None if not found
        """
        if identifier in self.local:
            return self.local[identifier]
        if self.func_type is not None:
            for parameter in self.func_type.parameter_list:
                if parameter[0] == identifier:
                    return parameter[1]
        if self.outer_context is None:
            return None
        return self.outer_context.get_type_by_id(identifier)

    def add_literal(self, name, literal):
        """
        Register a literal in the ``literal`` table of the outermost scope.

        :type name: str
        :type literal: LiteralType
        """
        root = self
        while root.outer_context is not None:
            root = root.outer_context
        root.literal[name] = literal
305 |
306 |
class GlobalContext(Context):
    """Outermost scope; additionally owns the table of literals."""

    def __init__(self):
        Context.__init__(self)
        self.literal = {}  # type: dict[str,LiteralType]

    def __repr__(self):
        scope_repr = Context.__repr__(self)
        return 'literals:%s\n%s' % (repr(self.literal), scope_repr)
314 |
315 |
316 | global_context = GlobalContext()
317 | error = [False]
318 |
319 |
class TreeNode(list):
    """A parse-tree node: a list that also records a source line number."""

    def __init__(self, lineno=-1):
        """
        :type lineno: int
        :param lineno: line number for this node, -1 by default
        :return:
        """
        super(TreeNode, self).__init__()
        self.lineno = lineno  # type: int
327 |
328 |
329 | # class LeafNode(str):
330 | # def __init__(self, lineno=-1):
331 | # """
332 | # :return:
333 | # """
334 | # self.lineno = lineno # type: int
335 |
336 |
337 | # self.ast = ast # type: list[list]
338 | # for key in kwargs:
339 | # self.__setattr__(key, kwargs[key])
340 | #
341 | # def __getitem__(self, item):
342 | # return self.ast.__getitem__(item)
343 | #
344 | # def __setitem__(self, key, value):
345 | # self.ast.__setitem__(key, value)
346 | #
347 | # def __len__(self):
348 | # return self.ast.__len__()
349 |
350 |
def has_same_members(struct_type1, struct_type2):
    """
    Tell whether two struct/union types declare the same member names
    with equal member types.

    :type struct_type1: StructType
    :type struct_type2: StructType
    :rtype: bool
    """
    first = struct_type1.members
    second = struct_type2.members

    # Every member of the first must exist, with an equal type, in the second.
    for name in first:
        if name not in second:
            return False
        if not first[name] == second[name]:
            return False

    # And the other way round, so extra members in the second are caught.
    for name in second:
        if name not in first:
            return False
        if not second[name] == first[name]:
            return False
    return True
368 |
--------------------------------------------------------------------------------
/public/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hlFu/ZCC/811bd987c9d7a3754f7bc9c9d986359ee3df7327/public/__init__.py
--------------------------------------------------------------------------------
/public/const.py:
--------------------------------------------------------------------------------
1 | #constant value put here
2 |
3 | UNDEFINED = -100
4 |
class NodeKind:
    """Numeric tags for the two kinds of node: statement and expression."""
    STMT, EXP = 1, 2
8 |
class StmtKind:
    """Numeric tags for statement kinds."""
    IF, REPEAT, ASSIGN = 1, 2, 3
13 |
class ExpKind:
    """Numeric tags for expression kinds."""
    OP, CONST, ID = 1, 2, 3
18 |
class ExpType:
    """Numeric tags for expression result types."""
    VOID, INTEGER, FLOAT, BOOLEAN = 1, 2, 3, 4
--------------------------------------------------------------------------------
/symbol/.gitignore:
--------------------------------------------------------------------------------
1 | *.tmp
--------------------------------------------------------------------------------
/symbol/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hlFu/ZCC/811bd987c9d7a3754f7bc9c9d986359ee3df7327/symbol/__init__.py
--------------------------------------------------------------------------------
/test/a.s:
--------------------------------------------------------------------------------
1 | .file "a.c"
2 | .intel_syntax noprefix
3 | .section .rodata
4 | .LC0:
5 | .string "hello"
6 | .LC1:
7 | .string "%d\n"
8 | .text
9 | .globl foo
10 | .type foo, @function
11 | foo:
12 | push ebp
13 | mov ebp, esp
14 | sub esp, 8
15 | sub esp, 12
16 | push OFFSET FLAT:.LC0
17 | call puts
18 | add esp, 16
19 | sub esp, 8
20 | push DWORD PTR [ebp+8]
21 | push OFFSET FLAT:.LC1
22 | call printf
23 | add esp, 16
24 | mov eax, DWORD PTR [ebp+8]
25 | leave
26 | ret
27 | .size foo, .-foo
28 | .globl main
29 | .type main, @function
30 | main:
31 | lea ecx, [esp+4]
32 | and esp, -16
33 | push DWORD PTR [ecx-4]
34 | push ebp
35 | mov ebp, esp
36 | push ecx
37 | sub esp, 20
38 | mov DWORD PTR [ebp-12], 2
39 | sub esp, 12
40 | push DWORD PTR [ebp-12]
41 | call foo
42 | add esp, 16
43 | mov DWORD PTR [ebp-16], eax
44 | sub esp, 8
45 | push DWORD PTR [ebp-16]
46 | push OFFSET FLAT:.LC1
47 | call printf
48 | add esp, 16
49 | mov eax, 0
50 | mov ecx, DWORD PTR [ebp-4]
51 | leave
52 | lea esp, [ecx-4]
53 | ret
54 | .size main, .-main
55 | .ident "GCC: (GNU) 5.3.1 20160406 (Red Hat 5.3.1-6)"
56 | .section .note.GNU-stack,"",@progbits
57 |
--------------------------------------------------------------------------------
/test/array.c:
--------------------------------------------------------------------------------
1 | /*
2 | * multi-dimension array
3 | */
4 | #include "stdio.h"
5 | int main(void)
6 | {
7 | int a[5][5];
8 | int i,j;
9 | for(i=0;i<5;++i)
10 | {
11 | for (j=0;j<5;++j)
12 | {
13 | a[i][j]=i*5+j;
14 | printf("%02d ",a[i][j]);
15 | }
16 | puts("");
17 | }
18 | return 0;
19 | }
20 |
21 |
--------------------------------------------------------------------------------
/test/basic.c:
--------------------------------------------------------------------------------
1 | /*
2 | basic expression:for if while
3 | basic type: int float double char pointer
4 | glibc:scanf printf
5 | constant: string char float
6 | scope: local, global, static local, compound_statement
7 | arithmetic operation; logical operation
8 | priority
9 | declaration definition
10 | array
11 | increment
12 | preprocessing
13 | */
14 | #include "stdio.h"
15 | #define UPPERCASE_A 65
16 | #define LOWERCASE_A 97
17 | #define LOWERCASE_Z 122
18 | int fib(int n);
19 | int n,i;
20 | int main(int argc,char **argv)
21 | {
22 | double d,f;
23 | char *s;
24 |
25 | s=*argv;
26 | while(*s!=0)
27 | {
28 | if(*s<=LOWERCASE_Z&&*s>=LOWERCASE_A)
29 | *s=*s+(UPPERCASE_A-LOWERCASE_A);
30 | ++s;
31 | }
32 | printf("%s\n",*argv);
33 |
34 | scanf("%d",&n);
35 | printf("%d\n",fib(n));
36 |
37 | f=0.5;
38 | d=1.5;
39 |
40 | for(i=0;i1)
55 | {
56 | return fib(n-1)+fib(n-2);
57 | }
58 | else if(n==1)
59 | {
60 | return 1;
61 | }
62 | else
63 | {
64 | return 0;
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/test/basic.i:
--------------------------------------------------------------------------------
1 | # 1 "basic.c"
2 | # 1 ""
3 | # 1 ""
4 | # 1 "/usr/include/stdc-predef.h" 1 3 4
5 | # 1 "" 2
6 | # 1 "basic.c"
7 | # 14 "basic.c"
8 | # 1 "stdio.h" 1
9 |
10 |
11 | int printf(char *format,...);
12 | int scanf(char *format,...);
13 | int puts(char* s);
14 | # 15 "basic.c" 2
15 |
16 |
17 |
18 | int fib(int n);
19 | int n,i;
20 | int main(int argc,char **argv)
21 | {
22 | float f;
23 | double d;
24 | char *s;
25 |
26 | s=argv[1];
27 | while(*s!=0)
28 | {
29 | if(*s<='z'&&*s>='a')
30 | *s=*s+'A'-'a';
31 | s++;
32 | }
33 | printf("%s\n",argv[1]);
34 |
35 | scanf("%d",&n);
36 | printf("%d\n",fib(n));
37 |
38 | f=0.5;
39 | d=1.5;
40 |
41 | for(i=0;i1)
59 | {
60 | return fib(n-1)+fib(n-2);
61 | }
62 | else if(n==1)
63 | {
64 | return 1;
65 | }
66 | else
67 | {
68 | return 0;
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/test/basic1.c:
--------------------------------------------------------------------------------
1 | #include "stdio.h"
2 |
3 | int main(int argc,char **argv)
4 | {
5 | char *s;
6 | s=*argv;
7 |
8 | while(*s!=0)
9 | {
10 | if(*s>='a'&&*s<='z')
11 | *s=*s+(65-97);
12 | ++s;
13 | }
14 |
15 | printf("%s",*argv);
16 | puts("");
17 |
18 | return 0;
19 | }
20 |
21 |
22 |
--------------------------------------------------------------------------------
/test/basic2.c:
--------------------------------------------------------------------------------
1 | #include "stdio.h"
2 | int fib(int n);
3 | int i;
4 | int main()
5 | {
6 | scanf("%d",&i);
7 | printf("%d\n",fib(i));
8 |
9 | return 0;
10 | }
11 | int fib(int n)
12 | {
13 | int i;
14 | if(n>1)
15 | {
16 | return fib(n-1)+fib(n-2);
17 | }
18 | else if(n==1)
19 | {
20 | return 1;
21 | }
22 | else
23 | {
24 | return 0;
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/test/basic3.c:
--------------------------------------------------------------------------------
1 | #include "stdio.h"
2 | int main()
3 | {
4 | double i,j;
5 | j=2.3;
6 | scanf("%lf",&i);
7 | i=i*j+i*(i*j-i*j)/j;
8 | printf("%lf\n",i);
9 |
10 | return 0;
11 | }
--------------------------------------------------------------------------------
/test/errorID.c:
--------------------------------------------------------------------------------
1 | int $a;
2 |
--------------------------------------------------------------------------------
/test/error_info.c:
--------------------------------------------------------------------------------
1 |
2 | //函数定义声明不一致
3 | int f(int i,...);
4 | int f(int j){
5 | return 0;
6 | }
7 |
8 | int g(int i){
9 | return 0;
10 | }
11 | typedef struct{
12 | int n;
13 | } A;
14 | int main(int argc, char const *argv[])
15 | {
16 |
17 | //重复定义
18 | int k;
19 | int k;
20 | int count;
21 | //类型不匹配
22 | A a;
23 | a = 5;
24 | //未定义变量
25 | var = 3;
26 | //操作数类型错误
27 | 1.0 >> 4;
28 | //打字错误
29 | cont = 4;
30 | g(1.0);
31 | //参数表不匹配
32 | g(a);
33 | //返回值不匹配
34 | return a;
35 | }
36 |
37 | // Semantic Error at line 4: 'int function(int j,)' is not consistent with old declaration 'int function(int i,...)'
38 | // int f ( int j ) { return 0 ; }
39 | //
40 | // Syntax error at 'int', at line: 22, column: 5.
41 | // Error type: missing semicolon before int. at line: 22, lex pos: 258 in declaration.
42 | //
43 | // Semantic Error at line 18: Redeclare k
44 | // k
45 | //
46 | // Semantic Error at line 23: 'int const' cannot be assigned to 'struct {'n': int}'
47 | // a = 5
48 | //
49 | // Semantic Error at line 25: Unknown identifier var
50 | // var
51 | //
52 | // Semantic Error at line 27: double const is not or cannot be recognized as integer
53 | // 1.0
54 | //
55 | // Semantic Error at line 29: Unknown identifier 'cont', do you mean 'count'?
56 | // cont
57 | //
58 | // Semantic Error at line 32: 'struct {'n': int}' can't convert to 'int'
59 | // a
60 | //
61 | // Semantic Error at line 34: 'struct {'n': int}' is not consistant with the function return type 'int'
62 | // return a ;
63 |
--------------------------------------------------------------------------------
/test/error_pos.c:
--------------------------------------------------------------------------------
1 |
2 | int a, b, c;
3 | c = a + b;
4 | int d;
--------------------------------------------------------------------------------
/test/missRightCurly.c:
--------------------------------------------------------------------------------
1 | //
2 | //int a, b, c;
3 | //int main(int argc, char *argv[]) {
4 | // c = a + b;
5 | //
6 | //
7 | //int b, c;
8 |
9 | int f(){
10 |
11 | int d;
--------------------------------------------------------------------------------
/test/missSEMI.c:
--------------------------------------------------------------------------------
1 |
2 | int b
3 |
4 | int main(int argc, char *argv[]) {
5 | int a, b, c, d;
6 | int $a;
7 |
8 | c = a + b;
9 | d = a +/ b;
10 | d = a -/ b;
11 | d = a ^^ / b;
12 | d = a *|b;
13 | d = a >/ b;
14 | d = a b;
15 | d = a <=/ b;
16 | d = a < b;
17 | d = a ==/ b;
18 | d = a &/ b;
19 | d = a ^/ b;
20 | d = a |/ b;
21 | d = a &&| b;
22 | d = a ||| b;
23 |
24 |
25 | a = b + c
26 | printf("asdf\n")
27 | b = a + c;
28 | printf("%d\n", a);
29 |
--------------------------------------------------------------------------------
/test/multi_int.c:
--------------------------------------------------------------------------------
1 | #include "stdio.h"
2 | int main()
3 | {
4 | int i,j;
5 | j=5;
6 | scanf("%d",&i);
7 | i=i*j+i*(i*j-j*j+i);
8 | printf("%d\n",i);
9 |
10 | return 0;
11 | }
12 |
--------------------------------------------------------------------------------
/test/out.txt:
--------------------------------------------------------------------------------
1 | .intel_syntax noprefix
2 | .section .rodata
3 | .text
4 | .globl main
5 | .type main, @function
6 | main:
7 | push ebp
8 | mov ebp, esp
9 | sub esp, 64
10 | mov edx, 1
11 | mov eax, 0
12 | mov [esp+28], edx
13 | mov eax, 0
14 | mov edx, [esp+24]
15 | add eax, None
16 | mov edx, eax
17 | mov eax, 0
18 | mov [esp+24], edx
19 | .size main, .-main
20 | .ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
21 | .section .note.GNU-stack,"",@progbits
22 |
--------------------------------------------------------------------------------
/test/pointer.c:
--------------------------------------------------------------------------------
1 | /*
2 | * embedded functional pointer
3 | */
4 | #include "stdio.h"
5 | int i;
6 |
7 | void print_int(){
8 | printf("%d\n", i);
9 | return;
10 | }
11 |
12 | void (*high_order_func(int n)) (){
13 | i = n;
14 | return print_int;
15 | }
16 |
17 | int main(){
18 | void (*(*f)(int n))();
19 | f = high_order_func;
20 | f(2)();
21 | return 0;
22 | }
23 |
--------------------------------------------------------------------------------
/test/source_code_optimization.c:
--------------------------------------------------------------------------------
1 | int main(int argc, char const *argv[])
2 | {
3 | int c;
4 | int flag;
5 | c = 2 + 3 * 4;//常量压缩
6 | if ((2 - 2)*9){
7 | //这个if语句经过常量压缩,死代码消除后,会被剪掉
8 | }
9 |
10 | if (1){
11 | c = 2;
12 | //这个复合语句会替换掉if语句
13 | }
14 | else{
15 | c = 3;
16 | //这个复合语句会被剪掉
17 | }
18 |
19 | if(flag){
20 | return 0;
21 | c = c + 1; //return后的语句被删除
22 | }
23 | return 0;
24 | }
--------------------------------------------------------------------------------
/test/stdio.h:
--------------------------------------------------------------------------------
1 | #ifndef _STDIO_H_
2 | #define _STDIO_H_
3 | int printf(char *format,...);
4 | int scanf(char *format,...);
5 | int puts(char* s);
6 | #endif
7 |
--------------------------------------------------------------------------------
/test/struct.c:
--------------------------------------------------------------------------------
1 | /*
2 | * embedded struct
3 | * member access: direct pointer
4 | * typedef
5 | */
6 | #include "stdio.h"
7 | typedef struct
8 | {
9 | int a;
10 | char c;
11 | struct {
12 | int b;
13 | double d;
14 | }inner;
15 | }myStruct;
16 |
17 | void modifyStruct(myStruct* sp)
18 | {
19 | sp->a=4;
20 | sp->c='!';
21 | sp->inner.b=5;
22 | sp->inner.d=55.2;
23 | return;
24 | }
25 |
26 | int main(void)
27 | {
28 | myStruct p[3];
29 | myStruct *sp;
30 |
31 | sp=&p[1];
32 | p[1].a=2;
33 | p[1].c='a';
34 | p[1].inner.b=3;
35 | p[1].inner.d=12.3;
36 | printf("before modified\n");
37 | printf("p[1].a=%d\tp[1].c=%c\tp[1].inner.b=%d\tp[1].inner.d=%lf\n",p[1].a,p[1].c,p[1].inner.b,p[1].inner.d);
38 | modifyStruct(sp);
39 | printf("after modified\n");
40 | printf("p[1].a=%d\tp[1].c=%c\tp[1].inner.b=%d\tp[1].inner.d=%lf\n",p[1].a,p[1].c,p[1].inner.b,p[1].inner.d);
41 |
42 | return 0;
43 | }
44 |
45 |
--------------------------------------------------------------------------------
/test/test1.c:
--------------------------------------------------------------------------------
1 | int g_i;
2 | static int ss;
3 | static int sss;
4 |
5 | int foo(int n){
6 | static int x3;
7 | int x1,x2;
8 | x1=2;
9 | x2=3;
10 | x1=x1+1;
11 | return n+1;
12 | }
13 |
14 |
15 | int main(void){
16 | int l_i,x1,x2,x3;
17 | l_i=l_i+1;
18 | x1=1;
19 | x1=x1+l_i;
20 | x2=foo(x1);
21 | return 1;
22 | }
--------------------------------------------------------------------------------
/test/test1.s:
--------------------------------------------------------------------------------
1 | .file "test1.c"
2 | .intel_syntax noprefix
3 | .comm g_i,4,4
4 | .local ss
5 | .comm ss,4,4
6 | .local sss
7 | .comm sss,4,4
8 | .text
9 | .globl main
10 | .type main, @function
11 | main:
12 | push ebp
13 | mov ebp, esp
14 | and esp, -16
15 | sub esp, 32
16 | add DWORD PTR [esp+20], 1
17 | mov DWORD PTR [esp+24], 1
18 | mov eax, DWORD PTR [esp+20]
19 | add DWORD PTR [esp+24], eax
20 | mov eax, DWORD PTR [esp+24]
21 | mov DWORD PTR [esp], eax
22 | call foo
23 | mov DWORD PTR [esp+28], eax
24 | nop
25 | leave
26 | ret
27 | .size main, .-main
28 | .globl foo
29 | .type foo, @function
30 | foo:
31 | push ebp
32 | mov ebp, esp
33 | sub esp, 16
34 | mov DWORD PTR [ebp-8], 2
35 | mov DWORD PTR [ebp-4], 3
36 | add DWORD PTR [ebp-8], 1
37 | mov eax, DWORD PTR [ebp+8]
38 | add eax, 1
39 | leave
40 | ret
41 | .size foo, .-foo
42 | .ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
43 | .section .note.GNU-stack,"",@progbits
44 |
--------------------------------------------------------------------------------
/test/test4.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | int g_fast;
3 | static int s_g_fast;
4 |
5 | int main(void)
6 | {
7 | int l_fast;
8 | static int s_l_fast;
9 | l_fast=1;
10 | s_l_fast=2;
11 | l_fast=foo(s_l_fast);
12 | printf("%d\n",l_fast);
13 | return 0;
14 | }
15 |
16 | int foo(int n){
17 | return n+1;
18 | }
19 |
--------------------------------------------------------------------------------
/test/test4.s:
--------------------------------------------------------------------------------
1 | .file "test4.c"
2 | .intel_syntax noprefix
3 | .comm g_fast,4,4
4 | .local s_g_fast
5 | .comm s_g_fast,4,4
6 | .section .rodata
7 | .LC0:
8 | .string "%d\n"
9 | .text
10 | .globl main
11 | .type main, @function
12 | main:
13 | push ebp
14 | mov ebp, esp
15 | and esp, -16
16 | sub esp, 32
17 | mov DWORD PTR [esp+28], 1
18 | mov DWORD PTR s_l_fast.1829, 2
19 | mov eax, DWORD PTR s_l_fast.1829
20 | add DWORD PTR [esp+28], eax
21 | mov eax, DWORD PTR s_l_fast.1829
22 | mov DWORD PTR [esp], eax
23 | call foo
24 | mov DWORD PTR [esp+28], eax
25 | mov eax, DWORD PTR [esp+28]
26 | mov DWORD PTR [esp+4], eax
27 | mov DWORD PTR [esp], OFFSET FLAT:.LC0
28 | call printf
29 | mov eax, 0
30 | leave
31 | ret
32 | .size main, .-main
33 | .globl foo
34 | .type foo, @function
35 | foo:
36 | push ebp
37 | mov ebp, esp
38 | mov eax, DWORD PTR [ebp+8]
39 | add eax, 1
40 | pop ebp
41 | ret
42 | .size foo, .-foo
43 | .local s_l_fast.1829
44 | .comm s_l_fast.1829,4,4
45 | .ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
46 | .section .note.GNU-stack,"",@progbits
47 |
--------------------------------------------------------------------------------
/test/test4_2.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | int g_fast;
3 | static int s_g_fast;
4 |
5 | int main(void)
6 | {
7 | int l_fast;
8 | static int s_l_fast;
9 | l_fast=1;
10 | s_l_fast=2;
11 | l_fast=foo(s_l_fast);
12 | printf("%d\n",l_fast);
13 | return 0;
14 | }
15 |
16 | int foo(int n){
17 | return n+1;
18 | }
19 |
--------------------------------------------------------------------------------
/test/test4_2.s:
--------------------------------------------------------------------------------
1 | .file "test4_2.c"
2 | .intel_syntax noprefix
3 | .section .rodata.str1.1,"aMS",@progbits,1
4 | .LC0:
5 | .string "%d\n"
6 | .text
7 | .globl main
8 | .type main, @function
9 | main:
10 | push ebp
11 | mov ebp, esp
12 | and esp, -16
13 | sub esp, 16
14 | mov DWORD PTR s_l_fast.2034, 2
15 | mov DWORD PTR [esp+8], 3
16 | mov DWORD PTR [esp+4], OFFSET FLAT:.LC0
17 | mov DWORD PTR [esp], 1
18 | call __printf_chk
19 | mov eax, 0
20 | leave
21 | ret
22 | .size main, .-main
23 | .globl foo
24 | .type foo, @function
25 | foo:
26 | mov eax, DWORD PTR [esp+4]
27 | add eax, 1
28 | ret
29 | .size foo, .-foo
30 | .local s_l_fast.2034
31 | .comm s_l_fast.2034,4,4
32 | .comm g_fast,4,4
33 | .ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
34 | .section .note.GNU-stack,"",@progbits
35 |
--------------------------------------------------------------------------------
/test/test9.c:
--------------------------------------------------------------------------------
1 | double a;
2 | struct test{
3 | char a;
4 | int b;
5 | short c;
6 | double e;
7 | };
8 | int foo(char a, int b, short c, struct test d, char *s){
9 | d.e=10.5;
10 | d.c=2;
11 | if(a=='a')
12 | return b-c;
13 | else
14 | return d.e-d.c;
15 | }
16 |
17 | int main(){
18 | struct test t;
19 | a=4.5;
20 | t.a='b';
21 | t.b=2;
22 | t.c=3;
23 | t.e=5.4;
24 | printf("%lf",t.e);
25 | return foo('a',10,2,t,"mamsf");
26 | }
27 |
--------------------------------------------------------------------------------
/test/test9.s:
--------------------------------------------------------------------------------
1 | .file "test9.c"
2 | .intel_syntax noprefix
3 | .globl a
4 | .data
5 | .align 8
6 | .type a, @object
7 | .size a, 8
8 | a:
9 | .long 0
10 | .long 1075052544
11 | .text
12 | .globl foo
13 | .type foo, @function
14 | foo:
15 | push ebp
16 | mov ebp, esp
17 | sub esp, 16
18 | mov edx, DWORD PTR [ebp+8]
19 | mov eax, DWORD PTR [ebp+16]
20 | mov BYTE PTR [ebp-4], dl
21 | mov WORD PTR [ebp-8], ax
22 | fld QWORD PTR .LC0
23 | fstp QWORD PTR [ebp+32]
24 | mov WORD PTR [ebp+28], 2
25 | cmp BYTE PTR [ebp-4], 97
26 | jne .L2
27 | movsx eax, WORD PTR [ebp-8]
28 | mov edx, DWORD PTR [ebp+12]
29 | sub edx, eax
30 | mov eax, edx
31 | jmp .L3
32 | .L2:
33 | fld QWORD PTR [ebp+32]
34 | movzx eax, WORD PTR [ebp+28]
35 | mov WORD PTR [ebp-6], ax
36 | fild WORD PTR [ebp-6]
37 | fsubp st(1), st
38 | fnstcw WORD PTR [ebp-2]
39 | movzx eax, WORD PTR [ebp-2]
40 | mov ah, 12
41 | mov WORD PTR [ebp-10], ax
42 | fldcw WORD PTR [ebp-10]
43 | fistp DWORD PTR [ebp-16]
44 | fldcw WORD PTR [ebp-2]
45 | mov eax, DWORD PTR [ebp-16]
46 | .L3:
47 | leave
48 | ret
49 | .size foo, .-foo
50 | .section .rodata
51 | .LC4:
52 | .string "%lf"
53 | .LC5:
54 | .string "mamsf"
55 | .text
56 | .globl main
57 | .type main, @function
58 | main:
59 | push ebp
60 | mov ebp, esp
61 | and esp, -16
62 | sub esp, 80
63 | fld QWORD PTR .LC2
64 | fstp QWORD PTR a
65 | mov BYTE PTR [esp+60], 98
66 | mov DWORD PTR [esp+64], 2
67 | mov WORD PTR [esp+68], 3
68 | fld QWORD PTR .LC3
69 | fstp QWORD PTR [esp+72]
70 | fld QWORD PTR [esp+72]
71 | fstp QWORD PTR [esp+4]
72 | mov DWORD PTR [esp], OFFSET FLAT:.LC4
73 | call printf
74 | mov DWORD PTR [esp+32], OFFSET FLAT:.LC5
75 | mov eax, DWORD PTR [esp+60]
76 | mov DWORD PTR [esp+12], eax
77 | mov eax, DWORD PTR [esp+64]
78 | mov DWORD PTR [esp+16], eax
79 | mov eax, DWORD PTR [esp+68]
80 | mov DWORD PTR [esp+20], eax
81 | mov eax, DWORD PTR [esp+72]
82 | mov DWORD PTR [esp+24], eax
83 | mov eax, DWORD PTR [esp+76]
84 | mov DWORD PTR [esp+28], eax
85 | mov DWORD PTR [esp+8], 2
86 | mov DWORD PTR [esp+4], 10
87 | mov DWORD PTR [esp], 97
88 | call foo
89 | leave
90 | ret
91 | .size main, .-main
92 | .section .rodata
93 | .align 8
94 | .LC0:
95 | .long 0
96 | .long 1076166656
97 | .align 8
98 | .LC2:
99 | .long 0
100 | .long 1074921472
101 | .align 8
102 | .LC3:
103 | .long -1717986918
104 | .long 1075157401
105 | .ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
106 | .section .note.GNU-stack,"",@progbits
107 |
--------------------------------------------------------------------------------
/yyparse/.gitignore:
--------------------------------------------------------------------------------
1 | *.out
2 | parsetab.py
--------------------------------------------------------------------------------
/yyparse/ZCClex.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import ply.lex as lex
4 | # import ply.yacc as yacc
5 | # from pprint import pprint
6 | from symbol.symtab import is_type
7 | from public.ZCCglobal import TreeNode
8 | lexErrorInfo = []
9 |
10 |
11 | # column = 0
12 |
def find_column(input, token):
    """Return the 1-based column of ``token`` inside ``input``.

    input -- the complete text handed to the lexer.
    token -- any object exposing ``lexpos``, its absolute offset in ``input``.
    """
    # rfind() yields -1 when the token is on the first line.  Keeping that
    # value (instead of clamping it to 0) makes first-line columns 1-based,
    # matching the numbering already produced for every later line.
    line_break = input.rfind('\n', 0, token.lexpos)
    return token.lexpos - line_break
23 |
24 |
# C keywords recognised by the lexer, keyed by their source spelling.  The
# token name of each keyword is its upper-cased form.  "auto", "goto",
# "register" and "volatile" are not enabled.
reserved_dict = {
    keyword: keyword.upper()
    for keyword in (
        "break", "case", "char", "const", "continue", "default", "do",
        "double", "else", "enum", "extern", "float", "for", "if", "int",
        "long", "return", "short", "signed", "sizeof", "static", "struct",
        "switch", "typedef", "union", "unsigned", "void", "while",
    )
}
59 |
# Token name for every single-character punctuator / operator.
literal_dict = dict([
    ('(', 'LBRACKET'), (')', 'RBRACKET'),
    ('[', 'LSQUAREBRACKET'), (']', 'RSQUAREBRACKET'),
    ('{', 'LCURLYBRACKET'), ('}', 'RCURLYBRACKET'),
    (';', 'SEMICOLON'), ('.', 'PERIOD'), (',', 'COMMA'),
    ('&', 'AND'), ('*', 'STAR'), ('+', 'PLUS'), ('-', 'MINUS'),
    ('~', 'UNOT'), ('!', 'NOT'), ('/', 'DIVIDE'), ('%', 'MOD'),
    ('<', 'LT'), ('>', 'GT'), ('^', 'XOR'), ('|', 'OR'),
    ('?', 'QUESTIONMARK'), (':', 'COLON'), ('=', 'ASSIGN'),
])
86 |
# Every token type the lexer can emit (required by PLY).  The order is kept
# exactly as originally declared.  'AUTO', 'GOTO', 'REGISTER' and 'VOLATILE'
# are not enabled.
tokens = (
    # reserved words
    'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
    'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'IF', 'INT', 'LONG', 'RETURN',
    'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
    'UNION', 'UNSIGNED', 'VOID', 'WHILE',
    # names and string literals
    'IDENTIFIER', 'TYPE_NAME', 'STRING_LITERAL', 'ELLIPSIS',
    # compound assignment operators
    'RIGHT_ASSIGN', 'LEFT_ASSIGN', 'ADD_ASSIGN', 'SUB_ASSIGN', 'MUL_ASSIGN',
    'DIV_ASSIGN', 'MOD_ASSIGN', 'AND_ASSIGN', 'XOR_ASSIGN', 'OR_ASSIGN',
    # other multi-character operators
    'RIGHT_OP', 'LEFT_OP', 'INC_OP', 'DEC_OP', 'PTR_OP', 'AND_OP', 'OR_OP',
    'LE_OP', 'GE_OP', 'EQ_OP', 'NE_OP',
    # single-character punctuators (see literal_dict)
    'LBRACKET', 'RBRACKET', 'LSQUAREBRACKET', 'RSQUAREBRACKET',
    'LCURLYBRACKET', 'RCURLYBRACKET', 'SEMICOLON', 'PERIOD', 'COMMA', 'AND',
    'STAR', 'PLUS', 'MINUS', 'UNOT', 'NOT', 'DIVIDE', 'MOD', 'LT', 'GT',
    'XOR', 'OR', 'QUESTIONMARK', 'COLON', 'ASSIGN',
    # malformed identifiers, constants, end of input
    'ERRORID', 'NUMBER_CONSTANT', 'CHARACTER_CONSTANT', 'EOF',
)
174 |
175 |
def t_STRING_LITERAL(t):
    r'\"(\\.|[^\\\"])*\"'
    # Replace the raw lexeme with a leaf node ['STRING', <lexeme>] that
    # records the current line number.
    leaf = TreeNode()
    leaf.lineno = t.lexer.lineno
    leaf.append('STRING')
    leaf.append(t.value)
    t.value = leaf
    return t
184 |
185 |
def t_ignore_COMMENT(t):
    r'(/\*(.|\n)*?\*/)|(//.*)|(^\#.*)|(\n\#.*)|(\r\n\#.*)'
    # Block comments, line comments and lines starting with '#' produce no
    # token (nothing is returned); only the line counter is advanced by the
    # number of newlines swallowed.
    swallowed_newlines = t.value.count('\n')
    t.lexer.lineno = t.lexer.lineno + swallowed_newlines
190 |
191 |
def t_IDENTIFIER(t):
    r"""[_A-Za-z][_A-Za-z0-9]*"""
    name = t.value

    # Reserved words keep their plain string value.
    keyword = reserved_dict.get(name)
    if keyword is not None:
        t.type = keyword
        return t

    # Names known to the symbol table as types are reported as TYPE_NAME,
    # also with their plain string value.
    if is_type(name):
        t.type = "TYPE_NAME"
        return t

    # Ordinary identifier: wrap it in a leaf node ['IDENTIFIER', <name>].
    t.type = 'IDENTIFIER'
    leaf = TreeNode()
    leaf.lineno = t.lexer.lineno
    leaf.append('IDENTIFIER')
    leaf.append(name)
    t.value = leaf
    return t
204 |
205 |
def _is_float_constant(text):
    """Return True when the numeric literal ``text`` is a floating constant."""
    lowered = text.lower()
    if lowered.startswith('0x'):
        # Hex digits include 'e', so a hex literal must be ruled out first.
        return False
    return '.' in lowered or 'e' in lowered


def t_NUMBER_CONSTANT(t):
    r"""([0-9]*\.[0-9]+|[0-9]+\.)([eE][+\-]?[0-9]+)?[flFL]?|[0-9]+([eE][+\-]?[0-9]+)[flFL]?|0[xX][0-9a-fA-F]+[uU]?[lL]{,2}|[1-9][0-9]*[uU]?[lL]{,2}|0[0-7]*[uU]?[lL]{,2}"""
    # Regex alternation is ordered, so the hexadecimal alternative has to come
    # before the octal one; otherwise "0x1F" is cut down to the octal "0".
    #
    # The literal is classified by inspecting its text rather than by eval():
    # Python cannot evaluate C suffixes such as "10u" or "1.5f", which the
    # pattern above accepts.
    text = t.value
    leaf = TreeNode()
    leaf.lineno = t.lexer.lineno
    leaf.append('DOUBLE' if _is_float_constant(text) else 'INTEGER')
    # The original spelling (suffix included) is stored unchanged.
    leaf.append(text)
    t.value = leaf
    return t
222 |
223 |
# Character codes of the single-letter C escape sequences.
_SIMPLE_ESCAPE_CODES = {
    "'": 39, '"': 34, '?': 63, '\\': 92,
    'a': 7, 'b': 8, 'f': 12, 'n': 10, 'r': 13, 't': 9, 'v': 11,
}


def _char_constant_code(text):
    """Return the integer code of the quoted C character constant ``text``."""
    body = text[1:-1]
    if not body.startswith('\\'):
        return ord(body)
    escape = body[1:]
    if escape in _SIMPLE_ESCAPE_CODES:
        return _SIMPLE_ESCAPE_CODES[escape]
    if escape[0] == 'x':
        return int(escape[1:], 16)
    return int(escape, 8)


def t_CHARACTER_CONSTANT(t):
    r"\'([^\'\\\n]|\\([\'\"?\\abfnrtv]|[0-7]{1,3}|x[0-9a-fA-F]{1,2}))\'"
    # The backslash now prefixes all three escape forms (simple, octal, hex).
    # Previously it was bound to the simple form only, so '\101' and '\x41'
    # were rejected while the unescaped '12' was accepted.
    #
    # The constant is decoded by hand instead of through eval(): Python's
    # escape rules differ from C's (e.g. '\?' and one-digit '\x' escapes).
    code = _char_constant_code(t.value)
    leaf = TreeNode()
    leaf.lineno = t.lexer.lineno
    leaf.append('INTEGER')
    leaf.append(str(code))
    t.value = leaf
    return t
232 |
233 |
234 | # def t_CONSTANT(t):
235 | # r'[1-9][0-9]*[Ee][+-]?[1-9][0-9]*[fFlL]?|[0-9]*\.[0-9]+([Ee][+-]?[0-9]+)?[fFlL]?|[0-9]+\.[0-9]*([Ee][+-]?[0-9]+)?[fFlL]?|0[xX][a-fA-F0-9]+(u|U)?(l|L){,2}|((0|[1-9][0-9]*)(u|U)?(l|L){,2})|\'(\S|\\([abfnrtv\\\'\"0]|[0-7]{3}|x[0-9a-fA-F]{2}))\''
236 | # # r'0[xX][a-fA-F0-9]+(u|U)?(l|L){1,2}|'
237 | # # r'0[0-9]+(u|U)?(l|L){1,2}|'
238 | # # r'[0-9]+(u|U)?(l|L){1,2}|'
239 | # # r'\'\S|\\([abfnrtv\\\'\"0]|[0-7]{3}|x[0-9a-fA-F]{2})\'|'
240 | # # r'[0-9]+[Ee][+-]?[0-9]+[fFlL]?|'
241 | # # r'[0-9]*\.[0-9]+([Ee][+-]?[0-9]+)?[fFlL]?|'
242 | # # r'[0-9]+\.[0-9]*([Ee][+-]?[0-9]+)?[fFlL]?'
243 | #
244 | # return t
245 |
246 |
247 |
# Multi-character operators.  PLY tries function rules in definition order,
# so these rules are declared before t_LITERAL, and each three-character
# operator is declared before the two-character operator it starts with.

def t_ELLIPSIS(t):
    r"\.\.\."
    return t


def t_RIGHT_ASSIGN(t):
    r">>="
    return t


def t_LEFT_ASSIGN(t):
    r"<<="
    return t


def t_ADD_ASSIGN(t):
    r"\+="
    return t


def t_SUB_ASSIGN(t):
    r"-="
    # SUB_ASSIGN is listed in `tokens`; without this rule "-=" was split into
    # MINUS followed by ASSIGN.
    return t


def t_MUL_ASSIGN(t):
    r"\*="
    return t


def t_DIV_ASSIGN(t):
    r"/="
    return t


def t_MOD_ASSIGN(t):
    r"%="
    return t


def t_AND_ASSIGN(t):
    r"&="
    return t


def t_XOR_ASSIGN(t):
    r"\^="
    # The caret must be escaped: a bare "^" is the start-of-input anchor, so
    # the unescaped pattern could never match the "^=" operator.
    return t


def t_OR_ASSIGN(t):
    r"\|="
    return t


def t_RIGHT_OP(t):
    r">>"
    return t


def t_LEFT_OP(t):
    r"<<"
    return t


def t_INC_OP(t):
    r"\+\+"
    return t


def t_DEC_OP(t):
    r"--"
    return t


def t_PTR_OP(t):
    r"->"
    return t


def t_AND_OP(t):
    r"&&"
    return t


def t_OR_OP(t):
    r"\|\|"
    return t


def t_LE_OP(t):
    r"<="
    return t


def t_GE_OP(t):
    r">="
    return t


def t_EQ_OP(t):
    r"=="
    return t


def t_NE_OP(t):
    r"!="
    return t
352 |
def t_LITERAL(t):
    r"[()\[\]{};.,&*+\-~!/%<>\^|?:=]"
    # Single-character punctuator: its token type comes from literal_dict.
    symbol = t.value
    t.type = literal_dict.get(symbol)

    # Keep a running count of unmatched '{' on the lexer object.
    step = {'{': 1, '}': -1}.get(symbol, 0)
    if step:
        t.lexer.curlyBalance += step
    return t
361 |
362 |
363 | # literals = '()[]{};.,&*+-~!/%<>^|?:='
364 |
# Newlines produce no token; they only advance the lexer's line counter.
def t_newline(t):
    r'\n'
    state = t.lexer
    state.lineno = state.lineno + 1


# Characters PLY skips between tokens: blanks and tabs.
t_ignore = ' \t'
372 |
373 |
def t_ERRORID(t):
    r"[^\s;}]+"
    # Catch-all declared after every other rule: a run of characters up to
    # the next whitespace, ';' or '}'.  The value becomes a
    # (text, "ERRORID") tuple so later stages can recognise it.
    bad_text = t.value
    t.value = (bad_text, "ERRORID")
    return t
378 |
379 |
def t_error(t):
    # PLY error hook: report the unmatched input, record it in lexErrorInfo
    # and resume scanning one character further on.
    # NOTE(review): per the PLY documentation t.value here is the whole
    # untokenized remainder of the input, so the message may be very long --
    # confirm whether only the first character was intended.
    lx = t.lexer
    error_column = find_column(lx.lexdata, t)
    message = "Unknown text '%s' at line: %d, column: %d" % (
        t.value, lx.lineno, error_column)
    print(message)
    record = {
        'pos': lx.lexpos,
        'lineno': lx.lineno,
        'column': error_column,
        'value': t.value
    }
    lexErrorInfo.append(record)
    lx.skip(1)
390 |
391 |
392 | orig_lexer = lex.lex()
393 |
394 |
395 | # pprint(lexer.__dict__)
396 |
class ProxyLexer(object):
    """Wrap a lexer so that one explicit end-of-file token follows the input.

    When the wrapped lexer runs dry, the first call to token() returns a
    synthetic token of type ``eoftoken``; the following call returns None and
    re-arms the proxy.  Any other attribute is looked up on the wrapped lexer.
    """

    def __init__(self, lexer, eoftoken):
        self.end = False      # True once the synthetic EOF token was handed out
        self.lexer = lexer
        self.eof = eoftoken

    def token(self):
        """Return the next token, the synthetic EOF token, or None."""
        tok = self.lexer.token()
        if tok is not None:
            return tok

        if self.end:
            # EOF was already delivered: report exhaustion and reset.
            self.end = False
            return None

        self.end = True
        eof_tok = lex.LexToken()
        eof_tok.type = self.eof
        eof_tok.value = None
        eof_tok.lexpos = self.lexer.lexpos
        eof_tok.lineno = self.lexer.lineno
        return eof_tok

    def __getattr__(self, name):
        # Only reached for attributes not found on the proxy itself.
        return getattr(self.lexer, name)
420 |
421 |
# Lexer exported to the parser: the PLY lexer wrapped so that it emits an
# explicit 'EOF' token.  The curly-bracket counter maintained by t_LITERAL
# lives on the wrapped lexer and starts at zero.
lexer = ProxyLexer(orig_lexer, 'EOF')
orig_lexer.curlyBalance = 0
424 |
425 |
def test_lex(c_file_name="test1.c"):
    """Tokenize ``c_file_name`` and print every token, for manual debugging.

    c_file_name -- path of the C source to scan; defaults to the file name
                   that used to be hard-coded.
    """
    # The context manager closes the file even if reading fails.
    with open(c_file_name, "r") as c_file:
        contents = c_file.read()

    lexer.input(contents)

    while True:
        tok = lexer.token()
        if not tok:
            break
        # Parenthesised single-argument form works as a Python 2 print
        # statement and as the Python 3 print function alike.
        print(tok)

# test_lex()
441 |
442 | # test_lex()
443 |
--------------------------------------------------------------------------------
/yyparse/ZCCparser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | from __future__ import print_function
3 | import ply.lex as lex
4 | import ply.yacc as yacc
5 | import ZCClex
6 | from symbol.symtab import symtab_declaration, symtab_function_definition
7 | from public.ZCCglobal import global_context, TreeNode
8 | from ZCClex import tokens
9 | from pprint import pprint
10 |
11 | aTuple = (1, 2)
12 |
13 |
def handleMissingSEMI(p, parentname="", checkPair=()):
    """Recover from a production whose closing ';' is missing.

    p          -- the production being reduced.
    parentname -- name of the enclosing construct, used in the message.
    checkPair  -- optional (index, value) pair; when given, recovery is only
                  attempted if p[index] == value.

    Returns [last_index] when the last symbol was replaced by ';' (callers
    pass this on as the list of children to drop), otherwise [].
    """
    tail = len(p) - 1
    pair_ok = not checkPair or p[checkPair[0]] == checkPair[1]
    if not (pair_ok and p[tail] != ';'):
        return []

    print(
        "Error type: missing semicolon before %s. at line: %d, lex pos: %d in %s.\n" %
        (p[tail].value, p.lineno(tail), p.lexpos(tail), parentname))
    p[tail] = ';'
    # Reset the error state so parsing continues normally.
    parser.errorCounter = 0
    parser.errok()
    return [tail]
27 |
28 |
def handleMissingRCURLYBRACKET(p):
    """Recover from a production whose closing '}' is missing.

    When the last symbol of ``p`` is not '}', a diagnostic is printed, the
    symbol is replaced by '}' and the parser's error state is cleared.
    """
    tail = len(p) - 1
    bracket_missing = p[tail] != '}'
    if not bracket_missing:
        return

    print(
        "Error type: missing right curly bracket before %s. at line: %d, lex pos: %d.\n" %
        (p[tail], p.lineno(tail), p.lexpos(tail)))
    p[tail] = '}'
    parser.errorCounter = 0
    parser.errok()
38 |
39 |
def handleErrorID(p, idx):
    """Report a malformed identifier at p[idx] and unwrap it.

    Malformed identifiers are (text, "ERRORID") tuples.  When p[idx] is one,
    a diagnostic is printed, p[idx] is replaced by the bare text and the
    parser's error counter is reset.  Anything else is left untouched.
    """
    if len(p) <= idx:
        return
    item = p[idx]
    if not (isinstance(item, tuple) and item[1] == "ERRORID"):
        return

    print("Syntax error at %r, at line: %d, lex pos: %d." %
          (item[0], p.lineno(idx), p.lexpos(idx)))
    print("Error type: wrong IDENTIFIER format.\n")
    p[idx] = item[0]
    parser.errorCounter = 0
48 |
49 |
def construct_node(p, parent_name, del_list=()):
    """Build the syntax-tree node for production ``p`` and store it in p[0].

    p           -- the production being reduced.
    parent_name -- label stored as the first element of the new node.
    del_list    -- indices of right-hand-side symbols to leave out of the
                   node.  The default is an immutable empty tuple rather than
                   a shared mutable list.

    The node's line number starts as the line of the last symbol and is
    lowered to the smallest line number found among the non-string children
    that are kept (a stored value of 0 is always replaced).
    """
    node = TreeNode()
    p[0] = node
    node.append(parent_name)
    node.lineno = p.lineno(len(p) - 1)
    for i in range(1, len(p)):
        if i in del_list:
            continue
        child = p[i]
        node.append(child)
        # Plain strings carry no line information.
        if isinstance(child, str):
            continue
        if node.lineno == 0 or node.lineno > child.lineno:
            node.lineno = child.lineno
64 |
65 |
def p_outer_translation_unit(p):
    """
    outer_translation_unit : translation_unit EOF
    """
    # The EOF marker is dropped; the translation unit is passed up unchanged.
    unit = p[1]
    p[0] = unit
72 |
73 |
def p_translation_unit(p):
    """
    translation_unit : external_declaration
                     | translation_unit external_declaration
    """
    size = len(p)
    if size == 2:
        # First declaration: start a new translation_unit node.
        construct_node(p, "translation_unit")
        return
    if size != 3:
        raise Exception("translation_unit just has two children")
    # Further declarations are appended to the existing node, keeping the
    # list flat instead of nesting one node per declaration.
    unit = p[1]
    unit.append(p[2])
    p[0] = unit
87 |
88 |
def p_external_declaration(p):
    """
    external_declaration : function_definition
                         | declaration
    """
    # The child node is passed up as-is, then registered in the global
    # symbol table according to its kind.
    node = p[1]
    p[0] = node
    kind = node[0]
    if kind == 'declaration':
        symtab_declaration(node, global_context)
    elif kind == 'function_definition':
        symtab_function_definition(node, global_context)
100 |
101 |
def p_declaration(p):
    """
    declaration : declaration_specifiers SEMICOLON
                | declaration_specifiers init_declarator_list SEMICOLON
                | declaration_specifiers error
                | declaration_specifiers init_declarator_list error
    """
    # The two `error` alternatives stand in for a missing ';'.
    # handleMissingSEMI repairs them and names the symbol to leave out.
    dropped = handleMissingSEMI(p, "declaration")
    construct_node(p, "declaration", dropped)
111 |
112 |
113 | # print(p[0])
114 |
115 | # def p_constant(p):
116 | # """
117 | # constant : NUMBER_CONSTANT
118 | # | CHARACTER_CONSTANT
119 | # """
120 | # construct_node(p, "constant")
121 |
122 |
def p_declaration_specifiers(p):
    """
    declaration_specifiers : type_specifier
                           | type_specifier type_qualifier
                           | type_qualifier type_specifier
                           | storage_class_specifier type_specifier
                           | storage_class_specifier type_specifier type_qualifier
                           | storage_class_specifier type_qualifier type_specifier
    """
    # Only these fixed specifier combinations are accepted; an earlier,
    # fully recursive form of this rule is no longer used.
    construct_node(p, "declaration_specifiers")
143 |
144 |
def p_primary_expression(p):
    """
    primary_expression : IDENTIFIER
                       | ERRORID
                       | NUMBER_CONSTANT
                       | CHARACTER_CONSTANT
                       | STRING_LITERAL
                       | LBRACKET expression RBRACKET
    """
    # A malformed identifier is reported and unwrapped first.
    handleErrorID(p, 1)
    if len(p) != 4:
        construct_node(p, "primary_expression")
        return
    # Parenthesised expression: the brackets are dropped.
    p[0] = p[2]
159 |
160 |
def p_postfix_expression(p):
    """
    postfix_expression : primary_expression
                       | postfix_expression LSQUAREBRACKET expression RSQUAREBRACKET
                       | postfix_expression LBRACKET RBRACKET
                       | postfix_expression LBRACKET argument_expression_list RBRACKET
                       | postfix_expression PERIOD IDENTIFIER
                       | postfix_expression PTR_OP IDENTIFIER
                       | postfix_expression PERIOD ERRORID
                       | postfix_expression PTR_OP ERRORID
                       | postfix_expression INC_OP
                       | postfix_expression DEC_OP
    """
    # A malformed member name after '.' or '->' is reported and unwrapped.
    handleErrorID(p, 3)
    if len(p) != 2:
        construct_node(p, "postfix_expression")
        return
    # Single child: pass it up without an extra tree level.
    p[0] = p[1]
179 |
180 |
def p_argument_expression_list(p):
    """
    argument_expression_list : assignment_expression
                             | argument_expression_list COMMA assignment_expression
    """
    if len(p) == 2:
        construct_node(p, "argument_expression_list")
        return
    # Extend the existing list node in place; the comma is kept as a child.
    arguments = p[1]
    arguments.append(p[2])
    arguments.append(p[3])
    p[0] = arguments
192 |
193 |
def p_unary_expression(p):
    """
    unary_expression : postfix_expression
                     | INC_OP unary_expression
                     | DEC_OP unary_expression
                     | unary_operator cast_expression
                     | SIZEOF unary_expression
                     | SIZEOF LBRACKET type_name RBRACKET
    """
    if len(p) != 2:
        construct_node(p, "unary_expression")
        return
    # Single child: pass it up without an extra tree level.
    p[0] = p[1]
207 |
208 |
def p_unary_operator(p):
    """
    unary_operator : AND
                   | STAR
                   | PLUS
                   | MINUS
                   | UNOT
                   | NOT
    """
    # The operator always gets a node of its own.
    construct_node(p, "unary_operator")
219 |
220 |
def p_cast_expression(p):
    """
    cast_expression : unary_expression
                    | LBRACKET type_name RBRACKET cast_expression
    """
    if len(p) != 2:
        construct_node(p, "cast_expression")
        return
    # Single child: pass it up without an extra tree level.
    p[0] = p[1]
231 |
232 |
def p_multiplicative_expression(p):
    """
    multiplicative_expression : cast_expression
                              | multiplicative_expression STAR cast_expression
                              | multiplicative_expression DIVIDE cast_expression
                              | multiplicative_expression MOD cast_expression
                              | multiplicative_expression STAR error cast_expression
                              | multiplicative_expression DIVIDE error cast_expression
                              | multiplicative_expression MOD error cast_expression
    """
    if len(p) == 2:
        # Single child: pass it up without an extra tree level.
        p[0] = p[1]
        return
    skipped = []
    if len(p) == 5:
        # Error-recovery alternative: report the stray token that follows
        # the operator and leave it out of the node.
        print(
            "Error type: error token after %s. at line: %d.\n" %
            (p[2], p.lineno(2)))
        skipped.append(3)
        parser.errorCounter = 0
    construct_node(p, "multiplicative_expression", skipped)
254 |
255 |
def p_additive_expression(p):
    """
    additive_expression : multiplicative_expression
                        | additive_expression PLUS multiplicative_expression
                        | additive_expression MINUS multiplicative_expression
                        | additive_expression PLUS error multiplicative_expression
                        | additive_expression MINUS error multiplicative_expression
    """
    if len(p) == 2:
        # Single child: pass it up without an extra tree level.
        p[0] = p[1]
        return
    skipped = []
    if len(p) == 5:
        # Error-recovery alternative: report the stray token that follows
        # the operator and leave it out of the node.
        print(
            "Error type: error token after %s. at line: %d.\n" %
            (p[2], p.lineno(2)))
        skipped.append(3)
        parser.errorCounter = 0
    construct_node(p, "additive_expression", skipped)
275 |
276 |
def p_shift_expression(p):
    """
    shift_expression : additive_expression
                     | shift_expression LEFT_OP additive_expression
                     | shift_expression RIGHT_OP additive_expression
                     | shift_expression LEFT_OP error additive_expression
                     | shift_expression RIGHT_OP error additive_expression
    """
    if len(p) == 2:
        # Single child: pass it up without an extra tree level.
        p[0] = p[1]
        return
    skipped = []
    if len(p) == 5:
        # Error-recovery alternative: report the stray token that follows
        # the operator and leave it out of the node.
        print(
            "Error type: error token after %s. at line: %d.\n" %
            (p[2], p.lineno(2)))
        skipped.append(3)
        parser.errorCounter = 0
    construct_node(p, "shift_expression", skipped)
296 |
297 |
def p_relational_expression(p):
    """
    relational_expression : shift_expression
                          | relational_expression LT shift_expression
                          | relational_expression GT shift_expression
                          | relational_expression LE_OP shift_expression
                          | relational_expression GE_OP shift_expression
                          | relational_expression LT error shift_expression
                          | relational_expression GT error shift_expression
                          | relational_expression LE_OP error shift_expression
                          | relational_expression GE_OP error shift_expression
    """
    if len(p) == 2:
        # Single child: pass it up without an extra tree level.
        p[0] = p[1]
        return
    skipped = []
    if len(p) == 5:
        # Error-recovery alternative: report the stray token that follows
        # the operator and leave it out of the node.
        print(
            "Error type: error token after %s. at line: %d.\n" %
            (p[2], p.lineno(2)))
        skipped.append(3)
        parser.errorCounter = 0
    construct_node(p, "relational_expression", skipped)
322 |
323 |
def p_equality_expression(p):
    """
    equality_expression : relational_expression
                        | equality_expression EQ_OP relational_expression
                        | equality_expression NE_OP relational_expression
                        | equality_expression EQ_OP error relational_expression
                        | equality_expression NE_OP error relational_expression
    """
    # The docstring above is the PLY grammar for this rule, not prose.
    symbol_count = len(p)
    if symbol_count == 2:
        # Single operand: hand the child up unchanged.
        p[0] = p[1]
        return

    skipped = []
    if symbol_count == 5:
        # Error-recovery alternative: report the stray input after the
        # operator, pass index 3 (the `error` symbol) to construct_node in
        # its delete list, and reset the shared error counter.
        print(
            "Error type: error token after %s. at line: %d.\n" %
            (p[2], p.lineno(2)))
        skipped.append(3)
        parser.errorCounter = 0
    construct_node(p, "equality_expression", skipped)
343 |
344 |
def p_and_expression(p):
    """
    and_expression : equality_expression
                   | and_expression AND equality_expression
                   | and_expression AND error equality_expression
    """
    # The docstring above is the PLY grammar for this rule, not prose.
    symbol_count = len(p)
    if symbol_count == 2:
        # Single operand: hand the child up unchanged.
        p[0] = p[1]
        return

    skipped = []
    if symbol_count == 5:
        # Error-recovery alternative: report the stray input after the
        # operator, pass index 3 (the `error` symbol) to construct_node in
        # its delete list, and reset the shared error counter.
        print(
            "Error type: error token after %s. at line: %d.\n" %
            (p[2], p.lineno(2)))
        skipped.append(3)
        parser.errorCounter = 0
    construct_node(p, "and_expression", skipped)
363 |
364 |
def p_exclusive_or_expression(p):
    """
    exclusive_or_expression : and_expression
                            | exclusive_or_expression XOR and_expression
                            | exclusive_or_expression XOR error and_expression
    """
    # The docstring above is the PLY grammar for this rule, not prose.
    symbol_count = len(p)
    if symbol_count == 2:
        # Single operand: hand the child up unchanged.
        p[0] = p[1]
        return

    skipped = []
    if symbol_count == 5:
        # Error-recovery alternative: report the stray input after the
        # operator, pass index 3 (the `error` symbol) to construct_node in
        # its delete list, and reset the shared error counter.
        print(
            "Error type: error token after %s. at line: %d.\n" %
            (p[2], p.lineno(2)))
        skipped.append(3)
        parser.errorCounter = 0
    construct_node(p, "exclusive_or_expression", skipped)
383 |
384 |
def p_inclusive_or_expression(p):
    """
    inclusive_or_expression : exclusive_or_expression
                            | inclusive_or_expression OR exclusive_or_expression
                            | inclusive_or_expression OR error exclusive_or_expression
    """
    # The docstring above is the PLY grammar for this rule, not prose.
    symbol_count = len(p)
    if symbol_count == 2:
        # Single operand: hand the child up unchanged.
        p[0] = p[1]
        return

    skipped = []
    if symbol_count == 5:
        # Error-recovery alternative: report the stray input after the
        # operator, pass index 3 (the `error` symbol) to construct_node in
        # its delete list, and reset the shared error counter.
        print(
            "Error type: error token after %s. at line: %d.\n" %
            (p[2], p.lineno(2)))
        skipped.append(3)
        parser.errorCounter = 0
    construct_node(p, "inclusive_or_expression", skipped)
403 |
404 |
def p_logical_and_expression(p):
    """
    logical_and_expression : inclusive_or_expression
                           | logical_and_expression AND_OP inclusive_or_expression
                           | logical_and_expression AND_OP error inclusive_or_expression
    """
    # The docstring above is the PLY grammar for this rule, not prose.
    symbol_count = len(p)
    if symbol_count == 2:
        # Single operand: hand the child up unchanged.
        p[0] = p[1]
        return

    skipped = []
    if symbol_count == 5:
        # Error-recovery alternative: report the stray input after the
        # operator, pass index 3 (the `error` symbol) to construct_node in
        # its delete list, and reset the shared error counter.
        print(
            "Error type: error token after %s. at line: %d.\n" %
            (p[2], p.lineno(2)))
        skipped.append(3)
        parser.errorCounter = 0
    construct_node(p, "logical_and_expression", skipped)
423 |
424 |
def p_logical_or_expression(p):
    """
    logical_or_expression : logical_and_expression
                          | logical_or_expression OR_OP logical_and_expression
                          | logical_or_expression OR_OP error logical_and_expression
    """
    # The docstring above is the PLY grammar for this rule, not prose.
    symbol_count = len(p)
    if symbol_count == 2:
        # Single operand: hand the child up unchanged.
        p[0] = p[1]
        return

    skipped = []
    if symbol_count == 5:
        # Error-recovery alternative: report the stray input after the
        # operator, pass index 3 (the `error` symbol) to construct_node in
        # its delete list, and reset the shared error counter.
        print(
            "Error type: error token after %s. at line: %d.\n" %
            (p[2], p.lineno(2)))
        skipped.append(3)
        parser.errorCounter = 0
    construct_node(p, "logical_or_expression", skipped)
443 |
444 |
def p_conditional_expression(p):
    """
    conditional_expression : logical_or_expression
                           | logical_or_expression QUESTIONMARK expression COLON conditional_expression
    """
    # Grammar lives in the docstring above.  Only the ternary alternative
    # gets a node of its own; a bare operand is passed straight through.
    if len(p) != 2:
        construct_node(p, "conditional_expression")
    else:
        p[0] = p[1]
454 |
455 |
def p_assignment_expression(p):
    """
    assignment_expression : conditional_expression
                          | unary_expression assignment_operator assignment_expression
    """
    # Grammar lives in the docstring above.  A real assignment gets a node;
    # a lone conditional_expression is passed straight through.
    if len(p) != 2:
        construct_node(p, "assignment_expression")
    else:
        p[0] = p[1]
465 |
466 |
def p_assignment_operator(p):
    """
    assignment_operator : ASSIGN
                        | MUL_ASSIGN
                        | DIV_ASSIGN
                        | MOD_ASSIGN
                        | ADD_ASSIGN
                        | SUB_ASSIGN
                        | LEFT_ASSIGN
                        | RIGHT_ASSIGN
                        | AND_ASSIGN
                        | XOR_ASSIGN
                        | OR_ASSIGN
    """
    # The docstring above is the grammar PLY reads for this rule.  Every
    # alternative is a single operator token; the production is handed to
    # construct_node under the label "assignment_operator".
    construct_node(p, "assignment_operator")
482 |
483 |
def p_expression(p):
    """
    expression : assignment_expression
               | expression COMMA assignment_expression
    """
    # Grammar lives in the docstring above.  A comma expression gets its own
    # node; a single assignment_expression is passed straight through.
    # (An earlier, disabled variant flattened comma expressions by appending
    # p[3] to p[1] instead of nesting.)
    if len(p) != 2:
        construct_node(p, "expression")
    else:
        p[0] = p[1]
501 |
502 |
def p_constant_expression(p):
    """
    constant_expression : conditional_expression
    """
    # Grammar lives in the docstring above.  Unlike the pass-through
    # expression rules, this one always calls construct_node, with the label
    # "constant_expression".
    construct_node(p, "constant_expression")
508 |
509 |
def p_init_declarator_list(p):
    """
    init_declarator_list : init_declarator
                         | init_declarator_list COMMA init_declarator
    """
    # Grammar lives in the docstring above.
    if len(p) == 2:
        # First element: start a fresh list node.
        construct_node(p, "init_declarator_list")
        return

    # Recursive case: grow the existing list node in place, keeping the
    # comma token as a child alongside the new declarator.
    accumulated = p[1]
    for extra in (p[2], p[3]):
        accumulated.append(extra)
    p[0] = accumulated
521 |
522 |
def p_init_declarator(p):
    """
    init_declarator : declarator
                    | declarator ASSIGN initializer
    """
    # Grammar lives in the docstring above.  Both alternatives (with and
    # without an initializer) go to construct_node as "init_declarator".
    construct_node(p, "init_declarator")
529 |
530 |
def p_storage_class_specifier(p):
    """
    storage_class_specifier : TYPEDEF
                            | EXTERN
                            | STATIC
    """
    # Grammar lives in the docstring above; only these three storage classes
    # are accepted.  The token is handed to construct_node under the label
    # "storage_class_specifier".
    construct_node(p, "storage_class_specifier")
538 |
539 |
def p_integer_type(p):
    """
    integer_type : CHAR
                 | SHORT
                 | INT
                 | LONG
                 | UNSIGNED integer_type
                 | SIGNED integer_type
                 | SHORT integer_type
                 | LONG integer_type
    """
    # Grammar lives in the docstring above.
    if len(p) == 2:
        # Base keyword: start a new integer_type node.
        construct_node(p, "integer_type")
        return

    # Modifier followed by an existing integer_type: insert the modifier at
    # index 1 of the inner node (directly after its first element) and
    # reuse that node as the result.
    modifier, inner = p[1], p[2]
    inner.insert(1, modifier)
    p[0] = inner
557 |
558 |
def p_type_specifier(p):
    """type_specifier : VOID
                      | integer_type
                      | FLOAT
                      | DOUBLE
                      | struct_or_union_specifier
                      | enum_specifier
                      | TYPE_NAME
    """
    # Grammar lives in the docstring above (TYPE_NAME is an active
    # alternative).  The single matched symbol is handed to construct_node
    # under the label "type_specifier".
    # | TYPE_NAME
    construct_node(p, "type_specifier")
570 |
571 |
def p_struct_or_union_specifier(p):
    """
    struct_or_union_specifier : struct_or_union IDENTIFIER LCURLYBRACKET struct_declaration_list RCURLYBRACKET
                              | struct_or_union TYPE_NAME LCURLYBRACKET struct_declaration_list RCURLYBRACKET
                              | struct_or_union ERRORID LCURLYBRACKET struct_declaration_list RCURLYBRACKET
                              | struct_or_union LCURLYBRACKET struct_declaration_list RCURLYBRACKET
                              | struct_or_union IDENTIFIER
                              | struct_or_union TYPE_NAME
                              | struct_or_union ERRORID
    """
    # Grammar lives in the docstring above.  The tag, when present, sits at
    # index 2 and may be an ERRORID token.
    # NOTE(review): handleErrorID is defined outside this view; presumably it
    # reports a malformed identifier found at the given index - confirm.
    handleErrorID(p, 2)
    construct_node(p, "struct_or_union_specifier")
584 |
585 |
def p_struct_or_union(p):
    """
    struct_or_union : STRUCT
                    | UNION
    """
    # Grammar lives in the docstring above.  The keyword is handed to
    # construct_node under the label "struct_or_union".
    construct_node(p, "struct_or_union")
592 |
593 |
def p_struct_declaration_list(p):
    """struct_declaration_list : struct_declaration
                               | struct_declaration_list struct_declaration
    """
    # Grammar lives in the docstring above.
    symbol_count = len(p)
    if symbol_count == 2:
        # First member: start a fresh list node.
        construct_node(p, "struct_declaration_list")
        return
    if symbol_count == 3:
        # Later member: append to the existing list node and reuse it.
        members = p[1]
        members.append(p[2])
        p[0] = members
603 |
604 |
def p_struct_declaration(p):
    """struct_declaration : specifier_qualifier_list struct_declarator_list SEMICOLON
                          | specifier_qualifier_list struct_declarator_list error
    """
    # Grammar lives in the docstring above.  The last symbol is either the
    # terminating ';' or the `error` symbol of the recovery alternative.
    tail = len(p) - 1
    if p[tail] != ';':
        # Recovery alternative: announce it, reset the shared error counter
        # and pass the trailing index to construct_node in its delete list.
        print("struct_declaration")
        parser.errorCounter = 0
        dropped = [tail]
    else:
        dropped = []
    construct_node(p, "struct_declaration", dropped)
616 |
617 |
618 | # print(p[0])
619 |
620 |
def p_specifier_qualifier_list(p):
    """
    specifier_qualifier_list : type_specifier
                             | type_specifier type_qualifier
                             | type_qualifier type_specifier
    """
    # Grammar lives in the docstring above: one type specifier with at most
    # one qualifier on either side.  The production is handed to
    # construct_node under the label "specifier_qualifier_list".
    construct_node(p, "specifier_qualifier_list")
628 |
629 |
def p_struct_declarator_list(p):
    """
    struct_declarator_list : declarator
                           | struct_declarator_list COMMA declarator
    """
    # Grammar lives in the docstring above.
    if len(p) == 2:
        # First declarator: start a fresh list node.
        construct_node(p, "struct_declarator_list")
        return

    # Recursive case: grow the existing list node in place, keeping the
    # comma token as a child alongside the new declarator.
    accumulated = p[1]
    for extra in (p[2], p[3]):
        accumulated.append(extra)
    p[0] = accumulated
641 |
642 |
643 | # def p_struct_declarator(p):
644 | # """
645 | # struct_declarator : declarator
646 | # | COLON constant_expression
647 | # | declarator COLON constant_expression
648 | # """
649 | # construct_node(p, "struct_declarator")
650 |
651 |
def p_enum_specifier(p):
    """
    enum_specifier : ENUM LCURLYBRACKET enumerator_list RCURLYBRACKET
                   | ENUM IDENTIFIER LCURLYBRACKET enumerator_list RCURLYBRACKET
                   | ENUM IDENTIFIER
                   | ENUM ERRORID LCURLYBRACKET enumerator_list RCURLYBRACKET
                   | ENUM ERRORID
    """
    # Grammar lives in the docstring above.  The enum tag, when present, sits
    # at index 2 and may be an ERRORID token.
    # NOTE(review): handleErrorID is defined outside this view; presumably it
    # reports a malformed identifier found at the given index - confirm.
    handleErrorID(p, 2)
    construct_node(p, "enum_specifier")
662 |
663 |
def p_enumerator_list(p):
    """
    enumerator_list : enumerator
                    | enumerator_list COMMA enumerator
    """
    # Grammar lives in the docstring above.
    if len(p) == 2:
        # First enumerator: start a fresh list node.
        construct_node(p, "enumerator_list")
        return

    # Recursive case: grow the existing list node in place, keeping the
    # comma token as a child alongside the new enumerator.
    accumulated = p[1]
    for extra in (p[2], p[3]):
        accumulated.append(extra)
    p[0] = accumulated
675 |
676 |
def p_enumerator(p):
    """
    enumerator : IDENTIFIER
               | IDENTIFIER ASSIGN constant_expression
               | ERRORID
               | ERRORID ASSIGN constant_expression
    """
    # Grammar lives in the docstring above.  The enumerator name is at
    # index 1 and may be an ERRORID token.
    # NOTE(review): handleErrorID is defined outside this view; presumably it
    # reports a malformed identifier found at the given index - confirm.
    handleErrorID(p, 1)
    construct_node(p, "enumerator")
686 |
687 |
def p_type_qualifier(p):
    """
    type_qualifier : CONST
    """
    # Grammar lives in the docstring above; CONST is the only qualifier this
    # grammar accepts.  Handed to construct_node as "type_qualifier".
    construct_node(p, "type_qualifier")
693 |
694 |
def p_declarator(p):
    """
    declarator : pointer direct_declarator
               | direct_declarator
    """
    # Grammar lives in the docstring above.  Both alternatives (with and
    # without a pointer prefix) go to construct_node as "declarator".
    construct_node(p, "declarator")
701 |
702 |
def p_direct_declarator(p):
    """
    direct_declarator : direct_declarator LSQUAREBRACKET constant_expression RSQUAREBRACKET
                      | direct_declarator LSQUAREBRACKET RSQUAREBRACKET
                      | direct_declarator LBRACKET parameter_type_list RBRACKET
                      | direct_declarator LBRACKET RBRACKET
                      | IDENTIFIER
                      | LBRACKET declarator RBRACKET
                      | ERRORID
    """
    # Grammar lives in the docstring above: array suffixes, function
    # suffixes, a plain name, or a parenthesised declarator.  Index 1 may be
    # an ERRORID token.
    # NOTE(review): handleErrorID is defined outside this view; presumably it
    # reports a malformed identifier found at the given index - confirm.
    handleErrorID(p, 1)
    construct_node(p, "direct_declarator")
715 |
716 |
def p_pointer(p):
    """
    pointer : STAR
            | STAR CONST
            | pointer STAR
            | pointer STAR CONST
    """
    # Grammar lives in the docstring above.
    head = p[1]
    if head[0] != 'pointer':
        # Leading STAR (optionally followed by CONST): start a new node.
        construct_node(p, "pointer")
        return

    # Existing pointer node followed by another STAR [CONST]: extend the
    # node in place rather than nesting a new one.
    head.append(p[2])
    if len(p) == 4:
        head.append(p[3])
    p[0] = head
731 |
732 |
def p_type_qualifier_list(p):
    """
    type_qualifier_list : type_qualifier
                        | type_qualifier_list type_qualifier
    """
    # Grammar lives in the docstring above.  Unlike the other *_list rules in
    # this module, the recursive alternative is not flattened here: every
    # reduction calls construct_node with the label "type_qualifier_list".
    construct_node(p, "type_qualifier_list")
739 |
740 |
def p_parameter_type_list(p):
    """
    parameter_type_list : parameter_list
                        | parameter_list COMMA ELLIPSIS
    """
    # Grammar lives in the docstring above; the second alternative is the
    # variadic form ending in ELLIPSIS.  Handed to construct_node as
    # "parameter_type_list".
    construct_node(p, "parameter_type_list")
747 |
748 |
def p_parameter_list(p):
    """
    parameter_list : parameter_declaration
                   | parameter_list COMMA parameter_declaration
    """
    # Grammar lives in the docstring above.
    if len(p) == 2:
        # First parameter: start a fresh list node.
        construct_node(p, "parameter_list")
        return

    # Recursive case: grow the existing list node in place, keeping the
    # comma token as a child alongside the new parameter.
    accumulated = p[1]
    for extra in (p[2], p[3]):
        accumulated.append(extra)
    p[0] = accumulated
760 |
761 |
def p_parameter_declaration(p):
    """
    parameter_declaration : declaration_specifiers declarator
                          | declaration_specifiers abstract_declarator
                          | declaration_specifiers
    """
    # Grammar lives in the docstring above: a named parameter, an unnamed
    # one with an abstract declarator, or specifiers alone.  Handed to
    # construct_node as "parameter_declaration".
    construct_node(p, "parameter_declaration")
769 |
770 |
def p_type_name(p):
    """
    type_name : specifier_qualifier_list
              | specifier_qualifier_list abstract_declarator
    """
    # Grammar lives in the docstring above.  Handed to construct_node under
    # the label "type_name".
    construct_node(p, "type_name")
777 |
778 |
def p_abstract_declarator(p):
    """
    abstract_declarator : pointer
                        | direct_abstract_declarator
                        | pointer direct_abstract_declarator
    """
    # Grammar lives in the docstring above (a declarator with no name).
    # Handed to construct_node under the label "abstract_declarator".
    construct_node(p, "abstract_declarator")
786 |
787 |
def p_direct_abstract_declarator(p):
    """
    direct_abstract_declarator : LBRACKET abstract_declarator RBRACKET
                               | LSQUAREBRACKET RSQUAREBRACKET
                               | LSQUAREBRACKET constant_expression RSQUAREBRACKET
                               | direct_abstract_declarator LSQUAREBRACKET RSQUAREBRACKET
                               | direct_abstract_declarator LSQUAREBRACKET constant_expression RSQUAREBRACKET
                               | LBRACKET RBRACKET
                               | LBRACKET parameter_type_list RBRACKET
                               | direct_abstract_declarator LBRACKET RBRACKET
                               | direct_abstract_declarator LBRACKET parameter_type_list RBRACKET
    """
    # Grammar lives in the docstring above: parenthesised, array and function
    # forms of a nameless declarator.  In this grammar LBRACKET/RBRACKET are
    # the round parentheses and LSQUAREBRACKET/RSQUAREBRACKET the square
    # ones, as their use in the alternatives shows.
    construct_node(p, "direct_abstract_declarator")
801 |
802 |
def p_initializer(p):
    """
    initializer : assignment_expression
                | LCURLYBRACKET initializer_list RCURLYBRACKET
                | LCURLYBRACKET initializer_list COMMA RCURLYBRACKET
    """
    # Grammar lives in the docstring above; the third alternative allows a
    # trailing comma inside the braces.  Handed to construct_node as
    # "initializer".
    construct_node(p, "initializer")
810 |
811 |
def p_initiazer_list(p):
    """
    initializer_list : initializer
                     | initializer_list COMMA initializer
    """
    # Grammar lives in the docstring above.  The function name keeps its
    # historical spelling; the rule itself is named by the docstring.
    if len(p) == 2:
        # First initializer: start a fresh list node.
        construct_node(p, "initializer_list")
        return

    # Recursive case: grow the existing list node in place, keeping the
    # comma token as a child alongside the new initializer.
    accumulated = p[1]
    for extra in (p[2], p[3]):
        accumulated.append(extra)
    p[0] = accumulated
823 |
824 |
def p_statement(p):
    """
    statement : labeled_statement
              | compound_statement
              | expression_statement
              | selection_statement
              | iteration_statement
              | jump_statement
    """
    # Grammar lives in the docstring above.  The single child is not passed
    # through; construct_node is called with the label "statement".
    construct_node(p, "statement")
835 |
836 |
def p_labeled_statement(p):
    """
    labeled_statement : CASE constant_expression COLON statement
                      | DEFAULT COLON statement
    """
    # Grammar lives in the docstring above: only `case` and `default` labels
    # are active.  The identifier-label alternatives and their ERRORID
    # handling are disabled below.
    # | IDENTIFIER COLON statement
    # | ERRORID COLON statement
    # handleErrorID(p, 1)
    construct_node(p, "labeled_statement")
846 |
847 |
def p_compound_statement(p):
    """
    compound_statement : LCURLYBRACKET RCURLYBRACKET
                       | LCURLYBRACKET statement_list RCURLYBRACKET
                       | LCURLYBRACKET declaration_list RCURLYBRACKET
                       | LCURLYBRACKET declaration_list statement_list RCURLYBRACKET
                       | LCURLYBRACKET error
                       | LCURLYBRACKET statement_list error
                       | LCURLYBRACKET declaration_list error
                       | LCURLYBRACKET declaration_list statement_list error
    """
    # Grammar lives in the docstring above.  The last four alternatives
    # mirror the first four with `error` in place of the closing brace.
    # NOTE(review): handleMissingRCURLYBRACKET is defined outside this view;
    # presumably it reports the missing '}' for those alternatives - confirm.
    handleMissingRCURLYBRACKET(p)
    construct_node(p, "compound_statement")
861 |
862 |
def p_declaration_list(p):
    """
    declaration_list : declaration
                     | declaration_list declaration
    """
    # Grammar lives in the docstring above.
    symbol_count = len(p)
    if symbol_count == 2:
        # First declaration: start a fresh list node.
        construct_node(p, "declaration_list")
        return
    if symbol_count == 3:
        # Later declaration: append to the existing list node and reuse it.
        declarations = p[1]
        declarations.append(p[2])
        p[0] = declarations
873 |
874 |
def p_statement_list(p):
    """
    statement_list : statement
                   | statement_list statement
    """
    # Grammar lives in the docstring above.
    symbol_count = len(p)
    if symbol_count == 2:
        # First statement: start a fresh list node.
        construct_node(p, "statement_list")
    if symbol_count == 3:
        # Later statement: append to the existing list node and reuse it.
        statements = p[1]
        statements.append(p[2])
        p[0] = statements
885 |
886 |
def p_expression_statement(p):
    """
    expression_statement : SEMICOLON
                         | expression SEMICOLON
                         | expression error
    """

    # Grammar lives in the docstring above; the third alternative matches an
    # expression whose terminating ';' is missing.
    # The value returned by handleMissingSEMI is forwarded to construct_node
    # as its delete list.
    # NOTE(review): handleMissingSEMI is defined outside this view.  The
    # inline code it replaced printed "Missing semicolon", queued the last
    # index for deletion and reset parser.errorCounter when the final symbol
    # was not ';' - presumably the helper does the same; confirm.
    del_list = handleMissingSEMI(p, "expression_statement")
    construct_node(p, "expression_statement", del_list)
903 |
904 |
def p_selection_statement(p):
    """
    selection_statement : IF LBRACKET expression RBRACKET statement
                        | IF LBRACKET expression RBRACKET statement ELSE statement
                        | SWITCH LBRACKET expression RBRACKET statement
    """
    # Grammar lives in the docstring above (if, if/else and switch).  Handed
    # to construct_node under the label "selection_statement".
    construct_node(p, "selection_statement")
912 |
913 |
def p_iteration_statement(p):
    """
    iteration_statement : WHILE LBRACKET expression RBRACKET statement
                        | DO statement WHILE LBRACKET expression RBRACKET SEMICOLON
                        | DO statement WHILE LBRACKET expression RBRACKET error
                        | FOR LBRACKET expression_statement expression_statement RBRACKET statement
                        | FOR LBRACKET expression_statement expression_statement expression RBRACKET statement
    """
    # Grammar lives in the docstring above; the `DO ... error` alternative
    # matches a do/while whose terminating ';' is missing.
    # The value returned by handleMissingSEMI is forwarded to construct_node
    # as its delete list.
    # NOTE(review): handleMissingSEMI is defined outside this view.  The
    # inline code it replaced only ran the missing-semicolon check when
    # p[1] == 'do', so the (1, 'do') argument presumably expresses that same
    # precondition - confirm.
    del_list = handleMissingSEMI(p, "iteration_statement", (1, 'do'))
    construct_node(p, "iteration_statement", del_list)
931 |
932 |
933 | # print(p[0])
934 |
def p_jump_statement(p):
    """
    jump_statement : CONTINUE SEMICOLON
                   | BREAK SEMICOLON
                   | RETURN SEMICOLON
                   | RETURN expression SEMICOLON
                   | CONTINUE error
                   | BREAK error
                   | RETURN error
                   | RETURN expression error
    """
    # Grammar lives in the docstring above; the last four alternatives mirror
    # the first four with `error` in place of the terminating ';'.
    # The value returned by handleMissingSEMI is forwarded to construct_node
    # as its delete list.
    # NOTE(review): handleMissingSEMI is defined outside this view.  The
    # inline code it replaced printed "Missing semicolon", queued the last
    # index for deletion and reset parser.errorCounter when the final symbol
    # was not ';' - presumably the helper does the same; confirm.
    del_list = handleMissingSEMI(p, "jump_statement")
    construct_node(p, "jump_statement", del_list)
955 |
956 |
957 | # print(p[0])
958 |
def p_function_definition(p):
    """
    function_definition : declaration_specifiers declarator compound_statement
    """
    # Grammar lives in the docstring above; declaration specifiers are
    # mandatory in this grammar.  Handed to construct_node under the label
    # "function_definition".
    construct_node(p, "function_definition")
964 |
965 |
def p_error(p):
    # Syntax-error hook called by the PLY parser.
    #
    # p is the offending token, or a false value when the parser ran out of
    # input.  Three situations are handled:
    #   * no token at all         -> report end of file and give up;
    #   * the lexer's 'EOF' token -> if braces are still open, synthesize a
    #     closing '}' and hand it back as the next lookahead;
    #   * any other token         -> report it; the first error is left to
    #     the grammar's `error` productions, a repeated one triggers panic
    #     mode (skip to the next ';' or '}' and restart the parser).
    if not p:
        print("End of file.")
        return

    if p.type == 'EOF':
        if ZCClex.lexer.lexer.curlyBalance > 0:
            # errok() tells PLY that recovery is done, so the token returned
            # here becomes the next lookahead.  Its type must be the grammar
            # terminal RCURLYBRACKET (it was previously misspelled, which
            # produced a token no rule could ever match).
            # NOTE(review): stock PLY's LexToken has no constructor taking
            # (type, value, lineno, lexpos); confirm the bundled lex.py
            # provides one.
            parser.errok()
            return lex.LexToken(
                'RCURLYBRACKET',
                '}',
                p.lexer.lineno,
                p.lexer.lexpos)
        return

    print("Syntax error at %r, at line: %d, column: %d." % (
        p.value, p.lexer.lineno, ZCClex.find_column(p.lexer.lexdata, p)))
    if p.type == 'IDENTIFIER':
        # NOTE(review): indexes p.value, so IDENTIFIER values are presumably
        # a sequence whose second item is the name - confirm in ZCClex.
        print("Undefined Type " + p.value[1])

    if parser.errorCounter > 0:
        # A previous error has not been cleared by a recovery production:
        # discard input up to the next statement/block boundary and restart.
        print("In panic mode\n")
        while True:
            tok = parser.token()
            if not tok or tok.type == 'SEMICOLON' or tok.type == 'RCURLYBRACKET':
                break
        parser.restart()
    else:
        # First error: count it and let PLY's default recovery (the `error`
        # productions) deal with it.
        parser.errorCounter += 1
        return p
997 |
998 |
def printAST(p, n=0):
    # Pretty-print an AST to stdout, one node per line.
    #
    # p: a tree node (a non-empty, list-like object with a `lineno`
    #    attribute, whose first item is the label and the rest its children)
    #    or a leaf value; None prints nothing.
    # n: nesting depth, rendered as one ' |' per level.
    if p is None:
        return

    indent = ' |' * n
    if not isinstance(p, str) and len(p) > 0:
        # Inner node: label line, then every child one level deeper.
        print('line:%02d' % p.lineno, end='')
        print(indent, end='-')
        print(p[0])
        for child in p[1:]:
            printAST(child, n + 1)
        return

    # Leaf (string or empty node): no line number is available.
    print('line:xx', end='')
    print(indent, end='-')
    print(p)
1012 |
1013 |
# Build the parser with 'outer_translation_unit' as the start symbol.
# NOTE(review): debug=True turns on PLY's debugging output; consider turning
# it off outside development.
parser = yacc.yacc(start='outer_translation_unit', debug=True)
# Error bookkeeping shared by the rule actions and p_error: p_error
# increments it on a syntax error and the error-recovery productions reset
# it to 0.
parser.errorCounter = 0
1016 |
if __name__ == "__main__":
    # Manual smoke test: parse the bundled sample program and dump its AST.
    # (An earlier interactive variant prompted for the file name and could
    # parse with ZCClex.orig_lexer instead.)
    c_file_name = "test1.c"

    # Read through a context manager so the handle is always closed.
    with open(c_file_name, "r") as c_file:
        contents = c_file.read()

    result = parser.parse(contents, lexer=ZCClex.lexer)
    printAST(result)
1032 |
--------------------------------------------------------------------------------
/yyparse/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
--------------------------------------------------------------------------------
/yyparse/missRightCurly.c:
--------------------------------------------------------------------------------
1 | //
2 | //int a, b, c;
3 | //int main(int argc, char *argv[]) {
4 | // c = a + b;
5 | //
6 | //
7 | //int b, c;
8 |
9 | int f(){
10 |
11 | int d;
--------------------------------------------------------------------------------
/yyparse/missSEMI.c:
--------------------------------------------------------------------------------
1 |
2 | int b
3 |
4 | int main(int argc, char *argv[]) {
5 | int $a;
6 | c = a + b;
7 | d = a +/ b;
8 | d = a -/ b;
9 | d = a ^^ / b;
10 | d = a *|b;
11 | d = a >/ b;
12 | d = a b;
13 | d = a <=/ b;
14 | d = a < b;
15 | d = a ==/ b;
16 | d = a &/ b;
17 | d = a ^/ b;
18 | d = a |/ b;
19 | d = a &&| b;
20 | d = a ||| b;
21 |
22 |
23 | a = b + c
24 | printf("asdf\n")
25 | b = a + c;
26 | printf("%d\n", a);
27 |
--------------------------------------------------------------------------------
/yyparse/ply/__init__.py:
--------------------------------------------------------------------------------
1 | # PLY package
2 | # Author: David Beazley (dave@dabeaz.com)
3 |
# Version string of this bundled PLY copy.
__version__ = '3.7'
# Names exported by `from ply import *`.
__all__ = ['lex','yacc']
6 |
--------------------------------------------------------------------------------
/yyparse/ply/cpp.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # cpp.py
3 | #
4 | # Author: David Beazley (http://www.dabeaz.com)
5 | # Copyright (C) 2007
6 | # All rights reserved
7 | #
8 | # This module implements an ANSI-C style lexical preprocessor for PLY.
9 | # -----------------------------------------------------------------------------
10 | from __future__ import generators
11 |
12 | # -----------------------------------------------------------------------------
13 | # Default preprocessor lexer definitions. These tokens are enough to get
14 | # a basic preprocessor working. Other modules may import these if they want
15 | # -----------------------------------------------------------------------------
16 |
# Token names declared for the default preprocessor lexer; the t_CPP_*
# definitions below correspond to these names.
tokens = (
   'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND'
)

# Single characters that PLY returns as literal tokens (type == value).
literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
22 |
23 | # Whitespace
def t_CPP_WS(t):
    r'\s+'
    # The raw string above is the token's regular expression.  Whitespace is
    # kept as a token; newlines inside it advance the lexer's line counter.
    newline_count = t.value.count("\n")
    t.lexer.lineno += newline_count
    return t
28 |
# '#' and '##' as they appear in preprocessor text.
t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'

# Identifier: a letter or underscore followed by word characters.
t_CPP_ID = r'[A-Za-z_][\w_]*'
34 |
35 | # Integer literal
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)'
    # The raw string above is the token's regular expression: hexadecimal or
    # decimal digits with an optional u/l suffix.  The token is returned
    # unchanged (its value stays a string).
    return t

# Register the rule under the t_ name PLY looks for.
t_CPP_INTEGER = CPP_INTEGER
41 |
# Floating literal: digits with a fraction and optional exponent, or digits
# with a mandatory exponent, followed by an optional l/L/f/F suffix.
# NOTE(review): the pattern contains blanks around '|'; this relies on PLY
# compiling token patterns in verbose mode - confirm for the bundled lex.py.
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
44 |
45 | # String literal
def t_CPP_STRING(t):
    r'\"([^\\\n]|(\\(.|\n)))*?\"'
    # The raw string above is the token's regular expression.  A string can
    # span lines via backslash-newline, so any newlines it contains advance
    # the lexer's line counter.
    newline_count = t.value.count("\n")
    t.lexer.lineno += newline_count
    return t
50 |
51 | # Character constant 'c' or L'c'
def t_CPP_CHAR(t):
    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
    # The raw string above is the token's regular expression ('c' or L'c').
    # Any newlines inside the literal advance the lexer's line counter.
    newline_count = t.value.count("\n")
    t.lexer.lineno += newline_count
    return t
56 |
57 | # Comment
def t_CPP_COMMENT1(t):
    r'(/\*(.|\n)*?\*/)'
    # The raw string above is the token's regular expression (block comment).
    # The comment is turned into whitespace: as many newlines as it spanned,
    # so line numbering is kept, or a single blank when it sat on one line.
    line_breaks = t.value.count("\n")
    t.lexer.lineno += line_breaks
    t.type = 'CPP_WS'
    if line_breaks:
        t.value = '\n' * line_breaks
    else:
        t.value = ' '
    return t
65 |
66 | # Line comment
def t_CPP_COMMENT2(t):
    r'(//.*?(\n|$))'
    # The raw string above is the token's regular expression (line comment,
    # including its terminating newline when there is one).
    # Count the consumed newline like the other rules do, so later tokens
    # keep correct line numbers.
    t.lexer.lineno += t.value.count("\n")
    # Replace the comment with a single newline of whitespace ...
    t.type = 'CPP_WS'; t.value = '\n'
    # ... and return it: without this the token is discarded and the line
    # break that ended the comment is lost.
    return t
71 |
def t_error(t):
    # Lexer fallback for unmatched input: emit the first unmatched character
    # as a token whose type and value are that character, and advance the
    # lexer by one position.
    first_char = t.value[0]
    t.type = first_char
    t.value = first_char
    t.lexer.skip(1)
    return t
77 |
78 | import re
79 | import copy
80 | import time
81 | import os.path
82 |
83 | # -----------------------------------------------------------------------------
84 | # trigraph()
85 | #
86 | # Given an input string, this function replaces all trigraph sequences.
87 | # The following mapping is used:
88 | #
89 | # ??= #
90 | # ??/ \
91 | # ??' ^
92 | # ??( [
93 | # ??) ]
94 | # ??! |
95 | # ??< {
96 | # ??> }
97 | # ??- ~
98 | # -----------------------------------------------------------------------------
99 |
# Matches '??' followed by one of the nine trigraph selector characters.
_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')
# Selector character -> replacement character.
_trigraph_rep = dict(zip("=/'()!<>-", "#\\^[]|{}~"))

def trigraph(input):
    # Return `input` with every trigraph sequence replaced by the single
    # character it stands for; all other text is left untouched.
    def _replacement(match):
        selector = match.group()[-1]
        return _trigraph_rep[selector]

    return _trigraph_pat.sub(_replacement, input)
115 |
116 | # ------------------------------------------------------------------
117 | # Macro object
118 | #
119 | # This object holds information about preprocessor macros
120 | #
121 | # .name - Macro name (string)
122 | # .value - Macro value (a list of tokens)
123 | # .arglist - List of argument names
124 | # .variadic - Boolean indicating whether or not variadic macro
125 | # .vararg - Name of the variadic parameter
126 | #
127 | # When a macro is created, the macro replacement token sequence is
128 | # pre-scanned and used to create patch lists that are later used
129 | # during macro expansion
130 | # ------------------------------------------------------------------
131 |
class Macro(object):
    # Plain record describing one preprocessor macro.
    def __init__(self,name,value,arglist=None,variadic=False):
        self.name, self.value = name, value
        self.arglist, self.variadic = arglist, variadic
        if variadic:
            # The variadic parameter is always the last declared argument.
            # The attribute is only created for variadic macros.
            self.vararg = arglist[-1]
        self.source = None
141 |
142 | # ------------------------------------------------------------------
143 | # Preprocessor object
144 | #
145 | # Object representing a preprocessor. Contains macro definitions,
146 | # include directories, and other information
147 | # ------------------------------------------------------------------
148 |
149 | class Preprocessor(object):
def __init__(self,lexer=None):
    # Fall back to the module-level default lexer when none is supplied.
    self.lexer = lex.lexer if lexer is None else lexer
    self.macros = { }
    self.path = []
    self.temp_path = []

    # Probe the lexer for selected tokens (must precede any define()).
    self.lexprobe()

    # Predefine __DATE__ and __TIME__ from one snapshot of the local time.
    now = time.localtime()
    for macro_name, time_format in (("__DATE__", "%b %d %Y"),
                                    ("__TIME__", "%H:%M:%S")):
        self.define("%s \"%s\"" % (macro_name, time.strftime(time_format, now)))
    self.parser = None
165 |
166 | # -----------------------------------------------------------------------------
167 | # tokenize()
168 | #
169 | # Utility function. Given a string of text, tokenize into a list of tokens
170 | # -----------------------------------------------------------------------------
171 |
def tokenize(self,text):
    # Run `text` through self.lexer and return all tokens as a list.
    # Collection stops at the first false value the lexer returns.
    lexer = self.lexer
    lexer.input(text)
    collected = []
    tok = lexer.token()
    while tok:
        collected.append(tok)
        tok = lexer.token()
    return collected
180 |
181 | # ---------------------------------------------------------------------
182 | # error()
183 | #
184 | # Report a preprocessor error/warning of some kind
185 | # ----------------------------------------------------------------------
186 |
def error(self,file,line,msg):
    # Print a diagnostic to stdout as "<file>:<line> <msg>".  `line` is
    # formatted with %d, so it has to be a number.
    print("%s:%d %s" % (file,line,msg))
189 |
190 | # ----------------------------------------------------------------------
191 | # lexprobe()
192 | #
193 | # This method probes the preprocessor lexer object to discover
194 | # the token types of symbols that are important to the preprocessor.
195 | # If this works right, the preprocessor will simply "work"
196 | # with any suitable lexer regardless of how tokens have been named.
197 | # ----------------------------------------------------------------------
198 |
def lexprobe(self):
    # Feed small samples to self.lexer and record which token types it uses
    # for identifiers, integers, strings, spaces and newlines, so the
    # preprocessor does not depend on particular token names.
    lexer = self.lexer

    def first_token(sample):
        # Lex `sample` and return the first token produced (may be false).
        lexer.input(sample)
        return lexer.token()

    # Identifiers
    tok = first_token("identifier")
    if tok and tok.value == "identifier":
        self.t_ID = tok.type
    else:
        print("Couldn't determine identifier type")

    # Integers (also remember the Python type of the token value)
    tok = first_token("12345")
    if tok and int(tok.value) == 12345:
        self.t_INTEGER = tok.type
        self.t_INTEGER_TYPE = type(tok.value)
    else:
        print("Couldn't determine integer type")

    # Strings enclosed in double quotes
    tok = first_token("\"filename\"")
    if tok and tok.value == "\"filename\"":
        self.t_STRING = tok.type
    else:
        print("Couldn't determine string type")

    # Whitespace--if the lexer reports it at all
    tok = first_token(" ")
    self.t_SPACE = tok.type if tok and tok.value == " " else None

    # Newlines
    tok = first_token("\n")
    if tok and tok.value == "\n":
        self.t_NEWLINE = tok.type
    else:
        self.t_NEWLINE = None
        print("Couldn't determine token for newlines")

    self.t_WS = (self.t_SPACE, self.t_NEWLINE)

    # Other characters the preprocessor needs to see as single tokens
    for c in [ '<','>','#','##','\\','(',')',',','.']:
        tok = first_token(c)
        if not tok or tok.value != c:
            print("Unable to lex '%s' required for preprocessor" % c)
252 |
253 | # ----------------------------------------------------------------------
254 | # add_path()
255 | #
256 | # Adds a search path to the preprocessor.
257 | # ----------------------------------------------------------------------
258 |
def add_path(self,path):
    # Append `path` to the end of self.path, the list of search paths kept
    # by this preprocessor.
    self.path.append(path)
261 |
262 | # ----------------------------------------------------------------------
263 | # group_lines()
264 | #
265 | # Given an input string, this function splits it into lines. Trailing whitespace
266 | # is removed. Any line ending with \ is grouped with the next line. This
267 | # function forms the lowest level of the preprocessor---grouping into text into
268 | # a line-by-line format.
269 | # ----------------------------------------------------------------------
270 |
def group_lines(self,input):
    # Generator: split `input` into logical lines and yield each one as a
    # list of tokens.
    #
    # Trailing whitespace is stripped from every physical line, and a line
    # ending in a backslash is joined with the following line(s); the
    # absorbed lines are left empty so the physical line count is preserved.
    # A token list is yielded whenever a whitespace token containing a
    # newline is seen, plus once more for any tokens left at the end.
    lexer = self.lexer.clone()
    lines = [x.rstrip() for x in input.splitlines()]
    # range() instead of xrange(): xrange does not exist on Python 3.
    for i in range(len(lines)):
        j = i+1
        while lines[i].endswith('\\') and (j < len(lines)):
            lines[i] = lines[i][:-1]+lines[j]
            lines[j] = ""
            j += 1

    input = "\n".join(lines)
    lexer.input(input)
    lexer.lineno = 1

    current_line = []
    while True:
        tok = lexer.token()
        if not tok:
            break
        current_line.append(tok)
        if tok.type in self.t_WS and '\n' in tok.value:
            yield current_line
            current_line = []

    if current_line:
        yield current_line
297 |
298 | # ----------------------------------------------------------------------
299 | # tokenstrip()
300 | #
301 | # Remove leading/trailing whitespace tokens from a token list
302 | # ----------------------------------------------------------------------
303 |
def tokenstrip(self,tokens):
    # Remove leading and trailing whitespace tokens from `tokens` in place
    # and return the same list object.
    ws_types = self.t_WS

    # Count the leading whitespace tokens, then drop them in one slice.
    lead = 0
    while lead < len(tokens) and tokens[lead].type in ws_types:
        lead += 1
    del tokens[:lead]

    # Walk back from the end to just past the last non-whitespace token.
    keep = len(tokens)
    while keep > 0 and tokens[keep-1].type in ws_types:
        keep -= 1
    del tokens[keep:]
    return tokens
314 |
315 |
316 | # ----------------------------------------------------------------------
317 | # collect_args()
318 | #
319 | # Collects comma separated arguments from a list of tokens. The arguments
320 | # must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions)
321 | # where tokencount is the number of tokens consumed, args is a list of arguments,
322 | # and positions is a list of integers containing the starting index of each
323 | # argument. Each argument is represented by a list of tokens.
324 | #
325 | # When collecting arguments, leading and trailing whitespace is removed
326 | # from each argument.
327 | #
328 | # This function properly handles nested parenthesis and commas---these do not
329 | # define new arguments.
330 | # ----------------------------------------------------------------------
331 |
def collect_args(self, tokenlist):
    """Collect the comma-separated arguments of a parenthesized list.

    ``tokenlist`` must start (after optional whitespace) with '('.  Commas
    and parentheses nested inside an inner pair of parentheses belong to
    the argument they appear in.

    :returns: ``(tokencount, args, positions)`` where ``tokencount`` is the
        number of tokens consumed up to and including the closing ')',
        ``args`` is a list of whitespace-stripped token lists, and
        ``positions`` holds the index following '(' and each top-level
        comma, plus the index of the closing ')' when the last argument is
        non-empty.  On a missing '(' or ')' an error is reported through
        ``self.error`` and ``(0, [], [])`` is returned.
    """
    total = len(tokenlist)

    # Skip any whitespace in front of the opening '('.
    pos = 0
    while pos < total and tokenlist[pos].type in self.t_WS:
        pos += 1

    if pos >= total or tokenlist[pos].value != '(':
        self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
        return 0, [], []

    starts = [pos + 1]
    collected = []
    pending = []
    depth = 1

    for idx in range(pos + 1, total):
        tok = tokenlist[idx]
        text = tok.value
        if text == ')':
            depth -= 1
            if depth == 0:
                # Closing parenthesis of the argument list itself.
                if pending:
                    collected.append(self.tokenstrip(pending))
                    starts.append(idx)
                return idx + 1, collected, starts
            pending.append(tok)
        elif text == ',' and depth == 1:
            # A top-level comma ends the current argument.
            collected.append(self.tokenstrip(pending))
            starts.append(idx + 1)
            pending = []
        else:
            if text == '(':
                depth += 1
            pending.append(tok)

    # Ran out of tokens before the list was closed.
    self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
    return 0, [], []
376 |
377 | # ----------------------------------------------------------------------
378 | # macro_prescan()
379 | #
380 | # Examine the macro value (token sequence) and identify patch points
381 | # This is used to speed up macro expansion later on---we'll know
382 | # right away where to apply patches to the value to form the expansion
383 | # ----------------------------------------------------------------------
384 |
def macro_prescan(self, macro):
    """Record where arguments must be patched into a macro's replacement list.

    Three lists are (re)created on ``macro``:

    * ``patch`` -- ``(kind, argnum, index)`` tuples, ``kind`` being ``'c'``
      for an argument adjacent to ``##`` and ``'e'`` for an ordinary use;
      sorted by descending index.
    * ``str_patch`` -- ``(argnum, index)`` for arguments preceded by ``#``.
    * ``var_comma_patch`` -- indices of a ',' that is followed by ``##`` and
      the variadic argument name.

    ``macro.value`` is edited in place: a ``#`` or ``##`` that directly
    precedes an argument is deleted, and a stringized argument token is
    replaced by a copy whose type is ``self.t_STRING``.
    """
    macro.patch = []            # Standard macro arguments
    macro.str_patch = []        # String conversion expansion
    macro.var_comma_patch = []  # Variadic macro comma patch

    body = macro.value          # same list object; all edits are in place
    params = macro.arglist
    pos = 0
    while pos < len(body):
        tok = body[pos]
        if tok.type == self.t_ID and tok.value in params:
            argnum = params.index(tok.value)
            before = body[pos - 1].value if pos > 0 else None

            if before == '#':
                # Conversion of argument to a string
                stringized = copy.copy(tok)
                stringized.type = self.t_STRING
                body[pos] = stringized
                del body[pos - 1]
                macro.str_patch.append((argnum, pos - 1))
                # The deletion shifted everything left, so pos already
                # refers to the token after the argument.
                continue
            if before == '##':
                # Concatenation with the token on the left
                macro.patch.append(('c', argnum, pos - 1))
                del body[pos - 1]
                continue
            if pos + 1 < len(body) and body[pos + 1].value == '##':
                # Concatenation with the token on the right
                macro.patch.append(('c', argnum, pos))
                pos += 1
                continue
            # Standard expansion
            macro.patch.append(('e', argnum, pos))
        elif tok.value == '##':
            # macro.variadic is tested first so that macro.vararg is only
            # read for variadic macros.
            if (macro.variadic and pos > 0 and body[pos - 1].value == ','
                    and pos + 1 < len(body)
                    and body[pos + 1].type == self.t_ID
                    and body[pos + 1].value == macro.vararg):
                macro.var_comma_patch.append(pos - 1)
        pos += 1

    macro.patch.sort(key=lambda entry: entry[2], reverse=True)
419 |
420 | # ----------------------------------------------------------------------
421 | # macro_expand_args()
422 | #
423 | # Given a Macro and list of arguments (each a token list), this method
424 | # returns an expanded version of a macro. The return value is a token sequence
425 | # representing the replacement macro tokens
426 | # ----------------------------------------------------------------------
427 |
def macro_expand_args(self, macro, args):
    """Substitute ``args`` into a prescanned macro's replacement list.

    :param macro: a macro object carrying ``value``, ``str_patch``,
        ``patch``, ``var_comma_patch`` and ``variadic`` (the patch lists are
        the ones built by ``macro_prescan``).
    :param args: list of arguments, each a list of tokens.  The lists and
        their tokens are left untouched.
    :returns: a new list of tokens forming the replacement text.
    """
    # Make a copy of the macro token sequence
    rep = [copy.copy(_x) for _x in macro.value]

    # Make string expansion patches.  These do not alter the length of the
    # replacement sequence.
    str_expansion = {}
    for argnum, i in macro.str_patch:
        if argnum not in str_expansion:
            text = "".join([x.value for x in args[argnum]])
            # Escape backslashes first and then double quotes, so that the
            # spelling of the argument survives inside the new string
            # literal (e.g. a "quoted" argument).
            text = text.replace("\\", "\\\\").replace('"', '\\"')
            str_expansion[argnum] = '"%s"' % text
        rep[i] = copy.copy(rep[i])
        rep[i].value = str_expansion[argnum]

    # Make the variadic macro comma patch.  If the variadic macro argument
    # is empty, we get rid of the comma that precedes it.
    comma_patch = False
    if macro.variadic and not args[-1]:
        for i in macro.var_comma_patch:
            rep[i] = None
            comma_patch = True

    # Make all other patches.  The order of these matters.  It is assumed
    # that the patch list has been sorted in reverse order of patch location
    # since replacements will cause the size of the replacement sequence to
    # expand from the patch point.
    expanded = {}
    for ptype, argnum, i in macro.patch:
        # Concatenation.  Argument is left unexpanded
        if ptype == 'c':
            rep[i:i+1] = args[argnum]
        # Normal expansion.  Argument is macro expanded first
        elif ptype == 'e':
            if argnum not in expanded:
                # expand_macros() rewrites the list it is given, so hand it
                # copies; otherwise a later 'c' patch for the same argument
                # would pick up the expanded tokens.
                expanded[argnum] = self.expand_macros(
                    [copy.copy(_t) for _t in args[argnum]])
            rep[i:i+1] = expanded[argnum]

    # Get rid of removed comma if necessary
    if comma_patch:
        rep = [_i for _i in rep if _i]

    return rep
468 |
469 |
470 | # ----------------------------------------------------------------------
471 | # expand_macros()
472 | #
473 | # Given a list of tokens, this function performs macro expansion.
474 | # The expanded argument is a dictionary that contains macros already
475 | # expanded. This is used to prevent infinite recursion.
476 | # ----------------------------------------------------------------------
477 |
def expand_macros(self, tokens, expanded=None):
    """Macro-expand ``tokens`` in place and return the same list.

    :param tokens: list of tokens; it is rewritten as macros are replaced.
    :param expanded: dictionary whose keys are the names of the macros
        currently being expanded.  A name found in it is not expanded
        again, which prevents infinite recursion.  Callers normally omit it.
    :returns: ``tokens``.

    Identifiers named in ``self.macros`` are replaced by their expansion;
    every token produced by an expansion gets the line number of the macro
    name it replaces.  ``__LINE__`` is turned into an integer token holding
    its own line number.  The name of a function-like macro that is not
    followed by '(' is left as an ordinary identifier.
    """
    if expanded is None:
        expanded = {}
    i = 0
    while i < len(tokens):
        t = tokens[i]
        if t.type == self.t_ID:
            if t.value in self.macros and t.value not in expanded:
                # Yes, we found a macro match
                expanded[t.value] = True

                m = self.macros[t.value]
                if not m.arglist:
                    # A simple macro
                    ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
                    for e in ex:
                        e.lineno = t.lineno
                    tokens[i:i+1] = ex
                    i += len(ex)
                else:
                    # A macro with arguments
                    j = i + 1
                    while j < len(tokens) and tokens[j].type in self.t_WS:
                        j += 1
                    # The bounds check covers a macro name that is the last
                    # token of the list.
                    if j < len(tokens) and tokens[j].value == '(':
                        tokcount,args,positions = self.collect_args(tokens[j:])
                        if not m.variadic and len(args) != len(m.arglist):
                            self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
                            i = j + tokcount
                        elif m.variadic and len(args) < len(m.arglist)-1:
                            if len(m.arglist) > 2:
                                self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
                            else:
                                self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
                            i = j + tokcount
                        else:
                            if m.variadic:
                                if len(args) == len(m.arglist)-1:
                                    args.append([])
                                else:
                                    # Everything from the start of the
                                    # variadic position up to the closing
                                    # ')' becomes the single variadic
                                    # argument, commas included.
                                    args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
                                    del args[len(m.arglist):]

                            # Get macro replacement text
                            rep = self.macro_expand_args(m,args)
                            rep = self.expand_macros(rep,expanded)
                            for r in rep:
                                r.lineno = t.lineno
                            tokens[i:j+tokcount] = rep
                            i += len(rep)
                    else:
                        # Not an invocation, just an identifier that happens
                        # to share the macro's name.  Step over it; without
                        # this the loop would look at the same token forever.
                        i += 1
                del expanded[t.value]
                continue
            elif t.value == '__LINE__':
                t.type = self.t_INTEGER
                t.value = self.t_INTEGER_TYPE(t.lineno)

        i += 1
    return tokens
536 |
537 | # ----------------------------------------------------------------------
538 | # evalexpr()
539 | #
540 | # Evaluate an expression token sequence for the purposes of evaluating
541 | # integral expressions.
542 | # ----------------------------------------------------------------------
543 |
def evalexpr(self, tokens):
    """Evaluate the token sequence of an ``#if`` / ``#elif`` expression.

    Steps, in order:

    1. every ``defined NAME`` / ``defined(NAME)`` is replaced by an integer
       token that is 1 if NAME is in ``self.macros`` and 0 otherwise;
    2. the remaining tokens are macro-expanded;
    3. identifiers that are still left evaluate as 0, and integer tokens
       lose their trailing type suffix;
    4. the text is rewritten with ``and`` / ``or`` / ``not`` in place of
       ``&&`` / ``||`` / ``!`` and evaluated.

    :param tokens: list of tokens; it is modified in place.
    :returns: the value of the expression, or 0 (after reporting through
        ``self.error``) when it cannot be evaluated.
    """
    # Search for defined macros
    i = 0
    while i < len(tokens):
        if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
            j = i + 1
            needparen = False
            result = "0L"
            while j < len(tokens):
                if tokens[j].type in self.t_WS:
                    j += 1
                    continue
                elif tokens[j].type == self.t_ID:
                    if tokens[j].value in self.macros:
                        result = "1L"
                    else:
                        result = "0L"
                    if not needparen: break
                elif tokens[j].value == '(':
                    needparen = True
                elif tokens[j].value == ')':
                    break
                else:
                    self.error(self.source,tokens[i].lineno,"Malformed defined()")
                j += 1
            # Reuse the 'defined' token for the result and drop the operand.
            tokens[i].type = self.t_INTEGER
            tokens[i].value = self.t_INTEGER_TYPE(result)
            del tokens[i+1:j+1]
        i += 1
    tokens = self.expand_macros(tokens)
    for i,t in enumerate(tokens):
        if t.type == self.t_ID:
            # An identifier that survived macro expansion counts as 0.
            tokens[i] = copy.copy(t)
            tokens[i].type = self.t_INTEGER
            tokens[i].value = self.t_INTEGER_TYPE("0L")
        if tokens[i].type == self.t_INTEGER:
            tokens[i] = copy.copy(tokens[i])
            # Strip off any trailing suffixes.  This now also covers the
            # "0L" substituted just above, which is not a valid literal for
            # eval() under Python 3.
            text = str(tokens[i].value)
            while text and text[-1] not in "0123456789abcdefABCDEF":
                text = text[:-1]
            tokens[i].value = text

    expr = "".join([str(x.value) for x in tokens])
    expr = expr.replace("&&"," and ")
    expr = expr.replace("||"," or ")
    expr = expr.replace("!"," not ")
    # The replacement above also hits the '!' of '!='; put that operator back.
    expr = expr.replace(" not ="," !=")
    try:
        # NOTE(review): eval() runs text taken from the file being
        # preprocessed.  Do not feed untrusted sources through this without
        # replacing it by a real expression evaluator.
        result = eval(expr)
    except Exception:
        # Exception instead of the Python-2-only StandardError.  An empty
        # expression has no token to take a line number from; 0 is reported.
        lineno = tokens[0].lineno if tokens else 0
        self.error(self.source,lineno,"Couldn't evaluate expression")
        result = 0
    return result
597 |
598 | # ----------------------------------------------------------------------
599 | # parsegen()
600 | #
# Parse an input string, yielding the preprocessed tokens.
602 | # ----------------------------------------------------------------------
def parsegen(self, input, source=None):
    """Preprocess ``input`` and yield the resulting tokens.

    :param input: text to preprocess.
    :param source: name recorded in ``self.source`` and used to define
        ``__FILE__``; an empty string is used when it is not given.
    :returns: a generator of tokens.

    Ordinary lines are collected in ``chunk`` while the current conditional
    region is enabled.  The chunk is macro-expanded and emitted right before
    an active ``#define``, ``#include`` or ``#undef`` takes effect, and once
    more at the end of the input.

    Conditional state:

    * ``enable`` -- whether text in the current region is kept;
    * ``iftrigger`` -- whether some branch of the current conditional has
      already been selected;
    * ``ifstack`` -- the ``(enable, iftrigger)`` pairs of the enclosing
      conditionals, pushed by ``#if``/``#ifdef``/``#ifndef`` and popped by
      ``#endif``.
    """

    # Replace trigraph sequences
    t = trigraph(input)
    lines = self.group_lines(t)

    if not source:
        source = ""

    self.define("__FILE__ \"%s\"" % source)

    self.source = source
    chunk = []
    enable = True
    iftrigger = False
    ifstack = []

    for x in lines:
        # Find the first non-whitespace token of the line.
        for i,tok in enumerate(x):
            if tok.type not in self.t_WS: break
        if tok.value == '#':
            # Preprocessor directive

            # insert necessary whitespace instead of eaten tokens
            for tok in x:
                if tok.type in self.t_WS and '\n' in tok.value:
                    chunk.append(tok)

            dirtokens = self.tokenstrip(x[i+1:])
            if dirtokens:
                name = dirtokens[0].value
                args = self.tokenstrip(dirtokens[1:])
            else:
                name = ""
                args = []

            if name == 'define':
                if enable:
                    for tok in self.expand_macros(chunk):
                        yield tok
                    chunk = []
                    self.define(args)
            elif name == 'include':
                if enable:
                    for tok in self.expand_macros(chunk):
                        yield tok
                    chunk = []
                    # The included file redefines __FILE__ and self.source;
                    # put both back afterwards.
                    oldfile = self.macros['__FILE__']
                    for tok in self.include(args):
                        yield tok
                    self.macros['__FILE__'] = oldfile
                    self.source = source
            elif name == 'undef':
                if enable:
                    for tok in self.expand_macros(chunk):
                        yield tok
                    chunk = []
                    self.undef(args)
            elif name == 'ifdef':
                ifstack.append((enable,iftrigger))
                if enable:
                    if not args:
                        # No macro name: report it and treat the test as false.
                        self.error(self.source,dirtokens[0].lineno,"Missing macro name in #ifdef")
                    if not args or args[0].value not in self.macros:
                        enable = False
                        iftrigger = False
                    else:
                        iftrigger = True
            elif name == 'ifndef':
                ifstack.append((enable,iftrigger))
                if enable:
                    if not args:
                        # No macro name: report it; nothing is defined, so
                        # the region stays enabled.
                        self.error(self.source,dirtokens[0].lineno,"Missing macro name in #ifndef")
                    if args and args[0].value in self.macros:
                        enable = False
                        iftrigger = False
                    else:
                        iftrigger = True
            elif name == 'if':
                ifstack.append((enable,iftrigger))
                if enable:
                    result = self.evalexpr(args)
                    if not result:
                        enable = False
                        iftrigger = False
                    else:
                        iftrigger = True
            elif name == 'elif':
                if ifstack:
                    if ifstack[-1][0]:     # We only pay attention if outer "if" allows this
                        if enable:         # If already true, we flip enable False
                            enable = False
                        elif not iftrigger:   # If False, but not triggered yet, we'll check expression
                            result = self.evalexpr(args)
                            if result:
                                enable = True
                                iftrigger = True
                else:
                    self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")

            elif name == 'else':
                if ifstack:
                    if ifstack[-1][0]:
                        if enable:
                            enable = False
                        elif not iftrigger:
                            enable = True
                            iftrigger = True
                else:
                    self.error(self.source,dirtokens[0].lineno,"Misplaced #else")

            elif name == 'endif':
                if ifstack:
                    enable,iftrigger = ifstack.pop()
                else:
                    self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
            else:
                # Unknown preprocessor directive
                pass

        else:
            # Normal text
            if enable:
                chunk.extend(x)

    for tok in self.expand_macros(chunk):
        yield tok
    chunk = []
727 |
728 | # ----------------------------------------------------------------------
729 | # include()
730 | #
731 | # Implementation of file-inclusion
732 | # ----------------------------------------------------------------------
733 |
def include(self, tokens):
    """Process the operand of an ``#include`` and yield the file's tokens.

    :param tokens: the tokens following ``include``.  If they begin with
        neither '<' nor a string token they are macro-expanded first.
    :returns: a generator of the tokens produced by ``self.parsegen`` for
        the first matching file; nothing is yielded when the operand is
        malformed or no file can be opened.

    ``<name>`` searches ``self.path``, then the current directory, then
    ``self.temp_path``; ``"name"`` searches ``self.temp_path``, then the
    current directory, then ``self.path``.  While a file is being processed
    its directory (if any) sits at the front of ``self.temp_path``.
    """
    # Try to extract the filename and then process an include file
    if not tokens:
        return

    if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
        tokens = self.expand_macros(tokens)
        if not tokens:
            # The operand expanded to nothing.
            print("Malformed #include statement")
            return

    if tokens[0].value == '<':
        # Include <...>
        i = 1
        while i < len(tokens):
            if tokens[i].value == '>':
                break
            i += 1
        else:
            print("Malformed #include <...>")
            return
        filename = "".join([x.value for x in tokens[1:i]])
        path = self.path + [""] + self.temp_path
    elif tokens[0].type == self.t_STRING:
        filename = tokens[0].value[1:-1]
        path = self.temp_path + [""] + self.path
    else:
        print("Malformed #include statement")
        return

    for p in path:
        iname = os.path.join(p,filename)
        # Only the attempt to read the file is allowed to fail quietly; the
        # handle is closed as soon as the text has been read.
        try:
            with open(iname,"r") as f:
                data = f.read()
        except IOError:
            continue
        dname = os.path.dirname(iname)
        if dname:
            self.temp_path.insert(0,dname)
        try:
            for tok in self.parsegen(data,filename):
                yield tok
        finally:
            # Take the directory off again even if processing is abandoned.
            if dname:
                del self.temp_path[0]
        break
    else:
        print("Couldn't find '%s'" % filename)
776 |
777 | # ----------------------------------------------------------------------
778 | # define()
779 | #
780 | # Define a new macro
781 | # ----------------------------------------------------------------------
782 |
def define(self, tokens):
    """Define a macro and store it in ``self.macros``.

    :param tokens: either the text that follows ``#define`` (it is then
        tokenized with ``self.tokenize``) or the equivalent list of tokens.

    Three forms are recognised from the token after the name: nothing
    (macro with an empty value), whitespace (object-like macro), or '('
    (function-like macro, whose last parameter may be ``...`` or
    ``name...`` to make it variadic).  Problems are reported with
    ``print`` and leave ``self.macros`` unchanged.
    """
    # 'unicode' only exists on Python 2; fall back to str alone elsewhere.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)
    if isinstance(tokens, string_types):
        tokens = self.tokenize(tokens)

    linetok = tokens
    try:
        name = linetok[0]
        if len(linetok) > 1:
            mtype = linetok[1]
        else:
            mtype = None
        if not mtype:
            m = Macro(name.value,[])
            self.macros[name.value] = m
        elif mtype.type in self.t_WS:
            # A normal macro
            m = Macro(name.value,self.tokenstrip(linetok[2:]))
            self.macros[name.value] = m
        elif mtype.value == '(':
            # A macro with arguments
            tokcount, args, positions = self.collect_args(linetok[1:])
            variadic = False
            for a in args:
                if variadic:
                    print("No more arguments may follow a variadic argument")
                    break
                astr = "".join([str(_i.value) for _i in a])
                if astr == "...":
                    # A bare '...' is exposed to the body as __VA_ARGS__.
                    variadic = True
                    a[0].type = self.t_ID
                    a[0].value = '__VA_ARGS__'
                    del a[1:]
                    continue
                elif astr[-3:] == "..." and a[0].type == self.t_ID:
                    variadic = True
                    del a[1:]
                    # If, for some reason, "." is part of the identifier, strip off the name for the purposes
                    # of macro expansion
                    if a[0].value[-3:] == '...':
                        a[0].value = a[0].value[:-3]
                    continue
                if len(a) > 1 or a[0].type != self.t_ID:
                    print("Invalid macro argument")
                    break
            else:
                # Every parameter was acceptable (the loop did not break).
                mvalue = self.tokenstrip(linetok[1+tokcount:])
                # Remove the whitespace on either side of each '##'.
                i = 0
                while i < len(mvalue):
                    if i+1 < len(mvalue):
                        if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
                            del mvalue[i]
                            continue
                        elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
                            del mvalue[i+1]
                    i += 1
                m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
                self.macro_prescan(m)
                self.macros[name.value] = m
        else:
            print("Bad macro definition")
    except LookupError:
        # Raised by the indexing above, e.g. for an empty token list or an
        # empty parameter.
        print("Bad macro definition")
846 |
847 | # ----------------------------------------------------------------------
848 | # undef()
849 | #
850 | # Undefine a macro
851 | # ----------------------------------------------------------------------
852 |
def undef(self, tokens):
    """Remove a macro definition.

    :param tokens: the tokens following ``#undef``; the value of the first
        one is the macro name.  An empty list, or a name that is not
        defined, is ignored.
    """
    if not tokens:
        # '#undef' with nothing after it: there is no name to remove.
        return
    name = tokens[0].value
    try:
        del self.macros[name]
    except LookupError:
        pass
859 |
860 | # ----------------------------------------------------------------------
861 | # parse()
862 | #
863 | # Parse input text.
864 | # ----------------------------------------------------------------------
def parse(self, input, source=None, ignore=None):
    """Start preprocessing ``input``; tokens are then fetched with ``token()``.

    :param input: text to preprocess.
    :param source: name of the input, passed on to ``parsegen``.
    :param ignore: container of token types that ``token()`` skips.  When
        omitted, a new empty dictionary is used for this call instead of
        one object shared by every call.
    """
    if ignore is None:
        ignore = {}
    self.ignore = ignore
    self.parser = self.parsegen(input,source)
868 |
869 | # ----------------------------------------------------------------------
870 | # token()
871 | #
872 | # Method to return individual tokens
873 | # ----------------------------------------------------------------------
def token(self):
    """Return the next token whose type is not in ``self.ignore``.

    :returns: a token, or ``None`` once the input is exhausted.  At that
        point ``self.parser`` is set to ``None``, and any further call keeps
        returning ``None``.
    """
    if self.parser is None:
        # Already exhausted (or nothing being parsed): stay at end-of-input.
        return None
    for tok in self.parser:
        if tok.type not in self.ignore:
            return tok
    self.parser = None
    return None
882 |
if __name__ == '__main__':
    # Command-line driver: preprocess the file named by the first argument
    # and print every resulting token next to the current source name.
    import ply.lex as lex
    lexer = lex.lex()

    # Run a preprocessor
    import sys
    # Read the whole file and close it again straight away.
    with open(sys.argv[1]) as f:
        input = f.read()

    p = Preprocessor(lexer)
    p.parse(input,sys.argv[1])
    while True:
        tok = p.token()
        if not tok: break
        print(p.source, tok)
898 |
899 |
900 |
901 |
902 |
903 |
904 |
905 |
906 |
907 |
908 |
909 |
--------------------------------------------------------------------------------
/yyparse/ply/ctokens.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # ctokens.py
3 | #
4 | # Token specifications for symbols in ANSI C and C++. This file is
5 | # meant to be used as a library in other tokenizers.
6 | # ----------------------------------------------------------------------
7 |
# Token names
#
# Names of the token types covered by this module.  Each name NAME listed
# here is matched by the t_NAME pattern defined further down, except
# 'TYPEID', for which this module defines no pattern.

tokens = [
    # Literals (identifier, type identifier, integer constant, float constant, string constant, char const)
    'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'INCREMENT', 'DECREMENT',

    # Structure dereference (->)
    'ARROW',

    # Ternary operator (?)
    'TERNARY',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
]

# Operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MODULO = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='

# Assignment operators

t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
t_XOREQUAL = r'\^='

# Increment/decrement
t_INCREMENT = r'\+\+'
t_DECREMENT = r'--'

# ->
t_ARROW = r'->'

# ?
t_TERNARY = r'\?'

# Delimiters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'

# Identifiers
t_ID = r'[A-Za-z_][A-Za-z0-9_]*'

# Integer literal: decimal digits with an optional u/l suffix combination
t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating literal: either digits '.' digits with an optional exponent, or
# digits with a mandatory exponent; an optional l/f suffix may follow.
# NOTE(review): the pattern contains literal spaces around '|'.  Presumably
# the lexer that consumes it compiles patterns in verbose mode, where such
# spaces are ignored -- confirm before reusing the pattern elsewhere.
t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal
t_STRING = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''
116 |
117 | # Comment (C-Style)
def t_COMMENT(t):
    r'/\*(.|\n)*?\*/'
    # The string above is this token's pattern, not documentation.
    # A block comment may span several lines; advance the lexer's line
    # counter by the number of newlines it contains.
    newline_count = t.value.count('\n')
    t.lexer.lineno = t.lexer.lineno + newline_count
    return t
122 |
123 | # Comment (C++-Style)
def t_CPPCOMMENT(t):
    r'//.*\n?'
    # The string above is this token's pattern, not documentation.  The
    # trailing newline is optional so that a '//' comment on the very last
    # line of the input (no newline after it) is still recognised as a
    # comment.  The line counter only moves when a newline was consumed.
    t.lexer.lineno += t.value.count('\n')
    return t
128 |
129 |
130 |
131 |
132 |
133 |
134 |
--------------------------------------------------------------------------------
/yyparse/ply/ygen.py:
--------------------------------------------------------------------------------
1 | # ply: ygen.py
2 | #
3 | # This is a support program that auto-generates different versions of the YACC parsing
4 | # function with different features removed for the purposes of performance.
5 | #
6 | # Users should edit the method LParser.parsedebug() in yacc.py. The source code
7 | # for that method is then used to create the other methods. See the comments in
8 | # yacc.py for further details.
9 |
10 | import os.path
11 | import shutil
12 |
def get_source_range(lines, tag):
    """Locate the section delimited by ``#--! <tag>-start`` / ``#--! <tag>-end``.

    :param lines: list of source lines.
    :param tag: section name used in the marker comments.
    :returns: ``(start, end)`` such that ``lines[start:end]`` is the text
        strictly between the two marker lines.
    :raises ValueError: if the start marker, or an end marker after it, is
        not found.  Without this check a missing marker would produce a
        meaningless range.
    """
    start_tag = '#--! %s-start' % tag
    end_tag = '#--! %s-end' % tag

    # One shared iterator: the search for the end marker resumes on the
    # line after the start marker.
    srclines = enumerate(lines)

    start_index = None
    for index, line in srclines:
        if line.strip().startswith(start_tag):
            start_index = index
            break
    if start_index is None:
        raise ValueError('start marker %r not found' % start_tag)

    end_index = None
    for index, line in srclines:
        if line.strip().endswith(end_tag):
            end_index = index
            break
    if end_index is None:
        raise ValueError('end marker %r not found' % end_tag)

    return (start_index + 1, end_index)
27 |
def filter_section(lines, tag):
    """Return ``lines`` without the sections bracketed by ``#--! <tag>`` markers.

    Every line that (ignoring surrounding whitespace) starts with
    ``#--! <tag>`` toggles between keeping and dropping the lines that
    follow.  The marker lines themselves are never kept.  The input list is
    not modified.
    """
    marker = '#--! %s' % tag
    kept = []
    skipping = False
    for text in lines:
        if text.strip().startswith(marker):
            skipping = not skipping
            continue
        if not skipping:
            kept.append(text)
    return kept
38 |
def main():
    """Regenerate the 'parseopt' and 'parseopt-notrack' sections of yacc.py.

    yacc.py is looked up in the directory of this script and first copied to
    yacc.py.bak.  The 'parsedebug' section is the master copy: with its
    DEBUG parts filtered out it becomes the 'parseopt' section, and with the
    TRACKING parts filtered out as well it becomes the 'parseopt-notrack'
    section.  The file is written back with trailing whitespace removed
    from every line.
    """
    here = os.path.dirname(__file__)
    yacc_path = os.path.join(here, 'yacc.py')
    backup_path = os.path.join(here, 'yacc.py.bak')

    shutil.copy2(yacc_path, backup_path)
    with open(yacc_path, 'r') as src:
        lines = src.readlines()

    debug_range = get_source_range(lines, 'parsedebug')
    opt_range = get_source_range(lines, 'parseopt')
    notrack_range = get_source_range(lines, 'parseopt-notrack')

    # Derive both variants from the master section.
    master = lines[debug_range[0]:debug_range[1]]
    without_debug = filter_section(master, 'DEBUG')
    without_tracking = filter_section(without_debug, 'TRACKING')

    # The 'parseopt-notrack' section is replaced before the 'parseopt' one.
    # NOTE(review): presumably it sits further down in yacc.py, so that the
    # indices found above for 'parseopt' are still valid -- confirm.
    lines[notrack_range[0]:notrack_range[1]] = without_tracking
    lines[opt_range[0]:opt_range[1]] = without_debug

    cleaned = []
    for text in lines:
        cleaned.append(text.rstrip() + '\n')
    with open(yacc_path, 'w') as dst:
        dst.writelines(cleaned)

    print('Updated yacc.py')

if __name__ == '__main__':
    main()
70 |
71 |
72 |
73 |
74 |
75 |
--------------------------------------------------------------------------------
/yyparse/test1.c:
--------------------------------------------------------------------------------
// File-scope declarations: an extern declaration written without a type
// specifier, an enum with two enumerators, and a typedef of an unnamed
// struct.
extern x;
enum Boolean
{
    false,
    true
};

typedef struct{
    int a;
    double c;
}mytype;
12 |
// Function with an empty parameter list whose body is a single
// initialised local declaration.
void fff(){
    int asdf = 1;
}
16 |
// Walks through the statement forms one after another: declarations, calls,
// floating-point arithmetic, if / else-if / else, for, do-while, while and
// switch.
// NOTE(review): presumably an input fixture for the parser -- printf,
// putchar and getc_unlocked are called without any declaration in this file.
int main(int argc, char *argv[]) {
    // Declarations, including a suffixed integer constant and a
    // const-qualified unsigned short.
    int a, c;
    double b;
    int i;
    char ch;
    long f = 122L;
    const unsigned short g = 1;
    // double b = 12.3E2;
    printf("a + b = c\n");
    printf("%d\n", sizeof(int));

    // Floating constants: exponent form, and a constant ending in '.'.
    b = 12.3E2;
    b = 12.3 + 345 - 1. * 0.9999;
    c = 345;

    // if / else-if / else chain with constant conditions.
    if (1) {
        a = b;
    }else if(0){
        c = 1;
    }else {
        b = a *c;}

    // for loop whose body combines compound assignment with ?: .
    for (i = 0; i < 10; i++) {
        a += c?1:2;
    }

    // do-while containing both break and continue.
    do {
        a >>= 1;
        if (a < 0) {
            break;
        }else {
            continue;
        }
    } while (1);

    while (a) {
        a--;
        getc_unlocked(a);
    }

    // switch with stacked case labels, a case without break, and default.
    switch (ch) {
        case 'a':
        case 'b':
            break;
        case 'c':
            putchar(ch);
        case 'd':
        default:
            break;
    }

    a = 1;
}
--------------------------------------------------------------------------------
/yyparse/test2.c:
--------------------------------------------------------------------------------
1 |
2 | int a;
3 | a + b = c;
4 | int c;
--------------------------------------------------------------------------------
/yyparse/testChar.c:
--------------------------------------------------------------------------------
1 | 0
2 | 123
3 | 12.4
4 | 1.
5 | .123
6 | 1.3E1
7 | 1.3e0
8 | 2e10
9 |
10 |
11 | ()[]{};.,&*+-~!/%<>^|?:=
--------------------------------------------------------------------------------