├── .gitignore ├── .vscode ├── .browse.VC.db ├── launch.json ├── settings.json └── tasks.json ├── ANSI C grammar (Yacc).html ├── Class notes in Chinese.txt ├── LICENSE ├── Presentation scripts ├── README.md ├── ZCC.bnf ├── bnf.tmp ├── generation ├── __init__.py ├── data.py ├── generation.py └── utility.py ├── main.py ├── public ├── ZCCglobal.py ├── __init__.py └── const.py ├── symbol ├── .gitignore ├── __init__.py └── symtab.py ├── test ├── a.s ├── array.c ├── basic.c ├── basic.i ├── basic1.c ├── basic2.c ├── basic3.c ├── errorID.c ├── error_info.c ├── error_pos.c ├── missRightCurly.c ├── missSEMI.c ├── multi_int.c ├── out.txt ├── pointer.c ├── source_code_optimization.c ├── stdio.h ├── struct.c ├── test1.c ├── test1.s ├── test4.c ├── test4.s ├── test4_2.c ├── test4_2.s ├── test9.c └── test9.s └── yyparse ├── .gitignore ├── ZCClex.py ├── ZCCparser.py ├── __init__.py ├── missRightCurly.c ├── missSEMI.c ├── ply ├── __init__.py ├── cpp.py ├── ctokens.py ├── lex.py ├── yacc.py └── ygen.py ├── test1.c ├── test2.c └── testChar.c /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | *.c 9 | *.exe 10 | test/*.s 11 | test/*.out 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *,cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # PyBuilder 63 | target/ 64 | 65 | #Ipython Notebook 66 | .ipynb_checkpoints 67 | 68 | #PyCharm 69 | 70 | !/test/*.c 71 | .idea/ 72 | -------------------------------------------------------------------------------- /.vscode/.browse.VC.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hlFu/ZCC/811bd987c9d7a3754f7bc9c9d986359ee3df7327/.vscode/.browse.VC.db -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Python", 6 | "type": "python", 7 | "request": "launch", 8 | "stopOnEntry": true, 9 | "program": "${file}", 10 | "debugOptions": [ 11 | "WaitOnAbnormalExit", 12 | "WaitOnNormalExit", 13 | "RedirectOutput" 14 | ] 15 | }, 16 | { 17 | "name": "Python Console App", 18 | "type": "python", 19 | "request": "launch", 20 | "stopOnEntry": true, 21 | "program": "${file}", 22 | "externalConsole": true, 23 | "debugOptions": [ 24 | "WaitOnAbnormalExit", 25 | "WaitOnNormalExit" 26 | ] 27 | }, 28 | { 29 | "name": "Django", 30 | "type": "python", 31 | "request": "launch", 32 | "stopOnEntry": true, 33 | "program": "${workspaceRoot}/manage.py", 34 | "args": [ 35 | "runserver", 36 | "--noreload" 37 | ], 38 | "debugOptions": [ 39 | "WaitOnAbnormalExit", 40 | "WaitOnNormalExit", 41 | "RedirectOutput", 42 | "DjangoDebugging" 43 | ] 44 | }, 45 | { 46 | "name": "Watson", 47 | "type": "python", 48 | "request": "launch", 49 | "stopOnEntry": true, 50 | "program": "${workspaceRoot}/console.py", 51 | "args": [ 52 | "dev", 53 | "runserver", 54 | "--noreload=True" 55 | ], 56 | "debugOptions": [ 57 | "WaitOnAbnormalExit", 58 | "WaitOnNormalExit", 59 | "RedirectOutput" 60 | ] 61 | } 62 | ] 63 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | // 将设置放入此文件中以覆盖默认值和用户设置。 2 | { 3 | } -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | // { 2 | // // See https://go.microsoft.com/fwlink/?LinkId=733558 3 | // // for the documentation about the tasks.json format 4 | // "version": "0.1.0", 5 | // "command": "tsc", 6 | // "isShellCommand": true, 7 | // "args": ["-p", "."], 8 | // "showOutput": "silent", 9 | // "problemMatcher": "$tsc" 10 | // } 11 | { 12 | "version": "0.1.0", 13 | 14 | // The command is tsc. Assumes that tsc has been installed using npm install -g typescript 15 | "command": "python.exe", 16 | 17 | // The command is a shell script 18 | "isShellCommand": true, 19 | 20 | // Show the output window only if unrecognized errors occur. 21 | "showOutput": "always", 22 | 23 | // args is the HelloWorld program to compile. 24 | "args": ["${file}"] 25 | } -------------------------------------------------------------------------------- /ANSI C grammar (Yacc).html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ANSI C grammar (Yacc) 5 | 6 | 20 | 21 |

ANSI C Yacc grammar

22 | 23 | In 1985, Jeff Lee published his Yacc grammar (which is 24 | accompanied by a matching Lex specification) 25 | for the April 30, 1985 draft version of the 26 | ANSI C standard.  Tom Stockfisch reposted 27 | it to net.sources in 1987; that original, as mentioned in 28 | the answer to question 17.25 29 | of the comp.lang.c FAQ, can be ftp'ed from ftp.uu.net, file 30 | usenet/net.sources/ansi.c.grammar.Z. 31 |

32 | Jutta Degener, 1995 33 |

34 |


35 |
%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
 36 | %token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
 37 | %token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
 38 | %token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
 39 | %token XOR_ASSIGN OR_ASSIGN TYPE_NAME
 40 | 
 41 | %token TYPEDEF EXTERN STATIC AUTO REGISTER
 42 | %token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
 43 | %token STRUCT UNION ENUM ELLIPSIS
 44 | 
 45 | %token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
 46 | 
 47 | %start translation_unit
 48 | %%
 49 | 
 50 | IDENTIFIER
 51 | 	: IDENTIFIER_NAME
 52 | 
 53 | INTEGER
 54 | 	: ORIGINAL_CODE
 55 | 
 56 | DOUBLE
 57 | 	: ORIGINAL_CODE
 58 | 
 59 | STRING
 60 | 	: ORIGINAL_CODE
 61 | 
 62 | primary_expression
 63 | 	: IDENTIFIER
 64 | 	| INTEGER
 65 | 	| DOUBLE
 66 | 	| STRING
 67 | 	| '(' expression ')'
 68 | 	;
 69 | 
 70 | postfix_expression
 71 | 	: primary_expression
 72 | 	| postfix_expression '[' expression ']'
 73 | 	| postfix_expression '(' ')'
 74 | 	| postfix_expression '(' argument_expression_list ')'
 75 | 	| postfix_expression '.' IDENTIFIER
 76 | 	| postfix_expression PTR_OP IDENTIFIER
 77 | 	| postfix_expression INC_OP
 78 | 	| postfix_expression DEC_OP
 79 | 	;
 80 | 
 81 | argument_expression_list
 82 | 	: assignment_expression
 83 | 	| argument_expression_list ',' assignment_expression
 84 | 	;
 85 | 
 86 | unary_expression
 87 | 	: postfix_expression
 88 | 	| INC_OP unary_expression
 89 | 	| DEC_OP unary_expression
 90 | 	| unary_operator cast_expression
 91 | 	| SIZEOF unary_expression
 92 | 	| SIZEOF '(' type_name ')'
 93 | 	;
 94 | 
 95 | unary_operator
 96 | 	: '&'
 97 | 	| '*'
 98 | 	| '+'
 99 | 	| '-'
100 | 	| '~'
101 | 	| '!'
102 | 	;
103 | 
104 | cast_expression
105 | 	: unary_expression
106 | 	| '(' type_name ')' cast_expression
107 | 	;
108 | 
109 | multiplicative_expression
110 | 	: cast_expression
111 | 	| multiplicative_expression '*' cast_expression
112 | 	| multiplicative_expression '/' cast_expression
113 | 	| multiplicative_expression '%' cast_expression
114 | 	;
115 | 
116 | additive_expression
117 | 	: multiplicative_expression
118 | 	| additive_expression '+' multiplicative_expression
119 | 	| additive_expression '-' multiplicative_expression
120 | 	;
121 | 
122 | shift_expression
123 | 	: additive_expression
124 | 	| shift_expression LEFT_OP additive_expression
125 | 	| shift_expression RIGHT_OP additive_expression
126 | 	;
127 | 
128 | relational_expression
129 | 	: shift_expression
130 | 	| relational_expression '<' shift_expression
131 | 	| relational_expression '>' shift_expression
132 | 	| relational_expression LE_OP shift_expression
133 | 	| relational_expression GE_OP shift_expression
134 | 	;
135 | 
136 | equality_expression
137 | 	: relational_expression
138 | 	| equality_expression EQ_OP relational_expression
139 | 	| equality_expression NE_OP relational_expression
140 | 	;
141 | 
142 | and_expression
143 | 	: equality_expression
144 | 	| and_expression '&' equality_expression
145 | 	;
146 | 
147 | exclusive_or_expression
148 | 	: and_expression
149 | 	| exclusive_or_expression '^' and_expression
150 | 	;
151 | 
152 | inclusive_or_expression
153 | 	: exclusive_or_expression
154 | 	| inclusive_or_expression '|' exclusive_or_expression
155 | 	;
156 | 
157 | logical_and_expression
158 | 	: inclusive_or_expression
159 | 	| logical_and_expression AND_OP inclusive_or_expression
160 | 	;
161 | 
162 | logical_or_expression
163 | 	: logical_and_expression
164 | 	| logical_or_expression OR_OP logical_and_expression
165 | 	;
166 | 
167 | conditional_expression
168 | 	: logical_or_expression
169 | 	| logical_or_expression '?' expression ':' conditional_expression
170 | 	;
171 | 
172 | assignment_expression
173 | 	: conditional_expression
174 | 	| unary_expression assignment_operator assignment_expression
175 | 	;
176 | 
177 | assignment_operator
178 | 	: '='
179 | 	| MUL_ASSIGN
180 | 	| DIV_ASSIGN
181 | 	| MOD_ASSIGN
182 | 	| ADD_ASSIGN
183 | 	| SUB_ASSIGN
184 | 	| LEFT_ASSIGN
185 | 	| RIGHT_ASSIGN
186 | 	| AND_ASSIGN
187 | 	| XOR_ASSIGN
188 | 	| OR_ASSIGN
189 | 	;
190 | 
191 | expression
192 | 	: assignment_expression
193 | 	| expression ',' assignment_expression
194 | 	;
195 | 
196 | constant_expression
197 | 	: conditional_expression
198 | 	;
199 | 
200 | declaration
201 | 	: declaration_specifiers ';'
202 | 	| declaration_specifiers init_declarator_list ';'
203 | 	;
204 | 
205 | declaration_specifiers			 
206 | 	: type_specifier 						//int 
207 | 	| type_specifier type_qualifier					//int const
208 | 	| type_qualifier type_specifier					//const int
209 | 	| storage_class_specifier type_specifier			//static int 
210 | 	| storage_class_specifier type_specifier type_qualifier		//static int const
211 | 	| storage_class_specifier type_qualifier type_specifier		//static const int
212 | 	;
213 | 
214 | init_declarator_list
215 | 	: init_declarator
216 | 	| init_declarator_list ',' init_declarator
217 | 	;
218 | 
219 | init_declarator
220 | 	: declarator
221 | 	| declarator '=' initializer
222 | 	;
223 | 
224 | storage_class_specifier
225 | 	: TYPEDEF
226 | 	| EXTERN
227 | 	| STATIC
228 | 	;
229 | 
230 | type_specifier
231 | 	: VOID
232 | 	| FLOAT
233 | 	| DOUBLE
234 | 	| integer_type
235 | 	| struct_or_union_specifier
236 | 	| enum_specifier
237 | 	| TYPE_NAME
238 | 	;
239 | 
240 | integer_type
241 | 	: CHAR
242 | 	| SHORT
243 | 	| INT
244 | 	| LONG
245 | 	| SIGNED integer_type
246 | 	| UNSIGNED integer_type
247 | 	| SHORT integer_type
248 | 	| LONG integer_type
249 | 
250 | struct_or_union_specifier
251 | 	: struct_or_union IDENTIFIER '{' struct_declaration_list '}'
252 | 	| struct_or_union '{' struct_declaration_list '}'
253 | 	| struct_or_union IDENTIFIER
254 | 	;
255 | 
256 | struct_or_union
257 | 	: STRUCT
258 | 	| UNION
259 | 	;
260 | 
261 | struct_declaration_list
262 | 	: struct_declaration
263 | 	| struct_declaration_list struct_declaration
264 | 	;
265 | 
266 | struct_declaration
267 | 	: specifier_qualifier_list struct_declarator_list ';'
268 | 	;
269 | 
270 | specifier_qualifier_list
271 | 	: type_specifier 
272 | 	| type_specifier type_qualifier
273 | 	| type_qualifier type_specifier 
274 | 	;
275 | 
276 | struct_declarator_list
277 | 	: declarator
278 | 	| struct_declarator_list ',' declarator
279 | 	;
280 | 
287 | enum_specifier
288 | 	: ENUM '{' enumerator_list '}'
289 | 	| ENUM IDENTIFIER '{' enumerator_list '}'
290 | 	| ENUM IDENTIFIER
291 | 	;
292 | 
293 | enumerator_list
294 | 	: enumerator
295 | 	| enumerator_list ',' enumerator
296 | 	;
297 | 
298 | enumerator
299 | 	: IDENTIFIER
300 | 	| IDENTIFIER '=' constant_expression
301 | 	;
302 | 
303 | type_qualifier
304 | 	: CONST
305 | 	;
306 | 
307 | declarator
308 | 	: pointer direct_declarator
309 | 	| direct_declarator
310 | 	;
311 | 
312 | direct_declarator
313 | 	: IDENTIFIER
314 | 	| '(' declarator ')'
315 | 	| direct_declarator '[' constant_expression ']'
316 | 	| direct_declarator '[' ']'
317 | 	| direct_declarator '(' parameter_type_list ')' 
318 | 	| direct_declarator '(' ')'
319 | 	;
320 | 
321 | pointer
322 | 	: '*'
323 | 	| '*' CONST
324 | 	| pointer '*' 
325 | 	| pointer '*' CONST
326 | 	;
327 | 
328 | type_qualifier_list
329 | 	: type_qualifier
330 | 	| type_qualifier_list type_qualifier
331 | 	;
332 | 
333 | 
334 | parameter_type_list
335 | 	: parameter_list
336 | 	| parameter_list ',' ELLIPSIS
337 | 	;
338 | 
339 | parameter_list
340 | 	: parameter_declaration
341 | 	| parameter_list ',' parameter_declaration
342 | 	;
343 | 
344 | parameter_declaration
345 | 	: declaration_specifiers declarator
346 | 	| declaration_specifiers abstract_declarator
347 | 	| declaration_specifiers
348 | 	;
349 | 
350 | type_name
351 | 	: specifier_qualifier_list
352 | 	| specifier_qualifier_list abstract_declarator
353 | 	;
354 | 
355 | abstract_declarator
356 | 	: pointer
357 | 	| direct_abstract_declarator
358 | 	| pointer direct_abstract_declarator
359 | 	;
360 | 
361 | direct_abstract_declarator
362 | 	: '(' abstract_declarator ')'
363 | 	| '[' ']'
364 | 	| '[' constant_expression ']'
365 | 	| direct_abstract_declarator '[' ']'
366 | 	| direct_abstract_declarator '[' constant_expression ']'
367 | 	| '(' ')'
368 | 	| '(' parameter_type_list ')'
369 | 	| direct_abstract_declarator '(' ')'
370 | 	| direct_abstract_declarator '(' parameter_type_list ')'
371 | 	;
372 | 
373 | initializer
374 | 	: assignment_expression
375 | 	| '{' initializer_list '}'
376 | 	| '{' initializer_list ',' '}'
377 | 	;
378 | 
379 | initializer_list
380 | 	: initializer
381 | 	| initializer_list ',' initializer
382 | 	;
383 | 
384 | statement
385 | 	: labeled_statement
386 | 	| compound_statement
387 | 	| expression_statement
388 | 	| selection_statement
389 | 	| iteration_statement
390 | 	| jump_statement
391 | 	;
392 | 
393 | labeled_statement
394 | 	: CASE constant_expression ':' statement
395 | 	| DEFAULT ':' statement
396 | 	;
397 | 
398 | compound_statement
399 | 	: '{' '}'
400 | 	| '{' statement_list '}'
401 | 	| '{' declaration_list '}'
402 | 	| '{' declaration_list statement_list '}'
403 | 	;
404 | 
405 | declaration_list
406 | 	: declaration
407 | 	| declaration_list declaration
408 | 	;
409 | 
410 | statement_list
411 | 	: statement
412 | 	| statement_list statement
413 | 	;
414 | 
415 | expression_statement
416 | 	: ';'
417 | 	| expression ';'
418 | 	;
419 | 
420 | selection_statement
421 | 	: IF '(' expression ')' statement
422 | 	| IF '(' expression ')' statement ELSE statement
423 | 	| SWITCH '(' expression ')' statement
424 | 	;
425 | 
426 | iteration_statement
427 | 	: WHILE '(' expression ')' statement
428 | 	| DO statement WHILE '(' expression ')' ';'
429 | 	| FOR '(' expression_statement expression_statement ')' statement
430 | 	| FOR '(' expression_statement expression_statement expression ')' statement
431 | 	;
432 | 
433 | jump_statement
434 | 	:  CONTINUE ';'
435 | 	| BREAK ';'
436 | 	| RETURN ';'
437 | 	| RETURN expression ';'
438 | 	;
439 | 
440 | translation_unit
441 | 	: external_declaration
442 | 	| translation_unit external_declaration
443 | 	;
444 | 
445 | external_declaration
446 | 	: function_definition
447 | 	| declaration
448 | 	;
449 | 
450 | function_definition
451 | 	:    declaration_specifiers declarator compound_statement   
452 | 	;
453 | 
454 | %%
455 | #include <stdio.h>
456 | 
457 | extern char yytext[];
458 | extern int column;
459 | 
460 | yyerror(s)
461 | char *s;
462 | {
463 | 	fflush(stdout);
464 | 	printf("\n%*s\n%*s\n", column, "^", column, s);
465 | }
466 | 
467 | 468 | 469 | 470 | 471 | 472 | -------------------------------------------------------------------------------- /Class notes in Chinese.txt: -------------------------------------------------------------------------------- 1 | 1. 优化代码size和time 2 | 2. 避免编译器过于复杂 3 | 3. 去除冗余操作(if(0)去掉) 4 | 4. 常量传播、预处理 5 | 5. 函数替换、替换尾递归 6 | 6. 分析比例 7 | 7. 流图 点是block 边是跳转 8 | 8. (X+1)(X+1) -> (x+1)^2 9 | 9. error type : 10 | mismatch undefined const redeclaration 11 | 12 | 报告: 13 | 技术 14 | 测试 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /Presentation scripts: -------------------------------------------------------------------------------- 1 | Error recovery 2 | 3 | Adding error rules to our BNF 4 | Adding EOF token to handle the last missing right curly bracket 5 | 6 | So that we can discover most common mistakes and do error recovery (still build the right parsing tree) 7 | 8 | can handle: 9 | 1. missing semicolon 10 | 2. missing right curly bracket 11 | 3. some error identifier (不符合C标识符命名规则的) 12 | 4. error token after operator 13 | 以上4条均能进行error recovery,返回正确的语法树 14 | 5. 不符合 ANSI C 的各种语法 (error_pos.c) 15 | 16 | test files: 17 | test1.c all test 18 | test2.c missing semicolon 19 | missSEMI.c all errors token after operator + missing semicolon + errorID + missing right curly 20 | errorID.c errorID 21 | missRightCurly.c missing right curly 22 | 23 | 24 | Syntax error finder: 25 | 1. 函数声明与函数定义的参数列表不一致 26 | 2. 变量重复定义 27 | 3. 赋值时 类型不匹配 28 | 4. 表达式中,操作数的类型与规定的类型不一致 29 | 5. typo,打字错误。会从符号表中找出最接近的标识符,给出提示 30 | 6. 函数调用时参数表不符合函数定义 31 | 7. 函数实际返回值类型 不符合 函数定义中的函数返回值类型 32 | 33 | //1. 函数定义声明不一致 34 | int f(int i,...); 35 | int f(int j){ 36 | return 0; 37 | } 38 | 39 | int g(int i){ 40 | return 0; 41 | } 42 | typedef struct{ 43 | int n; 44 | } A; 45 | int main(int argc, char const *argv[]) 46 | { 47 | 48 | //2. 重复定义 49 | int k; 50 | int k; 51 | //缺少分号 52 | int i 53 | int count; 54 | //3. 类型不匹配 55 | A a; 56 | a = 5; 57 | //4. 未定义变量 58 | var = 3; 59 | //5. 操作数类型错误 60 | 1.0 >> 4; 61 | //打字错误 62 | cont = 4; 63 | g(1.0); 64 | //6. 参数表不匹配 65 | g(a); 66 | //7. 返回值不匹配 67 | return a; 68 | } 69 | 70 | 1. 基本功能: 71 | 计算add sub mul div 等 72 | 逻辑and or not 73 | 跳转jmp je jg jl 74 | 移位sal sar 75 | 函数call ret 76 | 堆栈push pop 77 | 全局数据 常量浮点数、字符串、global、static变量 78 | 浮点数运算fld fstp fadd fsub fmul fdiv 79 | 80 | 2. 优化 81 | 前端: 82 | constant folding 83 | 死代码消除 84 | 后端优化: 85 | 寄存器优化: 86 | 将ebx,ecx,edx作为临时变量的暂存区域 87 | 将esi edi作为eax的交换区 88 | 指令优化: 89 | *2 / 4 / 8。。。 ->sal 90 | lea 2*eax+offset -> reg 91 | 92 | 3. 支持特性: 93 | 看样例 94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ZCC 2 | ZJU standard C Compiler 3 | 4 | 17 | 18 | ## Code Organization 19 | * Lex and Yas related codes are in the folder *yyparse*. 20 | * Symbol table and Type check related codes are in the folder *symbol*. 21 | * Generating machine code related codes are in the folder *generation*. 22 | * Treating Special variables related codes are in the folder *public*. 23 | * Optimization codes are stored separately in each folder. 24 | 25 | 50 | 51 | 69 | 70 | 71 | 84 | ## Parsing Tree Sample: 85 | 86 | ``` 87 | declaration 88 | declaration_specifiers 89 | storage_class_specifier 90 | typedef 91 | declaration_specifiers 92 | type_specifier 93 | struct_or_union_specifier 94 | struct_or_union 95 | struct 96 | { 97 | struct_declaration_list 98 | struct_declaration_list 99 | struct_declaration 100 | specifier_qualifier_list 101 | type_specifier 102 | int 103 | struct_declarator_list 104 | struct_declarator 105 | declarator 106 | direct_declarator 107 | a 108 | ; 109 | struct_declaration 110 | specifier_qualifier_list 111 | type_specifier 112 | double 113 | struct_declarator_list 114 | struct_declarator 115 | declarator 116 | direct_declarator 117 | c 118 | ; 119 | } 120 | init_declarator_list 121 | init_declarator 122 | declarator 123 | direct_declarator 124 | mytype 125 | ; 126 | ``` 127 | ## Code Generation 128 | ### Miscellaneous 129 | 130 | Call functions in *self.tools* to translate. 131 | 132 | Detailed comments and examples can be found in *generation.generate*. 133 | 134 | ### Basic X86 supports 135 | * Calculation: add, sub, mul, div. 136 | * Logic: and, or, not. 137 | * Jump: jmp, je, jg, jl. 138 | * Shift: sal, sar. 139 | * Function: call, ret. 140 | * Stack: push, pop. 141 | * Float number operation: fld, fstp, fadd, fsub, fmul, fdiv. 142 | * Global/Static variables, Constant float number, String 143 | 144 | 150 | 151 | ## Code Optimization 152 | ### The optimization types supported 153 | http://www.compileroptimizations.com/index.html 154 | ### Constant propagation 155 | http://people.eecs.berkeley.edu/~bodik/cs264/lectures/4-chaotic-notes.pdf 156 | -------------------------------------------------------------------------------- /ZCC.bnf: -------------------------------------------------------------------------------- 1 | %token int_const char_const float_const id string enumeration_const 2 | %% 3 | 4 | translation_unit : external_decl 5 | | translation_unit external_decl 6 | ; 7 | external_decl : function_definition 8 | | decl 9 | ; 10 | function_definition : decl_specs declarator decl_list compound_stat 11 | | declarator decl_list compound_stat 12 | | decl_specs declarator compound_stat 13 | | declarator compound_stat 14 | ; 15 | decl : decl_specs init_declarator_list ';' 16 | | decl_specs ';' 17 | ; 18 | decl_list : decl 19 | | decl_list decl 20 | ; 21 | decl_specs : storage_class_spec decl_specs 22 | | storage_class_spec 23 | | type_spec decl_specs 24 | | type_spec 25 | | type_qualifier decl_specs 26 | | type_qualifier 27 | ; 28 | storage_class_spec : 'auto' | 'register' | 'static' | 'extern' | 'typedef' 29 | ; 30 | type_spec : 'void' | 'char' | 'short' | 'int' | 'long' | 'float' 31 | | 'double' | 'signed' | 'unsigned' 32 | | struct_or_union_spec 33 | | enum_spec 34 | | typedef_name 35 | ; 36 | type_qualifier : 'const' | 'volatile' 37 | ; 38 | struct_or_union_spec : struct_or_union id '{' struct_decl_list '}' 39 | | struct_or_union '{' struct_decl_list '}' 40 | | struct_or_union id 41 | ; 42 | struct_or_union : 'struct' | 'union' 43 | ; 44 | struct_decl_list : struct_decl 45 | | struct_decl_list struct_decl 46 | ; 47 | init_declarator_list : init_declarator 48 | | init_declarator_list ',' init_declarator 49 | ; 50 | init_declarator : declarator 51 | | declarator '=' initializer 52 | ; 53 | struct_decl : spec_qualifier_list struct_declarator_list ';' 54 | ; 55 | spec_qualifier_list : type_spec spec_qualifier_list 56 | | type_spec 57 | | type_qualifier spec_qualifier_list 58 | | type_qualifier 59 | ; 60 | struct_declarator_list : struct_declarator 61 | | struct_declarator_list ',' struct_declarator 62 | ; 63 | struct_declarator : declarator 64 | | declarator ':' const_exp 65 | | ':' const_exp 66 | ; 67 | enum_spec : 'enum' id '{' enumerator_list '}' 68 | | 'enum' '{' enumerator_list '}' 69 | | 'enum' id 70 | ; 71 | enumerator_list : enumerator 72 | | enumerator_list ',' enumerator 73 | ; 74 | enumerator : id 75 | | id '=' const_exp 76 | ; 77 | declarator : pointer direct_declarator 78 | | direct_declarator 79 | ; 80 | direct_declarator : id 81 | | '(' declarator ')' 82 | | direct_declarator '[' const_exp ']' 83 | | direct_declarator '[' ']' 84 | | direct_declarator '(' param_type_list ')' 85 | | direct_declarator '(' id_list ')' 86 | | direct_declarator '(' ')' 87 | ; 88 | pointer : '*' type_qualifier_list 89 | | '*' 90 | | '*' type_qualifier_list pointer 91 | | '*' pointer 92 | ; 93 | type_qualifier_list : type_qualifier 94 | | type_qualifier_list type_qualifier 95 | ; 96 | param_type_list : param_list 97 | | param_list ',' '...' 98 | ; 99 | param_list : param_decl 100 | | param_list ',' param_decl 101 | ; 102 | param_decl : decl_specs declarator 103 | | decl_specs abstract_declarator 104 | | decl_specs 105 | ; 106 | id_list : id 107 | | id_list ',' id 108 | ; 109 | initializer : assignment_exp 110 | | '{' initializer_list '}' 111 | | '{' initializer_list ',' '}' 112 | ; 113 | initializer_list : initializer 114 | | initializer_list ',' initializer 115 | ; 116 | type_name : spec_qualifier_list abstract_declarator 117 | | spec_qualifier_list 118 | ; 119 | abstract_declarator : pointer 120 | | pointer direct_abstract_declarator 121 | | direct_abstract_declarator 122 | ; 123 | direct_abstract_declarator: '(' abstract_declarator ')' 124 | | direct_abstract_declarator '[' const_exp ']' 125 | | '[' const_exp ']' 126 | | direct_abstract_declarator '[' ']' 127 | | '[' ']' 128 | | direct_abstract_declarator '(' param_type_list ')' 129 | | '(' param_type_list ')' 130 | | direct_abstract_declarator '(' ')' 131 | | '(' ')' 132 | ; 133 | typedef_name : id 134 | ; 135 | stat : labeled_stat 136 | | exp_stat 137 | | compound_stat 138 | | selection_stat 139 | | iteration_stat 140 | | jump_stat 141 | ; 142 | labeled_stat : id ':' stat 143 | | 'case' const_exp ':' stat 144 | | 'default' ':' stat 145 | ; 146 | exp_stat : exp ';' 147 | | ';' 148 | ; 149 | compound_stat : '{' decl_list stat_list '}' 150 | | '{' stat_list '}' 151 | | '{' decl_list '}' 152 | | '{' '}' 153 | ; 154 | stat_list : stat 155 | | stat_list stat 156 | ; 157 | selection_stat : 'if' '(' exp ')' stat 158 | | 'if' '(' exp ')' stat 'else' stat 159 | | 'switch' '(' exp ')' stat 160 | ; 161 | iteration_stat : 'while' '(' exp ')' stat 162 | | 'do' stat 'while' '(' exp ')' ';' 163 | | 'for' '(' exp ';' exp ';' exp ')' stat 164 | | 'for' '(' exp ';' exp ';' ')' stat 165 | | 'for' '(' exp ';' ';' exp ')' stat 166 | | 'for' '(' exp ';' ';' ')' stat 167 | | 'for' '(' ';' exp ';' exp ')' stat 168 | | 'for' '(' ';' exp ';' ')' stat 169 | | 'for' '(' ';' ';' exp ')' stat 170 | | 'for' '(' ';' ';' ')' stat 171 | ; 172 | jump_stat : 'goto' id ';' 173 | | 'continue' ';' 174 | | 'break' ';' 175 | | 'return' exp ';' 176 | | 'return' ';' 177 | ; 178 | exp : assignment_exp 179 | | exp ',' assignment_exp 180 | ; 181 | assignment_exp : conditional_exp 182 | | unary_exp assignment_operator assignment_exp 183 | ; 184 | assignment_operator : '=' | '*=' | '/=' | '%=' | '+=' | '-=' | '<<=' 185 | | '>>=' | '&=' | '^=' | '|=' 186 | ; 187 | conditional_exp : logical_or_exp 188 | | logical_or_exp '?' exp ':' conditional_exp 189 | ; 190 | const_exp : conditional_exp 191 | ; 192 | logical_or_exp : logical_and_exp 193 | | logical_or_exp '||' logical_and_exp 194 | ; 195 | logical_and_exp : inclusive_or_exp 196 | | logical_and_exp '&&' inclusive_or_exp 197 | ; 198 | inclusive_or_exp : exclusive_or_exp 199 | | inclusive_or_exp '|' exclusive_or_exp 200 | ; 201 | exclusive_or_exp : and_exp 202 | | exclusive_or_exp '^' and_exp 203 | ; 204 | and_exp : equality_exp 205 | | and_exp '&' equality_exp 206 | ; 207 | equality_exp : relational_exp 208 | | equality_exp '==' relational_exp 209 | | equality_exp '!=' relational_exp 210 | ; 211 | relational_exp : shift_expression 212 | | relational_exp '<' shift_expression 213 | | relational_exp '>' shift_expression 214 | | relational_exp '<=' shift_expression 215 | | relational_exp '>=' shift_expression 216 | ; 217 | shift_expression : additive_exp 218 | | shift_expression '<<' additive_exp 219 | | shift_expression '>>' additive_exp 220 | ; 221 | additive_exp : mult_exp 222 | | additive_exp '+' mult_exp 223 | | additive_exp '-' mult_exp 224 | ; 225 | mult_exp : cast_exp 226 | | mult_exp '*' cast_exp 227 | | mult_exp '/' cast_exp 228 | | mult_exp '%' cast_exp 229 | ; 230 | cast_exp : unary_exp 231 | | '(' type_name ')' cast_exp 232 | ; 233 | unary_exp : postfix_exp 234 | | '++' unary_exp 235 | | '--' unary_exp 236 | | unary_operator cast_exp 237 | | 'sizeof' unary_exp 238 | | 'sizeof' '(' type_name ')' 239 | ; 240 | unary_operator : '&' | '*' | '+' | '-' | '~' | '!' 241 | ; 242 | postfix_exp : primary_exp 243 | | postfix_exp '[' exp ']' 244 | | postfix_exp '(' argument_exp_list ')' 245 | | postfix_exp '(' ')' 246 | | postfix_exp '.' id 247 | | postfix_exp '->' id 248 | | postfix_exp '++' 249 | | postfix_exp '--' 250 | ; 251 | primary_exp : id 252 | | const 253 | | string 254 | | '(' exp ')' 255 | ; 256 | argument_exp_list : assignment_exp 257 | | argument_exp_list ',' assignment_exp 258 | ; 259 | const : int_const 260 | | char_const 261 | | float_const 262 | | enumeration_const 263 | ; -------------------------------------------------------------------------------- /bnf.tmp: -------------------------------------------------------------------------------- 1 | primary_expression 2 | : IDENTIFIER 3 | | CONSTANT 4 | | STRING_LITERAL 5 | | '(' expression ')' 6 | ; 7 | 8 | postfix_expression 9 | : primary_expression 10 | | postfix_expression '[' expression ']' 11 | | postfix_expression '(' ')' 12 | | postfix_expression '(' argument_expression_list ')' 13 | | postfix_expression '.' IDENTIFIER 14 | | postfix_expression PTR_OP IDENTIFIER 15 | | postfix_expression INC_OP 16 | | postfix_expression DEC_OP 17 | ; 18 | 19 | argument_expression_list 20 | : assignment_expression 21 | | argument_expression_list ',' assignment_expression 22 | ; 23 | 24 | unary_expression 25 | : postfix_expression 26 | | INC_OP unary_expression 27 | | DEC_OP unary_expression 28 | | unary_operator cast_expression 29 | | SIZEOF unary_expression 30 | | SIZEOF '(' type_name ')' 31 | ; 32 | 33 | unary_operator 34 | : '&' 35 | | '*' 36 | | '+' 37 | | '-' 38 | | '~' 39 | | '!' 40 | ; 41 | 42 | cast_expression 43 | : unary_expression 44 | | '(' type_name ')' cast_expression 45 | ; 46 | 47 | multiplicative_expression 48 | : cast_expression 49 | | multiplicative_expression '*' cast_expression 50 | | multiplicative_expression '/' cast_expression 51 | | multiplicative_expression '%' cast_expression 52 | ; 53 | 54 | additive_expression 55 | : multiplicative_expression 56 | | additive_expression '+' multiplicative_expression 57 | | additive_expression '-' multiplicative_expression 58 | ; 59 | 60 | shift_expression 61 | : additive_expression 62 | | shift_expression LEFT_OP additive_expression 63 | | shift_expression RIGHT_OP additive_expression 64 | ; 65 | 66 | relational_expression 67 | : shift_expression 68 | | relational_expression '<' shift_expression 69 | | relational_expression '>' shift_expression 70 | | relational_expression LE_OP shift_expression 71 | | relational_expression GE_OP shift_expression 72 | ; 73 | 74 | equality_expression 75 | : relational_expression 76 | | equality_expression EQ_OP relational_expression 77 | | equality_expression NE_OP relational_expression 78 | ; 79 | 80 | and_expression 81 | : equality_expression 82 | | and_expression '&' equality_expression 83 | ; 84 | 85 | exclusive_or_expression 86 | : and_expression 87 | | exclusive_or_expression '^' and_expression 88 | ; 89 | 90 | inclusive_or_expression 91 | : exclusive_or_expression 92 | | inclusive_or_expression '|' exclusive_or_expression 93 | ; 94 | 95 | logical_and_expression 96 | : inclusive_or_expression 97 | | logical_and_expression AND_OP inclusive_or_expression 98 | ; 99 | 100 | logical_or_expression 101 | : logical_and_expression 102 | | logical_or_expression OR_OP logical_and_expression 103 | ; 104 | 105 | conditional_expression 106 | : logical_or_expression 107 | | logical_or_expression '?' expression ':' conditional_expression 108 | ; 109 | 110 | assignment_expression 111 | : conditional_expression 112 | | unary_expression assignment_operator assignment_expression 113 | ; 114 | 115 | assignment_operator 116 | : '=' 117 | | MUL_ASSIGN 118 | | DIV_ASSIGN 119 | | MOD_ASSIGN 120 | | ADD_ASSIGN 121 | | SUB_ASSIGN 122 | | LEFT_ASSIGN 123 | | RIGHT_ASSIGN 124 | | AND_ASSIGN 125 | | XOR_ASSIGN 126 | | OR_ASSIGN 127 | ; 128 | 129 | expression 130 | : assignment_expression 131 | | expression ',' assignment_expression 132 | ; 133 | 134 | constant_expression 135 | : conditional_expression 136 | ; 137 | 138 | declaration 139 | : declaration_specifiers ';' 140 | | declaration_specifiers init_declarator_list ';' 141 | ; 142 | 143 | declaration_specifiers 144 | : storage_class_specifier 145 | | storage_class_specifier declaration_specifiers 146 | | type_specifier 147 | | type_specifier declaration_specifiers 148 | | type_qualifier 149 | | type_qualifier declaration_specifiers 150 | ; 151 | 152 | init_declarator_list 153 | : init_declarator 154 | | init_declarator_list ',' init_declarator 155 | ; 156 | 157 | init_declarator 158 | : declarator 159 | | declarator '=' initializer 160 | ; 161 | 162 | storage_class_specifier 163 | : TYPEDEF 164 | | EXTERN 165 | | STATIC 166 | | AUTO 167 | | REGISTER 168 | ; 169 | 170 | type_specifier 171 | : VOID 172 | | CHAR 173 | | SHORT 174 | | INT 175 | | LONG 176 | | FLOAT 177 | | DOUBLE 178 | | SIGNED 179 | | UNSIGNED 180 | | struct_or_union_specifier 181 | | enum_specifier 182 | | TYPE_NAME 183 | ; 184 | 185 | struct_or_union_specifier 186 | : struct_or_union IDENTIFIER '{' struct_declaration_list '}' 187 | | struct_or_union '{' struct_declaration_list '}' 188 | | struct_or_union IDENTIFIER 189 | ; 190 | 191 | struct_or_union 192 | : STRUCT 193 | | UNION 194 | ; 195 | 196 | struct_declaration_list 197 | : struct_declaration 198 | | struct_declaration_list struct_declaration 199 | ; 200 | 201 | struct_declaration 202 | : specifier_qualifier_list struct_declarator_list ';' 203 | ; 204 | 205 | specifier_qualifier_list 206 | : type_specifier specifier_qualifier_list 207 | | type_specifier 208 | | type_qualifier specifier_qualifier_list 209 | | type_qualifier 210 | ; 211 | 212 | struct_declarator_list 213 | : struct_declarator 214 | | struct_declarator_list ',' struct_declarator 215 | ; 216 | 217 | struct_declarator 218 | : declarator 219 | | ':' constant_expression 220 | | declarator ':' constant_expression 221 | ; 222 | 223 | enum_specifier 224 | : ENUM '{' enumerator_list '}' 225 | | ENUM IDENTIFIER '{' enumerator_list '}' 226 | | ENUM IDENTIFIER 227 | ; 228 | 229 | enumerator_list 230 | : enumerator 231 | | enumerator_list ',' enumerator 232 | ; 233 | 234 | enumerator 235 | : IDENTIFIER 236 | | IDENTIFIER '=' constant_expression 237 | ; 238 | 239 | type_qualifier 240 | : CONST 241 | | VOLATILE 242 | ; 243 | 244 | declarator 245 | : pointer direct_declarator 246 | | direct_declarator 247 | ; 248 | 249 | direct_declarator 250 | : IDENTIFIER 251 | | '(' declarator ')' 252 | | direct_declarator '[' constant_expression ']' 253 | | direct_declarator '[' ']' 254 | | direct_declarator '(' parameter_type_list ')' 255 | | direct_declarator '(' identifier_list ')' 256 | | direct_declarator '(' ')' 257 | ; 258 | 259 | pointer 260 | : '*' 261 | | '*' type_qualifier_list 262 | | '*' pointer 263 | | '*' type_qualifier_list pointer 264 | ; 265 | 266 | type_qualifier_list 267 | : type_qualifier 268 | | type_qualifier_list type_qualifier 269 | ; 270 | 271 | 272 | parameter_type_list 273 | : parameter_list 274 | | parameter_list ',' ELLIPSIS 275 | ; 276 | 277 | parameter_list 278 | : parameter_declaration 279 | | parameter_list ',' parameter_declaration 280 | ; 281 | 282 | parameter_declaration 283 | : declaration_specifiers declarator 284 | | declaration_specifiers abstract_declarator 285 | | declaration_specifiers 286 | ; 287 | 288 | identifier_list 289 | : IDENTIFIER 290 | | identifier_list ',' IDENTIFIER 291 | ; 292 | 293 | type_name 294 | : specifier_qualifier_list 295 | | specifier_qualifier_list abstract_declarator 296 | ; 297 | 298 | abstract_declarator 299 | : pointer 300 | | direct_abstract_declarator 301 | | pointer direct_abstract_declarator 302 | ; 303 | 304 | direct_abstract_declarator 305 | : '(' abstract_declarator ')' 306 | | '[' ']' 307 | | '[' constant_expression ']' 308 | | direct_abstract_declarator '[' ']' 309 | | direct_abstract_declarator '[' constant_expression ']' 310 | | '(' ')' 311 | | '(' parameter_type_list ')' 312 | | direct_abstract_declarator '(' ')' 313 | | direct_abstract_declarator '(' parameter_type_list ')' 314 | ; 315 | 316 | initializer 317 | : assignment_expression 318 | | '{' initializer_list '}' 319 | | '{' initializer_list ',' '}' 320 | ; 321 | 322 | initializer_list 323 | : initializer 324 | | initializer_list ',' initializer 325 | ; 326 | 327 | statement 328 | : labeled_statement 329 | | compound_statement 330 | | expression_statement 331 | | selection_statement 332 | | iteration_statement 333 | | jump_statement 334 | ; 335 | 336 | labeled_statement 337 | : IDENTIFIER ':' statement 338 | | CASE constant_expression ':' statement 339 | | DEFAULT ':' statement 340 | ; 341 | 342 | compound_statement 343 | : '{' '}' 344 | | '{' statement_list '}' 345 | | '{' declaration_list '}' 346 | | '{' declaration_list statement_list '}' 347 | ; 348 | 349 | declaration_list 350 | : declaration 351 | | declaration_list declaration 352 | ; 353 | 354 | statement_list 355 | : statement 356 | | statement_list statement 357 | ; 358 | 359 | expression_statement 360 | : ';' 361 | | expression ';' 362 | ; 363 | 364 | selection_statement 365 | : IF '(' expression ')' statement 366 | | IF '(' expression ')' statement ELSE statement 367 | | SWITCH '(' expression ')' statement 368 | ; 369 | 370 | iteration_statement 371 | : WHILE '(' expression ')' statement 372 | | DO statement WHILE '(' expression ')' ';' 373 | | FOR '(' expression_statement expression_statement ')' statement 374 | | FOR '(' expression_statement expression_statement expression ')' statement 375 | ; 376 | 377 | jump_statement 378 | : GOTO IDENTIFIER ';' 379 | | CONTINUE ';' 380 | | BREAK ';' 381 | | RETURN ';' 382 | | RETURN expression ';' 383 | ; 384 | 385 | translation_unit 386 | : external_declaration 387 | | translation_unit external_declaration 388 | ; 389 | 390 | external_declaration 391 | : function_definition 392 | | declaration 393 | ; 394 | 395 | function_definition 396 | : declaration_specifiers declarator declaration_list compound_statement 397 | | declaration_specifiers declarator compound_statement 398 | | declarator declaration_list compound_statement 399 | | declarator compound_statement -------------------------------------------------------------------------------- /generation/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | -------------------------------------------------------------------------------- /generation/data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding=utf-8 3 | from public.ZCCglobal import * 4 | 5 | class Data(object): 6 | def __init__(self,name,offset,type): 7 | """ 8 | :type name:str 9 | :type offset:bool 10 | :type type:CType 11 | """ 12 | self.name=name 13 | self.offset=offset 14 | self.type=type 15 | -------------------------------------------------------------------------------- /generation/generation.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # produce machine code 3 | import sys 4 | sys.path.append('c:\\zcc\\zcc') 5 | from public.ZCCglobal import * 6 | from utility import utility 7 | from copy import deepcopy 8 | from data import Data 9 | 10 | 11 | class generator: 12 | 13 | def __init__(self): 14 | # asm output list 15 | self.asm = [] 16 | self.tools = utility(self) 17 | self.exp2=[2**x for x in range(32)] 18 | # print(self.exp2) 19 | self.expression_handler = { 20 | 'primary_expression': self.gen_primary_expression, 21 | 'postfix_expression': self.gen_postfix_expression, 22 | 'unary_expression': self.gen_unary_expression, 23 | 'cast_expression': self.gen_cast_expression, 24 | 'multiplicative_expression': self.gen_multiplicative_expression, 25 | 'additive_expression': self.gen_additive_expression, 26 | 'shift_expression': self.gen_shift_expression, 27 | 'relational_expression': self.gen_relational_expression, 28 | 'equality_expression': self.gen_equality_expression, 29 | 'and_expression': self.gen_and_expression, 30 | 'exclusive_or_expression': self.gen_exclusive_or_expression, 31 | 'inclusive_or_expression': self.gen_inclusive_or_expression, 32 | 'logical_and_expression': self.gen_logical_and_expression, 33 | 'logical_or_expression': self.gen_logical_or_expression, 34 | 'conditional_expression': self.gen_conditional_expression, 35 | 'assignment_expression': self.gen_assignment_expression, 36 | 'expression': self.gen_expression} 37 | 38 | def generate(self): 39 | self.tools.globalInitialize() 40 | for funcName in global_context.local: 41 | value = global_context.local[funcName] 42 | if(value.type == 'function'): 43 | if global_context.local[ 44 | funcName].compound_statement is not None: 45 | self.tools.newFunc(funcName) 46 | self.gen_compound_statement( 47 | global_context.local[funcName].compound_statement,global_context.local[funcName].compound_statement.context) 48 | self.tools.endFunc() 49 | self.tools.end() 50 | 51 | def output(self, fileName): 52 | with open(fileName, 'w') as out: 53 | for line in self.asm: 54 | out.write(line) 55 | 56 | def gen_statement_list(self, node,context): 57 | """ 58 | :type node:TreeNode 59 | :type context:Context 60 | """ 61 | for subnode in node[1:]: 62 | if isinstance(subnode, TreeNode): 63 | if subnode[0] == "statement": 64 | self.gen_statement(subnode,context) 65 | 66 | 67 | def gen_statement(self, node,context): 68 | """ 69 | :type node:TreeNode 70 | :type context:Context 71 | """ 72 | for subnode in node[1:]: 73 | if isinstance(subnode, TreeNode): 74 | if subnode[0] == "expression_statement": 75 | self.gen_expression_statement(subnode,context) 76 | elif subnode[0] == "compound_statement": 77 | self.tools.newScope(subnode.context) 78 | self.gen_compound_statement(subnode,subnode.context) 79 | self.tools.endScope() 80 | elif subnode[0] == "selection_statement": 81 | self.gen_selection_statement(subnode,context) 82 | elif subnode[0]=="jump_statement": 83 | self.gen_jump_statement(subnode,context) 84 | elif subnode[0]=="iteration_statement": 85 | self.gen_iteration_statement(subnode,context) 86 | 87 | def gen_expression_statement(self, node,context): 88 | """ 89 | :type node:TreeNode 90 | :type context:Context 91 | :rtype :str 92 | """ 93 | if isinstance(node[1],TreeNode): 94 | ret=self.expression_handler[node[1][0]](node[1],context) 95 | else: 96 | ret=self.tools.getTrue() 97 | return ret 98 | 99 | def gen_compound_statement(self, node,context): 100 | """ 101 | :type node:TreeNode 102 | :type context:Context 103 | """ 104 | for subnode in node[1:]: 105 | if isinstance(subnode, TreeNode): 106 | if subnode[0] == "statement_list": 107 | self.gen_statement_list(subnode,context) 108 | 109 | def gen_selection_statement(self, node,context): 110 | """ 111 | :type node:TreeNode 112 | :type context:Context 113 | """ 114 | # node[3]:expression 115 | # node[5]:statement 116 | # node[7]:statement 117 | if node[1] == "if": 118 | ret=self.expression_handler[node[3][0]](node[3],context) 119 | if len(node) == 6: 120 | label1=self.tools.allocateLabel() 121 | self.tools.cmp(ret,self.tools.getFalse()) 122 | self.tools.je(label1) 123 | self.gen_statement(node[5],context) 124 | self.tools.markLabel(label1) 125 | elif len(node) == 8: 126 | label1=self.tools.allocateLabel() 127 | label2=self.tools.allocateLabel() 128 | self.tools.cmp(ret,self.tools.getFalse()) 129 | self.tools.je(label1) 130 | self.gen_statement(node[5],context) 131 | self.tools.jmp(label2) 132 | self.tools.markLabel(label1) 133 | self.gen_statement(node[7],context) 134 | self.tools.markLabel(label2) 135 | 136 | def gen_jump_statement(self,node,context): 137 | """ 138 | :type node:TreeNode 139 | :type context:Context 140 | """ 141 | if isinstance(node[2],TreeNode): 142 | ret=self.expression_handler[node[2][0]](node[2],context) 143 | self.tools.mov(self.tools.getEax(),ret) 144 | self.tools.ret() 145 | 146 | def gen_iteration_statement(self,node,context): 147 | """ 148 | :type node:TreeNode 149 | :type context:Context 150 | """ 151 | if node[1]=="for": 152 | if isinstance(node[5],TreeNode): 153 | label1=self.tools.allocateLabel() 154 | label2=self.tools.allocateLabel() 155 | label3=self.tools.allocateLabel() 156 | self.gen_expression_statement(node[3],context) 157 | self.tools.jmp(label2) 158 | self.tools.markLabel(label1) 159 | self.expression_handler[node[5][0]](node[5],context) 160 | self.tools.markLabel(label2) 161 | ret=self.gen_expression_statement(node[4],context) 162 | self.tools.cmp(ret,self.tools.getFalse()) 163 | self.tools.je(label3) 164 | self.gen_statement(node[7],context) 165 | self.tools.jmp(label1) 166 | self.tools.markLabel(label3) 167 | else: 168 | label1=self.tools.allocateLabel() 169 | label2=self.tools.allocateLabel() 170 | self.gen_expression_statement(node[3],context) 171 | self.tools.markLabel(label1) 172 | ret=self.gen_expression_statement(node[4],context) 173 | self.tools.cmp(ret,self.tools.getFalse()) 174 | self.gen_statement(node[6],context) 175 | self.tools.jmp(label1) 176 | self.tools.markLabel(label2) 177 | elif node[1]=="while": 178 | label1=self.tools.allocateLabel() 179 | label2=self.tools.allocateLabel() 180 | self.tools.markLabel(label1) 181 | ret=self.expression_handler[node[3][0]](node[3],context) 182 | self.tools.cmp(ret,self.tools.getFalse()) 183 | self.tools.je(label2) 184 | self.gen_statement(node[5],context) 185 | self.tools.jmp(label1) 186 | self.tools.markLabel(label2) 187 | 188 | 189 | 190 | def gen_additive_expression(self, node,context): 191 | """ 192 | :type node:TreeNode 193 | :type context:Context 194 | :rtype: str 195 | """ 196 | op1=self.expression_handler[node[1][0]](node[1],context) 197 | tmp=self.tools.allocateNewReg(op1) 198 | self.tools.lock(tmp) 199 | self.tools.mov(tmp,op1) 200 | op2=self.expression_handler[node[3][0]](node[3],context) 201 | if node[2]=="+": 202 | ret=self.tools.add(tmp,op2) 203 | else: 204 | ret=self.tools.sub(tmp,op2) 205 | self.tools.unLock(tmp) 206 | return ret 207 | 208 | def gen_primary_expression(self,node,context): 209 | """ 210 | :type node:TreeNode 211 | :type context:Context 212 | :rtype: Data 213 | """ 214 | if isinstance(node[1],TreeNode): 215 | if node[1][0]=="IDENTIFIER": 216 | name=node[1][1] 217 | offset=False 218 | type=deepcopy(context.get_type_by_id(name)) 219 | return Data(name,offset,type) 220 | else: 221 | if node[1][0]=="INTEGER": 222 | return int(node[1][1]) 223 | elif node[1][0]=="DOUBLE": 224 | return float(node[1][1]) 225 | elif node[1][0]=="STRING": 226 | return str(node[1][1]) 227 | 228 | 229 | def gen_postfix_expression(self,node,context): 230 | """ 231 | :type node:TreeNode 232 | :type context:Context 233 | :rtype: str 234 | """ 235 | operand=self.expression_handler[node[1][0]](node[1],context) 236 | if node[2]=="[": 237 | if operand.offset==False: 238 | self.tools.mov(self.tools.getEax(),0) 239 | index=self.expression_handler[node[3][0]](node[3],context) 240 | self.tools.mul(index,operand.type.member_type.Size()) 241 | operand.offset=True 242 | operand.type=operand.type.member_type 243 | return operand 244 | elif node[2]=="(": 245 | if isinstance(node[3],TreeNode): 246 | argument_expression_list=node[3] 247 | real_arg_list=[] 248 | for argument_expression in argument_expression_list[1:]: 249 | if isinstance(argument_expression,TreeNode): 250 | argument=self.expression_handler[argument_expression[0]](argument_expression,context) 251 | if argument==self.tools.getEax(): 252 | tmp=self.tools.allocateNewReg(self.tools.getEax()) 253 | self.tools.lock(tmp) 254 | self.tools.mov(tmp,self.tools.getEax()) 255 | real_arg_list.append([tmp,0]) 256 | elif isinstance(argument,Data) and argument.offset: 257 | tmp=self.tools.allocateNewReg(self.tools.getEax()) 258 | self.tools.lock(tmp) 259 | self.tools.mov(tmp,self.tools.getEax()) 260 | real_arg_list.append([argument,1,tmp]) 261 | else: 262 | real_arg_list.append([argument,2]) 263 | for list in real_arg_list: 264 | if list[1]==1: 265 | self.tools.mov(self.tools.getEax(),list[2]) 266 | self.tools.passPara(list[0]) 267 | if list[1]==0: 268 | self.tools.unLock(list[0]) 269 | if list[1]==1: 270 | self.tools.unLock(list[2]) 271 | ret=self.tools.call(operand) 272 | return ret 273 | elif node[2]==".": 274 | if operand.offset==False: 275 | self.tools.mov(self.tools.getEax(),0) 276 | member=node[3][1] 277 | self.tools.add(self.tools.getEax(),operand.type.offset[member]) 278 | operand.type=operand.type.members[member] 279 | operand.offset=True 280 | return operand 281 | elif node[2]=="->": 282 | self.tools.mov(self.tools.getEax(),operand) 283 | member=node[3][1] 284 | self.tools.add(self.tools.getEax(),operand.type.offset[member]) 285 | operand.name=self.tools.getNull() 286 | operand.type=operand.type.members[member] 287 | operand.offset=True 288 | return operand 289 | 290 | 291 | def gen_unary_expression(self,node,context): 292 | """ 293 | :type node:TreeNode 294 | :type context:Context 295 | :rtype: str 296 | """ 297 | operand=self.expression_handler[node[2][0]](node[2],context) 298 | if isinstance(node[1],TreeNode): 299 | operator=self.gen_unary_operator(node[1],context) 300 | if operator=="&": 301 | if isinstance(operand,Data): 302 | ret=self.tools.lea(operand) 303 | operand.type.is_const.append(False) 304 | return ret 305 | elif operator=="*": 306 | if isinstance(operand,Data): 307 | self.tools.mov(self.tools.getEax(),operand) 308 | operand.name=self.tools.getNull() 309 | operand.offset=True 310 | operand.type.is_const.pop() 311 | return operand 312 | else: 313 | if node[1]=="++": 314 | ret=self.tools.add(operand,1) 315 | self.tools.mov(operand,ret) 316 | return operand 317 | elif node[1]=="--": 318 | self.tools.sub(operand,1) 319 | return operand 320 | 321 | 322 | def gen_cast_expression(self,node,context): 323 | """ 324 | :type node:TreeNode 325 | :type context:Context 326 | :rtype: str 327 | """ 328 | pass 329 | 330 | def gen_multiplicative_expression(self,node,context): 331 | """ 332 | :type node:TreeNode 333 | :type context:Context 334 | :rtype: str 335 | """ 336 | op1=self.expression_handler[node[1][0]](node[1],context) 337 | tmp=self.tools.allocateNewReg(op1) 338 | self.tools.lock(tmp) 339 | self.tools.mov(tmp,op1) 340 | op2=self.expression_handler[node[3][0]](node[3],context) 341 | if node[2]=="*": 342 | if isinstance(op2,str): 343 | try: 344 | num=int(op2) 345 | if num in self.exp2: 346 | ret=self.tools.sal(tmp,str(self.exp2.index(num))) 347 | else: 348 | ret=self.tools.mul(tmp,op2) 349 | except Exception: 350 | ret=self.tools.mul(tmp,op2) 351 | else: 352 | ret=self.tools.mul(tmp,op2) 353 | elif node[2]=="/": 354 | if isinstance(op2,str): 355 | try: 356 | num=int(op2) 357 | if num in self.exp2: 358 | ret=self.tools.sar(tmp,str(self.exp2.index(num))) 359 | else: 360 | ret=self.tools.div(tmp,op2) 361 | except Exception: 362 | ret=self.tools.div(tmp,op2) 363 | else: 364 | ret=self.tools.div(tmp,op2) 365 | self.tools.unLock(tmp) 366 | return ret 367 | 368 | 369 | def gen_shift_expression(self,node,context): 370 | """ 371 | :type node:TreeNode 372 | :type context:Context 373 | :rtype: str 374 | """ 375 | pass 376 | 377 | def gen_relational_expression(self,node,context): 378 | """ 379 | :type node:TreeNode 380 | :type context:Context 381 | :rtype: str 382 | """ 383 | label1=self.tools.allocateLabel() 384 | label2=self.tools.allocateLabel() 385 | op1=self.expression_handler[node[1][0]](node[1],context) 386 | tmp=self.tools.allocateNewReg(op1) 387 | self.tools.lock(tmp) 388 | self.tools.mov(tmp,op1) 389 | op2=self.expression_handler[node[3][0]](node[3],context) 390 | self.tools.cmp(tmp,op2) 391 | if node[2]=="<": 392 | self.tools.jl(label1) 393 | elif node[2]=="<=": 394 | self.tools.jle(label1) 395 | elif node[2]==">": 396 | self.tools.jg(label1) 397 | elif node[2]==">=": 398 | self.tools.jge(label1) 399 | self.tools.mov(self.tools.getEax(),0) 400 | self.tools.jmp(label2) 401 | self.tools.markLabel(label1) 402 | self.tools.mov(self.tools.getEax(),1) 403 | self.tools.markLabel() 404 | return self.tools.getEax() 405 | 406 | def gen_equality_expression(self,node,context): 407 | """ 408 | :type node:TreeNode 409 | :type context:Context 410 | :rtype: str 411 | """ 412 | label1=self.tools.allocateLabel() 413 | label2=self.tools.allocateLabel() 414 | op1=self.expression_handler[node[1][0]](node[1],context) 415 | tmp=self.tools.allocateNewReg(op1) 416 | self.tools.lock(tmp) 417 | self.tools.mov(tmp,op1) 418 | op2=self.expression_handler[node[3][0]](node[3],context) 419 | self.tools.cmp(tmp,op2) 420 | if node[2]=="==": 421 | self.tools.je(label1) 422 | elif node[2]=="!=": 423 | self.tools.jne(label1) 424 | self.tools.mov(self.tools.getEax(),0) 425 | self.tools.jmp(label2) 426 | self.tools.markLabel(label1) 427 | self.tools.mov(self.tools.getEax(),1) 428 | self.tools.markLabel() 429 | return self.tools.getEax() 430 | 431 | 432 | def gen_and_expression(self,node,context): 433 | """ 434 | :type node:TreeNode 435 | :type context:Context 436 | :rtype: str 437 | """ 438 | op1=self.expression_handler[node[1][0]](node[1],context) 439 | tmp=self.tools.allocateNewReg(op1) 440 | self.tools.lock(tmp) 441 | self.tools.mov(tmp,op1) 442 | op2=self.expression_handler[node[3][0]](node[3],context) 443 | ret=self.tools.And(tmp,op2) 444 | self.tools.unLock(tmp) 445 | return ret 446 | 447 | def gen_exclusive_or_expression(self,node,context): 448 | """ 449 | :type node:TreeNode 450 | :type context:Context 451 | :rtype: str 452 | """ 453 | pass 454 | # op1=self.expression_handler[node[1][0]](node[1],context) 455 | # tmp=self.tools.allocateNewReg() 456 | # self.tools.lock(tmp) 457 | # self.tools.mov(tmp,op1) 458 | # op2=self.expression_handler[node[3][0]](node[3],context) 459 | # ret=self.tools.xor(tmp,op2) 460 | # self.tools.unLock(tmp) 461 | # return ret 462 | 463 | def gen_inclusive_or_expression(self,node,context): 464 | """ 465 | :type node:TreeNode 466 | :type context:Context 467 | :rtype: str 468 | """ 469 | op1=self.expression_handler[node[1][0]](node[1],context) 470 | tmp=self.tools.allocateNewReg(op1) 471 | self.tools.lock(tmp) 472 | self.tools.mov(tmp,op1) 473 | op2=self.expression_handler[node[3][0]](node[3],context) 474 | ret=self.tools.Or(tmp,op2) 475 | self.tools.unLock(tmp) 476 | return ret 477 | 478 | def gen_logical_and_expression(self,node,context): 479 | """ 480 | :type node:TreeNode 481 | :type context:Context 482 | :rtype: str 483 | """ 484 | label1=self.tools.allocateLabel() 485 | label2=self.tools.allocateLabel() 486 | op1=self.expression_handler[node[1][0]](node[1],context) 487 | self.tools.cmp(op1,self.tools.getFalse()) 488 | self.tools.je(label1) 489 | op2=self.expression_handler[node[3][0]](node[3],context) 490 | self.tools.cmp(op2,self.tools.getFalse()) 491 | self.tools.je(label1) 492 | self.tools.mov(self.tools.getEax(),1) 493 | self.tools.jmp(label2) 494 | self.tools.markLabel(label1) 495 | self.tools.mov(self.tools.getEax(),0) 496 | self.tools.markLabel(label2) 497 | return self.tools.getEax() 498 | 499 | def gen_logical_or_expression(self,node,context): 500 | """ 501 | :type node:TreeNode 502 | :type context:Context 503 | :rtype: str 504 | """ 505 | label1=self.tools.allocateLabel() 506 | label2=self.tools.allocateLabel() 507 | op1=self.expression_handler[node[1][0]](node[1],context) 508 | self.tools.cmp(op1,self.tools.getFalse()) 509 | self.tools.jne(label1) 510 | op2=self.expression_handler[node[3][0]](node[3],context) 511 | self.tools.cmp(op2,self.tools.getFalse()) 512 | self.tools.jne(label1) 513 | self.tools.mov(self.tools.getEax(),0) 514 | self.tools.jmp(label2) 515 | self.tools.markLabel(label1) 516 | self.tools.mov(self.tools.getEax(),1) 517 | self.tools.markLabel(label2) 518 | return self.tools.getEax() 519 | 520 | 521 | def gen_conditional_expression(self,node,context): 522 | """ 523 | :type node:TreeNode 524 | :type context:Context 525 | :rtype: str 526 | """ 527 | pass 528 | 529 | def gen_assignment_expression(self,node,context): 530 | """ 531 | :type node:TreeNode 532 | :type context:Context 533 | :rtype: str 534 | """ 535 | operator=self.gen_assignment_operator(node[2],context) 536 | right=self.expression_handler[node[3][0]](node[3],context) 537 | tmp=self.tools.allocateNewReg(right) 538 | self.tools.lock(tmp) 539 | self.tools.mov(tmp,right) 540 | left=self.expression_handler[node[1][0]](node[1],context) 541 | # print(left) 542 | if operator=="=": 543 | self.tools.mov(left,tmp) 544 | self.tools.unLock(tmp) 545 | return left 546 | 547 | def gen_expression(self,node,context): 548 | """ 549 | :type node:TreeNode 550 | :type context:Context 551 | :rtype: str 552 | """ 553 | pass 554 | 555 | def gen_assignment_operator(self,node,context): 556 | """ 557 | :type node:TreeNode 558 | :type context:Context 559 | :rtype: str 560 | """ 561 | return node[1] 562 | 563 | def gen_unary_operator(self,node,context): 564 | """ 565 | :type node:TreeNode 566 | :type context:Context 567 | :rtype: str 568 | """ 569 | return node[1] 570 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from yyparse.ZCCparser import parser, printAST 3 | from yyparse.ZCClex import lexer as ZCClexer 4 | from symbol.symtab import c_types 5 | from public.ZCCglobal import global_context, FuncType, error, Context 6 | from generation.generation import generator 7 | import os 8 | import sys 9 | 10 | 11 | def preprocess(source): 12 | stream = os.popen("gcc -E " + source) 13 | return stream.read() 14 | 15 | 16 | if __name__ == '__main__': 17 | if len(sys.argv) < 3: 18 | print "Usage: python main.py \nEnvironment: Python2.7, Linux." 19 | exit(1) 20 | File = sys.argv[1] 21 | codes = preprocess(os.path.abspath("test/"+File)) 22 | pt = parser.parse(codes, lexer=ZCClexer) 23 | # print "errorCounter=", parser.errorCounter 24 | printAST(pt) 25 | # with open("test.s","w") as output: 26 | # print global_context 27 | # print error 28 | # printAST(global_context.local['main'].compound_statement.ast) 29 | if(not error[0]): 30 | gen = generator() 31 | gen.generate() 32 | gen.output(sys.argv[2]) 33 | -------------------------------------------------------------------------------- /public/ZCCglobal.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | class CType(object): 6 | def __init__(self, type_name, size=0, **kwargs): 7 | """ 8 | :type type_name:str 9 | :type size: int 10 | :type kwargs: dict 11 | :return: None 12 | """ 13 | # "int","char","double","float","long","short","void", 14 | # "struct","union","enum","function", "array" 15 | # 'Incomplete' 16 | self.type = type_name # type: str 17 | # sizeof 18 | self.size = size # type: int 19 | self.is_const = [False] # type: list[bool] 20 | self.storage_class = None # type: str 21 | # "static", "extern" 22 | 23 | for key in kwargs: 24 | self.__setattr__(key, kwargs[key]) 25 | 26 | def pointer_count(self): 27 | """ 28 | :return: int 29 | """ 30 | return len(self.is_const) - 1 31 | 32 | def Size(self): 33 | """ 34 | Must get size by this function!!! 35 | :rtype: int 36 | """ 37 | if self.pointer_count() == 0: 38 | return self.size 39 | else: 40 | return 4 41 | 42 | def __repr__(self): 43 | return self.__add_star__(self.type) 44 | 45 | def __add_star__(self, base_type_repr): 46 | rval = base_type_repr 47 | if self.storage_class: 48 | rval = self.storage_class + " " + rval 49 | for i in xrange(0, len(self.is_const)): 50 | if i > 0: 51 | rval += " *" 52 | if self.is_const[i]: 53 | rval += " const" 54 | return rval 55 | 56 | def __eq__(self, other): 57 | """ 58 | :type self: CType 59 | :type other: CType 60 | :rtype: bool 61 | """ 62 | if self.pointer_count() != other.pointer_count(): 63 | return False 64 | if self.type != other.type: 65 | return False 66 | return True 67 | 68 | def is_integer(self): 69 | """ 70 | :rtype: bool 71 | """ 72 | return self.pointer_count() > 0 or self.type in \ 73 | ['char', 'short', 'int', 'long', 'long long', 74 | 'signed char', 'signed short', 'signed int', 'signed long', 75 | 'signed long long', 76 | 'unsigned char', 'unsigned short', 'unsigned int', 'unsigned long', 77 | 'unsigned long long'] 78 | 79 | def is_number(self): 80 | """ 81 | :rtype: bool 82 | """ 83 | return self.pointer_count() > 0 or self.type in \ 84 | ['char', 'short', 'int', 'long', 'long long', 85 | 'signed char', 'signed short', 'signed int', 'signed long', 86 | 'signed long long', 87 | 'unsigned char', 'unsigned short', 'unsigned int', 'unsigned long', 88 | 'unsigned long long', 89 | 'float', 'double'] 90 | 91 | 92 | class StructType(CType): 93 | def __init__(self, members=list()): 94 | """ 95 | :type members: list[(str,CType)] 96 | :return: 97 | """ 98 | CType.__init__(self, 'struct') 99 | self.members = {} # type: dict[str,CType] 100 | self.offset = {} 101 | self.size = 0 102 | for member in members: 103 | self.members[member[0]] = member[1] 104 | self.offset[member[0]] = self.size 105 | self.size += member[1].size 106 | self.size = ((self.size - 1) / 4 + 1) * 4 107 | 108 | def __repr__(self): 109 | return self.__add_star__('struct ' + repr(self.members)) 110 | 111 | def __eq__(self, other): 112 | return CType.__eq__(self, other) and has_same_members(self, other) 113 | 114 | 115 | class UnionType(CType): 116 | def __init__(self, members=list()): 117 | """ 118 | :type members: list[(str,CType)] 119 | :return: 120 | """ 121 | CType.__init__(self, 'union') 122 | self.members = {} # type: dict[str,CType] 123 | self.size = 0 # type: int 124 | for member in members: 125 | self.members[member[0]] = member[1] 126 | if member[1].size > self.size: 127 | self.size = member[1].size 128 | 129 | def __repr__(self): 130 | 131 | return self.__add_star__('union ' + repr(self.members)) 132 | 133 | def __eq__(self, other): 134 | return CType.__eq__(self, other) and has_same_members(self, other) 135 | 136 | 137 | class EnumType(CType): 138 | def __init__(self, values): 139 | """ 140 | :type values: dict[(str,int)] 141 | :return: 142 | """ 143 | CType.__init__(self, 'enum') 144 | self.values = values 145 | self.size = 4 146 | 147 | def __repr__(self): 148 | return self.__add_star__('enum ' + repr(self.values)) 149 | 150 | def __eq__(self, other): 151 | raise Exception('Not support enum') 152 | 153 | 154 | class FuncType(CType): 155 | def __init__(self, return_type, 156 | parameter_list=list(), 157 | parameter_list_is_extendable=False, 158 | compound_statement=None): 159 | """ 160 | :type return_type: CType 161 | :type parameter_list: list[(str,CType)] 162 | :type parameter_list_is_extendable: bool 163 | :type compound_statement: TreeNode 164 | """ 165 | CType.__init__(self, 'function') 166 | self.return_type = return_type # type: CType 167 | self.storage_class = return_type.storage_class 168 | return_type.storage_class = None 169 | self.parameter_list = parameter_list # type: list[(str,CType)] 170 | self.parameter_list_is_extendable = \ 171 | parameter_list_is_extendable # type: bool 172 | self.compound_statement = compound_statement # type: TreeNode 173 | 174 | def __repr__(self): 175 | rval = repr(self.return_type) + " function(" 176 | for parameter in self.parameter_list: 177 | rval += repr(parameter[1]) + ' ' + parameter[0] + ',' 178 | if self.parameter_list_is_extendable: 179 | rval += '...' 180 | rval += ')' 181 | if self.compound_statement is not None: 182 | rval += repr(self.compound_statement.context) 183 | return self.__add_star__(rval) 184 | 185 | def __eq__(self, other): 186 | """ 187 | :type other: FuncType 188 | :rtype: bool 189 | """ 190 | if self.type != other.type: 191 | return False 192 | if self.pointer_count() + other.pointer_count() > 1: 193 | if self.pointer_count() != other.pointer_count(): 194 | return False 195 | if not self.return_type == other.return_type: 196 | return False 197 | if not self.parameter_list_is_extendable == other.parameter_list_is_extendable: 198 | return False 199 | if not len(self.parameter_list) == len(other.parameter_list): 200 | return False 201 | for i in xrange(len(self.parameter_list)): 202 | if not self.parameter_list[i][1] == other.parameter_list[i][1]: 203 | return False 204 | return True 205 | 206 | 207 | class ArrayType(CType): 208 | def __init__(self, c_type, length): 209 | """ 210 | :type c_type: CType 211 | :type length: int 212 | :return: 213 | """ 214 | CType.__init__(self, 'array', size=length * c_type.Size()) 215 | self.length = length 216 | self.member_type = c_type 217 | self.storage_class = c_type.storage_class 218 | c_type.storage_class = None 219 | 220 | def __repr__(self): 221 | return self.__add_star__(repr(self.member_type) + "[%d]" % self.length) 222 | 223 | def __eq__(self, other): 224 | """ 225 | :type other: ArrayType 226 | :rtype: bool 227 | """ 228 | if not CType.__eq__(self, other): 229 | return False 230 | return self.length == other.length and \ 231 | self.member_type == other.member_type 232 | 233 | 234 | class LiteralType(CType): 235 | def __init__(self, val): 236 | """ 237 | :type c_type: CType 238 | :return: 239 | """ 240 | CType.__init__(self, '') 241 | self.val = val 242 | if isinstance(val, str): 243 | self.type = 'char' 244 | self.size = 1 245 | self.is_const = [True, False] 246 | elif isinstance(val, int): 247 | self.type = 'int' 248 | self.size = 4 249 | self.is_const = [True] 250 | elif isinstance(val, float): 251 | self.type = 'double' 252 | self.size = 8 253 | self.is_const = [True] 254 | 255 | 256 | class Context: 257 | outer_context = None # type: Context 258 | func_type = None # type: FuncType 259 | local = None # type: dict[str,CType] 260 | 261 | def __init__(self, outer_context=None, func_type=None): 262 | self.outer_context = outer_context # type: Context 263 | self.func_type = func_type # type: FuncType 264 | self.local = {} 265 | 266 | def __repr__(self): 267 | return " local: " + repr(self.local) 268 | 269 | def get_return_type(self): 270 | """ 271 | :rtype: CType 272 | """ 273 | if self.func_type is None: 274 | if self.outer_context is None: 275 | return # global_context has no return type 276 | else: 277 | return self.outer_context.get_return_type() 278 | else: 279 | return self.func_type.return_type 280 | 281 | def get_type_by_id(self, identifier): 282 | """ 283 | :type identifier: str 284 | :rtype: CType 285 | """ 286 | if identifier in self.local: 287 | return self.local[identifier] 288 | if self.func_type is not None: 289 | for parameter in self.func_type.parameter_list: 290 | if identifier == parameter[0]: 291 | return parameter[1] 292 | if self.outer_context is not None: 293 | return self.outer_context.get_type_by_id(identifier) 294 | return None # if not find 295 | 296 | def add_literal(self, name, literal): 297 | """ 298 | :type name: str 299 | :type literal: LiteralType 300 | """ 301 | context = self 302 | while context.outer_context is not None: 303 | context = context.outer_context 304 | context.literal[name] = literal 305 | 306 | 307 | class GlobalContext(Context): 308 | def __init__(self): 309 | Context.__init__(self) 310 | self.literal = {} # type: dict[str,LiteralType] 311 | 312 | def __repr__(self): 313 | return 'literals:' + repr(self.literal) + '\n' + Context.__repr__(self) 314 | 315 | 316 | global_context = GlobalContext() 317 | error = [False] 318 | 319 | 320 | class TreeNode(list): 321 | def __init__(self, lineno=-1): 322 | """ 323 | :return: 324 | """ 325 | self.lineno = lineno # type: int 326 | # self.ast = self # type: # list[list] 327 | 328 | 329 | # class LeafNode(str): 330 | # def __init__(self, lineno=-1): 331 | # """ 332 | # :return: 333 | # """ 334 | # self.lineno = lineno # type: int 335 | 336 | 337 | # self.ast = ast # type: list[list] 338 | # for key in kwargs: 339 | # self.__setattr__(key, kwargs[key]) 340 | # 341 | # def __getitem__(self, item): 342 | # return self.ast.__getitem__(item) 343 | # 344 | # def __setitem__(self, key, value): 345 | # self.ast.__setitem__(key, value) 346 | # 347 | # def __len__(self): 348 | # return self.ast.__len__() 349 | 350 | 351 | def has_same_members(struct_type1, struct_type2): 352 | """ 353 | :type struct_type1: StructType 354 | :type struct_type2: StructType 355 | :rtype: bool 356 | """ 357 | for member in struct_type1.members: 358 | if member not in struct_type2.members \ 359 | or not struct_type1.members[member] == struct_type2.members[member]: 360 | return False 361 | 362 | for member in struct_type2.members: 363 | if member not in struct_type1.members \ 364 | or not struct_type2.members[member] == \ 365 | struct_type1.members[member]: 366 | return False 367 | return True 368 | -------------------------------------------------------------------------------- /public/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hlFu/ZCC/811bd987c9d7a3754f7bc9c9d986359ee3df7327/public/__init__.py -------------------------------------------------------------------------------- /public/const.py: -------------------------------------------------------------------------------- 1 | #constant value put here 2 | 3 | UNDEFINED = -100 4 | 5 | class NodeKind: 6 | STMT = 1 7 | EXP = 2 8 | 9 | class StmtKind: 10 | IF = 1 11 | REPEAT = 2 12 | ASSIGN = 3 13 | 14 | class ExpKind: 15 | OP = 1 16 | CONST = 2 17 | ID = 3 18 | 19 | class ExpType: 20 | VOID = 1 21 | INTEGER = 2 22 | FLOAT = 3 23 | BOOLEAN = 4 -------------------------------------------------------------------------------- /symbol/.gitignore: -------------------------------------------------------------------------------- 1 | *.tmp -------------------------------------------------------------------------------- /symbol/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hlFu/ZCC/811bd987c9d7a3754f7bc9c9d986359ee3df7327/symbol/__init__.py -------------------------------------------------------------------------------- /test/a.s: -------------------------------------------------------------------------------- 1 | .file "a.c" 2 | .intel_syntax noprefix 3 | .section .rodata 4 | .LC0: 5 | .string "hello" 6 | .LC1: 7 | .string "%d\n" 8 | .text 9 | .globl foo 10 | .type foo, @function 11 | foo: 12 | push ebp 13 | mov ebp, esp 14 | sub esp, 8 15 | sub esp, 12 16 | push OFFSET FLAT:.LC0 17 | call puts 18 | add esp, 16 19 | sub esp, 8 20 | push DWORD PTR [ebp+8] 21 | push OFFSET FLAT:.LC1 22 | call printf 23 | add esp, 16 24 | mov eax, DWORD PTR [ebp+8] 25 | leave 26 | ret 27 | .size foo, .-foo 28 | .globl main 29 | .type main, @function 30 | main: 31 | lea ecx, [esp+4] 32 | and esp, -16 33 | push DWORD PTR [ecx-4] 34 | push ebp 35 | mov ebp, esp 36 | push ecx 37 | sub esp, 20 38 | mov DWORD PTR [ebp-12], 2 39 | sub esp, 12 40 | push DWORD PTR [ebp-12] 41 | call foo 42 | add esp, 16 43 | mov DWORD PTR [ebp-16], eax 44 | sub esp, 8 45 | push DWORD PTR [ebp-16] 46 | push OFFSET FLAT:.LC1 47 | call printf 48 | add esp, 16 49 | mov eax, 0 50 | mov ecx, DWORD PTR [ebp-4] 51 | leave 52 | lea esp, [ecx-4] 53 | ret 54 | .size main, .-main 55 | .ident "GCC: (GNU) 5.3.1 20160406 (Red Hat 5.3.1-6)" 56 | .section .note.GNU-stack,"",@progbits 57 | -------------------------------------------------------------------------------- /test/array.c: -------------------------------------------------------------------------------- 1 | /* 2 | * multi-dimension array 3 | */ 4 | #include "stdio.h" 5 | int main(void) 6 | { 7 | int a[5][5]; 8 | int i,j; 9 | for(i=0;i<5;++i) 10 | { 11 | for (j=0;j<5;++j) 12 | { 13 | a[i][j]=i*5+j; 14 | printf("%02d ",a[i][j]); 15 | } 16 | puts(""); 17 | } 18 | return 0; 19 | } 20 | 21 | -------------------------------------------------------------------------------- /test/basic.c: -------------------------------------------------------------------------------- 1 | /* 2 | basic expression:for if while 3 | basic type: int float double char pointer 4 | glibc:scanf printf 5 | constant: string char float 6 | scope: local, global, static local, compound_statement 7 | arithmetic operation; logical operation 8 | priority 9 | declaration definition 10 | array 11 | increment 12 | preprocessing 13 | */ 14 | #include "stdio.h" 15 | #define UPPERCASE_A 65 16 | #define LOWERCASE_A 97 17 | #define LOWERCASE_Z 122 18 | int fib(int n); 19 | int n,i; 20 | int main(int argc,char **argv) 21 | { 22 | double d,f; 23 | char *s; 24 | 25 | s=*argv; 26 | while(*s!=0) 27 | { 28 | if(*s<=LOWERCASE_Z&&*s>=LOWERCASE_A) 29 | *s=*s+(UPPERCASE_A-LOWERCASE_A); 30 | ++s; 31 | } 32 | printf("%s\n",*argv); 33 | 34 | scanf("%d",&n); 35 | printf("%d\n",fib(n)); 36 | 37 | f=0.5; 38 | d=1.5; 39 | 40 | for(i=0;i1) 55 | { 56 | return fib(n-1)+fib(n-2); 57 | } 58 | else if(n==1) 59 | { 60 | return 1; 61 | } 62 | else 63 | { 64 | return 0; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /test/basic.i: -------------------------------------------------------------------------------- 1 | # 1 "basic.c" 2 | # 1 "" 3 | # 1 "" 4 | # 1 "/usr/include/stdc-predef.h" 1 3 4 5 | # 1 "" 2 6 | # 1 "basic.c" 7 | # 14 "basic.c" 8 | # 1 "stdio.h" 1 9 | 10 | 11 | int printf(char *format,...); 12 | int scanf(char *format,...); 13 | int puts(char* s); 14 | # 15 "basic.c" 2 15 | 16 | 17 | 18 | int fib(int n); 19 | int n,i; 20 | int main(int argc,char **argv) 21 | { 22 | float f; 23 | double d; 24 | char *s; 25 | 26 | s=argv[1]; 27 | while(*s!=0) 28 | { 29 | if(*s<='z'&&*s>='a') 30 | *s=*s+'A'-'a'; 31 | s++; 32 | } 33 | printf("%s\n",argv[1]); 34 | 35 | scanf("%d",&n); 36 | printf("%d\n",fib(n)); 37 | 38 | f=0.5; 39 | d=1.5; 40 | 41 | for(i=0;i1) 59 | { 60 | return fib(n-1)+fib(n-2); 61 | } 62 | else if(n==1) 63 | { 64 | return 1; 65 | } 66 | else 67 | { 68 | return 0; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /test/basic1.c: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | 3 | int main(int argc,char **argv) 4 | { 5 | char *s; 6 | s=*argv; 7 | 8 | while(*s!=0) 9 | { 10 | if(*s>='a'&&*s<='z') 11 | *s=*s+(65-97); 12 | ++s; 13 | } 14 | 15 | printf("%s",*argv); 16 | puts(""); 17 | 18 | return 0; 19 | } 20 | 21 | 22 | -------------------------------------------------------------------------------- /test/basic2.c: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | int fib(int n); 3 | int i; 4 | int main() 5 | { 6 | scanf("%d",&i); 7 | printf("%d\n",fib(i)); 8 | 9 | return 0; 10 | } 11 | int fib(int n) 12 | { 13 | int i; 14 | if(n>1) 15 | { 16 | return fib(n-1)+fib(n-2); 17 | } 18 | else if(n==1) 19 | { 20 | return 1; 21 | } 22 | else 23 | { 24 | return 0; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /test/basic3.c: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | int main() 3 | { 4 | double i,j; 5 | j=2.3; 6 | scanf("%lf",&i); 7 | i=i*j+i*(i*j-i*j)/j; 8 | printf("%lf\n",i); 9 | 10 | return 0; 11 | } -------------------------------------------------------------------------------- /test/errorID.c: -------------------------------------------------------------------------------- 1 | int $a; 2 | -------------------------------------------------------------------------------- /test/error_info.c: -------------------------------------------------------------------------------- 1 | 2 | //函数定义声明不一致 3 | int f(int i,...); 4 | int f(int j){ 5 | return 0; 6 | } 7 | 8 | int g(int i){ 9 | return 0; 10 | } 11 | typedef struct{ 12 | int n; 13 | } A; 14 | int main(int argc, char const *argv[]) 15 | { 16 | 17 | //重复定义 18 | int k; 19 | int k; 20 | int count; 21 | //类型不匹配 22 | A a; 23 | a = 5; 24 | //未定义变量 25 | var = 3; 26 | //操作数类型错误 27 | 1.0 >> 4; 28 | //打字错误 29 | cont = 4; 30 | g(1.0); 31 | //参数表不匹配 32 | g(a); 33 | //返回值不匹配 34 | return a; 35 | } 36 | 37 | // Semantic Error at line 4: 'int function(int j,)' is not consistent with old declaration 'int function(int i,...)' 38 | // int f ( int j ) { return 0 ; } 39 | // 40 | // Syntax error at 'int', at line: 22, column: 5. 41 | // Error type: missing semicolon before int. at line: 22, lex pos: 258 in declaration. 42 | // 43 | // Semantic Error at line 18: Redeclare k 44 | // k 45 | // 46 | // Semantic Error at line 23: 'int const' cannot be assigned to 'struct {'n': int}' 47 | // a = 5 48 | // 49 | // Semantic Error at line 25: Unknown identifier var 50 | // var 51 | // 52 | // Semantic Error at line 27: double const is not or cannot be recognized as integer 53 | // 1.0 54 | // 55 | // Semantic Error at line 29: Unknown identifier 'cont', do you mean 'count'? 56 | // cont 57 | // 58 | // Semantic Error at line 32: 'struct {'n': int}' can't convert to 'int' 59 | // a 60 | // 61 | // Semantic Error at line 34: 'struct {'n': int}' is not consistant with the function return type 'int' 62 | // return a ; 63 | -------------------------------------------------------------------------------- /test/error_pos.c: -------------------------------------------------------------------------------- 1 | 2 | int a, b, c; 3 | c = a + b; 4 | int d; -------------------------------------------------------------------------------- /test/missRightCurly.c: -------------------------------------------------------------------------------- 1 | // 2 | //int a, b, c; 3 | //int main(int argc, char *argv[]) { 4 | // c = a + b; 5 | // 6 | // 7 | //int b, c; 8 | 9 | int f(){ 10 | 11 | int d; -------------------------------------------------------------------------------- /test/missSEMI.c: -------------------------------------------------------------------------------- 1 | 2 | int b 3 | 4 | int main(int argc, char *argv[]) { 5 | int a, b, c, d; 6 | int $a; 7 | 8 | c = a + b; 9 | d = a +/ b; 10 | d = a -/ b; 11 | d = a ^^ / b; 12 | d = a *|b; 13 | d = a >/ b; 14 | d = a a=4; 20 | sp->c='!'; 21 | sp->inner.b=5; 22 | sp->inner.d=55.2; 23 | return; 24 | } 25 | 26 | int main(void) 27 | { 28 | myStruct p[3]; 29 | myStruct *sp; 30 | 31 | sp=&p[1]; 32 | p[1].a=2; 33 | p[1].c='a'; 34 | p[1].inner.b=3; 35 | p[1].inner.d=12.3; 36 | printf("before modified\n"); 37 | printf("p[1].a=%d\tp[1].c=%c\tp[1].inner.b=%d\tp[1].inner.d=%lf\n",p[1].a,p[1].c,p[1].inner.b,p[1].inner.d); 38 | modifyStruct(sp); 39 | printf("after modified\n"); 40 | printf("p[1].a=%d\tp[1].c=%c\tp[1].inner.b=%d\tp[1].inner.d=%lf\n",p[1].a,p[1].c,p[1].inner.b,p[1].inner.d); 41 | 42 | return 0; 43 | } 44 | 45 | -------------------------------------------------------------------------------- /test/test1.c: -------------------------------------------------------------------------------- 1 | int g_i; 2 | static int ss; 3 | static int sss; 4 | 5 | int foo(int n){ 6 | static int x3; 7 | int x1,x2; 8 | x1=2; 9 | x2=3; 10 | x1=x1+1; 11 | return n+1; 12 | } 13 | 14 | 15 | int main(void){ 16 | int l_i,x1,x2,x3; 17 | l_i=l_i+1; 18 | x1=1; 19 | x1=x1+l_i; 20 | x2=foo(x1); 21 | return 1; 22 | } -------------------------------------------------------------------------------- /test/test1.s: -------------------------------------------------------------------------------- 1 | .file "test1.c" 2 | .intel_syntax noprefix 3 | .comm g_i,4,4 4 | .local ss 5 | .comm ss,4,4 6 | .local sss 7 | .comm sss,4,4 8 | .text 9 | .globl main 10 | .type main, @function 11 | main: 12 | push ebp 13 | mov ebp, esp 14 | and esp, -16 15 | sub esp, 32 16 | add DWORD PTR [esp+20], 1 17 | mov DWORD PTR [esp+24], 1 18 | mov eax, DWORD PTR [esp+20] 19 | add DWORD PTR [esp+24], eax 20 | mov eax, DWORD PTR [esp+24] 21 | mov DWORD PTR [esp], eax 22 | call foo 23 | mov DWORD PTR [esp+28], eax 24 | nop 25 | leave 26 | ret 27 | .size main, .-main 28 | .globl foo 29 | .type foo, @function 30 | foo: 31 | push ebp 32 | mov ebp, esp 33 | sub esp, 16 34 | mov DWORD PTR [ebp-8], 2 35 | mov DWORD PTR [ebp-4], 3 36 | add DWORD PTR [ebp-8], 1 37 | mov eax, DWORD PTR [ebp+8] 38 | add eax, 1 39 | leave 40 | ret 41 | .size foo, .-foo 42 | .ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2" 43 | .section .note.GNU-stack,"",@progbits 44 | -------------------------------------------------------------------------------- /test/test4.c: -------------------------------------------------------------------------------- 1 | #include 2 | int g_fast; 3 | static int s_g_fast; 4 | 5 | int main(void) 6 | { 7 | int l_fast; 8 | static int s_l_fast; 9 | l_fast=1; 10 | s_l_fast=2; 11 | l_fast=foo(s_l_fast); 12 | printf("%d\n",l_fast); 13 | return 0; 14 | } 15 | 16 | int foo(int n){ 17 | return n+1; 18 | } 19 | -------------------------------------------------------------------------------- /test/test4.s: -------------------------------------------------------------------------------- 1 | .file "test4.c" 2 | .intel_syntax noprefix 3 | .comm g_fast,4,4 4 | .local s_g_fast 5 | .comm s_g_fast,4,4 6 | .section .rodata 7 | .LC0: 8 | .string "%d\n" 9 | .text 10 | .globl main 11 | .type main, @function 12 | main: 13 | push ebp 14 | mov ebp, esp 15 | and esp, -16 16 | sub esp, 32 17 | mov DWORD PTR [esp+28], 1 18 | mov DWORD PTR s_l_fast.1829, 2 19 | mov eax, DWORD PTR s_l_fast.1829 20 | add DWORD PTR [esp+28], eax 21 | mov eax, DWORD PTR s_l_fast.1829 22 | mov DWORD PTR [esp], eax 23 | call foo 24 | mov DWORD PTR [esp+28], eax 25 | mov eax, DWORD PTR [esp+28] 26 | mov DWORD PTR [esp+4], eax 27 | mov DWORD PTR [esp], OFFSET FLAT:.LC0 28 | call printf 29 | mov eax, 0 30 | leave 31 | ret 32 | .size main, .-main 33 | .globl foo 34 | .type foo, @function 35 | foo: 36 | push ebp 37 | mov ebp, esp 38 | mov eax, DWORD PTR [ebp+8] 39 | add eax, 1 40 | pop ebp 41 | ret 42 | .size foo, .-foo 43 | .local s_l_fast.1829 44 | .comm s_l_fast.1829,4,4 45 | .ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2" 46 | .section .note.GNU-stack,"",@progbits 47 | -------------------------------------------------------------------------------- /test/test4_2.c: -------------------------------------------------------------------------------- 1 | #include 2 | int g_fast; 3 | static int s_g_fast; 4 | 5 | int main(void) 6 | { 7 | int l_fast; 8 | static int s_l_fast; 9 | l_fast=1; 10 | s_l_fast=2; 11 | l_fast=foo(s_l_fast); 12 | printf("%d\n",l_fast); 13 | return 0; 14 | } 15 | 16 | int foo(int n){ 17 | return n+1; 18 | } 19 | -------------------------------------------------------------------------------- /test/test4_2.s: -------------------------------------------------------------------------------- 1 | .file "test4_2.c" 2 | .intel_syntax noprefix 3 | .section .rodata.str1.1,"aMS",@progbits,1 4 | .LC0: 5 | .string "%d\n" 6 | .text 7 | .globl main 8 | .type main, @function 9 | main: 10 | push ebp 11 | mov ebp, esp 12 | and esp, -16 13 | sub esp, 16 14 | mov DWORD PTR s_l_fast.2034, 2 15 | mov DWORD PTR [esp+8], 3 16 | mov DWORD PTR [esp+4], OFFSET FLAT:.LC0 17 | mov DWORD PTR [esp], 1 18 | call __printf_chk 19 | mov eax, 0 20 | leave 21 | ret 22 | .size main, .-main 23 | .globl foo 24 | .type foo, @function 25 | foo: 26 | mov eax, DWORD PTR [esp+4] 27 | add eax, 1 28 | ret 29 | .size foo, .-foo 30 | .local s_l_fast.2034 31 | .comm s_l_fast.2034,4,4 32 | .comm g_fast,4,4 33 | .ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2" 34 | .section .note.GNU-stack,"",@progbits 35 | -------------------------------------------------------------------------------- /test/test9.c: -------------------------------------------------------------------------------- 1 | double a; 2 | struct test{ 3 | char a; 4 | int b; 5 | short c; 6 | double e; 7 | }; 8 | int foo(char a, int b, short c, struct test d, char *s){ 9 | d.e=10.5; 10 | d.c=2; 11 | if(a=='a') 12 | return b-c; 13 | else 14 | return d.e-d.c; 15 | } 16 | 17 | int main(){ 18 | struct test t; 19 | a=4.5; 20 | t.a='b'; 21 | t.b=2; 22 | t.c=3; 23 | t.e=5.4; 24 | printf("%lf",t.e); 25 | return foo('a',10,2,t,"mamsf"); 26 | } 27 | -------------------------------------------------------------------------------- /test/test9.s: -------------------------------------------------------------------------------- 1 | .file "test9.c" 2 | .intel_syntax noprefix 3 | .globl a 4 | .data 5 | .align 8 6 | .type a, @object 7 | .size a, 8 8 | a: 9 | .long 0 10 | .long 1075052544 11 | .text 12 | .globl foo 13 | .type foo, @function 14 | foo: 15 | push ebp 16 | mov ebp, esp 17 | sub esp, 16 18 | mov edx, DWORD PTR [ebp+8] 19 | mov eax, DWORD PTR [ebp+16] 20 | mov BYTE PTR [ebp-4], dl 21 | mov WORD PTR [ebp-8], ax 22 | fld QWORD PTR .LC0 23 | fstp QWORD PTR [ebp+32] 24 | mov WORD PTR [ebp+28], 2 25 | cmp BYTE PTR [ebp-4], 97 26 | jne .L2 27 | movsx eax, WORD PTR [ebp-8] 28 | mov edx, DWORD PTR [ebp+12] 29 | sub edx, eax 30 | mov eax, edx 31 | jmp .L3 32 | .L2: 33 | fld QWORD PTR [ebp+32] 34 | movzx eax, WORD PTR [ebp+28] 35 | mov WORD PTR [ebp-6], ax 36 | fild WORD PTR [ebp-6] 37 | fsubp st(1), st 38 | fnstcw WORD PTR [ebp-2] 39 | movzx eax, WORD PTR [ebp-2] 40 | mov ah, 12 41 | mov WORD PTR [ebp-10], ax 42 | fldcw WORD PTR [ebp-10] 43 | fistp DWORD PTR [ebp-16] 44 | fldcw WORD PTR [ebp-2] 45 | mov eax, DWORD PTR [ebp-16] 46 | .L3: 47 | leave 48 | ret 49 | .size foo, .-foo 50 | .section .rodata 51 | .LC4: 52 | .string "%lf" 53 | .LC5: 54 | .string "mamsf" 55 | .text 56 | .globl main 57 | .type main, @function 58 | main: 59 | push ebp 60 | mov ebp, esp 61 | and esp, -16 62 | sub esp, 80 63 | fld QWORD PTR .LC2 64 | fstp QWORD PTR a 65 | mov BYTE PTR [esp+60], 98 66 | mov DWORD PTR [esp+64], 2 67 | mov WORD PTR [esp+68], 3 68 | fld QWORD PTR .LC3 69 | fstp QWORD PTR [esp+72] 70 | fld QWORD PTR [esp+72] 71 | fstp QWORD PTR [esp+4] 72 | mov DWORD PTR [esp], OFFSET FLAT:.LC4 73 | call printf 74 | mov DWORD PTR [esp+32], OFFSET FLAT:.LC5 75 | mov eax, DWORD PTR [esp+60] 76 | mov DWORD PTR [esp+12], eax 77 | mov eax, DWORD PTR [esp+64] 78 | mov DWORD PTR [esp+16], eax 79 | mov eax, DWORD PTR [esp+68] 80 | mov DWORD PTR [esp+20], eax 81 | mov eax, DWORD PTR [esp+72] 82 | mov DWORD PTR [esp+24], eax 83 | mov eax, DWORD PTR [esp+76] 84 | mov DWORD PTR [esp+28], eax 85 | mov DWORD PTR [esp+8], 2 86 | mov DWORD PTR [esp+4], 10 87 | mov DWORD PTR [esp], 97 88 | call foo 89 | leave 90 | ret 91 | .size main, .-main 92 | .section .rodata 93 | .align 8 94 | .LC0: 95 | .long 0 96 | .long 1076166656 97 | .align 8 98 | .LC2: 99 | .long 0 100 | .long 1074921472 101 | .align 8 102 | .LC3: 103 | .long -1717986918 104 | .long 1075157401 105 | .ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2" 106 | .section .note.GNU-stack,"",@progbits 107 | -------------------------------------------------------------------------------- /yyparse/.gitignore: -------------------------------------------------------------------------------- 1 | *.out 2 | parsetab.py -------------------------------------------------------------------------------- /yyparse/ZCClex.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import ply.lex as lex 4 | # import ply.yacc as yacc 5 | # from pprint import pprint 6 | from symbol.symtab import is_type 7 | from public.ZCCglobal import TreeNode 8 | lexErrorInfo = [] 9 | 10 | 11 | # column = 0 12 | 13 | # Compute column. 14 | # input is the input text string 15 | # token is a token instance 16 | def find_column(input, token): 17 | last_cr = input.rfind('\n', 0, token.lexpos) 18 | if last_cr < 0: 19 | last_cr = 0 20 | column = token.lexpos - last_cr 21 | # print "lexpos: ", token.lexpos, " last_cr: ", last_cr 22 | return column 23 | 24 | 25 | reserved_dict = { 26 | # "auto" :'AUTO', 27 | "break": 'BREAK', 28 | "case": 'CASE', 29 | "char": 'CHAR', 30 | "const": 'CONST', 31 | "continue": 'CONTINUE', 32 | "default": 'DEFAULT', 33 | "do": 'DO', 34 | "double": 'DOUBLE', 35 | "else": 'ELSE', 36 | "enum": 'ENUM', 37 | "extern": 'EXTERN', 38 | "float": 'FLOAT', 39 | "for": 'FOR', 40 | # "goto" :'GOTO', 41 | "if": 'IF', 42 | "int": 'INT', 43 | "long": 'LONG', 44 | # "register" :'REGISTER', 45 | "return": 'RETURN', 46 | "short": 'SHORT', 47 | "signed": 'SIGNED', 48 | "sizeof": 'SIZEOF', 49 | "static": 'STATIC', 50 | "struct": 'STRUCT', 51 | "switch": 'SWITCH', 52 | "typedef": 'TYPEDEF', 53 | "union": 'UNION', 54 | "unsigned": 'UNSIGNED', 55 | "void": 'VOID', 56 | # "volatile": 'VOLATILE', 57 | "while": 'WHILE', 58 | } 59 | 60 | literal_dict = { 61 | '(': 'LBRACKET', 62 | ')': 'RBRACKET', 63 | '[': 'LSQUAREBRACKET', 64 | ']': 'RSQUAREBRACKET', 65 | '{': 'LCURLYBRACKET', 66 | '}': 'RCURLYBRACKET', 67 | ';': 'SEMICOLON', 68 | '.': 'PERIOD', 69 | ',': 'COMMA', 70 | '&': 'AND', 71 | '*': 'STAR', 72 | '+': 'PLUS', 73 | '-': 'MINUS', 74 | '~': 'UNOT', 75 | '!': 'NOT', 76 | '/': 'DIVIDE', 77 | '%': 'MOD', 78 | '<': 'LT', 79 | '>': 'GT', 80 | '^': 'XOR', 81 | '|': 'OR', 82 | '?': 'QUESTIONMARK', 83 | ':': 'COLON', 84 | '=': 'ASSIGN' 85 | } 86 | 87 | tokens = ( 88 | # 'AUTO', 89 | 'BREAK', 90 | 'CASE', 91 | 'CHAR', 92 | 'CONST', 93 | 'CONTINUE', 94 | 'DEFAULT', 95 | 'DO', 96 | 'DOUBLE', 97 | 'ELSE', 98 | 'ENUM', 99 | 'EXTERN', 100 | 'FLOAT', 101 | 'FOR', 102 | # 'GOTO', 103 | 'IF', 104 | 'INT', 105 | 'LONG', 106 | # 'REGISTER', 107 | 'RETURN', 108 | 'SHORT', 109 | 'SIGNED', 110 | 'SIZEOF', 111 | 'STATIC', 112 | 'STRUCT', 113 | 'SWITCH', 114 | 'TYPEDEF', 115 | 'UNION', 116 | 'UNSIGNED', 117 | 'VOID', 118 | # 'VOLATILE', 119 | 'WHILE', 120 | "IDENTIFIER", 121 | "TYPE_NAME", 122 | "STRING_LITERAL", 123 | "ELLIPSIS", 124 | "RIGHT_ASSIGN", 125 | "LEFT_ASSIGN", 126 | "ADD_ASSIGN", 127 | "SUB_ASSIGN", 128 | "MUL_ASSIGN", 129 | "DIV_ASSIGN", 130 | "MOD_ASSIGN", 131 | "AND_ASSIGN", 132 | "XOR_ASSIGN", 133 | "OR_ASSIGN", 134 | "RIGHT_OP", 135 | "LEFT_OP", 136 | "INC_OP", 137 | "DEC_OP", 138 | "PTR_OP", 139 | "AND_OP", 140 | "OR_OP", 141 | "LE_OP", 142 | "GE_OP", 143 | "EQ_OP", 144 | "NE_OP", 145 | 'LBRACKET', 146 | 'RBRACKET', 147 | 'LSQUAREBRACKET', 148 | 'RSQUAREBRACKET', 149 | 'LCURLYBRACKET', 150 | 'RCURLYBRACKET', 151 | 'SEMICOLON', 152 | 'PERIOD', 153 | 'COMMA', 154 | 'AND', 155 | 'STAR', 156 | 'PLUS', 157 | 'MINUS', 158 | 'UNOT', 159 | 'NOT', 160 | 'DIVIDE', 161 | 'MOD', 162 | 'LT', 163 | 'GT', 164 | 'XOR', 165 | 'OR', 166 | 'QUESTIONMARK', 167 | 'COLON', 168 | 'ASSIGN', 169 | "ERRORID", 170 | "NUMBER_CONSTANT", 171 | "CHARACTER_CONSTANT", 172 | "EOF" 173 | ) 174 | 175 | 176 | def t_STRING_LITERAL(t): 177 | r'\"(\\.|[^\\\"])*\"' 178 | value = t.value 179 | t.value = TreeNode() 180 | t.value.lineno = t.lexer.lineno 181 | t.value.append('STRING') 182 | t.value.append(value) 183 | return t 184 | 185 | 186 | def t_ignore_COMMENT(t): 187 | r'(/\*(.|\n)*?\*/)|(//.*)|(^\#.*)|(\n\#.*)|(\r\n\#.*)' 188 | t.lexer.lineno += t.value.count('\n') 189 | pass 190 | 191 | 192 | def t_IDENTIFIER(t): 193 | r"""[_A-Za-z][_A-Za-z0-9]*""" 194 | t.type = reserved_dict.get(t.value, 'IDENTIFIER') 195 | if t.type == 'IDENTIFIER' and is_type(t.value): 196 | t.type = "TYPE_NAME" 197 | if t.type == 'IDENTIFIER': 198 | value = t.value 199 | t.value = TreeNode() 200 | t.value.lineno = t.lexer.lineno 201 | t.value.append('IDENTIFIER') 202 | t.value.append(value) 203 | return t 204 | 205 | 206 | def t_NUMBER_CONSTANT(t): 207 | r"""([0-9]*\.[0-9]+|[0-9]+\.)([eE][+\-]?[0-9]+)?[flFL]?|[0-9]+([eE][+\-]?[0-9]+)[flFL]?|[1-9][0-9]*[uU]?[lL]{,2}|0[0-7]*[uU]?[lL]{,2}|0[xX][0-9a-fA-F]+[uU]?[lL]{,2}""" 208 | val = eval(t.value) 209 | if isinstance(val, float): 210 | value = t.value 211 | t.value = TreeNode() 212 | t.value.lineno = t.lexer.lineno 213 | t.value.append('DOUBLE') 214 | t.value.append(value) 215 | else: 216 | value = t.value 217 | t.value = TreeNode() 218 | t.value.lineno = t.lexer.lineno 219 | t.value.append('INTEGER') 220 | t.value.append(value) 221 | return t 222 | 223 | 224 | def t_CHARACTER_CONSTANT(t): 225 | r"\'([^\'\\\n]|(\\[\'\"?\\abfnrtv]|[0-7]{1,3}|x[0-9a-fA-F]{1,2}))\'" 226 | value = t.value 227 | t.value = TreeNode() 228 | t.value.lineno = t.lexer.lineno 229 | t.value.append('INTEGER') 230 | t.value.append(str(ord(eval(value)))) 231 | return t 232 | 233 | 234 | # def t_CONSTANT(t): 235 | # r'[1-9][0-9]*[Ee][+-]?[1-9][0-9]*[fFlL]?|[0-9]*\.[0-9]+([Ee][+-]?[0-9]+)?[fFlL]?|[0-9]+\.[0-9]*([Ee][+-]?[0-9]+)?[fFlL]?|0[xX][a-fA-F0-9]+(u|U)?(l|L){,2}|((0|[1-9][0-9]*)(u|U)?(l|L){,2})|\'(\S|\\([abfnrtv\\\'\"0]|[0-7]{3}|x[0-9a-fA-F]{2}))\'' 236 | # # r'0[xX][a-fA-F0-9]+(u|U)?(l|L){1,2}|' 237 | # # r'0[0-9]+(u|U)?(l|L){1,2}|' 238 | # # r'[0-9]+(u|U)?(l|L){1,2}|' 239 | # # r'\'\S|\\([abfnrtv\\\'\"0]|[0-7]{3}|x[0-9a-fA-F]{2})\'|' 240 | # # r'[0-9]+[Ee][+-]?[0-9]+[fFlL]?|' 241 | # # r'[0-9]*\.[0-9]+([Ee][+-]?[0-9]+)?[fFlL]?|' 242 | # # r'[0-9]+\.[0-9]*([Ee][+-]?[0-9]+)?[fFlL]?' 243 | # 244 | # return t 245 | 246 | 247 | 248 | def t_ELLIPSIS(t): 249 | r"\.\.\." 250 | return t 251 | 252 | 253 | def t_RIGHT_ASSIGN(t): 254 | r">>=" 255 | return t 256 | 257 | 258 | def t_LEFT_ASSIGN(t): 259 | r"<<=" 260 | return t 261 | 262 | 263 | def t_ADD_ASSIGN(t): 264 | r"\+=" 265 | return t 266 | 267 | 268 | def t_MUL_ASSIGN(t): 269 | r"\*=" 270 | return t 271 | 272 | 273 | def t_DIV_ASSIGN(t): 274 | r"/=" 275 | return t 276 | 277 | 278 | def t_MOD_ASSIGN(t): 279 | r"%=" 280 | return t 281 | 282 | 283 | def t_AND_ASSIGN(t): 284 | r"&=" 285 | return t 286 | 287 | 288 | def t_XOR_ASSIGN(t): 289 | r"^=" 290 | return t 291 | 292 | 293 | def t_OR_ASSIGN(t): 294 | r"\|=" 295 | return t 296 | 297 | 298 | def t_RIGHT_OP(t): 299 | r">>" 300 | return t 301 | 302 | 303 | def t_LEFT_OP(t): 304 | r"<<" 305 | return t 306 | 307 | 308 | def t_INC_OP(t): 309 | r"\+\+" 310 | return t 311 | 312 | 313 | def t_DEC_OP(t): 314 | r"--" 315 | return t 316 | 317 | 318 | def t_PTR_OP(t): 319 | r"->" 320 | return t 321 | 322 | 323 | def t_AND_OP(t): 324 | r"&&" 325 | return t 326 | 327 | 328 | def t_OR_OP(t): 329 | r"\|\|" 330 | return t 331 | 332 | 333 | def t_LE_OP(t): 334 | r"<=" 335 | return t 336 | 337 | 338 | def t_GE_OP(t): 339 | r">=" 340 | return t 341 | 342 | 343 | def t_EQ_OP(t): 344 | r"==" 345 | return t 346 | 347 | 348 | def t_NE_OP(t): 349 | r"!=" 350 | return t 351 | 352 | 353 | def t_LITERAL(t): 354 | r"[()\[\]{};.,&*+\-~!/%<>\^|?:=]" 355 | t.type = literal_dict.get(t.value) 356 | if t.value == '{': 357 | t.lexer.curlyBalance += 1 358 | elif t.value == '}': 359 | t.lexer.curlyBalance -= 1 360 | return t 361 | 362 | 363 | # literals = '()[]{};.,&*+-~!/%<>^|?:=' 364 | 365 | # Define a rule so we can track line numbers 366 | def t_newline(t): 367 | r'\n' 368 | t.lexer.lineno += 1 # len(t.value) 369 | 370 | 371 | t_ignore = ' \t' 372 | 373 | 374 | def t_ERRORID(t): 375 | r"[^\s;}]+" 376 | t.value = (t.value, "ERRORID") 377 | return t 378 | 379 | 380 | def t_error(t): 381 | error_column = find_column(t.lexer.lexdata, t) 382 | print("Unknown text '%s' at line: %d, column: %d" % (t.value, t.lexer.lineno, error_column)) 383 | lexErrorInfo.append({ 384 | 'pos': t.lexer.lexpos, 385 | 'lineno': t.lexer.lineno, 386 | 'column': error_column, 387 | 'value': t.value 388 | }) 389 | t.lexer.skip(1) 390 | 391 | 392 | orig_lexer = lex.lex() 393 | 394 | 395 | # pprint(lexer.__dict__) 396 | 397 | class ProxyLexer(object): 398 | def __init__(self, lexer, eoftoken): 399 | self.end = False 400 | self.lexer = lexer 401 | self.eof = eoftoken 402 | 403 | def token(self): 404 | tok = self.lexer.token() 405 | if tok is None: 406 | if self.end: 407 | self.end = False 408 | else: 409 | self.end = True 410 | tok = lex.LexToken() 411 | tok.type = self.eof 412 | tok.value = None 413 | tok.lexpos = self.lexer.lexpos 414 | tok.lineno = self.lexer.lineno 415 | # print ('custom', tok) 416 | return tok 417 | 418 | def __getattr__(self, name): 419 | return getattr(self.lexer, name) 420 | 421 | 422 | lexer = ProxyLexer(orig_lexer, 'EOF') 423 | lexer.lexer.curlyBalance = 0 424 | 425 | 426 | def test_lex(): 427 | # data = raw_input() 428 | 429 | # c_file_name = raw_input('c file name: ') 430 | c_file_name = "test1.c" 431 | c_file = open(c_file_name, "r") 432 | contents = "".join(c_file.readlines()) 433 | 434 | lexer.input(contents) 435 | 436 | while True: 437 | tok = lexer.token() 438 | if not tok: 439 | break 440 | print tok # .value, find_column(lexer.lexdata, tok) 441 | 442 | # test_lex() 443 | -------------------------------------------------------------------------------- /yyparse/ZCCparser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from __future__ import print_function 3 | import ply.lex as lex 4 | import ply.yacc as yacc 5 | import ZCClex 6 | from symbol.symtab import symtab_declaration, symtab_function_definition 7 | from public.ZCCglobal import global_context, TreeNode 8 | from ZCClex import tokens 9 | from pprint import pprint 10 | 11 | aTuple = (1, 2) 12 | 13 | 14 | def handleMissingSEMI(p, parentname="", checkPair=()): 15 | last_idx = len(p) - 1 16 | if (len(checkPair) == 0 or (len(checkPair) > 0 and p[ 17 | checkPair[0]] == checkPair[1])) and p[last_idx] != ';': 18 | print( 19 | "Error type: missing semicolon before %s. at line: %d, lex pos: %d in %s.\n" % 20 | (p[last_idx].value, p.lineno(last_idx), p.lexpos(last_idx), parentname)) 21 | p[last_idx] = ';' 22 | parser.errorCounter = 0 23 | parser.errok() 24 | return [last_idx] 25 | else: 26 | return [] 27 | 28 | 29 | def handleMissingRCURLYBRACKET(p): 30 | last_idx = len(p) - 1 31 | if p[last_idx] != '}': 32 | print( 33 | "Error type: missing right curly bracket before %s. at line: %d, lex pos: %d.\n" % 34 | (p[last_idx], p.lineno(last_idx), p.lexpos(last_idx))) 35 | p[last_idx] = '}' 36 | parser.errorCounter = 0 37 | parser.errok() 38 | 39 | 40 | def handleErrorID(p, idx): 41 | if len(p) > idx and isinstance(p[idx], 42 | type(aTuple)) and p[idx][1] == "ERRORID": 43 | print("Syntax error at %r, at line: %d, lex pos: %d." % 44 | (p[idx][0], p.lineno(idx), p.lexpos(idx))) 45 | print("Error type: wrong IDENTIFIER format.\n") 46 | p[idx] = p[idx][0] 47 | parser.errorCounter = 0 48 | 49 | 50 | def construct_node(p, parent_name, del_list=[]): 51 | p[0] = TreeNode() 52 | p[0].append(parent_name) 53 | p[0].lineno = p.lineno(len(p) - 1) 54 | # print("%s's del_list: " % (parent_name)) 55 | # print(del_list) 56 | for i in range(1, len(p)): 57 | if i not in del_list: 58 | p[0].append(p[i]) 59 | if not isinstance(p[i], str): 60 | if p[0].lineno == 0: 61 | p[0].lineno = p[i].lineno 62 | elif p[0].lineno > p[i].lineno: 63 | p[0].lineno = p[i].lineno 64 | 65 | 66 | def p_outer_translation_unit(p): 67 | """ 68 | outer_translation_unit : translation_unit EOF 69 | """ 70 | p[0] = p[1] 71 | # construct_node(p, "outer_translation_unit") 72 | 73 | 74 | def p_translation_unit(p): 75 | """ 76 | translation_unit : external_declaration 77 | | translation_unit external_declaration 78 | """ 79 | if len(p) == 2: 80 | construct_node(p, "translation_unit") 81 | elif len(p) == 3: 82 | # printAST(p[1]) 83 | p[1].append(p[2]) 84 | p[0] = p[1] 85 | else: 86 | raise Exception("translation_unit just has two children") 87 | 88 | 89 | def p_external_declaration(p): 90 | """ 91 | external_declaration : function_definition 92 | | declaration 93 | """ 94 | p[0] = p[1] 95 | if p[0][0] == 'declaration': 96 | symtab_declaration(p[0], global_context) 97 | elif p[0][0] == 'function_definition': 98 | symtab_function_definition(p[0], global_context) 99 | # construct_node(p, "external_declaration") 100 | 101 | 102 | def p_declaration(p): 103 | """ 104 | declaration : declaration_specifiers SEMICOLON 105 | | declaration_specifiers init_declarator_list SEMICOLON 106 | | declaration_specifiers error 107 | | declaration_specifiers init_declarator_list error 108 | """ 109 | del_list = handleMissingSEMI(p, "declaration") 110 | construct_node(p, "declaration", del_list) 111 | 112 | 113 | # print(p[0]) 114 | 115 | # def p_constant(p): 116 | # """ 117 | # constant : NUMBER_CONSTANT 118 | # | CHARACTER_CONSTANT 119 | # """ 120 | # construct_node(p, "constant") 121 | 122 | 123 | def p_declaration_specifiers(p): 124 | """ 125 | declaration_specifiers : type_specifier 126 | | type_specifier type_qualifier 127 | | type_qualifier type_specifier 128 | | storage_class_specifier type_specifier 129 | | storage_class_specifier type_specifier type_qualifier 130 | | storage_class_specifier type_qualifier type_specifier 131 | """ 132 | # """ 133 | # declaration_specifiers : storage_class_specifier 134 | # | storage_class_specifier declaration_specifiers 135 | # | type_specifier 136 | # | type_specifier declaration_specifiers 137 | # | type_qualifier 138 | # | type_qualifier declaration_specifiers 139 | # """ 140 | construct_node(p, "declaration_specifiers") 141 | # printAST(p[0], 0) 142 | # pass 143 | 144 | 145 | def p_primary_expression(p): 146 | """ 147 | primary_expression : IDENTIFIER 148 | | ERRORID 149 | | NUMBER_CONSTANT 150 | | CHARACTER_CONSTANT 151 | | STRING_LITERAL 152 | | LBRACKET expression RBRACKET 153 | """ 154 | handleErrorID(p, 1) 155 | if len(p) == 4: 156 | p[0] = p[2] 157 | else: 158 | construct_node(p, "primary_expression") 159 | 160 | 161 | def p_postfix_expression(p): 162 | """ 163 | postfix_expression : primary_expression 164 | | postfix_expression LSQUAREBRACKET expression RSQUAREBRACKET 165 | | postfix_expression LBRACKET RBRACKET 166 | | postfix_expression LBRACKET argument_expression_list RBRACKET 167 | | postfix_expression PERIOD IDENTIFIER 168 | | postfix_expression PTR_OP IDENTIFIER 169 | | postfix_expression PERIOD ERRORID 170 | | postfix_expression PTR_OP ERRORID 171 | | postfix_expression INC_OP 172 | | postfix_expression DEC_OP 173 | """ 174 | handleErrorID(p, 3) 175 | if len(p) == 2: 176 | p[0] = p[1] 177 | else: 178 | construct_node(p, "postfix_expression") 179 | 180 | 181 | def p_argument_expression_list(p): 182 | """ 183 | argument_expression_list : assignment_expression 184 | | argument_expression_list COMMA assignment_expression 185 | """ 186 | if len(p) == 2: 187 | construct_node(p, "argument_expression_list") 188 | else: 189 | p[1].append(p[2]) 190 | p[1].append(p[3]) 191 | p[0] = p[1] 192 | 193 | 194 | def p_unary_expression(p): 195 | """ 196 | unary_expression : postfix_expression 197 | | INC_OP unary_expression 198 | | DEC_OP unary_expression 199 | | unary_operator cast_expression 200 | | SIZEOF unary_expression 201 | | SIZEOF LBRACKET type_name RBRACKET 202 | """ 203 | if len(p) == 2: 204 | p[0] = p[1] 205 | else: 206 | construct_node(p, "unary_expression") 207 | 208 | 209 | def p_unary_operator(p): 210 | """ 211 | unary_operator : AND 212 | | STAR 213 | | PLUS 214 | | MINUS 215 | | UNOT 216 | | NOT 217 | """ 218 | construct_node(p, "unary_operator") 219 | 220 | 221 | def p_cast_expression(p): 222 | """ 223 | cast_expression : unary_expression 224 | | LBRACKET type_name RBRACKET cast_expression 225 | """ 226 | if len(p) == 2: 227 | p[0] = p[1] 228 | else: 229 | construct_node(p, "cast_expression") 230 | # printAST(p[0], 0) 231 | 232 | 233 | def p_multiplicative_expression(p): 234 | """ 235 | multiplicative_expression : cast_expression 236 | | multiplicative_expression STAR cast_expression 237 | | multiplicative_expression DIVIDE cast_expression 238 | | multiplicative_expression MOD cast_expression 239 | | multiplicative_expression STAR error cast_expression 240 | | multiplicative_expression DIVIDE error cast_expression 241 | | multiplicative_expression MOD error cast_expression 242 | """ 243 | del_list = [] 244 | if len(p) == 5: 245 | print( 246 | "Error type: error token after %s. at line: %d.\n" % 247 | (p[2], p.lineno(2))) 248 | del_list.append(3) 249 | parser.errorCounter = 0 250 | if len(p) == 2: 251 | p[0] = p[1] 252 | else: 253 | construct_node(p, "multiplicative_expression", del_list) 254 | 255 | 256 | def p_additive_expression(p): 257 | """ 258 | additive_expression : multiplicative_expression 259 | | additive_expression PLUS multiplicative_expression 260 | | additive_expression MINUS multiplicative_expression 261 | | additive_expression PLUS error multiplicative_expression 262 | | additive_expression MINUS error multiplicative_expression 263 | """ 264 | del_list = [] 265 | if len(p) == 5: 266 | print( 267 | "Error type: error token after %s. at line: %d.\n" % 268 | (p[2], p.lineno(2))) 269 | del_list.append(3) 270 | parser.errorCounter = 0 271 | if len(p) == 2: 272 | p[0] = p[1] 273 | else: 274 | construct_node(p, "additive_expression", del_list) 275 | 276 | 277 | def p_shift_expression(p): 278 | """ 279 | shift_expression : additive_expression 280 | | shift_expression LEFT_OP additive_expression 281 | | shift_expression RIGHT_OP additive_expression 282 | | shift_expression LEFT_OP error additive_expression 283 | | shift_expression RIGHT_OP error additive_expression 284 | """ 285 | del_list = [] 286 | if len(p) == 5: 287 | print( 288 | "Error type: error token after %s. at line: %d.\n" % 289 | (p[2], p.lineno(2))) 290 | del_list.append(3) 291 | parser.errorCounter = 0 292 | if len(p) == 2: 293 | p[0] = p[1] 294 | else: 295 | construct_node(p, "shift_expression", del_list) 296 | 297 | 298 | def p_relational_expression(p): 299 | """ 300 | relational_expression : shift_expression 301 | | relational_expression LT shift_expression 302 | | relational_expression GT shift_expression 303 | | relational_expression LE_OP shift_expression 304 | | relational_expression GE_OP shift_expression 305 | | relational_expression LT error shift_expression 306 | | relational_expression GT error shift_expression 307 | | relational_expression LE_OP error shift_expression 308 | | relational_expression GE_OP error shift_expression 309 | """ 310 | del_list = [] 311 | if len(p) == 5: 312 | print( 313 | "Error type: error token after %s. at line: %d.\n" % 314 | (p[2], p.lineno(2))) 315 | del_list.append(3) 316 | parser.errorCounter = 0 317 | 318 | if len(p) == 2: 319 | p[0] = p[1] 320 | else: 321 | construct_node(p, "relational_expression", del_list) 322 | 323 | 324 | def p_equality_expression(p): 325 | """ 326 | equality_expression : relational_expression 327 | | equality_expression EQ_OP relational_expression 328 | | equality_expression NE_OP relational_expression 329 | | equality_expression EQ_OP error relational_expression 330 | | equality_expression NE_OP error relational_expression 331 | """ 332 | del_list = [] 333 | if len(p) == 5: 334 | print( 335 | "Error type: error token after %s. at line: %d.\n" % 336 | (p[2], p.lineno(2))) 337 | del_list.append(3) 338 | parser.errorCounter = 0 339 | if len(p) == 2: 340 | p[0] = p[1] 341 | else: 342 | construct_node(p, "equality_expression", del_list) 343 | 344 | 345 | def p_and_expression(p): 346 | """ 347 | and_expression : equality_expression 348 | | and_expression AND equality_expression 349 | | and_expression AND error equality_expression 350 | """ 351 | del_list = [] 352 | if len(p) == 5: 353 | print( 354 | "Error type: error token after %s. at line: %d.\n" % 355 | (p[2], p.lineno(2))) 356 | del_list.append(3) 357 | parser.errorCounter = 0 358 | 359 | if len(p) == 2: 360 | p[0] = p[1] 361 | else: 362 | construct_node(p, "and_expression", del_list) 363 | 364 | 365 | def p_exclusive_or_expression(p): 366 | """ 367 | exclusive_or_expression : and_expression 368 | | exclusive_or_expression XOR and_expression 369 | | exclusive_or_expression XOR error and_expression 370 | """ 371 | del_list = [] 372 | if len(p) == 5: 373 | print( 374 | "Error type: error token after %s. at line: %d.\n" % 375 | (p[2], p.lineno(2))) 376 | del_list.append(3) 377 | parser.errorCounter = 0 378 | 379 | if len(p) == 2: 380 | p[0] = p[1] 381 | else: 382 | construct_node(p, "exclusive_or_expression", del_list) 383 | 384 | 385 | def p_inclusive_or_expression(p): 386 | """ 387 | inclusive_or_expression : exclusive_or_expression 388 | | inclusive_or_expression OR exclusive_or_expression 389 | | inclusive_or_expression OR error exclusive_or_expression 390 | """ 391 | del_list = [] 392 | if len(p) == 5: 393 | print( 394 | "Error type: error token after %s. at line: %d.\n" % 395 | (p[2], p.lineno(2))) 396 | del_list.append(3) 397 | parser.errorCounter = 0 398 | 399 | if len(p) == 2: 400 | p[0] = p[1] 401 | else: 402 | construct_node(p, "inclusive_or_expression", del_list) 403 | 404 | 405 | def p_logical_and_expression(p): 406 | """ 407 | logical_and_expression : inclusive_or_expression 408 | | logical_and_expression AND_OP inclusive_or_expression 409 | | logical_and_expression AND_OP error inclusive_or_expression 410 | """ 411 | del_list = [] 412 | if len(p) == 5: 413 | print( 414 | "Error type: error token after %s. at line: %d.\n" % 415 | (p[2], p.lineno(2))) 416 | del_list.append(3) 417 | parser.errorCounter = 0 418 | 419 | if len(p) == 2: 420 | p[0] = p[1] 421 | else: 422 | construct_node(p, "logical_and_expression", del_list) 423 | 424 | 425 | def p_logical_or_expression(p): 426 | """ 427 | logical_or_expression : logical_and_expression 428 | | logical_or_expression OR_OP logical_and_expression 429 | | logical_or_expression OR_OP error logical_and_expression 430 | """ 431 | del_list = [] 432 | if len(p) == 5: 433 | print( 434 | "Error type: error token after %s. at line: %d.\n" % 435 | (p[2], p.lineno(2))) 436 | del_list.append(3) 437 | parser.errorCounter = 0 438 | 439 | if len(p) == 2: 440 | p[0] = p[1] 441 | else: 442 | construct_node(p, "logical_or_expression", del_list) 443 | 444 | 445 | def p_conditional_expression(p): 446 | """ 447 | conditional_expression : logical_or_expression 448 | | logical_or_expression QUESTIONMARK expression COLON conditional_expression 449 | """ 450 | if len(p) == 2: 451 | p[0] = p[1] 452 | else: 453 | construct_node(p, "conditional_expression") 454 | 455 | 456 | def p_assignment_expression(p): 457 | """ 458 | assignment_expression : conditional_expression 459 | | unary_expression assignment_operator assignment_expression 460 | """ 461 | if len(p) == 2: 462 | p[0] = p[1] 463 | else: 464 | construct_node(p, "assignment_expression") 465 | 466 | 467 | def p_assignment_operator(p): 468 | """ 469 | assignment_operator : ASSIGN 470 | | MUL_ASSIGN 471 | | DIV_ASSIGN 472 | | MOD_ASSIGN 473 | | ADD_ASSIGN 474 | | SUB_ASSIGN 475 | | LEFT_ASSIGN 476 | | RIGHT_ASSIGN 477 | | AND_ASSIGN 478 | | XOR_ASSIGN 479 | | OR_ASSIGN 480 | """ 481 | construct_node(p, "assignment_operator") 482 | 483 | 484 | def p_expression(p): 485 | """ 486 | expression : assignment_expression 487 | | expression COMMA assignment_expression 488 | """ 489 | if len(p) == 2: 490 | p[0] = p[1] 491 | else: 492 | construct_node(p, "expression") 493 | # if len(p) == 2: 494 | # construct_node(p, "expression") 495 | # elif len(p) == 4: 496 | # # printAST(p[1]) 497 | # p[1].append(p[3]) 498 | # p[0] = p[1] 499 | # else: 500 | # raise Exception("expression just has 2 or 4 children") 501 | 502 | 503 | def p_constant_expression(p): 504 | """ 505 | constant_expression : conditional_expression 506 | """ 507 | construct_node(p, "constant_expression") 508 | 509 | 510 | def p_init_declarator_list(p): 511 | """ 512 | init_declarator_list : init_declarator 513 | | init_declarator_list COMMA init_declarator 514 | """ 515 | if len(p) == 2: 516 | construct_node(p, "init_declarator_list") 517 | else: 518 | p[1].append(p[2]) 519 | p[1].append(p[3]) 520 | p[0] = p[1] 521 | 522 | 523 | def p_init_declarator(p): 524 | """ 525 | init_declarator : declarator 526 | | declarator ASSIGN initializer 527 | """ 528 | construct_node(p, "init_declarator") 529 | 530 | 531 | def p_storage_class_specifier(p): 532 | """ 533 | storage_class_specifier : TYPEDEF 534 | | EXTERN 535 | | STATIC 536 | """ 537 | construct_node(p, "storage_class_specifier") 538 | 539 | 540 | def p_integer_type(p): 541 | """ 542 | integer_type : CHAR 543 | | SHORT 544 | | INT 545 | | LONG 546 | | UNSIGNED integer_type 547 | | SIGNED integer_type 548 | | SHORT integer_type 549 | | LONG integer_type 550 | """ 551 | if len(p) == 2: 552 | construct_node(p, "integer_type") 553 | else: 554 | p[2].insert(1, p[1]) 555 | p[0] = p[2] 556 | # print(p[0]) 557 | 558 | 559 | def p_type_specifier(p): 560 | """type_specifier : VOID 561 | | integer_type 562 | | FLOAT 563 | | DOUBLE 564 | | struct_or_union_specifier 565 | | enum_specifier 566 | | TYPE_NAME 567 | """ 568 | # | TYPE_NAME 569 | construct_node(p, "type_specifier") 570 | 571 | 572 | def p_struct_or_union_specifier(p): 573 | """ 574 | struct_or_union_specifier : struct_or_union IDENTIFIER LCURLYBRACKET struct_declaration_list RCURLYBRACKET 575 | | struct_or_union TYPE_NAME LCURLYBRACKET struct_declaration_list RCURLYBRACKET 576 | | struct_or_union ERRORID LCURLYBRACKET struct_declaration_list RCURLYBRACKET 577 | | struct_or_union LCURLYBRACKET struct_declaration_list RCURLYBRACKET 578 | | struct_or_union IDENTIFIER 579 | | struct_or_union TYPE_NAME 580 | | struct_or_union ERRORID 581 | """ 582 | handleErrorID(p, 2) 583 | construct_node(p, "struct_or_union_specifier") 584 | 585 | 586 | def p_struct_or_union(p): 587 | """ 588 | struct_or_union : STRUCT 589 | | UNION 590 | """ 591 | construct_node(p, "struct_or_union") 592 | 593 | 594 | def p_struct_declaration_list(p): 595 | """struct_declaration_list : struct_declaration 596 | | struct_declaration_list struct_declaration 597 | """ 598 | if len(p) == 2: 599 | construct_node(p, "struct_declaration_list") 600 | elif len(p) == 3: 601 | p[1].append(p[2]) 602 | p[0] = p[1] 603 | 604 | 605 | def p_struct_declaration(p): 606 | """struct_declaration : specifier_qualifier_list struct_declarator_list SEMICOLON 607 | | specifier_qualifier_list struct_declarator_list error 608 | """ 609 | del_list = [] 610 | last_idx = len(p) - 1 611 | if p[last_idx] != ';': 612 | print("struct_declaration") 613 | del_list.append(last_idx) 614 | parser.errorCounter = 0 615 | construct_node(p, "struct_declaration", del_list) 616 | 617 | 618 | # print(p[0]) 619 | 620 | 621 | def p_specifier_qualifier_list(p): 622 | """ 623 | specifier_qualifier_list : type_specifier 624 | | type_specifier type_qualifier 625 | | type_qualifier type_specifier 626 | """ 627 | construct_node(p, "specifier_qualifier_list") 628 | 629 | 630 | def p_struct_declarator_list(p): 631 | """ 632 | struct_declarator_list : declarator 633 | | struct_declarator_list COMMA declarator 634 | """ 635 | if len(p) == 2: 636 | construct_node(p, "struct_declarator_list") 637 | else: 638 | p[1].append(p[2]) 639 | p[1].append(p[3]) 640 | p[0] = p[1] 641 | 642 | 643 | # def p_struct_declarator(p): 644 | # """ 645 | # struct_declarator : declarator 646 | # | COLON constant_expression 647 | # | declarator COLON constant_expression 648 | # """ 649 | # construct_node(p, "struct_declarator") 650 | 651 | 652 | def p_enum_specifier(p): 653 | """ 654 | enum_specifier : ENUM LCURLYBRACKET enumerator_list RCURLYBRACKET 655 | | ENUM IDENTIFIER LCURLYBRACKET enumerator_list RCURLYBRACKET 656 | | ENUM IDENTIFIER 657 | | ENUM ERRORID LCURLYBRACKET enumerator_list RCURLYBRACKET 658 | | ENUM ERRORID 659 | """ 660 | handleErrorID(p, 2) 661 | construct_node(p, "enum_specifier") 662 | 663 | 664 | def p_enumerator_list(p): 665 | """ 666 | enumerator_list : enumerator 667 | | enumerator_list COMMA enumerator 668 | """ 669 | if len(p) == 2: 670 | construct_node(p, "enumerator_list") 671 | else: 672 | p[1].append(p[2]) 673 | p[1].append(p[3]) 674 | p[0] = p[1] 675 | 676 | 677 | def p_enumerator(p): 678 | """ 679 | enumerator : IDENTIFIER 680 | | IDENTIFIER ASSIGN constant_expression 681 | | ERRORID 682 | | ERRORID ASSIGN constant_expression 683 | """ 684 | handleErrorID(p, 1) 685 | construct_node(p, "enumerator") 686 | 687 | 688 | def p_type_qualifier(p): 689 | """ 690 | type_qualifier : CONST 691 | """ 692 | construct_node(p, "type_qualifier") 693 | 694 | 695 | def p_declarator(p): 696 | """ 697 | declarator : pointer direct_declarator 698 | | direct_declarator 699 | """ 700 | construct_node(p, "declarator") 701 | 702 | 703 | def p_direct_declarator(p): 704 | """ 705 | direct_declarator : direct_declarator LSQUAREBRACKET constant_expression RSQUAREBRACKET 706 | | direct_declarator LSQUAREBRACKET RSQUAREBRACKET 707 | | direct_declarator LBRACKET parameter_type_list RBRACKET 708 | | direct_declarator LBRACKET RBRACKET 709 | | IDENTIFIER 710 | | LBRACKET declarator RBRACKET 711 | | ERRORID 712 | """ 713 | handleErrorID(p, 1) 714 | construct_node(p, "direct_declarator") 715 | 716 | 717 | def p_pointer(p): 718 | """ 719 | pointer : STAR 720 | | STAR CONST 721 | | pointer STAR 722 | | pointer STAR CONST 723 | """ 724 | if p[1][0] != 'pointer': 725 | construct_node(p, "pointer") 726 | else: 727 | p[1].append(p[2]) 728 | if len(p) == 4: 729 | p[1].append(p[3]) 730 | p[0] = p[1] 731 | 732 | 733 | def p_type_qualifier_list(p): 734 | """ 735 | type_qualifier_list : type_qualifier 736 | | type_qualifier_list type_qualifier 737 | """ 738 | construct_node(p, "type_qualifier_list") 739 | 740 | 741 | def p_parameter_type_list(p): 742 | """ 743 | parameter_type_list : parameter_list 744 | | parameter_list COMMA ELLIPSIS 745 | """ 746 | construct_node(p, "parameter_type_list") 747 | 748 | 749 | def p_parameter_list(p): 750 | """ 751 | parameter_list : parameter_declaration 752 | | parameter_list COMMA parameter_declaration 753 | """ 754 | if len(p) == 2: 755 | construct_node(p, "parameter_list") 756 | else: 757 | p[1].append(p[2]) 758 | p[1].append(p[3]) 759 | p[0] = p[1] 760 | 761 | 762 | def p_parameter_declaration(p): 763 | """ 764 | parameter_declaration : declaration_specifiers declarator 765 | | declaration_specifiers abstract_declarator 766 | | declaration_specifiers 767 | """ 768 | construct_node(p, "parameter_declaration") 769 | 770 | 771 | def p_type_name(p): 772 | """ 773 | type_name : specifier_qualifier_list 774 | | specifier_qualifier_list abstract_declarator 775 | """ 776 | construct_node(p, "type_name") 777 | 778 | 779 | def p_abstract_declarator(p): 780 | """ 781 | abstract_declarator : pointer 782 | | direct_abstract_declarator 783 | | pointer direct_abstract_declarator 784 | """ 785 | construct_node(p, "abstract_declarator") 786 | 787 | 788 | def p_direct_abstract_declarator(p): 789 | """ 790 | direct_abstract_declarator : LBRACKET abstract_declarator RBRACKET 791 | | LSQUAREBRACKET RSQUAREBRACKET 792 | | LSQUAREBRACKET constant_expression RSQUAREBRACKET 793 | | direct_abstract_declarator LSQUAREBRACKET RSQUAREBRACKET 794 | | direct_abstract_declarator LSQUAREBRACKET constant_expression RSQUAREBRACKET 795 | | LBRACKET RBRACKET 796 | | LBRACKET parameter_type_list RBRACKET 797 | | direct_abstract_declarator LBRACKET RBRACKET 798 | | direct_abstract_declarator LBRACKET parameter_type_list RBRACKET 799 | """ 800 | construct_node(p, "direct_abstract_declarator") 801 | 802 | 803 | def p_initializer(p): 804 | """ 805 | initializer : assignment_expression 806 | | LCURLYBRACKET initializer_list RCURLYBRACKET 807 | | LCURLYBRACKET initializer_list COMMA RCURLYBRACKET 808 | """ 809 | construct_node(p, "initializer") 810 | 811 | 812 | def p_initiazer_list(p): 813 | """ 814 | initializer_list : initializer 815 | | initializer_list COMMA initializer 816 | """ 817 | if len(p) == 2: 818 | construct_node(p, "initializer_list") 819 | else: 820 | p[1].append(p[2]) 821 | p[1].append(p[3]) 822 | p[0] = p[1] 823 | 824 | 825 | def p_statement(p): 826 | """ 827 | statement : labeled_statement 828 | | compound_statement 829 | | expression_statement 830 | | selection_statement 831 | | iteration_statement 832 | | jump_statement 833 | """ 834 | construct_node(p, "statement") 835 | 836 | 837 | def p_labeled_statement(p): 838 | """ 839 | labeled_statement : CASE constant_expression COLON statement 840 | | DEFAULT COLON statement 841 | """ 842 | # | IDENTIFIER COLON statement 843 | # | ERRORID COLON statement 844 | # handleErrorID(p, 1) 845 | construct_node(p, "labeled_statement") 846 | 847 | 848 | def p_compound_statement(p): 849 | """ 850 | compound_statement : LCURLYBRACKET RCURLYBRACKET 851 | | LCURLYBRACKET statement_list RCURLYBRACKET 852 | | LCURLYBRACKET declaration_list RCURLYBRACKET 853 | | LCURLYBRACKET declaration_list statement_list RCURLYBRACKET 854 | | LCURLYBRACKET error 855 | | LCURLYBRACKET statement_list error 856 | | LCURLYBRACKET declaration_list error 857 | | LCURLYBRACKET declaration_list statement_list error 858 | """ 859 | handleMissingRCURLYBRACKET(p) 860 | construct_node(p, "compound_statement") 861 | 862 | 863 | def p_declaration_list(p): 864 | """ 865 | declaration_list : declaration 866 | | declaration_list declaration 867 | """ 868 | if len(p) == 2: 869 | construct_node(p, "declaration_list") 870 | elif len(p) == 3: 871 | p[1].append(p[2]) 872 | p[0] = p[1] 873 | 874 | 875 | def p_statement_list(p): 876 | """ 877 | statement_list : statement 878 | | statement_list statement 879 | """ 880 | if len(p) == 2: 881 | construct_node(p, "statement_list") 882 | if len(p) == 3: 883 | p[1].append(p[2]) 884 | p[0] = p[1] 885 | 886 | 887 | def p_expression_statement(p): 888 | """ 889 | expression_statement : SEMICOLON 890 | | expression SEMICOLON 891 | | expression error 892 | """ 893 | 894 | # del_list = [] 895 | # last_idx = len(p) - 1 896 | # if p[last_idx] != ';': 897 | # print("expression_statement") 898 | # print("Error type: Missing semicolon before %s. at line: %d, lex pos: %d.\n" % (p[last_idx], p.lineno(last_idx), p.lexpos(last_idx))) 899 | # del_list.append(last_idx) 900 | # parser.errorCounter = 0 901 | del_list = handleMissingSEMI(p, "expression_statement") 902 | construct_node(p, "expression_statement", del_list) 903 | 904 | 905 | def p_selection_statement(p): 906 | """ 907 | selection_statement : IF LBRACKET expression RBRACKET statement 908 | | IF LBRACKET expression RBRACKET statement ELSE statement 909 | | SWITCH LBRACKET expression RBRACKET statement 910 | """ 911 | construct_node(p, "selection_statement") 912 | 913 | 914 | def p_iteration_statement(p): 915 | """ 916 | iteration_statement : WHILE LBRACKET expression RBRACKET statement 917 | | DO statement WHILE LBRACKET expression RBRACKET SEMICOLON 918 | | DO statement WHILE LBRACKET expression RBRACKET error 919 | | FOR LBRACKET expression_statement expression_statement RBRACKET statement 920 | | FOR LBRACKET expression_statement expression_statement expression RBRACKET statement 921 | """ 922 | # del_list = [] 923 | # last_idx = len(p) - 1 924 | # if p[1] == 'do' and p[last_idx] != ';': 925 | # print("iteration statement") 926 | # print("Error type: Missing semicolon before %s. at line: %d, lex pos: %d.\n" % (p[last_idx], p.lineno(last_idx), p.lexpos(last_idx))) 927 | # del_list.append(last_idx) 928 | # parser.errorCounter = 0 929 | del_list = handleMissingSEMI(p, "iteration_statement", (1, 'do')) 930 | construct_node(p, "iteration_statement", del_list) 931 | 932 | 933 | # print(p[0]) 934 | 935 | def p_jump_statement(p): 936 | """ 937 | jump_statement : CONTINUE SEMICOLON 938 | | BREAK SEMICOLON 939 | | RETURN SEMICOLON 940 | | RETURN expression SEMICOLON 941 | | CONTINUE error 942 | | BREAK error 943 | | RETURN error 944 | | RETURN expression error 945 | """ 946 | # del_list = [] 947 | # last_idx = len(p) - 1 948 | # if p[last_idx] != ';': 949 | # print("jump statement") 950 | # print("Error type: Missing semicolon before %s. at line: %d, lex pos: %d.\n" % (p[last_idx], p.lineno(last_idx), p.lexpos(last_idx))) 951 | # del_list.append(last_idx) 952 | # parser.errorCounter = 0 953 | del_list = handleMissingSEMI(p, "jump_statement") 954 | construct_node(p, "jump_statement", del_list) 955 | 956 | 957 | # print(p[0]) 958 | 959 | def p_function_definition(p): 960 | """ 961 | function_definition : declaration_specifiers declarator compound_statement 962 | """ 963 | construct_node(p, "function_definition") 964 | 965 | 966 | def p_error(p): 967 | if not p: 968 | print("End of file.") 969 | return 970 | 971 | if p.type == 'EOF': 972 | if ZCClex.lexer.lexer.curlyBalance > 0: 973 | parser.errok() 974 | return lex.LexToken( 975 | 'RCURCLYBRACKET', 976 | '}', 977 | p.lexer.lineno, 978 | p.lexer.lexpos) 979 | else: 980 | return 981 | 982 | print("Syntax error at %r, at line: %d, column: %d." % ( 983 | p.value, p.lexer.lineno, ZCClex.find_column(p.lexer.lexdata, p))) 984 | if p.type == 'IDENTIFIER': 985 | print("Undefined Type " + p.value[1]) 986 | 987 | if parser.errorCounter > 0: 988 | print("In panic mode\n") 989 | while True: 990 | tok = parser.token() 991 | if not tok or tok.type == 'SEMICOLON' or tok.type == 'RCURLYBRACKET': 992 | break 993 | parser.restart() 994 | else: 995 | parser.errorCounter += 1 996 | return p 997 | 998 | 999 | def printAST(p, n=0): 1000 | if p is not None: 1001 | # if type(p) is list: 1002 | if len(p) > 0 and not isinstance(p, str): 1003 | print('line:%02d' % p.lineno, end='') 1004 | print(' |' * n, end='-') 1005 | print(p[0]) 1006 | for node in p[1:]: 1007 | printAST(node, n + 1) 1008 | else: 1009 | print('line:xx', end='') 1010 | print(' |' * n, end='-') 1011 | print(p) 1012 | 1013 | 1014 | parser = yacc.yacc(start='outer_translation_unit', debug=True) 1015 | parser.errorCounter = 0 1016 | 1017 | if __name__ == "__main__": 1018 | # pprint(parser.__dict__) 1019 | # while True: 1020 | # try: 1021 | # c_file_name = raw_input('c file name: ') 1022 | c_file_name = "test1.c" 1023 | c_file = open(c_file_name, "r") 1024 | 1025 | contents = "".join(c_file.readlines()) 1026 | # except EOFError: 1027 | # break 1028 | # if not contents: continue 1029 | # result = parser.parse(contents, lexer = ZCClex.orig_lexer) 1030 | result = parser.parse(contents, lexer=ZCClex.lexer) 1031 | printAST(result) 1032 | -------------------------------------------------------------------------------- /yyparse/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | -------------------------------------------------------------------------------- /yyparse/missRightCurly.c: -------------------------------------------------------------------------------- 1 | // 2 | //int a, b, c; 3 | //int main(int argc, char *argv[]) { 4 | // c = a + b; 5 | // 6 | // 7 | //int b, c; 8 | 9 | int f(){ 10 | 11 | int d; -------------------------------------------------------------------------------- /yyparse/missSEMI.c: -------------------------------------------------------------------------------- 1 | 2 | int b 3 | 4 | int main(int argc, char *argv[]) { 5 | int $a; 6 | c = a + b; 7 | d = a +/ b; 8 | d = a -/ b; 9 | d = a ^^ / b; 10 | d = a *|b; 11 | d = a >/ b; 12 | d = a =!?()[]{}.,;:\\\'\"" 22 | 23 | # Whitespace 24 | def t_CPP_WS(t): 25 | r'\s+' 26 | t.lexer.lineno += t.value.count("\n") 27 | return t 28 | 29 | t_CPP_POUND = r'\#' 30 | t_CPP_DPOUND = r'\#\#' 31 | 32 | # Identifier 33 | t_CPP_ID = r'[A-Za-z_][\w_]*' 34 | 35 | # Integer literal 36 | def CPP_INTEGER(t): 37 | r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)' 38 | return t 39 | 40 | t_CPP_INTEGER = CPP_INTEGER 41 | 42 | # Floating literal 43 | t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' 44 | 45 | # String literal 46 | def t_CPP_STRING(t): 47 | r'\"([^\\\n]|(\\(.|\n)))*?\"' 48 | t.lexer.lineno += t.value.count("\n") 49 | return t 50 | 51 | # Character constant 'c' or L'c' 52 | def t_CPP_CHAR(t): 53 | r'(L)?\'([^\\\n]|(\\(.|\n)))*?\'' 54 | t.lexer.lineno += t.value.count("\n") 55 | return t 56 | 57 | # Comment 58 | def t_CPP_COMMENT1(t): 59 | r'(/\*(.|\n)*?\*/)' 60 | ncr = t.value.count("\n") 61 | t.lexer.lineno += ncr 62 | # replace with one space or a number of '\n' 63 | t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' ' 64 | return t 65 | 66 | # Line comment 67 | def t_CPP_COMMENT2(t): 68 | r'(//.*?(\n|$))' 69 | # replace with '/n' 70 | t.type = 'CPP_WS'; t.value = '\n' 71 | 72 | def t_error(t): 73 | t.type = t.value[0] 74 | t.value = t.value[0] 75 | t.lexer.skip(1) 76 | return t 77 | 78 | import re 79 | import copy 80 | import time 81 | import os.path 82 | 83 | # ----------------------------------------------------------------------------- 84 | # trigraph() 85 | # 86 | # Given an input string, this function replaces all trigraph sequences. 87 | # The following mapping is used: 88 | # 89 | # ??= # 90 | # ??/ \ 91 | # ??' ^ 92 | # ??( [ 93 | # ??) ] 94 | # ??! | 95 | # ??< { 96 | # ??> } 97 | # ??- ~ 98 | # ----------------------------------------------------------------------------- 99 | 100 | _trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''') 101 | _trigraph_rep = { 102 | '=':'#', 103 | '/':'\\', 104 | "'":'^', 105 | '(':'[', 106 | ')':']', 107 | '!':'|', 108 | '<':'{', 109 | '>':'}', 110 | '-':'~' 111 | } 112 | 113 | def trigraph(input): 114 | return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input) 115 | 116 | # ------------------------------------------------------------------ 117 | # Macro object 118 | # 119 | # This object holds information about preprocessor macros 120 | # 121 | # .name - Macro name (string) 122 | # .value - Macro value (a list of tokens) 123 | # .arglist - List of argument names 124 | # .variadic - Boolean indicating whether or not variadic macro 125 | # .vararg - Name of the variadic parameter 126 | # 127 | # When a macro is created, the macro replacement token sequence is 128 | # pre-scanned and used to create patch lists that are later used 129 | # during macro expansion 130 | # ------------------------------------------------------------------ 131 | 132 | class Macro(object): 133 | def __init__(self,name,value,arglist=None,variadic=False): 134 | self.name = name 135 | self.value = value 136 | self.arglist = arglist 137 | self.variadic = variadic 138 | if variadic: 139 | self.vararg = arglist[-1] 140 | self.source = None 141 | 142 | # ------------------------------------------------------------------ 143 | # Preprocessor object 144 | # 145 | # Object representing a preprocessor. Contains macro definitions, 146 | # include directories, and other information 147 | # ------------------------------------------------------------------ 148 | 149 | class Preprocessor(object): 150 | def __init__(self,lexer=None): 151 | if lexer is None: 152 | lexer = lex.lexer 153 | self.lexer = lexer 154 | self.macros = { } 155 | self.path = [] 156 | self.temp_path = [] 157 | 158 | # Probe the lexer for selected tokens 159 | self.lexprobe() 160 | 161 | tm = time.localtime() 162 | self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm)) 163 | self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm)) 164 | self.parser = None 165 | 166 | # ----------------------------------------------------------------------------- 167 | # tokenize() 168 | # 169 | # Utility function. Given a string of text, tokenize into a list of tokens 170 | # ----------------------------------------------------------------------------- 171 | 172 | def tokenize(self,text): 173 | tokens = [] 174 | self.lexer.input(text) 175 | while True: 176 | tok = self.lexer.token() 177 | if not tok: break 178 | tokens.append(tok) 179 | return tokens 180 | 181 | # --------------------------------------------------------------------- 182 | # error() 183 | # 184 | # Report a preprocessor error/warning of some kind 185 | # ---------------------------------------------------------------------- 186 | 187 | def error(self,file,line,msg): 188 | print("%s:%d %s" % (file,line,msg)) 189 | 190 | # ---------------------------------------------------------------------- 191 | # lexprobe() 192 | # 193 | # This method probes the preprocessor lexer object to discover 194 | # the token types of symbols that are important to the preprocessor. 195 | # If this works right, the preprocessor will simply "work" 196 | # with any suitable lexer regardless of how tokens have been named. 197 | # ---------------------------------------------------------------------- 198 | 199 | def lexprobe(self): 200 | 201 | # Determine the token type for identifiers 202 | self.lexer.input("identifier") 203 | tok = self.lexer.token() 204 | if not tok or tok.value != "identifier": 205 | print("Couldn't determine identifier type") 206 | else: 207 | self.t_ID = tok.type 208 | 209 | # Determine the token type for integers 210 | self.lexer.input("12345") 211 | tok = self.lexer.token() 212 | if not tok or int(tok.value) != 12345: 213 | print("Couldn't determine integer type") 214 | else: 215 | self.t_INTEGER = tok.type 216 | self.t_INTEGER_TYPE = type(tok.value) 217 | 218 | # Determine the token type for strings enclosed in double quotes 219 | self.lexer.input("\"filename\"") 220 | tok = self.lexer.token() 221 | if not tok or tok.value != "\"filename\"": 222 | print("Couldn't determine string type") 223 | else: 224 | self.t_STRING = tok.type 225 | 226 | # Determine the token type for whitespace--if any 227 | self.lexer.input(" ") 228 | tok = self.lexer.token() 229 | if not tok or tok.value != " ": 230 | self.t_SPACE = None 231 | else: 232 | self.t_SPACE = tok.type 233 | 234 | # Determine the token type for newlines 235 | self.lexer.input("\n") 236 | tok = self.lexer.token() 237 | if not tok or tok.value != "\n": 238 | self.t_NEWLINE = None 239 | print("Couldn't determine token for newlines") 240 | else: 241 | self.t_NEWLINE = tok.type 242 | 243 | self.t_WS = (self.t_SPACE, self.t_NEWLINE) 244 | 245 | # Check for other characters used by the preprocessor 246 | chars = [ '<','>','#','##','\\','(',')',',','.'] 247 | for c in chars: 248 | self.lexer.input(c) 249 | tok = self.lexer.token() 250 | if not tok or tok.value != c: 251 | print("Unable to lex '%s' required for preprocessor" % c) 252 | 253 | # ---------------------------------------------------------------------- 254 | # add_path() 255 | # 256 | # Adds a search path to the preprocessor. 257 | # ---------------------------------------------------------------------- 258 | 259 | def add_path(self,path): 260 | self.path.append(path) 261 | 262 | # ---------------------------------------------------------------------- 263 | # group_lines() 264 | # 265 | # Given an input string, this function splits it into lines. Trailing whitespace 266 | # is removed. Any line ending with \ is grouped with the next line. This 267 | # function forms the lowest level of the preprocessor---grouping into text into 268 | # a line-by-line format. 269 | # ---------------------------------------------------------------------- 270 | 271 | def group_lines(self,input): 272 | lex = self.lexer.clone() 273 | lines = [x.rstrip() for x in input.splitlines()] 274 | for i in xrange(len(lines)): 275 | j = i+1 276 | while lines[i].endswith('\\') and (j < len(lines)): 277 | lines[i] = lines[i][:-1]+lines[j] 278 | lines[j] = "" 279 | j += 1 280 | 281 | input = "\n".join(lines) 282 | lex.input(input) 283 | lex.lineno = 1 284 | 285 | current_line = [] 286 | while True: 287 | tok = lex.token() 288 | if not tok: 289 | break 290 | current_line.append(tok) 291 | if tok.type in self.t_WS and '\n' in tok.value: 292 | yield current_line 293 | current_line = [] 294 | 295 | if current_line: 296 | yield current_line 297 | 298 | # ---------------------------------------------------------------------- 299 | # tokenstrip() 300 | # 301 | # Remove leading/trailing whitespace tokens from a token list 302 | # ---------------------------------------------------------------------- 303 | 304 | def tokenstrip(self,tokens): 305 | i = 0 306 | while i < len(tokens) and tokens[i].type in self.t_WS: 307 | i += 1 308 | del tokens[:i] 309 | i = len(tokens)-1 310 | while i >= 0 and tokens[i].type in self.t_WS: 311 | i -= 1 312 | del tokens[i+1:] 313 | return tokens 314 | 315 | 316 | # ---------------------------------------------------------------------- 317 | # collect_args() 318 | # 319 | # Collects comma separated arguments from a list of tokens. The arguments 320 | # must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions) 321 | # where tokencount is the number of tokens consumed, args is a list of arguments, 322 | # and positions is a list of integers containing the starting index of each 323 | # argument. Each argument is represented by a list of tokens. 324 | # 325 | # When collecting arguments, leading and trailing whitespace is removed 326 | # from each argument. 327 | # 328 | # This function properly handles nested parenthesis and commas---these do not 329 | # define new arguments. 330 | # ---------------------------------------------------------------------- 331 | 332 | def collect_args(self,tokenlist): 333 | args = [] 334 | positions = [] 335 | current_arg = [] 336 | nesting = 1 337 | tokenlen = len(tokenlist) 338 | 339 | # Search for the opening '('. 340 | i = 0 341 | while (i < tokenlen) and (tokenlist[i].type in self.t_WS): 342 | i += 1 343 | 344 | if (i < tokenlen) and (tokenlist[i].value == '('): 345 | positions.append(i+1) 346 | else: 347 | self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments") 348 | return 0, [], [] 349 | 350 | i += 1 351 | 352 | while i < tokenlen: 353 | t = tokenlist[i] 354 | if t.value == '(': 355 | current_arg.append(t) 356 | nesting += 1 357 | elif t.value == ')': 358 | nesting -= 1 359 | if nesting == 0: 360 | if current_arg: 361 | args.append(self.tokenstrip(current_arg)) 362 | positions.append(i) 363 | return i+1,args,positions 364 | current_arg.append(t) 365 | elif t.value == ',' and nesting == 1: 366 | args.append(self.tokenstrip(current_arg)) 367 | positions.append(i+1) 368 | current_arg = [] 369 | else: 370 | current_arg.append(t) 371 | i += 1 372 | 373 | # Missing end argument 374 | self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments") 375 | return 0, [],[] 376 | 377 | # ---------------------------------------------------------------------- 378 | # macro_prescan() 379 | # 380 | # Examine the macro value (token sequence) and identify patch points 381 | # This is used to speed up macro expansion later on---we'll know 382 | # right away where to apply patches to the value to form the expansion 383 | # ---------------------------------------------------------------------- 384 | 385 | def macro_prescan(self,macro): 386 | macro.patch = [] # Standard macro arguments 387 | macro.str_patch = [] # String conversion expansion 388 | macro.var_comma_patch = [] # Variadic macro comma patch 389 | i = 0 390 | while i < len(macro.value): 391 | if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist: 392 | argnum = macro.arglist.index(macro.value[i].value) 393 | # Conversion of argument to a string 394 | if i > 0 and macro.value[i-1].value == '#': 395 | macro.value[i] = copy.copy(macro.value[i]) 396 | macro.value[i].type = self.t_STRING 397 | del macro.value[i-1] 398 | macro.str_patch.append((argnum,i-1)) 399 | continue 400 | # Concatenation 401 | elif (i > 0 and macro.value[i-1].value == '##'): 402 | macro.patch.append(('c',argnum,i-1)) 403 | del macro.value[i-1] 404 | continue 405 | elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'): 406 | macro.patch.append(('c',argnum,i)) 407 | i += 1 408 | continue 409 | # Standard expansion 410 | else: 411 | macro.patch.append(('e',argnum,i)) 412 | elif macro.value[i].value == '##': 413 | if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \ 414 | ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \ 415 | (macro.value[i+1].value == macro.vararg): 416 | macro.var_comma_patch.append(i-1) 417 | i += 1 418 | macro.patch.sort(key=lambda x: x[2],reverse=True) 419 | 420 | # ---------------------------------------------------------------------- 421 | # macro_expand_args() 422 | # 423 | # Given a Macro and list of arguments (each a token list), this method 424 | # returns an expanded version of a macro. The return value is a token sequence 425 | # representing the replacement macro tokens 426 | # ---------------------------------------------------------------------- 427 | 428 | def macro_expand_args(self,macro,args): 429 | # Make a copy of the macro token sequence 430 | rep = [copy.copy(_x) for _x in macro.value] 431 | 432 | # Make string expansion patches. These do not alter the length of the replacement sequence 433 | 434 | str_expansion = {} 435 | for argnum, i in macro.str_patch: 436 | if argnum not in str_expansion: 437 | str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\") 438 | rep[i] = copy.copy(rep[i]) 439 | rep[i].value = str_expansion[argnum] 440 | 441 | # Make the variadic macro comma patch. If the variadic macro argument is empty, we get rid 442 | comma_patch = False 443 | if macro.variadic and not args[-1]: 444 | for i in macro.var_comma_patch: 445 | rep[i] = None 446 | comma_patch = True 447 | 448 | # Make all other patches. The order of these matters. It is assumed that the patch list 449 | # has been sorted in reverse order of patch location since replacements will cause the 450 | # size of the replacement sequence to expand from the patch point. 451 | 452 | expanded = { } 453 | for ptype, argnum, i in macro.patch: 454 | # Concatenation. Argument is left unexpanded 455 | if ptype == 'c': 456 | rep[i:i+1] = args[argnum] 457 | # Normal expansion. Argument is macro expanded first 458 | elif ptype == 'e': 459 | if argnum not in expanded: 460 | expanded[argnum] = self.expand_macros(args[argnum]) 461 | rep[i:i+1] = expanded[argnum] 462 | 463 | # Get rid of removed comma if necessary 464 | if comma_patch: 465 | rep = [_i for _i in rep if _i] 466 | 467 | return rep 468 | 469 | 470 | # ---------------------------------------------------------------------- 471 | # expand_macros() 472 | # 473 | # Given a list of tokens, this function performs macro expansion. 474 | # The expanded argument is a dictionary that contains macros already 475 | # expanded. This is used to prevent infinite recursion. 476 | # ---------------------------------------------------------------------- 477 | 478 | def expand_macros(self,tokens,expanded=None): 479 | if expanded is None: 480 | expanded = {} 481 | i = 0 482 | while i < len(tokens): 483 | t = tokens[i] 484 | if t.type == self.t_ID: 485 | if t.value in self.macros and t.value not in expanded: 486 | # Yes, we found a macro match 487 | expanded[t.value] = True 488 | 489 | m = self.macros[t.value] 490 | if not m.arglist: 491 | # A simple macro 492 | ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded) 493 | for e in ex: 494 | e.lineno = t.lineno 495 | tokens[i:i+1] = ex 496 | i += len(ex) 497 | else: 498 | # A macro with arguments 499 | j = i + 1 500 | while j < len(tokens) and tokens[j].type in self.t_WS: 501 | j += 1 502 | if tokens[j].value == '(': 503 | tokcount,args,positions = self.collect_args(tokens[j:]) 504 | if not m.variadic and len(args) != len(m.arglist): 505 | self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist))) 506 | i = j + tokcount 507 | elif m.variadic and len(args) < len(m.arglist)-1: 508 | if len(m.arglist) > 2: 509 | self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1)) 510 | else: 511 | self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1)) 512 | i = j + tokcount 513 | else: 514 | if m.variadic: 515 | if len(args) == len(m.arglist)-1: 516 | args.append([]) 517 | else: 518 | args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1] 519 | del args[len(m.arglist):] 520 | 521 | # Get macro replacement text 522 | rep = self.macro_expand_args(m,args) 523 | rep = self.expand_macros(rep,expanded) 524 | for r in rep: 525 | r.lineno = t.lineno 526 | tokens[i:j+tokcount] = rep 527 | i += len(rep) 528 | del expanded[t.value] 529 | continue 530 | elif t.value == '__LINE__': 531 | t.type = self.t_INTEGER 532 | t.value = self.t_INTEGER_TYPE(t.lineno) 533 | 534 | i += 1 535 | return tokens 536 | 537 | # ---------------------------------------------------------------------- 538 | # evalexpr() 539 | # 540 | # Evaluate an expression token sequence for the purposes of evaluating 541 | # integral expressions. 542 | # ---------------------------------------------------------------------- 543 | 544 | def evalexpr(self,tokens): 545 | # tokens = tokenize(line) 546 | # Search for defined macros 547 | i = 0 548 | while i < len(tokens): 549 | if tokens[i].type == self.t_ID and tokens[i].value == 'defined': 550 | j = i + 1 551 | needparen = False 552 | result = "0L" 553 | while j < len(tokens): 554 | if tokens[j].type in self.t_WS: 555 | j += 1 556 | continue 557 | elif tokens[j].type == self.t_ID: 558 | if tokens[j].value in self.macros: 559 | result = "1L" 560 | else: 561 | result = "0L" 562 | if not needparen: break 563 | elif tokens[j].value == '(': 564 | needparen = True 565 | elif tokens[j].value == ')': 566 | break 567 | else: 568 | self.error(self.source,tokens[i].lineno,"Malformed defined()") 569 | j += 1 570 | tokens[i].type = self.t_INTEGER 571 | tokens[i].value = self.t_INTEGER_TYPE(result) 572 | del tokens[i+1:j+1] 573 | i += 1 574 | tokens = self.expand_macros(tokens) 575 | for i,t in enumerate(tokens): 576 | if t.type == self.t_ID: 577 | tokens[i] = copy.copy(t) 578 | tokens[i].type = self.t_INTEGER 579 | tokens[i].value = self.t_INTEGER_TYPE("0L") 580 | elif t.type == self.t_INTEGER: 581 | tokens[i] = copy.copy(t) 582 | # Strip off any trailing suffixes 583 | tokens[i].value = str(tokens[i].value) 584 | while tokens[i].value[-1] not in "0123456789abcdefABCDEF": 585 | tokens[i].value = tokens[i].value[:-1] 586 | 587 | expr = "".join([str(x.value) for x in tokens]) 588 | expr = expr.replace("&&"," and ") 589 | expr = expr.replace("||"," or ") 590 | expr = expr.replace("!"," not ") 591 | try: 592 | result = eval(expr) 593 | except StandardError: 594 | self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression") 595 | result = 0 596 | return result 597 | 598 | # ---------------------------------------------------------------------- 599 | # parsegen() 600 | # 601 | # Parse an input string/ 602 | # ---------------------------------------------------------------------- 603 | def parsegen(self,input,source=None): 604 | 605 | # Replace trigraph sequences 606 | t = trigraph(input) 607 | lines = self.group_lines(t) 608 | 609 | if not source: 610 | source = "" 611 | 612 | self.define("__FILE__ \"%s\"" % source) 613 | 614 | self.source = source 615 | chunk = [] 616 | enable = True 617 | iftrigger = False 618 | ifstack = [] 619 | 620 | for x in lines: 621 | for i,tok in enumerate(x): 622 | if tok.type not in self.t_WS: break 623 | if tok.value == '#': 624 | # Preprocessor directive 625 | 626 | # insert necessary whitespace instead of eaten tokens 627 | for tok in x: 628 | if tok.type in self.t_WS and '\n' in tok.value: 629 | chunk.append(tok) 630 | 631 | dirtokens = self.tokenstrip(x[i+1:]) 632 | if dirtokens: 633 | name = dirtokens[0].value 634 | args = self.tokenstrip(dirtokens[1:]) 635 | else: 636 | name = "" 637 | args = [] 638 | 639 | if name == 'define': 640 | if enable: 641 | for tok in self.expand_macros(chunk): 642 | yield tok 643 | chunk = [] 644 | self.define(args) 645 | elif name == 'include': 646 | if enable: 647 | for tok in self.expand_macros(chunk): 648 | yield tok 649 | chunk = [] 650 | oldfile = self.macros['__FILE__'] 651 | for tok in self.include(args): 652 | yield tok 653 | self.macros['__FILE__'] = oldfile 654 | self.source = source 655 | elif name == 'undef': 656 | if enable: 657 | for tok in self.expand_macros(chunk): 658 | yield tok 659 | chunk = [] 660 | self.undef(args) 661 | elif name == 'ifdef': 662 | ifstack.append((enable,iftrigger)) 663 | if enable: 664 | if not args[0].value in self.macros: 665 | enable = False 666 | iftrigger = False 667 | else: 668 | iftrigger = True 669 | elif name == 'ifndef': 670 | ifstack.append((enable,iftrigger)) 671 | if enable: 672 | if args[0].value in self.macros: 673 | enable = False 674 | iftrigger = False 675 | else: 676 | iftrigger = True 677 | elif name == 'if': 678 | ifstack.append((enable,iftrigger)) 679 | if enable: 680 | result = self.evalexpr(args) 681 | if not result: 682 | enable = False 683 | iftrigger = False 684 | else: 685 | iftrigger = True 686 | elif name == 'elif': 687 | if ifstack: 688 | if ifstack[-1][0]: # We only pay attention if outer "if" allows this 689 | if enable: # If already true, we flip enable False 690 | enable = False 691 | elif not iftrigger: # If False, but not triggered yet, we'll check expression 692 | result = self.evalexpr(args) 693 | if result: 694 | enable = True 695 | iftrigger = True 696 | else: 697 | self.error(self.source,dirtokens[0].lineno,"Misplaced #elif") 698 | 699 | elif name == 'else': 700 | if ifstack: 701 | if ifstack[-1][0]: 702 | if enable: 703 | enable = False 704 | elif not iftrigger: 705 | enable = True 706 | iftrigger = True 707 | else: 708 | self.error(self.source,dirtokens[0].lineno,"Misplaced #else") 709 | 710 | elif name == 'endif': 711 | if ifstack: 712 | enable,iftrigger = ifstack.pop() 713 | else: 714 | self.error(self.source,dirtokens[0].lineno,"Misplaced #endif") 715 | else: 716 | # Unknown preprocessor directive 717 | pass 718 | 719 | else: 720 | # Normal text 721 | if enable: 722 | chunk.extend(x) 723 | 724 | for tok in self.expand_macros(chunk): 725 | yield tok 726 | chunk = [] 727 | 728 | # ---------------------------------------------------------------------- 729 | # include() 730 | # 731 | # Implementation of file-inclusion 732 | # ---------------------------------------------------------------------- 733 | 734 | def include(self,tokens): 735 | # Try to extract the filename and then process an include file 736 | if not tokens: 737 | return 738 | if tokens: 739 | if tokens[0].value != '<' and tokens[0].type != self.t_STRING: 740 | tokens = self.expand_macros(tokens) 741 | 742 | if tokens[0].value == '<': 743 | # Include <...> 744 | i = 1 745 | while i < len(tokens): 746 | if tokens[i].value == '>': 747 | break 748 | i += 1 749 | else: 750 | print("Malformed #include <...>") 751 | return 752 | filename = "".join([x.value for x in tokens[1:i]]) 753 | path = self.path + [""] + self.temp_path 754 | elif tokens[0].type == self.t_STRING: 755 | filename = tokens[0].value[1:-1] 756 | path = self.temp_path + [""] + self.path 757 | else: 758 | print("Malformed #include statement") 759 | return 760 | for p in path: 761 | iname = os.path.join(p,filename) 762 | try: 763 | data = open(iname,"r").read() 764 | dname = os.path.dirname(iname) 765 | if dname: 766 | self.temp_path.insert(0,dname) 767 | for tok in self.parsegen(data,filename): 768 | yield tok 769 | if dname: 770 | del self.temp_path[0] 771 | break 772 | except IOError: 773 | pass 774 | else: 775 | print("Couldn't find '%s'" % filename) 776 | 777 | # ---------------------------------------------------------------------- 778 | # define() 779 | # 780 | # Define a new macro 781 | # ---------------------------------------------------------------------- 782 | 783 | def define(self,tokens): 784 | if isinstance(tokens,(str,unicode)): 785 | tokens = self.tokenize(tokens) 786 | 787 | linetok = tokens 788 | try: 789 | name = linetok[0] 790 | if len(linetok) > 1: 791 | mtype = linetok[1] 792 | else: 793 | mtype = None 794 | if not mtype: 795 | m = Macro(name.value,[]) 796 | self.macros[name.value] = m 797 | elif mtype.type in self.t_WS: 798 | # A normal macro 799 | m = Macro(name.value,self.tokenstrip(linetok[2:])) 800 | self.macros[name.value] = m 801 | elif mtype.value == '(': 802 | # A macro with arguments 803 | tokcount, args, positions = self.collect_args(linetok[1:]) 804 | variadic = False 805 | for a in args: 806 | if variadic: 807 | print("No more arguments may follow a variadic argument") 808 | break 809 | astr = "".join([str(_i.value) for _i in a]) 810 | if astr == "...": 811 | variadic = True 812 | a[0].type = self.t_ID 813 | a[0].value = '__VA_ARGS__' 814 | variadic = True 815 | del a[1:] 816 | continue 817 | elif astr[-3:] == "..." and a[0].type == self.t_ID: 818 | variadic = True 819 | del a[1:] 820 | # If, for some reason, "." is part of the identifier, strip off the name for the purposes 821 | # of macro expansion 822 | if a[0].value[-3:] == '...': 823 | a[0].value = a[0].value[:-3] 824 | continue 825 | if len(a) > 1 or a[0].type != self.t_ID: 826 | print("Invalid macro argument") 827 | break 828 | else: 829 | mvalue = self.tokenstrip(linetok[1+tokcount:]) 830 | i = 0 831 | while i < len(mvalue): 832 | if i+1 < len(mvalue): 833 | if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##': 834 | del mvalue[i] 835 | continue 836 | elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS: 837 | del mvalue[i+1] 838 | i += 1 839 | m = Macro(name.value,mvalue,[x[0].value for x in args],variadic) 840 | self.macro_prescan(m) 841 | self.macros[name.value] = m 842 | else: 843 | print("Bad macro definition") 844 | except LookupError: 845 | print("Bad macro definition") 846 | 847 | # ---------------------------------------------------------------------- 848 | # undef() 849 | # 850 | # Undefine a macro 851 | # ---------------------------------------------------------------------- 852 | 853 | def undef(self,tokens): 854 | id = tokens[0].value 855 | try: 856 | del self.macros[id] 857 | except LookupError: 858 | pass 859 | 860 | # ---------------------------------------------------------------------- 861 | # parse() 862 | # 863 | # Parse input text. 864 | # ---------------------------------------------------------------------- 865 | def parse(self,input,source=None,ignore={}): 866 | self.ignore = ignore 867 | self.parser = self.parsegen(input,source) 868 | 869 | # ---------------------------------------------------------------------- 870 | # token() 871 | # 872 | # Method to return individual tokens 873 | # ---------------------------------------------------------------------- 874 | def token(self): 875 | try: 876 | while True: 877 | tok = next(self.parser) 878 | if tok.type not in self.ignore: return tok 879 | except StopIteration: 880 | self.parser = None 881 | return None 882 | 883 | if __name__ == '__main__': 884 | import ply.lex as lex 885 | lexer = lex.lex() 886 | 887 | # Run a preprocessor 888 | import sys 889 | f = open(sys.argv[1]) 890 | input = f.read() 891 | 892 | p = Preprocessor(lexer) 893 | p.parse(input,sys.argv[1]) 894 | while True: 895 | tok = p.token() 896 | if not tok: break 897 | print(p.source, tok) 898 | 899 | 900 | 901 | 902 | 903 | 904 | 905 | 906 | 907 | 908 | 909 | -------------------------------------------------------------------------------- /yyparse/ply/ctokens.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # ctokens.py 3 | # 4 | # Token specifications for symbols in ANSI C and C++. This file is 5 | # meant to be used as a library in other tokenizers. 6 | # ---------------------------------------------------------------------- 7 | 8 | # Reserved words 9 | 10 | tokens = [ 11 | # Literals (identifier, integer constant, float constant, string constant, char const) 12 | 'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER', 13 | 14 | # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) 15 | 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO', 16 | 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', 17 | 'LOR', 'LAND', 'LNOT', 18 | 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', 19 | 20 | # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) 21 | 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', 22 | 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', 23 | 24 | # Increment/decrement (++,--) 25 | 'INCREMENT', 'DECREMENT', 26 | 27 | # Structure dereference (->) 28 | 'ARROW', 29 | 30 | # Ternary operator (?) 31 | 'TERNARY', 32 | 33 | # Delimeters ( ) [ ] { } , . ; : 34 | 'LPAREN', 'RPAREN', 35 | 'LBRACKET', 'RBRACKET', 36 | 'LBRACE', 'RBRACE', 37 | 'COMMA', 'PERIOD', 'SEMI', 'COLON', 38 | 39 | # Ellipsis (...) 40 | 'ELLIPSIS', 41 | ] 42 | 43 | # Operators 44 | t_PLUS = r'\+' 45 | t_MINUS = r'-' 46 | t_TIMES = r'\*' 47 | t_DIVIDE = r'/' 48 | t_MODULO = r'%' 49 | t_OR = r'\|' 50 | t_AND = r'&' 51 | t_NOT = r'~' 52 | t_XOR = r'\^' 53 | t_LSHIFT = r'<<' 54 | t_RSHIFT = r'>>' 55 | t_LOR = r'\|\|' 56 | t_LAND = r'&&' 57 | t_LNOT = r'!' 58 | t_LT = r'<' 59 | t_GT = r'>' 60 | t_LE = r'<=' 61 | t_GE = r'>=' 62 | t_EQ = r'==' 63 | t_NE = r'!=' 64 | 65 | # Assignment operators 66 | 67 | t_EQUALS = r'=' 68 | t_TIMESEQUAL = r'\*=' 69 | t_DIVEQUAL = r'/=' 70 | t_MODEQUAL = r'%=' 71 | t_PLUSEQUAL = r'\+=' 72 | t_MINUSEQUAL = r'-=' 73 | t_LSHIFTEQUAL = r'<<=' 74 | t_RSHIFTEQUAL = r'>>=' 75 | t_ANDEQUAL = r'&=' 76 | t_OREQUAL = r'\|=' 77 | t_XOREQUAL = r'\^=' 78 | 79 | # Increment/decrement 80 | t_INCREMENT = r'\+\+' 81 | t_DECREMENT = r'--' 82 | 83 | # -> 84 | t_ARROW = r'->' 85 | 86 | # ? 87 | t_TERNARY = r'\?' 88 | 89 | # Delimeters 90 | t_LPAREN = r'\(' 91 | t_RPAREN = r'\)' 92 | t_LBRACKET = r'\[' 93 | t_RBRACKET = r'\]' 94 | t_LBRACE = r'\{' 95 | t_RBRACE = r'\}' 96 | t_COMMA = r',' 97 | t_PERIOD = r'\.' 98 | t_SEMI = r';' 99 | t_COLON = r':' 100 | t_ELLIPSIS = r'\.\.\.' 101 | 102 | # Identifiers 103 | t_ID = r'[A-Za-z_][A-Za-z0-9_]*' 104 | 105 | # Integer literal 106 | t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?' 107 | 108 | # Floating literal 109 | t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' 110 | 111 | # String literal 112 | t_STRING = r'\"([^\\\n]|(\\.))*?\"' 113 | 114 | # Character constant 'c' or L'c' 115 | t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\'' 116 | 117 | # Comment (C-Style) 118 | def t_COMMENT(t): 119 | r'/\*(.|\n)*?\*/' 120 | t.lexer.lineno += t.value.count('\n') 121 | return t 122 | 123 | # Comment (C++-Style) 124 | def t_CPPCOMMENT(t): 125 | r'//.*\n' 126 | t.lexer.lineno += 1 127 | return t 128 | 129 | 130 | 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /yyparse/ply/ygen.py: -------------------------------------------------------------------------------- 1 | # ply: ygen.py 2 | # 3 | # This is a support program that auto-generates different versions of the YACC parsing 4 | # function with different features removed for the purposes of performance. 5 | # 6 | # Users should edit the method LParser.parsedebug() in yacc.py. The source code 7 | # for that method is then used to create the other methods. See the comments in 8 | # yacc.py for further details. 9 | 10 | import os.path 11 | import shutil 12 | 13 | def get_source_range(lines, tag): 14 | srclines = enumerate(lines) 15 | start_tag = '#--! %s-start' % tag 16 | end_tag = '#--! %s-end' % tag 17 | 18 | for start_index, line in srclines: 19 | if line.strip().startswith(start_tag): 20 | break 21 | 22 | for end_index, line in srclines: 23 | if line.strip().endswith(end_tag): 24 | break 25 | 26 | return (start_index + 1, end_index) 27 | 28 | def filter_section(lines, tag): 29 | filtered_lines = [] 30 | include = True 31 | tag_text = '#--! %s' % tag 32 | for line in lines: 33 | if line.strip().startswith(tag_text): 34 | include = not include 35 | elif include: 36 | filtered_lines.append(line) 37 | return filtered_lines 38 | 39 | def main(): 40 | dirname = os.path.dirname(__file__) 41 | shutil.copy2(os.path.join(dirname, 'yacc.py'), os.path.join(dirname, 'yacc.py.bak')) 42 | with open(os.path.join(dirname, 'yacc.py'), 'r') as f: 43 | lines = f.readlines() 44 | 45 | parse_start, parse_end = get_source_range(lines, 'parsedebug') 46 | parseopt_start, parseopt_end = get_source_range(lines, 'parseopt') 47 | parseopt_notrack_start, parseopt_notrack_end = get_source_range(lines, 'parseopt-notrack') 48 | 49 | # Get the original source 50 | orig_lines = lines[parse_start:parse_end] 51 | 52 | # Filter the DEBUG sections out 53 | parseopt_lines = filter_section(orig_lines, 'DEBUG') 54 | 55 | # Filter the TRACKING sections out 56 | parseopt_notrack_lines = filter_section(parseopt_lines, 'TRACKING') 57 | 58 | # Replace the parser source sections with updated versions 59 | lines[parseopt_notrack_start:parseopt_notrack_end] = parseopt_notrack_lines 60 | lines[parseopt_start:parseopt_end] = parseopt_lines 61 | 62 | lines = [line.rstrip()+'\n' for line in lines] 63 | with open(os.path.join(dirname, 'yacc.py'), 'w') as f: 64 | f.writelines(lines) 65 | 66 | print('Updated yacc.py') 67 | 68 | if __name__ == '__main__': 69 | main() 70 | 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /yyparse/test1.c: -------------------------------------------------------------------------------- 1 | extern x; 2 | enum Boolean 3 | { 4 | false, 5 | true 6 | }; 7 | 8 | typedef struct{ 9 | int a; 10 | double c; 11 | }mytype; 12 | 13 | void fff(){ 14 | int asdf = 1; 15 | } 16 | 17 | int main(int argc, char *argv[]) { 18 | int a, c; 19 | double b; 20 | int i; 21 | char ch; 22 | long f = 122L; 23 | const unsigned short g = 1; 24 | // double b = 12.3E2; 25 | printf("a + b = c\n"); 26 | printf("%d\n", sizeof(int)); 27 | 28 | b = 12.3E2; 29 | b = 12.3 + 345 - 1. * 0.9999; 30 | c = 345; 31 | 32 | if (1) { 33 | a = b; 34 | }else if(0){ 35 | c = 1; 36 | }else { 37 | b = a *c;} 38 | 39 | for (i = 0; i < 10; i++) { 40 | a += c?1:2; 41 | } 42 | 43 | do { 44 | a >>= 1; 45 | if (a < 0) { 46 | break; 47 | }else { 48 | continue; 49 | } 50 | } while (1); 51 | 52 | while (a) { 53 | a--; 54 | getc_unlocked(a); 55 | } 56 | 57 | switch (ch) { 58 | case 'a': 59 | case 'b': 60 | break; 61 | case 'c': 62 | putchar(ch); 63 | case 'd': 64 | default: 65 | break; 66 | } 67 | 68 | a = 1; 69 | } -------------------------------------------------------------------------------- /yyparse/test2.c: -------------------------------------------------------------------------------- 1 | 2 | int a; 3 | a + b = c; 4 | int c; -------------------------------------------------------------------------------- /yyparse/testChar.c: -------------------------------------------------------------------------------- 1 | 0 2 | 123 3 | 12.4 4 | 1. 5 | .123 6 | 1.3E1 7 | 1.3e0 8 | 2e10 9 | 10 | 11 | ()[]{};.,&*+-~!/%<>^|?:= --------------------------------------------------------------------------------