├── .gitignore
├── .vscode
    ├── .browse.VC.db
    ├── launch.json
    ├── settings.json
    └── tasks.json
├── ANSI C grammar (Yacc).html
├── Class notes in Chinese.txt
├── LICENSE
├── Presentation scripts
├── README.md
├── ZCC.bnf
├── bnf.tmp
├── generation
    ├── __init__.py
    ├── data.py
    ├── generation.py
    └── utility.py
├── main.py
├── public
    ├── ZCCglobal.py
    ├── __init__.py
    └── const.py
├── symbol
    ├── .gitignore
    ├── __init__.py
    └── symtab.py
├── test
    ├── a.s
    ├── array.c
    ├── basic.c
    ├── basic.i
    ├── basic1.c
    ├── basic2.c
    ├── basic3.c
    ├── errorID.c
    ├── error_info.c
    ├── error_pos.c
    ├── missRightCurly.c
    ├── missSEMI.c
    ├── multi_int.c
    ├── out.txt
    ├── pointer.c
    ├── source_code_optimization.c
    ├── stdio.h
    ├── struct.c
    ├── test1.c
    ├── test1.s
    ├── test4.c
    ├── test4.s
    ├── test4_2.c
    ├── test4_2.s
    ├── test9.c
    └── test9.s
└── yyparse
    ├── .gitignore
    ├── ZCClex.py
    ├── ZCCparser.py
    ├── __init__.py
    ├── missRightCurly.c
    ├── missSEMI.c
    ├── ply
        ├── __init__.py
        ├── cpp.py
        ├── ctokens.py
        ├── lex.py
        ├── yacc.py
        └── ygen.py
    ├── test1.c
    ├── test2.c
    └── testChar.c


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | *.c
 9 | *.exe
10 | test/*.s
11 | test/*.out
12 | 
13 | # Distribution / packaging
14 | .Python
15 | env/
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | 
31 | # PyInstaller
32 | #  Usually these files are written by a python script from a template
33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 | 
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 | 
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *,cover
50 | .hypothesis/
51 | 
52 | # Translations
53 | *.mo
54 | *.pot
55 | 
56 | # Django stuff:
57 | *.log
58 | 
59 | # Sphinx documentation
60 | docs/_build/
61 | 
62 | # PyBuilder
63 | target/
64 | 
65 | #Ipython Notebook
66 | .ipynb_checkpoints
67 | 
68 | #PyCharm
69 | 
70 | !/test/*.c
71 | .idea/
72 | 


--------------------------------------------------------------------------------
/.vscode/.browse.VC.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hlFu/ZCC/811bd987c9d7a3754f7bc9c9d986359ee3df7327/.vscode/.browse.VC.db


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": "0.2.0",
 3 |     "configurations": [
 4 |         {
 5 |             "name": "Python",
 6 |             "type": "python",
 7 |             "request": "launch",
 8 |             "stopOnEntry": true,
 9 |             "program": "${file}",
10 |             "debugOptions": [
11 |                 "WaitOnAbnormalExit",
12 |                 "WaitOnNormalExit",
13 |                 "RedirectOutput"
14 |             ]
15 |         },
16 |         {
17 |             "name": "Python Console App",
18 |             "type": "python",
19 |             "request": "launch",
20 |             "stopOnEntry": true,
21 |             "program": "${file}",
22 |             "externalConsole": true,
23 |             "debugOptions": [
24 |                 "WaitOnAbnormalExit",
25 |                 "WaitOnNormalExit"
26 |             ]
27 |         },
28 |         {
29 |             "name": "Django",
30 |             "type": "python",
31 |             "request": "launch",
32 |             "stopOnEntry": true,
33 |             "program": "${workspaceRoot}/manage.py",
34 |             "args": [
35 |                 "runserver",
36 |                 "--noreload"
37 |             ],
38 |             "debugOptions": [
39 |                 "WaitOnAbnormalExit",
40 |                 "WaitOnNormalExit",
41 |                 "RedirectOutput",
42 |                 "DjangoDebugging"
43 |             ]
44 |         },
45 |         {
46 |             "name": "Watson",
47 |             "type": "python",
48 |             "request": "launch",
49 |             "stopOnEntry": true,
50 |             "program": "${workspaceRoot}/console.py",
51 |             "args": [
52 |                 "dev",
53 |                 "runserver",
54 |                 "--noreload=True"
55 |             ],
56 |             "debugOptions": [
57 |                 "WaitOnAbnormalExit",
58 |                 "WaitOnNormalExit",
59 |                 "RedirectOutput"
60 |             ]
61 |         }
62 |     ]
63 | }


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | // 将设置放入此文件中以覆盖默认值和用户设置。
2 | {
3 | }


--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
 1 | // {
 2 | //     // See https://go.microsoft.com/fwlink/?LinkId=733558
 3 | //     // for the documentation about the tasks.json format
 4 | //     "version": "0.1.0",
 5 | //     "command": "tsc",
 6 | //     "isShellCommand": true,
 7 | //     "args": ["-p", "."],
 8 | //     "showOutput": "silent",
 9 | //     "problemMatcher": "$tsc"
10 | // }
11 | {
12 |     "version": "0.1.0",
13 | 
14 |     // The command is python.exe. Assumes that Python is installed and python.exe is on the PATH.
15 |     "command": "python.exe",
16 | 
17 |     // The command is a shell script
18 |     "isShellCommand": true,
19 | 
20 |     // Always show the output window.
21 |     "showOutput": "always",
22 | 
23 |     // args is the file currently open in the editor, passed to python.exe as the script to run.
24 |     "args": ["${file}"]
25 | }


--------------------------------------------------------------------------------
/ANSI C grammar (Yacc).html:
--------------------------------------------------------------------------------
  1 | 
  2 | <!-- saved from url=(0067)http://www.lysator.liu.se/c/ANSI-C-grammar-y.html#direct-declarator -->
  3 | <html><head>
  4 | <title>ANSI C grammar (Yacc)</title>
  5 | <link rel="made" rev="made" href="mailto:jutta@pobox.com">
  6 | <style id="style-1-cropbar-clipper">/* Copyright 2014 Evernote Corporation. All rights reserved. */
  7 | .en-markup-crop-options {
  8 |     top: 18px !important;
  9 |     left: 50% !important;
 10 |     margin-left: -100px !important;
 11 |     width: 200px !important;
 12 |     border: 2px rgba(255,255,255,.38) solid !important;
 13 |     border-radius: 4px !important;
 14 | }
 15 | 
 16 | .en-markup-crop-options div div:first-of-type {
 17 |     margin-left: 0px !important;
 18 | }
 19 | </style><style></style></head><body>
 20 | 
 21 | <h1>ANSI C Yacc grammar</h1>
 22 | 
 23 | In 1985, Jeff Lee published his Yacc grammar (which is
 24 | accompanied by a matching <a href="ANSI-C-grammar-l.html">Lex specification</a>)
 25 | for the April 30, 1985 draft version of the
 26 | ANSI C standard.&nbsp;<tt> </tt>Tom Stockfisch reposted
 27 | it to net.sources in 1987; that original, as mentioned in 
 28 | the answer to <a href="c-faq/c-17.html#17-25">question 17.25</a>
 29 | of the comp.lang.c FAQ, can be ftp'ed from ftp.uu.net, file
 30 | <a href="ftp://ftp.uu.net/usenet/net.sources/ansi.c.grammar.Z">usenet/net.sources/ansi.c.grammar.Z</a>.
 31 | <p>
 32 | <a href="mailto:jutta@pobox.com">Jutta Degener</a>, 1995
 33 | </p><p>
 34 | </p><hr>
 35 | <pre>%token <a href="ANSI-C-grammar-l.html#check-type">IDENTIFIER</a> <a href="ANSI-C-grammar-l.html#CONSTANT">CONSTANT</a> <a href="ANSI-C-grammar-l.html#STRING-LITERAL">STRING_LITERAL</a> <a href="ANSI-C-grammar-l.html#SIZEOF">SIZEOF</a>
 36 | %token <a href="ANSI-C-grammar-l.html#PTR-OP">PTR_OP</a> <a href="ANSI-C-grammar-l.html#INC-OP">INC_OP</a> <a href="ANSI-C-grammar-l.html#DEC-OP">DEC_OP</a> <a href="ANSI-C-grammar-l.html#LEFT-OP">LEFT_OP</a> <a href="ANSI-C-grammar-l.html#RIGHT-OP">RIGHT_OP</a> <a href="ANSI-C-grammar-l.html#LE-OP">LE_OP</a> <a href="ANSI-C-grammar-l.html#GE-OP">GE_OP</a> <a href="ANSI-C-grammar-l.html#EQ-OP">EQ_OP</a> <a href="ANSI-C-grammar-l.html#NE-OP">NE_OP</a>
 37 | %token <a href="ANSI-C-grammar-l.html#AND-OP">AND_OP</a> <a href="ANSI-C-grammar-l.html#OR-OP">OR_OP</a> <a href="ANSI-C-grammar-l.html#MUL-ASSIGN">MUL_ASSIGN</a> <a href="ANSI-C-grammar-l.html#DIV-ASSIGN">DIV_ASSIGN</a> <a href="ANSI-C-grammar-l.html#MOD-ASSIGN">MOD_ASSIGN</a> <a href="ANSI-C-grammar-l.html#ADD-ASSIGN">ADD_ASSIGN</a>
 38 | %token <a href="ANSI-C-grammar-l.html#SUB-ASSIGN">SUB_ASSIGN</a> <a href="ANSI-C-grammar-l.html#LEFT-ASSIGN">LEFT_ASSIGN</a> <a href="ANSI-C-grammar-l.html#RIGHT-ASSIGN">RIGHT_ASSIGN</a> <a href="ANSI-C-grammar-l.html#AND-ASSIGN">AND_ASSIGN</a>
 39 | %token <a href="ANSI-C-grammar-l.html#XOR-ASSIGN">XOR_ASSIGN</a> <a href="ANSI-C-grammar-l.html#OR-ASSIGN">OR_ASSIGN</a> <a href="ANSI-C-grammar-l.html#check-type">TYPE_NAME</a>
 40 | 
 41 | %token <a href="ANSI-C-grammar-l.html#TYPEDEF">TYPEDEF</a> <a href="ANSI-C-grammar-l.html#EXTERN">EXTERN</a> <a href="ANSI-C-grammar-l.html#STATIC">STATIC</a> <a href="ANSI-C-grammar-l.html#AUTO">AUTO</a> <a href="ANSI-C-grammar-l.html#REGISTER">REGISTER</a>
 42 | %token <a href="ANSI-C-grammar-l.html#CHAR">CHAR</a> <a href="ANSI-C-grammar-l.html#SHORT">SHORT</a> <a href="ANSI-C-grammar-l.html#INT">INT</a> <a href="ANSI-C-grammar-l.html#LONG">LONG</a> <a href="ANSI-C-grammar-l.html#SIGNED">SIGNED</a> <a href="ANSI-C-grammar-l.html#UNSIGNED">UNSIGNED</a> <a href="ANSI-C-grammar-l.html#FLOAT">FLOAT</a> <a href="ANSI-C-grammar-l.html#DOUBLE">DOUBLE</a> <a href="ANSI-C-grammar-l.html#CONST">CONST</a> <a href="ANSI-C-grammar-l.html#VOLATILE">VOLATILE</a> <a href="ANSI-C-grammar-l.html#VOID">VOID</a>
 43 | %token <a href="ANSI-C-grammar-l.html#STRUCT">STRUCT</a> <a href="ANSI-C-grammar-l.html#UNION">UNION</a> <a href="ANSI-C-grammar-l.html#ENUM">ENUM</a> <a href="ANSI-C-grammar-l.html#ELLIPSIS">ELLIPSIS</a>
 44 | 
 45 | %token <a href="ANSI-C-grammar-l.html#CASE">CASE</a> <a href="ANSI-C-grammar-l.html#DEFAULT">DEFAULT</a> <a href="ANSI-C-grammar-l.html#IF">IF</a> <a href="ANSI-C-grammar-l.html#ELSE">ELSE</a> <a href="ANSI-C-grammar-l.html#SWITCH">SWITCH</a> <a href="ANSI-C-grammar-l.html#WHILE">WHILE</a> <a href="ANSI-C-grammar-l.html#DO">DO</a> <a href="ANSI-C-grammar-l.html#FOR">FOR</a> <a href="ANSI-C-grammar-l.html#GOTO">GOTO</a> <a href="ANSI-C-grammar-l.html#CONTINUE">CONTINUE</a> <a href="ANSI-C-grammar-l.html#BREAK">BREAK</a> <a href="ANSI-C-grammar-l.html#RETURN">RETURN</a>
 46 | 
 47 | %start <a href="#translation-unit">translation_unit</a>
 48 | %%
 49 | 
 50 | <a name="IDENTIFIER">IDENTIFIER</a>
 51 | 	: IDENTIFIER_NAME
 52 | 
 53 | <a name="INTEGER">INTEGER</a>
 54 | 	: ORIGINAL_CODE
 55 | 
 56 | <a name="DOUBLE">DOUBLE</a>
 57 | 	: ORIGINAL_CODE
 58 | 
 59 | <a name="STRING">STRING</a>
 60 | 	: ORIGINAL_CODE
 61 | 
 62 | <a name="primary-expression">primary_expression</a>
 63 | 	: <a href="#IDENTIFIER">IDENTIFIER</a>
 64 | 	| <a href="#INTEGER">INTEGER</a>
 65 | 	| <a href="#DOUBLE">DOUBLE</a>
 66 | 	| <a href="#STRING">STRING</a>
 67 | 	| '(' <a href="#expression">expression</a> ')'
 68 | 	;
 69 | 
 70 | <a name="postfix-expression">postfix_expression</a>
 71 | 	: <a href="#primary-expression">primary_expression</a>
 72 | 	| postfix_expression '[' <a href="#expression">expression</a> ']'
 73 | 	| postfix_expression '(' ')'
 74 | 	| postfix_expression '(' <a href="#argument-expression-list">argument_expression_list</a> ')'
 75 | 	| postfix_expression '.' <a href="ANSI-C-grammar-l.html#check-type">IDENTIFIER</a>
 76 | 	| postfix_expression <a href="ANSI-C-grammar-l.html#PTR-OP">PTR_OP</a> <a href="ANSI-C-grammar-l.html#check-type">IDENTIFIER</a>
 77 | 	| postfix_expression <a href="ANSI-C-grammar-l.html#INC-OP">INC_OP</a>
 78 | 	| postfix_expression <a href="ANSI-C-grammar-l.html#DEC-OP">DEC_OP</a>
 79 | 	;
 80 | 
 81 | <a name="argument-expression-list">argument_expression_list</a>
 82 | 	: <a href="#assignment-expression">assignment_expression</a>
 83 | 	| argument_expression_list ',' <a href="#assignment-expression">assignment_expression</a>
 84 | 	;
 85 | 
 86 | <a name="unary-expression">unary_expression</a>
 87 | 	: <a href="#postfix-expression">postfix_expression</a>
 88 | 	| <a href="ANSI-C-grammar-l.html#INC-OP">INC_OP</a> unary_expression
 89 | 	| <a href="ANSI-C-grammar-l.html#DEC-OP">DEC_OP</a> unary_expression
 90 | 	| <a href="#unary-operator">unary_operator</a> <a href="#cast-expression">cast_expression</a>
 91 | 	| <a href="ANSI-C-grammar-l.html#SIZEOF">SIZEOF</a> unary_expression
 92 | 	| <a href="ANSI-C-grammar-l.html#SIZEOF">SIZEOF</a> '(' <a href="#type-name">type_name</a> ')'
 93 | 	;
 94 | 
 95 | <a name="unary-operator">unary_operator</a>
 96 | 	: '&amp;'
 97 | 	| '*'
 98 | 	| '+'
 99 | 	| '-'
100 | 	| '~'
101 | 	| '!'
102 | 	;
103 | 
104 | <a name="cast-expression">cast_expression</a>
105 | 	: <a href="#unary-expression">unary_expression</a>
106 | 	| '(' <a href="#type-name">type_name</a> ')' cast_expression
107 | 	;
108 | 
109 | <a name="multiplicative-expression">multiplicative_expression</a>
110 | 	: <a href="#cast-expression">cast_expression</a>
111 | 	| multiplicative_expression '*' <a href="#cast-expression">cast_expression</a>
112 | 	| multiplicative_expression '/' <a href="#cast-expression">cast_expression</a>
113 | 	| multiplicative_expression '%' <a href="#cast-expression">cast_expression</a>
114 | 	;
115 | 
116 | <a name="additive-expression">additive_expression</a>
117 | 	: <a href="#multiplicative-expression">multiplicative_expression</a>
118 | 	| additive_expression '+' <a href="#multiplicative-expression">multiplicative_expression</a>
119 | 	| additive_expression '-' <a href="#multiplicative-expression">multiplicative_expression</a>
120 | 	;
121 | 
122 | <a name="shift-expression">shift_expression</a>
123 | 	: <a href="#additive-expression">additive_expression</a>
124 | 	| shift_expression <a href="ANSI-C-grammar-l.html#LEFT-OP">LEFT_OP</a> <a href="#additive-expression">additive_expression</a>
125 | 	| shift_expression <a href="ANSI-C-grammar-l.html#RIGHT-OP">RIGHT_OP</a> <a href="#additive-expression">additive_expression</a>
126 | 	;
127 | 
128 | <a name="relational-expression">relational_expression</a>
129 | 	: <a href="#shift-expression">shift_expression</a>
130 | 	| relational_expression '&lt;' <a href="#shift-expression">shift_expression</a>
131 | 	| relational_expression '&gt;' <a href="#shift-expression">shift_expression</a>
132 | 	| relational_expression <a href="ANSI-C-grammar-l.html#LE-OP">LE_OP</a> <a href="#shift-expression">shift_expression</a>
133 | 	| relational_expression <a href="ANSI-C-grammar-l.html#GE-OP">GE_OP</a> <a href="#shift-expression">shift_expression</a>
134 | 	;
135 | 
136 | <a name="equality-expression">equality_expression</a>
137 | 	: <a href="#relational-expression">relational_expression</a>
138 | 	| equality_expression <a href="ANSI-C-grammar-l.html#EQ-OP">EQ_OP</a> <a href="#relational-expression">relational_expression</a>
139 | 	| equality_expression <a href="ANSI-C-grammar-l.html#NE-OP">NE_OP</a> <a href="#relational-expression">relational_expression</a>
140 | 	;
141 | 
142 | <a name="and-expression">and_expression</a>
143 | 	: <a href="#equality-expression">equality_expression</a>
144 | 	| and_expression '&amp;' <a href="#equality-expression">equality_expression</a>
145 | 	;
146 | 
147 | <a name="exclusive-or-expression">exclusive_or_expression</a>
148 | 	: <a href="#and-expression">and_expression</a>
149 | 	| exclusive_or_expression '^' <a href="#and-expression">and_expression</a>
150 | 	;
151 | 
152 | <a name="inclusive-or-expression">inclusive_or_expression</a>
153 | 	: <a href="#exclusive-or-expression">exclusive_or_expression</a>
154 | 	| inclusive_or_expression '|' <a href="#exclusive-or-expression">exclusive_or_expression</a>
155 | 	;
156 | 
157 | <a name="logical-and-expression">logical_and_expression</a>
158 | 	: <a href="#inclusive-or-expression">inclusive_or_expression</a>
159 | 	| logical_and_expression <a href="ANSI-C-grammar-l.html#AND-OP">AND_OP</a> <a href="#inclusive-or-expression">inclusive_or_expression</a>
160 | 	;
161 | 
162 | <a name="logical-or-expression">logical_or_expression</a>
163 | 	: <a href="#logical-and-expression">logical_and_expression</a>
164 | 	| logical_or_expression <a href="ANSI-C-grammar-l.html#OR-OP">OR_OP</a> <a href="#logical-and-expression">logical_and_expression</a>
165 | 	;
166 | 
167 | <a name="conditional-expression">conditional_expression</a>
168 | 	: <a href="#logical-or-expression">logical_or_expression</a>
169 | 	| <a href="#logical-or-expression">logical_or_expression</a> '?' <a href="#expression">expression</a> ':' conditional_expression
170 | 	;
171 | 
172 | <a name="assignment-expression">assignment_expression</a>
173 | 	: <a href="#conditional-expression">conditional_expression</a>
174 | 	| <a href="#unary-expression">unary_expression</a> <a href="#assignment-operator">assignment_operator</a> assignment_expression
175 | 	;
176 | 
177 | <a name="assignment-operator">assignment_operator</a>
178 | 	: '='
179 | 	| <a href="ANSI-C-grammar-l.html#MUL-ASSIGN">MUL_ASSIGN</a>
180 | 	| <a href="ANSI-C-grammar-l.html#DIV-ASSIGN">DIV_ASSIGN</a>
181 | 	| <a href="ANSI-C-grammar-l.html#MOD-ASSIGN">MOD_ASSIGN</a>
182 | 	| <a href="ANSI-C-grammar-l.html#ADD-ASSIGN">ADD_ASSIGN</a>
183 | 	| <a href="ANSI-C-grammar-l.html#SUB-ASSIGN">SUB_ASSIGN</a>
184 | 	| <a href="ANSI-C-grammar-l.html#LEFT-ASSIGN">LEFT_ASSIGN</a>
185 | 	| <a href="ANSI-C-grammar-l.html#RIGHT-ASSIGN">RIGHT_ASSIGN</a>
186 | 	| <a href="ANSI-C-grammar-l.html#AND-ASSIGN">AND_ASSIGN</a>
187 | 	| <a href="ANSI-C-grammar-l.html#XOR-ASSIGN">XOR_ASSIGN</a>
188 | 	| <a href="ANSI-C-grammar-l.html#OR-ASSIGN">OR_ASSIGN</a>
189 | 	;
190 | 
191 | <a name="expression">expression</a>
192 | 	: <a href="#assignment-expression">assignment_expression</a>
193 | 	| expression ',' <a href="#assignment-expression">assignment_expression</a>
194 | 	;
195 | 
196 | <a name="constant-expression">constant_expression</a>
197 | 	: <a href="#conditional-expression">conditional_expression</a>
198 | 	;
199 | 
200 | <a name="declaration">declaration</a>
201 | 	: <a href="#declaration-specifiers">declaration_specifiers</a> ';'
202 | 	| <a href="#declaration-specifiers">declaration_specifiers</a> <a href="#init-declarator-list">init_declarator_list</a> ';'
203 | 	;
204 | 
205 | <a name="declaration-specifiers">declaration_specifiers</a>			 
206 | 	: <a href="#type-specifier">type_specifier</a> 						//int 
207 | 	| <a href="#type-specifier">type_specifier</a> <a href="#type-qualifier">type_qualifier</a>					//int const
208 | 	| <a href="#type-qualifier">type_qualifier</a> <a href="#type-specifier">type_specifier</a>					//const int
209 | 	| <a href="#storage-class-specifier">storage_class_specifier</a> <a href="#type-specifier">type_specifier</a>			//static int 
210 | 	| <a href="#storage-class-specifier">storage_class_specifier</a> <a href="#type-specifier">type_specifier</a> <a href="#type-qualifier">type_qualifier</a>		//static int const
211 | 	| <a href="#storage-class-specifier">storage_class_specifier</a> <a href="#type-qualifier">type_qualifier</a> <a href="#type-specifier">type_specifier</a>		//static const int
212 | 	;
213 | 
214 | <a name="init-declarator-list">init_declarator_list</a>
215 | 	: <a href="#init-declarator">init_declarator</a>
216 | 	| init_declarator_list ',' <a href="#init-declarator">init_declarator</a>
217 | 	;
218 | 
219 | <a name="init-declarator">init_declarator</a>
220 | 	: <a href="#declarator">declarator</a>
221 | 	| <a href="#declarator">declarator</a> '=' <a href="#initializer">initializer</a>
222 | 	;
223 | 
224 | <a name="storage-class-specifier">storage_class_specifier</a>
225 | 	: <a href="ANSI-C-grammar-l.html#TYPEDEF">TYPEDEF</a>
226 | 	| <a href="ANSI-C-grammar-l.html#EXTERN">EXTERN</a>
227 | 	| <a href="ANSI-C-grammar-l.html#STATIC">STATIC</a>
228 | 	;
229 | 
230 | <a name="type-specifier">type_specifier</a>
231 | 	: <a href="ANSI-C-grammar-l.html#VOID">VOID</a>
232 | 	| <a href="ANSI-C-grammar-l.html#FLOAT">FLOAT</a>
233 | 	| <a href="ANSI-C-grammar-l.html#DOUBLE">DOUBLE</a>
234 | 	| <a href="#integer-type">integer_type</a>
235 | 	| <a href="#struct-or-union-specifier">struct_or_union_specifier</a>
236 | 	| <a href="#enum-specifier">enum_specifier</a>
237 | 	| <a href="ANSI-C-grammar-l.html#TYPE_NAME">TYPE_NAME</a>
238 | 	;
239 | 
240 | <a name="integer-type">integer_type</a>
241 | 	: <a href="ANSI-C-grammar-l.html#CHAR">CHAR</a>
242 | 	| <a href="ANSI-C-grammar-l.html#SHORT">SHORT</a>
243 | 	| <a href="ANSI-C-grammar-l.html#INT">INT</a>
244 | 	| <a href="ANSI-C-grammar-l.html#LONG">LONG</a>
245 | 	| <a href="ANSI-C-grammar-l.html#SIGNED">SIGNED</a> integer_type
246 | 	| <a href="ANSI-C-grammar-l.html#UNSIGNED">UNSIGNED</a> integer_type
247 | 	| <a href="ANSI-C-grammar-l.html#SHORT">SHORT</a> integer_type
248 | 	| <a href="ANSI-C-grammar-l.html#LONG">LONG</a> integer_type
249 | 	;
250 | <a name="struct-or-union-specifier">struct_or_union_specifier</a>
251 | 	: <a href="#struct-or-union">struct_or_union</a> <a href="ANSI-C-grammar-l.html#check-type">IDENTIFIER</a> '{' <a href="#struct-declaration-list">struct_declaration_list</a> '}'
252 | 	| <a href="#struct-or-union">struct_or_union</a> '{' <a href="#struct-declaration-list">struct_declaration_list</a> '}'
253 | 	| <a href="#struct-or-union">struct_or_union</a> <a href="ANSI-C-grammar-l.html#check-type">IDENTIFIER</a>
254 | 	;
255 | 
256 | <a name="struct-or-union">struct_or_union</a>
257 | 	: <a href="ANSI-C-grammar-l.html#STRUCT">STRUCT</a>
258 | 	| <a href="ANSI-C-grammar-l.html#UNION">UNION</a>
259 | 	;
260 | 
261 | <a name="struct-declaration-list">struct_declaration_list</a>
262 | 	: <a href="#struct-declaration">struct_declaration</a>
263 | 	| struct_declaration_list <a href="#struct-declaration">struct_declaration</a>
264 | 	;
265 | 
266 | <a name="struct-declaration">struct_declaration</a>
267 | 	: <a href="#specifier-qualifier-list">specifier_qualifier_list</a> <a href="#struct-declarator-list">struct_declarator_list</a> ';'
268 | 	;
269 | 
270 | <a name="specifier-qualifier-list">specifier_qualifier_list</a>
271 | 	: <a href="#type-specifier">type_specifier</a> 
272 | 	| <a href="#type-specifier">type_specifier</a> <a href="#type-qualifier">type_qualifier</a>
273 | 	| <a href="#type-qualifier">type_qualifier</a> <a href="#type-specifier">type_specifier</a> 
274 | 	;
275 | 
276 | <a name="struct-declarator-list">struct_declarator_list</a>
277 | 	: <a href="#declarator">declarator</a>
278 | 	| struct_declarator_list ',' <a href="#declarator">declarator</a>
279 | 	;
280 | <!--
281 | <a name="struct-declarator">struct_declarator</a>
282 | 	: <a href="#declarator">declarator</a>
283 | 	| ':' <a href="#constant-expression">constant_expression</a>
284 | 	| <a href="#declarator">declarator</a> ':' <a href="#constant-expression">constant_expression</a>
285 | 	;
286 | -->
287 | <a name="enum-specifier">enum_specifier</a>
288 | 	: <a href="ANSI-C-grammar-l.html#ENUM">ENUM</a> '{' <a href="#enumerator-list">enumerator_list</a> '}'
289 | 	| <a href="ANSI-C-grammar-l.html#ENUM">ENUM</a> <a href="ANSI-C-grammar-l.html#check-type">IDENTIFIER</a> '{' <a href="#enumerator-list">enumerator_list</a> '}'
290 | 	| <a href="ANSI-C-grammar-l.html#ENUM">ENUM</a> <a href="ANSI-C-grammar-l.html#check-type">IDENTIFIER</a>
291 | 	;
292 | 
293 | <a name="enumerator-list">enumerator_list</a>
294 | 	: <a href="#enumerator">enumerator</a>
295 | 	| enumerator_list ',' <a href="#enumerator">enumerator</a>
296 | 	;
297 | 
298 | <a name="enumerator">enumerator</a>
299 | 	: <a href="ANSI-C-grammar-l.html#check-type">IDENTIFIER</a>
300 | 	| <a href="ANSI-C-grammar-l.html#check-type">IDENTIFIER</a> '=' <a href="#constant-expression">constant_expression</a>
301 | 	;
302 | 
303 | <a name="type-qualifier">type_qualifier</a>
304 | 	: <a href="ANSI-C-grammar-l.html#CONST">CONST</a>
305 | 	;
306 | 
307 | <a name="declarator">declarator</a>
308 | 	: <a href="#pointer">pointer</a> <a href="#direct-declarator">direct_declarator</a>
309 | 	| <a href="#direct-declarator">direct_declarator</a>
310 | 	;
311 | 
312 | <a name="direct-declarator">direct_declarator</a>
313 | 	: <a href="ANSI-C-grammar-l.html#check-type">IDENTIFIER</a>
314 | 	| '(' <a href="#declarator">declarator</a> ')'
315 | 	| direct_declarator '[' <a href="#constant-expression">constant_expression</a> ']'
316 | 	| direct_declarator '[' ']'
317 | 	| direct_declarator '(' <a href="#parameter-type-list">parameter_type_list</a> ')' 
318 | 	| direct_declarator '(' ')'
319 | 	;
320 | 
321 | <a name="pointer">pointer</a>
322 | 	: '*'
323 | 	| '*' <a href="ANSI-C-grammar-l.html#CONST">CONST</a>
324 | 	| pointer '*' 
325 | 	| pointer '*' <a href="ANSI-C-grammar-l.html#CONST">CONST</a>
326 | 	;
327 | 
328 | <a name="type-qualifier-list">type_qualifier_list</a>
329 | 	: <a href="#type-qualifier">type_qualifier</a>
330 | 	| type_qualifier_list <a href="#type-qualifier">type_qualifier</a>
331 | 	;
332 | 
333 | 
334 | <a name="parameter-type-list">parameter_type_list</a>
335 | 	: <a href="#parameter-list">parameter_list</a>
336 | 	| <a href="#parameter-list">parameter_list</a> ',' <a href="ANSI-C-grammar-l.html#ELLIPSIS">ELLIPSIS</a>
337 | 	;
338 | 
339 | <a name="parameter-list">parameter_list</a>
340 | 	: <a href="#parameter-declaration">parameter_declaration</a>
341 | 	| <a href="#parameter-list">parameter_list</a> ',' <a href="#parameter-declaration">parameter_declaration</a>
342 | 	;
343 | 
344 | <a name="parameter-declaration">parameter_declaration</a>
345 | 	: <a href="#declaration-specifiers">declaration_specifiers</a> <a href="#declarator">declarator</a>
346 | 	| <a href="#declaration-specifiers">declaration_specifiers</a> <a href="#abstract-declarator">abstract_declarator</a>
347 | 	| <a href="#declaration-specifiers">declaration_specifiers</a>
348 | 	;
349 | 
350 | <a name="type-name">type_name</a>
351 | 	: <a href="#specifier-qualifier-list">specifier_qualifier_list</a>
352 | 	| <a href="#specifier-qualifier-list">specifier_qualifier_list</a> <a href="#abstract-declarator">abstract_declarator</a>
353 | 	;
354 | 
355 | <a name="abstract-declarator">abstract_declarator</a>
356 | 	: <a href="#pointer">pointer</a>
357 | 	| <a href="#direct-abstract-declarator">direct_abstract_declarator</a>
358 | 	| <a href="#pointer">pointer</a> <a href="#direct-abstract-declarator">direct_abstract_declarator</a>
359 | 	;
360 | 
361 | <a name="direct-abstract-declarator">direct_abstract_declarator</a>
362 | 	: '(' <a href="#abstract-declarator">abstract_declarator</a> ')'
363 | 	| '[' ']'
364 | 	| '[' <a href="#constant-expression">constant_expression</a> ']'
365 | 	| direct_abstract_declarator '[' ']'
366 | 	| direct_abstract_declarator '[' <a href="#constant-expression">constant_expression</a> ']'
367 | 	| '(' ')'
368 | 	| '(' <a href="#parameter-type-list">parameter_type_list</a> ')'
369 | 	| direct_abstract_declarator '(' ')'
370 | 	| direct_abstract_declarator '(' <a href="#parameter-type-list">parameter_type_list</a> ')'
371 | 	;
372 | 
373 | <a name="initializer">initializer</a>
374 | 	: <a href="#assignment-expression">assignment_expression</a>
375 | 	| '{' <a href="#initializer-list">initializer_list</a> '}'
376 | 	| '{' <a href="#initializer-list">initializer_list</a> ',' '}'
377 | 	;
378 | 
379 | <a name="initializer-list">initializer_list</a>
380 | 	: <a href="#initializer">initializer</a>
381 | 	| initializer_list ',' <a href="#initializer">initializer</a>
382 | 	;
383 | 
384 | <a name="statement">statement</a>
385 | 	: <a href="#labeled-statement">labeled_statement</a>
386 | 	| <a href="#compound-statement">compound_statement</a>
387 | 	| <a href="#expression-statement">expression_statement</a>
388 | 	| <a href="#selection-statement">selection_statement</a>
389 | 	| <a href="#iteration-statement">iteration_statement</a>
390 | 	| <a href="#jump-statement">jump_statement</a>
391 | 	;
392 | 
393 | <a name="labeled-statement">labeled_statement</a>
394 | 	: <a href="ANSI-C-grammar-l.html#CASE">CASE</a> <a href="#constant-expression">constant_expression</a> ':' <a href="#statement">statement</a>
395 | 	| <a href="ANSI-C-grammar-l.html#DEFAULT">DEFAULT</a> ':' <a href="#statement">statement</a>
396 | 	;
397 | 
398 | <a name="compound-statement">compound_statement</a>
399 | 	: '{' '}'
400 | 	| '{' <a href="#statement-list">statement_list</a> '}'
401 | 	| '{' <a href="#declaration-list">declaration_list</a> '}'
402 | 	| '{' <a href="#declaration-list">declaration_list</a> <a href="#statement-list">statement_list</a> '}'
403 | 	;
404 | 
405 | <a name="declaration-list">declaration_list</a>
406 | 	: <a href="#declaration">declaration</a>
407 | 	| declaration_list <a href="#declaration">declaration</a>
408 | 	;
409 | 
410 | <a name="statement-list">statement_list</a>
411 | 	: <a href="#statement">statement</a>
412 | 	| statement_list <a href="#statement">statement</a>
413 | 	;
414 | 
415 | <a name="expression-statement">expression_statement</a>
416 | 	: ';'
417 | 	| <a href="#expression">expression</a> ';'
418 | 	;
419 | 
420 | <a name="selection-statement">selection_statement</a>
421 | 	: <a href="ANSI-C-grammar-l.html#IF">IF</a> '(' <a href="#expression">expression</a> ')' <a href="#statement">statement</a>
422 | 	| <a href="ANSI-C-grammar-l.html#IF">IF</a> '(' <a href="#expression">expression</a> ')' <a href="#statement">statement</a> <a href="ANSI-C-grammar-l.html#ELSE">ELSE</a> <a href="#statement">statement</a>
423 | 	| <a href="ANSI-C-grammar-l.html#SWITCH">SWITCH</a> '(' <a href="#expression">expression</a> ')' <a href="#statement">statement</a>
424 | 	;
425 | 
426 | <a name="iteration-statement">iteration_statement</a>
427 | 	: <a href="ANSI-C-grammar-l.html#WHILE">WHILE</a> '(' <a href="#expression">expression</a> ')' <a href="#statement">statement</a>
428 | 	| <a href="ANSI-C-grammar-l.html#DO">DO</a> <a href="#statement">statement</a> <a href="ANSI-C-grammar-l.html#WHILE">WHILE</a> '(' <a href="#expression">expression</a> ')' ';'
429 | 	| <a href="ANSI-C-grammar-l.html#FOR">FOR</a> '(' <a href="#expression-statement">expression_statement</a> <a href="#expression-statement">expression_statement</a> ')' <a href="#statement">statement</a>
430 | 	| <a href="ANSI-C-grammar-l.html#FOR">FOR</a> '(' <a href="#expression-statement">expression_statement</a> <a href="#expression-statement">expression_statement</a> <a href="#expression">expression</a> ')' <a href="#statement">statement</a>
431 | 	;
432 | 
433 | <a name="jump-statement">jump_statement</a>
434 | 	:  <a href="ANSI-C-grammar-l.html#CONTINUE">CONTINUE</a> ';'
435 | 	| <a href="ANSI-C-grammar-l.html#BREAK">BREAK</a> ';'
436 | 	| <a href="ANSI-C-grammar-l.html#RETURN">RETURN</a> ';'
437 | 	| <a href="ANSI-C-grammar-l.html#RETURN">RETURN</a> <a href="#expression">expression</a> ';'
438 | 	;
439 | 
440 | <a name="translation-unit">translation_unit</a>
441 | 	: <a href="#external-declaration">external_declaration</a>
442 | 	| translation_unit <a href="#external-declaration">external_declaration</a>
443 | 	;
444 | 
445 | <a name="external-declaration">external_declaration</a>
446 | 	: <a href="#function-definition">function_definition</a>
447 | 	| <a href="#declaration">declaration</a>
448 | 	;
449 | 
450 | <a name="function-definition">function_definition</a>
451 | 	:    <a href="#declaration-specifiers">declaration_specifiers</a> <a href="#declarator">declarator</a> <a href="#compound-statement">compound_statement</a>   
452 | 	;
453 | 
454 | %%
455 | #include &lt;stdio.h&gt;
456 | 
457 | extern char yytext[];
458 | extern int column;
459 | 
460 | yyerror(s)
461 | char *s;
462 | {
463 | 	fflush(stdout);
464 | 	printf("\n%*s\n%*s\n", column, "^", column, s);
465 | }
466 | </pre>
467 | 
468 | 
469 | <embed id="xunlei_com_thunder_helper_plugin_d462f475-c18e-46be-bd10-327458d045bd" type="application/thunder_download_plugin" height="0" width="0">
470 | 
471 | </body></html>
472 | 


--------------------------------------------------------------------------------
/Class notes in Chinese.txt:
--------------------------------------------------------------------------------
 1 | 1. 优化代码size和time
 2 | 2. 避免编译器过于复杂
 3 | 3. 去除冗余操作（if（0）去掉）
 4 | 4. 常量传播、预处理
 5 | 5. 函数替换、替换尾递归
 6 | 6. 分析比例
 7 | 7. 流图 点是block 边是跳转
 8 | 8. （X+1）(X+1) -> (x+1)^2
 9 | 9. error type :
10 |  mismatch undefined const redeclaration
11 | 
12 | 报告：
13 |    技术
14 |    测试
15 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 


--------------------------------------------------------------------------------
/Presentation scripts:
--------------------------------------------------------------------------------
 1 | Error recovery
 2 | 
 3 | Adding error rules to our BNF
 4 | Adding EOF token to handle the last missing right curly bracket
 5 | 
 6 | This lets us detect the most common mistakes and recover from them (the correct parse tree is still built).
 7 | 
 8 | can handle:
 9 | 1. missing semicolon
10 | 2. missing right curly bracket
11 | 3. some error identifier (不符合C标识符命名规则的)
12 | 4. error token after operator
13 | 以上4条均能进行error recovery，返回正确的语法树
14 | 5. 不符合 ANSI C 的各种语法 （error_pos.c)
15 | 
16 | test files:
17 | test1.c  all test
18 | test2.c  missing semicolon
19 | missSEMI.c all errors token after operator + missing semicolon + errorID + missing right curly
20 | errorID.c errorID
21 | missRightCurly.c missing right curly
22 | 
23 | 
24 | Syntax error finder:
25 | 1. 函数声明与函数定义的参数列表不一致
26 | 2. 变量重复定义
27 | 3. 赋值时 类型不匹配
28 | 4. 表达式中，操作数的类型与规定的类型不一致
29 | 5. typo，打字错误。会从符号表中找出最接近的标识符，给出提示
30 | 6. 函数调用时参数表不符合函数定义
31 | 7. 函数实际返回值类型 不符合 函数定义中的函数返回值类型
32 | 
33 | //1. 函数定义声明不一致
34 | int f(int i,...);
35 | int f(int j){
36 |     return 0;
37 | }
38 | 
39 | int g(int i){
40 |     return 0;
41 | }
42 | typedef struct{
43 |     int n;
44 | } A;
45 | int main(int argc, char const *argv[])
46 | {
47 | 
48 |     //2. 重复定义
49 |     int k;
50 |     int k;
51 |     //缺少分号
52 |     int i
53 |     int count;
54 |     //3. 类型不匹配
55 |     A a;
56 |     a = 5;
57 |     //4. 未定义变量
58 |     var = 3;
59 |     //5. 操作数类型错误
60 |     1.0 >> 4;
61 |     //打字错误
62 |     cont = 4;
63 |     g(1.0);
64 |     //6. 参数表不匹配
65 |     g(a);
66 |     //7. 返回值不匹配
67 |     return a;
68 | }
69 | 
70 | 1. 基本功能：
71 |     计算add sub mul div 等
72 |     逻辑and or not
73 |     跳转jmp je jg jl
74 |     移位sal sar
75 |     函数call ret
76 |     堆栈push pop
77 |     全局数据 常量浮点数、字符串、global、static变量
78 |     浮点数运算fld fstp fadd fsub fmul fdiv
79 | 
80 | 2. 优化
81 |     前端：
82 |         constant folding
83 |         死代码消除
84 |     后端优化：
85 |         寄存器优化：
86 |             将ebx,ecx,edx作为临时变量的暂存区域
87 |             将esi edi作为eax的交换区
88 |         指令优化：
89 |             *2 / 4 / 8。。。 ->sal
90 |             lea 2*eax+offset -> reg
91 | 
92 | 3. 支持特性：
93 |     看样例
94 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # ZCC
  2 | ZJU standard C Compiler
  3 | 
  4 | <!--
  5 | ##免责声明
  6 | 我看大家都没动手，为了美好的明天，必须出来打个底
  7 | 如果大家觉得定的有什么不对的地方，欢迎修改！！！
  8 | 
  9 | ## 项目分布
 10 | 请将：  
 11 |     lex和yac放在yyparse文件夹  
 12 |     symbol table和类型检查放在symbol  
 13 |     机器码生成放在generation  
 14 |     全局变量放在public  
 15 |     优化代码放在各自部分的文件夹下  
 16 | -->
 17 | 
 18 | ## Code Organization
 19 | * Lex- and Yacc-related code is in the folder *yyparse*.
 20 | * Symbol-table and type-checking code is in the folder *symbol*.
 21 | * Machine-code generation code is in the folder *generation*.
 22 | * Global variables are in the folder *public*.
 23 | * Optimization code lives in the folder of the stage it belongs to.
 24 | 
 25 | <!--
 26 | ##命名规则
 27 | 为了不出现引用错误，简单的定一下规则，（再次声明觉得不科学的一定要提出来） 
 28 | ###类名
 29 | 每个单词的首字母大写
 30 | ###变量和函数名
 31 | 从第二个单词开始首字母大写，如treeNode
 32 | ###常量
 33 | 全部大写
 34 | 
 35 | ##合作
 36 | 1.请务必在需要交互的所有类和函数上面写上注释，必要的话可以写上重要函数的注释  
 37 | 2.请把你觉得可重用的函数写到public文件夹  
 38 | 3.因为耦合不是很多，大家基本不会再一个文件里编写，所以不开分支也行，但是push之前一定保证能跑起来  
 39 | 
 40 | ##遥远的祝福
 41 | 祝大家大程满分，如果可以的话，希望能在下个星期日之前完成v1.0
 42 | 
 43 | ##关于wiki
 44 | 请大家踊跃地写wiki，这是为了整组的效率，比如做过js解释器的李某其麦学长，如果有什么让大家能快速上手py parse的资料，请把链接写到wiki
 45 | 
 46 | ##关于tree结构和BNF
 47 |   实话说，我看了一下总结出：没开始写就想定树结构，简直就是在搞笑，所以我仿照tiny语言擅自定了一个，在编写过程中人人都可以修改，但一定要写注释
 48 | 那个BNF也是同样的，要出来商量BNF，效率太低，加上我们吹牛的时间，必定雪崩，于是我找了一个，大家每个人都要看一遍，具体为什么其麦学长那天也说了，然可能的话看的过程写下注释，这样后面的人更好理解，也可以揪出你理解的错误，然后发现问题或者要修改请立即提出
 49 | -->
 50 | 
 51 | <!--
 52 | FU BABA is supposed to complete this part!
 53 | ## 目前语法分析的能力
 54 | 对于正确的程序，语法分析，符号表建立，都完成了。
 55 | 文档写的不多，我会给你熊学长和付学长系统的讲一遍，之后你们有疑问直接问我这个活文档
 56 | 到时候，我看哪里需要写一下，我再写。
 57 | 现在不支持
 58 | 1. enum
 59 | 2. 定义初始化,如 int i = 3;
 60 | 3. 常量表达式,如 int i[2*3];
 61 | 4. 语法检查也比较弱，没有检查运算的操作数的类型合法性。
 62 | 5. 一些其它的特性也没有。只要你用了不支持的特性，都会报错提醒你。
 63 | 6. size的维护问题。
 64 | 目前已经够你们后端写起来了。
 65 | 你们一定要保证测试样例是正确的，因为我只进行了部分的语法检查。
 66 | 你们的样例都要先用gcc测一遍正确性，再来跑我们的zcc。
 67 | 其它的特性，等我把别的作业写一写，再来加。
 68 | -->
 69 | 
 70 | 
 71 | <!--
 72 | ##Problems
 73 | * 错误处理
 74 | 
 75 | * typedef 的语法树可能有点问题
 76 | 
 77 | * 李学长：根据标准typedef就是这个样子，不过需要我这里在进行一些检查，你们对这个typedef的语法树有疑问来问我。
 78 | 
 79 | typedef struct{
 80 |     int a;
 81 |     double c;
 82 | }mytype;
 83 | -->
 84 | ## Parse Tree Sample:
 85 | 
 86 | ```
 87 | declaration
 88 |     declaration_specifiers
 89 |         storage_class_specifier
 90 |             typedef
 91 |         declaration_specifiers
 92 |             type_specifier
 93 |                 struct_or_union_specifier
 94 |                     struct_or_union
 95 |                         struct
 96 |                     {
 97 |                     struct_declaration_list
 98 |                         struct_declaration_list
 99 |                             struct_declaration
100 |                                 specifier_qualifier_list
101 |                                     type_specifier
102 |                                         int
103 |                                 struct_declarator_list
104 |                                     struct_declarator
105 |                                         declarator
106 |                                             direct_declarator
107 |                                                 a
108 |                                 ;
109 |                         struct_declaration
110 |                             specifier_qualifier_list
111 |                                 type_specifier
112 |                                     double
113 |                             struct_declarator_list
114 |                                 struct_declarator
115 |                                     declarator
116 |                                         direct_declarator
117 |                                             c
118 |                             ;
119 |                     }
120 |     init_declarator_list
121 |         init_declarator
122 |             declarator
123 |                 direct_declarator
124 |                     mytype
125 |     ;
126 | ```    
127 | ## Code Generation
128 | ### Miscellaneous
129 | <!--用来解析树和调用self.tools中的函数进行翻译  -->
130 | Walk the parse tree and call functions in *self.tools* to translate it.
131 | 
132 | Detailed comments and examples can be found in *generation.generate*.
133 | 
134 | ### Basic X86 supports
135 | *    Calculation: add, sub, mul, div.
136 | *    Logic: and, or, not.
137 | *    Jump: jmp, je, jg, jl.
138 | *    Shift: sal, sar.
139 | *    Function: call, ret.
140 | *    Stack: push, pop.
141 | *    Float number operation: fld, fstp, fadd, fsub, fmul, fdiv.
142 | *    Data: global/static variables, floating-point constants, string literals.
143 | 
144 | <!--
145 | ###utility
146 | 翻译的工具函数  
147 | 已完成各种初始化、变量绑定、寄存器简单优化  
148 | 目前支持赋值、简单计算、跳转、call等基本功能
149 | -->
150 | 
151 | ## Code Optimization
152 | ### The optimization types supported
153 | http://www.compileroptimizations.com/index.html
154 | ### Constant propagation
155 | http://people.eecs.berkeley.edu/~bodik/cs264/lectures/4-chaotic-notes.pdf
156 | 


--------------------------------------------------------------------------------
/ZCC.bnf:
--------------------------------------------------------------------------------
  1 | %token int_const char_const float_const id string enumeration_const
  2 | %%
  3 | 
  4 | translation_unit	: external_decl
  5 | 			| translation_unit external_decl
  6 | 			;
  7 | external_decl		: function_definition
  8 | 			| decl
  9 | 			;
 10 | function_definition	: decl_specs declarator decl_list compound_stat
 11 | 			|		declarator decl_list compound_stat
 12 | 			| decl_specs declarator		compound_stat
 13 | 			|		declarator 	compound_stat
 14 | 			;
 15 | decl			: decl_specs init_declarator_list ';'
 16 | 			| decl_specs			';'
 17 | 			;
 18 | decl_list		: decl
 19 | 			| decl_list decl
 20 | 			;
 21 | decl_specs		: storage_class_spec decl_specs
 22 | 			| storage_class_spec
 23 | 			| type_spec decl_specs
 24 | 			| type_spec
 25 | 			| type_qualifier decl_specs
 26 | 			| type_qualifier
 27 | 			;
 28 | storage_class_spec	: 'auto' | 'register' | 'static' | 'extern' | 'typedef'
 29 | 			;
 30 | type_spec		: 'void' | 'char' | 'short' | 'int' | 'long' | 'float'
 31 | 			| 'double' | 'signed' | 'unsigned'
 32 | 			| struct_or_union_spec
 33 | 			| enum_spec
 34 | 			| typedef_name
 35 | 			;
 36 | type_qualifier		: 'const' | 'volatile'
 37 | 			;
 38 | struct_or_union_spec	: struct_or_union id '{' struct_decl_list '}'
 39 | 			| struct_or_union	'{' struct_decl_list '}'
 40 | 			| struct_or_union id
 41 | 			;
 42 | struct_or_union		: 'struct' | 'union'
 43 | 			;
 44 | struct_decl_list	: struct_decl
 45 | 			| struct_decl_list struct_decl
 46 | 			;
 47 | init_declarator_list	: init_declarator
 48 | 			| init_declarator_list ',' init_declarator
 49 | 			;
 50 | init_declarator		: declarator
 51 | 			| declarator '=' initializer
 52 | 			;
 53 | struct_decl		: spec_qualifier_list struct_declarator_list ';'
 54 | 			;
 55 | spec_qualifier_list	: type_spec spec_qualifier_list
 56 | 			| type_spec
 57 | 			| type_qualifier spec_qualifier_list
 58 | 			| type_qualifier
 59 | 			;
 60 | struct_declarator_list	: struct_declarator
 61 | 			| struct_declarator_list ',' struct_declarator
 62 | 			;
 63 | struct_declarator	: declarator
 64 | 			| declarator ':' const_exp
 65 | 			|		':' const_exp
 66 | 			;
 67 | enum_spec		: 'enum' id '{' enumerator_list '}'
 68 | 			| 'enum'	'{' enumerator_list '}'
 69 | 			| 'enum' id
 70 | 			;
 71 | enumerator_list		: enumerator
 72 | 			| enumerator_list ',' enumerator
 73 | 			;
 74 | enumerator		: id
 75 | 			| id '=' const_exp
 76 | 			;
 77 | declarator		: pointer direct_declarator
 78 | 			|	direct_declarator
 79 | 			;
 80 | direct_declarator	: id
 81 | 			| '(' declarator ')'
 82 | 			| direct_declarator '[' const_exp ']'
 83 | 			| direct_declarator '['		']'
 84 | 			| direct_declarator '(' param_type_list ')'
 85 | 			| direct_declarator '(' id_list ')'
 86 | 			| direct_declarator '('		')'
 87 | 			;
 88 | pointer			: '*' type_qualifier_list
 89 | 			| '*'
 90 | 			| '*' type_qualifier_list pointer
 91 | 			| '*'			pointer
 92 | 			;
 93 | type_qualifier_list	: type_qualifier
 94 | 			| type_qualifier_list type_qualifier
 95 | 			;
 96 | param_type_list		: param_list
 97 | 			| param_list ',' '...'
 98 | 			;
 99 | param_list		: param_decl
100 | 			| param_list ',' param_decl
101 | 			;
102 | param_decl		: decl_specs declarator
103 | 			| decl_specs abstract_declarator
104 | 			| decl_specs
105 | 			;
106 | id_list			: id
107 | 			| id_list ',' id
108 | 			;
109 | initializer		: assignment_exp
110 | 			| '{' initializer_list '}'
111 | 			| '{' initializer_list ',' '}'
112 | 			;
113 | initializer_list	: initializer
114 | 			| initializer_list ',' initializer
115 | 			;
116 | type_name		: spec_qualifier_list abstract_declarator
117 | 			| spec_qualifier_list
118 | 			;
119 | abstract_declarator	: pointer
120 | 			| pointer direct_abstract_declarator
121 | 			|	direct_abstract_declarator
122 | 			;
123 | direct_abstract_declarator: '(' abstract_declarator ')'
124 | 			| direct_abstract_declarator '[' const_exp ']'
125 | 			|				'[' const_exp ']'
126 | 			| direct_abstract_declarator '['	']'
127 | 			|				'['	']'
128 | 			| direct_abstract_declarator '(' param_type_list ')'
129 | 			|				'(' param_type_list ')'
130 | 			| direct_abstract_declarator '('		')'
131 | 			|				'('		')'
132 | 			;
133 | typedef_name		: id
134 | 			;
135 | stat			: labeled_stat
136 | 			| exp_stat
137 | 			| compound_stat
138 | 			| selection_stat
139 | 			| iteration_stat
140 | 			| jump_stat
141 | 			;
142 | labeled_stat		: id ':' stat
143 | 			| 'case' const_exp ':' stat
144 | 			| 'default' ':' stat
145 | 			;
146 | exp_stat		: exp ';'
147 | 			|	';'
148 | 			;
149 | compound_stat		: '{' decl_list stat_list '}'
150 | 			| '{'		stat_list '}'
151 | 			| '{' decl_list		'}'
152 | 			| '{'			'}'
153 | 			;
154 | stat_list		: stat
155 | 			| stat_list stat
156 | 			;
157 | selection_stat		: 'if' '(' exp ')' stat
158 | 			| 'if' '(' exp ')' stat 'else' stat
159 | 			| 'switch' '(' exp ')' stat
160 | 			;
161 | iteration_stat		: 'while' '(' exp ')' stat
162 | 			| 'do' stat 'while' '(' exp ')' ';'
163 | 			| 'for' '(' exp ';' exp ';' exp ')' stat
164 | 			| 'for' '(' exp ';' exp ';'	')' stat
165 | 			| 'for' '(' exp ';'	';' exp ')' stat
166 | 			| 'for' '(' exp ';'	';'	')' stat
167 | 			| 'for' '('	';' exp ';' exp ')' stat
168 | 			| 'for' '('	';' exp ';'	')' stat
169 | 			| 'for' '('	';'	';' exp ')' stat
170 | 			| 'for' '('	';'	';'	')' stat
171 | 			;
172 | jump_stat		: 'goto' id ';'
173 | 			| 'continue' ';'
174 | 			| 'break' ';'
175 | 			| 'return' exp ';'
176 | 			| 'return'	';'
177 | 			;
178 | exp			: assignment_exp
179 | 			| exp ',' assignment_exp
180 | 			;
181 | assignment_exp		: conditional_exp
182 | 			| unary_exp assignment_operator assignment_exp
183 | 			;
184 | assignment_operator	: '=' | '*=' | '/=' | '%=' | '+=' | '-=' | '<<='
185 | 			| '>>=' | '&=' | '^=' | '|='
186 | 			;
187 | conditional_exp		: logical_or_exp
188 | 			| logical_or_exp '?' exp ':' conditional_exp
189 | 			;
190 | const_exp		: conditional_exp
191 | 			;
192 | logical_or_exp		: logical_and_exp
193 | 			| logical_or_exp '||' logical_and_exp
194 | 			;
195 | logical_and_exp		: inclusive_or_exp
196 | 			| logical_and_exp '&&' inclusive_or_exp
197 | 			;
198 | inclusive_or_exp	: exclusive_or_exp
199 | 			| inclusive_or_exp '|' exclusive_or_exp
200 | 			;
201 | exclusive_or_exp	: and_exp
202 | 			| exclusive_or_exp '^' and_exp
203 | 			;
204 | and_exp			: equality_exp
205 | 			| and_exp '&' equality_exp
206 | 			;
207 | equality_exp		: relational_exp
208 | 			| equality_exp '==' relational_exp
209 | 			| equality_exp '!=' relational_exp
210 | 			;
211 | relational_exp		: shift_expression
212 | 			| relational_exp '<' shift_expression
213 | 			| relational_exp '>' shift_expression
214 | 			| relational_exp '<=' shift_expression
215 | 			| relational_exp '>=' shift_expression
216 | 			;
217 | shift_expression	: additive_exp
218 | 			| shift_expression '<<' additive_exp
219 | 			| shift_expression '>>' additive_exp
220 | 			;
221 | additive_exp		: mult_exp
222 | 			| additive_exp '+' mult_exp
223 | 			| additive_exp '-' mult_exp
224 | 			;
225 | mult_exp		: cast_exp
226 | 			| mult_exp '*' cast_exp
227 | 			| mult_exp '/' cast_exp
228 | 			| mult_exp '%' cast_exp
229 | 			;
230 | cast_exp		: unary_exp
231 | 			| '(' type_name ')' cast_exp
232 | 			;
233 | unary_exp		: postfix_exp
234 | 			| '++' unary_exp
235 | 			| '--' unary_exp
236 | 			| unary_operator cast_exp
237 | 			| 'sizeof' unary_exp
238 | 			| 'sizeof' '(' type_name ')'
239 | 			;
240 | unary_operator		: '&' | '*' | '+' | '-' | '~' | '!'
241 | 			;
242 | postfix_exp		: primary_exp
243 | 			| postfix_exp '[' exp ']'
244 | 			| postfix_exp '(' argument_exp_list ')'
245 | 			| postfix_exp '('			')'
246 | 			| postfix_exp '.' id
247 | 			| postfix_exp '->' id
248 | 			| postfix_exp '++'
249 | 			| postfix_exp '--'
250 | 			;
251 | primary_exp		: id
252 | 			| const
253 | 			| string
254 | 			| '(' exp ')'
255 | 			;
256 | argument_exp_list	: assignment_exp
257 | 			| argument_exp_list ',' assignment_exp
258 | 			;
259 | const			: int_const
260 | 			| char_const
261 | 			| float_const
262 | 			| enumeration_const
263 | 			;


--------------------------------------------------------------------------------
/bnf.tmp:
--------------------------------------------------------------------------------
  1 | primary_expression
  2 | 	: IDENTIFIER
  3 | 	| CONSTANT
  4 | 	| STRING_LITERAL
  5 | 	| '(' expression ')'
  6 | 	;
  7 | 
  8 | postfix_expression
  9 | 	: primary_expression
 10 | 	| postfix_expression '[' expression ']'
 11 | 	| postfix_expression '(' ')'
 12 | 	| postfix_expression '(' argument_expression_list ')'
 13 | 	| postfix_expression '.' IDENTIFIER
 14 | 	| postfix_expression PTR_OP IDENTIFIER
 15 | 	| postfix_expression INC_OP
 16 | 	| postfix_expression DEC_OP
 17 | 	;
 18 | 
 19 | argument_expression_list
 20 | 	: assignment_expression
 21 | 	| argument_expression_list ',' assignment_expression
 22 | 	;
 23 | 
 24 | unary_expression
 25 | 	: postfix_expression
 26 | 	| INC_OP unary_expression
 27 | 	| DEC_OP unary_expression
 28 | 	| unary_operator cast_expression
 29 | 	| SIZEOF unary_expression
 30 | 	| SIZEOF '(' type_name ')'
 31 | 	;
 32 | 
 33 | unary_operator
 34 | 	: '&'
 35 | 	| '*'
 36 | 	| '+'
 37 | 	| '-'
 38 | 	| '~'
 39 | 	| '!'
 40 | 	;
 41 | 
 42 | cast_expression
 43 | 	: unary_expression
 44 | 	| '(' type_name ')' cast_expression
 45 | 	;
 46 | 
 47 | multiplicative_expression
 48 | 	: cast_expression
 49 | 	| multiplicative_expression '*' cast_expression
 50 | 	| multiplicative_expression '/' cast_expression
 51 | 	| multiplicative_expression '%' cast_expression
 52 | 	;
 53 | 
 54 | additive_expression
 55 | 	: multiplicative_expression
 56 | 	| additive_expression '+' multiplicative_expression
 57 | 	| additive_expression '-' multiplicative_expression
 58 | 	;
 59 | 
 60 | shift_expression
 61 | 	: additive_expression
 62 | 	| shift_expression LEFT_OP additive_expression
 63 | 	| shift_expression RIGHT_OP additive_expression
 64 | 	;
 65 | 
 66 | relational_expression
 67 | 	: shift_expression
 68 | 	| relational_expression '<' shift_expression
 69 | 	| relational_expression '>' shift_expression
 70 | 	| relational_expression LE_OP shift_expression
 71 | 	| relational_expression GE_OP shift_expression
 72 | 	;
 73 | 
 74 | equality_expression
 75 | 	: relational_expression
 76 | 	| equality_expression EQ_OP relational_expression
 77 | 	| equality_expression NE_OP relational_expression
 78 | 	;
 79 | 
 80 | and_expression
 81 | 	: equality_expression
 82 | 	| and_expression '&' equality_expression
 83 | 	;
 84 | 
 85 | exclusive_or_expression
 86 | 	: and_expression
 87 | 	| exclusive_or_expression '^' and_expression
 88 | 	;
 89 | 
 90 | inclusive_or_expression
 91 | 	: exclusive_or_expression
 92 | 	| inclusive_or_expression '|' exclusive_or_expression
 93 | 	;
 94 | 
 95 | logical_and_expression
 96 | 	: inclusive_or_expression
 97 | 	| logical_and_expression AND_OP inclusive_or_expression
 98 | 	;
 99 | 
100 | logical_or_expression
101 | 	: logical_and_expression
102 | 	| logical_or_expression OR_OP logical_and_expression
103 | 	;
104 | 
105 | conditional_expression
106 | 	: logical_or_expression
107 | 	| logical_or_expression '?' expression ':' conditional_expression
108 | 	;
109 | 
110 | assignment_expression
111 | 	: conditional_expression
112 | 	| unary_expression assignment_operator assignment_expression
113 | 	;
114 | 
115 | assignment_operator
116 | 	: '='
117 | 	| MUL_ASSIGN
118 | 	| DIV_ASSIGN
119 | 	| MOD_ASSIGN
120 | 	| ADD_ASSIGN
121 | 	| SUB_ASSIGN
122 | 	| LEFT_ASSIGN
123 | 	| RIGHT_ASSIGN
124 | 	| AND_ASSIGN
125 | 	| XOR_ASSIGN
126 | 	| OR_ASSIGN
127 | 	;
128 | 
129 | expression
130 | 	: assignment_expression
131 | 	| expression ',' assignment_expression
132 | 	;
133 | 
134 | constant_expression
135 | 	: conditional_expression
136 | 	;
137 | 
138 | declaration
139 | 	: declaration_specifiers ';'
140 | 	| declaration_specifiers init_declarator_list ';'
141 | 	;
142 | 
143 | declaration_specifiers
144 | 	: storage_class_specifier
145 | 	| storage_class_specifier declaration_specifiers
146 | 	| type_specifier
147 | 	| type_specifier declaration_specifiers
148 | 	| type_qualifier
149 | 	| type_qualifier declaration_specifiers
150 | 	;
151 | 
152 | init_declarator_list
153 | 	: init_declarator
154 | 	| init_declarator_list ',' init_declarator
155 | 	;
156 | 
157 | init_declarator
158 | 	: declarator
159 | 	| declarator '=' initializer
160 | 	;
161 | 
162 | storage_class_specifier
163 | 	: TYPEDEF
164 | 	| EXTERN
165 | 	| STATIC
166 | 	| AUTO
167 | 	| REGISTER
168 | 	;
169 | 
170 | type_specifier
171 | 	: VOID
172 | 	| CHAR
173 | 	| SHORT
174 | 	| INT
175 | 	| LONG
176 | 	| FLOAT
177 | 	| DOUBLE
178 | 	| SIGNED
179 | 	| UNSIGNED
180 | 	| struct_or_union_specifier
181 | 	| enum_specifier
182 | 	| TYPE_NAME
183 | 	;
184 | 
185 | struct_or_union_specifier
186 | 	: struct_or_union IDENTIFIER '{' struct_declaration_list '}'
187 | 	| struct_or_union '{' struct_declaration_list '}'
188 | 	| struct_or_union IDENTIFIER
189 | 	;
190 | 
191 | struct_or_union
192 | 	: STRUCT
193 | 	| UNION
194 | 	;
195 | 
196 | struct_declaration_list
197 | 	: struct_declaration
198 | 	| struct_declaration_list struct_declaration
199 | 	;
200 | 
201 | struct_declaration
202 | 	: specifier_qualifier_list struct_declarator_list ';'
203 | 	;
204 | 
205 | specifier_qualifier_list
206 | 	: type_specifier specifier_qualifier_list
207 | 	| type_specifier
208 | 	| type_qualifier specifier_qualifier_list
209 | 	| type_qualifier
210 | 	;
211 | 
212 | struct_declarator_list
213 | 	: struct_declarator
214 | 	| struct_declarator_list ',' struct_declarator
215 | 	;
216 | 
217 | struct_declarator
218 | 	: declarator
219 | 	| ':' constant_expression
220 | 	| declarator ':' constant_expression
221 | 	;
222 | 
223 | enum_specifier
224 | 	: ENUM '{' enumerator_list '}'
225 | 	| ENUM IDENTIFIER '{' enumerator_list '}'
226 | 	| ENUM IDENTIFIER
227 | 	;
228 | 
229 | enumerator_list
230 | 	: enumerator
231 | 	| enumerator_list ',' enumerator
232 | 	;
233 | 
234 | enumerator
235 | 	: IDENTIFIER
236 | 	| IDENTIFIER '=' constant_expression
237 | 	;
238 | 
239 | type_qualifier
240 | 	: CONST
241 | 	| VOLATILE
242 | 	;
243 | 
244 | declarator
245 | 	: pointer direct_declarator
246 | 	| direct_declarator
247 | 	;
248 | 
249 | direct_declarator
250 | 	: IDENTIFIER
251 | 	| '(' declarator ')'
252 | 	| direct_declarator '[' constant_expression ']'
253 | 	| direct_declarator '[' ']'
254 | 	| direct_declarator '(' parameter_type_list ')'
255 | 	| direct_declarator '(' identifier_list ')'
256 | 	| direct_declarator '(' ')'
257 | 	;
258 | 
259 | pointer
260 | 	: '*'
261 | 	| '*' type_qualifier_list
262 | 	| '*' pointer
263 | 	| '*' type_qualifier_list pointer
264 | 	;
265 | 
266 | type_qualifier_list
267 | 	: type_qualifier
268 | 	| type_qualifier_list type_qualifier
269 | 	;
270 | 
271 | 
272 | parameter_type_list
273 | 	: parameter_list
274 | 	| parameter_list ',' ELLIPSIS
275 | 	;
276 | 
277 | parameter_list
278 | 	: parameter_declaration
279 | 	| parameter_list ',' parameter_declaration
280 | 	;
281 | 
282 | parameter_declaration
283 | 	: declaration_specifiers declarator
284 | 	| declaration_specifiers abstract_declarator
285 | 	| declaration_specifiers
286 | 	;
287 | 
288 | identifier_list
289 | 	: IDENTIFIER
290 | 	| identifier_list ',' IDENTIFIER
291 | 	;
292 | 
293 | type_name
294 | 	: specifier_qualifier_list
295 | 	| specifier_qualifier_list abstract_declarator
296 | 	;
297 | 
298 | abstract_declarator
299 | 	: pointer
300 | 	| direct_abstract_declarator
301 | 	| pointer direct_abstract_declarator
302 | 	;
303 | 
304 | direct_abstract_declarator
305 | 	: '(' abstract_declarator ')'
306 | 	| '[' ']'
307 | 	| '[' constant_expression ']'
308 | 	| direct_abstract_declarator '[' ']'
309 | 	| direct_abstract_declarator '[' constant_expression ']'
310 | 	| '(' ')'
311 | 	| '(' parameter_type_list ')'
312 | 	| direct_abstract_declarator '(' ')'
313 | 	| direct_abstract_declarator '(' parameter_type_list ')'
314 | 	;
315 | 
316 | initializer
317 | 	: assignment_expression
318 | 	| '{' initializer_list '}'
319 | 	| '{' initializer_list ',' '}'
320 | 	;
321 | 
322 | initializer_list
323 | 	: initializer
324 | 	| initializer_list ',' initializer
325 | 	;
326 | 
327 | statement
328 | 	: labeled_statement
329 | 	| compound_statement
330 | 	| expression_statement
331 | 	| selection_statement
332 | 	| iteration_statement
333 | 	| jump_statement
334 | 	;
335 | 
336 | labeled_statement
337 | 	: IDENTIFIER ':' statement
338 | 	| CASE constant_expression ':' statement
339 | 	| DEFAULT ':' statement
340 | 	;
341 | 
342 | compound_statement
343 | 	: '{' '}'
344 | 	| '{' statement_list '}'
345 | 	| '{' declaration_list '}'
346 | 	| '{' declaration_list statement_list '}'
347 | 	;
348 | 
349 | declaration_list
350 | 	: declaration
351 | 	| declaration_list declaration
352 | 	;
353 | 
354 | statement_list
355 | 	: statement
356 | 	| statement_list statement
357 | 	;
358 | 
359 | expression_statement
360 | 	: ';'
361 | 	| expression ';'
362 | 	;
363 | 
364 | selection_statement
365 | 	: IF '(' expression ')' statement
366 | 	| IF '(' expression ')' statement ELSE statement
367 | 	| SWITCH '(' expression ')' statement
368 | 	;
369 | 
370 | iteration_statement
371 | 	: WHILE '(' expression ')' statement
372 | 	| DO statement WHILE '(' expression ')' ';'
373 | 	| FOR '(' expression_statement expression_statement ')' statement
374 | 	| FOR '(' expression_statement expression_statement expression ')' statement
375 | 	;
376 | 
377 | jump_statement
378 | 	: GOTO IDENTIFIER ';'
379 | 	| CONTINUE ';'
380 | 	| BREAK ';'
381 | 	| RETURN ';'
382 | 	| RETURN expression ';'
383 | 	;
384 | 
385 | translation_unit
386 | 	: external_declaration
387 | 	| translation_unit external_declaration
388 | 	;
389 | 
390 | external_declaration
391 | 	: function_definition
392 | 	| declaration
393 | 	;
394 | 
395 | function_definition
396 | 	: declaration_specifiers declarator declaration_list compound_statement
397 | 	| declaration_specifiers declarator compound_statement
398 | 	| declarator declaration_list compound_statement
399 | 	| declarator compound_statement


--------------------------------------------------------------------------------
/generation/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | 


--------------------------------------------------------------------------------
/generation/data.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #coding=utf-8
 3 | from public.ZCCglobal import *
 4 | 
 5 | class Data(object):
 6 |     def __init__(self,name,offset,type):
 7 |         """
 8 |         :type name:str
 9 |         :type offset:bool
10 |         :type type:CType
11 |         """
12 |         self.name=name
13 |         self.offset=offset
14 |         self.type=type
15 | 


--------------------------------------------------------------------------------
/generation/generation.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | # produce machine code
  3 | import sys
  4 | sys.path.append('c:\\zcc\\zcc')
  5 | from public.ZCCglobal import *
  6 | from utility import utility
  7 | from copy import deepcopy
  8 | from data import Data
  9 | 
 10 | 
 11 | class generator:
 12 | 
 13 |     def __init__(self):
 14 |         # asm output list
 15 |         self.asm = []
 16 |         self.tools = utility(self)
 17 |         self.exp2=[2**x for x in range(32)]
 18 |         # print(self.exp2)
 19 |         self.expression_handler = {
 20 |             'primary_expression': self.gen_primary_expression,
 21 |             'postfix_expression': self.gen_postfix_expression,
 22 |             'unary_expression': self.gen_unary_expression,
 23 |             'cast_expression': self.gen_cast_expression,
 24 |             'multiplicative_expression': self.gen_multiplicative_expression,
 25 |             'additive_expression': self.gen_additive_expression,
 26 |             'shift_expression': self.gen_shift_expression,
 27 |             'relational_expression': self.gen_relational_expression,
 28 |             'equality_expression': self.gen_equality_expression,
 29 |             'and_expression': self.gen_and_expression,
 30 |             'exclusive_or_expression': self.gen_exclusive_or_expression,
 31 |             'inclusive_or_expression': self.gen_inclusive_or_expression,
 32 |             'logical_and_expression': self.gen_logical_and_expression,
 33 |             'logical_or_expression': self.gen_logical_or_expression,
 34 |             'conditional_expression': self.gen_conditional_expression,
 35 |             'assignment_expression': self.gen_assignment_expression,
 36 |             'expression': self.gen_expression}
 37 | 
 38 |     def generate(self):
 39 |         self.tools.globalInitialize()
 40 |         for funcName in global_context.local:
 41 |             value = global_context.local[funcName]
 42 |             if(value.type == 'function'):
 43 |                 if global_context.local[
 44 |                         funcName].compound_statement is not None:
 45 |                     self.tools.newFunc(funcName)
 46 |                     self.gen_compound_statement(
 47 |                         global_context.local[funcName].compound_statement,global_context.local[funcName].compound_statement.context)
 48 |                     self.tools.endFunc()
 49 |         self.tools.end()
 50 | 
 51 |     def output(self, fileName):
 52 |         with open(fileName, 'w') as out:
 53 |             for line in self.asm:
 54 |                 out.write(line)
 55 | 
 56 |     def gen_statement_list(self, node,context):
 57 |         """
 58 |         :type node:TreeNode
 59 |         :type context:Context
 60 |         """
 61 |         for subnode in node[1:]:
 62 |             if isinstance(subnode, TreeNode):
 63 |                 if subnode[0] == "statement":
 64 |                     self.gen_statement(subnode,context)
 65 | 
 66 | 
 67 |     def gen_statement(self, node,context):
 68 |         """
 69 |         :type node:TreeNode
 70 |         :type context:Context
 71 |         """
 72 |         for subnode in node[1:]:
 73 |             if isinstance(subnode, TreeNode):
 74 |                 if subnode[0] == "expression_statement":
 75 |                     self.gen_expression_statement(subnode,context)
 76 |                 elif subnode[0] == "compound_statement":
 77 |                     self.tools.newScope(subnode.context)
 78 |                     self.gen_compound_statement(subnode,subnode.context)
 79 |                     self.tools.endScope()
 80 |                 elif subnode[0] == "selection_statement":
 81 |                     self.gen_selection_statement(subnode,context)
 82 |                 elif subnode[0]=="jump_statement":
 83 |                     self.gen_jump_statement(subnode,context)
 84 |                 elif subnode[0]=="iteration_statement":
 85 |                     self.gen_iteration_statement(subnode,context)
 86 | 
 87 |     def gen_expression_statement(self, node,context):
 88 |         """
 89 |         :type node:TreeNode
 90 |         :type context:Context
 91 |         :rtype :str
 92 |         """
 93 |         if isinstance(node[1],TreeNode):
 94 |             ret=self.expression_handler[node[1][0]](node[1],context)
 95 |         else:
 96 |             ret=self.tools.getTrue()
 97 |         return ret
 98 | 
 99 |     def gen_compound_statement(self, node,context):
100 |         """
101 |         :type node:TreeNode
102 |         :type context:Context
103 |         """
104 |         for subnode in node[1:]:
105 |             if isinstance(subnode, TreeNode):
106 |                 if subnode[0] == "statement_list":
107 |                     self.gen_statement_list(subnode,context)
108 | 
109 |     def gen_selection_statement(self, node,context):
110 |         """
111 |         :type node:TreeNode
112 |         :type context:Context
113 |         """
114 |         # node[3]:expression
115 |         # node[5]:statement
116 |         # node[7]:statement
117 |         if node[1] == "if":
118 |             ret=self.expression_handler[node[3][0]](node[3],context)
119 |             if len(node) == 6:
120 |                 label1=self.tools.allocateLabel()
121 |                 self.tools.cmp(ret,self.tools.getFalse())
122 |                 self.tools.je(label1)
123 |                 self.gen_statement(node[5],context)
124 |                 self.tools.markLabel(label1)
125 |             elif len(node) == 8:
126 |                 label1=self.tools.allocateLabel()
127 |                 label2=self.tools.allocateLabel()
128 |                 self.tools.cmp(ret,self.tools.getFalse())
129 |                 self.tools.je(label1)
130 |                 self.gen_statement(node[5],context)
131 |                 self.tools.jmp(label2)
132 |                 self.tools.markLabel(label1)
133 |                 self.gen_statement(node[7],context)
134 |                 self.tools.markLabel(label2)
135 | 
136 |     def gen_jump_statement(self,node,context):
137 |         """Emit a return: move the value of node[2] into eax when it is a subtree, then ret.
138 |         :type node:TreeNode
139 |         :type context:Context
140 |         """
141 |         if isinstance(node[2],TreeNode):
142 |             ret=self.expression_handler[node[2][0]](node[2],context)
143 |             self.tools.mov(self.tools.getEax(),ret)
144 |         self.tools.ret()  # NOTE(review): node[1] is never inspected, so every jump_statement form ends in ret - confirm break/continue/goto are handled elsewhere
145 | 
146 |     def gen_iteration_statement(self,node,context):
147 |         """
148 |         :type node:TreeNode
149 |         :type context:Context
150 |         """
151 |         if node[1]=="for":
152 |             if isinstance(node[5],TreeNode):
153 |                 label1=self.tools.allocateLabel()
154 |                 label2=self.tools.allocateLabel()
155 |                 label3=self.tools.allocateLabel()
156 |                 self.gen_expression_statement(node[3],context)
157 |                 self.tools.jmp(label2)
158 |                 self.tools.markLabel(label1)
159 |                 self.expression_handler[node[5][0]](node[5],context)
160 |                 self.tools.markLabel(label2)
161 |                 ret=self.gen_expression_statement(node[4],context)
162 |                 self.tools.cmp(ret,self.tools.getFalse())
163 |                 self.tools.je(label3)
164 |                 self.gen_statement(node[7],context)
165 |                 self.tools.jmp(label1)
166 |                 self.tools.markLabel(label3)
167 |             else:
168 |                 label1=self.tools.allocateLabel()
169 |                 label2=self.tools.allocateLabel()
170 |                 self.gen_expression_statement(node[3],context)
171 |                 self.tools.markLabel(label1)
172 |                 ret=self.gen_expression_statement(node[4],context)
173 |                 self.tools.cmp(ret,self.tools.getFalse())
174 |                 self.gen_statement(node[6],context)
175 |                 self.tools.jmp(label1)
176 |                 self.tools.markLabel(label2)
177 |         elif node[1]=="while":
178 |             label1=self.tools.allocateLabel()
179 |             label2=self.tools.allocateLabel()
180 |             self.tools.markLabel(label1)
181 |             ret=self.expression_handler[node[3][0]](node[3],context)
182 |             self.tools.cmp(ret,self.tools.getFalse())
183 |             self.tools.je(label2)
184 |             self.gen_statement(node[5],context)
185 |             self.tools.jmp(label1)
186 |             self.tools.markLabel(label2)
187 | 
188 | 
189 | 
190 |     def gen_additive_expression(self, node,context):
191 |         """
192 |         :type node:TreeNode
193 |         :type context:Context
194 |         :rtype: str
195 |         """
196 |         op1=self.expression_handler[node[1][0]](node[1],context)
197 |         tmp=self.tools.allocateNewReg(op1)
198 |         self.tools.lock(tmp)
199 |         self.tools.mov(tmp,op1)
200 |         op2=self.expression_handler[node[3][0]](node[3],context)
201 |         if node[2]=="+":
202 |             ret=self.tools.add(tmp,op2)
203 |         else:
204 |             ret=self.tools.sub(tmp,op2)
205 |         self.tools.unLock(tmp)
206 |         return ret
207 | 
208 |     def gen_primary_expression(self,node,context):
209 |         """
210 |         :type node:TreeNode
211 |         :type context:Context
212 |         :rtype: Data
213 |         """
214 |         if isinstance(node[1],TreeNode):
215 |             if node[1][0]=="IDENTIFIER":
216 |                 name=node[1][1]
217 |                 offset=False
218 |                 type=deepcopy(context.get_type_by_id(name))
219 |                 return Data(name,offset,type)
220 |             else:
221 |                 if node[1][0]=="INTEGER":
222 |                     return int(node[1][1])
223 |                 elif node[1][0]=="DOUBLE":
224 |                     return float(node[1][1])
225 |                 elif node[1][0]=="STRING":
226 |                     return str(node[1][1]) 
227 | 
228 | 
229 |     def gen_postfix_expression(self,node,context):
230 |         """Generate indexing '[', a call '(', and '.' / '->' member access; other forms return None.
231 |         :type node:TreeNode
232 |         :type context:Context
233 |         :rtype: str
234 |         """
235 |         operand=self.expression_handler[node[1][0]](node[1],context)  # node[1] is the base expression
236 |         if node[2]=="[":
237 |             if operand.offset==False:  # offset flag not set yet: load 0 into eax first
238 |                 self.tools.mov(self.tools.getEax(),0)
239 |             index=self.expression_handler[node[3][0]](node[3],context)
240 |             self.tools.mul(index,operand.type.member_type.Size())  # index times member size; NOTE(review): the result of mul is not used here - confirm it lands where the offset is expected
241 |             operand.offset=True
242 |             operand.type=operand.type.member_type
243 |             return operand
244 |         elif node[2]=="(":
245 |             if isinstance(node[3],TreeNode):  # the call has an argument list subtree
246 |                 argument_expression_list=node[3]
247 |                 real_arg_list=[]  # entries: [value, 0], [value, 1, saved register] or [value, 2]
248 |                 for argument_expression in argument_expression_list[1:]:
249 |                     if isinstance(argument_expression,TreeNode):
250 |                         argument=self.expression_handler[argument_expression[0]](argument_expression,context)
251 |                         if argument==self.tools.getEax():  # the value is eax itself: copy it to a locked register
252 |                             tmp=self.tools.allocateNewReg(self.tools.getEax())
253 |                             self.tools.lock(tmp)
254 |                             self.tools.mov(tmp,self.tools.getEax())
255 |                             real_arg_list.append([tmp,0])
256 |                         elif isinstance(argument,Data) and argument.offset:  # Data with its offset flag set: save the current eax alongside it
257 |                             tmp=self.tools.allocateNewReg(self.tools.getEax())
258 |                             self.tools.lock(tmp)
259 |                             self.tools.mov(tmp,self.tools.getEax())
260 |                             real_arg_list.append([argument,1,tmp])
261 |                         else:
262 |                             real_arg_list.append([argument,2])  # nothing was saved for this argument
263 |                 for list in real_arg_list:  # note: the loop variable shadows the builtin `list` inside this method
264 |                     if list[1]==1:
265 |                         self.tools.mov(self.tools.getEax(),list[2])  # restore the saved eax before passing
266 |                     self.tools.passPara(list[0])
267 |                     if list[1]==0:
268 |                         self.tools.unLock(list[0])
269 |                     if list[1]==1:
270 |                         self.tools.unLock(list[2])
271 |             ret=self.tools.call(operand)
272 |             return ret
273 |         elif node[2]==".":
274 |             if operand.offset==False:  # offset flag not set yet: load 0 into eax first
275 |                 self.tools.mov(self.tools.getEax(),0)
276 |             member=node[3][1]
277 |             self.tools.add(self.tools.getEax(),operand.type.offset[member])  # add the member's offset to eax
278 |             operand.type=operand.type.members[member]
279 |             operand.offset=True
280 |             return operand
281 |         elif node[2]=="->":
282 |             self.tools.mov(self.tools.getEax(),operand)
283 |             member=node[3][1]
284 |             self.tools.add(self.tools.getEax(),operand.type.offset[member])  # NOTE(review): offset/members are read from operand.type itself - confirm the pointer's type exposes them
285 |             operand.name=self.tools.getNull()
286 |             operand.type=operand.type.members[member]
287 |             operand.offset=True
288 |             return operand
289 | 
290 | 
291 |     def gen_unary_expression(self,node,context):
292 |         """
293 |         :type node:TreeNode
294 |         :type context:Context
295 |         :rtype: str
296 |         """
297 |         operand=self.expression_handler[node[2][0]](node[2],context)
298 |         if isinstance(node[1],TreeNode):
299 |             operator=self.gen_unary_operator(node[1],context)
300 |             if operator=="&":
301 |                 if isinstance(operand,Data):
302 |                     ret=self.tools.lea(operand)
303 |                     operand.type.is_const.append(False)
304 |                     return ret
305 |             elif operator=="*":
306 |                 if isinstance(operand,Data):
307 |                     self.tools.mov(self.tools.getEax(),operand)
308 |                     operand.name=self.tools.getNull()
309 |                     operand.offset=True
310 |                     operand.type.is_const.pop()
311 |                     return operand
312 |         else:
313 |             if node[1]=="++":
314 |                 ret=self.tools.add(operand,1)
315 |                 self.tools.mov(operand,ret)
316 |                 return operand
317 |             elif node[1]=="--":
318 |                 self.tools.sub(operand,1)
319 |                 return operand
320 | 
321 | 
322 |     def gen_cast_expression(self,node,context):
323 |         """
324 |         :type node:TreeNode
325 |         :type context:Context
326 |         :rtype: str
327 |         """
328 |         pass
329 | 
330 |     def gen_multiplicative_expression(self,node,context):
331 |         """
332 |         :type node:TreeNode
333 |         :type context:Context
334 |         :rtype: str
335 |         """
336 |         op1=self.expression_handler[node[1][0]](node[1],context)
337 |         tmp=self.tools.allocateNewReg(op1)
338 |         self.tools.lock(tmp)
339 |         self.tools.mov(tmp,op1)
340 |         op2=self.expression_handler[node[3][0]](node[3],context)
341 |         if node[2]=="*":
342 |             if isinstance(op2,str):
343 |                 try:
344 |                     num=int(op2)
345 |                     if num in self.exp2:
346 |                         ret=self.tools.sal(tmp,str(self.exp2.index(num)))
347 |                     else:
348 |                         ret=self.tools.mul(tmp,op2)
349 |                 except Exception:
350 |                     ret=self.tools.mul(tmp,op2)
351 |             else:
352 |                 ret=self.tools.mul(tmp,op2)
353 |         elif node[2]=="/":
354 |             if isinstance(op2,str):
355 |                 try:
356 |                     num=int(op2)
357 |                     if num in self.exp2:
358 |                         ret=self.tools.sar(tmp,str(self.exp2.index(num)))
359 |                     else:
360 |                         ret=self.tools.div(tmp,op2)
361 |                 except Exception:
362 |                     ret=self.tools.div(tmp,op2)
363 |             else:
364 |                 ret=self.tools.div(tmp,op2)
365 |         self.tools.unLock(tmp)
366 |         return ret
367 | 
368 | 
369 |     def gen_shift_expression(self,node,context):
370 |         """
371 |         :type node:TreeNode
372 |         :type context:Context
373 |         :rtype: str
374 |         """
375 |         pass
376 | 
377 |     def gen_relational_expression(self,node,context):
378 |         """
379 |         :type node:TreeNode
380 |         :type context:Context
381 |         :rtype: str
382 |         """
383 |         label1=self.tools.allocateLabel()
384 |         label2=self.tools.allocateLabel()
385 |         op1=self.expression_handler[node[1][0]](node[1],context)
386 |         tmp=self.tools.allocateNewReg(op1)
387 |         self.tools.lock(tmp)
388 |         self.tools.mov(tmp,op1)
389 |         op2=self.expression_handler[node[3][0]](node[3],context)
390 |         self.tools.cmp(tmp,op2)
391 |         if node[2]=="<":
392 |             self.tools.jl(label1)
393 |         elif node[2]=="<=":
394 |             self.tools.jle(label1)
395 |         elif node[2]==">":
396 |             self.tools.jg(label1)
397 |         elif node[2]==">=":
398 |             self.tools.jge(label1)
399 |         self.tools.mov(self.tools.getEax(),0)
400 |         self.tools.jmp(label2)
401 |         self.tools.markLabel(label1)
402 |         self.tools.mov(self.tools.getEax(),1)
403 |         self.tools.markLabel()
404 |         return self.tools.getEax()
405 | 
406 |     def gen_equality_expression(self,node,context):
407 |         """
408 |         :type node:TreeNode
409 |         :type context:Context
410 |         :rtype: str
411 |         """
412 |         label1=self.tools.allocateLabel()
413 |         label2=self.tools.allocateLabel()
414 |         op1=self.expression_handler[node[1][0]](node[1],context)
415 |         tmp=self.tools.allocateNewReg(op1)
416 |         self.tools.lock(tmp)
417 |         self.tools.mov(tmp,op1)
418 |         op2=self.expression_handler[node[3][0]](node[3],context)
419 |         self.tools.cmp(tmp,op2)
420 |         if node[2]=="==":
421 |             self.tools.je(label1)
422 |         elif node[2]=="!=":
423 |             self.tools.jne(label1)
424 |         self.tools.mov(self.tools.getEax(),0)
425 |         self.tools.jmp(label2)
426 |         self.tools.markLabel(label1)
427 |         self.tools.mov(self.tools.getEax(),1)
428 |         self.tools.markLabel()
429 |         return self.tools.getEax()
430 | 
431 | 
432 |     def gen_and_expression(self,node,context):
433 |         """
434 |         :type node:TreeNode
435 |         :type context:Context
436 |         :rtype: str
437 |         """
438 |         op1=self.expression_handler[node[1][0]](node[1],context)
439 |         tmp=self.tools.allocateNewReg(op1)
440 |         self.tools.lock(tmp)
441 |         self.tools.mov(tmp,op1)
442 |         op2=self.expression_handler[node[3][0]](node[3],context)
443 |         ret=self.tools.And(tmp,op2)
444 |         self.tools.unLock(tmp)
445 |         return ret
446 | 
447 |     def gen_exclusive_or_expression(self,node,context):
448 |         """Not implemented yet: the body is only ``pass``, so None is returned.
449 |         :type node:TreeNode
450 |         :type context:Context
451 |         :rtype: str
452 |         """
453 |         pass  # the draft implementation below is commented out
454 |         # op1=self.expression_handler[node[1][0]](node[1],context)
455 |         # tmp=self.tools.allocateNewReg()
456 |         # self.tools.lock(tmp)
457 |         # self.tools.mov(tmp,op1)
458 |         # op2=self.expression_handler[node[3][0]](node[3],context)
459 |         # ret=self.tools.xor(tmp,op2)
460 |         # self.tools.unLock(tmp)
461 |         # return ret
462 | 
463 |     def gen_inclusive_or_expression(self,node,context):
464 |         """
465 |         :type node:TreeNode
466 |         :type context:Context
467 |         :rtype: str
468 |         """
469 |         op1=self.expression_handler[node[1][0]](node[1],context)
470 |         tmp=self.tools.allocateNewReg(op1)
471 |         self.tools.lock(tmp)
472 |         self.tools.mov(tmp,op1)
473 |         op2=self.expression_handler[node[3][0]](node[3],context)
474 |         ret=self.tools.Or(tmp,op2)
475 |         self.tools.unLock(tmp)
476 |         return ret
477 | 
478 |     def gen_logical_and_expression(self,node,context):
479 |         """
480 |         :type node:TreeNode
481 |         :type context:Context
482 |         :rtype: str
483 |         """
484 |         label1=self.tools.allocateLabel()
485 |         label2=self.tools.allocateLabel()
486 |         op1=self.expression_handler[node[1][0]](node[1],context)
487 |         self.tools.cmp(op1,self.tools.getFalse())
488 |         self.tools.je(label1)
489 |         op2=self.expression_handler[node[3][0]](node[3],context)
490 |         self.tools.cmp(op2,self.tools.getFalse())
491 |         self.tools.je(label1)
492 |         self.tools.mov(self.tools.getEax(),1)
493 |         self.tools.jmp(label2)
494 |         self.tools.markLabel(label1)
495 |         self.tools.mov(self.tools.getEax(),0)
496 |         self.tools.markLabel(label2)
497 |         return self.tools.getEax()
498 | 
499 |     def gen_logical_or_expression(self,node,context):
500 |         """
501 |         :type node:TreeNode
502 |         :type context:Context
503 |         :rtype: str
504 |         """
505 |         label1=self.tools.allocateLabel()
506 |         label2=self.tools.allocateLabel()
507 |         op1=self.expression_handler[node[1][0]](node[1],context)
508 |         self.tools.cmp(op1,self.tools.getFalse())
509 |         self.tools.jne(label1)
510 |         op2=self.expression_handler[node[3][0]](node[3],context)
511 |         self.tools.cmp(op2,self.tools.getFalse())
512 |         self.tools.jne(label1)
513 |         self.tools.mov(self.tools.getEax(),0)
514 |         self.tools.jmp(label2)
515 |         self.tools.markLabel(label1)
516 |         self.tools.mov(self.tools.getEax(),1)
517 |         self.tools.markLabel(label2)
518 |         return self.tools.getEax()
519 | 
520 | 
521 |     def gen_conditional_expression(self,node,context):
522 |         """
523 |         :type node:TreeNode
524 |         :type context:Context
525 |         :rtype: str
526 |         """
527 |         pass
528 | 
529 |     def gen_assignment_expression(self,node,context):
530 |         """
531 |         :type node:TreeNode
532 |         :type context:Context
533 |         :rtype: str
534 |         """
535 |         operator=self.gen_assignment_operator(node[2],context)
536 |         right=self.expression_handler[node[3][0]](node[3],context)
537 |         tmp=self.tools.allocateNewReg(right)
538 |         self.tools.lock(tmp)
539 |         self.tools.mov(tmp,right)
540 |         left=self.expression_handler[node[1][0]](node[1],context)
541 |         # print(left)
542 |         if operator=="=":
543 |             self.tools.mov(left,tmp)
544 |         self.tools.unLock(tmp)
545 |         return left
546 | 
547 |     def gen_expression(self,node,context):
548 |         """
549 |         :type node:TreeNode
550 |         :type context:Context
551 |         :rtype: str
552 |         """
553 |         pass
554 | 
555 |     def gen_assignment_operator(self,node,context):
556 |         """
557 |         :type node:TreeNode
558 |         :type context:Context
559 |         :rtype: str
560 |         """
561 |         return node[1]
562 | 
563 |     def gen_unary_operator(self,node,context):
564 |         """
565 |         :type node:TreeNode
566 |         :type context:Context
567 |         :rtype: str
568 |         """
569 |         return node[1]
570 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | from yyparse.ZCCparser import parser, printAST
 3 | from yyparse.ZCClex import lexer as ZCClexer
 4 | from symbol.symtab import c_types
 5 | from public.ZCCglobal import global_context, FuncType, error, Context
 6 | from generation.generation import generator
 7 | import os
 8 | import sys
 9 | 
10 | 
11 | def preprocess(source):
12 |     stream = os.popen("gcc -E " + source)
13 |     return stream.read()
14 | 
15 | 
16 | if __name__ == '__main__':
17 |     if len(sys.argv) < 3:
18 |         print "Usage: python main.py <source_file> <x86asm_file>\nEnvironment: Python2.7, Linux."
19 |         exit(1)
20 |     File = sys.argv[1]
21 |     codes = preprocess(os.path.abspath("test/"+File))
22 |     pt = parser.parse(codes, lexer=ZCClexer)
23 |     # print "errorCounter=", parser.errorCounter
24 |     printAST(pt)
25 |     # with open("test.s","w") as output:
26 |     # print global_context
27 |     # print error
28 |     # printAST(global_context.local['main'].compound_statement.ast)
29 |     if(not error[0]):
30 |         gen = generator()
31 |         gen.generate()
32 |         gen.output(sys.argv[2])
33 | 


--------------------------------------------------------------------------------
/public/ZCCglobal.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | 
  5 | class CType(object):
  6 |     def __init__(self, type_name, size=0, **kwargs):
  7 |         """
  8 |         :type type_name:str
  9 |         :type size: int
 10 |         :type kwargs: dict
 11 |         :return: None
 12 |         """
 13 |         # "int","char","double","float","long","short","void",
 14 |         # "struct","union","enum","function", "array"
 15 |         # 'Incomplete'
 16 |         self.type = type_name  # type: str
 17 |         # sizeof
 18 |         self.size = size  # type: int
 19 |         self.is_const = [False]  # type: list[bool]
 20 |         self.storage_class = None  # type: str
 21 |         # "static", "extern"
 22 | 
 23 |         for key in kwargs:
 24 |             self.__setattr__(key, kwargs[key])
 25 | 
 26 |     def pointer_count(self):
 27 |         """
 28 |         :return: int
 29 |         """
 30 |         return len(self.is_const) - 1
 31 | 
 32 |     def Size(self):
 33 |         """
 34 |         Must get size by this function!!!
 35 |         :rtype: int
 36 |         """
 37 |         if self.pointer_count() == 0:
 38 |             return self.size
 39 |         else:
 40 |             return 4
 41 | 
 42 |     def __repr__(self):
 43 |         return self.__add_star__(self.type)
 44 | 
 45 |     def __add_star__(self, base_type_repr):
 46 |         rval = base_type_repr
 47 |         if self.storage_class:
 48 |             rval = self.storage_class + " " + rval
 49 |         for i in xrange(0, len(self.is_const)):
 50 |             if i > 0:
 51 |                 rval += " *"
 52 |             if self.is_const[i]:
 53 |                 rval += " const"
 54 |         return rval
 55 | 
 56 |     def __eq__(self, other):
 57 |         """
 58 |         :type self: CType
 59 |         :type other: CType
 60 |         :rtype: bool
 61 |         """
 62 |         if self.pointer_count() != other.pointer_count():
 63 |             return False
 64 |         if self.type != other.type:
 65 |             return False
 66 |         return True
 67 | 
 68 |     def is_integer(self):
 69 |         """
 70 |         :rtype: bool
 71 |         """
 72 |         return self.pointer_count() > 0 or self.type in \
 73 |                                            ['char', 'short', 'int', 'long', 'long long',
 74 |                                             'signed char', 'signed short', 'signed int', 'signed long',
 75 |                                             'signed long long',
 76 |                                             'unsigned char', 'unsigned short', 'unsigned int', 'unsigned long',
 77 |                                             'unsigned long long']
 78 | 
 79 |     def is_number(self):
 80 |         """
 81 |         :rtype: bool
 82 |         """
 83 |         return self.pointer_count() > 0 or self.type in \
 84 |                                            ['char', 'short', 'int', 'long', 'long long',
 85 |                                             'signed char', 'signed short', 'signed int', 'signed long',
 86 |                                             'signed long long',
 87 |                                             'unsigned char', 'unsigned short', 'unsigned int', 'unsigned long',
 88 |                                             'unsigned long long',
 89 |                                             'float', 'double']
 90 | 
 91 | 
 92 | class StructType(CType):
 93 |     def __init__(self, members=list()):
 94 |         """
 95 |         :type members: list[(str,CType)]
 96 |         :return:
 97 |         """
 98 |         CType.__init__(self, 'struct')
 99 |         self.members = {}  # type: dict[str,CType]
100 |         self.offset = {}
101 |         self.size = 0
102 |         for member in members:
103 |             self.members[member[0]] = member[1]
104 |             self.offset[member[0]] = self.size
105 |             self.size += member[1].size
106 |             self.size = ((self.size - 1) / 4 + 1) * 4
107 | 
108 |     def __repr__(self):
109 |         return self.__add_star__('struct ' + repr(self.members))
110 | 
111 |     def __eq__(self, other):
112 |         return CType.__eq__(self, other) and has_same_members(self, other)
113 | 
114 | 
115 | class UnionType(CType):
116 |     def __init__(self, members=list()):
117 |         """
118 |         :type members: list[(str,CType)]
119 |         :return:
120 |         """
121 |         CType.__init__(self, 'union')
122 |         self.members = {}  # type: dict[str,CType]
123 |         self.size = 0  # type: int
124 |         for member in members:
125 |             self.members[member[0]] = member[1]
126 |             if member[1].size > self.size:
127 |                 self.size = member[1].size
128 | 
129 |     def __repr__(self):
130 | 
131 |         return self.__add_star__('union ' + repr(self.members))
132 | 
133 |     def __eq__(self, other):
134 |         return CType.__eq__(self, other) and has_same_members(self, other)
135 | 
136 | 
137 | class EnumType(CType):
138 |     def __init__(self, values):
139 |         """
140 |         :type values: dict[(str,int)]
141 |         :return:
142 |         """
143 |         CType.__init__(self, 'enum')
144 |         self.values = values
145 |         self.size = 4
146 | 
147 |     def __repr__(self):
148 |         return self.__add_star__('enum ' + repr(self.values))
149 | 
150 |     def __eq__(self, other):
151 |         raise Exception('Not support enum')
152 | 
153 | 
154 | class FuncType(CType):
155 |     def __init__(self, return_type,
156 |                  parameter_list=list(),
157 |                  parameter_list_is_extendable=False,
158 |                  compound_statement=None):
159 |         """
160 |         :type return_type: CType
161 |         :type parameter_list: list[(str,CType)]
162 |         :type parameter_list_is_extendable: bool
163 |         :type compound_statement: TreeNode
164 |         """
165 |         CType.__init__(self, 'function')
166 |         self.return_type = return_type  # type: CType
167 |         self.storage_class = return_type.storage_class
168 |         return_type.storage_class = None
169 |         self.parameter_list = parameter_list  # type: list[(str,CType)]
170 |         self.parameter_list_is_extendable = \
171 |             parameter_list_is_extendable  # type: bool
172 |         self.compound_statement = compound_statement  # type: TreeNode
173 | 
174 |     def __repr__(self):
175 |         rval = repr(self.return_type) + " function("
176 |         for parameter in self.parameter_list:
177 |             rval += repr(parameter[1]) + ' ' + parameter[0] + ','
178 |         if self.parameter_list_is_extendable:
179 |             rval += '...'
180 |         rval += ')'
181 |         if self.compound_statement is not None:
182 |             rval += repr(self.compound_statement.context)
183 |         return self.__add_star__(rval)
184 | 
185 |     def __eq__(self, other):
186 |         """
187 |         :type other: FuncType
188 |         :rtype: bool
189 |         """
190 |         if self.type != other.type:
191 |             return False
192 |         if self.pointer_count() + other.pointer_count() > 1:
193 |             if self.pointer_count() != other.pointer_count():
194 |                 return False
195 |         if not self.return_type == other.return_type:
196 |             return False
197 |         if not self.parameter_list_is_extendable == other.parameter_list_is_extendable:
198 |             return False
199 |         if not len(self.parameter_list) == len(other.parameter_list):
200 |             return False
201 |         for i in xrange(len(self.parameter_list)):
202 |             if not self.parameter_list[i][1] == other.parameter_list[i][1]:
203 |                 return False
204 |         return True
205 | 
206 | 
207 | class ArrayType(CType):
208 |     def __init__(self, c_type, length):
209 |         """
210 |         :type c_type: CType
211 |         :type length: int
212 |         :return:
213 |         """
214 |         CType.__init__(self, 'array', size=length * c_type.Size())
215 |         self.length = length
216 |         self.member_type = c_type
217 |         self.storage_class = c_type.storage_class
218 |         c_type.storage_class = None
219 | 
220 |     def __repr__(self):
221 |         return self.__add_star__(repr(self.member_type) + "[%d]" % self.length)
222 | 
223 |     def __eq__(self, other):
224 |         """
225 |         :type other: ArrayType
226 |         :rtype: bool
227 |         """
228 |         if not CType.__eq__(self, other):
229 |             return False
230 |         return self.length == other.length and \
231 |                self.member_type == other.member_type
232 | 
233 | 
234 | class LiteralType(CType):
235 |     def __init__(self, val):
236 |         """
237 |         :type c_type: CType
238 |         :return:
239 |         """
240 |         CType.__init__(self, '')
241 |         self.val = val
242 |         if isinstance(val, str):
243 |             self.type = 'char'
244 |             self.size = 1
245 |             self.is_const = [True, False]
246 |         elif isinstance(val, int):
247 |             self.type = 'int'
248 |             self.size = 4
249 |             self.is_const = [True]
250 |         elif isinstance(val, float):
251 |             self.type = 'double'
252 |             self.size = 8
253 |             self.is_const = [True]
254 | 
255 | 
256 | class Context:
257 |     outer_context = None  # type: Context
258 |     func_type = None  # type: FuncType
259 |     local = None  # type: dict[str,CType]
260 | 
261 |     def __init__(self, outer_context=None, func_type=None):
262 |         self.outer_context = outer_context  # type: Context
263 |         self.func_type = func_type  # type: FuncType
264 |         self.local = {}
265 | 
266 |     def __repr__(self):
267 |         return " local: " + repr(self.local)
268 | 
269 |     def get_return_type(self):
270 |         """
271 |         :rtype: CType
272 |         """
273 |         if self.func_type is None:
274 |             if self.outer_context is None:
275 |                 return  # global_context has no return type
276 |             else:
277 |                 return self.outer_context.get_return_type()
278 |         else:
279 |             return self.func_type.return_type
280 | 
281 |     def get_type_by_id(self, identifier):
282 |         """
283 |         :type identifier: str
284 |         :rtype: CType
285 |         """
286 |         if identifier in self.local:
287 |             return self.local[identifier]
288 |         if self.func_type is not None:
289 |             for parameter in self.func_type.parameter_list:
290 |                 if identifier == parameter[0]:
291 |                     return parameter[1]
292 |         if self.outer_context is not None:
293 |             return self.outer_context.get_type_by_id(identifier)
294 |         return None  # if not find
295 | 
296 |     def add_literal(self, name, literal):
297 |         """
298 |         :type name: str
299 |         :type literal: LiteralType
300 |         """
301 |         context = self
302 |         while context.outer_context is not None:
303 |             context = context.outer_context
304 |         context.literal[name] = literal
305 | 
306 | 
307 | class GlobalContext(Context):
308 |     def __init__(self):
309 |         Context.__init__(self)
310 |         self.literal = {}  # type: dict[str,LiteralType]
311 | 
312 |     def __repr__(self):
313 |         return 'literals:' + repr(self.literal) + '\n' + Context.__repr__(self)
314 | 
315 | 
316 | global_context = GlobalContext()  # module-level GlobalContext instance; main.py imports it
317 | error = [False]  # one-element flag list; main.py skips code generation when error[0] is true
318 | 
319 | 
320 | class TreeNode(list):
321 |     def __init__(self, lineno=-1):
322 |         """
323 |         :return:
324 |         """
325 |         self.lineno = lineno  # type: int
326 |         # self.ast = self  # type: # list[list]
327 | 
328 | 
329 | # class LeafNode(str):
330 | #     def __init__(self, lineno=-1):
331 | #         """
332 | #         :return:
333 | #         """
334 | #         self.lineno = lineno  # type: int
335 | 
336 | 
337 | # self.ast = ast  # type: list[list]
338 | #     for key in kwargs:
339 | #         self.__setattr__(key, kwargs[key])
340 | #
341 | # def __getitem__(self, item):
342 | #     return self.ast.__getitem__(item)
343 | #
344 | # def __setitem__(self, key, value):
345 | #     self.ast.__setitem__(key, value)
346 | #
347 | # def __len__(self):
348 | #     return self.ast.__len__()
349 | 
350 | 
351 | def has_same_members(struct_type1, struct_type2):
352 |     """
353 |     :type struct_type1: StructType
354 |     :type struct_type2: StructType
355 |     :rtype: bool
356 |     """
357 |     for member in struct_type1.members:
358 |         if member not in struct_type2.members \
359 |                 or not struct_type1.members[member] == struct_type2.members[member]:
360 |             return False
361 | 
362 |     for member in struct_type2.members:
363 |         if member not in struct_type1.members \
364 |                 or not struct_type2.members[member] == \
365 |                         struct_type1.members[member]:
366 |             return False
367 |     return True
368 | 


--------------------------------------------------------------------------------
/public/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hlFu/ZCC/811bd987c9d7a3754f7bc9c9d986359ee3df7327/public/__init__.py


--------------------------------------------------------------------------------
/public/const.py:
--------------------------------------------------------------------------------
 1 | # Constant values are defined here.
 2 | 
 3 | UNDEFINED = -100  # NOTE(review): looks like a "not set" sentinel -- confirm against its users
 4 | 
 5 | class NodeKind:
 6 |     STMT = 1
 7 |     EXP = 2
 8 |     
 9 | class StmtKind:
10 |     IF     = 1
11 |     REPEAT = 2
12 |     ASSIGN = 3
13 | 
14 | class ExpKind:
15 |     OP    = 1
16 |     CONST = 2
17 |     ID    = 3
18 | 
19 | class ExpType:
20 |     VOID    = 1
21 |     INTEGER = 2
22 |     FLOAT   = 3
23 |     BOOLEAN = 4


--------------------------------------------------------------------------------
/symbol/.gitignore:
--------------------------------------------------------------------------------
1 | *.tmp


--------------------------------------------------------------------------------
/symbol/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hlFu/ZCC/811bd987c9d7a3754f7bc9c9d986359ee3df7327/symbol/__init__.py


--------------------------------------------------------------------------------
/test/a.s:
--------------------------------------------------------------------------------
 1 | 	.file	"a.c"
 2 | 	.intel_syntax noprefix	# Intel operand order, register names without '%'
 3 | 	.section	.rodata
 4 | .LC0:
 5 | 	.string	"hello"	# pushed as the argument of puts in foo
 6 | .LC1:
 7 | 	.string	"%d\n"	# printf format pushed in both foo and main
 8 | 	.text
 9 | 	.globl	foo
10 | 	.type	foo, @function
11 | foo:	# calls puts(.LC0) and printf(.LC1, [ebp+8]), then returns [ebp+8] in eax
12 | 	push	ebp
13 | 	mov	ebp, esp
14 | 	sub	esp, 8
15 | 	sub	esp, 12	# 12 bytes here + the 4-byte push below = the 16 removed after the call
16 | 	push	OFFSET FLAT:.LC0
17 | 	call	puts
18 | 	add	esp, 16
19 | 	sub	esp, 8	# 8 bytes here + two 4-byte pushes = the 16 removed after the call
20 | 	push	DWORD PTR [ebp+8]
21 | 	push	OFFSET FLAT:.LC1
22 | 	call	printf
23 | 	add	esp, 16
24 | 	mov	eax, DWORD PTR [ebp+8]	# return value: the word the caller pushed before the call
25 | 	leave
26 | 	ret
27 | 	.size	foo, .-foo
28 | 	.globl	main
29 | 	.type	main, @function
30 | main:	# stores 2 in a local, calls foo with it, prints foo's result with .LC1, returns 0
31 | 	lea	ecx, [esp+4]	# ecx = esp+4, used at the end to restore esp
32 | 	and	esp, -16	# round esp down to a multiple of 16
33 | 	push	DWORD PTR [ecx-4]
34 | 	push	ebp
35 | 	mov	ebp, esp
36 | 	push	ecx	# saved at [ebp-4]
37 | 	sub	esp, 20
38 | 	mov	DWORD PTR [ebp-12], 2
39 | 	sub	esp, 12
40 | 	push	DWORD PTR [ebp-12]
41 | 	call	foo
42 | 	add	esp, 16
43 | 	mov	DWORD PTR [ebp-16], eax	# keep foo's return value in a second local
44 | 	sub	esp, 8
45 | 	push	DWORD PTR [ebp-16]
46 | 	push	OFFSET FLAT:.LC1
47 | 	call	printf
48 | 	add	esp, 16
49 | 	mov	eax, 0	# return value 0
50 | 	mov	ecx, DWORD PTR [ebp-4]	# reload the ecx saved by the push above
51 | 	leave
52 | 	lea	esp, [ecx-4]	# undo the alignment: esp back to its value at entry
53 | 	ret
54 | 	.size	main, .-main
55 | 	.ident	"GCC: (GNU) 5.3.1 20160406 (Red Hat 5.3.1-6)"
56 | 	.section	.note.GNU-stack,"",@progbits
57 | 


--------------------------------------------------------------------------------
/test/array.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * multi-dimension array
 3 |  */
 4 | #include "stdio.h"
 5 | int main(void) /* fills a 5x5 array with i*5+j and prints it one row per line */
 6 | {
 7 |     int a[5][5];
 8 |     int i,j;
 9 |     for(i=0;i<5;++i)
10 |     {
11 |         for (j=0;j<5;++j)
12 |         {
13 |             a[i][j]=i*5+j;
14 |             printf("%02d ",a[i][j]); /* element is read back right after the store */
15 |         }
16 |         puts(""); /* ends the row */
17 |     }
18 |     return 0;
19 | }
20 | 
21 | 


--------------------------------------------------------------------------------
/test/basic.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | basic expression:for if while
 3 | basic type: int float double char pointer
 4 | glibc:scanf printf
 5 | constant: string char float
 6 | scope: local, global, static local, compound_statement
 7 | arithmetic operation; logical operation
 8 | priority
 9 | declaration definition
10 | array
11 | increment
12 | preprocessing
13 | */
14 | #include "stdio.h"
15 | #define UPPERCASE_A 65 /* ASCII code of 'A' */
16 | #define LOWERCASE_A 97 /* ASCII code of 'a' */
17 | #define LOWERCASE_Z 122 /* ASCII code of 'z' */
18 | int fib(int n); /* defined after main */
19 | int n,i; /* globals: n is filled by scanf in main, i is main's loop counter */
20 | int main(int argc,char **argv) /* upper-cases argv[0], prints fib(n) for an n read from stdin, then runs a float loop */
21 | {
22 |     double d,f;
23 |     char *s;                   
24 | 
25 |     s=*argv; /* argv[0] */
26 |     while(*s!=0)
27 |     {
28 |         if(*s<=LOWERCASE_Z&&*s>=LOWERCASE_A)
29 |             *s=*s+(UPPERCASE_A-LOWERCASE_A); /* lower-case letter -> upper-case, in place */
30 |         ++s;
31 |     }    
32 |     printf("%s\n",*argv);
33 | 
34 |     scanf("%d",&n);
35 |     printf("%d\n",fib(n));
36 | 
37 |     f=0.5;
38 |     d=1.5;
39 | 
40 |     for(i=0;i<n;++i)
41 |     {
42 |         int a; /* declared inside the loop body's compound statement */
43 |         a=3;
44 |         f=(f+d*i)/a; /* mixes double and int operands */
45 |     }
46 |     printf("f=%f\n",f);
47 | 
48 | 
49 |     return 0;
50 | }
51 | 
52 | int fib(int n) /* recursive Fibonacci: fib(1)=1, and 0 for any n below 1 */
53 | {
54 |     if(n>1)
55 |     {
56 |         return fib(n-1)+fib(n-2);
57 |     }
58 |     else if(n==1)
59 |     {
60 |         return 1;
61 |     }
62 |     else
63 |     {
64 |         return 0;
65 |     }
66 | }
67 | 


--------------------------------------------------------------------------------
/test/basic.i:
--------------------------------------------------------------------------------
 1 | # 1 "basic.c"
 2 | # 1 "<built-in>"
 3 | # 1 "<command-line>"
 4 | # 1 "/usr/include/stdc-predef.h" 1 3 4
 5 | # 1 "<command-line>" 2
 6 | # 1 "basic.c"
 7 | # 14 "basic.c"
 8 | # 1 "stdio.h" 1
 9 | 
10 | 
11 | int printf(char *format,...); /* these three declarations follow the "stdio.h" line marker above */
12 | int scanf(char *format,...);
13 | int puts(char* s);
14 | # 15 "basic.c" 2
15 | 
16 | 
17 | 
18 | int fib(int n);
19 | int n,i;
20 | int main(int argc,char **argv) /* upper-cases argv[1], prints fib(n) for an n read from stdin, then runs a float loop */
21 | {
22 |     float f;
23 |     double d;
24 |     char *s;
25 | 
26 |     s=argv[1]; /* NOTE(review): argv[1] is used without checking argc */
27 |     while(*s!=0)
28 |     {
29 |         if(*s<='z'&&*s>='a')
30 |             *s=*s+'A'-'a'; /* lower-case letter -> upper-case, in place */
31 |         s++;
32 |     }
33 |     printf("%s\n",argv[1]);
34 | 
35 |     scanf("%d",&n);
36 |     printf("%d\n",fib(n));
37 | 
38 |     f=0.5;
39 |     d=1.5;
40 | 
41 |     for(i=0;i<n;i++)
42 |     {
43 |         int a;
44 |         a=3;
45 |         f=(f+d*i)/a; /* mixes float, double and int operands */
46 |     }
47 |     printf("f=%f\n",f);
48 | 
49 | 
50 |     return 0;
51 | }
52 | 
53 | int fib(int n) /* recursive Fibonacci: fib(1)=1, and 0 for any n below 1 */
54 | {
55 | 
56 | 
57 | 
58 |     if(n>1)
59 |     {
60 |         return fib(n-1)+fib(n-2);
61 |     }
62 |     else if(n==1)
63 |     {
64 |         return 1;
65 |     }
66 |     else
67 |     {
68 |         return 0;
69 |     }
70 | }
71 | 


--------------------------------------------------------------------------------
/test/basic1.c:
--------------------------------------------------------------------------------
 1 | #include "stdio.h"
 2 | 
 3 | int main(int argc,char **argv) /* upper-cases argv[0] in place and prints it */
 4 | {
 5 |     char *s;
 6 |     s=*argv; /* argv[0] */
 7 | 
 8 |     while(*s!=0)
 9 |     {
10 |         if(*s>='a'&&*s<='z')
11 |             *s=*s+(65-97); /* 65 and 97 are the ASCII codes of 'A' and 'a' */
12 |         ++s;
13 |     }
14 | 
15 |     printf("%s",*argv);
16 |     puts(""); /* terminating newline */
17 | 
18 |     return 0;
19 | }
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/test/basic2.c:
--------------------------------------------------------------------------------
 1 | #include "stdio.h"
 2 | int fib(int n);
 3 | int i;
 4 | int main() /* reads the global i from stdin and prints fib(i) */
 5 | {
 6 |     scanf("%d",&i);
 7 |     printf("%d\n",fib(i));
 8 | 
 9 |     return 0;
10 | }
11 | int fib(int n) /* recursive Fibonacci: fib(1)=1, and 0 for any n below 1 */
12 | {
13 |     int i; /* shadows the global i; not used in this function */
14 |     if(n>1)
15 |     {
16 |         return fib(n-1)+fib(n-2);
17 |     }
18 |     else if(n==1)
19 |     {
20 |         return 1;
21 |     }
22 |     else
23 |     {
24 |         return 0;
25 |     }
26 | }
27 | 


--------------------------------------------------------------------------------
/test/basic3.c:
--------------------------------------------------------------------------------
 1 | #include "stdio.h"
 2 | int main() /* double arithmetic: reads i, evaluates one mixed *, +, -, / expression, prints it */
 3 | {
 4 |     double i,j;
 5 |     j=2.3;
 6 |     scanf("%lf",&i);
 7 |     i=i*j+i*(i*j-i*j)/j; /* the parenthesised difference subtracts i*j from itself */
 8 |     printf("%lf\n",i);
 9 | 
10 |     return 0;
11 | }


--------------------------------------------------------------------------------
/test/errorID.c:
--------------------------------------------------------------------------------
1 | int $a; /* presumably the invalid-identifier case this file is named for -- confirm against the lexer */
2 | 


--------------------------------------------------------------------------------
/test/error_info.c:
--------------------------------------------------------------------------------
 1 | 
 2 | // function definition is inconsistent with its declaration
 3 | int f(int i,...);
 4 | int f(int j){ /* no '...' here, unlike the prototype on the previous line */
 5 |     return 0;
 6 | }
 7 | 
 8 | int g(int i){ /* takes one int; main calls it with 1.0 and with a struct */
 9 |     return 0;
10 | }
11 | typedef struct{ /* struct type used by main for the mismatch cases */
12 |     int n;
13 | } A;
14 | int main(int argc, char const *argv[])
15 | {
16 | 
17 |     // redefinition
18 |     int k;
19 |     int k;
20 |     int count;
21 |     // type mismatch
22 |     A a;
23 |     a = 5;
24 |     // undefined variable
25 |     var = 3;
26 |     // wrong operand type
27 |     1.0 >> 4;
28 |     // typo (misspelled identifier)
29 |     cont = 4;
30 |     g(1.0);
31 |     // argument list mismatch
32 |     g(a);
33 |     // return value mismatch
34 |     return a;
35 | }
36 | 
37 | // Semantic Error at line 4:  'int function(int j,)' is not consistent with old declaration 'int function(int i,...)'
38 | //     int f ( int j ) { return 0 ; }
39 | //
40 | // Syntax error at 'int', at line: 22, column: 5.
41 | // Error type: missing semicolon before int. at line: 22, lex pos: 258 in declaration.
42 | //
43 | // Semantic Error at line 18:  Redeclare k
44 | //     k
45 | //
46 | // Semantic Error at line 23:  'int const' cannot be assigned to 'struct {'n': int}'
47 | //     a = 5
48 | //
49 | // Semantic Error at line 25:  Unknown identifier var
50 | //     var
51 | //
52 | // Semantic Error at line 27:  double const is not or cannot be recognized as integer
53 | //     1.0
54 | //
55 | // Semantic Error at line 29:  Unknown identifier 'cont', do you mean 'count'?
56 | //     cont
57 | //
58 | // Semantic Error at line 32:  'struct {'n': int}' can't convert to 'int'
59 | //     a
60 | //
61 | // Semantic Error at line 34:  'struct {'n': int}' is not consistant with the function return type 'int'
62 | //     return a ;
63 | 


--------------------------------------------------------------------------------
/test/error_pos.c:
--------------------------------------------------------------------------------
1 | 
2 | int a, b, c;
3 | c = a + b;
4 | int d;


--------------------------------------------------------------------------------
/test/missRightCurly.c:
--------------------------------------------------------------------------------
 1 | //
 2 | //int a, b, c;
 3 | //int main(int argc, char *argv[]) {
 4 | //	c = a + b;
 5 | //
 6 | //	
 7 | //int b, c;
 8 | 
 9 | int f(){
10 | 
11 | int d;


--------------------------------------------------------------------------------
/test/missSEMI.c:
--------------------------------------------------------------------------------
 1 | 
 2 | int b
 3 | 
 4 | int main(int argc, char *argv[]) {
 5 | 	int a, b, c, d;
 6 | 	int $a;
 7 | 	
 8 | 	c = a + b;
 9 | 	d = a +/ b;
10 | 	d = a -/ b;
11 | 	d = a ^^ / b;
12 | 	d = a *|b;
13 | 	d = a >/ b;
14 | 	d = a </ b;
15 | 	d = a <=/ b;		
16 | 	d = a <</ b;	
17 | 	d = a ==/ b;	
18 | 	d = a &/ b;		
19 | 	d = a ^/ b;			
20 | 	d = a |/ b;		
21 | 	d = a &&| b;	
22 | 	d = a ||| b;				
23 | 
24 | 	
25 | 	a = b + c
26 | 	printf("asdf\n")
27 | 	b = a + c;
28 | 	printf("%d\n", a);
29 | 


--------------------------------------------------------------------------------
/test/multi_int.c:
--------------------------------------------------------------------------------
 1 | #include "stdio.h"
 2 | int main()
 3 | {
 4 |     int i,j;
 5 |     j=5;
 6 |     scanf("%d",&i);
 7 |     i=i*j+i*(i*j-j*j+i);
 8 |     printf("%d\n",i);
 9 | 
10 |     return 0;
11 | }
12 | 


--------------------------------------------------------------------------------
/test/out.txt:
--------------------------------------------------------------------------------
 1 | 	.intel_syntax noprefix
 2 | 	.section .rodata
 3 | 	.text
 4 | 	.globl main
 5 | 	.type main, @function
 6 | main:
 7 | 	push ebp
 8 | 	mov ebp, esp
 9 | 	sub esp, 64
10 | 	mov edx, 1
11 | 	mov eax, 0
12 | 	mov [esp+28], edx
13 | 	mov eax, 0
14 | 	mov edx, [esp+24]
15 | 	add eax, None
16 | 	mov edx, eax
17 | 	mov eax, 0
18 | 	mov [esp+24], edx
19 | 	.size main, .-main
20 | 	.ident	"GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
21 | 	.section	.note.GNU-stack,"",@progbits
22 | 


--------------------------------------------------------------------------------
/test/pointer.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * embedded functional pointer
 3 |  */
 4 | #include "stdio.h"
 5 | int i;
 6 | 
 7 | void print_int(){
 8 |     printf("%d\n", i);
 9 |     return;
10 | }
11 | 
12 | void (*high_order_func(int n)) (){
13 |     i = n;
14 |     return print_int;
15 | }
16 | 
17 | int main(){
18 |     void (*(*f)(int n))();
19 |     f = high_order_func;
20 |     f(2)();
21 |     return 0;
22 | }
23 | 


--------------------------------------------------------------------------------
/test/source_code_optimization.c:
--------------------------------------------------------------------------------
 1 | int main(int argc, char const *argv[])
 2 | {
 3 |     int c;
 4 |     int flag;
 5 |     c = 2 + 3 * 4;//常量压缩
 6 |     if ((2 - 2)*9){
 7 |         //这个if语句经过常量压缩，死代码消除后，会被剪掉
 8 |     }
 9 | 
10 |     if (1){
11 |         c = 2;
12 |         //这个复合语句会替换掉if语句
13 |     }
14 |     else{
15 |         c = 3;
16 |         //这个复合语句会被剪掉
17 |     }
18 | 
19 |     if(flag){
20 |         return 0;
21 |         c = c + 1;  //return后的语句被删除
22 |     }
23 |     return 0;
24 | }


--------------------------------------------------------------------------------
/test/stdio.h:
--------------------------------------------------------------------------------
1 | #ifndef _STDIO_H_
2 | #define _STDIO_H_
3 | int printf(char *format,...);
4 | int scanf(char *format,...);
5 | int puts(char* s);
6 | #endif
7 | 


--------------------------------------------------------------------------------
/test/struct.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * embedded struct
 3 |  * member access: direct pointer
 4 |  * typedef
 5 |  */
 6 | #include "stdio.h"
 7 | typedef struct
 8 | {
 9 |     int a;
10 |     char c;
11 |     struct {
12 |         int b;
13 |         double d;
14 |     }inner;
15 | }myStruct;
16 | 
17 | void modifyStruct(myStruct* sp)
18 | {
19 |     sp->a=4;
20 |     sp->c='!';
21 |     sp->inner.b=5;
22 |     sp->inner.d=55.2;
23 |     return;
24 | }
25 | 
26 | int main(void)
27 | {
28 |     myStruct p[3];
29 |     myStruct *sp;
30 | 
31 |     sp=&p[1];
32 |     p[1].a=2;
33 |     p[1].c='a';
34 |     p[1].inner.b=3;
35 |     p[1].inner.d=12.3;
36 |     printf("before modified\n");
37 |     printf("p[1].a=%d\tp[1].c=%c\tp[1].inner.b=%d\tp[1].inner.d=%lf\n",p[1].a,p[1].c,p[1].inner.b,p[1].inner.d);
38 |     modifyStruct(sp);
39 |     printf("after modified\n");
40 |     printf("p[1].a=%d\tp[1].c=%c\tp[1].inner.b=%d\tp[1].inner.d=%lf\n",p[1].a,p[1].c,p[1].inner.b,p[1].inner.d);
41 | 
42 |     return 0;
43 | }
44 | 
45 | 


--------------------------------------------------------------------------------
/test/test1.c:
--------------------------------------------------------------------------------
 1 | int g_i;
 2 | static int ss;
 3 | static int sss;
 4 | 
 5 | int foo(int n){
 6 |     static int x3;
 7 |     int x1,x2;
 8 |     x1=2;
 9 |     x2=3;
10 |     x1=x1+1;
11 |     return n+1;
12 | }
13 | 
14 | 
15 | int main(void){
16 |     int l_i,x1,x2,x3;
17 |     l_i=l_i+1;
18 |     x1=1;
19 |     x1=x1+l_i;
20 |     x2=foo(x1);
21 |     return 1;
22 | }


--------------------------------------------------------------------------------
/test/test1.s:
--------------------------------------------------------------------------------
 1 | 	.file	"test1.c"
 2 | 	.intel_syntax noprefix
 3 | 	.comm	g_i,4,4
 4 | 	.local	ss
 5 | 	.comm	ss,4,4
 6 | 	.local	sss
 7 | 	.comm	sss,4,4
 8 | 	.text
 9 | 	.globl	main
10 | 	.type	main, @function
11 | main:
12 | 	push	ebp
13 | 	mov	ebp, esp
14 | 	and	esp, -16
15 | 	sub	esp, 32
16 | 	add	DWORD PTR [esp+20], 1
17 | 	mov	DWORD PTR [esp+24], 1
18 | 	mov	eax, DWORD PTR [esp+20]
19 | 	add	DWORD PTR [esp+24], eax
20 | 	mov	eax, DWORD PTR [esp+24]
21 | 	mov	DWORD PTR [esp], eax
22 | 	call	foo
23 | 	mov	DWORD PTR [esp+28], eax
24 | 	nop
25 | 	leave
26 | 	ret
27 | 	.size	main, .-main
28 | 	.globl	foo
29 | 	.type	foo, @function
30 | foo:
31 | 	push	ebp
32 | 	mov	ebp, esp
33 | 	sub	esp, 16
34 | 	mov	DWORD PTR [ebp-8], 2
35 | 	mov	DWORD PTR [ebp-4], 3
36 | 	add	DWORD PTR [ebp-8], 1
37 | 	mov	eax, DWORD PTR [ebp+8]
38 | 	add	eax, 1
39 | 	leave
40 | 	ret
41 | 	.size	foo, .-foo
42 | 	.ident	"GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
43 | 	.section	.note.GNU-stack,"",@progbits
44 | 


--------------------------------------------------------------------------------
/test/test4.c:
--------------------------------------------------------------------------------
 1 | #include<stdio.h>
 2 | int g_fast;
 3 | static int s_g_fast;
 4 | 
 5 | int main(void)
 6 | {
 7 |     int l_fast;
 8 |     static int  s_l_fast;
 9 |     l_fast=1;
10 |     s_l_fast=2;
11 |     l_fast=foo(s_l_fast);
12 |     printf("%d\n",l_fast);
13 |     return 0;
14 |                 }
15 | 
16 | int foo(int n){
17 |    return n+1; 
18 | }
19 | 


--------------------------------------------------------------------------------
/test/test4.s:
--------------------------------------------------------------------------------
 1 | 	.file	"test4.c"
 2 | 	.intel_syntax noprefix
 3 | 	.comm	g_fast,4,4
 4 | 	.local	s_g_fast
 5 | 	.comm	s_g_fast,4,4
 6 | 	.section	.rodata
 7 | .LC0:
 8 | 	.string	"%d\n"
 9 | 	.text
10 | 	.globl	main
11 | 	.type	main, @function
12 | main:
13 | 	push	ebp
14 | 	mov	ebp, esp
15 | 	and	esp, -16
16 | 	sub	esp, 32
17 | 	mov	DWORD PTR [esp+28], 1
18 | 	mov	DWORD PTR s_l_fast.1829, 2
19 | 	mov	eax, DWORD PTR s_l_fast.1829
20 | 	add	DWORD PTR [esp+28], eax
21 | 	mov	eax, DWORD PTR s_l_fast.1829
22 | 	mov	DWORD PTR [esp], eax
23 | 	call	foo
24 | 	mov	DWORD PTR [esp+28], eax
25 | 	mov	eax, DWORD PTR [esp+28]
26 | 	mov	DWORD PTR [esp+4], eax
27 | 	mov	DWORD PTR [esp], OFFSET FLAT:.LC0
28 | 	call	printf
29 | 	mov	eax, 0
30 | 	leave
31 | 	ret
32 | 	.size	main, .-main
33 | 	.globl	foo
34 | 	.type	foo, @function
35 | foo:
36 | 	push	ebp
37 | 	mov	ebp, esp
38 | 	mov	eax, DWORD PTR [ebp+8]
39 | 	add	eax, 1
40 | 	pop	ebp
41 | 	ret
42 | 	.size	foo, .-foo
43 | 	.local	s_l_fast.1829
44 | 	.comm	s_l_fast.1829,4,4
45 | 	.ident	"GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
46 | 	.section	.note.GNU-stack,"",@progbits
47 | 


--------------------------------------------------------------------------------
/test/test4_2.c:
--------------------------------------------------------------------------------
 1 | #include<stdio.h>
 2 | int g_fast;
 3 | static int s_g_fast;
 4 | 
 5 | int main(void)
 6 | {
 7 |     int l_fast;
 8 |     static int  s_l_fast;
 9 |     l_fast=1;
10 |     s_l_fast=2;
11 |     l_fast=foo(s_l_fast);
12 |     printf("%d\n",l_fast);
13 |     return 0;
14 |                 }
15 | 
16 | int foo(int n){
17 |    return n+1; 
18 | }
19 | 


--------------------------------------------------------------------------------
/test/test4_2.s:
--------------------------------------------------------------------------------
 1 | 	.file	"test4_2.c"
 2 | 	.intel_syntax noprefix
 3 | 	.section	.rodata.str1.1,"aMS",@progbits,1
 4 | .LC0:
 5 | 	.string	"%d\n"
 6 | 	.text
 7 | 	.globl	main
 8 | 	.type	main, @function
 9 | main:
10 | 	push	ebp
11 | 	mov	ebp, esp
12 | 	and	esp, -16
13 | 	sub	esp, 16
14 | 	mov	DWORD PTR s_l_fast.2034, 2
15 | 	mov	DWORD PTR [esp+8], 3
16 | 	mov	DWORD PTR [esp+4], OFFSET FLAT:.LC0
17 | 	mov	DWORD PTR [esp], 1
18 | 	call	__printf_chk
19 | 	mov	eax, 0
20 | 	leave
21 | 	ret
22 | 	.size	main, .-main
23 | 	.globl	foo
24 | 	.type	foo, @function
25 | foo:
26 | 	mov	eax, DWORD PTR [esp+4]
27 | 	add	eax, 1
28 | 	ret
29 | 	.size	foo, .-foo
30 | 	.local	s_l_fast.2034
31 | 	.comm	s_l_fast.2034,4,4
32 | 	.comm	g_fast,4,4
33 | 	.ident	"GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
34 | 	.section	.note.GNU-stack,"",@progbits
35 | 


--------------------------------------------------------------------------------
/test/test9.c:
--------------------------------------------------------------------------------
 1 | double a;
 2 | struct test{
 3 |     char a;
 4 |     int b;
 5 |     short c;
 6 |     double e;
 7 | };
 8 | int foo(char a, int b, short c, struct test d, char *s){
 9 |     d.e=10.5;
10 |     d.c=2;
11 |     if(a=='a')
12 |         return b-c;
13 |     else
14 |         return d.e-d.c;
15 | }
16 | 
17 | int main(){
18 |     struct test t;
19 |     a=4.5;
20 |     t.a='b';
21 |     t.b=2;
22 |     t.c=3;
23 |     t.e=5.4;
24 |     printf("%lf",t.e);
25 |     return foo('a',10,2,t,"mamsf");
26 | }
27 | 


--------------------------------------------------------------------------------
/test/test9.s:
--------------------------------------------------------------------------------
  1 | 	.file	"test9.c"
  2 | 	.intel_syntax noprefix
  3 | 	.globl	a
  4 | 	.data
  5 | 	.align 8
  6 | 	.type	a, @object
  7 | 	.size	a, 8
  8 | a:
  9 | 	.long	0
 10 | 	.long	1075052544
 11 | 	.text
 12 | 	.globl	foo
 13 | 	.type	foo, @function
 14 | foo:
 15 | 	push	ebp
 16 | 	mov	ebp, esp
 17 | 	sub	esp, 16
 18 | 	mov	edx, DWORD PTR [ebp+8]
 19 | 	mov	eax, DWORD PTR [ebp+16]
 20 | 	mov	BYTE PTR [ebp-4], dl
 21 | 	mov	WORD PTR [ebp-8], ax
 22 | 	fld	QWORD PTR .LC0
 23 | 	fstp	QWORD PTR [ebp+32]
 24 | 	mov	WORD PTR [ebp+28], 2
 25 | 	cmp	BYTE PTR [ebp-4], 97
 26 | 	jne	.L2
 27 | 	movsx	eax, WORD PTR [ebp-8]
 28 | 	mov	edx, DWORD PTR [ebp+12]
 29 | 	sub	edx, eax
 30 | 	mov	eax, edx
 31 | 	jmp	.L3
 32 | .L2:
 33 | 	fld	QWORD PTR [ebp+32]
 34 | 	movzx	eax, WORD PTR [ebp+28]
 35 | 	mov	WORD PTR [ebp-6], ax
 36 | 	fild	WORD PTR [ebp-6]
 37 | 	fsubp	st(1), st
 38 | 	fnstcw	WORD PTR [ebp-2]
 39 | 	movzx	eax, WORD PTR [ebp-2]
 40 | 	mov	ah, 12
 41 | 	mov	WORD PTR [ebp-10], ax
 42 | 	fldcw	WORD PTR [ebp-10]
 43 | 	fistp	DWORD PTR [ebp-16]
 44 | 	fldcw	WORD PTR [ebp-2]
 45 | 	mov	eax, DWORD PTR [ebp-16]
 46 | .L3:
 47 | 	leave
 48 | 	ret
 49 | 	.size	foo, .-foo
 50 | 	.section	.rodata
 51 | .LC4:
 52 | 	.string	"%lf"
 53 | .LC5:
 54 | 	.string	"mamsf"
 55 | 	.text
 56 | 	.globl	main
 57 | 	.type	main, @function
 58 | main:
 59 | 	push	ebp
 60 | 	mov	ebp, esp
 61 | 	and	esp, -16
 62 | 	sub	esp, 80
 63 | 	fld	QWORD PTR .LC2
 64 | 	fstp	QWORD PTR a
 65 | 	mov	BYTE PTR [esp+60], 98
 66 | 	mov	DWORD PTR [esp+64], 2
 67 | 	mov	WORD PTR [esp+68], 3
 68 | 	fld	QWORD PTR .LC3
 69 | 	fstp	QWORD PTR [esp+72]
 70 | 	fld	QWORD PTR [esp+72]
 71 | 	fstp	QWORD PTR [esp+4]
 72 | 	mov	DWORD PTR [esp], OFFSET FLAT:.LC4
 73 | 	call	printf
 74 | 	mov	DWORD PTR [esp+32], OFFSET FLAT:.LC5
 75 | 	mov	eax, DWORD PTR [esp+60]
 76 | 	mov	DWORD PTR [esp+12], eax
 77 | 	mov	eax, DWORD PTR [esp+64]
 78 | 	mov	DWORD PTR [esp+16], eax
 79 | 	mov	eax, DWORD PTR [esp+68]
 80 | 	mov	DWORD PTR [esp+20], eax
 81 | 	mov	eax, DWORD PTR [esp+72]
 82 | 	mov	DWORD PTR [esp+24], eax
 83 | 	mov	eax, DWORD PTR [esp+76]
 84 | 	mov	DWORD PTR [esp+28], eax
 85 | 	mov	DWORD PTR [esp+8], 2
 86 | 	mov	DWORD PTR [esp+4], 10
 87 | 	mov	DWORD PTR [esp], 97
 88 | 	call	foo
 89 | 	leave
 90 | 	ret
 91 | 	.size	main, .-main
 92 | 	.section	.rodata
 93 | 	.align 8
 94 | .LC0:
 95 | 	.long	0
 96 | 	.long	1076166656
 97 | 	.align 8
 98 | .LC2:
 99 | 	.long	0
100 | 	.long	1074921472
101 | 	.align 8
102 | .LC3:
103 | 	.long	-1717986918
104 | 	.long	1075157401
105 | 	.ident	"GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
106 | 	.section	.note.GNU-stack,"",@progbits
107 | 


--------------------------------------------------------------------------------
/yyparse/.gitignore:
--------------------------------------------------------------------------------
1 | *.out
2 | parsetab.py


--------------------------------------------------------------------------------
/yyparse/ZCClex.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import ply.lex as lex
  4 | # import ply.yacc as yacc
  5 | # from pprint import pprint
  6 | from symbol.symtab import is_type
  7 | from public.ZCCglobal import TreeNode
  8 | lexErrorInfo = []
  9 | 
 10 | 
 11 | # column = 0
 12 | 
 13 | # Compute column. 
 14 | #     input is the input text string
 15 | #     token is a token instance
 16 | def find_column(input, token):
 17 |     last_cr = input.rfind('\n', 0, token.lexpos)
 18 |     if last_cr < 0:
 19 |         last_cr = 0
 20 |     column = token.lexpos - last_cr
 21 |     #	print "lexpos: ", token.lexpos, " last_cr: ", last_cr
 22 |     return column
 23 | 
 24 | 
 25 | reserved_dict = {
 26 |     #	"auto"			:'AUTO',
 27 |     "break": 'BREAK',
 28 |     "case": 'CASE',
 29 |     "char": 'CHAR',
 30 |     "const": 'CONST',
 31 |     "continue": 'CONTINUE',
 32 |     "default": 'DEFAULT',
 33 |     "do": 'DO',
 34 |     "double": 'DOUBLE',
 35 |     "else": 'ELSE',
 36 |     "enum": 'ENUM',
 37 |     "extern": 'EXTERN',
 38 |     "float": 'FLOAT',
 39 |     "for": 'FOR',
 40 |     #	"goto"		 	:'GOTO',
 41 |     "if": 'IF',
 42 |     "int": 'INT',
 43 |     "long": 'LONG',
 44 |     #	"register"		:'REGISTER',
 45 |     "return": 'RETURN',
 46 |     "short": 'SHORT',
 47 |     "signed": 'SIGNED',
 48 |     "sizeof": 'SIZEOF',
 49 |     "static": 'STATIC',
 50 |     "struct": 'STRUCT',
 51 |     "switch": 'SWITCH',
 52 |     "typedef": 'TYPEDEF',
 53 |     "union": 'UNION',
 54 |     "unsigned": 'UNSIGNED',
 55 |     "void": 'VOID',
 56 |     # "volatile": 'VOLATILE',
 57 |     "while": 'WHILE',
 58 | }
 59 | 
 60 | literal_dict = {
 61 |     '(': 'LBRACKET',
 62 |     ')': 'RBRACKET',
 63 |     '[': 'LSQUAREBRACKET',
 64 |     ']': 'RSQUAREBRACKET',
 65 |     '{': 'LCURLYBRACKET',
 66 |     '}': 'RCURLYBRACKET',
 67 |     ';': 'SEMICOLON',
 68 |     '.': 'PERIOD',
 69 |     ',': 'COMMA',
 70 |     '&': 'AND',
 71 |     '*': 'STAR',
 72 |     '+': 'PLUS',
 73 |     '-': 'MINUS',
 74 |     '~': 'UNOT',
 75 |     '!': 'NOT',
 76 |     '/': 'DIVIDE',
 77 |     '%': 'MOD',
 78 |     '<': 'LT',
 79 |     '>': 'GT',
 80 |     '^': 'XOR',
 81 |     '|': 'OR',
 82 |     '?': 'QUESTIONMARK',
 83 |     ':': 'COLON',
 84 |     '=': 'ASSIGN'
 85 | }
 86 | 
 87 | tokens = (
 88 |     # 'AUTO',
 89 |     'BREAK',
 90 |     'CASE',
 91 |     'CHAR',
 92 |     'CONST',
 93 |     'CONTINUE',
 94 |     'DEFAULT',
 95 |     'DO',
 96 |     'DOUBLE',
 97 |     'ELSE',
 98 |     'ENUM',
 99 |     'EXTERN',
100 |     'FLOAT',
101 |     'FOR',
102 |     # 'GOTO',
103 |     'IF',
104 |     'INT',
105 |     'LONG',
106 |     # 'REGISTER',
107 |     'RETURN',
108 |     'SHORT',
109 |     'SIGNED',
110 |     'SIZEOF',
111 |     'STATIC',
112 |     'STRUCT',
113 |     'SWITCH',
114 |     'TYPEDEF',
115 |     'UNION',
116 |     'UNSIGNED',
117 |     'VOID',
118 |     #	'VOLATILE',
119 |     'WHILE',
120 |     "IDENTIFIER",
121 |     "TYPE_NAME",
122 |     "STRING_LITERAL",
123 |     "ELLIPSIS",
124 |     "RIGHT_ASSIGN",
125 |     "LEFT_ASSIGN",
126 |     "ADD_ASSIGN",
127 |     "SUB_ASSIGN",
128 |     "MUL_ASSIGN",
129 |     "DIV_ASSIGN",
130 |     "MOD_ASSIGN",
131 |     "AND_ASSIGN",
132 |     "XOR_ASSIGN",
133 |     "OR_ASSIGN",
134 |     "RIGHT_OP",
135 |     "LEFT_OP",
136 |     "INC_OP",
137 |     "DEC_OP",
138 |     "PTR_OP",
139 |     "AND_OP",
140 |     "OR_OP",
141 |     "LE_OP",
142 |     "GE_OP",
143 |     "EQ_OP",
144 |     "NE_OP",
145 |     'LBRACKET',
146 |     'RBRACKET',
147 |     'LSQUAREBRACKET',
148 |     'RSQUAREBRACKET',
149 |     'LCURLYBRACKET',
150 |     'RCURLYBRACKET',
151 |     'SEMICOLON',
152 |     'PERIOD',
153 |     'COMMA',
154 |     'AND',
155 |     'STAR',
156 |     'PLUS',
157 |     'MINUS',
158 |     'UNOT',
159 |     'NOT',
160 |     'DIVIDE',
161 |     'MOD',
162 |     'LT',
163 |     'GT',
164 |     'XOR',
165 |     'OR',
166 |     'QUESTIONMARK',
167 |     'COLON',
168 |     'ASSIGN',
169 |     "ERRORID",
170 |     "NUMBER_CONSTANT",
171 |     "CHARACTER_CONSTANT",
172 |     "EOF"
173 | )
174 | 
175 | 
176 | def t_STRING_LITERAL(t):
177 |     r'\"(\\.|[^\\\"])*\"'
178 |     value = t.value
179 |     t.value = TreeNode()
180 |     t.value.lineno = t.lexer.lineno
181 |     t.value.append('STRING')
182 |     t.value.append(value)
183 |     return t
184 | 
185 | 
186 | def t_ignore_COMMENT(t):
187 |     r'(/\*(.|\n)*?\*/)|(//.*)|(^\#.*)|(\n\#.*)|(\r\n\#.*)'
188 |     t.lexer.lineno += t.value.count('\n')
189 |     pass
190 | 
191 | 
192 | def t_IDENTIFIER(t):
193 |     r"""[_A-Za-z][_A-Za-z0-9]*"""
194 |     t.type = reserved_dict.get(t.value, 'IDENTIFIER')
195 |     if t.type == 'IDENTIFIER' and is_type(t.value):
196 |         t.type = "TYPE_NAME"
197 |     if t.type == 'IDENTIFIER':
198 |         value = t.value
199 |         t.value = TreeNode()
200 |         t.value.lineno = t.lexer.lineno
201 |         t.value.append('IDENTIFIER')
202 |         t.value.append(value)
203 |     return t
204 | 
205 | 
206 | def t_NUMBER_CONSTANT(t):
207 |     r"""([0-9]*\.[0-9]+|[0-9]+\.)([eE][+\-]?[0-9]+)?[flFL]?|[0-9]+([eE][+\-]?[0-9]+)[flFL]?|[1-9][0-9]*[uU]?[lL]{,2}|0[0-7]*[uU]?[lL]{,2}|0[xX][0-9a-fA-F]+[uU]?[lL]{,2}"""
208 |     val = eval(t.value)
209 |     if isinstance(val, float):
210 |         value = t.value
211 |         t.value = TreeNode()
212 |         t.value.lineno = t.lexer.lineno
213 |         t.value.append('DOUBLE')
214 |         t.value.append(value)
215 |     else:
216 |         value = t.value
217 |         t.value = TreeNode()
218 |         t.value.lineno = t.lexer.lineno
219 |         t.value.append('INTEGER')
220 |         t.value.append(value)
221 |     return t
222 | 
223 | 
224 | def t_CHARACTER_CONSTANT(t):
225 |     r"\'([^\'\\\n]|(\\[\'\"?\\abfnrtv]|[0-7]{1,3}|x[0-9a-fA-F]{1,2}))\'"
226 |     value = t.value
227 |     t.value = TreeNode()
228 |     t.value.lineno = t.lexer.lineno
229 |     t.value.append('INTEGER')
230 |     t.value.append(str(ord(eval(value))))
231 |     return t
232 | 
233 | 
234 | # def t_CONSTANT(t):
235 | # 	r'[1-9][0-9]*[Ee][+-]?[1-9][0-9]*[fFlL]?|[0-9]*\.[0-9]+([Ee][+-]?[0-9]+)?[fFlL]?|[0-9]+\.[0-9]*([Ee][+-]?[0-9]+)?[fFlL]?|0[xX][a-fA-F0-9]+(u|U)?(l|L){,2}|((0|[1-9][0-9]*)(u|U)?(l|L){,2})|\'(\S|\\([abfnrtv\\\'\"0]|[0-7]{3}|x[0-9a-fA-F]{2}))\''
236 | # #	r'0[xX][a-fA-F0-9]+(u|U)?(l|L){1,2}|'
237 | # #	r'0[0-9]+(u|U)?(l|L){1,2}|'
238 | # #	r'[0-9]+(u|U)?(l|L){1,2}|'
239 | # #	r'\'\S|\\([abfnrtv\\\'\"0]|[0-7]{3}|x[0-9a-fA-F]{2})\'|'
240 | # #	r'[0-9]+[Ee][+-]?[0-9]+[fFlL]?|'
241 | # #	r'[0-9]*\.[0-9]+([Ee][+-]?[0-9]+)?[fFlL]?|'
242 | # #	r'[0-9]+\.[0-9]*([Ee][+-]?[0-9]+)?[fFlL]?'
243 | #
244 | # 	return t
245 | 
246 | 
247 | 
248 | def t_ELLIPSIS(t):
249 |     r"\.\.\."
250 |     return t
251 | 
252 | 
253 | def t_RIGHT_ASSIGN(t):
254 |     r">>="
255 |     return t
256 | 
257 | 
258 | def t_LEFT_ASSIGN(t):
259 |     r"<<="
260 |     return t
261 | 
262 | 
263 | def t_ADD_ASSIGN(t):
264 |     r"\+="
265 |     return t
266 | 
267 | 
268 | def t_MUL_ASSIGN(t):
269 |     r"\*="
270 |     return t
271 | 
272 | 
273 | def t_DIV_ASSIGN(t):
274 |     r"/="
275 |     return t
276 | 
277 | 
278 | def t_MOD_ASSIGN(t):
279 |     r"%="
280 |     return t
281 | 
282 | 
283 | def t_AND_ASSIGN(t):
284 |     r"&="
285 |     return t
286 | 
287 | 
288 | def t_XOR_ASSIGN(t):
289 |     r"^="
290 |     return t
291 | 
292 | 
293 | def t_OR_ASSIGN(t):
294 |     r"\|="
295 |     return t
296 | 
297 | 
298 | def t_RIGHT_OP(t):
299 |     r">>"
300 |     return t
301 | 
302 | 
303 | def t_LEFT_OP(t):
304 |     r"<<"
305 |     return t
306 | 
307 | 
308 | def t_INC_OP(t):
309 |     r"\+\+"
310 |     return t
311 | 
312 | 
313 | def t_DEC_OP(t):
314 |     r"--"
315 |     return t
316 | 
317 | 
318 | def t_PTR_OP(t):
319 |     r"->"
320 |     return t
321 | 
322 | 
323 | def t_AND_OP(t):
324 |     r"&&"
325 |     return t
326 | 
327 | 
328 | def t_OR_OP(t):
329 |     r"\|\|"
330 |     return t
331 | 
332 | 
333 | def t_LE_OP(t):
334 |     r"<="
335 |     return t
336 | 
337 | 
338 | def t_GE_OP(t):
339 |     r">="
340 |     return t
341 | 
342 | 
343 | def t_EQ_OP(t):
344 |     r"=="
345 |     return t
346 | 
347 | 
348 | def t_NE_OP(t):
349 |     r"!="
350 |     return t
351 | 
352 | 
353 | def t_LITERAL(t):
354 |     r"[()\[\]{};.,&*+\-~!/%<>\^|?:=]"
355 |     t.type = literal_dict.get(t.value)
356 |     if t.value == '{':
357 |         t.lexer.curlyBalance += 1
358 |     elif t.value == '}':
359 |         t.lexer.curlyBalance -= 1
360 |     return t
361 | 
362 | 
363 | # literals = '()[]{};.,&*+-~!/%<>^|?:='
364 | 
365 | # Define a rule so we can track line numbers
366 | def t_newline(t):
367 |     r'\n'
368 |     t.lexer.lineno += 1 # len(t.value)
369 | 
370 | 
371 | t_ignore = ' \t'
372 | 
373 | 
374 | def t_ERRORID(t):
375 |     r"[^\s;}]+"
376 |     t.value = (t.value, "ERRORID")
377 |     return t
378 | 
379 | 
380 | def t_error(t):
381 |     error_column = find_column(t.lexer.lexdata, t)
382 |     print("Unknown text '%s' at line: %d, column: %d" % (t.value, t.lexer.lineno, error_column))
383 |     lexErrorInfo.append({
384 |         'pos': t.lexer.lexpos,
385 |         'lineno': t.lexer.lineno,
386 |         'column': error_column,
387 |         'value': t.value
388 |     })
389 |     t.lexer.skip(1)
390 | 
391 | 
392 | orig_lexer = lex.lex()
393 | 
394 | 
395 | # pprint(lexer.__dict__)
396 | 
397 | class ProxyLexer(object):  # wraps a lexer and appends one explicit end-of-input token
398 |     def __init__(self, lexer, eoftoken):
399 |         self.end = False  # True once the synthetic end token has been handed out
400 |         self.lexer = lexer
401 |         self.eof = eoftoken  # token type assigned to the synthetic token
402 | 
403 |     def token(self):
404 |         tok = self.lexer.token()
405 |         if tok is None:  # the wrapped lexer has nothing more to return
406 |             if self.end:
407 |                 self.end = False  # end token already sent: return None and re-arm
408 |             else:
409 |                 self.end = True
410 |                 tok = lex.LexToken()
411 |                 tok.type = self.eof
412 |                 tok.value = None
413 |                 tok.lexpos = self.lexer.lexpos
414 |                 tok.lineno = self.lexer.lineno
415 |         # Either a real token, the synthetic end token, or None.
416 |         return tok
417 | 
418 |     def __getattr__(self, name):  # attributes missing on the proxy come from the wrapped lexer
419 |         return getattr(self.lexer, name)
420 | 
421 | 
422 | lexer = ProxyLexer(orig_lexer, 'EOF')
423 | lexer.lexer.curlyBalance = 0
424 | 
425 | 
426 | def test_lex():
427 |     # data = raw_input()
428 | 
429 |     #	c_file_name = raw_input('c file name: ')
430 |     c_file_name = "test1.c"
431 |     c_file = open(c_file_name, "r")
432 |     contents = "".join(c_file.readlines())
433 | 
434 |     lexer.input(contents)
435 | 
436 |     while True:
437 |         tok = lexer.token()
438 |         if not tok:
439 |             break
440 |         print tok  # .value, find_column(lexer.lexdata, tok)
441 | 
442 | # test_lex()
443 | 


--------------------------------------------------------------------------------
/yyparse/ZCCparser.py:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/python
   2 | from __future__ import print_function
   3 | import ply.lex as lex
   4 | import ply.yacc as yacc
   5 | import ZCClex
   6 | from symbol.symtab import symtab_declaration, symtab_function_definition
   7 | from public.ZCCglobal import global_context, TreeNode
   8 | from ZCClex import tokens
   9 | from pprint import pprint
  10 | 
  11 | aTuple = (1, 2)
  12 | 
  13 | 
  14 | def handleMissingSEMI(p, parentname="", checkPair=()):
  15 |     last_idx = len(p) - 1
  16 |     if (len(checkPair) == 0 or (len(checkPair) > 0 and p[
  17 |             checkPair[0]] == checkPair[1])) and p[last_idx] != ';':
  18 |         print(
  19 |             "Error type: missing semicolon before %s. at line: %d, lex pos: %d in %s.\n" %
  20 |             (p[last_idx].value, p.lineno(last_idx), p.lexpos(last_idx), parentname))
  21 |         p[last_idx] = ';'
  22 |         parser.errorCounter = 0
  23 |         parser.errok()
  24 |         return [last_idx]
  25 |     else:
  26 |         return []
  27 | 
  28 | 
  29 | def handleMissingRCURLYBRACKET(p):
  30 |     last_idx = len(p) - 1
  31 |     if p[last_idx] != '}':
  32 |         print(
  33 |             "Error type: missing right curly bracket before %s. at line: %d, lex pos: %d.\n" %
  34 |             (p[last_idx], p.lineno(last_idx), p.lexpos(last_idx)))
  35 |         p[last_idx] = '}'
  36 |         parser.errorCounter = 0
  37 |         parser.errok()
  38 | 
  39 | 
  40 | def handleErrorID(p, idx):
  41 |     if len(p) > idx and isinstance(p[idx],
  42 |                                    type(aTuple)) and p[idx][1] == "ERRORID":
  43 |         print("Syntax error at %r, at line: %d, lex pos: %d." %
  44 |               (p[idx][0], p.lineno(idx), p.lexpos(idx)))
  45 |         print("Error type: wrong IDENTIFIER format.\n")
  46 |         p[idx] = p[idx][0]
  47 |         parser.errorCounter = 0
  48 | 
  49 | 
  50 | def construct_node(p, parent_name, del_list=[]):
  51 |     p[0] = TreeNode()
  52 |     p[0].append(parent_name)
  53 |     p[0].lineno = p.lineno(len(p) - 1)
  54 |     #    print("%s's del_list: " % (parent_name))
  55 |     #    print(del_list)
  56 |     for i in range(1, len(p)):
  57 |         if i not in del_list:
  58 |             p[0].append(p[i])
  59 |             if not isinstance(p[i], str):
  60 |                 if p[0].lineno == 0:
  61 |                     p[0].lineno = p[i].lineno
  62 |                 elif p[0].lineno > p[i].lineno:
  63 |                     p[0].lineno = p[i].lineno
  64 | 
  65 | 
  66 | def p_outer_translation_unit(p):
  67 |     """
  68 |     outer_translation_unit : translation_unit EOF
  69 |     """
  70 |     p[0] = p[1]
  71 |     # construct_node(p, "outer_translation_unit")
  72 | 
  73 | 
  74 | def p_translation_unit(p):
  75 |     """
  76 |     translation_unit : external_declaration
  77 |     | translation_unit external_declaration
  78 |     """
  79 |     if len(p) == 2:  # single external_declaration: start a new node
  80 |         construct_node(p, "translation_unit")
  81 |     elif len(p) == 3:
  82 |         # Left-recursive case: grow the existing node instead of nesting.
  83 |         p[1].append(p[2])
  84 |         p[0] = p[1]
  85 |     else:  # neither alternative above produces any other length
  86 |         raise Exception("translation_unit just has two children")
  87 | 
  88 | 
  89 | def p_external_declaration(p):
  90 |     """
  91 |     external_declaration : function_definition
  92 |     | declaration
  93 |     """
  94 |     p[0] = p[1]  # pass the child node through unchanged
  95 |     if p[0][0] == 'declaration':  # element 0 of a node is its production name
  96 |         symtab_declaration(p[0], global_context)
  97 |     elif p[0][0] == 'function_definition':
  98 |         symtab_function_definition(p[0], global_context)
  99 |     # No wrapper node is built for external_declaration itself.
 100 | 
 101 | 
 102 | def p_declaration(p):
 103 |     """
 104 |     declaration : declaration_specifiers SEMICOLON
 105 |     | declaration_specifiers init_declarator_list SEMICOLON
 106 |     | declaration_specifiers error
 107 |     | declaration_specifiers init_declarator_list error
 108 |     """
 109 |     del_list = handleMissingSEMI(p, "declaration")
 110 |     construct_node(p, "declaration", del_list)
 111 | 
 112 | 
 113 | #    print(p[0])
 114 | 
 115 | # def p_constant(p):
 116 | #     """
 117 | #     constant : NUMBER_CONSTANT
 118 | #     | CHARACTER_CONSTANT
 119 | #     """
 120 | #     construct_node(p, "constant")
 121 | 
 122 | 
 123 | def p_declaration_specifiers(p):
 124 |     """
 125 |     declaration_specifiers : type_specifier
 126 |     | type_specifier type_qualifier
 127 |     | type_qualifier type_specifier
 128 |     | storage_class_specifier type_specifier
 129 |     | storage_class_specifier type_specifier type_qualifier
 130 |     | storage_class_specifier type_qualifier type_specifier
 131 |     """
 132 |     # """
 133 |     # declaration_specifiers : storage_class_specifier
 134 |     # | storage_class_specifier declaration_specifiers
 135 |     # | type_specifier
 136 |     # | type_specifier declaration_specifiers
 137 |     # | type_qualifier
 138 |     # | type_qualifier declaration_specifiers
 139 |     # """
 140 |     construct_node(p, "declaration_specifiers")
 141 |     # printAST(p[0], 0)
 142 |     # pass
 143 | 
 144 | 
 145 | def p_primary_expression(p):
 146 |     """
 147 |     primary_expression : IDENTIFIER
 148 |         | ERRORID
 149 |         | NUMBER_CONSTANT
 150 |         | CHARACTER_CONSTANT
 151 |         | STRING_LITERAL
 152 |         | LBRACKET expression RBRACKET
 153 |     """
 154 |     handleErrorID(p, 1)
 155 |     if len(p) == 4:
 156 |         p[0] = p[2]
 157 |     else:
 158 |         construct_node(p, "primary_expression")
 159 | 
 160 | 
 161 | def p_postfix_expression(p):
 162 |     """
 163 |     postfix_expression : primary_expression
 164 |         | postfix_expression LSQUAREBRACKET expression RSQUAREBRACKET
 165 |         | postfix_expression LBRACKET RBRACKET
 166 |         | postfix_expression LBRACKET argument_expression_list RBRACKET
 167 |         | postfix_expression PERIOD IDENTIFIER
 168 |         | postfix_expression PTR_OP IDENTIFIER
 169 |         | postfix_expression PERIOD ERRORID
 170 |         | postfix_expression PTR_OP ERRORID
 171 |         | postfix_expression INC_OP
 172 |         | postfix_expression DEC_OP
 173 |     """
 174 |     handleErrorID(p, 3)
 175 |     if len(p) == 2:
 176 |         p[0] = p[1]
 177 |     else:
 178 |         construct_node(p, "postfix_expression")
 179 | 
 180 | 
 181 | def p_argument_expression_list(p):
 182 |     """
 183 |     argument_expression_list : assignment_expression
 184 |         | argument_expression_list COMMA assignment_expression
 185 |     """
 186 |     if len(p) == 2:
 187 |         construct_node(p, "argument_expression_list")
 188 |     else:
 189 |         p[1].append(p[2])
 190 |         p[1].append(p[3])
 191 |         p[0] = p[1]
 192 | 
 193 | 
 194 | def p_unary_expression(p):
 195 |     """
 196 | unary_expression : postfix_expression
 197 |     | INC_OP unary_expression
 198 |     | DEC_OP unary_expression
 199 |     | unary_operator cast_expression
 200 |     | SIZEOF unary_expression
 201 |     | SIZEOF LBRACKET type_name RBRACKET
 202 |     """
 203 |     if len(p) == 2:
 204 |         p[0] = p[1]
 205 |     else:
 206 |         construct_node(p, "unary_expression")
 207 | 
 208 | 
def p_unary_operator(p):
    """
unary_operator : AND
    | STAR
    | PLUS
    | MINUS
    | UNOT
    | NOT
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # construct_node is defined outside this chunk; presumably it builds the
    # "unary_operator" node from the matched symbols and stores it in p[0].
    construct_node(p, "unary_operator")
 219 | 
 220 | 
 221 | def p_cast_expression(p):
 222 |     """
 223 | cast_expression : unary_expression
 224 |     | LBRACKET type_name RBRACKET cast_expression
 225 |     """
 226 |     if len(p) == 2:
 227 |         p[0] = p[1]
 228 |     else:
 229 |         construct_node(p, "cast_expression")
 230 |         # printAST(p[0], 0)
 231 | 
 232 | 
 233 | def p_multiplicative_expression(p):
 234 |     """
 235 | multiplicative_expression : cast_expression
 236 |     | multiplicative_expression STAR cast_expression
 237 |     | multiplicative_expression DIVIDE cast_expression
 238 |     | multiplicative_expression MOD cast_expression
 239 |     | multiplicative_expression STAR error cast_expression
 240 |     | multiplicative_expression DIVIDE error cast_expression
 241 |     | multiplicative_expression MOD error cast_expression
 242 |     """
 243 |     del_list = []
 244 |     if len(p) == 5:
 245 |         print(
 246 |             "Error type: error token after %s. at line: %d.\n" %
 247 |             (p[2], p.lineno(2)))
 248 |         del_list.append(3)
 249 |         parser.errorCounter = 0
 250 |     if len(p) == 2:
 251 |         p[0] = p[1]
 252 |     else:
 253 |         construct_node(p, "multiplicative_expression", del_list)
 254 | 
 255 | 
 256 | def p_additive_expression(p):
 257 |     """
 258 | additive_expression : multiplicative_expression
 259 |     | additive_expression PLUS multiplicative_expression
 260 |     | additive_expression MINUS multiplicative_expression
 261 |     | additive_expression PLUS error multiplicative_expression
 262 |     | additive_expression MINUS error multiplicative_expression
 263 |     """
 264 |     del_list = []
 265 |     if len(p) == 5:
 266 |         print(
 267 |             "Error type: error token after %s. at line: %d.\n" %
 268 |             (p[2], p.lineno(2)))
 269 |         del_list.append(3)
 270 |         parser.errorCounter = 0
 271 |     if len(p) == 2:
 272 |         p[0] = p[1]
 273 |     else:
 274 |         construct_node(p, "additive_expression", del_list)
 275 | 
 276 | 
 277 | def p_shift_expression(p):
 278 |     """
 279 | shift_expression : additive_expression
 280 |     | shift_expression LEFT_OP additive_expression
 281 |     | shift_expression RIGHT_OP additive_expression
 282 |     | shift_expression LEFT_OP error additive_expression
 283 |     | shift_expression RIGHT_OP error additive_expression
 284 |     """
 285 |     del_list = []
 286 |     if len(p) == 5:
 287 |         print(
 288 |             "Error type: error token after %s. at line: %d.\n" %
 289 |             (p[2], p.lineno(2)))
 290 |         del_list.append(3)
 291 |         parser.errorCounter = 0
 292 |     if len(p) == 2:
 293 |         p[0] = p[1]
 294 |     else:
 295 |         construct_node(p, "shift_expression", del_list)
 296 | 
 297 | 
 298 | def p_relational_expression(p):
 299 |     """
 300 | relational_expression : shift_expression
 301 |     | relational_expression LT shift_expression
 302 |     | relational_expression GT shift_expression
 303 |     | relational_expression LE_OP shift_expression
 304 |     | relational_expression GE_OP shift_expression
 305 |     | relational_expression LT error shift_expression
 306 |     | relational_expression GT error shift_expression
 307 |     | relational_expression LE_OP error shift_expression
 308 |     | relational_expression GE_OP error shift_expression
 309 |     """
 310 |     del_list = []
 311 |     if len(p) == 5:
 312 |         print(
 313 |             "Error type: error token after %s. at line: %d.\n" %
 314 |             (p[2], p.lineno(2)))
 315 |         del_list.append(3)
 316 |         parser.errorCounter = 0
 317 | 
 318 |     if len(p) == 2:
 319 |         p[0] = p[1]
 320 |     else:
 321 |         construct_node(p, "relational_expression", del_list)
 322 | 
 323 | 
 324 | def p_equality_expression(p):
 325 |     """
 326 | equality_expression : relational_expression
 327 |     | equality_expression EQ_OP relational_expression
 328 |     | equality_expression NE_OP relational_expression
 329 |     | equality_expression EQ_OP error relational_expression
 330 |     | equality_expression NE_OP error relational_expression
 331 |     """
 332 |     del_list = []
 333 |     if len(p) == 5:
 334 |         print(
 335 |             "Error type: error token after %s. at line: %d.\n" %
 336 |             (p[2], p.lineno(2)))
 337 |         del_list.append(3)
 338 |         parser.errorCounter = 0
 339 |     if len(p) == 2:
 340 |         p[0] = p[1]
 341 |     else:
 342 |         construct_node(p, "equality_expression", del_list)
 343 | 
 344 | 
 345 | def p_and_expression(p):
 346 |     """
 347 | and_expression : equality_expression
 348 |     | and_expression AND equality_expression
 349 |     | and_expression AND error equality_expression
 350 |     """
 351 |     del_list = []
 352 |     if len(p) == 5:
 353 |         print(
 354 |             "Error type: error token after %s. at line: %d.\n" %
 355 |             (p[2], p.lineno(2)))
 356 |         del_list.append(3)
 357 |         parser.errorCounter = 0
 358 | 
 359 |     if len(p) == 2:
 360 |         p[0] = p[1]
 361 |     else:
 362 |         construct_node(p, "and_expression", del_list)
 363 | 
 364 | 
 365 | def p_exclusive_or_expression(p):
 366 |     """
 367 | exclusive_or_expression : and_expression
 368 |     | exclusive_or_expression XOR and_expression
 369 |     | exclusive_or_expression XOR error and_expression
 370 |     """
 371 |     del_list = []
 372 |     if len(p) == 5:
 373 |         print(
 374 |             "Error type: error token after %s. at line: %d.\n" %
 375 |             (p[2], p.lineno(2)))
 376 |         del_list.append(3)
 377 |         parser.errorCounter = 0
 378 | 
 379 |     if len(p) == 2:
 380 |         p[0] = p[1]
 381 |     else:
 382 |         construct_node(p, "exclusive_or_expression", del_list)
 383 | 
 384 | 
 385 | def p_inclusive_or_expression(p):
 386 |     """
 387 | inclusive_or_expression : exclusive_or_expression
 388 |     | inclusive_or_expression OR exclusive_or_expression
 389 |     | inclusive_or_expression OR error exclusive_or_expression
 390 |     """
 391 |     del_list = []
 392 |     if len(p) == 5:
 393 |         print(
 394 |             "Error type: error token after %s. at line: %d.\n" %
 395 |             (p[2], p.lineno(2)))
 396 |         del_list.append(3)
 397 |         parser.errorCounter = 0
 398 | 
 399 |     if len(p) == 2:
 400 |         p[0] = p[1]
 401 |     else:
 402 |         construct_node(p, "inclusive_or_expression", del_list)
 403 | 
 404 | 
 405 | def p_logical_and_expression(p):
 406 |     """
 407 | logical_and_expression : inclusive_or_expression
 408 |     | logical_and_expression AND_OP inclusive_or_expression
 409 |     | logical_and_expression AND_OP error inclusive_or_expression
 410 |     """
 411 |     del_list = []
 412 |     if len(p) == 5:
 413 |         print(
 414 |             "Error type: error token after %s. at line: %d.\n" %
 415 |             (p[2], p.lineno(2)))
 416 |         del_list.append(3)
 417 |         parser.errorCounter = 0
 418 | 
 419 |     if len(p) == 2:
 420 |         p[0] = p[1]
 421 |     else:
 422 |         construct_node(p, "logical_and_expression", del_list)
 423 | 
 424 | 
 425 | def p_logical_or_expression(p):
 426 |     """
 427 | logical_or_expression : logical_and_expression
 428 |     | logical_or_expression OR_OP logical_and_expression
 429 |     | logical_or_expression OR_OP error logical_and_expression
 430 |     """
 431 |     del_list = []
 432 |     if len(p) == 5:
 433 |         print(
 434 |             "Error type: error token after %s. at line: %d.\n" %
 435 |             (p[2], p.lineno(2)))
 436 |         del_list.append(3)
 437 |         parser.errorCounter = 0
 438 | 
 439 |     if len(p) == 2:
 440 |         p[0] = p[1]
 441 |     else:
 442 |         construct_node(p, "logical_or_expression", del_list)
 443 | 
 444 | 
 445 | def p_conditional_expression(p):
 446 |     """
 447 | conditional_expression : logical_or_expression
 448 |     | logical_or_expression QUESTIONMARK expression COLON conditional_expression
 449 |     """
 450 |     if len(p) == 2:
 451 |         p[0] = p[1]
 452 |     else:
 453 |         construct_node(p, "conditional_expression")
 454 | 
 455 | 
 456 | def p_assignment_expression(p):
 457 |     """
 458 | assignment_expression : conditional_expression
 459 |     | unary_expression assignment_operator assignment_expression
 460 |     """
 461 |     if len(p) == 2:
 462 |         p[0] = p[1]
 463 |     else:
 464 |         construct_node(p, "assignment_expression")
 465 | 
 466 | 
def p_assignment_operator(p):
    """
assignment_operator : ASSIGN
    | MUL_ASSIGN
    | DIV_ASSIGN
    | MOD_ASSIGN
    | ADD_ASSIGN
    | SUB_ASSIGN
    | LEFT_ASSIGN
    | RIGHT_ASSIGN
    | AND_ASSIGN
    | XOR_ASSIGN
    | OR_ASSIGN
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # construct_node is defined outside this chunk; presumably it wraps the
    # matched operator token in an "assignment_operator" node stored in p[0].
    construct_node(p, "assignment_operator")
 482 | 
 483 | 
 484 | def p_expression(p):
 485 |     """
 486 | expression : assignment_expression
 487 |     | expression COMMA assignment_expression
 488 |     """
 489 |     if len(p) == 2:
 490 |         p[0] = p[1]
 491 |     else:
 492 |         construct_node(p, "expression")
 493 |         # if len(p) == 2:
 494 |         #     construct_node(p, "expression")
 495 |         # elif len(p) == 4:
 496 |         #     # printAST(p[1])
 497 |         #     p[1].append(p[3])
 498 |         #     p[0] = p[1]
 499 |         # else:
 500 |         #     raise Exception("expression just has 2 or 4 children")
 501 | 
 502 | 
def p_constant_expression(p):
    """
constant_expression : conditional_expression
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # Unlike the other expression rules, a single child is NOT passed through
    # here: construct_node is always called for "constant_expression".
    construct_node(p, "constant_expression")
 508 | 
 509 | 
 510 | def p_init_declarator_list(p):
 511 |     """
 512 | init_declarator_list : init_declarator
 513 |     | init_declarator_list COMMA init_declarator
 514 |     """
 515 |     if len(p) == 2:
 516 |         construct_node(p, "init_declarator_list")
 517 |     else:
 518 |         p[1].append(p[2])
 519 |         p[1].append(p[3])
 520 |         p[0] = p[1]
 521 | 
 522 | 
def p_init_declarator(p):
    """
init_declarator : declarator
    | declarator ASSIGN initializer
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # Both alternatives (with and without an initializer) go through
    # construct_node, which is defined outside this chunk.
    construct_node(p, "init_declarator")
 529 | 
 530 | 
def p_storage_class_specifier(p):
    """
storage_class_specifier : TYPEDEF
    | EXTERN
    | STATIC
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # Only typedef/extern/static are accepted by this grammar.
    construct_node(p, "storage_class_specifier")
 538 | 
 539 | 
 540 | def p_integer_type(p):
 541 |     """
 542 |     integer_type : CHAR
 543 |     | SHORT
 544 |     | INT
 545 |     | LONG
 546 |     | UNSIGNED integer_type
 547 |     | SIGNED integer_type
 548 |     | SHORT integer_type
 549 |     | LONG integer_type
 550 |     """
 551 |     if len(p) == 2:
 552 |         construct_node(p, "integer_type")
 553 |     else:
 554 |         p[2].insert(1, p[1])
 555 |         p[0] = p[2]
 556 |         # print(p[0])
 557 | 
 558 | 
def p_type_specifier(p):
    """type_specifier : VOID
    | integer_type
    | FLOAT
    | DOUBLE
    | struct_or_union_specifier
    | enum_specifier
    | TYPE_NAME
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # Every alternative is a single symbol; construct_node (defined outside
    # this chunk) is called for all of them.
    construct_node(p, "type_specifier")
 570 | 
 571 | 
def p_struct_or_union_specifier(p):
    """
struct_or_union_specifier : struct_or_union IDENTIFIER LCURLYBRACKET struct_declaration_list RCURLYBRACKET
    | struct_or_union TYPE_NAME LCURLYBRACKET struct_declaration_list RCURLYBRACKET
    | struct_or_union ERRORID LCURLYBRACKET struct_declaration_list RCURLYBRACKET
    | struct_or_union LCURLYBRACKET struct_declaration_list RCURLYBRACKET
    | struct_or_union IDENTIFIER
    | struct_or_union TYPE_NAME
    | struct_or_union ERRORID
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # Symbol 2 is the tag name, where an ERRORID may appear; handleErrorID is
    # defined outside this chunk. Note that in the anonymous alternative
    # symbol 2 is LCURLYBRACKET rather than a name.
    handleErrorID(p, 2)
    construct_node(p, "struct_or_union_specifier")
 584 | 
 585 | 
def p_struct_or_union(p):
    """
struct_or_union : STRUCT
    | UNION
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # construct_node is defined outside this chunk.
    construct_node(p, "struct_or_union")
 592 | 
 593 | 
 594 | def p_struct_declaration_list(p):
 595 |     """struct_declaration_list : struct_declaration
 596 |     | struct_declaration_list struct_declaration
 597 |     """
 598 |     if len(p) == 2:
 599 |         construct_node(p, "struct_declaration_list")
 600 |     elif len(p) == 3:
 601 |         p[1].append(p[2])
 602 |         p[0] = p[1]
 603 | 
 604 | 
 605 | def p_struct_declaration(p):
 606 |     """struct_declaration : specifier_qualifier_list struct_declarator_list SEMICOLON
 607 |     | specifier_qualifier_list struct_declarator_list error
 608 |     """
 609 |     del_list = []
 610 |     last_idx = len(p) - 1
 611 |     if p[last_idx] != ';':
 612 |         print("struct_declaration")
 613 |         del_list.append(last_idx)
 614 |         parser.errorCounter = 0
 615 |     construct_node(p, "struct_declaration", del_list)
 616 | 
 617 | 
 618 | #    print(p[0])
 619 | 
 620 | 
def p_specifier_qualifier_list(p):
    """
specifier_qualifier_list : type_specifier
    | type_specifier type_qualifier
    | type_qualifier type_specifier
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # The rule is non-recursive: at most one specifier and one qualifier, in
    # either order.
    construct_node(p, "specifier_qualifier_list")
 628 | 
 629 | 
 630 | def p_struct_declarator_list(p):
 631 |     """
 632 | struct_declarator_list : declarator
 633 |     | struct_declarator_list COMMA declarator
 634 |     """
 635 |     if len(p) == 2:
 636 |         construct_node(p, "struct_declarator_list")
 637 |     else:
 638 |         p[1].append(p[2])
 639 |         p[1].append(p[3])
 640 |         p[0] = p[1]
 641 | 
 642 | 
 643 | # def p_struct_declarator(p):
 644 | #     """
 645 | # struct_declarator : declarator
 646 | #     | COLON constant_expression
 647 | #     | declarator COLON constant_expression
 648 | #     """
 649 | #     construct_node(p, "struct_declarator")
 650 | 
 651 | 
def p_enum_specifier(p):
    """
enum_specifier : ENUM LCURLYBRACKET enumerator_list RCURLYBRACKET
    | ENUM IDENTIFIER LCURLYBRACKET enumerator_list RCURLYBRACKET
    | ENUM IDENTIFIER
    | ENUM ERRORID LCURLYBRACKET enumerator_list RCURLYBRACKET
    | ENUM ERRORID
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # Symbol 2 is the enum tag, where an ERRORID may appear; handleErrorID is
    # defined outside this chunk. In the anonymous alternative symbol 2 is
    # LCURLYBRACKET rather than a name.
    handleErrorID(p, 2)
    construct_node(p, "enum_specifier")
 662 | 
 663 | 
 664 | def p_enumerator_list(p):
 665 |     """
 666 | enumerator_list : enumerator
 667 |     | enumerator_list COMMA enumerator
 668 |     """
 669 |     if len(p) == 2:
 670 |         construct_node(p, "enumerator_list")
 671 |     else:
 672 |         p[1].append(p[2])
 673 |         p[1].append(p[3])
 674 |         p[0] = p[1]
 675 | 
 676 | 
def p_enumerator(p):
    """
enumerator : IDENTIFIER
    | IDENTIFIER ASSIGN constant_expression
    | ERRORID
    | ERRORID ASSIGN constant_expression
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # Symbol 1 is the enumerator name, where an ERRORID may appear;
    # handleErrorID is defined outside this chunk.
    handleErrorID(p, 1)
    construct_node(p, "enumerator")
 686 | 
 687 | 
def p_type_qualifier(p):
    """
type_qualifier : CONST
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # CONST is the only qualifier this grammar accepts.
    construct_node(p, "type_qualifier")
 693 | 
 694 | 
def p_declarator(p):
    """
declarator : pointer direct_declarator
    | direct_declarator
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # A node is built whether or not a pointer prefix is present.
    construct_node(p, "declarator")
 701 | 
 702 | 
def p_direct_declarator(p):
    """
direct_declarator : direct_declarator LSQUAREBRACKET constant_expression RSQUAREBRACKET
    | direct_declarator LSQUAREBRACKET RSQUAREBRACKET
    | direct_declarator LBRACKET parameter_type_list RBRACKET
    | direct_declarator LBRACKET RBRACKET
    | IDENTIFIER
    | LBRACKET declarator RBRACKET
    | ERRORID
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # Symbol 1 is checked for an ERRORID by handleErrorID (defined outside
    # this chunk); in the recursive alternatives symbol 1 is a nested
    # direct_declarator rather than a token.
    handleErrorID(p, 1)
    construct_node(p, "direct_declarator")
 715 | 
 716 | 
 717 | def p_pointer(p):
 718 |     """
 719 | pointer : STAR
 720 |     | STAR CONST
 721 |     | pointer STAR
 722 |     | pointer STAR CONST
 723 |     """
 724 |     if p[1][0] != 'pointer':
 725 |         construct_node(p, "pointer")
 726 |     else:
 727 |         p[1].append(p[2])
 728 |         if len(p) == 4:
 729 |             p[1].append(p[3])
 730 |         p[0] = p[1]
 731 | 
 732 | 
def p_type_qualifier_list(p):
    """
type_qualifier_list : type_qualifier
    | type_qualifier_list type_qualifier
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # Unlike the other *_list rules in this file, the recursive alternative is
    # not flattened: construct_node is called for every reduction.
    construct_node(p, "type_qualifier_list")
 739 | 
 740 | 
def p_parameter_type_list(p):
    """
parameter_type_list : parameter_list
    | parameter_list COMMA ELLIPSIS
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # The second alternative is the variadic form ("..., ...").
    construct_node(p, "parameter_type_list")
 747 | 
 748 | 
 749 | def p_parameter_list(p):
 750 |     """
 751 | parameter_list : parameter_declaration
 752 |     | parameter_list COMMA parameter_declaration
 753 |     """
 754 |     if len(p) == 2:
 755 |         construct_node(p, "parameter_list")
 756 |     else:
 757 |         p[1].append(p[2])
 758 |         p[1].append(p[3])
 759 |         p[0] = p[1]
 760 | 
 761 | 
def p_parameter_declaration(p):
    """
parameter_declaration : declaration_specifiers declarator
    | declaration_specifiers abstract_declarator
    | declaration_specifiers
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # Covers named, abstract (unnamed) and specifier-only parameters.
    construct_node(p, "parameter_declaration")
 769 | 
 770 | 
def p_type_name(p):
    """
type_name : specifier_qualifier_list
    | specifier_qualifier_list abstract_declarator
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # type_name is what appears inside casts and sizeof(...) in this grammar.
    construct_node(p, "type_name")
 777 | 
 778 | 
def p_abstract_declarator(p):
    """
abstract_declarator : pointer
    | direct_abstract_declarator
    | pointer direct_abstract_declarator
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # construct_node is defined outside this chunk.
    construct_node(p, "abstract_declarator")
 786 | 
 787 | 
def p_direct_abstract_declarator(p):
    """
direct_abstract_declarator : LBRACKET abstract_declarator RBRACKET
    | LSQUAREBRACKET RSQUAREBRACKET
    | LSQUAREBRACKET constant_expression RSQUAREBRACKET
    | direct_abstract_declarator LSQUAREBRACKET RSQUAREBRACKET
    | direct_abstract_declarator LSQUAREBRACKET constant_expression RSQUAREBRACKET
    | LBRACKET RBRACKET
    | LBRACKET parameter_type_list RBRACKET
    | direct_abstract_declarator LBRACKET RBRACKET
    | direct_abstract_declarator LBRACKET parameter_type_list RBRACKET
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # Every alternative (grouping, array suffix, function suffix) produces a
    # node; nothing is flattened here.
    construct_node(p, "direct_abstract_declarator")
 801 | 
 802 | 
def p_initializer(p):
    """
initializer : assignment_expression
    | LCURLYBRACKET initializer_list RCURLYBRACKET
    | LCURLYBRACKET initializer_list COMMA RCURLYBRACKET
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # The third alternative accepts a trailing comma inside the braces.
    construct_node(p, "initializer")
 810 | 
 811 | 
 812 | def p_initiazer_list(p):
 813 |     """
 814 | initializer_list : initializer
 815 |     | initializer_list COMMA initializer
 816 |     """
 817 |     if len(p) == 2:
 818 |         construct_node(p, "initializer_list")
 819 |     else:
 820 |         p[1].append(p[2])
 821 |         p[1].append(p[3])
 822 |         p[0] = p[1]
 823 | 
 824 | 
def p_statement(p):
    """
statement : labeled_statement
    | compound_statement
    | expression_statement
    | selection_statement
    | iteration_statement
    | jump_statement
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # Each statement kind is wrapped via construct_node rather than passed
    # through as-is.
    construct_node(p, "statement")
 835 | 
 836 | 
def p_labeled_statement(p):
    """
labeled_statement : CASE constant_expression COLON statement
    | DEFAULT COLON statement
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # Only switch labels are accepted. The "IDENTIFIER COLON statement" and
    # "ERRORID COLON statement" alternatives (goto-style labels) and their
    # handleErrorID(p, 1) call are intentionally left out of the grammar.
    construct_node(p, "labeled_statement")
 846 | 
 847 | 
def p_compound_statement(p):
    """
compound_statement : LCURLYBRACKET RCURLYBRACKET
    | LCURLYBRACKET statement_list RCURLYBRACKET
    | LCURLYBRACKET declaration_list RCURLYBRACKET
    | LCURLYBRACKET declaration_list statement_list RCURLYBRACKET
    | LCURLYBRACKET error
    | LCURLYBRACKET statement_list error
    | LCURLYBRACKET declaration_list error
    | LCURLYBRACKET declaration_list statement_list error
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # The last four alternatives end in PLY's `error` symbol instead of a
    # closing brace. handleMissingRCURLYBRACKET is defined outside this chunk;
    # presumably it reports that case -- confirm against its definition.
    handleMissingRCURLYBRACKET(p)
    construct_node(p, "compound_statement")
 861 | 
 862 | 
 863 | def p_declaration_list(p):
 864 |     """
 865 | declaration_list : declaration
 866 |     | declaration_list declaration
 867 |     """
 868 |     if len(p) == 2:
 869 |         construct_node(p, "declaration_list")
 870 |     elif len(p) == 3:
 871 |         p[1].append(p[2])
 872 |         p[0] = p[1]
 873 | 
 874 | 
 875 | def p_statement_list(p):
 876 |     """
 877 | statement_list : statement
 878 |     | statement_list statement
 879 |     """
 880 |     if len(p) == 2:
 881 |         construct_node(p, "statement_list")
 882 |     if len(p) == 3:
 883 |         p[1].append(p[2])
 884 |         p[0] = p[1]
 885 | 
 886 | 
 887 | def p_expression_statement(p):
 888 |     """
 889 | expression_statement : SEMICOLON
 890 |     | expression SEMICOLON
 891 |     | expression error
 892 |     """
 893 | 
 894 |     #    del_list = []
 895 |     #    last_idx = len(p) - 1
 896 |     #    if p[last_idx] != ';':
 897 |     #        print("expression_statement")
 898 |     #        print("Error type: Missing semicolon before %s. at line: %d, lex pos: %d.\n" % (p[last_idx], p.lineno(last_idx), p.lexpos(last_idx)))
 899 |     #        del_list.append(last_idx)
 900 |     #        parser.errorCounter = 0
 901 |     del_list = handleMissingSEMI(p, "expression_statement")
 902 |     construct_node(p, "expression_statement", del_list)
 903 | 
 904 | 
def p_selection_statement(p):
    """
selection_statement : IF LBRACKET expression RBRACKET statement
    | IF LBRACKET expression RBRACKET statement ELSE statement
    | SWITCH LBRACKET expression RBRACKET statement
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # if / if-else / switch all produce a "selection_statement" node.
    construct_node(p, "selection_statement")
 912 | 
 913 | 
 914 | def p_iteration_statement(p):
 915 |     """
 916 | iteration_statement : WHILE LBRACKET expression RBRACKET statement
 917 |     | DO statement WHILE LBRACKET expression RBRACKET SEMICOLON
 918 |     | DO statement WHILE LBRACKET expression RBRACKET error
 919 |     | FOR LBRACKET expression_statement expression_statement RBRACKET statement
 920 |     | FOR LBRACKET expression_statement expression_statement expression RBRACKET statement
 921 |     """
 922 |     #    del_list = []
 923 |     #    last_idx = len(p) - 1
 924 |     #    if p[1] == 'do' and p[last_idx] != ';':
 925 |     #        print("iteration statement")
 926 |     #        print("Error type: Missing semicolon before %s. at line: %d, lex pos: %d.\n" % (p[last_idx], p.lineno(last_idx), p.lexpos(last_idx)))
 927 |     #        del_list.append(last_idx)
 928 |     #        parser.errorCounter = 0
 929 |     del_list = handleMissingSEMI(p, "iteration_statement", (1, 'do'))
 930 |     construct_node(p, "iteration_statement", del_list)
 931 | 
 932 | 
 933 | #    print(p[0])
 934 | 
 935 | def p_jump_statement(p):
 936 |     """
 937 | jump_statement : CONTINUE SEMICOLON
 938 |     | BREAK SEMICOLON
 939 |     | RETURN SEMICOLON
 940 |     | RETURN expression SEMICOLON
 941 |     | CONTINUE error
 942 |     | BREAK error
 943 |     | RETURN error
 944 |     | RETURN expression error
 945 |     """
 946 |     #    del_list = []
 947 |     #    last_idx = len(p) - 1
 948 |     #    if p[last_idx] != ';':
 949 |     #        print("jump statement")
 950 |     #        print("Error type: Missing semicolon before %s. at line: %d, lex pos: %d.\n" % (p[last_idx], p.lineno(last_idx), p.lexpos(last_idx)))
 951 |     #        del_list.append(last_idx)
 952 |     #        parser.errorCounter = 0
 953 |     del_list = handleMissingSEMI(p, "jump_statement")
 954 |     construct_node(p, "jump_statement", del_list)
 955 | 
 956 | 
 957 | #    print(p[0])
 958 | 
def p_function_definition(p):
    """
function_definition : declaration_specifiers declarator compound_statement
    """
    # The docstring above is the PLY grammar for this rule (read at runtime).
    # Only the form with explicit declaration_specifiers is accepted; there is
    # no alternative for an implicit return type or a K&R declaration list.
    construct_node(p, "function_definition")
 964 | 
 965 | 
 966 | def p_error(p):
 967 |     if not p:
 968 |         print("End of file.")
 969 |         return
 970 | 
 971 |     if p.type == 'EOF':
 972 |         if ZCClex.lexer.lexer.curlyBalance > 0:
 973 |             parser.errok()
 974 |             return lex.LexToken(
 975 |                 'RCURCLYBRACKET',
 976 |                 '}',
 977 |                 p.lexer.lineno,
 978 |                 p.lexer.lexpos)
 979 |         else:
 980 |             return
 981 | 
 982 |     print("Syntax error at %r, at line: %d, column: %d." % (
 983 |         p.value, p.lexer.lineno, ZCClex.find_column(p.lexer.lexdata, p)))
 984 |     if p.type == 'IDENTIFIER':
 985 |         print("Undefined Type " + p.value[1])
 986 | 
 987 |     if parser.errorCounter > 0:
 988 |         print("In panic mode\n")
 989 |         while True:
 990 |             tok = parser.token()
 991 |             if not tok or tok.type == 'SEMICOLON' or tok.type == 'RCURLYBRACKET':
 992 |                 break
 993 |         parser.restart()
 994 |     else:
 995 |         parser.errorCounter += 1
 996 |     return p
 997 | 
 998 | 
 999 | def printAST(p, n=0):
1000 |     if p is not None:
1001 |         # if type(p) is list:
1002 |         if len(p) > 0 and not isinstance(p, str):
1003 |             print('line:%02d' % p.lineno, end='')
1004 |             print(' |' * n, end='-')
1005 |             print(p[0])
1006 |             for node in p[1:]:
1007 |                 printAST(node, n + 1)
1008 |         else:
1009 |             print('line:xx', end='')
1010 |             print(' |' * n, end='-')
1011 |             print(p)
1012 | 
1013 | 
# Build the parser from the p_* rules in this module, starting at
# 'outer_translation_unit'.
parser = yacc.yacc(start='outer_translation_unit', debug=True)
# Count of syntax errors seen since the last successful recovery; p_error
# increments it and the grammar's error-recovery actions reset it to 0.
parser.errorCounter = 0

if __name__ == "__main__":
    # Ad-hoc driver: parse a fixed sample file and dump the resulting AST.
    c_file_name = "test1.c"
    # Use a context manager so the file handle is closed even if reading
    # fails (the handle was previously never closed).
    with open(c_file_name, "r") as c_file:
        contents = c_file.read()
    result = parser.parse(contents, lexer=ZCClex.lexer)
    printAST(result)
1032 | 


--------------------------------------------------------------------------------
/yyparse/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | 


--------------------------------------------------------------------------------
/yyparse/missRightCurly.c:
--------------------------------------------------------------------------------
 1 | //
 2 | //int a, b, c;
 3 | //int main(int argc, char *argv[]) {
 4 | //	c = a + b;
 5 | //
 6 | //	
 7 | //int b, c;
 8 | 
 9 | int f(){
10 | 
11 | int d;


--------------------------------------------------------------------------------
/yyparse/missSEMI.c:
--------------------------------------------------------------------------------
 1 | 
 2 | int b
 3 | 
 4 | int main(int argc, char *argv[]) {
 5 | 	int $a;
 6 | 	c = a + b;
 7 | 	d = a +/ b;
 8 | 	d = a -/ b;
 9 | 	d = a ^^ / b;
10 | 	d = a *|b;
11 | 	d = a >/ b;
12 | 	d = a </ b;
13 | 	d = a <=/ b;		
14 | 	d = a <</ b;	
15 | 	d = a ==/ b;	
16 | 	d = a &/ b;		
17 | 	d = a ^/ b;			
18 | 	d = a |/ b;		
19 | 	d = a &&| b;	
20 | 	d = a ||| b;				
21 | 
22 | 	
23 | 	a = b + c
24 | 	printf("asdf\n")
25 | 	b = a + c;
26 | 	printf("%d\n", a);
27 | 


--------------------------------------------------------------------------------
/yyparse/ply/__init__.py:
--------------------------------------------------------------------------------
1 | # PLY package
2 | # Author: David Beazley (dave@dabeaz.com)
3 | 
# Version string of the PLY copy bundled in this directory.
__version__ = '3.7'
# Submodules exposed by `from ply import *`.
__all__ = ['lex','yacc']
6 | 


--------------------------------------------------------------------------------
/yyparse/ply/cpp.py:
--------------------------------------------------------------------------------
  1 | # -----------------------------------------------------------------------------
  2 | # cpp.py
  3 | #
  4 | # Author:  David Beazley (http://www.dabeaz.com)
  5 | # Copyright (C) 2007
  6 | # All rights reserved
  7 | #
  8 | # This module implements an ANSI-C style lexical preprocessor for PLY. 
  9 | # -----------------------------------------------------------------------------
 10 | from __future__ import generators
 11 | 
 12 | # -----------------------------------------------------------------------------
 13 | # Default preprocessor lexer definitions.   These tokens are enough to get
 14 | # a basic preprocessor working.   Other modules may import these if they want
 15 | # -----------------------------------------------------------------------------
 16 | 
# Token type names produced by the t_CPP_* rules below.
tokens = (
   'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND'
)

# Single-character punctuation for the lexer's `literals` setting.
literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
 22 | 
 23 | # Whitespace
 24 | def t_CPP_WS(t):
 25 |     r'\s+'
 26 |     t.lexer.lineno += t.value.count("\n")
 27 |     return t
 28 | 
 29 | t_CPP_POUND = r'\#'
 30 | t_CPP_DPOUND = r'\#\#'
 31 | 
 32 | # Identifier
 33 | t_CPP_ID = r'[A-Za-z_][\w_]*'
 34 | 
 35 | # Integer literal
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)'
    # The raw string above is the PLY token regex: a hex or decimal integer
    # with an optional u/l suffix. The token is returned unchanged.
    return t

# Register the function under the t_ name that PLY's lexer looks for.
t_CPP_INTEGER = CPP_INTEGER
 41 | 
 42 | # Floating literal
 43 | t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
 44 | 
 45 | # String literal
 46 | def t_CPP_STRING(t):
 47 |     r'\"([^\\\n]|(\\(.|\n)))*?\"'
 48 |     t.lexer.lineno += t.value.count("\n")
 49 |     return t
 50 | 
 51 | # Character constant 'c' or L'c'
 52 | def t_CPP_CHAR(t):
 53 |     r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
 54 |     t.lexer.lineno += t.value.count("\n")
 55 |     return t
 56 | 
 57 | # Comment
 58 | def t_CPP_COMMENT1(t):
 59 |     r'(/\*(.|\n)*?\*/)'
 60 |     ncr = t.value.count("\n")
 61 |     t.lexer.lineno += ncr
 62 |     # replace with one space or a number of '\n'
 63 |     t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' '
 64 |     return t
 65 | 
 66 | # Line comment
 67 | def t_CPP_COMMENT2(t):
 68 |     r'(//.*?(\n|$))'
 69 |     # replace with '/n'
 70 |     t.type = 'CPP_WS'; t.value = '\n'
 71 |     
 72 | def t_error(t):
 73 |     t.type = t.value[0]
 74 |     t.value = t.value[0]
 75 |     t.lexer.skip(1)
 76 |     return t
 77 | 
 78 | import re
 79 | import copy
 80 | import time
 81 | import os.path
 82 | 
 83 | # -----------------------------------------------------------------------------
 84 | # trigraph()
 85 | # 
 86 | # Given an input string, this function replaces all trigraph sequences. 
 87 | # The following mapping is used:
 88 | #
 89 | #     ??=    #
 90 | #     ??/    \
 91 | #     ??'    ^
 92 | #     ??(    [
 93 | #     ??)    ]
 94 | #     ??!    |
 95 | #     ??<    {
 96 | #     ??>    }
 97 | #     ??-    ~
 98 | # -----------------------------------------------------------------------------
 99 | 
# Matches any of the nine trigraph sequences listed above.
_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')

# Final trigraph character -> the single character it stands for.
_trigraph_rep = dict(zip("=/'()!<>-", "#\\^[]|{}~"))

def trigraph(input):
    """Return `input` with every trigraph sequence replaced by its character."""
    def _substitute(match):
        # The last character of the matched sequence selects the replacement.
        selector = match.group()[-1]
        return _trigraph_rep[selector]
    return _trigraph_pat.sub(_substitute, input)
115 | 
116 | # ------------------------------------------------------------------
117 | # Macro object
118 | #
119 | # This object holds information about preprocessor macros
120 | #
121 | #    .name      - Macro name (string)
122 | #    .value     - Macro value (a list of tokens)
123 | #    .arglist   - List of argument names
124 | #    .variadic  - Boolean indicating whether or not variadic macro
125 | #    .vararg    - Name of the variadic parameter
126 | #
127 | # When a macro is created, the macro replacement token sequence is
128 | # pre-scanned and used to create patch lists that are later used
129 | # during macro expansion
130 | # ------------------------------------------------------------------
131 | 
class Macro(object):
    """Record describing one preprocessor macro.

    name     -- macro name (string)
    value    -- replacement token list
    arglist  -- list of argument names, or None
    variadic -- True when the macro is variadic
    vararg   -- name of the variadic parameter (last entry of arglist),
                or None when the macro is not variadic
    source   -- initialised to None here
    """
    def __init__(self,name,value,arglist=None,variadic=False):
        self.name = name
        self.value = value
        self.arglist = arglist
        self.variadic = variadic
        # Always define the attribute so readers never hit AttributeError on
        # a non-variadic macro.
        self.vararg = arglist[-1] if variadic else None
        self.source = None
141 | 
142 | # ------------------------------------------------------------------
143 | # Preprocessor object
144 | #
145 | # Object representing a preprocessor.  Contains macro definitions,
146 | # include directories, and other information
147 | # ------------------------------------------------------------------
148 | 
149 | class Preprocessor(object):
150 |     def __init__(self,lexer=None):
151 |         if lexer is None:
152 |             lexer = lex.lexer
153 |         self.lexer = lexer
154 |         self.macros = { }
155 |         self.path = []
156 |         self.temp_path = []
157 | 
158 |         # Probe the lexer for selected tokens
159 |         self.lexprobe()
160 | 
161 |         tm = time.localtime()
162 |         self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm))
163 |         self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm))
164 |         self.parser = None
165 | 
166 |     # -----------------------------------------------------------------------------
167 |     # tokenize()
168 |     #
169 |     # Utility function. Given a string of text, tokenize into a list of tokens
170 |     # -----------------------------------------------------------------------------
171 | 
172 |     def tokenize(self,text):
173 |         tokens = []
174 |         self.lexer.input(text)
175 |         while True:
176 |             tok = self.lexer.token()
177 |             if not tok: break
178 |             tokens.append(tok)
179 |         return tokens
180 | 
181 |     # ---------------------------------------------------------------------
182 |     # error()
183 |     #
184 |     # Report a preprocessor error/warning of some kind
185 |     # ----------------------------------------------------------------------
186 | 
187 |     def error(self,file,line,msg):
188 |         print("%s:%d %s" % (file,line,msg))
189 | 
190 |     # ----------------------------------------------------------------------
191 |     # lexprobe()
192 |     #
193 |     # This method probes the preprocessor lexer object to discover
194 |     # the token types of symbols that are important to the preprocessor.
195 |     # If this works right, the preprocessor will simply "work"
196 |     # with any suitable lexer regardless of how tokens have been named.
197 |     # ----------------------------------------------------------------------
198 | 
199 |     def lexprobe(self):
200 | 
201 |         # Determine the token type for identifiers
202 |         self.lexer.input("identifier")
203 |         tok = self.lexer.token()
204 |         if not tok or tok.value != "identifier":
205 |             print("Couldn't determine identifier type")
206 |         else:
207 |             self.t_ID = tok.type
208 | 
209 |         # Determine the token type for integers
210 |         self.lexer.input("12345")
211 |         tok = self.lexer.token()
212 |         if not tok or int(tok.value) != 12345:
213 |             print("Couldn't determine integer type")
214 |         else:
215 |             self.t_INTEGER = tok.type
216 |             self.t_INTEGER_TYPE = type(tok.value)
217 | 
218 |         # Determine the token type for strings enclosed in double quotes
219 |         self.lexer.input("\"filename\"")
220 |         tok = self.lexer.token()
221 |         if not tok or tok.value != "\"filename\"":
222 |             print("Couldn't determine string type")
223 |         else:
224 |             self.t_STRING = tok.type
225 | 
226 |         # Determine the token type for whitespace--if any
227 |         self.lexer.input("  ")
228 |         tok = self.lexer.token()
229 |         if not tok or tok.value != "  ":
230 |             self.t_SPACE = None
231 |         else:
232 |             self.t_SPACE = tok.type
233 | 
234 |         # Determine the token type for newlines
235 |         self.lexer.input("\n")
236 |         tok = self.lexer.token()
237 |         if not tok or tok.value != "\n":
238 |             self.t_NEWLINE = None
239 |             print("Couldn't determine token for newlines")
240 |         else:
241 |             self.t_NEWLINE = tok.type
242 | 
243 |         self.t_WS = (self.t_SPACE, self.t_NEWLINE)
244 | 
245 |         # Check for other characters used by the preprocessor
246 |         chars = [ '<','>','#','##','\\','(',')',',','.']
247 |         for c in chars:
248 |             self.lexer.input(c)
249 |             tok = self.lexer.token()
250 |             if not tok or tok.value != c:
251 |                 print("Unable to lex '%s' required for preprocessor" % c)
252 | 
253 |     # ----------------------------------------------------------------------
254 |     # add_path()
255 |     #
256 |     # Adds a search path to the preprocessor.  
257 |     # ----------------------------------------------------------------------
258 | 
259 |     def add_path(self,path):
260 |         self.path.append(path)
261 | 
262 |     # ----------------------------------------------------------------------
263 |     # group_lines()
264 |     #
    # Given an input string, this function splits it into lines.  Trailing whitespace
    # is removed.   Any line ending with \ is grouped with the next line.  This
    # function forms the lowest level of the preprocessor---grouping text into
    # a line-by-line format.
269 |     # ----------------------------------------------------------------------
270 | 
271 |     def group_lines(self,input):
272 |         lex = self.lexer.clone()
273 |         lines = [x.rstrip() for x in input.splitlines()]
274 |         for i in xrange(len(lines)):
275 |             j = i+1
276 |             while lines[i].endswith('\\') and (j < len(lines)):
277 |                 lines[i] = lines[i][:-1]+lines[j]
278 |                 lines[j] = ""
279 |                 j += 1
280 | 
281 |         input = "\n".join(lines)
282 |         lex.input(input)
283 |         lex.lineno = 1
284 | 
285 |         current_line = []
286 |         while True:
287 |             tok = lex.token()
288 |             if not tok:
289 |                 break
290 |             current_line.append(tok)
291 |             if tok.type in self.t_WS and '\n' in tok.value:
292 |                 yield current_line
293 |                 current_line = []
294 | 
295 |         if current_line:
296 |             yield current_line
297 | 
298 |     # ----------------------------------------------------------------------
299 |     # tokenstrip()
300 |     # 
301 |     # Remove leading/trailing whitespace tokens from a token list
302 |     # ----------------------------------------------------------------------
303 | 
304 |     def tokenstrip(self,tokens):
305 |         i = 0
306 |         while i < len(tokens) and tokens[i].type in self.t_WS:
307 |             i += 1
308 |         del tokens[:i]
309 |         i = len(tokens)-1
310 |         while i >= 0 and tokens[i].type in self.t_WS:
311 |             i -= 1
312 |         del tokens[i+1:]
313 |         return tokens
314 | 
315 | 
316 |     # ----------------------------------------------------------------------
317 |     # collect_args()
318 |     #
319 |     # Collects comma separated arguments from a list of tokens.   The arguments
320 |     # must be enclosed in parenthesis.  Returns a tuple (tokencount,args,positions)
321 |     # where tokencount is the number of tokens consumed, args is a list of arguments,
322 |     # and positions is a list of integers containing the starting index of each
323 |     # argument.  Each argument is represented by a list of tokens.
324 |     #
325 |     # When collecting arguments, leading and trailing whitespace is removed
326 |     # from each argument.  
327 |     #
328 |     # This function properly handles nested parenthesis and commas---these do not
329 |     # define new arguments.
330 |     # ----------------------------------------------------------------------
331 | 
332 |     def collect_args(self,tokenlist):
333 |         args = []
334 |         positions = []
335 |         current_arg = []
336 |         nesting = 1
337 |         tokenlen = len(tokenlist)
338 |     
339 |         # Search for the opening '('.
340 |         i = 0
341 |         while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
342 |             i += 1
343 | 
344 |         if (i < tokenlen) and (tokenlist[i].value == '('):
345 |             positions.append(i+1)
346 |         else:
347 |             self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
348 |             return 0, [], []
349 | 
350 |         i += 1
351 | 
352 |         while i < tokenlen:
353 |             t = tokenlist[i]
354 |             if t.value == '(':
355 |                 current_arg.append(t)
356 |                 nesting += 1
357 |             elif t.value == ')':
358 |                 nesting -= 1
359 |                 if nesting == 0:
360 |                     if current_arg:
361 |                         args.append(self.tokenstrip(current_arg))
362 |                         positions.append(i)
363 |                     return i+1,args,positions
364 |                 current_arg.append(t)
365 |             elif t.value == ',' and nesting == 1:
366 |                 args.append(self.tokenstrip(current_arg))
367 |                 positions.append(i+1)
368 |                 current_arg = []
369 |             else:
370 |                 current_arg.append(t)
371 |             i += 1
372 |     
373 |         # Missing end argument
374 |         self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
375 |         return 0, [],[]
376 | 
377 |     # ----------------------------------------------------------------------
378 |     # macro_prescan()
379 |     #
380 |     # Examine the macro value (token sequence) and identify patch points
381 |     # This is used to speed up macro expansion later on---we'll know
382 |     # right away where to apply patches to the value to form the expansion
383 |     # ----------------------------------------------------------------------
384 |     
    def macro_prescan(self,macro):
        """Scan macro.value once and record where arguments must be patched in.

        Fills three lists on `macro`:
          patch           -- (kind, argnum, index) tuples, kind 'c' for an
                             argument next to '##' and 'e' for an ordinary
                             argument; sorted by index, highest first
          str_patch       -- (argnum, index) for arguments preceded by '#'
          var_comma_patch -- indexes of a ',' that is followed by '##' and
                             the variadic parameter
        The '#' tokens, and any '##' that directly precedes an argument, are
        deleted from macro.value while scanning.
        """
        macro.patch     = []             # Standard macro arguments 
        macro.str_patch = []             # String conversion expansion
        macro.var_comma_patch = []       # Variadic macro comma patch
        i = 0
        while i < len(macro.value):
            if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
                argnum = macro.arglist.index(macro.value[i].value)
                # Conversion of argument to a string
                if i > 0 and macro.value[i-1].value == '#':
                    # Retype a copy of the argument token as a string and drop the '#'
                    macro.value[i] = copy.copy(macro.value[i])
                    macro.value[i].type = self.t_STRING
                    del macro.value[i-1]
                    macro.str_patch.append((argnum,i-1))
                    # The deletion shifted the list left, so i already refers
                    # to the token after the argument: do not advance.
                    continue
                # Concatenation
                elif (i > 0 and macro.value[i-1].value == '##'):
                    # Argument on the right of '##': the '##' is removed, which
                    # moves the argument to index i-1.
                    macro.patch.append(('c',argnum,i-1))
                    del macro.value[i-1]
                    continue
                elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
                    # Argument on the left of '##': step onto the '##' so the
                    # next iteration examines it.
                    macro.patch.append(('c',argnum,i))
                    i += 1
                    continue
                # Standard expansion
                else:
                    macro.patch.append(('e',argnum,i))
            elif macro.value[i].value == '##':
                # ', ## <vararg>' in a variadic macro: remember the comma's index
                if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
                        ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
                        (macro.value[i+1].value == macro.vararg):
                    macro.var_comma_patch.append(i-1)
            i += 1
        # Highest index first, so applying one patch does not move the others
        macro.patch.sort(key=lambda x: x[2],reverse=True)
419 | 
420 |     # ----------------------------------------------------------------------
421 |     # macro_expand_args()
422 |     #
423 |     # Given a Macro and list of arguments (each a token list), this method
424 |     # returns an expanded version of a macro.  The return value is a token sequence
425 |     # representing the replacement macro tokens
426 |     # ----------------------------------------------------------------------
427 | 
428 |     def macro_expand_args(self,macro,args):
429 |         # Make a copy of the macro token sequence
430 |         rep = [copy.copy(_x) for _x in macro.value]
431 | 
432 |         # Make string expansion patches.  These do not alter the length of the replacement sequence
433 |         
434 |         str_expansion = {}
435 |         for argnum, i in macro.str_patch:
436 |             if argnum not in str_expansion:
437 |                 str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
438 |             rep[i] = copy.copy(rep[i])
439 |             rep[i].value = str_expansion[argnum]
440 | 
441 |         # Make the variadic macro comma patch.  If the variadic macro argument is empty, we get rid
442 |         comma_patch = False
443 |         if macro.variadic and not args[-1]:
444 |             for i in macro.var_comma_patch:
445 |                 rep[i] = None
446 |                 comma_patch = True
447 | 
448 |         # Make all other patches.   The order of these matters.  It is assumed that the patch list
449 |         # has been sorted in reverse order of patch location since replacements will cause the
450 |         # size of the replacement sequence to expand from the patch point.
451 |         
452 |         expanded = { }
453 |         for ptype, argnum, i in macro.patch:
454 |             # Concatenation.   Argument is left unexpanded
455 |             if ptype == 'c':
456 |                 rep[i:i+1] = args[argnum]
457 |             # Normal expansion.  Argument is macro expanded first
458 |             elif ptype == 'e':
459 |                 if argnum not in expanded:
460 |                     expanded[argnum] = self.expand_macros(args[argnum])
461 |                 rep[i:i+1] = expanded[argnum]
462 | 
463 |         # Get rid of removed comma if necessary
464 |         if comma_patch:
465 |             rep = [_i for _i in rep if _i]
466 | 
467 |         return rep
468 | 
469 | 
470 |     # ----------------------------------------------------------------------
471 |     # expand_macros()
472 |     #
473 |     # Given a list of tokens, this function performs macro expansion.
474 |     # The expanded argument is a dictionary that contains macros already
475 |     # expanded.  This is used to prevent infinite recursion.
476 |     # ----------------------------------------------------------------------
477 | 
478 |     def expand_macros(self,tokens,expanded=None):
479 |         if expanded is None:
480 |             expanded = {}
481 |         i = 0
482 |         while i < len(tokens):
483 |             t = tokens[i]
484 |             if t.type == self.t_ID:
485 |                 if t.value in self.macros and t.value not in expanded:
486 |                     # Yes, we found a macro match
487 |                     expanded[t.value] = True
488 |                     
489 |                     m = self.macros[t.value]
490 |                     if not m.arglist:
491 |                         # A simple macro
492 |                         ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
493 |                         for e in ex:
494 |                             e.lineno = t.lineno
495 |                         tokens[i:i+1] = ex
496 |                         i += len(ex)
497 |                     else:
498 |                         # A macro with arguments
499 |                         j = i + 1
500 |                         while j < len(tokens) and tokens[j].type in self.t_WS:
501 |                             j += 1
502 |                         if tokens[j].value == '(':
503 |                             tokcount,args,positions = self.collect_args(tokens[j:])
504 |                             if not m.variadic and len(args) !=  len(m.arglist):
505 |                                 self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
506 |                                 i = j + tokcount
507 |                             elif m.variadic and len(args) < len(m.arglist)-1:
508 |                                 if len(m.arglist) > 2:
509 |                                     self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
510 |                                 else:
511 |                                     self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
512 |                                 i = j + tokcount
513 |                             else:
514 |                                 if m.variadic:
515 |                                     if len(args) == len(m.arglist)-1:
516 |                                         args.append([])
517 |                                     else:
518 |                                         args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
519 |                                         del args[len(m.arglist):]
520 |                                         
521 |                                 # Get macro replacement text
522 |                                 rep = self.macro_expand_args(m,args)
523 |                                 rep = self.expand_macros(rep,expanded)
524 |                                 for r in rep:
525 |                                     r.lineno = t.lineno
526 |                                 tokens[i:j+tokcount] = rep
527 |                                 i += len(rep)
528 |                     del expanded[t.value]
529 |                     continue
530 |                 elif t.value == '__LINE__':
531 |                     t.type = self.t_INTEGER
532 |                     t.value = self.t_INTEGER_TYPE(t.lineno)
533 |                 
534 |             i += 1
535 |         return tokens
536 | 
537 |     # ----------------------------------------------------------------------    
538 |     # evalexpr()
539 |     # 
540 |     # Evaluate an expression token sequence for the purposes of evaluating
541 |     # integral expressions.
542 |     # ----------------------------------------------------------------------
543 | 
544 |     def evalexpr(self,tokens):
545 |         # tokens = tokenize(line)
546 |         # Search for defined macros
547 |         i = 0
548 |         while i < len(tokens):
549 |             if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
550 |                 j = i + 1
551 |                 needparen = False
552 |                 result = "0L"
553 |                 while j < len(tokens):
554 |                     if tokens[j].type in self.t_WS:
555 |                         j += 1
556 |                         continue
557 |                     elif tokens[j].type == self.t_ID:
558 |                         if tokens[j].value in self.macros:
559 |                             result = "1L"
560 |                         else:
561 |                             result = "0L"
562 |                         if not needparen: break
563 |                     elif tokens[j].value == '(':
564 |                         needparen = True
565 |                     elif tokens[j].value == ')':
566 |                         break
567 |                     else:
568 |                         self.error(self.source,tokens[i].lineno,"Malformed defined()")
569 |                     j += 1
570 |                 tokens[i].type = self.t_INTEGER
571 |                 tokens[i].value = self.t_INTEGER_TYPE(result)
572 |                 del tokens[i+1:j+1]
573 |             i += 1
574 |         tokens = self.expand_macros(tokens)
575 |         for i,t in enumerate(tokens):
576 |             if t.type == self.t_ID:
577 |                 tokens[i] = copy.copy(t)
578 |                 tokens[i].type = self.t_INTEGER
579 |                 tokens[i].value = self.t_INTEGER_TYPE("0L")
580 |             elif t.type == self.t_INTEGER:
581 |                 tokens[i] = copy.copy(t)
582 |                 # Strip off any trailing suffixes
583 |                 tokens[i].value = str(tokens[i].value)
584 |                 while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
585 |                     tokens[i].value = tokens[i].value[:-1]
586 |         
587 |         expr = "".join([str(x.value) for x in tokens])
588 |         expr = expr.replace("&&"," and ")
589 |         expr = expr.replace("||"," or ")
590 |         expr = expr.replace("!"," not ")
591 |         try:
592 |             result = eval(expr)
593 |         except StandardError:
594 |             self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression")
595 |             result = 0
596 |         return result
597 | 
598 |     # ----------------------------------------------------------------------
599 |     # parsegen()
600 |     #
    # Parse an input string and generate the preprocessed tokens.
602 |     # ----------------------------------------------------------------------
603 |     def parsegen(self,input,source=None):
604 | 
605 |         # Replace trigraph sequences
606 |         t = trigraph(input)
607 |         lines = self.group_lines(t)
608 | 
609 |         if not source:
610 |             source = ""
611 |             
612 |         self.define("__FILE__ \"%s\"" % source)
613 | 
614 |         self.source = source
615 |         chunk = []
616 |         enable = True
617 |         iftrigger = False
618 |         ifstack = []
619 | 
620 |         for x in lines:
621 |             for i,tok in enumerate(x):
622 |                 if tok.type not in self.t_WS: break
623 |             if tok.value == '#':
624 |                 # Preprocessor directive
625 | 
626 |                 # insert necessary whitespace instead of eaten tokens
627 |                 for tok in x:
628 |                     if tok.type in self.t_WS and '\n' in tok.value:
629 |                         chunk.append(tok)
630 |                 
631 |                 dirtokens = self.tokenstrip(x[i+1:])
632 |                 if dirtokens:
633 |                     name = dirtokens[0].value
634 |                     args = self.tokenstrip(dirtokens[1:])
635 |                 else:
636 |                     name = ""
637 |                     args = []
638 |                 
639 |                 if name == 'define':
640 |                     if enable:
641 |                         for tok in self.expand_macros(chunk):
642 |                             yield tok
643 |                         chunk = []
644 |                         self.define(args)
645 |                 elif name == 'include':
646 |                     if enable:
647 |                         for tok in self.expand_macros(chunk):
648 |                             yield tok
649 |                         chunk = []
650 |                         oldfile = self.macros['__FILE__']
651 |                         for tok in self.include(args):
652 |                             yield tok
653 |                         self.macros['__FILE__'] = oldfile
654 |                         self.source = source
655 |                 elif name == 'undef':
656 |                     if enable:
657 |                         for tok in self.expand_macros(chunk):
658 |                             yield tok
659 |                         chunk = []
660 |                         self.undef(args)
661 |                 elif name == 'ifdef':
662 |                     ifstack.append((enable,iftrigger))
663 |                     if enable:
664 |                         if not args[0].value in self.macros:
665 |                             enable = False
666 |                             iftrigger = False
667 |                         else:
668 |                             iftrigger = True
669 |                 elif name == 'ifndef':
670 |                     ifstack.append((enable,iftrigger))
671 |                     if enable:
672 |                         if args[0].value in self.macros:
673 |                             enable = False
674 |                             iftrigger = False
675 |                         else:
676 |                             iftrigger = True
677 |                 elif name == 'if':
678 |                     ifstack.append((enable,iftrigger))
679 |                     if enable:
680 |                         result = self.evalexpr(args)
681 |                         if not result:
682 |                             enable = False
683 |                             iftrigger = False
684 |                         else:
685 |                             iftrigger = True
686 |                 elif name == 'elif':
687 |                     if ifstack:
688 |                         if ifstack[-1][0]:     # We only pay attention if outer "if" allows this
689 |                             if enable:         # If already true, we flip enable False
690 |                                 enable = False
691 |                             elif not iftrigger:   # If False, but not triggered yet, we'll check expression
692 |                                 result = self.evalexpr(args)
693 |                                 if result:
694 |                                     enable  = True
695 |                                     iftrigger = True
696 |                     else:
697 |                         self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")
698 |                         
699 |                 elif name == 'else':
700 |                     if ifstack:
701 |                         if ifstack[-1][0]:
702 |                             if enable:
703 |                                 enable = False
704 |                             elif not iftrigger:
705 |                                 enable = True
706 |                                 iftrigger = True
707 |                     else:
708 |                         self.error(self.source,dirtokens[0].lineno,"Misplaced #else")
709 | 
710 |                 elif name == 'endif':
711 |                     if ifstack:
712 |                         enable,iftrigger = ifstack.pop()
713 |                     else:
714 |                         self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
715 |                 else:
716 |                     # Unknown preprocessor directive
717 |                     pass
718 | 
719 |             else:
720 |                 # Normal text
721 |                 if enable:
722 |                     chunk.extend(x)
723 | 
724 |         for tok in self.expand_macros(chunk):
725 |             yield tok
726 |         chunk = []
727 | 
728 |     # ----------------------------------------------------------------------
729 |     # include()
730 |     #
731 |     # Implementation of file-inclusion
732 |     # ----------------------------------------------------------------------
733 | 
734 |     def include(self,tokens):
735 |         # Try to extract the filename and then process an include file
736 |         if not tokens:
737 |             return
738 |         if tokens:
739 |             if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
740 |                 tokens = self.expand_macros(tokens)
741 | 
742 |             if tokens[0].value == '<':
743 |                 # Include <...>
744 |                 i = 1
745 |                 while i < len(tokens):
746 |                     if tokens[i].value == '>':
747 |                         break
748 |                     i += 1
749 |                 else:
750 |                     print("Malformed #include <...>")
751 |                     return
752 |                 filename = "".join([x.value for x in tokens[1:i]])
753 |                 path = self.path + [""] + self.temp_path
754 |             elif tokens[0].type == self.t_STRING:
755 |                 filename = tokens[0].value[1:-1]
756 |                 path = self.temp_path + [""] + self.path
757 |             else:
758 |                 print("Malformed #include statement")
759 |                 return
760 |         for p in path:
761 |             iname = os.path.join(p,filename)
762 |             try:
763 |                 data = open(iname,"r").read()
764 |                 dname = os.path.dirname(iname)
765 |                 if dname:
766 |                     self.temp_path.insert(0,dname)
767 |                 for tok in self.parsegen(data,filename):
768 |                     yield tok
769 |                 if dname:
770 |                     del self.temp_path[0]
771 |                 break
772 |             except IOError:
773 |                 pass
774 |         else:
775 |             print("Couldn't find '%s'" % filename)
776 | 
777 |     # ----------------------------------------------------------------------
778 |     # define()
779 |     #
780 |     # Define a new macro
781 |     # ----------------------------------------------------------------------
782 | 
783 |     def define(self,tokens):
784 |         if isinstance(tokens,(str,unicode)):
785 |             tokens = self.tokenize(tokens)
786 | 
787 |         linetok = tokens
788 |         try:
789 |             name = linetok[0]
790 |             if len(linetok) > 1:
791 |                 mtype = linetok[1]
792 |             else:
793 |                 mtype = None
794 |             if not mtype:
795 |                 m = Macro(name.value,[])
796 |                 self.macros[name.value] = m
797 |             elif mtype.type in self.t_WS:
798 |                 # A normal macro
799 |                 m = Macro(name.value,self.tokenstrip(linetok[2:]))
800 |                 self.macros[name.value] = m
801 |             elif mtype.value == '(':
802 |                 # A macro with arguments
803 |                 tokcount, args, positions = self.collect_args(linetok[1:])
804 |                 variadic = False
805 |                 for a in args:
806 |                     if variadic:
807 |                         print("No more arguments may follow a variadic argument")
808 |                         break
809 |                     astr = "".join([str(_i.value) for _i in a])
810 |                     if astr == "...":
811 |                         variadic = True
812 |                         a[0].type = self.t_ID
813 |                         a[0].value = '__VA_ARGS__'
814 |                         variadic = True
815 |                         del a[1:]
816 |                         continue
817 |                     elif astr[-3:] == "..." and a[0].type == self.t_ID:
818 |                         variadic = True
819 |                         del a[1:]
820 |                         # If, for some reason, "." is part of the identifier, strip off the name for the purposes
821 |                         # of macro expansion
822 |                         if a[0].value[-3:] == '...':
823 |                             a[0].value = a[0].value[:-3]
824 |                         continue
825 |                     if len(a) > 1 or a[0].type != self.t_ID:
826 |                         print("Invalid macro argument")
827 |                         break
828 |                 else:
829 |                     mvalue = self.tokenstrip(linetok[1+tokcount:])
830 |                     i = 0
831 |                     while i < len(mvalue):
832 |                         if i+1 < len(mvalue):
833 |                             if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
834 |                                 del mvalue[i]
835 |                                 continue
836 |                             elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
837 |                                 del mvalue[i+1]
838 |                         i += 1
839 |                     m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
840 |                     self.macro_prescan(m)
841 |                     self.macros[name.value] = m
842 |             else:
843 |                 print("Bad macro definition")
844 |         except LookupError:
845 |             print("Bad macro definition")
846 | 
847 |     # ----------------------------------------------------------------------
848 |     # undef()
849 |     #
850 |     # Undefine a macro
851 |     # ----------------------------------------------------------------------
852 | 
853 |     def undef(self,tokens):
854 |         id = tokens[0].value
855 |         try:
856 |             del self.macros[id]
857 |         except LookupError:
858 |             pass
859 | 
860 |     # ----------------------------------------------------------------------
861 |     # parse()
862 |     #
863 |     # Parse input text.
864 |     # ----------------------------------------------------------------------
865 |     def parse(self,input,source=None,ignore={}):
866 |         self.ignore = ignore
867 |         self.parser = self.parsegen(input,source)
868 |         
869 |     # ----------------------------------------------------------------------
870 |     # token()
871 |     #
872 |     # Method to return individual tokens
873 |     # ----------------------------------------------------------------------
874 |     def token(self):
875 |         try:
876 |             while True:
877 |                 tok = next(self.parser)
878 |                 if tok.type not in self.ignore: return tok
879 |         except StopIteration:
880 |             self.parser = None
881 |             return None
882 | 
if __name__ == '__main__':
    # Command-line driver: preprocess the file named by the first argument
    # and print every token produced, prefixed with the current source name.
    import sys
    import ply.lex as lex

    lexer = lex.lex()

    # Read the whole input inside a context manager so the file handle is
    # closed as soon as the text is in memory.
    with open(sys.argv[1]) as f:
        text = f.read()

    # Run a preprocessor
    p = Preprocessor(lexer)
    p.parse(text,sys.argv[1])
    while True:
        tok = p.token()
        if not tok: break
        print(p.source, tok)
898 | 
899 | 
900 | 
901 | 
902 |     
903 | 
904 | 
905 | 
906 | 
907 | 
908 | 
909 | 


--------------------------------------------------------------------------------
/yyparse/ply/ctokens.py:
--------------------------------------------------------------------------------
  1 | # ----------------------------------------------------------------------
  2 | # ctokens.py
  3 | #
  4 | # Token specifications for symbols in ANSI C and C++.  This file is
  5 | # meant to be used as a library in other tokenizers.
  6 | # ----------------------------------------------------------------------
  7 | 
# Token names exported by this module.  No reserved words are listed here.
# NOTE(review): 'TYPEID' has no matching t_TYPEID rule in this file --
# presumably the lexer that imports this module supplies it; confirm.

tokens = [
    # Literals (identifier, type name, integer constant, float constant, string constant, char constant)
    'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'INCREMENT', 'DECREMENT',

    # Structure dereference (->)
    'ARROW',

    # Ternary operator (?)
    'TERNARY',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
]
 42 |     
# Operators
# Each t_NAME below is the regular expression for token NAME.
# NOTE(review): several patterns are prefixes of others ('<', '<<', '<<=').
# PLY's lex documentation states that string rules are tried in order of
# decreasing regex length, so the textual order here should not decide
# precedence -- confirm against the PLY version bundled with this project.
t_PLUS             = r'\+'
t_MINUS            = r'-'
t_TIMES            = r'\*'
t_DIVIDE           = r'/'
t_MODULO           = r'%'
t_OR               = r'\|'
t_AND              = r'&'
t_NOT              = r'~'
t_XOR              = r'\^'
t_LSHIFT           = r'<<'
t_RSHIFT           = r'>>'
t_LOR              = r'\|\|'
t_LAND             = r'&&'
t_LNOT             = r'!'
t_LT               = r'<'
t_GT               = r'>'
t_LE               = r'<='
t_GE               = r'>='
t_EQ               = r'=='
t_NE               = r'!='

# Assignment operators

t_EQUALS           = r'='
t_TIMESEQUAL       = r'\*='
t_DIVEQUAL         = r'/='
t_MODEQUAL         = r'%='
t_PLUSEQUAL        = r'\+='
t_MINUSEQUAL       = r'-='
t_LSHIFTEQUAL      = r'<<='
t_RSHIFTEQUAL      = r'>>='
t_ANDEQUAL         = r'&='
t_OREQUAL          = r'\|='
t_XOREQUAL         = r'\^='

# Increment/decrement
t_INCREMENT        = r'\+\+'
t_DECREMENT        = r'--'

# ->
t_ARROW            = r'->'

# ?
t_TERNARY          = r'\?'

# Delimiters
t_LPAREN           = r'\('
t_RPAREN           = r'\)'
t_LBRACKET         = r'\['
t_RBRACKET         = r'\]'
t_LBRACE           = r'\{'
t_RBRACE           = r'\}'
t_COMMA            = r','
t_PERIOD           = r'\.'
t_SEMI             = r';'
t_COLON            = r':'
t_ELLIPSIS         = r'\.\.\.'

# Identifiers: a letter or underscore followed by letters, digits or underscores
t_ID = r'[A-Za-z_][A-Za-z0-9_]*'

# Integer literal: decimal digits with an optional u/l suffix combination
t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating literal: digits '.' digits with optional exponent, or digits with
# a mandatory exponent; optional l/L/f/F suffix.
# NOTE(review): the spaces around '|' are only harmless if the pattern is
# compiled in verbose mode (PLY's lex appears to do so by default) -- confirm.
# NOTE(review): as written the exponent marker is a lowercase 'e' only, and
# forms such as '1.' or '.5' are not covered; verify whether that is intended.
t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal: double-quoted, backslash escapes allowed, no raw newline
t_STRING = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''
116 | 
# C-style block comment: /* ... */, possibly spanning several lines.
# The function's docstring is the token's regular expression.
def t_COMMENT(t):
    r'/\*(.|\n)*?\*/'
    # Advance the lexer's line counter by the newlines inside the comment.
    newline_total = t.value.count('\n')
    t.lexer.lineno = t.lexer.lineno + newline_total
    return t
122 | 
# Comment (C++-Style): from '//' to the end of the line.
# The function's docstring is the token's regular expression.  The trailing
# newline is optional so that a comment on the last line of a file with no
# final newline is still recognised as a comment.
def t_CPPCOMMENT(t):
    r'//.*\n?'
    # Count the newline only when one was actually consumed.
    t.lexer.lineno += t.value.count('\n')
    return t
128 | 
129 | 
130 |     
131 | 
132 | 
133 | 
134 | 


--------------------------------------------------------------------------------
/yyparse/ply/ygen.py:
--------------------------------------------------------------------------------
 1 | # ply: ygen.py
 2 | #
 3 | # This is a support program that auto-generates different versions of the YACC parsing
 4 | # function with different features removed for the purposes of performance.
 5 | #
 6 | # Users should edit the method LParser.parsedebug() in yacc.py.   The source code 
 7 | # for that method is then used to create the other methods.   See the comments in
 8 | # yacc.py for further details.
 9 | 
10 | import os.path
11 | import shutil
12 | 
def get_source_range(lines, tag):
    """Locate the region delimited by ``#--! <tag>-start`` / ``#--! <tag>-end``.

    ``lines`` is a list of source lines.  The start marker is the first line
    whose stripped text starts with the start tag; the end marker is the
    first later line whose stripped text ends with the end tag.

    Returns ``(start, end)`` such that ``lines[start:end]`` is the text
    strictly between the two marker lines.

    Raises ``ValueError`` when either marker cannot be found.
    """
    start_tag = '#--! %s-start' % tag
    end_tag = '#--! %s-end' % tag

    # One shared iterator: the search for the end marker resumes on the line
    # right after the start marker.
    srclines = enumerate(lines)

    start_index = None
    for index, line in srclines:
        if line.strip().startswith(start_tag):
            start_index = index
            break
    if start_index is None:
        raise ValueError('start marker %r not found' % start_tag)

    end_index = None
    for index, line in srclines:
        if line.strip().endswith(end_tag):
            end_index = index
            break
    if end_index is None:
        raise ValueError('end marker %r not found' % end_tag)

    return (start_index + 1, end_index)
27 | 
def filter_section(lines, tag):
    """Return ``lines`` without the regions fenced by ``#--! <tag>`` markers.

    Every line whose stripped text starts with ``#--! <tag>`` flips between
    keeping and dropping the lines that follow; the marker lines themselves
    are never kept.  Copying starts in the "keep" state.
    """
    marker = '#--! %s' % tag
    kept = []
    keeping = True
    for text in lines:
        if text.strip().startswith(marker):
            keeping = not keeping
            continue
        if keeping:
            kept.append(text)
    return kept
38 | 
def main():
    """Regenerate the derived parse sections of yacc.py in place.

    Steps, all relative to the directory containing this script:

    1. copy ``yacc.py`` to ``yacc.py.bak``;
    2. take the ``parsedebug`` region as the reference text;
    3. strip its ``DEBUG`` sections to get the ``parseopt`` text, then strip
       the ``TRACKING`` sections from that to get the ``parseopt-notrack`` text;
    4. write both back into their regions, right-strip every line, and save.
    """
    here = os.path.dirname(__file__)
    yacc_path = os.path.join(here, 'yacc.py')
    shutil.copy2(yacc_path, os.path.join(here, 'yacc.py.bak'))

    with open(yacc_path, 'r') as src:
        lines = src.readlines()

    debug_start, debug_end = get_source_range(lines, 'parsedebug')
    opt_start, opt_end = get_source_range(lines, 'parseopt')
    notrack_start, notrack_end = get_source_range(lines, 'parseopt-notrack')

    # The parsedebug region is the reference implementation
    reference = lines[debug_start:debug_end]
    without_debug = filter_section(reference, 'DEBUG')
    without_tracking = filter_section(without_debug, 'TRACKING')

    # NOTE(review): the notrack region is spliced first, presumably because it
    # sits after parseopt in yacc.py so the earlier indices stay valid -- confirm.
    lines[notrack_start:notrack_end] = without_tracking
    lines[opt_start:opt_end] = without_debug

    # Normalise trailing whitespace on every line before writing back
    normalized = [text.rstrip()+'\n' for text in lines]
    with open(yacc_path, 'w') as dst:
        dst.writelines(normalized)

    print('Updated yacc.py')
67 | 
# Rewrite yacc.py only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 


--------------------------------------------------------------------------------
/yyparse/test1.c:
--------------------------------------------------------------------------------
// Declaration with no type specifier (relies on implicit int).
extern x;
// Enumeration whose constants take the default values: false = 0, true = 1.
enum Boolean
{
	false,
	true
};

// Anonymous struct with an int and a double member, named `mytype` via typedef.
typedef struct{
	int a;
	double c;
}mytype;
12 | 
// Function with an empty parameter list; it only declares and initializes a local.
void fff(){
	int asdf = 1;
}
16 | 
// Walks through declarations, arithmetic, if / else-if / else, for, do-while,
// while, switch and compound assignment.
// NOTE(review): presumably a parser fixture rather than a runnable program --
// printf/putchar/getc_unlocked are used without any #include, and several
// locals are read before being assigned; confirm before "fixing" anything.
int main(int argc, char *argv[]) {
	int a, c;
	double b;
	int i;
	char ch;
	long f = 122L;
	const unsigned short g = 1;
//	double b = 12.3E2;	
	printf("a + b = c\n");
	printf("%d\n", sizeof(int));
	
	// Floating constants: exponent form, plain form, and "1." with no fraction digits.
	b = 12.3E2;
	b = 12.3 + 345 - 1. * 0.9999;
	c = 345;
	
	// if / else-if / else chain; the last branch closes its brace on the statement line.
	if (1) {
		a = b;
	}else if(0){
		c = 1;
	}else {
		b = a *c;}
	
	// Conditional operator used as the right-hand side of a compound assignment.
	for (i = 0; i < 10; i++) {
		a += c?1:2;
	}
	
	// do-while containing both break and continue.
	do {
		a >>= 1;
		if (a < 0) {
			break;
		}else {
			continue;
		}
	} while (1);
	
	while (a) {
		a--;
		getc_unlocked(a);
	}
	
	// 'a' shares the break under 'b'; 'c' falls through into 'd' and default.
	switch (ch) {
		case 'a':
		case 'b':
			 break;
		case 'c':
			putchar(ch);
		case 'd':
		default:
			break;
	}
	
	a = 1;
}


--------------------------------------------------------------------------------
/yyparse/test2.c:
--------------------------------------------------------------------------------
// NOTE(review): presumably a negative parser fixture -- `a + b = c;` is an expression statement at file scope, which C does not allow; confirm it is intentionally invalid.
int a;
a + b = c;
int c;


--------------------------------------------------------------------------------
/yyparse/testChar.c:
--------------------------------------------------------------------------------
 1 | 0
 2 | 123
 3 | 12.4
 4 | 1.
 5 | .123
 6 | 1.3E1
 7 | 1.3e0
 8 | 2e10
 9 | 
10 | 
11 | ()[]{};.,&*+-~!/%<>^|?:=


--------------------------------------------------------------------------------