├── makefile
├── h.c
├── hello.c
├── .gitignore
├── README.md
├── LICENSE
├── xc.c
└── xc-tutor.c


/makefile:
--------------------------------------------------------------------------------
1 | # Builds the step-by-step tutorial interpreter `xc-tutor` from xc-tutor.c.
2 | # -m32 produces a 32-bit binary (the README notes this option may be needed
3 | # when building on 64-bit machines); -g adds debug information.
4 | CC = gcc
5 | CFLAGS = -m32 -g
6 | # `clean` names an action, not a file: run it even if a file ./clean exists.
7 | .PHONY: clean
8 | xc-tutor: xc-tutor.o
9 | 	$(CC) $(CFLAGS) -o $@ $^
10 | xc-tutor.o: xc-tutor.c
11 | 	$(CC) $(CFLAGS) -c -o $@ $<
12 | clean:
13 | 	$(RM) *.o xc-tutor


--------------------------------------------------------------------------------
/h.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | // Prints "hello 3" and returns 2, so the process exit status is non-zero.
 4 | int main(int argc, char **argv)
 5 | {
 6 |   int j;
 7 | 
 8 |   j = 1 + 2;
 9 |   printf("hello %d\n", j);
10 |   return 2;
11 | }
12 | 


--------------------------------------------------------------------------------
/hello.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | // Doubly-recursive Fibonacci with fibonacci(0) == fibonacci(1) == 1.
 4 | int fibonacci(int i) {
 5 |     // Recurse only above the base cases; every i <= 1 yields 1.
 6 |     if (i > 1) { return fibonacci(i - 1) + fibonacci(i - 2); }
 7 |     return 1;
 8 | }
 9 | 
10 | // Prints fibonacci(0) through fibonacci(10), one result per line.
11 | int main() {
12 |     int n;
13 |     n = 0;
14 |     while (n < 11) {
15 |         printf("fibonacci(%2d) = %d\n", n, fibonacci(n));
16 |         n = n + 1;
17 |     }
18 |     return 0;
19 | }
20 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Object files
 2 | *.o
 3 | *.ko
 4 | *.obj
 5 | *.elf
 6 | 
 7 | # Precompiled Headers
 8 | *.gch
 9 | *.pch
10 | 
11 | # Libraries
12 | *.lib
13 | *.a
14 | *.la
15 | *.lo
16 | 
17 | # Shared objects (inc. Windows DLLs)
18 | *.dll
19 | *.so
20 | *.so.*
21 | *.dylib
22 | 
23 | # Executables
24 | *.exe
25 | *.out
26 | *.app
27 | *.i*86
28 | *.x86_64
29 | *.hex
30 | 
31 | # Debug files
32 | *.dSYM/
33 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | A C interpreter that interprets itself.
 2 | 
 3 | # How to Run the Code
 4 | 
 5 | File `xc.c` is the original version, and `xc-tutor.c` is the one I build up
 6 | step by step for the tutorial.
 7 | 
 8 | ```
 9 | gcc -o xc xc.c (you may need the -m32 option on 64bit machines)
10 | ./xc hello.c
11 | ./xc -s hello.c
12 | 
13 | ./xc xc.c hello.c
14 | ./xc xc.c xc.c hello.c
15 | ```
16 | 
17 | # About
18 | 
19 | This project is inspired by [c4](https://github.com/rswier/c4) and is largely
20 | based on it.
21 | 
22 | However, I rewrote all of it to make it more understandable and to help
23 | myself understand it.
24 | 
25 | Despite the complexity we see in books about compiler design, writing one is
26 | not that hard. You don't need that much theory, though it will help you
27 | better understand the logic behind the code.
28 | 
29 | I also wrote a series of articles about how this compiler is built (in Chinese, though):
30 | 
31 | 1. [手把手教你构建 C 语言编译器（0）——前言](http://lotabout.me/2015/write-a-C-interpreter-0/)
32 | 2. [手把手教你构建 C 语言编译器（1）——设计](http://lotabout.me/2015/write-a-C-interpreter-1/)
33 | 3. [手把手教你构建 C 语言编译器（2）——虚拟机](http://lotabout.me/2015/write-a-C-interpreter-2/)
34 | 4. [手把手教你构建 C 语言编译器（3）——词法分析器](http://lotabout.me/2015/write-a-C-interpreter-3/)
35 | 5. [手把手教你构建 C 语言编译器（4）——递归下降](http://lotabout.me/2016/write-a-C-interpreter-4/)
36 | 6. [手把手教你构建 C 语言编译器（5）——变量定义](http://lotabout.me/2016/write-a-C-interpreter-5/)
37 | 7. [手把手教你构建 C 语言编译器（6）——函数定义](http://lotabout.me/2016/write-a-C-interpreter-6/)
38 | 8. [手把手教你构建 C 语言编译器（7）——语句](http://lotabout.me/2016/write-a-C-interpreter-7/)
39 | 9. [手把手教你构建 C 语言编译器（8）——表达式](http://lotabout.me/2016/write-a-C-interpreter-8/)
40 | 10. [手把手教你构建 C 语言编译器（9）——总结](http://lotabout.me/2016/write-a-C-interpreter-9/)
41 | 
42 | # Resources
43 | 
44 | Further Reading:
45 | 
46 | - [Let's Build a Compiler](http://compilers.iecc.com/crenshaw/): Excellent
47 |     starting material for building a compiler.
48 | 
49 | 
50 | Forks:
51 | 
52 | - [A fork that implements a debugger for xc.c](https://github.com/descent/write-a-C-interpreter)
53 | 
54 | 
55 | # Licence
56 | 
57 | The original code is licensed under GPL2, so this code uses the same
58 | licence.
59 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 
341 | 


--------------------------------------------------------------------------------
/xc.c:
--------------------------------------------------------------------------------
   1 | #include <stdio.h>
   2 | #include <stdlib.h>
   3 | #include <memory.h>
   4 | #include <string.h>
   5 | 
   6 | int debug;    // flag: print the executed instructions
   7 | int assembly; // flag: next() echoes each source line followed by the instructions emitted for it
   8 | 
   9 | int token; // current token, set by next()
  10 | 
  11 | // VM instructions; keep in the same order as the mnemonic string in next(). Opcodes up to ADJ are dumped with one operand.
  12 | enum { LEA ,IMM ,JMP ,CALL,JZ  ,JNZ ,ENT ,ADJ ,LEV ,LI  ,LC  ,SI  ,SC  ,PUSH,
  13 |        OR  ,XOR ,AND ,EQ  ,NE  ,LT  ,GT  ,LE  ,GE  ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,
  14 |        OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT };
  15 | 
  16 | // tokens and classes (operators last and in precedence order), copied from c4.
  17 | // Values start at 128; next() can also return a plain character code (e.g. '!') as the token.
  18 | enum {
  19 |   Num = 128, Fun, Sys, Glo, Loc, Id,
  20 |   Char, Else, Enum, If, Int, Return, Sizeof, While,
  21 |   Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak
  22 | };
  23 | 
  24 | // fields of one identifier entry in the symbol table; IdSize is the entry length in ints (next() steps by it)
  25 | enum {Token, Hash, Name, Type, Class, Value, BType, BClass, BValue, IdSize};
  26 | 
  27 | 
  28 | // types of variable/function
  29 | enum { CHAR, INT, PTR };
  30 | 
  31 | // kind of declaration: global or local.
  32 | enum {Global, Local};
  33 | 
  34 | int *text, // text segment
  35 |     *stack;// stack
  36 | int * old_text; // last text-segment position already dumped by next() in assembly mode
  37 | char *data; // data segment; next() appends the bytes of string literals here
  38 | int *idmain; // NOTE(review): presumably the symbol-table entry for main - confirm where it is assigned
  39 | 
  40 | char *src, *old_src;  // src: read position in the source string; old_src: start of the text next() has not echoed yet
  41 | 
  42 | int poolsize; // default size of text/data/stack
  43 | int *pc, *bp, *sp, ax, cycle; // virtual machine registers
  44 | 
  45 | int *current_id, // symbol-table entry found or created by the last identifier scan in next()
  46 |     *symbols,    // symbol table, searched linearly; the first entry whose Token field is 0 ends the search
  47 |     line,        // line number of source code
  48 |     token_val;   // value of current token: a number's value, or the data-segment address of a string literal
  49 | 
  50 | int basetype;    // the type of a declaration, make it global for convenience
  51 | int expr_type;   // the type of an expression
  52 | 
  53 | // function frame
  54 | //
  55 | // 0: arg 1
  56 | // 1: arg 2
  57 | // 2: arg 3
  58 | // 3: return address
  59 | // 4: old bp pointer  <- index_of_bp
  60 | // 5: local var 1
  61 | // 6: local var 2
  62 | int index_of_bp; // index of bp pointer on stack
  63 | 
  64 | void next() {
  65 |     char *last_pos;
  66 |     int hash;
  67 | 
  68 |     while (token = *src) {
  69 |         ++src;
  70 | 
  71 |         if (token == '\n') {
  72 |             if (assembly) {
  73 |                 // print compile info
  74 |                 printf("%d: %.*s", line, src-old_src, old_src);
  75 |                 old_src = src;
  76 | 
  77 |                 while (old_text < text) {
  78 |                     printf("%8.4s", & "LEA ,IMM ,JMP ,CALL,JZ  ,JNZ ,ENT ,ADJ ,LEV ,LI  ,LC  ,SI  ,SC  ,PUSH,"
  79 |                                       "OR  ,XOR ,AND ,EQ  ,NE  ,LT  ,GT  ,LE  ,GE  ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
  80 |                                       "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT"[*++old_text * 5]);
  81 | 
  82 |                     if (*old_text <= ADJ)
  83 |                         printf(" %d\n", *++old_text);
  84 |                     else
  85 |                         printf("\n");
  86 |                 }
  87 |             }
  88 |             ++line;
  89 |         }
  90 |         else if (token == '#') {
  91 |             // skip macro, because we will not support it
  92 |             while (*src != 0 && *src != '\n') {
  93 |                 src++;
  94 |             }
  95 |         }
  96 |         else if ((token >= 'a' && token <= 'z') || (token >= 'A' && token <= 'Z') || (token == '_')) {
  97 | 
  98 |             // parse identifier
  99 |             last_pos = src - 1;
 100 |             hash = token;
 101 | 
 102 |             while ((*src >= 'a' && *src <= 'z') || (*src >= 'A' && *src <= 'Z') || (*src >= '0' && *src <= '9') || (*src == '_')) {
 103 |                 hash = hash * 147 + *src;
 104 |                 src++;
 105 |             }
 106 | 
 107 |             // look for existing identifier, linear search
 108 |             current_id = symbols;
 109 |             while (current_id[Token]) {
 110 |                 if (current_id[Hash] == hash && !memcmp((char *)current_id[Name], last_pos, src - last_pos)) {
 111 |                     //found one, return
 112 |                     token = current_id[Token];
 113 |                     return;
 114 |                 }
 115 |                 current_id = current_id + IdSize;
 116 |             }
 117 | 
 118 | 
 119 |             // store new ID
 120 |             current_id[Name] = (int)last_pos;
 121 |             current_id[Hash] = hash;
 122 |             token = current_id[Token] = Id;
 123 |             return;
 124 |         }
 125 |         else if (token >= '0' && token <= '9') {
 126 |             // parse number, three kinds: dec(123) hex(0x123) oct(017)
 127 |             token_val = token - '0';
 128 |             if (token_val > 0) {
 129 |                 // dec, starts with [1-9]
 130 |                 while (*src >= '0' && *src <= '9') {
 131 |                     token_val = token_val*10 + *src++ - '0';
 132 |                 }
 133 |             } else {
 134 |                 // starts with number 0
 135 |                 if (*src == 'x' || *src == 'X') {
 136 |                     //hex
 137 |                     token = *++src;
 138 |                     while ((token >= '0' && token <= '9') || (token >= 'a' && token <= 'f') || (token >= 'A' && token <= 'F')) {
 139 |                         token_val = token_val * 16 + (token & 15) + (token >= 'A' ? 9 : 0);
 140 |                         token = *++src;
 141 |                     }
 142 |                 } else {
 143 |                     // oct
 144 |                     while (*src >= '0' && *src <= '7') {
 145 |                         token_val = token_val*8 + *src++ - '0';
 146 |                     }
 147 |                 }
 148 |             }
 149 | 
 150 |             token = Num;
 151 |             return;
 152 |         }
 153 |         else if (token == '/') {
 154 |             if (*src == '/') {
 155 |                 // skip comments
 156 |                 while (*src != 0 && *src != '\n') {
 157 |                     ++src;
 158 |                 }
 159 |             } else {
 160 |                 // divide operator
 161 |                 token = Div;
 162 |                 return;
 163 |             }
 164 |         }
 165 |         else if (token == '"' || token == '\'') {
 166 |             // parse string literal, currently, the only supported escape
 167 |             // character is '\n', store the string literal into data.
 168 |             last_pos = data;
 169 |             while (*src != 0 && *src != token) {
 170 |                 token_val = *src++;
 171 |                 if (token_val == '\\') {
 172 |                     // escape character
 173 |                     token_val = *src++;
 174 |                     if (token_val == 'n') {
 175 |                         token_val = '\n';
 176 |                     }
 177 |                 }
 178 | 
 179 |                 if (token == '"') {
 180 |                     *data++ = token_val;
 181 |                 }
 182 |             }
 183 | 
 184 |             src++;
 185 |             // if it is a single character, return Num token
 186 |             if (token == '"') {
 187 |                 token_val = (int)last_pos;
 188 |             } else {
 189 |                 token = Num;
 190 |             }
 191 | 
 192 |             return;
 193 |         }
 194 |         else if (token == '=') {
 195 |             // parse '==' and '='
 196 |             if (*src == '=') {
 197 |                 src ++;
 198 |                 token = Eq;
 199 |             } else {
 200 |                 token = Assign;
 201 |             }
 202 |             return;
 203 |         }
 204 |         else if (token == '+') {
 205 |             // parse '+' and '++'
 206 |             if (*src == '+') {
 207 |                 src ++;
 208 |                 token = Inc;
 209 |             } else {
 210 |                 token = Add;
 211 |             }
 212 |             return;
 213 |         }
 214 |         else if (token == '-') {
 215 |             // parse '-' and '--'
 216 |             if (*src == '-') {
 217 |                 src ++;
 218 |                 token = Dec;
 219 |             } else {
 220 |                 token = Sub;
 221 |             }
 222 |             return;
 223 |         }
 224 |         else if (token == '!') {
 225 |             // parse '!='
 226 |             if (*src == '=') {
 227 |                 src++;
 228 |                 token = Ne;
 229 |             }
 230 |             return;
 231 |         }
 232 |         else if (token == '<') {
 233 |             // parse '<=', '<<' or '<'
 234 |             if (*src == '=') {
 235 |                 src ++;
 236 |                 token = Le;
 237 |             } else if (*src == '<') {
 238 |                 src ++;
 239 |                 token = Shl;
 240 |             } else {
 241 |                 token = Lt;
 242 |             }
 243 |             return;
 244 |         }
 245 |         else if (token == '>') {
 246 |             // parse '>=', '>>' or '>'
 247 |             if (*src == '=') {
 248 |                 src ++;
 249 |                 token = Ge;
 250 |             } else if (*src == '>') {
 251 |                 src ++;
 252 |                 token = Shr;
 253 |             } else {
 254 |                 token = Gt;
 255 |             }
 256 |             return;
 257 |         }
 258 |         else if (token == '|') {
 259 |             // parse '|' or '||'
 260 |             if (*src == '|') {
 261 |                 src ++;
 262 |                 token = Lor;
 263 |             } else {
 264 |                 token = Or;
 265 |             }
 266 |             return;
 267 |         }
 268 |         else if (token == '&') {
 269 |             // parse '&' and '&&'
 270 |             if (*src == '&') {
 271 |                 src ++;
 272 |                 token = Lan;
 273 |             } else {
 274 |                 token = And;
 275 |             }
 276 |             return;
 277 |         }
 278 |         else if (token == '^') {
 279 |             token = Xor;
 280 |             return;
 281 |         }
 282 |         else if (token == '%') {
 283 |             token = Mod;
 284 |             return;
 285 |         }
 286 |         else if (token == '*') {
 287 |             token = Mul;
 288 |             return;
 289 |         }
 290 |         else if (token == '[') {
 291 |             token = Brak;
 292 |             return;
 293 |         }
 294 |         else if (token == '?') {
 295 |             token = Cond;
 296 |             return;
 297 |         }
 298 |         else if (token == '~' || token == ';' || token == '{' || token == '}' || token == '(' || token == ')' || token == ']' || token == ',' || token == ':') {
 299 |             // directly return the character as token;
 300 |             return;
 301 |         }
 302 |     }
 303 | }
 304 | 
 305 | void match(int tk) {
 306 |     if (token == tk) {
 307 |         next();
 308 |     } else {
 309 |         printf("%d: expected token: %d\n", line, tk);
 310 |         exit(-1);
 311 |     }
 312 | }
 313 | 
 314 | 
 315 | void expression(int level) {
 316 |     // expressions have various format.
 317 |     // but majorly can be divided into two parts: unit and operator
 318 |     // for example `(char) *a[10] = (int *) func(b > 0 ? 10 : 20);
 319 |     // `a[10]` is an unit while `*` is an operator.
 320 |     // `func(...)` in total is an unit.
 321 |     // so we should first parse those unit and unary operators
 322 |     // and then the binary ones
 323 |     //
 324 |     // also the expression can be in the following types:
 325 |     //
 326 |     // 1. unit_unary ::= unit | unit unary_op | unary_op unit
 327 |     // 2. expr ::= unit_unary (bin_op unit_unary ...)
 328 | 
 329 |     // unit_unary()
 330 |     int *id;
 331 |     int tmp;
 332 |     int *addr;
 333 |     {
 334 |         if (!token) {
 335 |             printf("%d: unexpected token EOF of expression\n", line);
 336 |             exit(-1);
 337 |         }
 338 |         if (token == Num) {
 339 |             match(Num);
 340 | 
 341 |             // emit code
 342 |             *++text = IMM;
 343 |             *++text = token_val;
 344 |             expr_type = INT;
 345 |         }
 346 |         else if (token == '"') {
 347 |             // continous string "abc" "abc"
 348 | 
 349 | 
 350 |             // emit code
 351 |             *++text = IMM;
 352 |             *++text = token_val;
 353 | 
 354 |             match('"');
 355 |             // store the rest strings
 356 |             while (token == '"') {
 357 |                 match('"');
 358 |             }
 359 | 
 360 |             // append the end of string character '\0', all the data are default
 361 |             // to 0, so just move data one position forward.
 362 |             data = (char *)(((int)data + sizeof(int)) & (-sizeof(int)));
 363 |             expr_type = PTR;
 364 |         }
 365 |         else if (token == Sizeof) {
 366 |             // sizeof is actually an unary operator
 367 |             // now only `sizeof(int)`, `sizeof(char)` and `sizeof(*...)` are
 368 |             // supported.
 369 |             match(Sizeof);
 370 |             match('(');
 371 |             expr_type = INT;
 372 | 
 373 |             if (token == Int) {
 374 |                 match(Int);
 375 |             } else if (token == Char) {
 376 |                 match(Char);
 377 |                 expr_type = CHAR;
 378 |             }
 379 | 
 380 |             while (token == Mul) {
 381 |                 match(Mul);
 382 |                 expr_type = expr_type + PTR;
 383 |             }
 384 | 
 385 |             match(')');
 386 | 
 387 |             // emit code
 388 |             *++text = IMM;
 389 |             *++text = (expr_type == CHAR) ? sizeof(char) : sizeof(int);
 390 | 
 391 |             expr_type = INT;
 392 |         }
 393 |         else if (token == Id) {
 394 |             // there are several type when occurs to Id
 395 |             // but this is unit, so it can only be
 396 |             // 1. function call
 397 |             // 2. Enum variable
 398 |             // 3. global/local variable
 399 |             match(Id);
 400 | 
 401 |             id = current_id;
 402 | 
 403 |             if (token == '(') {
 404 |                 // function call
 405 |                 match('(');
 406 | 
 407 |                 // pass in arguments
 408 |                 tmp = 0; // number of arguments
 409 |                 while (token != ')') {
 410 |                     expression(Assign);
 411 |                     *++text = PUSH;
 412 |                     tmp ++;
 413 | 
 414 |                     if (token == ',') {
 415 |                         match(',');
 416 |                     }
 417 | 
 418 |                 }
 419 |                 match(')');
 420 | 
 421 |                 // emit code
 422 |                 if (id[Class] == Sys) {
 423 |                     // system functions
 424 |                     *++text = id[Value];
 425 |                 }
 426 |                 else if (id[Class] == Fun) {
 427 |                     // function call
 428 |                     *++text = CALL;
 429 |                     *++text = id[Value];
 430 |                 }
 431 |                 else {
 432 |                     printf("%d: bad function call\n", line);
 433 |                     exit(-1);
 434 |                 }
 435 | 
 436 |                 // clean the stack for arguments
 437 |                 if (tmp > 0) {
 438 |                     *++text = ADJ;
 439 |                     *++text = tmp;
 440 |                 }
 441 |                 expr_type = id[Type];
 442 |             }
 443 |             else if (id[Class] == Num) {
 444 |                 // enum variable
 445 |                 *++text = IMM;
 446 |                 *++text = id[Value];
 447 |                 expr_type = INT;
 448 |             }
 449 |             else {
 450 |                 // variable
 451 |                 if (id[Class] == Loc) {
 452 |                     *++text = LEA;
 453 |                     *++text = index_of_bp - id[Value];
 454 |                 }
 455 |                 else if (id[Class] == Glo) {
 456 |                     *++text = IMM;
 457 |                     *++text = id[Value];
 458 |                 }
 459 |                 else {
 460 |                     printf("%d: undefined variable\n", line);
 461 |                     exit(-1);
 462 |                 }
 463 | 
 464 |                 // emit code, default behaviour is to load the value of the
 465 |                 // address which is stored in `ax`
 466 |                 expr_type = id[Type];
 467 |                 *++text = (expr_type == Char) ? LC : LI;
 468 |             }
 469 |         }
 470 |         else if (token == '(') {
 471 |             // cast or parenthesis
 472 |             match('(');
 473 |             if (token == Int || token == Char) {
 474 |                 tmp = (token == Char) ? CHAR : INT; // cast type
 475 |                 match(token);
 476 |                 while (token == Mul) {
 477 |                     match(Mul);
 478 |                     tmp = tmp + PTR;
 479 |                 }
 480 | 
 481 |                 match(')');
 482 | 
 483 |                 expression(Inc); // cast has precedence as Inc(++)
 484 | 
 485 |                 expr_type  = tmp;
 486 |             } else {
 487 |                 // normal parenthesis
 488 |                 expression(Assign);
 489 |                 match(')');
 490 |             }
 491 |         }
 492 |         else if (token == Mul) {
 493 |             // dereference *<addr>
 494 |             match(Mul);
 495 |             expression(Inc); // dereference has the same precedence as Inc(++)
 496 | 
 497 |             if (expr_type >= PTR) {
 498 |                 expr_type = expr_type - PTR;
 499 |             } else {
 500 |                 printf("%d: bad dereference\n", line);
 501 |                 exit(-1);
 502 |             }
 503 | 
 504 |             *++text = (expr_type == CHAR) ? LC : LI;
 505 |         }
 506 |         else if (token == And) {
 507 |             // get the address of
 508 |             match(And);
 509 |             expression(Inc); // get the address of
 510 |             if (*text == LC || *text == LI) {
 511 |                 text --;
 512 |             } else {
 513 |                 printf("%d: bad address of\n", line);
 514 |                 exit(-1);
 515 |             }
 516 | 
 517 |             expr_type = expr_type + PTR;
 518 |         }
 519 |         else if (token == '!') {
 520 |             // not
 521 |             match('!');
 522 |             expression(Inc);
 523 | 
 524 |             // emit code, use <expr> == 0
 525 |             *++text = PUSH;
 526 |             *++text = IMM;
 527 |             *++text = 0;
 528 |             *++text = EQ;
 529 | 
 530 |             expr_type = INT;
 531 |         }
 532 |         else if (token == '~') {
 533 |             // bitwise not
 534 |             match('~');
 535 |             expression(Inc);
 536 | 
 537 |             // emit code, use <expr> XOR -1
 538 |             *++text = PUSH;
 539 |             *++text = IMM;
 540 |             *++text = -1;
 541 |             *++text = XOR;
 542 | 
 543 |             expr_type = INT;
 544 |         }
 545 |         else if (token == Add) {
 546 |             // +var, do nothing
 547 |             match(Add);
 548 |             expression(Inc);
 549 | 
 550 |             expr_type = INT;
 551 |         }
 552 |         else if (token == Sub) {
 553 |             // -var
 554 |             match(Sub);
 555 | 
 556 |             if (token == Num) {
 557 |                 *++text = IMM;
 558 |                 *++text = -token_val;
 559 |                 match(Num);
 560 |             } else {
 561 | 
 562 |                 *++text = IMM;
 563 |                 *++text = -1;
 564 |                 *++text = PUSH;
 565 |                 expression(Inc);
 566 |                 *++text = MUL;
 567 |             }
 568 | 
 569 |             expr_type = INT;
 570 |         }
 571 |         else if (token == Inc || token == Dec) {
 572 |             tmp = token;
 573 |             match(token);
 574 |             expression(Inc);
 575 |             if (*text == LC) {
 576 |                 *text = PUSH;  // to duplicate the address
 577 |                 *++text = LC;
 578 |             } else if (*text == LI) {
 579 |                 *text = PUSH;
 580 |                 *++text = LI;
 581 |             } else {
 582 |                 printf("%d: bad lvalue of pre-increment\n", line);
 583 |                 exit(-1);
 584 |             }
 585 |             *++text = PUSH;
 586 |             *++text = IMM;
 587 |             *++text = (expr_type > PTR) ? sizeof(int) : sizeof(char);
 588 |             *++text = (tmp == Inc) ? ADD : SUB;
 589 |             *++text = (expr_type == CHAR) ? SC : SI;
 590 |         }
 591 |         else {
 592 |             printf("%d: bad expression\n", line);
 593 |             exit(-1);
 594 |         }
 595 |     }
 596 | 
 597 |     // binary operator and postfix operators.
 598 |     {
 599 |         while (token >= level) {
 600 |             // handle according to current operator's precedence
 601 |             tmp = expr_type;
 602 |             if (token == Assign) {
 603 |                 // var = expr;
 604 |                 match(Assign);
 605 |                 if (*text == LC || *text == LI) {
 606 |                     *text = PUSH; // save the lvalue's pointer
 607 |                 } else {
 608 |                     printf("%d: bad lvalue in assignment\n", line);
 609 |                     exit(-1);
 610 |                 }
 611 |                 expression(Assign);
 612 | 
 613 |                 expr_type = tmp;
 614 |                 *++text = (expr_type == CHAR) ? SC : SI;
 615 |             }
 616 |             else if (token == Cond) {
 617 |                 // expr ? a : b;
 618 |                 match(Cond);
 619 |                 *++text = JZ;
 620 |                 addr = ++text;
 621 |                 expression(Assign);
 622 |                 if (token == ':') {
 623 |                     match(':');
 624 |                 } else {
 625 |                     printf("%d: missing colon in conditional\n", line);
 626 |                     exit(-1);
 627 |                 }
 628 |                 *addr = (int)(text + 3);
 629 |                 *++text = JMP;
 630 |                 addr = ++text;
 631 |                 expression(Cond);
 632 |                 *addr = (int)(text + 1);
 633 |             }
 634 |             else if (token == Lor) {
 635 |                 // logic or
 636 |                 match(Lor);
 637 |                 *++text = JNZ;
 638 |                 addr = ++text;
 639 |                 expression(Lan);
 640 |                 *addr = (int)(text + 1);
 641 |                 expr_type = INT;
 642 |             }
 643 |             else if (token == Lan) {
 644 |                 // logic and
 645 |                 match(Lan);
 646 |                 *++text = JZ;
 647 |                 addr = ++text;
 648 |                 expression(Or);
 649 |                 *addr = (int)(text + 1);
 650 |                 expr_type = INT;
 651 |             }
 652 |             else if (token == Or) {
 653 |                 // bitwise or
 654 |                 match(Or);
 655 |                 *++text = PUSH;
 656 |                 expression(Xor);
 657 |                 *++text = OR;
 658 |                 expr_type = INT;
 659 |             }
 660 |             else if (token == Xor) {
 661 |                 // bitwise xor
 662 |                 match(Xor);
 663 |                 *++text = PUSH;
 664 |                 expression(And);
 665 |                 *++text = XOR;
 666 |                 expr_type = INT;
 667 |             }
 668 |             else if (token == And) {
 669 |                 // bitwise and
 670 |                 match(And);
 671 |                 *++text = PUSH;
 672 |                 expression(Eq);
 673 |                 *++text = AND;
 674 |                 expr_type = INT;
 675 |             }
 676 |             else if (token == Eq) {
 677 |                 // equal ==
 678 |                 match(Eq);
 679 |                 *++text = PUSH;
 680 |                 expression(Ne);
 681 |                 *++text = EQ;
 682 |                 expr_type = INT;
 683 |             }
 684 |             else if (token == Ne) {
 685 |                 // not equal !=
 686 |                 match(Ne);
 687 |                 *++text = PUSH;
 688 |                 expression(Lt);
 689 |                 *++text = NE;
 690 |                 expr_type = INT;
 691 |             }
 692 |             else if (token == Lt) {
 693 |                 // less than
 694 |                 match(Lt);
 695 |                 *++text = PUSH;
 696 |                 expression(Shl);
 697 |                 *++text = LT;
 698 |                 expr_type = INT;
 699 |             }
 700 |             else if (token == Gt) {
 701 |                 // greater than
 702 |                 match(Gt);
 703 |                 *++text = PUSH;
 704 |                 expression(Shl);
 705 |                 *++text = GT;
 706 |                 expr_type = INT;
 707 |             }
 708 |             else if (token == Le) {
 709 |                 // less than or equal to
 710 |                 match(Le);
 711 |                 *++text = PUSH;
 712 |                 expression(Shl);
 713 |                 *++text = LE;
 714 |                 expr_type = INT;
 715 |             }
 716 |             else if (token == Ge) {
 717 |                 // greater than or equal to
 718 |                 match(Ge);
 719 |                 *++text = PUSH;
 720 |                 expression(Shl);
 721 |                 *++text = GE;
 722 |                 expr_type = INT;
 723 |             }
 724 |             else if (token == Shl) {
 725 |                 // shift left
 726 |                 match(Shl);
 727 |                 *++text = PUSH;
 728 |                 expression(Add);
 729 |                 *++text = SHL;
 730 |                 expr_type = INT;
 731 |             }
 732 |             else if (token == Shr) {
 733 |                 // shift right
 734 |                 match(Shr);
 735 |                 *++text = PUSH;
 736 |                 expression(Add);
 737 |                 *++text = SHR;
 738 |                 expr_type = INT;
 739 |             }
 740 |             else if (token == Add) {
 741 |                 // add
 742 |                 match(Add);
 743 |                 *++text = PUSH;
 744 |                 expression(Mul);
 745 | 
 746 |                 expr_type = tmp;
 747 |                 if (expr_type > PTR) {
 748 |                     // pointer type, and not `char *`
 749 |                     *++text = PUSH;
 750 |                     *++text = IMM;
 751 |                     *++text = sizeof(int);
 752 |                     *++text = MUL;
 753 |                 }
 754 |                 *++text = ADD;
 755 |             }
 756 |             else if (token == Sub) {
 757 |                 // sub
 758 |                 match(Sub);
 759 |                 *++text = PUSH;
 760 |                 expression(Mul);
 761 |                 if (tmp > PTR && tmp == expr_type) {
 762 |                     // pointer subtraction
 763 |                     *++text = SUB;
 764 |                     *++text = PUSH;
 765 |                     *++text = IMM;
 766 |                     *++text = sizeof(int);
 767 |                     *++text = DIV;
 768 |                     expr_type = INT;
 769 |                 } else if (tmp > PTR) {
 770 |                     // pointer movement
 771 |                     *++text = PUSH;
 772 |                     *++text = IMM;
 773 |                     *++text = sizeof(int);
 774 |                     *++text = MUL;
 775 |                     *++text = SUB;
 776 |                     expr_type = tmp;
 777 |                 } else {
 778 |                     // numeral subtraction
 779 |                     *++text = SUB;
 780 |                     expr_type = tmp;
 781 |                 }
 782 |             }
 783 |             else if (token == Mul) {
 784 |                 // multiply
 785 |                 match(Mul);
 786 |                 *++text = PUSH;
 787 |                 expression(Inc);
 788 |                 *++text = MUL;
 789 |                 expr_type = tmp;
 790 |             }
 791 |             else if (token == Div) {
 792 |                 // divide
 793 |                 match(Div);
 794 |                 *++text = PUSH;
 795 |                 expression(Inc);
 796 |                 *++text = DIV;
 797 |                 expr_type = tmp;
 798 |             }
 799 |             else if (token == Mod) {
 800 |                 // Modulo
 801 |                 match(Mod);
 802 |                 *++text = PUSH;
 803 |                 expression(Inc);
 804 |                 *++text = MOD;
 805 |                 expr_type = tmp;
 806 |             }
 807 |             else if (token == Inc || token == Dec) {
 808 |                 // postfix inc(++) and dec(--)
 809 |                 // we will increase the value to the variable and decrease it
 810 |                 // on `ax` to get its original value.
 811 |                 if (*text == LI) {
 812 |                     *text = PUSH;
 813 |                     *++text = LI;
 814 |                 }
 815 |                 else if (*text == LC) {
 816 |                     *text = PUSH;
 817 |                     *++text = LC;
 818 |                 }
 819 |                 else {
 820 |                     printf("%d: bad value in increment\n", line);
 821 |                     exit(-1);
 822 |                 }
 823 | 
 824 |                 *++text = PUSH;
 825 |                 *++text = IMM;
 826 |                 *++text = (expr_type > PTR) ? sizeof(int) : sizeof(char);
 827 |                 *++text = (token == Inc) ? ADD : SUB;
 828 |                 *++text = (expr_type == CHAR) ? SC : SI;
 829 |                 *++text = PUSH;
 830 |                 *++text = IMM;
 831 |                 *++text = (expr_type > PTR) ? sizeof(int) : sizeof(char);
 832 |                 *++text = (token == Inc) ? SUB : ADD;
 833 |                 match(token);
 834 |             }
 835 |             else if (token == Brak) {
 836 |                 // array access var[xx]
 837 |                 match(Brak);
 838 |                 *++text = PUSH;
 839 |                 expression(Assign);
 840 |                 match(']');
 841 | 
 842 |                 if (tmp > PTR) {
 843 |                     // pointer, `not char *`
 844 |                     *++text = PUSH;
 845 |                     *++text = IMM;
 846 |                     *++text = sizeof(int);
 847 |                     *++text = MUL;
 848 |                 }
 849 |                 else if (tmp < PTR) {
 850 |                     printf("%d: pointer type expected\n", line);
 851 |                     exit(-1);
 852 |                 }
 853 |                 expr_type = tmp - PTR;
 854 |                 *++text = ADD;
 855 |                 *++text = (expr_type == CHAR) ? LC : LI;
 856 |             }
 857 |             else {
 858 |                 printf("%d: compiler error, token = %d\n", line, token);
 859 |                 exit(-1);
 860 |             }
 861 |         }
 862 |     }
 863 | }
 864 | 
 865 | void statement() {
 866 |     // there are 8 kinds of statements here:
 867 |     // 1. if (...) <statement> [else <statement>]
 868 |     // 2. while (...) <statement>
 869 |     // 3. { <statement> }
 870 |     // 4. return xxx;
 871 |     // 5. <empty statement>;
 872 |     // 6. expression; (expression end with semicolon)
 873 | 
 874 |     int *a, *b; // bess for branch control
 875 | 
 876 |     if (token == If) {
 877 |         // if (...) <statement> [else <statement>]
 878 |         //
 879 |         //   if (...)           <cond>
 880 |         //                      JZ a
 881 |         //     <statement>      <statement>
 882 |         //   else:              JMP b
 883 |         // a:
 884 |         //     <statement>      <statement>
 885 |         // b:                   b:
 886 |         //
 887 |         //
 888 |         match(If);
 889 |         match('(');
 890 |         expression(Assign);  // parse condition
 891 |         match(')');
 892 | 
 893 |         // emit code for if
 894 |         *++text = JZ;
 895 |         b = ++text;
 896 | 
 897 |         statement();         // parse statement
 898 |         if (token == Else) { // parse else
 899 |             match(Else);
 900 | 
 901 |             // emit code for JMP B
 902 |             *b = (int)(text + 3);
 903 |             *++text = JMP;
 904 |             b = ++text;
 905 | 
 906 |             statement();
 907 |         }
 908 | 
 909 |         *b = (int)(text + 1);
 910 |     }
 911 |     else if (token == While) {
 912 |         //
 913 |         // a:                     a:
 914 |         //    while (<cond>)        <cond>
 915 |         //                          JZ b
 916 |         //     <statement>          <statement>
 917 |         //                          JMP a
 918 |         // b:                     b:
 919 |         match(While);
 920 | 
 921 |         a = text + 1;
 922 | 
 923 |         match('(');
 924 |         expression(Assign);
 925 |         match(')');
 926 | 
 927 |         *++text = JZ;
 928 |         b = ++text;
 929 | 
 930 |         statement();
 931 | 
 932 |         *++text = JMP;
 933 |         *++text = (int)a;
 934 |         *b = (int)(text + 1);
 935 |     }
 936 |     else if (token == '{') {
 937 |         // { <statement> ... }
 938 |         match('{');
 939 | 
 940 |         while (token != '}') {
 941 |             statement();
 942 |         }
 943 | 
 944 |         match('}');
 945 |     }
 946 |     else if (token == Return) {
 947 |         // return [expression];
 948 |         match(Return);
 949 | 
 950 |         if (token != ';') {
 951 |             expression(Assign);
 952 |         }
 953 | 
 954 |         match(';');
 955 | 
 956 |         // emit code for return
 957 |         *++text = LEV;
 958 |     }
 959 |     else if (token == ';') {
 960 |         // empty statement
 961 |         match(';');
 962 |     }
 963 |     else {
 964 |         // a = b; or function_call();
 965 |         expression(Assign);
 966 |         match(';');
 967 |     }
 968 | }
 969 | 
 970 | void enum_declaration() {
 971 |     // parse enum [id] { a = 1, b = 3, ...}
 972 |     int i;
 973 |     i = 0;
 974 |     while (token != '}') {
 975 |         if (token != Id) {
 976 |             printf("%d: bad enum identifier %d\n", line, token);
 977 |             exit(-1);
 978 |         }
 979 |         next();
 980 |         if (token == Assign) {
 981 |             // like {a=10}
 982 |             next();
 983 |             if (token != Num) {
 984 |                 printf("%d: bad enum initializer\n", line);
 985 |                 exit(-1);
 986 |             }
 987 |             i = token_val;
 988 |             next();
 989 |         }
 990 | 
 991 |         current_id[Class] = Num;
 992 |         current_id[Type] = INT;
 993 |         current_id[Value] = i++;
 994 | 
 995 |         if (token == ',') {
 996 |             next();
 997 |         }
 998 |     }
 999 | }
1000 | 
1001 | void function_parameter() {
1002 |     int type;
1003 |     int params;
1004 |     params = 0;
1005 |     while (token != ')') {
1006 |         // int name, ...
1007 |         type = INT;
1008 |         if (token == Int) {
1009 |             match(Int);
1010 |         } else if (token == Char) {
1011 |             type = CHAR;
1012 |             match(Char);
1013 |         }
1014 | 
1015 |         // pointer type
1016 |         while (token == Mul) {
1017 |             match(Mul);
1018 |             type = type + PTR;
1019 |         }
1020 | 
1021 |         // parameter name
1022 |         if (token != Id) {
1023 |             printf("%d: bad parameter declaration\n", line);
1024 |             exit(-1);
1025 |         }
1026 |         if (current_id[Class] == Loc) {
1027 |             printf("%d: duplicate parameter declaration\n", line);
1028 |             exit(-1);
1029 |         }
1030 | 
1031 |         match(Id);
1032 |         // store the local variable
1033 |         current_id[BClass] = current_id[Class]; current_id[Class]  = Loc;
1034 |         current_id[BType]  = current_id[Type];  current_id[Type]   = type;
1035 |         current_id[BValue] = current_id[Value]; current_id[Value]  = params++;   // index of current parameter
1036 | 
1037 |         if (token == ',') {
1038 |             match(',');
1039 |         }
1040 |     }
1041 |     index_of_bp = params+1;
1042 | }
1043 | 
1044 | void function_body() {
1045 |     // Parses what sits between the braces of a function definition:
1046 |     //
1047 |     //     type func_name (...) { <local declarations> <statements> }
1048 |     //
1049 |     // Local declarations are only recognised at the top of the body. Emits
1050 |     // ENT <number of locals> first and LEV last; the closing '}' is not
1051 |     // consumed here.
1052 | 
1053 |     int slot;      // stack slot index given to the most recent local
1054 |     int var_type;  // base type plus pointer levels of one declarator
1055 |     slot = index_of_bp;
1056 | 
1057 |     while (token == Int || token == Char) {
1058 |         if (token == Int) {
1059 |             basetype = INT;
1060 |         } else {
1061 |             basetype = CHAR;
1062 |         }
1063 |         match(token);
1064 | 
1065 |         // one pass per declarator of the comma separated list
1066 |         while (token != ';') {
1067 |             // each leading '*' adds one level of indirection
1068 |             for (var_type = basetype; token == Mul; var_type = var_type + PTR) {
1069 |                 match(Mul);
1070 |             }
1071 | 
1072 |             if (token != Id) {
1073 |                 printf("%d: bad local declaration\n", line);
1074 |                 exit(-1);
1075 |             }
1076 |             if (current_id[Class] == Loc) {
1077 |                 printf("%d: duplicate local declaration\n", line);
1078 |                 exit(-1);
1079 |             }
1080 |             match(Id);
1081 | 
1082 |             // back up the name's previous meaning, then shadow it with the local
1083 |             slot = slot + 1;
1084 |             current_id[BClass] = current_id[Class];
1085 |             current_id[BType]  = current_id[Type];
1086 |             current_id[BValue] = current_id[Value];
1087 |             current_id[Class]  = Loc;
1088 |             current_id[Type]   = var_type;
1089 |             current_id[Value]  = slot;
1090 |             if (token == ',') {
1091 |                 match(',');
1092 |             }
1093 |         }
1094 |         match(';');
1095 |     }
1096 | 
1097 |     // reserve room for the locals on entry
1098 |     *++text = ENT;
1099 |     *++text = slot - index_of_bp;
1100 |     while (token != '}') {
1101 |         statement();
1102 |     }
1103 |     *++text = LEV;  // tear down the frame and return to the caller
1104 | }
1105 | 
1106 | void function_declaration() {
1107 |     // Handles everything after the function name:
1108 |     //     type func_name (...) {...}
1109 |     //                    ^ starts here
1110 |     match('(');
1111 |     function_parameter();
1112 |     match(')');
1113 |     match('{');
1114 |     function_body();
1115 |     // the closing '}' is not matched here
1116 | 
1117 |     // Walk the whole symbol table and give every name that was shadowed
1118 |     // as a local its backed-up class/type/value again.
1119 |     for (current_id = symbols; current_id[Token]; current_id = current_id + IdSize) {
1120 |         if (current_id[Class] != Loc) {
1121 |             continue;
1122 |         }
1123 |         current_id[Class] = current_id[BClass];
1124 |         current_id[Type]  = current_id[BType];
1125 |         current_id[Value] = current_id[BValue];
1126 |     }
1127 | }
1128 | 
1129 | void global_declaration() {
1130 |     // Parses one top-level construct, which is one of:
1131 |     //     enum [id] { a = 10, b = 20, ... };
1132 |     //     [int|char] [*]id [, [*]id ...];
1133 |     //     [int|char] [*]id (...) {...}
1134 |     // When no type keyword is present the base type stays INT.
1135 | 
1136 |     int var_type; // base type plus pointer levels of the current declarator
1137 | 
1138 |     basetype = INT;
1139 | 
1140 |     // enum is handled separately and returns early
1141 |     if (token == Enum) {
1142 |         match(Enum);
1143 |         if (token != '{') {
1144 |             match(Id); // optional tag name, ignored
1145 |         }
1146 |         if (token == '{') {
1147 |             // the enumerator list
1148 |             match('{');
1149 |             enum_declaration();
1150 |             match('}');
1151 |         }
1152 | 
1153 |         match(';');
1154 |         return;
1155 |     }
1156 | 
1157 |     // optional type keyword
1158 |     if (token == Char) {
1159 |         match(Char);
1160 |         basetype = CHAR;
1161 |     }
1162 |     else if (token == Int) {
1163 |         match(Int);
1164 |     }
1165 | 
1166 |     // comma separated declarators; a function body ends the list at '}'
1167 |     while (token != ';' && token != '}') {
1168 |         // count pointer levels, e.g. `int ****x;`
1169 |         for (var_type = basetype; token == Mul; var_type = var_type + PTR) {
1170 |             match(Mul);
1171 |         }
1172 | 
1173 |         if (token != Id) {
1174 |             printf("%d: bad global declaration\n", line);
1175 |             exit(-1);
1176 |         }
1177 |         if (current_id[Class] != 0) {
1178 |             // the name already has a class, so it was declared before
1179 |             printf("%d: duplicate global declaration\n", line);
1180 |             exit(-1);
1181 |         }
1182 |         match(Id);
1183 |         current_id[Type] = var_type;
1184 | 
1185 |         if (token != '(') {
1186 |             // global variable: record the current data pointer, then advance it by sizeof(int)
1187 |             current_id[Class] = Glo;
1188 |             current_id[Value] = (int)data;
1189 |             data = data + sizeof(int);
1190 |         } else {
1191 |             // function: its code starts at the next text slot to be written
1192 |             current_id[Class] = Fun;
1193 |             current_id[Value] = (int)(text + 1);
1194 |             function_declaration();
1195 |         }
1196 | 
1197 |         if (token == ',') {
1198 |             match(',');
1199 |         }
1200 |     }
1201 |     // step over the terminating ';' or '}'
1202 |     next();
1203 | }
1204 | 
1205 | void program() {
1206 |     // Prime the lexer with the first token, then parse global declarations
1207 |     // back to back until token stops being positive.
1208 |     for (next(); token > 0; ) {
1209 |         global_declaration();
1210 |     }
1211 | }
1212 | 
1213 | int eval() {
1214 |     // Runs the virtual machine from the current pc. Returns the value on top of
1215 |     // the stack when EXIT executes, or -1 on an unknown opcode or a division by zero.
1216 |     int op, *tmp;
1217 |     cycle = 0;
1218 |     while (1) {
1219 |         cycle ++;
1220 |         op = *pc++; // fetch the opcode; pc now points at its operand, if it has one
1221 | 
1222 |         // debug trace; the range check keeps a bad opcode from indexing past the mnemonic string
1223 |         if (debug && op >= LEA && op <= EXIT) {
1224 |             printf("%d> %.4s", cycle,
1225 |                    & "LEA ,IMM ,JMP ,CALL,JZ  ,JNZ ,ENT ,ADJ ,LEV ,LI  ,LC  ,SI  ,SC  ,PUSH,"
1226 |                    "OR  ,XOR ,AND ,EQ  ,NE  ,LT  ,GT  ,LE  ,GE  ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
1227 |                    "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT"[op * 5]);
1228 |             if (op <= ADJ) printf(" %d\n", *pc); // LEA..ADJ are followed by one operand
1229 |             else           printf("\n");
1230 |         }
1231 |         if (op == IMM)       {ax = *pc++;}                                     // load immediate value to ax
1232 |         else if (op == LC)   {ax = *(char *)ax;}                               // load character to ax, address in ax
1233 |         else if (op == LI)   {ax = *(int *)ax;}                                // load integer to ax, address in ax
1234 |         else if (op == SC)   {ax = *(char *)*sp++ = ax;}                       // save character to address, value in ax, address on stack
1235 |         else if (op == SI)   {*(int *)*sp++ = ax;}                             // save integer to address, value in ax, address on stack
1236 |         else if (op == PUSH) {*--sp = ax;}                                     // push the value of ax onto the stack
1237 |         else if (op == JMP)  {pc = (int *)*pc;}                                // jump to the address
1238 |         else if (op == JZ)   {pc = ax ? pc + 1 : (int *)*pc;}                   // jump if ax is zero
1239 |         else if (op == JNZ)  {pc = ax ? (int *)*pc : pc + 1;}                   // jump if ax is not zero
1240 |         else if (op == CALL) {*--sp = (int)(pc+1); pc = (int *)*pc;}           // call subroutine, return address goes on the stack
1241 |         // (there is no separate RET opcode; LEV below also pops the return address)
1242 |         else if (op == ENT)  {*--sp = (int)bp; bp = sp; sp = sp - *pc++;}      // make new stack frame
1243 |         else if (op == ADJ)  {sp = sp + *pc++;}                                // add esp, <size>
1244 |         else if (op == LEV)  {sp = bp; bp = (int *)*sp++; pc = (int *)*sp++;}  // restore call frame and PC
1245 |         else if (op == LEA)  {ax = (int)(bp + *pc++);}                         // load address for arguments.
1246 |         else if ((op == DIV || op == MOD) && ax == 0) {printf("division by zero\n"); return -1;} // undefined behaviour in C otherwise
1247 |         else if (op == OR)  ax = *sp++ | ax;
1248 |         else if (op == XOR) ax = *sp++ ^ ax;
1249 |         else if (op == AND) ax = *sp++ & ax;
1250 |         else if (op == EQ)  ax = *sp++ == ax;
1251 |         else if (op == NE)  ax = *sp++ != ax;
1252 |         else if (op == LT)  ax = *sp++ < ax;
1253 |         else if (op == LE)  ax = *sp++ <= ax;
1254 |         else if (op == GT)  ax = *sp++ >  ax;
1255 |         else if (op == GE)  ax = *sp++ >= ax;
1256 |         else if (op == SHL) ax = *sp++ << ax;
1257 |         else if (op == SHR) ax = *sp++ >> ax;
1258 |         else if (op == ADD) ax = *sp++ + ax;
1259 |         else if (op == SUB) ax = *sp++ - ax;
1260 |         else if (op == MUL) ax = *sp++ * ax;
1261 |         else if (op == DIV) ax = *sp++ / ax;
1262 |         else if (op == MOD) ax = *sp++ % ax;
1263 | 
1264 |         else if (op == EXIT) { printf("exit(%d)", *sp); return *sp;}
1265 |         else if (op == OPEN) { ax = open((char *)sp[1], sp[0]); }
1266 |         else if (op == CLOS) { ax = close(*sp);}
1267 |         else if (op == READ) { ax = read(sp[2], (char *)sp[1], *sp); }
1268 |         else if (op == PRTF) { tmp = sp + pc[1]; ax = printf((char *)tmp[-1], tmp[-2], tmp[-3], tmp[-4], tmp[-5], tmp[-6]); } // pc[1] is used as the argument count; assumes an ADJ <argc> follows the call, TODO confirm
1269 |         else if (op == MALC) { ax = (int)malloc(*sp);}
1270 |         else if (op == MSET) { ax = (int)memset((char *)sp[2], sp[1], *sp);}
1271 |         else if (op == MCMP) { ax = memcmp((char *)sp[2], (char *)sp[1], *sp);}
1272 |         else {
1273 |             printf("unknown instruction:%d\n", op);
1274 |             return -1;
1275 |         }
1276 |     }
1277 | }
1278 | 
1279 | int main(int argc, char **argv)
1280 | {
1281 |     // Driver: parse flags, set up the memory pools and the symbol table, compile
1282 |     // the given source file and, unless -s was given, run its main() in the VM.
1283 |     int n, fd;       // n: running enum value, later the byte count from read()
1284 |     int *exit_stub;  // stack address of the PUSH/EXIT pair built below
1285 | 
1286 |     argc--;
1287 |     argv++;
1288 | 
1289 |     // optional flags, recognised only in the order -s then -d
1290 |     if (argc > 0 && (*argv)[0] == '-' && (*argv)[1] == 's') {
1291 |         assembly = 1;
1292 |         argc--;
1293 |         argv++;
1294 |     }
1295 |     if (argc > 0 && (*argv)[0] == '-' && (*argv)[1] == 'd') {
1296 |         debug = 1;
1297 |         argc--;
1298 |         argv++;
1299 |     }
1300 |     if (argc < 1) {
1301 |         printf("usage: xc [-s] [-d] file ...\n");
1302 |         return -1;
1303 |     }
1304 | 
1305 |     fd = open(*argv, 0);
1306 |     if (fd < 0) {
1307 |         printf("could not open(%s)\n", *argv);
1308 |         return -1;
1309 |     }
1310 | 
1311 |     poolsize = 256 * 1024; // arbitrary size
1312 |     line = 1;
1313 | 
1314 |     // one pool each for code, data, stack and the symbol table
1315 |     if ((text = malloc(poolsize)) == 0) {
1316 |         printf("could not malloc(%d) for text area\n", poolsize);
1317 |         return -1;
1318 |     }
1319 |     if ((data = malloc(poolsize)) == 0) {
1320 |         printf("could not malloc(%d) for data area\n", poolsize);
1321 |         return -1;
1322 |     }
1323 |     if ((stack = malloc(poolsize)) == 0) {
1324 |         printf("could not malloc(%d) for stack area\n", poolsize);
1325 |         return -1;
1326 |     }
1327 |     if ((symbols = malloc(poolsize)) == 0) {
1328 |         printf("could not malloc(%d) for symbol table\n", poolsize);
1329 |         return -1;
1330 |     }
1331 | 
1332 |     memset(text, 0, poolsize);
1333 |     memset(data, 0, poolsize);
1334 |     memset(stack, 0, poolsize);
1335 |     memset(symbols, 0, poolsize);
1336 | 
1337 |     old_text = text;
1338 | 
1339 |     // seed the symbol table by lexing this word list; its order must follow the enums walked below
1340 |     src = "char else enum if int return sizeof while "
1341 |           "open read close printf malloc memset memcmp exit void main";
1342 | 
1343 |     for (n = Char; n <= While; n++) { // keywords
1344 |         next();
1345 |         current_id[Token] = n;
1346 |     }
1347 | 
1348 |     for (n = OPEN; n <= EXIT; n++) { // library calls, valued by their opcode
1349 |         next();
1350 |         current_id[Class] = Sys;
1351 |         current_id[Type] = INT;
1352 |         current_id[Value] = n;
1353 |     }
1354 | 
1355 |     next(); current_id[Token] = Char; // `void` is treated as `char`
1356 |     next(); idmain = current_id;      // remember the entry for `main`
1357 | 
1358 |     // read the source file into a NUL-terminated buffer
1359 |     if ((src = old_src = malloc(poolsize)) == 0) {
1360 |         printf("could not malloc(%d) for source area\n", poolsize);
1361 |         return -1;
1362 |     }
1363 |     n = read(fd, src, poolsize-1);
1364 |     if (n <= 0) {
1365 |         printf("read() returned %d\n", n);
1366 |         return -1;
1367 |     }
1368 |     src[n] = 0; // terminator for the lexer
1369 |     close(fd);
1370 | 
1371 |     program();
1372 | 
1373 |     pc = (int *)idmain[Value];
1374 |     if (pc == 0) {
1375 |         printf("main() not defined\n");
1376 |         return -1;
1377 |     }
1378 |     if (assembly) {
1379 |         return 0; // -s: compile only
1380 |     }
1381 | 
1382 |     // Initial stack, highest address first: EXIT, PUSH, argc, argv, and the
1383 |     // address of the PUSH slot in the return-address position.
1384 |     sp = (int *)((int)stack + poolsize);
1385 |     *--sp = EXIT;
1386 |     *--sp = PUSH; exit_stub = sp;
1387 |     *--sp = argc;
1388 |     *--sp = (int)argv;
1389 |     *--sp = (int)exit_stub;
1390 | 
1391 |     return eval();
1392 | }
1393 | 


--------------------------------------------------------------------------------
/xc-tutor.c:
--------------------------------------------------------------------------------
   1 | // this file is used for tutorial to build the compiler step by step
   2 | 
   3 | #include <sys/types.h>
   4 | #include <sys/stat.h>
   5 | #include <fcntl.h>
   6 | #include <unistd.h>
   7 | 
   8 | 
   9 | #include <stdio.h>
  10 | #include <stdlib.h>
  11 | #include <memory.h>
  12 | #include <string.h>
  13 | 
  14 |  #define SUPPORT_DEBUG // enables every section guarded by #ifdef SUPPORT_DEBUG
  15 | 
  16 | #ifdef SUPPORT_DEBUG
  17 | int print_spec_text(int *cur_text); // forward declarations of the debugging helpers
  18 | void print_text();
  19 | void print_data();
  20 | void print_symbol_table();
  21 | void print_source_code();
  22 | void debug_usage();
  23 | #define MAX_BREAK_POINT 10 // capacity of break_points
  24 | unsigned int break_points[MAX_BREAK_POINT];
  25 | int last_bp; // NOTE(review): presumably tracks the most recently used breakpoint slot; confirm against its uses
  26 | int meet_break_point(int *pc);
  27 | #endif
  28 | 
  29 | int token;                    // current token, set by next(); 0 once the end of the source is reached
  30 | int token_val;                // value of current token: the number for Num, the data address for a string literal
  31 | char *src_begin;              // NOTE(review): presumably the start of the source buffer; confirm against its uses
  32 | char *src, *old_src;          // src: read position of the lexer in the source code string
  33 | int poolsize;                 // default size of text/data/stack
  34 | int line;                     // line number, incremented by next() at every '\n'
  35 | int *begin_text;              // NOTE(review): begin_text/begin_stack/begin_data presumably keep the start of each area; confirm
  36 | int *begin_stack;
  37 | signed char *begin_data;
  38 | int *text,                    // text segment; code is emitted with *++text, so this is the last written slot
  39 |     *old_text,                // for dump text segment
  40 |     *stack;                   // stack
  41 | signed char *data;                   // data segment; next() appends string literal bytes with *data++
  42 | int *pc, *bp, *sp, ax, cycle; // virtual machine registers
  43 | int *current_id,              // symbol-table entry of the identifier most recently looked up by next()
  44 |     *symbols;                 // symbol table: consecutive entries of IdSize ints, ended by an entry whose Token is 0
  45 | int *idmain;                  // the `main` function
  46 | 
  47 | // VM instruction opcodes, numbered from 0; inst_str holds their mnemonics in this same order
  48 | enum { LEA ,IMM ,JMP ,CALL,JZ  ,JNZ ,ENT ,ADJ ,LEV ,LI  ,LC  ,SI  ,SC  ,PUSH, // LEA..ADJ are the ones inst_has_argu() reports as taking an argument
  49 |        OR  ,XOR ,AND ,EQ  ,NE  ,LT  ,GT  ,LE  ,GE  ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,
  50 |        OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT };
  51 | 
  52 | #ifdef SUPPORT_DEBUG
  53 | const char *inst_str[] = // mnemonic of each opcode, indexed by the opcode value (see inst_2_str)
  54 | { 
  55 |   "LEA","IMM","JMP","CALL","JZ ","JNZ","ENT","ADJ","LEV","LI ","LC ","SI ","SC ","PUSH"," OR ","XOR","AND","EQ ","NE ","LT ","GT ","LE ","GE ","SHL","SHR","ADD","SUB","MUL","DIV","MOD"," OPEN","READ","CLOS","PRTF","MALC","MSET","MCMP","EXIT"
  56 | }; // NOTE(review): " OR " and " OPEN" carry a leading space unlike the other entries; confirm whether that is intended
  57 | 
  58 | #define INST_LEN (sizeof(inst_str)/sizeof(char*)) // number of entries in inst_str
  59 | 
  60 | int inst_has_argu(int inst)
  61 | {
  62 |   // 1 when inst lies in the LEA..ADJ range of opcodes, 0 otherwise.
  63 |   int lo_ok = (inst >= LEA);
  64 |   int hi_ok = (inst <= ADJ);
  65 |   return (lo_ok && hi_ok) ? 1 : 0;
  66 | }
  67 | 
  68 | const char* inst_2_str(int inst)
  69 | {
  70 |   // Maps an opcode to its mnemonic; a null pointer means "not an opcode".
  71 |   if (inst < 0)
  72 |     return 0;
  73 |   if (!(inst < INST_LEN))
  74 |     return 0;
  75 |   return inst_str[inst];
  76 | }
  77 | #endif
  78 | 
  79 | // tokens and classes (operators last and in precedence order)
  80 | enum {
  81 |   Num = 128, Fun, Sys, Glo, Loc, Id, // numbering starts at 128; next() returns plain punctuation characters as their own token values
  82 |   Char, Else, Enum, If, Int, Return, Sizeof, While, // keywords
  83 |   Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak // operators
  84 | };
  85 | 
  86 | #ifdef SUPPORT_DEBUG
  87 | const char *class_str_array[] = // printable names of the token/class enum, indexed by (value - Num)
  88 | {
  89 |   "Num", "Fun", "Sys", "Glo", "Loc", "Id", "Char", "Else", "Enum", "If", "Int", "Return", "Sizeof", "While", "Assign", "Cond", "Lor", "Lan", "Or", "Xor", "And", "Eq", "Ne", "Lt", "Gt", "Le", "Ge", "Shl", "Shr", "Add", "Sub", "Mul", "Div", "Mod", "Inc", "Dec", "Brak"
  90 | };
  91 | 
  92 | const char *class_str(int cls)
  93 | {
  94 |   // Name of a token/class id in Num..Brak. The range test is made on the id
  95 |   // itself: the table has Brak - Num + 1 entries, so comparing the zero-based
  96 |   // index against Brak (as before) let ids just above Brak read past its end.
  97 |   if (Num <= cls && cls <= Brak)
  98 |     return class_str_array[cls - Num];
  99 |   return "no such class";
 100 | }
 101 | #endif
 102 | 
 103 | 
 104 | // fields of one symbol-table entry (IdSize ints per entry); BType/BClass/BValue hold the saved Type/Class/Value while a local shadows the name
 105 | enum {Token, Hash, Name, Type, Class, Value, BType, BClass, BValue, IdSize};
 106 | 
 107 | // types of variable/function; a pointer type is its base type plus PTR for each level of indirection
 108 | enum { CHAR, INT, PTR };
 109 | 
 110 | #ifdef SUPPORT_DEBUG
 111 | const char *type_str_array[] = {"CHAR", "INT", "PTR"};  // indexed by CHAR, INT, PTR
 112 | const char * type_str(int type)
 113 | {
 114 |   // Name of a type code in CHAR..PTR; anything else (e.g. INT + PTR) gets a placeholder.
 115 |   if (type < 0 || type > PTR)
 116 |     return "no such type";
 117 |   return type_str_array[type];
 118 | }
 119 | #endif
 120 | 
 121 | int basetype;    // the type of a declaration, make it global for convenience
 122 | int expr_type;   // the type of the expression most recently parsed by expression()
 123 | 
 124 | // function frame, by stack index
 125 | //
 126 | // 0: arg 1
 127 | // 1: arg 2
 128 | // 2: arg 3
 129 | // 3: return address
 130 | // 4: old bp pointer  <- index_of_bp
 131 | // 5: local var 1
 132 | // 6: local var 2
 133 | int index_of_bp; // index of the saved-bp slot in the frame above; locals are addressed with LEA (index_of_bp - index)
 134 | 
 135 | void next() {
 136 |     char *last_pos;
 137 |     int hash;
 138 |     // Lexer: consumes one token from src and stores it in `token` (0 at end of input); Num and string tokens also set token_val.
 139 |     while (token = *src) {
 140 |         ++src;
 141 | 
 142 |         // classify by first character; anything not handled below (spaces, tabs, ...) is skipped
 143 |         if (token == '\n') {
 144 |             ++line;
 145 |         }
 146 |         else if (token == '#') {
 147 |             // skip macro, because we will not support it
 148 |             while (*src != 0 && *src != '\n') {
 149 |                 src++;
 150 |             }
 151 |         }
 152 |         else if ((token >= 'a' && token <= 'z') || (token >= 'A' && token <= 'Z') || (token == '_')) {
 153 | 
 154 |             // parse identifier
 155 |             last_pos = src - 1;
 156 |             hash = token;
 157 | 
 158 |             while ((*src >= 'a' && *src <= 'z') || (*src >= 'A' && *src <= 'Z') || (*src >= '0' && *src <= '9') || (*src == '_')) {
 159 |                 hash = hash * 147 + *src;
 160 |                 src++;
 161 |             }
 162 | 
 163 |             // look for existing identifier, linear search
 164 |             current_id = symbols;
 165 |             while (current_id[Token]) {
 166 |                 if (current_id[Hash] == hash && !memcmp((char *)current_id[Name], last_pos, src - last_pos)) {
 167 |                     // found one: keywords come back as their keyword token, everything else as Id
 168 |                     token = current_id[Token];
 169 |                     return;
 170 |                 }
 171 |                 current_id = current_id + IdSize;
 172 |             }
 173 |             // NOTE(review): only src - last_pos bytes are compared above, so names of
 174 |             // different length are told apart by the hash alone.
 175 |             // store new ID in the first free entry; Name points into the source text
 176 |             current_id[Name] = (int)last_pos;
 177 |             current_id[Hash] = hash;
 178 |             token = current_id[Token] = Id;
 179 |             return;
 180 |         }
 181 |         else if (token >= '0' && token <= '9') {
 182 |             // parse number, three kinds: dec(123) hex(0x123) oct(017)
 183 |             token_val = token - '0';
 184 |             if (token_val > 0) {
 185 |                 // dec, starts with [1-9]
 186 |                 while (*src >= '0' && *src <= '9') {
 187 |                     token_val = token_val*10 + *src++ - '0';
 188 |                 }
 189 |             } else {
 190 |                 // starts with 0
 191 |                 if (*src == 'x' || *src == 'X') {
 192 |                     //hex
 193 |                     token = *++src;
 194 |                     while ((token >= '0' && token <= '9') || (token >= 'a' && token <= 'f') || (token >= 'A' && token <= 'F')) {
 195 |                         token_val = token_val * 16 + (token & 15) + (token >= 'A' ? 9 : 0);
 196 |                         token = *++src;
 197 |                     }
 198 |                 } else {
 199 |                     // oct
 200 |                     while (*src >= '0' && *src <= '7') {
 201 |                         token_val = token_val*8 + *src++ - '0';
 202 |                     }
 203 |                 }
 204 |             }
 205 | 
 206 |             token = Num;
 207 |             return;
 208 |         }
 209 |         else if (token == '"' || token == '\'') {
 210 |             // parse string or character literal; '\n' is the only translated escape,
 211 |             // any other escaped character stands for itself. String bytes go into data.
 212 |             last_pos = (char *)data;
 213 |             while (*src != 0 && *src != token) {
 214 |                 token_val = *src++;
 215 |                 if (token_val == '\\' && *src != 0) {
 216 |                     // escape character (a backslash right before the end of input is kept as is)
 217 |                     token_val = *src++;
 218 |                     if (token_val == 'n') {
 219 |                         token_val = '\n';
 220 |                     }
 221 |                 }
 222 | 
 223 |                 if (token == '"') {
 224 |                     *data++ = token_val;
 225 |                 }
 226 |             }
 227 | 
 228 |             if (*src != 0) { src++; } // step over the closing quote; never past the terminator of an unterminated literal
 229 |             // a string yields token '"' with its data address, a character yields Num with its code
 230 |             if (token == '"') {
 231 |                 token_val = (int)last_pos;
 232 |             } else {
 233 |                 token = Num;
 234 |             }
 235 | 
 236 |             return;
 237 |         }
 238 |         else if (token == '/') {
 239 |             if (*src == '/') {
 240 |                 // skip comments
 241 |                 while (*src != 0 && *src != '\n') {
 242 |                     ++src;
 243 |                 }
 244 |             } else {
 245 |                 // divide operator
 246 |                 token = Div;
 247 |                 return;
 248 |             }
 249 |         }
 250 |         else if (token == '=') {
 251 |             // parse '==' and '='
 252 |             if (*src == '=') {
 253 |                 src ++;
 254 |                 token = Eq;
 255 |             } else {
 256 |                 token = Assign;
 257 |             }
 258 |             return;
 259 |         }
 260 |         else if (token == '+') {
 261 |             // parse '+' and '++'
 262 |             if (*src == '+') {
 263 |                 src ++;
 264 |                 token = Inc;
 265 |             } else {
 266 |                 token = Add;
 267 |             }
 268 |             return;
 269 |         }
 270 |         else if (token == '-') {
 271 |             // parse '-' and '--'
 272 |             if (*src == '-') {
 273 |                 src ++;
 274 |                 token = Dec;
 275 |             } else {
 276 |                 token = Sub;
 277 |             }
 278 |             return;
 279 |         }
 280 |         else if (token == '!') {
 281 |             // parse '!='; a lone '!' is returned as the character itself
 282 |             if (*src == '=') {
 283 |                 src++;
 284 |                 token = Ne;
 285 |             }
 286 |             return;
 287 |         }
 288 |         else if (token == '<') {
 289 |             // parse '<=', '<<' or '<'
 290 |             if (*src == '=') {
 291 |                 src ++;
 292 |                 token = Le;
 293 |             } else if (*src == '<') {
 294 |                 src ++;
 295 |                 token = Shl;
 296 |             } else {
 297 |                 token = Lt;
 298 |             }
 299 |             return;
 300 |         }
 301 |         else if (token == '>') {
 302 |             // parse '>=', '>>' or '>'
 303 |             if (*src == '=') {
 304 |                 src ++;
 305 |                 token = Ge;
 306 |             } else if (*src == '>') {
 307 |                 src ++;
 308 |                 token = Shr;
 309 |             } else {
 310 |                 token = Gt;
 311 |             }
 312 |             return;
 313 |         }
 314 |         else if (token == '|') {
 315 |             // parse '|' or '||'
 316 |             if (*src == '|') {
 317 |                 src ++;
 318 |                 token = Lor;
 319 |             } else {
 320 |                 token = Or;
 321 |             }
 322 |             return;
 323 |         }
 324 |         else if (token == '&') {
 325 |             // parse '&' and '&&'
 326 |             if (*src == '&') {
 327 |                 src ++;
 328 |                 token = Lan;
 329 |             } else {
 330 |                 token = And;
 331 |             }
 332 |             return;
 333 |         }
 334 |         else if (token == '^') {
 335 |             token = Xor;
 336 |             return;
 337 |         }
 338 |         else if (token == '%') {
 339 |             token = Mod;
 340 |             return;
 341 |         }
 342 |         else if (token == '*') {
 343 |             token = Mul;
 344 |             return;
 345 |         }
 346 |         else if (token == '[') {
 347 |             token = Brak;
 348 |             return;
 349 |         }
 350 |         else if (token == '?') {
 351 |             token = Cond;
 352 |             return;
 353 |         }
 354 |         else if (token == '~' || token == ';' || token == '{' || token == '}' || token == '(' || token == ')' || token == ']' || token == ',' || token == ':') {
 355 |             // directly return the character as token;
 356 |             return;
 357 |         }
 358 |     }
 359 |     return;
 360 | }
 361 | 
 362 | void match(int tk) {
 363 |     // Anything other than the expected token is a fatal syntax error.
 364 |     if (token != tk) {
 365 |         printf("%d: expected token: %d\n", line, tk);
 366 |         exit(-1);
 367 |     }
 368 |     next();
 369 | }
 370 | 
 371 | void expression(int level) {
 372 |     // expressions have various format.
 373 |     // but majorly can be divided into two parts: unit and operator
 374 |     // for example `(char) *a[10] = (int *) func(b > 0 ? 10 : 20);
 375 |     // `a[10]` is an unit while `*` is an operator.
 376 |     // `func(...)` in total is an unit.
 377 |     // so we should first parse those unit and unary operators
 378 |     // and then the binary ones
 379 |     //
 380 |     // also the expression can be in the following types:
 381 |     //
 382 |     // 1. unit_unary ::= unit | unit unary_op | unary_op unit
 383 |     // 2. expr ::= unit_unary (bin_op unit_unary ...)
 384 | 
 385 |     // unit_unary()
 386 |     int *id;
 387 |     int tmp;
 388 |     int *addr;
 389 |     {
 390 |         if (!token) {
 391 |             printf("%d: unexpected token EOF of expression\n", line);
 392 |             exit(-1);
 393 |         }
 394 |         if (token == Num) {
 395 |             match(Num);
 396 | 
 397 |             // emit code
 398 |             *++text = IMM;
 399 |             *++text = token_val;
 400 |             expr_type = INT;
 401 |         }
 402 |         else if (token == '"') {
 403 |             // continous string "abc" "abc"
 404 | 
 405 | 
 406 |             // emit code
 407 |             *++text = IMM;
 408 |             *++text = token_val;
 409 | 
 410 |             match('"');
 411 |             // store the rest strings
 412 |             while (token == '"') {
 413 |                 match('"');
 414 |             }
 415 | 
 416 |             // append the end of string character '\0', all the data are default
 417 |             // to 0, so just move data one position forward.
 418 |             data = (char *)(((int)data + sizeof(int)) & (-sizeof(int)));
 419 |             // *data = 0;
 420 |             expr_type = PTR;
 421 |         }
 422 |         else if (token == Sizeof) {
 423 |             // sizeof is actually an unary operator
 424 |             // now only `sizeof(int)`, `sizeof(char)` and `sizeof(*...)` are
 425 |             // supported.
 426 |             match(Sizeof);
 427 |             match('(');
 428 |             expr_type = INT;
 429 | 
 430 |             if (token == Int) {
 431 |                 match(Int);
 432 |             } else if (token == Char) {
 433 |                 match(Char);
 434 |                 expr_type = CHAR;
 435 |             }
 436 | 
 437 |             while (token == Mul) {
 438 |                 match(Mul);
 439 |                 expr_type = expr_type + PTR;
 440 |             }
 441 | 
 442 |             match(')');
 443 | 
 444 |             // emit code
 445 |             *++text = IMM;
 446 |             *++text = (expr_type == CHAR) ? sizeof(char) : sizeof(int);
 447 | 
 448 |             expr_type = INT;
 449 |         }
 450 |         else if (token == Id) {
 451 |             // there are several type when occurs to Id
 452 |             // but this is unit, so it can only be
 453 |             // 1. function call
 454 |             // 2. Enum variable
 455 |             // 3. global/local variable
 456 |             match(Id);
 457 | 
 458 |             id = current_id;
 459 | 
 460 |             if (token == '(') {
 461 |                 // function call
 462 |                 match('(');
 463 | 
 464 |                 // pass in arguments
 465 |                 tmp = 0; // number of arguments
 466 |                 while (token != ')') {
 467 |                     expression(Assign);
 468 |                     *++text = PUSH;
 469 |                     tmp ++;
 470 | 
 471 |                     if (token == ',') {
 472 |                         match(',');
 473 |                     }
 474 | 
 475 |                 }
 476 |                 match(')');
 477 | 
 478 |                 // emit code
 479 |                 if (id[Class] == Sys) {
 480 |                     // system functions
 481 |                     *++text = id[Value];
 482 |                 }
 483 |                 else if (id[Class] == Fun) {
 484 |                     // function call
 485 |                     *++text = CALL;
 486 |                     *++text = id[Value];
 487 |                 }
 488 |                 else {
 489 |                     printf("%d: bad function call\n", line);
 490 |                     exit(-1);
 491 |                 }
 492 | 
 493 |                 // clean the stack for arguments
 494 |                 if (tmp > 0) {
 495 |                     *++text = ADJ;
 496 |                     *++text = tmp;
 497 |                 }
 498 |                 expr_type = id[Type];
 499 |             }
 500 |             else if (id[Class] == Num) {
 501 |                 // enum variable
 502 |                 *++text = IMM;
 503 |                 *++text = id[Value];
 504 |                 expr_type = INT;
 505 |             }
 506 |             else {
 507 |                 // variable
 508 |                 if (id[Class] == Loc) {
 509 |                     *++text = LEA;
 510 |                     *++text = index_of_bp - id[Value];
 511 |                 }
 512 |                 else if (id[Class] == Glo) {
 513 |                     *++text = IMM;
 514 |                     *++text = id[Value];
 515 |                 }
 516 |                 else {
 517 |                     printf("%d: undefined variable\n", line);
 518 |                     exit(-1);
 519 |                 }
 520 | 
 521 |                 // emit code, default behaviour is to load the value of the
 522 |                 // address which is stored in `ax`
 523 |                 expr_type = id[Type];
 524 |                 *++text = (expr_type == Char) ? LC : LI;
 525 |             }
 526 |         }
 527 |         else if (token == '(') {
 528 |             // cast or parenthesis
 529 |             match('(');
 530 |             if (token == Int || token == Char) {
 531 |                 tmp = (token == Char) ? CHAR : INT; // cast type
 532 |                 match(token);
 533 |                 while (token == Mul) {
 534 |                     match(Mul);
 535 |                     tmp = tmp + PTR;
 536 |                 }
 537 | 
 538 |                 match(')');
 539 | 
 540 |                 expression(Inc); // cast has precedence as Inc(++)
 541 | 
 542 |                 expr_type  = tmp;
 543 |             } else {
 544 |                 // normal parenthesis
 545 |                 expression(Assign);
 546 |                 match(')');
 547 |             }
 548 |         }
 549 |         else if (token == Mul) {
 550 |             // dereference *<addr>
 551 |             match(Mul);
 552 |             expression(Inc); // dereference has the same precedence as Inc(++)
 553 | 
 554 |             if (expr_type >= PTR) {
 555 |                 expr_type = expr_type - PTR;
 556 |             } else {
 557 |                 printf("%d: bad dereference\n", line);
 558 |                 exit(-1);
 559 |             }
 560 | 
 561 |             *++text = (expr_type == CHAR) ? LC : LI;
 562 |         }
 563 |         else if (token == And) {
 564 |             // get the address of
 565 |             match(And);
 566 |             expression(Inc); // get the address of
 567 |             if (*text == LC || *text == LI) {
 568 |                 text --;
 569 |             } else {
 570 |                 printf("%d: bad address of\n", line);
 571 |                 exit(-1);
 572 |             }
 573 | 
 574 |             expr_type = expr_type + PTR;
 575 |         }
 576 |         else if (token == '!') {
 577 |             // not
 578 |             match('!');
 579 |             expression(Inc);
 580 | 
 581 |             // emit code, use <expr> == 0
 582 |             *++text = PUSH;
 583 |             *++text = IMM;
 584 |             *++text = 0;
 585 |             *++text = EQ;
 586 | 
 587 |             expr_type = INT;
 588 |         }
 589 |         else if (token == '~') {
 590 |             // bitwise not
 591 |             match('~');
 592 |             expression(Inc);
 593 | 
 594 |             // emit code, use <expr> XOR -1
 595 |             *++text = PUSH;
 596 |             *++text = IMM;
 597 |             *++text = -1;
 598 |             *++text = XOR;
 599 | 
 600 |             expr_type = INT;
 601 |         }
 602 |         else if (token == Add) {
 603 |             // +var, do nothing
 604 |             match(Add);
 605 |             expression(Inc);
 606 | 
 607 |             expr_type = INT;
 608 |         }
 609 |         else if (token == Sub) {
 610 |             // -var
 611 |             match(Sub);
 612 | 
 613 |             if (token == Num) {
 614 |                 *++text = IMM;
 615 |                 *++text = -token_val;
 616 |                 match(Num);
 617 |             } else {
 618 | 
 619 |                 *++text = IMM;
 620 |                 *++text = -1;
 621 |                 *++text = PUSH;
 622 |                 expression(Inc);
 623 |                 *++text = MUL;
 624 |             }
 625 | 
 626 |             expr_type = INT;
 627 |         }
 628 |         else if (token == Inc || token == Dec) {
 629 |             tmp = token;
 630 |             match(token);
 631 |             expression(Inc);
 632 |             if (*text == LC) {
 633 |                 *text = PUSH;  // to duplicate the address
 634 |                 *++text = LC;
 635 |             } else if (*text == LI) {
 636 |                 *text = PUSH;
 637 |                 *++text = LI;
 638 |             } else {
 639 |                 printf("%d: bad lvalue of pre-increment\n", line);
 640 |                 exit(-1);
 641 |             }
 642 |             *++text = PUSH;
 643 |             *++text = IMM;
 644 |             *++text = (expr_type > PTR) ? sizeof(int) : sizeof(char);
 645 |             *++text = (tmp == Inc) ? ADD : SUB;
 646 |             *++text = (expr_type == CHAR) ? SC : SI;
 647 |         }
 648 |         else {
 649 |             printf("%d: bad expression\n", line);
 650 |             exit(-1);
 651 |         }
 652 |     }
 653 | 
 654 |     // binary operator and postfix operators.
 655 |     {
 656 |         while (token >= level) {
 657 |             // handle according to current operator's precedence
 658 |             tmp = expr_type;
 659 |             if (token == Assign) {
 660 |                 // var = expr;
 661 |                 match(Assign);
 662 |                 if (*text == LC || *text == LI) {
 663 |                     *text = PUSH; // save the lvalue's pointer
 664 |                 } else {
 665 |                     printf("%d: bad lvalue in assignment\n", line);
 666 |                     exit(-1);
 667 |                 }
 668 |                 expression(Assign);
 669 | 
 670 |                 expr_type = tmp;
 671 |                 *++text = (expr_type == CHAR) ? SC : SI;
 672 |             }
 673 |             else if (token == Cond) {
 674 |                 // expr ? a : b;
 675 |                 match(Cond);
 676 |                 *++text = JZ;
 677 |                 addr = ++text;
 678 |                 expression(Assign);
 679 |                 if (token == ':') {
 680 |                     match(':');
 681 |                 } else {
 682 |                     printf("%d: missing colon in conditional\n", line);
 683 |                     exit(-1);
 684 |                 }
 685 |                 *addr = (int)(text + 3);
 686 |                 *++text = JMP;
 687 |                 addr = ++text;
 688 |                 expression(Cond);
 689 |                 *addr = (int)(text + 1);
 690 |             }
 691 |             else if (token == Lor) {
 692 |                 // logic or
 693 |                 match(Lor);
 694 |                 *++text = JNZ;
 695 |                 addr = ++text;
 696 |                 expression(Lan);
 697 |                 *addr = (int)(text + 1);
 698 |                 expr_type = INT;
 699 |             }
 700 |             else if (token == Lan) {
 701 |                 // logic and
 702 |                 match(Lan);
 703 |                 *++text = JZ;
 704 |                 addr = ++text;
 705 |                 expression(Or);
 706 |                 *addr = (int)(text + 1);
 707 |                 expr_type = INT;
 708 |             }
 709 |             else if (token == Or) {
 710 |                 // bitwise or
 711 |                 match(Or);
 712 |                 *++text = PUSH;
 713 |                 expression(Xor);
 714 |                 *++text = OR;
 715 |                 expr_type = INT;
 716 |             }
 717 |             else if (token == Xor) {
 718 |                 // bitwise xor
 719 |                 match(Xor);
 720 |                 *++text = PUSH;
 721 |                 expression(And);
 722 |                 *++text = XOR;
 723 |                 expr_type = INT;
 724 |             }
 725 |             else if (token == And) {
 726 |                 // bitwise and
 727 |                 match(And);
 728 |                 *++text = PUSH;
 729 |                 expression(Eq);
 730 |                 *++text = AND;
 731 |                 expr_type = INT;
 732 |             }
 733 |             else if (token == Eq) {
 734 |                 // equal ==
 735 |                 match(Eq);
 736 |                 *++text = PUSH;
 737 |                 expression(Ne);
 738 |                 *++text = EQ;
 739 |                 expr_type = INT;
 740 |             }
 741 |             else if (token == Ne) {
 742 |                 // not equal !=
 743 |                 match(Ne);
 744 |                 *++text = PUSH;
 745 |                 expression(Lt);
 746 |                 *++text = NE;
 747 |                 expr_type = INT;
 748 |             }
 749 |             else if (token == Lt) {
 750 |                 // less than
 751 |                 match(Lt);
 752 |                 *++text = PUSH;
 753 |                 expression(Shl);
 754 |                 *++text = LT;
 755 |                 expr_type = INT;
 756 |             }
 757 |             else if (token == Gt) {
 758 |                 // greater than
 759 |                 match(Gt);
 760 |                 *++text = PUSH;
 761 |                 expression(Shl);
 762 |                 *++text = GT;
 763 |                 expr_type = INT;
 764 |             }
 765 |             else if (token == Le) {
 766 |                 // less than or equal to
 767 |                 match(Le);
 768 |                 *++text = PUSH;
 769 |                 expression(Shl);
 770 |                 *++text = LE;
 771 |                 expr_type = INT;
 772 |             }
 773 |             else if (token == Ge) {
 774 |                 // greater than or equal to
 775 |                 match(Ge);
 776 |                 *++text = PUSH;
 777 |                 expression(Shl);
 778 |                 *++text = GE;
 779 |                 expr_type = INT;
 780 |             }
 781 |             else if (token == Shl) {
 782 |                 // shift left
 783 |                 match(Shl);
 784 |                 *++text = PUSH;
 785 |                 expression(Add);
 786 |                 *++text = SHL;
 787 |                 expr_type = INT;
 788 |             }
 789 |             else if (token == Shr) {
 790 |                 // shift right
 791 |                 match(Shr);
 792 |                 *++text = PUSH;
 793 |                 expression(Add);
 794 |                 *++text = SHR;
 795 |                 expr_type = INT;
 796 |             }
 797 |             else if (token == Add) {
 798 |                 // add
 799 |                 match(Add);
 800 |                 *++text = PUSH;
 801 |                 expression(Mul);
 802 | 
 803 |                 expr_type = tmp;
 804 |                 if (expr_type > PTR) {
 805 |                     // pointer type, and not `char *`
 806 |                     *++text = PUSH;
 807 |                     *++text = IMM;
 808 |                     *++text = sizeof(int);
 809 |                     *++text = MUL;
 810 |                 }
 811 |                 *++text = ADD;
 812 |             }
 813 |             else if (token == Sub) {
 814 |                 // sub
 815 |                 match(Sub);
 816 |                 *++text = PUSH;
 817 |                 expression(Mul);
 818 |                 if (tmp > PTR && tmp == expr_type) {
 819 |                     // pointer subtraction
 820 |                     *++text = SUB;
 821 |                     *++text = PUSH;
 822 |                     *++text = IMM;
 823 |                     *++text = sizeof(int);
 824 |                     *++text = DIV;
 825 |                     expr_type = INT;
 826 |                 } else if (tmp > PTR) {
 827 |                     // pointer movement
 828 |                     *++text = PUSH;
 829 |                     *++text = IMM;
 830 |                     *++text = sizeof(int);
 831 |                     *++text = MUL;
 832 |                     *++text = SUB;
 833 |                     expr_type = tmp;
 834 |                 } else {
 835 |                     // numeral subtraction
 836 |                     *++text = SUB;
 837 |                     expr_type = tmp;
 838 |                 }
 839 |             }
 840 |             else if (token == Mul) {
 841 |                 // multiply
 842 |                 match(Mul);
 843 |                 *++text = PUSH;
 844 |                 expression(Inc);
 845 |                 *++text = MUL;
 846 |                 expr_type = tmp;
 847 |             }
 848 |             else if (token == Div) {
 849 |                 // divide
 850 |                 match(Div);
 851 |                 *++text = PUSH;
 852 |                 expression(Inc);
 853 |                 *++text = DIV;
 854 |                 expr_type = tmp;
 855 |             }
 856 |             else if (token == Mod) {
 857 |                 // Modulo
 858 |                 match(Mod);
 859 |                 *++text = PUSH;
 860 |                 expression(Inc);
 861 |                 *++text = MOD;
 862 |                 expr_type = tmp;
 863 |             }
 864 |             else if (token == Inc || token == Dec) {
 865 |                 // postfix inc(++) and dec(--)
 866 |                 // we will increase the value to the variable and decrease it
 867 |                 // on `ax` to get its original value.
 868 |                 if (*text == LI) {
 869 |                     *text = PUSH;
 870 |                     *++text = LI;
 871 |                 }
 872 |                 else if (*text == LC) {
 873 |                     *text = PUSH;
 874 |                     *++text = LC;
 875 |                 }
 876 |                 else {
 877 |                     printf("%d: bad value in increment\n", line);
 878 |                     exit(-1);
 879 |                 }
 880 | 
 881 |                 *++text = PUSH;
 882 |                 *++text = IMM;
 883 |                 *++text = (expr_type > PTR) ? sizeof(int) : sizeof(char);
 884 |                 *++text = (token == Inc) ? ADD : SUB;
 885 |                 *++text = (expr_type == CHAR) ? SC : SI;
 886 |                 *++text = PUSH;
 887 |                 *++text = IMM;
 888 |                 *++text = (expr_type > PTR) ? sizeof(int) : sizeof(char);
 889 |                 *++text = (token == Inc) ? SUB : ADD;
 890 |                 match(token);
 891 |             }
 892 |             else if (token == Brak) {
 893 |                 // array access var[xx]
 894 |                 match(Brak);
 895 |                 *++text = PUSH;
 896 |                 expression(Assign);
 897 |                 match(']');
 898 | 
 899 |                 if (tmp > PTR) {
 900 |                     // pointer, `not char *`
 901 |                     *++text = PUSH;
 902 |                     *++text = IMM;
 903 |                     *++text = sizeof(int);
 904 |                     *++text = MUL;
 905 |                 }
 906 |                 else if (tmp < PTR) {
 907 |                     printf("%d: pointer type expected\n", line);
 908 |                     exit(-1);
 909 |                 }
 910 |                 expr_type = tmp - PTR;
 911 |                 *++text = ADD;
 912 |                 *++text = (expr_type == CHAR) ? LC : LI;
 913 |             }
 914 |             else {
 915 |                 printf("%d: compiler error, token = %d\n", line, token);
 916 |                 exit(-1);
 917 |             }
 918 |         }
 919 |     }
 920 | }
 921 | 
 922 | void statement() {
 923 |     // Parse a single statement and emit its code into the text segment.
 924 |     // The supported forms are:
 925 |     //   1. if (...) <statement> [else <statement>]
 926 |     //   2. while (...) <statement>
 927 |     //   3. { <statement> ... }
 928 |     //   4. return [expression];
 929 |     //   5. ;              (empty statement)
 930 |     //   6. expression;    (anything else, terminated by a semicolon)
 931 | 
 932 |     int *loop_top; // address of the first instruction of a while condition
 933 |     int *patch;    // operand cell of a jump whose target is filled in later
 934 | 
 935 |     if (token == If) {
 936 |         // Emitted layout:
 937 |         //
 938 |         //          <cond>
 939 |         //          JZ  skip
 940 |         //          <then statement>
 941 |         //          JMP end         ; emitted only when there is an else
 942 |         //   skip:  <else statement>
 943 |         //   end:
 944 |         match(If);
 945 |         match('(');
 946 |         expression(Assign);
 947 |         match(')');
 948 | 
 949 |         // conditional jump over the then-branch; target patched below
 950 |         *++text = JZ;
 951 |         patch = ++text;
 952 | 
 953 |         statement();
 954 | 
 955 |         if (token == Else) {
 956 |             match(Else);
 957 | 
 958 |             // a false condition lands just past the JMP and its operand
 959 |             *patch = (int)(text + 3);
 960 |             *++text = JMP;
 961 |             patch = ++text;
 962 | 
 963 |             statement();
 964 |         }
 965 | 
 966 |         // whichever jump is still open targets the next emitted cell
 967 |         *patch = (int)(text + 1);
 968 |         return;
 969 |     }
 970 | 
 971 |     if (token == While) {
 972 |         // Emitted layout:
 973 |         //
 974 |         //   top:   <cond>
 975 |         //          JZ  end
 976 |         //          <body statement>
 977 |         //          JMP top
 978 |         //   end:
 979 |         match(While);
 980 | 
 981 |         loop_top = text + 1; // the condition's code starts at the next cell
 982 | 
 983 |         match('(');
 984 |         expression(Assign);
 985 |         match(')');
 986 | 
 987 |         *++text = JZ;
 988 |         patch = ++text;
 989 | 
 990 |         statement();
 991 | 
 992 |         // jump back to re-evaluate the condition, then resolve the exit
 993 |         *++text = JMP;
 994 |         *++text = (int)loop_top;
 995 |         *patch = (int)(text + 1);
 996 |         return;
 997 |     }
 998 | 
 999 |     if (token == '{') {
1000 |         // compound statement: parse statements until the closing brace
1001 |         match('{');
1002 |         while (token != '}') {
1003 |             statement();
1004 |         }
1005 |         match('}');
1006 |         return;
1007 |     }
1008 | 
1009 |     if (token == Return) {
1010 |         // return [expression];
1011 |         match(Return);
1012 |         if (token != ';') {
1013 |             expression(Assign);
1014 |         }
1015 |         match(';');
1016 |         *++text = LEV;
1017 |         return;
1018 |     }
1019 | 
1020 |     // empty statement, or an expression statement such as `a = b;`
1021 |     if (token != ';') {
1022 |         expression(Assign);
1023 |     }
1024 |     match(';');
1025 | }
1026 | 
1027 | void function_parameter() {
1028 |     // Parse a parameter list up to (not including) ')': each name becomes a
1029 |     // local (Loc) whose Value is its zero-based position in the list.
1030 |     int param_type;
1031 |     int count;
1032 |     count = 0;
1033 |     while (token != ')') {
1034 |         // optional base type (defaults to int), then any number of `*`
1035 |         param_type = INT;
1036 |         if (token == Int || token == Char) {
1037 |             param_type = (token == Char) ? CHAR : INT;
1038 |             match(token);
1039 |         }
1040 |         while (token == Mul) {
1041 |             match(Mul);
1042 |             param_type = param_type + PTR;
1043 |         }
1044 |         // the parameter name must be an identifier that is not already local
1045 |         if (token != Id) {
1046 |             printf("%d: bad parameter declaration\n", line);
1047 |             exit(-1);
1048 |         }
1049 |         if (current_id[Class] == Loc) {
1050 |             printf("%d: duplicate parameter declaration\n", line);
1051 |             exit(-1);
1052 |         }
1053 |         match(Id);
1054 | 
1055 |         // shadow the outer binding, keeping a copy of it in the B* fields
1056 |         current_id[BClass] = current_id[Class];
1057 |         current_id[BType]  = current_id[Type];
1058 |         current_id[BValue] = current_id[Value];
1059 |         current_id[Class]  = Loc;
1060 |         current_id[Type]   = param_type;
1061 |         current_id[Value]  = count;
1062 |         count = count + 1;
1063 |         if (token == ',') {
1064 |             match(',');
1065 |         }
1066 |     }
1067 |     index_of_bp = count + 1;
1068 | }
1069 | 
1070 | void function_body() {
1071 |     // Parse everything between a function's '{' and its closing '}':
1072 |     //
1073 |     //   type func_name (...) { <local declarations> <statements> }
1074 |     //                         ^ parsing starts just after this brace
1075 |     //
1076 |     // Locals are numbered upward from index_of_bp; the emitted ENT operand
1077 |     // is the number of locals declared. The final '}' is left unconsumed.
1078 |     int slot;      // stack index handed to the most recent local
1079 |     int var_type;  // base type plus pointer levels of one local
1080 |     slot = index_of_bp;
1081 | 
1082 |     // 1. local declarations, parsed like the global ones
1083 |     while (token == Int || token == Char) {
1084 |         basetype = (token == Char) ? CHAR : INT;
1085 |         match(token);
1086 | 
1087 |         while (token != ';') {
1088 |             var_type = basetype;
1089 |             while (token == Mul) {
1090 |                 match(Mul);
1091 |                 var_type = var_type + PTR;
1092 |             }
1093 | 
1094 |             if (token != Id) {
1095 |                 printf("%d: bad local declaration\n", line);
1096 |                 exit(-1);
1097 |             }
1098 |             if (current_id[Class] == Loc) {
1099 |                 printf("%d: duplicate local declaration\n", line);
1100 |                 exit(-1);
1101 |             }
1102 |             match(Id);
1103 | 
1104 |             // shadow the outer binding, keeping a copy in the B* fields
1105 |             slot = slot + 1;
1106 |             current_id[BClass] = current_id[Class];
1107 |             current_id[BType]  = current_id[Type];
1108 |             current_id[BValue] = current_id[Value];
1109 |             current_id[Class]  = Loc;
1110 |             current_id[Type]   = var_type;
1111 |             current_id[Value]  = slot;
1112 | 
1113 |             if (token == ',') {
1114 |                 match(',');
1115 |             }
1116 |         }
1117 |         match(';');
1118 |     }
1119 | 
1120 |     // 2. reserve stack space for the locals
1121 |     *++text = ENT;
1122 |     *++text = slot - index_of_bp;
1123 | 
1124 |     // 3. statements, up to the closing brace
1125 |     while (token != '}') {
1126 |         statement();
1127 |     }
1128 | 
1129 |     *++text = LEV;
1130 | }
1131 | 
1132 | void function_declaration() {
1133 |     // Parse `(params) { body` of `type func_name (params) { body }`;
1134 |     // no match('}') is issued here for the closing brace.
1135 |     match('(');
1136 |     function_parameter();
1137 |     match(')');
1138 | 
1139 |     match('{');
1140 |     function_body();
1141 | 
1142 |     // Leaving the function: every symbol still marked Loc gets its saved
1143 |     // (B*) class, type and value back.
1144 |     current_id = symbols;
1145 |     while (current_id[Token] != 0) {
1146 |         if (current_id[Class] == Loc) {
1147 |             current_id[Value] = current_id[BValue];
1148 |             current_id[Type]  = current_id[BType];
1149 |             current_id[Class] = current_id[BClass];
1150 |         }
1151 |         current_id = current_id + IdSize;
1152 |     }
1153 | }
1154 | 
1155 | void enum_declaration() {
1156 |     // Parse enumerators (a = 1, b = 3, ...) until '}' is the current token; each
1157 |     // becomes a Num symbol of type INT, numbered from 0 or from an explicit value.
1158 |     int next_value;
1159 |     next_value = 0;
1160 |     while (token != '}') {
1161 |         if (token != Id) {
1162 |             printf("%d: bad enum identifier %d\n", line, token);
1163 |             exit(-1);
1164 |         }
1165 |         next();
1166 |         if (token == Assign) {
1167 |             // explicit value, as in {a = 10}
1168 |             next();
1169 |             if (token != Num) {
1170 |                 printf("%d: bad enum initializer\n", line);
1171 |                 exit(-1);
1172 |             }
1173 |             next_value = token_val;
1174 |             next();
1175 |         }
1176 |         current_id[Class] = Num;
1177 |         current_id[Type]  = INT;
1178 |         current_id[Value] = next_value;
1179 |         next_value = next_value + 1;
1180 |         if (token == ',') {
1181 |             next();
1182 |         }
1183 |     }
1184 | }
1185 | 
1186 | void global_declaration() {
1187 |     // Parse one top-level declaration:
1188 |     //
1189 |     //   global_declaration ::= enum_decl | variable_decl | function_decl
1190 |     //   enum_decl     ::= 'enum' [id] '{' id ['=' num] {',' id ['=' num]} '}'
1191 |     //   variable_decl ::= type {'*'} id {',' {'*'} id} ';'
1192 |     //   function_decl ::= type {'*'} id '(' parameter_decl ')' '{' body_decl '}'
1193 |     //
1194 |     // A missing type keyword is treated as int.
1195 | 
1196 |     int type; // base type plus pointer levels of the current declarator
1197 |     int i;    // tmp, not used in this function
1198 | 
1199 |     basetype = INT;
1200 | 
1201 |     // enums are handled separately from variables and functions
1202 |     if (token == Enum) {
1203 |         // enum [id] { a = 10, b = 20, ... };
1204 |         match(Enum);
1205 |         if (token != '{') {
1206 |             match(Id); // optional tag, skipped
1207 |         }
1208 |         if (token == '{') {
1209 |             match('{');
1210 |             enum_declaration();
1211 |             match('}');
1212 |         }
1213 | 
1214 |         match(';');
1215 |         return;
1216 |     }
1217 | 
1218 |     // type keyword
1219 |     if (token == Char) {
1220 |         match(Char);
1221 |         basetype = CHAR;
1222 |     } else if (token == Int) {
1223 |         match(Int);
1224 |     }
1225 | 
1226 |     // comma separated declarators; the loop also stops on a '}' token
1227 |     while (token != ';' && token != '}') {
1228 |         // pointer levels, e.g. `int ****x;`
1229 |         type = basetype;
1230 |         while (token == Mul) {
1231 |             match(Mul);
1232 |             type = type + PTR;
1233 |         }
1234 | 
1235 |         if (token != Id) {
1236 |             printf("%d: bad global declaration\n", line);
1237 |             exit(-1);
1238 |         }
1239 |         if (current_id[Class]) {
1240 |             // the identifier already has a class, so it was declared before
1241 |             printf("%d: duplicate global declaration\n", line);
1242 |             exit(-1);
1243 |         }
1244 |         match(Id);
1245 |         current_id[Type] = type;
1246 | 
1247 |         if (token != '(') {
1248 |             // variable: record the current data position, then advance it
1249 |             current_id[Class] = Glo;
1250 |             current_id[Value] = (int)data;
1251 |             data = data + sizeof(int);
1252 |         } else {
1253 |             // function: its code starts at the next text cell
1254 |             current_id[Class] = Fun;
1255 |             current_id[Value] = (int)(text + 1);
1256 |             function_declaration();
1257 |         }
1258 | 
1259 |         if (token == ',') {
1260 |             match(',');
1261 |         }
1262 |     }
1263 | 
1264 |     // step over the token that ended the loop (';' after variables,
1265 |     // '}' after a function body)
1266 |     next();
1267 | }
1268 | 
1269 | void program() {
1270 |     // Read the first token, then parse global declarations for as long
1271 |     // as the current token is positive.
1272 |     next();
1273 |     while (0 < token)
1274 |         global_declaration();
1275 | }
1276 | 
1277 | // Print the VM registers: ax as hex and decimal, sp/bp/pc as pointers.
1278 | void show_regs() {
1279 |   printf("ax: %#x(%d)\n", (unsigned int)ax, ax); // %x takes an unsigned int
1280 |   printf("sp: %p\n", (void *)sp);                // %p takes a void *
1281 |   printf("bp: %p\n", (void *)bp);
1282 |   printf("pc: %p\n", (void *)pc);
1283 | }
1284 | 
1285 | // Return 1 when addr lies in [begin_data, data), otherwise 0.
1286 | int is_data(unsigned int addr)
1287 | {
1288 |   unsigned int lo = (unsigned int)begin_data;
1289 |   unsigned int hi = (unsigned int)data;
1290 |   return (lo <= addr && addr < hi) ? 1 : 0;
1291 | }
1292 | 
1293 | // Return 1 when addr lies in [begin_text, text), otherwise 0.
1294 | int is_text(unsigned int addr)
1295 | {
1296 |   unsigned int lo = (unsigned int)begin_text;
1297 |   unsigned int hi = (unsigned int)text;
1298 |   return (lo <= addr && addr < hi) ? 1 : 0;
1299 | }
1300 | 
1301 | // Return 1 when (unsigned)begin_stack <= addr < (unsigned)begin_stack + poolsize.
1302 | int is_stack(unsigned int addr)
1303 | {
1304 |   if ((unsigned int)begin_stack > addr)
1305 |     return 0;
1306 |   return (addr < (unsigned int)begin_stack + poolsize) ? 1 : 0;
1307 | }
1308 | 
1309 | // Execute one debugger command other than step/continue.
1310 | //
1311 | //   cmd_line  NUL-terminated command text; its first character selects
1312 | //             the command (debug_usage() prints the list).
1313 | //
1314 | // Returns 0 after a handled or unrecognised command and -1 when an `x` or
1315 | // `b` command is malformed. `q` ends the process through exit(1).
1316 | int run_debug_func(char *cmd_line)
1317 | {
1318 |   switch (cmd_line[0])
1319 |   {
1320 |     case 'q':
1321 |     {
1322 |       exit(1);
1323 |     }
1324 |     case 'd':
1325 |     {
1326 |       print_data();
1327 |       break;
1328 |     }
1329 |     case 't':
1330 |     {
1331 |       print_symbol_table();
1332 |       break;
1333 |     }
1334 |     case 'h':
1335 |     case '?':
1336 |     {
1337 |       debug_usage();
1338 |       break;
1339 |     }
1340 |     case 'l':
1341 |     {
1342 |       print_source_code();
1343 |       break;
1344 |     }
1345 |     case 'e':
1346 |     {
1347 |       print_text();
1348 |       break;
1349 |     }
1350 |     case 'b': // set a breakpoint, ex: b 0xf756301c
1351 |     {
1352 |       unsigned int addr;
1353 | 
1354 |       // The address must parse; otherwise addr would be stored while
1355 |       // still uninitialised. %x skips the blanks after the 'b'.
1356 |       if (sscanf(cmd_line + 1, "%x", &addr) != 1)
1357 |       {
1358 |         printf("usage: b <hex address>\n");
1359 |         return -1;
1360 |       }
1361 | 
1362 |       if (last_bp < MAX_BREAK_POINT)
1363 |       {
1364 |         break_points[last_bp] = addr;
1365 |         printf("set break_points[%d]: %x\n", last_bp, addr);
1366 |         ++last_bp;
1367 |       }
1368 |       else
1369 |       {
1370 |         printf("exceed %d break points\n", MAX_BREAK_POINT);
1371 |       }
1372 |       break;
1373 |     }
1374 |     case 'x': // show memory content, ex: xx 0xf756301c
1375 |     {
1376 |       char cmd_str[5];
1377 |       char fmt = cmd_line[1]; // s, c, i or x: how to display the value
1378 |       unsigned int addr;
1379 | 
1380 |       if (fmt != 's' && fmt != 'c' && fmt != 'i' && fmt != 'x')
1381 |       {
1382 |         printf("xxx\n");
1383 |         return -1;
1384 |       }
1385 | 
1386 |       // %4s keeps the command word inside cmd_str (4 chars + NUL); the
1387 |       // address must parse too, or addr would be read uninitialised.
1388 |       if (sscanf(cmd_line, "%4s %x", cmd_str, &addr) != 2)
1389 |       {
1390 |         printf("usage: x<s|c|i|x> <hex address>\n");
1391 |         return -1;
1392 |       }
1393 |       printf("cmd: %s, addr: %#x\n", cmd_str, addr);
1394 | 
1395 |       if (is_data(addr))
1396 |       {
1397 |         // `c` is accepted above but prints nothing for the data segment
1398 |         if (fmt == 's')
1399 |           printf("data seg: %s\n", (char *)addr);
1400 |         if (fmt == 'i')
1401 |           printf("data seg: %d\n", *(int *)addr); // plain %d: '#' is undefined for d
1402 |         if (fmt == 'x')
1403 |           printf("data seg: %#x\n", *(unsigned int *)addr);
1404 |       }
1405 |       else if (is_text(addr))
1406 |       {
1407 |         printf("text seg: %#x(%d)\n", *((unsigned int *)addr), *((int *)addr));
1408 |       }
1409 |       else if (is_stack(addr))
1410 |       {
1411 |         printf("stack area: %#x(%d)\n", *((unsigned int *)addr), *((int *)addr));
1412 |       }
1413 |       else
1414 |       {
1415 |         // the stack's upper bound is computed the same way is_stack() does
1416 |         printf("%x is not in \ntext segment (%p ~ %p)\ndata segment (%p ~ %p)\nstack range (%p ~ %p)\n",
1417 |                addr, (void *)begin_text, (void *)text, (void *)begin_data, (void *)data,
1418 |                (void *)begin_stack, (void *)((unsigned int)begin_stack + poolsize));
1419 |       }
1420 |       break;
1421 |     }
1422 |     case 'r': // show registers
1423 |     {
1424 |       show_regs();
1425 |       break;
1426 |     }
1427 |     default:
1428 |     {
1429 |       break;
1430 |     }
1431 |   } // end switch (cmd_line[0])
1432 |   return 0;
1433 | }
1434 | 
1435 | // Print the list of debugger commands to stdout.
1436 | void debug_usage()
1437 | {
1438 |   // Help lines that take no arguments, printed verbatim in this order.
1439 |   static const char *help_lines[] = {
1440 |     "command\n", "s: step\n", "q: quit\n", "c: continue\n",
1441 |     "r: print all register content\n", "d: print data\n", "e: print text\n",
1442 |     "l: print source code\n", "t: print symbol table\n",
1443 |     "xs address: print text/data segment stack area content as string\n",
1444 |     "xc address: not yet complete. print text/data segment stack area content as char\n",
1445 |     "xi address: print text/data segment stack area content as int\n",
1446 |     "xx address: print text/data segment stack area content as hex\n",
1447 |   };
1448 |   unsigned int k;
1449 |   for (k = 0; k < sizeof(help_lines) / sizeof(help_lines[0]); ++k) printf("%s", help_lines[k]);
1450 |   printf("b address: set breakpoint, max breakpoint is %d\n", MAX_BREAK_POINT);
1451 | }
1452 | 
1453 | #define INPUT_SIZE 20
1454 | int eval() {
1455 |   int line=0, ch;
1456 |   unsigned int break_point = 0;
1457 |   int continue_run = 0;
1458 |   int meet_bp = 0;
1459 |   char input_str[INPUT_SIZE+1];
1460 |     int op, *tmp;
1461 |     while (1) 
1462 |     {
1463 | #ifdef SUPPORT_DEBUG
1464 |         print_spec_text(pc);
1465 |         //if (break_point == (unsigned int)pc || continue_run == 0)
1466 | 
1467 |   debug_input:
1468 |         while(continue_run == 0)
1469 |         {
1470 |           printf("%d ## debug> ", line++);
1471 |           fgets(input_str, INPUT_SIZE, stdin);
1472 | 
1473 |           if (input_str[0] == 's' || input_str[0] == '\n')
1474 |           {
1475 |             continue_run = 0;
1476 |             break;
1477 |           }
1478 | 
1479 |           if (input_str[0] == 'c')
1480 |           {
1481 |             continue_run = 1;
1482 |             break;
1483 |           }
1484 |           else
1485 |           {
1486 |             run_debug_func(input_str);
1487 |             continue_run = 0;
1488 |           }
1489 |         }
1490 | 
1491 |         if (meet_bp == 0 && meet_break_point(pc))
1492 |         {
1493 |           meet_bp = 1;
1494 |           // printf("meet break pointer: %x\n", (unsigned int)pc);
1495 |           continue_run = 0;
1496 |           goto debug_input;
1497 |         }
1498 |         else
1499 |         {
1500 |           meet_bp = 0;
1501 |         }
1502 | 
1503 | #endif
1504 |           op = *pc++; // get next operation code
1505 | 
1506 |         if (op == IMM)       {ax = *pc++;}                                     // load immediate value to ax
1507 |         else if (op == LC)   {ax = *(char *)ax;}                               // load character to ax, address in ax
1508 |         else if (op == LI)   {ax = *(int *)ax;}                                // load integer to ax, address in ax
1509 |         else if (op == SC)   {ax = *(char *)*sp++ = ax;}                       // save character to address, value in ax, address on stack
1510 |         else if (op == SI)   {*(int *)*sp++ = ax;}                             // save integer to address, value in ax, address on stack
1511 |         else if (op == PUSH) {*--sp = ax;}                                     // push the value of ax onto the stack
1512 |         else if (op == JMP)  {pc = (int *)*pc;}                                // jump to the address
1513 |         else if (op == JZ)   {pc = ax ? pc + 1 : (int *)*pc;}                   // jump if ax is zero
1514 |         else if (op == JNZ)  {pc = ax ? (int *)*pc : pc + 1;}                   // jump if ax is zero
1515 |         else if (op == CALL) {*--sp = (int)(pc+1); pc = (int *)*pc;}           // call subroutine
1516 |         //else if (op == RET)  {pc = (int *)*sp++;}                              // return from subroutine;
1517 |         else if (op == ENT)  {*--sp = (int)bp; bp = sp; sp = sp - *pc++;}      // make new stack frame
1518 |         else if (op == ADJ)  {sp = sp + *pc++;}                                // add esp, <size>
1519 |         else if (op == LEV)  {sp = bp; bp = (int *)*sp++; pc = (int *)*sp++;}  // restore call frame and PC
1520 |         else if (op == ENT)  {*--sp = (int)bp; bp = sp; sp = sp - *pc++;}      // make new stack frame
1521 |         else if (op == ADJ)  {sp = sp + *pc++;}                                // add esp, <size>
1522 |         else if (op == LEV)  {sp = bp; bp = (int *)*sp++; pc = (int *)*sp++;}  // restore call frame and PC
1523 |         else if (op == LEA)  {ax = (int)(bp + *pc++);}                         // load address for arguments.
1524 | 
1525 |         else if (op == OR)  ax = *sp++ | ax;
1526 |         else if (op == XOR) ax = *sp++ ^ ax;
1527 |         else if (op == AND) ax = *sp++ & ax;
1528 |         else if (op == EQ)  ax = *sp++ == ax;
1529 |         else if (op == NE)  ax = *sp++ != ax;
1530 |         else if (op == LT)  ax = *sp++ < ax;
1531 |         else if (op == LE)  ax = *sp++ <= ax;
1532 |         else if (op == GT)  ax = *sp++ >  ax;
1533 |         else if (op == GE)  ax = *sp++ >= ax;
1534 |         else if (op == SHL) ax = *sp++ << ax;
1535 |         else if (op == SHR) ax = *sp++ >> ax;
1536 |         else if (op == ADD) ax = *sp++ + ax;
1537 |         else if (op == SUB) ax = *sp++ - ax;
1538 |         else if (op == MUL) ax = *sp++ * ax;
1539 |         else if (op == DIV) ax = *sp++ / ax;
1540 |         else if (op == MOD) ax = *sp++ % ax;
1541 | 
1542 | 
1543 |         else if (op == EXIT) { printf("exit(%d)", *sp); return *sp;}
1544 |         else if (op == OPEN) { ax = open((char *)sp[1], sp[0]); }
1545 |         else if (op == CLOS) { ax = close(*sp);}
1546 |         else if (op == READ) { ax = read(sp[2], (char *)sp[1], *sp); }
1547 |         else if (op == PRTF) { tmp = sp + pc[1]; ax = printf((char *)tmp[-1], tmp[-2], tmp[-3], tmp[-4], tmp[-5], tmp[-6]); }
1548 |         else if (op == MALC) { ax = (int)malloc(*sp);}
1549 |         else if (op == MSET) { ax = (int)memset((char *)sp[2], sp[1], *sp);}
1550 |         else if (op == MCMP) { ax = memcmp((char *)sp[2], (char *)sp[1], *sp);}
1551 |         else {
1552 |             printf("unknown instruction:%d\n", op);
1553 |             return -1;
1554 |         }
1555 |     }
1556 |     return 0;
1557 | }
1558 | 
1559 | #ifdef SUPPORT_DEBUG
1560 | int meet_break_point(int *pc)
1561 | {
1562 |   int i;
1563 |   unsigned int cur_addr = (unsigned int)pc;
1564 |   //printf("cur_addr: %x\n", cur_addr);
1565 | 
1566 |   for (i = 0 ; i < last_bp ; ++i)
1567 |   {
1568 |     //printf("cur break_points[%d]: %x\n", i, break_points[i]);
1569 |     if (cur_addr == break_points[i])
1570 |     {
1571 |       printf("break_points[%d]: %x\n", i, break_points[i]);
1572 |       return 1;
1573 |     }
1574 |   }
1575 |   return 0;
1576 | }
1577 | 
1578 | #ifdef SUPPORT_DEBUG
1579 | void print_symbol_table()
1580 | {
1581 |   int *cur_id;
1582 |   cur_id = symbols;
1583 | 
1584 |   printf("symbol table:\n");
1585 | 
1586 |   while(cur_id[Token])
1587 |   {
1588 |     printf("cur_id[Name]: %s\n", cur_id[Name]);
1589 |     printf("cur_id[Hash]: %#x\n", cur_id[Hash]);
1590 |     printf("cur_id[Type]: %s (%d)\n", type_str(cur_id[Type]), cur_id[Type]);
1591 |     printf("cur_id[Class]: %s (%d)\n", class_str(cur_id[Class]), cur_id[Class]);
1592 |     printf("cur_id[Value]: %#x (%d)\n", cur_id[Value], cur_id[Value]);
1593 |     cur_id = cur_id + IdSize;
1594 |   }
1595 | }
1596 | #endif
1597 | 
// Print one instruction of the text segment on a single line.
// The line shows the address of the instruction word, then the mnemonic
// returned by inst_2_str() (or the raw word in hex when there is none) and,
// if inst_has_argu() says so, the following operand word in hex and decimal.
//
// Returns the result of inst_has_argu(): non-zero when the instruction has
// an argument, 0 when it has none.
int print_spec_text(int *cur_text)
{
  int opcode = *cur_text;
  const char *mnemonic = inst_2_str(opcode);
  int takes_operand = inst_has_argu(opcode);

  if (!mnemonic)
    printf("addr %p ## %x", cur_text, opcode);
  else
    printf("addr %p ## %s", cur_text, mnemonic);

  // the operand, when present, is the word right after the opcode
  if (takes_operand)
    printf(" %#x(%d)", cur_text[1], cur_text[1]);

  printf("\n");
  return takes_operand;
}
1620 | 
1621 | void print_source_code()
1622 | {
1623 |   printf("%s\n", src_begin);
1624 | }
1625 | 
1626 | void print_text()
1627 | {
1628 |   printf("text segment:\n");
1629 | 
1630 |   int *cur_text = begin_text+1;
1631 |   int i;
1632 |   int has_argu=0;
1633 |   while (cur_text != text)
1634 |   {
1635 |     has_argu = print_spec_text(cur_text);
1636 |     if (has_argu)
1637 |       cur_text += 2;
1638 |     else
1639 |       ++cur_text;
1640 | #if 0
1641 |     const char* inst_str = inst_2_str(*cur_text);
1642 |     has_argu = inst_has_argu(*cur_text);
1643 |     if (*cur_text == -1)
1644 |     {
1645 |       ++cur_text;
1646 |       continue; 
1647 |     }
1648 |     if (inst_str)
1649 |       printf("addr %p ## %s\n", cur_text, inst_str);
1650 |     else
1651 |       printf("addr %p ## %x\n", cur_text, *cur_text);
1652 | 
1653 |     if (has_argu)
1654 |     {
1655 |       ++cur_text;
1656 |       printf("addr %p ## %#x (%d)\n", cur_text, *cur_text, *cur_text);
1657 |       has_argu = 0;
1658 |     }
1659 | #endif
1660 |   }
1661 | 
1662 | }
1663 | 
1664 | void print_data()
1665 | {
1666 |   signed char *cur_data = begin_data;
1667 |   int print_addr = 1;
1668 | 
1669 |   printf("data segment:\n");
1670 | 
1671 |   while(cur_data != data)
1672 |   {
1673 |     if (*cur_data != 0)
1674 |     {
1675 |       if (print_addr)
1676 |       {
1677 |         printf("%p: ", cur_data);
1678 |         print_addr = 0;
1679 |       }
1680 |       printf("%c", *cur_data);
1681 |     }
1682 |     else
1683 |     {
1684 |       printf("\n");
1685 |       print_addr = 1;
1686 |     }
1687 |     ++cur_data;
1688 |   }
1689 | }
1690 | #endif
1691 | 
1692 | int main(int argc, char **argv)
1693 | {
1694 | 
1695 |     int i, fd;
1696 |     int *tmp;
1697 | 
1698 |     argc--;
1699 |     argv++;
1700 | 
1701 |     poolsize = 256 * 1024; // arbitrary size
1702 |     line = 1;
1703 | 
1704 |     if ((fd = open(*argv, 0)) < 0) {
1705 |         printf("could not open(%s)\n", *argv);
1706 |         return -1;
1707 |     }
1708 | 
1709 |     // allocate memory for virtual machine
1710 |     if (!(text = old_text = malloc(poolsize))) {
1711 |         printf("could not malloc(%d) for text area\n", poolsize);
1712 |         return -1;
1713 |     }
1714 |     begin_text = text;
1715 | 
1716 |     if (!(data = malloc(poolsize))) {
1717 |         printf("could not malloc(%d) for data area\n", poolsize);
1718 |         return -1;
1719 |     }
1720 | 
1721 |     begin_data = data;
1722 | 
1723 |     if (!(stack = malloc(poolsize))) {
1724 |         printf("could not malloc(%d) for stack area\n", poolsize);
1725 |         return -1;
1726 |     }
1727 | 
1728 |     begin_stack = stack;
1729 | 
1730 |     if (!(symbols = malloc(poolsize))) {
1731 |         printf("could not malloc(%d) for symbol table\n", poolsize);
1732 |         return -1;
1733 |     }
1734 | 
1735 | #ifdef SUPPORT_DEBUG
1736 |     printf("text: %p\n", text);
1737 |     printf("data: %p\n", data);
1738 |     printf("stack: %p\n", stack);
1739 | #endif
1740 | 
1741 |     memset(text, 0, poolsize);
1742 |     memset(data, 0, poolsize);
1743 |     memset(stack, 0, poolsize);
1744 |     memset(symbols, 0, poolsize);
1745 |     bp = sp = (int *)((int)stack + poolsize);
1746 |     ax = 0;
1747 | 
1748 |     src = "char else enum if int return sizeof while "
1749 |           "open read close printf malloc memset memcmp exit void main";
1750 | 
1751 |      // add keywords to symbol table
1752 |     i = Char;
1753 |     while (i <= While) {
1754 |         next();
1755 |         current_id[Token] = i++;
1756 |     }
1757 | 
1758 |     // add library to symbol table
1759 |     i = OPEN;
1760 |     while (i <= EXIT) {
1761 |         next();
1762 |         current_id[Class] = Sys;
1763 |         current_id[Type] = INT;
1764 |         current_id[Value] = i++;
1765 |     }
1766 | 
1767 |     next(); current_id[Token] = Char; // handle void type
1768 |     next(); idmain = current_id; // keep track of main
1769 | 
1770 | 
1771 |     // read the source file
1772 |     if ((fd = open(*argv, 0)) < 0) {
1773 |         printf("could not open(%s)\n", *argv);
1774 |         return -1;
1775 |     }
1776 | 
1777 |     if (!(src = old_src = malloc(poolsize))) {
1778 |         printf("could not malloc(%d) for source area\n", poolsize);
1779 |         return -1;
1780 |     }
1781 |     // read the source file
1782 |     if ((i = read(fd, src, poolsize-1)) <= 0) {
1783 |         printf("read() returned %d\n", i);
1784 |         return -1;
1785 |     }
1786 |     src[i] = 0; // add EOF character
1787 |     src_begin = src;
1788 |     close(fd);
1789 | 
1790 |     program();
1791 | 
1792 |     if (!(pc = (int *)idmain[Value])) {
1793 |         printf("main() not defined\n");
1794 |         return -1;
1795 |     }
1796 | 
1797 |     // setup stack
1798 |     sp = (int *)((int)stack + poolsize);
1799 | 
1800 | #ifdef SUPPORT_DEBUG
1801 |     printf("sp: %x\n", sp);
1802 | #endif
1803 | 
1804 |     *--sp = EXIT; // call exit if main returns
1805 |     *--sp = PUSH; tmp = sp;
1806 |     *--sp = argc;
1807 |     *--sp = (int)argv;
1808 |     *--sp = (int)tmp;
1809 | 
1810 | #ifdef SUPPORT_DEBUG
1811 |     //print_text();
1812 |     //print_data();
1813 |     // print_symbol_table();
1814 | #endif
1815 |     return eval();
1816 | }
1817 | 


--------------------------------------------------------------------------------