├── .gitignore ├── .gitmodules ├── LICENSE ├── Makefile ├── README.md ├── _more ├── c6_linux.c ├── c6_macos.c └── c6_win.c ├── c6 ├── c6.c ├── c6_test.sh ├── doc ├── asm.md ├── jit.md └── lea.md ├── genasm ├── genasm.c ├── jit ├── jit.c ├── jit.md ├── jit_test.sh └── test ├── arg.c ├── explore ├── dl0 ├── dl0.c ├── dl0b ├── dl0b.c ├── dl1 ├── dl1.c └── test1.c ├── fib.c ├── fib.vm ├── fib.vm.s ├── fib64.c ├── fib64.s ├── hello.c ├── hello.s ├── hello.vm ├── hello.vm.s ├── hello64.c ├── hello64.s ├── helloccc.c ├── helloccc.s ├── helloccc.vm ├── helloccc.vm.s ├── sum.c ├── sum.s ├── sum.vm ├── sum.vm.s ├── sum64.c ├── sum64.s └── var.c /.gitignore: -------------------------------------------------------------------------------- 1 | bak 2 | 3 | # Prerequisites 4 | *.d 5 | 6 | # Object files 7 | *.o 8 | *.ko 9 | *.obj 10 | # *.elf 11 | 12 | # Linker output 13 | *.ilk 14 | *.map 15 | *.exp 16 | 17 | # Precompiled Headers 18 | *.gch 19 | *.pch 20 | 21 | # Libraries 22 | *.lib 23 | *.a 24 | *.la 25 | *.lo 26 | 27 | # Shared objects (inc. Windows DLLs) 28 | *.dll 29 | *.so 30 | *.so.* 31 | *.dylib 32 | 33 | # Executables 34 | *.exe 35 | *.out 36 | *.app 37 | *.i*86 38 | *.x86_64 39 | *.hex 40 | 41 | # Debug files 42 | *.dSYM/ 43 | *.su 44 | *.idb 45 | *.pdb 46 | 47 | # Kernel Module Compile Results 48 | *.mod* 49 | *.cmd 50 | .tmp_versions/ 51 | modules.order 52 | Module.symvers 53 | Mkfile.old 54 | dkms.conf 55 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "c6.wiki"] 2 | path = c6.wiki 3 | url = https://github.com/ccc-c/c4.wiki.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -w -g -m32 3 | OBJS = c6 genasm jit 4 | 5 | all: c6 genasm jit 6 | 7 | c6: c6.c 8 | $(CC) $(CFLAGS) $^ -o c6 9 | 10 | genasm: genasm.c 11 | $(CC) $(CFLAGS) $^ -o genasm 12 | 13 | jit: jit.c 14 | $(CC) $(CFLAGS) $^ -o jit -ldl 15 | 16 | clean: 17 | rm -f $(OBJS) 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # C6 -- 700 行的 C 語言編譯器 2 | 3 | * 原作者 -- Robert Swierczek, https://github.com/rswier/ 4 | * 原專案 -- https://github.com/rswier/c4 5 | * 修改者 -- 陳鍾誠 6 | 7 | ## 使用方式 8 | 9 | 建議在 Linux 下編譯本專案 (WSL 亦可,Windows 的 MinGW 比較容易有錯) 10 | 11 | ``` 12 | $ sudo apt update 13 | $ sudo apt install gcc-multilib 14 | 15 | $ make 16 | gcc -w -g -m32 c6.c -o c6 17 | gcc -w -g -m32 genasm.c -o genasm 18 | gcc -w -g -m32 jit.c -o jit -ldl 19 | 20 | $ ./c6 test/hello.c 21 | hello, world 22 | exit(0) cycle = 9 23 | 24 | $ ./c6 test/sum.c 25 | sum(10)=55 26 | exit(0) cycle = 303 27 | 28 | $ ./c6 test/fib.c 29 | f(7)=13 30 | exit(8) cycle = 920 31 | ``` 32 | 33 | ## 自我編譯 34 | 35 | ``` 36 | $ ./c6 c6.c test/fib.c 37 | f(7)=13 38 | exit(8) cycle = 920 39 | exit(8) cycle = 147464 40 | 41 | $ ./c6 c6.c c6.c test/fib.c 42 | f(7)=13 43 | exit(8) cycle = 920 44 | exit(8) cycle = 147464 45 | exit(8) cycle = 24476550 46 | ``` 47 | 48 | ## 印出組合語言 (堆疊機) 49 | 50 | ``` 51 | $ ./c6 -s test/hello.c 52 | 1: #include 53 | 2: 54 | 3: int main() 55 | 4: { 56 | 5: printf("hello, world\n"); 57 | 1:F7D47014 ENT 0 58 | 3:F7D4701C ADDR 0:F7D06010 59 | 5:F7D47024 PSH 60 | 6:F7D47028 PRTF 61 | 7:F7D4702C ADJ 1 62 | 6: } 63 | 9:F7D47034 LEV 64 | ``` 65 | 66 | ## 印出執行過程 67 | 68 | ``` 69 | $ ./c6 -d test/hello.c 70 | 1:F7C5E014 ENT 0 71 | 3:F7C5E01C ADDR 0:F7C1D010 72 | 5:F7C5E024 PSH 73 | 6:F7C5E028 PRTF 74 | hello, world 75 | 7:F7C5E02C ADJ 1 76 | 9:F7C5E034 LEV 77 | -67586:F7C1C008 PSH 78 | -67585:F7C1C00C EXIT 79 | exit(13) cycle = 8 80 | ``` 81 | 82 | ## JIT 編譯執行 83 | 84 | ``` 85 | $ wsl 86 | wsl> make clean 87 | rm -f c6 genasm jit 88 | 89 | wsl> make 90 | gcc -w -g -m32 c6.c -o c6 91 | gcc -w -g -m32 genasm.c -o genasm 92 | gcc -w -g -m32 jit.c -o jit -ldl 93 | 94 | wsl> ./jit_test.sh hello 95 | Compile test/hello.c success! 96 | Output: test/hello.vm 97 | hello, world 98 | 99 | wsl> ./jit_test.sh sum 100 | Compile test/sum.c success! 101 | Output: test/sum.vm 102 | sum(10)=55 103 | 104 | wsl> ./jit_test.sh fib 105 | Compile test/fib.c success! 106 | Output: test/fib.vm 107 | f(7)=13 108 | ``` 109 | 110 | -------------------------------------------------------------------------------- /_more/c6_linux.c: -------------------------------------------------------------------------------- 1 | // c6.c - A mini compiler derived from the c4 by Robert Swierczek 2 | // 修改者: 陳鍾誠 (模組化+中文註解+目的檔處理) 3 | // char, int, and pointer types 4 | // if, while, return, and expression statements 5 | // just enough features to allow self-compilation and a bit more 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #define int long long // c6 要求 int 與處理器位址同樣長度,因此將 int 定義為 64 位元整數。 13 | 14 | char *p, *lp, // current position in source code (p: 目前原始碼指標, lp: 上一行原始碼指標) 15 | *data,*datap, // data/bss pointer (資料段機器碼指標) 16 | *op; // 指令字串列表 17 | 18 | int *e, *le, *code, // current position in emitted code (e: 目前機器碼指標, le: 上一行機器碼指標) 19 | *id, // currently parsed identifier (id: 目前的 id) 20 | *sym, // symbol table (simple list of identifiers) (符號表) 21 | tk, // current token (目前 token) 22 | ival, // current token value (目前的 token 值) 23 | ty, // current expression type (目前的運算式型態) 24 | loc, // local variable offset (區域變數的位移) 25 | line, // current line number (目前行號) 26 | src, // print source and assembly flag (印出原始碼) 27 | debug, // print executed instructions (印出執行指令 -- 除錯模式) 28 | o_run, // 執行目的檔 29 | o_save, // 反組譯目的檔 30 | o_dump; // 傾印目的檔 31 | 32 | int fd, poolsz, *idmain; 33 | int *pc, *bp, *sp, codeLen, dataLen; 34 | 35 | // tokens and classes (operators last and in precedence order) (按優先權順序排列) 36 | enum { // token : 0-127 直接用該字母表達, 128 以後用代號。 37 | Num = 128, Fun, Sys, Glo, Loc, Id, 38 | Char, Else, Enum, If, Int, Return, Sizeof, While, 39 | Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak 40 | }; 41 | 42 | // opcodes (機器碼的 op) 43 | enum { LEA ,IMM ,ADDR,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH , 44 | OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD , 45 | OPEN,READ,WRIT,CLOS,PRTF,MALC,FREE,MSET,MCMP,EXIT }; 46 | 47 | // types (支援型態,只有 int, char, pointer) 48 | enum { CHAR, INT, PTR }; 49 | 50 | // 因為沒有 struct,所以使用 offset 代替,例如 id[Tk] 代表 id.Tk (token), id[Hash] 代表 id.Hash, id[Name] 代表 id.Name, ..... 51 | // identifier offsets (since we can't create an ident struct) 52 | enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; // HClass, HType, HVal 是暫存的備份 ??? 53 | 54 | int stepInstr(int *p) { 55 | // 傳回下一個指令大小:ADJ 之前有一個參數,之後沒有參數。 56 | if (*++p <= ADJ) return 2; else return 1; 57 | } 58 | 59 | void printInstr(int *p, int *code, char *data) { 60 | int ir, arg; 61 | // 印出下一個指令 62 | ir = *++p; 63 | printf(" %4X:%X %8.4s", p-code, p, &op[ir * 5]); 64 | if (ir <= ADJ) { // ADJ 之前的指令有一個參數 65 | arg = *++p; 66 | if (ir==JSR || ir==JMP || ir==BZ || ir==BNZ) { 67 | if (arg==0) printf("0?\n"); else printf(" %X:%X\n", (int*)arg-code, (int*)arg); 68 | } else if (ir==ADDR) 69 | printf(" %X:%X\n", (char*)arg-data, arg); 70 | else 71 | printf(" %d\n", arg); 72 | } else { // ADJ 之後的指令沒有任何參數 73 | printf("\n"); 74 | } 75 | } 76 | 77 | void next() { 78 | char *pp; 79 | // 詞彙解析 lexer 80 | while (tk = *p) { 81 | ++p; 82 | if (tk == '\n') { // 換行 83 | if (src) { 84 | printf("%d: %.*s", line, p - lp, lp); // 印出該行 85 | lp = p; // lp = p = 新一行的原始碼開頭 86 | while (le < e) { // 印出上一行的所有目的碼 87 | printInstr(le, code, data); 88 | le = le + stepInstr(le); 89 | } 90 | } 91 | ++line; 92 | } 93 | else if (tk == '#') { // 取得 #include 這類的一整行 94 | while (*p != 0 && *p != '\n') ++p; 95 | } 96 | else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { // 取得變數名稱 97 | pp = p - 1; 98 | while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') 99 | tk = tk * 147 + *p++; // 計算雜湊值 100 | tk = (tk << 6) + (p - pp); // 符號表的雜湊位址 ?? 101 | id = sym; 102 | while (id[Tk]) { // 檢查是否碰撞 ? 103 | if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; } // 沒碰撞就傳回 token 104 | id = id + Idsz; // 碰撞,前進到下一格。 105 | } 106 | id[Name] = (int)pp; // id.Name = ptr(變數名稱) 107 | id[Hash] = tk; // id.Hash = 雜湊值 108 | tk = id[Tk] = Id; // token = id.Tk = Id 109 | return; 110 | } 111 | else if (tk >= '0' && tk <= '9') { // 取得數字串 112 | if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; } // 十進位 113 | else if (*p == 'x' || *p == 'X') { // 十六進位 114 | while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F'))) // 16 進位 115 | ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0); 116 | } 117 | else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; } // 八進位 118 | tk = Num; // token = Number 119 | return; 120 | } 121 | else if (tk == '/') { 122 | if (*p == '/') { // 註解 123 | ++p; 124 | while (*p != 0 && *p != '\n') ++p; // 略過註解 125 | } 126 | else { // 除法 127 | tk = Div; 128 | return; 129 | } 130 | } 131 | else if (tk == '\'' || tk == '"') { // 字元或字串 132 | pp = datap; 133 | while (*p != 0 && *p != tk) { 134 | if ((ival = *p++) == '\\') { 135 | if ((ival = *p++) == 'n') ival = '\n'; // 處理 \n 的特殊情況 136 | } 137 | if (tk == '"') *datap++ = ival; // 把字串塞到資料段裏 138 | } 139 | ++p; 140 | if (tk == '"') ival = (int)pp; else tk = Num; // (若是字串) ? (ival = 字串 (在資料段中的) 指標) : (字元值) 141 | return; 142 | } // 以下為運算元 =+-!<>|&^%*[?~, ++, --, !=, <=, >=, ||, &&, ~ ;{}()],: 143 | else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; } 144 | else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; } 145 | else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; } 146 | else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; } 147 | else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; } 148 | else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; } 149 | else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; } 150 | else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; } 151 | else if (tk == '^') { tk = Xor; return; } 152 | else if (tk == '%') { tk = Mod; return; } 153 | else if (tk == '*') { tk = Mul; return; } 154 | else if (tk == '[') { tk = Brak; return; } 155 | else if (tk == '?') { tk = Cond; return; } 156 | else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return; 157 | } 158 | } 159 | 160 | void expr(int lev) { 161 | int t, *d; 162 | // 運算式 expression, 其中 lev 代表優先等級 163 | if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); } // EOF 164 | else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; } // 數值 165 | else if (tk == '"') { // 字串 166 | *++e = ADDR; *++e = ival; next(); 167 | while (tk == '"') next(); 168 | datap = (char *)((int)datap + sizeof(int) & -sizeof(int)); ty = PTR; // 用 int 為大小對齊 ?? 169 | } 170 | else if (tk == Sizeof) { // 處理 sizeof(type) ,其中 type 可能為 char, int 或 ptr 171 | next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } 172 | ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } 173 | while (tk == Mul) { next(); ty = ty + PTR; } 174 | if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } 175 | *++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int); 176 | ty = INT; 177 | } 178 | else if (tk == Id) { // 處理 id ... 179 | d = id; next(); 180 | if (tk == '(') { // id (args) ,這是 call 181 | next(); 182 | t = 0; 183 | while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); } // 推入參數 184 | next(); 185 | // d[Class] 可能為 Num = 128, Fun, Sys, Glo, Loc, ... 186 | if (d[Class] == Sys) *++e = d[Val]; // token 是系統呼叫,直接呼叫之... 187 | else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; } // token 是自訂函數,用 JSR : jump to subroutine 指令呼叫 188 | else { printf("%d: bad function call\n", line); exit(-1); } 189 | if (t) { *++e = ADJ; *++e = t; } // 有參數,要調整堆疊 (ADJ : stack adjust) 190 | ty = d[Type]; 191 | } 192 | else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; } // 該 id 是數值 193 | else { 194 | if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; } // 該 id 是區域變數,載入區域變數 (LEA : load local address) 195 | else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; } // 該 id 是全域變數,載入該全域變數 (IMM : load global address or immediate 載入全域變數或立即值) 196 | else { printf("%d: undefined variable\n", line); exit(-1); } 197 | *++e = ((ty = d[Type]) == CHAR) ? LC : LI; // LI : load int, LC : load char 198 | } 199 | } 200 | else if (tk == '(') { // (E) : 有括號的運算式 ... 201 | next(); 202 | if (tk == Int || tk == Char) { 203 | t = (tk == Int) ? INT : CHAR; next(); 204 | while (tk == Mul) { next(); t = t + PTR; } 205 | if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } 206 | expr(Inc); // 處理 ++, -- 的情況 207 | ty = t; 208 | } 209 | else { 210 | expr(Assign); // 處理 (E) 中的 E (E 運算式必須能處理 (t=x) op y 的情況,所以用 expr(Assign)) 211 | if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } 212 | } 213 | } 214 | else if (tk == Mul) { // * 乘法 215 | next(); expr(Inc); 216 | if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } 217 | *++e = (ty == CHAR) ? LC : LI; 218 | } 219 | else if (tk == And) { // & AND 220 | next(); expr(Inc); 221 | if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); } 222 | ty = ty + PTR; 223 | } 224 | else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; } // NOT 225 | else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; } // Logical NOT 226 | else if (tk == Add) { next(); expr(Inc); ty = INT; } 227 | else if (tk == Sub) { 228 | next(); *++e = IMM; 229 | if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; } // -Num or -E 230 | ty = INT; 231 | } 232 | else if (tk == Inc || tk == Dec) { // ++ or -- 233 | t = tk; next(); expr(Inc); 234 | if (*e == LC) { *e = PSH; *++e = LC; } 235 | else if (*e == LI) { *e = PSH; *++e = LI; } 236 | else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } 237 | *++e = PSH; 238 | *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); 239 | *++e = (t == Inc) ? ADD : SUB; 240 | *++e = (ty == CHAR) ? SC : SI; 241 | } 242 | else { printf("%d: bad expression\n", line); exit(-1); } 243 | // 參考: https://en.wikipedia.org/wiki/Operator-precedence_parser, https://www.cnblogs.com/rubylouvre/archive/2012/09/08/2657682.html https://web.archive.org/web/20151223215421/http://hall.org.ua/halls/wizzard/pdf/Vaughan.Pratt.TDOP.pdf 244 | while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method 245 | t = ty; 246 | if (tk == Assign) { 247 | next(); 248 | if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); } 249 | expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI; 250 | } 251 | else if (tk == Cond) { 252 | next(); 253 | *++e = BZ; d = ++e; 254 | expr(Assign); 255 | if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } 256 | *d = (int)(e + 3); *++e = JMP; d = ++e; 257 | expr(Cond); 258 | *d = (int)(e + 1); 259 | } 260 | else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; } 261 | else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; } 262 | else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; } 263 | else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; } 264 | else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; } 265 | else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; } 266 | else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; } 267 | else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; } 268 | else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; } 269 | else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; } 270 | else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; } 271 | else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; } 272 | else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; } 273 | else if (tk == Add) { 274 | next(); *++e = PSH; expr(Mul); 275 | if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } 276 | *++e = ADD; 277 | } 278 | else if (tk == Sub) { 279 | next(); *++e = PSH; expr(Mul); 280 | if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; } 281 | else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; } 282 | else *++e = SUB; 283 | } 284 | else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; } 285 | else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; } 286 | else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; } 287 | else if (tk == Inc || tk == Dec) { 288 | if (*e == LC) { *e = PSH; *++e = LC; } 289 | else if (*e == LI) { *e = PSH; *++e = LI; } 290 | else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } 291 | *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); 292 | *++e = (tk == Inc) ? ADD : SUB; 293 | *++e = (ty == CHAR) ? SC : SI; 294 | *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); 295 | *++e = (tk == Inc) ? SUB : ADD; 296 | next(); 297 | } 298 | else if (tk == Brak) { 299 | next(); *++e = PSH; expr(Assign); 300 | if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } 301 | if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } 302 | else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } 303 | *++e = ADD; 304 | *++e = ((ty = t - PTR) == CHAR) ? LC : LI; 305 | } 306 | else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } 307 | } 308 | } 309 | 310 | void stmt() { 311 | int *a, *b; 312 | // 陳述 statement 313 | if (tk == If) { // if 語句 314 | next(); 315 | if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } 316 | expr(Assign); 317 | if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } 318 | *++e = BZ; b = ++e; 319 | stmt(); 320 | if (tk == Else) { // else 語句 321 | *b = (int)(e + 3); *++e = JMP; b = ++e; 322 | next(); 323 | stmt(); 324 | } 325 | *b = (int)(e + 1); 326 | } 327 | else if (tk == While) { // while 語句 328 | next(); 329 | a = e + 1; 330 | if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } 331 | expr(Assign); 332 | if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } 333 | *++e = BZ; b = ++e; 334 | stmt(); 335 | *++e = JMP; *++e = (int)a; 336 | *b = (int)(e + 1); 337 | } 338 | else if (tk == Return) { // return 語句 339 | next(); 340 | if (tk != ';') expr(Assign); 341 | *++e = LEV; 342 | if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } 343 | } 344 | else if (tk == '{') { // 區塊 {...} 345 | next(); 346 | while (tk != '}') stmt(); 347 | next(); 348 | } 349 | else if (tk == ';') { // ; 空陳述 350 | next(); 351 | } 352 | else { // 指定 assign 353 | expr(Assign); 354 | if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } 355 | } 356 | } 357 | 358 | int prog() { 359 | int bt, i; 360 | // 編譯整個程式 Program 361 | line = 1; 362 | next(); 363 | while (tk) { 364 | bt = INT; // basetype 365 | if (tk == Int) next(); 366 | else if (tk == Char) { next(); bt = CHAR; } 367 | else if (tk == Enum) { // enum Id? {... 列舉 368 | next(); 369 | if (tk != '{') next(); // 略過 Id 370 | if (tk == '{') { 371 | next(); 372 | i = 0; // 紀錄 enum 的目前值 373 | while (tk != '}') { 374 | if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; } 375 | next(); 376 | if (tk == Assign) { // 有 Id=Num 的情況 377 | next(); 378 | if (tk != Num) { printf("%d: bad enum initializer\n", line); return -1; } 379 | i = ival; 380 | next(); 381 | } 382 | id[Class] = Num; id[Type] = INT; id[Val] = i++; 383 | if (tk == ',') next(); 384 | } 385 | next(); 386 | } 387 | } 388 | while (tk != ';' && tk != '}') { // 掃描直到區塊結束 389 | ty = bt; 390 | while (tk == Mul) { next(); ty = ty + PTR; } 391 | if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; } 392 | if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } // id.Class 已經存在,重複宣告了! 393 | next(); 394 | id[Type] = ty; 395 | if (tk == '(') { // function 函數定義 ex: int f( ... 396 | id[Class] = Fun; 397 | id[Val] = (int)(e + 1); 398 | next(); i = 0; 399 | while (tk != ')') { // 掃描參數直到 ...) 400 | ty = INT; 401 | if (tk == Int) next(); 402 | else if (tk == Char) { next(); ty = CHAR; } 403 | while (tk == Mul) { next(); ty = ty + PTR; } 404 | if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } 405 | if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } // 這裡的 id 會指向 hash 搜尋過的 symTable 裏的那個 (在 next 裏處理的),所以若是該 id 已經是 Local,那麼就重複了! 406 | // 把 id.Class, id.Type, id.Val 暫存到 id.HClass, id.HType, id.Hval ,因為 Local 優先於 Global 407 | id[HClass] = id[Class]; id[Class] = Loc; 408 | id[HType] = id[Type]; id[Type] = ty; 409 | id[HVal] = id[Val]; id[Val] = i++; 410 | next(); 411 | if (tk == ',') next(); 412 | } 413 | next(); 414 | if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } // BODY 開始 {... 415 | loc = ++i; 416 | next(); 417 | while (tk == Int || tk == Char) { // 宣告 418 | bt = (tk == Int) ? INT : CHAR; 419 | next(); 420 | while (tk != ';') { 421 | ty = bt; 422 | while (tk == Mul) { next(); ty = ty + PTR; } 423 | if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } 424 | if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } 425 | // 把 id.Class, id.Type, id.Val 暫存到 id.HClass, id.HType, id.Hval ,因為 Local 優先於 Global 426 | id[HClass] = id[Class]; id[Class] = Loc; 427 | id[HType] = id[Type]; id[Type] = ty; 428 | id[HVal] = id[Val]; id[Val] = ++i; 429 | next(); 430 | if (tk == ',') next(); 431 | } 432 | next(); 433 | } 434 | *++e = ENT; *++e = i - loc; 435 | while (tk != '}') stmt(); 436 | *++e = LEV; 437 | id = sym; // unwind symbol table locals (把被區域變數隱藏掉的那些 Local id 還原,恢復全域變數的符號定義) 438 | while (id[Tk]) { 439 | if (id[Class] == Loc) { 440 | id[Class] = id[HClass]; 441 | id[Type] = id[HType]; 442 | id[Val] = id[HVal]; 443 | } 444 | id = id + Idsz; 445 | } 446 | } 447 | else { 448 | id[Class] = Glo; 449 | id[Val] = (int)datap; 450 | datap = datap + sizeof(int); 451 | } 452 | if (tk == ',') next(); 453 | } 454 | next(); 455 | } 456 | return 0; 457 | } 458 | 459 | int compile(int fd) { 460 | int i, *t; 461 | // 編譯器 462 | p = "char else enum if int return sizeof while " 463 | "open read write close printf malloc free memset memcmp exit void main"; 464 | i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table 465 | i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table 466 | next(); id[Tk] = Char; // handle void type 467 | next(); idmain = id; // keep track of main 468 | 469 | if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } 470 | if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; } 471 | p[i] = 0; // 設定程式 p 字串結束符號 \0 472 | 473 | return prog(); 474 | } 475 | 476 | int run(int *pc, int *bp, int *sp) { 477 | int a, cycle; // a: 累積器, cycle: 執行指令數 478 | int i, *t; // temps 479 | // 虛擬機 => pc: 程式計數器, sp: 堆疊暫存器, bp: 框架暫存器 480 | cycle = 0; 481 | while (1) { 482 | i = *pc++; ++cycle; 483 | if (debug) { 484 | printInstr(pc-2, code, data); // pc-2, 因為已經 pc++ 過了,而 printInstr 又是落後一個的情況。 485 | } 486 | if (i == LEA) a = (int)(bp + *pc++); // load local address 載入區域變數 487 | else if (i == IMM) a = *pc++; // load immediate 載入立即值 488 | else if (i == ADDR) { a = *pc; pc++; } // load address 載入位址 489 | else if (i == JMP) pc = (int *)*pc; // jump 躍躍指令 490 | else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine 跳到副程式 491 | else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero if (a==0) goto m[pc] 492 | else if (i == BNZ) pc = a ? (int *)*pc : pc + 1; // branch if not zero if (a!=0) goto m[pc] 493 | else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine 進入副程式 494 | else if (i == ADJ) sp = sp + *pc++; // stack adjust 調整堆疊 495 | else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine 離開副程式 496 | else if (i == LI) a = *(int *)a; // load int 載入整數 497 | else if (i == LC) a = *(char *)a; // load char 載入字元 498 | else if (i == SI) *(int *)*sp++ = a; // store int 儲存整數 499 | else if (i == SC) a = *(char *)*sp++ = a; // store char 儲存字元 500 | else if (i == PSH) *--sp = a; // push 推入堆疊 501 | 502 | else if (i == OR) a = *sp++ | a; // a = a OR *sp 503 | else if (i == XOR) a = *sp++ ^ a; // a = a XOR *sp 504 | else if (i == AND) a = *sp++ & a; // ... 505 | else if (i == EQ) a = *sp++ == a; 506 | else if (i == NE) a = *sp++ != a; 507 | else if (i == LT) a = *sp++ < a; 508 | else if (i == GT) a = *sp++ > a; 509 | else if (i == LE) a = *sp++ <= a; 510 | else if (i == GE) a = *sp++ >= a; 511 | else if (i == SHL) a = *sp++ << a; 512 | else if (i == SHR) a = *sp++ >> a; 513 | else if (i == ADD) a = *sp++ + a; 514 | else if (i == SUB) a = *sp++ - a; 515 | else if (i == MUL) a = *sp++ * a; 516 | else if (i == DIV) a = *sp++ / a; 517 | else if (i == MOD) a = *sp++ % a; 518 | 519 | else if (i == OPEN) a = open((char *)sp[1], *sp); // 開檔 520 | else if (i == READ) a = read(sp[2], (char *)sp[1], *sp); // 讀檔 521 | else if (i == WRIT) a = write(sp[2], (char *)sp[1], *sp); // 寫檔 522 | else if (i == CLOS) a = close(*sp); // 關檔 523 | else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); } // printf("....", a, b, c, d, e) 524 | else if (i == MALC) a = (int)malloc(*sp); // 分配記憶體 525 | else if (i == FREE) free((void *)*sp); // 釋放記憶體 526 | else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp); // 設定記憶體 527 | else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp); // 比較記憶體 528 | else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; } // EXIT 離開 529 | else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; } // 錯誤處理 530 | } 531 | } 532 | 533 | int vm(int argc, char **argv) { 534 | int *t; 535 | // 虛擬機: setup stack 536 | bp = sp = (int *)((int)sp + poolsz); 537 | *--sp = EXIT; // call exit if main returns 538 | *--sp = PSH; t = sp; 539 | *--sp = argc; // 把 argc,argv 放入堆疊,這樣 main(argc,argv) 才能取得到 540 | *--sp = (int)argv; 541 | *--sp = (int)t; // 推入返回點,於是最後 RET 時會跳回 t=sp 指定的位址,接著呼叫 EXIT 離開。 542 | return run(pc, bp, sp); 543 | } 544 | 545 | int obj_relocate(int *code, int codeLen, int *pcode1, char *pdata1, int *pcode2, char *pdata2) { 546 | int *p, ir; 547 | // 程式段機器碼重定位 548 | p=code; 549 | while (p 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; } 620 | if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; } 621 | if (argc > 0 && **argv == '-' && (*argv)[1] == 'r') { o_run = 1; --argc; ++argv; } 622 | if (argc > 0 && **argv == '-' && (*argv)[1] == 'u') { o_dump = 1; --argc; ++argv; } 623 | if (argc < 1) { printf("usage: c6 [-s] [-d] [-r] [-u] in_file [-o] out_file...\n"); return -1; } 624 | iFile = *argv; 625 | if (argc > 1) { 626 | narg = *(argv+1); 627 | if (*narg == '-' && narg[1] == 'o') { 628 | o_save = 1; 629 | oFile = *(argv+2); 630 | } 631 | } 632 | if ((fd = open(iFile, 0100000)) < 0) { // 0100000 代表以 BINARY mode 開啟 (Windows 中預設為 TEXT mode) 633 | printf("could not open(%s)\n", iFile); 634 | return -1; 635 | } 636 | 637 | poolsz = 256*1024; // 最大記憶體大小 (程式碼/資料/堆疊/符號表) 638 | if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; } // 符號段 639 | if (!(code = le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; } // 程式段 640 | if (!(data = datap = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } // 資料段 641 | if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; } // 堆疊段 642 | 643 | memset(sym, 0, poolsz); 644 | memset(e, 0, poolsz); 645 | memset(data, 0, poolsz); 646 | 647 | op = "LEA ,IMM ,ADDR,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," 648 | "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," 649 | "OPEN,READ,WRIT,CLOS,PRTF,MALC,FREE,MSET,MCMP,EXIT,"; 650 | if (o_dump) { // -u: 印出目的檔 651 | obj_load(fd); 652 | obj_dump(pc, code, codeLen, data, dataLen); 653 | return 0; 654 | } 655 | if (o_run) { // -r: 執行目的檔 656 | obj_load(fd); 657 | vm(argc, argv); 658 | return 0; 659 | } 660 | if (compile(fd)==-1) return -1; // 編譯 661 | if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; } 662 | if (src) return 0; // 編譯並列印,不執行 663 | if (o_save) { // -o 輸出目的檔,但不執行 664 | obj_save(oFile, pc, code, e-code, data, datap-data); 665 | printf("Compile %s success!\nOutput: %s\n", iFile, oFile); 666 | return 0; 667 | } 668 | close(fd); 669 | vm(argc, argv); // 用虛擬機執行編譯出來的碼 670 | } -------------------------------------------------------------------------------- /_more/c6_macos.c: -------------------------------------------------------------------------------- 1 | // c6.c - A mini compiler derived from the c4 by Robert Swierczek 2 | // 修改者: 陳鍾誠 (模組化+中文註解+目的檔處理) 3 | // char, int, and pointer types 4 | // if, while, return, and expression statements 5 | // just enough features to allow self-compilation and a bit more 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #define int int64_t // c6 要求 int 與處理器位址同樣長度,因此將 int 定義為 64 位元整數。 13 | 14 | char *p, *lp, // current position in source code (p: 目前原始碼指標, lp: 上一行原始碼指標) 15 | *data,*datap, // data/bss pointer (資料段機器碼指標) 16 | *op; // 指令字串列表 17 | 18 | int *e, *le, *code, // current position in emitted code (e: 目前機器碼指標, le: 上一行機器碼指標) 19 | *id, // currently parsed identifier (id: 目前的 id) 20 | *sym, // symbol table (simple list of identifiers) (符號表) 21 | tk, // current token (目前 token) 22 | ival, // current token value (目前的 token 值) 23 | ty, // current expression type (目前的運算式型態) 24 | loc, // local variable offset (區域變數的位移) 25 | line, // current line number (目前行號) 26 | src, // print source and assembly flag (印出原始碼) 27 | debug, // print executed instructions (印出執行指令 -- 除錯模式) 28 | o_run, // 執行目的檔 29 | o_save, // 反組譯目的檔 30 | o_dump; // 傾印目的檔 31 | 32 | int fd, poolsz, *idmain; 33 | int *pc, *bp, *sp, codeLen, dataLen; 34 | 35 | // tokens and classes (operators last and in precedence order) (按優先權順序排列) 36 | enum { // token : 0-127 直接用該字母表達, 128 以後用代號。 37 | Num = 128, Fun, Sys, Glo, Loc, Id, 38 | Char, Else, Enum, If, Int, Return, Sizeof, While, 39 | Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak 40 | }; 41 | 42 | // opcodes (機器碼的 op) 43 | enum { LEA ,IMM ,ADDR,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH , 44 | OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD , 45 | OPEN,READ,WRIT,CLOS,PRTF,MALC,FREE,MSET,MCMP,EXIT }; 46 | 47 | // types (支援型態,只有 int, char, pointer) 48 | enum { CHAR, INT, PTR }; 49 | 50 | // 因為沒有 struct,所以使用 offset 代替,例如 id[Tk] 代表 id.Tk (token), id[Hash] 代表 id.Hash, id[Name] 代表 id.Name, ..... 51 | // identifier offsets (since we can't create an ident struct) 52 | enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; // HClass, HType, HVal 是暫存的備份 ??? 53 | 54 | int stepInstr(int *p) { 55 | // 傳回下一個指令大小:ADJ 之前有一個參數,之後沒有參數。 56 | if (*++p <= ADJ) return 2; else return 1; 57 | } 58 | 59 | void printInstr(int *p, int *code, char *data) { 60 | int ir, arg; 61 | // 印出下一個指令 62 | ir = *++p; 63 | printf(" %4X:%X %8.4s", p-code, p, &op[ir * 5]); 64 | if (ir <= ADJ) { // ADJ 之前的指令有一個參數 65 | arg = *++p; 66 | if (ir==JSR || ir==JMP || ir==BZ || ir==BNZ) { 67 | if (arg==0) printf("0?\n"); else printf(" %X:%X\n", (int*)arg-code, (int*)arg); 68 | } else if (ir==ADDR) 69 | printf(" %X:%X\n", (char*)arg-data, arg); 70 | else 71 | printf(" %d\n", arg); 72 | } else { // ADJ 之後的指令沒有任何參數 73 | printf("\n"); 74 | } 75 | } 76 | 77 | void next() { 78 | char *pp; 79 | // 詞彙解析 lexer 80 | while (tk = *p) { 81 | ++p; 82 | if (tk == '\n') { // 換行 83 | if (src) { 84 | printf("%d: %.*s", line, p - lp, lp); // 印出該行 85 | lp = p; // lp = p = 新一行的原始碼開頭 86 | while (le < e) { // 印出上一行的所有目的碼 87 | printInstr(le, code, data); 88 | le = le + stepInstr(le); 89 | } 90 | } 91 | ++line; 92 | } 93 | else if (tk == '#') { // 取得 #include 這類的一整行 94 | while (*p != 0 && *p != '\n') ++p; 95 | } 96 | else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { // 取得變數名稱 97 | pp = p - 1; 98 | while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') 99 | tk = tk * 147 + *p++; // 計算雜湊值 100 | tk = (tk << 6) + (p - pp); // 符號表的雜湊位址 ?? 101 | id = sym; 102 | while (id[Tk]) { // 檢查是否碰撞 ? 103 | if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; } // 沒碰撞就傳回 token 104 | id = id + Idsz; // 碰撞,前進到下一格。 105 | } 106 | id[Name] = (int)pp; // id.Name = ptr(變數名稱) 107 | id[Hash] = tk; // id.Hash = 雜湊值 108 | tk = id[Tk] = Id; // token = id.Tk = Id 109 | return; 110 | } 111 | else if (tk >= '0' && tk <= '9') { // 取得數字串 112 | if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; } // 十進位 113 | else if (*p == 'x' || *p == 'X') { // 十六進位 114 | while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F'))) // 16 進位 115 | ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0); 116 | } 117 | else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; } // 八進位 118 | tk = Num; // token = Number 119 | return; 120 | } 121 | else if (tk == '/') { 122 | if (*p == '/') { // 註解 123 | ++p; 124 | while (*p != 0 && *p != '\n') ++p; // 略過註解 125 | } 126 | else { // 除法 127 | tk = Div; 128 | return; 129 | } 130 | } 131 | else if (tk == '\'' || tk == '"') { // 字元或字串 132 | pp = datap; 133 | while (*p != 0 && *p != tk) { 134 | if ((ival = *p++) == '\\') { 135 | if ((ival = *p++) == 'n') ival = '\n'; // 處理 \n 的特殊情況 136 | } 137 | if (tk == '"') *datap++ = ival; // 把字串塞到資料段裏 138 | } 139 | ++p; 140 | if (tk == '"') ival = (int)pp; else tk = Num; // (若是字串) ? (ival = 字串 (在資料段中的) 指標) : (字元值) 141 | return; 142 | } // 以下為運算元 =+-!<>|&^%*[?~, ++, --, !=, <=, >=, ||, &&, ~ ;{}()],: 143 | else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; } 144 | else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; } 145 | else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; } 146 | else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; } 147 | else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; } 148 | else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; } 149 | else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; } 150 | else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; } 151 | else if (tk == '^') { tk = Xor; return; } 152 | else if (tk == '%') { tk = Mod; return; } 153 | else if (tk == '*') { tk = Mul; return; } 154 | else if (tk == '[') { tk = Brak; return; } 155 | else if (tk == '?') { tk = Cond; return; } 156 | else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return; 157 | } 158 | } 159 | 160 | void expr(int lev) { 161 | int t, *d; 162 | // 運算式 expression, 其中 lev 代表優先等級 163 | if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); } // EOF 164 | else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; } // 數值 165 | else if (tk == '"') { // 字串 166 | *++e = ADDR; *++e = ival; next(); 167 | while (tk == '"') next(); 168 | datap = (char *)((int)datap + sizeof(int) & -sizeof(int)); ty = PTR; // 用 int 為大小對齊 ?? 169 | } 170 | else if (tk == Sizeof) { // 處理 sizeof(type) ,其中 type 可能為 char, int 或 ptr 171 | next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } 172 | ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } 173 | while (tk == Mul) { next(); ty = ty + PTR; } 174 | if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } 175 | *++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int); 176 | ty = INT; 177 | } 178 | else if (tk == Id) { // 處理 id ... 179 | d = id; next(); 180 | if (tk == '(') { // id (args) ,這是 call 181 | next(); 182 | t = 0; 183 | while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); } // 推入參數 184 | next(); 185 | // d[Class] 可能為 Num = 128, Fun, Sys, Glo, Loc, ... 186 | if (d[Class] == Sys) *++e = d[Val]; // token 是系統呼叫,直接呼叫之... 187 | else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; } // token 是自訂函數,用 JSR : jump to subroutine 指令呼叫 188 | else { printf("%d: bad function call\n", line); exit(-1); } 189 | if (t) { *++e = ADJ; *++e = t; } // 有參數,要調整堆疊 (ADJ : stack adjust) 190 | ty = d[Type]; 191 | } 192 | else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; } // 該 id 是數值 193 | else { 194 | if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; } // 該 id 是區域變數,載入區域變數 (LEA : load local address) 195 | else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; } // 該 id 是全域變數,載入該全域變數 (IMM : load global address or immediate 載入全域變數或立即值) 196 | else { printf("%d: undefined variable\n", line); exit(-1); } 197 | *++e = ((ty = d[Type]) == CHAR) ? LC : LI; // LI : load int, LC : load char 198 | } 199 | } 200 | else if (tk == '(') { // (E) : 有括號的運算式 ... 201 | next(); 202 | if (tk == Int || tk == Char) { 203 | t = (tk == Int) ? INT : CHAR; next(); 204 | while (tk == Mul) { next(); t = t + PTR; } 205 | if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } 206 | expr(Inc); // 處理 ++, -- 的情況 207 | ty = t; 208 | } 209 | else { 210 | expr(Assign); // 處理 (E) 中的 E (E 運算式必須能處理 (t=x) op y 的情況,所以用 expr(Assign)) 211 | if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } 212 | } 213 | } 214 | else if (tk == Mul) { // * 乘法 215 | next(); expr(Inc); 216 | if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } 217 | *++e = (ty == CHAR) ? LC : LI; 218 | } 219 | else if (tk == And) { // & AND 220 | next(); expr(Inc); 221 | if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); } 222 | ty = ty + PTR; 223 | } 224 | else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; } // NOT 225 | else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; } // Logical NOT 226 | else if (tk == Add) { next(); expr(Inc); ty = INT; } 227 | else if (tk == Sub) { 228 | next(); *++e = IMM; 229 | if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; } // -Num or -E 230 | ty = INT; 231 | } 232 | else if (tk == Inc || tk == Dec) { // ++ or -- 233 | t = tk; next(); expr(Inc); 234 | if (*e == LC) { *e = PSH; *++e = LC; } 235 | else if (*e == LI) { *e = PSH; *++e = LI; } 236 | else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } 237 | *++e = PSH; 238 | *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); 239 | *++e = (t == Inc) ? ADD : SUB; 240 | *++e = (ty == CHAR) ? SC : SI; 241 | } 242 | else { printf("%d: bad expression\n", line); exit(-1); } 243 | // 參考: https://en.wikipedia.org/wiki/Operator-precedence_parser, https://www.cnblogs.com/rubylouvre/archive/2012/09/08/2657682.html https://web.archive.org/web/20151223215421/http://hall.org.ua/halls/wizzard/pdf/Vaughan.Pratt.TDOP.pdf 244 | while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method 245 | t = ty; 246 | if (tk == Assign) { 247 | next(); 248 | if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); } 249 | expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI; 250 | } 251 | else if (tk == Cond) { 252 | next(); 253 | *++e = BZ; d = ++e; 254 | expr(Assign); 255 | if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } 256 | *d = (int)(e + 3); *++e = JMP; d = ++e; 257 | expr(Cond); 258 | *d = (int)(e + 1); 259 | } 260 | else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; } 261 | else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; } 262 | else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; } 263 | else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; } 264 | else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; } 265 | else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; } 266 | else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; } 267 | else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; } 268 | else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; } 269 | else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; } 270 | else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; } 271 | else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; } 272 | else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; } 273 | else if (tk == Add) { 274 | next(); *++e = PSH; expr(Mul); 275 | if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } 276 | *++e = ADD; 277 | } 278 | else if (tk == Sub) { 279 | next(); *++e = PSH; expr(Mul); 280 | if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; } 281 | else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; } 282 | else *++e = SUB; 283 | } 284 | else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; } 285 | else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; } 286 | else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; } 287 | else if (tk == Inc || tk == Dec) { 288 | if (*e == LC) { *e = PSH; *++e = LC; } 289 | else if (*e == LI) { *e = PSH; *++e = LI; } 290 | else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } 291 | *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); 292 | *++e = (tk == Inc) ? ADD : SUB; 293 | *++e = (ty == CHAR) ? SC : SI; 294 | *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); 295 | *++e = (tk == Inc) ? SUB : ADD; 296 | next(); 297 | } 298 | else if (tk == Brak) { 299 | next(); *++e = PSH; expr(Assign); 300 | if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } 301 | if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } 302 | else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } 303 | *++e = ADD; 304 | *++e = ((ty = t - PTR) == CHAR) ? LC : LI; 305 | } 306 | else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } 307 | } 308 | } 309 | 310 | void stmt() { 311 | int *a, *b; 312 | // 陳述 statement 313 | if (tk == If) { // if 語句 314 | next(); 315 | if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } 316 | expr(Assign); 317 | if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } 318 | *++e = BZ; b = ++e; 319 | stmt(); 320 | if (tk == Else) { // else 語句 321 | *b = (int)(e + 3); *++e = JMP; b = ++e; 322 | next(); 323 | stmt(); 324 | } 325 | *b = (int)(e + 1); 326 | } 327 | else if (tk == While) { // while 語句 328 | next(); 329 | a = e + 1; 330 | if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } 331 | expr(Assign); 332 | if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } 333 | *++e = BZ; b = ++e; 334 | stmt(); 335 | *++e = JMP; *++e = (int)a; 336 | *b = (int)(e + 1); 337 | } 338 | else if (tk == Return) { // return 語句 339 | next(); 340 | if (tk != ';') expr(Assign); 341 | *++e = LEV; 342 | if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } 343 | } 344 | else if (tk == '{') { // 區塊 {...} 345 | next(); 346 | while (tk != '}') stmt(); 347 | next(); 348 | } 349 | else if (tk == ';') { // ; 空陳述 350 | next(); 351 | } 352 | else { // 指定 assign 353 | expr(Assign); 354 | if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } 355 | } 356 | } 357 | 358 | int prog() { 359 | int bt, i; 360 | // 編譯整個程式 Program 361 | line = 1; 362 | next(); 363 | while (tk) { 364 | bt = INT; // basetype 365 | if (tk == Int) next(); 366 | else if (tk == Char) { next(); bt = CHAR; } 367 | else if (tk == Enum) { // enum Id? {... 列舉 368 | next(); 369 | if (tk != '{') next(); // 略過 Id 370 | if (tk == '{') { 371 | next(); 372 | i = 0; // 紀錄 enum 的目前值 373 | while (tk != '}') { 374 | if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; } 375 | next(); 376 | if (tk == Assign) { // 有 Id=Num 的情況 377 | next(); 378 | if (tk != Num) { printf("%d: bad enum initializer\n", line); return -1; } 379 | i = ival; 380 | next(); 381 | } 382 | id[Class] = Num; id[Type] = INT; id[Val] = i++; 383 | if (tk == ',') next(); 384 | } 385 | next(); 386 | } 387 | } 388 | while (tk != ';' && tk != '}') { // 掃描直到區塊結束 389 | ty = bt; 390 | while (tk == Mul) { next(); ty = ty + PTR; } 391 | if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; } 392 | if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } // id.Class 已經存在,重複宣告了! 393 | next(); 394 | id[Type] = ty; 395 | if (tk == '(') { // function 函數定義 ex: int f( ... 396 | id[Class] = Fun; 397 | id[Val] = (int)(e + 1); 398 | next(); i = 0; 399 | while (tk != ')') { // 掃描參數直到 ...) 400 | ty = INT; 401 | if (tk == Int) next(); 402 | else if (tk == Char) { next(); ty = CHAR; } 403 | while (tk == Mul) { next(); ty = ty + PTR; } 404 | if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } 405 | if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } // 這裡的 id 會指向 hash 搜尋過的 symTable 裏的那個 (在 next 裏處理的),所以若是該 id 已經是 Local,那麼就重複了! 406 | // 把 id.Class, id.Type, id.Val 暫存到 id.HClass, id.HType, id.Hval ,因為 Local 優先於 Global 407 | id[HClass] = id[Class]; id[Class] = Loc; 408 | id[HType] = id[Type]; id[Type] = ty; 409 | id[HVal] = id[Val]; id[Val] = i++; 410 | next(); 411 | if (tk == ',') next(); 412 | } 413 | next(); 414 | if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } // BODY 開始 {... 415 | loc = ++i; 416 | next(); 417 | while (tk == Int || tk == Char) { // 宣告 418 | bt = (tk == Int) ? INT : CHAR; 419 | next(); 420 | while (tk != ';') { 421 | ty = bt; 422 | while (tk == Mul) { next(); ty = ty + PTR; } 423 | if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } 424 | if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } 425 | // 把 id.Class, id.Type, id.Val 暫存到 id.HClass, id.HType, id.Hval ,因為 Local 優先於 Global 426 | id[HClass] = id[Class]; id[Class] = Loc; 427 | id[HType] = id[Type]; id[Type] = ty; 428 | id[HVal] = id[Val]; id[Val] = ++i; 429 | next(); 430 | if (tk == ',') next(); 431 | } 432 | next(); 433 | } 434 | *++e = ENT; *++e = i - loc; 435 | while (tk != '}') stmt(); 436 | *++e = LEV; 437 | id = sym; // unwind symbol table locals (把被區域變數隱藏掉的那些 Local id 還原,恢復全域變數的符號定義) 438 | while (id[Tk]) { 439 | if (id[Class] == Loc) { 440 | id[Class] = id[HClass]; 441 | id[Type] = id[HType]; 442 | id[Val] = id[HVal]; 443 | } 444 | id = id + Idsz; 445 | } 446 | } 447 | else { 448 | id[Class] = Glo; 449 | id[Val] = (int)datap; 450 | datap = datap + sizeof(int); 451 | } 452 | if (tk == ',') next(); 453 | } 454 | next(); 455 | } 456 | return 0; 457 | } 458 | 459 | int compile(int fd) { 460 | int i, *t; 461 | // 編譯器 462 | p = "char else enum if int return sizeof while " 463 | "open read write close printf malloc free memset memcmp exit void main"; 464 | i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table 465 | i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table 466 | next(); id[Tk] = Char; // handle void type 467 | next(); idmain = id; // keep track of main 468 | 469 | if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } 470 | if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; } 471 | p[i] = 0; // 設定程式 p 字串結束符號 \0 472 | 473 | return prog(); 474 | } 475 | 476 | int run(int *pc, int *bp, int *sp) { 477 | int a, cycle; // a: 累積器, cycle: 執行指令數 478 | int i, *t; // temps 479 | // 虛擬機 => pc: 程式計數器, sp: 堆疊暫存器, bp: 框架暫存器 480 | cycle = 0; 481 | while (1) { 482 | i = *pc++; ++cycle; 483 | if (debug) { 484 | printInstr(pc-2, code, data); // pc-2, 因為已經 pc++ 過了,而 printInstr 又是落後一個的情況。 485 | } 486 | if (i == LEA) a = (int)(bp + *pc++); // load local address 載入區域變數 487 | else if (i == IMM) a = *pc++; // load immediate 載入立即值 488 | else if (i == ADDR) { a = *pc; pc++; } // load address 載入位址 489 | else if (i == JMP) pc = (int *)*pc; // jump 躍躍指令 490 | else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine 跳到副程式 491 | else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero if (a==0) goto m[pc] 492 | else if (i == BNZ) pc = a ? (int *)*pc : pc + 1; // branch if not zero if (a!=0) goto m[pc] 493 | else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine 進入副程式 494 | else if (i == ADJ) sp = sp + *pc++; // stack adjust 調整堆疊 495 | else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine 離開副程式 496 | else if (i == LI) a = *(int *)a; // load int 載入整數 497 | else if (i == LC) a = *(char *)a; // load char 載入字元 498 | else if (i == SI) *(int *)*sp++ = a; // store int 儲存整數 499 | else if (i == SC) a = *(char *)*sp++ = a; // store char 儲存字元 500 | else if (i == PSH) *--sp = a; // push 推入堆疊 501 | 502 | else if (i == OR) a = *sp++ | a; // a = a OR *sp 503 | else if (i == XOR) a = *sp++ ^ a; // a = a XOR *sp 504 | else if (i == AND) a = *sp++ & a; // ... 505 | else if (i == EQ) a = *sp++ == a; 506 | else if (i == NE) a = *sp++ != a; 507 | else if (i == LT) a = *sp++ < a; 508 | else if (i == GT) a = *sp++ > a; 509 | else if (i == LE) a = *sp++ <= a; 510 | else if (i == GE) a = *sp++ >= a; 511 | else if (i == SHL) a = *sp++ << a; 512 | else if (i == SHR) a = *sp++ >> a; 513 | else if (i == ADD) a = *sp++ + a; 514 | else if (i == SUB) a = *sp++ - a; 515 | else if (i == MUL) a = *sp++ * a; 516 | else if (i == DIV) a = *sp++ / a; 517 | else if (i == MOD) a = *sp++ % a; 518 | 519 | else if (i == OPEN) a = open((char *)sp[1], *sp); // 開檔 520 | else if (i == READ) a = read(sp[2], (char *)sp[1], *sp); // 讀檔 521 | else if (i == WRIT) a = write(sp[2], (char *)sp[1], *sp); // 寫檔 522 | else if (i == CLOS) a = close(*sp); // 關檔 523 | else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); } // printf("....", a, b, c, d, e) 524 | else if (i == MALC) a = (int)malloc(*sp); // 分配記憶體 525 | else if (i == FREE) free((void *)*sp); // 釋放記憶體 526 | else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp); // 設定記憶體 527 | else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp); // 比較記憶體 528 | else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; } // EXIT 離開 529 | else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; } // 錯誤處理 530 | } 531 | } 532 | 533 | int vm(int argc, char **argv) { 534 | int *t; 535 | // 虛擬機: setup stack 536 | bp = sp = (int *)((int)sp + poolsz); 537 | *--sp = EXIT; // call exit if main returns 538 | *--sp = PSH; t = sp; 539 | *--sp = argc; // 把 argc,argv 放入堆疊,這樣 main(argc,argv) 才能取得到 540 | *--sp = (int)argv; 541 | *--sp = (int)t; // 推入返回點,於是最後 RET 時會跳回 t=sp 指定的位址,接著呼叫 EXIT 離開。 542 | return run(pc, bp, sp); 543 | } 544 | 545 | int obj_relocate(int *code, int codeLen, int *pcode1, char *pdata1, int *pcode2, char *pdata2) { 546 | int *p, ir; 547 | // 程式段機器碼重定位 548 | p=code; 549 | while (p 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; } 620 | if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; } 621 | if (argc > 0 && **argv == '-' && (*argv)[1] == 'r') { o_run = 1; --argc; ++argv; } 622 | if (argc > 0 && **argv == '-' && (*argv)[1] == 'u') { o_dump = 1; --argc; ++argv; } 623 | if (argc < 1) { printf("usage: c6 [-s] [-d] [-r] [-u] in_file [-o] out_file...\n"); return -1; } 624 | iFile = *argv; 625 | if (argc > 1) { 626 | narg = *(argv+1); 627 | if (*narg == '-' && narg[1] == 'o') { 628 | o_save = 1; 629 | oFile = *(argv+2); 630 | } 631 | } 632 | if ((fd = open(iFile, 0100000)) < 0) { // 0100000 代表以 BINARY mode 開啟 (Windows 中預設為 TEXT mode) 633 | printf("could not open(%s)\n", iFile); 634 | return -1; 635 | } 636 | 637 | poolsz = 256*1024; // 最大記憶體大小 (程式碼/資料/堆疊/符號表) 638 | if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; } // 符號段 639 | if (!(code = le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; } // 程式段 640 | if (!(data = datap = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } // 資料段 641 | if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; } // 堆疊段 642 | 643 | memset(sym, 0, poolsz); 644 | memset(e, 0, poolsz); 645 | memset(data, 0, poolsz); 646 | 647 | op = "LEA ,IMM ,ADDR,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," 648 | "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," 649 | "OPEN,READ,WRIT,CLOS,PRTF,MALC,FREE,MSET,MCMP,EXIT,"; 650 | if (o_dump) { // -u: 印出目的檔 651 | obj_load(fd); 652 | obj_dump(pc, code, codeLen, data, dataLen); 653 | return 0; 654 | } 655 | if (o_run) { // -r: 執行目的檔 656 | obj_load(fd); 657 | vm(argc, argv); 658 | return 0; 659 | } 660 | if (compile(fd)==-1) return -1; // 編譯 661 | if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; } 662 | if (src) return 0; // 編譯並列印,不執行 663 | if (o_save) { // -o 輸出目的檔,但不執行 664 | obj_save(oFile, pc, code, e-code, data, datap-data); 665 | printf("Compile %s success!\nOutput: %s\n", iFile, oFile); 666 | return 0; 667 | } 668 | close(fd); 669 | vm(argc, argv); // 用虛擬機執行編譯出來的碼 670 | } 671 | -------------------------------------------------------------------------------- /_more/c6_win.c: -------------------------------------------------------------------------------- 1 | // c6.c - A mini compiler derived from the c4 by Robert Swierczek 2 | // 修改者: 陳鍾誠 (模組化+中文註解+目的檔處理) 3 | // char, int, and pointer types 4 | // if, while, return, and expression statements 5 | // just enough features to allow self-compilation and a bit more 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #define int long long // c6 要求 int 與處理器位址同樣長度,因此將 int 定義為 64 位元整數。 13 | 14 | char *p, *lp, // current position in source code (p: 目前原始碼指標, lp: 上一行原始碼指標) 15 | *data,*datap, // data/bss pointer (資料段機器碼指標) 16 | *op; // 指令字串列表 17 | 18 | int *e, *le, *code, // current position in emitted code (e: 目前機器碼指標, le: 上一行機器碼指標) 19 | *id, // currently parsed identifier (id: 目前的 id) 20 | *sym, // symbol table (simple list of identifiers) (符號表) 21 | tk, // current token (目前 token) 22 | ival, // current token value (目前的 token 值) 23 | ty, // current expression type (目前的運算式型態) 24 | loc, // local variable offset (區域變數的位移) 25 | line, // current line number (目前行號) 26 | src, // print source and assembly flag (印出原始碼) 27 | debug, // print executed instructions (印出執行指令 -- 除錯模式) 28 | o_run, // 執行目的檔 29 | o_save, // 反組譯目的檔 30 | o_dump; // 傾印目的檔 31 | 32 | int fd, poolsz, *idmain; 33 | int *pc, *bp, *sp, codeLen, dataLen; 34 | 35 | // tokens and classes (operators last and in precedence order) (按優先權順序排列) 36 | enum { // token : 0-127 直接用該字母表達, 128 以後用代號。 37 | Num = 128, Fun, Sys, Glo, Loc, Id, 38 | Char, Else, Enum, If, Int, Return, Sizeof, While, 39 | Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak 40 | }; 41 | 42 | // opcodes (機器碼的 op) 43 | enum { LEA ,IMM ,ADDR,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH , 44 | OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD , 45 | OPEN,READ,WRIT,CLOS,PRTF,MALC,FREE,MSET,MCMP,EXIT }; 46 | 47 | // types (支援型態,只有 int, char, pointer) 48 | enum { CHAR, INT, PTR }; 49 | 50 | // 因為沒有 struct,所以使用 offset 代替,例如 id[Tk] 代表 id.Tk (token), id[Hash] 代表 id.Hash, id[Name] 代表 id.Name, ..... 51 | // identifier offsets (since we can't create an ident struct) 52 | enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; // HClass, HType, HVal 是暫存的備份 ??? 53 | 54 | int stepInstr(int *p) { 55 | // 傳回下一個指令大小:ADJ 之前有一個參數,之後沒有參數。 56 | if (*++p <= ADJ) return 2; else return 1; 57 | } 58 | 59 | void printInstr(int *p, int *code, char *data) { 60 | int ir, arg; 61 | // 印出下一個指令 62 | ir = *++p; 63 | printf(" %4X:%X %8.4s", p-code, p, &op[ir * 5]); 64 | if (ir <= ADJ) { // ADJ 之前的指令有一個參數 65 | arg = *++p; 66 | if (ir==JSR || ir==JMP || ir==BZ || ir==BNZ) { 67 | if (arg==0) printf("0?\n"); else printf(" %X:%X\n", (int*)arg-code, (int*)arg); 68 | } else if (ir==ADDR) 69 | printf(" %X:%X\n", (char*)arg-data, arg); 70 | else 71 | printf(" %d\n", arg); 72 | } else { // ADJ 之後的指令沒有任何參數 73 | printf("\n"); 74 | } 75 | } 76 | 77 | void next() { 78 | char *pp; 79 | // 詞彙解析 lexer 80 | while (tk = *p) { 81 | ++p; 82 | if (tk == '\n') { // 換行 83 | if (src) { 84 | printf("%d: %.*s", line, p - lp, lp); // 印出該行 85 | lp = p; // lp = p = 新一行的原始碼開頭 86 | while (le < e) { // 印出上一行的所有目的碼 87 | printInstr(le, code, data); 88 | le = le + stepInstr(le); 89 | } 90 | } 91 | ++line; 92 | } 93 | else if (tk == '#') { // 取得 #include 這類的一整行 94 | while (*p != 0 && *p != '\n') ++p; 95 | } 96 | else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { // 取得變數名稱 97 | pp = p - 1; 98 | while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') 99 | tk = tk * 147 + *p++; // 計算雜湊值 100 | tk = (tk << 6) + (p - pp); // 符號表的雜湊位址 ?? 101 | id = sym; 102 | while (id[Tk]) { // 檢查是否碰撞 ? 103 | if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; } // 沒碰撞就傳回 token 104 | id = id + Idsz; // 碰撞,前進到下一格。 105 | } 106 | id[Name] = (int)pp; // id.Name = ptr(變數名稱) 107 | id[Hash] = tk; // id.Hash = 雜湊值 108 | tk = id[Tk] = Id; // token = id.Tk = Id 109 | return; 110 | } 111 | else if (tk >= '0' && tk <= '9') { // 取得數字串 112 | if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; } // 十進位 113 | else if (*p == 'x' || *p == 'X') { // 十六進位 114 | while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F'))) // 16 進位 115 | ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0); 116 | } 117 | else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; } // 八進位 118 | tk = Num; // token = Number 119 | return; 120 | } 121 | else if (tk == '/') { 122 | if (*p == '/') { // 註解 123 | ++p; 124 | while (*p != 0 && *p != '\n') ++p; // 略過註解 125 | } 126 | else { // 除法 127 | tk = Div; 128 | return; 129 | } 130 | } 131 | else if (tk == '\'' || tk == '"') { // 字元或字串 132 | pp = datap; 133 | while (*p != 0 && *p != tk) { 134 | if ((ival = *p++) == '\\') { 135 | if ((ival = *p++) == 'n') ival = '\n'; // 處理 \n 的特殊情況 136 | } 137 | if (tk == '"') *datap++ = ival; // 把字串塞到資料段裏 138 | } 139 | ++p; 140 | if (tk == '"') ival = (int)pp; else tk = Num; // (若是字串) ? (ival = 字串 (在資料段中的) 指標) : (字元值) 141 | return; 142 | } // 以下為運算元 =+-!<>|&^%*[?~, ++, --, !=, <=, >=, ||, &&, ~ ;{}()],: 143 | else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; } 144 | else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; } 145 | else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; } 146 | else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; } 147 | else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; } 148 | else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; } 149 | else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; } 150 | else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; } 151 | else if (tk == '^') { tk = Xor; return; } 152 | else if (tk == '%') { tk = Mod; return; } 153 | else if (tk == '*') { tk = Mul; return; } 154 | else if (tk == '[') { tk = Brak; return; } 155 | else if (tk == '?') { tk = Cond; return; } 156 | else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return; 157 | } 158 | } 159 | 160 | void expr(int lev) { 161 | int t, *d; 162 | // 運算式 expression, 其中 lev 代表優先等級 163 | if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); } // EOF 164 | else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; } // 數值 165 | else if (tk == '"') { // 字串 166 | *++e = ADDR; *++e = ival; next(); 167 | while (tk == '"') next(); 168 | datap = (char *)((int)datap + sizeof(int) & -sizeof(int)); ty = PTR; // 用 int 為大小對齊 ?? 169 | } 170 | else if (tk == Sizeof) { // 處理 sizeof(type) ,其中 type 可能為 char, int 或 ptr 171 | next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } 172 | ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } 173 | while (tk == Mul) { next(); ty = ty + PTR; } 174 | if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } 175 | *++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int); 176 | ty = INT; 177 | } 178 | else if (tk == Id) { // 處理 id ... 179 | d = id; next(); 180 | if (tk == '(') { // id (args) ,這是 call 181 | next(); 182 | t = 0; 183 | while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); } // 推入參數 184 | next(); 185 | // d[Class] 可能為 Num = 128, Fun, Sys, Glo, Loc, ... 186 | if (d[Class] == Sys) *++e = d[Val]; // token 是系統呼叫,直接呼叫之... 187 | else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; } // token 是自訂函數,用 JSR : jump to subroutine 指令呼叫 188 | else { printf("%d: bad function call\n", line); exit(-1); } 189 | if (t) { *++e = ADJ; *++e = t; } // 有參數,要調整堆疊 (ADJ : stack adjust) 190 | ty = d[Type]; 191 | } 192 | else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; } // 該 id 是數值 193 | else { 194 | if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; } // 該 id 是區域變數,載入區域變數 (LEA : load local address) 195 | else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; } // 該 id 是全域變數,載入該全域變數 (IMM : load global address or immediate 載入全域變數或立即值) 196 | else { printf("%d: undefined variable\n", line); exit(-1); } 197 | *++e = ((ty = d[Type]) == CHAR) ? LC : LI; // LI : load int, LC : load char 198 | } 199 | } 200 | else if (tk == '(') { // (E) : 有括號的運算式 ... 201 | next(); 202 | if (tk == Int || tk == Char) { 203 | t = (tk == Int) ? INT : CHAR; next(); 204 | while (tk == Mul) { next(); t = t + PTR; } 205 | if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } 206 | expr(Inc); // 處理 ++, -- 的情況 207 | ty = t; 208 | } 209 | else { 210 | expr(Assign); // 處理 (E) 中的 E (E 運算式必須能處理 (t=x) op y 的情況,所以用 expr(Assign)) 211 | if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } 212 | } 213 | } 214 | else if (tk == Mul) { // * 乘法 215 | next(); expr(Inc); 216 | if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } 217 | *++e = (ty == CHAR) ? LC : LI; 218 | } 219 | else if (tk == And) { // & AND 220 | next(); expr(Inc); 221 | if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); } 222 | ty = ty + PTR; 223 | } 224 | else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; } // NOT 225 | else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; } // Logical NOT 226 | else if (tk == Add) { next(); expr(Inc); ty = INT; } 227 | else if (tk == Sub) { 228 | next(); *++e = IMM; 229 | if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; } // -Num or -E 230 | ty = INT; 231 | } 232 | else if (tk == Inc || tk == Dec) { // ++ or -- 233 | t = tk; next(); expr(Inc); 234 | if (*e == LC) { *e = PSH; *++e = LC; } 235 | else if (*e == LI) { *e = PSH; *++e = LI; } 236 | else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } 237 | *++e = PSH; 238 | *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); 239 | *++e = (t == Inc) ? ADD : SUB; 240 | *++e = (ty == CHAR) ? SC : SI; 241 | } 242 | else { printf("%d: bad expression\n", line); exit(-1); } 243 | // 參考: https://en.wikipedia.org/wiki/Operator-precedence_parser, https://www.cnblogs.com/rubylouvre/archive/2012/09/08/2657682.html https://web.archive.org/web/20151223215421/http://hall.org.ua/halls/wizzard/pdf/Vaughan.Pratt.TDOP.pdf 244 | while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method 245 | t = ty; 246 | if (tk == Assign) { 247 | next(); 248 | if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); } 249 | expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI; 250 | } 251 | else if (tk == Cond) { 252 | next(); 253 | *++e = BZ; d = ++e; 254 | expr(Assign); 255 | if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } 256 | *d = (int)(e + 3); *++e = JMP; d = ++e; 257 | expr(Cond); 258 | *d = (int)(e + 1); 259 | } 260 | else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; } 261 | else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; } 262 | else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; } 263 | else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; } 264 | else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; } 265 | else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; } 266 | else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; } 267 | else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; } 268 | else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; } 269 | else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; } 270 | else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; } 271 | else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; } 272 | else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; } 273 | else if (tk == Add) { 274 | next(); *++e = PSH; expr(Mul); 275 | if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } 276 | *++e = ADD; 277 | } 278 | else if (tk == Sub) { 279 | next(); *++e = PSH; expr(Mul); 280 | if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; } 281 | else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; } 282 | else *++e = SUB; 283 | } 284 | else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; } 285 | else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; } 286 | else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; } 287 | else if (tk == Inc || tk == Dec) { 288 | if (*e == LC) { *e = PSH; *++e = LC; } 289 | else if (*e == LI) { *e = PSH; *++e = LI; } 290 | else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } 291 | *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); 292 | *++e = (tk == Inc) ? ADD : SUB; 293 | *++e = (ty == CHAR) ? SC : SI; 294 | *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); 295 | *++e = (tk == Inc) ? SUB : ADD; 296 | next(); 297 | } 298 | else if (tk == Brak) { 299 | next(); *++e = PSH; expr(Assign); 300 | if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } 301 | if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } 302 | else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } 303 | *++e = ADD; 304 | *++e = ((ty = t - PTR) == CHAR) ? LC : LI; 305 | } 306 | else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } 307 | } 308 | } 309 | 310 | void stmt() { 311 | int *a, *b; 312 | // 陳述 statement 313 | if (tk == If) { // if 語句 314 | next(); 315 | if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } 316 | expr(Assign); 317 | if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } 318 | *++e = BZ; b = ++e; 319 | stmt(); 320 | if (tk == Else) { // else 語句 321 | *b = (int)(e + 3); *++e = JMP; b = ++e; 322 | next(); 323 | stmt(); 324 | } 325 | *b = (int)(e + 1); 326 | } 327 | else if (tk == While) { // while 語句 328 | next(); 329 | a = e + 1; 330 | if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } 331 | expr(Assign); 332 | if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } 333 | *++e = BZ; b = ++e; 334 | stmt(); 335 | *++e = JMP; *++e = (int)a; 336 | *b = (int)(e + 1); 337 | } 338 | else if (tk == Return) { // return 語句 339 | next(); 340 | if (tk != ';') expr(Assign); 341 | *++e = LEV; 342 | if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } 343 | } 344 | else if (tk == '{') { // 區塊 {...} 345 | next(); 346 | while (tk != '}') stmt(); 347 | next(); 348 | } 349 | else if (tk == ';') { // ; 空陳述 350 | next(); 351 | } 352 | else { // 指定 assign 353 | expr(Assign); 354 | if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } 355 | } 356 | } 357 | 358 | int prog() { 359 | int bt, i; 360 | // 編譯整個程式 Program 361 | line = 1; 362 | next(); 363 | while (tk) { 364 | bt = INT; // basetype 365 | if (tk == Int) next(); 366 | else if (tk == Char) { next(); bt = CHAR; } 367 | else if (tk == Enum) { // enum Id? {... 列舉 368 | next(); 369 | if (tk != '{') next(); // 略過 Id 370 | if (tk == '{') { 371 | next(); 372 | i = 0; // 紀錄 enum 的目前值 373 | while (tk != '}') { 374 | if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; } 375 | next(); 376 | if (tk == Assign) { // 有 Id=Num 的情況 377 | next(); 378 | if (tk != Num) { printf("%d: bad enum initializer\n", line); return -1; } 379 | i = ival; 380 | next(); 381 | } 382 | id[Class] = Num; id[Type] = INT; id[Val] = i++; 383 | if (tk == ',') next(); 384 | } 385 | next(); 386 | } 387 | } 388 | while (tk != ';' && tk != '}') { // 掃描直到區塊結束 389 | ty = bt; 390 | while (tk == Mul) { next(); ty = ty + PTR; } 391 | if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; } 392 | if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } // id.Class 已經存在,重複宣告了! 393 | next(); 394 | id[Type] = ty; 395 | if (tk == '(') { // function 函數定義 ex: int f( ... 396 | id[Class] = Fun; 397 | id[Val] = (int)(e + 1); 398 | next(); i = 0; 399 | while (tk != ')') { // 掃描參數直到 ...) 400 | ty = INT; 401 | if (tk == Int) next(); 402 | else if (tk == Char) { next(); ty = CHAR; } 403 | while (tk == Mul) { next(); ty = ty + PTR; } 404 | if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } 405 | if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } // 這裡的 id 會指向 hash 搜尋過的 symTable 裏的那個 (在 next 裏處理的),所以若是該 id 已經是 Local,那麼就重複了! 406 | // 把 id.Class, id.Type, id.Val 暫存到 id.HClass, id.HType, id.Hval ,因為 Local 優先於 Global 407 | id[HClass] = id[Class]; id[Class] = Loc; 408 | id[HType] = id[Type]; id[Type] = ty; 409 | id[HVal] = id[Val]; id[Val] = i++; 410 | next(); 411 | if (tk == ',') next(); 412 | } 413 | next(); 414 | if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } // BODY 開始 {... 415 | loc = ++i; 416 | next(); 417 | while (tk == Int || tk == Char) { // 宣告 418 | bt = (tk == Int) ? INT : CHAR; 419 | next(); 420 | while (tk != ';') { 421 | ty = bt; 422 | while (tk == Mul) { next(); ty = ty + PTR; } 423 | if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } 424 | if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } 425 | // 把 id.Class, id.Type, id.Val 暫存到 id.HClass, id.HType, id.Hval ,因為 Local 優先於 Global 426 | id[HClass] = id[Class]; id[Class] = Loc; 427 | id[HType] = id[Type]; id[Type] = ty; 428 | id[HVal] = id[Val]; id[Val] = ++i; 429 | next(); 430 | if (tk == ',') next(); 431 | } 432 | next(); 433 | } 434 | *++e = ENT; *++e = i - loc; 435 | while (tk != '}') stmt(); 436 | *++e = LEV; 437 | id = sym; // unwind symbol table locals (把被區域變數隱藏掉的那些 Local id 還原,恢復全域變數的符號定義) 438 | while (id[Tk]) { 439 | if (id[Class] == Loc) { 440 | id[Class] = id[HClass]; 441 | id[Type] = id[HType]; 442 | id[Val] = id[HVal]; 443 | } 444 | id = id + Idsz; 445 | } 446 | } 447 | else { 448 | id[Class] = Glo; 449 | id[Val] = (int)datap; 450 | datap = datap + sizeof(int); 451 | } 452 | if (tk == ',') next(); 453 | } 454 | next(); 455 | } 456 | return 0; 457 | } 458 | 459 | int compile(int fd) { 460 | int i, *t; 461 | // 編譯器 462 | p = "char else enum if int return sizeof while " 463 | "open read write close printf malloc free memset memcmp exit void main"; 464 | i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table 465 | i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table 466 | next(); id[Tk] = Char; // handle void type 467 | next(); idmain = id; // keep track of main 468 | 469 | if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } 470 | if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; } 471 | p[i] = 0; // 設定程式 p 字串結束符號 \0 472 | 473 | return prog(); 474 | } 475 | 476 | int run(int *pc, int *bp, int *sp) { 477 | int a, cycle; // a: 累積器, cycle: 執行指令數 478 | int i, *t; // temps 479 | // 虛擬機 => pc: 程式計數器, sp: 堆疊暫存器, bp: 框架暫存器 480 | cycle = 0; 481 | while (1) { 482 | i = *pc++; ++cycle; 483 | if (debug) { 484 | printInstr(pc-2, code, data); // pc-2, 因為已經 pc++ 過了,而 printInstr 又是落後一個的情況。 485 | } 486 | if (i == LEA) a = (int)(bp + *pc++); // load local address 載入區域變數 487 | else if (i == IMM) a = *pc++; // load immediate 載入立即值 488 | else if (i == ADDR) { a = *pc; pc++; } // load address 載入位址 489 | else if (i == JMP) pc = (int *)*pc; // jump 躍躍指令 490 | else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine 跳到副程式 491 | else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero if (a==0) goto m[pc] 492 | else if (i == BNZ) pc = a ? (int *)*pc : pc + 1; // branch if not zero if (a!=0) goto m[pc] 493 | else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine 進入副程式 494 | else if (i == ADJ) sp = sp + *pc++; // stack adjust 調整堆疊 495 | else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine 離開副程式 496 | else if (i == LI) a = *(int *)a; // load int 載入整數 497 | else if (i == LC) a = *(char *)a; // load char 載入字元 498 | else if (i == SI) *(int *)*sp++ = a; // store int 儲存整數 499 | else if (i == SC) a = *(char *)*sp++ = a; // store char 儲存字元 500 | else if (i == PSH) *--sp = a; // push 推入堆疊 501 | 502 | else if (i == OR) a = *sp++ | a; // a = a OR *sp 503 | else if (i == XOR) a = *sp++ ^ a; // a = a XOR *sp 504 | else if (i == AND) a = *sp++ & a; // ... 505 | else if (i == EQ) a = *sp++ == a; 506 | else if (i == NE) a = *sp++ != a; 507 | else if (i == LT) a = *sp++ < a; 508 | else if (i == GT) a = *sp++ > a; 509 | else if (i == LE) a = *sp++ <= a; 510 | else if (i == GE) a = *sp++ >= a; 511 | else if (i == SHL) a = *sp++ << a; 512 | else if (i == SHR) a = *sp++ >> a; 513 | else if (i == ADD) a = *sp++ + a; 514 | else if (i == SUB) a = *sp++ - a; 515 | else if (i == MUL) a = *sp++ * a; 516 | else if (i == DIV) a = *sp++ / a; 517 | else if (i == MOD) a = *sp++ % a; 518 | 519 | else if (i == OPEN) a = open((char *)sp[1], *sp); // 開檔 520 | else if (i == READ) a = read(sp[2], (char *)sp[1], *sp); // 讀檔 521 | else if (i == WRIT) a = write(sp[2], (char *)sp[1], *sp); // 寫檔 522 | else if (i == CLOS) a = close(*sp); // 關檔 523 | else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); } // printf("....", a, b, c, d, e) 524 | else if (i == MALC) a = (int)malloc(*sp); // 分配記憶體 525 | else if (i == FREE) free((void *)*sp); // 釋放記憶體 526 | else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp); // 設定記憶體 527 | else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp); // 比較記憶體 528 | else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; } // EXIT 離開 529 | else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; } // 錯誤處理 530 | } 531 | } 532 | 533 | int vm(int argc, char **argv) { 534 | int *t; 535 | // 虛擬機: setup stack 536 | bp = sp = (int *)((int)sp + poolsz); 537 | *--sp = EXIT; // call exit if main returns 538 | *--sp = PSH; t = sp; 539 | *--sp = argc; // 把 argc,argv 放入堆疊,這樣 main(argc,argv) 才能取得到 540 | *--sp = (int)argv; 541 | *--sp = (int)t; // 推入返回點,於是最後 RET 時會跳回 t=sp 指定的位址,接著呼叫 EXIT 離開。 542 | return run(pc, bp, sp); 543 | } 544 | 545 | int obj_relocate(int *code, int codeLen, int *pcode1, char *pdata1, int *pcode2, char *pdata2) { 546 | int *p, ir; 547 | // 程式段機器碼重定位 548 | p=code; 549 | while (p 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; } 620 | if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; } 621 | if (argc > 0 && **argv == '-' && (*argv)[1] == 'r') { o_run = 1; --argc; ++argv; } 622 | if (argc > 0 && **argv == '-' && (*argv)[1] == 'u') { o_dump = 1; --argc; ++argv; } 623 | if (argc < 1) { printf("usage: c6 [-s] [-d] [-r] [-u] in_file [-o] out_file...\n"); return -1; } 624 | iFile = *argv; 625 | if (argc > 1) { 626 | narg = *(argv+1); 627 | if (*narg == '-' && narg[1] == 'o') { 628 | o_save = 1; 629 | oFile = *(argv+2); 630 | } 631 | } 632 | if ((fd = open(iFile, 0100000)) < 0) { // 0100000 代表以 BINARY mode 開啟 (Windows 中預設為 TEXT mode) 633 | printf("could not open(%s)\n", iFile); 634 | return -1; 635 | } 636 | 637 | poolsz = 256*1024; // 最大記憶體大小 (程式碼/資料/堆疊/符號表) 638 | if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; } // 符號段 639 | if (!(code = le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; } // 程式段 640 | if (!(data = datap = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } // 資料段 641 | if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; } // 堆疊段 642 | 643 | memset(sym, 0, poolsz); 644 | memset(e, 0, poolsz); 645 | memset(data, 0, poolsz); 646 | 647 | op = "LEA ,IMM ,ADDR,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," 648 | "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," 649 | "OPEN,READ,WRIT,CLOS,PRTF,MALC,FREE,MSET,MCMP,EXIT,"; 650 | if (o_dump) { // -u: 印出目的檔 651 | obj_load(fd); 652 | obj_dump(pc, code, codeLen, data, dataLen); 653 | return 0; 654 | } 655 | if (o_run) { // -r: 執行目的檔 656 | obj_load(fd); 657 | vm(argc, argv); 658 | return 0; 659 | } 660 | if (compile(fd)==-1) return -1; // 編譯 661 | if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; } 662 | if (src) return 0; // 編譯並列印,不執行 663 | if (o_save) { // -o 輸出目的檔,但不執行 664 | obj_save(oFile, pc, code, e-code, data, datap-data); 665 | printf("Compile %s success!\nOutput: %s\n", iFile, oFile); 666 | return 0; 667 | } 668 | close(fd); 669 | vm(argc, argv); // 用虛擬機執行編譯出來的碼 670 | } -------------------------------------------------------------------------------- /c6: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccc-c/c6/6183df0fb5c249df78b5adb5d6bb730cac4b896e/c6 -------------------------------------------------------------------------------- /c6.c: -------------------------------------------------------------------------------- 1 | // c6.c - A mini compiler derived from the c4 by Robert Swierczek 2 | // 修改者: 陳鍾誠 (模組化+中文註解+目的檔處理) 3 | // char, int, and pointer types 4 | // if, while, return, and expression statements 5 | // just enough features to allow self-compilation and a bit more 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | // #define int long long // c6 要求 int 與處理器位址同樣長度,因此將 int 定義為 64 位元整數。 13 | 14 | char *p, *lp, // current position in source code (p: 目前原始碼指標, lp: 上一行原始碼指標) 15 | *data,*datap, // data/bss pointer (資料段機器碼指標) 16 | *op; // 指令字串列表 17 | 18 | int *e, *le, *code, // current position in emitted code (e: 目前機器碼指標, le: 上一行機器碼指標) 19 | *id, // currently parsed identifier (id: 目前的 id) 20 | *sym, // symbol table (simple list of identifiers) (符號表) 21 | tk, // current token (目前 token) 22 | ival, // current token value (目前的 token 值) 23 | ty, // current expression type (目前的運算式型態) 24 | loc, // local variable offset (區域變數的位移) 25 | line, // current line number (目前行號) 26 | src, // print source and assembly flag (印出原始碼) 27 | debug, // print executed instructions (印出執行指令 -- 除錯模式) 28 | o_run, // 執行目的檔 29 | o_save, // 反組譯目的檔 30 | o_dump; // 傾印目的檔 31 | 32 | int fd, poolsz, *idmain; 33 | int *pc, *bp, *sp, codeLen, dataLen; 34 | 35 | // tokens and classes (operators last and in precedence order) (按優先權順序排列) 36 | enum { // token : 0-127 直接用該字母表達, 128 以後用代號。 37 | Num = 128, Fun, Sys, Glo, Loc, Id, 38 | Char, Else, Enum, If, Int, Return, Sizeof, While, 39 | Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak 40 | }; 41 | 42 | // opcodes (機器碼的 op) 43 | enum { LEA ,IMM ,ADDR,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH , 44 | OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD , 45 | OPEN,READ,WRIT,CLOS,PRTF,MALC,FREE,MSET,MCMP,EXIT }; 46 | 47 | // types (支援型態,只有 int, char, pointer) 48 | enum { CHAR, INT, PTR }; 49 | 50 | // 因為沒有 struct,所以使用 offset 代替,例如 id[Tk] 代表 id.Tk (token), id[Hash] 代表 id.Hash, id[Name] 代表 id.Name, ..... 51 | // identifier offsets (since we can't create an ident struct) 52 | enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; // HClass, HType, HVal 是暫存的備份 ??? 53 | 54 | int stepInstr(int *p) { 55 | // 傳回下一個指令大小:ADJ 之前有一個參數,之後沒有參數。 56 | if (*++p <= ADJ) return 2; else return 1; 57 | } 58 | 59 | void printInstr(int *p, int *code, char *data) { 60 | int ir, arg; 61 | // 印出下一個指令 62 | ir = *++p; 63 | printf(" %4d:%X %8.4s", p-code, p, &op[ir * 5]); 64 | if (ir <= ADJ) { // ADJ 之前的指令有一個參數 65 | arg = *++p; 66 | if (ir==JSR || ir==JMP || ir==BZ || ir==BNZ) { 67 | if (arg==0) printf("0?\n"); else printf(" %d:%X\n", (int*)arg-code, (int*)arg); 68 | } else if (ir==ADDR) 69 | printf(" %d:%X\n", (char*)arg-data, arg); 70 | else 71 | printf(" %d\n", arg); 72 | } else { // ADJ 之後的指令沒有任何參數 73 | printf("\n"); 74 | } 75 | } 76 | 77 | void next() { 78 | char *pp; 79 | // 詞彙解析 lexer 80 | while (tk = *p) { 81 | ++p; 82 | if (tk == '\n') { // 換行 83 | if (src) { 84 | printf("%d: %.*s", line, p - lp, lp); // 印出該行 85 | lp = p; // lp = p = 新一行的原始碼開頭 86 | while (le < e) { // 印出上一行的所有目的碼 87 | printInstr(le, code, data); 88 | le = le + stepInstr(le); 89 | } 90 | } 91 | ++line; 92 | } 93 | else if (tk == '#') { // 取得 #include 這類的一整行 94 | while (*p != 0 && *p != '\n') ++p; 95 | } 96 | else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { // 取得變數名稱 97 | pp = p - 1; 98 | while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') 99 | tk = tk * 147 + *p++; // 計算雜湊值 100 | tk = (tk << 6) + (p - pp); // 符號表的雜湊位址 ?? 101 | id = sym; 102 | while (id[Tk]) { // 檢查是否碰撞 ? 103 | if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; } // 沒碰撞就傳回 token 104 | id = id + Idsz; // 碰撞,前進到下一格。 105 | } 106 | id[Name] = (int)pp; // id.Name = ptr(變數名稱) 107 | id[Hash] = tk; // id.Hash = 雜湊值 108 | tk = id[Tk] = Id; // token = id.Tk = Id 109 | return; 110 | } 111 | else if (tk >= '0' && tk <= '9') { // 取得數字串 112 | if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; } // 十進位 113 | else if (*p == 'x' || *p == 'X') { // 十六進位 114 | while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F'))) // 16 進位 115 | ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0); 116 | } 117 | else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; } // 八進位 118 | tk = Num; // token = Number 119 | return; 120 | } 121 | else if (tk == '/') { 122 | if (*p == '/') { // 註解 123 | ++p; 124 | while (*p != 0 && *p != '\n') ++p; // 略過註解 125 | } 126 | else { // 除法 127 | tk = Div; 128 | return; 129 | } 130 | } 131 | else if (tk == '\'' || tk == '"') { // 字元或字串 132 | pp = datap; 133 | while (*p != 0 && *p != tk) { 134 | if ((ival = *p++) == '\\') { 135 | if ((ival = *p++) == 'n') ival = '\n'; // 處理 \n 的特殊情況 136 | } 137 | if (tk == '"') *datap++ = ival; // 把字串塞到資料段裏 138 | } 139 | ++p; 140 | if (tk == '"') ival = (int)pp; else tk = Num; // (若是字串) ? (ival = 字串 (在資料段中的) 指標) : (字元值) 141 | return; 142 | } // 以下為運算元 =+-!<>|&^%*[?~, ++, --, !=, <=, >=, ||, &&, ~ ;{}()],: 143 | else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; } 144 | else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; } 145 | else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; } 146 | else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; } 147 | else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; } 148 | else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; } 149 | else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; } 150 | else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; } 151 | else if (tk == '^') { tk = Xor; return; } 152 | else if (tk == '%') { tk = Mod; return; } 153 | else if (tk == '*') { tk = Mul; return; } 154 | else if (tk == '[') { tk = Brak; return; } 155 | else if (tk == '?') { tk = Cond; return; } 156 | else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return; 157 | } 158 | } 159 | 160 | void expr(int lev) { 161 | int t, *d; 162 | // 運算式 expression, 其中 lev 代表優先等級 163 | if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); } // EOF 164 | else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; } // 數值 165 | else if (tk == '"') { // 字串 166 | *++e = ADDR; *++e = ival; next(); 167 | while (tk == '"') next(); 168 | datap = (char *)((int)datap + sizeof(int) & -sizeof(int)); ty = PTR; // 用 int 為大小對齊 ?? 169 | } 170 | else if (tk == Sizeof) { // 處理 sizeof(type) ,其中 type 可能為 char, int 或 ptr 171 | next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } 172 | ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } 173 | while (tk == Mul) { next(); ty = ty + PTR; } 174 | if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } 175 | *++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int); 176 | ty = INT; 177 | } 178 | else if (tk == Id) { // 處理 id ... 179 | d = id; next(); 180 | if (tk == '(') { // id (args) ,這是 call 181 | next(); 182 | t = 0; 183 | while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); } // 推入參數 184 | next(); 185 | // d[Class] 可能為 Num = 128, Fun, Sys, Glo, Loc, ... 186 | if (d[Class] == Sys) *++e = d[Val]; // token 是系統呼叫,直接呼叫之... 187 | else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; } // token 是自訂函數,用 JSR : jump to subroutine 指令呼叫 188 | else { printf("%d: bad function call\n", line); exit(-1); } 189 | if (t) { *++e = ADJ; *++e = t; } // 有參數,要調整堆疊 (ADJ : stack adjust) 190 | ty = d[Type]; 191 | } 192 | else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; } // 該 id 是數值 193 | else { 194 | if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; } // 該 id 是區域變數,載入區域變數 (LEA : load local address) 195 | else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; } // 該 id 是全域變數,載入該全域變數 (IMM : load global address or immediate 載入全域變數或立即值) 196 | else { printf("%d: undefined variable\n", line); exit(-1); } 197 | *++e = ((ty = d[Type]) == CHAR) ? LC : LI; // LI : load int, LC : load char 198 | } 199 | } 200 | else if (tk == '(') { // (E) : 有括號的運算式 ... 201 | next(); 202 | if (tk == Int || tk == Char) { 203 | t = (tk == Int) ? INT : CHAR; next(); 204 | while (tk == Mul) { next(); t = t + PTR; } 205 | if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } 206 | expr(Inc); // 處理 ++, -- 的情況 207 | ty = t; 208 | } 209 | else { 210 | expr(Assign); // 處理 (E) 中的 E (E 運算式必須能處理 (t=x) op y 的情況,所以用 expr(Assign)) 211 | if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } 212 | } 213 | } 214 | else if (tk == Mul) { // * 乘法 215 | next(); expr(Inc); 216 | if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } 217 | *++e = (ty == CHAR) ? LC : LI; 218 | } 219 | else if (tk == And) { // & AND 220 | next(); expr(Inc); 221 | if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); } 222 | ty = ty + PTR; 223 | } 224 | else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; } // NOT 225 | else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; } // Logical NOT 226 | else if (tk == Add) { next(); expr(Inc); ty = INT; } 227 | else if (tk == Sub) { 228 | next(); *++e = IMM; 229 | if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; } // -Num or -E 230 | ty = INT; 231 | } 232 | else if (tk == Inc || tk == Dec) { // ++ or -- 233 | t = tk; next(); expr(Inc); 234 | if (*e == LC) { *e = PSH; *++e = LC; } 235 | else if (*e == LI) { *e = PSH; *++e = LI; } 236 | else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } 237 | *++e = PSH; 238 | *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); 239 | *++e = (t == Inc) ? ADD : SUB; 240 | *++e = (ty == CHAR) ? SC : SI; 241 | } 242 | else { printf("%d: bad expression\n", line); exit(-1); } 243 | // 參考: https://en.wikipedia.org/wiki/Operator-precedence_parser, https://www.cnblogs.com/rubylouvre/archive/2012/09/08/2657682.html https://web.archive.org/web/20151223215421/http://hall.org.ua/halls/wizzard/pdf/Vaughan.Pratt.TDOP.pdf 244 | while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method 245 | t = ty; 246 | if (tk == Assign) { 247 | next(); 248 | if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); } 249 | expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI; 250 | } 251 | else if (tk == Cond) { 252 | next(); 253 | *++e = BZ; d = ++e; 254 | expr(Assign); 255 | if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } 256 | *d = (int)(e + 3); *++e = JMP; d = ++e; 257 | expr(Cond); 258 | *d = (int)(e + 1); 259 | } 260 | else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; } 261 | else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; } 262 | else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; } 263 | else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; } 264 | else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; } 265 | else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; } 266 | else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; } 267 | else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; } 268 | else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; } 269 | else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; } 270 | else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; } 271 | else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; } 272 | else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; } 273 | else if (tk == Add) { 274 | next(); *++e = PSH; expr(Mul); 275 | if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } 276 | *++e = ADD; 277 | } 278 | else if (tk == Sub) { 279 | next(); *++e = PSH; expr(Mul); 280 | if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; } 281 | else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; } 282 | else *++e = SUB; 283 | } 284 | else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; } 285 | else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; } 286 | else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; } 287 | else if (tk == Inc || tk == Dec) { 288 | if (*e == LC) { *e = PSH; *++e = LC; } 289 | else if (*e == LI) { *e = PSH; *++e = LI; } 290 | else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } 291 | *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); 292 | *++e = (tk == Inc) ? ADD : SUB; 293 | *++e = (ty == CHAR) ? SC : SI; 294 | *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); 295 | *++e = (tk == Inc) ? SUB : ADD; 296 | next(); 297 | } 298 | else if (tk == Brak) { 299 | next(); *++e = PSH; expr(Assign); 300 | if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } 301 | if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } 302 | else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } 303 | *++e = ADD; 304 | *++e = ((ty = t - PTR) == CHAR) ? LC : LI; 305 | } 306 | else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } 307 | } 308 | } 309 | 310 | void stmt() { 311 | int *a, *b; 312 | // 陳述 statement 313 | if (tk == If) { // if 語句 314 | next(); 315 | if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } 316 | expr(Assign); 317 | if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } 318 | *++e = BZ; b = ++e; 319 | stmt(); 320 | if (tk == Else) { // else 語句 321 | *b = (int)(e + 3); *++e = JMP; b = ++e; 322 | next(); 323 | stmt(); 324 | } 325 | *b = (int)(e + 1); 326 | } 327 | else if (tk == While) { // while 語句 328 | next(); 329 | a = e + 1; 330 | if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } 331 | expr(Assign); 332 | if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } 333 | *++e = BZ; b = ++e; 334 | stmt(); 335 | *++e = JMP; *++e = (int)a; 336 | *b = (int)(e + 1); 337 | } 338 | else if (tk == Return) { // return 語句 339 | next(); 340 | if (tk != ';') expr(Assign); 341 | *++e = LEV; 342 | if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } 343 | } 344 | else if (tk == '{') { // 區塊 {...} 345 | next(); 346 | while (tk != '}') stmt(); 347 | next(); 348 | } 349 | else if (tk == ';') { // ; 空陳述 350 | next(); 351 | } 352 | else { // 指定 assign 353 | expr(Assign); 354 | if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } 355 | } 356 | } 357 | 358 | int prog() { 359 | int bt, i; 360 | // 編譯整個程式 Program 361 | line = 1; 362 | next(); 363 | while (tk) { 364 | bt = INT; // basetype 365 | if (tk == Int) next(); 366 | else if (tk == Char) { next(); bt = CHAR; } 367 | else if (tk == Enum) { // enum Id? {... 列舉 368 | next(); 369 | if (tk != '{') next(); // 略過 Id 370 | if (tk == '{') { 371 | next(); 372 | i = 0; // 紀錄 enum 的目前值 373 | while (tk != '}') { 374 | if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; } 375 | next(); 376 | if (tk == Assign) { // 有 Id=Num 的情況 377 | next(); 378 | if (tk != Num) { printf("%d: bad enum initializer\n", line); return -1; } 379 | i = ival; 380 | next(); 381 | } 382 | id[Class] = Num; id[Type] = INT; id[Val] = i++; 383 | if (tk == ',') next(); 384 | } 385 | next(); 386 | } 387 | } 388 | while (tk != ';' && tk != '}') { // 掃描直到區塊結束 389 | ty = bt; 390 | while (tk == Mul) { next(); ty = ty + PTR; } 391 | if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; } 392 | if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } // id.Class 已經存在,重複宣告了! 393 | next(); 394 | id[Type] = ty; 395 | if (tk == '(') { // function 函數定義 ex: int f( ... 396 | id[Class] = Fun; 397 | id[Val] = (int)(e + 1); 398 | next(); i = 0; 399 | while (tk != ')') { // 掃描參數直到 ...) 400 | ty = INT; 401 | if (tk == Int) next(); 402 | else if (tk == Char) { next(); ty = CHAR; } 403 | while (tk == Mul) { next(); ty = ty + PTR; } 404 | if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } 405 | if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } // 這裡的 id 會指向 hash 搜尋過的 symTable 裏的那個 (在 next 裏處理的),所以若是該 id 已經是 Local,那麼就重複了! 406 | // 把 id.Class, id.Type, id.Val 暫存到 id.HClass, id.HType, id.Hval ,因為 Local 優先於 Global 407 | id[HClass] = id[Class]; id[Class] = Loc; 408 | id[HType] = id[Type]; id[Type] = ty; 409 | id[HVal] = id[Val]; id[Val] = i++; 410 | next(); 411 | if (tk == ',') next(); 412 | } 413 | next(); 414 | if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } // BODY 開始 {... 415 | loc = ++i; 416 | next(); 417 | while (tk == Int || tk == Char) { // 宣告 418 | bt = (tk == Int) ? INT : CHAR; 419 | next(); 420 | while (tk != ';') { 421 | ty = bt; 422 | while (tk == Mul) { next(); ty = ty + PTR; } 423 | if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } 424 | if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } 425 | // 把 id.Class, id.Type, id.Val 暫存到 id.HClass, id.HType, id.Hval ,因為 Local 優先於 Global 426 | id[HClass] = id[Class]; id[Class] = Loc; 427 | id[HType] = id[Type]; id[Type] = ty; 428 | id[HVal] = id[Val]; id[Val] = ++i; 429 | next(); 430 | if (tk == ',') next(); 431 | } 432 | next(); 433 | } 434 | *++e = ENT; *++e = i - loc; 435 | while (tk != '}') stmt(); 436 | *++e = LEV; 437 | id = sym; // unwind symbol table locals (把被區域變數隱藏掉的那些 Local id 還原,恢復全域變數的符號定義) 438 | while (id[Tk]) { 439 | if (id[Class] == Loc) { 440 | id[Class] = id[HClass]; 441 | id[Type] = id[HType]; 442 | id[Val] = id[HVal]; 443 | } 444 | id = id + Idsz; 445 | } 446 | } 447 | else { 448 | id[Class] = Glo; 449 | id[Val] = (int)datap; 450 | datap = datap + sizeof(int); 451 | } 452 | if (tk == ',') next(); 453 | } 454 | next(); 455 | } 456 | return 0; 457 | } 458 | 459 | int compile(int fd) { 460 | int i, *t; 461 | // 編譯器 462 | p = "char else enum if int return sizeof while " 463 | "open read write close printf malloc free memset memcmp exit void main"; 464 | i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table 465 | i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table 466 | next(); id[Tk] = Char; // handle void type 467 | next(); idmain = id; // keep track of main 468 | 469 | if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } 470 | if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; } 471 | p[i] = 0; // 設定程式 p 字串結束符號 \0 472 | 473 | return prog(); 474 | } 475 | 476 | int run(int *pc, int *bp, int *sp) { 477 | int a, cycle; // a: 累積器, cycle: 執行指令數 478 | int i, *t; // temps 479 | // 虛擬機 => pc: 程式計數器, sp: 堆疊暫存器, bp: 框架暫存器 480 | cycle = 0; 481 | while (1) { 482 | i = *pc++; ++cycle; 483 | if (debug) { 484 | printInstr(pc-2, code, data); // pc-2, 因為已經 pc++ 過了,而 printInstr 又是落後一個的情況。 485 | } 486 | if (i == LEA) a = (int)(bp + *pc++); // load local address 載入區域變數 487 | else if (i == IMM) a = *pc++; // load immediate 載入立即值 488 | else if (i == ADDR) { a = *pc; pc++; } // load address 載入位址 489 | else if (i == JMP) pc = (int *)*pc; // jump 躍躍指令 490 | else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine 跳到副程式 491 | else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero if (a==0) goto m[pc] 492 | else if (i == BNZ) pc = a ? (int *)*pc : pc + 1; // branch if not zero if (a!=0) goto m[pc] 493 | else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine 進入副程式 494 | else if (i == ADJ) sp = sp + *pc++; // stack adjust 調整堆疊 495 | else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine 離開副程式 496 | else if (i == LI) a = *(int *)a; // load int 載入整數 497 | else if (i == LC) a = *(char *)a; // load char 載入字元 498 | else if (i == SI) *(int *)*sp++ = a; // store int 儲存整數 499 | else if (i == SC) a = *(char *)*sp++ = a; // store char 儲存字元 500 | else if (i == PSH) *--sp = a; // push 推入堆疊 501 | 502 | else if (i == OR) a = *sp++ | a; // a = a OR *sp 503 | else if (i == XOR) a = *sp++ ^ a; // a = a XOR *sp 504 | else if (i == AND) a = *sp++ & a; // ... 505 | else if (i == EQ) a = *sp++ == a; 506 | else if (i == NE) a = *sp++ != a; 507 | else if (i == LT) a = *sp++ < a; 508 | else if (i == GT) a = *sp++ > a; 509 | else if (i == LE) a = *sp++ <= a; 510 | else if (i == GE) a = *sp++ >= a; 511 | else if (i == SHL) a = *sp++ << a; 512 | else if (i == SHR) a = *sp++ >> a; 513 | else if (i == ADD) a = *sp++ + a; 514 | else if (i == SUB) a = *sp++ - a; 515 | else if (i == MUL) a = *sp++ * a; 516 | else if (i == DIV) a = *sp++ / a; 517 | else if (i == MOD) a = *sp++ % a; 518 | 519 | else if (i == OPEN) a = open((char *)sp[1], *sp); // 開檔 520 | else if (i == READ) a = read(sp[2], (char *)sp[1], *sp); // 讀檔 521 | else if (i == WRIT) a = write(sp[2], (char *)sp[1], *sp); // 寫檔 522 | else if (i == CLOS) a = close(*sp); // 關檔 523 | else if (i == PRTF) { // PRTF 後面都跟著 ADJ #參數個數 524 | t = sp + pc[1]; // pc[1] 就是取得 #參數個數,於是 t 指向堆疊參數尾端 525 | a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); // printf("....", a, b, c, d, e) 526 | } 527 | else if (i == MALC) a = (int)malloc(*sp); // 分配記憶體 528 | else if (i == FREE) free((void *)*sp); // 釋放記憶體 529 | else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp); // 設定記憶體 530 | else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp); // 比較記憶體 531 | else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; } // EXIT 離開 532 | else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; } // 錯誤處理 533 | } 534 | } 535 | 536 | int vm(int argc, char **argv) { 537 | int *t; 538 | // 虛擬機: setup stack 539 | bp = sp = (int *)((int)sp + poolsz); 540 | *--sp = EXIT; // call exit if main returns 541 | *--sp = PSH; t = sp; 542 | *--sp = argc; // 把 argc,argv 放入堆疊,這樣 main(argc,argv) 才能取得到 543 | *--sp = (int)argv; 544 | *--sp = (int)t; // 推入返回點,於是最後 RET 時會跳回 t=sp 指定的位址,接著呼叫 EXIT 離開。 545 | return run(pc, bp, sp); 546 | } 547 | 548 | int obj_relocate(int *code, int codeLen, int *pcode1, char *pdata1, int *pcode2, char *pdata2) { 549 | int *p, ir; 550 | // 程式段機器碼重定位 551 | p=code; 552 | while (p 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; } 641 | if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; } 642 | if (argc > 0 && **argv == '-' && (*argv)[1] == 'r') { o_run = 1; --argc; ++argv; } 643 | if (argc > 0 && **argv == '-' && (*argv)[1] == 'u') { o_dump = 1; --argc; ++argv; } 644 | if (argc < 1) { printf("usage: c6 [-s] [-d] [-r] [-u] in_file [-o] out_file...\n"); return -1; } 645 | iFile = *argv; 646 | if (argc > 1) { 647 | narg = *(argv+1); 648 | if (*narg == '-' && narg[1] == 'o') { 649 | o_save = 1; 650 | oFile = *(argv+2); 651 | } 652 | } 653 | if ((fd = open(iFile, 0100000)) < 0) { // 0100000 代表以 BINARY mode 開啟 (Windows 中預設為 TEXT mode) 654 | printf("could not open(%s)\n", iFile); 655 | return -1; 656 | } 657 | init(); 658 | if (o_dump) { // -u: 印出目的檔 659 | obj_load(fd); 660 | obj_dump(pc, code, codeLen, data, dataLen); 661 | return 0; 662 | } 663 | if (o_run) { // -r: 執行目的檔 664 | obj_load(fd); 665 | vm(argc, argv); 666 | return 0; 667 | } 668 | if (compile(fd)==-1) return -1; // 編譯 669 | if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; } 670 | if (src) return 0; // 編譯並列印,不執行 671 | if (o_save) { // -o 輸出目的檔,但不執行 672 | obj_save(oFile, pc, code, e-code+1, data, datap-data); 673 | printf("Compile %s success!\nOutput: %s\n", iFile, oFile); 674 | return 0; 675 | } 676 | close(fd); 677 | vm(argc, argv); // 用虛擬機執行編譯出來的碼 678 | } 679 | #endif 680 | -------------------------------------------------------------------------------- /c6_test.sh: -------------------------------------------------------------------------------- 1 | ./c6 test/$1.c 2 | ./c6 c6.c test/$1.c 3 | ./c6 c6.c c6.c test/$1.c 4 | -------------------------------------------------------------------------------- /doc/asm.md: -------------------------------------------------------------------------------- 1 | * https://ftp.gnu.org/old-gnu/Manuals/gas-2.9.1/html_chapter/as_7.html#SEC72 2 | 3 | .ascii "string"... 4 | .ascii expects zero or more string literals (see section Strings) separated by commas. It assembles each string (with no automatic trailing zero byte) into consecutive addresses. 5 | 6 | .asciz "string"... 7 | .asciz is just like .ascii, but each string is followed by a zero byte. The "z" in `.asciz' stands for "zero". 8 | 9 | ``` 10 | wsl> gcc -g -w -DOS=LINUX jit.c -o jit 11 | wsl> ./jit test/hello/hello.vm 12 | entry: 0x6332C018 13 | code: start=0x6332C010 length=0xA 14 | 1:6332C018 ENT 0 15 | 3:6332C028 ADDR 0:632EB010 16 | 5:6332C038 PSH 17 | 6:6332C040 PRTF 18 | 7:6332C048 ADJ 1 19 | 9:6332C058 LEV 20 | data: start=0x632EB010 length=0x10 21 | hello, world 22 | 23 | =========== obj_toasm() =============== 24 | .section .rodata 25 | .LC0: .string "hello, world\n" 26 | .text 27 | .globl main 28 | .type main, @function 29 | main: 30 | # 1:6332C018 ENT 0 31 | pushq %rbp 32 | movq %rsp, %rbp 33 | # 3:6332C028 ADDR 0:632EB010 34 | leaq .LC0(%rip), %rax 35 | # 5:6332C038 PSH 36 | pushq %rax 37 | # 6:6332C040 PRTF 38 | popq %rdi 39 | call printf@PLT 40 | movl $0, %eax 41 | # 7:6332C048 ADJ 1 42 | 43 | # 9:6332C058 LEV 44 | popq %rbp 45 | ret 46 | ``` 47 | 48 | 49 | 50 | byte array: 51 | 52 | https://stackoverflow.com/questions/39557010/int-vs-byte-for-creating-an-array-on-gnu-assembler -------------------------------------------------------------------------------- /doc/jit.md: -------------------------------------------------------------------------------- 1 | # jit 2 | 3 | ## run 4 | 5 | ``` 6 | wsl> ./jit test/hello.vm > test/hello.vm.s 7 | wsl> gcc test/hello.vm.s -o test/hello.vm.o 8 | wsl> ./test/hello.vm.o 9 | hello, world 10 | ``` 11 | 12 | ## hello.vm.s 13 | 14 | ``` 15 | .section .rodata 16 | .stab: .ascii "hello, world\n\0\0\0" 17 | .text 18 | .globl main 19 | .type main, @function 20 | main: 21 | # 1:90812018 ENT 0 22 | pushq %rbp 23 | movq %rsp, %rbp 24 | subq $0, %rsp 25 | # 3:90812028 ADDR 0:907D1010 26 | leaq .stab(%rip), %rax 27 | addq $0,%rax 28 | # 5:90812038 PSH 29 | pushq %rax 30 | # 6:90812040 PRTF 31 | popq %rdi 32 | call printf@PLT 33 | # 7:90812048 ADJ 1 34 | 35 | # 9:90812058 LEV 36 | popq %rbp 37 | ret 38 | ``` 39 | -------------------------------------------------------------------------------- /doc/lea.md: -------------------------------------------------------------------------------- 1 | # lea(load effective address) 2 | 3 | * https://stackoverflow.com/questions/46597055/using-lea-on-values-that-arent-addresses-pointers 4 | * https://courses.cs.washington.edu/courses/cse351/17wi/lectures/CSE351-L09-x86-II_17wi.pdf 5 | 6 | Example: leaq (%rdx,%rcx,4), %rax 7 | 8 | rax = rcx*4+rdx 9 | 10 | 範例: p = &x[i]; 11 | 12 | => x+k*i 13 | 14 | lea (see Intel's instruction-set manual entry) is a shift-and-add instruction that uses memory-operand syntax and machine encoding. This explains the name, but it's not the only thing it's good for. It never actually accesses memory, so it's like using & in C. -------------------------------------------------------------------------------- /genasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccc-c/c6/6183df0fb5c249df78b5adb5d6bb730cac4b896e/genasm -------------------------------------------------------------------------------- /genasm.c: -------------------------------------------------------------------------------- 1 | #define NO_MAIN 2 | #include "./c6.c" 3 | 4 | #define emit printf 5 | 6 | int obj_toasm(int *entry, int *code, int codeLen, char *data, int dataLen) { 7 | int *p, i, n, step, a, t; 8 | char *dp; 9 | printf(" .section .rodata\n.stab: .ascii \""); 10 | dp = data; 11 | while (dp0) { // 參數前六個放在 rdi,rsi,rcx,rdx,r8, r9 69 | if (n == 1) emit("popq %%rdi\n\t"); 70 | if (n == 2) emit("popq %%rsi\n\t"); 71 | if (n == 3) emit("popq %%rcx\n\t"); 72 | if (n == 4) emit("popq %%rdx\n\t"); 73 | if (n == 5) emit("popq %%r8\n\t"); 74 | if (n == 6) emit("popq %%r9\n\t"); 75 | n--; 76 | } 77 | emit("call printf@PLT"); 78 | // a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); // printf("....", a, b, c, d, e) 79 | } 80 | else if (i == MALC) { emit("popq %%rdi\n\tcall malloc@PLT"); } // a = (int)malloc(*sp); // 分配記憶體 81 | else if (i == FREE) { emit("popq %%rdi\n\tcall malloc@PLT"); } // free((void *)*sp); // 釋放記憶體 82 | else if (i == MSET) { emit("popq %%rdi\n\tpopq %%rsi\n\tpopq %%rcx\n\tcall memset@PLT"); } // a = (int)memset((char *)sp[2], sp[1], *sp); // 設定記憶體 83 | else if (i == MCMP) { emit("popq %%rdi\n\tpopq %%rsi\n\tpopq %%rcx\n\tcall memcmp@PLT"); } // a = memcmp((char *)sp[2], (char *)sp[1], *sp); // 比較記憶體 84 | else if (i == EXIT) { emit("popq %%rdi\n\tcall exit@PLT"); } // { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; } // EXIT 離開 85 | else { emit("unknown instruction = %d!", i); return -1; } // 錯誤處理 86 | emit("\n"); 87 | } 88 | 89 | // printf("data: start=0x%X length=0x%X\n", data, dataLen); 90 | 91 | /* 92 | dp = data; 93 | while (dp 6 | #include 7 | #include 8 | 9 | void *dl; 10 | char *je, // current position in emitted native code 11 | **jitmap; // maps c4 bytecode index into native code position 12 | int (*jitmain)(); 13 | char *jitmem; // executable memory for JIT-compiled native code 14 | 15 | int jit_build() { 16 | int tmp, i, *cp; 17 | 18 | dl = dlopen(0, RTLD_LAZY | RTLD_GLOBAL); // RTLD_LAZY = 1 19 | assert(dl); 20 | // setup jit memory 21 | //jitmem = mmap(0, poolsz, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); 22 | jitmem = mmap(0, poolsz, 7, 0x1002 | MAP_ANON, -1, 0); 23 | if (!jitmem) { printf("could not mmap(%d) jit executable memory\n", poolsz); return -1; } 24 | 25 | jitmap = (char **)(jitmem + poolsz / 2); 26 | 27 | // first pass: emit native code 28 | cp = code + 1; je = jitmem; line = 0; 29 | while (cp < code+codeLen) { 30 | i = *cp; 31 | if (src) { 32 | // while (line < srcmap[cp - code]) { 33 | // line++; printf("% 4d | %.*s", line, linemap[line + 1] - linemap[line], linemap[line]); 34 | // } 35 | printf("0x%05x (%p):\t%8.4s", cp - code, je, &op[i * 5]); 36 | if (i <= ADJ) printf(" 0x%x\n", *(cp + 1)); else printf("\n"); 37 | } 38 | jitmap[cp - code] = je; // for later relocation of JMP/JSR/BZ/BNZ 39 | cp++; 40 | if (i == LEA) { 41 | i = 4 * *cp++; if (i < -128 || i > 127) { printf("jit: LEA out of bounds\n"); return -1; } 42 | *(int*)je = 0x458d; je = je + 2; *je++ = i; // leal $(4 * n)(%ebp), %eax 43 | } 44 | else if (i == ENT) { 45 | i = 4 * *cp++; if (i < -128 || i > 127) { printf("jit: ENT out of bounds\n"); return -1; } 46 | *(int *)je = 0xe58955; je = je + 3; // push %ebp; movl %esp, %ebp 47 | if (i > 0) { *(int *)je = 0xec83; je = je + 2; *(int*)je++ = i; } // subl $(i*4), %esp 48 | } 49 | else if (i == IMM) { *je++ = 0xb8; *(int *)je = *cp++; je = je + 4; } // movl $imm, %eax 50 | else if (i == ADDR) { *je++ = 0xb8; *(int *)je = *cp++; je = je + 4; } // movl $imm, %eax 51 | else if (i == ADJ) { i = 4 * *cp++; *(int *)je = 0xc483; je = je + 2; *(int *)je = i; je++; } // addl $(n * 4), %esp 52 | else if (i == PSH) *(int *)je++ = 0x50; // push %eax 53 | else if (i == LEV) { *(int *)je = 0xc35dec89; je = je + 4; } // mov %ebp, %esp; pop %ebp; ret 54 | else if (i == LI) { *(int *)je = 0x008b; je = je + 2; } // movl (%eax), %eax 55 | else if (i == LC) { *(int *)je = 0x00b60f; je = je + 3; } // movzbl (%eax), %eax 56 | else if (i == SI) { *(int *)je = 0x018959; je = je + 3; } // pop %ecx; movl %eax, (%ecx) 57 | else if (i == SC) { *(int *)je = 0x018859; je = je + 3; } // pop %ecx; movb %al, (%ecx) 58 | else if (i == OR) { *(int *)je = 0xc80959; je = je + 3; } // pop %ecx; orl %ecx, %eax 59 | else if (i == XOR) { *(int *)je = 0xc83159; je = je + 3; } // pop %ecx; xorl %ecx, %eax 60 | else if (i == AND) { *(int *)je = 0xc82159; je = je + 3; } // pop %ecx; andl %ecx, %eax 61 | else if (EQ <= i && i <= GE) { 62 | *(int*)je=0x0fc13959; je = je + 4; *(int*)je=0x9866c094; // pop %ecx; cmp %ecx, %eax; sete %al; cbw; - EQ 63 | if (i == NE) { *je = 0x95; } // setne %al 64 | else if (i == LT) { *je = 0x9c; } // setl %al 65 | else if (i == GT) { *je = 0x9f; } // setg %al 66 | else if (i == LE) { *je = 0x9e; } // setle %al 67 | else if (i == GE) { *je = 0x9d; } // setge %al 68 | je=je+4; *je++=0x98; // cwde 69 | } 70 | else if (i == SHL) { *(int*)je = 0xe0d39159; je = je + 4; } // pop %ecx; xchg %eax, %ecx; shl %cl, %eax 71 | else if (i == SHR) { *(int*)je = 0xe8d39159; je = je + 4; } // pop %ecx; xchg %eax, %ecx; shr %cl, %eax 72 | else if (i == ADD) { *(int*)je = 0xc80159; je = je + 3; } // pop %ecx; addl %ecx, %eax 73 | else if (i == SUB) { *(int*)je = 0xc8299159; je = je + 4; } // pop %ecx; xchg %eax, %ecx; subl %ecx, %eax 74 | else if (i == MUL) { *(int*)je = 0xc1af0f59; je = je + 4; } // pop %ecx; imul %ecx, %eax 75 | else if (i == DIV) { *(int*)je = 0xf9f79159; je = je + 4; } // pop %ecx; xchg %eax, %ecx; idiv %ecx, %eax 76 | else if (i == MOD) { *(int*)je = 0xd2319159; je = je + 4; *(int *)je = 0x92f9f7; je = je + 3; } 77 | else if (i == JMP) { ++cp; *je = 0xe9; je = je + 5; } // jmp 78 | else if (i == JSR) { ++cp; *je = 0xe8; je = je + 5; } // call 79 | else if (i == BZ) { ++cp; *(int*)je = 0x840fc085; je = je + 8; } // test %eax, %eax; jz 80 | else if (i == BNZ) { ++cp; *(int*)je = 0x850fc085; je = je + 8; } // test %eax, %eax; jnz 81 | else if (i >= OPEN) { 82 | if (i == OPEN) tmp = (int)dlsym(dl, "open"); 83 | else if (i == READ) tmp = (int)dlsym(dl, "read"); 84 | else if (i == CLOS) tmp = (int)dlsym(dl, "close"); 85 | else if (i == PRTF) tmp = (int)dlsym(dl, "printf"); 86 | else if (i == MALC) tmp = (int)dlsym(dl, "malloc"); 87 | else if (i == MSET) tmp = (int)dlsym(dl, "memset"); 88 | else if (i == MCMP) tmp = (int)dlsym(dl, "memcmp"); 89 | else if (i == EXIT) tmp = (int)dlsym(dl, "exit"); 90 | 91 | if (*cp++ == ADJ) { i = *cp++; } else { printf("no ADJ after native proc!\n"); exit(2); } 92 | 93 | *je++ = 0xb9; *(int*)je = i << 2; je = je + 4; // movl $(4 * n), %ecx; 94 | *(int*)je = 0xce29e689; je = je + 4; // mov %esp, %esi; sub %ecx, %esi; -- %esi will adjust the stack 95 | *(int*)je = 0x8302e9c1; je = je + 4; // shr $2, %ecx; and -- alignment of %esp for OS X 96 | *(int*)je = 0x895af0e6; je = je + 4; // $0xfffffff0, %esi; pop %edx; mov.. 97 | *(int*)je = 0xe2fc8e54; je = je + 4; // ..%edx, -4(%esi,%ecx,4); loop.. -- reversing args order 98 | *(int*)je = 0xe8f487f9; je = je + 4; // ..<'pop' offset>; xchg %esi, %esp; call -- saving old stack in %esi 99 | *(int*)je = tmp - (int)(je + 4); je = je + 4; // <*tmp offset>; 100 | *(int*)je = 0xf487; je = je + 2; // xchg %esi, %esp -- ADJ, back to old stack without arguments 101 | } 102 | else { printf("code generation failed for %d!\n", i); return -1; } 103 | } 104 | 105 | // second pass, relocation 106 | cp = code + 1; 107 | while (cp < code+codeLen) { 108 | je = jitmap[cp - code]; 109 | i = *cp++; 110 | // ADDR 需不需要 relocate ?? 應該不用,因為 vm 的目的檔已經重定位過了,把參數當常數即可 111 | if (i == JSR || i == JMP || i == BZ || i == BNZ) { 112 | tmp = (int)jitmap[(int *)*cp++ - code]; 113 | if (i == JSR || i == JMP) { je = je + 1; *(int*)je = tmp - (int)(je + 4); } 114 | else if (i == BZ || i == BNZ) { je = je + 4; *(int*)je = tmp - (int)(je + 4); } 115 | } 116 | else if (i < LEV) { ++cp; } 117 | } 118 | } 119 | 120 | int jit_run(int argc, char *argv[]) { 121 | // run jitted code 122 | jitmain = (void *) jitmap[ pc - code ]; 123 | return jitmain(argv, argc); // c4 vm pushes first argument first, unlike cdecl 124 | } 125 | 126 | int main(int argc, char *argv[]) { 127 | char *iFile; 128 | 129 | iFile = argv[1]; 130 | fd = open(iFile, 0100000); 131 | assert(fd != -1); 132 | init(); 133 | obj_load(fd); 134 | // obj_dump(pc, code, codeLen, data, dataLen); 135 | // src = 1; 136 | jit_build(); 137 | jit_run(argc, argv); 138 | } 139 | 140 | -------------------------------------------------------------------------------- /jit.md: -------------------------------------------------------------------------------- 1 | # jit 2 | 3 | ## build & run 4 | 5 | ``` 6 | $ wsl 7 | wsl> make clean 8 | rm -f c6 genasm jit 9 | wsl> make 10 | gcc -w -g -m32 c6.c -o c6 11 | gcc -w -g -m32 genasm.c -o genasm 12 | gcc -w -g -m32 jit.c -o jit -ldl 13 | wsl> ./jit_test.sh hello 14 | Compile test/hello.c success! 15 | Output: test/hello.vm 16 | hello, world 17 | wsl> ./jit_test.sh sum 18 | Compile test/sum.c success! 19 | Output: test/sum.vm 20 | sum(10)=55 21 | wsl> ./jit_test.sh fib 22 | Compile test/fib.c success! 23 | Output: test/fib.vm 24 | f(7)=13 25 | ``` 26 | -------------------------------------------------------------------------------- /jit_test.sh: -------------------------------------------------------------------------------- 1 | ./c6 test/$1.c -o test/$1.vm 2 | ./jit test/$1.vm -------------------------------------------------------------------------------- /test/arg.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char **argv) { 4 | printf("argc=%d argv[0]=%s argv[1]=%s\n", argc, argv[0], argv[1]); 5 | } 6 | -------------------------------------------------------------------------------- /test/explore/dl0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccc-c/c6/6183df0fb5c249df78b5adb5d6bb730cac4b896e/test/explore/dl0 -------------------------------------------------------------------------------- /test/explore/dl0.c: -------------------------------------------------------------------------------- 1 | // https://tldp.org/HOWTO/Program-Library-HOWTO/dl-libraries.html 2 | // gcc -o dl1 dl1.c -ldl 3 | #include 4 | #include 5 | #include 6 | 7 | int main(int argc, char **argv) { 8 | void *handle; 9 | void* (*pmalloc)(int); 10 | char *error; 11 | 12 | // handle = dlopen ("/lib/libm.so.6", RTLD_LAZY); 13 | handle = dlopen (0, RTLD_LAZY); 14 | printf("handle=%p\n", handle); 15 | if (!handle) { 16 | fputs (dlerror(), stderr); 17 | exit(1); 18 | } 19 | 20 | pmalloc = dlsym(handle, "malloc"); 21 | if ((error = dlerror()) != NULL) { 22 | fputs(error, stderr); 23 | exit(1); 24 | } 25 | 26 | char *m=(*pmalloc)(100); 27 | printf("m=%p\n", m); 28 | dlclose(handle); 29 | } -------------------------------------------------------------------------------- /test/explore/dl0b: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccc-c/c6/6183df0fb5c249df78b5adb5d6bb730cac4b896e/test/explore/dl0b -------------------------------------------------------------------------------- /test/explore/dl0b.c: -------------------------------------------------------------------------------- 1 | // https://tldp.org/HOWTO/Program-Library-HOWTO/dl-libraries.html 2 | // gcc -o dl1 dl1.c -ldl 3 | #include 4 | #include 5 | #include 6 | 7 | void *handle; 8 | int dltest() { 9 | // void *handle; 10 | void* (*pmalloc)(int); 11 | char *error; 12 | 13 | // handle = dlopen ("/lib/libm.so.6", RTLD_LAZY); 14 | handle = dlopen (0, RTLD_LAZY | RTLD_GLOBAL); 15 | printf("handle=%p\n", handle); 16 | if (!handle) { 17 | fputs (dlerror(), stderr); 18 | exit(1); 19 | } 20 | 21 | pmalloc = dlsym(handle, "malloc"); 22 | if ((error = dlerror()) != NULL) { 23 | fputs(error, stderr); 24 | exit(1); 25 | } 26 | 27 | char *m=(*pmalloc)(100); 28 | printf("m=%p\n", m); 29 | dlclose(handle); 30 | } 31 | 32 | int main(int argc, char **argv) { 33 | dltest(); 34 | } -------------------------------------------------------------------------------- /test/explore/dl1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccc-c/c6/6183df0fb5c249df78b5adb5d6bb730cac4b896e/test/explore/dl1 -------------------------------------------------------------------------------- /test/explore/dl1.c: -------------------------------------------------------------------------------- 1 | // https://tldp.org/HOWTO/Program-Library-HOWTO/dl-libraries.html 2 | // gcc -o dl1 dl1.c -ldl 3 | #include 4 | #include 5 | #include 6 | 7 | int main(int argc, char **argv) { 8 | void *handle; 9 | double (*cosine)(double); 10 | char *error; 11 | 12 | // handle = dlopen ("/lib/libm.so.6", RTLD_LAZY); 13 | handle = dlopen ("/usr/lib/x86_64-linux-gnu/libm.so", RTLD_LAZY); 14 | if (!handle) { 15 | fputs (dlerror(), stderr); 16 | exit(1); 17 | } 18 | 19 | cosine = dlsym(handle, "cos"); 20 | if ((error = dlerror()) != NULL) { 21 | fputs(error, stderr); 22 | exit(1); 23 | } 24 | 25 | printf ("%f\n", (*cosine)(2.0)); 26 | dlclose(handle); 27 | } -------------------------------------------------------------------------------- /test/explore/test1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | printf("O_BINARY=%o\n", O_BINARY); 6 | printf("O_CREAT|O_WRONLY|O_TRUNC|O_BINARY=%o\n", O_CREAT|O_WRONLY|O_TRUNC|O_BINARY); 7 | printf("O_BINARY|O_RDONLY=%o\n", O_BINARY|O_RDONLY); 8 | } 9 | -------------------------------------------------------------------------------- /test/fib.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int f(int n) { 4 | if (n<=0) return 0; 5 | if (n==1) return 1; 6 | return f(n-1) + f(n-2); 7 | } 8 | 9 | int main() { 10 | printf("f(7)=%d\n", f(7)); 11 | } 12 | -------------------------------------------------------------------------------- /test/fib.vm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccc-c/c6/6183df0fb5c249df78b5adb5d6bb730cac4b896e/test/fib.vm -------------------------------------------------------------------------------- /test/fib.vm.s: -------------------------------------------------------------------------------- 1 | .section .rodata 2 | .stab: .ascii "f(7)=%d\n\0\0\0\0\0\0\0\0" 3 | .text 4 | .globl main 5 | .type main, @function 6 | main: 7 | .L1: # 1:3C332018 ENT 0 8 | pushq %rbp 9 | movq %rsp, %rbp 10 | subq $0, %rsp 11 | .L3: # 3:3C332028 LEA 2 12 | movq $2, %rax 13 | leaq (%rbp,%rax,8), %rax 14 | .L5: # 5:3C332038 LI 15 | movq 0(%rax), %rax 16 | .L6: # 6:3C332040 PSH 17 | pushq %rax 18 | .L7: # 7:3C332048 IMM 0 19 | movq $0, %rax 20 | .L9: # 9:3C332058 LE 21 | popl %rbx 22 | le %rbx, %rax 23 | 24 | .L10: # 10:3C332060 BZ 15:3C332088 25 | cmp $0, %rax 26 | jeq .L15 27 | .L12: # 12:3C332070 IMM 0 28 | movq $0, %rax 29 | .L14: # 14:3C332080 LEV 30 | popq %rbp 31 | ret 32 | .L15: # 15:3C332088 LEA 2 33 | movq $2, %rax 34 | leaq (%rbp,%rax,8), %rax 35 | .L17: # 17:3C332098 LI 36 | movq 0(%rax), %rax 37 | .L18: # 18:3C3320A0 PSH 38 | pushq %rax 39 | .L19: # 19:3C3320A8 IMM 1 40 | movq $1, %rax 41 | .L21: # 21:3C3320B8 EQ 42 | popl %rbx 43 | eq %rbx, %rax 44 | 45 | .L22: # 22:3C3320C0 BZ 27:3C3320E8 46 | cmp $0, %rax 47 | jeq .L27 48 | .L24: # 24:3C3320D0 IMM 1 49 | movq $1, %rax 50 | .L26: # 26:3C3320E0 LEV 51 | popq %rbp 52 | ret 53 | .L27: # 27:3C3320E8 LEA 2 54 | movq $2, %rax 55 | leaq (%rbp,%rax,8), %rax 56 | .L29: # 29:3C3320F8 LI 57 | movq 0(%rax), %rax 58 | .L30: # 30:3C332100 PSH 59 | pushq %rax 60 | .L31: # 31:3C332108 IMM 1 61 | movq $1, %rax 62 | .L33: # 33:3C332118 SUB 63 | popl %rbx 64 | sub %rbx, %rax 65 | 66 | .L34: # 34:3C332120 PSH 67 | pushq %rax 68 | .L35: # 35:3C332128 JSR 1:3C332018 69 | call .L1 70 | .L37: # 37:3C332138 ADJ 1 71 | 72 | .L39: # 39:3C332148 PSH 73 | pushq %rax 74 | .L40: # 40:3C332150 LEA 2 75 | movq $2, %rax 76 | leaq (%rbp,%rax,8), %rax 77 | .L42: # 42:3C332160 LI 78 | movq 0(%rax), %rax 79 | .L43: # 43:3C332168 PSH 80 | pushq %rax 81 | .L44: # 44:3C332170 IMM 2 82 | movq $2, %rax 83 | .L46: # 46:3C332180 SUB 84 | popl %rbx 85 | sub %rbx, %rax 86 | 87 | .L47: # 47:3C332188 PSH 88 | pushq %rax 89 | .L48: # 48:3C332190 JSR 1:3C332018 90 | call .L1 91 | .L50: # 50:3C3321A0 ADJ 1 92 | 93 | .L52: # 52:3C3321B0 ADD 94 | popl %rbx 95 | add %rbx, %rax 96 | 97 | .L53: # 53:3C3321B8 LEV 98 | popq %rbp 99 | ret 100 | .L54: # 54:3C3321C0 LEV 101 | popq %rbp 102 | ret 103 | .L55: # 55:3C3321C8 ENT 0 104 | pushq %rbp 105 | movq %rsp, %rbp 106 | subq $0, %rsp 107 | .L57: # 57:3C3321D8 ADDR 0:3C2F1010 108 | leaq .stab(%rip), %rax 109 | addq $0,%rax 110 | .L59: # 59:3C3321E8 PSH 111 | pushq %rax 112 | .L60: # 60:3C3321F0 IMM 7 113 | movq $7, %rax 114 | .L62: # 62:3C332200 PSH 115 | pushq %rax 116 | .L63: # 63:3C332208 JSR 1:3C332018 117 | call .L1 118 | .L65: # 65:3C332218 ADJ 1 119 | 120 | .L67: # 67:3C332228 PSH 121 | pushq %rax 122 | .L68: # 68:3C332230 PRTF 123 | popq %rsi 124 | popq %rdi 125 | call printf@PLT 126 | .L69: # 69:3C332238 ADJ 2 127 | 128 | .L71: # 71:3C332248 LEV 129 | popq %rbp 130 | ret 131 | -------------------------------------------------------------------------------- /test/fib64.c: -------------------------------------------------------------------------------- 1 | #include 2 | #define int long long 3 | 4 | int f(int n) { 5 | if (n<=0) return 0; 6 | if (n==1) return 1; 7 | return f(n-1) + f(n-2); 8 | } 9 | 10 | int main() { 11 | printf("f(7)=%d\n", f(7)); 12 | } 13 | -------------------------------------------------------------------------------- /test/fib64.s: -------------------------------------------------------------------------------- 1 | .file "fib64.c" 2 | # GNU C17 (Ubuntu 9.3.0-17ubuntu1~20.04) version 9.3.0 (x86_64-linux-gnu) 3 | # compiled by GNU C version 9.3.0, GMP version 6.2.0, MPFR version 4.0.2, MPC version 1.1.0, isl version isl-0.22.1-GMP 4 | 5 | # GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 6 | # options passed: -imultiarch x86_64-linux-gnu fib64.c -mtune=generic 7 | # -march=x86-64 -auxbase-strip fib64.s -w -fverbose-asm 8 | # -fasynchronous-unwind-tables -fstack-protector-strong -Wformat 9 | # -Wformat-security -fstack-clash-protection -fcf-protection 10 | # options enabled: -fPIC -fPIE -faggressive-loop-optimizations 11 | # -fassume-phsa -fasynchronous-unwind-tables -fauto-inc-dec -fcommon 12 | # -fdelete-null-pointer-checks -fdwarf2-cfi-asm -fearly-inlining 13 | # -feliminate-unused-debug-types -ffp-int-builtin-inexact -ffunction-cse 14 | # -fgcse-lm -fgnu-runtime -fgnu-unique -fident -finline-atomics 15 | # -fipa-stack-alignment -fira-hoist-pressure -fira-share-save-slots 16 | # -fira-share-spill-slots -fivopts -fkeep-static-consts 17 | # -fleading-underscore -flifetime-dse -flto-odr-type-merging -fmath-errno 18 | # -fmerge-debug-strings -fpeephole -fplt -fprefetch-loop-arrays 19 | # -freg-struct-return -fsched-critical-path-heuristic 20 | # -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock 21 | # -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec 22 | # -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fschedule-fusion 23 | # -fsemantic-interposition -fshow-column -fshrink-wrap-separate 24 | # -fsigned-zeros -fsplit-ivs-in-unroller -fssa-backprop 25 | # -fstack-clash-protection -fstack-protector-strong -fstdarg-opt 26 | # -fstrict-volatile-bitfields -fsync-libcalls -ftrapping-math -ftree-cselim 27 | # -ftree-forwprop -ftree-loop-if-convert -ftree-loop-im -ftree-loop-ivcanon 28 | # -ftree-loop-optimize -ftree-parallelize-loops= -ftree-phiprop 29 | # -ftree-reassoc -ftree-scev-cprop -funit-at-a-time -funwind-tables 30 | # -fverbose-asm -fzero-initialized-in-bss -m128bit-long-double -m64 -m80387 31 | # -malign-stringops -mavx256-split-unaligned-load 32 | # -mavx256-split-unaligned-store -mfancy-math-387 -mfp-ret-in-387 -mfxsr 33 | # -mglibc -mieee-fp -mlong-double-80 -mmmx -mno-sse4 -mpush-args -mred-zone 34 | # -msse -msse2 -mstv -mtls-direct-seg-refs -mvzeroupper 35 | 36 | .text 37 | .globl f 38 | .type f, @function 39 | f: 40 | .LFB0: 41 | .cfi_startproc 42 | endbr64 43 | pushq %rbp # 44 | .cfi_def_cfa_offset 16 45 | .cfi_offset 6, -16 46 | movq %rsp, %rbp #, 47 | .cfi_def_cfa_register 6 48 | pushq %rbx # 49 | subq $24, %rsp #, 50 | .cfi_offset 3, -24 51 | movq %rdi, -24(%rbp) # n, n 52 | # fib64.c:5: if (n<=0) return 0; 53 | cmpq $0, -24(%rbp) #, n 54 | jg .L2 #, 55 | # fib64.c:5: if (n<=0) return 0; 56 | movl $0, %eax #, _5 57 | jmp .L3 # 58 | .L2: 59 | # fib64.c:6: if (n==1) return 1; 60 | cmpq $1, -24(%rbp) #, n 61 | jne .L4 #, 62 | # fib64.c:6: if (n==1) return 1; 63 | movl $1, %eax #, _5 64 | jmp .L3 # 65 | .L4: 66 | # fib64.c:7: return f(n-1) + f(n-2); 67 | movq -24(%rbp), %rax # n, tmp88 68 | subq $1, %rax #, _1 69 | movq %rax, %rdi # _1, 70 | call f # 71 | movq %rax, %rbx #, _2 72 | # fib64.c:7: return f(n-1) + f(n-2); 73 | movq -24(%rbp), %rax # n, tmp89 74 | subq $2, %rax #, _3 75 | movq %rax, %rdi # _3, 76 | call f # 77 | # fib64.c:7: return f(n-1) + f(n-2); 78 | addq %rbx, %rax # _2, _5 79 | .L3: 80 | # fib64.c:8: } 81 | addq $24, %rsp #, 82 | popq %rbx # 83 | popq %rbp # 84 | .cfi_def_cfa 7, 8 85 | ret 86 | .cfi_endproc 87 | .LFE0: 88 | .size f, .-f 89 | .section .rodata 90 | .LC0: 91 | .string "f(7)=%d\n" 92 | .text 93 | .globl main 94 | .type main, @function 95 | main: 96 | .LFB1: 97 | .cfi_startproc 98 | endbr64 99 | pushq %rbp # 100 | .cfi_def_cfa_offset 16 101 | .cfi_offset 6, -16 102 | movq %rsp, %rbp #, 103 | .cfi_def_cfa_register 6 104 | # fib64.c:11: printf("f(7)=%d\n", f(7)); 105 | movl $7, %edi #, 106 | call f # 107 | movq %rax, %rsi # _1, 108 | leaq .LC0(%rip), %rdi #, 109 | movl $0, %eax #, 110 | call printf@PLT # 111 | # fib64.c:12: } 112 | nop 113 | popq %rbp # 114 | .cfi_def_cfa 7, 8 115 | ret 116 | .cfi_endproc 117 | .LFE1: 118 | .size main, .-main 119 | .ident "GCC: (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0" 120 | .section .note.GNU-stack,"",@progbits 121 | .section .note.gnu.property,"a" 122 | .align 8 123 | .long 1f - 0f 124 | .long 4f - 1f 125 | .long 5 126 | 0: 127 | .string "GNU" 128 | 1: 129 | .align 8 130 | .long 0xc0000002 131 | .long 3f - 2f 132 | 2: 133 | .long 0x3 134 | 3: 135 | .align 8 136 | 4: 137 | -------------------------------------------------------------------------------- /test/hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | printf("hello, world\n"); 6 | } 7 | -------------------------------------------------------------------------------- /test/hello.s: -------------------------------------------------------------------------------- 1 | .file "hello.c" 2 | .text 3 | .section .rodata 4 | .LC0: 5 | .string "hello, world" 6 | .text 7 | .globl main 8 | .type main, @function 9 | main: 10 | .LFB0: 11 | .cfi_startproc 12 | endbr64 13 | pushq %rbp 14 | .cfi_def_cfa_offset 16 15 | .cfi_offset 6, -16 16 | movq %rsp, %rbp 17 | .cfi_def_cfa_register 6 18 | leaq .LC0(%rip), %rdi 19 | call puts@PLT 20 | movl $0, %eax 21 | popq %rbp 22 | .cfi_def_cfa 7, 8 23 | ret 24 | .cfi_endproc 25 | .LFE0: 26 | .size main, .-main 27 | .ident "GCC: (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0" 28 | .section .note.GNU-stack,"",@progbits 29 | .section .note.gnu.property,"a" 30 | .align 8 31 | .long 1f - 0f 32 | .long 4f - 1f 33 | .long 5 34 | 0: 35 | .string "GNU" 36 | 1: 37 | .align 8 38 | .long 0xc0000002 39 | .long 3f - 2f 40 | 2: 41 | .long 0x3 42 | 3: 43 | .align 8 44 | 4: 45 | -------------------------------------------------------------------------------- /test/hello.vm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccc-c/c6/6183df0fb5c249df78b5adb5d6bb730cac4b896e/test/hello.vm -------------------------------------------------------------------------------- /test/hello.vm.s: -------------------------------------------------------------------------------- 1 | .section .rodata 2 | .stab: .ascii "hello, world\n\0\0\0" 3 | .text 4 | .globl main 5 | .type main, @function 6 | main: 7 | # 1:90812018 ENT 0 8 | pushq %rbp 9 | movq %rsp, %rbp 10 | subq $0, %rsp 11 | # 3:90812028 ADDR 0:907D1010 12 | leaq .stab(%rip), %rax 13 | addq $0,%rax 14 | # 5:90812038 PSH 15 | pushq %rax 16 | # 6:90812040 PRTF 17 | popq %rdi 18 | call printf@PLT 19 | # 7:90812048 ADJ 1 20 | 21 | # 9:90812058 LEV 22 | popq %rbp 23 | ret 24 | -------------------------------------------------------------------------------- /test/hello64.c: -------------------------------------------------------------------------------- 1 | #include 2 | #define int long long 3 | 4 | int main() 5 | { 6 | printf("hello, world\n"); 7 | } 8 | -------------------------------------------------------------------------------- /test/hello64.s: -------------------------------------------------------------------------------- 1 | .file "hello64.c" 2 | # GNU C17 (x86_64-posix-seh-rev0, Built by MinGW-W64 project) version 8.1.0 (x86_64-w64-mingw32) 3 | # compiled by GNU C version 8.1.0, GMP version 6.1.2, MPFR version 4.0.1, MPC version 1.1.0, isl version isl-0.18-GMP 4 | 5 | # GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 6 | # options passed: 7 | # -iprefix C:/Program Files/CodeBlocks/MinGW/bin/../lib/gcc/x86_64-w64-mingw32/8.1.0/ 8 | # -D_REENTRANT hello64.c -mtune=core2 -march=nocona 9 | # -auxbase-strip hello64.s -fverbose-asm 10 | # options enabled: -faggressive-loop-optimizations 11 | # -fasynchronous-unwind-tables -fauto-inc-dec -fchkp-check-incomplete-type 12 | # -fchkp-check-read -fchkp-check-write -fchkp-instrument-calls 13 | # -fchkp-narrow-bounds -fchkp-optimize -fchkp-store-bounds 14 | # -fchkp-use-static-bounds -fchkp-use-static-const-bounds 15 | # -fchkp-use-wrappers -fcommon -fdelete-null-pointer-checks 16 | # -fdwarf2-cfi-asm -fearly-inlining -feliminate-unused-debug-types 17 | # -ffp-int-builtin-inexact -ffunction-cse -fgcse-lm -fgnu-runtime 18 | # -fgnu-unique -fident -finline-atomics -fira-hoist-pressure 19 | # -fira-share-save-slots -fira-share-spill-slots -fivopts 20 | # -fkeep-inline-dllexport -fkeep-static-consts -fleading-underscore 21 | # -flifetime-dse -flto-odr-type-merging -fmath-errno -fmerge-debug-strings 22 | # -fpeephole -fpic -fplt -fprefetch-loop-arrays -freg-struct-return 23 | # -fsched-critical-path-heuristic -fsched-dep-count-heuristic 24 | # -fsched-group-heuristic -fsched-interblock -fsched-last-insn-heuristic 25 | # -fsched-rank-heuristic -fsched-spec -fsched-spec-insn-heuristic 26 | # -fsched-stalled-insns-dep -fschedule-fusion -fsemantic-interposition 27 | # -fset-stack-executable -fshow-column -fshrink-wrap-separate 28 | # -fsigned-zeros -fsplit-ivs-in-unroller -fssa-backprop -fstdarg-opt 29 | # -fstrict-volatile-bitfields -fsync-libcalls -ftrapping-math 30 | # -ftree-cselim -ftree-forwprop -ftree-loop-if-convert -ftree-loop-im 31 | # -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops= 32 | # -ftree-phiprop -ftree-reassoc -ftree-scev-cprop -funit-at-a-time 33 | # -funwind-tables -fverbose-asm -fzero-initialized-in-bss 34 | # -m128bit-long-double -m64 -m80387 -maccumulate-outgoing-args 35 | # -malign-double -malign-stringops -mcx16 -mfancy-math-387 -mfentry 36 | # -mfp-ret-in-387 -mfxsr -mieee-fp -mlong-double-80 -mmmx -mms-bitfields 37 | # -mno-sse4 -mpush-args -mred-zone -msse -msse2 -msse3 -mstack-arg-probe 38 | # -mstackrealign -mvzeroupper 39 | 40 | .text 41 | .def __main; .scl 2; .type 32; .endef 42 | .section .rdata,"dr" 43 | .LC0: 44 | .ascii "hello, world\0" 45 | .text 46 | .globl main 47 | .def main; .scl 2; .type 32; .endef 48 | .seh_proc main 49 | main: 50 | pushq %rbp # 51 | .seh_pushreg %rbp 52 | movq %rsp, %rbp #, 53 | .seh_setframe %rbp, 0 54 | subq $32, %rsp #, 55 | .seh_stackalloc 32 56 | .seh_endprologue 57 | # hello64.c:5: { 58 | call __main # 59 | # hello64.c:6: printf("hello, world\n"); 60 | leaq .LC0(%rip), %rcx #, 61 | call puts # 62 | # hello64.c:7: } 63 | nop 64 | addq $32, %rsp #, 65 | popq %rbp # 66 | ret 67 | .seh_endproc 68 | .ident "GCC: (x86_64-posix-seh-rev0, Built by MinGW-W64 project) 8.1.0" 69 | .def puts; .scl 2; .type 32; .endef 70 | -------------------------------------------------------------------------------- /test/helloccc.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | char *name; 6 | name = "ccc"; 7 | printf("hello %s\n", name); 8 | } 9 | -------------------------------------------------------------------------------- /test/helloccc.s: -------------------------------------------------------------------------------- 1 | .file "helloccc.c" 2 | .text 3 | .def __main; .scl 2; .type 32; .endef 4 | .section .rdata,"dr" 5 | .LC0: 6 | .ascii "ccc\0" 7 | .LC1: 8 | .ascii "hello %s\12\0" 9 | .text 10 | .globl main 11 | .def main; .scl 2; .type 32; .endef 12 | .seh_proc main 13 | main: 14 | pushq %rbp 15 | .seh_pushreg %rbp 16 | movq %rsp, %rbp 17 | .seh_setframe %rbp, 0 18 | subq $48, %rsp 19 | .seh_stackalloc 48 20 | .seh_endprologue 21 | call __main 22 | leaq .LC0(%rip), %rax 23 | movq %rax, -8(%rbp) 24 | movq -8(%rbp), %rax 25 | movq %rax, %rdx 26 | leaq .LC1(%rip), %rcx 27 | call printf 28 | movl $0, %eax 29 | addq $48, %rsp 30 | popq %rbp 31 | ret 32 | .seh_endproc 33 | .ident "GCC: (x86_64-posix-seh-rev0, Built by MinGW-W64 project) 8.1.0" 34 | .def printf; .scl 2; .type 32; .endef 35 | -------------------------------------------------------------------------------- /test/helloccc.vm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccc-c/c6/6183df0fb5c249df78b5adb5d6bb730cac4b896e/test/helloccc.vm -------------------------------------------------------------------------------- /test/helloccc.vm.s: -------------------------------------------------------------------------------- 1 | .section .rodata 2 | .stab: .ascii "ccc\0\0\0\0\0hello %s\n\0\0\0\0\0\0\0" 3 | .text 4 | .globl main 5 | .type main, @function 6 | main: 7 | # 1:64EBE018 ENT 1 8 | .L1: 9 | pushq %rbp 10 | movq %rsp, %rbp 11 | subq $8, %rsp 12 | # 3:64EBE028 LEA -1 13 | .L3: 14 | movq $-1, %rax 15 | leaq (%rbp,%rax,8), %rax 16 | # 5:64EBE038 PSH 17 | .L5: 18 | pushq %rax 19 | # 6:64EBE040 ADDR 0:64E7D010 20 | .L6: 21 | leaq .stab(%rip), %rax 22 | addq $0,%rax 23 | # 8:64EBE050 SI 24 | .L8: 25 | pushq %rax 26 | # 9:64EBE058 ADDR 8:64E7D018 27 | .L9: 28 | leaq .stab(%rip), %rax 29 | addq $8,%rax 30 | # 11:64EBE068 PSH 31 | .L11: 32 | pushq %rax 33 | # 12:64EBE070 LEA -1 34 | .L12: 35 | movq $-1, %rax 36 | leaq (%rbp,%rax,8), %rax 37 | # 14:64EBE080 LI 38 | .L14: 39 | movq 0(%rax), %rax 40 | # 15:64EBE088 PSH 41 | .L15: 42 | pushq %rax 43 | # 16:64EBE090 PRTF 44 | .L16: 45 | popq %rsi 46 | popq %rdi 47 | call printf@PLT 48 | # 17:64EBE098 ADJ 2 49 | .L17: 50 | 51 | # 19:64EBE0A8 LEV 52 | .L19: 53 | popq %rbp 54 | ret 55 | -------------------------------------------------------------------------------- /test/sum.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // sum(n) = 1+2+...+n 4 | int sum(int n) { 5 | int s; 6 | int i; 7 | s=0; 8 | i=1; 9 | while (i <= n) { 10 | s = s + i; 11 | i ++; 12 | } 13 | return s; 14 | } 15 | 16 | int main() { 17 | printf("sum(10)=%d\n", sum(10)); 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /test/sum.s: -------------------------------------------------------------------------------- 1 | .file "sum.c" 2 | # GNU C17 (x86_64-posix-seh-rev0, Built by MinGW-W64 project) version 8.1.0 (x86_64-w64-mingw32) 3 | # compiled by GNU C version 8.1.0, GMP version 6.1.2, MPFR version 4.0.1, MPC version 1.1.0, isl version isl-0.18-GMP 4 | 5 | # GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 6 | # options passed: 7 | # -iprefix C:/Program Files/CodeBlocks/MinGW/bin/../lib/gcc/x86_64-w64-mingw32/8.1.0/ 8 | # -D_REENTRANT sum.c -mtune=core2 -march=nocona -auxbase-strip sum.s 9 | # -fverbose-asm 10 | # options enabled: -faggressive-loop-optimizations 11 | # -fasynchronous-unwind-tables -fauto-inc-dec -fchkp-check-incomplete-type 12 | # -fchkp-check-read -fchkp-check-write -fchkp-instrument-calls 13 | # -fchkp-narrow-bounds -fchkp-optimize -fchkp-store-bounds 14 | # -fchkp-use-static-bounds -fchkp-use-static-const-bounds 15 | # -fchkp-use-wrappers -fcommon -fdelete-null-pointer-checks 16 | # -fdwarf2-cfi-asm -fearly-inlining -feliminate-unused-debug-types 17 | # -ffp-int-builtin-inexact -ffunction-cse -fgcse-lm -fgnu-runtime 18 | # -fgnu-unique -fident -finline-atomics -fira-hoist-pressure 19 | # -fira-share-save-slots -fira-share-spill-slots -fivopts 20 | # -fkeep-inline-dllexport -fkeep-static-consts -fleading-underscore 21 | # -flifetime-dse -flto-odr-type-merging -fmath-errno -fmerge-debug-strings 22 | # -fpeephole -fpic -fplt -fprefetch-loop-arrays -freg-struct-return 23 | # -fsched-critical-path-heuristic -fsched-dep-count-heuristic 24 | # -fsched-group-heuristic -fsched-interblock -fsched-last-insn-heuristic 25 | # -fsched-rank-heuristic -fsched-spec -fsched-spec-insn-heuristic 26 | # -fsched-stalled-insns-dep -fschedule-fusion -fsemantic-interposition 27 | # -fset-stack-executable -fshow-column -fshrink-wrap-separate 28 | # -fsigned-zeros -fsplit-ivs-in-unroller -fssa-backprop -fstdarg-opt 29 | # -fstrict-volatile-bitfields -fsync-libcalls -ftrapping-math 30 | # -ftree-cselim -ftree-forwprop -ftree-loop-if-convert -ftree-loop-im 31 | # -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops= 32 | # -ftree-phiprop -ftree-reassoc -ftree-scev-cprop -funit-at-a-time 33 | # -funwind-tables -fverbose-asm -fzero-initialized-in-bss 34 | # -m128bit-long-double -m64 -m80387 -maccumulate-outgoing-args 35 | # -malign-double -malign-stringops -mcx16 -mfancy-math-387 -mfentry 36 | # -mfp-ret-in-387 -mfxsr -mieee-fp -mlong-double-80 -mmmx -mms-bitfields 37 | # -mno-sse4 -mpush-args -mred-zone -msse -msse2 -msse3 -mstack-arg-probe 38 | # -mstackrealign -mvzeroupper 39 | 40 | .text 41 | .globl sum 42 | .def sum; .scl 2; .type 32; .endef 43 | .seh_proc sum 44 | sum: 45 | pushq %rbp # 46 | .seh_pushreg %rbp 47 | movq %rsp, %rbp #, 48 | .seh_setframe %rbp, 0 49 | subq $16, %rsp #, 50 | .seh_stackalloc 16 51 | .seh_endprologue 52 | movl %ecx, 16(%rbp) # n, n 53 | # sum.c:7: s=0; 54 | movl $0, -4(%rbp) #, s 55 | # sum.c:8: i=1; 56 | movl $1, -8(%rbp) #, i 57 | # sum.c:9: while (i <= n) { 58 | jmp .L2 # 59 | .L3: 60 | # sum.c:10: s = s + i; 61 | movl -8(%rbp), %eax # i, tmp89 62 | addl %eax, -4(%rbp) # tmp89, s 63 | # sum.c:11: i ++; 64 | addl $1, -8(%rbp) #, i 65 | .L2: 66 | # sum.c:9: while (i <= n) { 67 | movl -8(%rbp), %eax # i, tmp90 68 | cmpl 16(%rbp), %eax # n, tmp90 69 | jle .L3 #, 70 | # sum.c:13: return s; 71 | movl -4(%rbp), %eax # s, _8 72 | # sum.c:14: } 73 | addq $16, %rsp #, 74 | popq %rbp # 75 | ret 76 | .seh_endproc 77 | .def __main; .scl 2; .type 32; .endef 78 | .section .rdata,"dr" 79 | .LC0: 80 | .ascii "sum(10)=%d\12\0" 81 | .text 82 | .globl main 83 | .def main; .scl 2; .type 32; .endef 84 | .seh_proc main 85 | main: 86 | pushq %rbp # 87 | .seh_pushreg %rbp 88 | movq %rsp, %rbp #, 89 | .seh_setframe %rbp, 0 90 | subq $32, %rsp #, 91 | .seh_stackalloc 32 92 | .seh_endprologue 93 | # sum.c:16: int main() { 94 | call __main # 95 | # sum.c:17: printf("sum(10)=%d\n", sum(10)); 96 | movl $10, %ecx #, 97 | call sum # 98 | movl %eax, %edx # _1, 99 | leaq .LC0(%rip), %rcx #, 100 | call printf # 101 | # sum.c:18: return 0; 102 | movl $0, %eax #, _5 103 | # sum.c:19: } 104 | addq $32, %rsp #, 105 | popq %rbp # 106 | ret 107 | .seh_endproc 108 | .ident "GCC: (x86_64-posix-seh-rev0, Built by MinGW-W64 project) 8.1.0" 109 | .def printf; .scl 2; .type 32; .endef 110 | -------------------------------------------------------------------------------- /test/sum.vm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccc-c/c6/6183df0fb5c249df78b5adb5d6bb730cac4b896e/test/sum.vm -------------------------------------------------------------------------------- /test/sum.vm.s: -------------------------------------------------------------------------------- 1 | .section .rodata 2 | .stab: .ascii "sum(10)=%d\n\0\0\0\0\0" 3 | .text 4 | .globl main 5 | .type main, @function 6 | main: 7 | .L1: # 1:93009018 ENT 2 8 | pushq %rbp 9 | movq %rsp, %rbp 10 | subq $16, %rsp 11 | .L3: # 3:93009028 LEA -1 12 | movq $-1, %rax 13 | leaq (%rbp,%rax,8), %rax 14 | .L5: # 5:93009038 PSH 15 | pushq %rax 16 | .L6: # 6:93009040 IMM 0 17 | movq $0, %rax 18 | .L8: # 8:93009050 SI 19 | pushq %rax 20 | .L9: # 9:93009058 LEA -2 21 | movq $-2, %rax 22 | leaq (%rbp,%rax,8), %rax 23 | .L11: # 11:93009068 PSH 24 | pushq %rax 25 | .L12: # 12:93009070 IMM 1 26 | movq $1, %rax 27 | .L14: # 14:93009080 SI 28 | pushq %rax 29 | .L15: # 15:93009088 LEA -2 30 | movq $-2, %rax 31 | leaq (%rbp,%rax,8), %rax 32 | .L17: # 17:93009098 LI 33 | movq 0(%rax), %rax 34 | .L18: # 18:930090A0 PSH 35 | pushq %rax 36 | .L19: # 19:930090A8 LEA 2 37 | movq $2, %rax 38 | leaq (%rbp,%rax,8), %rax 39 | .L21: # 21:930090B8 LI 40 | movq 0(%rax), %rax 41 | .L22: # 22:930090C0 LE 42 | popl %rbx 43 | le %rbx, %rax 44 | 45 | .L23: # 23:930090C8 BZ 52:930091B0 46 | cmp $0, %rax 47 | jeq .L52 48 | .L25: # 25:930090D8 LEA -1 49 | movq $-1, %rax 50 | leaq (%rbp,%rax,8), %rax 51 | .L27: # 27:930090E8 PSH 52 | pushq %rax 53 | .L28: # 28:930090F0 LEA -1 54 | movq $-1, %rax 55 | leaq (%rbp,%rax,8), %rax 56 | .L30: # 30:93009100 LI 57 | movq 0(%rax), %rax 58 | .L31: # 31:93009108 PSH 59 | pushq %rax 60 | .L32: # 32:93009110 LEA -2 61 | movq $-2, %rax 62 | leaq (%rbp,%rax,8), %rax 63 | .L34: # 34:93009120 LI 64 | movq 0(%rax), %rax 65 | .L35: # 35:93009128 ADD 66 | popl %rbx 67 | add %rbx, %rax 68 | 69 | .L36: # 36:93009130 SI 70 | pushq %rax 71 | .L37: # 37:93009138 LEA -2 72 | movq $-2, %rax 73 | leaq (%rbp,%rax,8), %rax 74 | .L39: # 39:93009148 PSH 75 | pushq %rax 76 | .L40: # 40:93009150 LI 77 | movq 0(%rax), %rax 78 | .L41: # 41:93009158 PSH 79 | pushq %rax 80 | .L42: # 42:93009160 IMM 1 81 | movq $1, %rax 82 | .L44: # 44:93009170 ADD 83 | popl %rbx 84 | add %rbx, %rax 85 | 86 | .L45: # 45:93009178 SI 87 | pushq %rax 88 | .L46: # 46:93009180 PSH 89 | pushq %rax 90 | .L47: # 47:93009188 IMM 1 91 | movq $1, %rax 92 | .L49: # 49:93009198 SUB 93 | popl %rbx 94 | sub %rbx, %rax 95 | 96 | .L50: # 50:930091A0 JMP 15:93009088 97 | jmp .L15 98 | .L52: # 52:930091B0 LEA -1 99 | movq $-1, %rax 100 | leaq (%rbp,%rax,8), %rax 101 | .L54: # 54:930091C0 LI 102 | movq 0(%rax), %rax 103 | .L55: # 55:930091C8 LEV 104 | popq %rbp 105 | ret 106 | .L56: # 56:930091D0 LEV 107 | popq %rbp 108 | ret 109 | .L57: # 57:930091D8 ENT 0 110 | pushq %rbp 111 | movq %rsp, %rbp 112 | subq $0, %rsp 113 | .L59: # 59:930091E8 ADDR 0:92FC8010 114 | leaq .stab(%rip), %rax 115 | addq $0,%rax 116 | .L61: # 61:930091F8 PSH 117 | pushq %rax 118 | .L62: # 62:93009200 IMM 10 119 | movq $10, %rax 120 | .L64: # 64:93009210 PSH 121 | pushq %rax 122 | .L65: # 65:93009218 JSR 1:93009018 123 | call .L1 124 | .L67: # 67:93009228 ADJ 1 125 | 126 | .L69: # 69:93009238 PSH 127 | pushq %rax 128 | .L70: # 70:93009240 PRTF 129 | popq %rsi 130 | popq %rdi 131 | call printf@PLT 132 | .L71: # 71:93009248 ADJ 2 133 | 134 | .L73: # 73:93009258 IMM 0 135 | movq $0, %rax 136 | .L75: # 75:93009268 LEV 137 | popq %rbp 138 | ret 139 | .L76: # 76:93009270 LEV 140 | popq %rbp 141 | ret 142 | -------------------------------------------------------------------------------- /test/sum64.c: -------------------------------------------------------------------------------- 1 | #include 2 | #define int long long 3 | // sum(n) = 1+2+...+n 4 | int sum(int n) { 5 | int s; 6 | int i; 7 | s=0; 8 | i=1; 9 | while (i <= n) { 10 | s = s + i; 11 | i ++; 12 | } 13 | return s; 14 | } 15 | 16 | int main() { 17 | printf("sum(10)=%d\n", sum(10)); 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /test/sum64.s: -------------------------------------------------------------------------------- 1 | .file "sum64.c" 2 | # GNU C17 (x86_64-posix-seh-rev0, Built by MinGW-W64 project) version 8.1.0 (x86_64-w64-mingw32) 3 | # compiled by GNU C version 8.1.0, GMP version 6.1.2, MPFR version 4.0.1, MPC version 1.1.0, isl version isl-0.18-GMP 4 | 5 | # GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 6 | # options passed: 7 | # -iprefix C:/Program Files/CodeBlocks/MinGW/bin/../lib/gcc/x86_64-w64-mingw32/8.1.0/ 8 | # -D_REENTRANT sum64.c -mtune=core2 -march=nocona -auxbase-strip sum64.s 9 | # -fverbose-asm 10 | # options enabled: -faggressive-loop-optimizations 11 | # -fasynchronous-unwind-tables -fauto-inc-dec -fchkp-check-incomplete-type 12 | # -fchkp-check-read -fchkp-check-write -fchkp-instrument-calls 13 | # -fchkp-narrow-bounds -fchkp-optimize -fchkp-store-bounds 14 | # -fchkp-use-static-bounds -fchkp-use-static-const-bounds 15 | # -fchkp-use-wrappers -fcommon -fdelete-null-pointer-checks 16 | # -fdwarf2-cfi-asm -fearly-inlining -feliminate-unused-debug-types 17 | # -ffp-int-builtin-inexact -ffunction-cse -fgcse-lm -fgnu-runtime 18 | # -fgnu-unique -fident -finline-atomics -fira-hoist-pressure 19 | # -fira-share-save-slots -fira-share-spill-slots -fivopts 20 | # -fkeep-inline-dllexport -fkeep-static-consts -fleading-underscore 21 | # -flifetime-dse -flto-odr-type-merging -fmath-errno -fmerge-debug-strings 22 | # -fpeephole -fpic -fplt -fprefetch-loop-arrays -freg-struct-return 23 | # -fsched-critical-path-heuristic -fsched-dep-count-heuristic 24 | # -fsched-group-heuristic -fsched-interblock -fsched-last-insn-heuristic 25 | # -fsched-rank-heuristic -fsched-spec -fsched-spec-insn-heuristic 26 | # -fsched-stalled-insns-dep -fschedule-fusion -fsemantic-interposition 27 | # -fset-stack-executable -fshow-column -fshrink-wrap-separate 28 | # -fsigned-zeros -fsplit-ivs-in-unroller -fssa-backprop -fstdarg-opt 29 | # -fstrict-volatile-bitfields -fsync-libcalls -ftrapping-math 30 | # -ftree-cselim -ftree-forwprop -ftree-loop-if-convert -ftree-loop-im 31 | # -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops= 32 | # -ftree-phiprop -ftree-reassoc -ftree-scev-cprop -funit-at-a-time 33 | # -funwind-tables -fverbose-asm -fzero-initialized-in-bss 34 | # -m128bit-long-double -m64 -m80387 -maccumulate-outgoing-args 35 | # -malign-double -malign-stringops -mcx16 -mfancy-math-387 -mfentry 36 | # -mfp-ret-in-387 -mfxsr -mieee-fp -mlong-double-80 -mmmx -mms-bitfields 37 | # -mno-sse4 -mpush-args -mred-zone -msse -msse2 -msse3 -mstack-arg-probe 38 | # -mstackrealign -mvzeroupper 39 | 40 | .text 41 | .globl sum 42 | .def sum; .scl 2; .type 32; .endef 43 | .seh_proc sum 44 | sum: 45 | pushq %rbp # 46 | .seh_pushreg %rbp 47 | movq %rsp, %rbp #, 48 | .seh_setframe %rbp, 0 49 | subq $16, %rsp #, 50 | .seh_stackalloc 16 51 | .seh_endprologue 52 | movq %rcx, 16(%rbp) # n, n 53 | # sum64.c:7: s=0; 54 | movq $0, -8(%rbp) #, s 55 | # sum64.c:8: i=1; 56 | movq $1, -16(%rbp) #, i 57 | # sum64.c:9: while (i <= n) { 58 | jmp .L2 # 59 | .L3: 60 | # sum64.c:10: s = s + i; 61 | movq -16(%rbp), %rax # i, tmp89 62 | addq %rax, -8(%rbp) # tmp89, s 63 | # sum64.c:11: i ++; 64 | addq $1, -16(%rbp) #, i 65 | .L2: 66 | # sum64.c:9: while (i <= n) { 67 | movq -16(%rbp), %rax # i, tmp90 68 | cmpq 16(%rbp), %rax # n, tmp90 69 | jle .L3 #, 70 | # sum64.c:13: return s; 71 | movq -8(%rbp), %rax # s, _8 72 | # sum64.c:14: } 73 | addq $16, %rsp #, 74 | popq %rbp # 75 | ret 76 | .seh_endproc 77 | .def __main; .scl 2; .type 32; .endef 78 | .section .rdata,"dr" 79 | .LC0: 80 | .ascii "sum(10)=%d\12\0" 81 | .text 82 | .globl main 83 | .def main; .scl 2; .type 32; .endef 84 | .seh_proc main 85 | main: 86 | pushq %rbp # 87 | .seh_pushreg %rbp 88 | movq %rsp, %rbp #, 89 | .seh_setframe %rbp, 0 90 | subq $32, %rsp #, 91 | .seh_stackalloc 32 92 | .seh_endprologue 93 | # sum64.c:16: int main() { 94 | call __main # 95 | # sum64.c:17: printf("sum(10)=%d\n", sum(10)); 96 | movl $10, %ecx #, 97 | call sum # 98 | movq %rax, %rdx # _1, 99 | leaq .LC0(%rip), %rcx #, 100 | call printf # 101 | # sum64.c:18: return 0; 102 | movl $0, %eax #, _5 103 | # sum64.c:19: } 104 | addq $32, %rsp #, 105 | popq %rbp # 106 | ret 107 | .seh_endproc 108 | .ident "GCC: (x86_64-posix-seh-rev0, Built by MinGW-W64 project) 8.1.0" 109 | .def printf; .scl 2; .type 32; .endef 110 | -------------------------------------------------------------------------------- /test/var.c: -------------------------------------------------------------------------------- 1 | int a,b,c; 2 | int *p; 3 | 4 | int main() { 5 | a = 3; c = 5; 6 | p = &a; 7 | printf("a=%d b=%d c=%d\n", a, b, c); 8 | printf("p=%p *p=%d\n", p, *p); 9 | } 10 | --------------------------------------------------------------------------------