├── .ci ├── cross-check.sh └── cross-tool.sh ├── .github └── workflows │ └── main.yml ├── .gitignore ├── AUTHORS ├── LICENSE ├── Makefile ├── README.md ├── amacc.c ├── docs └── IR.md ├── mk ├── arm.mk ├── common.mk └── python.mk ├── scripts ├── disasm └── runtest.py └── tests ├── .clang-format ├── arginc.c ├── arginc.list ├── assign.c ├── char.c ├── comments.c ├── cond.c ├── duff.c ├── enum.c ├── eq.c ├── fib.c ├── for.c ├── func_call.c ├── func_param.c ├── goto.c ├── hello.c ├── inc.c ├── jit.c ├── literal.c ├── local.c ├── maze.c ├── printf.c ├── ptr.c ├── read.c ├── shift.c ├── struct.c ├── switch.c ├── union.c └── while.c /.ci/cross-check.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | MACHINE_TYPE=`uname -m` 4 | if [ ${MACHINE_TYPE} != 'x86_64' ]; then 5 | exit 6 | fi 7 | 8 | OS_TYPE=`uname -s` 9 | if [ ${OS_TYPE} != 'Linux' ]; then 10 | exit 11 | fi 12 | 13 | # Clang/LLVM is natively a cross-compiler. 14 | # TODO: Do cross-compilation using Clang 15 | # https://clang.llvm.org/docs/CrossCompilation.html 16 | if [ $(printenv CXX | grep clang) ]; then 17 | exit 18 | fi 19 | 20 | GCC_REL=11.2-2022.02 21 | 22 | set -x 23 | 24 | export PATH=gcc-arm-${GCC_REL}-x86_64-arm-none-linux-gnueabihf/bin:$PATH 25 | make CROSS_COMPILE=arm-none-linux-gnueabihf- check || exit 1 26 | -------------------------------------------------------------------------------- /.ci/cross-tool.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ARM_MIRROR=https://github.com/DLTcollab/toolchain-arm/raw/main 4 | GCC_REL=11.2-2022.02 5 | 6 | MACHINE_TYPE=`uname -m` 7 | if [ ${MACHINE_TYPE} != 'x86_64' ]; then 8 | exit 9 | fi 10 | 11 | OS_TYPE=`uname -s` 12 | if [ ${OS_TYPE} != 'Linux' ]; then 13 | exit 14 | fi 15 | 16 | set -x 17 | 18 | sudo apt-get update -q -y 19 | sudo apt-get install -q -y qemu-user 20 | 21 | sudo apt-get install -y curl xz-utils 22 | 23 | curl 
-L \ 24 | ${ARM_MIRROR}/gcc-arm-${GCC_REL}-x86_64-arm-none-linux-gnueabihf.tar.xz \ 25 | | tar -Jx || exit 1 26 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Github Actions 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | host_x86: 7 | runs-on: ubuntu-20.04 8 | strategy: 9 | matrix: 10 | compiler: [gcc-10] 11 | steps: 12 | - name: checkout code 13 | uses: actions/checkout@v3 14 | - name: build artifact 15 | env: 16 | CC: ${{ matrix.compiler }} 17 | run: | 18 | sh .ci/cross-tool.sh 19 | sh .ci/cross-check.sh 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | amacc 2 | amacc-native 3 | elf/ 4 | out-gcc/ 5 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | AMaCC is written by: 2 | Jim Huang 3 | Ying-Ruei Liang (KK) 4 | lecopzer 5 | yodalee 6 | Logan Chien 7 | splasky 8 | HPCguy 9 | 10 | Based on the original work from Robert Swierczek. 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | AMaCC is freely redistributable under the GNU GPL: 2 | 3 | Copyright (C) 2016-2023 National Cheng Kung University, Taiwan. 4 | Copyright (C) 2014-2015 Robert Swierczek. 5 | 6 | GNU GENERAL PUBLIC LICENSE 7 | Version 2, June 1991 8 | 9 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 10 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 11 | Everyone is permitted to copy and distribute verbatim copies 12 | of this license document, but changing it is not allowed. 
13 | 14 | Preamble 15 | 16 | The licenses for most software are designed to take away your 17 | freedom to share and change it. By contrast, the GNU General Public 18 | License is intended to guarantee your freedom to share and change free 19 | software--to make sure the software is free for all its users. This 20 | General Public License applies to most of the Free Software 21 | Foundation's software and to any other program whose authors commit to 22 | using it. (Some other Free Software Foundation software is covered by 23 | the GNU Library General Public License instead.) You can apply it to 24 | your programs, too. 25 | 26 | When we speak of free software, we are referring to freedom, not 27 | price. Our General Public Licenses are designed to make sure that you 28 | have the freedom to distribute copies of free software (and charge for 29 | this service if you wish), that you receive source code or can get it 30 | if you want it, that you can change the software or use pieces of it 31 | in new free programs; and that you know you can do these things. 32 | 33 | To protect your rights, we need to make restrictions that forbid 34 | anyone to deny you these rights or to ask you to surrender the rights. 35 | These restrictions translate to certain responsibilities for you if you 36 | distribute copies of the software, or if you modify it. 37 | 38 | For example, if you distribute copies of such a program, whether 39 | gratis or for a fee, you must give the recipients all the rights that 40 | you have. You must make sure that they, too, receive or can get the 41 | source code. And you must show them these terms so they know their 42 | rights. 43 | 44 | We protect your rights with two steps: (1) copyright the software, and 45 | (2) offer you this license which gives you legal permission to copy, 46 | distribute and/or modify the software. 
47 | 48 | Also, for each author's protection and ours, we want to make certain 49 | that everyone understands that there is no warranty for this free 50 | software. If the software is modified by someone else and passed on, we 51 | want its recipients to know that what they have is not the original, so 52 | that any problems introduced by others will not reflect on the original 53 | authors' reputations. 54 | 55 | Finally, any free program is threatened constantly by software 56 | patents. We wish to avoid the danger that redistributors of a free 57 | program will individually obtain patent licenses, in effect making the 58 | program proprietary. To prevent this, we have made it clear that any 59 | patent must be licensed for everyone's free use or not licensed at all. 60 | 61 | The precise terms and conditions for copying, distribution and 62 | modification follow. 63 | 64 | GNU GENERAL PUBLIC LICENSE 65 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 66 | 67 | 0. This License applies to any program or other work which contains 68 | a notice placed by the copyright holder saying it may be distributed 69 | under the terms of this General Public License. The "Program", below, 70 | refers to any such program or work, and a "work based on the Program" 71 | means either the Program or any derivative work under copyright law: 72 | that is to say, a work containing the Program or a portion of it, 73 | either verbatim or with modifications and/or translated into another 74 | language. (Hereinafter, translation is included without limitation in 75 | the term "modification".) Each licensee is addressed as "you". 76 | 77 | Activities other than copying, distribution and modification are not 78 | covered by this License; they are outside its scope. 
The act of 79 | running the Program is not restricted, and the output from the Program 80 | is covered only if its contents constitute a work based on the 81 | Program (independent of having been made by running the Program). 82 | Whether that is true depends on what the Program does. 83 | 84 | 1. You may copy and distribute verbatim copies of the Program's 85 | source code as you receive it, in any medium, provided that you 86 | conspicuously and appropriately publish on each copy an appropriate 87 | copyright notice and disclaimer of warranty; keep intact all the 88 | notices that refer to this License and to the absence of any warranty; 89 | and give any other recipients of the Program a copy of this License 90 | along with the Program. 91 | 92 | You may charge a fee for the physical act of transferring a copy, and 93 | you may at your option offer warranty protection in exchange for a fee. 94 | 95 | 2. You may modify your copy or copies of the Program or any portion 96 | of it, thus forming a work based on the Program, and copy and 97 | distribute such modifications or work under the terms of Section 1 98 | above, provided that you also meet all of these conditions: 99 | 100 | a) You must cause the modified files to carry prominent notices 101 | stating that you changed the files and the date of any change. 102 | 103 | b) You must cause any work that you distribute or publish, that in 104 | whole or in part contains or is derived from the Program or any 105 | part thereof, to be licensed as a whole at no charge to all third 106 | parties under the terms of this License. 
107 | 108 | c) If the modified program normally reads commands interactively 109 | when run, you must cause it, when started running for such 110 | interactive use in the most ordinary way, to print or display an 111 | announcement including an appropriate copyright notice and a 112 | notice that there is no warranty (or else, saying that you provide 113 | a warranty) and that users may redistribute the program under 114 | these conditions, and telling the user how to view a copy of this 115 | License. (Exception: if the Program itself is interactive but 116 | does not normally print such an announcement, your work based on 117 | the Program is not required to print an announcement.) 118 | 119 | These requirements apply to the modified work as a whole. If 120 | identifiable sections of that work are not derived from the Program, 121 | and can be reasonably considered independent and separate works in 122 | themselves, then this License, and its terms, do not apply to those 123 | sections when you distribute them as separate works. But when you 124 | distribute the same sections as part of a whole which is a work based 125 | on the Program, the distribution of the whole must be on the terms of 126 | this License, whose permissions for other licensees extend to the 127 | entire whole, and thus to each and every part regardless of who wrote it. 128 | 129 | Thus, it is not the intent of this section to claim rights or contest 130 | your rights to work written entirely by you; rather, the intent is to 131 | exercise the right to control the distribution of derivative or 132 | collective works based on the Program. 133 | 134 | In addition, mere aggregation of another work not based on the Program 135 | with the Program (or with a work based on the Program) on a volume of 136 | a storage or distribution medium does not bring the other work under 137 | the scope of this License. 138 | 139 | 3. 
You may copy and distribute the Program (or a work based on it, 140 | under Section 2) in object code or executable form under the terms of 141 | Sections 1 and 2 above provided that you also do one of the following: 142 | 143 | a) Accompany it with the complete corresponding machine-readable 144 | source code, which must be distributed under the terms of Sections 145 | 1 and 2 above on a medium customarily used for software interchange; or, 146 | 147 | b) Accompany it with a written offer, valid for at least three 148 | years, to give any third party, for a charge no more than your 149 | cost of physically performing source distribution, a complete 150 | machine-readable copy of the corresponding source code, to be 151 | distributed under the terms of Sections 1 and 2 above on a medium 152 | customarily used for software interchange; or, 153 | 154 | c) Accompany it with the information you received as to the offer 155 | to distribute corresponding source code. (This alternative is 156 | allowed only for noncommercial distribution and only if you 157 | received the program in object code or executable form with such 158 | an offer, in accord with Subsection b above.) 159 | 160 | The source code for a work means the preferred form of the work for 161 | making modifications to it. For an executable work, complete source 162 | code means all the source code for all modules it contains, plus any 163 | associated interface definition files, plus the scripts used to 164 | control compilation and installation of the executable. However, as a 165 | special exception, the source code distributed need not include 166 | anything that is normally distributed (in either source or binary 167 | form) with the major components (compiler, kernel, and so on) of the 168 | operating system on which the executable runs, unless that component 169 | itself accompanies the executable. 
170 | 171 | If distribution of executable or object code is made by offering 172 | access to copy from a designated place, then offering equivalent 173 | access to copy the source code from the same place counts as 174 | distribution of the source code, even though third parties are not 175 | compelled to copy the source along with the object code. 176 | 177 | 4. You may not copy, modify, sublicense, or distribute the Program 178 | except as expressly provided under this License. Any attempt 179 | otherwise to copy, modify, sublicense or distribute the Program is 180 | void, and will automatically terminate your rights under this License. 181 | However, parties who have received copies, or rights, from you under 182 | this License will not have their licenses terminated so long as such 183 | parties remain in full compliance. 184 | 185 | 5. You are not required to accept this License, since you have not 186 | signed it. However, nothing else grants you permission to modify or 187 | distribute the Program or its derivative works. These actions are 188 | prohibited by law if you do not accept this License. Therefore, by 189 | modifying or distributing the Program (or any work based on the 190 | Program), you indicate your acceptance of this License to do so, and 191 | all its terms and conditions for copying, distributing or modifying 192 | the Program or works based on it. 193 | 194 | 6. Each time you redistribute the Program (or any work based on the 195 | Program), the recipient automatically receives a license from the 196 | original licensor to copy, distribute or modify the Program subject to 197 | these terms and conditions. You may not impose any further 198 | restrictions on the recipients' exercise of the rights granted herein. 199 | You are not responsible for enforcing compliance by third parties to 200 | this License. 201 | 202 | 7. 
If, as a consequence of a court judgment or allegation of patent 203 | infringement or for any other reason (not limited to patent issues), 204 | conditions are imposed on you (whether by court order, agreement or 205 | otherwise) that contradict the conditions of this License, they do not 206 | excuse you from the conditions of this License. If you cannot 207 | distribute so as to satisfy simultaneously your obligations under this 208 | License and any other pertinent obligations, then as a consequence you 209 | may not distribute the Program at all. For example, if a patent 210 | license would not permit royalty-free redistribution of the Program by 211 | all those who receive copies directly or indirectly through you, then 212 | the only way you could satisfy both it and this License would be to 213 | refrain entirely from distribution of the Program. 214 | 215 | If any portion of this section is held invalid or unenforceable under 216 | any particular circumstance, the balance of the section is intended to 217 | apply and the section as a whole is intended to apply in other 218 | circumstances. 219 | 220 | It is not the purpose of this section to induce you to infringe any 221 | patents or other property right claims or to contest validity of any 222 | such claims; this section has the sole purpose of protecting the 223 | integrity of the free software distribution system, which is 224 | implemented by public license practices. Many people have made 225 | generous contributions to the wide range of software distributed 226 | through that system in reliance on consistent application of that 227 | system; it is up to the author/donor to decide if he or she is willing 228 | to distribute software through any other system and a licensee cannot 229 | impose that choice. 230 | 231 | This section is intended to make thoroughly clear what is believed to 232 | be a consequence of the rest of this License. 233 | 234 | 8. 
If the distribution and/or use of the Program is restricted in 235 | certain countries either by patents or by copyrighted interfaces, the 236 | original copyright holder who places the Program under this License 237 | may add an explicit geographical distribution limitation excluding 238 | those countries, so that distribution is permitted only in or among 239 | countries not thus excluded. In such case, this License incorporates 240 | the limitation as if written in the body of this License. 241 | 242 | 9. The Free Software Foundation may publish revised and/or new versions 243 | of the General Public License from time to time. Such new versions will 244 | be similar in spirit to the present version, but may differ in detail to 245 | address new problems or concerns. 246 | 247 | Each version is given a distinguishing version number. If the Program 248 | specifies a version number of this License which applies to it and "any 249 | later version", you have the option of following the terms and conditions 250 | either of that version or of any later version published by the Free 251 | Software Foundation. If the Program does not specify a version number of 252 | this License, you may choose any version ever published by the Free Software 253 | Foundation. 254 | 255 | 10. If you wish to incorporate parts of the Program into other free 256 | programs whose distribution conditions are different, write to the author 257 | to ask for permission. For software which is copyrighted by the Free 258 | Software Foundation, write to the Free Software Foundation; we sometimes 259 | make exceptions for this. Our decision will be guided by the two goals 260 | of preserving the free status of all derivatives of our free software and 261 | of promoting the sharing and reuse of software generally. 262 | 263 | NO WARRANTY 264 | 265 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 266 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 267 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 268 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 269 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 270 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 271 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 272 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 273 | REPAIR OR CORRECTION. 274 | 275 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 276 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 277 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 278 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 279 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 280 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 281 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 282 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 283 | POSSIBILITY OF SUCH DAMAGES. 284 | 285 | END OF TERMS AND CONDITIONS 286 | 287 | How to Apply These Terms to Your New Programs 288 | 289 | If you develop a new program, and you want it to be of the greatest 290 | possible use to the public, the best way to achieve this is to make it 291 | free software which everyone can redistribute and change under these terms. 292 | 293 | To do so, attach the following notices to the program. It is safest 294 | to attach them to the start of each source file to most effectively 295 | convey the exclusion of warranty; and each file should have at least 296 | the "copyright" line and a pointer to where the full notice is found. 
297 | 298 | 299 | Copyright (C) 300 | 301 | This program is free software; you can redistribute it and/or modify 302 | it under the terms of the GNU General Public License as published by 303 | the Free Software Foundation; either version 2 of the License, or 304 | (at your option) any later version. 305 | 306 | This program is distributed in the hope that it will be useful, 307 | but WITHOUT ANY WARRANTY; without even the implied warranty of 308 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 309 | GNU General Public License for more details. 310 | 311 | You should have received a copy of the GNU General Public License 312 | along with this program; if not, write to the Free Software 313 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 314 | 315 | 316 | Also add information on how to contact you by electronic and paper mail. 317 | 318 | If the program is interactive, make it output a short notice like this 319 | when it starts in an interactive mode: 320 | 321 | Gnomovision version 69, Copyright (C) year name of author 322 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 323 | This is free software, and you are welcome to redistribute it 324 | under certain conditions; type `show c' for details. 325 | 326 | The hypothetical commands `show w' and `show c' should show the appropriate 327 | parts of the General Public License. Of course, the commands you use may 328 | be called something other than `show w' and `show c'; they could even be 329 | mouse-clicks or menu items--whatever suits your program. 330 | 331 | You should also get your employer (if you work as a programmer) or your 332 | school, if any, to sign a "copyright disclaimer" for the program, if 333 | necessary. Here is a sample; alter the names: 334 | 335 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 336 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
337 | 338 | , 1 April 1989 339 | Ty Coon, President of Vice 340 | 341 | This General Public License does not permit incorporating your program into 342 | proprietary programs. If your program is a subroutine library, you may 343 | consider it more useful to permit linking proprietary applications with the 344 | library. If this is what you want to do, use the GNU Library General 345 | Public License instead of this License. 346 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS = -O0 -Wall -Wno-misleading-indentation 2 | OBJ_DIR = elf 3 | TEST_DIR = tests 4 | TEST_SRC = $(wildcard $(TEST_DIR)/*.c) 5 | TEST_OBJ = $(TEST_SRC:.c=.o) 6 | 7 | BIN = amacc 8 | EXEC = $(BIN) $(BIN)-native 9 | 10 | include mk/arm.mk 11 | include mk/common.mk 12 | include mk/python.mk 13 | 14 | ## Build AMaCC 15 | all: $(EXEC) 16 | $(BIN): $(BIN).c 17 | $(VECHO) " CC+LD\t\t$@\n" 18 | $(Q)$(ARM_CC) $(CFLAGS) -o $@ $< -g -ldl 19 | 20 | $(BIN)-native: $(BIN).c 21 | $(VECHO) " CC+LD\t\t$@\n" 22 | $(Q)$(CC) $(CFLAGS) -o $@ $< \ 23 | -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast -Wno-format \ 24 | -ldl 25 | ## Run tests and show message 26 | check: $(EXEC) $(TEST_OBJ) 27 | $(VECHO) "[ C to IR translation ]" 28 | $(Q)./$(BIN)-native -s tests/arginc.c | diff tests/arginc.list - \ 29 | && $(call pass) 30 | $(VECHO) "[ JIT compilation + execution ]" 31 | $(Q)if [ "$(shell $(ARM_EXEC) ./$(BIN) tests/hello.c)" = "hello, world" ]; then \ 32 | $(call pass); \ 33 | fi 34 | $(VECHO) "[ ELF generation ]" 35 | $(Q)$(ARM_EXEC) ./$(BIN) -o $(OBJ_DIR)/hello tests/hello.c 36 | $(Q)if [ "$(shell $(ARM_EXEC) $(OBJ_DIR)/hello)" = "hello, world" ]; then \ 37 | $(call pass); \ 38 | fi 39 | $(VECHO) "[ nested/self compilation ]" 40 | $(Q)if [ "$(shell $(ARM_EXEC) ./$(BIN) $(BIN).c tests/hello.c)" = "hello, world" ]; then \ 41 | $(call pass); \ 42 | fi 43 | $(VECHO) "[ Compatibility 
with GCC/Arm ] " 44 | $(Q)$(PYTHON) scripts/runtest.py || echo 45 | 46 | $(OBJ_DIR)/$(BIN): $(BIN) 47 | $(VECHO) " SelfCC\t$@\n" 48 | $(Q)$(ARM_EXEC) ./$^ -o $@ $(BIN).c 49 | 50 | SHELL_HACK := $(shell mkdir -p $(OBJ_DIR)) 51 | $(TEST_DIR)/%.o: $(TEST_DIR)/%.c $(BIN) $(OBJ_DIR)/$(BIN) 52 | $(VECHO) "[*** verify $< *******]\n" 53 | $(Q)$(ARM_EXEC) ./$(BIN) $< 2 $(REDIR) 54 | $(VECHO) "[*** verify $< *******]\n" 55 | $(Q)$(ARM_EXEC) ./$(BIN) -o $(OBJ_DIR)/$(notdir $(basename $<)) $< $(REDIR) 56 | $(Q)$(ARM_EXEC) $(OBJ_DIR)/$(notdir $(basename $<)) 2 $(REDIR) 57 | $(VECHO) "[*** verify $< **]\n" 58 | $(Q)$(ARM_EXEC) ./$(OBJ_DIR)/$(BIN) $< 2 $(REDIR) 59 | $(Q)$(call pass,$<) 60 | 61 | ## Print available build targets 62 | help: 63 | @cat $(MAKEFILE_LIST) | \ 64 | awk '/^##.*$$/{l1=$$0;getline;l2=(l1 "##" $$0); print l2 $$0}' | awk -F"##" '{split($$3,t,":");printf "\033[36m%-11s\033[0m %s\n",t[1],$$2}' 65 | 66 | ## Dump assembly from source file. Usage: "make dump-ir FILE=tests/hello.c" 67 | dump-ir: $(BIN) 68 | @$(ARM_EXEC) $(BIN) -s $(FILE) 69 | 70 | ## Remove all generated files 71 | clean: 72 | $(RM) $(EXEC) $(OBJ_DIR)/* elf/* out-gcc/* 73 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AMaCC = Arguably Minimalist Arm C Compiler 2 | 3 | ## Introduction 4 | AMaCC is a 32-bit Arm architecture compiler built from scratch. 5 | It serves as a stripped-down version of C, designed as a pedagogical tool for 6 | learning about compilers, linkers, and loaders. 7 | 8 | There are two execution modes AMaCC implements: 9 | * Just-in-Time (JIT) compiler for Arm backend. 10 | * Generation of valid GNU/Linux executables using the Executable and Linkable Format (ELF). 11 | 12 | It is worth mentioning that AMaCC is designed to compile a subset of C necessary 13 | to self-host with the above execution modes. 
For instance, it supports global 14 | variables, particularly global arrays. 15 | 16 | A simple stack-based Abstract Syntax Tree (AST) is generated through cooperative 17 | `stmt()` and `expr()` parsing functions, both fed by a token-generating function. 18 | The `expr()` function performs some literal constant optimizations. The AST is 19 | transformed into a stack-based VM Intermediate Representation (IR) using the 20 | `gen()` function. The IR can be examined via a command-line option. Finally, the 21 | `codegen()` function generates Arm32 instructions from the IR, which can be 22 | executed via either `jit()` or `elf32()` executable generation. 23 | 24 | AMaCC combines classical recursive descent and operator precedence parsing. An 25 | operator precedence parser proves to be considerably faster than a recursive 26 | descent parser (RDP) for expressions when operator precedence is defined using 27 | grammar productions that would otherwise be turned into methods. 28 | 29 | ## Compatibility 30 | AMaCC is capable of compiling C source files written in the following 31 | syntax: 32 | 33 | * support for all C89 statements except typedef. 34 | * support for all C89 expression operators. 35 | * data types: char, int, enum, struct, union, and multi-level pointers 36 | - type modifiers, qualifiers, and storage class specifiers are 37 | currently unsupported, though many keywords of this nature 38 | are not routinely used, and can be easily worked around with 39 | simple alternative constructs. 40 | - struct/union assignments are not supported at the language level 41 | in AMaCC, e.g. s1 = s2. This also applies to function return 42 | values and parameters. Passing and returning pointers is recommended. 43 | Use memcpy if you want to copy a full struct, e.g.
44 | memcpy(&s1, &s2, sizeof(struct xxx)); 45 | * global/local variable initializations for supported data types 46 | - e.g., `int i = [expr]` 47 | - New variables are allowed to be declared within functions anywhere. 48 | - item-by-item array initialization is supported 49 | - but aggregate array declaration and initialization is yet to be supported 50 | e.g., `int foo[2][2] = { { 1, 0 }, { 0, 1 } };` 51 | 52 | The architecture support targets armv7hf with Linux ABI, and it has been verified 53 | on Raspberry Pi 2/3/4 with GNU/Linux. 54 | 55 | ## Prerequisites 56 | * Code generator in AMaCC relies on several GNU/Linux behaviors, and it 57 | is necessary to have Arm/Linux installed in your build environment. 58 | * Install [GNU Toolchain for the A-profile Architecture](https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads) 59 | - Select `arm-none-linux-gnueabihf` (AArch32 target with hard float) 60 | 61 | * Install QEMU for Arm user emulation 62 | ```shell 63 | sudo apt-get install qemu-user 64 | ``` 65 | 66 | ## Running AMaCC 67 | Run `make check` and you should see this: 68 | ``` 69 | [ C to IR translation ] Passed 70 | [ JIT compilation + execution ] Passed 71 | [ ELF generation ] Passed 72 | [ nested/self compilation ] Passed 73 | [ Compatibility with GCC/Arm ] ........................................ 74 | ---------------------------------------------------------------------- 75 | Ran 52 tests in 8.842s 76 | 77 | OK 78 | ``` 79 | 80 | Check the messages generated by `make help` to learn more. 81 | 82 | ## Benchmark 83 | AMaCC is able to generate machine code really fast and provides 70% of the performance of `gcc -O0`.
84 | 85 | Test environment: 86 | * Raspberry Pi 4B (SoC: bcm2711, ARMv8-A architecture) 87 | * Raspbian GNU/Linux, kernel 5.10.17-v7l+, gcc 8.3.0 (armv7l userland) 88 | 89 | Input source file: `amacc.c` 90 | 91 | | compiler driver | binary size (KiB) | compile time (s) | 92 | | ---------------------------------- | ----------------- | ---------------- | 93 | | gcc with `-O0 -ldl` (compile+link) | 56 | 0.5683 | 94 | | gcc with `-O0 -c` (compile only) | 56 | 0.4884 | 95 | | AMaCC | 100 | 0.0217 | 96 | 97 | 98 | ## Internals 99 | Check [Intermediate Representation (IR) for AMaCC Compilation](docs/IR.md). 100 | 101 | ## Acknowledgements 102 | AMaCC is based on the infrastructure of [c4](https://github.com/rswier/c4). 103 | 104 | ## Related Materials 105 | * [Curated list of awesome resources on Compilers, Interpreters and Runtimes](http://aalhour.com/awesome-compilers/) 106 | * [Hacker News discussions](https://news.ycombinator.com/item?id=11411124) 107 | * [A Compiler Writing Journey](https://github.com/DoctorWkt/acwj) by Warren Toomey. 108 | -------------------------------------------------------------------------------- /amacc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * AMaCC is capable of compiling (subset of) C source files into GNU/Linux 3 | * executables or running via just-in-time compilation on 32-bit ARM 4 | * processor-based platforms. There is no preprocessor. 5 | * 6 | * The following options are supported: 7 | * -s : Print source and generated intermediate representation (IR). 8 | * -o : Create executable file and terminate normally. 9 | * 10 | * If -o and -s are omitted, the compiled code is executed immediately (if 11 | * there were no compile errors) with the command line arguments passed 12 | * after the source file parameter. 
/* 64-bit host support: widen every `int` to pointer size so that pointers
 * can be stored in `int` cells (the AST and the IR both do this).
 */
#if defined(__x86_64__) || defined(__aarch64__)
#define int long
#endif

char *freep, *p, *lp; // p: current position in source code
                      // lp: start of the source text not yet listed by -s
                      // freep: presumably the allocation base of the source
                      //        buffer - TODO confirm against main()
char *freedata, *data, *_data; // data: next free byte of the data/bss area
                               // (string literals are appended through it)

int *e, *le, *text;  // e: last emitted IR word (code is appended via *++e)
                     // le: last IR word already printed by the -s listing
int *cas;            // case statement patch-up pointer
int *def;            // default statement patch-up pointer
int *brks;           // break statement patch-up pointer
int *cnts;           // continue statement patch-up pointer
int  swtc;           // !0 -> in a switch-stmt context
int  brkc;           // !0 -> in a break-stmt context
int  cntc;           // !0 -> in a continue-stmt context
int *tsize;          // array (indexed by type) of type sizes
int  tnew;           // next available type
int  tk;             // current token
int  ival;           // current token value
int  ty;             // current expression type
int  compound;       // !0 -> the operator being parsed comes from a compound
                     // assignment; its right operand is parsed at Assign level
int  loc;            // local variable offset
int  line;           // current line number
int  src;            // !0 -> print source and generated IR (-s)
int  signed_char;    // use `signed char` for `char`
int  elf;            // !0 -> generate an ELF file; external functions are
                     // then addressed through plt_func_addr
int *n;              // current position in emitted abstract syntax tree
                     // (nodes are pushed downward with *--n).
                     // With an AST, the compiler is not limited to generate
                     // code on the fly with parsing.
                     // This capability allows function parameter code to be
                     // emitted and pushed on the stack in the proper
                     // right-to-left order.
int ld;              // local variable depth

// identifier
struct ident_s {
    int tk;         // type-id or keyword; 0 marks the first unused table entry
    int hash;       // (name hash << 6) + name length, as computed by next()
    char *name;     // name of this identifier; points into the source buffer
                    // and is NOT NUL-terminated
    /* fields starting with 'h' were designed to save and restore
     * the global class/type/val in order to handle the case if a
     * function declares a local with the same name as a global.
     */
    int class, hclass; // Func, Syscall, Glo (global var), Loc (local var),
                       // Par (parameter), Num (enum constant), Label, ...
    int type, htype;   // data type such as char and int
    int val, hval;     // meaning depends on class, e.g. enum value, frame
                       // offset, or index into ef_cache for a Syscall
    int stype;         // type id read when the identifier follows
                       // `struct` / `union`
} *id,  // currently parsed identifier
  *sym; // symbol table (simple list of identifiers)

// (library) external functions
struct ef_s {
    char *name; // heap copy of the function name
    int addr;   // address obtained from dlsym()
} **ef_cache;   // table of known external functions
int ef_count;   // number of used entries in ef_cache

struct member_s {
    struct ident_s *id;    // member name
    int offset;            // offset of the member inside the struct/union
    int type;              // member type
    struct member_s *next; // next member of the same struct/union
} **members; // array (indexed by type) of struct member lists

// tokens and classes (operators last and in precedence order)
// ( >= 128 so not to collide with ASCII-valued tokens)
// The relative order of the operator tokens is significant: expr() compares
// token values to decide precedence and derives the operator of a compound
// assignment by offsetting from OrAssign / ShlAssign.
enum {
    Num = 128, // the character set of given source is limited to 7-bit ASCII
    Func, Syscall, Main, ClearCache, Glo, Par, Loc, Keyword, Id, Label, Load, Enter,
    Break, Continue, Case, Char, Default, Else, Enum, If, Int, Return,
    Sizeof, Struct, Union, Switch, For, While, DoWhile, Goto,
    Assign, // operator =, the lowest-valued (loosest binding) operator token
    OrAssign, XorAssign, AndAssign, ShlAssign, ShrAssign, // |=, ^=, &=, <<=, >>=
    AddAssign, SubAssign, MulAssign, DivAssign, ModAssign, // +=, -=, *=, /=, %=
    Cond, // operator: ?
    Lor, Lan, Or, Xor, And, // operator: ||, &&, |, ^, &
    Eq, Ne, Lt, Gt, Le, Ge, // operator: ==, !=, <, >, <=, >=
    Shl, Shr, Add, Sub, Mul, Div, Mod, // operator: <<, >>, +, -, *, /, %
    Inc, Dec, Dot, Arrow, Bracket, // operator: ++, --, ., ->, [
};
// opcodes
/* The instruction set is designed for building intermediate representation.
 * Each opcode occupies one `int` cell; opcodes that take an operand are
 * followed by one more cell holding it.
 * Expression 10 + 20 will be translated into the following instructions:
 *    i = 0;
 *    text[i++] = IMM;
 *    text[i++] = 10;
 *    text[i++] = PSH;
 *    text[i++] = IMM;
 *    text[i++] = 20;
 *    text[i++] = ADD;
 *    text[i++] = PSH;
 *    text[i++] = EXIT;
 *    pc = text;
 */
enum {
    LEA , /*  0 */
    /* LEA addressed the problem how to fetch arguments inside sub-function.
     * Let's check out what a calling frame looks like before learning how
     * to fetch arguments (Note that arguments are pushed in its calling
     * order):
     *
     *     sub_function(arg1, arg2, arg3);
     *
     *     |    ....       | high address
     *     +---------------+
     *     | arg: 1        |    new_bp + 4
     *     +---------------+
     *     | arg: 2        |    new_bp + 3
     *     +---------------+
     *     | arg: 3        |    new_bp + 2
     *     +---------------+
     *     |return address |    new_bp + 1
     *     +---------------+
     *     | old BP        | <- new BP
     *     +---------------+
     *     | local var 1   |    new_bp - 1
     *     +---------------+
     *     | local var 2   |    new_bp - 2
     *     +---------------+
     *     |    ....       |  low address
     *
     * If we need to refer to arg1, we need to fetch new_bp + 4, which can not
     * be achieved by restricted ADD instruction. Thus another special
     * instruction is introduced to do this: LEA <offset>.
     * The following pseudocode illustrates how LEA works.
     *     if (op == LEA) { ax = (int) (bp + *pc++); } // load address for arguments
     * Together with JSR, ENT, ADJ, LEV, and LEA instruction, we are able to make
     * function calls.
     */

    IMM , /*  1 */
    /* IMM <num> to put immediate <num> into general register */

    JMP , /*  2 */
    /* JMP <addr> will unconditionally set the value of the PC register
     * to <addr>.
     * The following pseudocode illustrates how JMP works:
     *     if (op == JMP) { pc = (int *) *pc; } // jump to the address
     * Note that PC points to the NEXT instruction to be executed. Thus *pc
     * stores the argument of JMP instruction, i.e. the <addr>.
     */

    JSR , /*  3 */
    /* A function is a block of code, which may be far from the instruction
     * we are currently executing. That is the reason why JMP instruction
     * exists, jumping into starting point of a function. JSR is introduced
     * to perform some bookkeeping: store the current execution position so
     * that the program can resume after function call returns.
     *
     * JSR <addr> to invoke the function whose starting point is <addr> and
     * LEV to fetch the bookkeeping information to resume previous execution.
     */

    BZ  , /*  4 : conditional jump if general register is zero (jump-if-zero) */
    BNZ , /*  5 : conditional jump if general register is not zero */

    ENT , /*  6 */
    /* ENT <size> is called when we are about to enter the function call to
     * "make a new calling frame". It will store the current PC value onto
     * the stack, and save some space (<size> bytes) to store the local
     * variables for function.
     */

    ADJ , /*  7 */
    /* ADJ <size> is to adjust the stack, to "remove arguments from frame"
     * The following pseudocode illustrates how ADJ works:
     *     if (op == ADJ) { sp += *pc++; } // add esp, <size>
     */

    LEV , /*  8 */
    /* LEV fetches bookkeeping info to resume previous execution.
     * There is no POP instruction in our design, and the following pseudocode
     * illustrates how LEV works:
     *     if (op == LEV) { sp = bp; bp = (int *) *sp++;
     *                      pc = (int *) *sp++; } // restore call frame and PC
     */

    LI  , /*  9 */
    /* LI loads an integer into general register from a given memory
     * address which is stored in general register before execution.
     */

    LC  , /* 10 */
    /* LC loads a character into general register from a given memory
     * address which is stored in general register before execution.
     */

    SI  , /* 11 */
    /* SI stores the integer in general register into the memory whose
     * address is stored on the top of the stack.
     */

    SC  , /* 12 */
    /* SC stores the character in general register into the memory whose
     * address is stored on the top of the stack.
     */

    PSH , /* 13 */
    /* PSH pushes the value in general register onto the stack */

    OR  , /* 14 */  XOR , /* 15 */  AND , /* 16 */
    EQ  , /* 17 */  NE  , /* 18 */
    LT  , /* 19 */  GT  , /* 20 */  LE  , /* 21 */ GE  , /* 22 */
    SHL , /* 23 */  SHR , /* 24 */
    ADD , /* 25 */  SUB , /* 26 */  MUL , /* 27 */ DIV, /* 28 */ MOD, /* 29 */
    /* arithmetic instructions
     * Each operator has two arguments: the first one is stored on the top
     * of the stack while the second is stored in general register.
     * After the calculation is done, the argument on the stack will be popped
     * off and the result will be stored in general register.
     * So you are not able to fetch the first argument from the stack after
     * the calculation.
     */

    SYSC, /* 30 system call; the operand is an index into ef_cache */
    CLCA, /* 31 clear cache, used by JIT compilation */
    INVALID
};
239 | */ 240 | 241 | SYSC, /* 30 system call */ 242 | CLCA, /* 31 clear cache, used by JIT compilation */ 243 | INVALID 244 | }; 245 | 246 | // types 247 | enum { CHAR, INT, PTR = 256, PTR2 = 512 }; 248 | 249 | // ELF generation 250 | char **plt_func_addr; 251 | char *freebuf; 252 | 253 | char *append_strtab(char **strtab, char *str) 254 | { 255 | char *s; 256 | for (s = str; *s && (*s != ' '); s++) ; /* ignore trailing space */ 257 | int nbytes = s - str + 1; 258 | char *res = *strtab; 259 | memcpy(res, str, nbytes); 260 | res[s - str] = 0; // null terminator 261 | *strtab = res + nbytes; 262 | return res; 263 | } 264 | 265 | char fatal(char *msg) { printf("%d: %s\n", line, msg); exit(-1); } 266 | 267 | void ef_add(char *name, int addr) // add external function 268 | { 269 | ef_cache[ef_count] = malloc(sizeof(struct ef_s)) ; 270 | ef_cache[ef_count]->name = malloc(strlen(name)+1); 271 | strcpy(ef_cache[ef_count]->name, name); 272 | ef_cache[ef_count]->addr = addr; 273 | ++ef_count; 274 | } 275 | 276 | int ef_getaddr(int idx) // get address external function 277 | { 278 | return (elf ? (int) plt_func_addr[idx] : ef_cache[idx]->addr); 279 | } 280 | 281 | int ef_getidx(char *name) // get cache index of external function 282 | { 283 | int i; 284 | for (i = 0; i < ef_count; ++i) 285 | if (!strcmp(ef_cache[i]->name, name)) 286 | break; 287 | 288 | if (i == ef_count) { // add new external lib func to cache 289 | int dladdr; 290 | if ((dladdr = (int) dlsym(0, name))) { 291 | ef_add(name, dladdr); 292 | } else { 293 | void *divmod_handle = dlopen("libgcc_s.so.1", 1); 294 | if (!divmod_handle) fatal("failed to open libgcc_s.so.1"); 295 | dladdr = (int) dlsym(divmod_handle, name); 296 | if (!dladdr) fatal("bad function call"); 297 | ef_add(name, dladdr); 298 | } 299 | } 300 | return i; 301 | } 302 | 303 | /* parse next token 304 | * 1. store data into id and then set the id to current lexical form 305 | * 2. 
set tk to appropriate type 306 | */ 307 | void next() 308 | { 309 | char *pp; 310 | 311 | /* using loop to ignore whitespace characters, but characters that 312 | * cannot be recognized by the lexical analyzer are considered blank 313 | * characters, such as '@' and '$'. 314 | */ 315 | while ((tk = *p)) { 316 | ++p; 317 | if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || 318 | (tk == '_')) { 319 | pp = p - 1; 320 | while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || 321 | (*p >= '0' && *p <= '9') || (*p == '_')) 322 | tk = tk * 147 + *p++; // 147 is the magic number generating hash value 323 | tk = (tk << 6) + (p - pp); // hash plus symbol length 324 | // hash value is used for fast comparison. Since it is inaccurate, 325 | // we have to validate the memory content as well. 326 | for (id = sym; id->tk; id++) { // find one free slot in table 327 | if (tk == id->hash && /* if token is found (hash match), overwrite */ 328 | !memcmp(id->name, pp, p - pp)) { 329 | tk = id->tk; 330 | return; 331 | } 332 | } 333 | /* At this point, existing symbol name is not found. 334 | * "id" points to the first unused symbol table entry. 
335 | */ 336 | id->name = pp; 337 | id->hash = tk; 338 | tk = id->tk = Id; // token type identifier 339 | return; 340 | } 341 | /* Calculate the constant */ 342 | // first byte is a number, and it is considered a numerical value 343 | else if (tk >= '0' && tk <= '9') { 344 | /* Parse with 3 conditions: 345 | * 1) not starting with 0 :=> decimal number; 346 | * 2) starting with 0x :=> hex number; 347 | * 3) starting with 0: octal number; 348 | */ 349 | if ((ival = tk - '0')) { 350 | while (*p >= '0' && *p <= '9') 351 | ival = ival * 10 + *p++ - '0'; 352 | } 353 | // first digit is 0 and it starts with 'x', and it is considered 354 | // to be a hexadecimal number 355 | else if (*p == 'x' || *p == 'X') { 356 | while ((tk = *++p) && 357 | ((tk >= '0' && tk <= '9') || 358 | (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F'))) 359 | ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0); 360 | } else { // considered octal 361 | while (*p >= '0' && *p <= '7') 362 | ival = ival * 8 + *p++ - '0'; 363 | } 364 | tk = Num; // token is numeric, return 365 | return; 366 | } 367 | switch (tk) { 368 | case '\n': 369 | /* Take an integer (representing an operation) and print out 370 | * the name of that operation. First thing to say is that "* ++le" 371 | * is the integer representing the operation to perform. 372 | * This basically walks through the array of instructions 373 | * returning each integer in turn. 374 | * 375 | * Starting at the beginning of the line, we have "printf" with 376 | * a format string of "%8.4s". This means printing out the first 4 377 | * characters of the string that we are about to pass next (padded 378 | * to 8 characters). There then follows a string containing all of 379 | * the operation names, in numerical order, padded to 4 characters 380 | * and separated by commas (so the start of each is 5 apart). 381 | * 382 | * Finally, we do a lookup into this string (treating it as an 383 | * array) at offset "* ++le * 5", i.e. 
the integer representing 384 | * the operation multiplied by "5", being the number of characters 385 | * between the start of each operation name). Doing this lookup 386 | * gives us a char, but actually we wanted the pointer to this 387 | * char (as we want printf to print out this char and the 388 | * following 3 chars), so we take the address of this char 389 | * (the "&" at the beginning of the whole expression). 390 | */ 391 | if (src) { 392 | int *base = le; 393 | printf("%d: %.*s", line, p - lp, lp); 394 | lp = p; 395 | while (le < e) { 396 | int off = le - base; // Func IR instruction memory offset 397 | printf("%04d: %8.4s", off, 398 | & "LEA IMM JMP JSR BZ BNZ ENT ADJ LEV " 399 | "LI LC SI SC PSH " 400 | "OR XOR AND EQ NE LT GT LE GE " 401 | "SHL SHR ADD SUB MUL DIV MOD " 402 | "SYSC CLCA" [*++le * 5]); 403 | if (*le <= ADJ) { 404 | ++le; 405 | if (*le > (int) base && *le <= (int) e) 406 | printf(" %04d\n", off + ((*le - (int) le) >> 2) + 1); 407 | else 408 | printf(" %d\n", *le); 409 | } 410 | else if (*le == SYSC) { 411 | printf(" %s\n", ef_cache[*(++le)]->name); 412 | } 413 | else printf("\n"); 414 | } 415 | } 416 | ++line; 417 | case ' ': 418 | case '\t': 419 | case '\v': 420 | case '\f': 421 | case '\r': 422 | break; 423 | case '/': 424 | if (*p == '/') { // comment 425 | case '#': // skip #include statement, preprocessor directives ignored 426 | while (*p != 0 && *p != '\n') ++p; 427 | } else if (*p == '*') { // C-style multiline comments 428 | int t = 0; 429 | for (++p; (*p != 0) && (t == 0); ++p) { 430 | pp = p + 1; 431 | if (*p == '\n') line++; 432 | else if (*p == '*' && *pp == '/') t = 1; 433 | } 434 | ++p; 435 | } else { 436 | if (*p == '=') { ++p; tk = DivAssign; } 437 | else tk = Div; return; 438 | } 439 | break; 440 | case '\'': // quotes start with character (string) 441 | case '"': 442 | pp = data; 443 | // While current character is not `\0` and current character is 444 | // not the quote character. 
445 | while (*p != 0 && *p != tk) { 446 | // If current character is '\', it is escape notation or simply 447 | // '\' character. 448 | if ((ival = *p++) == '\\') { 449 | switch (ival = *p++) { 450 | case 'n': ival = '\n'; break; // new line 451 | case 't': ival = '\t'; break; // horizontal tab 452 | case 'v': ival = '\v'; break; // vertical tab 453 | case 'f': ival = '\f'; break; // form feed 454 | case 'r': ival = '\r'; break; // carriage return 455 | case '0': ival = '\0'; break; // an int with value 0 456 | } 457 | } 458 | // If it is double quotes (string literal), it is considered as 459 | // a string, copying characters to data 460 | if (tk == '"') *data++ = ival; 461 | } 462 | ++p; 463 | // If .text too big rwdata v_addr will overlap it, add that to stay away from .text 464 | if (tk == '"') ival = (int) pp; else tk = Num; 465 | return; 466 | case '=': if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; 467 | case '+': if (*p == '+') { ++p; tk = Inc; } 468 | else if (*p == '=') { ++p; tk = AddAssign; } 469 | else tk = Add; return; 470 | case '-': if (*p == '-') { ++p; tk = Dec; } 471 | else if (*p == '>') { ++p; tk = Arrow; } 472 | else if (*p == '=') { ++p; tk = SubAssign; } 473 | else tk = Sub; return; 474 | case '!': if (*p == '=') { ++p; tk = Ne; } return; 475 | case '<': if (*p == '=') { ++p; tk = Le; } 476 | else if (*p == '<') { 477 | ++p; if (*p == '=') { ++p ; tk = ShlAssign; } else tk = Shl; 478 | } 479 | else tk = Lt; return; 480 | case '>': if (*p == '=') { ++p; tk = Ge; } 481 | else if (*p == '>') { 482 | ++p; if (*p == '=') { ++p ; tk = ShrAssign; } else tk = Shr; 483 | } 484 | else tk = Gt; return; 485 | case '|': if (*p == '|') { ++p; tk = Lor; } 486 | else if (*p == '=') { ++p; tk = OrAssign; } 487 | else tk = Or; return; 488 | case '&': if (*p == '&') { ++p; tk = Lan; } 489 | else if (*p == '=') { ++p; tk = AndAssign; } 490 | else tk = And; return; 491 | case '^': if (*p == '=') { ++p; tk = XorAssign; } else tk = Xor; return; 492 | 
// https://stackoverflow.com/questions/109023/how-to-count-the-number-of-set-bits-in-a-32-bit-integer
/* Count the set bits in the low 32 bits of i.
 *
 * The partial sums are folded with shifts and adds and the result is masked,
 * instead of the usual "multiply by 0x01010101 and shift right by 24":
 * that form overflows a signed 32-bit int, and it returns garbage for
 * inputs above 0xFF when `int` is 64 bits wide (see `#define int long`),
 * because the upper bytes of the product are then no longer discarded.
 */
int popcount(int i)
{
    i = i - ((i >> 1) & 0x55555555);                // add pairs of bits
    i = (i & 0x33333333) + ((i >> 2) & 0x33333333); // quads
    i = (i + (i >> 4)) & 0x0F0F0F0F;                // groups of 8
    i = i + (i >> 8);                               // groups of 16
    i = i + (i >> 16);                              // total in the low byte
    return i & 0x3F;                                // at most 32 fits in 6 bits
}
/* expression parsing
 * lev represents an operator.
 * because each operator `token` is arranged in order of priority, so
 * large `lev` indicates a high priority.
 *
 * Operator precedence (lower first):
 * Assign  =
 * Cond    ?
 * Lor     ||
 * Lan     &&
 * Or      |
 * Xor     ^
 * And     &
 * Eq      ==
 * Ne      !=
 * Lt      <
 * Gt      >
 * Le      <=
 * Ge      >=
 * Shl     <<
 * Shr     >>
 * Add     +
 * Sub     -
 * Mul     *
 * Div     /
 * Mod     %
 * Inc     ++
 * Dec     --
 * Bracket [
 */

// Flag OR-ed into `tk` by the compound-assignment code before it re-enters
// expr(): it tells the prefix switch that the left operand has already been
// parsed and that `tk` (without the flag) is the binary operator to apply.
enum { REENTRANT = 0x10000 };

/* Parse one expression and append its AST below `n`.
 *
 * lev : lowest operator token that may still be consumed; binary operators
 *       whose token value is smaller than lev are left for the caller.
 *
 * On return `n` points to the root node of the expression, `ty` holds its
 * type and `tk` is the first token that is not part of it. Errors terminate
 * the compiler through fatal().
 *
 * Node layouts built here (n[0] is always the node kind):
 *   Num    : n[1] = value
 *   Loc    : n[1] = frame offset
 *   Load   : n[1] = type, the address expression starts at n + 2
 *   binary : n[1] = pointer to the left operand, right operand at n + 2
 *   Assign : n[1] = type, n[2] = pointer to the target address expression,
 *            value expression at n + 3
 *   Cond   : n[1] = condition, n[2] = true branch, n[3] = false branch
 *   call   : n[0] = class (Func/Syscall/ClearCache), n[1] = argument list,
 *            n[2] = d->val, n[3] = argument count
 * Two Num operands are folded into one Num node at parse time.
 */
void expr(int lev)
{
    int tc;
    int t, *b, sz, *c;
    struct ident_s *d;
    struct member_s *m;

    // Prefix part: literals, identifiers, casts and unary operators.
    switch (tk) {
    case 0: fatal("unexpected EOF in expression");
    // directly take an immediate value as the expression value
    // IMM recorded in emit sequence
    case Num: *--n = ival; *--n = Num; next(); ty = INT; break;
    case '"': // string, as a literal in data segment
        *--n = ival; *--n = Num; next();
        // continuous `"` handles C-style multiline text such as `"abc" "def"`
        while (tk == '"') next();
        /* Point "data" to next integer-aligned address.
         * e.g. "-sizeof(int)" is -4, i.e. 0b11111100.
         * This guarantees to leave at least one '\0' after the string.
         *
         * append the end of string character '\0', all the data is defaulted
         * to 0, so just move data one position forward. Specify result value
         * type to char pointer. CHAR + PTR = PTR because CHAR is 0.
         */
        data = (char *) (((int) data + sizeof(int)) & (-sizeof(int)));
        ty = PTR;
        break;
    /* SIZEOF_expr -> 'sizeof' '(' 'TYPE' ')'
     * sizeof is actually an unary operator.
     * Only type names are accepted: `int`, `char`, `struct X` / `union X`,
     * each optionally followed by `*`s.
     * FIXME: not support "sizeof (Id)".
     * For any other token the inner switch consumes nothing, so the check
     * for ')' fails.
     */
    case Sizeof:
        next();
        if (tk != '(') fatal("open parentheses expected in sizeof");
        next();
        ty = INT;
        switch (tk) {
        case Int: next(); break;
        case Char: next(); ty = CHAR; break;
        case Struct:
        case Union:
            next();
            if (tk != Id) fatal("bad struct/union type");
            ty = id->stype; next(); break;
        }
        // multi-level pointers, plus `PTR` for each level
        while (tk == Mul) { next(); ty += PTR; }
        if (tk != ')') fatal("close parentheses expected in sizeof");
        next();
        *--n = ty >= PTR ? sizeof(int) : tsize[ty]; *--n = Num;
        ty = INT;
        break;
    case Id:
        d = id; next();
        // function call
        if (tk == '(') {
            if (d->class < Func || d->class > ClearCache) {
                // An identifier never declared in the source is taken to be
                // an external library function and resolved by name.
                if (d->class != 0) fatal("bad function call");
                // d->name points into the source text and is not
                // NUL-terminated: terminate it temporarily for the lookup.
                // The length is the low 6 bits of the hash.
                int namelen = d->hash & 0x3f;
                char ch = d->name[namelen];
                d->name[namelen] = 0;
                d->val = ef_getidx(d->name) ;
                d->name[namelen] = ch;
                d->class = Syscall;
                d->type = INT;
            }
            next();
            t = 0; b = 0; // parameters count
            // Each argument gets a link cell pointing to the previous
            // argument, so `b` ends up at the last argument parsed.
            while (tk != ')') {
                expr(Assign); *--n = (int) b; b = n; ++t;
                if (tk == ',') {
                    next();
                    if (tk == ')') fatal("unexpected comma in function call");
                } else if (tk != ')') fatal("missing comma in function call");
            }
            next();
            // function or system call id
            *--n = t; *--n = d->val; *--n = (int) b; *--n = d->class;
            ty = d->type;
        }
        // enumeration, only enums have ->class == Num
        else if (d->class == Num) { *--n = d->val; *--n = Num; ty = INT; }
        else {
            // Variable: push its address, then a Load of its type
            switch (d->class) {
            case Loc: case Par: *--n = loc - d->val; *--n = Loc; break;
            case Glo: *--n = d->val; *--n = Num; break;
            default: fatal("undefined variable");
            }
            *--n = ty = d->type; *--n = Load;
        }
        break;
    // Type cast or parenthesis
    case '(':
        next();
        if (tk == Int || tk == Char || tk == Struct || tk == Union) {
            switch (tk) {
            case Int: next(); t = INT; break;
            case Char: next(); t = CHAR; break;
            default:
                next();
                if (tk != Id) fatal("bad struct/union type");
                t = id->stype; next(); break;
            }
            // t: pointer
            while (tk == Mul) { next(); t += PTR; }
            if (tk != ')') fatal("bad cast");
            next();
            expr(Inc); // cast has precedence as Inc(++)
            ty = t;    // a cast only changes the static type, no node is added
        } else {
            expr(Assign);
            while (tk == ',') { next(); expr(Assign); }
            if (tk != ')') fatal("close parentheses expected");
            next();
        }
        break;
    case Mul: // "*", dereferencing the pointer operation
        next();
        expr(Inc); // dereference has the same precedence as Inc(++)
        if (ty < PTR) fatal("bad dereference");
        ty -= PTR;
        if (ty < CHAR || ty >= PTR2) fatal("unexpected type");
        *--n = ty; *--n = Load;
        break;
    case And: // "&", take the address operation
        /* The operand must end in a Load node; dropping the two Load cells
         * leaves the address expression that the Load would have read from.
         */
        next(); expr(Inc);
        if (*n != Load) fatal("bad address-of");
        n += 2;
        ty += PTR;
        break;
    case '!': // "!x" is equivalent to "x == 0"
        next(); expr(Inc);
        if (*n == Num) n[1] = !n[1];
        else { *--n = 0; *--n = Num; --n; *n = (int) (n + 3); *--n = Eq; }
        ty = INT;
        break;
    case '~': // "~x" is equivalent to "x ^ -1"
        next(); expr(Inc);
        if (*n == Num) n[1] = ~n[1];
        else { *--n = -1; *--n = Num; --n; *n = (int) (n + 3); *--n = Xor; }
        ty = INT;
        break;
    case Add: // unary plus
        next(); expr(Inc); ty = INT;
        break;
    case Sub: // unary minus: "-x" is built as "x * -1"
        next();
        expr(Inc);
        if (*n == Num) n[1] = -n[1];
        else { *--n = -1; *--n = Num; --n; *n = (int) (n + 3); *--n = Mul; }
        ty = INT;
        break;
    // NOTE(review): a leading '/' or '%' is accepted here without consuming
    // anything and is then handled as a binary operator by the loop below,
    // using whatever node `n` currently points to - confirm this is intended.
    case Div:
    case Mod:
        break;
    // processing ++x and --x. x-- and x++ is handled later
    case Inc:
    case Dec:
        t = tk; next(); expr(Inc);
        if (*n != Load) fatal("bad lvalue in pre-increment");
        *n = t; // turn the Load node into an Inc/Dec node
        break;
    default:
        // Re-entry from a compound assignment: the left operand is already
        // on the AST, strip the flag and continue with the operator.
        if (tk & REENTRANT) tk ^= REENTRANT;
        else fatal("bad expression");
    }

    // "precedence climbing" or "Top Down Operator Precedence" method
    while (tk >= lev) {
        // Plain ASCII tokens are smaller than Num (128) and therefore never
        // enter this loop. `ty` and `n` change during recursion, so back up
        // the type and the node of the left operand.
        t = ty; b = n;
        switch (tk) {
        case Assign:
            next();
            // the left operand must end in a Load; b + 2 is the address
            // expression that the Load reads from
            if (*n != Load) fatal("bad lvalue in assignment");
            // get the value of the right part `expr` as the result of `a=expr`
            expr(Assign); *--n = (int) (b + 2); *--n = ty = t; *--n = Assign;
            break;
        case OrAssign: // right associated
        case XorAssign:
        case AndAssign:
        case ShlAssign:
        case ShrAssign:
        case AddAssign:
        case SubAssign:
        case MulAssign:
        case DivAssign:
        case ModAssign:
            // "a op= b" is built as "a = a op b": push a second Load of the
            // target, map the token to its binary operator and re-enter
            // expr() with the REENTRANT flag set.
            *--n=';'; *--n = t; *--n = Load;
            if (tk < ShlAssign) tk = Or + (tk - OrAssign);
            else tk = Shl + (tk - ShlAssign);
            tk |= REENTRANT; compound = 1; expr(Assign);
            *--n = (int) (b + 2); *--n = ty = t; *--n = Assign;
            break;
        case Cond: // `x?a:b` is similar to if except that it relies on else
            next(); expr(Assign);
            if (tk != ':') fatal("conditional missing colon");
            next(); c = n;
            expr(Cond); --n;
            *n = (int) (n + 1); *--n = (int) c; *--n = (int) b; *--n = Cond;
            break;
        case Lor: // short circuit, the logical or
            next(); expr(Lan);
            if (*n == Num && *b == Num) n[1] = b[1] || n[1];
            else { *--n = (int) b; *--n = Lor; }
            ty = INT;
            break;
        case Lan: // short circuit, logic and
            next(); expr(Or);
            if (*n == Num && *b == Num) n[1] = b[1] && n[1];
            else { *--n = (int) b; *--n = Lan; }
            ty = INT;
            break;
        case Or: // push the current value, calculate the right value
            next();
            // for a compound assignment the whole right-hand side is the
            // operand, so it is parsed at Assign level
            if (compound) { compound = 0; expr(Assign); }
            else expr(Xor);
            if (*n == Num && *b == Num) n[1] = b[1] | n[1];
            else { *--n = (int) b; *--n = Or; }
            ty = INT;
            break;
        case Xor:
            next();
            if (compound) { compound = 0; expr(Assign); }
            else expr(And);
            if (*n == Num && *b == Num) n[1] = b[1] ^ n[1];
            else { *--n = (int) b; *--n = Xor; }
            ty = INT;
            break;
        case And:
            next();
            if (compound) { compound = 0; expr(Assign); }
            else expr(Eq);
            if (*n == Num && *b == Num) n[1] = b[1] & n[1];
            else { *--n = (int) b; *--n = And; }
            ty = INT;
            break;
        case Eq:
            next(); expr(Lt);
            if (*n == Num && *b == Num) n[1] = b[1] == n[1];
            else { *--n = (int) b; *--n = Eq; }
            ty = INT;
            break;
        case Ne:
            next(); expr(Lt);
            if (*n == Num && *b == Num) n[1] = b[1] != n[1];
            else { *--n = (int) b; *--n = Ne; }
            ty = INT;
            break;
        case Lt:
            next(); expr(Shl);
            if (*n == Num && *b == Num) n[1] = b[1] < n[1];
            else { *--n = (int) b; *--n = Lt; }
            ty = INT;
            break;
        case Gt:
            next(); expr(Shl);
            if (*n == Num && *b == Num) n[1] = b[1] > n[1];
            else { *--n = (int) b; *--n = Gt; }
            ty = INT;
            break;
        case Le:
            next(); expr(Shl);
            if (*n == Num && *b == Num) n[1] = b[1] <= n[1];
            else { *--n = (int) b; *--n = Le; }
            ty = INT;
            break;
        case Ge:
            next(); expr(Shl);
            if (*n == Num && *b == Num) n[1] = b[1] >= n[1];
            else { *--n = (int) b; *--n = Ge; }
            ty = INT;
            break;
        case Shl:
            next();
            if (compound) { compound = 0; expr(Assign); }
            else expr(Add);
            if (*n == Num && *b == Num) {
                n[1] = b[1] << n[1];
            } else { *--n = (int) b; *--n = Shl; }
            ty = INT;
            break;
        case Shr:
            next();
            if (compound) { compound = 0; expr(Assign); }
            else expr(Add);
            if (*n == Num && *b == Num) {
                n[1] = b[1] >> n[1];
            } else { *--n = (int) b; *--n = Shr; }
            ty = INT;
            break;
        case Add:
            next();
            if (compound) { compound = 0; expr(Assign); }
            else expr(Mul);
            // tc != 0: the left operand determines the result type, so the
            // right operand is the one scaled for pointer arithmetic;
            // otherwise the roles are swapped.
            tc = ((t | ty) & (PTR | PTR2)) ? (t >= PTR) : (t >= ty);
            c = n; if (tc) ty = t;
            // sz: size of the pointed-to element, 1 for non-pointers
            sz = (ty >= PTR2) ? sizeof(int) :
                                ((ty >= PTR) ? tsize[ty - PTR] : 1);
            // scale a constant operand at parse time
            if (*n == Num && tc) { n[1] *= sz; sz = 1; }
            else if (*b == Num && !tc) { b[1] *= sz; sz = 1; }
            if (*n == Num && *b == Num) n[1] += b[1];
            else if (sz != 1) {
                *--n = sz; *--n = Num;
                *--n = (int) (tc ? c : b); *--n = Mul;
                *--n = (int) (tc ? b : c); *--n = Add;
            }
            else { *--n = (int) b; *--n = Add; }
            break;
        case Sub: // 4 cases: ptr-ptr, ptr-int, int-ptr (err), int-int
            next();
            if (compound) { compound = 0; expr(Assign); }
            else expr(Mul); // t = left type, ty = right type
            if (t < PTR && ty >= PTR) fatal("bad pointer subtraction");
            if (t >= PTR) { // left arg is ptr
                sz = (t >= PTR2) ? sizeof(int) : tsize[t - PTR];
                if (ty >= PTR) { // ptr - ptr
                    if (t != ty) fatal("mismatched ptr type subtraction");
                    if (*n == Num && *b == Num) n[1] = (b[1] - n[1]) / sz;
                    else {
                        *--n = (int) b; *--n = Sub;
                        // divide the byte difference by the element size
                        if (sz > 1) {
                            if ((sz & (sz - 1)) == 0) { // 2^n
                                *--n = popcount(sz - 1); *--n = Num;
                                --n; *n = (int) (n + 3); *--n = Shr;
                            } else {
                                *--n = sz; *--n = Num; --n; *n = (int) (n + 3);
                                // make sure the division helper is in the
                                // external function cache
                                *--n = Div; ef_getidx("__aeabi_idiv");
                            }
                        }
                    }
                    ty = INT;
                } else { // ptr - int
                    if (*n == Num) {
                        n[1] *= sz;
                        if (*b == Num) n[1] = b[1] - n[1];
                        else { *--n = (int) b; *--n = Sub; }
                    } else {
                        // scale the integer by the element size
                        if (sz > 1) {
                            if ((sz & (sz - 1)) == 0) { // 2^n
                                *--n = popcount(sz - 1); *--n = Num;
                                --n; *n = (int) (n + 3); *--n = Shl;
                            }
                            else {
                                *--n = sz; *--n = Num;
                                --n; *n = (int) (n + 3); *--n = Mul;
                            }
                        }
                        *--n = (int) b; *--n = Sub;
                    }
                    ty = t;
                }
            } else { // int - int
                if (*n == Num && *b == Num) n[1] = b[1] - n[1];
                else { *--n = (int) b; *--n = Sub; }
                ty = INT;
            }
            break;
        case Mul:
            next();
            if (compound) { compound = 0; expr(Assign); }
            else expr(Inc);
            if (*n == Num && *b == Num) n[1] *= b[1];
            else {
                *--n = (int) b;
                // n[1]/n[2] are now the kind and value of the right operand:
                // multiplying by a constant power of two becomes a shift
                if (n[1] == Num && n[2] > 0 && (n[2] & (n[2] - 1)) == 0) {
                    n[2] = popcount(n[2] - 1); *--n = Shl; // 2^n
                }
                else *--n = Mul;
            }
            ty = INT;
            break;
        case Inc:
        case Dec:
            // Post-increment/decrement: the Load becomes an Inc/Dec node and
            // the opposite operation is applied to its result, which yields
            // the value the variable had before the update.
            sz = ty >= PTR2 ? sizeof(int) : ty >= PTR ? tsize[ty - PTR] : 1;
            if (*n != Load) fatal("bad lvalue in post-increment");
            *n = tk;
            *--n = sz; *--n = Num;
            *--n = (int) b; *--n = (tk == Inc) ? Sub : Add;
            next();
            break;
        case Div:
            next();
            if (compound) { compound = 0; expr(Assign); }
            else expr(Inc);
            if (*n == Num && *b == Num) n[1] = b[1] / n[1];
            else {
                *--n = (int) b;
                // NOTE(review): dividing by a constant power of two is
                // turned into a right shift. For a negative dividend a shift
                // does not round toward zero as C division does (-7 / 2 is
                // -3, -7 >> 1 is not) - confirm this is acceptable.
                if (n[1] == Num && n[2] > 0 && (n[2] & (n[2] - 1)) == 0) {
                    n[2] = popcount(n[2] - 1); *--n = Shr; // 2^n
                } else {
                    *--n = Div;
                    ef_getidx("__aeabi_idiv");
                }
            }
            ty = INT;
            break;
        case Mod:
            next();
            if (compound) { compound = 0; expr(Assign); }
            else expr(Inc);
            if (*n == Num && *b == Num) n[1] = b[1] % n[1];
            else {
                *--n = (int) b;
                // NOTE(review): "x % 2^k" is turned into "x & (2^k - 1)".
                // For a negative x the C result is <= 0 while the mask is
                // never negative - confirm this is acceptable.
                if (n[1] == Num && n[2] > 0 && (n[2] & (n[2] - 1)) == 0) {
                    --n[2]; *--n = And; // 2^n
                } else {
                    *--n = Mod;
                    ef_getidx("__aeabi_idivmod");
                }
            }
            ty = INT;
            break;
        case Dot:
            // "a.m" is handled as "(&a)->m": take the address of the struct
            // by dropping its Load node, then fall through to Arrow.
            ty += PTR;
            if (n[0] == Load && n[1] > INT && n[1] < PTR) n += 2; // struct
        case Arrow:
            if (ty <= PTR+INT || ty >= PTR2) fatal("structure expected");
            next();
            if (tk != Id) fatal("structure member expected");
            m = members[ty - PTR]; while (m && m->id != id) m = m->next;
            if (!m) fatal("structure member not found");
            if (m->offset) {
                *--n = m->offset; *--n = Num; --n; *n = (int) (n + 3);
                *--n = Add;
            }
            ty = m->type;
            if (ty <= INT || ty >= PTR) *--n = (ty == CHAR) ? CHAR : INT;
            else *--n = ty; // struct, not struct pointer
            *--n = Load;
            next();
            break;
        case Bracket:
            // "a[i]" is built as "*(a + i * sizeof(element))"
            next(); expr(Assign);
            if (tk != ']') fatal("close bracket expected");
            next();
            if (t < PTR) fatal("pointer type expected");
            sz = (t = t - PTR) >= PTR ? sizeof(int) : tsize[t];
            if (sz > 1) {
                if (*n == Num) n[1] *= sz;
                else {
                    *--n = sz; *--n = Num; --n; *n = (int) (n + 3); *--n = Mul;
                }
            }
            if (*n == Num && *b == Num) n[1] += b[1];
            else { *--n = (int) b; *--n = Add; }
            if ((ty = t) <= INT || ty >= PTR) *--n = (ty == CHAR) ? CHAR : INT;
            else *--n = ty; // struct, not struct pointer
            *--n = Load;
            break;
        default:
            printf("%d: compiler error tk=%d\n", line, tk); exit(-1);
        }
    }
}
LC : LI; 1038 | break; 1039 | case Assign: // assign the value to variables 1040 | gen((int *) n[2]); *++e = PSH; gen(n + 3); 1041 | // Add SC/SI instruction to save value in register to variable address 1042 | // held on stack. 1043 | *++e = (n[1] == CHAR) ? SC : SI; 1044 | break; 1045 | // increment or decrement variables 1046 | case Inc: 1047 | case Dec: 1048 | gen(n + 2); 1049 | *++e = PSH; *++e = (n[1] == CHAR) ? LC : LI; *++e = PSH; 1050 | *++e = IMM; *++e = (n[1] >= PTR2) ? sizeof(int) : 1051 | n[1] >= PTR ? tsize[n[1] - PTR] : 1; 1052 | *++e = (i == Inc) ? ADD : SUB; 1053 | *++e = (n[1] == CHAR) ? SC : SI; 1054 | break; 1055 | case Cond: // if else condition case 1056 | gen((int *) n[1]); // condition 1057 | // Add jump-if-zero instruction "BZ" to jump to false branch. 1058 | // Point "b" to the jump address field to be patched later. 1059 | *++e = BZ; b = ++e; 1060 | gen((int *) n[2]); // expression 1061 | // Patch the jump address field pointed to by "b" to hold the address 1062 | // of false branch. "+ 3" counts the "JMP" instruction added below. 1063 | // 1064 | // Add "JMP" instruction after true branch to jump over false branch. 1065 | // Point "b" to the jump address field to be patched later. 1066 | if (n[3]) { 1067 | *b = (int) (e + 3); *++e = JMP; b = ++e; gen((int *) n[3]); 1068 | } // else statement 1069 | // Patch the jump address field pointed to by "d" to hold the address 1070 | // past the false branch. 1071 | *b = (int) (e + 1); 1072 | break; 1073 | // operators 1074 | /* If current token is logical OR operator: 1075 | * Add jump-if-nonzero instruction "BNZ" to implement short circuit. 1076 | * Point "b" to the jump address field to be patched later. 1077 | * Parse RHS expression. 1078 | * Patch the jump address field pointed to by "b" to hold the address past 1079 | * the RHS expression. 
1080 | */ 1081 | case Lor: gen((int *) n[1]); *++e = BNZ; 1082 | b = ++e; gen(n + 2); *b = (int) (e + 1); break; 1083 | case Lan: gen((int *) n[1]); *++e = BZ; 1084 | b = ++e; gen(n + 2); *b = (int) (e + 1); break; 1085 | /* If current token is bitwise OR operator: 1086 | * Add "PSH" instruction to push LHS value in register to stack. 1087 | * Parse RHS expression. 1088 | * Add "OR" instruction to compute the result. 1089 | */ 1090 | case Or: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = OR; break; 1091 | case Xor: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = XOR; break; 1092 | case And: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = AND; break; 1093 | case Eq: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = EQ; break; 1094 | case Ne: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = NE; break; 1095 | case Lt: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = LT; break; 1096 | case Gt: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = GT; break; 1097 | case Le: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = LE; break; 1098 | case Ge: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = GE; break; 1099 | case Shl: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = SHL; break; 1100 | case Shr: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = SHR; break; 1101 | case Add: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = ADD; break; 1102 | case Sub: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = SUB; break; 1103 | case Mul: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = MUL; break; 1104 | case Div: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = DIV; break; 1105 | case Mod: gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = MOD; break; 1106 | case Func: 1107 | case Syscall: 1108 | case ClearCache: 1109 | c = b = (int *) n[1]; k = 0; l = 1; 1110 | // how many parameters 1111 | while (b && l) { ++k; if (!(int *) *b) l = 0; else b = (int *) *b; } 1112 | j = 0; a = malloc(sizeof(int *) * k); b = c; l = 1; 1113 | while (b && l) { 1114 | a[j] = (int) b; 
1115 | if (!(int *) *b) l = 0; else b = (int *) *b; ++j; 1116 | } 1117 | if (j > 0) --j; 1118 | // push parameters 1119 | while (j >= 0 && k > 0) { 1120 | gen(b + 1); *++e = PSH; --j; b = (int *) a[j]; 1121 | } 1122 | free(a); 1123 | if (i == Syscall) *++e = SYSC; 1124 | if (i == Func) *++e = JSR; 1125 | *++e = n[2]; 1126 | if (n[3]) { *++e = ADJ; *++e = n[3]; } 1127 | break; 1128 | case While: 1129 | case DoWhile: 1130 | if (i == While) { *++e = JMP; a = ++e; } 1131 | d = (e + 1); 1132 | b = brks; brks = 0; 1133 | c = cnts; cnts = 0; 1134 | gen((int *) n[1]); // loop body 1135 | if (i == While) *a = (int) (e + 1); 1136 | while (cnts) { t = (int *) *cnts; *cnts = (int) (e + 1); cnts = t; } 1137 | cnts = c; 1138 | gen((int *) n[2]); // condition 1139 | *++e = BNZ; *++e = (int) d; 1140 | while (brks) { t = (int *) *brks; *brks = (int) (e + 1); brks = t; } 1141 | brks = b; 1142 | break; 1143 | case For: 1144 | gen((int *) n[4]); // init 1145 | *++e = JMP; a = ++e; 1146 | d = (e + 1); 1147 | b = brks; brks = 0; 1148 | c = cnts; cnts = 0; 1149 | gen((int *) n[3]); // loop body 1150 | while (cnts) { t = (int *) *cnts; *cnts = (int) (e + 1); cnts = t; } 1151 | cnts = c; 1152 | gen((int *) n[2]); // increment 1153 | *a = (int) (e + 1); 1154 | gen((int *) n[1]); // condition 1155 | *++e = BNZ; *++e = (int) d; 1156 | while (brks) { t = (int *) *brks; *brks = (int) (e + 1); brks = t; } 1157 | brks = b; 1158 | break; 1159 | case Switch: 1160 | gen((int *) n[1]); // condition 1161 | a = cas; *++e = JMP; cas = ++e; 1162 | b = brks; d = def; brks = def = 0; 1163 | gen((int *) n[2]); // case statement 1164 | // deal with no default inside switch case 1165 | *cas = def ? 
(int) def : (int) (e + 1); cas = a; 1166 | while (brks) { t = (int *) * brks; *brks = (int) (e + 1); brks = t; } 1167 | brks = b; def = d; 1168 | break; 1169 | case Case: 1170 | *++e = JMP; ++e; 1171 | a = 0; 1172 | *e = (int) (e + 7); *++e = PSH; i = *cas; *cas = (int) e; 1173 | gen((int *) n[1]); // condition 1174 | if (e[-1] != IMM) fatal("bad case immediate"); 1175 | *++e = SUB; *++e = BNZ; cas = ++e; *e = i + e[-3]; 1176 | if (*(int *) n[2] == Switch) a = cas; 1177 | gen((int *) n[2]); // expression 1178 | if (a != 0) cas = a; 1179 | break; 1180 | case Break: 1181 | // set jump locate 1182 | *++e = JMP; *++e = (int) brks; brks = e; 1183 | break; 1184 | case Continue: 1185 | // set jump locate 1186 | *++e = JMP; *++e = (int) cnts; cnts = e; 1187 | break; 1188 | case Goto: 1189 | label = (struct ident_s *) n[1]; 1190 | *++e = JMP; *++e = label->val; 1191 | if (label->class == 0) label->val = (int) e; // Define label address later 1192 | break; 1193 | case Default: 1194 | def = e + 1; 1195 | gen((int *) n[1]); break; 1196 | case Return: 1197 | if (n[1]) gen((int *) n[1]); *++e = LEV; break; // parse return AST 1198 | case '{': 1199 | // parse expression or statement from AST 1200 | gen((int *) n[1]); gen(n + 2); break; 1201 | case Enter: *++e = ENT; *++e = n[1]; gen(n + 2); 1202 | if (*e != LEV) *++e = LEV; break; 1203 | default: 1204 | if (i != ';') { 1205 | printf("%d: compiler error gen=%d\n", line, i); exit(-1); 1206 | } 1207 | } 1208 | } 1209 | 1210 | void check_label(int **tt) 1211 | { 1212 | if (tk != Id) return; 1213 | char *ss = p; 1214 | while (*ss == ' ' || *ss == '\t') ++ss; 1215 | if (*ss == ':') { 1216 | if (id->class != 0 || !(id->type == 0 || id->type == -1)) 1217 | fatal("invalid label"); 1218 | id->type = -1 ; // hack for id->class deficiency 1219 | *--n = (int) id; *--n = Label; 1220 | *--n = (int) *tt; *--n = '{'; *tt = n; 1221 | next(); next(); 1222 | } 1223 | } 1224 | 1225 | // statement parsing (syntax analysis, except for declarations) 
1226 | void stmt(int ctx) 1227 | { 1228 | int *a, *b, *c, *d; 1229 | int i, j, atk; 1230 | int bt; 1231 | 1232 | switch (tk) { 1233 | case Enum: 1234 | next(); 1235 | // If current token is not "{", it means having enum type name. 1236 | // Skip the enum type name. 1237 | if (tk != '{') next(); 1238 | if (tk == '{') { 1239 | next(); 1240 | i = 0; // Enum value starts from 0 1241 | while (tk != '}') { 1242 | // Current token should be enum name. 1243 | // If current token is not identifier, stop parsing. 1244 | if (tk != Id) fatal("bad enum identifier"); 1245 | next(); 1246 | if (tk == Assign) { 1247 | next(); 1248 | expr(Cond); 1249 | if (*n != Num) fatal("bad enum initializer"); 1250 | i = n[1]; // Set enum value 1251 | } 1252 | /* "id" is pointing to the enum name's symbol table entry. 1253 | * Set the symbol table entry's symbol type be "Num". 1254 | * Set the symbol table entry's associated value type be "INT". 1255 | * Set the symbol table entry's associated value be enum value. 1256 | */ 1257 | id->class = Num; id->type = INT; id->val = i++; 1258 | if (tk == ',') next(); // If current token is ",", skip. 
1259 | } 1260 | next(); // Skip "}" 1261 | } else if (tk == Id) { 1262 | id->type = INT; id->class = ctx; id->val = ld++; 1263 | next(); 1264 | } 1265 | return; 1266 | case Int: 1267 | case Char: 1268 | case Struct: 1269 | case Union: 1270 | switch (tk) { 1271 | case Struct: 1272 | case Union: 1273 | atk = tk; next(); 1274 | if (tk == Id) { 1275 | if (!id->stype) id->stype = tnew++; 1276 | bt = id->stype; 1277 | next(); 1278 | } else { 1279 | bt = tnew++; 1280 | } 1281 | if (tk == '{') { 1282 | tsize[bt] = 0; // for unions 1283 | next(); 1284 | if (members[bt]) fatal("duplicate structure definition"); 1285 | i = 0; 1286 | while (tk != '}') { 1287 | int mbt = INT; 1288 | switch (tk) { 1289 | case Int: next(); break; 1290 | case Char: next(); mbt = CHAR; break; 1291 | case Struct: 1292 | case Union: 1293 | next(); 1294 | if (tk != Id) fatal("bad struct/union declaration"); 1295 | mbt = id->stype; 1296 | next(); break; 1297 | } 1298 | while (tk != ';') { 1299 | ty = mbt; 1300 | // If the beginning of * is a pointer type, 1301 | // then type plus `PTR` indicates what kind of pointer 1302 | while (tk == Mul) { next(); ty += PTR; } 1303 | if (tk != Id) fatal("bad struct member definition"); 1304 | struct member_s *m = malloc(sizeof(struct member_s)); 1305 | m->id = id; 1306 | m->offset = i; 1307 | m->type = ty; 1308 | m->next = members[bt]; 1309 | members[bt] = m; 1310 | i += (ty >= PTR) ? sizeof(int) : tsize[ty]; 1311 | i = (i + 3) & -4; 1312 | if (atk == Union) { if (i > tsize[bt]) tsize[bt] = i ; i = 0; } 1313 | next(); 1314 | if (tk == ',') next(); 1315 | } 1316 | next(); 1317 | } 1318 | next(); 1319 | if (atk != Union) tsize[bt] = i; 1320 | } 1321 | break; 1322 | case Int: 1323 | case Char: 1324 | bt = (tk == Int) ? INT : CHAR; // basetype 1325 | next(); 1326 | break; 1327 | } 1328 | /* parse statement such as 'int a, b, c;' 1329 | * "enum" finishes by "tk == ';'", so the code below will be skipped. 1330 | * While current token is not statement end or block end. 
1331 | */ 1332 | b = 0; 1333 | while (tk != ';' && tk != '}' && tk != ',' && tk != ')') { 1334 | ty = bt; 1335 | // If the beginning of * is a pointer type, then type plus `PTR` 1336 | // indicates what kind of pointer 1337 | while (tk == Mul) { next(); ty += PTR; } 1338 | switch (ctx) { 1339 | case Glo: 1340 | if (tk != Id) fatal("bad global declaration"); 1341 | if (id->class >= ctx) fatal("duplicate global definition"); 1342 | break; 1343 | case Loc: 1344 | if (tk != Id) fatal("bad local declaration"); 1345 | if (id->class >= ctx) fatal("duplicate local definition"); 1346 | break; 1347 | } 1348 | next(); 1349 | id->type = ty; 1350 | if (tk == '(') { // function 1351 | if (b != 0) fatal("func decl can't be mixed with var decl(s)"); 1352 | if (ctx != Glo) fatal("nested function"); 1353 | if (ty > INT && ty < PTR) fatal("return type can't be struct"); 1354 | id->class = Func; // type is function 1355 | // "+ 1" is because the code to add instruction always uses "++e". 1356 | id->val = (int) (e + 1); // function Pointer? offset/address 1357 | id->type = ty; 1358 | next(); ld = 0; // "ld" is parameter's index. 1359 | while (tk != ')') { stmt(Par); if (tk == ',') next(); } 1360 | next(); 1361 | if (tk != '{') fatal("bad function definition"); 1362 | loc = ++ld; 1363 | next(); 1364 | // Not declare and must not be function, analyze inner block. 
1365 | // e represents the address which will store pc 1366 | // (ld - loc) indicates memory size to allocate 1367 | *--n = ';'; 1368 | while (tk != '}') { 1369 | int *t = n; check_label(&t); stmt(Loc); 1370 | if (t != n) { *--n = (int) t; *--n = '{'; } 1371 | } 1372 | *--n = ld - loc; *--n = Enter; 1373 | cas = 0; 1374 | gen(n); 1375 | id = sym; // unwind symbol table locals 1376 | while (id->tk) { 1377 | if (id->class == Loc || id->class == Par) { 1378 | id->class = id->hclass; 1379 | id->type = id->htype; 1380 | id->val = id->hval; 1381 | } 1382 | else if (id->class == Label) { // clear id for next func 1383 | id->class = 0; id->val = 0; id->type = 0; 1384 | } 1385 | else if (id->class == 0 && id->type == -1) { 1386 | printf("%d: label %.*s not defined\n", 1387 | line, id->hash & 0x3f, id->name); 1388 | exit(-1); 1389 | } 1390 | id++; 1391 | } 1392 | } 1393 | else { 1394 | int sz = ((ty <= INT || ty >= PTR) ? sizeof(int) : tsize[ty]); 1395 | id->hclass = id->class; id->class = ctx; 1396 | id->htype = id->type; id->type = ty; 1397 | id->hval = id->val; 1398 | if (ctx == Glo) { id->val = (int) data; data += sz; } 1399 | else if (ctx == Loc) { id->val = (ld += sz / sizeof(int)); } 1400 | else if (ctx == Par) { 1401 | if (ty > INT && ty < PTR) // local struct decl 1402 | fatal("struct parameters must be pointers"); 1403 | id->val = ld++; 1404 | } 1405 | if (ctx == Loc && tk == Assign) { 1406 | int ptk = tk; 1407 | if (b == 0) *--n = ';'; 1408 | b = n; *--n = loc - id->val; *--n = Loc; 1409 | next(); a = n; expr(ptk); 1410 | *--n = (int)a; *--n = ty; *--n = Assign; 1411 | *--n = (int) b; *--n = '{'; 1412 | } 1413 | } 1414 | if (ctx != Par && tk == ',') next(); 1415 | } 1416 | return; 1417 | case If: 1418 | next(); 1419 | if (tk != '(') fatal("open parentheses expected"); 1420 | next(); 1421 | expr(Assign); a = n; 1422 | if (tk != ')') fatal("close parentheses expected"); 1423 | next(); 1424 | stmt(ctx); 1425 | b = n; 1426 | if (tk == Else) { next(); stmt(ctx); d = n; 
} else d = 0; 1427 | *--n = (int)d; *--n = (int) b; *--n = (int) a; *--n = Cond; 1428 | return; 1429 | case While: 1430 | next(); 1431 | if (tk != '(') fatal("open parentheses expected"); 1432 | next(); 1433 | expr(Assign); b = n; // condition 1434 | if (tk != ')') fatal("close parentheses expected"); 1435 | next(); 1436 | ++brkc; ++cntc; 1437 | stmt(ctx); a = n; // parse body of "while" 1438 | --brkc; --cntc; 1439 | *--n = (int) b; *--n = (int) a; *--n = While; 1440 | return; 1441 | case DoWhile: 1442 | next(); 1443 | ++brkc; ++cntc; 1444 | stmt(ctx); a = n; // parse body of "do-while" 1445 | --brkc; --cntc; 1446 | if (tk != While) fatal("while expected"); 1447 | next(); 1448 | if (tk != '(') fatal("open parentheses expected"); 1449 | next(); 1450 | *--n = ';'; 1451 | expr(Assign); b = n; 1452 | if (tk != ')') fatal("close parentheses expected"); 1453 | next(); 1454 | *--n = (int) b; *--n = (int) a; *--n = DoWhile; 1455 | return; 1456 | case Switch: 1457 | i = 0; j = 0; 1458 | if (cas) j = (int) cas; 1459 | cas = &i; 1460 | next(); 1461 | if (tk != '(') fatal("open parentheses expected"); 1462 | next(); 1463 | expr(Assign); 1464 | a = n; 1465 | if (tk != ')') fatal("close parentheses expected"); 1466 | next(); 1467 | ++swtc; ++brkc; 1468 | stmt(ctx); 1469 | --swtc; --brkc; 1470 | b = n; 1471 | *--n = (int) b; *--n = (int) a; *--n = Switch; 1472 | if (j) cas = (int *) j; 1473 | return; 1474 | case Case: 1475 | if (!swtc) fatal("case-statement outside of switch"); 1476 | i = *cas; 1477 | next(); 1478 | expr(Or); 1479 | a = n; 1480 | if (*n != Num) fatal("bad case immediate"); 1481 | j = n[1]; n[1] -= i; *cas = j; 1482 | *--n = ';'; 1483 | if (tk != ':') fatal("colon expected"); 1484 | next(); 1485 | stmt(ctx); 1486 | b = n; 1487 | *--n = (int) b;*--n = (int) a; *--n = Case; 1488 | return; 1489 | case Break: 1490 | if (!brkc) fatal("misplaced break statement"); 1491 | next(); 1492 | if (tk != ';') fatal("semicolon expected"); 1493 | next(); 1494 | *--n = Break; 1495 
| return; 1496 | case Continue: 1497 | if (!cntc) fatal("misplaced continue statement"); 1498 | next(); 1499 | if (tk != ';') fatal("semicolon expected"); 1500 | next(); 1501 | *--n = Continue; 1502 | return; 1503 | case Default: 1504 | if (!swtc) fatal("default-statement outside of switch"); 1505 | next(); 1506 | if (tk != ':') fatal("colon expected"); 1507 | next(); 1508 | stmt(ctx); a = n; 1509 | *--n = (int) a; *--n = Default; 1510 | return; 1511 | // RETURN_stmt -> 'return' expr ';' | 'return' ';' 1512 | case Return: 1513 | a = 0; next(); 1514 | if (tk != ';') { expr(Assign); a = n; } 1515 | *--n = (int) a; *--n = Return; 1516 | if (tk != ';') fatal("semicolon expected"); 1517 | next(); 1518 | return; 1519 | /* For iteration is implemented as: 1520 | * Init -> Cond -> Bz to end -> Jmp to Body 1521 | * After -> Jmp to Cond -> Body -> Jmp to After 1522 | */ 1523 | case For: 1524 | next(); 1525 | if (tk != '(') fatal("open parentheses expected"); 1526 | next(); 1527 | *--n = ';'; 1528 | if (tk != ';') expr(Assign); 1529 | while (tk == ',') { 1530 | int *f = n; next(); expr(Assign); *--n = (int) f; *--n = '{'; 1531 | } 1532 | d = n; 1533 | if (tk != ';') fatal("semicolon expected"); 1534 | next(); 1535 | *--n = ';'; 1536 | expr(Assign); a = n; // Point to entry of for cond 1537 | if (tk != ';') fatal("semicolon expected"); 1538 | next(); 1539 | *--n = ';'; 1540 | if (tk != ')') expr(Assign); 1541 | while (tk == ',') { 1542 | int *g = n; next(); expr(Assign); *--n = (int) g; *--n = '{'; 1543 | } 1544 | b = n; 1545 | if (tk != ')') fatal("close parentheses expected"); 1546 | next(); 1547 | ++brkc; ++cntc; 1548 | stmt(ctx); c = n; 1549 | --brkc; --cntc; 1550 | *--n = (int) d; *--n = (int) c; *--n = (int) b; *--n = (int) a; 1551 | *--n = For; 1552 | return; 1553 | case Goto: 1554 | next(); 1555 | if (tk != Id || (id->type != 0 && id->type != -1) 1556 | || (id->class != Label && id->class != 0)) 1557 | fatal("goto expects label"); 1558 | id->type = -1; // hack for 
id->class deficiency 1559 | *--n = (int) id; *--n = Goto; next(); 1560 | if (tk != ';') fatal("semicolon expected"); 1561 | next(); 1562 | return; 1563 | // stmt -> '{' stmt '}' 1564 | case '{': 1565 | next(); 1566 | *--n = ';'; 1567 | while (tk != '}') { 1568 | a = n; check_label(&a); stmt(ctx); 1569 | if (a != n) { *--n = (int) a; *--n = '{'; } 1570 | } 1571 | next(); 1572 | return; 1573 | // stmt -> ';' 1574 | case ';': 1575 | next(); 1576 | *--n = ';'; 1577 | return; 1578 | default: 1579 | // general statements are considered assignment statements/expressions 1580 | expr(Assign); 1581 | if (tk != ';' && tk != ',') fatal("semicolon expected"); 1582 | next(); 1583 | } 1584 | } 1585 | 1586 | void die(char *msg) { printf("%s\n", msg); exit(-1); } 1587 | 1588 | int reloc_imm(int offset) { return (((offset) - 8) >> 2) & 0x00ffffff; } 1589 | int reloc_bl(int offset) { return 0xeb000000 | reloc_imm(offset); } 1590 | 1591 | int *codegen(int *jitmem, int *jitmap) 1592 | { 1593 | int i, ii, tmp, c; 1594 | int *je, *tje; // current position in emitted native code 1595 | int *immloc, *il; 1596 | 1597 | immloc = il = malloc(1024 * 4); 1598 | int *iv = malloc(1024 * 4); 1599 | int *imm0 = 0; 1600 | 1601 | // first pass: emit native code 1602 | int *pc = text + 1; je = jitmem; line = 0; 1603 | while (pc <= e) { 1604 | i = *pc; 1605 | // Store mapping from IR index to native instruction buffer location 1606 | // "pc - text" gets the index of IR. 1607 | // "je" points to native instruction buffer's current location. 
1608 | jitmap[((int) pc++ - (int) text) >> 2] = (int) je; 1609 | switch (i) { 1610 | case LEA: 1611 | tmp = *pc++; 1612 | if (tmp >= 64 || tmp <= -64) { 1613 | printf("jit: LEA %d out of bounds\n", tmp); exit(6); 1614 | } 1615 | if (tmp >= 0) 1616 | *je++ = 0xe28b0000 | tmp * 4; // add r0, fp, #(tmp) 1617 | else 1618 | *je++ = 0xe24b0000 | (-tmp) * 4; // sub r0, fp, #(tmp) 1619 | break; 1620 | case IMM: 1621 | tmp = *pc++; 1622 | if (0 <= tmp && tmp < 256) 1623 | *je++ = 0xe3a00000 + tmp; // mov r0, #(tmp) 1624 | else { if (!imm0) imm0 = je; *il++ = (int) (je++); *iv++ = tmp; } 1625 | break; 1626 | case JSR: 1627 | case JMP: 1628 | pc++; je++; // postponed till second pass 1629 | break; 1630 | case BZ: 1631 | case BNZ: 1632 | *je++ = 0xe3500000; pc++; je++; // cmp r0, #0 1633 | break; 1634 | case ENT: 1635 | *je++ = 0xe92d4800; *je++ = 0xe28db000; // push {fp, lr}; add fp, sp, #0 1636 | ii = c = 0; tmp = 4 * (*pc++); 1637 | while (tmp >= 255) { c |= tmp & 3; tmp >>= 2; ++ii; } 1638 | tmp += (c ? 
1 : 0); if ((tmp << (2*ii)) >= 32768 || tmp < 0) { 1639 | printf("jit: ENT %d out of bounds\n", tmp << (2*ii)); exit(6); 1640 | } // sub sp, sp, #tmp (scaled) 1641 | if (tmp) *je++ = 0xe24dd000 | (((16-ii) & 0xf) << 8) | tmp; 1642 | break; 1643 | case ADJ: 1644 | *je++ = 0xe28dd000 + *pc++ * 4; // add sp, sp, #(tmp * 4) 1645 | break; 1646 | case LEV: 1647 | *je++ = 0xe28bd000; *je++ = 0xe8bd8800; // add sp, fp, #0; pop {fp, pc} 1648 | break; 1649 | case LI: 1650 | *je++ = 0xe5900000; // ldr r0, [r0] 1651 | break; 1652 | case LC: 1653 | *je++ = 0xe5d00000; if (signed_char) *je++ = 0xe6af0070; // ldrb r0, [r0]; (sxtb r0, r0) 1654 | break; 1655 | case SI: 1656 | *je++ = 0xe49d1004; *je++ = 0xe5810000; // pop {r1}; str r0, [r1] 1657 | break; 1658 | case SC: 1659 | *je++ = 0xe49d1004; *je++ = 0xe5c10000; // pop {r1}; strb r0, [r1] 1660 | break; 1661 | case PSH: 1662 | *je++ = 0xe52d0004; // push {r0} 1663 | break; 1664 | case OR: 1665 | *je++ = 0xe49d1004; *je++ = 0xe1810000; // pop {r1}; orr r0, r1, r0 1666 | break; 1667 | case XOR: 1668 | *je++ = 0xe49d1004; *je++ = 0xe0210000; // pop {r1}; eor r0, r1, r0 1669 | break; 1670 | case AND: 1671 | *je++ = 0xe49d1004; *je++ = 0xe0010000; // pop {r1}; and r0, r1, r0 1672 | break; 1673 | case SHL: 1674 | *je++ = 0xe49d1004; *je++ = 0xe1a00011; // pop {r1}; lsl r0, r1, r0 1675 | break; 1676 | case SHR: 1677 | *je++ = 0xe49d1004; *je++ = 0xe1a00051; // pop {r1}; asr r0, r1, r0 1678 | break; 1679 | case ADD: 1680 | *je++ = 0xe49d1004; *je++ = 0xe0800001; // pop {r1}; add r0, r0, r1 1681 | break; 1682 | case SUB: 1683 | *je++ = 0xe49d1004; *je++ = 0xe0410000; // pop {r1}; sub r0, r1, r0 1684 | break; 1685 | case MUL: 1686 | *je++ = 0xe49d1004; *je++ = 0xe0000091; // pop {r1}; mul r0, r1, r0 1687 | break; 1688 | case DIV: 1689 | case MOD: 1690 | *je++ = 0xe52d0004; // push {r0} 1691 | int ti = ef_getidx((i == DIV) ? 
"__aeabi_idiv" : "__aeabi_idivmod"); 1692 | tmp = ef_getaddr(ti); 1693 | *je++ = 0xe49d0004 | (1 << 12); // pop r1 1694 | *je++ = 0xe49d0004 | (0 << 12); // pop r0 1695 | *je++ = 0xe28fe000; // add lr, pc, #0 1696 | if (!imm0) imm0 = je; 1697 | *il++ = (int) je++ + 1; 1698 | *iv++ = tmp; 1699 | // ARM EABI modulo helper function produces quotient in r0 1700 | // and the remainder in r1. 1701 | if (i == MOD) 1702 | *je++ = 0xe1a00001; // mov r0, r1 1703 | break; 1704 | case SYSC: 1705 | tmp = ef_getaddr(*pc++); // look up address from ef index 1706 | if (*pc++ != ADJ) die("codegen: no ADJ after native proc"); 1707 | i = *pc; 1708 | if (i > 10) die("codegen: no support for 10+ arguments"); 1709 | while (i > 0) *je++ = 0xe49d0004 | (--i << 12); // pop r(i-1) 1710 | i = *pc++; 1711 | if (i > 4) *je++ = 0xe92d03f0; // push {r4-r9} 1712 | *je++ = 0xe28fe000; // add lr, pc, #0 1713 | if (!imm0) imm0 = je; 1714 | *il++ = (int) je++ + 1; 1715 | *iv++ = tmp; 1716 | if (i > 4) *je++ = 0xe28dd018; // add sp, sp, #24 1717 | break; 1718 | case CLCA: 1719 | *je++ = 0xe59d0004; *je++ = 0xe59d1000; // ldr r0, [sp, #4] 1720 | // ldr r1, [sp] 1721 | *je++ = 0xe3a0780f; *je++ = 0xe2877002; // mov r7, #0xf0000 1722 | // add r7, r7, #2 1723 | *je++ = 0xe3a02000; *je++ = 0xef000000; // mov r2, #0 1724 | // svc 0 1725 | break; 1726 | default: 1727 | if (EQ <= i && i <= GE) { 1728 | *je++ = 0xe49d1004; *je++ = 0xe1510000; // pop {r1}; cmp r1, r0 1729 | if (i <= NE) { je[0] = 0x03a00000; je[1] = 0x13a00000; } // moveq r0, #0; movne r0, #0 1730 | else if (i == LT || i == GE) { je[0] = 0xb3a00000; je[1] = 0xa3a00000; } // movlt r0, #0; movge r0, #0 1731 | else { je[0] = 0xc3a00000; je[1] = 0xd3a00000; } // movgt r0, #0; movle r0, #0 1732 | if (i == EQ || i == LT || i == GT) je[0] = je[0] | 1; 1733 | else je[1] = je[1] | 1; 1734 | je += 2; 1735 | break; 1736 | } else { 1737 | printf("code generation failed for %d!\n", i); 1738 | free(iv); 1739 | return 0; 1740 | } 1741 | } 1742 | 1743 | int 
genpool = 0; 1744 | if (imm0) { 1745 | if (i == LEV) genpool = 1; 1746 | else if ((int) je > (int) imm0 + 3000) { 1747 | tje = je++; genpool = 2; 1748 | } 1749 | } 1750 | if (genpool) { 1751 | *iv = 0; 1752 | while (il > immloc) { 1753 | tmp = *--il; 1754 | if ((int) je > tmp + 4096 + 8) die("codegen: can't reach the pool"); 1755 | iv--; if (iv[0] == iv[1]) je--; 1756 | if (tmp & 1) { 1757 | // ldr pc, [pc, #..] 1758 | *(int *) (tmp - 1) = 0xe59ff000 | ((int) je - tmp - 7); 1759 | } else { 1760 | // ldr r0, [pc, #..] 1761 | *(int *) tmp = 0xe59f0000 | ((int) je - tmp - 8); 1762 | } 1763 | *je++ = *iv; 1764 | } 1765 | if (genpool == 2) { // jump past the pool 1766 | tmp = ((int) je - (int) tje - 8) >> 2; 1767 | *tje = 0xea000000 | (tmp & 0x00ffffff); // b #(je) 1768 | } 1769 | imm0 = 0; 1770 | genpool = 0; 1771 | } 1772 | } 1773 | if (il > immloc) die("codegen: not terminated by a LEV"); 1774 | tje = je; 1775 | 1776 | // second pass 1777 | pc = text + 1; // Point instruction pointer "pc" to the first instruction. 1778 | while (pc <= e) { // While instruction end is not met. 1779 | // Get the IR's corresponding native instruction buffer address. 1780 | je = (int *) jitmap[((int) pc - (int) text) >> 2]; 1781 | i = *pc++; // Get current instruction 1782 | // If the instruction is one of the jumps. 1783 | if (i == JSR || i == JMP || i == BZ || i == BNZ) { 1784 | switch (i) { 1785 | case JSR: 1786 | *je = 0xeb000000; // bl #(tmp) 1787 | break; 1788 | case JMP: 1789 | *je = 0xea000000; // b #(tmp) 1790 | break; 1791 | case BZ: 1792 | *++je = 0x0a000000; // beq #(tmp) 1793 | break; 1794 | case BNZ: 1795 | *++je = 0x1a000000; // bne #(tmp) 1796 | break; 1797 | } 1798 | tmp = *pc++; 1799 | *je = (*je | 1800 | reloc_imm(jitmap[(tmp - (int) text) >> 2] - (int) je)); 1801 | } 1802 | // If the instruction has operand, increment instruction pointer to 1803 | // skip the operand. 
1804 | else if (i <= ADJ || i == SYSC) { ++pc; } 1805 | } 1806 | free(iv); 1807 | return tje; 1808 | } 1809 | 1810 | enum { 1811 | _PROT_EXEC = 4, _PROT_READ = 1, _PROT_WRITE = 2, 1812 | _MAP_PRIVATE = 2, _MAP_ANON = 32 1813 | }; 1814 | 1815 | int jit(int poolsz, int *main, int argc, char **argv) 1816 | { 1817 | char *jitmem; // executable memory for JIT-compiled native code 1818 | int retval; 1819 | if (src) return 0; // skip for IR listing 1820 | 1821 | // setup JIT memory 1822 | if (!(jitmem = mmap(0, poolsz, _PROT_EXEC | _PROT_READ | _PROT_WRITE, 1823 | _MAP_PRIVATE | _MAP_ANON, -1, 0))) { 1824 | printf("could not mmap(%d) jit executable memory\n", poolsz); 1825 | return -1; 1826 | } 1827 | int *jitmap = (int *) (jitmem + (poolsz >> 1)); 1828 | int *je = (int *) jitmem; 1829 | *je++ = (int) &retval; 1830 | *je++ = argc; 1831 | *je++ = (int) argv; 1832 | int *_start = je; 1833 | *je++ = 0xe92d5ff0; // push {r4-r12, lr} 1834 | *je++ = 0xe51f0014; // ldr r0, [pc, #-20] ; argc 1835 | *je++ = 0xe51f1014; // ldr r1, [pc, #-20] ; argv 1836 | *je++ = 0xe52d0004; // push {r0} 1837 | *je++ = 0xe52d1004; // push {r1} 1838 | int *tje = je++; // bl jitmain 1839 | *je++ = 0xe51f502c; // ldr r5, [pc, #-44] ; retval 1840 | *je++ = 0xe5850000; // str r0, [r5] 1841 | *je++ = 0xe28dd008; // add sp, sp, #8 1842 | *je++ = 0xe8bd9ff0; // pop {r4-r12, pc} 1843 | if (!(je = codegen(je, jitmap))) return 1; 1844 | if (je >= jitmap) die("jitmem too small"); 1845 | *tje = reloc_bl(jitmap[((int) main - (int) text) >> 2] - (int) tje); 1846 | 1847 | // hack to jump into specific function pointer 1848 | __clear_cache(jitmem, je); 1849 | int *res = bsearch(&sym, sym, 1, 1, (void *) _start); 1850 | if (((void *) 0) != res) return 0; return -1; // make compiler happy 1851 | } 1852 | 1853 | int ELF32_ST_INFO(int b, int t) { return (b << 4) + (t & 0xf); } 1854 | enum { 1855 | EHDR_SIZE = 52, ET_EXEC = 2, EM_ARM = 40, 1856 | PHDR_ENT_SIZE = 32, SHDR_ENT_SIZE = 40, 1857 | SYM_ENT_SIZE = 16, 
REL_ENT_SIZE = 8, PLT_ENT_SIZE = 12, 1858 | DYN_ENT_SIZE = 8 1859 | }; 1860 | 1861 | struct Elf32_Shdr { 1862 | int sh_name; // [Elf32_Word] Section name (index into string table) 1863 | int sh_type; // [Elf32_Word] Section type (SHT_*) 1864 | int sh_flags; // [Elf32_Word] Section flags (SHF_*) 1865 | int sh_addr; // [Elf32_Addr] Address where section is to be loaded 1866 | int sh_offset; // [Elf32_Off] File offset of section data, in bytes 1867 | int sh_size; // [Elf32_Word] Size of section, in bytes 1868 | int sh_link; // [Elf32_Word] Section type-specific header table 1869 | // index link 1870 | int sh_info; // [Elf32_Word] Section type-specific extra information 1871 | int sh_addralign; // [Elf32_Word] Section address alignment 1872 | int sh_entsize; // [Elf32_Word] Size of records contained within section 1873 | }; 1874 | 1875 | enum { 1876 | // Special section indices 1877 | SHN_UNDEF = 0, // Undefined, missing, irrelevant, or meaningless 1878 | 1879 | // Section types 1880 | SHT_NULL = 0, // No associated section (inactive entry) 1881 | SHT_PROGBITS = 1, // Program-defined contents 1882 | SHT_STRTAB = 3, // String table 1883 | SHT_DYNAMIC = 6, // Information for dynamic linking 1884 | SHT_REL = 9, // Relocation entries; no explicit addends 1885 | SHT_DYNSYM = 11, // Symbol table 1886 | 1887 | // Section flags 1888 | SHF_WRITE = 0x1, 1889 | SHF_ALLOC = 0x2, 1890 | SHF_EXECINSTR = 0x4, 1891 | }; 1892 | 1893 | // Symbol table entries for ELF32 1894 | struct Elf32_Sym { 1895 | int st_name; // [Elf32_Word] Symbol name (index into string table) 1896 | int st_value; // [Elf32_Addr] Value or address associated with the symbol 1897 | int st_size; // [Elf32_Word] Size of the symbol 1898 | char st_info; // [unsigned] Symbol's type and binding attributes 1899 | char st_other;// [unsigned] Must be zero; reserved 1900 | char st_shndx, st_shndx_1, st_shndx_2, st_shndx_3; // [Elf32_Half] 1901 | // Which section (header table index) it's defined 1902 | }; 1903 | 1904 | enum 
{ 1905 | // Symbol bindings 1906 | STB_LOCAL = 0, /* Local symbol, not visible outside obj file 1907 | containing def */ 1908 | STB_GLOBAL = 1, /* Global symbol, visible to all object files 1909 | being combined */ 1910 | 1911 | // Symbol types 1912 | STT_NOTYPE = 0, // Symbol's type is not specified 1913 | STT_FUNC = 2, // Symbol is executable code (function, etc.) 1914 | 1915 | // Symbol number 1916 | STN_UNDEF = 0 1917 | }; 1918 | 1919 | // Program header for ELF32 1920 | struct Elf32_Phdr { 1921 | int p_type; // [Elf32_Word] Type of segment 1922 | int p_offset; // [Elf32_Off] File offset where segment is located, in bytes 1923 | int p_vaddr; // [Elf32_Addr] Virtual address of beginning of segment 1924 | int p_paddr; // [Elf32_Addr] Physical address of beginning of segment 1925 | // (OS-specific) 1926 | int p_filesz; // [Elf32_Word] Number of bytes in file image of segment 1927 | // (may be zero) 1928 | int p_memsz; // [Elf32_Word] Number of bytes in mem image of segment 1929 | // (may be zero) 1930 | int p_flags; // [Elf32_Word] Segment flags 1931 | int p_align; // [Elf32_Word] Segment alignment constraint 1932 | }; 1933 | 1934 | // Segment types 1935 | enum { 1936 | PT_NULL = 0, // Unused segment 1937 | PT_LOAD = 1, // Loadable segment 1938 | PT_DYNAMIC = 2, // Dynamic linking information 1939 | PT_INTERP = 3, // Interpreter pathname 1940 | 1941 | // Segment flag bits 1942 | PF_X = 1, // Execute 1943 | PF_W = 2, // Write 1944 | PF_R = 4, // Read 1945 | }; 1946 | 1947 | int phdr_idx, shdr_idx, sym_idx; 1948 | 1949 | int gen_phdr(char *ptr, int type, int offset, int addr, int size, 1950 | int flag, int align) 1951 | { 1952 | struct Elf32_Phdr *phdr = (struct Elf32_Phdr *) ptr; 1953 | phdr->p_type = type; 1954 | phdr->p_offset = offset; 1955 | phdr->p_vaddr = addr; 1956 | phdr->p_paddr = addr; 1957 | phdr->p_filesz = size; 1958 | phdr->p_memsz = size; 1959 | phdr->p_flags = flag; 1960 | phdr->p_align = align; 1961 | return phdr_idx++; 1962 | } 1963 | 1964 | int 
gen_shdr(char *ptr, int type, int name, int offset, int addr, 1965 | int size, int link, int info, 1966 | int flag, int align, int entsize) 1967 | { 1968 | struct Elf32_Shdr *shdr = (struct Elf32_Shdr *) ptr; 1969 | shdr->sh_name = name; shdr->sh_type = type; 1970 | shdr->sh_addr = addr; shdr->sh_offset = offset; 1971 | shdr->sh_size = size; shdr->sh_link = link; 1972 | shdr->sh_info = info; shdr->sh_flags = flag; 1973 | shdr->sh_addralign = align; shdr->sh_entsize = entsize; 1974 | return shdr_idx++; 1975 | } 1976 | 1977 | int gen_sym(char *ptr, int name, char info, 1978 | int shndx, int size, int value) 1979 | { 1980 | struct Elf32_Sym *s = (struct Elf32_Sym *) ptr; 1981 | s->st_name = name; 1982 | s->st_info = info; 1983 | s->st_other = 0; 1984 | // s->st_shndx = shndx; 1985 | memcpy(&(s->st_shndx), (char *) &shndx, 2); 1986 | s->st_value = value; 1987 | s->st_size = size; 1988 | return sym_idx++; 1989 | } 1990 | 1991 | int append_func_sym(char **sdata, int name) 1992 | { 1993 | int idx = gen_sym(*sdata, name, ELF32_ST_INFO(STB_GLOBAL, STT_FUNC), 0, 0, 0); 1994 | *sdata += SYM_ENT_SIZE; 1995 | return idx; 1996 | } 1997 | 1998 | // shdr names which start with 'S' 1999 | enum { 2000 | SNONE = 0, SSTAB, STEXT, SDATA, SDYNS, SDYNM, SDYNC, 2001 | SINTP, SREL, SPLT, SGOT 2002 | }; 2003 | 2004 | enum { 2005 | PAGE_SIZE = 0x1000, PHDR_NUM = 4, SHDR_NUM = 11, 2006 | DYN_NUM = 15 2007 | }; 2008 | 2009 | void elf32_init(int poolsz) 2010 | { 2011 | int i; 2012 | freebuf = malloc(poolsz); 2013 | char *o = (char *) (((int) freebuf + PAGE_SIZE - 1) & -PAGE_SIZE); 2014 | /* We must assign the plt_func_addr[x] a non-zero value, and also, 2015 | * plt_func_addr[i] and plt_func_addr[i-1] has an offset of 16 2016 | * (4 instruction * 4 bytes), so the first codegen and second codegen 2017 | * have consistent code_size. Dummy address at this point. 
2018 | */ 2019 | plt_func_addr = malloc(sizeof(char *) * PTR); 2020 | for (i = 0; i < PTR; ++i) 2021 | plt_func_addr[i] = o + i * 16; 2022 | 2023 | ef_getidx("__libc_start_main"); // slot 0 of external func cache 2024 | } 2025 | 2026 | int elf32(int poolsz, int *main, int elf_fd) 2027 | { 2028 | int i; 2029 | char *freecode; 2030 | char *code = freecode = malloc(poolsz); 2031 | char *buf = freebuf; 2032 | int *jitmap = (int *) (code + (poolsz >> 1)); 2033 | memset(buf, 0, poolsz); 2034 | char *o = buf = (char *) (((int) buf + PAGE_SIZE - 1) & -PAGE_SIZE); 2035 | code = (char *) (((int) code + PAGE_SIZE - 1) & -PAGE_SIZE); 2036 | 2037 | phdr_idx = 0; 2038 | shdr_idx = 0; 2039 | sym_idx = 0; 2040 | 2041 | /* Run __libc_start_main() and pass main trampoline. 2042 | * 2043 | * Note: The function prototype of __libc_start_main() is: 2044 | * 2045 | * int __libc_start_main(int (*main)(int, char**, char**), 2046 | * int argc, char **argv, 2047 | * int (*init)(int, char**, char**), 2048 | * void (*fini)(void), 2049 | * void (*rtld_fini)(void), 2050 | * void *stack_end); 2051 | * 2052 | * Usually, we should pass __libc_csu_init as init and __libc_csu_fini 2053 | * as fini; however, we will need a interp to link the non-shared part 2054 | * of libc. It sounds too complex. To keep this compiler simple, 2055 | * let's simply pass NULL pointer. 
2056 | */ 2057 | int *stub_end = (int *) code; 2058 | 2059 | *stub_end++ = 0xe3a0b000; // mov fp, #0 @ initialize frame pointer 2060 | *stub_end++ = 0xe3a0e000; // mov lr, #0 @ initialize link register 2061 | *stub_end++ = 0xe49d1004; // pop {r1} @ get argc 2062 | *stub_end++ = 0xe1a0200d; // mov r2, sp @ get argv 2063 | *stub_end++ = 0xe52d2004; // push {r2} @ setup stack end 2064 | *stub_end++ = 0xe52d0004; // push {r0} @ setup rtld_fini 2065 | *stub_end++ = 0xe3a0c000; // mov ip, #0 @ FIXME: __libc_csu_fini() 2066 | *stub_end++ = 0xe52dc004; // push {ip} @ setup fini 2067 | *stub_end++ = 0xe28f0010; // add r0, pc, #16 @ load main trampoline 2068 | *stub_end++ = 0xe3a03000; // mov r3, #0 @ FIXME: __libc_csu_init() 2069 | *stub_end++ = 0xebfffffe; // bl __libc_start_main @ Need relocation 2070 | 2071 | // Return 127 if __libc_start_main() returns (which should not.) 2072 | *stub_end++ = 0xe3a0007f; // mov r0, #127 2073 | *stub_end++ = 0xe3a07001; // mov r7, #1 2074 | *stub_end++ = 0xef000000; // svc 0x00000000 2075 | 2076 | // main() trampoline: convert ARM AAPCS calling convention to ours. 2077 | *stub_end++ = 0xe92d5ff0; // push {r4-r12, lr} 2078 | *stub_end++ = 0xe52d0004; // push {r0} 2079 | *stub_end++ = 0xe52d1004; // push {r1} 2080 | *stub_end++ = 0xebfffffe; // bl 0
@ Need relocation 2081 | *stub_end++ = 0xe28dd008; // add sp, sp, #8 2082 | *stub_end++ = 0xe8bd9ff0; // pop {r4-r12, pc} 2083 | 2084 | int start_stub_size = (char *) stub_end - code; 2085 | 2086 | // Compile and generate the code. 2087 | char *je = (char *) codegen((int *) (code + start_stub_size), jitmap); 2088 | if (!je) return 1; 2089 | if ((int *) je >= jitmap) die("elf32: jitmem too small"); 2090 | 2091 | // elf32_hdr 2092 | *o++ = 0x7f; *o++ = 'E'; *o++ = 'L'; *o++ = 'F'; 2093 | *o++ = 1; *o++ = 1; *o++ = 1; *o++ = 0; 2094 | o += 8; 2095 | *o++ = ET_EXEC; *o++ = 0; // e_type 2096 | *o++ = EM_ARM; *o++ = 0; // e_machine 2097 | *(int *) o = 1; o += 4; 2098 | char *entry = o; o += 4; // e_entry 2099 | *(int *) o = EHDR_SIZE; o += 4; // e_phoff 2100 | char *e_shoff = o; o += 4; // e_shoff 2101 | *(int *) o = 0x5000400; o += 4; // e_flags 2102 | *o++ = EHDR_SIZE; *o++ = 0; 2103 | *o++ = PHDR_ENT_SIZE; *o++ = 0; *o++ = PHDR_NUM; *o++ = 0; // e_phentsize & e_phnum 2104 | *o++ = SHDR_ENT_SIZE; *o++ = 0; *o++ = SHDR_NUM; *o++ = 0; // e_shentsize & e_shnum 2105 | *o++ = 1; *o++ = 0; 2106 | 2107 | int phdr_size = PHDR_ENT_SIZE * PHDR_NUM; 2108 | char *phdr = o; o += phdr_size; 2109 | 2110 | // .text 2111 | int code_off = o - buf; 2112 | int code_size = je - code; 2113 | char *code_addr = o; 2114 | o += code_size; 2115 | 2116 | // .rel.plt (embedded in PT_LOAD of text) 2117 | int rel_size = REL_ENT_SIZE * ef_count; 2118 | int rel_off = code_off + code_size; 2119 | char *rel_addr = code_addr + code_size; 2120 | o += rel_size; 2121 | 2122 | // .plt (embedded in PT_LOAD of text) 2123 | int plt_size = 20 + PLT_ENT_SIZE * ef_count; // 20 is the size of .plt entry code to .got 2124 | int plt_off = rel_off + rel_size; 2125 | char *plt_addr = rel_addr + rel_size; 2126 | o += plt_size; 2127 | 2128 | memcpy(code_addr, code, code_size); 2129 | *(int *) entry = (int) code_addr; 2130 | 2131 | // .data 2132 | char *_data_end = data; 2133 | // Use load_bias to align offset and v_addr, 
the elf loader 2134 | // needs PAGE_SIZE align to do mmap(). 2135 | int load_bias = PAGE_SIZE + ((int) _data & (PAGE_SIZE - 1)) 2136 | - ((o - buf) & (PAGE_SIZE - 1)); 2137 | o += load_bias; 2138 | char *dseg = o; 2139 | 2140 | // rwdata (embedded in PT_LOAD of data) 2141 | // rwdata is all the data (R/O and R/W) in source code, 2142 | // e.g, the variable with initial value and all the string. 2143 | int rwdata_off = dseg - buf; 2144 | int rwdata_size = _data_end - _data; 2145 | o += rwdata_size; 2146 | 2147 | // .dynamic (embedded in PT_LOAD of data) 2148 | char *pt_dyn = data; 2149 | int pt_dyn_size = DYN_NUM * DYN_ENT_SIZE; 2150 | int pt_dyn_off = rwdata_off + rwdata_size; data += pt_dyn_size; 2151 | o += pt_dyn_size; 2152 | 2153 | // .interp (embedded in PT_LOAD of data) 2154 | char *interp_str = "/lib/ld-linux-armhf.so.3"; 2155 | int interp_str_size = 25; // strlen(interp_str) + 1 2156 | char *interp = data; memcpy(interp, interp_str, interp_str_size); 2157 | int interp_off = pt_dyn_off + pt_dyn_size; data += interp_str_size; 2158 | o += interp_str_size; 2159 | 2160 | // .shstrtab (embedded in PT_LOAD of data) 2161 | char *shstrtab_addr = data; 2162 | int shstrtab_off = interp_off + interp_str_size; 2163 | int shstrtab_size = 0; 2164 | 2165 | int *shdr_names = malloc(sizeof(int) * SHDR_NUM); 2166 | if (!shdr_names) die("elf32: could not malloc shdr_names table"); 2167 | 2168 | shdr_names[SNONE] = append_strtab(&data, "") - shstrtab_addr; 2169 | shdr_names[SSTAB] = append_strtab(&data, ".shstrtab") - shstrtab_addr; 2170 | shdr_names[STEXT] = append_strtab(&data, ".text") - shstrtab_addr; 2171 | shdr_names[SDATA] = append_strtab(&data, ".data") - shstrtab_addr; 2172 | shdr_names[SDYNS] = append_strtab(&data, ".dynstr") - shstrtab_addr; 2173 | shdr_names[SDYNM] = append_strtab(&data, ".dynsym") - shstrtab_addr; 2174 | shdr_names[SDYNC] = append_strtab(&data, ".dynamic") - shstrtab_addr; 2175 | shdr_names[SINTP] = append_strtab(&data, ".interp") - shstrtab_addr; 
2176 | shdr_names[SREL] = append_strtab(&data, ".rel.plt") - shstrtab_addr; 2177 | shdr_names[SPLT] = append_strtab(&data, ".plt") - shstrtab_addr; 2178 | shdr_names[SGOT] = append_strtab(&data, ".got") - shstrtab_addr; 2179 | shstrtab_size = data - shstrtab_addr; 2180 | o += shstrtab_size; 2181 | 2182 | // .dynstr (embedded in PT_LOAD of data) 2183 | char *dynstr_addr = data; 2184 | int dynstr_off = shstrtab_off + shstrtab_size; 2185 | append_strtab(&data, ""); 2186 | char *libc = append_strtab(&data, "libc.so.6"); 2187 | char *ldso = append_strtab(&data, "libdl.so.2"); 2188 | char *libgcc_s = append_strtab(&data, "libgcc_s.so.1"); 2189 | 2190 | int *func_entries = malloc(sizeof(int) * ef_count); 2191 | if (!func_entries) die("elf32: could not malloc func_entries table"); 2192 | 2193 | for (i = 0; i < ef_count; ++i) 2194 | func_entries[i] = append_strtab(&data, ef_cache[i]->name) - dynstr_addr; 2195 | 2196 | int dynstr_size = data - dynstr_addr; 2197 | o += dynstr_size; 2198 | 2199 | // .dynsym (embedded in PT_LOAD of data) 2200 | char *dynsym_addr = data; 2201 | int dynsym_off = dynstr_off + dynstr_size; 2202 | memset(data, 0, SYM_ENT_SIZE); 2203 | data += SYM_ENT_SIZE; 2204 | 2205 | for (i = 0; i < ef_count; ++i) 2206 | append_func_sym(&data, func_entries[i]); 2207 | 2208 | int dynsym_size = SYM_ENT_SIZE * (ef_count + 1); 2209 | o += dynsym_size; 2210 | 2211 | // .got (embedded in PT_LOAD of data) 2212 | char *got_addr = data; 2213 | int got_off = dynsym_off + dynsym_size; 2214 | *(int *) data = (int) pt_dyn; data += 4; 2215 | data += 4; // reserved 2 and 3 entry for interp 2216 | char *to_got_movw = data; // The address manipulates dynamic 2217 | char *to_got_movt = data; // linking, plt must jump here. 
2218 | data += 4; // reserved 2 and 3 entry for interp 2219 | // .got function slot 2220 | char **got_func_slot = malloc(sizeof(char *) * ef_count); 2221 | for (i = 0; i < ef_count; i++) { 2222 | got_func_slot[i] = data; 2223 | *(int *) data = (int) plt_addr; data += 4; 2224 | } 2225 | data += 4; // end with 0x0 2226 | int got_size = (int) data - (int) got_addr; 2227 | o += got_size; 2228 | 2229 | int dseg_size = o - dseg; 2230 | 2231 | // .plt -- Now we back to handle .plt after .got was initial 2232 | char *to = plt_addr; 2233 | *(int *) to = 0xe52de004; to += 4; // push {lr} 2234 | // movw r10 addr_to_got 2235 | *(int *) to = 0xe300a000 | (0xfff & (int) (to_got_movw)) | 2236 | (0xf0000 & ((int) (to_got_movw) << 4)); 2237 | to += 4; 2238 | // movt r10 addr_to_got 2239 | *(int *) to = 0xe340a000 | (0xfff & ((int) (to_got_movt) >> 16)) | 2240 | (0xf0000 & ((int) (to_got_movt) >> 12)); 2241 | to += 4; 2242 | *(int *) to = 0xe1a0e00a; to += 4; // mov lr,r10 2243 | *(int *) to = 0xe59ef000; to += 4; // ldr pc, [lr] 2244 | 2245 | // We must preserve ip for code below, dyn link use this as return address 2246 | for (i = 0; i < ef_count; i++) { 2247 | plt_func_addr[i] = to; 2248 | // movt ip addr_to_got 2249 | *(int *) to = 0xe300c000 | (0xfff & (int) (got_func_slot[i])) | 2250 | (0xf0000 & ((int) (got_func_slot[i]) << 4)); 2251 | to += 4; 2252 | // movw ip addr_to_got 2253 | *(int *) to = 0xe340c000 | 2254 | (0xfff & ((int) (got_func_slot[i]) >> 16)) | 2255 | (0xf0000 & ((int) (got_func_slot[i]) >> 12)); 2256 | to += 4; 2257 | *(int *) to = 0xe59cf000; to += 4; // ldr pc, [ip] 2258 | } 2259 | 2260 | // .rel.plt 2261 | to = rel_addr; 2262 | for (i = 0; i < ef_count; i++) { 2263 | *(int *) to = (int) got_func_slot[i]; to += 4; 2264 | *(int *) to = 0x16 | (i + 1) << 8 ; to += 4; 2265 | // 0x16 R_ARM_JUMP_SLOT | .dymstr index << 8 2266 | } 2267 | 2268 | // Generate program header after we got address, offset and size. 
2269 | to = phdr; 2270 | // PT_LOAD for .text 2271 | gen_phdr(to, PT_LOAD, 0, (int) buf, 2272 | EHDR_SIZE + phdr_size + code_size + rel_size + plt_size, 2273 | PF_X | PF_R, PAGE_SIZE); 2274 | to += PHDR_ENT_SIZE; 2275 | 2276 | // PT_LOAD for .data 2277 | gen_phdr(to, PT_LOAD, rwdata_off, (int) _data, 2278 | dseg_size, PF_W | PF_R, PAGE_SIZE); 2279 | to += PHDR_ENT_SIZE; 2280 | 2281 | // PT_INTERP for .interp 2282 | gen_phdr(to, PT_INTERP, interp_off, (int) interp, 2283 | interp_str_size , PF_R, 0x1); 2284 | to += PHDR_ENT_SIZE; 2285 | 2286 | // PT_DYNAMIC for .dynamic 2287 | gen_phdr(to, PT_DYNAMIC, pt_dyn_off, (int) pt_dyn, 2288 | pt_dyn_size , PF_R | PF_W, 0x4); 2289 | 2290 | // .dynamic (embedded in PT_LOAD of data) 2291 | to = pt_dyn; 2292 | *(int *) to = 5; to += 4; *(int *) to = (int) dynstr_addr; to += 4; 2293 | *(int *) to = 10; to += 4; *(int *) to = dynstr_size; to += 4; 2294 | *(int *) to = 6; to += 4; *(int *) to = (int) dynsym_addr; to += 4; 2295 | *(int *) to = 11; to += 4; *(int *) to = 16; to += 4; 2296 | *(int *) to = 17; to += 4; *(int *) to = (int) rel_addr; to += 4; 2297 | *(int *) to = 18; to += 4; *(int *) to = rel_size; to += 4; 2298 | *(int *) to = 19; to += 4; *(int *) to = 8; to += 4; 2299 | *(int *) to = 3; to += 4; *(int *) to = (int) got_addr; to += 4; 2300 | *(int *) to = 2; to += 4; *(int *) to = rel_size; to += 4; 2301 | *(int *) to = 20; to += 4; *(int *) to = 17; to += 4; 2302 | *(int *) to = 23; to += 4; *(int *) to = (int) rel_addr; to += 4; 2303 | *(int *) to = 1; to += 4; *(int *) to = libc - dynstr_addr; to += 4; 2304 | *(int *) to = 1; to += 4; *(int *) to = ldso - dynstr_addr; to += 4; 2305 | *(int *) to = 1; to += 4; *(int *) to = libgcc_s - dynstr_addr; to += 4; 2306 | *(int *) to = 0; 2307 | 2308 | /* Generate code again bacause address of .plt function slots must 2309 | * be confirmed before codegen() to make sure the code is correct. 
2310 | */ 2311 | je = (char *) codegen((int *) (code + start_stub_size), jitmap); 2312 | if (!je) { 2313 | free(func_entries); 2314 | free(shdr_names); 2315 | return 1; 2316 | } 2317 | if ((int *) je >= jitmap) die("elf32: jitmem too small"); 2318 | 2319 | // Relocate __libc_start_main() and main(). 2320 | *((int *) (code + 0x28)) = reloc_bl(plt_func_addr[0] - code_addr - 0x28); 2321 | *((int *) (code + 0x44)) = 2322 | reloc_bl(jitmap[((int) main - (int) text) >> 2] - (int) code - 0x44); 2323 | 2324 | // Copy the generated binary. 2325 | memcpy(code_addr, code, je - code); 2326 | 2327 | // Generate section header 2328 | *(int *) e_shoff = (int) (o - buf); 2329 | gen_shdr(o, SHT_NULL, shdr_names[SNONE], 0, 0, 0, 2330 | 0, 0, 0, 0, 0); 2331 | o += SHDR_ENT_SIZE; 2332 | 2333 | // sh_shstrtab_idx 2334 | gen_shdr(o, SHT_STRTAB, shdr_names[SSTAB], shstrtab_off, 0, 2335 | shstrtab_size, 0, 0, 0, 1, 0); 2336 | o += SHDR_ENT_SIZE; 2337 | 2338 | // sh_text_idx 2339 | gen_shdr(o, SHT_PROGBITS, shdr_names[STEXT], code_off, (int) code_addr, 2340 | code_size, 0, 0, SHF_ALLOC | SHF_EXECINSTR, 4, 0); 2341 | o += SHDR_ENT_SIZE; 2342 | 2343 | // sh_data_idx 2344 | gen_shdr(o, SHT_PROGBITS, shdr_names[SDATA], rwdata_off, (int) _data, 2345 | dseg_size, 0, 0, SHF_ALLOC | SHF_WRITE, 4, 0); 2346 | o += SHDR_ENT_SIZE; 2347 | 2348 | int sh_dynstr_idx = 2349 | gen_shdr(o, SHT_STRTAB, shdr_names[SDYNS], dynstr_off, (int) dynstr_addr, 2350 | dynstr_size, 0, 0, SHF_ALLOC, 1, 0); 2351 | o += SHDR_ENT_SIZE; 2352 | 2353 | int sh_dynsym_idx = 2354 | gen_shdr(o, SHT_DYNSYM, shdr_names[SDYNM], dynsym_off, (int) dynsym_addr, 2355 | dynsym_size, sh_dynstr_idx, 1, SHF_ALLOC, 4, 0x10); 2356 | o += SHDR_ENT_SIZE; 2357 | 2358 | // sh_dynamic_idx 2359 | gen_shdr(o, SHT_DYNAMIC, shdr_names[SDYNC], pt_dyn_off, (int) pt_dyn, 2360 | pt_dyn_size, sh_dynstr_idx, 0, SHF_ALLOC | SHF_WRITE, 4, 0); 2361 | o += SHDR_ENT_SIZE; 2362 | 2363 | // sh_interp_idx 2364 | gen_shdr(o, SHT_PROGBITS, shdr_names[SINTP], 
interp_off, (int) interp, 2365 | interp_str_size, 0, 0, SHF_ALLOC, 1, 0); 2366 | o += SHDR_ENT_SIZE; 2367 | 2368 | // sh_rel_idx 2369 | gen_shdr(o, SHT_REL, shdr_names[SREL], rel_off, (int) rel_addr, 2370 | rel_size, sh_dynsym_idx, 11, SHF_ALLOC | 0x40, 4, 8); 2371 | o += SHDR_ENT_SIZE; 2372 | 2373 | // sh_plt_idx 2374 | gen_shdr(o, SHT_PROGBITS, shdr_names[SPLT], plt_off, (int) plt_addr, 2375 | plt_size, 0, 0, SHF_ALLOC | SHF_EXECINSTR, 4, 4); 2376 | o += SHDR_ENT_SIZE; 2377 | 2378 | // sh_got_idx 2379 | gen_shdr(o, SHT_PROGBITS, shdr_names[SGOT], got_off, (int) got_addr, 2380 | got_size, 0, 0, SHF_ALLOC | SHF_WRITE, 4, 4); 2381 | o += SHDR_ENT_SIZE; 2382 | 2383 | // Copy .data to a part of (o - buf) where _data located. 2384 | memcpy(dseg, _data, dseg_size); 2385 | write(elf_fd, buf, o - buf); 2386 | 2387 | free(func_entries); 2388 | free(shdr_names); 2389 | free(freebuf); 2390 | free(freecode); 2391 | free(plt_func_addr); 2392 | free(got_func_slot); 2393 | return 0; 2394 | } 2395 | 2396 | enum { _O_CREAT = 64, _O_WRONLY = 1 }; 2397 | 2398 | #ifdef int 2399 | /* Eliminate clang compilation error: 2400 | * first parameter of 'main' (argument count) must be of type 'int' 2401 | */ 2402 | #undef int 2403 | #endif 2404 | int main(int argc, char **argv) 2405 | { 2406 | /* 64-bit host support */ 2407 | #if defined(__x86_64__) || defined(__aarch64__) 2408 | #define int long 2409 | #endif 2410 | 2411 | int *freed_ast, *ast; 2412 | int elf_fd; 2413 | int i; 2414 | 2415 | --argc; ++argv; 2416 | if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { 2417 | src = 1; --argc; ++argv; 2418 | } 2419 | if (argc > 0 && **argv == '-' && !strcmp(*argv, "-fsigned-char")) { 2420 | signed_char = 1; --argc; ++argv; 2421 | } 2422 | if (argc > 0 && **argv == '-' && (*argv)[1] == 'o') { 2423 | elf = 1; --argc; ++argv; 2424 | if (argc < 1) die("no output file argument"); 2425 | if ((elf_fd = open(*argv, _O_CREAT | _O_WRONLY, 0775)) < 0) { 2426 | printf("could not open(%s)\n", *argv); return 
-1; 2427 | } 2428 | --argc; ++argv; 2429 | } 2430 | if (argc < 1) die("usage: amacc [-s] [-o object] file"); 2431 | 2432 | int fd; 2433 | if ((fd = open(*argv, 0)) < 0) { 2434 | printf("could not open(%s)\n", *argv); return -1; 2435 | } 2436 | 2437 | int poolsz = 256 * 1024; // arbitrary size 2438 | if (!(text = le = e = malloc(poolsz))) 2439 | die("could not allocate text area"); 2440 | if (!(sym = malloc(poolsz))) 2441 | die("could not allocate symbol area"); 2442 | if (!(freedata = _data = data = malloc(poolsz))) 2443 | printf("could not allocat data area"); 2444 | if (!(tsize = malloc(PTR * sizeof(int)))) 2445 | die("could not allocate tsize area"); 2446 | if (!(members = malloc(PTR * sizeof(struct member_s *)))) 2447 | die("could not malloc() members area"); 2448 | if (!(freed_ast = ast = malloc(poolsz))) 2449 | die("could not allocate abstract syntax tree area"); 2450 | if (!(ef_cache = malloc(PTR * sizeof(struct ef_s *)))) 2451 | die("could not malloc() external function cache"); 2452 | 2453 | memset(sym, 0, poolsz); 2454 | memset(e, 0, poolsz); 2455 | memset(data, 0, poolsz); 2456 | 2457 | memset(tsize, 0, PTR * sizeof(int)); 2458 | memset(members, 0, PTR * sizeof(struct member_s *)); 2459 | memset(ast, 0, poolsz); 2460 | ast = (int *) ((int) ast + poolsz); // abstract syntax tree is most efficiently built as a stack 2461 | 2462 | /* Register keywords and system calls to symbol stack 2463 | * must match the sequence of enum 2464 | */ 2465 | p = "break continue case char default else enum if int return sizeof " 2466 | "struct union switch for while do goto __clear_cache void main"; 2467 | 2468 | // call "next" to create symbol table entry. 2469 | // store the keyword's token type in the symbol table entry's "tk" field. 
2470 | for (i = Break; i <= Goto; i++) { 2471 | next(); id->tk = i; id->class = Keyword; // add keywords to symbol table 2472 | } 2473 | 2474 | // add __clear_cache to symbol table 2475 | next(); id->class = ClearCache; id->type = INT; id->val = CLCA; 2476 | 2477 | next(); id->tk = Char; id->class = Keyword; // handle void type 2478 | next(); 2479 | struct ident_s *idmain = id; id->class = Main; // keep track of main 2480 | 2481 | if (elf) elf32_init(poolsz); // call before source code parsing 2482 | 2483 | if (!(freep = lp = p = malloc(poolsz))) 2484 | die("could not allocate source area"); 2485 | if ((i = read(fd, p, poolsz - 1)) <= 0) 2486 | die("unable to read from source file"); 2487 | p[i] = 0; 2488 | close(fd); 2489 | 2490 | // add primitive types 2491 | tsize[tnew++] = sizeof(char); 2492 | tsize[tnew++] = sizeof(int); 2493 | 2494 | // real C parser begins here 2495 | // parse the program 2496 | line = 1; 2497 | next(); 2498 | n = ast; 2499 | while (tk) { 2500 | stmt(Glo); 2501 | next(); 2502 | } 2503 | 2504 | int ret = elf ? elf32(poolsz, (int *) idmain->val, elf_fd) : 2505 | jit(poolsz, (int *) idmain->val, argc, argv); 2506 | free(freep); 2507 | free(freed_ast); 2508 | free(tsize); 2509 | free(freedata); 2510 | free(sym); 2511 | free(text); 2512 | 2513 | return ret; 2514 | } 2515 | -------------------------------------------------------------------------------- /docs/IR.md: -------------------------------------------------------------------------------- 1 | # Intermediate Representation (IR) for AMaCC Compilation 2 | 3 | ## What is an IR 4 | An Intermediate representation (IR) is the specific data structure or code 5 | used internally by a compiler or virtual machine to represent a "program" 6 | between source code and target languages. Before generating binary, the 7 | compiler front-end will generate IR to aid the compiler backend to produce 8 | the intermediate form which is independent of the source file. 
9 | 10 | 11 | ## Why is an IR used 12 | * Because translation appears to inherently require analysis and synthesis. 13 | * Break the difficult problem of translation into two simpler, more manageable pieces. 14 | * To build retargetable compilers: 15 | - Build new back ends for an existing front end (making the source language more portable 16 | across machines). 17 | - Can build a new front-end for an existing back end. 18 | - We only have to write 2n half-compilers instead of n(n-1) full compilers. 19 | - To perform machine independent optimizations. 20 | 21 | So how does the IR actually work? Let's have an example: 22 | ```c 23 | int a; 24 | a = 10 + 1 + 11; 25 | ``` 26 | 27 | Inside AMaCC, the above C source will be converted into the following IR: 28 | ``` 29 | IMM 10 30 | PSH 31 | IMM 1 32 | ADD 33 | PSH 34 | IMM 11 35 | ADD 36 | ``` 37 | 38 | These instructions will be stored inside the stack. According to the stack 39 | LIFO (Last in First Out) order, they will be executed sequentially from 40 | top to bottom, as illustrated below: 41 | 42 | ``` 43 | | IMM 10 | pop "IMM 10" 44 | | PSH |-------------> | PSH | pop "PSH" 45 | | IMM 1 | | IMM 1 |----------> | IMM 1 | pop "IMM 1" 46 | | ADD | | ADD | | ADD |-----------> 47 | | PSH | | PSH | | PSH | 48 | | IMM 11 | | IMM 11| | IMM 11| 49 | | ADD | | ADD | | ADD | 50 | 51 | * stack * 52 | | | | | | 10 | 53 | * register * 54 | | | | 10 | | | 55 | ``` 56 | 57 | ``` 58 | | ADD | pop "ADD" 59 | | PSH |-------------> | PSH | pop "PSH" 60 | | IMM 11 | | IMM 11|----------> | IMM 11| 61 | | ADD | | ADD | | ADD | 62 | 63 | * stack * 64 | | 10 | | | | 11 | 65 | * register * 66 | | 1 | | 11 | | | 67 | ``` 68 | 69 | ``` 70 | | IMM 11 | pop "IMM 11" pop "ADD" 71 | | ADD | -----------> | ADD | --------> | | 72 | 73 | * stack * 74 | | 11 | | 11 | | | 75 | * register * 76 | | | | 11 | | 22 | -> the result we get 77 | ``` 78 | 79 | ## Instruction set 80 | 81 | | opcode | format | ARM instructions | comments | 82 |
|-----------|-------------------|-------------------------------|------------------------------------------------------------------| 83 | |LEA | LEA \<offset\> |add r0, r11, #\<offset\> |fetch arguments inside sub function | 84 | |IMM | IMM \<num\> |mov r0, #20 |put immediate \<num\> into general register | 85 | |JMP | JMP \<addr\> |b \<addr\> |set PC register to \<addr\> | 86 | |JSR | JSR \<addr\> |bl \<addr\> |stores current execution position and jump to \<addr\> | 87 | |LEV | LEV |add sp, r11, #0; pop {r11, pc} |fetch bookkeeping info to resume previous execution | 88 | |ENT | ENT \<size\> |push {r11, lr} ;add r11, sp, #0|called when we are about to enter the function call to "make a new calling frame". It will store the current PC value onto the stack, and save \<size\> bytes to store the local variable for function.| 89 | |ADJ | ADJ \<size\> |add sp, sp, #\<size\> |adjust the stack (to remove argument from frame) | 90 | |LI | LI |ldr r0, [r0] |loads an integer into general register from a given memory address which is stored in general register before execution| 91 | |SI | SI |pop {r1};str r0, [r1] |stores the integer in general register into the memory whose address is stored on the top of the stack| 92 | |LC | LC |ldrb r0, [r0] |loads a character into general register from a given memory address which is stored in general register before execution| 93 | |SC | SC |pop {r1}; strb r0, [r1] |stores the character in general register into the memory whose address is stored on the top of the stack| 94 | |PSH | PSH |push {r0} |pushes the value in general register onto the stack | 95 | 96 | ## Function call example 97 | 98 | ```c 99 | int func(int a) { 100 | return a * 10; 101 | } 102 | 103 | int main() { 104 | func(20); 105 | return 0; 106 | } 107 | ``` 108 | 109 | When compiling with AMaCC, passing the argument `-s` generates the IR along with 110 | the corresponding source.
111 | ```c 112 | 1: int func(int a) { 113 | 2: return a * 10; 114 | 3: } 115 | ENT 0 ; save func address on stack 116 | LEA 2 ; fetch a's address on stack and save into general register 117 | LI ; Load integer from memory whose address is inside general register 118 | PSH ; push integer to top of stack which is inside general register 119 | IMM 10 ; move 10 into general register 120 | MUL ; pop 'a' on the top of stack, and multiply 10 which is inside general register, store result into general register 121 | LEV ; return to main 122 | 4: 123 | 5: int main() 124 | 6: { 125 | 7: func(20); 126 | 8: return 0; 127 | 9: } 128 | ENT 0 ; save main address on stack 129 | IMM 20 ; move 20 into general register 130 | PSH ; push r0 on top of stack 131 | JSR -11120300 ; save sp on stack, save current execute position to lr, jump to func 132 | ADJ 1 ; remove 20 from stack 133 | IMM 0 ; move 0 into general register 134 | LEV ; return to entry 135 | ``` 136 | 137 | ### Arithmetic instructions 138 | 139 | Each operator has two arguments: 140 | * the first is stored on the top of the stack; 141 | * the second is stored in general register; 142 | 143 | After the calculation is done, the argument on the stack will be popped off, 144 | and the result will be stored in general register. So, you are not able to 145 | fetch the first argument from the stack after the calculation. 146 | 147 | You can see the above example to know how arithmetic instructions work. 148 | 149 | ### Conditional jump instructions 150 | 151 | The `BZ` and `BNZ` instructions must be used with arithmetic instructions, 152 | such as `EQ`, `NE`, `LT`, `GT`, `LE` and `GE`.
153 | 154 | Example: 155 | 156 | ```c 157 | 7: if (n > 0) { 158 | LEA 2 ; fetch n's address 159 | LI ; load n's value into r0 register 160 | PSH ; push n on to stack 161 | IMM 0 ; move 0 into r0 register 162 | GT ; compare r0 and r1 (pop r1 first on top of stack) 163 | BZ 0 ; jump when r0 is zero, i.e. when r1 > r0 was false 164 | ``` 165 | The arithmetic instructions for comparisons will be translated to ARM instructions. Example: 166 | ``` 167 | # GT 168 | pop {r1} 169 | cmp r1, r0 170 | movgt r0, 1 171 | movle r0, 0 172 | ``` 173 | 174 | The branch-on-zero instruction is translated as follows: 175 | ``` 176 | # BZ 177 | cmp r0, 0 178 | beq 0xff4a31d4 179 | ``` 180 | 181 | | opcode | format | ARM instructions | comments | 182 | | ------------ | ----------- | ---------------------------- | -------- | 183 | | BZ | BZ |cmp r0, 0;beq \<addr\> |branch on zero | 184 | | BNZ | BNZ |cmp r0, 0;bne \<addr\> |branch on not zero | 185 | -------------------------------------------------------------------------------- /mk/arm.mk: -------------------------------------------------------------------------------- 1 | CROSS_COMPILE ?= arm-none-linux-gnueabihf- 2 | 3 | ARM_CC = $(CROSS_COMPILE)gcc 4 | ARM_CC := $(shell which $(ARM_CC)) 5 | ifndef ARM_CC 6 | # Try Debian/Ubuntu package 7 | CROSS_COMPILE = arm-linux-gnueabihf- 8 | ARM_CC = $(CROSS_COMPILE)gcc 9 | ARM_CC := $(shell which $(ARM_CC)) 10 | ifndef ARM_CC 11 | $(error "no $(CROSS_COMPILE)gcc found.") 12 | endif 13 | endif 14 | export CROSS_COMPILE 15 | 16 | ARM_CC2 = $(shell echo | $(CROSS_COMPILE)cpp -dM - | grep ARM && echo 1) 17 | ifeq ("$(ARM_CC2)","") 18 | $(error "no valid GNU toolchain for ARM found.") 19 | endif 20 | 21 | ARM_QEMU = qemu-arm 22 | ARM_QEMU := $(shell which $(ARM_QEMU)) 23 | ifndef ARM_QEMU 24 | $(error "no qemu-arm found.
Please check package installation") 25 | endif 26 | 27 | # FIXME: check ld-linux.so as well 28 | ARM_LD_LINUX_PATH := $(shell cd $(shell $(ARM_CC) --print-sysroot) 2>/dev/null && pwd) 29 | ifeq ("$(ARM_LD_LINUX_PATH)","/") # packaged GNU toolchain 30 | ARM_LD_LINUX_PATH := $(shell dirname "$(shell which $(ARM_CC))")/.. 31 | ARM_LD_LINUX_PATH := $(shell cd $(ARM_LD_LINUX_PATH) 2>/dev/null && pwd) 32 | ARM_LD_LINUX_PATH := $(ARM_LD_LINUX_PATH)/$(shell echo $(CROSS_COMPILE) | sed s'/.$$//')/libc 33 | ARM_LD_LINUX_PATH := $(shell cd $(ARM_LD_LINUX_PATH) 2>/dev/null && pwd) 34 | ifndef ARM_LD_LINUX_PATH 35 | ARM_LD_LINUX_PATH = /usr/$(shell echo $(CROSS_COMPILE) | sed s'/.$$//') 36 | ARM_LD_LINUX_PATH := $(shell cd $(ARM_LD_LINUX_PATH) 2>/dev/null && pwd) 37 | endif 38 | endif 39 | ifndef ARM_LD_LINUX_PATH 40 | $(error "AMaCC requires ld-linux.so") 41 | endif 42 | 43 | ARM_EXEC = $(ARM_QEMU) -L $(ARM_LD_LINUX_PATH) 44 | export ARM_EXEC 45 | -------------------------------------------------------------------------------- /mk/common.mk: -------------------------------------------------------------------------------- 1 | UNAME_S := $(shell uname -s) 2 | ifeq ($(UNAME_S),Darwin) 3 | PRINTF = printf 4 | else 5 | PRINTF = env printf 6 | endif 7 | 8 | # Control the build verbosity 9 | ifeq ("$(VERBOSE)","1") 10 | Q := 11 | VECHO = @true 12 | REDIR = 13 | else 14 | Q := @ 15 | VECHO = @$(PRINTF) 16 | REDIR = >/dev/null 17 | endif 18 | 19 | # Test suite 20 | PASS_COLOR = \e[32;01m 21 | NO_COLOR = \e[0m 22 | 23 | pass = $(PRINTF) "$(PASS_COLOR)$1 Passed$(NO_COLOR)\n" 24 | -------------------------------------------------------------------------------- /mk/python.mk: -------------------------------------------------------------------------------- 1 | PYTHON = python3 2 | PYTHON := $(shell which $(PYTHON)) 3 | ifndef PYTHON 4 | $(error "python3 is required.") 5 | endif 6 | -------------------------------------------------------------------------------- /scripts/disasm: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if [ "$#" != "1" ]; then 3 | echo "Usage: disasm " 4 | exit 5 | fi 6 | DD_BYTES=`readelf -a $1 2>/dev/null | awk '/\.text.*PROGBITS/ { s = sprintf("skip=%d", "0x" $6) ; c = sprintf("count=%d", "0x" $7) ; print s, c }'` 7 | dd bs=1 if=$1 of=$1.asmtmp $DD_BYTES 8 | objdump -b binary -m arm -D $1.asmtmp 9 | rm $1.asmtmp 10 | -------------------------------------------------------------------------------- /scripts/runtest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | import unittest 5 | import subprocess as sp 6 | import os 7 | import sys 8 | 9 | amacc = './amacc' 10 | gcc = os.getenv("CROSS_COMPILE", "arm-none-linux-gnueabihf-") + "gcc" 11 | amaccdir = 'elf' 12 | gccdir = 'out-gcc' 13 | 14 | 15 | def mkdir_p(path): 16 | try: 17 | os.makedirs(path) 18 | except OSError as exc: # Python >2.5 19 | if exc.errno == errno.EEXIST and os.path.isdir(path): 20 | pass 21 | else: 22 | raise 23 | 24 | 25 | class TestCC_UC(unittest.TestCase): 26 | """ Test cases without -fsigned-char (default) """ 27 | pass 28 | 29 | 30 | class TestCC_SC(unittest.TestCase): 31 | """ Test cases with -fsigned-char """ 32 | pass 33 | 34 | 35 | def _generate_test(test_name, test_file, extra_cflags): 36 | def test(self): 37 | args = ['3'] 38 | 39 | test_file_name = os.path.splitext(os.path.basename(test_file))[0] 40 | 41 | # compile test program with gcc and run the output executable 42 | prog_exe = os.path.join(gccdir, test_file_name) 43 | # parameter '-w' inhibits all warning messages of gcc 44 | gcc_params = [gcc, '-w', '-o', prog_exe, test_file] + extra_cflags 45 | sp.run(gcc_params) 46 | 47 | proc = sp.run(qemuCmd + [prog_exe] + args, timeout=10, stdout=sp.PIPE) 48 | gcc_out, gcc_err, gcc_retcode = proc.stdout, proc.stderr, proc.returncode 49 | 50 | # run amacc in jit mode 51 | amacc_params = 
[amacc] + extra_cflags + [test_file] + args 52 | proc = sp.run(qemuCmd + amacc_params, timeout=10, stdout=sp.PIPE) 53 | amacc_out, amacc_err, amacc_retcode = proc.stdout, proc.stderr, proc.returncode 54 | self.assertEqual(amacc_out.decode('utf-8'), gcc_out.decode('utf-8')) 55 | self.assertEqual(amacc_retcode, gcc_retcode) 56 | 57 | # run amacc in compiler mode 58 | prog_exe = os.path.join(amaccdir, test_file_name) 59 | amacc_params = [amacc] + extra_cflags + ['-o', prog_exe, test_file] 60 | sp.run(qemuCmd + amacc_params) 61 | 62 | proc = sp.run(qemuCmd + [prog_exe] + args, timeout=10, stdout=sp.PIPE) 63 | amacc_out, amacc_err, amacc_retcode = proc.stdout, proc.stderr, proc.returncode 64 | self.assertEqual(amacc_out.decode('utf-8'), gcc_out.decode('utf-8')) 65 | self.assertEqual(amacc_retcode, gcc_retcode) 66 | 67 | return test 68 | 69 | 70 | def _define_tests(): 71 | if not os.access(amaccdir, os.F_OK): 72 | mkdir_p(amaccdir) 73 | if not os.access(gccdir, os.F_OK): 74 | mkdir_p(gccdir) 75 | for dirpath, _, filenames in os.walk('tests'): 76 | for f in filenames: 77 | if f.endswith('.c'): 78 | test_file = os.path.abspath(os.path.join(dirpath, f)) 79 | test_name = 'test_%s' % (os.path.splitext(f)[0]) 80 | 81 | # test without -fsigned-char (default ABI) 82 | test_func = _generate_test(test_name, test_file, []) 83 | setattr(TestCC_UC, test_name, test_func) 84 | 85 | # test with -fsigned-char 86 | test_func = _generate_test(test_name, test_file, ['-fsigned-char']) 87 | setattr(TestCC_SC, test_name, test_func) 88 | 89 | _define_tests() 90 | 91 | 92 | if __name__ == '__main__': 93 | try: 94 | qemuCmd = os.getenv('ARM_EXEC').split() 95 | except AttributeError: 96 | qemuCmd = 'qemu-arm -L /usr/arm-linux-gnueabihf'.split() 97 | unittest.main() 98 | -------------------------------------------------------------------------------- /tests/.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Chromium 2 | Language: Cpp 3 
| MaxEmptyLinesToKeep: 3 4 | IndentCaseLabels: false 5 | AllowShortIfStatementsOnASingleLine: false 6 | AllowShortCaseLabelsOnASingleLine: false 7 | AllowShortLoopsOnASingleLine: false 8 | DerivePointerAlignment: false 9 | PointerAlignment: Right 10 | SpaceAfterCStyleCast: true 11 | TabWidth: 4 12 | UseTab: Never 13 | IndentWidth: 4 14 | BreakBeforeBraces: Linux 15 | AccessModifierOffset: -4 16 | -------------------------------------------------------------------------------- /tests/arginc.c: -------------------------------------------------------------------------------- 1 | int main(int argc, char **argv) 2 | { 3 | return argc - 2; 4 | } 5 | -------------------------------------------------------------------------------- /tests/arginc.list: -------------------------------------------------------------------------------- 1 | 1: int main(int argc, char **argv) 2 | 2: { 3 | 3: return argc - 2; 4 | 4: } 5 | 0000: ENT 0 6 | 0002: LEA 3 7 | 0004: LI 8 | 0005: PSH 9 | 0006: IMM 2 10 | 0008: SUB 11 | 0009: LEV 12 | -------------------------------------------------------------------------------- /tests/assign.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int assert_eq(int a, int b) 5 | { 6 | if (a != b) { 7 | printf("Assertion: %d != %d\n", a, b); 8 | exit(1); 9 | } 10 | return 0; 11 | } 12 | 13 | int main(int argc, char **argv) 14 | { 15 | /* Value test */ 16 | int a, b, c; 17 | int d = 5, e = 10; 18 | 19 | assert_eq(d, 5); 20 | assert_eq(e, 10); 21 | 22 | a = 1; 23 | a += 101; 24 | assert_eq(a, 102); 25 | 26 | a = 10; 27 | a -= 101; 28 | assert_eq(a, -91); 29 | 30 | a = 10; 31 | a *= 101; 32 | assert_eq(a, 1010); 33 | 34 | /* precedence test */ 35 | a = 1; 36 | a += 3 * 4; 37 | assert_eq(a, 13); 38 | 39 | a = 1; 40 | a -= 3 * 4; 41 | assert_eq(a, -11); 42 | 43 | a = 2; 44 | a *= 3 * 4; 45 | assert_eq(a, 24); 46 | 47 | a = 10; 48 | a /= 5; 49 | assert_eq(a, 2); 50 | 51 | a = 4; 52 | a %= 3; 53 | 
assert_eq(a, 1); 54 | 55 | a = 1; 56 | a <<= 2; 57 | assert_eq(a, 4); 58 | 59 | a = 4; 60 | a >>= 2; 61 | assert_eq(a, 1); 62 | 63 | a = 17; 64 | a |= 14; 65 | assert_eq(a, 31); 66 | 67 | /* precedence test */ 68 | a = 0xff; 69 | b = 1; 70 | a ^= b | 2; 71 | assert_eq(a, 0xfc); 72 | 73 | a = 17; 74 | a &= 7; 75 | assert_eq(a, 1); 76 | 77 | /* comma operator tests */ 78 | a = 0; 79 | b = 10; 80 | a++, b++; 81 | assert_eq(a, 1); 82 | assert_eq(b, 11); 83 | 84 | c = (++a, ++b); 85 | assert_eq(c, 12); 86 | 87 | return 0; 88 | } 89 | -------------------------------------------------------------------------------- /tests/char.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int mcmp(char *a, char *b, int n) 5 | { 6 | int ret; 7 | while (n--) { 8 | ret = *a++ - *b++; 9 | if (ret) 10 | return ret; 11 | } 12 | return 0; 13 | } 14 | 15 | void mcpy(char *a, char *b, int n) 16 | { 17 | char *dst; 18 | dst = a; 19 | while (n--) 20 | *a++ = *b++; 21 | } 22 | 23 | int main() 24 | { 25 | char *p; 26 | int v; 27 | p = malloc(128); 28 | mcpy(p, "hello world", 12); 29 | printf("%s\n", p); 30 | printf("memcmp = %d\n", mcmp(p, "hello world", 12)); 31 | printf("memcmp = %d\n", mcmp(p, "hello xorld", 12)); 32 | printf("memcmp = %d\n", mcmp(p, "hello yorld", 12)); 33 | p[0] = -1; 34 | v = p[0]; 35 | printf("%x %d %d %x\n", p[0], p[0], v, p[1]); 36 | printf("\0"); /* shall be nothing generated */ 37 | 38 | return 0; 39 | } 40 | -------------------------------------------------------------------------------- /tests/comments.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | // single-line comment 6 | 7 | /* C-style multiline comments 8 | */ 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /tests/cond.c: -------------------------------------------------------------------------------- 1 | 
#include <stdio.h> 2 | #include <stdlib.h> 3 | 4 | int main(int argc, char **argv) 5 | { 6 | if (argc == 1) { 7 | printf("more arguments are required\n"); 8 | exit(1); 9 | } 10 | printf("argc = %d\n", argc); 11 | 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /tests/duff.c: -------------------------------------------------------------------------------- 1 | void copy(char *to, char *from, int count) /* Duff's device: requires count > 0, a zero count still runs one full 8-byte pass */ 2 | { 3 | int n = (count + 7) >> 3; 4 | switch (count & 7) { 5 | case 0: 6 | do { 7 | *to++ = *from++; 8 | case 7: 9 | *to++ = *from++; 10 | case 6: 11 | *to++ = *from++; 12 | case 5: 13 | *to++ = *from++; 14 | case 4: 15 | *to++ = *from++; 16 | case 3: 17 | *to++ = *from++; 18 | case 2: 19 | *to++ = *from++; 20 | case 1: 21 | *to++ = *from++; 22 | } while (--n > 0); 23 | } 24 | } 25 | 26 | void fastcopy(char *to, char *from, int count) /* same copy using goto labels instead of case labels inside the loop; also requires count > 0 */ 27 | { 28 | int n = (count + 7) >> 3; 29 | 30 | switch (count & 7) { 31 | case 7: 32 | goto r7; 33 | case 6: 34 | goto r6; 35 | case 5: 36 | goto r5; 37 | case 4: 38 | goto r4; 39 | case 3: 40 | goto r3; 41 | case 2: 42 | goto r2; 43 | case 1: 44 | goto r1; 45 | } 46 | 47 | do { 48 | *to++ = *from++; 49 | r7: 50 | *to++ = *from++; 51 | r6: 52 | *to++ = *from++; 53 | r5: 54 | *to++ = *from++; 55 | r4: 56 | *to++ = *from++; 57 | r3: 58 | *to++ = *from++; 59 | r2: 60 | *to++ = *from++; 61 | r1: 62 | *to++ = *from++; 63 | } while (--n > 0); 64 | } 65 | 66 | int main() 67 | { 68 | char *message = "This is a test of duff's device\n"; 69 | char *output = malloc(64); 70 | char *output2 = malloc(64); 71 | 72 | copy(output, message, 33); /* 32 characters plus the terminating NUL */ 73 | printf("%s", output); /* never pass a data buffer as the format string */ 74 | 75 | fastcopy(output2, message, 33); 76 | printf("%s", output2); 77 | 78 | free(output2); 79 | free(output); 80 | 81 | return 0; 82 | } 83 | -------------------------------------------------------------------------------- /tests/enum.c: -------------------------------------------------------------------------------- 1 | enum color { RED, GREEN, YELLO }; 2
| enum { BLACK = 10, BLUE }; 3 | 4 | int main(void) 5 | { 6 | int a = GREEN; 7 | printf("blue: %d\n", BLUE); 8 | printf("a:%d\n", a); 9 | printf("a + 1:%d", a + 1); 10 | return 0; 11 | } 12 | -------------------------------------------------------------------------------- /tests/eq.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | printf("16000 == 16000 : %d\n", 16000 == 16000); 6 | printf("16000 == 17000 : %d\n", 16000 == 17000); 7 | printf("2 == 2 : %d\n", 2 == 2); 8 | printf("0 == 0 : %d\n", 0 == 0); 9 | printf("-1 == -1 : %d\n", -1 == -1); 10 | printf("\n"); 11 | 12 | printf("16000 != 16000 : %d\n", 16000 != 16000); 13 | printf("16000 != 17000 : %d\n", 16000 != 17000); 14 | printf("2 != 2 : %d\n", 2 != 2); 15 | printf("0 != 0 : %d\n", 0 != 0); 16 | printf("-1 != -1 : %d\n", -1 != -1); 17 | printf("\n"); 18 | 19 | printf("17000 > 16000 : %d\n", 17000 > 16000); 20 | printf("17000 > 17000 : %d\n", 17000 > 17000); 21 | printf("16000 > 17000 : %d\n", 16000 > 17000); 22 | printf("16000 > -17000 : %d\n", 16000 > -17000); 23 | printf("-16000 > -17000 : %d\n", -16000 > -17000); 24 | printf("\n"); 25 | 26 | printf("17000 >= 16000 : %d\n", 17000 >= 16000); 27 | printf("17000 >= 17000 : %d\n", 17000 >= 17000); 28 | printf("16000 >= 17000 : %d\n", 16000 >= 17000); 29 | printf("16000 >= -17000 : %d\n", 16000 >= -17000); 30 | printf("-16000 >= -17000 : %d\n", -16000 >= -17000); 31 | printf("\n"); 32 | 33 | printf("16000 < 17000 : %d\n", 16000 < 17000); 34 | printf("16000 < 16000 : %d\n", 16000 < 16000); 35 | printf("17000 < 16000 : %d\n", 17000 < 16000); 36 | printf("17000 < -16000 : %d\n", 17000 < -16000); 37 | printf("-17000 < -16000 : %d\n", -17000 < -16000); 38 | printf("\n"); 39 | 40 | printf("16000 <= 17000 : %d\n", 16000 <= 17000); 41 | printf("16000 <= 16000 : %d\n", 16000 <= 16000); 42 | printf("17000 <= 16000 : %d\n", 17000 <= 16000); 43 | printf("17000 <= -16000 : %d\n", 17000 <= -16000); 
44 | printf("-17000 <= -16000 : %d\n", -17000 <= -16000); 45 | printf("\n"); 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /tests/fib.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int my_atoi(char *s) 5 | { 6 | int res; 7 | res = 0; 8 | while (*s) { 9 | if (*s < '0' || '9' < *s) 10 | return 0; 11 | res = res * 10 + (*s - '0'); 12 | ++s; 13 | } 14 | return res; 15 | } 16 | 17 | int fib(int n) 18 | { 19 | if (n < 2) 20 | return 1; 21 | return fib(n - 1) + fib(n - 2); 22 | } 23 | 24 | int main(int argc, char **argv) 25 | { 26 | int n; 27 | if (argc < 2) { 28 | printf("Usage: %s \n", argv[0]); 29 | exit(1); 30 | } 31 | 32 | n = my_atoi(argv[1]); 33 | printf("%d\n", fib(n)); 34 | 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /tests/for.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char **argv) 4 | { 5 | int i, j; 6 | 7 | j = 10; 8 | 9 | for (i = 0, printf("let's loop\n"); i < j; i++, printf("loops again\n")) 10 | printf("loop %d\n", i); 11 | 12 | printf("nested loop\n"); 13 | for (i = 1; i < 10; i++) { 14 | for (j = 1; j < 10; j++) { 15 | printf("%d * %d = %d\t", i, j, i * j); 16 | } 17 | printf("\n"); 18 | } 19 | 20 | printf("\n"); 21 | for (i = 1; i <= 5; ++i) { 22 | for (j = 1; j <= 5; ++j) { 23 | if (j > i) 24 | break; 25 | printf("* "); 26 | } 27 | printf("\n"); 28 | } 29 | printf("\n"); 30 | 31 | printf("\n"); 32 | for (i = 1; i <= 30; ++i) { 33 | if (i > 10 && i < 20) 34 | continue; 35 | printf("%d ", i); 36 | } 37 | printf("\n"); 38 | 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /tests/func_call.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int test(int a, int b) 4 | { 
5 | return a + b; 6 | } 7 | 8 | int main() 9 | { 10 | int result; 11 | result = test(1, 2); 12 | printf("result is %d\n", result); 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /tests/func_param.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | int i; 6 | i = 10; 7 | printf("hello, world %d \n", i); 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /tests/goto.c: -------------------------------------------------------------------------------- 1 | // Finite state machine 2 | 3 | // $* -> AB* 4 | // [^$]* -> AC* 5 | // [^$]$$ -> ACD 6 | // $[^$]$$ -> ABCD 7 | 8 | int main() 9 | { 10 | char *data = "$$$$7o$n*r*0rj$o*$c0*d**dj0$$gbwj0"; 11 | 12 | A: 13 | printf("A"); 14 | if (*data++ != '$') 15 | goto C; 16 | 17 | B: 18 | printf("B"); 19 | if (*data++ == '$') 20 | goto B; 21 | 22 | C: 23 | printf("C"); 24 | if (*data++ != '$') 25 | goto C; 26 | 27 | D: 28 | printf("D"); 29 | if (*data++ != '$') 30 | goto C; 31 | 32 | printf("\n"); 33 | 34 | return 0; 35 | } 36 | -------------------------------------------------------------------------------- /tests/hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | printf("hello, world\n"); 6 | return 0; 7 | } 8 | -------------------------------------------------------------------------------- /tests/inc.c: -------------------------------------------------------------------------------- 1 | int inc(int x) 2 | { 3 | return x + 1; 4 | } 5 | 6 | int add2(int x) 7 | { 8 | return inc(inc(x)); 9 | } 10 | 11 | int main(int argc, char **argv) 12 | { 13 | return add2(argc) - 4; 14 | } 15 | -------------------------------------------------------------------------------- /tests/jit.c: -------------------------------------------------------------------------------- 1 | #include 
2 | #include 3 | #include 4 | 5 | int main(int ac, char **av) 6 | { 7 | char *jitmem; 8 | int *je, var; 9 | 10 | jitmem = mmap(0, 256, 7, 0x22, -1, 0); 11 | je = (int *) jitmem; 12 | *je++ = 0xe59f000c; // ldr r0, [pc, #12] 13 | *je++ = 0xe5901000; // ldr r1, [r0] 14 | *je++ = 0xe2811009; // add r1, r1, #9 15 | *je++ = 0xe5801000; // str r1, [r0] 16 | *je++ = 0xe1a0f00e; // mov pc, lr 17 | *je = (int) &var; 18 | __clear_cache(jitmem, je); 19 | 20 | var = ac; 21 | bsearch(&av, av, 1, 1, (void *) jitmem); 22 | printf("ac = %d, var = %d\n", ac, var); 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /tests/literal.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | int a; 6 | a = 256; 7 | while (a++ < 512) 8 | printf("a = %d\n", a); 9 | 10 | return 0; 11 | } 12 | -------------------------------------------------------------------------------- /tests/local.c: -------------------------------------------------------------------------------- 1 | int main(void) 2 | { 3 | int n = 10; 4 | printf("%d\n", n); 5 | char cc = 'a'; 6 | char *ptr = &cc; 7 | printf("%c\n", *ptr); 8 | int *c = &n; 9 | printf("%d\n", *c); 10 | int r; 11 | r = 0; 12 | return r; 13 | } 14 | -------------------------------------------------------------------------------- /tests/maze.c: -------------------------------------------------------------------------------- 1 | /* Maze generator in C. 2 | * Written by Joe Wingbermuehle 3 | * 1999-08-05 4 | * Sourced from https://raw.githubusercontent.com/joewing/maze/master/maze.c 5 | * Tweaked for AMaCC. 
6 | */ 7 | 8 | #include 9 | #include 10 | 11 | int maze_rand_v; 12 | int maze_rand() 13 | { 14 | return ((maze_rand_v = maze_rand_v * 214013 + 2531011) >> 16) & 0x7fff; 15 | } 16 | 17 | int maze_atoi(char *str, int radix) 18 | { 19 | int v, sign; 20 | 21 | v = 0; 22 | sign = 1; 23 | if (*str == '-') { 24 | sign = -1; 25 | ++str; 26 | } 27 | while ((*str >= 'A' && *str <= 'Z') || (*str >= 'a' && *str <= 'z') || 28 | (*str >= '0' && *str <= '9')) { 29 | v = v * radix + 30 | ((*str > '9') ? (*str & ~0x20) - 'A' + 10 : (*str - '0')); 31 | ++str; 32 | } 33 | return v * sign; 34 | } 35 | 36 | /* Display the maze. */ 37 | void show_maze(char *maze, int width, int height) 38 | { 39 | int x, y; 40 | for (y = 0; y < height; y++) { 41 | for (x = 0; x < width; x++) { 42 | switch (maze[y * width + x]) { 43 | case 1: 44 | printf("[]"); 45 | break; 46 | case 2: 47 | printf("<>"); 48 | break; 49 | default: 50 | printf(" "); 51 | break; 52 | } 53 | } 54 | printf("\n"); 55 | } 56 | } 57 | 58 | /* Carve the maze starting at x, y. */ 59 | void carve_maze(char *maze, int width, int height, int x, int y) 60 | { 61 | int x1, y1; 62 | int x2, y2; 63 | int dx, dy; 64 | int dir, count; 65 | 66 | dir = maze_rand() % 4; 67 | count = 0; 68 | while (count < 4) { 69 | dx = 0; 70 | dy = 0; 71 | switch (dir) { 72 | case 0: 73 | dx = 1; 74 | break; 75 | case 1: 76 | dy = 1; 77 | break; 78 | case 2: 79 | dx = -1; 80 | break; 81 | default: 82 | dy = -1; 83 | break; 84 | } 85 | x1 = x + dx; 86 | y1 = y + dy; 87 | x2 = x1 + dx; 88 | y2 = y1 + dy; 89 | if (x2 > 0 && x2 < width && y2 > 0 && y2 < height && 90 | maze[y1 * width + x1] == 1 && maze[y2 * width + x2] == 1) { 91 | maze[y1 * width + x1] = 0; 92 | maze[y2 * width + x2] = 0; 93 | x = x2; 94 | y = y2; 95 | dir = maze_rand() % 4; 96 | count = 0; 97 | } else { 98 | dir = (dir + 1) % 4; 99 | count++; 100 | } 101 | } 102 | } 103 | 104 | /* Generate maze in matrix maze with size width, height. 
*/ 105 | void generate_maze(char *maze, int width, int height) 106 | { 107 | int x, y; 108 | 109 | /* Initialize the maze. */ 110 | for (x = 0; x < width * height; x++) 111 | maze[x] = 1; 112 | maze[1 * width + 1] = 0; 113 | 114 | /* Carve the maze. */ 115 | for (y = 1; y < height; y += 2) 116 | for (x = 1; x < width; x += 2) 117 | carve_maze(maze, width, height, x, y); 118 | 119 | /* Set up the entry and exit. */ 120 | maze[0 * width + 1] = 0; 121 | maze[(height - 1) * width + (width - 2)] = 0; 122 | } 123 | 124 | /* Solve the maze. */ 125 | void solve_maze(char *maze, int width, int height) 126 | { 127 | int dir, count; 128 | int x, y; 129 | int dx, dy; 130 | int forward; 131 | 132 | /* Remove the entry and exit. */ 133 | maze[0 * width + 1] = 1; 134 | maze[(height - 1) * width + (width - 2)] = 1; 135 | 136 | forward = 1; 137 | dir = 0; 138 | count = 0; 139 | x = 1; 140 | y = 1; 141 | while (x != width - 2 || y != height - 2) { 142 | dx = 0; 143 | dy = 0; 144 | switch (dir) { 145 | case 0: 146 | dx = 1; 147 | break; 148 | case 1: 149 | dy = 1; 150 | break; 151 | case 2: 152 | dx = -1; 153 | break; 154 | default: 155 | dy = -1; 156 | break; 157 | } 158 | if ((forward && maze[(y + dy) * width + (x + dx)] == 0) || 159 | (!forward && maze[(y + dy) * width + (x + dx)] == 2)) { 160 | maze[y * width + x] = forward ? 2 : 3; 161 | x = x + dx; 162 | y = y + dy; 163 | forward = 1; 164 | count = 0; 165 | dir = 0; 166 | } else { 167 | dir = (dir + 1) % 4; 168 | count = count + 1; 169 | if (count > 3) { 170 | forward = 0; 171 | count = 0; 172 | } 173 | } 174 | } 175 | 176 | /* Replace the entry and exit. 
*/ 177 | maze[(height - 2) * width + (width - 2)] = 2; 178 | maze[(height - 1) * width + (width - 2)] = 2; 179 | } 180 | 181 | enum { A_RANDV, A_WIDTH, A_HEIGHT, A_SOLVE }; 182 | 183 | int main(int argc, char **argv) 184 | { 185 | int width, height, solve, mode, v; 186 | char *maze, *invocation; 187 | 188 | maze_rand_v = 6; // chosen by fair dice roll, guaranteed to be random 189 | width = 10 * 2 + 3; 190 | height = 10 * 2 + 3; 191 | solve = 0; 192 | 193 | invocation = *argv; 194 | --argc; 195 | ++argv; 196 | mode = A_RANDV; 197 | while (argc > 0) { 198 | if (**argv == '-' && *(*argv + 1) == 'h') { 199 | printf("Usage: %s [seed] [width] [height] [s]\n", invocation); 200 | return 0; 201 | } 202 | v = maze_atoi(*argv, 10); 203 | if (**argv == 's' || **argv == '2') 204 | solve = 1; 205 | else if (mode == A_RANDV) 206 | maze_rand_v = v * 0xfffa; 207 | else if (mode == A_WIDTH) 208 | width = v * 2 + 3; 209 | else if (mode == A_HEIGHT) 210 | height = v * 2 + 3; 211 | else { 212 | printf("Unknown argument: '%s'\n", *argv); 213 | return 1; 214 | } 215 | ++mode; 216 | --argc; 217 | ++argv; 218 | } 219 | /* Get and validate the size. */ 220 | if (width < 7) 221 | width = 7; 222 | if (height < 7) 223 | height = 7; 224 | 225 | /* Allocate the maze array. */ 226 | maze = (char *) malloc(width * height * sizeof(char)); 227 | if (maze == 0) { 228 | printf("error: not enough memory\n"); 229 | exit(1); 230 | } 231 | 232 | /* Generate and display the maze. */ 233 | generate_maze(maze, width, height); 234 | show_maze(maze, width, height); 235 | 236 | /* Solve the maze if requested. */ 237 | if (solve) { 238 | solve_maze(maze, width, height); 239 | show_maze(maze, width, height); 240 | } 241 | 242 | /* Clean up. 
*/ 243 | free(maze); 244 | return 0; 245 | } 246 | -------------------------------------------------------------------------------- /tests/printf.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | printf("arg1 %s %s %s %s %s\n", "arg2", "arg3", "arg4", "arg5", "arg6"); 6 | 7 | return 0; 8 | } 9 | -------------------------------------------------------------------------------- /tests/ptr.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int assert_eq(int a, int b) 5 | { 6 | if (a != b) { 7 | printf("Assertion: %d != %d\n", a, b); 8 | exit(1); 9 | } 10 | return 0; 11 | } 12 | 13 | int main() 14 | { 15 | int i; 16 | int *s, *e, v; 17 | int *data; 18 | struct abc_s { 19 | int a, b, c; 20 | } * sptr; 21 | 22 | s = (int *) 0xbebebeb0; 23 | e = (int *) 0xbebebeb4; 24 | v = e - s; 25 | if (v == 1) 26 | printf("passed\n"); 27 | else 28 | printf("failed, e - s = %x\n", v); 29 | v = (int) (e - 1); 30 | if (v == (int) s) 31 | printf("passed\n"); 32 | else 33 | printf("failed, e - s = %x\n", v); 34 | 35 | data = (int *) malloc(sizeof(int) * 10); 36 | sptr = (struct abc_s *) malloc(sizeof(struct abc_s) * 10); 37 | 38 | assert_eq(&sptr[5] - &sptr[2], 3); 39 | assert_eq((int) (&sptr[5] - 3), (int) &sptr[2]); 40 | assert_eq((int) &sptr[5], (int) (sptr + 5)); 41 | assert_eq((int) &sptr[5], (int) (5 + sptr)); 42 | 43 | for (i = 0; i < 10; ++i) 44 | data[i] = i; 45 | 46 | s = data; 47 | e = &data[9]; 48 | for (i = 0; i < 10; ++i) { 49 | assert_eq(s[i], *(s + i)); 50 | assert_eq(e[-i], *(e - i)); 51 | } 52 | 53 | free(sptr); 54 | free(data); 55 | 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /tests/read.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char **argv) 6 | { 7 | char *p; 8 
| int fd, readsz; 9 | 10 | if (!(p = malloc(1024 * 16))) { 11 | printf("failed to malloc memory\n"); 12 | exit(1); 13 | } 14 | 15 | fd = open("amacc.c", 0); 16 | 17 | readsz = read(fd, p, 1024 * 16); 18 | printf("read %d bytes\nContents:\n______________________________\n%s", 19 | readsz, p); 20 | printf("\n_______________________________\n"); 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /tests/shift.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | int a, b, c; 6 | printf("1 << 0 = %x\n", 1 << 0); 7 | printf("1 << 2 = %x\n", 1 << 2); 8 | printf("0 << 4 = %x\n", 0 << 4); 9 | printf("1 << 31 = %x\n", 1 << 31); 10 | printf("1 << 32 = %x\n", 1 << 32); 11 | printf("4 << -1 = %x\n", 4 << -1); 12 | printf("4 >> -1 = %x\n", 4 >> -1); 13 | printf("-1 << 1 = %x\n", -1 << 1); 14 | printf("-1 << 0 = %x\n", -1 << 0); 15 | 16 | printf("4 >> 1 = %x\n", 4 >> 1); 17 | printf("4 >> 5 = %x\n", 4 >> 5); 18 | printf("0x80000000 >> 31 = %x\n", (int) 0x80000000 >> 31); 19 | printf("-1 >> 2 = %x\n", -1 >> 2); 20 | b = 0xbef6d568; 21 | c = 0xbef6d56a; 22 | printf("%d - %d = %d(%x)\n", b, c, b - c, b - c); 23 | a = (b << 8) | 12; 24 | c = (a >> 8) | ((int) b & 0xff000000); 25 | printf("a = %x, b = %x, c = %x\n", a, b, c); 26 | a = ((b & 0x007fffff) << 8) | 12; 27 | c = (a >> 8) | ((int) b & 0xff800000); 28 | printf("a = %x, b = %x, c = %x\n", a, b, c); 29 | 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /tests/struct.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | int len; 4 | struct foo { 5 | char *p; 6 | char c; 7 | char *pad; 8 | int x; 9 | } * d; 10 | 11 | char *str; 12 | 13 | int main(int argc, char **argv) 14 | { 15 | struct foo bar, *ptr; 16 | char c = '1'; 17 | int i; 18 | str = "I am a String!\n"; 19 | len = 10; 20 
| bar.x = 1; 21 | struct foo *p = &bar; 22 | p->c = 'a'; 23 | 24 | // FIXME: 32-bit only 25 | if (*(int *) ((void *) p + sizeof(struct foo) - 4) != bar.x) 26 | exit(-1); 27 | 28 | printf("%zu\n", sizeof(struct foo)); 29 | printf("%c\n", bar.c); 30 | 31 | d = malloc(sizeof(struct foo) * len); 32 | ptr = d; 33 | for (i = 0; i < len / 2; ++i) { 34 | ptr->p = "one"; 35 | ptr->pad = str; 36 | ptr->x = i; 37 | ptr->c = c; 38 | ++ptr; 39 | ++c; 40 | } 41 | 42 | for (; i < len; ++i) { 43 | d[i].p = "one"; 44 | d[i].pad = str; 45 | d[i].x = i; 46 | d[i].c = c; 47 | ++c; 48 | } 49 | 50 | for (i = 0; i < len; ++i) { 51 | printf("%d------------\n", i); 52 | printf("%s\n", d[i].p); 53 | printf("%s\n", d[i].pad); 54 | printf("%d\n", d[i].x); 55 | printf("%c\n", d[i].c); 56 | printf("--------------\n"); 57 | } 58 | 59 | ptr = d; 60 | for (i = 0; i < len; ++i) { 61 | printf("%d------------\n", i); 62 | printf("%s\n", ptr->p); 63 | printf("%s\n", ptr->pad); 64 | printf("%d\n", ptr->x); 65 | printf("%c\n", ptr->c); 66 | printf("--------------\n"); 67 | ++ptr; 68 | } 69 | 70 | ptr = d; 71 | for (i = 0; i < len; ++i) { 72 | printf("%d------------\n", i); 73 | printf("%s\n", (*ptr).p); 74 | printf("%s\n", (*ptr).pad); 75 | printf("%d\n", (*ptr).x); 76 | printf("%c\n", (*ptr).c); 77 | printf("--------------\n"); 78 | ++ptr; 79 | } 80 | 81 | return 0; 82 | } 83 | -------------------------------------------------------------------------------- /tests/switch.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char **argv) 4 | { 5 | switch (argc) { 6 | case 1: 7 | printf("No arguments\n"); 8 | return 0; 9 | case 2: 10 | printf("arg = %s\n", argv[1]); 11 | break; 12 | default: 13 | printf("More than 1 argument\n"); 14 | break; 15 | } 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /tests/union.c: 
-------------------------------------------------------------------------------- 1 | struct s2 { 2 | int x, y; 3 | }; 4 | 5 | struct s3 { 6 | int x, y, z; 7 | }; 8 | 9 | union pt { 10 | struct s2 plane; 11 | struct s3 space; 12 | }; 13 | 14 | int main() 15 | { 16 | union pt *p; 17 | int i; 18 | 19 | p = malloc(4 * sizeof(union pt)); 20 | 21 | for (i = 0; i < 4; ++i) { 22 | p[i].space.x = i; 23 | p[i].space.y = i + 1; 24 | p[i].space.z = 4 - i; 25 | printf("(%d, %d, %d)\n", p[i].space.x, p[i].space.y, p[i].space.z); 26 | } 27 | 28 | for (i = 0; i < 4; ++i) { 29 | printf("(%d, %d)\n", p[i].plane.x, p[i].plane.y); 30 | } 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /tests/while.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int fact(int n) 4 | { 5 | int r; 6 | r = 1; 7 | while (n > 0) { 8 | r = r * n; 9 | printf("n = %d, r = %d\n", n, r); 10 | --n; 11 | } 12 | return r; 13 | } 14 | 15 | int filteradd(char *data) 16 | { 17 | int sum = 0; 18 | do { 19 | if (*data == '*') 20 | break; 21 | if (*data < '0' || *data > '9') 22 | continue; 23 | sum += *data - '0'; 24 | } while (*++data != 0); 25 | 26 | return sum; 27 | } 28 | 29 | int main(int argc, char **argv) 30 | { 31 | printf("%d\n", fact(8)); 32 | printf("\n%d\n", filteradd("445h5h5g*45hb7b4g5")); 33 | 34 | return 0; 35 | } 36 | --------------------------------------------------------------------------------