├── .ci
    ├── cross-check.sh
    └── cross-tool.sh
├── .github
    └── workflows
    │   └── main.yml
├── .gitignore
├── AUTHORS
├── LICENSE
├── Makefile
├── README.md
├── amacc.c
├── docs
    └── IR.md
├── mk
    ├── arm.mk
    ├── common.mk
    └── python.mk
├── scripts
    ├── disasm
    └── runtest.py
└── tests
    ├── .clang-format
    ├── arginc.c
    ├── arginc.list
    ├── assign.c
    ├── char.c
    ├── comments.c
    ├── cond.c
    ├── duff.c
    ├── enum.c
    ├── eq.c
    ├── fib.c
    ├── for.c
    ├── func_call.c
    ├── func_param.c
    ├── goto.c
    ├── hello.c
    ├── inc.c
    ├── jit.c
    ├── literal.c
    ├── local.c
    ├── maze.c
    ├── printf.c
    ├── ptr.c
    ├── read.c
    ├── shift.c
    ├── struct.c
    ├── switch.c
    ├── union.c
    └── while.c


/.ci/cross-check.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | #
 3 | # Run `make check` using the prebuilt Arm GCC cross toolchain.
 4 | # Exits quietly with status 0 on anything other than x86_64 Linux, or when
 5 | # the CXX environment variable names a Clang compiler.
 6 | 
 7 | MACHINE_TYPE=$(uname -m)
 8 | if [ "${MACHINE_TYPE}" != 'x86_64' ]; then
 9 |     exit
10 | fi
11 | 
12 | OS_TYPE=$(uname -s)
13 | if [ "${OS_TYPE}" != 'Linux' ]; then
14 |     exit
15 | fi
16 | 
17 | # Clang/LLVM is natively a cross-compiler.
18 | # TODO: Do cross-compilation using Clang
19 | # https://clang.llvm.org/docs/CrossCompilation.html
20 | # grep -q tests for the match directly, so a CXX value containing spaces
21 | # (e.g. "clang++ -stdlib=libc++") cannot break the condition.
22 | if printenv CXX | grep -q clang; then
23 |     exit
24 | fi
25 | 
26 | # Same release string as the one downloaded by .ci/cross-tool.sh.
27 | GCC_REL=11.2-2022.02
28 | 
29 | set -x
30 | 
31 | # The toolchain path is relative to the current directory, so this script
32 | # has to be run from the directory where the archive was unpacked.
33 | export PATH="gcc-arm-${GCC_REL}-x86_64-arm-none-linux-gnueabihf/bin:${PATH}"
34 | make CROSS_COMPILE=arm-none-linux-gnueabihf- check || exit 1
35 | 


--------------------------------------------------------------------------------
/.ci/cross-tool.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | #
 3 | # Install QEMU user-mode emulation and unpack the prebuilt Arm GCC cross
 4 | # toolchain into the current directory. No-op unless run on x86_64 Linux.
 5 | 
 6 | ARM_MIRROR=https://github.com/DLTcollab/toolchain-arm/raw/main
 7 | GCC_REL=11.2-2022.02
 8 | 
 9 | MACHINE_TYPE=$(uname -m)
10 | if [ "${MACHINE_TYPE}" != 'x86_64' ]; then
11 |     exit
12 | fi
13 | 
14 | OS_TYPE=$(uname -s)
15 | if [ "${OS_TYPE}" != 'Linux' ]; then
16 |     exit
17 | fi
18 | 
19 | set -x
20 | 
21 | sudo apt-get update -q -y
22 | sudo apt-get install -q -y qemu-user
23 | 
24 | sudo apt-get install -y curl xz-utils
25 | 
26 | # -f makes curl fail on an HTTP error instead of piping the server's error
27 | # page into tar.
28 | curl -fL \
29 |     "${ARM_MIRROR}/gcc-arm-${GCC_REL}-x86_64-arm-none-linux-gnueabihf.tar.xz" \
30 |     | tar -Jx || exit 1
31 | 


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: Github Actions
 2 |   
 3 | # Run on every push and every pull request.
 4 | on: [push, pull_request]
 5 | 
 6 | jobs:
 7 |   # Single job: install the cross toolchain and QEMU (cross-tool.sh), then
 8 |   # run `make check` with that toolchain (cross-check.sh).
 9 |   host_x86:
10 |     runs-on: ubuntu-20.04
11 |     strategy:
12 |       matrix:
13 |         compiler: [gcc-10]
14 |     steps:
15 |       - name: checkout code
16 |         uses: actions/checkout@v3
17 |       - name: build artifact
18 |         env:
19 |           # NOTE(review): cross-check.sh inspects CXX, not CC, when it looks
20 |           # for Clang -- confirm which variable is intended.
21 |           CC: ${{ matrix.compiler }}
22 |         run: |
23 |           sh .ci/cross-tool.sh
24 |           sh .ci/cross-check.sh
25 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | amacc
2 | amacc-native
3 | elf/
4 | out-gcc/
5 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
 1 | AMaCC is written by:
 2 |   Jim Huang <jserv.tw@gmail.com>
 3 |   Ying-Ruei Liang (KK) <thumbd03803@gmail.com>
 4 |   lecopzer <james455096@gmail.com>
 5 |   yodalee <lc85301@gmail.com>
 6 |   Logan Chien <tzuhsiang.chien@gmail.com>
 7 |   splasky <henrychung860326@gmail.com>
 8 |   HPCguy <https://github.com/HPCguy>
 9 |  
10 | Based on the original work from Robert Swierczek.
11 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | AMaCC is freely redistributable under the GNU GPL:
  2 | 
  3 | Copyright (C) 2016-2023 National Cheng Kung University, Taiwan.
  4 | Copyright (C) 2014-2015 Robert Swierczek.
  5 | 
  6 | 		    GNU GENERAL PUBLIC LICENSE
  7 | 		       Version 2, June 1991
  8 | 
  9 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.
 10 |      51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 11 |  Everyone is permitted to copy and distribute verbatim copies
 12 |  of this license document, but changing it is not allowed.
 13 | 
 14 | 			    Preamble
 15 | 
 16 |   The licenses for most software are designed to take away your
 17 | freedom to share and change it.  By contrast, the GNU General Public
 18 | License is intended to guarantee your freedom to share and change free
 19 | software--to make sure the software is free for all its users.  This
 20 | General Public License applies to most of the Free Software
 21 | Foundation's software and to any other program whose authors commit to
 22 | using it.  (Some other Free Software Foundation software is covered by
 23 | the GNU Library General Public License instead.)  You can apply it to
 24 | your programs, too.
 25 | 
 26 |   When we speak of free software, we are referring to freedom, not
 27 | price.  Our General Public Licenses are designed to make sure that you
 28 | have the freedom to distribute copies of free software (and charge for
 29 | this service if you wish), that you receive source code or can get it
 30 | if you want it, that you can change the software or use pieces of it
 31 | in new free programs; and that you know you can do these things.
 32 | 
 33 |   To protect your rights, we need to make restrictions that forbid
 34 | anyone to deny you these rights or to ask you to surrender the rights.
 35 | These restrictions translate to certain responsibilities for you if you
 36 | distribute copies of the software, or if you modify it.
 37 | 
 38 |   For example, if you distribute copies of such a program, whether
 39 | gratis or for a fee, you must give the recipients all the rights that
 40 | you have.  You must make sure that they, too, receive or can get the
 41 | source code.  And you must show them these terms so they know their
 42 | rights.
 43 | 
 44 |   We protect your rights with two steps: (1) copyright the software, and
 45 | (2) offer you this license which gives you legal permission to copy,
 46 | distribute and/or modify the software.
 47 | 
 48 |   Also, for each author's protection and ours, we want to make certain
 49 | that everyone understands that there is no warranty for this free
 50 | software.  If the software is modified by someone else and passed on, we
 51 | want its recipients to know that what they have is not the original, so
 52 | that any problems introduced by others will not reflect on the original
 53 | authors' reputations.
 54 | 
 55 |   Finally, any free program is threatened constantly by software
 56 | patents.  We wish to avoid the danger that redistributors of a free
 57 | program will individually obtain patent licenses, in effect making the
 58 | program proprietary.  To prevent this, we have made it clear that any
 59 | patent must be licensed for everyone's free use or not licensed at all.
 60 | 
 61 |   The precise terms and conditions for copying, distribution and
 62 | modification follow.
 63 | 
 64 | 		    GNU GENERAL PUBLIC LICENSE
 65 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 66 | 
 67 |   0. This License applies to any program or other work which contains
 68 | a notice placed by the copyright holder saying it may be distributed
 69 | under the terms of this General Public License.  The "Program", below,
 70 | refers to any such program or work, and a "work based on the Program"
 71 | means either the Program or any derivative work under copyright law:
 72 | that is to say, a work containing the Program or a portion of it,
 73 | either verbatim or with modifications and/or translated into another
 74 | language.  (Hereinafter, translation is included without limitation in
 75 | the term "modification".)  Each licensee is addressed as "you".
 76 | 
 77 | Activities other than copying, distribution and modification are not
 78 | covered by this License; they are outside its scope.  The act of
 79 | running the Program is not restricted, and the output from the Program
 80 | is covered only if its contents constitute a work based on the
 81 | Program (independent of having been made by running the Program).
 82 | Whether that is true depends on what the Program does.
 83 | 
 84 |   1. You may copy and distribute verbatim copies of the Program's
 85 | source code as you receive it, in any medium, provided that you
 86 | conspicuously and appropriately publish on each copy an appropriate
 87 | copyright notice and disclaimer of warranty; keep intact all the
 88 | notices that refer to this License and to the absence of any warranty;
 89 | and give any other recipients of the Program a copy of this License
 90 | along with the Program.
 91 | 
 92 | You may charge a fee for the physical act of transferring a copy, and
 93 | you may at your option offer warranty protection in exchange for a fee.
 94 | 
 95 |   2. You may modify your copy or copies of the Program or any portion
 96 | of it, thus forming a work based on the Program, and copy and
 97 | distribute such modifications or work under the terms of Section 1
 98 | above, provided that you also meet all of these conditions:
 99 | 
100 |     a) You must cause the modified files to carry prominent notices
101 |     stating that you changed the files and the date of any change.
102 | 
103 |     b) You must cause any work that you distribute or publish, that in
104 |     whole or in part contains or is derived from the Program or any
105 |     part thereof, to be licensed as a whole at no charge to all third
106 |     parties under the terms of this License.
107 | 
108 |     c) If the modified program normally reads commands interactively
109 |     when run, you must cause it, when started running for such
110 |     interactive use in the most ordinary way, to print or display an
111 |     announcement including an appropriate copyright notice and a
112 |     notice that there is no warranty (or else, saying that you provide
113 |     a warranty) and that users may redistribute the program under
114 |     these conditions, and telling the user how to view a copy of this
115 |     License.  (Exception: if the Program itself is interactive but
116 |     does not normally print such an announcement, your work based on
117 |     the Program is not required to print an announcement.)
118 | 
119 | These requirements apply to the modified work as a whole.  If
120 | identifiable sections of that work are not derived from the Program,
121 | and can be reasonably considered independent and separate works in
122 | themselves, then this License, and its terms, do not apply to those
123 | sections when you distribute them as separate works.  But when you
124 | distribute the same sections as part of a whole which is a work based
125 | on the Program, the distribution of the whole must be on the terms of
126 | this License, whose permissions for other licensees extend to the
127 | entire whole, and thus to each and every part regardless of who wrote it.
128 | 
129 | Thus, it is not the intent of this section to claim rights or contest
130 | your rights to work written entirely by you; rather, the intent is to
131 | exercise the right to control the distribution of derivative or
132 | collective works based on the Program.
133 | 
134 | In addition, mere aggregation of another work not based on the Program
135 | with the Program (or with a work based on the Program) on a volume of
136 | a storage or distribution medium does not bring the other work under
137 | the scope of this License.
138 | 
139 |   3. You may copy and distribute the Program (or a work based on it,
140 | under Section 2) in object code or executable form under the terms of
141 | Sections 1 and 2 above provided that you also do one of the following:
142 | 
143 |     a) Accompany it with the complete corresponding machine-readable
144 |     source code, which must be distributed under the terms of Sections
145 |     1 and 2 above on a medium customarily used for software interchange; or,
146 | 
147 |     b) Accompany it with a written offer, valid for at least three
148 |     years, to give any third party, for a charge no more than your
149 |     cost of physically performing source distribution, a complete
150 |     machine-readable copy of the corresponding source code, to be
151 |     distributed under the terms of Sections 1 and 2 above on a medium
152 |     customarily used for software interchange; or,
153 | 
154 |     c) Accompany it with the information you received as to the offer
155 |     to distribute corresponding source code.  (This alternative is
156 |     allowed only for noncommercial distribution and only if you
157 |     received the program in object code or executable form with such
158 |     an offer, in accord with Subsection b above.)
159 | 
160 | The source code for a work means the preferred form of the work for
161 | making modifications to it.  For an executable work, complete source
162 | code means all the source code for all modules it contains, plus any
163 | associated interface definition files, plus the scripts used to
164 | control compilation and installation of the executable.  However, as a
165 | special exception, the source code distributed need not include
166 | anything that is normally distributed (in either source or binary
167 | form) with the major components (compiler, kernel, and so on) of the
168 | operating system on which the executable runs, unless that component
169 | itself accompanies the executable.
170 | 
171 | If distribution of executable or object code is made by offering
172 | access to copy from a designated place, then offering equivalent
173 | access to copy the source code from the same place counts as
174 | distribution of the source code, even though third parties are not
175 | compelled to copy the source along with the object code.
176 | 
177 |   4. You may not copy, modify, sublicense, or distribute the Program
178 | except as expressly provided under this License.  Any attempt
179 | otherwise to copy, modify, sublicense or distribute the Program is
180 | void, and will automatically terminate your rights under this License.
181 | However, parties who have received copies, or rights, from you under
182 | this License will not have their licenses terminated so long as such
183 | parties remain in full compliance.
184 | 
185 |   5. You are not required to accept this License, since you have not
186 | signed it.  However, nothing else grants you permission to modify or
187 | distribute the Program or its derivative works.  These actions are
188 | prohibited by law if you do not accept this License.  Therefore, by
189 | modifying or distributing the Program (or any work based on the
190 | Program), you indicate your acceptance of this License to do so, and
191 | all its terms and conditions for copying, distributing or modifying
192 | the Program or works based on it.
193 | 
194 |   6. Each time you redistribute the Program (or any work based on the
195 | Program), the recipient automatically receives a license from the
196 | original licensor to copy, distribute or modify the Program subject to
197 | these terms and conditions.  You may not impose any further
198 | restrictions on the recipients' exercise of the rights granted herein.
199 | You are not responsible for enforcing compliance by third parties to
200 | this License.
201 | 
202 |   7. If, as a consequence of a court judgment or allegation of patent
203 | infringement or for any other reason (not limited to patent issues),
204 | conditions are imposed on you (whether by court order, agreement or
205 | otherwise) that contradict the conditions of this License, they do not
206 | excuse you from the conditions of this License.  If you cannot
207 | distribute so as to satisfy simultaneously your obligations under this
208 | License and any other pertinent obligations, then as a consequence you
209 | may not distribute the Program at all.  For example, if a patent
210 | license would not permit royalty-free redistribution of the Program by
211 | all those who receive copies directly or indirectly through you, then
212 | the only way you could satisfy both it and this License would be to
213 | refrain entirely from distribution of the Program.
214 | 
215 | If any portion of this section is held invalid or unenforceable under
216 | any particular circumstance, the balance of the section is intended to
217 | apply and the section as a whole is intended to apply in other
218 | circumstances.
219 | 
220 | It is not the purpose of this section to induce you to infringe any
221 | patents or other property right claims or to contest validity of any
222 | such claims; this section has the sole purpose of protecting the
223 | integrity of the free software distribution system, which is
224 | implemented by public license practices.  Many people have made
225 | generous contributions to the wide range of software distributed
226 | through that system in reliance on consistent application of that
227 | system; it is up to the author/donor to decide if he or she is willing
228 | to distribute software through any other system and a licensee cannot
229 | impose that choice.
230 | 
231 | This section is intended to make thoroughly clear what is believed to
232 | be a consequence of the rest of this License.
233 | 
234 |   8. If the distribution and/or use of the Program is restricted in
235 | certain countries either by patents or by copyrighted interfaces, the
236 | original copyright holder who places the Program under this License
237 | may add an explicit geographical distribution limitation excluding
238 | those countries, so that distribution is permitted only in or among
239 | countries not thus excluded.  In such case, this License incorporates
240 | the limitation as if written in the body of this License.
241 | 
242 |   9. The Free Software Foundation may publish revised and/or new versions
243 | of the General Public License from time to time.  Such new versions will
244 | be similar in spirit to the present version, but may differ in detail to
245 | address new problems or concerns.
246 | 
247 | Each version is given a distinguishing version number.  If the Program
248 | specifies a version number of this License which applies to it and "any
249 | later version", you have the option of following the terms and conditions
250 | either of that version or of any later version published by the Free
251 | Software Foundation.  If the Program does not specify a version number of
252 | this License, you may choose any version ever published by the Free Software
253 | Foundation.
254 | 
255 |   10. If you wish to incorporate parts of the Program into other free
256 | programs whose distribution conditions are different, write to the author
257 | to ask for permission.  For software which is copyrighted by the Free
258 | Software Foundation, write to the Free Software Foundation; we sometimes
259 | make exceptions for this.  Our decision will be guided by the two goals
260 | of preserving the free status of all derivatives of our free software and
261 | of promoting the sharing and reuse of software generally.
262 | 
263 | 			    NO WARRANTY
264 | 
265 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
266 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
267 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
268 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
269 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
270 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
271 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
272 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
273 | REPAIR OR CORRECTION.
274 | 
275 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
276 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
277 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
278 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
279 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
280 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
281 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
282 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
283 | POSSIBILITY OF SUCH DAMAGES.
284 | 
285 | 		     END OF TERMS AND CONDITIONS
286 | 
287 | 	    How to Apply These Terms to Your New Programs
288 | 
289 |   If you develop a new program, and you want it to be of the greatest
290 | possible use to the public, the best way to achieve this is to make it
291 | free software which everyone can redistribute and change under these terms.
292 | 
293 |   To do so, attach the following notices to the program.  It is safest
294 | to attach them to the start of each source file to most effectively
295 | convey the exclusion of warranty; and each file should have at least
296 | the "copyright" line and a pointer to where the full notice is found.
297 | 
298 |     <one line to give the program's name and a brief idea of what it does.>
299 |     Copyright (C) <year>  <name of author>
300 | 
301 |     This program is free software; you can redistribute it and/or modify
302 |     it under the terms of the GNU General Public License as published by
303 |     the Free Software Foundation; either version 2 of the License, or
304 |     (at your option) any later version.
305 | 
306 |     This program is distributed in the hope that it will be useful,
307 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
308 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
309 |     GNU General Public License for more details.
310 | 
311 |     You should have received a copy of the GNU General Public License
312 |     along with this program; if not, write to the Free Software
313 |     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
314 | 
315 | 
316 | Also add information on how to contact you by electronic and paper mail.
317 | 
318 | If the program is interactive, make it output a short notice like this
319 | when it starts in an interactive mode:
320 | 
321 |     Gnomovision version 69, Copyright (C) year  name of author
322 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
323 |     This is free software, and you are welcome to redistribute it
324 |     under certain conditions; type `show c' for details.
325 | 
326 | The hypothetical commands `show w' and `show c' should show the appropriate
327 | parts of the General Public License.  Of course, the commands you use may
328 | be called something other than `show w' and `show c'; they could even be
329 | mouse-clicks or menu items--whatever suits your program.
330 | 
331 | You should also get your employer (if you work as a programmer) or your
332 | school, if any, to sign a "copyright disclaimer" for the program, if
333 | necessary.  Here is a sample; alter the names:
334 | 
335 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
336 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
337 | 
338 |   <signature of Ty Coon>, 1 April 1989
339 |   Ty Coon, President of Vice
340 | 
341 | This General Public License does not permit incorporating your program into
342 | proprietary programs.  If your program is a subroutine library, you may
343 | consider it more useful to permit linking proprietary applications with the
344 | library.  If this is what you want to do, use the GNU Library General
345 | Public License instead of this License.
346 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Build, test, and maintenance targets for AMaCC.
 2 | # ARM_CC, ARM_EXEC, VECHO, Q, REDIR, PYTHON and the `pass` macro are not
 3 | # defined in this file; they are expected to come from the mk/*.mk includes.
 4 | CFLAGS = -O0 -Wall -Wno-misleading-indentation
 5 | OBJ_DIR = elf
 6 | TEST_DIR = tests
 7 | TEST_SRC = $(wildcard $(TEST_DIR)/*.c)
 8 | TEST_OBJ = $(TEST_SRC:.c=.o)
 9 | 
10 | BIN = amacc
11 | EXEC = $(BIN) $(BIN)-native
12 | 
13 | include mk/arm.mk
14 | include mk/common.mk
15 | include mk/python.mk
16 | 
17 | # These targets never produce a file of the same name.
18 | .PHONY: all check help dump-ir clean
19 | 
20 | ## Build AMaCC
21 | all: $(EXEC)
22 | $(BIN): $(BIN).c
23 | 	$(VECHO) "  CC+LD\t\t$@\n"
24 | 	$(Q)$(ARM_CC) $(CFLAGS) -o $@ $< -g -ldl
25 | 
26 | $(BIN)-native: $(BIN).c
27 | 	$(VECHO) "  CC+LD\t\t$@\n"
28 | 	$(Q)$(CC) $(CFLAGS) -o $@ $< \
29 | 	    -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast -Wno-format \
30 | 	    -ldl
31 | 
32 | # Program output is captured with shell command substitution, $$(...), not
33 | # with the make shell function: make expands every line of a recipe before
34 | # running the first one, so the shell function would execute the binaries
35 | # before the earlier recipe lines had (re)generated them. A mismatch fails
36 | # the target instead of silently omitting the "Passed" message.
37 | ## Run tests and show message
38 | check: $(EXEC) $(TEST_OBJ)
39 | 	$(VECHO) "[ C to IR translation          ]"
40 | 	$(Q)./$(BIN)-native -s tests/arginc.c | diff tests/arginc.list - \
41 | 	    && $(call pass)
42 | 	$(VECHO) "[ JIT compilation + execution  ]"
43 | 	$(Q)if [ "$$($(ARM_EXEC) ./$(BIN) tests/hello.c)" = "hello, world" ]; then \
44 | 	$(call pass); \
45 | 	else printf ' Failed\n'; exit 1; \
46 | 	fi
47 | 	$(VECHO) "[ ELF generation               ]"
48 | 	$(Q)$(ARM_EXEC) ./$(BIN) -o $(OBJ_DIR)/hello tests/hello.c
49 | 	$(Q)if [ "$$($(ARM_EXEC) $(OBJ_DIR)/hello)" = "hello, world" ]; then \
50 | 	$(call pass); \
51 | 	else printf ' Failed\n'; exit 1; \
52 | 	fi
53 | 	$(VECHO) "[ nested/self compilation      ]"
54 | 	$(Q)if [ "$$($(ARM_EXEC) ./$(BIN) $(BIN).c tests/hello.c)" = "hello, world" ]; then \
55 | 	$(call pass); \
56 | 	else printf ' Failed\n'; exit 1; \
57 | 	fi
58 | 	$(VECHO) "[ Compatibility with GCC/Arm   ] "
59 | 	$(Q)$(PYTHON) scripts/runtest.py || echo
60 | 
61 | # Self-hosted compiler: AMaCC compiling its own source into an ELF binary.
62 | $(OBJ_DIR)/$(BIN): $(BIN)
63 | 	$(VECHO) "  SelfCC\t$@\n"
64 | 	$(Q)$(ARM_EXEC) ./$^ -o $@ $(BIN).c
65 | 
66 | # Evaluated while the Makefile is parsed, so the output directory exists
67 | # before any rule runs.
68 | SHELL_HACK := $(shell mkdir -p $(OBJ_DIR))
69 | # Each test source is run three ways: JIT, generated ELF, and JIT through the
70 | # self-compiled binary. No .o file is actually written by this rule.
71 | $(TEST_DIR)/%.o: $(TEST_DIR)/%.c $(BIN) $(OBJ_DIR)/$(BIN)
72 | 	$(VECHO) "[*** verify $< <JIT> *******]\n"
73 | 	$(Q)$(ARM_EXEC) ./$(BIN) $< 2 $(REDIR)
74 | 	$(VECHO) "[*** verify $< <ELF> *******]\n"
75 | 	$(Q)$(ARM_EXEC) ./$(BIN) -o $(OBJ_DIR)/$(notdir $(basename $<)) $< $(REDIR)
76 | 	$(Q)$(ARM_EXEC) $(OBJ_DIR)/$(notdir $(basename $<)) 2 $(REDIR)
77 | 	$(VECHO) "[*** verify $< <ELF-self> **]\n"
78 | 	$(Q)$(ARM_EXEC) ./$(OBJ_DIR)/$(BIN) $< 2 $(REDIR)
79 | 	$(Q)$(call pass,$<)
80 | 
81 | # The help text is built from each "##" comment line and the target line
82 | # that immediately follows it, so keep those two lines adjacent.
83 | ## Print available build targets
84 | help:
85 | 	@cat $(MAKEFILE_LIST) | \
86 | 	 awk '/^##.*$$/{l1=$$0;getline;l2=(l1 "##" $$0); print l2 $$0}' | awk -F"##" '{split($$3,t,":");printf "\033[36m%-11s\033[0m %s\n",t[1],$$2}'
87 | 
88 | ## Dump assembly from source file. Usage: "make dump-ir FILE=tests/hello.c"
89 | dump-ir: $(BIN)
90 | 	@$(ARM_EXEC) ./$(BIN) -s $(FILE)
91 | 
92 | ## Remove all generated files
93 | clean:
94 | 	$(RM) $(EXEC) $(OBJ_DIR)/* elf/* out-gcc/*
95 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # AMaCC = Arguably Minimalist Arm C Compiler
  2 | 
  3 | ## Introduction
  4 | AMaCC is a 32-bit Arm architecture compiler built from scratch.
  5 | It compiles a stripped-down subset of C and is designed as a pedagogical tool for
  6 | learning about compilers, linkers, and loaders.
  7 | 
  8 | There are two execution modes AMaCC implements:
  9 | * Just-in-Time (JIT) compiler for Arm backend.
 10 | * Generation of valid GNU/Linux executables using the Executable and Linkable Format (ELF).
 11 | 
 12 | It is worth mentioning that AMaCC is designed to compile a subset of C necessary
 13 | to self-host with the above execution modes. For instance, it supports global
 14 | variables, particularly global arrays.
 15 | 
 16 | A simple stack-based Abstract Syntax Tree (AST) is generated through cooperative
 17 | `stmt()` and `expr()` parsing functions, both fed by a token-generating function.
 18 | The `expr()` function performs some literal constant optimizations. The AST is
 19 | transformed into a stack-based VM Intermediate Representation (IR) using the
 20 | `gen()` function. The IR can be examined via a command-line option. Finally, the
 21 | `codegen()` function generates Arm32 instructions from the IR, which can be
 22 | executed via either `jit()` or `elf32()` executable generation
 23 | 
 24 | AMaCC combines classical recursive descent and operator precedence parsing. An
 25 | operator precedence parser proves to be considerably faster than a recursive
 26 | descent parser (RDP) for expressions when operator precedence is defined using
 27 | grammar productions that would otherwise be turned into methods.
 28 | 
 29 | ## Compatibility
 30 | AMaCC is capable of compiling C source files written in the following
 31 | syntax:
 32 | 
 33 | * support for all C89 statements except typedef.
 34 | * support for all C89 expression operators.
 35 | * data types: char, int, enum, struct, union, and multi-level pointers
 36 |     - type modifiers, qualifiers, and storage class specifiers are
 37 |       currently unsupported, though many keywords of this nature
 38 |       are not routinely used, and can be easily worked around with
 39 |       simple alternative constructs.
 40 |     - struct/union assignments are not supported at the language level
 41 |       in AMaCC, e.g. s1 = s2.  This also applies to function return
 42 |       values and parameters. Passing and returning pointers is recommended.
 43 |       Use memcpy if you want to copy a full struct, e.g.
 44 |       memcpy(&s1, &s2, sizeof(struct xxx));
 45 | * global/local variable initializations for supported data types
 46 |     - e.g., `int i = [expr]`
 47 |     - New variables are allowed to be declared within functions anywhere.
 48 |     - item-by-item array initialization is supported
 49 |     - but aggregate array declaration and initialization is yet to be supported
 50 |       e.g., `int foo[2][2] = { { 1, 0 }, { 0, 1 } };`
 51 | 
 52 | The architecture support targets armv7hf with Linux ABI, and it has been verified
 53 | on Raspberry Pi 2/3/4 with GNU/Linux.
 54 | 
 55 | ## Prerequisites
 56 | * The code generator in AMaCC relies on several GNU/Linux behaviors, and it
 57 |   is necessary to have Arm/Linux installed in your build environment.
 58 | * Install [GNU Toolchain for the A-profile Architecture](https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads)
 59 |     - Select `arm-none-linux-gnueabihf` (AArch32 target with hard float)
 60 | 
 61 | * Install QEMU for Arm user emulation
 62 | ```shell
 63 | sudo apt-get install qemu-user
 64 | ```
 65 | 
 66 | ## Running AMaCC
 67 | Run `make check` and you should see this:
 68 | ```
 69 | [ C to IR translation          ] Passed
 70 | [ JIT compilation + execution  ] Passed
 71 | [ ELF generation               ] Passed
 72 | [ nested/self compilation      ] Passed
 73 | [ Compatibility with GCC/Arm   ] ........................................
 74 | ----------------------------------------------------------------------
 75 | Ran 52 tests in 8.842s
 76 | 
 77 | OK
 78 | ```
 79 | 
 80 | Check the messages generated by `make help` to learn more.
 81 | 
 82 | ## Benchmark
 83 | AMaCC is able to generate machine code really fast and provides 70% of the performance of `gcc -O0`.
 84 | 
 85 | Test environment:
 86 | * Raspberry Pi 4B (SoC: bcm2711, ARMv8-A architecture)
 87 | * Raspbian GNU/Linux, kernel 5.10.17-v7l+, gcc 8.3.0 (armv7l userland)
 88 | 
 89 | Input source file: `amacc.c`
 90 | 
 91 | | compiler driver                    | binary size (KiB) | compile time (s) |
 92 | | ---------------------------------- | ----------------- | ---------------- |
 93 | | gcc with `-O0 -ldl` (compile+link) | 56                |  0.5683          |
 94 | | gcc with `-O0 -c` (compile only)   | 56                |  0.4884          |
 95 | | AMaCC                              | 100               |  0.0217          |
 96 | 
 97 | 
 98 | ## Internals
 99 | Check [Intermediate Representation (IR) for AMaCC Compilation](docs/IR.md).
100 | 
101 | ## Acknowledgements
102 | AMaCC is based on the infrastructure of [c4](https://github.com/rswier/c4).
103 | 
104 | ## Related Materials
105 | * [Curated list of awesome resources on Compilers, Interpreters and Runtimes](http://aalhour.com/awesome-compilers/)
106 | * [Hacker News discussions](https://news.ycombinator.com/item?id=11411124)
107 | * [A Compiler Writing Journey](https://github.com/DoctorWkt/acwj) by Warren Toomey.
108 | 


--------------------------------------------------------------------------------
/amacc.c:
--------------------------------------------------------------------------------
   1 | /*
   2 |  * AMaCC is capable of compiling (subset of) C source files into GNU/Linux
   3 |  * executables or running via just-in-time compilation on 32-bit ARM
   4 |  * processor-based platforms. There is no preprocessor.
   5 |  *
   6 |  * The following options are supported:
   7 |  *   -s : Print source and generated intermediate representation (IR).
   8 |  *   -o : Create executable file and terminate normally.
   9 |  *
  10 |  * If -o and -s are omitted, the compiled code is executed immediately (if
  11 |  * there were no compile errors) with the command line arguments passed
  12 |  * after the source file parameter.
  13 |  */
  14 | 
  15 | #include <stdio.h>
  16 | #include <stdlib.h>
  17 | #include <string.h>
  18 | #include <memory.h>
  19 | #include <sys/mman.h>
  20 | #include <unistd.h>
  21 | #include <sys/types.h>
  22 | #include <sys/stat.h>
  23 | #include <fcntl.h>
  24 | #include <dlfcn.h>
  25 | 
  26 | /* 64-bit host support */
  27 | #if defined(__x86_64__) || defined(__aarch64__)
  28 | #define int long
  29 | #endif
  30 | 
/* Global compiler state, shared by the lexer (next), the parser (expr) and
 * the rest of the compiler.
 */
char *freep, *p, *lp; // current position in source code
                      // (p: next character to scan; lp: start of the source
                      //  text not yet echoed by the -s listing in next())
char *freedata, *data, *_data;   // data/bss pointer
                                 // (next() appends string literals at `data`)

int *e, *le, *text;  // current position in emitted code
                     // (under -s, next() lists the IR words between le and e)
int *cas;            // case statement patch-up pointer
int *def;            // default statement patch-up pointer
int *brks;           // break statement patch-up pointer
int *cnts;           // continue statement patch-up pointer
int  swtc;           // !0 -> in a switch-stmt context
int  brkc;           // !0 -> in a break-stmt context
int  cntc;           // !0 -> in a continue-stmt context
int *tsize;          // array (indexed by type) of type sizes
int tnew;            // next available type
int tk;              // current token
int ival;            // current token value
int ty;              // current expression type
int compound;        // handle precedence of compound assignment
int loc;             // local variable offset
int line;            // current line number
int src;             // print source and assembly flag
int signed_char;     // use `signed char` for `char`
int elf;             // print ELF format
int *n;              // current position in emitted abstract syntax tree
                     // With an AST, the compiler is not limited to generate
                     // code on the fly with parsing.
                     // This capability allows function parameter code to be
                     // emitted and pushed on the stack in the proper
                     // right-to-left order.
                     // (expr() grows the tree downwards with `*--n = ...`)
int ld;              // local variable depth
  60 | 
// identifier
/* One symbol-table entry. When next() meets a name it has not seen before it
 * claims the first entry whose tk is 0 and fills in name, hash and tk; the
 * remaining fields are written by the parser.
 */
struct ident_s {
    int tk;          // type-id or keyword (next() stores Id for a new name;
                     // 0 marks the first unused table entry)
    int hash;        // (character hash << 6) + name length, built by next()
    char *name;     // name of this identifier; next() points it into the
                    // source text, so it is not NUL-terminated
    /* fields starting with 'h' were designed to save and restore
     * the global class/type/val in order to handle the case if a
     * function declares a local with the same name as a global.
     */
    int class, hclass; // FUNC, GLO (global var), LOC (local var), Syscall
    int type, htype;   // data type such as char and int
    int val, hval;     // class-dependent value, e.g. the enum constant for
                       // class Num or the ef_cache index for class Syscall
    int stype;         // struct/union type id; expr() reads it for the Id
                       // that follows a `struct`/`union` keyword
} *id,  // currently parsed identifier
  *sym; // symbol table (simple list of identifiers)
  76 | 
// (library) external functions
/* Cache of external functions resolved so far. Entries are appended by
 * ef_add() and looked up (or created on demand) by ef_getidx().
 */
struct ef_s {
    char *name; // heap-allocated copy of the symbol name (see ef_add)
    int addr;   // address passed to ef_add(); ef_getidx() obtains it from dlsym()
} **ef_cache;   // array of pointers to entries, indexed 0 .. ef_count-1
int ef_count;   // number of entries currently stored in ef_cache
  83 | 
/* One node of a singly linked list of struct/union members. */
struct member_s {
    struct ident_s *id;    // presumably the identifier naming the member -- TODO confirm
    int offset;            // presumably the member's offset inside the aggregate -- TODO confirm
    int type;              // presumably the member's data type -- TODO confirm
    struct member_s *next; // next node in the list
} **members; // array (indexed by type) of struct member lists
  90 | 
// tokens and classes (operators last and in precedence order)
// ( >= 128 so not to collide with ASCII-valued tokens)
/* The enumerator order matters:
 *  - expr(lev) keeps consuming operators while `tk >= lev`, so the operator
 *    tokens from Assign onwards must stay sorted from loosest to tightest
 *    binding;
 *  - expr() accepts an identifier as directly callable when its class lies
 *    in the range Func .. ClearCache, so those four must stay adjacent.
 */
enum {
    Num = 128, // the character set of given source is limited to 7-bit ASCII
    Func, Syscall, Main, ClearCache, Glo, Par, Loc, Keyword, Id, Label, Load, Enter,
    Break, Continue, Case, Char, Default, Else, Enum, If, Int, Return,
    Sizeof, Struct, Union, Switch, For, While, DoWhile, Goto,
    Assign, // operator =, keep Assign as highest priority operator
    OrAssign, XorAssign, AndAssign, ShlAssign, ShrAssign, // |=, ^=, &=, <<=, >>=
    AddAssign, SubAssign, MulAssign, DivAssign, ModAssign, // +=, -=, *=, /=, %=
    Cond, // operator: ?
    Lor, Lan, Or, Xor, And, // operator: ||, &&, |, ^, &
    Eq, Ne, Lt, Gt, Le, Ge, // operator: ==, !=, <, >, <=, >=
    Shl, Shr, Add, Sub, Mul, Div, Mod, // operator: <<, >>, +, -, *, /, %
    Inc, Dec, Dot, Arrow, Bracket, // operator: ++, --, ., ->, [
};
 107 | 
 108 | // opcodes
 109 | /* The instruction set is designed for building intermediate representation.
 110 |  * Expression 10 + 20 will be translated into the following instructions:
 111 |  *     i = 0;
 112 |  *     text[i++] = IMM;
 113 |  *     text[i++] = 10;
 114 |  *     text[i++] = PSH;
 115 |  *     text[i++] = IMM;
 116 |  *     text[i++] = 20;
 117 |  *     text[i++] = ADD;
 118 |  *     text[i++] = PSH;
 119 |  *     text[i++] = EXIT;
 120 |  *     pc = text;
 121 |  */
 122 | enum {
 123 |     LEA , /*  0 */
 124 |     /* LEA addressed the problem how to fetch arguments inside sub-function.
 125 |      * Let's check out what a calling frame looks like before learning how
 126 |      * to fetch arguments (Note that arguments are pushed in its calling
 127 |      * order):
 128 |      *
 129 |      *     sub_function(arg1, arg2, arg3);
 130 |      *
 131 |      *     |    ....       | high address
 132 |      *     +---------------+
 133 |      *     | arg: 1        |    new_bp + 4
 134 |      *     +---------------+
 135 |      *     | arg: 2        |    new_bp + 3
 136 |      *     +---------------+
 137 |      *     | arg: 3        |    new_bp + 2
 138 |      *     +---------------+
 139 |      *     |return address |    new_bp + 1
 140 |      *     +---------------+
 141 |      *     | old BP        | <- new BP
 142 |      *     +---------------+
 143 |      *     | local var 1   |    new_bp - 1
 144 |      *     +---------------+
 145 |      *     | local var 2   |    new_bp - 2
 146 |      *     +---------------+
 147 |      *     |    ....       |  low address
 148 |      *
  149 |      * If we need to refer to arg1, we need to fetch new_bp + 4, which cannot
  150 |      * be achieved by the restricted ADD instruction. Thus another special
  151 |      * instruction is introduced to do this: LEA <offset>.
 152 |      * The following pseudocode illustrates how LEA works.
 153 |      *     if (op == LEA) { ax = (int) (bp + *pc++); } // load address for arguments
 154 |      * Together with JSR, ENT, ADJ, LEV, and LEA instruction, we are able to make
 155 |      * function calls.
 156 |      */
 157 | 
 158 |     IMM , /*  1 */
 159 |     /* IMM <num> to put immediate <num> into general register */
 160 | 
 161 |     JMP , /*  2 */
 162 |     /* JMP <addr> will unconditionally set the value PC register to <addr> */
 163 |     /* The following pseudocode illustrates how JMP works:
 164 |      *     if (op == JMP) { pc = (int *) *pc; } // jump to the address
 165 |      * Note that PC points to the NEXT instruction to be executed. Thus *pc
 166 |      * stores the argument of JMP instruction, i.e. the <addr>.
 167 |      */
 168 | 
 169 |     JSR , /*  3 */
 170 |     /* A function is a block of code, which may be far from the instruction
  171 |      * we are currently executing. That is why the JMP instruction exists,
 172 |      * jumping into starting point of a function. JSR is introduced to perform
 173 |      * some bookkeeping: store the current execution position so that the
 174 |      * program can resume after function call returns.
 175 |      *
 176 |      * JSR <addr> to invoke the function whose starting point is <addr> and
 177 |      * LEV to fetch the bookkeeping information to resume previous execution.
 178 |      */
 179 | 
 180 |     BZ  , /*  4 : conditional jump if general register is zero (jump-if-zero) */
 181 |     BNZ , /*  5 : conditional jump if general register is not zero */
 182 | 
 183 |     ENT , /*  6 */
 184 |     /* ENT <size> is called when we are about to enter the function call to
 185 |      * "make a new calling frame". It will store the current PC value onto
 186 |      * the stack, and save some space(<size> bytes) to store the local
 187 |      * variables for function.
 188 |      */
 189 | 
 190 |     ADJ , /*  7 */
 191 |     /* ADJ <size> is to adjust the stack, to "remove arguments from frame"
 192 |      * The following pseudocode illustrates how ADJ works:
 193 |      *     if (op == ADJ) { sp += *pc++; } // add esp, <size>
 194 |      */
 195 | 
 196 |     LEV , /*  8 */
 197 |     /* LEV fetches bookkeeping info to resume previous execution.
 198 |      * There is no POP instruction in our design, and the following pseudocode
 199 |      * illustrates how LEV works:
 200 |      *     if (op == LEV) { sp = bp; bp = (int *) *sp++;
 201 |      *                      pc = (int *) *sp++; } // restore call frame and PC
 202 |      */
 203 | 
 204 |     LI  , /*  9 */
 205 |     /* LI loads an integer into general register from a given memory
 206 |      * address which is stored in general register before execution.
 207 |      */
 208 | 
 209 |     LC  , /* 10 */
 210 |     /* LC loads a character into general register from a given memory
 211 |      * address which is stored in general register before execution.
 212 |      */
 213 | 
 214 |     SI  , /* 11 */
 215 |     /* SI stores the integer in general register into the memory whose
 216 |      * address is stored on the top of the stack.
 217 |      */
 218 | 
 219 |     SC  , /* 12 */
 220 |     /* SC stores the character in general register into the memory whose
 221 |      * address is stored on the top of the stack.
 222 |      */
 223 | 
 224 |     PSH , /* 13 */
 225 |     /* PSH pushes the value in general register onto the stack */
 226 | 
 227 |     OR  , /* 14 */  XOR , /* 15 */  AND , /* 16 */
 228 |     EQ  , /* 17 */  NE  , /* 18 */
 229 |     LT  , /* 19 */  GT  , /* 20 */  LE  , /* 21 */ GE  , /* 22 */
 230 |     SHL , /* 23 */  SHR , /* 24 */
 231 |     ADD , /* 25 */  SUB , /* 26 */  MUL , /* 27 */ DIV, /* 28 */ MOD, /* 29 */
 232 |     /* arithmetic instructions
 233 |      * Each operator has two arguments: the first one is stored on the top
 234 |      * of the stack while the second is stored in general register.
  235 |      * After the calculation is done, the argument on the stack will be popped
 236 |      * out and the result will be stored in general register.
 237 |      * So you are not able to fetch the first argument from the stack after
 238 |      * the calculation.
 239 |      */
 240 | 
 241 |     SYSC, /* 30 system call */
 242 |     CLCA, /* 31 clear cache, used by JIT compilation */
 243 |     INVALID
 244 | };
 245 | 
// types
/* CHAR and INT are the scalar base types. Each level of pointer indirection
 * adds PTR to a type value (expr() does `ty += PTR` per '*'), so any value
 * >= PTR denotes a pointer. After a dereference expr() rejects a result
 * that is still >= PTR2.
 */
enum { CHAR, INT, PTR = 256, PTR2 = 512 };
 248 | 
// ELF generation
char **plt_func_addr; // indexed like ef_cache: the address ef_getaddr()
                      // returns for an external function when `elf` is set
char *freebuf;
 252 | 
 253 | char *append_strtab(char **strtab, char *str)
 254 | {
 255 |     char *s;
 256 |     for (s = str; *s && (*s != ' '); s++) ; /* ignore trailing space */
 257 |     int nbytes = s - str + 1;
 258 |     char *res = *strtab;
 259 |     memcpy(res, str, nbytes);
 260 |     res[s - str] = 0; // null terminator
 261 |     *strtab = res + nbytes;
 262 |     return res;
 263 | }
 264 | 
 265 | char fatal(char *msg) { printf("%d: %s\n", line, msg); exit(-1); }
 266 | 
 267 | void ef_add(char *name, int addr) // add external function
 268 | {
 269 |     ef_cache[ef_count] = malloc(sizeof(struct ef_s)) ;
 270 |     ef_cache[ef_count]->name = malloc(strlen(name)+1);
 271 |     strcpy(ef_cache[ef_count]->name, name);
 272 |     ef_cache[ef_count]->addr = addr;
 273 |     ++ef_count;
 274 | }
 275 | 
 276 | int ef_getaddr(int idx) // get address external function
 277 | {
 278 |     return (elf ? (int) plt_func_addr[idx] : ef_cache[idx]->addr);
 279 | }
 280 | 
 281 | int ef_getidx(char *name) // get cache index of external function
 282 | {
 283 |     int i;
 284 |     for (i = 0; i < ef_count; ++i)
 285 |         if (!strcmp(ef_cache[i]->name, name))
 286 |             break;
 287 | 
 288 |     if (i == ef_count) { // add new external lib func to cache
 289 |         int dladdr;
 290 |         if ((dladdr = (int) dlsym(0, name))) {
 291 |             ef_add(name, dladdr);
 292 |         } else {
 293 |             void *divmod_handle = dlopen("libgcc_s.so.1", 1);
 294 |             if (!divmod_handle) fatal("failed to open libgcc_s.so.1");
 295 |             dladdr = (int) dlsym(divmod_handle, name);
 296 |             if (!dladdr) fatal("bad function call");
 297 |             ef_add(name, dladdr);
 298 |         }
 299 |     }
 300 |     return i;
 301 | }
 302 | 
 303 | /* parse next token
 304 |  * 1. store data into id and then set the id to current lexical form
 305 |  * 2. set tk to appropriate type
 306 |  */
 307 | void next()
 308 | {
 309 |     char *pp;
 310 | 
 311 |     /* using loop to ignore whitespace characters, but characters that
 312 |      * cannot be recognized by the lexical analyzer are considered blank
 313 |      * characters, such as '@' and '$'.
 314 |      */
 315 |     while ((tk = *p)) {
 316 |         ++p;
 317 |         if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') ||
 318 |             (tk == '_')) {
 319 |             pp = p - 1;
 320 |             while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
 321 |                    (*p >= '0' && *p <= '9') || (*p == '_'))
 322 |                 tk = tk * 147 + *p++; // 147 is the magic number generating hash value
 323 |             tk = (tk << 6) + (p - pp);  // hash plus symbol length
 324 |             // hash value is used for fast comparison. Since it is inaccurate,
 325 |             // we have to validate the memory content as well.
 326 |             for (id = sym; id->tk; id++) { // find one free slot in table
 327 |                 if (tk == id->hash && /* if token is found (hash match), overwrite */
 328 |                     !memcmp(id->name, pp, p - pp)) {
 329 |                     tk = id->tk;
 330 |                     return;
 331 |                 }
 332 |             }
 333 |             /* At this point, existing symbol name is not found.
 334 |              * "id" points to the first unused symbol table entry.
 335 |              */
 336 |             id->name = pp;
 337 |             id->hash = tk;
 338 |             tk = id->tk = Id;  // token type identifier
 339 |             return;
 340 |         }
 341 |         /* Calculate the constant */
 342 |         // first byte is a number, and it is considered a numerical value
 343 |         else if (tk >= '0' && tk <= '9') {
 344 |             /* Parse with 3 conditions:
 345 |              * 1) not starting with 0 :=> decimal number;
 346 |              * 2) starting with 0x :=> hex number;
 347 |              * 3) starting with 0: octal number;
 348 |              */
 349 |             if ((ival = tk - '0')) {
 350 |                 while (*p >= '0' && *p <= '9')
 351 |                     ival = ival * 10 + *p++ - '0';
 352 |             }
 353 |             // first digit is 0 and it starts with 'x', and it is considered
 354 |             // to be a hexadecimal number
 355 |             else if (*p == 'x' || *p == 'X') {
 356 |                 while ((tk = *++p) &&
 357 |                        ((tk >= '0' && tk <= '9') ||
 358 |                         (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F')))
 359 |                     ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0);
 360 |             } else { // considered octal
 361 |                 while (*p >= '0' && *p <= '7')
 362 |                     ival = ival * 8 + *p++ - '0';
 363 |             }
 364 |             tk = Num; // token is numeric, return
 365 |             return;
 366 |         }
 367 |         switch (tk) {
 368 |         case '\n':
 369 |             /* Take an integer (representing an operation) and print out
 370 |              * the name of that operation. First thing to say is that "* ++le"
 371 |              * is the integer representing the operation to perform.
 372 |              * This basically walks through the array of instructions
 373 |              * returning each integer in turn.
 374 |              *
 375 |              * Starting at the beginning of the line, we have "printf" with
 376 |              * a format string of "%8.4s". This means printing out the first 4
 377 |              * characters of the string that we are about to pass next (padded
 378 |              * to 8 characters). There then follows a string containing all of
 379 |              * the operation names, in numerical order, padded to 4 characters
 380 |              * and separated by commas (so the start of each is 5 apart).
 381 |              *
 382 |              * Finally, we do a lookup into this string (treating it as an
 383 |              * array) at offset "* ++le * 5", i.e. the integer representing
 384 |              * the operation multiplied by "5", being the number of characters
 385 |              * between the start of each operation name). Doing this lookup
 386 |              * gives us a char, but actually we wanted the pointer to this
 387 |              * char (as we want printf to print out this char and the
 388 |              * following 3 chars), so we take the address of this char
 389 |              * (the "&" at the beginning of the whole expression).
 390 |              */
 391 |             if (src) {
 392 |                 int *base = le;
 393 |                 printf("%d: %.*s", line, p - lp, lp);
 394 |                 lp = p;
 395 |                 while (le < e) {
 396 |                     int off = le - base; // Func IR instruction memory offset
 397 |                     printf("%04d: %8.4s", off,
 398 |                            & "LEA  IMM  JMP  JSR  BZ   BNZ  ENT  ADJ  LEV  "
 399 |                              "LI   LC   SI   SC   PSH  "
 400 |                              "OR   XOR  AND  EQ   NE   LT   GT   LE   GE   "
 401 |                              "SHL  SHR  ADD  SUB  MUL  DIV  MOD  "
 402 |                              "SYSC CLCA" [*++le * 5]);
 403 |                     if (*le <= ADJ) {
 404 |                         ++le;
 405 |                         if (*le > (int) base && *le <= (int) e)
 406 |                             printf(" %04d\n", off + ((*le - (int) le) >> 2) + 1);
 407 |                         else
 408 |                             printf(" %d\n", *le);
 409 |                     }
 410 |                     else if (*le == SYSC) {
 411 |                         printf(" %s\n", ef_cache[*(++le)]->name);
 412 |                     }
 413 |                     else printf("\n");
 414 |                 }
 415 |             }
 416 |             ++line;
 417 |         case ' ':
 418 |         case '\t':
 419 |         case '\v':
 420 |         case '\f':
 421 |         case '\r':
 422 |             break;
 423 |         case '/':
 424 |             if (*p == '/') { // comment
 425 |         case '#': // skip #include statement, preprocessor directives ignored
 426 |                 while (*p != 0 && *p != '\n') ++p;
 427 |             } else if (*p == '*') { // C-style multiline comments
 428 |                 int t = 0;
 429 |                 for (++p; (*p != 0) && (t == 0); ++p) {
 430 |                     pp = p + 1;
 431 |                     if (*p == '\n') line++;
 432 |                     else if (*p == '*' && *pp == '/') t = 1;
 433 |                 }
 434 |                 ++p;
 435 |             } else {
 436 |                 if (*p == '=') { ++p; tk = DivAssign; }
 437 |                 else tk = Div; return;
 438 |             }
 439 |             break;
 440 |         case '\'': // quotes start with character (string)
 441 |         case '"':
 442 |             pp = data;
 443 |             // While current character is not `\0` and current character is
 444 |             // not the quote character.
 445 |             while (*p != 0 && *p != tk) {
 446 |                 // If current character is '\', it is escape notation or simply
 447 |                 // '\' character.
 448 |                 if ((ival = *p++) == '\\') {
 449 |                     switch (ival = *p++) {
 450 |                     case 'n': ival = '\n'; break; // new line
 451 |                     case 't': ival = '\t'; break; // horizontal tab
 452 |                     case 'v': ival = '\v'; break; // vertical tab
 453 |                     case 'f': ival = '\f'; break; // form feed
 454 |                     case 'r': ival = '\r'; break; // carriage return
 455 |                     case '0': ival = '\0'; break; // an int with value 0
 456 |                     }
 457 |                 }
 458 |                 // If it is double quotes (string literal), it is considered as
 459 |                 // a string, copying characters to data
 460 |                 if (tk == '"') *data++ = ival;
 461 |             }
 462 |             ++p;
 463 |             //  If .text too big rwdata v_addr will overlap it, add that to stay away from .text
 464 |             if (tk == '"') ival = (int) pp; else tk = Num;
 465 |             return;
 466 |         case '=': if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return;
 467 |         case '+': if (*p == '+') { ++p; tk = Inc; }
 468 |                   else if (*p == '=') { ++p; tk = AddAssign; }
 469 |                   else tk = Add; return;
 470 |         case '-': if (*p == '-') { ++p; tk = Dec; }
 471 |                   else if (*p == '>') { ++p; tk = Arrow; }
 472 |                   else if (*p == '=') { ++p; tk = SubAssign; }
 473 |                   else tk = Sub; return;
 474 |         case '!': if (*p == '=') { ++p; tk = Ne; } return;
 475 |         case '<': if (*p == '=') { ++p; tk = Le; }
 476 |                   else if (*p == '<') {
 477 |                       ++p; if (*p == '=') { ++p ; tk = ShlAssign; } else tk = Shl;
 478 |                   }
 479 |                   else tk = Lt; return;
 480 |         case '>': if (*p == '=') { ++p; tk = Ge; }
 481 |                   else if (*p == '>') {
 482 |                       ++p; if (*p == '=') { ++p ; tk = ShrAssign; } else tk = Shr;
 483 |                   }
 484 |                   else tk = Gt; return;
 485 |         case '|': if (*p == '|') { ++p; tk = Lor; }
 486 |                   else if (*p == '=') { ++p; tk = OrAssign; }
 487 |                   else tk = Or; return;
 488 |         case '&': if (*p == '&') { ++p; tk = Lan; }
 489 |                   else if (*p == '=') { ++p; tk = AndAssign; }
 490 |                   else tk = And; return;
 491 |         case '^': if (*p == '=') { ++p; tk = XorAssign; } else tk = Xor; return;
 492 |         case '*': if (*p == '=') { ++p; tk = MulAssign; }
 493 |                   else tk = Mul; return;
 494 |         case '%': if (*p == '=') { ++p; tk = ModAssign; }
 495 |                   else tk = Mod; return;
 496 |         case '[': tk = Bracket; return;
 497 |         case '?': tk = Cond; return;
 498 |         case '.': tk = Dot; return;
 499 |         default: return;
 500 |         }
 501 |     }
 502 | }
 503 | 
 504 | // https://stackoverflow.com/questions/109023/how-to-count-the-number-of-set-bits-in-a-32-bit-integer
 505 | int popcount(int i)
 506 | {
 507 |     i = i - ((i >> 1) & 0x55555555); // add pairs of bits
 508 |     i = (i & 0x33333333) + ((i >> 2) & 0x33333333); // quads
 509 |     i = (i + (i >> 4)) & 0x0F0F0F0F; // groups of 8
 510 |     return (i * 0x01010101) >> 24; // horizontal sum of bytes
 511 | }
 512 | 
 513 | /* expression parsing
 514 |  * lev represents an operator.
 515 |  * because each operator `token` is arranged in order of priority, so
 516 |  * large `lev` indicates a high priority.
 517 |  *
 518 |  * Operator precedence (lower first):
 519 |  * Assign  =
 520 |  * Cond    ?
 521 |  * Lor     ||
 522 |  * Lan     &&
 523 |  * Or      |
 524 |  * Xor     ^
 525 |  * And     &
 526 |  * Eq      ==
 527 |  * Ne      !=
 528 |  * Lt      <
 529 |  * Gt      >
 530 |  * Le      <=
 531 |  * Ge      >=
 532 |  * Shl     <<
 533 |  * Shr     >>
 534 |  * Add     +
 535 |  * Sub     -
 536 |  * Mul     *
 537 |  * Div     /
 538 |  * Mod     %
 539 |  * Inc     ++
 540 |  * Dec     --
 541 |  * Bracket [
 542 |  */
 543 | 
/* Flag bit that can be set in tk when expr() is entered: expr() then clears
 * the bit in its `default` case, parses no primary expression, and does not
 * report "bad expression".
 */
enum { REENTRANT = 0x10000 };
 545 | 
 546 | void expr(int lev)
 547 | {
 548 |     int tc;
 549 |     int t, *b, sz, *c;
 550 |     struct ident_s *d;
 551 |     struct member_s *m;
 552 | 
 553 |     switch (tk) {
 554 |     case 0: fatal("unexpected EOF in expression");
 555 |     // directly take an immediate value as the expression value
 556 |     // IMM recorded in emit sequence
 557 |     case Num: *--n = ival; *--n = Num; next(); ty = INT; break;
 558 |     case '"': // string, as a literal in data segment
 559 |         *--n = ival; *--n = Num; next();
 560 |         // continuous `"` handles C-style multiline text such as `"abc" "def"`
 561 |         while (tk == '"') next();
 562 |         /* Point "data" to next integer-aligned address.
 563 |          * e.g. "-sizeof(int)" is -4, i.e. 0b11111100.
 564 |          * This guarantees to leave at least one '\0' after the string.
 565 |          *
 566 |          * append the end of string character '\0', all the data is defaulted
 567 |          * to 0, so just move data one position forward. Specify result value
 568 |          * type to char pointer. CHAR + PTR = PTR because CHAR is 0.
 569 |          */
 570 |         data = (char *) (((int) data + sizeof(int)) & (-sizeof(int)));
 571 |         ty = PTR;
 572 |         break;
 573 |     /* SIZEOF_expr -> 'sizeof' '(' 'TYPE' ')'
 574 |      * sizeof is actually an unary operator.
 575 |      * now only `sizeof(int)`, `sizeof(char)` and `sizeof(*...)` are supported.
 576 |      * FIXME: not support "sizeof (Id)".
 577 |      * In second line will not get next token, match ')' will fail.
 578 |      */
 579 |     case Sizeof:
 580 |         next();
 581 |         if (tk != '(') fatal("open parentheses expected in sizeof");
 582 |         next();
 583 |         ty = INT;
 584 |         switch (tk) {
 585 |         case Int: next(); break;
 586 |         case Char: next(); ty = CHAR; break;
 587 |         case Struct:
 588 |         case Union:
 589 |             next();
 590 |             if (tk != Id) fatal("bad struct/union type");
 591 |             ty = id->stype; next(); break;
 592 |         }
 593 |         // multi-level pointers, plus `PTR` for each level
 594 |         while (tk == Mul) { next(); ty += PTR; }
 595 |         if (tk != ')') fatal("close parentheses expected in sizeof");
 596 |         next();
 597 |         *--n = ty >= PTR ? sizeof(int) : tsize[ty]; *--n = Num;
 598 |         ty = INT;
 599 |         break;
 600 |     case Id:
 601 |         d = id; next();
 602 |         // function call
 603 |         if (tk == '(') {
 604 |             if (d->class < Func || d->class > ClearCache) {
 605 |                 if (d->class != 0) fatal("bad function call");
 606 |                 int namelen = d->hash & 0x3f;
 607 |                 char ch = d->name[namelen];
 608 |                 d->name[namelen] = 0;
 609 |                 d->val = ef_getidx(d->name) ;
 610 |                 d->name[namelen] = ch;
 611 |                 d->class = Syscall;
 612 |                 d->type = INT;
 613 |             }
 614 |             next();
 615 |             t = 0; b = 0; // parameters count
 616 |             while (tk != ')') {
 617 |                 expr(Assign); *--n = (int) b; b = n; ++t;
 618 |                 if (tk == ',') {
 619 |                     next();
 620 |                     if (tk == ')') fatal("unexpected comma in function call");
 621 |                 } else if (tk != ')') fatal("missing comma in function call");
 622 |             }
 623 |             next();
 624 |             // function or system call id
 625 |             *--n = t; *--n = d->val; *--n = (int) b; *--n = d->class;
 626 |             ty = d->type;
 627 |         }
 628 |         // enumeration, only enums have ->class == Num
 629 |         else if (d->class == Num) { *--n = d->val; *--n = Num; ty = INT; }
 630 |         else {
 631 |             // Variable get offset
 632 |             switch (d->class) {
 633 |             case Loc: case Par: *--n = loc - d->val; *--n = Loc; break;
 634 |             case Glo: *--n = d->val; *--n = Num; break;
 635 |             default: fatal("undefined variable");
 636 |             }
 637 |             *--n = ty = d->type; *--n = Load;
 638 |         }
 639 |         break;
 640 |     // Type cast or parenthesis
 641 |     case '(':
 642 |         next();
 643 |         if (tk == Int || tk == Char || tk == Struct || tk == Union) {
 644 |             switch (tk) {
 645 |             case Int: next(); t = INT; break;
 646 |             case Char: next(); t = CHAR; break;
 647 |             default:
 648 |                 next();
 649 |                 if (tk != Id) fatal("bad struct/union type");
 650 |                 t = id->stype; next(); break;
 651 |             }
 652 |             // t: pointer
 653 |             while (tk == Mul) { next(); t += PTR; }
 654 |             if (tk != ')') fatal("bad cast");
 655 |             next();
 656 |             expr(Inc); // cast has precedence as Inc(++)
 657 |             ty = t;
 658 |         } else {
 659 |             expr(Assign);
 660 |             while (tk == ',') { next(); expr(Assign); }
 661 |             if (tk != ')') fatal("close parentheses expected");
 662 |             next();
 663 |         }
 664 |         break;
 665 |     case Mul: // "*", dereferencing the pointer operation
 666 |         next();
 667 |         expr(Inc); // dereference has the same precedence as Inc(++)
 668 |         if (ty < PTR) fatal("bad dereference");
 669 |         ty -= PTR;
 670 |         if (ty < CHAR || ty >= PTR2) fatal("unexpected type");
 671 |         *--n = ty; *--n = Load;
 672 |         break;
 673 |     case And: // "&", take the address operation
 674 |         /* when "token" is a variable, it takes the address first and
 675 |          * then LI/LC, so `--e` becomes the address of "a".
 676 |          */
 677 |         next(); expr(Inc);
 678 |         if (*n != Load) fatal("bad address-of");
 679 |         n += 2;
 680 |         ty += PTR;
 681 |         break;
 682 |     case '!': // "!x" is equivalent to "x == 0"
 683 |         next(); expr(Inc);
 684 |         if (*n == Num) n[1] = !n[1];
 685 |         else { *--n = 0; *--n = Num; --n; *n = (int) (n + 3); *--n = Eq; }
 686 |         ty = INT;
 687 |         break;
 688 |     case '~': // "~x" is equivalent to "x ^ -1"
 689 |         next(); expr(Inc);
 690 |         if (*n == Num) n[1] = ~n[1];
 691 |         else { *--n = -1; *--n = Num; --n; *n = (int) (n + 3); *--n = Xor; }
 692 |         ty = INT;
 693 |         break;
 694 |     case Add:
 695 |         next(); expr(Inc); ty = INT;
 696 |         break;
 697 |     case Sub:
 698 |         next();
 699 |         expr(Inc);
 700 |         if (*n == Num) n[1] = -n[1];
 701 |         else { *--n = -1; *--n = Num; --n; *n = (int) (n + 3); *--n = Mul; }
 702 |         ty = INT;
 703 |         break;
 704 |     case Div:
 705 |     case Mod:
 706 |         break;
 707 |     // processing ++x and --x. x-- and x++ is handled later
 708 |     case Inc:
 709 |     case Dec:
 710 |         t = tk; next(); expr(Inc);
 711 |         if (*n != Load) fatal("bad lvalue in pre-increment");
 712 |         *n = t;
 713 |         break;
 714 |     default:
 715 |         if (tk & REENTRANT) tk ^= REENTRANT;
 716 |         else fatal("bad expression");
 717 |     }
 718 | 
 719 |     // "precedence climbing" or "Top Down Operator Precedence" method
 720 |     while (tk >= lev) {
 721 |         // tk is ASCII code will not exceed `Num=128`. Its value may be changed
 722 |         // during recursion, so back up currently processed expression type
 723 |         t = ty; b = n;
 724 |         switch (tk) {
 725 |         case Assign:
 726 |             next();
 727 |             // the left part is processed by the variable part of `tk=ID`
 728 |             // and pushes the address
 729 |             if (*n != Load) fatal("bad lvalue in assignment");
 730 |             // get the value of the right part `expr` as the result of `a=expr`
 731 |             expr(Assign); *--n = (int) (b + 2); *--n = ty = t; *--n = Assign;
 732 |             break;
 733 |         case  OrAssign: // right associated
 734 |         case XorAssign:
 735 |         case AndAssign:
 736 |         case ShlAssign:
 737 |         case ShrAssign:
 738 |         case AddAssign:
 739 |         case SubAssign:
 740 |         case MulAssign:
 741 |         case DivAssign:
 742 |         case ModAssign:
 743 |             *--n=';'; *--n = t; *--n = Load;
 744 |             if (tk < ShlAssign) tk = Or + (tk - OrAssign);
 745 |             else tk = Shl + (tk - ShlAssign);
 746 |             tk |= REENTRANT; compound = 1; expr(Assign);
 747 |             *--n = (int) (b + 2); *--n = ty = t; *--n = Assign;
 748 |             break;
 749 |         case Cond: // `x?a:b` is similar to if except that it relies on else
 750 |             next(); expr(Assign);
 751 |             if (tk != ':') fatal("conditional missing colon");
 752 |             next(); c = n;
 753 |             expr(Cond); --n;
 754 |             *n = (int) (n + 1); *--n = (int) c; *--n = (int) b; *--n = Cond;
 755 |             break;
 756 |         case Lor: // short circuit, the logical or
 757 |             next(); expr(Lan);
 758 |             if (*n == Num && *b == Num) n[1] = b[1] || n[1];
 759 |             else { *--n = (int) b; *--n = Lor; }
 760 |             ty = INT;
 761 |             break;
 762 |         case Lan: // short circuit, logic and
 763 |             next(); expr(Or);
 764 |             if (*n == Num && *b == Num) n[1] = b[1] && n[1];
 765 |             else { *--n = (int) b; *--n = Lan; }
 766 |             ty = INT;
 767 |             break;
 768 |         case Or: // push the current value, calculate the right value
 769 |             next();
 770 |             if (compound) { compound = 0; expr(Assign); }
 771 |             else expr(Xor);
 772 |             if (*n == Num && *b == Num) n[1] = b[1] | n[1];
 773 |             else { *--n = (int) b; *--n = Or; }
 774 |             ty = INT;
 775 |             break;
 776 |         case Xor:
 777 |             next();
 778 |             if (compound) { compound = 0; expr(Assign); }
 779 |             else expr(And);
 780 |             if (*n == Num && *b == Num) n[1] = b[1] ^ n[1];
 781 |             else { *--n = (int) b; *--n = Xor; }
 782 |             ty = INT;
 783 |             break;
 784 |         case And:
 785 |             next();
 786 |             if (compound) { compound = 0; expr(Assign); }
 787 |             else expr(Eq);
 788 |             if (*n == Num && *b == Num) n[1] = b[1] & n[1];
 789 |             else { *--n = (int) b; *--n = And; }
 790 |             ty = INT;
 791 |             break;
 792 |         case Eq:
 793 |             next(); expr(Lt);
 794 |             if (*n == Num && *b == Num) n[1] = b[1] == n[1];
 795 |             else { *--n = (int) b; *--n = Eq; }
 796 |             ty = INT;
 797 |             break;
 798 |         case Ne:
 799 |             next(); expr(Lt);
 800 |             if (*n == Num && *b == Num) n[1] = b[1] != n[1];
 801 |             else { *--n = (int) b; *--n = Ne; }
 802 |             ty = INT;
 803 |             break;
 804 |         case Lt:
 805 |             next(); expr(Shl);
 806 |             if (*n == Num && *b == Num) n[1] = b[1] < n[1];
 807 |             else { *--n = (int) b; *--n = Lt; }
 808 |             ty = INT;
 809 |             break;
 810 |         case Gt:
 811 |             next(); expr(Shl);
 812 |             if (*n == Num && *b == Num) n[1] = b[1] > n[1];
 813 |             else { *--n = (int) b; *--n = Gt; }
 814 |             ty = INT;
 815 |             break;
 816 |         case Le:
 817 |             next(); expr(Shl);
 818 |             if (*n == Num && *b == Num) n[1] = b[1] <= n[1];
 819 |             else { *--n = (int) b; *--n = Le; }
 820 |             ty = INT;
 821 |             break;
 822 |         case Ge:
 823 |             next(); expr(Shl);
 824 |             if (*n == Num && *b == Num) n[1] = b[1] >= n[1];
 825 |             else { *--n = (int) b; *--n = Ge; }
 826 |             ty = INT;
 827 |             break;
 828 |         case Shl:
 829 |             next();
 830 |             if (compound) { compound = 0; expr(Assign); }
 831 |             else expr(Add);
 832 |             if (*n == Num && *b == Num) {
 833 |                 n[1] = b[1] << n[1];
 834 |             } else { *--n = (int) b; *--n = Shl; }
 835 |             ty = INT;
 836 |             break;
 837 |         case Shr:
 838 |             next();
 839 |             if (compound) { compound = 0; expr(Assign); }
 840 |             else expr(Add);
 841 |             if (*n == Num && *b == Num) {
 842 |                 n[1] = b[1] >> n[1];
 843 |             } else { *--n = (int) b; *--n = Shr; }
 844 |             ty = INT;
 845 |             break;
 846 |         case Add:
 847 |             next();
 848 |             if (compound) { compound = 0; expr(Assign); }
 849 |             else expr(Mul);
 850 |             tc = ((t | ty) & (PTR | PTR2)) ? (t >= PTR) : (t >= ty);
 851 |             c = n; if (tc) ty = t;
 852 |             sz = (ty >= PTR2) ? sizeof(int) :
 853 |                  ((ty >= PTR) ? tsize[ty - PTR] : 1);
 854 |             if (*n == Num && tc) { n[1] *= sz; sz = 1; }
 855 |             else if (*b == Num && !tc) { b[1] *= sz; sz = 1; }
 856 |             if (*n == Num && *b == Num) n[1] += b[1];
 857 |             else if (sz != 1) {
 858 |                 *--n = sz; *--n = Num;
 859 |                 *--n = (int) (tc ? c : b); *--n = Mul;
 860 |                 *--n = (int) (tc ? b : c); *--n = Add;
 861 |             }
 862 |             else { *--n = (int) b; *--n = Add; }
 863 |             break;
 864 |         case Sub: // 4 cases: ptr-ptr, ptr-int, int-ptr (err), int-int
 865 |             next();
 866 |             if (compound) { compound = 0; expr(Assign); }
 867 |             else expr(Mul); // t = left type, ty = right type
 868 |             if (t < PTR && ty >= PTR) fatal("bad pointer subtraction");
 869 |             if (t >= PTR) { // left arg is ptr
 870 |                 sz = (t >= PTR2) ? sizeof(int) : tsize[t - PTR];
 871 |                 if (ty >= PTR) { // ptr - ptr
 872 |                     if (t != ty) fatal("mismatched ptr type subtraction");
 873 |                     if (*n == Num && *b == Num) n[1] = (b[1] - n[1]) / sz;
 874 |                     else {
 875 |                         *--n = (int) b; *--n = Sub;
 876 |                         if (sz > 1) {
 877 |                             if ((sz & (sz - 1)) == 0) { // 2^n
 878 |                                 *--n = popcount(sz - 1); *--n = Num;
 879 |                                 --n; *n = (int) (n + 3); *--n = Shr;
 880 |                             } else {
 881 |                                 *--n = sz; *--n = Num; --n; *n = (int) (n + 3);
 882 |                                 *--n = Div; ef_getidx("__aeabi_idiv");
 883 |                             }
 884 |                         }
 885 |                     }
 886 |                     ty = INT;
 887 |                 } else { // ptr - int
 888 |                     if (*n == Num) {
 889 |                         n[1] *= sz;
 890 |                         if (*b == Num) n[1] = b[1] - n[1];
 891 |                         else { *--n = (int) b; *--n = Sub; }
 892 |                     } else {
 893 |                         if (sz > 1) {
 894 |                             if ((sz & (sz - 1)) == 0) { // 2^n
 895 |                                 *--n = popcount(sz - 1); *--n = Num;
 896 |                                 --n; *n = (int) (n + 3); *--n = Shl;
 897 |                             }
 898 |                             else {
 899 |                                 *--n = sz; *--n = Num;
 900 |                                 --n; *n = (int) (n + 3); *--n = Mul;
 901 |                             }
 902 |                         }
 903 |                         *--n = (int) b; *--n = Sub;
 904 |                     }
 905 |                     ty = t;
 906 |                 }
 907 |             } else { // int - int
 908 |                 if (*n == Num && *b == Num) n[1] = b[1] - n[1];
 909 |                 else { *--n = (int) b; *--n = Sub; }
 910 |                 ty = INT;
 911 |             }
 912 |             break;
 913 |         case Mul:
 914 |             next();
 915 |             if (compound) { compound = 0; expr(Assign); }
 916 |             else expr(Inc);
 917 |             if (*n == Num && *b == Num) n[1] *= b[1];
 918 |             else {
 919 |                 *--n = (int) b;
 920 |                 if (n[1] == Num && n[2] > 0 && (n[2] & (n[2] - 1)) == 0) {
 921 |                     n[2] = popcount(n[2] - 1); *--n = Shl; // 2^n
 922 |                 }
 923 |                 else *--n = Mul;
 924 |             }
 925 |             ty = INT;
 926 |             break;
 927 |         case Inc:
 928 |         case Dec:
 929 |             sz = ty >= PTR2 ? sizeof(int) : ty >= PTR ? tsize[ty - PTR] : 1;
 930 |             if (*n != Load) fatal("bad lvalue in post-increment");
 931 |             *n = tk;
 932 |             *--n = sz; *--n = Num;
 933 |             *--n = (int) b; *--n = (tk == Inc) ? Sub : Add;
 934 |             next();
 935 |             break;
 936 |         case Div:
 937 |             next();
 938 |             if (compound) { compound = 0; expr(Assign); }
 939 |             else expr(Inc);
 940 |             if (*n == Num && *b == Num) n[1] = b[1] / n[1];
 941 |             else {
 942 |                 *--n = (int) b;
 943 |                 if (n[1] == Num && n[2] > 0 && (n[2] & (n[2] - 1)) == 0) {
 944 |                     n[2] = popcount(n[2] - 1); *--n = Shr; // 2^n
 945 |                 } else {
 946 |                     *--n = Div;
 947 |                     ef_getidx("__aeabi_idiv");
 948 |                 }
 949 |             }
 950 |             ty = INT;
 951 |             break;
 952 |         case Mod:
 953 |             next();
 954 |             if (compound) { compound = 0; expr(Assign); }
 955 |             else expr(Inc);
 956 |             if (*n == Num && *b == Num) n[1] = b[1] % n[1];
 957 |             else {
 958 |                 *--n = (int) b;
 959 |                 if (n[1] == Num && n[2] > 0 && (n[2] & (n[2] - 1)) == 0) {
 960 |                     --n[2]; *--n = And; // 2^n
 961 |                 } else {
 962 |                     *--n = Mod;
 963 |                     ef_getidx("__aeabi_idivmod");
 964 |                 }
 965 |             }
 966 |             ty = INT;
 967 |             break;
 968 |         case Dot:
 969 |             ty += PTR;
 970 |             if (n[0] == Load && n[1] > INT && n[1] < PTR) n += 2; // struct
 971 |         case Arrow:
 972 |             if (ty <= PTR+INT || ty >= PTR2) fatal("structure expected");
 973 |             next();
 974 |             if (tk != Id) fatal("structure member expected");
 975 |             m = members[ty - PTR]; while (m && m->id != id) m = m->next;
 976 |             if (!m) fatal("structure member not found");
 977 |             if (m->offset) {
 978 |                 *--n = m->offset; *--n = Num; --n; *n = (int) (n + 3);
 979 |                 *--n = Add;
 980 |             }
 981 |             ty = m->type;
 982 |             if (ty <= INT || ty >= PTR) *--n = (ty == CHAR) ? CHAR : INT;
 983 |             else *--n = ty; // struct, not struct pointer
 984 |             *--n = Load;
 985 |             next();
 986 |             break;
 987 |         case Bracket:
 988 |             next(); expr(Assign);
 989 |             if (tk != ']') fatal("close bracket expected");
 990 |             next();
 991 |             if (t < PTR) fatal("pointer type expected");
 992 |             sz = (t = t - PTR) >= PTR ? sizeof(int) : tsize[t];
 993 |             if (sz > 1) {
 994 |                 if (*n == Num) n[1] *= sz;
 995 |                 else {
 996 |                     *--n = sz; *--n = Num; --n; *n = (int) (n + 3); *--n = Mul;
 997 |                 }
 998 |             }
 999 |             if (*n == Num && *b == Num) n[1] += b[1];
1000 |             else { *--n = (int) b; *--n = Add; }
1001 |             if ((ty = t) <= INT || ty >= PTR) *--n = (ty == CHAR) ? CHAR : INT;
1002 |             else *--n = ty; // struct, not struct pointer
1003 |             *--n = Load;
1004 |             break;
1005 |         default:
1006 |             printf("%d: compiler error tk=%d\n", line, tk); exit(-1);
1007 |         }
1008 |     }
1009 | }
1010 | 
1011 | // Generate IR for the AST node at "n": each case emits opcodes and operands
1012 | // through the global pointer "e" and recurses into the node's children.
1013 | // Keeping the code generator modular lets new targets (e.g. native Arm) be added.
1014 | void gen(int *n)
1015 | {
1016 |     int i = *n, j, k, l;
1017 |     int *a, *b, *c, *d, *t;
1018 |     struct ident_s *label;
1019 | 
1020 |     switch (i) {
1021 |     case Num: // integer constant: emit IMM followed by the value
1022 |         *++e = IMM; *++e = n[1];
1023 |         break;
1024 |     case Loc: // local variable: emit LEA followed by the offset stored in n[1]
1025 |         *++e = LEA; *++e = n[1];
1026 |         break;
1027 |     case Label: // target of goto
1028 |         label = (struct ident_s *) n[1];
1029 |         if (label->class != 0) fatal("duplicate label definition");
1030 |         d = e + 1; b = (int *) label->val; // d: label address, b: pending gotos
1031 |         while (b != 0) { t = (int *) *b; *b = (int) d; b = t; } // patch chain
1032 |         label->val = (int) d; label->class = Label;
1033 |         break;
1034 |     case Load:
1035 |         gen(n + 2); // generate the address expression
1036 |         if (n[1] > INT && n[1] < PTR) fatal("struct copies not yet supported");
1037 |         *++e = (n[1] == CHAR) ? LC : LI; // then load a char or an int from it
1038 |         break;
1039 |     case Assign: // assign the value to variables
1040 |         gen((int *) n[2]); *++e = PSH; gen(n + 3);
1041 |         // Add SC/SI instruction to save value in register to variable address
1042 |         // held on stack.
1043 |         *++e = (n[1] == CHAR) ? SC : SI;
1044 |         break;
1045 |     // increment or decrement variables
1046 |     case Inc:
1047 |     case Dec:
1048 |         gen(n + 2);
1049 |         *++e = PSH; *++e = (n[1] == CHAR) ? LC : LI; *++e = PSH;
1050 |         *++e = IMM; *++e = (n[1] >= PTR2) ? sizeof(int) :
1051 |                                             n[1] >= PTR ? tsize[n[1] - PTR] : 1;
1052 |         *++e = (i == Inc) ? ADD : SUB;
1053 |         *++e = (n[1] == CHAR) ? SC : SI;
1054 |         break;
1055 |     case Cond: // if else condition case
1056 |         gen((int *) n[1]); // condition
1057 |         // Add jump-if-zero instruction "BZ" to jump to false branch.
1058 |         // Point "b" to the jump address field to be patched later.
1059 |         *++e = BZ; b = ++e;
1060 |         gen((int *) n[2]); // expression
1061 |         // When there is a false branch: patch the jump address field pointed
1062 |         // to by "b" to hold the address of the false branch. "+ 3" skips the
1063 |         // "JMP" instruction and its operand added below.
1064 |         // Add "JMP" instruction after true branch to jump over false branch.
1065 |         // Point "b" to the jump address field to be patched later.
1066 |         if (n[3]) {
1067 |             *b = (int) (e + 3); *++e = JMP; b = ++e; gen((int *) n[3]);
1068 |         } // else statement
1069 |         // Patch the jump address field pointed to by "b" to hold the address
1070 |         // just past the last generated branch.
1071 |         *b = (int) (e + 1);
1072 |         break;
1073 |     // operators
1074 |     /* Logical OR node (Lan is the same, with "BZ" in place of "BNZ"):
1075 |      * Generate LHS, then add jump-if-nonzero "BNZ" to implement short circuit.
1076 |      * Point "b" to the jump address field to be patched later.
1077 |      * Generate RHS expression.
1078 |      * Patch the jump address field pointed to by "b" to hold the address past
1079 |      * the RHS expression.
1080 |      */
1081 |     case Lor:  gen((int *) n[1]); *++e = BNZ;
1082 |                b = ++e; gen(n + 2); *b = (int) (e + 1); break;
1083 |     case Lan:  gen((int *) n[1]); *++e = BZ;
1084 |                b = ++e; gen(n + 2); *b = (int) (e + 1); break;
1085 |     /* Binary operator nodes (bitwise OR shown; the others differ by opcode):
1086 |      * Generate LHS, then add "PSH" to push the LHS value in register to stack.
1087 |      * Generate RHS expression.
1088 |      * Add "OR" instruction to compute the result.
1089 |      */
1090 |     case Or:   gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = OR; break;
1091 |     case Xor:  gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = XOR; break;
1092 |     case And:  gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = AND; break;
1093 |     case Eq:   gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = EQ; break;
1094 |     case Ne:   gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = NE; break;
1095 |     case Lt:   gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = LT; break;
1096 |     case Gt:   gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = GT; break;
1097 |     case Le:   gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = LE; break;
1098 |     case Ge:   gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = GE; break;
1099 |     case Shl:  gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = SHL; break;
1100 |     case Shr:  gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = SHR; break;
1101 |     case Add:  gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = ADD; break;
1102 |     case Sub:  gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = SUB; break;
1103 |     case Mul:  gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = MUL; break;
1104 |     case Div:  gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = DIV; break;
1105 |     case Mod:  gen((int *) n[1]); *++e = PSH; gen(n + 2); *++e = MOD; break;
1106 |     case Func:
1107 |     case Syscall:
1108 |     case ClearCache:
1109 |         c = b = (int *) n[1]; k = 0; l = 1;
1110 |         // count the arguments: n[1] heads a list linked through word 0
1111 |         while (b && l) { ++k; if (!(int *) *b) l = 0; else b = (int *) *b; }
1112 |         j = 0; a = malloc(sizeof(int *) * k); b = c; l = 1;
1113 |         while (b && l) {
1114 |             a[j] = (int) b;
1115 |             if (!(int *) *b) l = 0; else b = (int *) *b; ++j;
1116 |         }
1117 |         if (j > 0) --j;
1118 |         // push arguments from the last collected node back to the first;
1119 |         while (j >= 0 && k > 0) { // never index a[-1] once node 0 is done
1120 |             gen(b + 1); *++e = PSH; if (--j >= 0) b = (int *) a[j];
1121 |         }
1122 |         free(a);
1123 |         if (i == Syscall) *++e = SYSC;
1124 |         if (i == Func) *++e = JSR;
1125 |         *++e = n[2];
1126 |         if (n[3]) { *++e = ADJ; *++e = n[3]; }
1127 |         break;
1128 |     case While:
1129 |     case DoWhile:
1130 |         if (i == While) { *++e = JMP; a = ++e; } // while: test condition first
1131 |         d = (e + 1); // d: start of the loop body
1132 |         b = brks; brks = 0; // save the enclosing break chain
1133 |         c = cnts; cnts = 0; // save the enclosing continue chain
1134 |         gen((int *) n[1]); // loop body
1135 |         if (i == While) *a = (int) (e + 1);
1136 |         while (cnts) { t = (int *) *cnts; *cnts = (int) (e + 1); cnts = t; }
1137 |         cnts = c; // continues now jump to the condition; restore outer chain
1138 |         gen((int *) n[2]); // condition
1139 |         *++e = BNZ; *++e = (int) d;
1140 |         while (brks) { t = (int *) *brks; *brks = (int) (e + 1); brks = t; }
1141 |         brks = b; // breaks now jump past the loop; restore outer chain
1142 |         break;
1143 |     case For:
1144 |         gen((int *) n[4]);  // init
1145 |         *++e = JMP; a = ++e; // first pass jumps straight to the condition
1146 |         d = (e + 1); // d: start of the loop body
1147 |         b = brks; brks = 0;
1148 |         c = cnts; cnts = 0;
1149 |         gen((int *) n[3]); // loop body
1150 |         while (cnts) { t = (int *) *cnts; *cnts = (int) (e + 1); cnts = t; }
1151 |         cnts = c;
1152 |         gen((int *) n[2]); // increment
1153 |         *a = (int) (e + 1);
1154 |         gen((int *) n[1]); // condition
1155 |         *++e = BNZ; *++e = (int) d;
1156 |         while (brks) { t = (int *) *brks; *brks = (int) (e + 1); brks = t; }
1157 |         brks = b;
1158 |         break;
1159 |     case Switch:
1160 |         gen((int *) n[1]); // condition
1161 |         a = cas; *++e = JMP; cas = ++e;
1162 |         b = brks; d = def; brks = def = 0;
1163 |         gen((int *) n[2]); // case statement
1164 |         // deal with no default inside switch case
1165 |         *cas = def ? (int) def : (int) (e + 1); cas = a;
1166 |         while (brks) { t = (int *) * brks; *brks = (int) (e + 1); brks = t; }
1167 |         brks = b; def = d;
1168 |         break;
1169 |     case Case:
1170 |         *++e = JMP; ++e;
1171 |         a = 0;
1172 |         *e = (int) (e + 7); *++e = PSH; i = *cas; *cas = (int) e;
1173 |         gen((int *) n[1]); // condition
1174 |         if (e[-1] != IMM) fatal("bad case immediate");
1175 |         *++e = SUB; *++e = BNZ; cas = ++e; *e = i + e[-3];
1176 |         if (*(int *) n[2] == Switch) a = cas;
1177 |         gen((int *) n[2]); // expression
1178 |         if (a != 0) cas = a;
1179 |         break;
1180 |     case Break:
1181 |         // emit JMP and link its address field into the break chain
1182 |         *++e = JMP; *++e = (int) brks; brks = e;
1183 |         break;
1184 |     case Continue:
1185 |         // emit JMP and link its address field into the continue chain
1186 |         *++e = JMP; *++e = (int) cnts; cnts = e;
1187 |         break;
1188 |     case Goto:
1189 |         label = (struct ident_s *) n[1];
1190 |         *++e = JMP; *++e = label->val;
1191 |         if (label->class == 0) label->val = (int) e; // Define label address later
1192 |         break;
1193 |     case Default:
1194 |         def = e + 1;
1195 |         gen((int *) n[1]); break;
1196 |     case Return: // LEV is emitted whether or not there is a return expression
1197 |         if (n[1]) gen((int *) n[1]); *++e = LEV; break; // parse return AST
1198 |     case '{':
1199 |         // parse expression or statement from AST
1200 |         gen((int *) n[1]); gen(n + 2); break;
1201 |     case Enter: *++e = ENT; *++e = n[1]; gen(n + 2);
1202 |                 if (*e != LEV) *++e = LEV; break;
1203 |     default:
1204 |         if (i != ';') {
1205 |             printf("%d: compiler error gen=%d\n", line, i); exit(-1);
1206 |         }
1207 |     }
1208 | }
1209 | 
1210 | void check_label(int **tt)
1211 | {
1212 |     // Turn "identifier :" into a Label node chained in front of *tt.
1213 |     char *ahead = p; // look ahead in the source text starting at p
1214 |     if (tk != Id) return;
1215 |     for (; *ahead == ' ' || *ahead == '\t'; ++ahead) ; // skip blanks/tabs
1216 |     if (*ahead != ':') return; // no colon follows: not a label
1217 |     if (id->class != 0 || (id->type != 0 && id->type != -1))
1218 |         fatal("invalid label");
1219 |     id->type = -1; // hack for id->class deficiency
1220 |     n -= 2; n[0] = Label; n[1] = (int) id;
1221 |     n -= 2; n[0] = '{'; n[1] = (int) *tt; *tt = n;
1222 |     next(); next(); // step over the identifier and the ':'
1223 | }
1224 | 
1225 | // statement parsing (syntax analysis, except for declarations)
1226 | void stmt(int ctx)
1227 | {
1228 |     int *a, *b, *c, *d;
1229 |     int i, j, atk;
1230 |     int bt;
1231 | 
1232 |     switch (tk) {
1233 |     case Enum:
1234 |         next();
1235 |         // If current token is not "{", it means having enum type name.
1236 |         // Skip the enum type name.
1237 |         if (tk != '{') next();
1238 |         if (tk == '{') {
1239 |             next();
1240 |             i = 0; // Enum value starts from 0
1241 |             while (tk != '}') {
1242 |                 // Current token should be enum name.
1243 |                 // If current token is not identifier, stop parsing.
1244 |                 if (tk != Id) fatal("bad enum identifier");
1245 |                 next();
1246 |                 if (tk == Assign) {
1247 |                     next();
1248 |                     expr(Cond);
1249 |                     if (*n != Num) fatal("bad enum initializer");
1250 |                     i = n[1]; // Set enum value
1251 |                 }
1252 |                 /* "id" is pointing to the enum name's symbol table entry.
1253 |                  * Set the symbol table entry's symbol type be "Num".
1254 |                  * Set the symbol table entry's associated value type be "INT".
1255 |                  * Set the symbol table entry's associated value be enum value.
1256 |                  */
1257 |                 id->class = Num; id->type = INT; id->val = i++;
1258 |                 if (tk == ',') next(); // If current token is ",", skip.
1259 |             }
1260 |             next(); // Skip "}"
1261 |         } else if (tk == Id) {
1262 |             id->type = INT; id->class = ctx; id->val = ld++;
1263 |             next();
1264 |         }
1265 |         return;
1266 |     case Int:
1267 |     case Char:
1268 |     case Struct:
1269 |     case Union:
1270 |         switch (tk) {
1271 |         case Struct:
1272 |         case Union:
1273 |             atk = tk; next();
1274 |             if (tk == Id) {
1275 |                 if (!id->stype) id->stype = tnew++;
1276 |                 bt = id->stype;
1277 |                 next();
1278 |             } else {
1279 |                 bt = tnew++;
1280 |             }
1281 |             if (tk == '{') {
1282 |                 tsize[bt] = 0; // for unions
1283 |                 next();
1284 |                 if (members[bt]) fatal("duplicate structure definition");
1285 |                 i = 0;
1286 |                 while (tk != '}') {
1287 |                     int mbt = INT;
1288 |                     switch (tk) {
1289 |                     case Int: next(); break;
1290 |                     case Char: next(); mbt = CHAR; break;
1291 |                     case Struct:
1292 |                     case Union:
1293 |                         next();
1294 |                         if (tk != Id) fatal("bad struct/union declaration");
1295 |                         mbt = id->stype;
1296 |                         next(); break;
1297 |                     }
1298 |                     while (tk != ';') {
1299 |                         ty = mbt;
1300 |                         // If the beginning of * is a pointer type,
1301 |                         // then type plus `PTR` indicates what kind of pointer
1302 |                         while (tk == Mul) { next(); ty += PTR; }
1303 |                         if (tk != Id) fatal("bad struct member definition");
1304 |                         struct member_s *m = malloc(sizeof(struct member_s));
1305 |                         m->id = id;
1306 |                         m->offset = i;
1307 |                         m->type = ty;
1308 |                         m->next = members[bt];
1309 |                         members[bt] = m;
1310 |                         i += (ty >= PTR) ? sizeof(int) : tsize[ty];
1311 |                         i = (i + 3) & -4;
1312 |                         if (atk == Union) { if (i > tsize[bt]) tsize[bt] = i ; i = 0; }
1313 |                         next();
1314 |                         if (tk == ',') next();
1315 |                     }
1316 |                     next();
1317 |                 }
1318 |                 next();
1319 |                 if (atk != Union) tsize[bt] = i;
1320 |             }
1321 |             break;
1322 |         case Int:
1323 |         case Char:
1324 |             bt = (tk == Int) ? INT : CHAR; // basetype
1325 |             next();
1326 |             break;
1327 |         }
1328 |         /* parse statement such as 'int a, b, c;'
1329 |          * "enum" finishes by "tk == ';'", so the code below will be skipped.
1330 |          * While current token is not statement end or block end.
1331 |          */
1332 |         b = 0;
1333 |         while (tk != ';' && tk != '}' && tk != ',' && tk != ')') {
1334 |             ty = bt;
1335 |             // If the beginning of * is a pointer type, then type plus `PTR`
1336 |             // indicates what kind of pointer
1337 |             while (tk == Mul) { next(); ty += PTR; }
1338 |             switch (ctx) {
1339 |             case Glo:
1340 |                 if (tk != Id) fatal("bad global declaration");
1341 |                 if (id->class >= ctx) fatal("duplicate global definition");
1342 |                 break;
1343 |             case Loc:
1344 |                 if (tk != Id) fatal("bad local declaration");
1345 |                 if (id->class >= ctx) fatal("duplicate local definition");
1346 |                 break;
1347 |             }
1348 |             next();
1349 |             id->type = ty;
1350 |             if (tk == '(') { // function
1351 |                 if (b != 0) fatal("func decl can't be mixed with var decl(s)");
1352 |                 if (ctx != Glo) fatal("nested function");
1353 |                 if (ty > INT && ty < PTR) fatal("return type can't be struct");
1354 |                 id->class = Func; // type is function
1355 |                 // "+ 1" is because the code to add instruction always uses "++e".
1356 |                 id->val = (int) (e + 1); // function Pointer? offset/address
1357 |                 id->type = ty;
1358 |                 next(); ld = 0; // "ld" is parameter's index.
1359 |                 while (tk != ')') { stmt(Par); if (tk == ',') next(); }
1360 |                 next();
1361 |                 if (tk != '{') fatal("bad function definition");
1362 |                 loc = ++ld;
1363 |                 next();
1364 |                 // Not declare and must not be function, analyze inner block.
1365 |                 // e represents the address which will store pc
1366 |                 // (ld - loc) indicates memory size to allocate
1367 |                 *--n = ';';
1368 |                 while (tk != '}') {
1369 |                     int *t = n; check_label(&t); stmt(Loc);
1370 |                     if (t != n) { *--n = (int) t; *--n = '{'; }
1371 |                 }
1372 |                 *--n = ld - loc; *--n = Enter;
1373 |                 cas = 0;
1374 |                 gen(n);
1375 |                 id = sym; // unwind symbol table locals
1376 |                 while (id->tk) {
1377 |                     if (id->class == Loc || id->class == Par) {
1378 |                         id->class = id->hclass;
1379 |                         id->type = id->htype;
1380 |                         id->val = id->hval;
1381 |                     }
1382 |                     else if (id->class == Label) { // clear id for next func
1383 |                         id->class = 0; id->val = 0; id->type = 0;
1384 |                     }
1385 |                     else if (id->class == 0 && id->type == -1) {
1386 |                         printf("%d: label %.*s not defined\n",
1387 |                                line, id->hash & 0x3f, id->name);
1388 |                         exit(-1);
1389 |                     }
1390 |                     id++;
1391 |                 }
1392 |             }
1393 |             else {
1394 |                 int sz = ((ty <= INT || ty >= PTR) ? sizeof(int) : tsize[ty]);
1395 |                 id->hclass = id->class; id->class = ctx;
1396 |                 id->htype = id->type; id->type = ty;
1397 |                 id->hval = id->val;
1398 |                 if (ctx == Glo) { id->val = (int) data; data += sz; }
1399 |                 else if (ctx == Loc) { id->val = (ld += sz / sizeof(int)); }
1400 |                 else if (ctx == Par) {
1401 |                     if (ty > INT && ty < PTR) // local struct decl
1402 |                         fatal("struct parameters must be pointers");
1403 |                     id->val = ld++;
1404 |                 }
1405 |                 if (ctx == Loc && tk == Assign) {
1406 |                     int ptk = tk;
1407 |                     if (b == 0) *--n = ';';
1408 |                     b = n; *--n = loc - id->val; *--n = Loc;
1409 |                     next(); a = n; expr(ptk);
1410 |                     *--n = (int)a; *--n = ty; *--n = Assign;
1411 |                     *--n = (int) b; *--n = '{';
1412 |                 }
1413 |             }
1414 |             if (ctx != Par && tk == ',') next();
1415 |         }
1416 |         return;
1417 |     case If:
1418 |         next();
1419 |         if (tk != '(') fatal("open parentheses expected");
1420 |         next();
1421 |         expr(Assign); a = n;
1422 |         if (tk != ')') fatal("close parentheses expected");
1423 |         next();
1424 |         stmt(ctx);
1425 |         b = n;
1426 |         if (tk == Else) { next(); stmt(ctx); d = n; } else d = 0;
1427 |         *--n = (int)d; *--n = (int) b; *--n = (int) a; *--n = Cond;
1428 |         return;
1429 |     case While:
1430 |         next();
1431 |         if (tk != '(') fatal("open parentheses expected");
1432 |         next();
1433 |         expr(Assign); b = n; // condition
1434 |         if (tk != ')') fatal("close parentheses expected");
1435 |         next();
1436 |         ++brkc; ++cntc;
1437 |         stmt(ctx); a = n; // parse body of "while"
1438 |         --brkc; --cntc;
1439 |         *--n = (int) b; *--n = (int) a; *--n = While;
1440 |         return;
1441 |     case DoWhile:
1442 |         next();
1443 |         ++brkc; ++cntc;
1444 |         stmt(ctx); a = n; // parse body of "do-while"
1445 |         --brkc; --cntc;
1446 |         if (tk != While) fatal("while expected");
1447 |         next();
1448 |         if (tk != '(') fatal("open parentheses expected");
1449 |         next();
1450 |         *--n = ';';
1451 |         expr(Assign); b = n;
1452 |         if (tk != ')') fatal("close parentheses expected");
1453 |         next();
1454 |         *--n = (int) b; *--n = (int) a; *--n = DoWhile;
1455 |         return;
1456 |     case Switch:
1457 |         i = 0; j = 0;
1458 |         if (cas) j = (int) cas;
1459 |         cas = &i;
1460 |         next();
1461 |         if (tk != '(') fatal("open parentheses expected");
1462 |         next();
1463 |         expr(Assign);
1464 |         a = n;
1465 |         if (tk != ')') fatal("close parentheses expected");
1466 |         next();
1467 |         ++swtc; ++brkc;
1468 |         stmt(ctx);
1469 |         --swtc; --brkc;
1470 |         b = n;
1471 |         *--n = (int) b; *--n = (int) a; *--n = Switch;
1472 |         if (j) cas = (int *) j;
1473 |         return;
1474 |     case Case:
1475 |         if (!swtc) fatal("case-statement outside of switch");
1476 |         i = *cas;
1477 |         next();
1478 |         expr(Or);
1479 |         a = n;
1480 |         if (*n != Num) fatal("bad case immediate");
1481 |         j = n[1]; n[1] -= i; *cas = j;
1482 |         *--n = ';';
1483 |         if (tk != ':') fatal("colon expected");
1484 |         next();
1485 |         stmt(ctx);
1486 |         b = n;
1487 |         *--n = (int) b;*--n = (int) a; *--n = Case;
1488 |         return;
1489 |     case Break:
1490 |         if (!brkc) fatal("misplaced break statement");
1491 |         next();
1492 |         if (tk != ';') fatal("semicolon expected");
1493 |         next();
1494 |         *--n = Break;
1495 |         return;
1496 |     case Continue:
1497 |         if (!cntc) fatal("misplaced continue statement");
1498 |         next();
1499 |         if (tk != ';') fatal("semicolon expected");
1500 |         next();
1501 |         *--n = Continue;
1502 |         return;
1503 |     case Default:
1504 |         if (!swtc) fatal("default-statement outside of switch");
1505 |         next();
1506 |         if (tk != ':') fatal("colon expected");
1507 |         next();
1508 |         stmt(ctx); a = n;
1509 |         *--n = (int) a; *--n = Default;
1510 |         return;
1511 |     // RETURN_stmt -> 'return' expr ';' | 'return' ';'
1512 |     case Return:
1513 |         a = 0; next();
1514 |         if (tk != ';') { expr(Assign); a = n; }
1515 |         *--n = (int) a; *--n = Return;
1516 |         if (tk != ';') fatal("semicolon expected");
1517 |         next();
1518 |         return;
1519 |     /* For iteration is implemented as:
1520 |      * Init -> Cond -> Bz to end -> Jmp to Body
1521 |      * After -> Jmp to Cond -> Body -> Jmp to After
1522 |      */
1523 |     case For:
1524 |         next();
1525 |         if (tk != '(') fatal("open parentheses expected");
1526 |         next();
1527 |         *--n = ';';
1528 |         if (tk != ';') expr(Assign);
1529 |         while (tk == ',') {
1530 |             int *f = n; next(); expr(Assign); *--n = (int) f; *--n = '{';
1531 |         }
1532 |         d = n;
1533 |         if (tk != ';') fatal("semicolon expected");
1534 |         next();
1535 |         *--n = ';';
1536 |         expr(Assign); a = n; // Point to entry of for cond
1537 |         if (tk != ';') fatal("semicolon expected");
1538 |         next();
1539 |         *--n = ';';
1540 |         if (tk != ')') expr(Assign);
1541 |         while (tk == ',') {
1542 |             int *g = n; next(); expr(Assign); *--n = (int) g; *--n = '{';
1543 |         }
1544 |         b = n;
1545 |         if (tk != ')') fatal("close parentheses expected");
1546 |         next();
1547 |         ++brkc; ++cntc;
1548 |         stmt(ctx); c = n;
1549 |         --brkc; --cntc;
1550 |         *--n = (int) d; *--n = (int) c; *--n = (int) b; *--n = (int) a;
1551 |         *--n = For;
1552 |         return;
1553 |     case Goto:
1554 |         next();
1555 |         if (tk != Id || (id->type != 0 && id->type != -1)
1556 |                      || (id->class != Label && id->class != 0))
1557 |             fatal("goto expects label");
1558 |         id->type = -1; // hack for id->class deficiency
1559 |         *--n = (int) id; *--n = Goto; next();
1560 |         if (tk != ';') fatal("semicolon expected");
1561 |         next();
1562 |         return;
1563 |     // stmt -> '{' stmt '}'
1564 |     case '{':
1565 |         next();
1566 |         *--n = ';';
1567 |         while (tk != '}') {
1568 |             a = n; check_label(&a); stmt(ctx);
1569 |             if (a != n) { *--n = (int) a; *--n = '{'; }
1570 |         }
1571 |         next();
1572 |         return;
1573 |     // stmt -> ';'
1574 |     case ';':
1575 |         next();
1576 |         *--n = ';';
1577 |         return;
1578 |     default:
1579 |         // general statements are considered assignment statements/expressions
1580 |         expr(Assign);
1581 |         if (tk != ';' && tk != ',') fatal("semicolon expected");
1582 |         next();
1583 |     }
1584 | }
1585 | 
// Report a fatal error: print msg followed by a newline on stdout, then
// terminate the process with exit status -1. Never returns.
void die(char *msg)
{
    printf("%s\n", msg);
    exit(-1);
}
1587 | 
// Convert a byte displacement (target address minus branch address) into the
// 24-bit word-offset field of an ARM branch instruction. The displacement is
// reduced by 8 (an ARM branch is relative to its own address + 8), scaled
// from bytes to words, and truncated to 24 bits.
int reloc_imm(int offset)
{
    int words = (offset - 8) >> 2;
    return words & 0x00ffffff;
}
// Build a complete ARM "bl" instruction word for the given byte displacement:
// the 0xeb000000 opcode combined with the 24-bit field from reloc_imm().
int reloc_bl(int offset)
{
    int imm24 = reloc_imm(offset);
    return 0xeb000000 | imm24;
}
1590 | 
1591 | int *codegen(int *jitmem, int *jitmap)
1592 | {
1593 |     int i, ii, tmp, c;
1594 |     int *je, *tje;    // current position in emitted native code
1595 |     int *immloc, *il;
1596 | 
1597 |     immloc = il = malloc(1024 * 4);
1598 |     int *iv = malloc(1024 * 4);
1599 |     int *imm0 = 0;
1600 | 
1601 |     // first pass: emit native code
1602 |     int *pc = text + 1; je = jitmem; line = 0;
1603 |     while (pc <= e) {
1604 |         i = *pc;
1605 |         // Store mapping from IR index to native instruction buffer location
1606 |         // "pc - text" gets the index of IR.
1607 |         // "je" points to native instruction buffer's current location.
1608 |         jitmap[((int) pc++ - (int) text) >> 2] = (int) je;
1609 |         switch (i) {
1610 |         case LEA:
1611 |             tmp = *pc++;
1612 |             if (tmp >= 64 || tmp <= -64) {
1613 |                 printf("jit: LEA %d out of bounds\n", tmp); exit(6);
1614 |             }
1615 |             if (tmp >= 0)
1616 |                 *je++ = 0xe28b0000 | tmp * 4;    // add     r0, fp, #(tmp)
1617 |             else
1618 |                 *je++ = 0xe24b0000 | (-tmp) * 4; // sub     r0, fp, #(tmp)
1619 |             break;
1620 |         case IMM:
1621 |             tmp = *pc++;
1622 |             if (0 <= tmp && tmp < 256)
1623 |                 *je++ = 0xe3a00000 + tmp;        // mov r0, #(tmp)
1624 |             else { if (!imm0) imm0 = je; *il++ = (int) (je++); *iv++ = tmp; }
1625 |             break;
1626 |         case JSR:
1627 |         case JMP:
1628 |             pc++; je++; // postponed till second pass
1629 |             break;
1630 |         case BZ:
1631 |         case BNZ:
1632 |             *je++ = 0xe3500000; pc++; je++;      // cmp r0, #0
1633 |             break;
1634 |         case ENT:
1635 |             *je++ = 0xe92d4800; *je++ = 0xe28db000; // push {fp, lr}; add  fp, sp, #0
1636 |             ii = c = 0; tmp = 4 * (*pc++);
1637 |             while (tmp >= 255) { c |= tmp & 3; tmp >>= 2; ++ii; }
1638 |             tmp += (c ? 1 : 0); if ((tmp << (2*ii)) >= 32768 || tmp < 0) {
1639 |                 printf("jit: ENT %d out of bounds\n", tmp << (2*ii)); exit(6);
1640 |             } // sub  sp, sp, #tmp (scaled)
1641 |             if (tmp) *je++ = 0xe24dd000 | (((16-ii) & 0xf) << 8) | tmp;
1642 |             break;
1643 |         case ADJ:
1644 |             *je++ = 0xe28dd000 + *pc++ * 4;      // add sp, sp, #(tmp * 4)
1645 |             break;
1646 |         case LEV:
1647 |             *je++ = 0xe28bd000; *je++ = 0xe8bd8800; // add sp, fp, #0; pop {fp, pc}
1648 |             break;
1649 |         case LI:
1650 |             *je++ = 0xe5900000;                  // ldr r0, [r0]
1651 |             break;
1652 |         case LC:
1653 |             *je++ = 0xe5d00000; if (signed_char)  *je++ = 0xe6af0070; // ldrb r0, [r0]; (sxtb r0, r0)
1654 |             break;
1655 |         case SI:
1656 |             *je++ = 0xe49d1004; *je++ = 0xe5810000; // pop {r1}; str r0, [r1]
1657 |             break;
1658 |         case SC:
1659 |             *je++ = 0xe49d1004; *je++ = 0xe5c10000; // pop {r1}; strb r0, [r1]
1660 |             break;
1661 |         case PSH:
1662 |             *je++ = 0xe52d0004;                       // push {r0}
1663 |             break;
1664 |         case OR:
1665 |             *je++ = 0xe49d1004; *je++ = 0xe1810000; // pop {r1}; orr r0, r1, r0
1666 |             break;
1667 |         case XOR:
1668 |             *je++ = 0xe49d1004; *je++ = 0xe0210000; // pop {r1}; eor r0, r1, r0
1669 |             break;
1670 |         case AND:
1671 |             *je++ = 0xe49d1004; *je++ = 0xe0010000; // pop {r1}; and r0, r1, r0
1672 |             break;
1673 |         case SHL:
1674 |             *je++ = 0xe49d1004; *je++ = 0xe1a00011; // pop {r1}; lsl r0, r1, r0
1675 |             break;
1676 |         case SHR:
1677 |             *je++ = 0xe49d1004; *je++ = 0xe1a00051; // pop {r1}; asr r0, r1, r0
1678 |             break;
1679 |         case ADD:
1680 |             *je++ = 0xe49d1004; *je++ = 0xe0800001; // pop {r1}; add r0, r0, r1
1681 |             break;
1682 |         case SUB:
1683 |             *je++ = 0xe49d1004; *je++ = 0xe0410000; // pop {r1}; sub r0, r1, r0
1684 |             break;
1685 |         case MUL:
1686 |             *je++ = 0xe49d1004; *je++ = 0xe0000091; // pop {r1}; mul r0, r1, r0
1687 |             break;
1688 |         case DIV:
1689 |         case MOD:
1690 |             *je++ = 0xe52d0004;                     // push {r0}
1691 |             int ti = ef_getidx((i == DIV) ? "__aeabi_idiv" : "__aeabi_idivmod");
1692 |             tmp = ef_getaddr(ti);
1693 |             *je++ = 0xe49d0004 | (1 << 12); // pop r1
1694 |             *je++ = 0xe49d0004 | (0 << 12); // pop r0
1695 |             *je++ = 0xe28fe000;                          // add lr, pc, #0
1696 |             if (!imm0) imm0 = je;
1697 |             *il++ = (int) je++ + 1;
1698 |             *iv++ = tmp;
1699 |             // ARM EABI modulo helper function produces quotient in r0
1700 |             // and the remainder in r1.
1701 |             if (i == MOD)
1702 |                 *je++ = 0xe1a00001;                 // mov r0, r1
1703 |             break;
1704 |         case SYSC:
1705 |             tmp = ef_getaddr(*pc++);  // look up address from ef index
1706 |             if (*pc++ != ADJ) die("codegen: no ADJ after native proc");
1707 |             i = *pc;
1708 |             if (i > 10) die("codegen: no support for 10+ arguments");
1709 |             while (i > 0) *je++ = 0xe49d0004 | (--i << 12); // pop r(i-1)
1710 |             i = *pc++;
1711 |             if (i > 4) *je++ = 0xe92d03f0;               // push {r4-r9}
1712 |             *je++ = 0xe28fe000;                          // add lr, pc, #0
1713 |             if (!imm0) imm0 = je;
1714 |             *il++ = (int) je++ + 1;
1715 |             *iv++ = tmp;
1716 |             if (i > 4) *je++ = 0xe28dd018;              // add sp, sp, #24
1717 |             break;
1718 |         case CLCA:
1719 |             *je++ = 0xe59d0004; *je++ = 0xe59d1000; // ldr r0, [sp, #4]
1720 |                                                     // ldr r1, [sp]
1721 |             *je++ = 0xe3a0780f; *je++ = 0xe2877002; // mov r7, #0xf0000
1722 |                                                     // add r7, r7, #2
1723 |             *je++ = 0xe3a02000; *je++ = 0xef000000; // mov r2, #0
1724 |                                                     // svc 0
1725 |             break;
1726 |         default:
1727 |             if (EQ <= i && i <= GE) {
1728 |                 *je++ = 0xe49d1004; *je++ = 0xe1510000; // pop {r1}; cmp r1, r0
1729 |                 if (i <= NE) { je[0] = 0x03a00000; je[1] = 0x13a00000; }   // moveq r0, #0; movne r0, #0
1730 |                 else if (i == LT || i == GE) { je[0] = 0xb3a00000; je[1] = 0xa3a00000; } // movlt r0, #0; movge   r0, #0
1731 |                 else { je[0] = 0xc3a00000; je[1] = 0xd3a00000; }           // movgt r0, #0; movle r0, #0
1732 |                 if (i == EQ || i == LT || i == GT) je[0] = je[0] | 1;
1733 |                 else je[1] = je[1] | 1;
1734 |                 je += 2;
1735 |                 break;
1736 |             } else {
1737 |                 printf("code generation failed for %d!\n", i);
1738 |                 free(iv);
1739 |                 return 0;
1740 |             }
1741 |         }
1742 | 
1743 |         int genpool = 0;
1744 |         if (imm0) {
1745 |             if (i == LEV) genpool = 1;
1746 |             else if ((int) je > (int) imm0 + 3000) {
1747 |                 tje = je++; genpool = 2;
1748 |             }
1749 |         }
1750 |         if (genpool) {
1751 |             *iv = 0;
1752 |             while (il > immloc) {
1753 |                 tmp = *--il;
1754 |                 if ((int) je > tmp + 4096 + 8) die("codegen: can't reach the pool");
1755 |                 iv--; if (iv[0] == iv[1]) je--;
1756 |                 if (tmp & 1) {
1757 |                     // ldr pc, [pc, #..]
1758 |                     *(int *) (tmp - 1) = 0xe59ff000 | ((int) je - tmp - 7);
1759 |                 } else {
1760 |                     // ldr r0, [pc, #..]
1761 |                     *(int *) tmp = 0xe59f0000 | ((int) je - tmp - 8);
1762 |                 }
1763 |                 *je++ = *iv;
1764 |             }
1765 |             if (genpool == 2) { // jump past the pool
1766 |                 tmp = ((int) je - (int) tje - 8) >> 2;
1767 |                 *tje = 0xea000000 | (tmp & 0x00ffffff); // b #(je)
1768 |             }
1769 |             imm0 = 0;
1770 |             genpool = 0;
1771 |         }
1772 |     }
1773 |     if (il > immloc) die("codegen: not terminated by a LEV");
1774 |     tje = je;
1775 | 
1776 |     // second pass
1777 |     pc = text + 1; // Point instruction pointer "pc" to the first instruction.
1778 |     while (pc <= e) { // While instruction end is not met.
1779 |         // Get the IR's corresponding native instruction buffer address.
1780 |         je = (int *) jitmap[((int) pc - (int) text) >> 2];
1781 |         i = *pc++; // Get current instruction
1782 |         // If the instruction is one of the jumps.
1783 |         if (i == JSR || i == JMP || i == BZ || i == BNZ) {
1784 |             switch (i) {
1785 |             case JSR:
1786 |                 *je = 0xeb000000;  // bl #(tmp)
1787 |                 break;
1788 |             case JMP:
1789 |                 *je = 0xea000000;  // b #(tmp)
1790 |                 break;
1791 |             case BZ:
1792 |                 *++je = 0x0a000000; // beq #(tmp)
1793 |                 break;
1794 |             case BNZ:
1795 |                 *++je = 0x1a000000; // bne #(tmp)
1796 |                 break;
1797 |             }
1798 |             tmp = *pc++;
1799 |             *je = (*je |
1800 |                    reloc_imm(jitmap[(tmp - (int) text) >> 2] - (int) je));
1801 |         }
1802 |         // If the instruction has operand, increment instruction pointer to
1803 |         // skip the operand.
1804 |         else if (i <= ADJ || i == SYSC) { ++pc; }
1805 |     }
1806 |     free(iv);
1807 |     return tje;
1808 | }
1809 | 
// Local copies of the protection and mapping flags passed to mmap() in jit().
enum {
    // protection bits
    _PROT_READ = 1,
    _PROT_WRITE = 2,
    _PROT_EXEC = 4,
    // mapping flags
    _MAP_PRIVATE = 2,
    _MAP_ANON = 32
};
1814 | 
1815 | int jit(int poolsz, int *main, int argc, char **argv)
1816 | {
1817 |     char *jitmem;  // executable memory for JIT-compiled native code
1818 |     int retval;
1819 |     if (src) return 0; // skip for IR listing
1820 | 
1821 |     // setup JIT memory
1822 |     if (!(jitmem = mmap(0, poolsz, _PROT_EXEC | _PROT_READ | _PROT_WRITE,
1823 |                         _MAP_PRIVATE | _MAP_ANON, -1, 0))) {
1824 |         printf("could not mmap(%d) jit executable memory\n", poolsz);
1825 |         return -1;
1826 |     }
1827 |     int *jitmap = (int *) (jitmem + (poolsz >> 1));
1828 |     int *je = (int *) jitmem;
1829 |     *je++ = (int) &retval;
1830 |     *je++ = argc;
1831 |     *je++ = (int) argv;
1832 |     int *_start = je;
1833 |     *je++ = 0xe92d5ff0;       // push    {r4-r12, lr}
1834 |     *je++ = 0xe51f0014;       // ldr     r0, [pc, #-20] ; argc
1835 |     *je++ = 0xe51f1014;       // ldr     r1, [pc, #-20] ; argv
1836 |     *je++ = 0xe52d0004;       // push    {r0}
1837 |     *je++ = 0xe52d1004;       // push    {r1}
1838 |     int *tje = je++;          // bl      jitmain
1839 |     *je++ = 0xe51f502c;       // ldr     r5, [pc, #-44] ; retval
1840 |     *je++ = 0xe5850000;       // str     r0, [r5]
1841 |     *je++ = 0xe28dd008;       // add     sp, sp, #8
1842 |     *je++ = 0xe8bd9ff0;       // pop     {r4-r12, pc}
1843 |     if (!(je = codegen(je, jitmap))) return 1;
1844 |     if (je >= jitmap) die("jitmem too small");
1845 |     *tje = reloc_bl(jitmap[((int) main - (int) text) >> 2] - (int) tje);
1846 | 
1847 |     // hack to jump into specific function pointer
1848 |     __clear_cache(jitmem, je);
1849 |     int *res = bsearch(&sym, sym, 1, 1, (void *) _start);
1850 |     if (((void *) 0) != res) return 0; return -1; // make compiler happy
1851 | }
1852 | 
// Pack a symbol binding (b) and symbol type (t) into one st_info value:
// the binding goes in the upper bits, the low four bits of the type below it.
int ELF32_ST_INFO(int b, int t)
{
    int binding_bits = b << 4;
    int type_bits = t & 0xf;
    return binding_bits + type_bits;
}
// ELF32 header field values and the byte sizes of the table entries that
// are laid out by hand when the executable image is built.
enum {
    EHDR_SIZE = 52,     // ELF header
    ET_EXEC = 2,        // e_type value
    EM_ARM = 40,        // e_machine value
    PHDR_ENT_SIZE = 32, // one program header
    SHDR_ENT_SIZE = 40, // one section header
    SYM_ENT_SIZE = 16,  // one symbol table entry
    REL_ENT_SIZE = 8,   // one relocation entry
    PLT_ENT_SIZE = 12,  // one PLT entry
    DYN_ENT_SIZE = 8    // one dynamic entry
};
1860 | 
// Section header for ELF32. gen_shdr() fills one of these in place through
// a pointer into the output buffer, so the field order and the all-int
// layout (ten ints, matching SHDR_ENT_SIZE = 40 when int is 4 bytes) must
// not change.
struct Elf32_Shdr {
    int sh_name;      // [Elf32_Word] Section name (index into string table)
    int sh_type;      // [Elf32_Word] Section type (SHT_*)
    int sh_flags;     // [Elf32_Word] Section flags (SHF_*)
    int sh_addr;      // [Elf32_Addr] Address where section is to be loaded
    int sh_offset;    // [Elf32_Off] File offset of section data, in bytes
    int sh_size;      // [Elf32_Word] Size of section, in bytes
    int sh_link;      // [Elf32_Word] Section type-specific header table
                      //              index link
    int sh_info;      // [Elf32_Word] Section type-specific extra information
    int sh_addralign; // [Elf32_Word] Section address alignment
    int sh_entsize;   // [Elf32_Word] Size of records contained within section
};
1874 | 
// Constants used when filling in ELF32 section headers.
enum {
    // Special section indices
    SHN_UNDEF = 0,     // Undefined, missing, irrelevant, or meaningless

    // Section types (sh_type)
    SHT_NULL = 0,      // No associated section (inactive entry)
    SHT_PROGBITS = 1,  // Program-defined contents
    SHT_STRTAB = 3,    // String table
    SHT_DYNAMIC = 6,   // Information for dynamic linking
    SHT_REL = 9,       // Relocation entries; no explicit addends
    SHT_DYNSYM = 11,   // Symbol table

    // Section flags (sh_flags), combinable bit values
    SHF_WRITE = 0x1,
    SHF_ALLOC = 0x2,
    SHF_EXECINSTR = 0x4
};
1892 | 
// Symbol table entries for ELF32.
// gen_sym() fills one of these in place through a pointer into the output
// buffer; entries are advanced by SYM_ENT_SIZE bytes, not by sizeof, so only
// the field offsets matter.
struct Elf32_Sym {
    int st_name;  // [Elf32_Word] Symbol name (index into string table)
    int st_value; // [Elf32_Addr] Value or address associated with the symbol
    int st_size;  // [Elf32_Word] Size of the symbol
    char st_info; // [unsigned] Symbol's type and binding attributes
    char st_other;// [unsigned] Must be zero; reserved
    // The 16-bit section index is declared as individual chars; gen_sym()
    // writes only the first two bytes, starting at st_shndx, with memcpy().
    // NOTE(review): st_shndx_2 and st_shndx_3 are never written in the code
    // visible here - presumably padding; confirm before removing.
    char st_shndx, st_shndx_1, st_shndx_2, st_shndx_3; // [Elf32_Half]
                  // Which section (header table index) it's defined
};
1903 | 
// Constants describing ELF32 symbols; bindings and types are combined into
// st_info by ELF32_ST_INFO().
enum {
    // Symbol bindings
    STB_LOCAL = 0,  // Local symbol, not visible outside the object file
                    // containing its definition
    STB_GLOBAL = 1, // Global symbol, visible to all object files being
                    // combined

    // Symbol types
    STT_NOTYPE = 0, // Symbol's type is not specified
    STT_FUNC = 2,   // Symbol is executable code (function, etc.)

    // Symbol number
    STN_UNDEF = 0
};
1918 | 
// Program header for ELF32.
// gen_phdr() fills one of these in place through a pointer into the output
// buffer, so the field order and the all-int layout (eight ints, matching
// PHDR_ENT_SIZE = 32 when int is 4 bytes) must not change.
struct Elf32_Phdr {
    int p_type;   // [Elf32_Word] Type of segment
    int p_offset; // [Elf32_Off] File offset where segment is located, in bytes
    int p_vaddr;  // [Elf32_Addr] Virtual address of beginning of segment
    int p_paddr;  // [Elf32_Addr] Physical address of beginning of segment
                  //              (OS-specific)
    int p_filesz; // [Elf32_Word] Number of bytes in file image of segment
                  //              (may be zero)
    int p_memsz;  // [Elf32_Word] Number of bytes in mem image of segment
                  //              (may be zero)
    int p_flags;  // [Elf32_Word] Segment flags
    int p_align;  // [Elf32_Word] Segment alignment constraint
};
1933 | 
// Constants used when filling in ELF32 program headers.
enum {
    // Segment types (p_type)
    PT_NULL = 0,    // Unused segment
    PT_LOAD = 1,    // Loadable segment
    PT_DYNAMIC = 2, // Dynamic linking information
    PT_INTERP = 3,  // Interpreter pathname

    // Segment flag bits (p_flags), combinable
    PF_X = 1,       // Execute
    PF_W = 2,       // Write
    PF_R = 4        // Read
};
1946 | 
// Number of program headers, section headers and symbols generated so far.
// elf32() resets them to 0; gen_phdr(), gen_shdr() and gen_sym() each return
// the current value of their counter and then increment it.
int phdr_idx, shdr_idx, sym_idx;
1948 | 
1949 | int gen_phdr(char *ptr, int type, int offset, int addr, int size,
1950 |              int flag, int align)
1951 | {
1952 |     struct Elf32_Phdr *phdr = (struct Elf32_Phdr *) ptr;
1953 |     phdr->p_type =  type;
1954 |     phdr->p_offset = offset;
1955 |     phdr->p_vaddr = addr;
1956 |     phdr->p_paddr = addr;
1957 |     phdr->p_filesz = size;
1958 |     phdr->p_memsz = size;
1959 |     phdr->p_flags = flag;
1960 |     phdr->p_align = align;
1961 |     return phdr_idx++;
1962 | }
1963 | 
1964 | int gen_shdr(char *ptr, int type, int name, int offset, int addr,
1965 |              int size, int link, int info,
1966 |              int flag, int align, int entsize)
1967 | {
1968 |     struct Elf32_Shdr *shdr = (struct Elf32_Shdr *) ptr;
1969 |     shdr->sh_name = name;       shdr->sh_type = type;
1970 |     shdr->sh_addr = addr;       shdr->sh_offset = offset;
1971 |     shdr->sh_size = size;       shdr->sh_link = link;
1972 |     shdr->sh_info = info;       shdr->sh_flags = flag;
1973 |     shdr->sh_addralign = align; shdr->sh_entsize = entsize;
1974 |     return shdr_idx++;
1975 | }
1976 | 
1977 | int gen_sym(char *ptr, int name, char info,
1978 |             int shndx, int size, int value)
1979 | {
1980 |     struct Elf32_Sym *s = (struct Elf32_Sym *) ptr;
1981 |     s->st_name = name;
1982 |     s->st_info = info;
1983 |     s->st_other = 0;
1984 |     // s->st_shndx = shndx;
1985 |     memcpy(&(s->st_shndx), (char *) &shndx, 2);
1986 |     s->st_value = value;
1987 |     s->st_size = size;
1988 |     return sym_idx++;
1989 | }
1990 | 
1991 | int append_func_sym(char **sdata, int name)
1992 | {
1993 |     int idx = gen_sym(*sdata, name, ELF32_ST_INFO(STB_GLOBAL, STT_FUNC), 0, 0, 0);
1994 |     *sdata += SYM_ENT_SIZE;
1995 |     return idx;
1996 | }
1997 | 
// Indices for section header names; every enumerator starts with 'S'.
// Values are consecutive, starting at 0.
enum {
    SNONE = 0,
    SSTAB,
    STEXT,
    SDATA,
    SDYNS,
    SDYNM,
    SDYNC,
    SINTP,
    SREL,
    SPLT,
    SGOT
};
2003 | 
// Layout parameters of the generated ELF image.
enum {
    PAGE_SIZE = 0x1000, // alignment unit for the output buffers
    PHDR_NUM = 4,       // number of program headers (e_phnum)
    SHDR_NUM = 11,      // number of section headers (e_shnum)
    DYN_NUM = 15        // number of entries in the dynamic section
};
2008 | 
2009 | void elf32_init(int poolsz)
2010 | {
2011 |     int i;
2012 |     freebuf = malloc(poolsz);
2013 |     char *o = (char *) (((int) freebuf + PAGE_SIZE - 1)  & -PAGE_SIZE);
2014 |     /* We must assign the plt_func_addr[x] a non-zero value, and also,
2015 |      * plt_func_addr[i] and plt_func_addr[i-1] has an offset of 16
2016 |      * (4 instruction * 4 bytes), so the first codegen and second codegen
2017 |      * have consistent code_size. Dummy address at this point.
2018 |      */
2019 |     plt_func_addr = malloc(sizeof(char *) * PTR);
2020 |     for (i = 0; i < PTR; ++i)
2021 |         plt_func_addr[i] = o + i * 16;
2022 | 
2023 |     ef_getidx("__libc_start_main"); // slot 0 of external func cache
2024 | }
2025 | 
2026 | int elf32(int poolsz, int *main, int elf_fd)
2027 | {
2028 |     int i;
2029 |     char *freecode;
2030 |     char *code = freecode = malloc(poolsz);
2031 |     char *buf = freebuf;
2032 |     int *jitmap = (int *) (code + (poolsz >> 1));
2033 |     memset(buf, 0, poolsz);
2034 |     char *o = buf = (char *) (((int) buf + PAGE_SIZE - 1)  & -PAGE_SIZE);
2035 |     code = (char *) (((int) code + PAGE_SIZE - 1) & -PAGE_SIZE);
2036 | 
2037 |     phdr_idx = 0;
2038 |     shdr_idx = 0;
2039 |     sym_idx = 0;
2040 | 
2041 |     /* Run __libc_start_main() and pass main trampoline.
2042 |      *
2043 |      * Note: The function prototype of __libc_start_main() is:
2044 |      *
2045 |      *     int __libc_start_main(int (*main)(int, char**, char**),
2046 |      *                           int argc, char **argv,
2047 |      *                           int (*init)(int, char**, char**),
2048 |      *                           void (*fini)(void),
2049 |      *                           void (*rtld_fini)(void),
2050 |      *                           void *stack_end);
2051 |      *
2052 |      * Usually, we should pass __libc_csu_init as init and __libc_csu_fini
2053 |      * as fini; however, we will need a interp to link the non-shared part
2054 |      * of libc.  It sounds too complex.  To keep this compiler simple,
2055 |      * let's simply pass NULL pointer.
2056 |      */
2057 |     int *stub_end = (int *) code;
2058 | 
2059 |     *stub_end++ = 0xe3a0b000;  // mov   fp, #0  @ initialize frame pointer
2060 |     *stub_end++ = 0xe3a0e000;  // mov   lr, #0  @ initialize link register
2061 |     *stub_end++ = 0xe49d1004;  // pop   {r1}    @ get argc
2062 |     *stub_end++ = 0xe1a0200d;  // mov   r2, sp  @ get argv
2063 |     *stub_end++ = 0xe52d2004;  // push  {r2}    @ setup stack end
2064 |     *stub_end++ = 0xe52d0004;  // push  {r0}    @ setup rtld_fini
2065 |     *stub_end++ = 0xe3a0c000;  // mov   ip, #0  @ FIXME: __libc_csu_fini()
2066 |     *stub_end++ = 0xe52dc004;  // push  {ip}    @ setup fini
2067 |     *stub_end++ = 0xe28f0010;  // add   r0, pc, #16  @ load main trampoline
2068 |     *stub_end++ = 0xe3a03000;  // mov   r3, #0  @ FIXME: __libc_csu_init()
2069 |     *stub_end++ = 0xebfffffe;  // bl    __libc_start_main  @ Need relocation
2070 | 
2071 |     // Return 127 if __libc_start_main() returns (which should not.)
2072 |     *stub_end++ = 0xe3a0007f;  // mov   r0, #127
2073 |     *stub_end++ = 0xe3a07001;  // mov   r7, #1
2074 |     *stub_end++ = 0xef000000;  // svc   0x00000000
2075 | 
2076 |     // main() trampoline: convert ARM AAPCS calling convention to ours.
2077 |     *stub_end++ = 0xe92d5ff0;  // push  {r4-r12, lr}
2078 |     *stub_end++ = 0xe52d0004;  // push  {r0}
2079 |     *stub_end++ = 0xe52d1004;  // push  {r1}
2080 |     *stub_end++ = 0xebfffffe;  // bl    0 <main>  @ Need relocation
2081 |     *stub_end++ = 0xe28dd008;  // add   sp, sp, #8
2082 |     *stub_end++ = 0xe8bd9ff0;  // pop   {r4-r12, pc}
2083 | 
2084 |     int start_stub_size = (char *) stub_end - code;
2085 | 
2086 |     // Compile and generate the code.
2087 |     char *je = (char *) codegen((int *) (code + start_stub_size), jitmap);
2088 |     if (!je) return 1;
2089 |     if ((int *) je >= jitmap) die("elf32: jitmem too small");
2090 | 
2091 |     // elf32_hdr
2092 |     *o++ = 0x7f; *o++ = 'E'; *o++ = 'L'; *o++ = 'F';
2093 |     *o++ = 1;    *o++ = 1;   *o++ = 1;   *o++ = 0;
2094 |     o += 8;
2095 |     *o++ = ET_EXEC; *o++ = 0; // e_type
2096 |     *o++ = EM_ARM;  *o++ = 0; // e_machine
2097 |     *(int *) o = 1;          o += 4;
2098 |     char *entry = o;               o += 4; // e_entry
2099 |     *(int *) o = EHDR_SIZE;  o += 4; // e_phoff
2100 |     char *e_shoff = o;             o += 4; // e_shoff
2101 |     *(int *) o = 0x5000400;  o += 4; // e_flags
2102 |     *o++ = EHDR_SIZE; *o++ = 0;
2103 |     *o++ = PHDR_ENT_SIZE; *o++ = 0; *o++ = PHDR_NUM; *o++ = 0; // e_phentsize & e_phnum
2104 |     *o++ = SHDR_ENT_SIZE; *o++ = 0; *o++ = SHDR_NUM; *o++ = 0; // e_shentsize & e_shnum
2105 |     *o++ =  1; *o++ = 0;
2106 | 
2107 |     int phdr_size = PHDR_ENT_SIZE * PHDR_NUM;
2108 |     char *phdr = o; o += phdr_size;
2109 | 
2110 |     // .text
2111 |     int code_off = o - buf;
2112 |     int code_size = je - code;
2113 |     char *code_addr = o;
2114 |     o += code_size;
2115 | 
2116 |     // .rel.plt (embedded in PT_LOAD of text)
2117 |     int rel_size = REL_ENT_SIZE * ef_count;
2118 |     int rel_off = code_off + code_size;
2119 |     char *rel_addr = code_addr + code_size;
2120 |     o += rel_size;
2121 | 
2122 |     // .plt (embedded in PT_LOAD of text)
2123 |     int plt_size = 20 + PLT_ENT_SIZE * ef_count; // 20 is the size of .plt entry code to .got
2124 |     int plt_off = rel_off + rel_size;
2125 |     char *plt_addr = rel_addr + rel_size;
2126 |     o += plt_size;
2127 | 
2128 |     memcpy(code_addr, code,  code_size);
2129 |     *(int *) entry = (int) code_addr;
2130 | 
2131 |     // .data
2132 |     char *_data_end = data;
2133 |     // Use load_bias to align offset and v_addr, the elf loader
2134 |     // needs PAGE_SIZE align to do mmap().
2135 |     int load_bias = PAGE_SIZE + ((int) _data & (PAGE_SIZE - 1))
2136 |                     - ((o - buf) & (PAGE_SIZE - 1));
2137 |     o += load_bias;
2138 |     char *dseg = o;
2139 | 
2140 |     // rwdata (embedded in PT_LOAD of data)
2141 |     // rwdata is all the data (R/O and R/W) in source code,
2142 |     // e.g, the variable with initial value and all the string.
2143 |     int rwdata_off = dseg - buf;
2144 |     int rwdata_size = _data_end - _data;
2145 |     o += rwdata_size;
2146 | 
2147 |     // .dynamic (embedded in PT_LOAD of data)
2148 |     char *pt_dyn = data;
2149 |     int pt_dyn_size = DYN_NUM * DYN_ENT_SIZE;
2150 |     int pt_dyn_off = rwdata_off + rwdata_size; data += pt_dyn_size;
2151 |     o += pt_dyn_size;
2152 | 
2153 |     // .interp (embedded in PT_LOAD of data)
2154 |     char *interp_str = "/lib/ld-linux-armhf.so.3";
2155 |     int interp_str_size = 25; // strlen(interp_str) + 1
2156 |     char *interp = data; memcpy(interp, interp_str, interp_str_size);
2157 |     int interp_off = pt_dyn_off + pt_dyn_size; data += interp_str_size;
2158 |     o += interp_str_size;
2159 | 
2160 |     // .shstrtab (embedded in PT_LOAD of data)
2161 |     char *shstrtab_addr = data;
2162 |     int shstrtab_off = interp_off + interp_str_size;
2163 |     int shstrtab_size = 0;
2164 | 
2165 |     int *shdr_names = malloc(sizeof(int) * SHDR_NUM);
2166 |     if (!shdr_names) die("elf32: could not malloc shdr_names table");
2167 | 
2168 |     shdr_names[SNONE] = append_strtab(&data, "") - shstrtab_addr;
2169 |     shdr_names[SSTAB] = append_strtab(&data, ".shstrtab") - shstrtab_addr;
2170 |     shdr_names[STEXT] = append_strtab(&data, ".text") - shstrtab_addr;
2171 |     shdr_names[SDATA] = append_strtab(&data, ".data") - shstrtab_addr;
2172 |     shdr_names[SDYNS] = append_strtab(&data, ".dynstr") - shstrtab_addr;
2173 |     shdr_names[SDYNM] = append_strtab(&data, ".dynsym") - shstrtab_addr;
2174 |     shdr_names[SDYNC] = append_strtab(&data, ".dynamic") - shstrtab_addr;
2175 |     shdr_names[SINTP] = append_strtab(&data, ".interp") - shstrtab_addr;
2176 |     shdr_names[SREL] = append_strtab(&data, ".rel.plt") - shstrtab_addr;
2177 |     shdr_names[SPLT] = append_strtab(&data, ".plt") - shstrtab_addr;
2178 |     shdr_names[SGOT] = append_strtab(&data, ".got") - shstrtab_addr;
2179 |     shstrtab_size = data - shstrtab_addr;
2180 |     o += shstrtab_size;
2181 | 
2182 |     // .dynstr (embedded in PT_LOAD of data)
2183 |     char *dynstr_addr = data;
2184 |     int dynstr_off = shstrtab_off + shstrtab_size;
2185 |     append_strtab(&data, "");
2186 |     char *libc = append_strtab(&data, "libc.so.6");
2187 |     char *ldso = append_strtab(&data, "libdl.so.2");
2188 |     char *libgcc_s = append_strtab(&data, "libgcc_s.so.1");
2189 | 
2190 |     int *func_entries = malloc(sizeof(int) * ef_count);
2191 |     if (!func_entries) die("elf32: could not malloc func_entries table");
2192 | 
2193 |     for (i = 0; i < ef_count; ++i)
2194 |         func_entries[i] = append_strtab(&data, ef_cache[i]->name) - dynstr_addr;
2195 | 
2196 |     int dynstr_size = data - dynstr_addr;
2197 |     o += dynstr_size;
2198 | 
2199 |     // .dynsym (embedded in PT_LOAD of data)
2200 |     char *dynsym_addr = data;
2201 |     int dynsym_off = dynstr_off + dynstr_size;
2202 |     memset(data, 0, SYM_ENT_SIZE);
2203 |     data += SYM_ENT_SIZE;
2204 | 
2205 |     for (i = 0; i < ef_count; ++i)
2206 |         append_func_sym(&data, func_entries[i]);
2207 | 
2208 |     int dynsym_size = SYM_ENT_SIZE * (ef_count + 1);
2209 |     o += dynsym_size;
2210 | 
2211 |     // .got (embedded in PT_LOAD of data)
2212 |     char *got_addr = data;
2213 |     int got_off = dynsym_off + dynsym_size;
2214 |     *(int *) data = (int) pt_dyn; data += 4;
2215 |     data += 4;  // reserved 2 and 3 entry for interp
2216 |     char *to_got_movw = data;  // The address manipulates dynamic
2217 |     char *to_got_movt = data;  // linking, plt must jump here.
2218 |     data += 4;  // reserved 2 and 3 entry for interp
2219 |     // .got function slot
2220 |     char **got_func_slot = malloc(sizeof(char *) * ef_count);
2221 |     for (i = 0; i < ef_count; i++) {
2222 |         got_func_slot[i] = data;
2223 |         *(int *) data = (int) plt_addr; data += 4;
2224 |     }
2225 |     data += 4;  // end with 0x0
2226 |     int got_size = (int) data - (int) got_addr;
2227 |     o += got_size;
2228 | 
2229 |     int dseg_size = o - dseg;
2230 | 
2231 |     // .plt -- Now we back to handle .plt after .got was initial
2232 |     char *to = plt_addr;
2233 |     *(int *) to = 0xe52de004; to += 4; // push {lr}
2234 |     // movw r10 addr_to_got
2235 |     *(int *) to = 0xe300a000 | (0xfff & (int) (to_got_movw)) |
2236 |                   (0xf0000 & ((int) (to_got_movw) << 4));
2237 |     to += 4;
2238 |     // movt r10 addr_to_got
2239 |     *(int *) to = 0xe340a000 | (0xfff & ((int) (to_got_movt) >> 16)) |
2240 |                   (0xf0000 & ((int) (to_got_movt) >> 12));
2241 |     to += 4;
2242 |     *(int *) to = 0xe1a0e00a; to += 4;  // mov lr,r10
2243 |     *(int *) to = 0xe59ef000; to += 4;  // ldr pc, [lr]
2244 | 
2245 |     // We must preserve ip for code below, dyn link use this as return address
2246 |     for (i = 0; i < ef_count; i++) {
2247 |         plt_func_addr[i] = to;
2248 |         // movw ip addr_to_got (low 16 bits)
2249 |         *(int *) to = 0xe300c000 | (0xfff & (int) (got_func_slot[i])) |
2250 |                       (0xf0000 & ((int) (got_func_slot[i]) << 4));
2251 |         to += 4;
2252 |         // movt ip addr_to_got (high 16 bits)
2253 |         *(int *) to = 0xe340c000 |
2254 |                       (0xfff & ((int) (got_func_slot[i]) >> 16)) |
2255 |                       (0xf0000 & ((int) (got_func_slot[i]) >> 12));
2256 |         to += 4;
2257 |         *(int *) to = 0xe59cf000; to += 4;  // ldr pc, [ip]
2258 |     }
2259 | 
2260 |     // .rel.plt
2261 |     to = rel_addr;
2262 |     for (i = 0; i < ef_count; i++) {
2263 |         *(int *) to = (int) got_func_slot[i]; to += 4;
2264 |         *(int *) to = 0x16 | (i + 1) << 8 ; to += 4;
2265 |         // 0x16 R_ARM_JUMP_SLOT | .dynsym index << 8
2266 |     }
2267 | 
2268 |     // Generate program header after we got address, offset and size.
2269 |     to = phdr;
2270 |     // PT_LOAD for .text
2271 |     gen_phdr(to, PT_LOAD, 0, (int) buf,
2272 |             EHDR_SIZE + phdr_size + code_size + rel_size + plt_size,
2273 |             PF_X | PF_R, PAGE_SIZE);
2274 |     to += PHDR_ENT_SIZE;
2275 | 
2276 |     // PT_LOAD for .data
2277 |     gen_phdr(to, PT_LOAD, rwdata_off, (int) _data,
2278 |             dseg_size, PF_W | PF_R, PAGE_SIZE);
2279 |     to += PHDR_ENT_SIZE;
2280 | 
2281 |     // PT_INTERP for .interp
2282 |     gen_phdr(to, PT_INTERP, interp_off, (int) interp,
2283 |             interp_str_size , PF_R, 0x1);
2284 |     to += PHDR_ENT_SIZE;
2285 | 
2286 |     // PT_DYNAMIC for .dynamic
2287 |     gen_phdr(to, PT_DYNAMIC, pt_dyn_off, (int) pt_dyn,
2288 |             pt_dyn_size , PF_R | PF_W, 0x4);
2289 | 
2290 |     // .dynamic (embedded in PT_LOAD of data)
2291 |     to = pt_dyn;
2292 |     *(int *) to =  5; to += 4; *(int *) to = (int) dynstr_addr;  to += 4;
2293 |     *(int *) to = 10; to += 4; *(int *) to = dynstr_size;        to += 4;
2294 |     *(int *) to =  6; to += 4; *(int *) to = (int) dynsym_addr;  to += 4;
2295 |     *(int *) to = 11; to += 4; *(int *) to = 16;                 to += 4;
2296 |     *(int *) to = 17; to += 4; *(int *) to = (int) rel_addr;     to += 4;
2297 |     *(int *) to = 18; to += 4; *(int *) to = rel_size;           to += 4;
2298 |     *(int *) to = 19; to += 4; *(int *) to = 8;                  to += 4;
2299 |     *(int *) to =  3; to += 4; *(int *) to = (int) got_addr;     to += 4;
2300 |     *(int *) to =  2; to += 4; *(int *) to = rel_size;           to += 4;
2301 |     *(int *) to = 20; to += 4; *(int *) to = 17;                 to += 4;
2302 |     *(int *) to = 23; to += 4; *(int *) to = (int) rel_addr;     to += 4;
2303 |     *(int *) to =  1; to += 4; *(int *) to = libc - dynstr_addr; to += 4;
2304 |     *(int *) to =  1; to += 4; *(int *) to = ldso - dynstr_addr; to += 4;
2305 |     *(int *) to =  1; to += 4; *(int *) to = libgcc_s - dynstr_addr; to += 4;
2306 |     *(int *) to =  0;
2307 | 
2308 |     /* Generate code again because address of .plt function slots must
2309 |      * be confirmed before codegen() to make sure the code is correct.
2310 |      */
2311 |     je = (char *) codegen((int *) (code + start_stub_size), jitmap);
2312 |     if (!je) {
2313 |         free(func_entries);
2314 |         free(shdr_names);
2315 |         return 1;
2316 |     }
2317 |     if ((int *) je >= jitmap) die("elf32: jitmem too small");
2318 | 
2319 |     // Relocate __libc_start_main() and main().
2320 |     *((int *) (code + 0x28)) = reloc_bl(plt_func_addr[0] - code_addr - 0x28);
2321 |     *((int *) (code + 0x44)) =
2322 |         reloc_bl(jitmap[((int) main - (int) text) >> 2] - (int) code - 0x44);
2323 | 
2324 |     // Copy the generated binary.
2325 |     memcpy(code_addr, code,  je - code);
2326 | 
2327 |     // Generate section header
2328 |     *(int *) e_shoff = (int) (o - buf);
2329 |     gen_shdr(o, SHT_NULL, shdr_names[SNONE], 0, 0, 0,
2330 |              0, 0, 0, 0, 0);
2331 |     o += SHDR_ENT_SIZE;
2332 | 
2333 |     // sh_shstrtab_idx
2334 |     gen_shdr(o, SHT_STRTAB, shdr_names[SSTAB], shstrtab_off, 0,
2335 |              shstrtab_size, 0, 0, 0, 1, 0);
2336 |     o += SHDR_ENT_SIZE;
2337 | 
2338 |     // sh_text_idx
2339 |     gen_shdr(o, SHT_PROGBITS, shdr_names[STEXT], code_off, (int) code_addr,
2340 |             code_size, 0, 0, SHF_ALLOC | SHF_EXECINSTR, 4, 0);
2341 |     o += SHDR_ENT_SIZE;
2342 | 
2343 |     // sh_data_idx
2344 |     gen_shdr(o, SHT_PROGBITS, shdr_names[SDATA], rwdata_off, (int) _data,
2345 |              dseg_size, 0, 0, SHF_ALLOC | SHF_WRITE, 4, 0);
2346 |     o += SHDR_ENT_SIZE;
2347 | 
2348 |     int sh_dynstr_idx =
2349 |     gen_shdr(o, SHT_STRTAB, shdr_names[SDYNS], dynstr_off, (int) dynstr_addr,
2350 |              dynstr_size, 0, 0, SHF_ALLOC, 1, 0);
2351 |     o += SHDR_ENT_SIZE;
2352 | 
2353 |     int sh_dynsym_idx =
2354 |     gen_shdr(o, SHT_DYNSYM, shdr_names[SDYNM], dynsym_off, (int) dynsym_addr,
2355 |              dynsym_size, sh_dynstr_idx, 1, SHF_ALLOC, 4, 0x10);
2356 |     o += SHDR_ENT_SIZE;
2357 | 
2358 |     // sh_dynamic_idx
2359 |     gen_shdr(o, SHT_DYNAMIC, shdr_names[SDYNC], pt_dyn_off, (int) pt_dyn,
2360 |              pt_dyn_size, sh_dynstr_idx, 0, SHF_ALLOC | SHF_WRITE, 4, 0);
2361 |     o += SHDR_ENT_SIZE;
2362 | 
2363 |     // sh_interp_idx
2364 |     gen_shdr(o, SHT_PROGBITS, shdr_names[SINTP], interp_off, (int) interp,
2365 |              interp_str_size, 0, 0, SHF_ALLOC, 1, 0);
2366 |     o += SHDR_ENT_SIZE;
2367 | 
2368 |     // sh_rel_idx
2369 |     gen_shdr(o, SHT_REL, shdr_names[SREL], rel_off, (int) rel_addr,
2370 |              rel_size, sh_dynsym_idx, 11, SHF_ALLOC | 0x40, 4, 8);
2371 |     o += SHDR_ENT_SIZE;
2372 | 
2373 |     // sh_plt_idx
2374 |     gen_shdr(o, SHT_PROGBITS, shdr_names[SPLT], plt_off, (int) plt_addr,
2375 |              plt_size, 0, 0, SHF_ALLOC | SHF_EXECINSTR, 4, 4);
2376 |     o += SHDR_ENT_SIZE;
2377 | 
2378 |     // sh_got_idx
2379 |     gen_shdr(o, SHT_PROGBITS, shdr_names[SGOT], got_off, (int) got_addr,
2380 |              got_size, 0, 0, SHF_ALLOC | SHF_WRITE, 4, 4);
2381 |     o += SHDR_ENT_SIZE;
2382 | 
2383 |     // Copy .data to a part of (o - buf) where _data located.
2384 |     memcpy(dseg, _data, dseg_size);
2385 |     write(elf_fd, buf, o - buf);
2386 | 
2387 |     free(func_entries);
2388 |     free(shdr_names);
2389 |     free(freebuf);
2390 |     free(freecode);
2391 |     free(plt_func_addr);
2392 |     free(got_func_slot);
2393 |     return 0;
2394 | }
2395 | 
2396 | enum { _O_CREAT = 64, _O_WRONLY = 1 };
2397 | 
2398 | #ifdef int
2399 | /* Eliminate clang compilation error:
2400 |  *   first parameter of 'main' (argument count) must be of type 'int'
2401 |  */
2402 | #undef int
2403 | #endif
2404 | int main(int argc, char **argv)
2405 | {
2406 | /* 64-bit host support */
2407 | #if defined(__x86_64__) || defined(__aarch64__)
2408 | #define int long
2409 | #endif
2410 |     // Entry point: parse options, set up pools, parse the source, then hand off to elf32() or jit().
2411 |     int *freed_ast, *ast;
2412 |     int elf_fd;
2413 |     int i;
2414 |     // Options are recognised only in this fixed order: [-s] [-fsigned-char] [-o object] file
2415 |     --argc; ++argv;
2416 |     if (argc > 0 && **argv == '-' && (*argv)[1] == 's') {
2417 |         src = 1; --argc; ++argv;
2418 |     }
2419 |     if (argc > 0 && **argv == '-' && !strcmp(*argv, "-fsigned-char")) {
2420 |         signed_char = 1; --argc; ++argv;
2421 |     }
2422 |     if (argc > 0 && **argv == '-' && (*argv)[1] == 'o') {
2423 |         elf = 1; --argc; ++argv;
2424 |         if (argc < 1) die("no output file argument");
2425 |         if ((elf_fd = open(*argv, _O_CREAT | _O_WRONLY, 0775)) < 0) {
2426 |             printf("could not open(%s)\n", *argv); return -1;
2427 |         }
2428 |         --argc; ++argv;
2429 |     }
2430 |     if (argc < 1) die("usage: amacc [-s] [-o object] file");
2431 | 
2432 |     int fd;
2433 |     if ((fd = open(*argv, 0)) < 0) {
2434 |         printf("could not open(%s)\n", *argv); return -1;
2435 |     }
2436 |     // Allocate the working pools; every allocation failure is fatal.
2437 |     int poolsz = 256 * 1024; // arbitrary size
2438 |     if (!(text = le = e = malloc(poolsz)))
2439 |         die("could not allocate text area");
2440 |     if (!(sym = malloc(poolsz)))
2441 |         die("could not allocate symbol area");
2442 |     if (!(freedata = _data = data = malloc(poolsz)))
2443 |         die("could not allocate data area"); // must abort: data is memset() below
2444 |     if (!(tsize = malloc(PTR * sizeof(int))))
2445 |         die("could not allocate tsize area");
2446 |     if (!(members = malloc(PTR * sizeof(struct member_s *))))
2447 |         die("could not malloc() members area");
2448 |     if (!(freed_ast = ast = malloc(poolsz)))
2449 |         die("could not allocate abstract syntax tree area");
2450 |     if (!(ef_cache = malloc(PTR * sizeof(struct ef_s *))))
2451 |         die("could not malloc() external function cache");
2452 |     // Zero the pools before use.
2453 |     memset(sym, 0, poolsz);
2454 |     memset(e, 0, poolsz);
2455 |     memset(data, 0, poolsz);
2456 | 
2457 |     memset(tsize,   0, PTR * sizeof(int));
2458 |     memset(members, 0, PTR * sizeof(struct member_s *));
2459 |     memset(ast, 0, poolsz);
2460 |     ast = (int *) ((int) ast + poolsz); // abstract syntax tree is most efficiently built as a stack
2461 | 
2462 |     /* Register keywords and system calls to symbol stack
2463 |      * must match the sequence of enum
2464 |      */
2465 |     p = "break continue case char default else enum if int return sizeof "
2466 |         "struct union switch for while do goto __clear_cache void main";
2467 | 
2468 |     // call "next" to create symbol table entry.
2469 |     // store the keyword's token type in the symbol table entry's "tk" field.
2470 |     for (i = Break; i <= Goto; i++) {
2471 |         next(); id->tk = i; id->class = Keyword; // add keywords to symbol table
2472 |     }
2473 | 
2474 |     // add __clear_cache to symbol table
2475 |     next(); id->class = ClearCache; id->type = INT; id->val = CLCA;
2476 | 
2477 |     next(); id->tk = Char; id->class = Keyword; // handle void type
2478 |     next();
2479 |     struct ident_s *idmain = id; id->class = Main; // keep track of main
2480 | 
2481 |     if (elf) elf32_init(poolsz); // call before source code parsing
2482 | 
2483 |     if (!(freep = lp = p = malloc(poolsz)))
2484 |         die("could not allocate source area");
2485 |     if ((i = read(fd, p, poolsz - 1)) <= 0)
2486 |         die("unable to read from source file");
2487 |     p[i] = 0; // NUL-terminate the source text
2488 |     close(fd);
2489 | 
2490 |     // add primitive types
2491 |     tsize[tnew++] = sizeof(char);
2492 |     tsize[tnew++] = sizeof(int);
2493 | 
2494 |     // real C parser begins here
2495 |     // parse the program
2496 |     line = 1;
2497 |     next();
2498 |     n = ast;
2499 |     while (tk) {
2500 |         stmt(Glo);
2501 |         next();
2502 |     }
2503 | 
2504 |     int ret = elf ? elf32(poolsz, (int *) idmain->val, elf_fd) :
2505 |                     jit(poolsz,   (int *) idmain->val, argc, argv);
2506 |     free(freep);
2507 |     free(freed_ast);
2508 |     free(tsize);
2509 |     free(freedata);
2510 |     free(sym);
2511 |     free(text);
2512 | 
2513 |     return ret;
2514 | }
2515 | 


--------------------------------------------------------------------------------
/docs/IR.md:
--------------------------------------------------------------------------------
  1 | # Intermediate Representation (IR) for AMaCC Compilation
  2 | 
  3 | ## What is an IR
  4 | An Intermediate representation (IR) is the specific data structure or code
  5 | used internally by a compiler or virtual machine to represent a "program"
  6 | between source code and target languages. Before generating binary, the
  7 | compiler front-end will generate IR to aid the compiler backend to produce
  8 | the intermediate form which is independent of the source file.
  9 | 
 10 | 
 11 | ## Why is an IR used
 12 | * Because translation appears to inherently require analysis and synthesis.
 13 | * Break the difficult problem of translation into two simpler, more manageable pieces.
 14 | * To build retargetable compilers:
 15 |   - Build new back ends for an existing front end (making the source language more
 16 |     portable across machines).
 17 |   - Can build a new front-end for an existing back end.
 18 |   - We only have to write 2n half-compilers instead of n(n-1) full compilers.
 19 |   - To perform machine independent optimizations.
 20 | 
 21 | So how does the IR actually work? Let's have an example:
 22 | ```c
 23 | int a;
 24 | a = 10 + 1 + 11;
 25 | ```
 26 | 
 27 | Inside AMaCC, the above C source will be converted into following IR:
 28 | ```
 29 | IMM  10
 30 | PSH 
 31 | IMM  1
 32 | ADD 
 33 | PSH 
 34 | IMM  11
 35 | ADD 
 36 | ```
 37 | 
 38 | These instructions will be stored inside the stack. According to the stack
 39 | LIFO (Last in First Out) order, they will be executed sequentially from
 40 | top to bottom illustrated as following:
 41 | 
 42 | ```
 43 | | IMM  10 | pop "IMM 10"
 44 | | PSH     |-------------> | PSH   | pop "PSH"
 45 | | IMM  1  |               | IMM 1 |----------> | IMM 1 | pop "IMM 1"
 46 | | ADD     |               | ADD   |            | ADD   |----------->
 47 | | PSH     |               | PSH   |            | PSH   |
 48 | | IMM  11 |               | IMM 11|            | IMM 11|
 49 | | ADD     |               | ADD   |            | ADD   |
 50 | 
 51 | * stack    *
 52 | |         |               |       |            |   10  |
 53 | * register *
 54 | |         |               |   10  |            |       |
 55 | ```
 56 | 
 57 | ```
 58 | |  ADD    |  pop "ADD"
 59 | |  PSH    |-------------> | PSH   | pop "PSH"
 60 | |  IMM 11 |               | IMM 11|----------> | IMM 11|
 61 | |  ADD    |               | ADD   |            | ADD   |
 62 | 
 63 | * stack    *
 64 | |   10    |               |       |            |   11  |
 65 | * register *
 66 | |   1     |               |   11  |            |       |
 67 | ```
 68 | 
 69 | ```
 70 | |  IMM 11 | pop "IMM 11"             pop "ADD"
 71 | |  ADD    | ----------->  | ADD   |  --------> |       |
 72 | 
 73 | * stack    *
 74 | |   11    |               |   11  |            |       |
 75 | * register *
 76 | |         |               |   11  |            |   22  | -> the result we get
 77 | ```
 78 | 
 79 | ## Instruction sets
 80 | 
 81 | |   opcode  |       format      |       ARM instructions        |                       comments                                   |
 82 | |-----------|-------------------|-------------------------------|------------------------------------------------------------------|
 83 | |LEA        | LEA \<offset\>    |add r0, r11, #\<offset>        |fetch arguments inside sub function                               |
 84 | |IMM        | IMM \<num\>       |mov r0, #20                    |put immediate \<num\> into general register                       |
 85 | |JMP        | JMP \<addr\>      |b \<addr\>                     |set PC register to \<addr\>                                       |
 86 | |JSR        | JSR \<addr\>      |bl \<addr\>                    |stores current execution position and jump to \<addr\>            |
 87 | |LEV        | LEV               |add sp, r11, #0; pop {r11, pc} |fetch bookkeeping info to resume previous execution             |
 88 | |ENT        | ENT \<size\>      |push {r11, lr} ;add r11, sp, #0|called when we are about to enter a function call to "make a new calling frame". It stores the current PC value onto the stack and reserves \<size\> bytes for the function's local variables.|
 89 | |ADJ        | ADJ \<size\>      |add sp, sp, #\<size\>          |adjust the stack(to remove argument from frame)                   |
 90 | |LI         | LI                |ldr r0, [r0]                   |loads an integer into general register from a given memory address which is stored in general register before execution|
 91 | |SI         | SI                |pop {r1};str r0, [r1]          |stores the integer in general register into the memory whose  address is stored on the top of the stack|
 92 | |LC         | LC                |ldrb r0, [r0]                  |loads a character into general register from a given memory address which is stored in general register before execution|
 93 | |SC         | SC                |pop {r1}; strb r0, [r1]        |stores the character in general register into the memory whose address is stored on the top of the stack| 
 94 | |PSH        | PSH               |push {r0}                      |pushes the value in general register onto the stack               |
 95 | 
 96 | ## Function call example
 97 | 
 98 | ```c
 99 | int func(int a) {
100 |     return a * 10;
101 | }
102 | 
103 | int main() {
104 |     func(20);
105 |     return 0;
106 | }
107 | ```
108 | 
109 | When compiling with AMaCC, passing the `-s` argument generates the IR along
110 | with the corresponding source.
111 | ```c
112 | 1: int func(int a) {
113 | 2:     return a * 10;
114 | 3: }
115 |     ENT  0          ; save func address on stack
116 |     LEA  2          ; fetch a's address on stack and save into general register
117 |     LI              ; Load integer from memory which address is inside general register
118 |     PSH             ; push the integer held in the general register onto the top of the stack
119 |     IMM  10         ; move 10 into general register
120 |     MUL             ; pop 'a' on the top of stack,and multiply 10 which is inside general register,store result into general register
121 |     LEV             ; return to main
122 | 4:
123 | 5: int main()
124 | 6: {
125 | 7:     func(20);
126 | 8:     return 0;
127 | 9: }
128 |     ENT  0          ; save main address on stack
129 |     IMM  20         ; move 20 into general register
130 |     PSH             ; push r0 on top of stack
131 |     JSR  -11120300  ; save sp on stack,save current execute position to lr, jump to func
132 |     ADJ  1          ; remove 20 from stack
133 |     IMM  0          ; move 0 into general register
134 |     LEV             ; return to entry
135 | ```
136 | 
137 | ### Arithmetic instructions
138 | 
139 | Each operator has two arguments:
140 | * the first is stored on the top of the stack;
141 | * the second is stored in general register;
142 | 
143 | After the calculation is done, the argument on the stack will be popped out,
144 | and the result will be stored in general register. So, you are not able to
145 | fetch the first argument from the stack after the calculation.
146 | 
147 | You can see the above example to know how arithmetic instructions work.
148 | 
149 | ### Conditional jump instructions
150 | 
151 | The `BZ` and `BNZ` instructions must be used with arithmetic instructions,
152 | such as `EQ`,`NE`,`LT`,`GT`,`LE` and `GE`.
153 | 
154 | Example:
155 | 
156 | ```c
157 | 7:     if (n > 0) {
158 |     LEA  2          ; fetch n's address
159 |     LI              ; load n's value into r0 register
160 |     PSH             ; push n on to stack
161 |     IMM  0          ; move 0 into r0 register
162 |     GT              ; compare r0 and r1(pop r1 first on top of stack)
163 |     BZ   0          ; jump when the GT result is 0, i.e. when r1 <= r0
164 | ```
165 | The arithmetic instructions for comparisons will be translated to ARM instructions. Example:
166 | ```
167 | # GT
168 | pop  {r1}
169 | cmp  r1, r0
170 | movgt r0, 1
171 | movle r0, 0
172 | ```
173 | 
174 | The branch-on-zero instruction is translated as follows:
175 | ```
176 | # BZ
177 | cmp  r0, 0
178 | beq  0xff4a31d4
179 | ```
180 | 
181 | |   opcode     | format      |      ARM instructions        | comments |
182 | | ------------ | ----------- | ---------------------------- | -------- |
183 | | BZ           | BZ \<value\>  |cmp  r0, 0;beq  \<address\>   |branch on zero |
184 | | BNZ          | BNZ \<value\> |cmp  r0, 0;bne  \<address\>   |branch on not zero |
185 | 


--------------------------------------------------------------------------------
/mk/arm.mk:
--------------------------------------------------------------------------------
 1 | CROSS_COMPILE ?= arm-none-linux-gnueabihf-
 2 | # Locate $(CROSS_COMPILE)gcc on PATH; if absent, retry with the Debian/Ubuntu prefix.
 3 | ARM_CC = $(CROSS_COMPILE)gcc
 4 | ARM_CC := $(shell which $(ARM_CC))
 5 | ifndef ARM_CC
 6 |   # Try Debian/Ubuntu package
 7 |   CROSS_COMPILE = arm-linux-gnueabihf-
 8 |   ARM_CC = $(CROSS_COMPILE)gcc
 9 |   ARM_CC := $(shell which $(ARM_CC))
10 |   ifndef ARM_CC
11 |   $(error "no $(CROSS_COMPILE)gcc found.")
12 |   endif
13 | endif
14 | export CROSS_COMPILE
15 | # Sanity check: the toolchain's preprocessor must predefine a macro containing "ARM".
16 | ARM_CC2 = $(shell echo | $(CROSS_COMPILE)cpp -dM - | grep ARM && echo 1)
17 | ifeq ("$(ARM_CC2)","")
18 | $(error "no valid GNU toolchain for ARM found.")
19 | endif
20 | # qemu-arm must be on PATH; it is the emulator used in ARM_EXEC below.
21 | ARM_QEMU = qemu-arm
22 | ARM_QEMU := $(shell which $(ARM_QEMU))
23 | ifndef ARM_QEMU
24 | $(error "no qemu-arm found. Please check package installation")
25 | endif
26 | 
27 | # FIXME: check ld-linux.so as well
28 | ARM_LD_LINUX_PATH := $(shell cd $(shell $(ARM_CC) --print-sysroot) 2>/dev/null && pwd)
29 | ifeq ("$(ARM_LD_LINUX_PATH)","/") # packaged GNU toolchain
30 |   ARM_LD_LINUX_PATH := $(shell dirname "$(shell which $(ARM_CC))")/..
31 |   ARM_LD_LINUX_PATH := $(shell cd $(ARM_LD_LINUX_PATH) 2>/dev/null && pwd)
32 |   ARM_LD_LINUX_PATH := $(ARM_LD_LINUX_PATH)/$(shell echo $(CROSS_COMPILE) | sed s'/.$$//')/libc
33 |   ARM_LD_LINUX_PATH := $(shell cd $(ARM_LD_LINUX_PATH) 2>/dev/null && pwd)
34 |   ifndef ARM_LD_LINUX_PATH
35 |     ARM_LD_LINUX_PATH = /usr/$(shell echo $(CROSS_COMPILE) | sed s'/.$$//')
36 |     ARM_LD_LINUX_PATH := $(shell cd $(ARM_LD_LINUX_PATH) 2>/dev/null && pwd)
37 |   endif
38 | endif
39 | ifndef ARM_LD_LINUX_PATH
40 | $(error "AMaCC requires ld-linux.so")
41 | endif
42 | # Command prefix for running ARM binaries: qemu-arm with -L set to the path found above.
43 | ARM_EXEC = $(ARM_QEMU) -L $(ARM_LD_LINUX_PATH)
44 | export ARM_EXEC
45 | 


--------------------------------------------------------------------------------
/mk/common.mk:
--------------------------------------------------------------------------------
 1 | UNAME_S := $(shell uname -s)
 2 | ifeq ($(UNAME_S),Darwin)
 3 |     PRINTF = printf
 4 | else
 5 |     PRINTF = env printf
 6 | endif
 7 | 
 8 | # Control the build verbosity
 9 | ifeq ("$(VERBOSE)","1")
10 |     Q :=
11 |     VECHO = @true
12 |     REDIR =
13 | else
14 |     Q := @
15 |     VECHO = @$(PRINTF)
16 |     REDIR = >/dev/null
17 | endif
18 | 
19 | # Test suite
20 | PASS_COLOR = \e[32;01m
21 | NO_COLOR = \e[0m
22 | 
23 | pass = $(PRINTF) "$(PASS_COLOR)$1 Passed$(NO_COLOR)\n"
24 | 


--------------------------------------------------------------------------------
/mk/python.mk:
--------------------------------------------------------------------------------
1 | PYTHON = python3
2 | PYTHON := $(shell which $(PYTHON))
3 | ifndef PYTHON
4 | $(error "python3 is required.")
5 | endif
6 | 


--------------------------------------------------------------------------------
/scripts/disasm:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | if [ "$#" != "1" ]; then  # exactly one argument: the ELF file to disassemble
 3 |     echo "Usage: disasm <amacc ELF executable file>"
 4 |     exit 1  # report the usage error through the exit status
 5 | fi
 6 | DD_BYTES=`readelf -a "$1" 2>/dev/null | awk '/\.text.*PROGBITS/ { s = sprintf("skip=%d", "0x" $6) ; c = sprintf("count=%d", "0x" $7) ; print s, c }'`  # skip=/count= from the .text header line
 7 | dd bs=1 if="$1" of="$1.asmtmp" $DD_BYTES  # DD_BYTES stays unquoted: it must split into two dd operands
 8 | objdump -b binary -m arm -D "$1.asmtmp"
 9 | rm "$1.asmtmp"
10 | 


--------------------------------------------------------------------------------
/scripts/runtest.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # encoding: utf-8
 3 | 
 4 | import unittest
 5 | import subprocess as sp
 6 | import os
 7 | import sys
 8 | 
 9 | amacc = './amacc'
10 | gcc = os.getenv("CROSS_COMPILE", "arm-none-linux-gnueabihf-") + "gcc"
11 | amaccdir = 'elf'
12 | gccdir = 'out-gcc'
13 | 
14 | 
15 | def mkdir_p(path):
16 |     try:
17 |         os.makedirs(path)
18 |     except OSError as exc:  # Python >2.5
19 |         if exc.errno == errno.EEXIST and os.path.isdir(path):
20 |             pass
21 |         else:
22 |             raise
23 | 
24 | 
25 | class TestCC_UC(unittest.TestCase):
26 |     """ Test cases without -fsigned-char (default); methods are attached by _define_tests() """
27 |     pass
28 | 
29 | 
30 | class TestCC_SC(unittest.TestCase):
31 |     """ Test cases with -fsigned-char; methods are attached by _define_tests() """
32 |     pass
33 | 
34 | 
35 | def _generate_test(test_name, test_file, extra_cflags):  # returns a test method for test_file; test_name is unused here
36 |     def test(self):
37 |         args = ['3']  # arguments given to every program under test
38 | 
39 |         test_file_name = os.path.splitext(os.path.basename(test_file))[0]
40 | 
41 |         # compile test program with gcc and run the output executable
42 |         prog_exe = os.path.join(gccdir, test_file_name)
43 |         # parameter '-w' inhibits all warning messages of gcc
44 |         gcc_params = [gcc, '-w', '-o', prog_exe, test_file] + extra_cflags
45 |         sp.run(gcc_params)  # NOTE: the compiler's exit status is not checked
46 | 
47 |         proc = sp.run(qemuCmd + [prog_exe] + args, timeout=10, stdout=sp.PIPE)
48 |         gcc_out, gcc_err, gcc_retcode = proc.stdout, proc.stderr, proc.returncode  # stderr is not piped, so gcc_err is None
49 | 
50 |         # run amacc in jit mode (no -o): the source file and args are passed straight to amacc
51 |         amacc_params = [amacc] + extra_cflags + [test_file] + args
52 |         proc = sp.run(qemuCmd + amacc_params, timeout=10, stdout=sp.PIPE)
53 |         amacc_out, amacc_err, amacc_retcode = proc.stdout, proc.stderr, proc.returncode
54 |         self.assertEqual(amacc_out.decode('utf-8'), gcc_out.decode('utf-8'))
55 |         self.assertEqual(amacc_retcode, gcc_retcode)
56 | 
57 |         # run amacc in compiler mode: emit an executable with -o, then run it under qemu
58 |         prog_exe = os.path.join(amaccdir, test_file_name)
59 |         amacc_params = [amacc] + extra_cflags + ['-o', prog_exe, test_file]
60 |         sp.run(qemuCmd + amacc_params)  # NOTE: exit status is not checked here either
61 | 
62 |         proc = sp.run(qemuCmd + [prog_exe] + args, timeout=10, stdout=sp.PIPE)
63 |         amacc_out, amacc_err, amacc_retcode = proc.stdout, proc.stderr, proc.returncode
64 |         self.assertEqual(amacc_out.decode('utf-8'), gcc_out.decode('utf-8'))
65 |         self.assertEqual(amacc_retcode, gcc_retcode)
66 | 
67 |     return test
68 | 
69 | 
70 | def _define_tests():
71 |     if not os.access(amaccdir, os.F_OK):  # create the output directories if they do not exist yet
72 |         mkdir_p(amaccdir)
73 |     if not os.access(gccdir, os.F_OK):
74 |         mkdir_p(gccdir)
75 |     for dirpath, _, filenames in os.walk('tests'):  # 'tests' is resolved against the current directory
76 |         for f in filenames:
77 |             if f.endswith('.c'):
78 |                 test_file = os.path.abspath(os.path.join(dirpath, f))
79 |                 test_name = 'test_%s' % (os.path.splitext(f)[0])
80 |                 # Each .c file yields one method on each of the two TestCase classes.
81 |                 # test without -fsigned-char (default ABI)
82 |                 test_func = _generate_test(test_name, test_file, [])
83 |                 setattr(TestCC_UC, test_name, test_func)
84 | 
85 |                 # test with -fsigned-char
86 |                 test_func = _generate_test(test_name, test_file, ['-fsigned-char'])
87 |                 setattr(TestCC_SC, test_name, test_func)
88 | 
89 | _define_tests()
90 | 
91 | 
92 | if __name__ == '__main__':
93 |     try:
94 |         qemuCmd = os.getenv('ARM_EXEC').split()
95 |     except AttributeError:
96 |         qemuCmd = 'qemu-arm -L /usr/arm-linux-gnueabihf'.split()
97 |     unittest.main()
98 | 


--------------------------------------------------------------------------------
/tests/.clang-format:
--------------------------------------------------------------------------------
 1 | BasedOnStyle: Chromium
 2 | Language: Cpp
 3 | MaxEmptyLinesToKeep: 3
 4 | IndentCaseLabels: false
 5 | AllowShortIfStatementsOnASingleLine: false
 6 | AllowShortCaseLabelsOnASingleLine: false
 7 | AllowShortLoopsOnASingleLine: false
 8 | DerivePointerAlignment: false
 9 | PointerAlignment: Right
10 | SpaceAfterCStyleCast: true
11 | TabWidth: 4
12 | UseTab: Never
13 | IndentWidth: 4
14 | BreakBeforeBraces: Linux
15 | AccessModifierOffset: -4
16 | 


--------------------------------------------------------------------------------
/tests/arginc.c:
--------------------------------------------------------------------------------
1 | int main(int argc, char **argv) /* exit status is argc - 2 */
2 | {
3 |     return argc - 2;
4 | }
5 | 


--------------------------------------------------------------------------------
/tests/arginc.list:
--------------------------------------------------------------------------------
 1 | 1: int main(int argc, char **argv)
 2 | 2: {
 3 | 3:     return argc - 2;
 4 | 4: }
 5 | 0000:     ENT  0
 6 | 0002:     LEA  3
 7 | 0004:     LI  
 8 | 0005:     PSH 
 9 | 0006:     IMM  2
10 | 0008:     SUB 
11 | 0009:     LEV 
12 | 


--------------------------------------------------------------------------------
/tests/assign.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | int assert_eq(int a, int b) /* prints a message and exits with 1 unless a == b */
 5 | {
 6 |     if (a != b) {
 7 |         printf("Assertion: %d != %d\n", a, b);
 8 |         exit(1);
 9 |     }
10 |     return 0; /* values matched */
11 | }
12 | 
13 | int main(int argc, char **argv) /* checks initializers, compound assignments and the comma operator */
14 | {
15 |     /* Value test */
16 |     int a, b, c;
17 |     int d = 5, e = 10;
18 | 
19 |     assert_eq(d, 5);
20 |     assert_eq(e, 10);
21 | 
22 |     a = 1;
23 |     a += 101;
24 |     assert_eq(a, 102);
25 | 
26 |     a = 10;
27 |     a -= 101;
28 |     assert_eq(a, -91);
29 | 
30 |     a = 10;
31 |     a *= 101;
32 |     assert_eq(a, 1010);
33 | 
34 |     /* precedence test */
35 |     a = 1;
36 |     a += 3 * 4; /* the whole right-hand side (12) is evaluated first */
37 |     assert_eq(a, 13);
38 | 
39 |     a = 1;
40 |     a -= 3 * 4;
41 |     assert_eq(a, -11);
42 | 
43 |     a = 2;
44 |     a *= 3 * 4;
45 |     assert_eq(a, 24);
46 | 
47 |     a = 10;
48 |     a /= 5;
49 |     assert_eq(a, 2);
50 | 
51 |     a = 4;
52 |     a %= 3;
53 |     assert_eq(a, 1);
54 | 
55 |     a = 1;
56 |     a <<= 2;
57 |     assert_eq(a, 4);
58 | 
59 |     a = 4;
60 |     a >>= 2;
61 |     assert_eq(a, 1);
62 | 
63 |     a = 17;
64 |     a |= 14;
65 |     assert_eq(a, 31);
66 | 
67 |     /* precedence test */
68 |     a = 0xff;
69 |     b = 1;
70 |     a ^= b | 2; /* b | 2 == 3 is evaluated first, then 0xff ^ 3 */
71 |     assert_eq(a, 0xfc);
72 | 
73 |     a = 17;
74 |     a &= 7;
75 |     assert_eq(a, 1);
76 | 
77 |     /* comma operator tests */
78 |     a = 0;
79 |     b = 10;
80 |     a++, b++;
81 |     assert_eq(a, 1);
82 |     assert_eq(b, 11);
83 | 
84 |     c = (++a, ++b); /* the comma expression yields its right operand */
85 |     assert_eq(c, 12);
86 | 
87 |     return 0;
88 | }
89 | 


--------------------------------------------------------------------------------
/tests/char.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | int mcmp(char *a, char *b, int n) /* compares the first n bytes of a and b */
 5 | {
 6 |     int ret;
 7 |     while (n--) {
 8 |         ret = *a++ - *b++;
 9 |         if (ret)
10 |             return ret; /* first differing byte: return the difference */
11 |     }
12 |     return 0; /* all n bytes were equal */
13 | }
14 | 
15 | void mcpy(char *a, char *b, int n) /* copies n bytes from b to a */
16 | {
17 |     char *dst; /* assigned below but never read */
18 |     dst = a;
19 |     while (n--)
20 |         *a++ = *b++;
21 | }
22 | 
23 | int main() /* exercises char copy/compare helpers and char-to-int conversion */
24 | {
25 |     char *p;
26 |     int v;
27 |     p = malloc(128);
28 |     mcpy(p, "hello world", 12); /* 11 characters plus the terminating NUL */
29 |     printf("%s\n", p);
30 |     printf("memcmp = %d\n", mcmp(p, "hello world", 12));
31 |     printf("memcmp = %d\n", mcmp(p, "hello xorld", 12));
32 |     printf("memcmp = %d\n", mcmp(p, "hello yorld", 12));
33 |     p[0] = -1; /* the value read back below depends on whether char is signed */
34 |     v = p[0];
35 |     printf("%x %d %d %x\n", p[0], p[0], v, p[1]);
36 |     printf("\0"); /* shall be nothing generated */
37 | 
38 |     return 0;
39 | }
40 | 


--------------------------------------------------------------------------------
/tests/comments.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int main() /* body holds only comments in both syntaxes and returns 0 */
 4 | {
 5 |     // single-line comment
 6 | 
 7 |     /* C-style multiline comments
 8 |      */
 9 |     return 0;
10 | }
11 | 


--------------------------------------------------------------------------------
/tests/cond.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | int main(int argc, char **argv) /* prints argc; exits with 1 when argc == 1 */
 5 | {
 6 |     if (argc == 1) {
 7 |         printf("more arguments are required\n");
 8 |         exit(1);
 9 |     }
10 |     printf("argc = %d\n", argc);
11 | 
12 |     return 0;
13 | }
14 | 


--------------------------------------------------------------------------------
/tests/duff.c:
--------------------------------------------------------------------------------
 1 | void copy(char *to, char *from, int count) /* byte copy with case labels placed inside a do-while */
 2 | {
 3 |     int n = (count + 7) >> 3; /* number of loop passes, eight copies per full pass */
 4 |     switch (count & 7) { /* jump into the loop body at the remainder position */
 5 |     case 0:
 6 |         do {
 7 |             *to++ = *from++;
 8 |         case 7:
 9 |             *to++ = *from++;
10 |         case 6:
11 |             *to++ = *from++;
12 |         case 5:
13 |             *to++ = *from++;
14 |         case 4:
15 |             *to++ = *from++;
16 |         case 3:
17 |             *to++ = *from++;
18 |         case 2:
19 |             *to++ = *from++;
20 |         case 1:
21 |             *to++ = *from++;
22 |         } while (--n > 0);
23 |     }
24 | }
25 | 
26 | void fastcopy(char *to, char *from, int count) /* same unrolled copy, entered through goto labels */
27 | {
28 |     int n = (count + 7) >> 3; /* number of loop passes */
29 | 
30 |     switch (count & 7) { /* a remainder of 0 matches no case and enters the loop at its top */
31 |     case 7:
32 |         goto r7;
33 |     case 6:
34 |         goto r6;
35 |     case 5:
36 |         goto r5;
37 |     case 4:
38 |         goto r4;
39 |     case 3:
40 |         goto r3;
41 |     case 2:
42 |         goto r2;
43 |     case 1:
44 |         goto r1;
45 |     }
46 | 
47 |     do {
48 |         *to++ = *from++;
49 |     r7:
50 |         *to++ = *from++;
51 |     r6:
52 |         *to++ = *from++;
53 |     r5:
54 |         *to++ = *from++;
55 |     r4:
56 |         *to++ = *from++;
57 |     r3:
58 |         *to++ = *from++;
59 |     r2:
60 |         *to++ = *from++;
61 |     r1:
62 |         *to++ = *from++;
63 |     } while (--n > 0);
64 | }
65 | 
66 | int main() /* copies the message with both routines and prints each copy */
67 | {
68 |     char *message = "This is a test of duff's device\n";
69 |     char *output = malloc(64);
70 |     char *output2 = malloc(64);
71 | 
72 |     copy(output, message, 33); /* 32 characters plus the terminating NUL */
73 |     printf(output);
74 | 
75 |     fastcopy(output2, message, 33);
76 |     printf(output2);
77 | 
78 |     free(output2);
79 |     free(output);
80 | 
81 |     return 0;
82 | }
83 | 


--------------------------------------------------------------------------------
/tests/enum.c:
--------------------------------------------------------------------------------
 1 | enum color { RED, GREEN, YELLO }; /* implicit values 0, 1, 2 */
 2 | enum { BLACK = 10, BLUE }; /* BLUE continues after BLACK: 11 */
 3 | 
 4 | int main(void) /* prints BLUE, a (initialised to GREEN) and a + 1 */
 5 | {
 6 |     int a = GREEN;
 7 |     printf("blue: %d\n", BLUE); /* NOTE(review): printf is used without including <stdio.h> */
 8 |     printf("a:%d\n", a);
 9 |     printf("a + 1:%d", a + 1);
10 |     return 0;
11 | }
12 | 


--------------------------------------------------------------------------------
/tests/eq.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int main() /* prints the 0/1 result of ==, !=, >, >=, < and <= on integer constants */
 4 | {
 5 |     printf("16000 == 16000 : %d\n", 16000 == 16000);
 6 |     printf("16000 == 17000 : %d\n", 16000 == 17000);
 7 |     printf("2 == 2         : %d\n", 2 == 2);
 8 |     printf("0 == 0         : %d\n", 0 == 0);
 9 |     printf("-1 == -1       : %d\n", -1 == -1);
10 |     printf("\n");
11 | 
12 |     printf("16000 != 16000 : %d\n", 16000 != 16000);
13 |     printf("16000 != 17000 : %d\n", 16000 != 17000);
14 |     printf("2 != 2         : %d\n", 2 != 2);
15 |     printf("0 != 0         : %d\n", 0 != 0);
16 |     printf("-1 != -1       : %d\n", -1 != -1);
17 |     printf("\n");
18 | 
19 |     printf("17000 > 16000  : %d\n", 17000 > 16000);
20 |     printf("17000 > 17000  : %d\n", 17000 > 17000);
21 |     printf("16000 > 17000  : %d\n", 16000 > 17000);
22 |     printf("16000 > -17000 : %d\n", 16000 > -17000);
23 |     printf("-16000 > -17000 : %d\n", -16000 > -17000);
24 |     printf("\n");
25 | 
26 |     printf("17000 >= 16000  : %d\n", 17000 >= 16000);
27 |     printf("17000 >= 17000  : %d\n", 17000 >= 17000);
28 |     printf("16000 >= 17000  : %d\n", 16000 >= 17000);
29 |     printf("16000 >= -17000 : %d\n", 16000 >= -17000);
30 |     printf("-16000 >= -17000 : %d\n", -16000 >= -17000);
31 |     printf("\n");
32 | 
33 |     printf("16000 < 17000  : %d\n", 16000 < 17000);
34 |     printf("16000 < 16000  : %d\n", 16000 < 16000);
35 |     printf("17000 < 16000  : %d\n", 17000 < 16000);
36 |     printf("17000 < -16000 : %d\n", 17000 < -16000);
37 |     printf("-17000 < -16000 : %d\n", -17000 < -16000);
38 |     printf("\n");
39 | 
40 |     printf("16000 <= 17000  : %d\n", 16000 <= 17000);
41 |     printf("16000 <= 16000  : %d\n", 16000 <= 16000);
42 |     printf("17000 <= 16000  : %d\n", 17000 <= 16000);
43 |     printf("17000 <= -16000 : %d\n", 17000 <= -16000);
44 |     printf("-17000 <= -16000 : %d\n", -17000 <= -16000);
45 |     printf("\n");
46 | 
47 |     return 0;
48 | }
49 | 


--------------------------------------------------------------------------------
/tests/fib.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | int my_atoi(char *s) /* parses a string of decimal digits; no sign handling */
 5 | {
 6 |     int res;
 7 |     res = 0;
 8 |     while (*s) {
 9 |         if (*s < '0' || '9' < *s)
10 |             return 0; /* any non-digit character makes the whole result 0 */
11 |         res = res * 10 + (*s - '0');
12 |         ++s;
13 |     }
14 |     return res;
15 | }
16 | 
17 | int fib(int n) /* recursive Fibonacci with fib(n) = 1 for every n < 2 */
18 | {
19 |     if (n < 2)
20 |         return 1;
21 |     return fib(n - 1) + fib(n - 2);
22 | }
23 | 
24 | int main(int argc, char **argv) /* prints fib of the number given in argv[1] */
25 | {
26 |     int n;
27 |     if (argc < 2) { /* no number given: print usage and exit with 1 */
28 |         printf("Usage: %s <number>\n", argv[0]);
29 |         exit(1);
30 |     }
31 | 
32 |     n = my_atoi(argv[1]);
33 |     printf("%d\n", fib(n));
34 | 
35 |     return 0;
36 | }
37 | 


--------------------------------------------------------------------------------
/tests/for.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int main(int argc, char **argv) /* exercises for loops: comma clauses, nesting, break and continue */
 4 | {
 5 |     int i, j;
 6 | 
 7 |     j = 10;
 8 | 
 9 |     for (i = 0, printf("let's loop\n"); i < j; i++, printf("loops again\n")) /* comma expressions in init and step */
10 |         printf("loop %d\n", i);
11 | 
12 |     printf("nested loop\n");
13 |     for (i = 1; i < 10; i++) { /* 9 x 9 multiplication table, one row per i */
14 |         for (j = 1; j < 10; j++) {
15 |             printf("%d * %d = %d\t", i, j, i * j);
16 |         }
17 |         printf("\n");
18 |     }
19 | 
20 |     printf("\n");
21 |     for (i = 1; i <= 5; ++i) { /* row i prints i stars */
22 |         for (j = 1; j <= 5; ++j) {
23 |             if (j > i)
24 |                 break; /* leaves only the inner loop */
25 |             printf("* ");
26 |         }
27 |         printf("\n");
28 |     }
29 |     printf("\n");
30 | 
31 |     printf("\n");
32 |     for (i = 1; i <= 30; ++i) {
33 |         if (i > 10 && i < 20)
34 |             continue; /* 11..19 are not printed */
35 |         printf("%d ", i);
36 |     }
37 |     printf("\n");
38 | 
39 |     return 0;
40 | }
41 | 


--------------------------------------------------------------------------------
/tests/func_call.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int test(int a, int b) /* returns a + b */
 4 | {
 5 |     return a + b;
 6 | }
 7 | 
 8 | int main() /* calls test(1, 2) and prints the returned value */
 9 | {
10 |     int result;
11 |     result = test(1, 2);
12 |     printf("result is %d\n", result);
13 |     return 0;
14 | }
15 | 


--------------------------------------------------------------------------------
/tests/func_param.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int main() /* passes a local int as a printf argument */
 4 | {
 5 |     int i;
 6 |     i = 10;
 7 |     printf("hello, world %d \n", i);
 8 |     return 0;
 9 | }
10 | 


--------------------------------------------------------------------------------
/tests/goto.c:
--------------------------------------------------------------------------------
 1 | // Finite state machine
 2 | 
 3 | // $*      -> AB*
 4 | // [^$]*   -> AC*
 5 | // [^$]$$  -> ACD
 6 | // $[^$]$$ -> ABCD
 7 | 
 8 | int main() /* walks data one character at a time, printing the label of each state visited */
 9 | {
10 |     char *data = "$$$$7o$n*r*0rj$o*$c0*d**dj0$$gbwj0";
11 | 
12 | A:
13 |     printf("A");
14 |     if (*data++ != '$') /* every test consumes one character */
15 |         goto C;
16 | 
17 | B:
18 |     printf("B");
19 |     if (*data++ == '$') /* stay in B while '$' repeats */
20 |         goto B;
21 | 
22 | C:
23 |     printf("C");
24 |     if (*data++ != '$') /* stay in C until a '$' is read */
25 |         goto C;
26 | 
27 | D:
28 |     printf("D");
29 |     if (*data++ != '$') /* a second consecutive '$' falls through and ends the run */
30 |         goto C;
31 | 
32 |     printf("\n");
33 | 
34 |     return 0;
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/hello.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | 
3 | int main() /* prints a fixed greeting and returns 0 */
4 | {
5 |     printf("hello, world\n");
6 |     return 0;
7 | }
8 | 


--------------------------------------------------------------------------------
/tests/inc.c:
--------------------------------------------------------------------------------
 1 | int inc(int x) /* returns x + 1 */
 2 | {
 3 |     return x + 1;
 4 | }
 5 | 
 6 | int add2(int x) /* returns x + 2 through two nested inc() calls */
 7 | {
 8 |     return inc(inc(x));
 9 | }
10 | 
11 | int main(int argc, char **argv) /* exit status is (argc + 2) - 4, i.e. argc - 2 */
12 | {
13 |     return add2(argc) - 4;
14 | }
15 | 


--------------------------------------------------------------------------------
/tests/jit.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <sys/mman.h>
 4 | 
 5 | int main(int ac, char **av) /* writes five ARM instructions into mmap'ed memory and has them executed */
 6 | {
 7 |     char *jitmem;
 8 |     int *je, var;
 9 | 
10 |     jitmem = mmap(0, 256, 7, 0x22, -1, 0); /* presumably 7 = read|write|exec and 0x22 = private|anonymous on Linux; verify */
11 |     je = (int *) jitmem;
12 |     *je++ = 0xe59f000c;  // ldr r0, [pc, #12]
13 |     *je++ = 0xe5901000;  // ldr r1, [r0]
14 |     *je++ = 0xe2811009;  // add r1, r1, #9
15 |     *je++ = 0xe5801000;  // str r1, [r0]
16 |     *je++ = 0xe1a0f00e;  // mov pc, lr
17 |     *je = (int) &var; /* sixth word: the address the first ldr loads (pc-relative) */
18 |     __clear_cache(jitmem, je);
19 | 
20 |     var = ac;
21 |     bsearch(&av, av, 1, 1, (void *) jitmem); /* jitmem is passed as the comparison callback, so bsearch calls the generated code */
22 |     printf("ac = %d, var = %d\n", ac, var); /* the generated code adds 9 to var on each call */
23 |     return 0;
24 | }
25 | 


--------------------------------------------------------------------------------
/tests/literal.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int main() /* loops over integer literals above 255 */
 4 | {
 5 |     int a;
 6 |     a = 256;
 7 |     while (a++ < 512) /* a is incremented before the body runs, so 257..512 are printed */
 8 |         printf("a = %d\n", a);
 9 | 
10 |     return 0;
11 | }
12 | 


--------------------------------------------------------------------------------
/tests/local.c:
--------------------------------------------------------------------------------
 1 | int main(void) /* declares initialised locals between statements and reads them through pointers */
 2 | {
 3 |     int n = 10;
 4 |     printf("%d\n", n);
 5 |     char cc = 'a'; /* declaration after a statement */
 6 |     char *ptr = &cc;
 7 |     printf("%c\n", *ptr);
 8 |     int *c = &n;
 9 |     printf("%d\n", *c);
10 |     int r;
11 |     r = 0;
12 |     return r;
13 | }
14 | 


--------------------------------------------------------------------------------
/tests/maze.c:
--------------------------------------------------------------------------------
  1 | /* Maze generator in C.
  2 |  * Written by Joe Wingbermuehle
  3 |  * 1999-08-05
  4 |  * Sourced from https://raw.githubusercontent.com/joewing/maze/master/maze.c
  5 |  * Tweaked for AMaCC.
  6 |  */
  7 | 
  8 | #include <stdio.h>
  9 | #include <stdlib.h>
 10 | 
 11 | int maze_rand_v; /* generator state; main() sets it before use */
 12 | int maze_rand() /* advances the state and returns a value in 0..0x7fff */
 13 | {
 14 |     return ((maze_rand_v = maze_rand_v * 214013 + 2531011) >> 16) & 0x7fff; /* bits 16..30 of the new state */
 15 | }
 16 | 
 17 | int maze_atoi(char *str, int radix) /* parses an optionally '-'-prefixed number in the given radix */
 18 | {
 19 |     int v, sign;
 20 | 
 21 |     v = 0;
 22 |     sign = 1;
 23 |     if (*str == '-') {
 24 |         sign = -1;
 25 |         ++str;
 26 |     }
 27 |     while ((*str >= 'A' && *str <= 'Z') || (*str >= 'a' && *str <= 'z') || /* stops at the first non-alphanumeric */
 28 |            (*str >= '0' && *str <= '9')) {
 29 |         v = v * radix +
 30 |             ((*str > '9') ? (*str & ~0x20) - 'A' + 10 : (*str - '0')); /* letters, case folded, count as 10 and up; digits are not range-checked against radix */
 31 |         ++str;
 32 |     }
 33 |     return v * sign;
 34 | }
 35 | 
 36 | /* Display the maze. */
 37 | void show_maze(char *maze, int width, int height) /* maze is a row-major width * height byte grid */
 38 | {
 39 |     int x, y;
 40 |     for (y = 0; y < height; y++) {
 41 |         for (x = 0; x < width; x++) {
 42 |             switch (maze[y * width + x]) { /* each cell is printed as two characters */
 43 |             case 1:
 44 |                 printf("[]");
 45 |                 break;
 46 |             case 2:
 47 |                 printf("<>");
 48 |                 break;
 49 |             default:
 50 |                 printf("  ");
 51 |                 break;
 52 |             }
 53 |         }
 54 |         printf("\n");
 55 |     }
 56 | }
 57 | 
 58 | /*  Carve the maze starting at x, y. */
 59 | void carve_maze(char *maze, int width, int height, int x, int y)
 60 | {
 61 |     int x1, y1; /* cell one step away in the current direction */
 62 |     int x2, y2; /* cell two steps away in the current direction */
 63 |     int dx, dy;
 64 |     int dir, count;
 65 | 
 66 |     dir = maze_rand() % 4;
 67 |     count = 0; /* directions tried without success at the current cell */
 68 |     while (count < 4) { /* stop once all four directions have failed */
 69 |         dx = 0;
 70 |         dy = 0;
 71 |         switch (dir) {
 72 |         case 0:
 73 |             dx = 1;
 74 |             break;
 75 |         case 1:
 76 |             dy = 1;
 77 |             break;
 78 |         case 2:
 79 |             dx = -1;
 80 |             break;
 81 |         default:
 82 |             dy = -1;
 83 |             break;
 84 |         }
 85 |         x1 = x + dx;
 86 |         y1 = y + dy;
 87 |         x2 = x1 + dx;
 88 |         y2 = y1 + dy;
 89 |         if (x2 > 0 && x2 < width && y2 > 0 && y2 < height && /* target inside the grid and both cells still 1 */
 90 |             maze[y1 * width + x1] == 1 && maze[y2 * width + x2] == 1) {
 91 |             maze[y1 * width + x1] = 0;
 92 |             maze[y2 * width + x2] = 0;
 93 |             x = x2; /* continue carving from the new cell */
 94 |             y = y2;
 95 |             dir = maze_rand() % 4;
 96 |             count = 0;
 97 |         } else {
 98 |             dir = (dir + 1) % 4; /* try the next direction in order */
 99 |             count++;
100 |         }
101 |     }
102 | }
103 | 
104 | /* Generate maze in matrix maze with size width, height. */
105 | void generate_maze(char *maze, int width, int height)
106 | {
107 |     int x, y;
108 | 
109 |     /* Initialize the maze. */
110 |     for (x = 0; x < width * height; x++)
111 |         maze[x] = 1; /* every cell starts as 1 */
112 |     maze[1 * width + 1] = 0; /* open the cell at x = 1, y = 1 */
113 | 
114 |     /* Carve the maze. */
115 |     for (y = 1; y < height; y += 2) /* carving starts from every odd (x, y) position */
116 |         for (x = 1; x < width; x += 2)
117 |             carve_maze(maze, width, height, x, y);
118 | 
119 |     /* Set up the entry and exit. */
120 |     maze[0 * width + 1] = 0; /* top row, second column */
121 |     maze[(height - 1) * width + (width - 2)] = 0; /* bottom row, second-to-last column */
122 | }
123 | 
124 | /* Solve the maze. */
125 | void solve_maze(char *maze, int width, int height) /* walks from (1, 1) to (width - 2, height - 2), marking cells in place */
126 | {
127 |     int dir, count;
128 |     int x, y;
129 |     int dx, dy;
130 |     int forward; /* 1 while stepping onto cells equal to 0, 0 while stepping back onto cells marked 2 */
131 | 
132 |     /* Remove the entry and exit. */
133 |     maze[0 * width + 1] = 1;
134 |     maze[(height - 1) * width + (width - 2)] = 1;
135 | 
136 |     forward = 1;
137 |     dir = 0;
138 |     count = 0;
139 |     x = 1;
140 |     y = 1;
141 |     while (x != width - 2 || y != height - 2) {
142 |         dx = 0;
143 |         dy = 0;
144 |         switch (dir) {
145 |         case 0:
146 |             dx = 1;
147 |             break;
148 |         case 1:
149 |             dy = 1;
150 |             break;
151 |         case 2:
152 |             dx = -1;
153 |             break;
154 |         default:
155 |             dy = -1;
156 |             break;
157 |         }
158 |         if ((forward && maze[(y + dy) * width + (x + dx)] == 0) ||
159 |             (!forward && maze[(y + dy) * width + (x + dx)] == 2)) {
160 |             maze[y * width + x] = forward ? 2 : 3; /* the cell being left is marked 2 going forward, 3 going back */
161 |             x = x + dx;
162 |             y = y + dy;
163 |             forward = 1;
164 |             count = 0;
165 |             dir = 0;
166 |         } else {
167 |             dir = (dir + 1) % 4;
168 |             count = count + 1;
169 |             if (count > 3) { /* all four directions failed: switch to stepping back */
170 |                 forward = 0;
171 |                 count = 0;
172 |             }
173 |         }
174 |     }
175 | 
176 |     /* Replace the entry and exit. */
177 |     maze[(height - 2) * width + (width - 2)] = 2; /* the final cell, which the loop leaves unmarked */
178 |     maze[(height - 1) * width + (width - 2)] = 2;
179 | }
180 | 
181 | enum { A_RANDV, A_WIDTH, A_HEIGHT, A_SOLVE }; /* meaning of the next positional argument, in order */
182 | 
183 | int main(int argc, char **argv) /* parses [seed] [width] [height] [s], then generates, prints and optionally solves a maze */
184 | {
185 |     int width, height, solve, mode, v;
186 |     char *maze, *invocation;
187 | 
188 |     maze_rand_v = 6;  // chosen by fair dice roll, guaranteed to be random
189 |     width = 10 * 2 + 3;
190 |     height = 10 * 2 + 3;
191 |     solve = 0;
192 | 
193 |     invocation = *argv; /* program name, kept for the usage message */
194 |     --argc;
195 |     ++argv;
196 |     mode = A_RANDV;
197 |     while (argc > 0) {
198 |         if (**argv == '-' && *(*argv + 1) == 'h') {
199 |             printf("Usage: %s [seed] [width] [height] [s]\n", invocation);
200 |             return 0;
201 |         }
202 |         v = maze_atoi(*argv, 10);
203 |         if (**argv == 's' || **argv == '2') /* an argument starting with 's' or '2' requests solving, whatever its position */
204 |             solve = 1;
205 |         else if (mode == A_RANDV)
206 |             maze_rand_v = v * 0xfffa;
207 |         else if (mode == A_WIDTH)
208 |             width = v * 2 + 3;
209 |         else if (mode == A_HEIGHT)
210 |             height = v * 2 + 3;
211 |         else {
212 |             printf("Unknown argument: '%s'\n", *argv);
213 |             return 1;
214 |         }
215 |         ++mode; /* advances for every argument, including a solve flag */
216 |         --argc;
217 |         ++argv;
218 |     }
219 |     /* Get and validate the size. */
220 |     if (width < 7)
221 |         width = 7;
222 |     if (height < 7)
223 |         height = 7;
224 | 
225 |     /* Allocate the maze array. */
226 |     maze = (char *) malloc(width * height * sizeof(char));
227 |     if (maze == 0) {
228 |         printf("error: not enough memory\n");
229 |         exit(1);
230 |     }
231 | 
232 |     /* Generate and display the maze. */
233 |     generate_maze(maze, width, height);
234 |     show_maze(maze, width, height);
235 | 
236 |     /* Solve the maze if requested. */
237 |     if (solve) {
238 |         solve_maze(maze, width, height);
239 |         show_maze(maze, width, height);
240 |     }
241 | 
242 |     /* Clean up. */
243 |     free(maze);
244 |     return 0;
245 | }
246 | 


--------------------------------------------------------------------------------
/tests/printf.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | 
3 | int main() /* calls printf with six arguments: the format plus five strings */
4 | {
5 |     printf("arg1 %s %s %s %s %s\n", "arg2", "arg3", "arg4", "arg5", "arg6");
6 | 
7 |     return 0;
8 | }
9 | 


--------------------------------------------------------------------------------
/tests/ptr.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | int assert_eq(int a, int b) /* prints a message and exits with 1 unless a == b */
 5 | {
 6 |     if (a != b) {
 7 |         printf("Assertion: %d != %d\n", a, b);
 8 |         exit(1);
 9 |     }
10 |     return 0; /* values matched */
11 | }
12 | 
13 | int main() /* checks pointer difference, pointer +/- integer and indexing for int and struct pointers */
14 | {
15 |     int i;
16 |     int *s, *e, v;
17 |     int *data;
18 |     struct abc_s {
19 |         int a, b, c;
20 |     } * sptr;
21 | 
22 |     s = (int *) 0xbebebeb0; /* fixed addresses four bytes apart; never dereferenced */
23 |     e = (int *) 0xbebebeb4;
24 |     v = e - s; /* pointer difference counts elements, not bytes */
25 |     if (v == 1)
26 |         printf("passed\n");
27 |     else
28 |         printf("failed, e - s = %x\n", v);
29 |     v = (int) (e - 1);
30 |     if (v == (int) s)
31 |         printf("passed\n");
32 |     else
33 |         printf("failed, e - s = %x\n", v);
34 | 
35 |     data = (int *) malloc(sizeof(int) * 10);
36 |     sptr = (struct abc_s *) malloc(sizeof(struct abc_s) * 10);
37 | 
38 |     assert_eq(&sptr[5] - &sptr[2], 3); /* the same arithmetic rules on a three-int struct */
39 |     assert_eq((int) (&sptr[5] - 3), (int) &sptr[2]);
40 |     assert_eq((int) &sptr[5], (int) (sptr + 5));
41 |     assert_eq((int) &sptr[5], (int) (5 + sptr));
42 | 
43 |     for (i = 0; i < 10; ++i)
44 |         data[i] = i;
45 | 
46 |     s = data;
47 |     e = &data[9];
48 |     for (i = 0; i < 10; ++i) {
49 |         assert_eq(s[i], *(s + i));
50 |         assert_eq(e[-i], *(e - i)); /* negative index counted back from the last element */
51 |     }
52 | 
53 |     free(sptr);
54 |     free(data);
55 | 
56 |     return 0;
57 | }
58 | 


--------------------------------------------------------------------------------
/tests/read.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <unistd.h>
 4 | 
 5 | int main(int argc, char **argv) /* prints up to the first 16 KiB of amacc.c */
 6 | {
 7 |     char *p;
 8 |     int fd, readsz;
 9 | 
10 |     /* one extra byte so the data can always be NUL-terminated for %s */
11 |     if (!(p = malloc(1024 * 16 + 1))) {
12 |         printf("failed to malloc memory\n");
13 |         exit(1);
14 |     }
15 | 
16 |     fd = open("amacc.c", 0);
17 |     readsz = read(fd, p, 1024 * 16);
18 |     p[readsz < 0 ? 0 : readsz] = 0; /* read() does not terminate; -1 means failure */
19 |     printf("read %d bytes\nContents:\n______________________________\n%s",
20 |            readsz, p);
21 |     printf("\n_______________________________\n");
22 |     return 0;
23 | }
24 | 


--------------------------------------------------------------------------------
/tests/shift.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int main() /* prints the results of left and right shifts, including out-of-range shift counts */
 4 | {
 5 |     int a, b, c;
 6 |     printf("1 << 0 = %x\n", 1 << 0);
 7 |     printf("1 << 2 = %x\n", 1 << 2);
 8 |     printf("0 << 4 = %x\n", 0 << 4);
 9 |     printf("1 << 31 = %x\n", 1 << 31);
10 |     printf("1 << 32 = %x\n", 1 << 32); /* NOTE(review): assuming 32-bit int, a count >= the width is undefined in ISO C */
11 |     printf("4 << -1 = %x\n", 4 << -1); /* NOTE(review): negative shift counts are undefined in ISO C */
12 |     printf("4 >> -1 = %x\n", 4 >> -1);
13 |     printf("-1 << 1 = %x\n", -1 << 1);
14 |     printf("-1 << 0 = %x\n", -1 << 0);
15 | 
16 |     printf("4 >> 1 = %x\n", 4 >> 1);
17 |     printf("4 >> 5 = %x\n", 4 >> 5);
18 |     printf("0x80000000 >> 31 = %x\n", (int) 0x80000000 >> 31); /* right shift of a value cast to int */
19 |     printf("-1 >> 2 = %x\n", -1 >> 2);
20 |     b = 0xbef6d568;
21 |     c = 0xbef6d56a;
22 |     printf("%d - %d = %d(%x)\n", b, c, b - c, b - c);
23 |     a = (b << 8) | 12;
24 |     c = (a >> 8) | ((int) b & 0xff000000); /* shift back down and OR in the top byte of b */
25 |     printf("a = %x, b = %x, c = %x\n", a, b, c);
26 |     a = ((b & 0x007fffff) << 8) | 12; /* same round trip with the upper nine bits of b masked off first */
27 |     c = (a >> 8) | ((int) b & 0xff800000);
28 |     printf("a = %x, b = %x, c = %x\n", a, b, c);
29 | 
30 |     return 0;
31 | }
32 | 


--------------------------------------------------------------------------------
/tests/struct.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | int len;
 4 | struct foo { /* record type used by main(), which fills and prints an array of them */
 5 |     char *p;
 6 |     char c; /* a lone char placed between two pointers */
 7 |     char *pad;
 8 |     int x; /* last member; main() also reads it through a raw offset */
 9 | } * d; /* global pointer to the array allocated in main() */
10 | 
11 | char *str;
12 | 
13 | int main(int argc, char **argv) /* fills an array of struct foo and prints it via [], -> and (*). access */
14 | {
15 |     struct foo bar, *ptr;
16 |     char c = '1';
17 |     int i;
18 |     str = "I am a String!\n";
19 |     len = 10;
20 |     bar.x = 1;
21 |     struct foo *p = &bar;
22 |     p->c = 'a';
23 | 
24 |     // FIXME: 32-bit only
25 |     if (*(int *) ((void *) p + sizeof(struct foo) - 4) != bar.x) /* reads the int stored in the last 4 bytes of bar */
26 |         exit(-1);
27 | 
28 |     printf("%zu\n", sizeof(struct foo));
29 |     printf("%c\n", bar.c);
30 | 
31 |     d = malloc(sizeof(struct foo) * len);
32 |     ptr = d;
33 |     for (i = 0; i < len / 2; ++i) { /* first half is filled through a moving pointer */
34 |         ptr->p = "one";
35 |         ptr->pad = str;
36 |         ptr->x = i;
37 |         ptr->c = c;
38 |         ++ptr;
39 |         ++c;
40 |     }
41 | 
42 |     for (; i < len; ++i) { /* second half is filled through indexing; i continues from len / 2 */
43 |         d[i].p = "one";
44 |         d[i].pad = str;
45 |         d[i].x = i;
46 |         d[i].c = c;
47 |         ++c;
48 |     }
49 | 
50 |     for (i = 0; i < len; ++i) { /* print with d[i].member */
51 |         printf("%d------------\n", i);
52 |         printf("%s\n", d[i].p);
53 |         printf("%s\n", d[i].pad);
54 |         printf("%d\n", d[i].x);
55 |         printf("%c\n", d[i].c);
56 |         printf("--------------\n");
57 |     }
58 | 
59 |     ptr = d;
60 |     for (i = 0; i < len; ++i) { /* print with ptr->member */
61 |         printf("%d------------\n", i);
62 |         printf("%s\n", ptr->p);
63 |         printf("%s\n", ptr->pad);
64 |         printf("%d\n", ptr->x);
65 |         printf("%c\n", ptr->c);
66 |         printf("--------------\n");
67 |         ++ptr;
68 |     }
69 | 
70 |     ptr = d;
71 |     for (i = 0; i < len; ++i) { /* print with (*ptr).member */
72 |         printf("%d------------\n", i);
73 |         printf("%s\n", (*ptr).p);
74 |         printf("%s\n", (*ptr).pad);
75 |         printf("%d\n", (*ptr).x);
76 |         printf("%c\n", (*ptr).c);
77 |         printf("--------------\n");
78 |         ++ptr;
79 |     }
80 | 
81 |     return 0;
82 | }
83 | 


--------------------------------------------------------------------------------
/tests/switch.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int main(int argc, char **argv) /* reports on the command line using a switch on argc */
 4 | {
 5 |     switch (argc) {
 6 |     case 1:
 7 |         printf("No arguments\n");
 8 |         return 0; /* returns from inside the switch */
 9 |     case 2:
10 |         printf("arg = %s\n", argv[1]);
11 |         break;
12 |     default:
13 |         printf("More than 1 argument\n");
14 |         break;
15 |     }
16 |     return 0;
17 | }
18 | 


--------------------------------------------------------------------------------
/tests/union.c:
--------------------------------------------------------------------------------
 1 | struct s2 { /* two ints */
 2 |     int x, y;
 3 | };
 4 | 
 5 | struct s3 { /* three ints; x and y are declared in the same order as in s2 */
 6 |     int x, y, z;
 7 | };
 8 | 
 9 | union pt { /* the two views share storage */
10 |     struct s2 plane;
11 |     struct s3 space;
12 | };
13 | 
14 | int main() /* writes through the space member and reads back through the plane member */
15 | {
16 |     union pt *p;
17 |     int i;
18 | 
19 |     p = malloc(4 * sizeof(union pt)); /* NOTE(review): malloc and printf are used without any #include in this file */
20 | 
21 |     for (i = 0; i < 4; ++i) {
22 |         p[i].space.x = i;
23 |         p[i].space.y = i + 1;
24 |         p[i].space.z = 4 - i;
25 |         printf("(%d, %d, %d)\n", p[i].space.x, p[i].space.y, p[i].space.z);
26 |     }
27 | 
28 |     for (i = 0; i < 4; ++i) {
29 |         printf("(%d, %d)\n", p[i].plane.x, p[i].plane.y); /* reads the x and y written through space above */
30 |     }
31 | 
32 |     return 0;
33 | }
34 | 


--------------------------------------------------------------------------------
/tests/while.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int fact(int n) /* returns n! computed iteratively, printing every intermediate product */
 4 | {
 5 |     int r;
 6 |     r = 1;
 7 |     while (n > 0) { /* skipped entirely for n <= 0, so the result is 1 */
 8 |         r = r * n;
 9 |         printf("n = %d, r = %d\n", n, r);
10 |         --n;
11 |     }
12 |     return r;
13 | }
14 | 
15 | int filteradd(char *data) /* sums the decimal digits in data up to the first '*' or the end of the string */
16 | {
17 |     int sum = 0;
18 |     do {
19 |         if (*data == '*')
20 |             break; /* stop at the first '*' */
21 |         if (*data < '0' || *data > '9')
22 |             continue; /* non-digits are skipped; continue jumps to the loop condition */
23 |         sum += *data - '0';
24 |     } while (*++data != 0); /* advance, then stop at the terminating NUL */
25 | 
26 |     return sum;
27 | }
28 | 
29 | int main(int argc, char **argv) /* prints fact(8) and the filteradd() sum of a sample string */
30 | {
31 |     printf("%d\n", fact(8));
32 |     printf("\n%d\n", filteradd("445h5h5g*45hb7b4g5")); /* digits before the '*': 4 + 4 + 5 + 5 + 5 */
33 | 
34 |     return 0;
35 | }
36 | 


--------------------------------------------------------------------------------