├── LICENSE
├── Makefile
├── README.md
├── char_ptr_arr.h
├── image
    ├── screenshot000.png
    └── screenshot001.png
├── insn.h
├── macro.inc
├── reloc.c
├── reloc_add_aarch64.c
├── sleep_drift.sh
├── stringx.s
├── strlen_linux.s
├── test_common
    ├── Makefile
    ├── memset.s
    ├── memset_woa.s
    ├── test_bounce_data_aosoa_alt_0.s
    ├── test_bounce_data_aosoa_alt_1.s
    ├── test_bounce_data_aosoa_alt_2.s
    └── test_bounce_data_aosoa_alt_3.s
├── test_linux
    ├── Makefile
    ├── test_bitcount.s
    ├── test_bounce.s
    ├── test_bounce.sh
    ├── test_bounce_neon.s
    ├── test_bounce_neon.sh
    ├── test_bounce_neon_aosoa.s
    ├── test_bounce_neon_aosoa.sh
    ├── test_bounce_neon_aosoa_bg.s
    ├── test_bss.s
    ├── test_cross.sh
    ├── test_cross_0.s
    ├── test_cross_1.s
    ├── test_data.s
    ├── test_memset.s
    ├── test_memset_woa.s
    ├── test_rodata.s
    ├── test_text.s
    ├── test_timeval.s
    └── test_timeval.sh
├── test_macos
    ├── Makefile
    ├── test_bitcount.s
    ├── test_bounce.s
    ├── test_bounce.sh
    ├── test_bounce_neon.s
    ├── test_bounce_neon.sh
    ├── test_bounce_neon_aosoa.s
    ├── test_bounce_neon_aosoa.sh
    ├── test_bounce_neon_aosoa_bg.s
    ├── test_bss.s
    ├── test_cross.sh
    ├── test_cross_0.s
    ├── test_cross_1.s
    ├── test_data.s
    ├── test_memset.s
    ├── test_memset_woa.s
    ├── test_rodata.s
    ├── test_text.s
    ├── test_timeval.s
    └── test_timeval.sh
├── vma.cpp
└── vma.h


/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     <one line to give the program's name and a brief idea of what it does.>
294 |     Copyright (C) <year>  <name of author>
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   <signature of Ty Coon>, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Elven Relativism top makefile for Linux/aarch64 and macOS/arm64
 2 | #
 3 | # Targets:
 4 | #   $(TARGET) (default, first rule) -- link the loader binary from $(OBJ)
 5 | #   all   -- build $(TARGET), then run `make all` in test_common and $(TEST_SUBDIR)
 6 | #   clean -- remove $(TARGET) and $(OBJ), then run `make clean` in both test dirs
 7 | 
 8 | UNAME := $(shell uname)
 9 | UNAME_M := $(shell uname -m)
10 | TARGET := elvenrel
11 | SOURCE := reloc.c reloc_add_aarch64.c insn.h
12 | CFLAGS += -std=gnu11 -Ofast -DNDEBUG -DPAGE_SIZE=$(shell getconf PAGE_SIZE) -fno-stack-protector -fPIC
13 | CXXFLAGS += -std=c++11 -Ofast -fno-exceptions -fno-rtti -DNDEBUG -DPAGE_SIZE=$(shell getconf PAGE_SIZE) -fno-stack-protector -fPIC
14 | 
15 | # NB: the $(error ...) lines below are indented with spaces, not a tab. A tab-led line
16 | # ahead of the first rule makes GNU make stop with "recipe commences before first
17 | # target" instead of printing the intended diagnostic.
18 | 
19 | ifeq ($(UNAME), Linux)
20 | 
21 | ifneq ($(UNAME_M), aarch64)
22 |   $(error unsupported arch)
23 | endif
24 | 
25 | # Update state for linux/aarch64
26 | SOURCE += stringx.s strlen_linux.s vma.cpp vma.h
27 | LDFLAGS += -lelf
28 | TEST_SUBDIR := test_linux
29 | 
30 | else ifeq ($(UNAME), Darwin)
31 | 
32 | ifneq ($(UNAME_M), arm64)
33 |   $(error unsupported arch)
34 | endif
35 | 
36 | # Update state for macos/arm64
37 | CFLAGS += -I/opt/homebrew/include
38 | LDFLAGS += /opt/homebrew/lib/libelf.a
39 | TEST_SUBDIR := test_macos
40 | 
41 | else # unsupported os
42 |   $(error unsupported os)
43 | endif
44 | 
45 | # One .o per compilable source; headers listed in SOURCE are filtered out here
46 | OBJ := $(addsuffix .o, $(basename $(filter %.s %.c %.cpp, $(SOURCE))))
47 | 
48 | # all and clean name actions, not files; keep them working even if such files exist
49 | .PHONY: all clean
50 | 
51 | $(TARGET): $(OBJ)
52 | 	$(CC) $^ $(LDFLAGS) -o $(TARGET)
53 | 
54 | reloc.o: reloc.c vma.h char_ptr_arr.h
55 | 
56 | reloc_add_aarch64.o: reloc_add_aarch64.c insn.h
57 | 
58 | vma.o: vma.cpp vma.h char_ptr_arr.h
59 | 
60 | all: $(TARGET)
61 | 	$(MAKE) -C test_common all
62 | 	$(MAKE) -C $(TEST_SUBDIR) all
63 | 
64 | clean:
65 | 	rm -f $(TARGET) $(OBJ)
66 | 	$(MAKE) -C test_common clean
67 | 	$(MAKE) -C $(TEST_SUBDIR) clean
68 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## elvenrel
 2 | 
 3 | Elven Relativism -- relocation and execution of aarch64 ELF relocatable objects (REL) on Linux and macOS.
 4 | 
 5 | The program loads one or more ELF REL files, resolves all relocations (currently only SHT_RELA) and, if a symbol `_start` is found in some `.text` section, passes control to that symbol.
 6 | 
 7 | ## Details
 8 | 
 9 | * RELs are loaded in the order specified on the command line; all relocations in a given REL are performed at its load time.
10 | * Missing-symbol (SHN_UNDEF) resolution via reverse-direction search among the preceding RELs; first-match deterministic.
11 | * Support for RO sections `.rodata` and `.text`; every other type of section is RW.
12 | * Address-space sanitation (linux-only) -- disposing of pre-existing VMAs (*VMA filtering*) via string matching to VMA backing path.
13 | 
14 | ## ToDo
15 | 
16 | * Relocation type SHT_REL; as needed.
17 | * Explicit (CLI) control over the mapping addresses of each REL; as needed.
18 | 
19 | ## Acknowledgements
20 | 
21 | Files used, with or without modifications, from external repositories:
22 | 
23 | 	linux.org/ arch/arm64/include/asm/insn.h -> insn.h
24 | 	linux.org/ arch/arm64/kernel/module.c    -> reloc_add_aarch64.c
25 | 	linux.org/ arch/arm64/lib/strlen.S       -> strlen_linux.s
26 | 
27 | ## Building
28 | 
29 | Sole prerequisite is `libelf`. On macOS `homebrew` provides v0.8.13; on Linux there should be a dedicated package with an up-to-date version for your distro of choice. That provided:
30 | 
31 | ```sh
32 | $ make all
33 | ```
34 | 
35 | ## Building with own assembler
36 | 
37 | If you have `gas` on macOS and prefer to use that:
38 | 
39 | ```sh
40 | $ make AS=/full/path/to/gas all
41 | ```
42 | 
43 | Building `gas` itself from source:
44 | 
45 | ```sh
46 | $ git clone git://sourceware.org/git/binutils-gdb.git && cd binutils-gdb
47 | $ cd bfd
48 | $ ./configure --target=aarch64-linux-gnu && make && cd -
49 | $ cd libiberty
50 | $ ./configure --target=aarch64-linux-gnu && make && cd -
51 | $ cd opcodes
52 | $ ./configure --target=aarch64-linux-gnu && make && cd -
53 | $ cd gas
54 | $ ./configure --target=aarch64-linux-gnu && make && cd -
55 | ```
56 | 
57 | ## Usage
58 | 
59 | ```sh
60 | $ ./elvenrel test_cross_0.o test_cross_1.o # order of RELs matters for symbol resolution; undefined symbols in later RELs are sought in earlier RELs
61 | ```
62 | ```sh
63 | $ ./elvenrel test_rodata.o --filter /lib/aarch64-linux-gnu # before executing the REL dispose of VMAs from file mappings containing /lib/aarch64-linux-gnu in the path
64 | ```
65 | ```sh
66 | $ ./elvenrel test_data.o --filter [heap] # before executing the REL dispose of the VMA designated as `[heap]`, i.e. the process heap
67 | ```
68 | 
69 | ## Using --break option
70 | 
71 | Using `--break` raises a `SIGTRAP` right before passing control to REL for quick debugging. Unfortunately both GDB and LLDB currently get stuck on the instruction that caused the trap. To progress one needs to manually move the program counter past the break point:
72 | 
73 | ```sh
74 | (gdb) set $pc += 4
75 | ```
76 | 
77 | ```sh
78 | (lldb) register write pc `$pc + 4`
79 | ```
80 | 
81 | ## Screenshots
82 | 
83 | ![hello_sample](image/screenshot000.png "hello sample")
84 | ![vma_sample](image/screenshot001.png "vma sample")
85 | 


--------------------------------------------------------------------------------
/char_ptr_arr.h:
--------------------------------------------------------------------------------
 1 | #ifndef __char_ptr_arr_H__
 2 | /* Define the guard macro so a second inclusion skips the struct definition below */
 3 | #define __char_ptr_arr_H__
 4 | #include <stddef.h>
 5 | 
 6 | /* A counter paired with an array of char pointers */
 7 | struct char_ptr_arr_t {
 8 | 	size_t count; /* presumably the number of entries in arr -- confirm against the users of this struct */
 9 | 	char **arr;
10 | };
11 | 
12 | #endif /* __char_ptr_arr_H__ */
13 | 


--------------------------------------------------------------------------------
/image/screenshot000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blu/elvenrel/56d27dd509572ff68f11fb41cf7ea7248586a049/image/screenshot000.png


--------------------------------------------------------------------------------
/image/screenshot001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blu/elvenrel/56d27dd509572ff68f11fb41cf7ea7248586a049/image/screenshot001.png


--------------------------------------------------------------------------------
/insn.h:
--------------------------------------------------------------------------------
  1 | /* SPDX-License-Identifier: GPL-2.0-only */
  2 | /*
  3 |  * Copyright (C) 2013 Huawei Ltd.
  4 |  * Author: Jiang Liu <liuj97@gmail.com>
  5 |  *
  6 |  * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com>
  7 |  */
  8 | #ifndef	__ASM_INSN_H
  9 | #define	__ASM_INSN_H
 10 | 
 11 | /* A64 instructions are always 32 bits. */
 12 | #define	AARCH64_INSN_SIZE		4
 13 | 
 14 | #ifndef __ASSEMBLY__
 15 | /*
 16 |  * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
 17 |  * Section C3.1 "A64 instruction index by encoding":
 18 |  * AArch64 main encoding table
 19 |  *  Bit position
 20 |  *   28 27 26 25	Encoding Group
 21 |  *   0  0  -  -		Unallocated
 22 |  *   1  0  0  -		Data processing, immediate
 23 |  *   1  0  1  -		Branch, exception generation and system instructions
 24 |  *   -  1  -  0		Loads and stores
 25 |  *   -  1  0  1		Data processing - register
 26 |  *   0  1  1  1		Data processing - SIMD and floating point
 27 |  *   1  1  1  1		Data processing - SIMD and floating point
 28 |  * "-" means "don't care"
 29 |  */
 30 | enum aarch64_insn_encoding_class { /* instruction encoding groups; no initializers, so values run 0..6 in declaration order */
 31 | 	AARCH64_INSN_CLS_UNKNOWN,	/* UNALLOCATED */
 32 | 	AARCH64_INSN_CLS_SVE,		/* SVE instructions */
 33 | 	AARCH64_INSN_CLS_DP_IMM,	/* Data processing - immediate */
 34 | 	AARCH64_INSN_CLS_DP_REG,	/* Data processing - register */
 35 | 	AARCH64_INSN_CLS_DP_FPSIMD,	/* Data processing - SIMD and FP */
 36 | 	AARCH64_INSN_CLS_LDST,		/* Loads and stores */
 37 | 	AARCH64_INSN_CLS_BR_SYS,	/* Branch, exception generation and
 38 | 					 * system instructions */
 39 | };
 40 | 
 41 | enum aarch64_insn_hint_cr_op { /* hint selectors; every value is stored already shifted left by 5 bits (presumably the CRm:op2 field position -- confirm against the ARM ARM) */
 42 | 	AARCH64_INSN_HINT_NOP	= 0x0 << 5,
 43 | 	AARCH64_INSN_HINT_YIELD	= 0x1 << 5,
 44 | 	AARCH64_INSN_HINT_WFE	= 0x2 << 5,
 45 | 	AARCH64_INSN_HINT_WFI	= 0x3 << 5,
 46 | 	AARCH64_INSN_HINT_SEV	= 0x4 << 5,
 47 | 	AARCH64_INSN_HINT_SEVL	= 0x5 << 5,
 48 | 
 49 | 	AARCH64_INSN_HINT_XPACLRI    = 0x07 << 5,
 50 | 	AARCH64_INSN_HINT_PACIA_1716 = 0x08 << 5,
 51 | 	AARCH64_INSN_HINT_PACIB_1716 = 0x0A << 5,
 52 | 	AARCH64_INSN_HINT_AUTIA_1716 = 0x0C << 5,
 53 | 	AARCH64_INSN_HINT_AUTIB_1716 = 0x0E << 5,
 54 | 	AARCH64_INSN_HINT_PACIAZ     = 0x18 << 5,
 55 | 	AARCH64_INSN_HINT_PACIASP    = 0x19 << 5,
 56 | 	AARCH64_INSN_HINT_PACIBZ     = 0x1A << 5,
 57 | 	AARCH64_INSN_HINT_PACIBSP    = 0x1B << 5,
 58 | 	AARCH64_INSN_HINT_AUTIAZ     = 0x1C << 5,
 59 | 	AARCH64_INSN_HINT_AUTIASP    = 0x1D << 5,
 60 | 	AARCH64_INSN_HINT_AUTIBZ     = 0x1E << 5,
 61 | 	AARCH64_INSN_HINT_AUTIBSP    = 0x1F << 5,
 62 | /* the 0x10..0x14 group below is listed after 0x1F: declaration order is not numeric order */
 63 | 	AARCH64_INSN_HINT_ESB  = 0x10 << 5,
 64 | 	AARCH64_INSN_HINT_PSB  = 0x11 << 5,
 65 | 	AARCH64_INSN_HINT_TSB  = 0x12 << 5,
 66 | 	AARCH64_INSN_HINT_CSDB = 0x14 << 5,
 67 | 
 68 | 	AARCH64_INSN_HINT_BTI   = 0x20 << 5,
 69 | 	AARCH64_INSN_HINT_BTIC  = 0x22 << 5,
 70 | 	AARCH64_INSN_HINT_BTIJ  = 0x24 << 5,
 71 | 	AARCH64_INSN_HINT_BTIJC = 0x26 << 5,
 72 | };
 73 | 
 74 | enum aarch64_insn_imm_type { /* immediate field kinds; no initializers, so values run 0..11 in declaration order */
 75 | 	AARCH64_INSN_IMM_ADR,
 76 | 	AARCH64_INSN_IMM_26,
 77 | 	AARCH64_INSN_IMM_19,
 78 | 	AARCH64_INSN_IMM_16,
 79 | 	AARCH64_INSN_IMM_14,
 80 | 	AARCH64_INSN_IMM_12,
 81 | 	AARCH64_INSN_IMM_9,
 82 | 	AARCH64_INSN_IMM_7,
 83 | 	AARCH64_INSN_IMM_6,
 84 | 	AARCH64_INSN_IMM_S,
 85 | 	AARCH64_INSN_IMM_R,
 86 | 	AARCH64_INSN_IMM_N,
 87 | 	AARCH64_INSN_IMM_MAX /* last entry: evaluates to 12, the count of kinds declared above it */
 88 | };
 89 | 
 90 | enum aarch64_insn_register_type { /* register operand slots (Rt, Rn, Rt2, Rm, Rd, Ra, Rs by name); no initializers, so values run 0..6 */
 91 | 	AARCH64_INSN_REGTYPE_RT,
 92 | 	AARCH64_INSN_REGTYPE_RN,
 93 | 	AARCH64_INSN_REGTYPE_RT2,
 94 | 	AARCH64_INSN_REGTYPE_RM,
 95 | 	AARCH64_INSN_REGTYPE_RD,
 96 | 	AARCH64_INSN_REGTYPE_RA,
 97 | 	AARCH64_INSN_REGTYPE_RS,
 98 | };
 99 | 
100 | enum aarch64_insn_register { /* register numbers 0..31, each given explicitly; FP, LR, ZR and SP are aliases sharing values with numbered entries or each other */
101 | 	AARCH64_INSN_REG_0  = 0,
102 | 	AARCH64_INSN_REG_1  = 1,
103 | 	AARCH64_INSN_REG_2  = 2,
104 | 	AARCH64_INSN_REG_3  = 3,
105 | 	AARCH64_INSN_REG_4  = 4,
106 | 	AARCH64_INSN_REG_5  = 5,
107 | 	AARCH64_INSN_REG_6  = 6,
108 | 	AARCH64_INSN_REG_7  = 7,
109 | 	AARCH64_INSN_REG_8  = 8,
110 | 	AARCH64_INSN_REG_9  = 9,
111 | 	AARCH64_INSN_REG_10 = 10,
112 | 	AARCH64_INSN_REG_11 = 11,
113 | 	AARCH64_INSN_REG_12 = 12,
114 | 	AARCH64_INSN_REG_13 = 13,
115 | 	AARCH64_INSN_REG_14 = 14,
116 | 	AARCH64_INSN_REG_15 = 15,
117 | 	AARCH64_INSN_REG_16 = 16,
118 | 	AARCH64_INSN_REG_17 = 17,
119 | 	AARCH64_INSN_REG_18 = 18,
120 | 	AARCH64_INSN_REG_19 = 19,
121 | 	AARCH64_INSN_REG_20 = 20,
122 | 	AARCH64_INSN_REG_21 = 21,
123 | 	AARCH64_INSN_REG_22 = 22,
124 | 	AARCH64_INSN_REG_23 = 23,
125 | 	AARCH64_INSN_REG_24 = 24,
126 | 	AARCH64_INSN_REG_25 = 25,
127 | 	AARCH64_INSN_REG_26 = 26,
128 | 	AARCH64_INSN_REG_27 = 27,
129 | 	AARCH64_INSN_REG_28 = 28,
130 | 	AARCH64_INSN_REG_29 = 29,
131 | 	AARCH64_INSN_REG_FP = 29, /* Frame pointer */
132 | 	AARCH64_INSN_REG_30 = 30,
133 | 	AARCH64_INSN_REG_LR = 30, /* Link register */
134 | 	AARCH64_INSN_REG_ZR = 31, /* Zero: as source register */
135 | 	AARCH64_INSN_REG_SP = 31  /* Stack pointer: as load/store base reg */
136 | };
137 | /* Special-purpose register identifiers; the values are presumably packed system-register encodings - TODO confirm against the users of this enum */
138 | enum aarch64_insn_special_register {
139 | 	AARCH64_INSN_SPCLREG_SPSR_EL1	= 0xC200,
140 | 	AARCH64_INSN_SPCLREG_ELR_EL1	= 0xC201,
141 | 	AARCH64_INSN_SPCLREG_SP_EL0	= 0xC208,
142 | 	AARCH64_INSN_SPCLREG_SPSEL	= 0xC210,
143 | 	AARCH64_INSN_SPCLREG_CURRENTEL	= 0xC212,
144 | 	AARCH64_INSN_SPCLREG_DAIF	= 0xDA11,
145 | 	AARCH64_INSN_SPCLREG_NZCV	= 0xDA10,
146 | 	AARCH64_INSN_SPCLREG_FPCR	= 0xDA20,
147 | 	AARCH64_INSN_SPCLREG_DSPSR_EL0	= 0xDA28,
148 | 	AARCH64_INSN_SPCLREG_DLR_EL0	= 0xDA29,
149 | 	AARCH64_INSN_SPCLREG_SPSR_EL2	= 0xE200,
150 | 	AARCH64_INSN_SPCLREG_ELR_EL2	= 0xE201,
151 | 	AARCH64_INSN_SPCLREG_SP_EL1	= 0xE208,
152 | 	AARCH64_INSN_SPCLREG_SPSR_INQ	= 0xE218,
153 | 	AARCH64_INSN_SPCLREG_SPSR_ABT	= 0xE219,
154 | 	AARCH64_INSN_SPCLREG_SPSR_UND	= 0xE21A,
155 | 	AARCH64_INSN_SPCLREG_SPSR_FIQ	= 0xE21B,
156 | 	AARCH64_INSN_SPCLREG_SPSR_EL3	= 0xF200,
157 | 	AARCH64_INSN_SPCLREG_ELR_EL3	= 0xF201,
158 | 	AARCH64_INSN_SPCLREG_SP_EL2	= 0xF210
159 | };
160 | /* 32-bit vs 64-bit selector, the `variant` argument of many aarch64_insn_gen_*() generators */
161 | enum aarch64_insn_variant {
162 | 	AARCH64_INSN_VARIANT_32BIT,
163 | 	AARCH64_INSN_VARIANT_64BIT
164 | };
165 | /* Condition codes taken by aarch64_insn_gen_cond_branch_imm(); the per-value comments give the comparison each one stands for */
166 | enum aarch64_insn_condition {
167 | 	AARCH64_INSN_COND_EQ = 0x0, /* == */
168 | 	AARCH64_INSN_COND_NE = 0x1, /* != */
169 | 	AARCH64_INSN_COND_CS = 0x2, /* unsigned >= */
170 | 	AARCH64_INSN_COND_CC = 0x3, /* unsigned < */
171 | 	AARCH64_INSN_COND_MI = 0x4, /* < 0 */
172 | 	AARCH64_INSN_COND_PL = 0x5, /* >= 0 */
173 | 	AARCH64_INSN_COND_VS = 0x6, /* overflow */
174 | 	AARCH64_INSN_COND_VC = 0x7, /* no overflow */
175 | 	AARCH64_INSN_COND_HI = 0x8, /* unsigned > */
176 | 	AARCH64_INSN_COND_LS = 0x9, /* unsigned <= */
177 | 	AARCH64_INSN_COND_GE = 0xa, /* signed >= */
178 | 	AARCH64_INSN_COND_LT = 0xb, /* signed < */
179 | 	AARCH64_INSN_COND_GT = 0xc, /* signed > */
180 | 	AARCH64_INSN_COND_LE = 0xd, /* signed <= */
181 | 	AARCH64_INSN_COND_AL = 0xe, /* always */
182 | };
183 | /* Branch kinds taken by aarch64_insn_gen_branch_imm(), aarch64_insn_gen_comp_branch_imm() and aarch64_insn_gen_branch_reg() */
184 | enum aarch64_insn_branch_type {
185 | 	AARCH64_INSN_BRANCH_NOLINK,
186 | 	AARCH64_INSN_BRANCH_LINK,
187 | 	AARCH64_INSN_BRANCH_RETURN,
188 | 	AARCH64_INSN_BRANCH_COMP_ZERO,
189 | 	AARCH64_INSN_BRANCH_COMP_NONZERO,
190 | };
191 | /* Access sizes (8/16/32/64, presumably bits) taken by the load/store, exclusive and ldadd/stadd generators */
192 | enum aarch64_insn_size_type {
193 | 	AARCH64_INSN_SIZE_8,
194 | 	AARCH64_INSN_SIZE_16,
195 | 	AARCH64_INSN_SIZE_32,
196 | 	AARCH64_INSN_SIZE_64,
197 | };
198 | /* Load/store forms taken by aarch64_insn_gen_load_store_reg(), aarch64_insn_gen_load_store_pair() and aarch64_insn_gen_load_store_ex() */
199 | enum aarch64_insn_ldst_type {
200 | 	AARCH64_INSN_LDST_LOAD_REG_OFFSET,
201 | 	AARCH64_INSN_LDST_STORE_REG_OFFSET,
202 | 	AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX,
203 | 	AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
204 | 	AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
205 | 	AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
206 | 	AARCH64_INSN_LDST_LOAD_EX,
207 | 	AARCH64_INSN_LDST_STORE_EX,
208 | };
209 | /* Add/subtract operations, with or without flag setting, taken by aarch64_insn_gen_add_sub_imm() and aarch64_insn_gen_add_sub_shifted_reg() */
210 | enum aarch64_insn_adsb_type {
211 | 	AARCH64_INSN_ADSB_ADD,
212 | 	AARCH64_INSN_ADSB_SUB,
213 | 	AARCH64_INSN_ADSB_ADD_SETFLAGS,
214 | 	AARCH64_INSN_ADSB_SUB_SETFLAGS
215 | };
216 | /* Move-wide flavours taken by aarch64_insn_gen_movewide(); presumably ZERO/KEEP/INVERSE select MOVZ/MOVK/MOVN - TODO confirm */
217 | enum aarch64_insn_movewide_type {
218 | 	AARCH64_INSN_MOVEWIDE_ZERO,
219 | 	AARCH64_INSN_MOVEWIDE_KEEP,
220 | 	AARCH64_INSN_MOVEWIDE_INVERSE
221 | };
222 | /* Bitfield-move flavours taken by aarch64_insn_gen_bitfield() */
223 | enum aarch64_insn_bitfield_type {
224 | 	AARCH64_INSN_BITFIELD_MOVE,
225 | 	AARCH64_INSN_BITFIELD_MOVE_UNSIGNED,
226 | 	AARCH64_INSN_BITFIELD_MOVE_SIGNED
227 | };
228 | /* One-source operations taken by aarch64_insn_gen_data1() */
229 | enum aarch64_insn_data1_type {
230 | 	AARCH64_INSN_DATA1_REVERSE_16,
231 | 	AARCH64_INSN_DATA1_REVERSE_32,
232 | 	AARCH64_INSN_DATA1_REVERSE_64,
233 | };
234 | /* Two-source operations taken by aarch64_insn_gen_data2() */
235 | enum aarch64_insn_data2_type {
236 | 	AARCH64_INSN_DATA2_UDIV,
237 | 	AARCH64_INSN_DATA2_SDIV,
238 | 	AARCH64_INSN_DATA2_LSLV,
239 | 	AARCH64_INSN_DATA2_LSRV,
240 | 	AARCH64_INSN_DATA2_ASRV,
241 | 	AARCH64_INSN_DATA2_RORV,
242 | };
243 | /* Three-source operations taken by aarch64_insn_gen_data3() */
244 | enum aarch64_insn_data3_type {
245 | 	AARCH64_INSN_DATA3_MADD,
246 | 	AARCH64_INSN_DATA3_MSUB,
247 | };
248 | /* Logical operations taken by aarch64_insn_gen_logical_shifted_reg() and aarch64_insn_gen_logical_immediate() */
249 | enum aarch64_insn_logic_type {
250 | 	AARCH64_INSN_LOGIC_AND,
251 | 	AARCH64_INSN_LOGIC_BIC,
252 | 	AARCH64_INSN_LOGIC_ORR,
253 | 	AARCH64_INSN_LOGIC_ORN,
254 | 	AARCH64_INSN_LOGIC_EOR,
255 | 	AARCH64_INSN_LOGIC_EON,
256 | 	AARCH64_INSN_LOGIC_AND_SETFLAGS,
257 | 	AARCH64_INSN_LOGIC_BIC_SETFLAGS
258 | };
259 | /* Prefetch kind, the `type` argument of aarch64_insn_gen_prefetch() */
260 | enum aarch64_insn_prfm_type {
261 | 	AARCH64_INSN_PRFM_TYPE_PLD,
262 | 	AARCH64_INSN_PRFM_TYPE_PLI,
263 | 	AARCH64_INSN_PRFM_TYPE_PST,
264 | };
265 | /* Prefetch target (L1/L2/L3), the `target` argument of aarch64_insn_gen_prefetch() */
266 | enum aarch64_insn_prfm_target {
267 | 	AARCH64_INSN_PRFM_TARGET_L1,
268 | 	AARCH64_INSN_PRFM_TARGET_L2,
269 | 	AARCH64_INSN_PRFM_TARGET_L3,
270 | };
271 | /* Prefetch policy (KEEP or STRM), the `policy` argument of aarch64_insn_gen_prefetch() */
272 | enum aarch64_insn_prfm_policy {
273 | 	AARCH64_INSN_PRFM_POLICY_KEEP,
274 | 	AARCH64_INSN_PRFM_POLICY_STRM,
275 | };
276 | /* Selects ADRP or ADR in aarch64_insn_gen_adr() */
277 | enum aarch64_insn_adr_type {
278 | 	AARCH64_INSN_ADR_TYPE_ADRP,
279 | 	AARCH64_INSN_ADR_TYPE_ADR,
280 | };
281 | /* Defines, per abbr, aarch64_insn_is_<abbr>(code) - true when (code & mask) == val - and aarch64_insn_get_<abbr>_value() returning val; BUILD_BUG_ON rejects a val with bits outside mask */
282 | #define	__AARCH64_INSN_FUNCS(abbr, mask, val)				\
283 | static __always_inline bool aarch64_insn_is_##abbr(u32 code)		\
284 | {									\
285 | 	BUILD_BUG_ON(~(mask) & (val));					\
286 | 	return (code & (mask)) == (val);				\
287 | }									\
288 | static __always_inline u32 aarch64_insn_get_##abbr##_value(void)	\
289 | {									\
290 | 	return (val);							\
291 | }
292 | /* One (abbr, mask, val) row per instruction pattern: an instruction word matches when its bits under mask equal val; the helper macro is removed again right after the table */
293 | __AARCH64_INSN_FUNCS(adr,	0x9F000000, 0x10000000)
294 | __AARCH64_INSN_FUNCS(adrp,	0x9F000000, 0x90000000)
295 | __AARCH64_INSN_FUNCS(prfm,	0x3FC00000, 0x39800000)
296 | __AARCH64_INSN_FUNCS(prfm_lit,	0xFF000000, 0xD8000000)
297 | __AARCH64_INSN_FUNCS(store_imm,	0x3FC00000, 0x39000000)
298 | __AARCH64_INSN_FUNCS(load_imm,	0x3FC00000, 0x39400000)
299 | __AARCH64_INSN_FUNCS(store_pre,	0x3FE00C00, 0x38000C00)
300 | __AARCH64_INSN_FUNCS(load_pre,	0x3FE00C00, 0x38400C00)
301 | __AARCH64_INSN_FUNCS(store_post,	0x3FE00C00, 0x38000400)
302 | __AARCH64_INSN_FUNCS(load_post,	0x3FE00C00, 0x38400400)
303 | __AARCH64_INSN_FUNCS(str_reg,	0x3FE0EC00, 0x38206800)
304 | __AARCH64_INSN_FUNCS(ldadd,	0x3F20FC00, 0x38200000)
305 | __AARCH64_INSN_FUNCS(ldr_reg,	0x3FE0EC00, 0x38606800)
306 | __AARCH64_INSN_FUNCS(ldr_lit,	0xBF000000, 0x18000000)
307 | __AARCH64_INSN_FUNCS(ldrsw_lit,	0xFF000000, 0x98000000)
308 | __AARCH64_INSN_FUNCS(exclusive,	0x3F800000, 0x08000000)
309 | __AARCH64_INSN_FUNCS(load_ex,	0x3F400000, 0x08400000)
310 | __AARCH64_INSN_FUNCS(store_ex,	0x3F400000, 0x08000000)
311 | __AARCH64_INSN_FUNCS(stp,	0x7FC00000, 0x29000000)
312 | __AARCH64_INSN_FUNCS(ldp,	0x7FC00000, 0x29400000)
313 | __AARCH64_INSN_FUNCS(stp_post,	0x7FC00000, 0x28800000)
314 | __AARCH64_INSN_FUNCS(ldp_post,	0x7FC00000, 0x28C00000)
315 | __AARCH64_INSN_FUNCS(stp_pre,	0x7FC00000, 0x29800000)
316 | __AARCH64_INSN_FUNCS(ldp_pre,	0x7FC00000, 0x29C00000)
317 | __AARCH64_INSN_FUNCS(add_imm,	0x7F000000, 0x11000000)
318 | __AARCH64_INSN_FUNCS(adds_imm,	0x7F000000, 0x31000000)
319 | __AARCH64_INSN_FUNCS(sub_imm,	0x7F000000, 0x51000000)
320 | __AARCH64_INSN_FUNCS(subs_imm,	0x7F000000, 0x71000000)
321 | __AARCH64_INSN_FUNCS(movn,	0x7F800000, 0x12800000)
322 | __AARCH64_INSN_FUNCS(sbfm,	0x7F800000, 0x13000000)
323 | __AARCH64_INSN_FUNCS(bfm,	0x7F800000, 0x33000000)
324 | __AARCH64_INSN_FUNCS(movz,	0x7F800000, 0x52800000)
325 | __AARCH64_INSN_FUNCS(ubfm,	0x7F800000, 0x53000000)
326 | __AARCH64_INSN_FUNCS(movk,	0x7F800000, 0x72800000)
327 | __AARCH64_INSN_FUNCS(add,	0x7F200000, 0x0B000000)
328 | __AARCH64_INSN_FUNCS(adds,	0x7F200000, 0x2B000000)
329 | __AARCH64_INSN_FUNCS(sub,	0x7F200000, 0x4B000000)
330 | __AARCH64_INSN_FUNCS(subs,	0x7F200000, 0x6B000000)
331 | __AARCH64_INSN_FUNCS(madd,	0x7FE08000, 0x1B000000)
332 | __AARCH64_INSN_FUNCS(msub,	0x7FE08000, 0x1B008000)
333 | __AARCH64_INSN_FUNCS(udiv,	0x7FE0FC00, 0x1AC00800)
334 | __AARCH64_INSN_FUNCS(sdiv,	0x7FE0FC00, 0x1AC00C00)
335 | __AARCH64_INSN_FUNCS(lslv,	0x7FE0FC00, 0x1AC02000)
336 | __AARCH64_INSN_FUNCS(lsrv,	0x7FE0FC00, 0x1AC02400)
337 | __AARCH64_INSN_FUNCS(asrv,	0x7FE0FC00, 0x1AC02800)
338 | __AARCH64_INSN_FUNCS(rorv,	0x7FE0FC00, 0x1AC02C00)
339 | __AARCH64_INSN_FUNCS(rev16,	0x7FFFFC00, 0x5AC00400)
340 | __AARCH64_INSN_FUNCS(rev32,	0x7FFFFC00, 0x5AC00800)
341 | __AARCH64_INSN_FUNCS(rev64,	0x7FFFFC00, 0x5AC00C00)
342 | __AARCH64_INSN_FUNCS(and,	0x7F200000, 0x0A000000)
343 | __AARCH64_INSN_FUNCS(bic,	0x7F200000, 0x0A200000)
344 | __AARCH64_INSN_FUNCS(orr,	0x7F200000, 0x2A000000)
345 | __AARCH64_INSN_FUNCS(mov_reg,	0x7FE0FFE0, 0x2A0003E0)
346 | __AARCH64_INSN_FUNCS(orn,	0x7F200000, 0x2A200000)
347 | __AARCH64_INSN_FUNCS(eor,	0x7F200000, 0x4A000000)
348 | __AARCH64_INSN_FUNCS(eon,	0x7F200000, 0x4A200000)
349 | __AARCH64_INSN_FUNCS(ands,	0x7F200000, 0x6A000000)
350 | __AARCH64_INSN_FUNCS(bics,	0x7F200000, 0x6A200000)
351 | __AARCH64_INSN_FUNCS(and_imm,	0x7F800000, 0x12000000)
352 | __AARCH64_INSN_FUNCS(orr_imm,	0x7F800000, 0x32000000)
353 | __AARCH64_INSN_FUNCS(eor_imm,	0x7F800000, 0x52000000)
354 | __AARCH64_INSN_FUNCS(ands_imm,	0x7F800000, 0x72000000)
355 | __AARCH64_INSN_FUNCS(extr,	0x7FA00000, 0x13800000)
356 | __AARCH64_INSN_FUNCS(b,		0xFC000000, 0x14000000)
357 | __AARCH64_INSN_FUNCS(bl,	0xFC000000, 0x94000000)
358 | __AARCH64_INSN_FUNCS(cbz,	0x7F000000, 0x34000000)
359 | __AARCH64_INSN_FUNCS(cbnz,	0x7F000000, 0x35000000)
360 | __AARCH64_INSN_FUNCS(tbz,	0x7F000000, 0x36000000)
361 | __AARCH64_INSN_FUNCS(tbnz,	0x7F000000, 0x37000000)
362 | __AARCH64_INSN_FUNCS(bcond,	0xFF000010, 0x54000000)
363 | __AARCH64_INSN_FUNCS(svc,	0xFFE0001F, 0xD4000001)
364 | __AARCH64_INSN_FUNCS(hvc,	0xFFE0001F, 0xD4000002)
365 | __AARCH64_INSN_FUNCS(smc,	0xFFE0001F, 0xD4000003)
366 | __AARCH64_INSN_FUNCS(brk,	0xFFE0001F, 0xD4200000)
367 | __AARCH64_INSN_FUNCS(exception,	0xFF000000, 0xD4000000)
368 | __AARCH64_INSN_FUNCS(hint,	0xFFFFF01F, 0xD503201F)
369 | __AARCH64_INSN_FUNCS(br,	0xFFFFFC1F, 0xD61F0000)
370 | __AARCH64_INSN_FUNCS(br_auth,	0xFEFFF800, 0xD61F0800)
371 | __AARCH64_INSN_FUNCS(blr,	0xFFFFFC1F, 0xD63F0000)
372 | __AARCH64_INSN_FUNCS(blr_auth,	0xFEFFF800, 0xD63F0800)
373 | __AARCH64_INSN_FUNCS(ret,	0xFFFFFC1F, 0xD65F0000)
374 | __AARCH64_INSN_FUNCS(ret_auth,	0xFFFFFBFF, 0xD65F0BFF)
375 | __AARCH64_INSN_FUNCS(eret,	0xFFFFFFFF, 0xD69F03E0)
376 | __AARCH64_INSN_FUNCS(eret_auth,	0xFFFFFBFF, 0xD69F0BFF)
377 | __AARCH64_INSN_FUNCS(mrs,	0xFFF00000, 0xD5300000)
378 | __AARCH64_INSN_FUNCS(msr_imm,	0xFFF8F01F, 0xD500401F)
379 | __AARCH64_INSN_FUNCS(msr_reg,	0xFFF00000, 0xD5100000)
380 | __AARCH64_INSN_FUNCS(dmb,	0xFFFFF0FF, 0xD50330BF)
381 | __AARCH64_INSN_FUNCS(dsb_base,	0xFFFFF0FF, 0xD503309F)
382 | __AARCH64_INSN_FUNCS(dsb_nxs,	0xFFFFF3FF, 0xD503323F)
383 | __AARCH64_INSN_FUNCS(isb,	0xFFFFF0FF, 0xD50330DF)
384 | __AARCH64_INSN_FUNCS(sb,	0xFFFFFFFF, 0xD50330FF)
385 | __AARCH64_INSN_FUNCS(clrex,	0xFFFFF0FF, 0xD503305F)
386 | __AARCH64_INSN_FUNCS(ssbb,	0xFFFFFFFF, 0xD503309F)
387 | __AARCH64_INSN_FUNCS(pssbb,	0xFFFFFFFF, 0xD503349F)
388 | 
389 | #undef	__AARCH64_INSN_FUNCS
390 | 
391 | bool aarch64_insn_is_steppable_hint(u32 insn);
392 | bool aarch64_insn_is_branch_imm(u32 insn);
393 | /* True when insn matches either the ADR or the ADRP pattern. */
394 | static inline bool aarch64_insn_is_adr_adrp(u32 insn)
395 | {
396 | 	return aarch64_insn_is_adrp(insn) ? true : aarch64_insn_is_adr(insn);
397 | }
398 | /* True when insn matches either DSB pattern: dsb_base or dsb_nxs. */
399 | static inline bool aarch64_insn_is_dsb(u32 insn)
400 | {
401 | 	return aarch64_insn_is_dsb_nxs(insn) ? true : aarch64_insn_is_dsb_base(insn);
402 | }
403 | /* True for any pattern grouped here as a barrier: dmb, dsb, isb, sb, clrex, ssbb or pssbb. */
404 | static inline bool aarch64_insn_is_barrier(u32 insn)
405 | {
406 | 	if (aarch64_insn_is_dmb(insn) || aarch64_insn_is_dsb(insn) ||
407 | 	    aarch64_insn_is_isb(insn) || aarch64_insn_is_sb(insn))
408 | 		return true;
409 | 	return aarch64_insn_is_clrex(insn) || aarch64_insn_is_ssbb(insn) || aarch64_insn_is_pssbb(insn);
410 | }
411 | /* True when insn matches any of the store_imm, store_pre or store_post patterns. */
412 | static inline bool aarch64_insn_is_store_single(u32 insn)
413 | {
414 | 	if (aarch64_insn_is_store_imm(insn) || aarch64_insn_is_store_pre(insn))
415 | 		return true;
416 | 	return aarch64_insn_is_store_post(insn);
417 | }
418 | /* True when insn matches any of the stp, stp_pre or stp_post patterns. */
419 | static inline bool aarch64_insn_is_store_pair(u32 insn)
420 | {
421 | 	if (aarch64_insn_is_stp(insn) || aarch64_insn_is_stp_pre(insn))
422 | 		return true;
423 | 	return aarch64_insn_is_stp_post(insn);
424 | }
425 | /* True when insn matches any of the load_imm, load_pre or load_post patterns. */
426 | static inline bool aarch64_insn_is_load_single(u32 insn)
427 | {
428 | 	if (aarch64_insn_is_load_imm(insn) || aarch64_insn_is_load_pre(insn))
429 | 		return true;
430 | 	return aarch64_insn_is_load_post(insn);
431 | }
432 | /* True when insn matches any of the ldp, ldp_pre or ldp_post patterns. */
433 | static inline bool aarch64_insn_is_load_pair(u32 insn)
434 | {
435 | 	if (aarch64_insn_is_ldp(insn) || aarch64_insn_is_ldp_pre(insn))
436 | 		return true;
437 | 	return aarch64_insn_is_ldp_post(insn);
438 | }
439 | 
440 | enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
441 | bool aarch64_insn_uses_literal(u32 insn);
442 | bool aarch64_insn_is_branch(u32 insn);
443 | u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn);
444 | u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
445 | 				  u32 insn, u64 imm);
446 | u32 aarch64_insn_decode_register(enum aarch64_insn_register_type type,
447 | 					 u32 insn);
448 | u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
449 | 				enum aarch64_insn_branch_type type);
450 | u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
451 | 				     enum aarch64_insn_register reg,
452 | 				     enum aarch64_insn_variant variant,
453 | 				     enum aarch64_insn_branch_type type);
454 | u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
455 | 				     enum aarch64_insn_condition cond);
456 | u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op);
457 | u32 aarch64_insn_gen_nop(void);
458 | u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg,
459 | 				enum aarch64_insn_branch_type type);
460 | u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
461 | 				    enum aarch64_insn_register base,
462 | 				    enum aarch64_insn_register offset,
463 | 				    enum aarch64_insn_size_type size,
464 | 				    enum aarch64_insn_ldst_type type);
465 | u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
466 | 				     enum aarch64_insn_register reg2,
467 | 				     enum aarch64_insn_register base,
468 | 				     int offset,
469 | 				     enum aarch64_insn_variant variant,
470 | 				     enum aarch64_insn_ldst_type type);
471 | u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
472 | 				   enum aarch64_insn_register base,
473 | 				   enum aarch64_insn_register state,
474 | 				   enum aarch64_insn_size_type size,
475 | 				   enum aarch64_insn_ldst_type type);
476 | u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
477 | 			   enum aarch64_insn_register address,
478 | 			   enum aarch64_insn_register value,
479 | 			   enum aarch64_insn_size_type size);
480 | u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
481 | 			   enum aarch64_insn_register value,
482 | 			   enum aarch64_insn_size_type size);
483 | u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
484 | 				 enum aarch64_insn_register src,
485 | 				 int imm, enum aarch64_insn_variant variant,
486 | 				 enum aarch64_insn_adsb_type type);
487 | u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr,
488 | 			 enum aarch64_insn_register reg,
489 | 			 enum aarch64_insn_adr_type type);
490 | u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
491 | 			      enum aarch64_insn_register src,
492 | 			      int immr, int imms,
493 | 			      enum aarch64_insn_variant variant,
494 | 			      enum aarch64_insn_bitfield_type type);
495 | u32 aarch64_insn_gen_movewide(enum aarch64_insn_register dst,
496 | 			      int imm, int shift,
497 | 			      enum aarch64_insn_variant variant,
498 | 			      enum aarch64_insn_movewide_type type);
499 | u32 aarch64_insn_gen_add_sub_shifted_reg(enum aarch64_insn_register dst,
500 | 					 enum aarch64_insn_register src,
501 | 					 enum aarch64_insn_register reg,
502 | 					 int shift,
503 | 					 enum aarch64_insn_variant variant,
504 | 					 enum aarch64_insn_adsb_type type);
505 | u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst,
506 | 			   enum aarch64_insn_register src,
507 | 			   enum aarch64_insn_variant variant,
508 | 			   enum aarch64_insn_data1_type type);
509 | u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst,
510 | 			   enum aarch64_insn_register src,
511 | 			   enum aarch64_insn_register reg,
512 | 			   enum aarch64_insn_variant variant,
513 | 			   enum aarch64_insn_data2_type type);
514 | u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst,
515 | 			   enum aarch64_insn_register src,
516 | 			   enum aarch64_insn_register reg1,
517 | 			   enum aarch64_insn_register reg2,
518 | 			   enum aarch64_insn_variant variant,
519 | 			   enum aarch64_insn_data3_type type);
520 | u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
521 | 					 enum aarch64_insn_register src,
522 | 					 enum aarch64_insn_register reg,
523 | 					 int shift,
524 | 					 enum aarch64_insn_variant variant,
525 | 					 enum aarch64_insn_logic_type type);
526 | u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst,
527 | 			      enum aarch64_insn_register src,
528 | 			      enum aarch64_insn_variant variant);
529 | u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
530 | 				       enum aarch64_insn_variant variant,
531 | 				       enum aarch64_insn_register Rn,
532 | 				       enum aarch64_insn_register Rd,
533 | 				       u64 imm);
534 | u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
535 | 			  enum aarch64_insn_register Rm,
536 | 			  enum aarch64_insn_register Rn,
537 | 			  enum aarch64_insn_register Rd,
538 | 			  u8 lsb);
539 | u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
540 | 			      enum aarch64_insn_prfm_type type,
541 | 			      enum aarch64_insn_prfm_target target,
542 | 			      enum aarch64_insn_prfm_policy policy);
543 | s32 aarch64_get_branch_offset(u32 insn);
544 | u32 aarch64_set_branch_offset(u32 insn, s32 offset);
545 | 
546 | s32 aarch64_insn_adrp_get_offset(u32 insn);
547 | u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset);
548 | 
549 | bool aarch32_insn_is_wide(u32 insn);
550 | /* Offsets named after the Rn, Rt and Rt2 register fields; presumably the `offset` passed to aarch32_insn_extract_reg_num() - TODO confirm */
551 | #define A32_RN_OFFSET	16
552 | #define A32_RT_OFFSET	12
553 | #define A32_RT2_OFFSET	 0
554 | 
555 | u32 aarch64_insn_extract_system_reg(u32 insn);
556 | u32 aarch32_insn_extract_reg_num(u32 insn, int offset);
557 | u32 aarch32_insn_mcr_extract_opc2(u32 insn);
558 | u32 aarch32_insn_mcr_extract_crm(u32 insn);
559 | /* pstate_check_t: function taking an unsigned long and returning bool; aarch32_opcode_cond_checks is a table of 16 constant pointers to such functions, defined elsewhere */
560 | typedef bool (pstate_check_t)(unsigned long);
561 | extern pstate_check_t * const aarch32_opcode_cond_checks[16];
562 | 
563 | #endif /* __ASSEMBLY__ */
564 | 
565 | #endif	/* __ASM_INSN_H */
566 | 


--------------------------------------------------------------------------------
/macro.inc:
--------------------------------------------------------------------------------
 1 | // load 'far' address as a +/-4GB offset from PC: adrp sets \Xn from \addr, then add applies the :lo12: part of \addr
 2 | .macro adrf Xn:req, addr:req
 3 | 	adrp	\Xn, \addr
 4 | 	add	\Xn, \Xn, :lo12:\addr
 5 | .endm
 6 | 
 7 | // load a 32-bit immediate: movz for the low (else the high) halfword, plus a movk when both are non-zero; 0 becomes a mov from wzr; values above 0xffffffff raise an assembly error
 8 | .macro movl Wn:req, imm:req
 9 | 	.if (\imm) > 0xffffffff
10 | 	.error "Immediate out of range"
11 | 	.endif
12 | 	.if (\imm) & 0xffff
13 | 	movz	\Wn, (\imm) & 0xffff
14 | 	.if (\imm) & (0xffff << 16)
15 | 	movk	\Wn, ((\imm) >> 16) & 0xffff, lsl 16
16 | 	.endif
17 | 	.elseif (\imm) & (0xffff << 16)
18 | 	movz	\Wn, ((\imm) >> 16) & 0xffff, lsl 16
19 | 	.else
20 | 	mov	\Wn, wzr
21 | 	.endif
22 | .endm
23 | // helper for movq: for each 16-bit chunk of \imm at bit 16, 32 and 48, emit a movk when that chunk is non-zero
24 | .macro __tail_movq Xn:req, imm:req
25 | 	.irp iter,1,2,3
26 | 	.if (\imm) & (0xffff << (\iter * 16))
27 | 	movk	\Xn, ((\imm) >> (\iter * 16)) & 0xffff, lsl \iter * 16
28 | 	.endif
29 | 	.endr
30 | .endm
31 | 
32 | // load a 64-bit immediate: movz the lowest non-zero 16-bit chunk, then let __tail_movq movk the remaining non-zero higher chunks; 0 becomes a mov from xzr
33 | .macro movq Xn:req, imm:req
34 | 	.if (\imm) & 0xffff
35 | 	movz	\Xn, (\imm) & 0xffff
36 | 	__tail_movq \Xn, "((\imm) & ~0xffff)"
37 | 	.elseif (\imm) & (0xffff << 16)
38 | 	movz	\Xn, ((\imm) >> 16) & 0xffff, lsl 16
39 | 	__tail_movq \Xn, "((\imm) & ~0xffffffff)"
40 | 	.elseif (\imm) & (0xffff << 32)
41 | 	movz	\Xn, ((\imm) >> 32) & 0xffff, lsl 32
42 | 	__tail_movq \Xn, "((\imm) & ~0xffffffffffff)"
43 | 	.elseif (\imm) & (0xffff << 48)
44 | 	movz	\Xn, ((\imm) >> 48) & 0xffff, lsl 48
45 | 	.else
46 | 	mov     \Xn, xzr
47 | 	.endif
48 | .endm
49 | 


--------------------------------------------------------------------------------
/reloc.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Loading and relocation of relocatable ELF objects (REL)
  3 |  *
  4 |  * Copyright (C) 2021 Martin Krastev <blu.dark@gmail.com>
  5 |  */
  6 | 
  7 | #include <stdio.h>
  8 | #include <stdlib.h>
  9 | #include <string.h>
 10 | #if __APPLE__ != 0
 11 | #include <libelf/libelf.h>
 12 | #else
 13 | #include <libelf.h>
 14 | #endif
 15 | #include <fcntl.h>
 16 | #include <stddef.h>
 17 | #include <stdint.h>
 18 | #include <errno.h>
 19 | #include <unistd.h>
 20 | #include <sys/stat.h>
 21 | #include <sys/mman.h>
 22 | 
 23 | #if __APPLE__ != 0
 24 | #include "char_ptr_arr.h"
 25 | #else
 26 | #include "vma.h"
 27 | #endif
 28 | 
 29 | #if __APPLE__ != 0
 30 | #ifndef EM_AARCH64
 31 | #define EM_AARCH64 183
 32 | #endif
 33 | 
 34 | #ifndef MAP_POPULATE
 35 | #define MAP_POPULATE 0
 36 | #endif
 37 | 
 38 | typedef Elf64_Half Elf64_Section;
 39 | #endif
 40 | 
 41 | int apply_relocate_add(Elf64_Shdr **sechdrs,
 42 |                        unsigned int symsec,
 43 |                        unsigned int relsec);
 44 | 
 45 | /* Following code based on IBM s390 ELF relocation sample */
 46 | /* https://www.ibm.com/docs/en/zos/2.2.0?topic=file-example-relocating-addresses-within-elf */
 47 | /* Short aliases for the <stdint.h> 64-bit integer types */
 48 | typedef uint64_t uint64;
 49 | typedef int64_t  int64;
 50 | 
 51 | /*  ELF file details: the libelf handle, the section-header list and the indices of selected sections
 52 | */
 53 | typedef struct ElfDetails_s {
 54 | 	Elf*                ed_elf;          /* ->ELF instance for CU          */
 55 | 
 56 | 	/* ELF Section details                                                 */
 57 | 	Elf64_Shdr**        ed_shdrs;        /* List of ->ELF section header, indexed by section index */
 58 | 	uint64              ed_n_elf_scns;   /* Number of ELF sections         */
 59 | 
 60 | 	Elf64_Section       ed_text_idx;     /* .text section index            */
 61 | 	Elf64_Section       ed_rel_text_idx; /* .rel.text section index        */
 62 | 	Elf64_Section       ed_rela_text_idx;/* .rela.text section index       */
 63 | 	Elf64_Section       ed_symtab_idx;   /* .symtab section index          */
 64 | 	Elf64_Section       ed_strtab_idx;   /* .strtab section index          */
 65 | } *ElfDetails; /* NB: ElfDetails is itself a pointer to the struct */
 66 | 
 67 | /* Return the spelling of the STT_* constant that equals x (presumably the
 68 |    ELF64_ST_TYPE() part of st_info), or "unknown_st_type" if none does */
 69 | const char *str_from_st_type(uint8_t x)
 70 | {
 71 | /* Each case returns its own label as text, so the two cannot drift apart */
 72 | #define ST_TYPE_NAME(id) case id: return #id
 73 | 
 74 | 	switch (x) {
 75 | 	ST_TYPE_NAME(STT_NOTYPE);
 76 | 	ST_TYPE_NAME(STT_OBJECT);
 77 | 	ST_TYPE_NAME(STT_FUNC);
 78 | 	ST_TYPE_NAME(STT_SECTION);
 79 | 	ST_TYPE_NAME(STT_FILE);
 80 | 	ST_TYPE_NAME(STT_COMMON);
 81 | 	ST_TYPE_NAME(STT_TLS);
 82 | 	ST_TYPE_NAME(STT_NUM);
 83 | 
 84 | 	ST_TYPE_NAME(STT_LOOS);
 85 | #if 0	/* kept disabled; presumably its value clashes with another case - confirm */
 86 | 	ST_TYPE_NAME(STT_GNU_IFUNC);
 87 | #endif
 88 | 	ST_TYPE_NAME(STT_HIOS);
 89 | 	ST_TYPE_NAME(STT_LOPROC);
 90 | 	ST_TYPE_NAME(STT_HIPROC);
 91 | 
 92 | 	default:
 93 | 		/* no named constant matches x */
 94 | 		break;
 95 | 	}
 96 | 
 97 | #undef ST_TYPE_NAME
 98 | 
 99 | 	return "unknown_st_type";
100 | }
101 | 
102 | /* Return the spelling of the STB_* constant that equals x (presumably the
103 |    ELF64_ST_BIND() part of st_info), or "unknown_st_bind" if none does */
104 | const char *str_from_st_bind(uint8_t x)
105 | {
106 | /* Each case returns its own label as text, so the two cannot drift apart */
107 | #define ST_BIND_NAME(id) case id: return #id
108 | 
109 | 	switch (x) {
110 | 	ST_BIND_NAME(STB_LOCAL);
111 | 	ST_BIND_NAME(STB_GLOBAL);
112 | 	ST_BIND_NAME(STB_WEAK);
113 | 	ST_BIND_NAME(STB_NUM);
114 | 	ST_BIND_NAME(STB_LOOS);
115 | #if 0	/* kept disabled; presumably its value clashes with another case - confirm */
116 | 	ST_BIND_NAME(STB_GNU_UNIQUE);
117 | #endif
118 | 	ST_BIND_NAME(STB_HIOS);
119 | 	ST_BIND_NAME(STB_LOPROC);
120 | 	ST_BIND_NAME(STB_HIPROC);
121 | 	default:
122 | 		break;
123 | 	}
124 | 
125 | #undef ST_BIND_NAME
126 | 	return "unknown_st_bind";
127 | }
128 | /* Fetch the string at offset `name` from the string table that the ELF header's e_shstrndx selects */
129 | const char *str_from_sh_name(
130 | 	Elf64_Word name,
131 | 	Elf *elf)
132 | {
133 | 	const size_t strtab_ndx = elf64_getehdr(elf)->e_shstrndx;
134 | 	return elf_strptr(elf, strtab_ndx, name);
135 | }
136 | /* Printable name for a symbol's section index: the section's name, a SHN_* constant's spelling, or "<unknown section>"; never NULL */
137 | const char *str_from_st_shndx(
138 | 	Elf64_Section shndx,
139 | 	Elf *elf)
140 | {
141 | 	if (shndx != SHN_UNDEF && shndx < SHN_LORESERVE) {
142 | 		/* Ordinary index: read the section's name; any libelf failure (NULL
143 | 		   header, section or string) degrades to the unknown-section text */
144 | 		const Elf64_Ehdr *ehdr = elf64_getehdr(elf);
145 | 		const Elf64_Shdr *shdr = elf64_getshdr(elf_getscn(elf, shndx));
146 | 		const char *name = (ehdr == NULL || shdr == NULL) ? NULL :
147 | 			elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
148 | 		return name != NULL ? name : "<unknown section>";
149 | 	}
150 | 
151 | 	switch (shndx) { /* special indices are reported by constant name */
152 | 	case SHN_UNDEF:  return "SHN_UNDEF";
153 | 	case SHN_ABS:    return "SHN_ABS";
154 | 	case SHN_COMMON: return "SHN_COMMON";
155 | 	case SHN_XINDEX: return "SHN_XINDEX";
156 | 	}
157 | 	return "<unknown section>";
158 | }
159 | 
160 | /* Seek a defined global (non-section) symbol by name in a previously loaded REL and,
161 |    failing that, in ever earlier RELs; Elf64_Ehdr.e_shstrndx has been repurposed as
162 |    e_strtabndx and e_entry is read as the previous REL's Elf handle. Returns NULL when
163 |    nothing matches or when elf, name or an ELF header is NULL */
164 | static Elf64_Sym *seek_prev_symbol(Elf *elf, const char *name)
165 | {
166 | 	const Elf64_Ehdr *ehdr64;
167 | 
168 | 	/* Iterate rather than recurse: one pass per REL in the chain */
169 | 	while (name != NULL && elf != NULL && (ehdr64 = elf64_getehdr(elf)) != NULL) {
170 | 		Elf_Scn *scn = NULL;
171 | 
172 | 		/* Enumerate the ELF sections, seeking for .symtab */
173 | 		while ((scn = elf_nextscn(elf, scn)) != NULL) {
174 | 			const Elf64_Shdr *shdr64 = elf64_getshdr(scn);
175 | 			Elf64_Sym *symtab;
176 | 			uint64 n_symbols, i;
177 | 
178 | 			/* A header libelf cannot provide is skipped, not dereferenced */
179 | 			if (shdr64 == NULL || shdr64->sh_type != SHT_SYMTAB) {
180 | 				continue;
181 | 			}
182 | 
183 | 			/* sh_addr is used as the table's address; entry 0 is the dummy */
184 | 			symtab = (Elf64_Sym*)shdr64->sh_addr;
185 | 			n_symbols = shdr64->sh_size / sizeof(*symtab);
186 | 
187 | 			for (i = 1; i < n_symbols; i++) {
188 | 				const char *sym_name;
189 | 
190 | 				if (symtab[i].st_shndx == SHN_UNDEF ||
191 | 					ELF64_ST_TYPE(symtab[i].st_info) == STT_SECTION ||
192 | 					ELF64_ST_BIND(symtab[i].st_info) != STB_GLOBAL) {
193 | 					continue;
194 | 				}
195 | 				/* e_shstrndx indexes .strtab here, so this yields a symbol name */
196 | 				sym_name = elf_strptr(elf, ehdr64->e_shstrndx, symtab[i].st_name);
197 | 				if (sym_name != NULL && strcmp(sym_name, name) == 0) {
198 | 					return &symtab[i];
199 | 				}
200 | 			}
201 | 			/* More than one SHT_SYMTAB section is not supported */
202 | 			break;
203 | 		}
204 | 		elf = (Elf *)ehdr64->e_entry; /* fall back to the earlier REL */
205 | 	}
206 | 	return NULL;
207 | }
208 | 
209 | /* Process 64-bit ELF symbol table: section symbols take their section's sh_addr as st_value, other symbols with an
210 |    ordinary section index get that sh_addr added, and undefined globals take the st_value found in previous RELs.
211 |    Returns 0 on success; -1 on a NULL Elf handle, a missing or empty symtab, an invalid section index or an unresolved global
212 | */
213 | static int
214 | 	_load_elf64_symbol_table(
215 | 		ElfDetails details)
216 | {
217 | 	Elf *elf = details->ed_elf;
218 | 	const Elf64_Shdr *symtab_shdr;
219 | 	Elf64_Sym *symtab;
220 | 	uint64 n_symbols, i;
221 | 
222 | 	if (elf == NULL) {
223 | 		return -1;
224 | 	}
225 | 
226 | 	/* The header list is calloc'ed by _load_elf_file_details(), so an unfilled slot reads as NULL */
227 | 	symtab_shdr = details->ed_shdrs[details->ed_symtab_idx];
228 | 	if (symtab_shdr == NULL || symtab_shdr->sh_size < sizeof(*symtab)) {
229 | 		return -1;
230 | 	}
231 | 	n_symbols = symtab_shdr->sh_size / sizeof(*symtab);
232 | 
233 | 	/* Process the .symtab section, skipping the first dummy */
234 | 	symtab = (Elf64_Sym*)symtab_shdr->sh_addr;
235 | 	for (i = 1; i < n_symbols; i++) {
236 | 		Elf64_Sym *sym = &symtab[i];
237 | 		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) {
238 | 			/* Section symbols cannot index anything else but their respective sections */
239 | 			if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE ||
240 | 				sym->st_shndx >= details->ed_n_elf_scns) {
241 | 				return -1;
242 | 			}
243 | 			sym->st_value = details->ed_shdrs[sym->st_shndx]->sh_addr;
244 | 		}
245 | 		else if (sym->st_shndx != SHN_UNDEF && sym->st_shndx < SHN_LORESERVE) {
246 | 			/* Non-section symbols without special indices must index valid sections */
247 | 			if (sym->st_shndx >= details->ed_n_elf_scns) {
248 | 				return -1;
249 | 			}
250 | 			sym->st_value += details->ed_shdrs[sym->st_shndx]->sh_addr;
251 | 		}
252 | 		else if (sym->st_shndx == SHN_UNDEF && ELF64_ST_BIND(sym->st_info) == STB_GLOBAL) {
253 | 			/* Seek undefined symbols from this REL in previous RELs */
254 | 			const char *name = elf_strptr(elf, details->ed_strtab_idx, sym->st_name);
255 | 			const Elf64_Sym *prev_sym = NULL;
256 | 			if (name != NULL) { /* elf_strptr() returns NULL on failure; never search for or print a NULL name */
257 | 				prev_sym = seek_prev_symbol((Elf *)elf64_getehdr(elf)->e_entry, name);
258 | 			}
259 | 			if (prev_sym == NULL) {
260 | 				fprintf(stderr, "error: undefined symbol '%s'\n", name != NULL ? name : "<unnamed>");
261 | 				return -1;
262 | 			}
263 | 			sym->st_value = prev_sym->st_value;
264 | 		}
265 | 	}
266 | 
267 | 	return 0;
268 | }
269 | /* Single-bit flags for the kinds of SHT_PROGBITS sections a REL has; presumably OR-ed into *ret_caps by _load_elf_file_details() - confirm */
270 | enum {
271 | 	REL_CAPS_RW_SECTIONS = 1U, /* REL has read-write SHT_PROGBITS sections */
272 | 	REL_CAPS_RO_SECTIONS = 2U, /* REL has read-only SHT_PROGBITS sections */
273 | 	REL_CAPS_RX_SECTIONS = 4U  /* REL has read-exec SHT_PROGBITS sections */
274 | };
275 | 
276 | /* Load ELF file section and symbol tables; relocate sections and symbols based on loading/mapping VA
277 | */
278 | static int
279 | 	_load_elf_file_details(
280 | 		Elf *elf,
281 | 		ElfDetails *ret_details,
282 | 		unsigned *ret_caps,
283 | 		void *rawdata_rw,
284 | 		void *rawdata_ro)
285 | {
286 | 	ElfDetails details;
287 | 	char *ehdr_ident;
288 | 	Elf64_Ehdr *ehdr64;
289 | 	Elf64_Shdr *shdr64;
290 | 	Elf_Scn *scn;
291 | 	const char *scn_name;
292 | 	Elf64_Shdr **section_list;
293 | 	size_t scn_idx, n_elf_scns;
294 | 	Elf64_Section shstrtab_idx = 0;
295 | 	Elf64_Section symtab_idx = 0;
296 | 	Elf64_Section strtab_idx = 0;
297 | 	Elf64_Section rodata_idx = 0;
298 | 	Elf64_Section data_idx = 0;
299 | 	Elf64_Section bss_idx = 0;
300 | 	Elf64_Section text_idx = 0;
301 | 	Elf64_Section rel_text_idx = 0;
302 | 	Elf64_Section rela_text_idx = 0;
303 | 	unsigned caps = 0;
304 | 	int rc;
305 | 
306 | 	/* Determine if 64-bit or 32-bit ELF file */
307 | 	if ((ehdr_ident = elf_getident(elf, NULL)) == NULL) {
308 | 		return -1;
309 | 	}
310 | 
311 | 	if (ehdr_ident[EI_CLASS] != ELFCLASS64) {
312 | 		return -1;
313 | 	}
314 | 
315 | 	/* Access the ELF file header */
316 | 	if ((ehdr64 = elf64_getehdr(elf)) == NULL) {
317 | 		return -1;
318 | 	}
319 | 
320 | 	/* Validate the ELF type */
321 | 	if (ehdr64->e_type != ET_REL) {
322 | 		return -1;
323 | 	}
324 | 
325 | 	/* Validate machine type */
326 | 	if (ehdr64->e_machine != EM_AARCH64) {
327 | 		return -1;
328 | 	}
329 | 
330 | 	n_elf_scns   = ehdr64->e_shnum;
331 | 	shstrtab_idx = ehdr64->e_shstrndx;
332 | 
333 | 	/* Allocate the new ElfDetails object */
334 | 	if (n_elf_scns == 0) {
335 | 		return -1;
336 | 	}
337 | 
338 | 	details = (ElfDetails) calloc(sizeof(*details), 1);
339 | 	if (details == NULL) {
340 | 		return -2; /* out of memory */
341 | 	}
342 | 
343 | 	/* Initialize the new object */
344 | 	details->ed_elf          = elf;
345 | 	details->ed_n_elf_scns   = n_elf_scns;
346 | 
347 | 	/* Allocate list object (array of Elf64_Shdr*) for the ELF sections */
348 | 	section_list = (Elf64_Shdr**) calloc(sizeof(*section_list), n_elf_scns);
349 | 	if (section_list == NULL) {
350 | 		return -2; /* out of memory */
351 | 	}
352 | 	details->ed_shdrs = section_list;
353 | 
354 | 	/* Enumerate the ELF sections and compute their mapping addresses */
355 | 	scn_idx = 0;
356 | 	scn = NULL;
357 | 
358 | 	while ((scn = elf_nextscn(elf, scn)) != NULL) {
359 | 		scn_idx = elf_ndxscn(scn);
360 | 
361 | 		if (scn_idx >= n_elf_scns) {
362 | 			return -1;
363 | 		}
364 | 
365 | 		if ((shdr64 = elf64_getshdr(scn)) == NULL) {
366 | 			return -1;
367 | 		}
368 | 
369 | 		section_list[scn_idx] = shdr64;
370 | 
371 | 		if ((scn_name = elf_strptr(elf, shstrtab_idx, shdr64->sh_name)) == NULL) {
372 | 			return -1;
373 | 		}
374 | 
375 | 		if (strcmp(scn_name,".text") == 0) {
376 | 			if (shdr64->sh_type != SHT_PROGBITS) {
377 | 				return -1;
378 | 			}
379 | 			/* Validate there is only one .text section */
380 | 			if (text_idx != 0) {
381 | 				return -1;
382 | 			}
383 | 			details->ed_text_idx = text_idx = scn_idx;
384 | 		}
385 | 		else if (strcmp(scn_name,".rodata") == 0) {
386 | 			if (shdr64->sh_type != SHT_PROGBITS) {
387 | 				return -1;
388 | 			}
389 | 			/* Validate there is only one .rodata section */
390 | 			if (rodata_idx != 0) {
391 | 				return -1;
392 | 			}
393 | 			rodata_idx = scn_idx;
394 | 		}
395 | 		else if (strcmp(scn_name,".data") == 0) {
396 | 			if (shdr64->sh_type != SHT_PROGBITS) {
397 | 				return -1;
398 | 			}
399 | 			/* Validate there is only one .data section */
400 | 			if (data_idx != 0) {
401 | 				return -1;
402 | 			}
403 | 			data_idx = scn_idx;
404 | 		}
405 | 		else if (strcmp(scn_name, ".bss") == 0) {
406 | 			if (shdr64->sh_type != SHT_NOBITS) {
407 | 				return -1;
408 | 			}
409 | 			/* Validate there is only one .bss section */
410 | 			if (bss_idx != 0) {
411 | 				return -1;
412 | 			}
413 | 			bss_idx = scn_idx;
414 | 		}
415 | 		else if (strcmp(scn_name,".rel.text") == 0) {
416 | 			if (shdr64->sh_type != SHT_REL) {
417 | 				return -1;
418 | 			}
419 | 			/* Validate there is only one .rel.text section */
420 | 			if (rel_text_idx != 0) {
421 | 				return -1;
422 | 			}
423 | 			details->ed_rel_text_idx = rel_text_idx = scn_idx;
424 | 		}
425 | 		else if (strcmp(scn_name,".rela.text") == 0) {
426 | 			if (shdr64->sh_type != SHT_RELA) {
427 | 				return -1;
428 | 			}
429 | 			/* Validate there is only one .rela.text section */
430 | 			if (rela_text_idx != 0) {
431 | 				return -1;
432 | 			}
433 | 			details->ed_rela_text_idx = rela_text_idx = scn_idx;
434 | 		}
435 | 		else if (strcmp(scn_name,".symtab") == 0) {
436 | 			if (shdr64->sh_type != SHT_SYMTAB) {
437 | 				return -1;
438 | 			}
439 | 			/* Validate there is only one .symtab section */
440 | 			if (symtab_idx != 0) {
441 | 				return -1;
442 | 			}
443 | 			details->ed_symtab_idx = symtab_idx = scn_idx;
444 | 		}
445 | 		else if (strcmp(scn_name,".strtab") == 0) {
446 | 			if (shdr64->sh_type != SHT_STRTAB) {
447 | 				return -1;
448 | 			}
449 | 			/* Validate there is only one .strtab section */
450 | 			if (strtab_idx != 0) {
451 | 				return -1;
452 | 			}
453 | 			details->ed_strtab_idx = strtab_idx = scn_idx;
454 | 		}
455 | 		else if (strcmp(scn_name,".shstrtab") == 0) {
456 | 			if (shdr64->sh_type != SHT_STRTAB) {
457 | 				return -1;
458 | 			}
459 | 			/* Validate there is only one .shstrtab section */
460 | 			if (shstrtab_idx != scn_idx) {
461 | 				return -1;
462 | 			}
463 | 		}
464 | 
465 | 		/* Resolve the VA of non-empty ELF section data */
466 | 		if (shdr64->sh_size != 0) {
467 | 			/* Section .bss does not have file backing */
468 | 			if (scn_idx == bss_idx) {
469 | 				const int prot_rw = PROT_READ | PROT_WRITE;
470 | 				const int flag_priv_anon = MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE;
471 | 
472 | 				void *p = mmap(NULL, shdr64->sh_size, prot_rw, flag_priv_anon, -1, 0);
473 | 
474 | 				if (p == MAP_FAILED) {
475 | 					fprintf(stderr, "error: cannot mmap bss\n");
476 | 					return -1;
477 | 				}
478 | 
479 | 				shdr64->sh_addr = (Elf64_Addr)p;
480 | 			} else if (scn_idx == text_idx) {
481 | 				shdr64->sh_addr = (Elf64_Addr)(rawdata_ro + shdr64->sh_offset);
482 | 				caps |= REL_CAPS_RX_SECTIONS;
483 | 			} else if (scn_idx == rodata_idx) {
484 | 				shdr64->sh_addr = (Elf64_Addr)(rawdata_ro + shdr64->sh_offset);
485 | 				caps |= REL_CAPS_RO_SECTIONS;
486 | 			} else {
487 | 				shdr64->sh_addr = (Elf64_Addr)(rawdata_rw + shdr64->sh_offset);
488 | 				if (shdr64->sh_type == SHT_PROGBITS) {
489 | 					caps |= REL_CAPS_RW_SECTIONS;
490 | 				}
491 | 			}
492 | 		}
493 | 	}
494 | 
495 | 	/* Ensure the file has all required sections */
496 | 	if (text_idx     == 0 ||
497 | 	    symtab_idx   == 0 ||
498 | 	    strtab_idx   == 0 ||
499 | 	    shstrtab_idx == 0) {
500 | 		return -1;
501 | 	}
502 | 
503 | 	/* Process the symbol table from the ELF .symtab section */
504 | 	rc = _load_elf64_symbol_table(details);
505 | 	if (rc) return rc;
506 | 
507 | 	/* Return the ElfDetails object to the caller */
508 | 	*ret_details = details;
509 | 	*ret_caps = caps;
510 | 
511 | 	return 0;
512 | }
513 | 
514 | /* Terminate ELF loader processing, release resources
515 | */
516 | static int
517 | 	_load_elf_term(
518 | 		ElfDetails details)
519 | {
520 | 	if (details == NULL) {
521 | 		return 0;
522 | 	}
523 | 
524 | 	if (details->ed_shdrs != NULL) {
525 | 		free(details->ed_shdrs);
526 | 	}
527 | 
528 | 	free(details);
529 | 
530 | 	return 0;
531 | }
532 | 
533 | /* Load ELF file, relocate based on loading/mapping VA, and print symtab
534 | */
535 | static int
536 | 	relocate_elf_load_cu(
537 | 		Elf *elf,
538 | 		void **start,
539 | 		unsigned *caps,
540 | 		void *rawdata_rw,
541 | 		void *rawdata_ro,
542 | 		int flag_quiet)
543 | {
544 | 	ElfDetails details = NULL;
545 | 	Elf64_Sym *symtab;
546 | 	uint64 n_symbols, i;
547 | 	int rc;
548 | 
549 | 	if (!elf || !start || !caps || !rawdata_rw || !rawdata_ro) {
550 | 		return -1;
551 | 	}
552 | 
553 | 	/* Load ELF file section and symbol tables */
554 | 	rc = _load_elf_file_details(elf, &details, caps, rawdata_rw, rawdata_ro);
555 | 	if (rc)
556 | 		goto term;
557 | 
558 | 	/* Print all symbols, except the first dummy */
559 | 	symtab = (Elf64_Sym*)(details->ed_shdrs[details->ed_symtab_idx]->sh_addr);
560 | 	symtab++;
561 | 	n_symbols = (details->ed_shdrs[details->ed_symtab_idx]->sh_size) / sizeof(*symtab);
562 | 
563 | 	if (!flag_quiet) {
564 | 		printf("    symtab_value____ symtab_type__ symtab_bind___ symtab_section___ symtab_name__\n");
565 | 	}
566 | 
567 | 	for (i = 1; i < n_symbols; i++, symtab++) {
568 | 		const char *name = "_____________";
569 | 
570 | 		/* Resolve the symbol name */
571 | 		if (symtab->st_name == 0) {
572 | 			if (ELF64_ST_TYPE(symtab->st_info) == STT_SECTION) {
573 | 				name = str_from_st_shndx(symtab->st_shndx, elf);
574 | 			}
575 | 		}
576 | 		else {
577 | 			name = elf_strptr(details->ed_elf, details->ed_strtab_idx, symtab->st_name);
578 | 
579 | 			if (symtab->st_shndx == details->ed_text_idx && strcmp(name, "_start") == 0) {
580 | 				if (*start != NULL) {
581 | 					fprintf(stderr, "error: multiple _start\n");
582 | 					rc = -1;
583 | 					goto term;
584 | 				}
585 | 				*start = (void *)symtab->st_value;
586 | 			}
587 | 		}
588 | 
589 | 		if (!flag_quiet) {
590 | 			printf("%2lu: %016lx %-13s %-14s %-17s %s\n",
591 | 				i,
592 | 				symtab->st_value,
593 | 				str_from_st_type(ELF64_ST_TYPE(symtab->st_info)),
594 | 				str_from_st_bind(ELF64_ST_BIND(symtab->st_info)),
595 | 				str_from_st_shndx(symtab->st_shndx, elf),
596 | 				name);
597 | 		}
598 | 	}
599 | 
600 | 	/* Apply any SHT_REL relocations */
601 | 	if (details->ed_rel_text_idx != 0) {
602 | 		fprintf(stderr, "error: cannot handle SHT_REL section\n");
603 | 		rc = -1;
604 | 		goto term;
605 | 	}
606 | 
607 | 	/* Apply any SHT_RELA relocations */
608 | 	if (details->ed_rela_text_idx != 0) {
609 | 		rc = apply_relocate_add(details->ed_shdrs,
610 | 				details->ed_symtab_idx,
611 | 				details->ed_rela_text_idx);
612 | 	}
613 | 
614 | 	/* Repurpose Elf64_Ehdr.e_shstrndx as e_strtabndx */
615 | 	elf64_getehdr(elf)->e_shstrndx = details->ed_strtab_idx;
616 | 
617 | term:
618 | 	/* Remove temporary tables */
619 | 	_load_elf_term(details);
620 | 
621 | 	return rc;
622 | }
623 | 
/* Write the command-line synopsis to stdout; argv[0] supplies the program
   name. The --filter line is emitted on Linux builds only. */
static void print_usage(char **argv)
{
	printf("usage: %s <elf_rel_file> [<elf_rel_file>] ..\n", argv[0]);
#if __linux__ != 0
	fputs("\t--filter <string> : filter file mappings containing the specified string\n", stdout);
#endif
	fputs("\t--quiet           : suppress all reports\n", stdout);
	fputs("\t--break           : raise SIGTRAP before passing control to REL\n", stdout);
	fputs("\t--help            : this message\n", stdout);
}
634 | 
635 | struct rel_info_t {
636 | 	char *name;   /* File name */
637 | 	void *vma_rw; /* Ptr to RW VMA */
638 | 	size_t size;  /* File size */
639 | };
640 | 
641 | int main(int argc, char **argv)
642 | {
643 | 	size_t areas_capacity = 0, objs_capacity = 0;
644 | #if __linux__ != 0
645 | 	struct char_ptr_arr_t areas = { .count = 0, .arr = NULL };
646 | #endif
647 | 	struct {
648 | 		size_t count;
649 | 		struct rel_info_t *arr;
650 | 	} objs = { .count = 0, .arr = NULL };
651 | 	Elf *prev_elf = NULL;
652 | 	void *start = NULL;
653 | 	int flag_quiet = 0;
654 | 	int flag_break = 0;
655 | 	size_t i;
656 | 
657 | 	if (argc == 1) {
658 | 		print_usage(argv);
659 | 		return -1;
660 | 	}
661 | 
662 | 	for (i = 1; i < argc; ++i) {
663 | 		if (!strcmp(argv[i], "--help")) {
664 | 			print_usage(argv);
665 | 			return 0;
666 | 		}
667 | 
668 | 		if (!strcmp(argv[i], "--quiet")) {
669 | 			flag_quiet = 1;
670 | 			continue;
671 | 		}
672 | 
673 | 		if (!strcmp(argv[i], "--break")) {
674 | 			flag_break = 1;
675 | 			continue;
676 | 		}
677 | 
678 | #if __linux__ != 0
679 | 		if (!strcmp(argv[i], "--filter")) {
680 | 			if (++i == argc) {
681 | 				print_usage(argv);
682 | 				return -1;
683 | 			}
684 | 			if (areas.count == areas_capacity) {
685 | 				areas.arr = (char **)realloc(areas.arr, sizeof(*areas.arr) * (areas_capacity = (areas_capacity + 1) * 2));
686 | 			}
687 | 			areas.arr[areas.count++] = argv[i];
688 | 			continue;
689 | 		}
690 | 
691 | #endif
692 | 		/* Unprefixed arg must be a file */
693 | 		if (objs.count == objs_capacity) {
694 | 			objs.arr = (struct rel_info_t *)realloc(objs.arr, sizeof(*objs.arr) * (objs_capacity = (objs_capacity + 1) * 2));
695 | 		}
696 | 
697 | 		objs.arr[objs.count++].name = argv[i];
698 | 	}
699 | 
700 | 	if (objs.count == 0) {
701 | 		print_usage(argv);
702 | 		return -1;
703 | 	}
704 | 
705 | 	elf_version(EV_CURRENT);
706 | 
707 | 	for (i = 0; i < objs.count; ++i) {
708 | 		struct stat sb;
709 | 		void *p, *q;
710 | 		Elf *elf;
711 | 		Elf64_Ehdr *ehdr64;
712 | 		unsigned caps;
713 | 
714 | 		const int fd = open(objs.arr[i].name, O_RDONLY);
715 | 
716 | 		if (fd < 0) {
717 | 			fprintf(stderr, "error: cannot open file\n");
718 | 			return -1;
719 | 		}
720 | 
721 | 		if (fstat(fd, &sb) < 0) {
722 | 			close(fd);
723 | 			fprintf(stderr, "error: cannot stat file\n");
724 | 			return -1;
725 | 		}
726 | 
727 | 		/* Get two distinct mappings to the same file -- first to be used for
728 | 		   writable sections, second -- for the read-only/exec sections; use
729 | 		   the first mapping for libelf purposes */
730 | 		p = mmap(NULL, sb.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
731 | 		q = mmap(NULL, sb.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
732 | 		close(fd);
733 | 
734 | 		if (p == MAP_FAILED || q == MAP_FAILED) {
735 | 			fprintf(stderr, "error: cannot mmap file\n");
736 | 			return -1;
737 | 		}
738 | 
739 | 		elf = elf_memory(p, sb.st_size);
740 | 
741 | 		if (elf == NULL) {
742 | 			fprintf(stderr, "error: cannot elf_memory\n");
743 | 			return -1;
744 | 		}
745 | 
746 | 		/* Elf64_Ehdr.e_entry is nil in a REL -- repurpose it to
747 | 		   form a linked list of all RELs loaded to this point */
748 | 		if ((ehdr64 = elf64_getehdr(elf)) == NULL) {
749 | 			return -1;
750 | 		}
751 | 
752 | 		ehdr64->e_entry = (Elf64_Addr)prev_elf;
753 | 		prev_elf = elf;
754 | 
755 | 		if (relocate_elf_load_cu(elf, &start, &caps, p, q, flag_quiet)) {
756 | 			fprintf(stderr, "error: cannot relocate_elf_load_cu\n");
757 | 			return -1;
758 | 		}
759 | 
760 | 		/* Finalize RO mapping depending on presence of RO SHT_PROGBITS */
761 | 		if (caps & REL_CAPS_RX_SECTIONS) {
762 | 			if (mprotect(q, sb.st_size, PROT_READ | PROT_EXEC)) {
763 | 				fprintf(stderr, "error: cannot mprotect\n");
764 | 				return -1;
765 | 			}
766 | 		} else if (caps & REL_CAPS_RO_SECTIONS) {
767 | 			if (mprotect(q, sb.st_size, PROT_READ)) {
768 | 				fprintf(stderr, "error: cannot mprotect\n");
769 | 				return -1;
770 | 			}
771 | 		} else {
772 | 			if (munmap(q, sb.st_size)) {
773 | 				fprintf(stderr, "error: cannot munmap\n");
774 | 				return -1;
775 | 			}
776 | 		}
777 | 
778 | 		/* Defer unmapping of RW mapping depending on presence of RW SHT_PROGBITS */
779 | 		if (caps & REL_CAPS_RW_SECTIONS) {
780 | 			objs.arr[i].vma_rw = NULL;
781 | 		} else {
782 | 			objs.arr[i].vma_rw = p;
783 | 			objs.arr[i].size = sb.st_size;
784 | 		}
785 | 	}
786 | 
787 | 	/* All SHN_UNDEFs have been processed -- unmap unneeded RW mappings */
788 | 	for (i = 0; i < objs.count; ++i) {
789 | 		if (objs.arr[i].vma_rw != NULL) {
790 | 			if (munmap(objs.arr[i].vma_rw, objs.arr[i].size)) {
791 | 				fprintf(stderr, "error: cannot munmap\n");
792 | 				return -1;
793 | 			}
794 | 		}
795 | 	}
796 | 
797 | #if __linux__ != 0
798 | 	if (areas.count && areas.arr != NULL)
799 | 		vma_process(&areas, flag_quiet);
800 | 
801 | 	/* Don't try to free anything from heap here as there may not be a heap */
802 | 
803 | #endif
804 | 	if (start != NULL) {
805 | 		if (flag_break) {
806 | 			__asm__ __volatile__ ("brk 42");
807 | 		}
808 | 		((void (*)(void))start)();
809 | 	}
810 | 
811 | 	return 0;
812 | }
813 | 


--------------------------------------------------------------------------------
/reloc_add_aarch64.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdint.h>
  3 | #if __APPLE__ != 0
  4 | #include <libelf/libelf.h>
  5 | #else
  6 | #include <libelf.h>
  7 | #endif
  8 | #include <errno.h>
  9 | 
/* Preparations for inclusion of some Linux kernel routines */

/* Kernel-style short names for the <stdint.h> fixed-width integer types.
   Note that bool is declared here as a plain char. */
typedef char		bool;
typedef int8_t		s8;
typedef uint8_t		u8;
typedef int16_t		s16;
typedef uint16_t	u16;
typedef int32_t		s32;
typedef uint32_t	u32;
typedef int64_t		s64;
typedef uint64_t	u64;

#define true	1
#define false	0
/* Statement that marks an intentional fall-through between switch cases */
#define fallthrough __attribute__((__fallthrough__))
/* Unsigned long value with only bit number nr set */
#define BIT(nr) (1UL << (nr))

/* Provide __always_inline unless something earlier already defined it */
#ifndef __always_inline
#define __always_inline inline __attribute__((__always_inline__))
#endif
 30 | 
/* Fallback definitions of the AArch64 relocation type numbers: compiled only
   on Apple builds, and only when R_AARCH64_NONE has not already been defined
   by the headers included above. */
#if __APPLE__ != 0
#ifndef R_AARCH64_NONE

/*
 * AArch64 static relocation types.
 */

/* Miscellaneous. */
#define R_AARCH64_NONE			256

/* Data. */
#define R_AARCH64_ABS64			257
#define R_AARCH64_ABS32			258
#define R_AARCH64_ABS16			259
#define R_AARCH64_PREL64		260
#define R_AARCH64_PREL32		261
#define R_AARCH64_PREL16		262

/* Instructions. */
#define R_AARCH64_MOVW_UABS_G0		263
#define R_AARCH64_MOVW_UABS_G0_NC	264
#define R_AARCH64_MOVW_UABS_G1		265
#define R_AARCH64_MOVW_UABS_G1_NC	266
#define R_AARCH64_MOVW_UABS_G2		267
#define R_AARCH64_MOVW_UABS_G2_NC	268
#define R_AARCH64_MOVW_UABS_G3		269

#define R_AARCH64_MOVW_SABS_G0		270
#define R_AARCH64_MOVW_SABS_G1		271
#define R_AARCH64_MOVW_SABS_G2		272

#define R_AARCH64_LD_PREL_LO19		273
#define R_AARCH64_ADR_PREL_LO21		274
#define R_AARCH64_ADR_PREL_PG_HI21	275
#define R_AARCH64_ADR_PREL_PG_HI21_NC	276
#define R_AARCH64_ADD_ABS_LO12_NC	277
#define R_AARCH64_LDST8_ABS_LO12_NC	278

#define R_AARCH64_TSTBR14		279
#define R_AARCH64_CONDBR19		280
#define R_AARCH64_JUMP26		282
#define R_AARCH64_CALL26		283
#define R_AARCH64_LDST16_ABS_LO12_NC	284
#define R_AARCH64_LDST32_ABS_LO12_NC	285
#define R_AARCH64_LDST64_ABS_LO12_NC	286
#define R_AARCH64_LDST128_ABS_LO12_NC	299

#define R_AARCH64_MOVW_PREL_G0		287
#define R_AARCH64_MOVW_PREL_G0_NC	288
#define R_AARCH64_MOVW_PREL_G1		289
#define R_AARCH64_MOVW_PREL_G1_NC	290
#define R_AARCH64_MOVW_PREL_G2		291
#define R_AARCH64_MOVW_PREL_G2_NC	292
#define R_AARCH64_MOVW_PREL_G3		293

#endif /* R_AARCH64_NONE */
#endif /* __APPLE__ */
 88 | 
/* Kernel-style names for the <stdint.h> integer limits */
#define S16_MIN INT16_MIN
#define S16_MAX INT16_MAX
#define U16_MAX UINT16_MAX

#define S32_MIN INT32_MIN
#define S32_MAX INT32_MAX
#define U32_MAX UINT32_MAX

#define S64_MIN INT64_MIN
#define S64_MAX INT64_MAX
#define U64_MAX UINT64_MAX

/* The byte-order helpers are empty macros, so e.g. cpu_to_le32(x) expands to
   just (x) and no byte swapping takes place.
   NOTE(review): this presumes a little-endian host -- confirm */
#define cpu_to_le16
#define cpu_to_le32
#define cpu_to_le64
#define le16_to_cpu
#define le32_to_cpu
#define le64_to_cpu

/* AARCH64_BREAK_FAULT is the instruction word that
   aarch64_insn_encode_immediate passes through unchanged and returns when it
   is asked for an unknown immediate type; it is built from the
   AARCH64_BREAK_MON word with FAULT_BRK_IMM placed at bit 5 */
#define FAULT_BRK_IMM		0x100
#define AARCH64_BREAK_MON	0xd4200000
#define AARCH64_BREAK_FAULT	(AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5))

#define SZ_2M			0x00200000

/* Layout of the split immediate used for AARCH64_INSN_IMM_ADR in
   aarch64_insn_encode_immediate: the low ADR_IMM_HILOSPLIT bits go to bit
   ADR_IMM_LOSHIFT of the instruction, the remaining bits to bit
   ADR_IMM_HISHIFT */
#define ADR_IMM_HILOSPLIT	2
#define ADR_IMM_SIZE		SZ_2M
#define ADR_IMM_LOMASK		((1 << ADR_IMM_HILOSPLIT) - 1)
#define ADR_IMM_HIMASK		((ADR_IMM_SIZE >> ADR_IMM_HILOSPLIT) - 1)
#define ADR_IMM_LOSHIFT		29
#define ADR_IMM_HISHIFT		5

/* Kernel endianness-annotated type names; here they are plain aliases of the
   same-width unsigned types */
typedef u16 __le16;
typedef u16 __be16;
typedef u32 __le32;
typedef u32 __be32;
typedef u64 __le64;
typedef u64 __be64;

typedef u16 __sum16;
typedef u32 __wsum;

/* 8-byte-aligned variants.
   NOTE(review): __u64 is not defined in the visible part of this file, so
   __aligned_u64 would need it from elsewhere -- confirm before using it */
#define __aligned_u64  __u64  __attribute__((aligned(8)))
#define __aligned_be64 __be64 __attribute__((aligned(8)))
#define __aligned_le64 __le64 __attribute__((aligned(8)))

/* Token pasting with one extra expansion step, so that macro arguments such
   as __COUNTER__ are expanded before being pasted */
#define CATENATE(x, y) x##y
#define CAT(x, y) CATENATE(x, y)

/* Compile-time assertion: a non-zero cond makes the array size negative,
   which does not compile; each use declares a uniquely named enumerator */
#define BUILD_BUG_ON(cond)	\
	enum { CAT(assert_line, __COUNTER__) = sizeof(int[-!!(cond)]) }
140 | 
141 | #include "insn.h"
142 | 
143 | static inline bool is_forbidden_offset_for_adrp(void *place)
144 | {
145 | 	return false;
146 | }
147 | 
148 | /* Following routines copied nearly verbatim from Linux kernel */
149 | /* arch/arm64/kernel/module.c */
150 | 
151 | /* SPDX-License-Identifier: GPL-2.0-only */
152 | /*
153 |  * AArch64 loadable module support.
154 |  *
155 |  * Copyright (C) 2012 ARM Limited
156 |  *
157 |  * Author: Will Deacon <will.deacon@arm.com>
158 |  */
159 | 
/* How do_reloc combines the target value with the place being patched */
enum aarch64_reloc_op {
	RELOC_OP_NONE = 0,	/* result is always 0 */
	RELOC_OP_ABS  = 1,	/* the value itself */
	RELOC_OP_PREL = 2,	/* value minus the address of the place */
	RELOC_OP_PAGE = 3,	/* value and place each with the low 12 bits cleared, then subtracted */
};
166 | 
167 | static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
168 | {
169 | 	switch (reloc_op) {
170 | 	case RELOC_OP_NONE:
171 | 		return 0;
172 | 	case RELOC_OP_ABS:
173 | 		return val;
174 | 	case RELOC_OP_PREL:
175 | 		return val - (u64)place;
176 | 	case RELOC_OP_PAGE:
177 | 		return (val & ~0xfff) - ((u64)place & ~0xfff);
178 | 	}
179 | 
180 | 	fprintf(stderr, "do_reloc: unknown relocation operation %d\n", reloc_op);
181 | 	return 0;
182 | }
183 | 
184 | static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
185 | {
186 | 	s64 sval = do_reloc(op, place, val);
187 | 
188 | 	/*
189 | 	 * The ELF psABI for AArch64 documents the 16-bit and 32-bit place
190 | 	 * relative and absolute relocations as having a range of [-2^15, 2^16)
191 | 	 * or [-2^31, 2^32), respectively. However, in order to be able to
192 | 	 * detect overflows reliably, we have to choose whether we interpret
193 | 	 * such quantities as signed or as unsigned, and stick with it.
194 | 	 * The way we organize our address space requires a signed
195 | 	 * interpretation of 32-bit relative references, so let's use that
196 | 	 * for all R_AARCH64_PRELxx relocations. This means our upper
197 | 	 * bound for overflow detection should be Sxx_MAX rather than Uxx_MAX.
198 | 	 */
199 | 
200 | 	switch (len) {
201 | 	case 16:
202 | 		*(s16 *)place = sval;
203 | 		switch (op) {
204 | 		case RELOC_OP_ABS:
205 | 			if (sval < 0 || sval > U16_MAX)
206 | 				return -ERANGE;
207 | 			break;
208 | 		case RELOC_OP_PREL:
209 | 			if (sval < S16_MIN || sval > S16_MAX)
210 | 				return -ERANGE;
211 | 			break;
212 | 		default:
213 | 			fprintf(stderr, "Invalid 16-bit data relocation (%d)\n", op);
214 | 			return 0;
215 | 		}
216 | 		break;
217 | 	case 32:
218 | 		*(s32 *)place = sval;
219 | 		switch (op) {
220 | 		case RELOC_OP_ABS:
221 | 			if (sval < 0 || sval > U32_MAX)
222 | 				return -ERANGE;
223 | 			break;
224 | 		case RELOC_OP_PREL:
225 | 			if (sval < S32_MIN || sval > S32_MAX)
226 | 				return -ERANGE;
227 | 			break;
228 | 		default:
229 | 			fprintf(stderr, "Invalid 32-bit data relocation (%d)\n", op);
230 | 			return 0;
231 | 		}
232 | 		break;
233 | 	case 64:
234 | 		*(s64 *)place = sval;
235 | 		break;
236 | 	default:
237 | 		fprintf(stderr, "Invalid length (%d) for data relocation\n", len);
238 | 		return 0;
239 | 	}
240 | 	return 0;
241 | }
242 | 
243 | static int aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
244 | 						u32 *maskp, int *shiftp)
245 | {
246 | 	u32 mask;
247 | 	int shift;
248 | 
249 | 	switch (type) {
250 | 	case AARCH64_INSN_IMM_26:
251 | 		mask = BIT(26) - 1;
252 | 		shift = 0;
253 | 		break;
254 | 	case AARCH64_INSN_IMM_19:
255 | 		mask = BIT(19) - 1;
256 | 		shift = 5;
257 | 		break;
258 | 	case AARCH64_INSN_IMM_16:
259 | 		mask = BIT(16) - 1;
260 | 		shift = 5;
261 | 		break;
262 | 	case AARCH64_INSN_IMM_14:
263 | 		mask = BIT(14) - 1;
264 | 		shift = 5;
265 | 		break;
266 | 	case AARCH64_INSN_IMM_12:
267 | 		mask = BIT(12) - 1;
268 | 		shift = 10;
269 | 		break;
270 | 	case AARCH64_INSN_IMM_9:
271 | 		mask = BIT(9) - 1;
272 | 		shift = 12;
273 | 		break;
274 | 	case AARCH64_INSN_IMM_7:
275 | 		mask = BIT(7) - 1;
276 | 		shift = 15;
277 | 		break;
278 | 	case AARCH64_INSN_IMM_6:
279 | 	case AARCH64_INSN_IMM_S:
280 | 		mask = BIT(6) - 1;
281 | 		shift = 10;
282 | 		break;
283 | 	case AARCH64_INSN_IMM_R:
284 | 		mask = BIT(6) - 1;
285 | 		shift = 16;
286 | 		break;
287 | 	case AARCH64_INSN_IMM_N:
288 | 		mask = 1;
289 | 		shift = 22;
290 | 		break;
291 | 	default:
292 | 		return -EINVAL;
293 | 	}
294 | 
295 | 	*maskp = mask;
296 | 	*shiftp = shift;
297 | 
298 | 	return 0;
299 | }
300 | 
/* Selects how reloc_insn_movw treats the instruction's opcode bits */
enum aarch64_insn_movw_imm_type {
	AARCH64_INSN_IMM_MOVNZ = 0,	/* opcode rewritten to MOVZ or MOVN by the sign of the value */
	AARCH64_INSN_IMM_MOVKZ = 1,	/* opcode left as it is */
};
305 | 
306 | u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
307 | 				  u32 insn, u64 imm)
308 | {
309 | 	u32 immlo, immhi, mask;
310 | 	int shift;
311 | 
312 | 	if (insn == AARCH64_BREAK_FAULT)
313 | 		return AARCH64_BREAK_FAULT;
314 | 
315 | 	switch (type) {
316 | 	case AARCH64_INSN_IMM_ADR:
317 | 		shift = 0;
318 | 		immlo = (imm & ADR_IMM_LOMASK) << ADR_IMM_LOSHIFT;
319 | 		imm >>= ADR_IMM_HILOSPLIT;
320 | 		immhi = (imm & ADR_IMM_HIMASK) << ADR_IMM_HISHIFT;
321 | 		imm = immlo | immhi;
322 | 		mask = ((ADR_IMM_LOMASK << ADR_IMM_LOSHIFT) |
323 | 			(ADR_IMM_HIMASK << ADR_IMM_HISHIFT));
324 | 		break;
325 | 	default:
326 | 		if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
327 | 			fprintf(stderr, "aarch64_insn_encode_immediate: unknown immediate encoding %d\n", type);
328 | 			return AARCH64_BREAK_FAULT;
329 | 		}
330 | 	}
331 | 
332 | 	/* Update the immediate field. */
333 | 	insn &= ~(mask << shift);
334 | 	insn |= (imm & mask) << shift;
335 | 
336 | 	return insn;
337 | }
338 | 
339 | static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
340 | 			   int lsb, enum aarch64_insn_movw_imm_type imm_type)
341 | {
342 | 	u64 imm;
343 | 	s64 sval;
344 | 	u32 insn = le32_to_cpu(*place);
345 | 
346 | 	sval = do_reloc(op, place, val);
347 | 	imm = sval >> lsb;
348 | 
349 | 	if (imm_type == AARCH64_INSN_IMM_MOVNZ) {
350 | 		/*
351 | 		 * For signed MOVW relocations, we have to manipulate the
352 | 		 * instruction encoding depending on whether or not the
353 | 		 * immediate is less than zero.
354 | 		 */
355 | 		insn &= ~(3 << 29);
356 | 		if (sval >= 0) {
357 | 			/* >=0: Set the instruction to MOVZ (opcode 10b). */
358 | 			insn |= 2 << 29;
359 | 		} else {
360 | 			/*
361 | 			 * <0: Set the instruction to MOVN (opcode 00b).
362 | 			 *     Since we've masked the opcode already, we
363 | 			 *     don't need to do anything other than
364 | 			 *     inverting the new immediate field.
365 | 			 */
366 | 			imm = ~imm;
367 | 		}
368 | 	}
369 | 
370 | 	/* Update the instruction with the new encoding. */
371 | 	insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
372 | 	*place = cpu_to_le32(insn);
373 | 
374 | 	if (imm > U16_MAX)
375 | 		return -ERANGE;
376 | 
377 | 	return 0;
378 | }
379 | 
380 | static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
381 | 			  int lsb, int len, enum aarch64_insn_imm_type imm_type)
382 | {
383 | 	u64 imm, imm_mask;
384 | 	s64 sval;
385 | 	u32 insn = le32_to_cpu(*place);
386 | 
387 | 	/* Calculate the relocation value. */
388 | 	sval = do_reloc(op, place, val);
389 | 	sval >>= lsb;
390 | 
391 | 	/* Extract the value bits and shift them to bit 0. */
392 | 	imm_mask = (BIT(lsb + len) - 1) >> lsb;
393 | 	imm = sval & imm_mask;
394 | 
395 | 	/* Update the instruction's immediate field. */
396 | 	insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
397 | 	*place = cpu_to_le32(insn);
398 | 
399 | 	/*
400 | 	 * Extract the upper value bits (including the sign bit) and
401 | 	 * shift them to bit 0.
402 | 	 */
403 | 	sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1);
404 | 
405 | 	/*
406 | 	 * Overflow has occurred if the upper bits are not all equal to
407 | 	 * the sign bit of the value.
408 | 	 */
409 | 	if ((u64)(sval + 1) >= 2)
410 | 		return -ERANGE;
411 | 
412 | 	return 0;
413 | }
414 | 
415 | static int reloc_insn_adrp(__le32 *place, u64 val)
416 | {
417 | 	u32 insn;
418 | 
419 | 	if (!is_forbidden_offset_for_adrp(place))
420 | 		return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21,
421 | 				      AARCH64_INSN_IMM_ADR);
422 | 
423 | 	/* patch ADRP to ADR if it is in range */
424 | 	if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
425 | 			    AARCH64_INSN_IMM_ADR)) {
426 | 		insn = le32_to_cpu(*place);
427 | 		insn &= ~BIT(31);
428 | 	} else {
429 | 		/* don't emit a veneer */
430 | 		return -ENOEXEC;
431 | 	}
432 | 
433 | 	*place = cpu_to_le32(insn);
434 | 	return 0;
435 | }
436 | 
437 | int apply_relocate_add(Elf64_Shdr **sechdrs,
438 | 		       unsigned int symsec,
439 | 		       unsigned int relsec)
440 | {
441 | 	unsigned int i;
442 | 	int ovf;
443 | 	bool overflow_check;
444 | 	Elf64_Sym *sym;
445 | 	void *loc;
446 | 	u64 val;
447 | 	Elf64_Rela *rel = (void *)sechdrs[relsec]->sh_addr;
448 | 
449 | 	for (i = 0; i < sechdrs[relsec]->sh_size / sizeof(*rel); i++) {
450 | 		/* loc corresponds to P in the AArch64 ELF document. */
451 | 		loc = (void *)sechdrs[sechdrs[relsec]->sh_info]->sh_addr
452 | 			+ rel[i].r_offset;
453 | 
454 | 		/* sym is the ELF symbol we're referring to. */
455 | 		sym = (Elf64_Sym *)sechdrs[symsec]->sh_addr
456 | 			+ ELF64_R_SYM(rel[i].r_info);
457 | 
458 | 		/* val corresponds to (S + A) in the AArch64 ELF document. */
459 | 		val = sym->st_value + rel[i].r_addend;
460 | 
461 | 		/* Check for overflow by default. */
462 | 		overflow_check = true;
463 | 
464 | 		/* Perform the static relocation. */
465 | 		switch (ELF64_R_TYPE(rel[i].r_info)) {
466 | 		/* Null relocations. */
467 | 		case R_AARCH64_NONE:
468 | 			ovf = 0;
469 | 			break;
470 | 
471 | 		/* Data relocations. */
472 | 		case R_AARCH64_ABS64:
473 | 			overflow_check = false;
474 | 			ovf = reloc_data(RELOC_OP_ABS, loc, val, 64);
475 | 			break;
476 | 		case R_AARCH64_ABS32:
477 | 			ovf = reloc_data(RELOC_OP_ABS, loc, val, 32);
478 | 			break;
479 | 		case R_AARCH64_ABS16:
480 | 			ovf = reloc_data(RELOC_OP_ABS, loc, val, 16);
481 | 			break;
482 | 		case R_AARCH64_PREL64:
483 | 			overflow_check = false;
484 | 			ovf = reloc_data(RELOC_OP_PREL, loc, val, 64);
485 | 			break;
486 | 		case R_AARCH64_PREL32:
487 | 			ovf = reloc_data(RELOC_OP_PREL, loc, val, 32);
488 | 			break;
489 | 		case R_AARCH64_PREL16:
490 | 			ovf = reloc_data(RELOC_OP_PREL, loc, val, 16);
491 | 			break;
492 | 
493 | 		/* MOVW instruction relocations. */
494 | 		case R_AARCH64_MOVW_UABS_G0_NC:
495 | 			overflow_check = false;
496 | 			fallthrough;
497 | 		case R_AARCH64_MOVW_UABS_G0:
498 | 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
499 | 					      AARCH64_INSN_IMM_MOVKZ);
500 | 			break;
501 | 		case R_AARCH64_MOVW_UABS_G1_NC:
502 | 			overflow_check = false;
503 | 			fallthrough;
504 | 		case R_AARCH64_MOVW_UABS_G1:
505 | 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
506 | 					      AARCH64_INSN_IMM_MOVKZ);
507 | 			break;
508 | 		case R_AARCH64_MOVW_UABS_G2_NC:
509 | 			overflow_check = false;
510 | 			fallthrough;
511 | 		case R_AARCH64_MOVW_UABS_G2:
512 | 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
513 | 					      AARCH64_INSN_IMM_MOVKZ);
514 | 			break;
515 | 		case R_AARCH64_MOVW_UABS_G3:
516 | 			/* We're using the top bits so we can't overflow. */
517 | 			overflow_check = false;
518 | 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48,
519 | 					      AARCH64_INSN_IMM_MOVKZ);
520 | 			break;
521 | 		case R_AARCH64_MOVW_SABS_G0:
522 | 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
523 | 					      AARCH64_INSN_IMM_MOVNZ);
524 | 			break;
525 | 		case R_AARCH64_MOVW_SABS_G1:
526 | 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
527 | 					      AARCH64_INSN_IMM_MOVNZ);
528 | 			break;
529 | 		case R_AARCH64_MOVW_SABS_G2:
530 | 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
531 | 					      AARCH64_INSN_IMM_MOVNZ);
532 | 			break;
533 | 		case R_AARCH64_MOVW_PREL_G0_NC:
534 | 			overflow_check = false;
535 | 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
536 | 					      AARCH64_INSN_IMM_MOVKZ);
537 | 			break;
538 | 		case R_AARCH64_MOVW_PREL_G0:
539 | 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
540 | 					      AARCH64_INSN_IMM_MOVNZ);
541 | 			break;
542 | 		case R_AARCH64_MOVW_PREL_G1_NC:
543 | 			overflow_check = false;
544 | 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
545 | 					      AARCH64_INSN_IMM_MOVKZ);
546 | 			break;
547 | 		case R_AARCH64_MOVW_PREL_G1:
548 | 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
549 | 					      AARCH64_INSN_IMM_MOVNZ);
550 | 			break;
551 | 		case R_AARCH64_MOVW_PREL_G2_NC:
552 | 			overflow_check = false;
553 | 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
554 | 					      AARCH64_INSN_IMM_MOVKZ);
555 | 			break;
556 | 		case R_AARCH64_MOVW_PREL_G2:
557 | 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
558 | 					      AARCH64_INSN_IMM_MOVNZ);
559 | 			break;
560 | 		case R_AARCH64_MOVW_PREL_G3:
561 | 			/* We're using the top bits so we can't overflow. */
562 | 			overflow_check = false;
563 | 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48,
564 | 					      AARCH64_INSN_IMM_MOVNZ);
565 | 			break;
566 | 
567 | 		/* Immediate instruction relocations. */
568 | 		case R_AARCH64_LD_PREL_LO19:
569 | 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
570 | 					     AARCH64_INSN_IMM_19);
571 | 			break;
572 | 		case R_AARCH64_ADR_PREL_LO21:
573 | 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
574 | 					     AARCH64_INSN_IMM_ADR);
575 | 			break;
576 | 		case R_AARCH64_ADR_PREL_PG_HI21_NC:
577 | 			overflow_check = false;
578 | 			fallthrough;
579 | 		case R_AARCH64_ADR_PREL_PG_HI21:
580 | 			ovf = reloc_insn_adrp(loc, val);
581 | 			if (ovf && ovf != -ERANGE)
582 | 				return ovf;
583 | 			break;
584 | 		case R_AARCH64_ADD_ABS_LO12_NC:
585 | 		case R_AARCH64_LDST8_ABS_LO12_NC:
586 | 			overflow_check = false;
587 | 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12,
588 | 					     AARCH64_INSN_IMM_12);
589 | 			break;
590 | 		case R_AARCH64_LDST16_ABS_LO12_NC:
591 | 			overflow_check = false;
592 | 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11,
593 | 					     AARCH64_INSN_IMM_12);
594 | 			break;
595 | 		case R_AARCH64_LDST32_ABS_LO12_NC:
596 | 			overflow_check = false;
597 | 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10,
598 | 					     AARCH64_INSN_IMM_12);
599 | 			break;
600 | 		case R_AARCH64_LDST64_ABS_LO12_NC:
601 | 			overflow_check = false;
602 | 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9,
603 | 					     AARCH64_INSN_IMM_12);
604 | 			break;
605 | 		case R_AARCH64_LDST128_ABS_LO12_NC:
606 | 			overflow_check = false;
607 | 			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8,
608 | 					     AARCH64_INSN_IMM_12);
609 | 			break;
610 | 		case R_AARCH64_TSTBR14:
611 | 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14,
612 | 					     AARCH64_INSN_IMM_14);
613 | 			break;
614 | 		case R_AARCH64_CONDBR19:
615 | 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
616 | 					     AARCH64_INSN_IMM_19);
617 | 			break;
618 | 		case R_AARCH64_JUMP26:
619 | 		case R_AARCH64_CALL26:
620 | 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
621 | 					     AARCH64_INSN_IMM_26);
622 | 			break;
623 | 		default:
624 | 			fprintf(stderr, "unsupported RELA relocation: %lu\n",
625 | 			       ELF64_R_TYPE(rel[i].r_info));
626 | 			return -ENOEXEC;
627 | 		}
628 | 
629 | 		if (overflow_check && ovf == -ERANGE)
630 | 			goto overflow;
631 | 	}
632 | 
633 | 	return 0;
634 | 
635 | overflow:
636 | 	fprintf(stderr, "overflow in relocation type %d val %lx\n",
637 | 	       (int)ELF64_R_TYPE(rel[i].r_info), val);
638 | 	return -ENOEXEC;
639 | }
640 | 


--------------------------------------------------------------------------------
/sleep_drift.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | UNAME=`uname`
 4 | 
 5 | if [[ "${UNAME}" == "Linux" ]] ; then
 6 | 	HOSTDIR=test_linux
 7 | else
 8 | 	HOSTDIR=test_macos
 9 | fi
10 | 
11 | # timeval::tv_sec and timeval::tv_usec at target-wake-up and actual-wake-up times, in times[0..3], respectively
12 | times=(`./elvenrel ${HOSTDIR}/stringx.o ${HOSTDIR}/test_timeval.o --quiet | tail -n 2 | awk -F ':' '{ print toupper($1), toupper($2) }'`)
13 | # bc accepts only upper case
14 | echo "ibase=16; (${times[2]} - ${times[0]}) * F4240 + ${times[3]} - ${times[1]}" | bc
15 | 


--------------------------------------------------------------------------------
/stringx.s:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Hex-to-ascii routines for various bitnesses
  3 |  *
  4 |  * Copyright (C) 2019-2021 Martin Krastev <blu.dark@gmail.com>
  5 |  */
  6 | 
  7 | 	.arch armv8-a
  8 | 
  9 | 	.global string_x8
 10 | 	.global string_x16
 11 | 	.global string_x16_1
 12 | 	.global string_x16_2
 13 | 	.global string_x32
 14 | 	.global string_x64
 15 | 	.global string_x64_1
 16 | 	.text
 17 | 
 18 | // string_x8: convert the low byte of w1 to two lowercase hex ASCII digits, high nibble first
 19 | // x0: output buffer; exactly 2 bytes are stored, no terminator is written
 20 | // w1: value to convert, bits [7:0]; higher bits are ignored
 21 | // clobbers: x2, x3, x4, x5 (x1 and the condition flags are modified as well)
 22 | 	.align 4
 23 | string_x8:
 24 | 	mov     w4, '0' - 0x0 // ASCII bias for nibbles 0x0..0x9
 25 | 	mov     w5, 'a' - 0xa // ASCII bias for nibbles 0xa..0xf
 26 | 	ubfx    w2, w1,  4, 4 // w2 = bits [7:4]
 27 | 	and     w1, w1, 15 // w1 = bits [3:0]
 28 | 	cmp     w2, 0xa
 29 | 	csel    w3, w4, w5, LO // w3 = bias for the high nibble
 30 | 	cmp     w1, 0xa
 31 | 	csel    w4, w4, w5, LO // w4 = bias for the low nibble
 32 | 	add     w2, w2, w3
 33 | 	strb    w2, [x0, 0] // first character: high nibble
 34 | 	add     w1, w1, w4
 35 | 	strb    w1, [x0, 1] // second character: low nibble
 36 | 	ret
 37 | 
 38 | // string_x16: convert the low 16 bits of w1 to four lowercase hex ASCII digits, most significant nibble first
 39 | // x0: output buffer; exactly 4 bytes are stored, no terminator is written
 40 | // w1: value to convert, bits [15:0]; higher bits are ignored
 41 | // clobbers: x2, x3, x4, x5, x6, x7, x8, x9 (x1 and the condition flags are modified as well)
 42 | 	.align 4
 43 | string_x16:
 44 | 	mov     w5, '0' - 0x0 // ASCII bias for nibbles 0x0..0x9
 45 | 	mov     w6, 'a' - 0xa // ASCII bias for nibbles 0xa..0xf
 46 | 	ubfx    w4, w1, 12, 4 // w4 = bits [15:12]
 47 | 	ubfx    w3, w1,  8, 4 // w3 = bits [11:8]
 48 | 	ubfx    w2, w1,  4, 4 // w2 = bits [7:4]
 49 | 	and     w1, w1, 15 // w1 = bits [3:0]
 50 | 	cmp     w4, 0xa
 51 | 	csel    w7, w5, w6, LO // w7 = bias for nibble 3
 52 | 	cmp     w3, 0xa
 53 | 	csel    w8, w5, w6, LO // w8 = bias for nibble 2
 54 | 	cmp     w2, 0xa
 55 | 	csel    w9, w5, w6, LO // w9 = bias for nibble 1
 56 | 	cmp     w1, 0xa
 57 | 	csel    w5, w5, w6, LO // w5 = bias for nibble 0 (the '0' bias is no longer needed)
 58 | 	add     w4, w4, w7
 59 | 	strb    w4, [x0, 0] // first character: most significant nibble
 60 | 	add     w3, w3, w8
 61 | 	strb    w3, [x0, 1]
 62 | 	add     w2, w2, w9
 63 | 	strb    w2, [x0, 2]
 64 | 	add     w1, w1, w5
 65 | 	strb    w1, [x0, 3] // last character: least significant nibble
 66 | 	ret
 67 | 
 68 | // string_x16_1: same result as string_x16, but processes two nibbles per register (one per byte)
 69 | // x0: output buffer; exactly 4 bytes are stored, no terminator is written
 70 | // w1: value to convert, bits [15:0]
 71 | // clobbers: x2, x3, x4, x5, x6, x7 (x1 and the condition flags are modified as well)
 72 | 	.align 4
 73 | string_x16_1:
 74 | 	mov     w3, '0' - 0x0 // ASCII bias for nibbles 0x0..0x9
 75 | 	mov     w4, 'a' - 0xa // ASCII bias for nibbles 0xa..0xf
 76 | 	mov     w5, 0xa // threshold compared against each nibble
 77 | 	mov     w6, 0x0f0f // keeps the low nibble of each of the two low bytes
 78 | 	lsr     w2, w1, 4
 79 | 	and     w1, w1, w6 // even-index nibbles
 80 | 	and     w2, w2, w6 // odd-index nibbles
 81 | 	cmp     w5, w1, UXTB // 0xa vs. low byte of w1 (nibble 0)
 82 | 	csel    w6, w3, w4, HI // '0' bias when 0xa > nibble, else 'a' - 0xa
 83 | 	cmp     w5, w2, UXTB // 0xa vs. low byte of w2 (nibble 1)
 84 | 	csel    w7, w3, w4, HI
 85 | 	add     w1, w1, w6 // low byte becomes the ASCII digit; the sum stays below 0x100, so byte 1 is unaffected
 86 | 	add     w2, w2, w7
 87 | 	strb    w1, [x0, 3] // nibble 0 is the last character
 88 | 	strb    w2, [x0, 2] // nibble 1
 89 | 	lsr     w1, w1, 8 // bring nibble 2 into the low byte
 90 | 	lsr     w2, w2, 8 // bring nibble 3 into the low byte
 91 | 	cmp     w5, w1, UXTB
 92 | 	csel    w6, w3, w4, HI
 93 | 	cmp     w5, w2, UXTB
 94 | 	csel    w7, w3, w4, HI
 95 | 	add     w1, w1, w6
 96 | 	add     w2, w2, w7
 97 | 	strb    w1, [x0, 1] // nibble 2
 98 | 	strb    w2, [x0, 0] // nibble 3 is the first character
 99 | 	ret
100 | 
101 | // string_x16_2: SIMD variant of string_x16; produces 4 lowercase hex ASCII digits and writes them with one 32-bit store
102 | // x0: output buffer; exactly 4 bytes are stored, no terminator is written
103 | // w1: value to convert, bits [15:0]; w1 is byte-swapped in place
104 | // clobbers: v0, v1, v3, v4, v5, v6
105 | 	.align 4
106 | string_x16_2:
107 | 	rev16   w1, w1 // we write the result via a single store op, so correct for digit order, part one: swap octet order
108 | 	movi    v3.8b, '0' - 0x0 // per-lane ASCII bias for nibbles 0x0..0x9
109 | 	movi    v4.8b, 'a' - 0xa // per-lane ASCII bias for nibbles 0xa..0xf
110 | 	movi    v5.8b, 0xa // per-lane threshold
111 | 	movi    v6.8b, 0xf // per-lane low-nibble mask
112 | 	fmov    s0, w1 // move the value into the low 32 bits of v0
113 | 	ushr    v1.8b, v0.8b, 4 // v1 = high nibble of each byte
114 | 	and     v0.8b, v0.8b, v6.8b // v0 = low nibble of each byte
115 | 	zip1    v0.8b, v1.8b, v0.8b // we write the result via a single store op, so correct for digit order, part two: swap nibble order
116 | 	cmhi    v1.8b, v5.8b, v0.8b // lane mask: all ones where 0xa > nibble
117 | 	bsl     v1.8b, v3.8b, v4.8b // v1 = '0' bias where the mask is set, 'a' - 0xa bias elsewhere
118 | 	add     v0.8b, v0.8b, v1.8b // nibble + bias = ASCII digit
119 | 	str     s0, [x0] // store the 4 characters
120 | 	ret
121 | 
122 | // string_x32: convert w1 to 8 lowercase hex ASCII digits and write them with one 64-bit store
123 | // x0: output buffer; exactly 8 bytes are stored, no terminator is written
124 | // w1: value to convert, bits [31:0]; w1 is byte-swapped in place
125 | // clobbers: v0, v1, v3, v4, v5, v6
126 | 	.align 4
127 | string_x32:
128 | 	rev     w1, w1 // we write the result via a single store op, so correct for digit order, part one: swap octet order
129 | 	movi    v3.8b, '0' - 0x0 // per-lane ASCII bias for nibbles 0x0..0x9
130 | 	movi    v4.8b, 'a' - 0xa // per-lane ASCII bias for nibbles 0xa..0xf
131 | 	movi    v5.8b, 0xa // per-lane threshold
132 | 	movi    v6.8b, 0xf // per-lane low-nibble mask
133 | 	fmov    s0, w1 // move the value into the low 32 bits of v0
134 | 	ushr    v1.8b, v0.8b, 4 // v1 = high nibble of each byte
135 | 	and     v0.8b, v0.8b, v6.8b // v0 = low nibble of each byte
136 | 	zip1    v0.8b, v1.8b, v0.8b // we write the result via a single store op, so correct for digit order, part two: swap nibble order
137 | 	cmhi    v1.8b, v5.8b, v0.8b // lane mask: all ones where 0xa > nibble
138 | 	bsl     v1.8b, v3.8b, v4.8b // v1 = '0' bias where the mask is set, 'a' - 0xa bias elsewhere
139 | 	add     v0.8b, v0.8b, v1.8b // nibble + bias = ASCII digit
140 | 	str     d0, [x0] // store the 8 characters
141 | 	ret
142 | 
143 | // string_x64: convert x1 to 16 lowercase hex ASCII digits and write them with one 128-bit store
144 | // x0: output buffer; exactly 16 bytes are stored, no terminator is written
145 | // x1: value to convert, bits [63:0]; x1 is byte-swapped in place
146 | // clobbers: v0, v1, v3, v4, v5, v6
147 | 	.align 4
148 | string_x64:
149 | 	rev     x1, x1 // we write the result via a single store op, so correct for digit order, part one: swap octet order
150 | 	movi    v3.16b, '0' - 0x0 // per-lane ASCII bias for nibbles 0x0..0x9
151 | 	movi    v4.16b, 'a' - 0xa // per-lane ASCII bias for nibbles 0xa..0xf
152 | 	movi    v5.16b, 0xa // per-lane threshold
153 | 	movi    v6.16b, 0xf // per-lane low-nibble mask
154 | 	fmov    d0, x1 // move the value into the low 64 bits of v0
155 | 	ushr    v1.8b, v0.8b, 4 // v1 = high nibble of each of the 8 input bytes
156 | 	and     v0.8b, v0.8b, v6.8b // v0 = low nibble of each of the 8 input bytes
157 | 	zip1    v0.16b, v1.16b, v0.16b // we write the result via a single store op, so correct for digit order, part two: swap nibble order
158 | 	cmhi    v1.16b, v5.16b, v0.16b // lane mask: all ones where 0xa > nibble
159 | 	bsl     v1.16b, v3.16b, v4.16b // v1 = '0' bias where the mask is set, 'a' - 0xa bias elsewhere
160 | 	add     v0.16b, v0.16b, v1.16b // nibble + bias = ASCII digit
161 | 	str     q0, [x0] // store the 16 characters
162 | 	ret
163 | 
164 | // string_x64_1: same instructions as string_x64, issued in a different order (nibble mask and fmov come first)
165 | // x0: output buffer; exactly 16 bytes are stored, no terminator is written
166 | // x1: value to convert, bits [63:0]; x1 is byte-swapped in place
167 | // clobbers: v0, v1, v3, v4, v5, v6
168 | 	.align 4
169 | string_x64_1:
170 | 	rev     x1, x1 // we write the result via a single store op, so correct for digit order, part one: swap octet order
171 | 	movi    v6.16b, 0xf // per-lane low-nibble mask
172 | 	fmov    d0, x1 // move the value into the low 64 bits of v0
173 | 	movi    v3.16b, '0' - 0x0 // per-lane ASCII bias for nibbles 0x0..0x9
174 | 	movi    v4.16b, 'a' - 0xa // per-lane ASCII bias for nibbles 0xa..0xf
175 | 	movi    v5.16b, 0xa // per-lane threshold
176 | 	ushr    v1.8b, v0.8b, 4 // v1 = high nibble of each of the 8 input bytes
177 | 	and     v0.8b, v0.8b, v6.8b // v0 = low nibble of each of the 8 input bytes
178 | 	zip1    v0.16b, v1.16b, v0.16b // we write the result via a single store op, so correct for digit order, part two: swap nibble order
179 | 	cmhi    v1.16b, v5.16b, v0.16b // lane mask: all ones where 0xa > nibble
180 | 	bsl     v1.16b, v3.16b, v4.16b // v1 = '0' bias where the mask is set, 'a' - 0xa bias elsewhere
181 | 	add     v0.16b, v0.16b, v1.16b // nibble + bias = ASCII digit
182 | 	str     q0, [x0] // store the 16 characters
183 | 	ret
184 | 


--------------------------------------------------------------------------------
/strlen_linux.s:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2013 ARM Ltd.
  3 |  * Copyright (C) 2013 Linaro.
  4 |  *
  5 |  * This code is based on glibc cortex strings work originally authored by Linaro
  6 |  * and re-licensed under GPLv2 for the Linux kernel. The original code can
  7 |  * be found @
  8 |  *
  9 |  * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 10 |  * files/head:/src/aarch64/
 11 |  *
 12 |  * This program is free software; you can redistribute it and/or modify
 13 |  * it under the terms of the GNU General Public License version 2 as
 14 |  * published by the Free Software Foundation.
 15 |  *
 16 |  * This program is distributed in the hope that it will be useful,
 17 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 19 |  * GNU General Public License for more details.
 20 |  *
 21 |  * You should have received a copy of the GNU General Public License
 22 |  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 23 |  */
 24 | 
 25 | 	.arch armv8-a
 26 | 
 27 | 	.global strlen_linux
 28 | 	.text
 29 | 
 30 | /*
 31 |  * calculate the length of a string
 32 |  *
 33 |  * Parameters:
 34 |  *	x0 - const string pointer
 35 |  * Returns:
 36 |  *	x0 - the length of the string
 37 |  */
 38 | 
 39 | /* Arguments and results.  */
 40 | srcin		.req	x0
 41 | len			.req	x0
 42 | 
 43 | /* Locals and temporaries.  */
 44 | src			.req	x1
 45 | data1		.req	x2
 46 | data2		.req	x3
 47 | data2a		.req	x4
 48 | has_nul1	.req	x5
 49 | has_nul2	.req	x6
 50 | tmp1		.req	x7
 51 | tmp2		.req	x8
 52 | tmp3		.req	x9
 53 | tmp4		.req	x10
 54 | zeroones	.req	x11
 55 | pos			.req	x12
 56 | 
 57 | 	.equ REP8_01, 0x0101010101010101
 58 | 	.equ REP8_7f, 0x7f7f7f7f7f7f7f7f
 59 | 	.equ REP8_80, 0x8080808080808080
 60 | 
 61 | 	.align 4
 62 | strlen_linux:
 63 | 	mov		zeroones, #REP8_01	/* 0x01 in every byte.  */
 64 | 	bic		src, srcin, #15	/* src = srcin rounded down to a multiple of 16.  */
 65 | 	ands	tmp1, srcin, #15	/* tmp1 = srcin & 15; Z is set when srcin is 16-byte aligned.  */
 66 | 	b.ne	.Lmisaligned
 67 | 	/*
 68 | 	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
 69 | 	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
 70 | 	* can be done in parallel across the entire word.
 71 | 	*/
 72 | 	/*
 73 | 	* The inner loop deals with two Dwords at a time. This has a
 74 | 	* slightly higher start-up cost, but we should win quite quickly,
 75 | 	* especially on cores with a high number of issue slots per
 76 | 	* cycle, as we get much better parallelism out of the operations.
 77 | 	*/
 78 | .Lloop:
 79 | 	ldp		data1, data2, [src], #16	/* Load 16 bytes, then advance src by 16.  */
 80 | .Lrealigned:
 81 | 	sub		tmp1, data1, zeroones	/* X - 1 for data1.  */
 82 | 	orr		tmp2, data1, #REP8_7f	/* X | 0x7f for data1.  */
 83 | 	sub		tmp3, data2, zeroones	/* X - 1 for data2.  */
 84 | 	orr		tmp4, data2, #REP8_7f	/* X | 0x7f for data2.  */
 85 | 	bic		has_nul1, tmp1, tmp2	/* (X - 1) & ~(X | 0x7f) for data1.  */
 86 | 	bics	has_nul2, tmp3, tmp4	/* Same for data2; Z is set when the result is zero.  */
 87 | 	ccmp	has_nul1, #0, #0, eq	/* NZCV = 0000  */
 88 | 	b.eq	.Lloop	/* Both results are zero: keep scanning.  */
 89 | 
 90 | 	sub		len, src, srcin	/* src has already been advanced past both Dwords.  */
 91 | 	cbz		has_nul1, .Lnul_in_data2
 92 | 	sub		len, len, #8	/* NUL is in data1: step back over data2.  */
 93 | 	mov		has_nul2, has_nul1
 94 | .Lnul_in_data2:
 95 | 	sub		len, len, #8	/* Step back to the start of the Dword holding the NUL.  */
 96 | 	rev		has_nul2, has_nul2	/* Reverse the bytes so clz reaches the lowest-order flagged byte first.  */
 97 | 	clz		pos, has_nul2
 98 | 	add		len, len, pos, lsr #3		/* Bits to bytes.  */
 99 | 	ret
100 | 
101 | .Lmisaligned:
102 | 	cmp		tmp1, #8	/* Flags are consumed by the csinv/csel below; nothing in between sets them.  */
103 | 	neg		tmp1, tmp1
104 | 	ldp		data1, data2, [src], #16	/* Load from the rounded-down address, so bytes before srcin are included.  */
105 | 	lsl		tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
106 | 	mov		tmp2, #~0
107 | 	lsr		tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
108 | 
109 | 	orr		data1, data1, tmp2	/* Set the masked low-order bytes to 0xff so they cannot read as NUL.  */
110 | 	orr		data2a, data2, tmp2	/* Same masking applied to data2, kept in a separate register.  */
111 | 	csinv	data1, data1, xzr, le	/* Offset > 8: replace data1 with all ones.  */
112 | 	csel	data2, data2, data2a, le	/* Offset > 8: use the masked copy of data2.  */
113 | 	b		.Lrealigned
114 | 
115 | 


--------------------------------------------------------------------------------
/test_common/Makefile:
--------------------------------------------------------------------------------
 1 | # Differentiate between GAS and Apple clang
 2 | # defsym is called as $(call defsym,NAME,VALUE) and expands to the option
 3 | # that defines an assembler symbol for the detected assembler
 4 | AS_VENDOR := $(word 1, $(shell $(AS) --version))
 5 | ifeq ($(AS_VENDOR), GNU)
 6 | 	ASFLAGS += --strip-local-absolute
 7 | 	defsym = --defsym $(1)=$(2)
 8 | else
 9 | 	ASFLAGS += --target=aarch64-linux-gnu
10 | 	defsym = -Wa,-defsym,$(1)=$(2)
11 | endif
12 | REL := test_bounce_data_aosoa_alt_0.o \
13 |        test_bounce_data_aosoa_alt_1.o \
14 |        test_bounce_data_aosoa_alt_2.o \
15 |        test_bounce_data_aosoa_alt_3.o \
16 |        memset.o
17 | 
18 | # `all` comes before any file rule so that a bare `make` builds everything in
19 | # REL instead of only the first object that has an explicit rule
20 | .PHONY: all clean
21 | all: $(REL)
22 | 
23 | # FB_DIM_X is set to the terminal width reported by `tput cols` at build time
24 | test_bounce_data_aosoa_alt_3.o: test_bounce_data_aosoa_alt_3.s
25 | 	$(AS) $(ASFLAGS) $(call defsym,FB_DIM_X,$(shell tput cols)) -o $@ $^
26 | 
27 | clean:
28 | 	rm -f $(REL)
29 | 


--------------------------------------------------------------------------------
/test_common/memset.s:
--------------------------------------------------------------------------------
 1 | 	.arch armv8-a
 2 | 
 3 | 	.global memset
 4 | 	.text
 5 | 
 6 | // memset a buffer to a given value; does unaligned writes
 7 | // x0: buffer; post-incremented by every store except the final single-byte one
 8 | // x1: length in bytes; bits [4:0] select which tail stores are issued
 9 | // v0: byte value replicated to Q-form
10 | // clobbers: x2 (the condition flags are modified as well)
11 | 	.align 4
12 | memset:
13 | 	lsr	x2, x1, 5 // x2 = number of whole 32-byte chunks
14 | 	cbz	x2, .LLtail0 // fewer than 32 bytes: go straight to the tail
15 | .LLloop:
16 | 	stp	q0, q0, [x0], 32 // write 32 bytes, advance x0
17 | 	subs	x2, x2, 1
18 | 	bne	.LLloop
19 | .LLtail0:
20 | 	tbz	x1, 4, .LLtail1 // skip unless bit 4 of the length is set
21 | 	str	q0, [x0], 16 // write 16 bytes
22 | .LLtail1:
23 | 	tbz	x1, 3, .LLtail2 // skip unless bit 3 of the length is set
24 | 	str	d0, [x0], 8 // write 8 bytes
25 | .LLtail2:
26 | 	tbz	x1, 2, .LLtail3 // skip unless bit 2 of the length is set
27 | 	str	s0, [x0], 4 // write 4 bytes
28 | .LLtail3:
29 | 	tbz	x1, 1, .LLtail4 // skip unless bit 1 of the length is set
30 | 	str	h0, [x0], 2 // write 2 bytes
31 | .LLtail4:
32 | 	tbz	x1, 0, .LLdone // skip unless bit 0 of the length is set
33 | 	str	b0, [x0] // write the last byte; x0 is not advanced here
34 | .LLdone:
35 | 	ret
36 | 


--------------------------------------------------------------------------------
/test_common/memset_woa.s:
--------------------------------------------------------------------------------
 1 | // DISCLAIMER: the source code in this translation unit originates from 3rd-
 2 | // party software and is not subject to the repo license agreement; such code
 3 | // is included solely for research purposes and is not otherwise used by the
 4 | // rest of the project in any capacity other than testing.
 5 | 
 6 | 	.arch armv8-a
 7 | 
 8 | 	.global memset_woa
 9 | 	.text
10 | 
11 | // WindowsOnArm RTC_memset -- routine reverse-engineered from WoA CRT
12 | // x0: buffer; advanced past the bytes written
13 | // x1: value; NOTE(review): stp/str store all 64 bits of x1 while strb stores only its low byte, so x1 presumably holds the fill byte replicated 8 times -- confirm against callers
14 | // x2: length in bytes; modified
15 | // clobbers: x9 (the condition flags are modified as well)
16 | 	.align 4
17 | memset_woa:
18 | 	ands	x9, x2, -16 // x9 = length rounded down to a multiple of 16; Z is set when length < 16
19 | 	and	x2, x2, 15 // x2 = remaining 0..15 bytes (does not touch the flags)
20 | 	beq	.Lwoa_tail0 // uses the flags from the ands above
21 | 	add	x9, x9, x0 // x9 = end address of the 16-byte-chunk region
22 | .Lwoa_loop16:
23 | 	stp	x1, x1, [x0], 16 // write 16 bytes, advance x0
24 | 	cmp	x0, x9
25 | 	blo	.Lwoa_loop16
26 | 	cbnz	x2, .Lwoa_tail0 // leftover bytes remain
27 | .Lwoa_done:
28 | 	ret
29 | .Lwoa_tail0:
30 | 	cmp	x2, 8
31 | 	blo	.Lwoa_tail1 // fewer than 8 bytes left: byte loop only
32 | 	str	x1, [x0], 8 // write 8 bytes, advance x0
33 | 	sub	x2, x2, 8
34 | .Lwoa_tail1:
35 | 	cbz	x2, .Lwoa_done
36 | 	add	x2, x2, x0 // x2 = end address of the byte-wise tail
37 | .Lwoa_loop1:
38 | 	strb	w1, [x0], 1 // write 1 byte, advance x0
39 | 	cmp	x0, x2
40 | 	blo	.Lwoa_loop1
41 | 	ret
42 | 


--------------------------------------------------------------------------------
/test_common/test_bounce_data_aosoa_alt_0.s:
--------------------------------------------------------------------------------
 1 | 	.global blip, blip_end, erase_end // symbols exported from this data-only unit
 2 | 
 3 | 	.section .data
 4 | 	.align 6 // 2^6 = 64-byte alignment on AArch64
 5 | blip: // AoSoA: each group of 4 blips is four rows of 4 words: pos_x, pos_y, step_x, step_y
 6 | 	.word 0x00000000, 0x00000002, 0x00000004, 0x00000000 // blip{0..3} pos_x
 7 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000001 // blip{0..3} pos_y
 8 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{0..3} step_x
 9 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{0..3} step_y
10 | 
11 | 	.word 0x00000000, 0x00000002, 0x00000004, 0x00000000 // blip{4..7} pos_x
12 | 	.word 0x00000002, 0x00000002, 0x00000002, 0x00000003 // blip{4..7} pos_y
13 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{4..7} step_x
14 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{4..7} step_y
15 | 
16 | 	.word 0x00000000, 0x00000002, 0x00000004, 0x00000008
17 | 	.word 0x00000004, 0x00000004, 0x00000004, 0x00000000
18 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
19 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
20 | 
21 | 	.word 0x00000008, 0x00000008, 0x00000008, 0x00000008
22 | 	.word 0x00000001, 0x00000002, 0x00000003, 0x00000004
23 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
24 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
25 | 
26 | 	.word 0x0000000a, 0x0000000c, 0x00000010, 0x00000012
27 | 	.word 0x00000004, 0x00000004, 0x00000000, 0x00000000
28 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
29 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
30 | 
31 | 	.word 0x00000014, 0x00000010, 0x00000010, 0x00000012
32 | 	.word 0x00000000, 0x00000001, 0x00000002, 0x00000002
33 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
34 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
35 | 
36 | 	.word 0x00000014, 0x00000010, 0x00000010, 0x0000001c
37 | 	.word 0x00000002, 0x00000003, 0x00000004, 0x00000000
38 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
39 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
40 | 
41 | 	.word 0x0000001e, 0x00000020, 0x0000001c, 0x00000022
42 | 	.word 0x00000000, 0x00000000, 0x00000001, 0x00000001
43 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
44 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
45 | 
46 | 	.word 0x0000001c, 0x0000001e, 0x00000020, 0x0000001c
47 | 	.word 0x00000002, 0x00000002, 0x00000002, 0x00000003
48 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
49 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
50 | 
51 | 	.word 0x00000022, 0x0000001c, 0x00000022, 0x00000026
52 | 	.word 0x00000003, 0x00000004, 0x00000004, 0x00000000
53 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
54 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
55 | 
56 | 	.word 0x00000028, 0x0000002a, 0x00000026, 0x00000026
57 | 	.word 0x00000000, 0x00000000, 0x00000001, 0x00000002
58 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
59 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
60 | 
61 | 	.word 0x00000028, 0x0000002a, 0x00000026, 0x00000026
62 | 	.word 0x00000002, 0x00000002, 0x00000003, 0x00000004
63 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
64 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
65 | 
66 | 	.word 0x00000028, 0x0000002a, 0x0000002e, 0x0000002e
67 | 	.word 0x00000004, 0x00000004, 0x00000000, 0x00000001
68 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
69 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
70 | 
71 | 	.word 0x0000002e, 0x0000002e, 0x0000002e, 0x00000030
72 | 	.word 0x00000002, 0x00000003, 0x00000004, 0x00000004
73 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
74 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
75 | 
76 | 	.word 0x00000032, 0x00000036, 0x00000038, 0x00000036
77 | 	.word 0x00000004, 0x00000001, 0x00000002, 0x00000003
78 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
79 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
80 | blip_end: // end of the blip records
81 | 	.fill (blip_end - blip) / 16, 4 // one zero-filled 4-byte slot per 16 bytes of blip data, i.e. per blip
82 | erase_end: // end of the zero-filled area that follows the records
83 | 


--------------------------------------------------------------------------------
/test_common/test_bounce_data_aosoa_alt_1.s:
--------------------------------------------------------------------------------
 1 | 	.global blip, blip_end, erase_end // symbols exported from this data-only unit
 2 | 
 3 | 	.section .data
 4 | 	.align 6 // 2^6 = 64-byte alignment on AArch64
 5 | blip: // AoSoA: each group of 4 blips is four rows of 4 words: pos_x, pos_y, step_x, step_y
 6 | 	.word 0x00000000, 0x00000006, 0x00000000, 0x00000006 // blip{0..3} pos_x
 7 | 	.word 0x00000000, 0x00000000, 0x00000001, 0x00000001 // blip{0..3} pos_y
 8 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{0..3} step_x
 9 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{0..3} step_y
10 | 
11 | 	.word 0x00000000, 0x00000002, 0x00000004, 0x00000006 // blip{4..7} pos_x
12 | 	.word 0x00000002, 0x00000002, 0x00000002, 0x00000002 // blip{4..7} pos_y
13 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{4..7} step_x
14 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{4..7} step_y
15 | 
16 | 	.word 0x00000000, 0x00000006, 0x00000000, 0x00000006
17 | 	.word 0x00000003, 0x00000003, 0x00000004, 0x00000004
18 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
19 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
20 | 
21 | 	.word 0x0000000a, 0x0000000c, 0x0000000e, 0x0000000a
22 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000001
23 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
24 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
25 | 
26 | 	.word 0x0000000a, 0x0000000c, 0x0000000e, 0x0000000a
27 | 	.word 0x00000002, 0x00000002, 0x00000002, 0x00000003
28 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
29 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
30 | 
31 | 	.word 0x0000000a, 0x0000000c, 0x0000000e, 0x00000012
32 | 	.word 0x00000004, 0x00000004, 0x00000004, 0x00000000
33 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
34 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
35 | 
36 | 	.word 0x00000012, 0x00000012, 0x00000012, 0x00000012
37 | 	.word 0x00000001, 0x00000002, 0x00000003, 0x00000004
38 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
39 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
40 | 
41 | 	.word 0x00000014, 0x00000016, 0x0000001a, 0x0000001a
42 | 	.word 0x00000004, 0x00000004, 0x00000000, 0x00000001
43 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
44 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
45 | 
46 | 	.word 0x0000001a, 0x0000001a, 0x0000001a, 0x0000001c
47 | 	.word 0x00000002, 0x00000003, 0x00000004, 0x00000004
48 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
49 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
50 | 
51 | 	.word 0x0000001e, 0x00000024, 0x00000026, 0x00000028
52 | 	.word 0x00000004, 0x00000000, 0x00000000, 0x00000000
53 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
54 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
55 | 
56 | 	.word 0x00000022, 0x00000028, 0x00000022, 0x00000028
57 | 	.word 0x00000001, 0x00000001, 0x00000002, 0x00000002
58 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
59 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
60 | 
61 | 	.word 0x00000022, 0x00000028, 0x00000022, 0x00000024
62 | 	.word 0x00000003, 0x00000003, 0x00000004, 0x00000004
63 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
64 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
65 | 
66 | 	.word 0x00000026, 0x0000002c, 0x0000002e, 0x0000002c
67 | 	.word 0x00000004, 0x00000001, 0x00000002, 0x00000003
68 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
69 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
70 | blip_end: // end of the blip records
71 | 	.fill (blip_end - blip) / 16, 4 // one zero-filled 4-byte slot per 16 bytes of blip data, i.e. per blip
72 | erase_end: // end of the zero-filled area that follows the records
73 | 


--------------------------------------------------------------------------------
/test_common/test_bounce_data_aosoa_alt_2.s:
--------------------------------------------------------------------------------
  1 | 	.global blip, blip_end, erase_end // symbols exported from this data-only unit
  2 | 
  3 | 	.section .data
  4 | 	.align 6 // 2^6 = 64-byte alignment on AArch64
  5 | blip: // AoSoA: each group of 4 blips is four rows of 4 words: pos_x, pos_y, step_x, step_y
  6 | 	.word 0x00000000, 0x00000002, 0x00000004, 0x00000000 // blip{0..3} pos_x
  7 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000001 // blip{0..3} pos_y
  8 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{0..3} step_x
  9 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{0..3} step_y
 10 | 
 11 | 	.word 0x00000000, 0x00000002, 0x00000004, 0x00000000 // blip{4..7} pos_x
 12 | 	.word 0x00000002, 0x00000002, 0x00000002, 0x00000003 // blip{4..7} pos_y
 13 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{4..7} step_x
 14 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{4..7} step_y
 15 | 
 16 | 	.word 0x00000000, 0x00000002, 0x00000004, 0x00000008
 17 | 	.word 0x00000004, 0x00000004, 0x00000004, 0x00000000
 18 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 19 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 20 | 
 21 | 	.word 0x00000008, 0x00000008, 0x00000008, 0x00000008
 22 | 	.word 0x00000001, 0x00000002, 0x00000003, 0x00000004
 23 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 24 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 25 | 
 26 | 	.word 0x0000000a, 0x0000000c, 0x00000010, 0x00000012
 27 | 	.word 0x00000004, 0x00000004, 0x00000000, 0x00000000
 28 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 29 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 30 | 
 31 | 	.word 0x00000014, 0x00000010, 0x00000010, 0x00000012
 32 | 	.word 0x00000000, 0x00000001, 0x00000002, 0x00000002
 33 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 34 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 35 | 
 36 | 	.word 0x00000014, 0x00000010, 0x00000010, 0x0000001c
 37 | 	.word 0x00000002, 0x00000003, 0x00000004, 0x00000000
 38 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 39 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 40 | 
 41 | 	.word 0x0000001e, 0x00000020, 0x0000001c, 0x00000022
 42 | 	.word 0x00000000, 0x00000000, 0x00000001, 0x00000001
 43 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 44 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 45 | 
 46 | 	.word 0x0000001c, 0x0000001e, 0x00000020, 0x0000001c
 47 | 	.word 0x00000002, 0x00000002, 0x00000002, 0x00000003
 48 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 49 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 50 | 
 51 | 	.word 0x00000022, 0x0000001c, 0x00000022, 0x00000026
 52 | 	.word 0x00000003, 0x00000004, 0x00000004, 0x00000000
 53 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 54 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 55 | 
 56 | 	.word 0x00000028, 0x0000002a, 0x00000026, 0x00000026
 57 | 	.word 0x00000000, 0x00000000, 0x00000001, 0x00000002
 58 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 59 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 60 | 
 61 | 	.word 0x00000028, 0x0000002a, 0x00000026, 0x00000026
 62 | 	.word 0x00000002, 0x00000002, 0x00000003, 0x00000004
 63 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 64 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 65 | 
 66 | 	.word 0x00000028, 0x0000002a, 0x0000002e, 0x0000002e
 67 | 	.word 0x00000004, 0x00000004, 0x00000000, 0x00000001
 68 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 69 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 70 | 
 71 | 	.word 0x0000002e, 0x0000002e, 0x0000002e, 0x00000030
 72 | 	.word 0x00000002, 0x00000003, 0x00000004, 0x00000004
 73 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 74 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 75 | 
 76 | 	.word 0x00000032, 0x00000036, 0x00000038, 0x00000036
 77 | 	.word 0x00000004, 0x00000001, 0x00000002, 0x00000003
 78 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 79 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 80 | 
 81 | 	.word 0x00000050, 0x00000056, 0x00000050, 0x00000056 // blip{60..63} pos_x
 82 | 	.word 0x00000020, 0x00000020, 0x00000021, 0x00000021 // blip{60..63} pos_y
 83 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{60..63} step_x
 84 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{60..63} step_y
 85 | 
 86 | 	.word 0x00000050, 0x00000052, 0x00000054, 0x00000056 // blip{64..67} pos_x
 87 | 	.word 0x00000022, 0x00000022, 0x00000022, 0x00000022 // blip{64..67} pos_y
 88 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{64..67} step_x
 89 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{64..67} step_y
 90 | 
 91 | 	.word 0x00000050, 0x00000056, 0x00000050, 0x00000056
 92 | 	.word 0x00000023, 0x00000023, 0x00000024, 0x00000024
 93 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
 94 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 95 | 
 96 | 	.word 0x0000005a, 0x0000005c, 0x0000005e, 0x0000005a
 97 | 	.word 0x00000020, 0x00000020, 0x00000020, 0x00000021
 98 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
 99 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
100 | 
101 | 	.word 0x0000005a, 0x0000005c, 0x0000005e, 0x0000005a
102 | 	.word 0x00000022, 0x00000022, 0x00000022, 0x00000023
103 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
104 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
105 | 
106 | 	.word 0x0000005a, 0x0000005c, 0x0000005e, 0x00000062
107 | 	.word 0x00000024, 0x00000024, 0x00000024, 0x00000020
108 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
109 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
110 | 
111 | 	.word 0x00000062, 0x00000062, 0x00000062, 0x00000062
112 | 	.word 0x00000021, 0x00000022, 0x00000023, 0x00000024
113 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
114 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
115 | 
116 | 	.word 0x00000064, 0x00000066, 0x0000006a, 0x0000006a
117 | 	.word 0x00000024, 0x00000024, 0x00000020, 0x00000021
118 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
119 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
120 | 
121 | 	.word 0x0000006a, 0x0000006a, 0x0000006a, 0x0000006c
122 | 	.word 0x00000022, 0x00000023, 0x00000024, 0x00000024
123 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
124 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
125 | 
126 | 	.word 0x0000006e, 0x00000074, 0x00000076, 0x00000078
127 | 	.word 0x00000024, 0x00000020, 0x00000020, 0x00000020
128 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
129 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
130 | 
131 | 	.word 0x00000072, 0x00000078, 0x00000072, 0x00000078
132 | 	.word 0x00000021, 0x00000021, 0x00000022, 0x00000022
133 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
134 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
135 | 
136 | 	.word 0x00000072, 0x00000078, 0x00000072, 0x00000074
137 | 	.word 0x00000023, 0x00000023, 0x00000024, 0x00000024
138 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
139 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
140 | 
141 | 	.word 0x00000076, 0x0000007c, 0x0000007e, 0x0000007c
142 | 	.word 0x00000024, 0x00000021, 0x00000022, 0x00000023
143 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
144 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
145 | blip_end: // end of the blip records
146 | 	.fill (blip_end - blip) / 16, 4 // one zero-filled 4-byte slot per 16 bytes of blip data, i.e. per blip
147 | erase_end: // end of the zero-filled area that follows the records
148 | 


--------------------------------------------------------------------------------
/test_common/test_bounce_data_aosoa_alt_3.s:
--------------------------------------------------------------------------------
  1 | 	.global blip, blip_end, erase_end // symbols exported from this data unit
  2 | 
  3 | 	.if FB_DIM_X < 48 // FB_DIM_X is passed on the assembler command line by test_common/Makefile (terminal width from `tput cols`)
  4 | 		.err // FB must have at least 48 columns
  5 | 	.endif
  6 | 
  7 | 	.equ FB_MID, (FB_DIM_X - 48) / 2 // half of the columns left over beyond the required 48
  8 | 
  9 | 	.section .data
 10 | 	.align 6 // 2^6 = 64-byte alignment on AArch64
 11 | blip: // AoSoA: each group of 4 blips is four rows of 4 words: pos_x, pos_y, step_x, step_y
 12 | 	.word 0x00000000, 0x00000002, 0x00000004, 0x00000000 // blip{0..3} pos_x
 13 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000001 // blip{0..3} pos_y
 14 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{0..3} step_x
 15 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{0..3} step_y
 16 | 
 17 | 	.word 0x00000000, 0x00000002, 0x00000004, 0x00000000 // blip{4..7} pos_x
 18 | 	.word 0x00000002, 0x00000002, 0x00000002, 0x00000003 // blip{4..7} pos_y
 19 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{4..7} step_x
 20 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{4..7} step_y
 21 | 
 22 | 	.word 0x00000000, 0x00000002, 0x00000004, 0x00000008
 23 | 	.word 0x00000004, 0x00000004, 0x00000004, 0x00000000
 24 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 25 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 26 | 
 27 | 	.word 0x00000008, 0x00000008, 0x00000008, 0x00000008
 28 | 	.word 0x00000001, 0x00000002, 0x00000003, 0x00000004
 29 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 30 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 31 | 
 32 | 	.word 0x0000000a, 0x0000000c, 0x00000010, 0x00000012
 33 | 	.word 0x00000004, 0x00000004, 0x00000000, 0x00000000
 34 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 35 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 36 | 
 37 | 	.word 0x00000014, 0x00000010, 0x00000010, 0x00000012
 38 | 	.word 0x00000000, 0x00000001, 0x00000002, 0x00000002
 39 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 40 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 41 | 
 42 | 	.word 0x00000014, 0x00000010, 0x00000010, 0x0000001c
 43 | 	.word 0x00000002, 0x00000003, 0x00000004, 0x00000000
 44 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 45 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 46 | 
 47 | 	.word 0x0000001e, 0x00000020, 0x0000001c, 0x00000022
 48 | 	.word 0x00000000, 0x00000000, 0x00000001, 0x00000001
 49 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 50 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 51 | 
 52 | 	.word 0x0000001c, 0x0000001e, 0x00000020, 0x0000001c
 53 | 	.word 0x00000002, 0x00000002, 0x00000002, 0x00000003
 54 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 55 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 56 | 
 57 | 	.word 0x00000022, 0x0000001c, 0x00000022, 0x00000026
 58 | 	.word 0x00000003, 0x00000004, 0x00000004, 0x00000000
 59 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 60 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 61 | 
 62 | 	.word 0x00000028, 0x0000002a, 0x00000026, 0x00000026
 63 | 	.word 0x00000000, 0x00000000, 0x00000001, 0x00000002
 64 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 65 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 66 | 
 67 | 	.word 0x00000028, 0x0000002a, 0x00000026, 0x00000026
 68 | 	.word 0x00000002, 0x00000002, 0x00000003, 0x00000004
 69 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 70 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 71 | 
 72 | 	.word 0x00000028, 0x0000002a, 0x0000002e, 0x0000002e
 73 | 	.word 0x00000004, 0x00000004, 0x00000000, 0x00000001
 74 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 75 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 76 | 
 77 | 	.word 0x0000002e, 0x0000002e, 0x0000002e, 0x00000030
 78 | 	.word 0x00000002, 0x00000003, 0x00000004, 0x00000004
 79 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 80 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 81 | 
 82 | 	.word 0x00000032, 0x00000036, 0x00000038, 0x00000036
 83 | 	.word 0x00000004, 0x00000001, 0x00000002, 0x00000003
 84 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 85 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001
 86 | 
 87 | 	.word 0x00000050, 0x00000056, 0x00000050, 0x00000056 // blip{60..63} pos_x
 88 | 	.word 0x00000020, 0x00000020, 0x00000021, 0x00000021 // blip{60..63} pos_y
 89 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{60..63} step_x
 90 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{60..63} step_y
 91 | 
 92 | 	.word 0x00000050, 0x00000052, 0x00000054, 0x00000056 // blip{64..67} pos_x
 93 | 	.word 0x00000022, 0x00000022, 0x00000022, 0x00000022 // blip{64..67} pos_y
 94 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{64..67} step_x
 95 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{64..67} step_y
 96 | 
 97 | 	.word 0x00000050, 0x00000056, 0x00000050, 0x00000056 // blip{68..71} pos_x
 98 | 	.word 0x00000023, 0x00000023, 0x00000024, 0x00000024 // blip{68..71} pos_y
 99 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{68..71} step_x
100 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{68..71} step_y
101 | 
102 | 	.word 0x0000005a, 0x0000005c, 0x0000005e, 0x0000005a // blip{72..75} pos_x
103 | 	.word 0x00000020, 0x00000020, 0x00000020, 0x00000021 // blip{72..75} pos_y
104 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{72..75} step_x
105 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{72..75} step_y
106 | 
107 | 	.word 0x0000005a, 0x0000005c, 0x0000005e, 0x0000005a // blip{76..79} pos_x
108 | 	.word 0x00000022, 0x00000022, 0x00000022, 0x00000023 // blip{76..79} pos_y
109 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{76..79} step_x
110 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{76..79} step_y
111 | 
112 | 	.word 0x0000005a, 0x0000005c, 0x0000005e, 0x00000062 // blip{80..83} pos_x
113 | 	.word 0x00000024, 0x00000024, 0x00000024, 0x00000020 // blip{80..83} pos_y
114 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{80..83} step_x
115 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{80..83} step_y
116 | 
117 | 	.word 0x00000062, 0x00000062, 0x00000062, 0x00000062 // blip{84..87} pos_x
118 | 	.word 0x00000021, 0x00000022, 0x00000023, 0x00000024 // blip{84..87} pos_y
119 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{84..87} step_x
120 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{84..87} step_y
121 | 
122 | 	.word 0x00000064, 0x00000066, 0x0000006a, 0x0000006a // blip{88..91} pos_x
123 | 	.word 0x00000024, 0x00000024, 0x00000020, 0x00000021 // blip{88..91} pos_y
124 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{88..91} step_x
125 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{88..91} step_y
126 | 
127 | 	.word 0x0000006a, 0x0000006a, 0x0000006a, 0x0000006c // blip{92..95} pos_x
128 | 	.word 0x00000022, 0x00000023, 0x00000024, 0x00000024 // blip{92..95} pos_y
129 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{92..95} step_x
130 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{92..95} step_y
131 | 
132 | 	.word 0x0000006e, 0x00000074, 0x00000076, 0x00000078 // blip{96..99} pos_x
133 | 	.word 0x00000024, 0x00000020, 0x00000020, 0x00000020 // blip{96..99} pos_y
134 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{96..99} step_x
135 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{96..99} step_y
136 | 
137 | 	.word 0x00000072, 0x00000078, 0x00000072, 0x00000078 // blip{100..103} pos_x
138 | 	.word 0x00000021, 0x00000021, 0x00000022, 0x00000022 // blip{100..103} pos_y
139 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{100..103} step_x
140 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{100..103} step_y
141 | 
142 | 	.word 0x00000072, 0x00000078, 0x00000072, 0x00000074 // blip{104..107} pos_x
143 | 	.word 0x00000023, 0x00000023, 0x00000024, 0x00000024 // blip{104..107} pos_y
144 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{104..107} step_x
145 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{104..107} step_y
146 | 
147 | 	.word 0x00000076, 0x0000007c, 0x0000007e, 0x0000007c // blip{108..111} pos_x
148 | 	.word 0x00000024, 0x00000021, 0x00000022, 0x00000023 // blip{108..111} pos_y
149 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{108..111} step_x
150 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{108..111} step_y
151 | 
152 | 	.word 0x00 + FB_MID, 0x06 + FB_MID, 0x00 + FB_MID, 0x06 + FB_MID // blip{112..115} pos_x
153 | 	.word 0x00000020, 0x00000020, 0x00000021, 0x00000021             // blip{112..115} pos_y
154 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000000             // blip{112..115} step_x
155 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff             // blip{112..115} step_y
156 | 
157 | 	.word 0x00 + FB_MID, 0x02 + FB_MID, 0x04 + FB_MID, 0x06 + FB_MID // blip{116..119} pos_x
158 | 	.word 0x00000022, 0x00000022, 0x00000022, 0x00000022             // blip{116..119} pos_y
159 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000000             // blip{116..119} step_x
160 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff             // blip{116..119} step_y
161 | 
162 | 	.word 0x00 + FB_MID, 0x06 + FB_MID, 0x00 + FB_MID, 0x06 + FB_MID // blip{120..123} pos_x
163 | 	.word 0x00000023, 0x00000023, 0x00000024, 0x00000024 // blip{120..123} pos_y
164 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000000 // blip{120..123} step_x
165 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{120..123} step_y
166 | 
167 | 	.word 0x0a + FB_MID, 0x0c + FB_MID, 0x0e + FB_MID, 0x0a + FB_MID // blip{124..127} pos_x
168 | 	.word 0x00000020, 0x00000020, 0x00000020, 0x00000021 // blip{124..127} pos_y
169 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000000 // blip{124..127} step_x
170 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{124..127} step_y
171 | 
172 | 	.word 0x0a + FB_MID, 0x0c + FB_MID, 0x0e + FB_MID, 0x0a + FB_MID // blip{128..131} pos_x
173 | 	.word 0x00000022, 0x00000022, 0x00000022, 0x00000023 // blip{128..131} pos_y
174 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000000 // blip{128..131} step_x
175 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{128..131} step_y
176 | 
177 | 	.word 0x0a + FB_MID, 0x0c + FB_MID, 0x0e + FB_MID, 0x12 + FB_MID // blip{132..135} pos_x
178 | 	.word 0x00000024, 0x00000024, 0x00000024, 0x00000020 // blip{132..135} pos_y
179 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000000 // blip{132..135} step_x
180 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{132..135} step_y
181 | 
182 | 	.word 0x12 + FB_MID, 0x12 + FB_MID, 0x12 + FB_MID, 0x12 + FB_MID // blip{136..139} pos_x
183 | 	.word 0x00000021, 0x00000022, 0x00000023, 0x00000024 // blip{136..139} pos_y
184 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000000 // blip{136..139} step_x
185 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{136..139} step_y
186 | 
187 | 	.word 0x14 + FB_MID, 0x16 + FB_MID, 0x1a + FB_MID, 0x1a + FB_MID // blip{140..143} pos_x
188 | 	.word 0x00000024, 0x00000024, 0x00000020, 0x00000021 // blip{140..143} pos_y
189 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000000 // blip{140..143} step_x
190 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{140..143} step_y
191 | 
192 | 	.word 0x1a + FB_MID, 0x1a + FB_MID, 0x1a + FB_MID, 0x1c + FB_MID // blip{144..147} pos_x
193 | 	.word 0x00000022, 0x00000023, 0x00000024, 0x00000024 // blip{144..147} pos_y
194 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000000 // blip{144..147} step_x
195 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{144..147} step_y
196 | 
197 | 	.word 0x1e + FB_MID, 0x24 + FB_MID, 0x26 + FB_MID, 0x28 + FB_MID // blip{148..151} pos_x
198 | 	.word 0x00000024, 0x00000020, 0x00000020, 0x00000020 // blip{148..151} pos_y
199 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000000 // blip{148..151} step_x
200 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{148..151} step_y
201 | 
202 | 	.word 0x22 + FB_MID, 0x28 + FB_MID, 0x22 + FB_MID, 0x28 + FB_MID // blip{152..155} pos_x
203 | 	.word 0x00000021, 0x00000021, 0x00000022, 0x00000022 // blip{152..155} pos_y
204 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000000 // blip{152..155} step_x
205 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{152..155} step_y
206 | 
207 | 	.word 0x22 + FB_MID, 0x28 + FB_MID, 0x22 + FB_MID, 0x24 + FB_MID // blip{156..159} pos_x
208 | 	.word 0x00000023, 0x00000023, 0x00000024, 0x00000024 // blip{156..159} pos_y
209 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000000 // blip{156..159} step_x
210 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{156..159} step_y
211 | 
212 | 	.word 0x26 + FB_MID, 0x2c + FB_MID, 0x2e + FB_MID, 0x2c + FB_MID // blip{160..163} pos_x
213 | 	.word 0x00000024, 0x00000021, 0x00000022, 0x00000023 // blip{160..163} pos_y
214 | 	.word 0x00000000, 0x00000000, 0x00000000, 0x00000000 // blip{160..163} step_x
215 | 	.word 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff // blip{160..163} step_y
216 | blip_end: // end of the particle table; the erase buffer starts here
217 | 	.fill (blip_end - blip) / 16, 4 // one zeroed 32-bit slot per blip (a 64-byte pack holds 4 blips, i.e. 16 table bytes per blip)
218 | erase_end: // end of the erase buffer
219 | 


--------------------------------------------------------------------------------
/test_linux/Makefile:
--------------------------------------------------------------------------------
 1 | # Assembles the Linux test RELs. FB_DIM_X / FB_DIM_Y for the bounce tests are
 2 | # taken from the current terminal geometry (tput) when the recipe runs.
 3 | .DEFAULT_GOAL := all
 4 | .PHONY: all clean
 5 | 
 6 | ASFLAGS += --strip-local-absolute -I..
 7 | REL := test_text.o \
 8 |        test_rodata.o \
 9 |        test_data.o \
10 |        test_bss.o \
11 |        test_cross_0.o \
12 |        test_cross_1.o \
13 |        test_timeval.o \
14 |        stringx.o \
15 |        test_bounce.o \
16 |        test_bounce_neon.o \
17 |        test_bounce_neon_aosoa.o \
18 |        test_bounce_neon_aosoa_bg.o \
19 |        test_memset.o
20 | 
21 | stringx.o: ../stringx.s
22 | 	$(AS) $(ASFLAGS) -o $@ $^
23 | 
24 | test_bounce.o: test_bounce.s
25 | 	$(AS) $(ASFLAGS) --defsym FB_DIM_X=$(shell tput cols) --defsym FB_DIM_Y=$(shell tput lines) --defsym FRAMES=1024 -o $@ $^
26 | 
27 | test_bounce_neon.o: test_bounce_neon.s
28 | 	$(AS) $(ASFLAGS) --defsym FB_DIM_X=$(shell tput cols) --defsym FB_DIM_Y=$(shell tput lines) --defsym FRAMES=2048 -o $@ $^
29 | 
30 | test_bounce_neon_aosoa.o: test_bounce_neon_aosoa.s
31 | 	$(AS) $(ASFLAGS) --defsym FB_DIM_X=$(shell tput cols) --defsym FB_DIM_Y=$(shell tput lines) --defsym FRAMES=1024 -o $@ $^
32 | 
33 | test_bounce_neon_aosoa_bg.o: test_bounce_neon_aosoa_bg.s
34 | 	$(AS) $(ASFLAGS) --defsym FB_DIM_X=$(shell tput cols) --defsym FB_DIM_Y=$(shell tput lines) --defsym FRAMES=2048 -o $@ $^
35 | 
36 | all: $(REL)
37 | 
38 | clean:
39 | 	rm -f $(REL)
40 | 


--------------------------------------------------------------------------------
/test_linux/test_bitcount.s:
--------------------------------------------------------------------------------
  1 | 	.global _start
  2 | 
  3 | 	.equ SYS_write, 64 /* syscall numbers, loaded into x8 before svc */
  4 | 	.equ SYS_exit, 93
  5 | 	.equ STDOUT_FILENO, 1
  6 | 
  7 | 	.equ sample_bitset_num_u32, 100 * 1000 * 1000 /* number of 32-bit words in the sample bitset */
  8 | 
  9 | 	.include "macro.inc" /* presumably supplies adrf / movl used below (not A64 mnemonics) - confirm */
 10 | 
 11 | 	.text
 12 | _start: /* times a NEON population count over sample_bitset_u32, then writes the timer values and the count to stdout */
 13 | 	/* alloca local room */
 14 | 	sub	sp, sp, 32 /* [sp] = count, [sp, 8] = elapsed ticks, [sp, 16] = cntfrq, [sp, 24] = start cntvct */
 15 | 
 16 | 	mrs	x0, cntfrq_el0 /* counter frequency */
 17 | 	mrs	x1, cntvct_el0 /* counter value before the tested block */
 18 | 	stp	x0, x1, [sp, 16]
 19 | 
 20 | 	/* block tested { */
 21 | 	adrf	x0, sample_bitset_u32
 22 | 	movl	x1, sample_bitset_num_u32 /* x1 = number of u32 elements */
 23 | 	ands	x2, x1, -16 /* x2 = elements covered by whole 16-element chunks; Z set if there are none */
 24 | 	add	x3, x0, 32 /* read cursor kept 32 bytes ahead so the first ldp can use offset -32 */
 25 | 	movi	v0.2d, 0 /* v0..v3: four independent 4 x 32-bit accumulators */
 26 | 	movi	v1.2d, 0
 27 | 	movi	v2.2d, 0
 28 | 	movi	v3.2d, 0
 29 | 	b.eq	.Lbulk8 /* still the flags from ands: add / movi do not write NZCV */
 30 | .Lbulk16: /* 64 bytes (16 u32) per iteration */
 31 | 	ldp	q4, q5, [x3, -32]
 32 | 	ldp	q6, q7, [x3], 64
 33 | 	cnt	v4.16b, v4.16b /* per-byte population count */
 34 | 	cnt	v5.16b, v5.16b
 35 | 	cnt	v6.16b, v6.16b
 36 | 	cnt	v7.16b, v7.16b
 37 | 
 38 | 	uaddlp	v4.8h, v4.16b /* pairwise add bytes into halfwords */
 39 | 	uaddlp	v5.8h, v5.16b
 40 | 	uaddlp	v6.8h, v6.16b
 41 | 	uaddlp	v7.8h, v7.16b
 42 | 
 43 | 	uadalp	v0.4s, v4.8h /* pairwise add halfwords and accumulate into words */
 44 | 	uadalp	v1.4s, v5.8h
 45 | 	uadalp	v2.4s, v6.8h
 46 | 	uadalp	v3.4s, v7.8h
 47 | 	subs	x2, x2, 16
 48 | 	b.ne	.Lbulk16
 49 | .Lbulk8: /* tail: bits 3..0 of the element count select 8 / 4 / 2 / 1 leftover elements */
 50 | 	sub	x3, x3, 32 /* drop the 32-byte lead of the read cursor */
 51 | 	tbz	x1, 3, .Lbulk4
 52 | 	ldp	q4, q5, [x3], 32 /* 8 elements */
 53 | 	cnt	v4.16b, v4.16b
 54 | 	cnt	v5.16b, v5.16b
 55 | 
 56 | 	uaddlp	v4.8h, v4.16b
 57 | 	uaddlp	v5.8h, v5.16b
 58 | 
 59 | 	uadalp	v0.4s, v4.8h
 60 | 	uadalp	v1.4s, v5.8h
 61 | .Lbulk4:
 62 | 	tbz	x1, 2, .Lbulk2
 63 | 	ldr	q6, [x3], 16 /* 4 elements */
 64 | 	cnt	v6.16b, v6.16b
 65 | 
 66 | 	uaddlp	v6.8h, v6.16b
 67 | 	uadalp	v2.4s, v6.8h
 68 | .Lbulk2:
 69 | 	tbz	x1, 1, .Lunit
 70 | 	ldr	d7, [x3], 8 /* 2 elements; the D-form load zeroes the upper half of v7 */
 71 | 	cnt	v7.8b, v7.8b
 72 | 
 73 | 	uaddlp	v7.4h, v7.8b
 74 | 	uadalp	v3.4s, v7.8h /* implicit widening to match acc width */
 75 | .Lunit:
 76 | 	tbz	x1, 0, .Lfinal
 77 | 	ldr	s4, [x3] /* 1 element; the S-form load zeroes the rest of v4 */
 78 | 	cnt	v4.8b, v4.8b
 79 | 
 80 | 	uaddlp	v4.4h, v4.8b
 81 | 	uadalp	v0.4s, v4.8h /* implicit widening to match acc width */
 82 | .Lfinal: /* fold the four accumulators, then sum across lanes */
 83 | 	add	v0.4s, v1.4s, v0.4s
 84 | 	add	v1.4s, v3.4s, v2.4s
 85 | 	add	v0.4s, v1.4s, v0.4s
 86 | 	addv	s0, v0.4s
 87 | 	fmov	w2, s0 /* w2 = total number of set bits */
 88 | 	/* } block tested */
 89 | 
 90 | 	/* fill in elapsed time message */
 91 | 	mrs	x0, cntvct_el0
 92 | 	ldr	x1, [sp, 24]
 93 | 	sub	x0, x0, x1 /* elapsed counter ticks */
 94 | 	stp	x2, x0, [sp] /* [sp] = bit count, [sp, 8] = elapsed ticks */
 95 | 
 96 | 	ldr	x2, =string_x64
 97 | 	ldr	x1, [sp, 16] /* counter frequency */
 98 | 	adrf	x0, msg01_arg0
 99 | 	blr	x2 /* string_x64 far call */
100 | 
101 | 	ldr	x1, [sp, 8] /* elapsed ticks */
102 | 	adrf	x0, msg01_arg1
103 | 	blr	x2 /* string_x64 far call; NOTE(review): relies on x2 surviving the previous call - confirm against stringx.s */
104 | 
105 | 	/* fill in result message */
106 | 	ldr	x2, =string_x32
107 | 	ldr	w1, [sp] /* bit count */
108 | 	adrf	x0, msg02_arg0
109 | 	blr	x2 /* string_x32 far call */
110 | 
111 | 	/* dealloc local room */
112 | 	add	sp, sp, 32
113 | 
114 | 	mov	x8, SYS_write
115 | 	mov	x2, msg01_len + msg02_len /* msg01 and msg02 are adjacent in .data, so one write covers both */
116 | 	adrf	x1, msg01
117 | 	mov	x0, STDOUT_FILENO
118 | 	svc	0
119 | 
120 | 	mov	x8, SYS_exit
121 | 	mov	x0, xzr /* exit status 0 */
122 | 	svc	0
123 | 
124 | 	.section .data
125 | msg01: /* message template; the *_arg* labels mark the placeholders handed to string_x64 */
126 | 	.ascii	"elapsed_frq: "
127 | msg01_arg0: /* receives the cntfrq_el0 value */
128 | 	.ascii	"0123456789abcdef\n"
129 | 	.ascii	"elapsed_vct: "
130 | msg01_arg1: /* receives the cntvct_el0 delta */
131 | 	.ascii	"0123456789abcdef\n"
132 | msg01_len = . - msg01
133 | 
134 | msg02: /* must stay directly after msg01: both are emitted by a single write */
135 | 	.ascii	"count: "
136 | msg02_arg0: /* receives the 32-bit bit count via string_x32 */
137 | 	.ascii "01234567\n"
138 | msg02_len = . - msg02
139 | 
140 | 	.section .rodata
141 | 
142 | 	.align 12 /* 4 KiB alignment */
143 | sample_bitset_u32: /* sample_bitset_num_u32 words, each with 4 bits set */
144 | 	.rept	sample_bitset_num_u32
145 | 	.long	0x01020408
146 | 	.endr
147 | 


--------------------------------------------------------------------------------
/test_linux/test_bounce.s:
--------------------------------------------------------------------------------
  1 | 	.global _start
  2 | 
  3 | 	.equ SYS_write, 64 // syscall numbers, loaded into x8 before svc
  4 | 	.equ SYS_exit, 93
  5 | 	.equ SYS_nanosleep, 101
  6 | 	.equ STDOUT_FILENO, 1
  7 | 
  8 | 	.equ COORD_FRAC, 16 // number of fractional bits in the fixed-point blip coordinates
  9 | .if 0
 10 | 	.equ FB_DIM_X, 203 // sample framebuffer geometry / frame count, disabled in favour of --defsym
 11 | 	.equ FB_DIM_Y, 48
 12 | 	.equ FRAMES, 1024
 13 | .else
 14 | 	// symbols supplied by CLI
 15 | .endif
 16 | 	.include "macro.inc" // presumably supplies the adrf macro used below (not an A64 mnemonic) - confirm
 17 | 
 18 | 	.text
 19 | _start: // bounces one "[]" blip around an FB_DIM_X x FB_DIM_Y text framebuffer for FRAMES frames
 20 | 	// clear screen
 21 | 	mov	x8, SYS_write
 22 | 	mov	x2, fb_clear_len
 23 | 	adr	x1, fb_clear_cmd
 24 | 	mov	x0, STDOUT_FILENO
 25 | 	svc	0
 26 | 
 27 | 	// clear fb
 28 | 	movi	v0.16b, ' '
 29 | 	adrf	x0, fb
 30 | 	ldr	w1, =fb_len
 31 | 	bl	memset // external routine; presumably takes the fill pattern in v0, dest in x0, length in x1 - confirm against memset.s
 32 | 
 33 | .Lfb_done:
 34 | 	mov	w5, wzr // blip pos_x
 35 | 	mov	w6, wzr // blip pos_y
 36 | 	mov	x7, FRAMES // remaining frames
 37 | 	mov	w9, (FB_DIM_X - 2) << COORD_FRAC // max bound_x
 38 | 	mov	w10, (FB_DIM_Y - 1) << COORD_FRAC // max bound_y
 39 | 	mov	w11, 0x1 << (COORD_FRAC - 0) // blip step_x
 40 | 	mov	w12, 0x8 << (COORD_FRAC - 4) // blip step_y
 41 | .Lframe:
 42 | 	// reset cursor; x8 = SYS_write
 43 | 	mov	x2, fb_cursor_len
 44 | 	adr	x1, fb_cursor_cmd
 45 | 	mov	x0, STDOUT_FILENO
 46 | 	svc	0
 47 | 
 48 | 	// access to fb: addr & len as per SYS_write
 49 | 	ldr	w2, =fb_len
 50 | 	adrf	x1, fb
 51 | 
 52 | 	// plot blip in fb
 53 | 	asr	w13, w5, COORD_FRAC
 54 | 	asr	w14, w6, COORD_FRAC
 55 | 	// round to the nearest cell (ties toward +inf): add the top fractional bit
 56 | 	ubfx	w17, w5, (COORD_FRAC - 1), #1
 57 | 	ubfx	w19, w6, (COORD_FRAC - 1), #1
 58 | 	add	w13, w13, w17
 59 | 	add	w14, w14, w19
 60 | 
 61 | 	mov	w3, 0x5d5b // stored little-endian: '[' then ']'
 62 | 	mov	w4, FB_DIM_X
 63 | 	madd	w4, w4, w14, w13 // fb offset = y * FB_DIM_X + x
 64 | 	strh	w3, [x1, x4]
 65 | 
 66 | 	// update position
 67 | 	add	w5, w5, w11
 68 | 	add	w6, w6, w12
 69 | 
 70 | 	// check bounds & update step accordingly
 71 | 	cmp	w5, w9
 72 | 	ccmp	w5, 0, 4, NE // if not at the max bound, compare with 0; otherwise force Z
 73 | 	cneg	w11, w11, EQ // reverse step_x on an exact hit of either bound
 74 | 
 75 | 	cmp	w6, w10
 76 | 	ccmp	w6, 0, 4, NE
 77 | 	cneg	w12, w12, EQ // reverse step_y on an exact hit of either bound
 78 | 
 79 | 	// output fb; x8 = SYS_write
 80 | 	mov	x0, STDOUT_FILENO // x1 / x2 still hold the fb address / length set above
 81 | 	svc	0
 82 | 
 83 | 	// erase blip from fb
 84 | 	mov	w3, 0x2020 // two spaces
 85 | 	strh	w3, [x1, x4]
 86 | 
 87 | 	mov	x8, SYS_nanosleep
 88 | 	mov	x1, xzr // no remaining-time output
 89 | 	adr	x0, timespec
 90 | 	svc	0
 91 | 
 92 | 	mov	x8, SYS_write // restore the x8 the top of the loop relies on
 93 | 	subs	x7, x7, 1
 94 | 	bne	.Lframe
 95 | 
 96 | 	mov	x8, SYS_exit
 97 | 	mov	x0, xzr // exit status 0
 98 | 	svc	0
 99 | 
100 | fb_clear_cmd: // terminal escape sequence written once at start-up
101 | 	.ascii "\033[2J"
102 | fb_clear_len = . - fb_clear_cmd
103 | 
104 | fb_cursor_cmd: // terminal escape sequence written before every frame
105 | 	.ascii "\033[1;1H"
106 | fb_cursor_len = . - fb_cursor_cmd
107 | 
108 | 	.align 3
109 | timespec: // nanosleep request: seconds, nanoseconds
110 | 	.dword 0, 15500000
111 | 
112 | 	.section .bss
113 | 	.align 6
114 | fb: // text framebuffer, one byte per cell, FB_DIM_X bytes per row
115 | 	.fill FB_DIM_Y * FB_DIM_X
116 | fb_len = . - fb
117 | 


--------------------------------------------------------------------------------
/test_linux/test_bounce.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | BUILD=..
 3 | COMMON=../test_common
 4 | 
 5 | make -C "${BUILD}" all
 6 | 
 7 | # Hide term cursor before loading REL; nuke all VMAs from common libraries and
 8 | # the process heap VMA, before passing control to _start; restore term cursor
 9 | # upon termination (EXIT trap, so it is restored however the script ends)
10 | # '[heap]' is quoted: unquoted it is a glob matching a file named h, e, a or p
11 | trap 'tput cnorm' EXIT
12 | tput civis
13 | "${BUILD}/elvenrel" "${COMMON}/memset.o" test_bounce.o --filter /lib/aarch64-linux-gnu --filter '[heap]'
14 | 


--------------------------------------------------------------------------------
/test_linux/test_bounce_neon.s:
--------------------------------------------------------------------------------
  1 | 	.global _start
  2 | 
  3 | 	.equ SYS_write, 64 // syscall numbers, loaded into x8 before svc
  4 | 	.equ SYS_exit, 93
  5 | 	.equ SYS_nanosleep, 101
  6 | 	.equ STDOUT_FILENO, 1
  7 | 
  8 | .if 0
  9 | 	.equ FB_DIM_X, 203 // sample framebuffer geometry / frame count, disabled in favour of --defsym
 10 | 	.equ FB_DIM_Y, 48
 11 | 	.equ FRAMES, 2048
 12 | .else
 13 | 	// symbols supplied by CLI
 14 | .endif
 15 | 	.include "macro.inc" // presumably supplies the adrf macro used below (not an A64 mnemonic) - confirm
 16 | 
 17 | 	.text
 18 | _start: // bounces four "[]" blips, one per 32-bit vector lane, around the text framebuffer for FRAMES frames
 19 | 	// clear screen
 20 | 	mov	x8, SYS_write
 21 | 	mov	x2, fb_clear_len
 22 | 	adr	x1, fb_clear_cmd
 23 | 	mov	x0, STDOUT_FILENO
 24 | 	svc	0
 25 | 
 26 | 	// clear fb
 27 | 	movi	v0.16b, ' '
 28 | 	adrf	x0, fb
 29 | 	ldr	w1, =fb_len
 30 | 	bl	memset // external routine; presumably takes the fill pattern in v0, dest in x0, length in x1 - confirm against memset.s
 31 | 
 32 | .Lfb_done:
 33 | 	// four Q-form regs hold SoA { pos_x, pos_y, step_x, step_y }
 34 | 	ldr	q0, =0x00000000000000100000002000000030 // blip{0..3} pos_x; the literal reads lane 3 first, so lane 0 = 0x30
 35 | 	ldr	q1, =0x00000000000000100000000000000010 // blip{0..3} pos_y; lane 0 = 0x10
 36 | 	ldr	q2, =0x00000001000000010000000100000001 // blip{0..3} step_x
 37 | 	ldr	q3, =0x00000001ffffffff00000001ffffffff // blip{0..3} step_y; lane 0 = -1
 38 | 
 39 | 	mov	w4, FB_DIM_X
 40 | 	mov	w5, FB_DIM_X - 2
 41 | 	mov	w6, FB_DIM_Y - 1
 42 | 	fmov	s4, w4 // v4.s[0] = row stride for the offset computation
 43 | 	dup	v5.4s, w5 // max bound_x in every lane
 44 | 	dup	v6.4s, w6 // max bound_y in every lane
 45 | 
 46 | 	mov	x9, FRAMES // remaining frames
 47 | .Lframe:
 48 | 	// reset cursor; x8 = SYS_write
 49 | 	mov	x2, fb_cursor_len
 50 | 	adr	x1, fb_cursor_cmd
 51 | 	mov	x0, STDOUT_FILENO
 52 | 	svc	0
 53 | 
 54 | 	// access to fb: addr & len as per SYS_write
 55 | 	ldr	w2, =fb_len
 56 | 	adrf	x1, fb
 57 | 
 58 | 	// plot blips in fb
 59 | 	mov	v7.16b, v0.16b
 60 | 	mla	v7.4s, v1.4s, v4.s[0] // fb offsets = pos_x + pos_y * FB_DIM_X
 61 | 
 62 | 	fmov	w4, s7 // w4..w7 = per-blip fb offsets, kept for the erase step
 63 | 	mov	w5, v7.s[1]
 64 | 	mov	w6, v7.s[2]
 65 | 	mov	w7, v7.s[3]
 66 | 
 67 | 	mov	w3, 0x5d5b // stored little-endian: '[' then ']'
 68 | 	strh	w3, [x1, x4]
 69 | 	strh	w3, [x1, x5]
 70 | 	strh	w3, [x1, x6]
 71 | 	strh	w3, [x1, x7]
 72 | 
 73 | 	// update positions
 74 | 	add	v0.4s, v0.4s, v2.4s
 75 | 	add	v1.4s, v1.4s, v3.4s
 76 | 
 77 | 	// check bounds & update steps accordingly
 78 | 	cmeq	v7.4s, v0.4s, v5.4s // lane mask: pos_x at max bound
 79 | 	cmeq	v8.4s, v0.4s, 0 // lane mask: pos_x at 0
 80 | 	cmeq	v9.4s, v1.4s, v6.4s // lane mask: pos_y at max bound
 81 | 	cmeq	v10.4s, v1.4s, 0 // lane mask: pos_y at 0
 82 | 	orr	v7.16b, v7.16b, v8.16b
 83 | 	orr	v9.16b, v9.16b, v10.16b
 84 | 	eor	v2.16b, v7.16b, v2.16b // (step ^ mask) - mask negates the step where the mask is all-ones
 85 | 	eor	v3.16b, v9.16b, v3.16b
 86 | 	sub	v2.4s, v2.4s, v7.4s
 87 | 	sub	v3.4s, v3.4s, v9.4s
 88 | 
 89 | 	// output fb; x8 = SYS_write
 90 | 	mov	x0, STDOUT_FILENO // x1 / x2 still hold the fb address / length set above
 91 | 	svc	0
 92 | 
 93 | 	// erase blips from fb
 94 | 	mov	w3, 0x2020 // two spaces
 95 | 	strh	w3, [x1, x4]
 96 | 	strh	w3, [x1, x5]
 97 | 	strh	w3, [x1, x6]
 98 | 	strh	w3, [x1, x7]
 99 | 
100 | 	mov	x8, SYS_nanosleep
101 | 	mov	x1, xzr // no remaining-time output
102 | 	adr	x0, timespec
103 | 	svc	0
104 | 
105 | 	mov	x8, SYS_write // restore the x8 the top of the loop relies on
106 | 	subs	x9, x9, 1
107 | 	bne	.Lframe
108 | 
109 | 	mov	x8, SYS_exit
110 | 	mov	x0, xzr // exit status 0
111 | 	svc	0
112 | 
113 | fb_clear_cmd: // terminal escape sequence written once at start-up
114 | 	.ascii "\033[2J"
115 | fb_clear_len = . - fb_clear_cmd
116 | 
117 | fb_cursor_cmd: // terminal escape sequence written before every frame
118 | 	.ascii "\033[1;1H"
119 | fb_cursor_len = . - fb_cursor_cmd
120 | 
121 | 	.align 3
122 | timespec: // nanosleep request: seconds, nanoseconds
123 | 	.dword 0, 15500000
124 | 
125 | 	.section .bss
126 | 	.align 6
127 | fb: // text framebuffer, one byte per cell, FB_DIM_X bytes per row
128 | 	.fill FB_DIM_Y * FB_DIM_X
129 | fb_len = . - fb
130 | 


--------------------------------------------------------------------------------
/test_linux/test_bounce_neon.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | BUILD=..
 3 | COMMON=../test_common
 4 | 
 5 | make -C "${BUILD}" all
 6 | 
 7 | # Hide term cursor before loading REL; nuke all VMAs from common libraries and
 8 | # the process heap VMA, before passing control to _start; restore term cursor
 9 | # upon termination (EXIT trap, so it is restored however the script ends)
10 | # '[heap]' is quoted: unquoted it is a glob matching a file named h, e, a or p
11 | trap 'tput cnorm' EXIT
12 | tput civis
13 | "${BUILD}/elvenrel" "${COMMON}/memset.o" test_bounce_neon.o --filter /lib/aarch64-linux-gnu --filter '[heap]'
14 | 


--------------------------------------------------------------------------------
/test_linux/test_bounce_neon_aosoa.s:
--------------------------------------------------------------------------------
  1 | 	.global _start
  2 | 
  3 | 	.equ SYS_write, 64 // syscall numbers, loaded into x8 before svc
  4 | 	.equ SYS_exit, 93
  5 | 	.equ SYS_nanosleep, 101
  6 | 	.equ STDOUT_FILENO, 1
  7 | 
  8 | .if 0
  9 | 	.equ FB_DIM_X, 203 // sample framebuffer geometry / frame count, disabled in favour of --defsym
 10 | 	.equ FB_DIM_Y, 48
 11 | 	.equ FRAMES, 2048
 12 | .else
 13 | 	// symbols supplied by CLI
 14 | .endif
 15 | 	.include "macro.inc" // presumably supplies the adrf macro used below (not an A64 mnemonic) - confirm
 16 | 
 17 | 	.text
 18 | _start: // animates the externally supplied blip table (blip .. blip_end), four blips per pack, for FRAMES frames
 19 | 	// clear screen
 20 | 	mov	x8, SYS_write
 21 | 	mov	x2, fb_clear_len
 22 | 	adr	x1, fb_clear_cmd
 23 | 	mov	x0, STDOUT_FILENO
 24 | 	svc	0
 25 | 
 26 | 	// clear fb
 27 | 	movi	v0.16b, ' '
 28 | 	adrf	x0, fb
 29 | 	ldr	w1, =fb_len
 30 | 	bl	memset // external routine; presumably takes the fill pattern in v0, dest in x0, length in x1 - confirm against memset.s
 31 | 
 32 | .Lfb_done:
 33 | 	mov	w4, FB_DIM_X
 34 | 	mov	w5, FB_DIM_X - 2
 35 | 	mov	w6, FB_DIM_Y - 1
 36 | 	fmov	s4, w4 // v4.s[0] = row stride for the offset computation
 37 | 	dup	v5.4s, w5 // max bound_x in every lane
 38 | 	dup	v6.4s, w6 // max bound_y in every lane
 39 | 
 40 | 	mov	x9, FRAMES // remaining frames
 41 | .Lframe:
 42 | 	// reset cursor; x8 = SYS_write
 43 | 	mov	x2, fb_cursor_len
 44 | 	adr	x1, fb_cursor_cmd
 45 | 	mov	x0, STDOUT_FILENO
 46 | 	svc	0
 47 | 
 48 | 	// access to fb: addr & len as per SYS_write
 49 | 	ldr	w2, =fb_len
 50 | 	adrf	x1, fb
 51 | 
 52 | 	// plot blips in fb
 53 | 	adrf	x10, blip // x10 = current pack
 54 | 	adrf	x11, blip_end // x11 = end of packs
 55 | 	mov	x12, x11 // x12 = write cursor into the erase buffer that starts at blip_end
 56 | .Lpack_plot:
 57 | 	// four Q-form regs hold SoA { pos_x, pos_y, step_x, step_y }
 58 | 	ldp	q0, q1, [x10]
 59 | 	ldp	q2, q3, [x10, 32]
 60 | 
 61 | 	mov	v7.16b, v0.16b
 62 | 	mla	v7.4s, v1.4s, v4.s[0] // fb offsets = pos_x + pos_y * FB_DIM_X
 63 | 
 64 | 	str	q7, [x12], 16 // remember the four offsets for the erase pass
 65 | 
 66 | 	fmov	w4, s7
 67 | 	mov	w5, v7.s[1]
 68 | 	mov	w6, v7.s[2]
 69 | 	mov	w7, v7.s[3]
 70 | 
 71 | 	mov	w3, 0x5d5b // stored little-endian: '[' then ']'
 72 | 	strh	w3, [x1, x4]
 73 | 	strh	w3, [x1, x5]
 74 | 	strh	w3, [x1, x6]
 75 | 	strh	w3, [x1, x7]
 76 | 
 77 | 	// update positions
 78 | 	add	v0.4s, v0.4s, v2.4s
 79 | 	add	v1.4s, v1.4s, v3.4s
 80 | 
 81 | 	// check bounds & update steps accordingly
 82 | 	cmeq	v7.4s, v0.4s, v5.4s // lane mask: pos_x at max bound
 83 | 	cmeq	v8.4s, v0.4s, 0 // lane mask: pos_x at 0
 84 | 	cmeq	v9.4s, v1.4s, v6.4s // lane mask: pos_y at max bound
 85 | 	cmeq	v10.4s, v1.4s, 0 // lane mask: pos_y at 0
 86 | 	orr	v7.16b, v7.16b, v8.16b
 87 | 	orr	v9.16b, v9.16b, v10.16b
 88 | 	eor	v2.16b, v7.16b, v2.16b // (step ^ mask) - mask negates the step where the mask is all-ones
 89 | 	eor	v3.16b, v9.16b, v3.16b
 90 | 	sub	v2.4s, v2.4s, v7.4s
 91 | 	sub	v3.4s, v3.4s, v9.4s
 92 | 
 93 | 	stp	q0, q1, [x10], 32 // write the updated pack back and advance to the next one
 94 | 	stp	q2, q3, [x10], 32
 95 | 
 96 | 	cmp	x10, x11
 97 | 	bne	.Lpack_plot
 98 | 
 99 | 	// output fb; x8 = SYS_write
100 | 	mov	x0, STDOUT_FILENO // x1 / x2 still hold the fb address / length set above
101 | 	svc	0
102 | 
103 | 	// erase blips from fb
104 | 	adrf	x10, blip_end
105 | 	adrf	x11, erase_end
106 | .Lpack_erase:
107 | 	ldp	w4, w5, [x10], 8 // four saved fb offsets per iteration
108 | 	ldp	w6, w7, [x10], 8
109 | 
110 | 	mov	w3, 0x2020 // two spaces
111 | 	strh	w3, [x1, x4]
112 | 	strh	w3, [x1, x5]
113 | 	strh	w3, [x1, x6]
114 | 	strh	w3, [x1, x7]
115 | 
116 | 	cmp	x10, x11
117 | 	bne	.Lpack_erase
118 | 
119 | 	mov	x8, SYS_nanosleep
120 | 	mov	x1, xzr // no remaining-time output
121 | 	adr	x0, timespec
122 | 	svc	0
123 | 
124 | 	mov	x8, SYS_write // restore the x8 the top of the loop relies on
125 | 	subs	x9, x9, 1
126 | 	bne	.Lframe
127 | 
128 | 	mov	x8, SYS_exit
129 | 	mov	x0, xzr // exit status 0
130 | 	svc	0
131 | 
132 | fb_clear_cmd: // terminal escape sequence written once at start-up
133 | 	.ascii "\033[2J"
134 | fb_clear_len = . - fb_clear_cmd
135 | 
136 | fb_cursor_cmd: // terminal escape sequence written before every frame
137 | 	.ascii "\033[1;1H"
138 | fb_cursor_len = . - fb_cursor_cmd
139 | 
140 | 	.align 3
141 | timespec: // nanosleep request: seconds, nanoseconds
142 | 	.dword 0, 15500000
143 | 
144 | 	.section .bss
145 | 	.align 6
146 | fb: // text framebuffer, one byte per cell, FB_DIM_X bytes per row
147 | 	.fill FB_DIM_Y * FB_DIM_X
148 | fb_len = . - fb
149 | 


--------------------------------------------------------------------------------
/test_linux/test_bounce_neon_aosoa.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | BUILD=..
 3 | COMMON=../test_common
 4 | 
 5 | make -C "${BUILD}" all
 6 | 
 7 | # Hide term cursor before loading RELs; nuke all VMAs from common libraries and
 8 | # the process heap VMA, before passing control to _start; suppress reports to
 9 | # stdout; restore term cursor upon termination (EXIT trap, so it is restored however the script ends)
10 | # '[heap]' is quoted: unquoted it is a glob matching a file named h, e, a or p
11 | trap 'tput cnorm' EXIT
12 | tput civis
13 | "${BUILD}/elvenrel" "${COMMON}/memset.o" "${COMMON}/test_bounce_data_aosoa_alt_0.o" test_bounce_neon_aosoa.o    --filter /lib/aarch64-linux-gnu --filter '[heap]' --quiet
14 | #"${BUILD}/elvenrel" "${COMMON}/memset.o" "${COMMON}/test_bounce_data_aosoa_alt_1.o" test_bounce_neon_aosoa.o    --filter /lib/aarch64-linux-gnu --filter '[heap]' --quiet
15 | "${BUILD}/elvenrel" "${COMMON}/memset.o" "${COMMON}/test_bounce_data_aosoa_alt_2.o" test_bounce_neon_aosoa.o    --filter /lib/aarch64-linux-gnu --filter '[heap]' --quiet
16 | "${BUILD}/elvenrel" "${COMMON}/memset.o" "${COMMON}/test_bounce_data_aosoa_alt_3.o" test_bounce_neon_aosoa.o    --filter /lib/aarch64-linux-gnu --filter '[heap]' --quiet
17 | "${BUILD}/elvenrel" "${COMMON}/memset.o" "${COMMON}/test_bounce_data_aosoa_alt_3.o" test_bounce_neon_aosoa_bg.o --filter /lib/aarch64-linux-gnu --filter '[heap]' --quiet
18 | 


--------------------------------------------------------------------------------
/test_linux/test_bounce_neon_aosoa_bg.s:
--------------------------------------------------------------------------------
  1 | 	.global _start
  2 | 
  3 | 	.equ SYS_write, 64 // syscall numbers, loaded into x8 before svc
  4 | 	.equ SYS_exit, 93
  5 | 	.equ SYS_nanosleep, 101
  6 | 	.equ STDOUT_FILENO, 1
  7 | 
  8 | .if 0
  9 | 	.equ FB_DIM_X, 203 // sample framebuffer geometry / frame count, disabled in favour of --defsym
 10 | 	.equ FB_DIM_Y, 48
 11 | 	.equ FRAMES, 2048
 12 | .else
 13 | 	// symbols supplied by CLI
 14 | .endif
 15 | 	.equ GRID_DISTANCE_X, 8 // column spacing of grid particles: each 4-particle pack advances pos_x by 4 * this
 16 | 	.equ GRID_DISTANCE_Y, 8 // row spacing: added to the grid row when a line of particles wraps
 17 | 
 18 | .if 0
 19 | 	.equ GRID_STEP_X_0, 1 // disabled alternative; NOTE(review): presumably per-lane initial steps feeding grid_step_* - confirm
 20 | 	.equ GRID_STEP_X_1, 0
 21 | 	.equ GRID_STEP_X_2, 1
 22 | 	.equ GRID_STEP_X_3, 0
 23 | 
 24 | 	.equ GRID_STEP_Y_0, 0
 25 | 	.equ GRID_STEP_Y_1, 1
 26 | 	.equ GRID_STEP_Y_2, 0
 27 | 	.equ GRID_STEP_Y_3, 1
 28 | .else
 29 | 	.equ GRID_STEP_X_0, 1 // active set: every X step is 1
 30 | 	.equ GRID_STEP_X_1, 1
 31 | 	.equ GRID_STEP_X_2, 1
 32 | 	.equ GRID_STEP_X_3, 1
 33 | 
 34 | 	.equ GRID_STEP_Y_0, 0 // active set: every Y step is 0
 35 | 	.equ GRID_STEP_Y_1, 0
 36 | 	.equ GRID_STEP_Y_2, 0
 37 | 	.equ GRID_STEP_Y_3, 0
 38 | .endif
 39 | 
 40 | 	.include "macro.inc" // presumably supplies the adrf macro used below (not an A64 mnemonic) - confirm
 41 | 
 42 | 	.text
 43 | _start:
 44 | 	// clear screen
 45 | 	mov	x8, SYS_write
 46 | 	mov	x2, fb_clear_len
 47 | 	adr	x1, fb_clear_cmd
 48 | 	mov	x0, STDOUT_FILENO
 49 | 	svc	0
 50 | 
 51 | 	// clear fb
 52 | 	movi	v0.16b, ' '
 53 | 	adrf	x0, fb
 54 | 	ldr	w1, =fb_len
 55 | 	bl	memset
 56 | 
 57 | .Lfb_done:
 58 | 	mov	w4, FB_DIM_X
 59 | 	mov	w5, FB_DIM_X - 2
 60 | 	mov	w6, FB_DIM_Y - 1
 61 | 	fmov	s4, w4
 62 | 	dup	v5.4s, w5
 63 | 	dup	v6.4s, w6
 64 | 
 65 | 	// generate a grid of axes-traversing particles
 66 | 	ldr	q3, grid_pos_0123
 67 | 	ldr	q7, grid_step_0123
 68 | 	ldr	q8, grid_step_0123 + 16
 69 | 	ldr	q11, =0x10000000200000003
 70 | 
 71 | 	adr	x10, grid_pos_xxx0
 72 | 	adr	x11, grid_pos_1234
 73 | 	adr	x12, grid_step_xxx0
 74 | 	adr	x13, grid_step_1234
 75 | 
 76 | 	adrf	x7, grid
 77 | 	adrf	x8, grid_end
 78 | 	mov	w9, wzr
 79 | .Lgen_grid:
 80 | 	// how many x-coords exceed end-of-line - 2?
 81 | 	cmhi	v2.4s, v3.4s, v5.4s
 82 | 	addv	s1, v2.4s
 83 | 	fmov	w0, s1
 84 | 	dup	v0.4s, w9
 85 | 	add	v0.4s, v0.4s, v11.4s
 86 | 	cbz	w0, .Lgen_grid_next
 87 | 
 88 | 	// produce a transitional end-of-one/start-of-another
 89 | 	// pack inbetween subsequent lines
 90 | 	mvn	w1, w0
 91 | 	ldr	q1, [x10, x1, LSL 4]
 92 | 	bic	v3.16b, v3.16b, v2.16b
 93 | 	orr	v3.16b, v3.16b, v1.16b
 94 | 
 95 | 	add	x2, x12, x1, LSL 5
 96 | 	ldp	q9, q10, [x2]
 97 | 	bic	v7.16b, v7.16b, v2.16b
 98 | 	orr	v7.16b, v7.16b, v9.16b
 99 | 	bic	v8.16b, v8.16b, v2.16b
100 | 	orr	v8.16b, v8.16b, v10.16b
101 | 
102 | 	dup	v0.4s, w9
103 | 	mov	v12.16b, v11.16b
104 | 	add	w9, w9, GRID_DISTANCE_Y
105 | 	// clamp pos_y at bottom-of-fb; affects only padding particles
106 | 	cmp	w9, w6
107 | 	blo	.Lgen_grid_pos_y
108 | 	mov	w9, FB_DIM_Y - 1
109 | 	bic	v7.16b, v7.16b, v2.16b
110 | 	bic	v8.16b, v8.16b, v2.16b
111 | 	bic	v12.16b, v12.16b, v2.16b
112 | .Lgen_grid_pos_y:
113 | 	dup	v1.4s, w9
114 | 
115 | 	bic	v0.16b, v0.16b, v2.16b
116 | 	and	v1.16b, v1.16b, v2.16b
117 | 	orr	v0.16b, v0.16b, v1.16b
118 | 	add	v0.4s, v0.4s, v12.4s
119 | 
120 | 	// if the fist pack is entirely from the new line
121 | 	// then move over to that
122 | 	cmn	w0, 4
123 | 	beq	.Lgen_grid_next
124 | 
125 | 	// first pack is actually transitional
126 | 	// pos_x, pos_y, step_x, step_y
127 | 	stp	q3, q0, [x7], 32
128 | 	stp	q7, q8, [x7], 32
129 | 
130 | 	cmp	x7, x8
131 | 	beq	.Lgen_grid_done
132 | 
133 | 	// prepare next pack entirely from the new line
134 | 	ldr	q3, [x11, x1, LSL 4]
135 | 	dup	v0.4s, w9
136 | 	add	v0.4s, v0.4s, v11.4s
137 | 
138 | 	add	x2, x13, x1, LSL 5
139 | 	ldp	q7, q8, [x2]
140 | 
141 | .Lgen_grid_next:
142 | 	// pos_x, pos_y, step_x, step_y
143 | 	stp	q3, q0, [x7], 32
144 | 	stp	q7, q8, [x7], 32
145 | 
146 | 	mov	w0, GRID_DISTANCE_X * 4
147 | 	dup	v2.4s, w0
148 | 
149 | 	add	v3.4s, v3.4s, v2.4s
150 | 
151 | 	cmp	x7, x8
152 | 	bne	.Lgen_grid
153 | 
154 | .Lgen_grid_done:
155 | 	mov	x8, SYS_write
156 | 	mov	x9, FRAMES
157 | 	movi	v11.4s, 1
158 | 	add	v11.4s, v11.4s, v5.4s
159 | .Lframe:
160 | 	// reset cursor; x8 = SYS_write
161 | 	mov	x2, fb_cursor_len
162 | 	adr	x1, fb_cursor_cmd
163 | 	mov	x0, STDOUT_FILENO
164 | 	svc	0
165 | 
166 | 	// access to fb: addr & len as per SYS_write
167 | 	ldr	w2, =fb_len
168 | 	adrf	x1, fb
169 | 
170 | 	// plot grid in fb
171 | 	adrf	x10, grid
172 | 	adrf	x11, grid_end
173 | 	mov	x12, x11
174 | .Lgrid_plot:
175 | 	// four Q-form regs hold SoA { pos_x, pos_y, step_x, step_y }
176 | 	ldp	q0, q1, [x10]
177 | 	ldp	q2, q3, [x10, 32]
178 | 
179 | 	mov	v7.16b, v0.16b
180 | 	mla	v7.4s, v1.4s, v4.s[0]
181 | 
182 | 	str	q7, [x12], 16
183 | 
184 | 	fmov	w4, s7
185 | 	mov	w5, v7.s[1]
186 | 	mov	w6, v7.s[2]
187 | 	mov	w7, v7.s[3]
188 | 
189 | 	mov	w3, 'o'
190 | 	strb	w3, [x1, x4]
191 | 	strb	w3, [x1, x5]
192 | 	strb	w3, [x1, x6]
193 | 	strb	w3, [x1, x7]
194 | 
195 | 	// update positions
196 | 	add	v0.4s, v0.4s, v2.4s
197 | 	add	v1.4s, v1.4s, v3.4s
198 | 
199 | 	// check bounds & update steps accordingly
200 | 	cmeq	v7.4s, v0.4s, v11.4s
201 | 	cmeq	v8.4s, v0.4s, 0
202 | 	cmeq	v9.4s, v1.4s, v6.4s
203 | 	cmeq	v10.4s, v1.4s, 0
204 | 	orr	v7.16b, v7.16b, v8.16b
205 | 	orr	v9.16b, v9.16b, v10.16b
206 | 	eor	v2.16b, v7.16b, v2.16b
207 | 	eor	v3.16b, v9.16b, v3.16b
208 | 	sub	v2.4s, v2.4s, v7.4s
209 | 	sub	v3.4s, v3.4s, v9.4s
210 | 
211 | 	stp	q0, q1, [x10], 32
212 | 	stp	q2, q3, [x10], 32
213 | 
214 | 	cmp	x10, x11
215 | 	bne	.Lgrid_plot
216 | 
217 | 	// plot blips in fb
218 | 	adrf	x10, blip
219 | 	adrf	x11, blip_end
220 | 	mov	x12, x11
221 | .Lpack_plot:
222 | 	// four Q-form regs hold SoA { pos_x, pos_y, step_x, step_y }
223 | 	ldp	q0, q1, [x10]
224 | 	ldp	q2, q3, [x10, 32]
225 | 
226 | 	mov	v7.16b, v0.16b
227 | 	mla	v7.4s, v1.4s, v4.s[0]
228 | 
229 | 	str	q7, [x12], 16
230 | 
231 | 	fmov	w4, s7
232 | 	mov	w5, v7.s[1]
233 | 	mov	w6, v7.s[2]
234 | 	mov	w7, v7.s[3]
235 | 
236 | 	mov	w3, 0x5d5b
237 | 	strh	w3, [x1, x4]
238 | 	strh	w3, [x1, x5]
239 | 	strh	w3, [x1, x6]
240 | 	strh	w3, [x1, x7]
241 | 
242 | 	// update positions
243 | 	add	v0.4s, v0.4s, v2.4s
244 | 	add	v1.4s, v1.4s, v3.4s
245 | 
246 | 	// check bounds & update steps accordingly
247 | 	cmeq	v7.4s, v0.4s, v5.4s
248 | 	cmeq	v8.4s, v0.4s, 0
249 | 	cmeq	v9.4s, v1.4s, v6.4s
250 | 	cmeq	v10.4s, v1.4s, 0
251 | 	orr	v7.16b, v7.16b, v8.16b
252 | 	orr	v9.16b, v9.16b, v10.16b
253 | 	eor	v2.16b, v7.16b, v2.16b
254 | 	eor	v3.16b, v9.16b, v3.16b
255 | 	sub	v2.4s, v2.4s, v7.4s
256 | 	sub	v3.4s, v3.4s, v9.4s
257 | 
258 | 	stp	q0, q1, [x10], 32
259 | 	stp	q2, q3, [x10], 32
260 | 
261 | 	cmp	x10, x11
262 | 	bne	.Lpack_plot
263 | 
264 | 	// output fb; x8 = SYS_write
265 | 	mov	x0, STDOUT_FILENO
266 | 	svc	0
267 | 
268 | 	// erase grid from fb
269 | 	adrf	x10, grid_end
270 | 	adrf	x11, grid_erase_end
271 | .Lgrid_erase:
272 | 	ldp	w4, w5, [x10], 8
273 | 	ldp	w6, w7, [x10], 8
274 | 
275 | 	mov	w3, 0x20
276 | 	strb	w3, [x1, x4]
277 | 	strb	w3, [x1, x5]
278 | 	strb	w3, [x1, x6]
279 | 	strb	w3, [x1, x7]
280 | 
281 | 	cmp	x10, x11
282 | 	bne	.Lgrid_erase
283 | 
284 | 	// erase blips from fb
285 | 	adrf	x10, blip_end
286 | 	adrf	x11, erase_end
287 | .Lpack_erase:
288 | 	ldp	w4, w5, [x10], 8
289 | 	ldp	w6, w7, [x10], 8
290 | 
291 | 	mov	w3, 0x2020
292 | 	strh	w3, [x1, x4]
293 | 	strh	w3, [x1, x5]
294 | 	strh	w3, [x1, x6]
295 | 	strh	w3, [x1, x7]
296 | 
297 | 	cmp	x10, x11
298 | 	bne	.Lpack_erase
299 | 
300 | 	mov	x8, SYS_nanosleep
301 | 	mov	x1, xzr
302 | 	adr	x0, timespec
303 | 	svc	0
304 | 
305 | 	mov	x8, SYS_write
306 | 	subs	x9, x9, 1
307 | 	bne	.Lframe
308 | 
309 | 	mov	x8, SYS_exit
310 | 	mov	x0, xzr
311 | 	svc	0
312 | 
313 | 	.align 4 // 16-byte boundary; every table entry below is a multiple of 16 bytes
314 | grid_pos_xxx0: // x-offset rows of four words; the label spells the column index per word ('x' = zero filler)
315 | 	.word	0,                   0,                   0,                   GRID_DISTANCE_X * 0
316 | grid_pos_xx01:
317 | 	.word	0,                   0,                   GRID_DISTANCE_X * 0, GRID_DISTANCE_X * 1
318 | grid_pos_x012:
319 | 	.word	0,                   GRID_DISTANCE_X * 0, GRID_DISTANCE_X * 1, GRID_DISTANCE_X * 2
320 | grid_pos_0123:
321 | 	.word	GRID_DISTANCE_X * 0, GRID_DISTANCE_X * 1, GRID_DISTANCE_X * 2, GRID_DISTANCE_X * 3
322 | grid_pos_1234:
323 | 	.word	GRID_DISTANCE_X * 1, GRID_DISTANCE_X * 2, GRID_DISTANCE_X * 3, GRID_DISTANCE_X * 4
324 | grid_pos_2345:
325 | 	.word	GRID_DISTANCE_X * 2, GRID_DISTANCE_X * 3, GRID_DISTANCE_X * 4, GRID_DISTANCE_X * 5
326 | grid_pos_3456:
327 | 	.word	GRID_DISTANCE_X * 3, GRID_DISTANCE_X * 4, GRID_DISTANCE_X * 5, GRID_DISTANCE_X * 6
328 | // step tables: each entry is a row of four X steps followed by a row of four Y steps; column k uses GRID_STEP_*_(k mod 4)
329 | grid_step_xxx0:
330 | 	.word	0,             0,             0,             GRID_STEP_X_0
331 | 	.word	0,             0,             0,             GRID_STEP_Y_0
332 | grid_step_xx01:
333 | 	.word	0,             0,             GRID_STEP_X_0, GRID_STEP_X_1
334 | 	.word	0,             0,             GRID_STEP_Y_0, GRID_STEP_Y_1
335 | grid_step_x012:
336 | 	.word	0,             GRID_STEP_X_0, GRID_STEP_X_1, GRID_STEP_X_2
337 | 	.word	0,             GRID_STEP_Y_0, GRID_STEP_Y_1, GRID_STEP_Y_2
338 | grid_step_0123:
339 | 	.word	GRID_STEP_X_0, GRID_STEP_X_1, GRID_STEP_X_2, GRID_STEP_X_3
340 | 	.word	GRID_STEP_Y_0, GRID_STEP_Y_1, GRID_STEP_Y_2, GRID_STEP_Y_3
341 | grid_step_1234: // column 4 wraps around to step set 0
342 | 	.word	GRID_STEP_X_1, GRID_STEP_X_2, GRID_STEP_X_3, GRID_STEP_X_0
343 | 	.word	GRID_STEP_Y_1, GRID_STEP_Y_2, GRID_STEP_Y_3, GRID_STEP_Y_0
344 | grid_step_2345:
345 | 	.word	GRID_STEP_X_2, GRID_STEP_X_3, GRID_STEP_X_0, GRID_STEP_X_1
346 | 	.word	GRID_STEP_Y_2, GRID_STEP_Y_3, GRID_STEP_Y_0, GRID_STEP_Y_1
347 | grid_step_3456:
348 | 	.word	GRID_STEP_X_3, GRID_STEP_X_0, GRID_STEP_X_1, GRID_STEP_X_2
349 | 	.word	GRID_STEP_Y_3, GRID_STEP_Y_0, GRID_STEP_Y_1, GRID_STEP_Y_2
350 | 
351 | fb_clear_cmd: // ANSI escape sequence ESC [ 2 J (erase display)
352 | 	.ascii "\033[2J"
353 | fb_clear_len = . - fb_clear_cmd
354 | // ANSI escape sequence ESC [ 1 ; 1 H (cursor to row 1, column 1); written at the top of every frame
355 | fb_cursor_cmd:
356 | 	.ascii "\033[1;1H"
357 | fb_cursor_len = . - fb_cursor_cmd
358 | // argument block for SYS_nanosleep: two qwords { 0 s, 15500000 ns }
359 | 	.align 3
360 | timespec:
361 | 	.dword 0, 15500000
362 | 
363 | 	.section .bss
364 | 	.align 6
365 | fb: // frame buffer, one byte per cell, FB_DIM_X bytes per row
366 | 	.fill FB_DIM_Y * FB_DIM_X
367 | fb_len = . - fb
368 | // grid particles: 64 bytes per pack of four (pos_x, pos_y, step_x, step_y as four Q-sized rows)
369 | 	.align 6
370 | grid:
371 | 	.fill (((FB_DIM_X + GRID_DISTANCE_X - 2) / GRID_DISTANCE_X) * ((FB_DIM_Y + GRID_DISTANCE_Y - 2 - 3) / GRID_DISTANCE_Y) + 3) / 4 * 64
372 | grid_end: // erase list: 16 bytes per pack -- the four fb offsets stored by the plot loop (str q7, [x12], 16)
373 | 	.fill (grid_end - grid) / 16, 4
374 | grid_erase_end:
375 | 


--------------------------------------------------------------------------------
/test_linux/test_bss.s:
--------------------------------------------------------------------------------
 1 | 	.global _start
 2 | // test: write a greeting from .rodata, then read-modify-write a qword in .bss and exit with it as the status
 3 | 	.equ SYS_write, 64
 4 | 	.equ SYS_exit, 93
 5 | 	.equ STDOUT_FILENO, 1
 6 | // adrf is a macro from macro.inc (not shown here) -- presumably a far-capable address load; TODO confirm
 7 | 	.include "macro.inc"
 8 | 
 9 | 	.text
10 | _start:
11 | 	mov	x8, SYS_write
12 | 	mov	x2, len
13 | 	adrf	x1, buf
14 | 	mov	x0, STDOUT_FILENO
15 | 	svc	0
16 | // increment the qword at 'code': adrp yields its 4 KiB page, :lo12: the offset within that page
17 | 	adrp	x1, code
18 | 	ldr	x0, [x1, :lo12:code]
19 | 	add	x0, x0, 1
20 | 	str	x0, [x1, :lo12:code]
21 | 
22 | 	mov	x8, SYS_exit // x0 still holds the incremented value and becomes the exit status
23 | 	svc	0
24 | 
25 | 	.section .bss
26 | code: // one qword; assumed zero when _start runs (depends on the loader zero-filling .bss)
27 | 	.dword	0
28 | 
29 | 	.section .rodata
30 | buf:
31 | 	.ascii	"hello from ET_REL\n"
32 | len = . - buf
33 | 


--------------------------------------------------------------------------------
/test_linux/test_cross.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | BUILD=..
 3 | 
 4 | make -C ${BUILD} all
 5 | 
 6 | # Load two RELs with cross-relocations; nuke all VMAs from common libraries and
 7 | # the process heap VMA, before passing control to _start
 8 | # '[heap]' is quoted: unquoted it is a glob bracket expression and could expand to a file named h, e, a or p
 9 | ${BUILD}/elvenrel test_cross_0.o test_cross_1.o --filter /lib/aarch64-linux-gnu --filter '[heap]'
10 | 


--------------------------------------------------------------------------------
/test_linux/test_cross_0.s:
--------------------------------------------------------------------------------
1 | 	.global buf
2 | // exports 'buf' for another REL; the byte immediately before buf holds the string length
3 | 	.section .rodata
4 | 
5 | 	.byte len // length prefix at buf - 1 (len is defined below)
6 | buf:
7 | 	.ascii	"hello from ET_REL\n"
8 | len = . - buf
9 | 


--------------------------------------------------------------------------------
/test_linux/test_cross_1.s:
--------------------------------------------------------------------------------
 1 | 	.global _start
 2 | // test: write 'buf' to stdout; buf is not defined in this file, its length is read from the byte before it
 3 | 	.equ SYS_write, 64
 4 | 	.equ SYS_exit, 93
 5 | 	.equ STDOUT_FILENO, 1
 6 | 
 7 | 	.include "macro.inc"
 8 | 
 9 | 	.text
10 | _start:
11 | 	mov	x8, SYS_write
12 | 	adrf	x1, buf // undefined here; resolved against another object at load time
13 | 	ldrb	w2, [x1, -1] // w2 = byte at buf - 1, used as the write length
14 | 	mov	x0, STDOUT_FILENO
15 | 	svc	0
16 | 
17 | 	mov	x8, SYS_exit
18 | 	mov	x0, xzr
19 | 	svc	0
20 | 


--------------------------------------------------------------------------------
/test_linux/test_data.s:
--------------------------------------------------------------------------------
 1 | 	.global _start
 2 | // test: patch four placeholder bytes of a string in .data, then write the string to stdout
 3 | 	.equ SYS_write, 64
 4 | 	.equ SYS_exit, 93
 5 | 	.equ STDOUT_FILENO, 1
 6 | 
 7 | 	.include "macro.inc"
 8 | 
 9 | 	.text
10 | _start:
11 | 	mov	x8, SYS_write
12 | 	mov	x2, len
13 | 	adrf	x1, buf
14 | 	movl	w3, 0x4c45525f // bytes 5f 52 45 4c = "_REL" when stored little-endian
15 | 	str	w3, [x1, 13] // overwrite the "...." that starts at offset 13 of buf
16 | 	mov	x0, STDOUT_FILENO
17 | 	svc	0
18 | 
19 | 	mov	x8, SYS_exit
20 | 	mov	x0, xzr
21 | 	svc	0
22 | 
23 | 	.section .data
24 | buf:
25 | 	.ascii	"hello from ET....\n"
26 | len = . - buf
27 | 


--------------------------------------------------------------------------------
/test_linux/test_memset.s:
--------------------------------------------------------------------------------
 1 | 	.global _start
 2 | // test: stamp runs of '.' into a '='-filled text buffer through the external memset, then print the buffer
 3 | 	.equ SYS_write, 64
 4 | 	.equ SYS_exit, 93
 5 | 	.equ STDOUT_FILENO, 1
 6 | 
 7 | 	.equ COLUMNS, 64
 8 | 	.equ LINES, 48
 9 | 
10 | 	.include "macro.inc"
11 | _start:
12 | 	adrf	x4, fb // x4 = current destination inside fb
13 | 	add	x5, x4, (COLUMNS + 1) * LINES // x5 = end of fb (each row is COLUMNS chars plus a newline)
14 | 	mov	x6, 1 // x6 = run length, grows by one per iteration
15 | 	mov	x7, 1 // x7 = counter compared against LINES / (COLUMNS - LINES)
16 | .Lloop:
17 | 	movi	v0.16b, '.' // fill byte in every lane of v0; set again on each pass
18 | 	mov	x0, x4
19 | 	mov	x1, x6
20 | 	bl	memset // defined in another object; presumably x0 = dest, x1 = byte count, v0 = fill -- TODO confirm
21 | 
22 | 	cmp	x7, LINES / (COLUMNS - LINES)
23 | 	csel	x7, xzr, x7, EQ // on a match restart the counter (it becomes 1 after the add below)
24 | 	add	x7, x7, 1
25 | 	add	x6, x6, 1
26 | 	cinc	x4, x4, EQ // flags are still those of the cmp above: on a match skip one extra byte
27 | 	add	x4, x4, COLUMNS + 1 // advance by one row
28 | 	cmp	x4, x5
29 | 	blo	.Lloop
30 | 
31 | 	mov	x8, SYS_write
32 | 	mov	x2, fb_len
33 | 	adrf	x1, fb
34 | 	mov	x0, STDOUT_FILENO
35 | 	svc	0
36 | 
37 | 	mov	x8, SYS_exit
38 | 	mov	x0, xzr
39 | 	svc	0
40 | 
41 | 	.data
42 | fb: // LINES rows, each COLUMNS '=' characters followed by a newline
43 | 	.rept LINES
44 | 	.fill COLUMNS, 1, '='
45 | 	.byte '\n'
46 | 	.endr
47 | fb_len = . - fb
48 | 


--------------------------------------------------------------------------------
/test_linux/test_memset_woa.s:
--------------------------------------------------------------------------------
 1 | 	.global _start
 2 | // test: same pattern as the memset test, but driving the external memset_woa routine
 3 | 	.equ SYS_write, 64
 4 | 	.equ SYS_exit, 93
 5 | 	.equ STDOUT_FILENO, 1
 6 | 
 7 | 	.equ COLUMNS, 64
 8 | 	.equ LINES, 48
 9 | 
10 | 	.include "macro.inc"
11 | _start:
12 | 	adrf	x4, fb // x4 = current destination inside fb
13 | 	add	x5, x4, (COLUMNS + 1) * LINES // x5 = end of fb (each row is COLUMNS chars plus a newline)
14 | 	mov	x6, 1 // x6 = run length, grows by one per iteration
15 | 	mov	x7, 1 // x7 = counter compared against LINES / (COLUMNS - LINES)
16 | .Lloop:
17 | 	mov	x0, x4
18 | 	movq	x1, 0x2e2e2e2e2e2e2e2e // eight copies of 0x2e ('.')
19 | 	mov	x2, x6
20 | 	bl	memset_woa // defined in another object; presumably x0 = dest, x1 = 64-bit pattern, x2 = byte count -- TODO confirm
21 | 
22 | 	cmp	x7, LINES / (COLUMNS - LINES)
23 | 	csel	x7, xzr, x7, EQ // on a match restart the counter (it becomes 1 after the add below)
24 | 	add	x7, x7, 1
25 | 	add	x6, x6, 1
26 | 	cinc	x4, x4, EQ // flags are still those of the cmp above: on a match skip one extra byte
27 | 	add	x4, x4, COLUMNS + 1 // advance by one row
28 | 	cmp	x4, x5
29 | 	blo	.Lloop
30 | 
31 | 	mov	x8, SYS_write
32 | 	mov	x2, fb_len
33 | 	adrf	x1, fb
34 | 	mov	x0, STDOUT_FILENO
35 | 	svc	0
36 | 
37 | 	mov	x8, SYS_exit
38 | 	mov	x0, xzr
39 | 	svc	0
40 | 
41 | 	.data
42 | fb: // LINES rows, each COLUMNS '=' characters followed by a newline
43 | 	.rept LINES
44 | 	.fill COLUMNS, 1, '='
45 | 	.byte '\n'
46 | 	.endr
47 | fb_len = . - fb
48 | 


--------------------------------------------------------------------------------
/test_linux/test_rodata.s:
--------------------------------------------------------------------------------
 1 | 	.global _start
 2 | // test: write a string that lives in .rodata to stdout, then exit with status 0
 3 | 	.equ SYS_write, 64
 4 | 	.equ SYS_exit, 93
 5 | 	.equ STDOUT_FILENO, 1
 6 | 
 7 | 	.include "macro.inc"
 8 | 
 9 | 	.text
10 | _start:
11 | 	mov	x8, SYS_write // syscall number goes in x8, arguments in x0..x2
12 | 	mov	x2, len
13 | 	adrf	x1, buf // buf is in another section, hence the adrf macro rather than plain adr
14 | 	mov	x0, STDOUT_FILENO
15 | 	svc	0
16 | 
17 | 	mov	x8, SYS_exit
18 | 	mov	x0, xzr
19 | 	svc	0
20 | 
21 | 	.section .rodata
22 | buf:
23 | 	.ascii	"hello from ET_REL\n"
24 | len = . - buf
25 | 


--------------------------------------------------------------------------------
/test_linux/test_text.s:
--------------------------------------------------------------------------------
 1 | 	.global _start
 2 | // test: write a string that lives in .text (right after the code) to stdout, then exit with status 0
 3 | 	.equ SYS_write, 64
 4 | 	.equ SYS_exit, 93
 5 | 	.equ STDOUT_FILENO, 1
 6 | 
 7 | 	.text
 8 | _start:
 9 | 	mov	x8, SYS_write
10 | 	mov	x2, len
11 | 	adr	x1, buf // buf is in the same section, so a PC-relative adr reaches it
12 | 	mov	x0, STDOUT_FILENO
13 | 	svc	0
14 | 
15 | 	mov	x8, SYS_exit
16 | 	mov	x0, xzr
17 | 	svc	0
18 | 
19 | buf:
20 | 	.ascii	"hello from ET_REL\n"
21 | len = . - buf
22 | 


--------------------------------------------------------------------------------
/test_linux/test_timeval.s:
--------------------------------------------------------------------------------
  1 | 	.global _start
  2 | 
  3 | 	.equ SYS_write, 64
  4 | 	.equ SYS_exit, 93
  5 | 	.equ SYS_nanosleep, 101
  6 | 	.equ SYS_gettimeofday, 169
  7 | 	.equ STDOUT_FILENO, 1
  8 | .ifndef DTIME
  9 | 	.equ DTIME, 15500
 10 | .endif
 11 | 	.include "macro.inc"
 12 | 
 13 | 	.text
 14 | 
 15 | // advance timeval by a non-negative dtime in us
 16 | // x0: timeval ptr
 17 | // w1: dtime us; must be less than 1e6
 18 | // clobbers: x2, x3, x4
 19 | 	.align 4
 20 | advance_timeval_us:
 21 | 	movl	w2, 1000000
 22 | 	sub	w2, w2, w1 // w2 = 1e6 - dtime: smallest microsecond value that would wrap
 23 | 	ldp	x3, x4, [x0] // x3 = first qword (seconds), x4 = second qword (microseconds)
 24 | 	subs	w2, w4, w2 // w2 = usec + dtime - 1e6; LO (borrow) means the sum stays below 1e6
 25 | 	blo	.Lupdate_only_us
 26 | 	add	x3, x3, 1 // microseconds wrapped: carry one into seconds
 27 | 	stp	x3, x2, [x0] // upper half of x2 is zero after the 32-bit subs
 28 | 	ret
 29 | .Lupdate_only_us:
 30 | 	add	w4, w4, w1 // no wrap: just add dtime to the microseconds
 31 | 	str	x4, [x0, 8]
 32 | 	ret
 33 | 
 34 | _start:
 35 | 	adrf	x17, timeval // x17 = base of two 16-byte timeval slots
 36 | 	adrf	x19, msg // x19 = output text buffer
 37 | // slot 0: time before the sleep
 38 | 	mov	x8, SYS_gettimeofday
 39 | 	mov	x2, xzr
 40 | 	mov	x1, xzr
 41 | 	mov	x0, x17
 42 | 	svc	0
 43 | // sleep for the interval stored in timespec_nanosleep
 44 | 	mov	x8, SYS_nanosleep
 45 | 	mov	x1, xzr
 46 | 	adr	x0, timespec_nanosleep
 47 | 	svc	0
 48 | // slot 1 (x17 + 16): time after the sleep
 49 | 	mov	x8, SYS_gettimeofday
 50 | 	mov	x2, xzr
 51 | 	mov	x1, xzr
 52 | 	add	x0, x17, 16
 53 | 	svc	0
 54 | // string_x64 / string_x32 are defined elsewhere; as called here x0 = destination text, x1/w1 = value (output format not visible here)
 55 | 	// itoa start time (timeval::tv_sec and timeval::tv_usec)
 56 | 	ldr	x1, [x17]
 57 | 	mov	x0, x19
 58 | 	bl	string_x64
 59 | 
 60 | 	ldr	w1, [x17, 8]
 61 | 	add	x0, x19, 17 // row 0, field after the ':' (16 chars + ':')
 62 | 	bl	string_x32
 63 | 
 64 | 	// advance start time by DTIME us
 65 | 	mov	w1, DTIME
 66 | 	mov	x0, x17
 67 | 	bl	advance_timeval_us
 68 | 
 69 | 	// itoa target time
 70 | 	ldr	x1, [x17]
 71 | 	add	x0, x19, 26 // row 1 (each msg row is 26 bytes)
 72 | 	bl	string_x64
 73 | 
 74 | 	ldr	w1, [x17, 8]
 75 | 	add	x0, x19, 43 // row 1, field after the ':'
 76 | 	bl	string_x32
 77 | 
 78 | 	// itoa post-sleep time
 79 | 	ldr	x1, [x17, 16]
 80 | 	add	x0, x19, 52 // row 2
 81 | 	bl	string_x64
 82 | 
 83 | 	ldr	w1, [x17, 24]
 84 | 	add	x0, x19, 69 // row 2, field after the ':'
 85 | 	bl	string_x32
 86 | 
 87 | 	// output start, target and post-sleep times
 88 | 	mov	x8, SYS_write
 89 | 	mov	x2, msg_len
 90 | 	mov	x1, x19
 91 | 	mov	x0, STDOUT_FILENO
 92 | 	svc	0
 93 | 
 94 | 	mov	x8, SYS_exit
 95 | 	mov	x0, xzr
 96 | 	svc	0
 97 | // nanosleep request: two qwords { 0 s, DTIME * 1000 ns }
 98 | timespec_nanosleep:
 99 | 	.dword 0, DTIME * 1000
100 | 
101 | 	.section .bss
102 | 	.align 4
103 | timeval: // slot 0 = start (later advanced to the target time), slot 1 = post-sleep
104 | 	.dword 0, 0
105 | 	.dword 0, 0
106 | 
107 | 	.section .data
108 | msg: // three rows of 16 placeholder chars, ':', 8 placeholder chars, newline
109 | 	.ascii "################:########\n"
110 | 	.ascii "################:########\n"
111 | 	.ascii "################:########\n"
112 | msg_len = . - msg
113 | 


--------------------------------------------------------------------------------
/test_linux/test_timeval.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | BUILD=..
3 | # Build via the parent directory's Makefile before running
4 | make -C ${BUILD} all
5 | 
6 | # Advance a timeval structure by some us
7 | # stringx.o is loaded as well: test_timeval.s calls string_x64/string_x32 (presumably defined in stringx.s)
8 | ${BUILD}/elvenrel stringx.o test_timeval.o --quiet
9 | 


--------------------------------------------------------------------------------
/test_macos/Makefile:
--------------------------------------------------------------------------------
 1 | # Differentiate between GAS and Apple clang
 2 | AS_VENDOR := $(word 1, $(shell $(AS) --version))
 3 | ifeq ($(AS_VENDOR), GNU)
 4 | 	ASFLAGS += --strip-local-absolute -I..
 5 | 	defsym = --defsym $(1)=$(2)
 6 | else
 7 | 	ASFLAGS += --target=aarch64-linux-gnu -I..
 8 | 	defsym = -Wa,-defsym,$(1)=$(2)
 9 | endif
10 | REL := test_text.o \
11 |        test_rodata.o \
12 |        test_data.o \
13 |        test_bss.o \
14 |        test_cross_0.o \
15 |        test_cross_1.o \
16 |        test_timeval.o \
17 |        stringx.o \
18 |        test_bounce.o \
19 |        test_bounce_neon.o \
20 |        test_bounce_neon_aosoa.o \
21 |        test_bounce_neon_aosoa_bg.o \
22 |        test_memset.o
23 | # stringx.s lives in the parent directory, so it needs an explicit rule
24 | stringx.o: ../stringx.s
25 | 	$(AS) $(ASFLAGS) -o $@ $^
26 | # The bounce tests get FB_DIM_X/FB_DIM_Y from the current terminal size (tput) plus a FRAMES count
27 | test_bounce.o: test_bounce.s
28 | 	$(AS) $(ASFLAGS) $(call defsym,FB_DIM_X,$(shell tput cols)) $(call defsym,FB_DIM_Y,$(shell tput lines)) $(call defsym,FRAMES,1024) -o $@ $^
29 | 
30 | test_bounce_neon.o: test_bounce_neon.s
31 | 	$(AS) $(ASFLAGS) $(call defsym,FB_DIM_X,$(shell tput cols)) $(call defsym,FB_DIM_Y,$(shell tput lines)) $(call defsym,FRAMES,2048) -o $@ $^
32 | 
33 | test_bounce_neon_aosoa.o: test_bounce_neon_aosoa.s
34 | 	$(AS) $(ASFLAGS) $(call defsym,FB_DIM_X,$(shell tput cols)) $(call defsym,FB_DIM_Y,$(shell tput lines)) $(call defsym,FRAMES,1024) -o $@ $^
35 | 
36 | test_bounce_neon_aosoa_bg.o: test_bounce_neon_aosoa_bg.s
37 | 	$(AS) $(ASFLAGS) $(call defsym,FB_DIM_X,$(shell tput cols)) $(call defsym,FB_DIM_Y,$(shell tput lines)) $(call defsym,FRAMES,2048) -o $@ $^
38 | .PHONY: all clean
39 | all: $(REL)
40 | 
41 | clean:
42 | 	rm -f $(REL)
43 | 


--------------------------------------------------------------------------------
/test_macos/test_bitcount.s:
--------------------------------------------------------------------------------
  1 | 	.global _start
  2 | /* benchmark: count the set bits of a large constant bitset with NEON and time it with the virtual counter */
  3 | 	.equ SYS_write, 4
  4 | 	.equ SYS_exit, 1
  5 | 	.equ STDOUT_FILENO, 1
  6 | 
  7 | 	.equ sample_bitset_num_u32, 100 * 1000 * 1000
  8 | 
  9 | 	.include "macro.inc"
 10 | 
 11 | 	.text
 12 | _start:
 13 | 	/* alloca local room */
 14 | 	sub	sp, sp, 32
 15 | /* [sp+16] = counter frequency, [sp+24] = counter value before the tested block */
 16 | 	mrs	x0, cntfrq_el0
 17 | 	mrs	x1, cntvct_el0
 18 | 	stp	x0, x1, [sp, 16]
 19 | 
 20 | 	/* block tested { */
 21 | 	adrf	x0, sample_bitset_u32
 22 | 	movl	x1, sample_bitset_num_u32
 23 | 	ands	x2, x1, -16 /* x2 = word count rounded down to a multiple of 16; Z set when that is zero */
 24 | 	add	x3, x0, 32 /* pointer biased by 32: one ldp uses offset -32, the other post-increments by 64 */
 25 | 	movi	v0.2d, 0
 26 | 	movi	v1.2d, 0
 27 | 	movi	v2.2d, 0
 28 | 	movi	v3.2d, 0
 29 | 	b.eq	.Lbulk8 /* flags are still those of the ands above */
 30 | .Lbulk16: /* 16 words (64 bytes) per iteration */
 31 | 	ldp	q4, q5, [x3, -32]
 32 | 	ldp	q6, q7, [x3], 64
 33 | 	cnt	v4.16b, v4.16b
 34 | 	cnt	v5.16b, v5.16b
 35 | 	cnt	v6.16b, v6.16b
 36 | 	cnt	v7.16b, v7.16b
 37 | /* pairwise-add the per-byte counts into halfwords */
 38 | 	uaddlp	v4.8h, v4.16b
 39 | 	uaddlp	v5.8h, v5.16b
 40 | 	uaddlp	v6.8h, v6.16b
 41 | 	uaddlp	v7.8h, v7.16b
 42 | /* pairwise-add halfwords and accumulate into the 32-bit lanes of v0..v3 */
 43 | 	uadalp	v0.4s, v4.8h
 44 | 	uadalp	v1.4s, v5.8h
 45 | 	uadalp	v2.4s, v6.8h
 46 | 	uadalp	v3.4s, v7.8h
 47 | 	subs	x2, x2, 16
 48 | 	b.ne	.Lbulk16
 49 | .Lbulk8: /* tail: 8 more words when bit 3 of the count is set */
 50 | 	sub	x3, x3, 32 /* undo the pointer bias */
 51 | 	tbz	x1, 3, .Lbulk4
 52 | 	ldp	q4, q5, [x3], 32
 53 | 	cnt	v4.16b, v4.16b
 54 | 	cnt	v5.16b, v5.16b
 55 | 
 56 | 	uaddlp	v4.8h, v4.16b
 57 | 	uaddlp	v5.8h, v5.16b
 58 | 
 59 | 	uadalp	v0.4s, v4.8h
 60 | 	uadalp	v1.4s, v5.8h
 61 | .Lbulk4: /* tail: 4 more words when bit 2 is set */
 62 | 	tbz	x1, 2, .Lbulk2
 63 | 	ldr	q6, [x3], 16
 64 | 	cnt	v6.16b, v6.16b
 65 | 
 66 | 	uaddlp	v6.8h, v6.16b
 67 | 	uadalp	v2.4s, v6.8h
 68 | .Lbulk2: /* tail: 2 more words when bit 1 is set */
 69 | 	tbz	x1, 1, .Lunit
 70 | 	ldr	d7, [x3], 8
 71 | 	cnt	v7.8b, v7.8b
 72 | 
 73 | 	uaddlp	v7.4h, v7.8b
 74 | 	uadalp	v3.4s, v7.8h /* implicit widening to match acc width */
 75 | .Lunit: /* tail: one last word when bit 0 is set */
 76 | 	tbz	x1, 0, .Lfinal
 77 | 	ldr	s4, [x3]
 78 | 	cnt	v4.8b, v4.8b
 79 | 
 80 | 	uaddlp	v4.4h, v4.8b
 81 | 	uadalp	v0.4s, v4.8h /* implicit widening to match acc width */
 82 | .Lfinal: /* fold the four accumulators, then reduce across lanes */
 83 | 	add	v0.4s, v1.4s, v0.4s
 84 | 	add	v1.4s, v3.4s, v2.4s
 85 | 	add	v0.4s, v1.4s, v0.4s
 86 | 	addv	s0, v0.4s
 87 | 	fmov	w2, s0 /* w2 = total bit count */
 88 | 	/* } block tested */
 89 | 
 90 | 	/* fill in elapsed time message */
 91 | 	mrs	x0, cntvct_el0
 92 | 	ldr	x1, [sp, 24]
 93 | 	sub	x0, x0, x1 /* x0 = elapsed counter ticks */
 94 | 	stp	x2, x0, [sp] /* [sp] = bit count, [sp+8] = elapsed ticks */
 95 | /* NOTE(review): x2 is reused by the second blr below, so string_x64 is assumed to preserve x2 -- confirm */
 96 | 	ldr	x2, =string_x64
 97 | 	ldr	x1, [sp, 16]
 98 | 	adrf	x0, msg01_arg0
 99 | 	blr	x2 /* string_x64 far call */
100 | 
101 | 	ldr	x1, [sp, 8]
102 | 	adrf	x0, msg01_arg1
103 | 	blr	x2 /* string_x64 far call */
104 | 
105 | 	/* fill in result message */
106 | 	ldr	x2, =string_x32
107 | 	ldr	w1, [sp]
108 | 	adrf	x0, msg02_arg0
109 | 	blr	x2 /* string_x32 far call */
110 | 
111 | 	/* dealloc local room */
112 | 	add	sp, sp, 32
113 | /* msg01 and msg02 are adjacent in .data, so one write covers both */
114 | 	mov	x16, SYS_write
115 | 	mov	x2, msg01_len + msg02_len
116 | 	adrf	x1, msg01
117 | 	mov	x0, STDOUT_FILENO
118 | 	svc	0
119 | 
120 | 	mov	x16, SYS_exit
121 | 	mov	x0, xzr
122 | 	svc	0
123 | 
124 | 	.section .data
125 | msg01:
126 | 	.ascii	"elapsed_frq: "
127 | msg01_arg0:
128 | 	.ascii	"0123456789abcdef\n"
129 | 	.ascii	"elapsed_vct: "
130 | msg01_arg1:
131 | 	.ascii	"0123456789abcdef\n"
132 | msg01_len = . - msg01
133 | 
134 | msg02:
135 | 	.ascii	"count: "
136 | msg02_arg0:
137 | 	.ascii "01234567\n"
138 | msg02_len = . - msg02
139 | 
140 | 	.section .rodata
141 | /* input data: every word is 0x01020408, i.e. four set bits per word */
142 | 	.align 12
143 | sample_bitset_u32:
144 | 	.rept	sample_bitset_num_u32
145 | 	.long	0x01020408
146 | 	.endr
147 | 


--------------------------------------------------------------------------------
/test_macos/test_bounce.s:
--------------------------------------------------------------------------------
  1 | 	.global _start
  2 | // demo: one "[]" blip bouncing around a text frame buffer; positions are fixed point with COORD_FRAC fraction bits
  3 | 	.equ SYS_write, 4
  4 | 	.equ SYS_exit, 1
  5 | 	.equ SYS_select, 93
  6 | 	.equ STDOUT_FILENO, 1
  7 | 
  8 | 	.equ COORD_FRAC, 16
  9 | .if 0
 10 | 	.equ FB_DIM_X, 203
 11 | 	.equ FB_DIM_Y, 48
 12 | 	.equ FRAMES, 1024
 13 | .else
 14 | 	// symbols supplied by CLI
 15 | .endif
 16 | 	.include "macro.inc"
 17 | 
 18 | 	.text
 19 | _start:
 20 | 	// clear screen
 21 | 	mov	x16, SYS_write
 22 | 	mov	x2, fb_clear_len
 23 | 	adr	x1, fb_clear_cmd
 24 | 	mov	x0, STDOUT_FILENO
 25 | 	svc	0
 26 | 
 27 | 	// clear fb
 28 | 	movi	v0.16b, ' '
 29 | 	adrf	x0, fb
 30 | 	ldr	w1, =fb_len
 31 | 	bl	memset // defined in another object; presumably x0 = dest, x1 = byte count, v0 = fill -- TODO confirm
 32 | 
 33 | .Lfb_done:
 34 | 	mov	w5, wzr // blip pos_x
 35 | 	mov	w6, wzr // blip pos_y
 36 | 	mov	x7, FRAMES
 37 | 	mov	w9, (FB_DIM_X - 2) << COORD_FRAC // max bound_x
 38 | 	mov	w10, (FB_DIM_Y - 1) << COORD_FRAC // max bound_y
 39 | 	mov	w11, 0x1 << (COORD_FRAC - 0) // blip step_x
 40 | 	mov	w12, 0x8 << (COORD_FRAC - 4) // blip step_y
 41 | .Lframe:
 42 | 	// reset cursor; x16 = SYS_write
 43 | 	mov	x2, fb_cursor_len
 44 | 	adr	x1, fb_cursor_cmd
 45 | 	mov	x0, STDOUT_FILENO
 46 | 	svc	0
 47 | 
 48 | 	// access to fb: addr & len as per SYS_write
 49 | 	ldr	w2, =fb_len
 50 | 	adrf	x1, fb
 51 | 
 52 | 	// plot blip in fb
 53 | 	asr	w13, w5, COORD_FRAC
 54 | 	asr	w14, w6, COORD_FRAC
 55 | 	// round fractional coords to +inf
 56 | 	ubfx	w17, w5, (COORD_FRAC - 1), #1
 57 | 	ubfx	w19, w6, (COORD_FRAC - 1), #1
 58 | 	add	w13, w13, w17
 59 | 	add	w14, w14, w19
 60 | 
 61 | 	mov	w3, 0x5d5b // halfword stored low byte first: 0x5b '[' then 0x5d ']'
 62 | 	mov	w4, FB_DIM_X
 63 | 	madd	w4, w4, w14, w13 // w4 = y * FB_DIM_X + x; kept for the erase after the write
 64 | 	strh	w3, [x1, x4]
 65 | 
 66 | 	// update position
 67 | 	add	w5, w5, w11
 68 | 	add	w6, w6, w12
 69 | 
 70 | 	// check bounds & update step accordingly
 71 | 	cmp	w5, w9
 72 | 	ccmp	w5, 0, 4, NE // not at max: compare with 0; at max: force Z (nzcv = 4)
 73 | 	cneg	w11, w11, EQ // EQ means pos_x hit either bound: reverse step_x
 74 | 
 75 | 	cmp	w6, w10
 76 | 	ccmp	w6, 0, 4, NE
 77 | 	cneg	w12, w12, EQ
 78 | 
 79 | 	// output fb; x16 = SYS_write
 80 | 	mov	x0, STDOUT_FILENO
 81 | 	svc	0
 82 | 
 83 | 	// erase blip from fb
 84 | 	adrf	x1, fb // reloaded after the syscall (x1 presumably not preserved across svc here -- TODO confirm)
 85 | 	mov	w3, 0x2020
 86 | 	strh	w3, [x1, x4]
 87 | 
 88 | 	// xnu has no nanosleep
 89 | 	mov	x16, SYS_select // select with no descriptor sets: only the timeout in x4 is passed
 90 | 	adr	x4, timeval
 91 | 	mov	x3, xzr
 92 | 	mov	x2, xzr
 93 | 	mov	x1, xzr
 94 | 	mov	x0, xzr
 95 | 	svc	0
 96 | 
 97 | 	mov	x16, SYS_write
 98 | 	subs	x7, x7, 1
 99 | 	bne	.Lframe
100 | 
101 | 	mov	x16, SYS_exit
102 | 	mov	x0, xzr
103 | 	svc	0
104 | 
105 | fb_clear_cmd:
106 | 	.ascii "\033[2J"
107 | fb_clear_len = . - fb_clear_cmd
108 | 
109 | fb_cursor_cmd:
110 | 	.ascii "\033[1;1H"
111 | fb_cursor_len = . - fb_cursor_cmd
112 | // select timeout: two qwords { 0 s, 15500 us }
113 | 	.align 3
114 | timeval:
115 | 	.dword 0, 15500
116 | 
117 | 	.section .bss
118 | 	.align 6
119 | fb: // frame buffer, one byte per cell, FB_DIM_X bytes per row
120 | 	.fill FB_DIM_Y * FB_DIM_X
121 | fb_len = . - fb
122 | 


--------------------------------------------------------------------------------
/test_macos/test_bounce.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | BUILD=..
 3 | COMMON=../test_common
 4 | # Build via the parent directory's Makefile before running
 5 | make -C ${BUILD} all
 6 | 
 7 | # De-nice ourselves and our kitty term emu for smooth fps; some root required
 8 | 
 9 | DENICE=
10 | PID_KITTY=
11 | # Priority boosting is opt-in: pass the single argument "denice"
12 | if [[ $# == 1 ]] && [[ $1 == "denice" ]] ; then
13 | 
14 | 	DENICE="sudo nice -n -20"
15 | 
16 | 	# Check if terminal is kitty -- normally our terminal should be our grandparent
17 | 	# (the parent pid of $PPID is looked up, then that process's command name)
18 | 	PID_KITTY=`ps -p $PPID -o ppid=''`
19 | 	COMM_KITTY=`ps -p $PID_KITTY -o command='' -c`
20 | 
21 | 	if [[ ${COMM_KITTY} != "kitty" ]] ; then
22 | 		PID_KITTY=
23 | 	fi
24 | fi
25 | 
26 | # Boost kitty to top warp
27 | 
28 | if [[ ! -z ${PID_KITTY} ]] ; then
29 | 	sudo renice -n -20 -p ${PID_KITTY}
30 | fi
31 | 
32 | # Hide term cursor before loading REL; restore term cursor
33 | # upon termination
34 | 
35 | tput civis
36 | ${DENICE} ${BUILD}/elvenrel ${COMMON}/memset.o test_bounce.o
37 | tput cnorm
38 | 
39 | # De-boost kitty to normal warp
40 | 
41 | if [[ ! -z ${PID_KITTY} ]] ; then
42 | 	sudo renice -n 20 -p ${PID_KITTY}
43 | fi
44 | 


--------------------------------------------------------------------------------
/test_macos/test_bounce_neon.s:
--------------------------------------------------------------------------------
  1 | 	.global _start
  2 | // demo: four "[]" blips bouncing in a text frame buffer, all four updated at once in NEON registers
  3 | 	.equ SYS_write, 4
  4 | 	.equ SYS_exit, 1
  5 | 	.equ SYS_select, 93
  6 | 	.equ STDOUT_FILENO, 1
  7 | 
  8 | .if 0
  9 | 	.equ FB_DIM_X, 203
 10 | 	.equ FB_DIM_Y, 48
 11 | 	.equ FRAMES, 2048
 12 | .else
 13 | 	// symbols supplied by CLI
 14 | .endif
 15 | 	.include "macro.inc"
 16 | 
 17 | 	.text
 18 | _start:
 19 | 	// clear screen
 20 | 	mov	x16, SYS_write
 21 | 	mov	x2, fb_clear_len
 22 | 	adr	x1, fb_clear_cmd
 23 | 	mov	x0, STDOUT_FILENO
 24 | 	svc	0
 25 | 
 26 | 	// clear fb
 27 | 	movi	v0.16b, ' '
 28 | 	adrf	x0, fb
 29 | 	ldr	w1, =fb_len
 30 | 	bl	memset // defined in another object; presumably x0 = dest, x1 = byte count, v0 = fill -- TODO confirm
 31 | 
 32 | .Lfb_done:
 33 | 	// four Q-form regs hold SoA { pos_x, pos_y, step_x, step_y }
 34 | 	ldr	q0, blip +  0 // blip{0..3} pos_x
 35 | 	ldr	q1, blip + 16 // blip{0..3} pos_y
 36 | 	ldr	q2, blip + 32 // blip{0..3} step_x
 37 | 	ldr	q3, blip + 48 // blip{0..3} step_y
 38 | 
 39 | 	mov	w4, FB_DIM_X
 40 | 	mov	w5, FB_DIM_X - 2
 41 | 	mov	w6, FB_DIM_Y - 1
 42 | 	fmov	s4, w4 // v4.s[0] = row pitch, used as the mla multiplier
 43 | 	dup	v5.4s, w5 // v5 = max pos_x in every lane
 44 | 	dup	v6.4s, w6 // v6 = max pos_y in every lane
 45 | 
 46 | 	mov	x9, FRAMES
 47 | .Lframe:
 48 | 	// reset cursor; x16 = SYS_write
 49 | 	mov	x2, fb_cursor_len
 50 | 	adr	x1, fb_cursor_cmd
 51 | 	mov	x0, STDOUT_FILENO
 52 | 	svc	0
 53 | 
 54 | 	// access to fb: addr & len as per SYS_write
 55 | 	ldr	w2, =fb_len
 56 | 	adrf	x1, fb
 57 | 
 58 | 	// plot blips in fb
 59 | 	mov	v7.16b, v0.16b
 60 | 	mla	v7.4s, v1.4s, v4.s[0] // v7 = pos_x + pos_y * FB_DIM_X: byte offset of each blip in fb
 61 | 
 62 | 	fmov	w4, s7
 63 | 	mov	w5, v7.s[1]
 64 | 	mov	w6, v7.s[2]
 65 | 	mov	w7, v7.s[3]
 66 | 
 67 | 	mov	w3, 0x5d5b // halfword stored low byte first: 0x5b '[' then 0x5d ']'
 68 | 	strh	w3, [x1, x4]
 69 | 	strh	w3, [x1, x5]
 70 | 	strh	w3, [x1, x6]
 71 | 	strh	w3, [x1, x7]
 72 | 
 73 | 	// update positions
 74 | 	add	v0.4s, v0.4s, v2.4s
 75 | 	add	v1.4s, v1.4s, v3.4s
 76 | 
 77 | 	// check bounds & update steps accordingly
 78 | 	cmeq	v7.4s, v0.4s, v5.4s
 79 | 	cmeq	v8.4s, v0.4s, 0
 80 | 	cmeq	v9.4s, v1.4s, v6.4s
 81 | 	cmeq	v10.4s, v1.4s, 0
 82 | 	orr	v7.16b, v7.16b, v8.16b // v7 = all-ones in lanes where pos_x hit either bound
 83 | 	orr	v9.16b, v9.16b, v10.16b // v9 = same for pos_y
 84 | 	eor	v2.16b, v7.16b, v2.16b // (step ^ mask) - mask negates the step where mask is all-ones
 85 | 	eor	v3.16b, v9.16b, v3.16b
 86 | 	sub	v2.4s, v2.4s, v7.4s
 87 | 	sub	v3.4s, v3.4s, v9.4s
 88 | 
 89 | 	// output fb; x16 = SYS_write
 90 | 	mov	x0, STDOUT_FILENO
 91 | 	svc	0
 92 | 
 93 | 	// erase blips from fb
 94 | 	adrf	x1, fb // reloaded after the syscall; w4..w7 still hold the plotted offsets
 95 | 	mov	w3, 0x2020
 96 | 	strh	w3, [x1, x4]
 97 | 	strh	w3, [x1, x5]
 98 | 	strh	w3, [x1, x6]
 99 | 	strh	w3, [x1, x7]
100 | 
101 | 	// xnu has no nanosleep
102 | 	mov	x16, SYS_select // select with no descriptor sets: only the timeout in x4 is passed
103 | 	adr	x4, timeval
104 | 	mov	x3, xzr
105 | 	mov	x2, xzr
106 | 	mov	x1, xzr
107 | 	mov	x0, xzr
108 | 	svc	0
109 | 
110 | 	mov	x16, SYS_write
111 | 	subs	x9, x9, 1
112 | 	bne	.Lframe
113 | 
114 | 	mov	x16, SYS_exit
115 | 	mov	x0, xzr
116 | 	svc	0
117 | 
118 | 	.align 4
119 | blip:
120 | 	.word 0x00000030, 0x00000020, 0x00000010, 0x00000000 // blip{0..3} pos_x
121 | 	.word 0x00000010, 0x00000000, 0x00000010, 0x00000000 // blip{0..3} pos_y
122 | 	.word 0x00000001, 0x00000001, 0x00000001, 0x00000001 // blip{0..3} step_x
123 | 	.word 0xffffffff, 0x00000001, 0xffffffff, 0x00000001 // blip{0..3} step_y
124 | 
125 | fb_clear_cmd:
126 | 	.ascii "\033[2J"
127 | fb_clear_len = . - fb_clear_cmd
128 | 
129 | fb_cursor_cmd:
130 | 	.ascii "\033[1;1H"
131 | fb_cursor_len = . - fb_cursor_cmd
132 | // select timeout: two qwords { 0 s, 12300 us }
133 | 	.align 3
134 | timeval:
135 | 	.dword 0, 12300
136 | 
137 | 	.section .bss
138 | 	.align 6
139 | fb: // frame buffer, one byte per cell, FB_DIM_X bytes per row
140 | 	.fill FB_DIM_Y * FB_DIM_X
141 | fb_len = . - fb
142 | 


--------------------------------------------------------------------------------
/test_macos/test_bounce_neon.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | BUILD=..
 3 | COMMON=../test_common
 4 | # Build via the parent directory's Makefile before running
 5 | make -C ${BUILD} all
 6 | 
 7 | # De-nice ourselves and our kitty term emu for smooth fps; some root required
 8 | 
 9 | DENICE=
10 | PID_KITTY=
11 | # Priority boosting is opt-in: pass the single argument "denice"
12 | if [[ $# == 1 ]] && [[ $1 == "denice" ]] ; then
13 | 
14 | 	DENICE="sudo nice -n -20"
15 | 
16 | 	# Check if terminal is kitty -- normally our terminal should be our grandparent
17 | 	# (the parent pid of $PPID is looked up, then that process's command name)
18 | 	PID_KITTY=`ps -p $PPID -o ppid=''`
19 | 	COMM_KITTY=`ps -p $PID_KITTY -o command='' -c`
20 | 
21 | 	if [[ ${COMM_KITTY} != "kitty" ]] ; then
22 | 		PID_KITTY=
23 | 	fi
24 | fi
25 | 
26 | # Boost kitty to top warp
27 | 
28 | if [[ ! -z ${PID_KITTY} ]] ; then
29 | 	sudo renice -n -20 -p ${PID_KITTY}
30 | fi
31 | 
32 | # Hide term cursor before loading REL; restore term cursor
33 | # upon termination
34 | 
35 | tput civis
36 | ${DENICE} ${BUILD}/elvenrel ${COMMON}/memset.o test_bounce_neon.o
37 | tput cnorm
38 | 
39 | # De-boost kitty to normal warp
40 | 
41 | if [[ ! -z ${PID_KITTY} ]] ; then
42 | 	sudo renice -n 20 -p ${PID_KITTY}
43 | fi
44 | 


--------------------------------------------------------------------------------
/test_macos/test_bounce_neon_aosoa.s:
--------------------------------------------------------------------------------
  1 | 	.global _start
  2 | 
  3 | 	.equ SYS_write, 4
  4 | 	.equ SYS_exit, 1
  5 | 	.equ SYS_select, 93
  6 | 	.equ STDOUT_FILENO, 1
  7 | 
  8 | .if 0
  9 | 	.equ FB_DIM_X, 203
 10 | 	.equ FB_DIM_Y, 48
 11 | 	.equ FRAMES, 2048
 12 | .else
 13 | 	// symbols supplied by CLI
 14 | .endif
 15 | 	.include "macro.inc"
 16 | 
 17 | 	.text
 18 | _start:
 19 | 	// clear screen
 20 | 	mov	x16, SYS_write
 21 | 	mov	x2, fb_clear_len
 22 | 	adr	x1, fb_clear_cmd
 23 | 	mov	x0, STDOUT_FILENO
 24 | 	svc	0
 25 | 
 26 | 	// clear fb
 27 | 	movi	v0.16b, ' '
 28 | 	adrf	x0, fb
 29 | 	ldr	w1, =fb_len
 30 | 	bl	memset
 31 | 
 32 | .Lfb_done:
 33 | 	mov	w4, FB_DIM_X
 34 | 	mov	w5, FB_DIM_X - 2
 35 | 	mov	w6, FB_DIM_Y - 1
 36 | 	fmov	s4, w4
 37 | 	dup	v5.4s, w5
 38 | 	dup	v6.4s, w6
 39 | 
 40 | 	mov	x9, FRAMES
 41 | .Lframe:
 42 | 	// reset cursor; x16 = SYS_write
 43 | 	mov	x2, fb_cursor_len
 44 | 	adr	x1, fb_cursor_cmd
 45 | 	mov	x0, STDOUT_FILENO
 46 | 	svc	0
 47 | 
 48 | 	// access to fb: addr & len as per SYS_write
 49 | 	ldr	w2, =fb_len
 50 | 	adrf	x1, fb
 51 | 
 52 | 	// plot blips in fb
 53 | 	adrf	x10, blip
 54 | 	adrf	x11, blip_end
 55 | 	mov	x12, x11
 56 | .Lpack_plot:
 57 | 	// four Q-form regs hold SoA { pos_x, pos_y, step_x, step_y }
 58 | 	ldp	q0, q1, [x10]
 59 | 	ldp	q2, q3, [x10, 32]
 60 | 
 61 | 	mov	v7.16b, v0.16b
 62 | 	mla	v7.4s, v1.4s, v4.s[0]
 63 | 
 64 | 	str	q7, [x12], 16
 65 | 
 66 | 	fmov	w4, s7
 67 | 	mov	w5, v7.s[1]
 68 | 	mov	w6, v7.s[2]
 69 | 	mov	w7, v7.s[3]
 70 | 
 71 | 	mov	w3, 0x5d5b
 72 | 	strh	w3, [x1, x4]
 73 | 	strh	w3, [x1, x5]
 74 | 	strh	w3, [x1, x6]
 75 | 	strh	w3, [x1, x7]
 76 | 
 77 | 	// update positions
 78 | 	add	v0.4s, v0.4s, v2.4s
 79 | 	add	v1.4s, v1.4s, v3.4s
 80 | 
 81 | 	// check bounds & update steps accordingly
 82 | 	cmeq	v7.4s, v0.4s, v5.4s
 83 | 	cmeq	v8.4s, v0.4s, 0
 84 | 	cmeq	v9.4s, v1.4s, v6.4s
 85 | 	cmeq	v10.4s, v1.4s, 0
 86 | 	orr	v7.16b, v7.16b, v8.16b
 87 | 	orr	v9.16b, v9.16b, v10.16b
 88 | 	eor	v2.16b, v7.16b, v2.16b
 89 | 	eor	v3.16b, v9.16b, v3.16b
 90 | 	sub	v2.4s, v2.4s, v7.4s
 91 | 	sub	v3.4s, v3.4s, v9.4s
 92 | 
 93 | 	stp	q0, q1, [x10], 32
 94 | 	stp	q2, q3, [x10], 32
 95 | 
 96 | 	cmp	x10, x11
 97 | 	bne	.Lpack_plot
 98 | 
 99 | 	// output fb; x16 = SYS_write
100 | 	mov	x0, STDOUT_FILENO
101 | 	svc	0
102 | 
103 | 	// erase blips from fb
104 | 	adrf	x1, fb
105 | 	adrf	x10, blip_end
106 | 	adrf	x11, erase_end
107 | .Lpack_erase:
108 | 	ldp	w4, w5, [x10], 8
109 | 	ldp	w6, w7, [x10], 8
110 | 
111 | 	mov	w3, 0x2020
112 | 	strh	w3, [x1, x4]
113 | 	strh	w3, [x1, x5]
114 | 	strh	w3, [x1, x6]
115 | 	strh	w3, [x1, x7]
116 | 
117 | 	cmp	x10, x11
118 | 	bne	.Lpack_erase
119 | 
120 | 	// xnu has no nanosleep
121 | 	mov	x16, SYS_select
122 | 	adr	x4, timeval
123 | 	mov	x3, xzr
124 | 	mov	x2, xzr
125 | 	mov	x1, xzr
126 | 	mov	x0, xzr
127 | 	svc	0
128 | 
129 | 	mov	x16, SYS_write
130 | 	subs	x9, x9, 1
131 | 	bne	.Lframe
132 | 
133 | 	mov	x16, SYS_exit
134 | 	mov	x0, xzr
135 | 	svc	0
136 | 
137 | fb_clear_cmd:	// terminal escape sequence ESC[2J (erase display)
138 | 	.ascii "\033[2J"
139 | fb_clear_len = . - fb_clear_cmd	// byte length of the sequence above
140 | 
141 | fb_cursor_cmd:	// terminal escape sequence ESC[1;1H (cursor to row 1, column 1); written at the start of every frame
142 | 	.ascii "\033[1;1H"
143 | fb_cursor_len = . - fb_cursor_cmd	// byte length of the sequence above
144 | 
145 | 	.align 3	// 8-byte alignment for the two dwords below
146 | timeval:	// { 0 s, 12300 us }: timeout handed to SYS_select as the per-frame delay
147 | 	.dword 0, 12300
148 | 
149 | 	.section .bss
150 | 	.align 6	// 64-byte alignment
151 | fb:	// framebuffer: FB_DIM_Y rows of FB_DIM_X one-byte character cells
152 | 	.fill FB_DIM_Y * FB_DIM_X
153 | fb_len = . - fb	// framebuffer size in bytes, used as the SYS_write length
154 | 


--------------------------------------------------------------------------------
/test_macos/test_bounce_neon_aosoa.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Build the loader, then run the NEON AoSoA bounce demos back to back.
 3 | # Usage: test_bounce_neon_aosoa.sh [denice]
 4 | BUILD=..
 5 | COMMON=../test_common
 6 | 
 7 | make -C ${BUILD} all
 8 | 
 9 | # De-nice ourselves and our kitty term emu for smooth fps; some root required
10 | 
11 | DENICE=
12 | PID_KITTY=
13 | KITTY_BOOSTED=
14 | 
15 | if [[ $# == 1 ]] && [[ $1 == "denice" ]] ; then
16 | 
17 | 	DENICE="sudo nice -n -20"
18 | 
19 | 	# Check if terminal is kitty -- normally our terminal should be our grandparent
20 | 
21 | 	PID_KITTY=`ps -p $PPID -o ppid=''`
22 | 	COMM_KITTY=`ps -p $PID_KITTY -o command='' -c`
23 | 
24 | 	if [[ ${COMM_KITTY} != "kitty" ]] ; then
25 | 		PID_KITTY=
26 | 	fi
27 | fi
28 | 
29 | # Restore term cursor and de-boost kitty to normal warp; hooked to EXIT so
30 | # that an interrupted demo does not leave the terminal altered
31 | 
32 | restore_term() {
33 | 	tput cnorm
34 | 
35 | 	if [[ ! -z ${KITTY_BOOSTED} ]] ; then
36 | 		sudo renice -n 20 -p ${PID_KITTY}
37 | 	fi
38 | }
39 | 
40 | trap restore_term EXIT
41 | trap 'exit 130' INT
42 | trap 'exit 143' TERM
43 | 
44 | # Boost kitty to top warp; remember success so that only a real boost gets undone
45 | 
46 | if [[ ! -z ${PID_KITTY} ]] ; then
47 | 	sudo renice -n -20 -p ${PID_KITTY} && KITTY_BOOSTED=1
48 | fi
49 | 
50 | # Hide term cursor before loading RELs; suppress reports to
51 | # stdout; term cursor is restored by restore_term upon termination
52 | 
53 | tput civis
54 | ${DENICE} ${BUILD}/elvenrel ${COMMON}/memset.o ${COMMON}/test_bounce_data_aosoa_alt_0.o test_bounce_neon_aosoa.o    --quiet
55 | #${DENICE} ${BUILD}/elvenrel ${COMMON}/memset.o ${COMMON}/test_bounce_data_aosoa_alt_1.o test_bounce_neon_aosoa.o    --quiet
56 | ${DENICE} ${BUILD}/elvenrel ${COMMON}/memset.o ${COMMON}/test_bounce_data_aosoa_alt_2.o test_bounce_neon_aosoa.o    --quiet
57 | ${DENICE} ${BUILD}/elvenrel ${COMMON}/memset.o ${COMMON}/test_bounce_data_aosoa_alt_3.o test_bounce_neon_aosoa.o    --quiet
58 | ${DENICE} ${BUILD}/elvenrel ${COMMON}/memset.o ${COMMON}/test_bounce_data_aosoa_alt_3.o test_bounce_neon_aosoa_bg.o --quiet
59 | 


--------------------------------------------------------------------------------
/test_macos/test_bounce_neon_aosoa_bg.s:
--------------------------------------------------------------------------------
  1 | 	.global _start
  2 | 
  3 | 	.equ SYS_write, 4	// syscall numbers; loaded into x16 ahead of svc
  4 | 	.equ SYS_exit, 1
  5 | 	.equ SYS_select, 93
  6 | 	.equ STDOUT_FILENO, 1
  7 | 
  8 | .if 0	// disabled: built-in framebuffer geometry and frame count
  9 | 	.equ FB_DIM_X, 203
 10 | 	.equ FB_DIM_Y, 48
 11 | 	.equ FRAMES, 2048
 12 | .else
 13 | 	// symbols supplied by CLI
 14 | .endif
 15 | 	.equ GRID_DISTANCE_X, 8	// horizontal spacing of grid particles, in fb cells
 16 | 	.equ GRID_DISTANCE_Y, 8	// vertical spacing of grid lines, in fb cells
 17 | 
 18 | .if 0	// disabled alternative: particles alternate between x-only and y-only steps
 19 | 	.equ GRID_STEP_X_0, 1
 20 | 	.equ GRID_STEP_X_1, 0
 21 | 	.equ GRID_STEP_X_2, 1
 22 | 	.equ GRID_STEP_X_3, 0
 23 | 
 24 | 	.equ GRID_STEP_Y_0, 0
 25 | 	.equ GRID_STEP_Y_1, 1
 26 | 	.equ GRID_STEP_Y_2, 0
 27 | 	.equ GRID_STEP_Y_3, 1
 28 | .else	// active: every particle steps +1 in x and 0 in y
 29 | 	.equ GRID_STEP_X_0, 1
 30 | 	.equ GRID_STEP_X_1, 1
 31 | 	.equ GRID_STEP_X_2, 1
 32 | 	.equ GRID_STEP_X_3, 1
 33 | 
 34 | 	.equ GRID_STEP_Y_0, 0
 35 | 	.equ GRID_STEP_Y_1, 0
 36 | 	.equ GRID_STEP_Y_2, 0
 37 | 	.equ GRID_STEP_Y_3, 0
 38 | .endif
 39 | 
 40 | 	.include "macro.inc"	// presumably the source of the adrf macro used below -- not visible here
 41 | 
 42 | 	.text
 43 | _start:	// entry: clear screen and fb, generate the particle grid, animate FRAMES frames, exit
 44 | 	// clear screen
 45 | 	mov	x16, SYS_write
 46 | 	mov	x2, fb_clear_len
 47 | 	adr	x1, fb_clear_cmd
 48 | 	mov	x0, STDOUT_FILENO
 49 | 	svc	0
 50 | 
 51 | 	// clear fb
 52 | 	movi	v0.16b, ' '	// 16 space characters
 53 | 	adrf	x0, fb
 54 | 	ldr	w1, =fb_len
 55 | 	bl	memset	// external routine; this call site sets x0 = fb, w1 = fb_len, v0 = spaces
 56 | 
 57 | .Lfb_done:
 58 | 	mov	w4, FB_DIM_X
 59 | 	mov	w5, FB_DIM_X - 2
 60 | 	mov	w6, FB_DIM_Y - 1
 61 | 	fmov	s4, w4	// v4.s[0] = FB_DIM_X: row stride for the mla index computations
 62 | 	dup	v5.4s, w5	// v5 = FB_DIM_X - 2 in all lanes: pos_x bound for grid generation and for blips
 63 | 	dup	v6.4s, w6	// v6 = FB_DIM_Y - 1 in all lanes: pos_y bound
 64 | 
 65 | 	// generate a grid of axes-traversing particles
 66 | 	ldr	q3, grid_pos_0123	// v3 = pos_x of the current pack
 67 | 	ldr	q7, grid_step_0123	// v7 = step_x of the current pack
 68 | 	ldr	q8, grid_step_0123 + 16	// v8 = step_y of the current pack
 69 | 	ldr	q11, =0x10000000200000003	// v11 = per-lane pos_y offsets { 3, 2, 1, 0 }
 70 | 
 71 | 	adr	x10, grid_pos_xxx0	// table bases; indexed below by (number of overflowing lanes - 1)
 72 | 	adr	x11, grid_pos_1234
 73 | 	adr	x12, grid_step_xxx0
 74 | 	adr	x13, grid_step_1234
 75 | 
 76 | 	adrf	x7, grid	// x7 = write cursor into the grid array
 77 | 	adrf	x8, grid_end	// x8 = end of the grid array
 78 | 	mov	w9, wzr	// w9 = pos_y of the current grid line
 79 | .Lgen_grid:
 80 | 	// how many x-coords exceed end-of-line - 2?
 81 | 	cmhi	v2.4s, v3.4s, v5.4s	// v2 = all-ones in lanes whose pos_x is past the bound
 82 | 	addv	s1, v2.4s	// every set lane is -1, so the sum is -(count)
 83 | 	fmov	w0, s1	// w0 = -(count)
 84 | 	dup	v0.4s, w9
 85 | 	add	v0.4s, v0.4s, v11.4s	// v0 = pos_y of the pack: line y plus per-lane offsets
 86 | 	cbz	w0, .Lgen_grid_next	// nothing overflows: store the pack as is
 87 | 
 88 | 	// produce a transitional end-of-one/start-of-another
 89 | 	// pack inbetween subsequent lines
 90 | 	mvn	w1, w0	// w1 = ~(-count) = count - 1: table index
 91 | 	ldr	q1, [x10, x1, LSL 4]	// pos_x of the new line's first particles; the table keeps the lower lanes zero
 92 | 	bic	v3.16b, v3.16b, v2.16b	// clear the overflowing lanes ...
 93 | 	orr	v3.16b, v3.16b, v1.16b	// ... and merge in the new-line pos_x
 94 | 
 95 | 	add	x2, x12, x1, LSL 5	// step tables hold 32 bytes (x row + y row) per entry
 96 | 	ldp	q9, q10, [x2]
 97 | 	bic	v7.16b, v7.16b, v2.16b	// same merge for step_x ...
 98 | 	orr	v7.16b, v7.16b, v9.16b
 99 | 	bic	v8.16b, v8.16b, v2.16b	// ... and for step_y
100 | 	orr	v8.16b, v8.16b, v10.16b
101 | 
102 | 	dup	v0.4s, w9	// v0 = pos_y of the old line, without lane offsets
103 | 	mov	v12.16b, v11.16b	// v12 = working copy of the pos_y lane offsets
104 | 	add	w9, w9, GRID_DISTANCE_Y	// move on to the next grid line
105 | 	// clamp pos_y at bottom-of-fb; affects only padding particles
106 | 	cmp	w9, w6
107 | 	blo	.Lgen_grid_pos_y
108 | 	mov	w9, FB_DIM_Y - 1
109 | 	bic	v7.16b, v7.16b, v2.16b	// zero steps and pos_y offsets in the new-line lanes
110 | 	bic	v8.16b, v8.16b, v2.16b
111 | 	bic	v12.16b, v12.16b, v2.16b
112 | .Lgen_grid_pos_y:
113 | 	dup	v1.4s, w9	// v1 = pos_y of the new line
114 | 
115 | 	bic	v0.16b, v0.16b, v2.16b	// old-line pos_y stays in the lanes that did not overflow
116 | 	and	v1.16b, v1.16b, v2.16b	// new-line pos_y goes to the lanes that did
117 | 	orr	v0.16b, v0.16b, v1.16b
118 | 	add	v0.4s, v0.4s, v12.4s	// apply the per-lane offsets
119 | 
120 | 	// if the first pack is entirely from the new line
121 | 	// then move over to that
122 | 	cmn	w0, 4	// w0 == -4: all four lanes overflowed
123 | 	beq	.Lgen_grid_next
124 | 
125 | 	// first pack is actually transitional
126 | 	// pos_x, pos_y, step_x, step_y
127 | 	stp	q3, q0, [x7], 32
128 | 	stp	q7, q8, [x7], 32
129 | 
130 | 	cmp	x7, x8
131 | 	beq	.Lgen_grid_done
132 | 
133 | 	// prepare next pack entirely from the new line
134 | 	ldr	q3, [x11, x1, LSL 4]	// pos_x continuing after the particles already placed on the new line
135 | 	dup	v0.4s, w9
136 | 	add	v0.4s, v0.4s, v11.4s
137 | 
138 | 	add	x2, x13, x1, LSL 5
139 | 	ldp	q7, q8, [x2]	// matching step_x / step_y rotation
140 | 
141 | .Lgen_grid_next:
142 | 	// pos_x, pos_y, step_x, step_y
143 | 	stp	q3, q0, [x7], 32
144 | 	stp	q7, q8, [x7], 32
145 | 
146 | 	mov	w0, GRID_DISTANCE_X * 4
147 | 	dup	v2.4s, w0
148 | 
149 | 	add	v3.4s, v3.4s, v2.4s	// next pack: pos_x advances by four grid columns
150 | 
151 | 	cmp	x7, x8
152 | 	bne	.Lgen_grid
153 | 
154 | .Lgen_grid_done:
155 | 	mov	x16, SYS_write
156 | 	mov	x9, FRAMES	// x9 = frames left to draw
157 | 	movi	v11.4s, 1
158 | 	add	v11.4s, v11.4s, v5.4s	// v11 = FB_DIM_X - 1: right bound for the one-char grid particles
159 | .Lframe:
160 | 	// reset cursor; x16 = SYS_write
161 | 	mov	x2, fb_cursor_len
162 | 	adr	x1, fb_cursor_cmd
163 | 	mov	x0, STDOUT_FILENO
164 | 	svc	0
165 | 
166 | 	// access to fb: addr & len as per SYS_write
167 | 	ldr	w2, =fb_len
168 | 	adrf	x1, fb
169 | 
170 | 	// plot grid in fb
171 | 	adrf	x10, grid
172 | 	adrf	x11, grid_end
173 | 	mov	x12, x11	// x12 = cursor into the scratch area that starts at grid_end
174 | .Lgrid_plot:
175 | 	// four Q-form regs hold SoA { pos_x, pos_y, step_x, step_y }
176 | 	ldp	q0, q1, [x10]
177 | 	ldp	q2, q3, [x10, 32]
178 | 
179 | 	mov	v7.16b, v0.16b
180 | 	mla	v7.4s, v1.4s, v4.s[0]	// v7 = pos_x + pos_y * FB_DIM_X: offsets into fb
181 | 
182 | 	str	q7, [x12], 16	// keep the offsets for the erase pass
183 | 
184 | 	fmov	w4, s7
185 | 	mov	w5, v7.s[1]
186 | 	mov	w6, v7.s[2]
187 | 	mov	w7, v7.s[3]
188 | 
189 | 	mov	w3, 'o'	// grid particles are a single character
190 | 	strb	w3, [x1, x4]
191 | 	strb	w3, [x1, x5]
192 | 	strb	w3, [x1, x6]
193 | 	strb	w3, [x1, x7]
194 | 
195 | 	// update positions
196 | 	add	v0.4s, v0.4s, v2.4s
197 | 	add	v1.4s, v1.4s, v3.4s
198 | 
199 | 	// check bounds & update steps accordingly
200 | 	cmeq	v7.4s, v0.4s, v11.4s
201 | 	cmeq	v8.4s, v0.4s, 0
202 | 	cmeq	v9.4s, v1.4s, v6.4s
203 | 	cmeq	v10.4s, v1.4s, 0
204 | 	orr	v7.16b, v7.16b, v8.16b	// v7 = lanes sitting on a left/right bound
205 | 	orr	v9.16b, v9.16b, v10.16b	// v9 = lanes sitting on a top/bottom bound
206 | 	eor	v2.16b, v7.16b, v2.16b	// (step ^ mask) - mask negates the step where mask is all-ones
207 | 	eor	v3.16b, v9.16b, v3.16b
208 | 	sub	v2.4s, v2.4s, v7.4s
209 | 	sub	v3.4s, v3.4s, v9.4s
210 | 
211 | 	stp	q0, q1, [x10], 32
212 | 	stp	q2, q3, [x10], 32
213 | 
214 | 	cmp	x10, x11
215 | 	bne	.Lgrid_plot
216 | 
217 | 	// plot blips in fb
218 | 	adrf	x10, blip	// blip, blip_end and erase_end are not defined in this file
219 | 	adrf	x11, blip_end
220 | 	mov	x12, x11	// x12 = cursor into the scratch area that starts at blip_end
221 | .Lpack_plot:
222 | 	// four Q-form regs hold SoA { pos_x, pos_y, step_x, step_y }
223 | 	ldp	q0, q1, [x10]
224 | 	ldp	q2, q3, [x10, 32]
225 | 
226 | 	mov	v7.16b, v0.16b
227 | 	mla	v7.4s, v1.4s, v4.s[0]	// v7 = pos_x + pos_y * FB_DIM_X: offsets into fb
228 | 
229 | 	str	q7, [x12], 16	// keep the offsets for the erase pass
230 | 
231 | 	fmov	w4, s7
232 | 	mov	w5, v7.s[1]
233 | 	mov	w6, v7.s[2]
234 | 	mov	w7, v7.s[3]
235 | 
236 | 	mov	w3, 0x5d5b	// two characters, '[' then ']' in memory order
237 | 	strh	w3, [x1, x4]
238 | 	strh	w3, [x1, x5]
239 | 	strh	w3, [x1, x6]
240 | 	strh	w3, [x1, x7]
241 | 
242 | 	// update positions
243 | 	add	v0.4s, v0.4s, v2.4s
244 | 	add	v1.4s, v1.4s, v3.4s
245 | 
246 | 	// check bounds & update steps accordingly
247 | 	cmeq	v7.4s, v0.4s, v5.4s	// right bound is FB_DIM_X - 2 here: blips are two chars wide
248 | 	cmeq	v8.4s, v0.4s, 0
249 | 	cmeq	v9.4s, v1.4s, v6.4s
250 | 	cmeq	v10.4s, v1.4s, 0
251 | 	orr	v7.16b, v7.16b, v8.16b
252 | 	orr	v9.16b, v9.16b, v10.16b
253 | 	eor	v2.16b, v7.16b, v2.16b	// (step ^ mask) - mask negates the step where mask is all-ones
254 | 	eor	v3.16b, v9.16b, v3.16b
255 | 	sub	v2.4s, v2.4s, v7.4s
256 | 	sub	v3.4s, v3.4s, v9.4s
257 | 
258 | 	stp	q0, q1, [x10], 32
259 | 	stp	q2, q3, [x10], 32
260 | 
261 | 	cmp	x10, x11
262 | 	bne	.Lpack_plot
263 | 
264 | 	// output fb; x16 = SYS_write
265 | 	mov	x0, STDOUT_FILENO	// x1 = fb and x2 = fb_len are still set from the top of the frame
266 | 	svc	0
267 | 
268 | 	// erase grid from fb
269 | 	adrf	x1, fb
270 | 	adrf	x10, grid_end
271 | 	adrf	x11, grid_erase_end
272 | .Lgrid_erase:
273 | 	ldp	w4, w5, [x10], 8	// reload the four fb offsets saved while plotting
274 | 	ldp	w6, w7, [x10], 8
275 | 
276 | 	mov	w3, 0x20	// one space
277 | 	strb	w3, [x1, x4]
278 | 	strb	w3, [x1, x5]
279 | 	strb	w3, [x1, x6]
280 | 	strb	w3, [x1, x7]
281 | 
282 | 	cmp	x10, x11
283 | 	bne	.Lgrid_erase
284 | 
285 | 	// erase blips from fb
286 | 	adrf	x10, blip_end
287 | 	adrf	x11, erase_end
288 | .Lpack_erase:
289 | 	ldp	w4, w5, [x10], 8	// reload the four fb offsets saved while plotting
290 | 	ldp	w6, w7, [x10], 8
291 | 
292 | 	mov	w3, 0x2020	// two spaces
293 | 	strh	w3, [x1, x4]
294 | 	strh	w3, [x1, x5]
295 | 	strh	w3, [x1, x6]
296 | 	strh	w3, [x1, x7]
297 | 
298 | 	cmp	x10, x11
299 | 	bne	.Lpack_erase
300 | 
301 | 	// xnu has no nanosleep
302 | 	mov	x16, SYS_select
303 | 	adr	x4, timeval	// fifth argument: the timeout
304 | 	mov	x3, xzr	// first four arguments are zero: no descriptors to watch
305 | 	mov	x2, xzr
306 | 	mov	x1, xzr
307 | 	mov	x0, xzr
308 | 	svc	0
309 | 
310 | 	mov	x16, SYS_write	// restore the syscall number expected at .Lframe
311 | 	subs	x9, x9, 1
312 | 	bne	.Lframe
313 | 
314 | 	mov	x16, SYS_exit
315 | 	mov	x0, xzr	// exit status 0
316 | 	svc	0
317 | 
318 | 	.align 4	// 16-byte alignment
319 | grid_pos_xxx0:	// pos_x table, 16 bytes per entry; _start indexes it from here (and from grid_pos_1234) by overflow count - 1
320 | 	.word	0,                   0,                   0,                   GRID_DISTANCE_X * 0
321 | grid_pos_xx01:
322 | 	.word	0,                   0,                   GRID_DISTANCE_X * 0, GRID_DISTANCE_X * 1
323 | grid_pos_x012:
324 | 	.word	0,                   GRID_DISTANCE_X * 0, GRID_DISTANCE_X * 1, GRID_DISTANCE_X * 2
325 | grid_pos_0123:	// pos_x of the very first pack
326 | 	.word	GRID_DISTANCE_X * 0, GRID_DISTANCE_X * 1, GRID_DISTANCE_X * 2, GRID_DISTANCE_X * 3
327 | grid_pos_1234:
328 | 	.word	GRID_DISTANCE_X * 1, GRID_DISTANCE_X * 2, GRID_DISTANCE_X * 3, GRID_DISTANCE_X * 4
329 | grid_pos_2345:
330 | 	.word	GRID_DISTANCE_X * 2, GRID_DISTANCE_X * 3, GRID_DISTANCE_X * 4, GRID_DISTANCE_X * 5
331 | grid_pos_3456:
332 | 	.word	GRID_DISTANCE_X * 3, GRID_DISTANCE_X * 4, GRID_DISTANCE_X * 5, GRID_DISTANCE_X * 6
333 | 
334 | grid_step_xxx0:	// step table, 32 bytes per entry (step_x row, then step_y row); same indexing as the pos_x table
335 | 	.word	0,             0,             0,             GRID_STEP_X_0
336 | 	.word	0,             0,             0,             GRID_STEP_Y_0
337 | grid_step_xx01:
338 | 	.word	0,             0,             GRID_STEP_X_0, GRID_STEP_X_1
339 | 	.word	0,             0,             GRID_STEP_Y_0, GRID_STEP_Y_1
340 | grid_step_x012:
341 | 	.word	0,             GRID_STEP_X_0, GRID_STEP_X_1, GRID_STEP_X_2
342 | 	.word	0,             GRID_STEP_Y_0, GRID_STEP_Y_1, GRID_STEP_Y_2
343 | grid_step_0123:	// steps of the very first pack
344 | 	.word	GRID_STEP_X_0, GRID_STEP_X_1, GRID_STEP_X_2, GRID_STEP_X_3
345 | 	.word	GRID_STEP_Y_0, GRID_STEP_Y_1, GRID_STEP_Y_2, GRID_STEP_Y_3
346 | grid_step_1234:
347 | 	.word	GRID_STEP_X_1, GRID_STEP_X_2, GRID_STEP_X_3, GRID_STEP_X_0
348 | 	.word	GRID_STEP_Y_1, GRID_STEP_Y_2, GRID_STEP_Y_3, GRID_STEP_Y_0
349 | grid_step_2345:
350 | 	.word	GRID_STEP_X_2, GRID_STEP_X_3, GRID_STEP_X_0, GRID_STEP_X_1
351 | 	.word	GRID_STEP_Y_2, GRID_STEP_Y_3, GRID_STEP_Y_0, GRID_STEP_Y_1
352 | grid_step_3456:
353 | 	.word	GRID_STEP_X_3, GRID_STEP_X_0, GRID_STEP_X_1, GRID_STEP_X_2
354 | 	.word	GRID_STEP_Y_3, GRID_STEP_Y_0, GRID_STEP_Y_1, GRID_STEP_Y_2
355 | 
356 | fb_clear_cmd:	// terminal escape sequence ESC[2J; written once by _start under "clear screen"
357 | 	.ascii "\033[2J"
358 | fb_clear_len = . - fb_clear_cmd	// byte length of the sequence above
359 | 
360 | fb_cursor_cmd:	// terminal escape sequence ESC[1;1H; written at the start of every frame
361 | 	.ascii "\033[1;1H"
362 | fb_cursor_len = . - fb_cursor_cmd	// byte length of the sequence above
363 | 
364 | 	.align 3	// 8-byte alignment for the two dwords below
365 | timeval:	// { 0 s, 12300 us }: timeout handed to SYS_select as the per-frame delay
366 | 	.dword 0, 12300
367 | 
368 | 	.section .bss
369 | 	.align 6	// 64-byte alignment
370 | fb:	// framebuffer: FB_DIM_Y rows of FB_DIM_X one-byte character cells
371 | 	.fill FB_DIM_Y * FB_DIM_X
372 | fb_len = . - fb	// framebuffer size in bytes, used as the SYS_write length
373 | 
374 | 	.align 6	// 64-byte alignment
375 | grid:	// particle packs, 64 bytes each: four 16-byte rows { pos_x, pos_y, step_x, step_y }
376 | 	.fill (((FB_DIM_X + GRID_DISTANCE_X - 2) / GRID_DISTANCE_X) * ((FB_DIM_Y + GRID_DISTANCE_Y - 2 - 3) / GRID_DISTANCE_Y) + 3) / 4 * 64
377 | grid_end:	// scratch: one 4-byte fb offset per grid particle, written while plotting and read back by the erase pass
378 | 	.fill (grid_end - grid) / 16, 4
379 | grid_erase_end:
380 | 


--------------------------------------------------------------------------------
/test_macos/test_bss.s:
--------------------------------------------------------------------------------
 1 | 	.global _start
 2 | 
 3 | 	.equ SYS_write, 4	// syscall numbers; loaded into x16 ahead of svc
 4 | 	.equ SYS_exit, 1
 5 | 	.equ STDOUT_FILENO, 1
 6 | 
 7 | 	.include "macro.inc"	// presumably the source of the adrf macro used below -- not visible here
 8 | 
 9 | 	.text
10 | _start:	// write buf to stdout, increment the dword kept in .bss, exit with the incremented value
11 | 	mov	x16, SYS_write
12 | 	mov	x2, len
13 | 	adrf	x1, buf
14 | 	mov	x0, STDOUT_FILENO
15 | 	svc	0
16 | 
17 | 	adrp	x1, code	// page address of code; the :lo12: operands below add the in-page offset
18 | 	ldr	x0, [x1, :lo12:code]
19 | 	add	x0, x0, 1
20 | 	str	x0, [x1, :lo12:code]	// read-modify-write of a .bss location
21 | 
22 | 	mov	x16, SYS_exit	// x0 still holds the incremented value and is passed as the exit status
23 | 	svc	0
24 | 
25 | 	.section .bss
26 | code:	// 8-byte counter; NOTE(review): no .align precedes it -- confirm the loader places it 8-byte aligned for the 64-bit :lo12: accesses
27 | 	.dword	0
28 | 
29 | 	.section .rodata
30 | buf:	// message printed by _start
31 | 	.ascii	"hello from ET_REL\n"
32 | len = . - buf	// message length in bytes
33 | 


--------------------------------------------------------------------------------
/test_macos/test_cross.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | BUILD=..	# directory holding the top-level Makefile and the elvenrel binary
3 | 
4 | make -C "${BUILD}" all
5 | 
6 | # Load two RELs with cross-relocations: test_cross_1 uses buf, which test_cross_0 exports
7 | 
8 | "${BUILD}/elvenrel" test_cross_0.o test_cross_1.o
9 | 


--------------------------------------------------------------------------------
/test_macos/test_cross_0.s:
--------------------------------------------------------------------------------
1 | 	.global buf	// exported for use by another REL
2 | 
3 | 	.section .rodata
4 | 
5 | 	.byte len	// message length, stored in the byte right before buf
6 | buf:	// the message itself
7 | 	.ascii	"hello from ET_REL\n"
8 | len = . - buf	// message length in bytes
9 | 


--------------------------------------------------------------------------------
/test_macos/test_cross_1.s:
--------------------------------------------------------------------------------
 1 | 	.global _start
 2 | 
 3 | 	.equ SYS_write, 4	// syscall numbers; loaded into x16 ahead of svc
 4 | 	.equ SYS_exit, 1
 5 | 	.equ STDOUT_FILENO, 1
 6 | 
 7 | 	.include "macro.inc"	// presumably the source of the adrf macro used below -- not visible here
 8 | 
 9 | 	.text
10 | _start:	// write the externally defined buf to stdout and exit with status 0
11 | 	mov	x16, SYS_write
12 | 	adrf	x1, buf	// buf is not defined in this file
13 | 	ldrb	w2, [x1, -1]	// length = the byte stored right before buf
14 | 	mov	x0, STDOUT_FILENO
15 | 	svc	0
16 | 
17 | 	mov	x16, SYS_exit
18 | 	mov	x0, xzr	// exit status 0
19 | 	svc	0
20 | 


--------------------------------------------------------------------------------
/test_macos/test_data.s:
--------------------------------------------------------------------------------
 1 | 	.global _start
 2 | 
 3 | 	.equ SYS_write, 4	// syscall numbers; loaded into x16 ahead of svc
 4 | 	.equ SYS_exit, 1
 5 | 	.equ STDOUT_FILENO, 1
 6 | 
 7 | 	.include "macro.inc"	// presumably the source of the adrf and movl macros used below -- not visible here
 8 | 
 9 | 	.text
10 | _start:	// patch four bytes of buf in .data, write buf to stdout, exit with status 0
11 | 	mov	x16, SYS_write
12 | 	mov	x2, len
13 | 	adrf	x1, buf
14 | 	movl	w3, 0x4c45525f	// bytes '_', 'R', 'E', 'L' in memory order
15 | 	str	w3, [x1, 13]	// overwrite the "...." placeholder, which starts at offset 13
16 | 	mov	x0, STDOUT_FILENO
17 | 	svc	0
18 | 
19 | 	mov	x16, SYS_exit
20 | 	mov	x0, xzr	// exit status 0
21 | 	svc	0
22 | 
23 | 	.section .data
24 | buf:	// message with a four-char placeholder that _start fills in
25 | 	.ascii	"hello from ET....\n"
26 | len = . - buf	// message length in bytes
27 | 


--------------------------------------------------------------------------------
/test_macos/test_memset.s:
--------------------------------------------------------------------------------
 1 | 	.global _start
 2 | 
 3 | 	.equ SYS_write, 4	// syscall numbers; loaded into x16 ahead of svc
 4 | 	.equ SYS_exit, 1
 5 | 	.equ STDOUT_FILENO, 1
 6 | 
 7 | 	.equ COLUMNS, 64	// visible chars per fb line; each line also ends in a newline
 8 | 	.equ LINES, 48
 9 | 
10 | 	.include "macro.inc"	// presumably the source of the adrf macro used below -- not visible here
11 | _start:	// overdraw the '='-filled text fb with growing runs of '.' via memset, then print it
12 | 	adrf	x4, fb	// x4 = destination of the current run
13 | 	add	x5, x4, (COLUMNS + 1) * LINES	// x5 = end of fb
14 | 	mov	x6, 1	// x6 = run length; grows by one per iteration
15 | 	mov	x7, 1	// x7 = counter compared against LINES / (COLUMNS - LINES)
16 | .Lloop:
17 | 	movi	v0.16b, '.'	// fill pattern, set up anew before every call
18 | 	mov	x0, x4
19 | 	mov	x1, x6
20 | 	bl	memset	// external routine; this call site sets x0 = dst, x1 = length, v0 = pattern
21 | 
22 | 	cmp	x7, LINES / (COLUMNS - LINES)
23 | 	csel	x7, xzr, x7, EQ	// restart the counter once it reaches the period
24 | 	add	x7, x7, 1
25 | 	add	x6, x6, 1
26 | 	cinc	x4, x4, EQ	// flags are still those of the cmp: on such iterations shift the start one byte further
27 | 	add	x4, x4, COLUMNS + 1	// advance by one fb line
28 | 	cmp	x4, x5
29 | 	blo	.Lloop
30 | 
31 | 	mov	x16, SYS_write
32 | 	mov	x2, fb_len
33 | 	adrf	x1, fb
34 | 	mov	x0, STDOUT_FILENO
35 | 	svc	0
36 | 
37 | 	mov	x16, SYS_exit
38 | 	mov	x0, xzr	// exit status 0
39 | 	svc	0
40 | 
41 | 	.data
42 | fb:	// LINES text lines, each COLUMNS '=' chars followed by a newline
43 | 	.rept LINES
44 | 	.fill COLUMNS, 1, '='
45 | 	.byte '\n'
46 | 	.endr
47 | fb_len = . - fb	// fb size in bytes
48 | 


--------------------------------------------------------------------------------
/test_macos/test_memset_woa.s:
--------------------------------------------------------------------------------
 1 | 	.global _start
 2 | 
 3 | 	.equ SYS_write, 4	// syscall numbers; loaded into x16 ahead of svc
 4 | 	.equ SYS_exit, 1
 5 | 	.equ STDOUT_FILENO, 1
 6 | 
 7 | 	.equ COLUMNS, 64	// visible chars per fb line; each line also ends in a newline
 8 | 	.equ LINES, 48
 9 | 
10 | 	.include "macro.inc"	// presumably the source of the adrf and movq macros used below -- not visible here
11 | _start:	// overdraw the '='-filled text fb with growing runs of '.' via memset_woa, then print it
12 | 	adrf	x4, fb	// x4 = destination of the current run
13 | 	add	x5, x4, (COLUMNS + 1) * LINES	// x5 = end of fb
14 | 	mov	x6, 1	// x6 = run length; grows by one per iteration
15 | 	mov	x7, 1	// x7 = counter compared against LINES / (COLUMNS - LINES)
16 | .Lloop:
17 | 	mov	x0, x4
18 | 	movq	x1, 0x2e2e2e2e2e2e2e2e	// eight '.' characters
19 | 	mov	x2, x6
20 | 	bl	memset_woa	// external routine; this call site sets x0 = dst, x1 = pattern, x2 = length
21 | 
22 | 	cmp	x7, LINES / (COLUMNS - LINES)
23 | 	csel	x7, xzr, x7, EQ	// restart the counter once it reaches the period
24 | 	add	x7, x7, 1
25 | 	add	x6, x6, 1
26 | 	cinc	x4, x4, EQ	// flags are still those of the cmp: on such iterations shift the start one byte further
27 | 	add	x4, x4, COLUMNS + 1	// advance by one fb line
28 | 	cmp	x4, x5
29 | 	blo	.Lloop
30 | 
31 | 	mov	x16, SYS_write
32 | 	mov	x2, fb_len
33 | 	adrf	x1, fb
34 | 	mov	x0, STDOUT_FILENO
35 | 	svc	0
36 | 
37 | 	mov	x16, SYS_exit
38 | 	mov	x0, xzr	// exit status 0
39 | 	svc	0
40 | 
41 | 	.data
42 | fb:	// LINES text lines, each COLUMNS '=' chars followed by a newline
43 | 	.rept LINES
44 | 	.fill COLUMNS, 1, '='
45 | 	.byte '\n'
46 | 	.endr
47 | fb_len = . - fb	// fb size in bytes
48 | 


--------------------------------------------------------------------------------
/test_macos/test_rodata.s:
--------------------------------------------------------------------------------
 1 | 	.global _start
 2 | 
 3 | 	.equ SYS_write, 4	// syscall numbers; loaded into x16 ahead of svc
 4 | 	.equ SYS_exit, 1
 5 | 	.equ STDOUT_FILENO, 1
 6 | 
 7 | 	.include "macro.inc"	// presumably the source of the adrf macro used below -- not visible here
 8 | 
 9 | 	.text
10 | _start:	// write the .rodata message to stdout and exit with status 0
11 | 	mov	x16, SYS_write
12 | 	mov	x2, len
13 | 	adrf	x1, buf
14 | 	mov	x0, STDOUT_FILENO
15 | 	svc	0
16 | 
17 | 	mov	x16, SYS_exit
18 | 	mov	x0, xzr	// exit status 0
19 | 	svc	0
20 | 
21 | 	.section .rodata
22 | buf:	// message printed by _start
23 | 	.ascii	"hello from ET_REL\n"
24 | len = . - buf	// message length in bytes
25 | 


--------------------------------------------------------------------------------
/test_macos/test_text.s:
--------------------------------------------------------------------------------
 1 | 	.global _start
 2 | 
 3 | 	.equ SYS_write, 4	// syscall numbers; loaded into x16 ahead of svc
 4 | 	.equ SYS_exit, 1
 5 | 	.equ STDOUT_FILENO, 1
 6 | 
 7 | 	.text
 8 | _start:	// write a message kept in .text to stdout and exit with status 0
 9 | 	mov	x16, SYS_write
10 | 	mov	x2, len
11 | 	adr	x1, buf	// plain adr suffices: buf sits in this same section
12 | 	mov	x0, STDOUT_FILENO
13 | 	svc	0
14 | 
15 | 	mov	x16, SYS_exit
16 | 	mov	x0, xzr	// exit status 0
17 | 	svc	0
18 | 
19 | buf:	// message printed by _start; placed right after the code
20 | 	.ascii	"hello from ET_REL\n"
21 | len = . - buf	// message length in bytes
22 | 


--------------------------------------------------------------------------------
/test_macos/test_timeval.s:
--------------------------------------------------------------------------------
  1 | 	.global _start
  2 | 
  3 | 	.equ SYS_write, 4	// syscall numbers; loaded into x16 ahead of svc
  4 | 	.equ SYS_exit, 1
  5 | 	.equ SYS_select, 93
  6 | 	.equ SYS_gettimeofday, 116
  7 | 	.equ STDOUT_FILENO, 1
  8 | .ifndef DTIME	// DTIME may be predefined by the build; otherwise fall back to 15500 us
  9 | 	.equ DTIME, 15500
 10 | .endif
 11 | 	.include "macro.inc"	// presumably the source of the adrf and movl macros used below -- not visible here
 12 | 
 13 | 	.text
 14 | 
 15 | // advance timeval by a non-negative dtime in us
 16 | // x0: timeval ptr
 17 | // w1: dtime us; must be less than 1e6
 18 | // clobbers: x2, x3, x4
 19 | 	.align 4
 20 | advance_timeval_us:
 21 | 	movl	w2, 1000000
 22 | 	sub	w2, w2, w1	// w2 = 1e6 - dtime: smallest us value that will carry into seconds
 23 | 	ldp	x3, x4, [x0]	// x3 = first dword (seconds), x4 = second dword (microseconds)
 24 | 	subs	w2, w4, w2	// w2 = us + dtime - 1e6; borrow means no carry into seconds
 25 | 	blo	.Lupdate_only_us
 26 | 	add	x3, x3, 1	// carry: one more second, and w2 is the wrapped microseconds
 27 | 	stp     x3, x2, [x0]
 28 | 	ret
 29 | .Lupdate_only_us:
 30 | 	add	w4, w4, w1	// no carry: just add dtime to the microseconds
 31 | 	str     x4, [x0, 8]
 32 | 	ret
 33 | 
 34 | _start:	// take a timestamp, sleep DTIME us via select, take another; print start, target and post-sleep times
 35 | 	adrf	x17, timeval	// x17 = two timeval slots: start at +0, post-sleep at +16
 36 | 	adrf	x19, msg	// x19 = output text template
 37 | 
 38 | 	mov	x16, SYS_gettimeofday
 39 | 	mov	x2, xzr
 40 | 	mov	x1, xzr
 41 | 	mov	x0, x17	// destination: the start slot
 42 | 	svc	0
 43 | 
 44 | 	// xnu has no nano/usleep -- use select with empty fd sets
 45 | 	mov	x16, SYS_select
 46 | 	adr	x4, timeval_select	// fifth argument: the timeout
 47 | 	mov	x3, xzr
 48 | 	mov	x2, xzr
 49 | 	mov	x1, xzr
 50 | 	mov	x0, xzr
 51 | 	svc	0
 52 | 
 53 | 	mov	x16, SYS_gettimeofday
 54 | 	mov	x2, xzr
 55 | 	mov	x1, xzr
 56 | 	add	x0, x17, 16	// destination: the post-sleep slot
 57 | 	svc	0
 58 | 
 59 | 	// itoa start time (timeval::tv_sec and timeval::tv_usec)
 60 | 	ldr	x1, [x17]
 61 | 	mov	x0, x19	// msg line 0, the 16-char field
 62 | 	bl	string_x64	// external routine -- not defined in this file
 63 | 
 64 | 	ldr	w1, [x17, 8]
 65 | 	add	x0, x19, 17	// msg line 0, the 8-char field after the ':'
 66 | 	bl	string_x32	// external routine -- not defined in this file
 67 | 
 68 | 	// advance start time by DTIME us
 69 | 	mov	w1, DTIME
 70 | 	mov	x0, x17
 71 | 	bl	advance_timeval_us	// the start slot now holds the target time
 72 | 
 73 | 	// itoa target time
 74 | 	ldr	x1, [x17]
 75 | 	add	x0, x19, 26	// msg line 1 (lines are 26 bytes apart)
 76 | 	bl	string_x64
 77 | 
 78 | 	ldr	w1, [x17, 8]
 79 | 	add	x0, x19, 43
 80 | 	bl	string_x32
 81 | 
 82 | 	// itoa post-sleep time
 83 | 	ldr	x1, [x17, 16]
 84 | 	add	x0, x19, 52	// msg line 2
 85 | 	bl	string_x64
 86 | 
 87 | 	ldr	w1, [x17, 24]
 88 | 	add	x0, x19, 69
 89 | 	bl	string_x32
 90 | 
 91 | 	// output start, target and post-sleep times
 92 | 	mov	x16, SYS_write
 93 | 	mov	x2, msg_len
 94 | 	mov	x1, x19
 95 | 	mov	x0, STDOUT_FILENO
 96 | 	svc	0
 97 | 
 98 | 	mov	x16, SYS_exit
 99 | 	mov	x0, xzr	// exit status 0
100 | 	svc	0
101 | 
102 | timeval_select:	// { 0 s, DTIME us }: the select timeout
103 | 	.dword 0, DTIME
104 | 
105 | 	.section .bss
106 | 	.align 4	// 16-byte alignment
107 | timeval:	// two 16-byte slots: start time (later advanced to the target time), then post-sleep time
108 | 	.dword 0, 0
109 | 	.dword 0, 0
110 | 
111 | 	.section .data
112 | msg:	// three 26-byte lines; _start overwrites the '#' fields at offsets 0/17, 26/43 and 52/69
113 | 	.ascii "################:########\n"
114 | 	.ascii "################:########\n"
115 | 	.ascii "################:########\n"
116 | msg_len = . - msg	// total length in bytes
117 | 


--------------------------------------------------------------------------------
/test_macos/test_timeval.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | BUILD=..	# directory holding the top-level Makefile and the elvenrel binary
3 | 
4 | make -C "${BUILD}" all
5 | 
6 | # Advance a timeval structure by some us; stringx.o supplies the string_x* routines
7 | 
8 | "${BUILD}/elvenrel" stringx.o test_timeval.o --quiet
9 | 


--------------------------------------------------------------------------------
/vma.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Parsing of /proc/self/maps and optional disposing of string-matched VMAs
  3 |  *
  4 |  * Copyright (C) 2021 Martin Krastev <blu.dark@gmail.com>
  5 |  */
  6 | 
  7 | #if __aarch64__ == 0
  8 | #error wrong target architecture
  9 | #endif
 10 | 
 11 | #include <stdio.h>
 12 | #include <stdlib.h>
 13 | #include <stdint.h>
 14 | #include <unistd.h>
 15 | #include <string.h>
 16 | #include <errno.h>
 17 | #include <assert.h>
 18 | 
 19 | #include <sys/types.h>
 20 | #include <sys/stat.h>
 21 | #include <sys/mman.h>
 22 | #include <fcntl.h>
 23 | 
 24 | #include "vma.h"
 25 | 
 26 | extern "C" { // routines defined outside this file; each is bound to the assembler-level name given in asm ("...")
 27 | 	void string_x8(void*, uint8_t) asm ("string_x8"); // presumably writes the value as fixed-width hex text at the pointer -- confirm in stringx.s
 28 | 	void string_x16(void*, uint16_t) asm ("string_x16");
 29 | 	void string_x32(void*, uint32_t) asm ("string_x32");
 30 | 	void string_x64(void*, uint64_t) asm ("string_x64");
 31 | 	size_t strlen_linux(const char*) asm ("strlen_linux"); // presumably a strlen work-alike -- confirm in strlen_linux.s
 32 | }
 33 | 
 34 | #define FILENO_STDIN  0 // descriptor numbers of the standard streams
 35 | #define FILENO_STDOUT 1
 36 | #define FILENO_STDERR 2
 37 | 
 38 | #define SYS_write     0x40 // syscall numbers; the sys:: wrappers below load them into x8
 39 | #define SYS_exit      0x5d
 40 | #define SYS_munmap    0xd7
 41 | #define SYS_mremap    0xd8
 42 | #define SYS_mmap      0xde
 43 | 
 44 | #define xxstr(s) #s // stringify the argument as written
 45 | #define xstr(s) xxstr(s) // expand the argument first, then stringify: turns SYS_* into text for the asm templates
 46 | 
 47 | template < typename T >
 48 | using ref = T&; // reference-to-T alias; lets the conversion operators below name array-reference types
 49 | // fixed-size char buffer with value semantics; converts implicitly to a reference to its array
 50 | template < size_t N >
 51 | class char_array_t
 52 | {
 53 | 	char arr[N]; // private copy of the source characters
 54 | 
 55 | public:
 56 | 	char_array_t(const char (& arg)[N]) { size_t i = N; while (i--) arr[i] = arg[i]; }
 57 | 	operator ref<       char[N] >()       { return this->arr; }
 58 | 	operator ref< const char[N] >() const { return this->arr; }
 59 | };
 60 | 
 61 | template < size_t N >
 62 | char_array_t< N - 1 > strip_str(const char (& arg)[N])
 63 | { // copy all chars of arg but the last one (the terminator, when arg is a string literal)
 64 | 	return { reinterpret_cast< ref< const char[N - 1] > >(arg) };
 65 | }
 66 | 
 67 | template < typename T, size_t N >
 68 | constexpr size_t countof(const T (& array)[N])
 69 | { // element count of a built-in array
 70 | 	return sizeof(array) / sizeof(array[0]);
 71 | }
 72 | 
 73 | template < size_t N >
 74 | constexpr size_t countof(const char_array_t< N >& /* only the type matters */)
 75 | { // char count of a char_array_t, i.e. its template argument
 76 | 	return size_t { N };
 77 | }
 78 | 
 79 | namespace sys { // libc-free syscall wrappers: number in x8, arguments in x0..x5, raw kernel result taken from x0
 80 | 
 81 | static intptr_t mmap( // returns the raw result: the mapped address, or a negative error value
 82 | 	void *addr, size_t length, int prot, int flags, int fd, off_t offset)
 83 | {
 84 | 	register uint64_t x0 asm ("x0") = (uintptr_t)addr;
 85 | 	register uint64_t x1 asm ("x1") = length;
 86 | 	register uint64_t x2 asm ("x2") = prot;
 87 | 	register uint64_t x3 asm ("x3") = flags;
 88 | 	register uint64_t x4 asm ("x4") = fd;
 89 | 	register uint64_t x5 asm ("x5") = offset;
 90 | 
 91 | 	asm volatile (
 92 | 		"mov	x8, " xstr(SYS_mmap) "\n\t"
 93 | 		"svc	0"
 94 | 		: "+r" (x0)
 95 | 		: "r" (x1), "r" (x2), "r" (x3), "r" (x4), "r" (x5)
 96 | 		: "x8", "memory");
 97 | 
 98 | 	return x0;
 99 | }
100 | 
101 | static int munmap(void *addr, size_t length) // returns the raw result narrowed to int
102 | {
103 | 	register uint64_t x0 asm ("x0") = (uintptr_t)addr;
104 | 	register uint64_t x1 asm ("x1") = length;
105 | 
106 | 	asm volatile (
107 | 		"mov	x8, " xstr(SYS_munmap) "\n\t"
108 | 		"svc	0"
109 | 		: "+r" (x0)
110 | 		: "r" (x1)
111 | 		: "x2", "x3", "x4", "x5", "x8", "memory");
112 | 
113 | 	return x0;
114 | }
115 | 
116 | static intptr_t mremap( // returns the raw result: the (possibly moved) address, or a negative error value
117 | 	void *addr, size_t old_length, size_t new_length, int flags, void *new_addr)
118 | {
119 | 	register uint64_t x0 asm ("x0") = (uintptr_t)addr;
120 | 	register uint64_t x1 asm ("x1") = old_length;
121 | 	register uint64_t x2 asm ("x2") = new_length;
122 | 	register uint64_t x3 asm ("x3") = flags;
123 | 	register uint64_t x4 asm ("x4") = (uintptr_t)new_addr;
124 | 
125 | 	asm volatile (
126 | 		"mov	x8, " xstr(SYS_mremap) "\n\t"
127 | 		"svc	0"
128 | 		: "+r" (x0)
129 | 		: "r" (x1), "r" (x2), "r" (x3), "r" (x4)
130 | 		: "x8", "memory");
131 | 
132 | 	return x0;
133 | }
134 | 
135 | static int64_t write(int fileno, const void *ptr, size_t len) // returns the raw result: bytes written, or a negative error value
136 | {
137 | 	register uint64_t x0 asm ("x0") = fileno;
138 | 	register uint64_t x1 asm ("x1") = (uintptr_t) ptr;
139 | 	register uint64_t x2 asm ("x2") = len;
140 | 
141 | 	asm volatile (
142 | 		"mov	x8, " xstr(SYS_write) "\n\t"
143 | 		"svc	0"
144 | 		: "+r" (x0)
145 | 		: "r" (x1), "r" (x2)
146 | 		: "x3", "x4", "x5", "x8", "memory");
147 | 
148 | 	return x0;
149 | }
150 | 
151 | static void exit(int code) // issues SYS_exit with the given status
152 | {
153 | 	register uint64_t x0 asm ("x0") = code;
154 | 
155 | 	asm volatile (
156 | 		"mov	x8, " xstr(SYS_exit) "\n\t"
157 | 		"svc	0"
158 | 		: : "r" (x0) : "x8", "memory"); // the template overwrites x8, so it has to be listed like in the other wrappers
159 | }
160 | 
161 | } // namespace sys
162 | 
163 | namespace alt { // stdio-free output helpers
164 | 
165 | static int64_t putc(int fileno, char c)
166 | {
167 | 	const char one[] = { c }; // one-byte buffer for the write syscall
168 | 	return sys::write(fileno, one, sizeof(one));
169 | }
170 | } // namespace alt
171 | 
172 | struct vma_t { // one parsed /proc/pid/maps entry
173 | 	uintptr_t start; // address range, as printed in the maps line
174 | 	uintptr_t end;
175 | 
176 | 	size_t offset; // offset field of the maps line
177 | 	uint16_t src; // start of source string in the pool
178 | 
179 | 	static size_t str_image_offset; // offset of optional image path in string
180 | 
181 | 	int8_t perm_read  : 1; // permission flags; parse() sets them from the four-char perms column
182 | 	int8_t perm_write : 1;
183 | 	int8_t perm_exec  : 1;
184 | 	int8_t perm_priv  : 1;
185 | 	int8_t cookie     : 1; // not touched by parse(); vma_set_t::filter() sets it on an image match
186 | 
187 | 	int8_t major; // device numbers of the maps line
188 | 	int8_t minor;
189 | 
190 | 	// parse a line from /proc/pid/maps into structured data; return true
191 | 	// only when all six leading fields (range, perms, offset, dev) convert
192 | 	bool parse(const char *str)
193 | 	{
194 | 		char flag[4];
195 | 		unsigned long off; // full width: offsets of large file mappings do not fit in 32 bits
196 | 		if (6 != sscanf(str, "%lx-%lx %4c %lx %hhx:%hhx",
197 | 			&start,
198 | 			&end,
199 | 			flag,
200 | 			&off,
201 | 			&major,
202 | 			&minor)) {
203 | 			return false;
204 | 		}
205 | 
206 | 		perm_read  = flag[0] == 'r' ? 1 : 0;
207 | 		perm_write = flag[1] == 'w' ? 1 : 0;
208 | 		perm_exec  = flag[2] == 'x' ? 1 : 0;
209 | 		perm_priv  = flag[3] == 'p' ? 1 : 0;
210 | 		offset = off;
211 | 		return true;
212 | 	}
213 | 	// write the fields into buffer at fixed columns; return the span covered, or 0 when len is too short
214 | 	size_t str(char *const buffer, const size_t len) const
215 | 	{
216 | 		const size_t pos[] = { 0, 17, 34, 35, 36, 37, 39, 48, 51, 53 }; // column of each field; last entry = total span
217 | 
218 | 		if (pos[countof(pos) - 1] > len)
219 | 			return 0;
220 | 
221 | 		size_t i = 0;
222 | 		string_x64(buffer + pos[i++], start);
223 | 		string_x64(buffer + pos[i++], end);
224 | 		buffer[pos[i++]] = perm_read  ? 'r' : '-';
225 | 		buffer[pos[i++]] = perm_write ? 'w' : '-';
226 | 		buffer[pos[i++]] = perm_exec  ? 'x' : '-';
227 | 		buffer[pos[i++]] = perm_priv  ? 'p' : '-';
228 | 		string_x32(buffer + pos[i++], offset); // string_x32 takes a uint32_t: only the low 32 bits are shown
229 | 		string_x8(buffer + pos[i++], major);
230 | 		string_x8(buffer + pos[i++], minor);
231 | 
232 | 		return pos[i];
233 | 	}
234 | 	// print the entry to stdout over a fixed-width template; return the result of sys::write
235 | 	ssize_t print() const
236 | 	{
237 | 		auto buffer = strip_str("################-################ #### ######## ##:##");
238 | 		const size_t len = str(buffer, countof(buffer));
239 | 
240 | 		return sys::write(FILENO_STDOUT, buffer, len);
241 | 	}
242 | };
243 | 
244 | size_t vma_t::str_image_offset = 64; // initial estimate; vma_set_t::update_str_image_offset() corrects it
245 | 
246 | class vma_set_t {
247 | 	char *pool; // char pool; sequentially filled in string chunks
248 | 	size_t last; // start of last added string in the pool
249 | 
250 | 	size_t index; // current-entry index
251 | 	size_t offset; // offset in current-entry string
252 | 
253 | 	size_t depth; // entries capacity
254 | 	size_t capa; // pool capacity
255 | 
256 | 	vma_t *vma; // entries array
257 | 
258 | 	// fill in a chunk of a line, possibly terminated, to the pool; parse ready lines
259 | 	void fill(const char* const src, const size_t len, const bool eol)
260 | 	{
261 | 		const int flag_move = MREMAP_MAYMOVE;
262 | 
263 | 		if (last + offset + len >= capa) {
264 | 			const size_t old_size = sizeof(*pool) * capa;
265 | 			capa += capa;
266 | 			const size_t new_size = sizeof(*pool) * capa;
267 | 			pool = (char *)sys::mremap(pool, old_size, new_size, flag_move, nullptr);
268 | 
269 | 			if (pool == MAP_FAILED) {
270 | 				fprintf(stderr, "error: cannot mremap char pool\n");
271 | 				sys::exit(-1);
272 | 			}
273 | 		}
274 | 
275 | 		if (index == depth) {
276 | 			const size_t old_size = sizeof(*vma) * depth;
277 | 			depth += depth;
278 | 			const size_t new_size = sizeof(*vma) * depth;
279 | 			vma = (vma_t *)sys::mremap(vma, old_size, new_size, flag_move, nullptr);
280 | 
281 | 			if (vma == MAP_FAILED) {
282 | 				fprintf(stderr, "error: cannot mremap entries array\n");
283 | 				sys::exit(-1);
284 | 			}
285 | 		}
286 | 
287 | 		char *const str = pool + last;
288 | 		memcpy(str + offset, src, len);
289 | 		offset += len;
290 | 
291 | 		if (!eol)
292 | 			return;
293 | 
294 | 		str[offset] = '\0';
295 | 
296 | 		if (!vma[index].parse(str))
297 | 			fprintf(stderr, "error: failed to parse line %lu\n", index);
298 | 
299 | 		assert((1UL << sizeof(vma_t::src) * 8) - 1 >= last);
300 | 		vma[index].src = last;
301 | 
302 | 		last += offset + 1;
303 | 		index += 1;
304 | 		offset = 0;
305 | 	}
306 | 
307 | public:
308 | 	vma_set_t() : last(0), index(0), offset(0), depth(PAGE_SIZE / sizeof(*vma)), capa(PAGE_SIZE / sizeof(*pool))
309 | 	{
310 | 		const int prot_rw = PROT_READ | PROT_WRITE;
311 | 		const int flag_priv_anon = MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE;
312 | 
313 | 		pool = (char *)sys::mmap(nullptr, sizeof(*pool) * capa, prot_rw, flag_priv_anon, -1, 0);
314 | 
315 | 		if (pool == MAP_FAILED) {
316 | 			fprintf(stderr, "error: cannot mmap char pool\n");
317 | 			sys::exit(-1);
318 | 		}
319 | 
320 | 		vma = (vma_t *)sys::mmap(nullptr, sizeof(*vma) * depth, prot_rw, flag_priv_anon, -1, 0);
321 | 
322 | 		if (vma == MAP_FAILED) {
323 | 			fprintf(stderr, "error: cannot mmap entries array\n");
324 | 			sys::exit(-1);
325 | 		}
326 | 	}
327 | 
328 | 	~vma_set_t()
329 | 	{
330 | 		sys::munmap((void *)vma, sizeof(*vma) * depth);
331 | 		sys::munmap((void *)pool, sizeof(*pool) * capa);
332 | 	}
333 | 
334 | 	// current size of the container
335 | 	size_t size() const
336 | 	{
337 | 		return index;
338 | 	}
339 | 
340 | 	// vma source string accessor
341 | 	const char* src(const size_t idx) const
342 | 	{
343 | 		assert(size() > idx);
344 | 		return pool + vma[idx].src;
345 | 	}
346 | 
347 | 	// write the sparse string of entry idx into the caller's buffer of len chars; return span of chars written
348 | 	size_t str(const size_t idx, char *const buffer, const size_t len) const
349 | 	{
350 | 		assert(size() > idx);
351 | 		return (vma + idx)->str(buffer, len);
352 | 	}
353 | 
354 | 	// print entry idx via vma_t::print; return the number of chars printed
355 | 	ssize_t print(const size_t idx) const
356 | 	{
357 | 		assert(size() > idx);
358 | 		return (vma + idx)->print();
359 | 	}
360 | 
361 | 	// refine vma_t::str_image_offset: take the last blank at or past the estimate in the first entry that has one
362 | 	void update_str_image_offset()
363 | 	{
364 | 		for (size_t entry = 0; entry != size(); ++entry) {
365 | 			const char *const text = src(entry);
366 | 			size_t pos = strlen(text);
367 | 			// walk back from the end of the text, never below the current estimate
368 | 			while (pos > vma_t::str_image_offset) {
369 | 				--pos;
370 | 				if (text[pos] != ' ' && text[pos] != '\t')
371 | 					continue;
372 | 				vma_t::str_image_offset = pos;
373 | 				return;
374 | 			}
375 | 		}
376 | 	}
377 | 
378 | 	// flag VMAs by image: cookie is 1 if any of the filter_count needles occurs at/after str_image_offset, else 0
379 | 	void filter(const size_t filter_count, char **const filter)
380 | 	{
381 | 		for (size_t i = 0; i < size(); ++i) {
382 | 			// the text and its length do not depend on the needle -- look them up once per entry
383 | 			const char *const str = src(i);
384 | 			const bool has_image = strlen(str) > vma_t::str_image_offset;
385 | 			vma[i].cookie = 0;
386 | 			// entries too short to carry an image can never match, so the needle scan is skipped for them
387 | 			for (size_t j = 0; has_image && j < filter_count; ++j) {
388 | 				if (strstr(str + vma_t::str_image_offset, filter[j])) {
389 | 					vma[i].cookie = 1;
390 | 					break;
391 | 				}
392 | 			}
393 | 		}
394 | 
395 | 	// deserialize from /proc/self/maps; returns 0 on success, -1 when the file cannot be opened or read
396 | 	int read_from_proc();
397 | 
398 | 	const vma_t& operator [](const size_t idx) const // read-only access to the entry at idx
399 | 	{
400 | 		assert(size() > idx);
401 | 		return *(vma + idx);
402 | 	}
403 | };
404 | 
405 | // find the first new-line among the first len chars of buffer; return its index, -1 if there is none
406 | static ssize_t seek_eol(const char *const buffer, const size_t len)
407 | {
408 | 	// memchr replaces the hand-written scan; it stops at the first match and never looks past len chars
409 | 	const void *const hit = memchr(buffer, '\n', len);
410 | 
411 | 	if (nullptr == hit)
412 | 		return -1;
413 | 
414 | 	return static_cast<const char *>(hit) - buffer;
415 | }
416 | 
417 | int vma_set_t::read_from_proc()
418 | {
419 | 	// feed /proc/self/maps line by line to fill(); return 0 on success, -1 when open() or read() fails
420 | 	char buffer[128];
421 | 	const int fd = open("/proc/self/maps", O_RDONLY);
422 | 
423 | 	if (-1 == fd)
424 | 		return -1;
425 | 
426 | 	// a short read does not mean end-of-file (see read(2)), so keep reading until read() returns 0
427 | 	for (;;) {
428 | 		const ssize_t bytes = read(fd, buffer, countof(buffer));
429 | 
430 | 		if (-1 == bytes) {
431 | 			if (EINTR == errno) // interrupted before any data arrived: retry
432 | 				continue;
433 | 			fprintf(stderr, "error: reading file: %s\n", strerror(errno));
434 | 			close(fd);
435 | 			return -1;
436 | 		}
437 | 
438 | 		// reached eof?
439 | 		if (0 == bytes)
440 | 			break;
441 | 
442 | 		// pass each complete line on with eol set; a trailing partial line goes on without it
443 | 		const size_t avail = size_t(bytes);
444 | 		size_t pos = 0;
445 | 		while (pos != avail) {
446 | 			const ssize_t eol_inc = seek_eol(buffer + pos, avail - pos);
447 | 
448 | 			if (-1 == eol_inc) {
449 | 				fill(buffer + pos, avail - pos, false);
450 | 				break;
451 | 			}
452 | 
453 | 			fill(buffer + pos, size_t(eol_inc), true);
454 | 			pos += size_t(eol_inc) + 1; // step over the new-line itself
455 | 		}
456 | 	}
457 | 
458 | 	close(fd);
459 | 	return 0;
460 | }
461 | // munmap the VMAs of this process whose image matches an entry of areas; list all VMAs unless flag_quiet is set
462 | void vma_process(struct char_ptr_arr_t *areas, int flag_quiet)
463 | {
464 | 	assert(areas != nullptr);
465 | 
466 | 	vma_set_t vma;
467 | 	vma.read_from_proc(); // NOTE(review): the -1 failure result is ignored here -- confirm that is intended
468 | 	vma.update_str_image_offset();
469 | 	vma.filter(areas->count, areas->arr); // sets cookie on every VMA whose image matches a needle
470 | 
471 | 	// libc-free zone from here
472 | 	// (presumably because the munmap calls below may remove libc's own mappings -- TODO confirm)
473 | 	if (!flag_quiet)
474 | 		alt::putc(FILENO_STDOUT, '\n');
475 | 
476 | 	auto buffer = strip_str("#### \033[38;5;14m ################-################ #### ######## ##:##");
477 | 
478 | 	for (size_t i = 0; i < vma.size(); ++i) {
479 | 		const bool filtered = vma[i].cookie;
480 | 
481 | 		// nuke filtered VMAs
482 | 		if (filtered) {
483 | 			const uintptr_t start = vma[i].start;
484 | 			const uintptr_t end = vma[i].end;
485 | 			sys::munmap((void*)start, end - start);
486 | 		}
487 | 
488 | 		if (!flag_quiet) {
489 | 			string_x16(buffer, i); // presumably writes index i as hex into the leading #### field -- verify
490 | 			buffer[13] = filtered ? '3' : i & 1 ? '4' : '5'; // last digit of color 14 in the template: 13 if unmapped, else 14 (odd i) or 15 (even i)
491 | 			vma.str(i, buffer + 16, countof(buffer) - 16); // entry fields go after the 16-char index/color prefix
492 | 			sys::write(FILENO_STDOUT, buffer, countof(buffer));
493 | 
494 | 			const char *const src = vma.src(i);
495 | 			const size_t len = strlen_linux(src);
496 | 			// append the tail of the source line, from str_image_offset on, when the line is long enough
497 | 			if (len > vma_t::str_image_offset)
498 | 				sys::write(FILENO_STDOUT, src + vma_t::str_image_offset, len - vma_t::str_image_offset);
499 | 
500 | 			const auto term = strip_str(" \033[0m\n"); // SGR reset sequence plus new-line ends the row
501 | 			sys::write(FILENO_STDOUT, term, countof(term));
502 | 		}
503 | 	}
504 | 
505 | 	if (!flag_quiet)
506 | 		alt::putc(FILENO_STDOUT, '\n');
507 | }
508 | 


--------------------------------------------------------------------------------
/vma.h:
--------------------------------------------------------------------------------
 1 | #ifndef __vma_H__
 2 | #define __vma_H__ /* the guard was tested but never defined, so it had no effect */
 3 | #include "char_ptr_arr.h"
 4 | 
 5 | #ifdef __cplusplus
 6 | extern "C" {
 7 | #endif
 8 | /* munmap the VMAs of the calling process whose image matches an entry of areas; list all VMAs unless flag_quiet */
 9 | void vma_process(struct char_ptr_arr_t *areas, int flag_quiet);
10 | #ifdef __cplusplus
11 | } /* extern "C" */
12 | #endif
13 | 
14 | #endif /* __vma_H__ */
15 | 


--------------------------------------------------------------------------------