├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
└── workflows
│ └── c-cpp.yml
├── .gitignore
├── CMakeLists.txt
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── SECURITY.md
├── blackbox.png
├── examples
├── rule110.ssa
├── sum.ssa
└── vararg.ssa
├── include
├── api.h
├── arena.h
├── lexer.h
├── optimisation.h
├── parser.h
├── strslice.h
├── target
│ └── x86_64
│ │ └── register.h
├── utils.h
└── vector.h
├── out.
├── src
├── lexer.c
├── main.c
├── optimise
│ ├── copyelim.c
│ ├── folding.c
│ ├── optimisation.c
│ └── unused_label_elim.c
├── parser.c
├── strslice.c
├── target
│ ├── IR
│ │ ├── build.c
│ │ └── instructions.c
│ └── x86_64
│ │ ├── build.c
│ │ ├── instructions.c
│ │ └── register.c
├── utils.c
└── vector.c
└── test.ssa
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem
25 |
26 | **Device**
27 | Specify the device you're using, including the CPU architecture, operating system, and perhaps the compiler you used when building UYB.
28 |
29 | **Additional context**
30 | Add any other context about the problem here.
31 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here. Specify if you'd like to be assigned to work on this.
21 |
--------------------------------------------------------------------------------
/.github/workflows/c-cpp.yml:
--------------------------------------------------------------------------------
1 | name: C/C++ CI
2 |
3 | on:
4 | push:
5 | branches: [ "main" ]
6 | pull_request:
7 | branches: [ "main" ]
8 |
9 | jobs:
10 | build:
11 |
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - uses: actions/checkout@v4
16 | - name: make
17 | run: make
18 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | /uyb
3 | /out
4 | /out.S
5 | /build
6 | /include/version.h
7 | /out.ssa
8 | /test*
9 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | # Prefer clang when it is installed, unless the user already picked a compiler
4 | # (e.g. -DCMAKE_C_COMPILER=...). This must happen before project(): that is
5 | # where CMake detects the toolchain, and changing CMAKE_C_COMPILER afterwards
6 | # is not supported.
7 | find_program(CLANG NAMES clang)
8 | if(CLANG AND NOT DEFINED CMAKE_C_COMPILER)
9 | set(CMAKE_C_COMPILER ${CLANG})
10 | endif()
11 |
12 | # UYB is written in C only, so do not require a C++ toolchain.
13 | project(UYB C)
14 |
15 | add_compile_options(-Wall -Werror -g)
16 | include_directories(include)
17 | # Every .c file under src/ is part of the single `uyb` executable.
18 | file(GLOB_RECURSE SRC_FILES "src/*.c")
19 | add_executable(uyb ${SRC_FILES})
20 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to UYB
2 | Contributions to UYB are welcome! While this is primarily a one person project, that doesn't by any means limit it to *only* one person. However, there are some ground rules that are important to set:
3 | - Keep commit messages short.
4 | - Pull requests should explain what change was made, how it improves the project, and if the change was complex enough, it should link to documentation on the topic or explain how it works in a short overview.
5 | - Don't majorly restructure the project.
6 | - Open an [issue](https://github.com/UnmappedStack/UYB/issues/new) before starting work on a pull request and ask to be assigned to work on it. Similar rules apply here to writing a pull request description.
7 | - Most tiny contributions, such as changing a single character in a documentation file, which are clearly just for the sake of being added to the contributor list, will be ignored.
8 |
9 | Now let's write a pretty nifty compiler backend!
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Mozilla Public License Version 2.0
2 | ==================================
3 |
4 | 1. Definitions
5 | --------------
6 |
7 | 1.1. "Contributor"
8 | means each individual or legal entity that creates, contributes to
9 | the creation of, or owns Covered Software.
10 |
11 | 1.2. "Contributor Version"
12 | means the combination of the Contributions of others (if any) used
13 | by a Contributor and that particular Contributor's Contribution.
14 |
15 | 1.3. "Contribution"
16 | means Covered Software of a particular Contributor.
17 |
18 | 1.4. "Covered Software"
19 | means Source Code Form to which the initial Contributor has attached
20 | the notice in Exhibit A, the Executable Form of such Source Code
21 | Form, and Modifications of such Source Code Form, in each case
22 | including portions thereof.
23 |
24 | 1.5. "Incompatible With Secondary Licenses"
25 | means
26 |
27 | (a) that the initial Contributor has attached the notice described
28 | in Exhibit B to the Covered Software; or
29 |
30 | (b) that the Covered Software was made available under the terms of
31 | version 1.1 or earlier of the License, but not also under the
32 | terms of a Secondary License.
33 |
34 | 1.6. "Executable Form"
35 | means any form of the work other than Source Code Form.
36 |
37 | 1.7. "Larger Work"
38 | means a work that combines Covered Software with other material, in
39 | a separate file or files, that is not Covered Software.
40 |
41 | 1.8. "License"
42 | means this document.
43 |
44 | 1.9. "Licensable"
45 | means having the right to grant, to the maximum extent possible,
46 | whether at the time of the initial grant or subsequently, any and
47 | all of the rights conveyed by this License.
48 |
49 | 1.10. "Modifications"
50 | means any of the following:
51 |
52 | (a) any file in Source Code Form that results from an addition to,
53 | deletion from, or modification of the contents of Covered
54 | Software; or
55 |
56 | (b) any new file in Source Code Form that contains any Covered
57 | Software.
58 |
59 | 1.11. "Patent Claims" of a Contributor
60 | means any patent claim(s), including without limitation, method,
61 | process, and apparatus claims, in any patent Licensable by such
62 | Contributor that would be infringed, but for the grant of the
63 | License, by the making, using, selling, offering for sale, having
64 | made, import, or transfer of either its Contributions or its
65 | Contributor Version.
66 |
67 | 1.12. "Secondary License"
68 | means either the GNU General Public License, Version 2.0, the GNU
69 | Lesser General Public License, Version 2.1, the GNU Affero General
70 | Public License, Version 3.0, or any later versions of those
71 | licenses.
72 |
73 | 1.13. "Source Code Form"
74 | means the form of the work preferred for making modifications.
75 |
76 | 1.14. "You" (or "Your")
77 | means an individual or a legal entity exercising rights under this
78 | License. For legal entities, "You" includes any entity that
79 | controls, is controlled by, or is under common control with You. For
80 | purposes of this definition, "control" means (a) the power, direct
81 | or indirect, to cause the direction or management of such entity,
82 | whether by contract or otherwise, or (b) ownership of more than
83 | fifty percent (50%) of the outstanding shares or beneficial
84 | ownership of such entity.
85 |
86 | 2. License Grants and Conditions
87 | --------------------------------
88 |
89 | 2.1. Grants
90 |
91 | Each Contributor hereby grants You a world-wide, royalty-free,
92 | non-exclusive license:
93 |
94 | (a) under intellectual property rights (other than patent or trademark)
95 | Licensable by such Contributor to use, reproduce, make available,
96 | modify, display, perform, distribute, and otherwise exploit its
97 | Contributions, either on an unmodified basis, with Modifications, or
98 | as part of a Larger Work; and
99 |
100 | (b) under Patent Claims of such Contributor to make, use, sell, offer
101 | for sale, have made, import, and otherwise transfer either its
102 | Contributions or its Contributor Version.
103 |
104 | 2.2. Effective Date
105 |
106 | The licenses granted in Section 2.1 with respect to any Contribution
107 | become effective for each Contribution on the date the Contributor first
108 | distributes such Contribution.
109 |
110 | 2.3. Limitations on Grant Scope
111 |
112 | The licenses granted in this Section 2 are the only rights granted under
113 | this License. No additional rights or licenses will be implied from the
114 | distribution or licensing of Covered Software under this License.
115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a
116 | Contributor:
117 |
118 | (a) for any code that a Contributor has removed from Covered Software;
119 | or
120 |
121 | (b) for infringements caused by: (i) Your and any other third party's
122 | modifications of Covered Software, or (ii) the combination of its
123 | Contributions with other software (except as part of its Contributor
124 | Version); or
125 |
126 | (c) under Patent Claims infringed by Covered Software in the absence of
127 | its Contributions.
128 |
129 | This License does not grant any rights in the trademarks, service marks,
130 | or logos of any Contributor (except as may be necessary to comply with
131 | the notice requirements in Section 3.4).
132 |
133 | 2.4. Subsequent Licenses
134 |
135 | No Contributor makes additional grants as a result of Your choice to
136 | distribute the Covered Software under a subsequent version of this
137 | License (see Section 10.2) or under the terms of a Secondary License (if
138 | permitted under the terms of Section 3.3).
139 |
140 | 2.5. Representation
141 |
142 | Each Contributor represents that the Contributor believes its
143 | Contributions are its original creation(s) or it has sufficient rights
144 | to grant the rights to its Contributions conveyed by this License.
145 |
146 | 2.6. Fair Use
147 |
148 | This License is not intended to limit any rights You have under
149 | applicable copyright doctrines of fair use, fair dealing, or other
150 | equivalents.
151 |
152 | 2.7. Conditions
153 |
154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
155 | in Section 2.1.
156 |
157 | 3. Responsibilities
158 | -------------------
159 |
160 | 3.1. Distribution of Source Form
161 |
162 | All distribution of Covered Software in Source Code Form, including any
163 | Modifications that You create or to which You contribute, must be under
164 | the terms of this License. You must inform recipients that the Source
165 | Code Form of the Covered Software is governed by the terms of this
166 | License, and how they can obtain a copy of this License. You may not
167 | attempt to alter or restrict the recipients' rights in the Source Code
168 | Form.
169 |
170 | 3.2. Distribution of Executable Form
171 |
172 | If You distribute Covered Software in Executable Form then:
173 |
174 | (a) such Covered Software must also be made available in Source Code
175 | Form, as described in Section 3.1, and You must inform recipients of
176 | the Executable Form how they can obtain a copy of such Source Code
177 | Form by reasonable means in a timely manner, at a charge no more
178 | than the cost of distribution to the recipient; and
179 |
180 | (b) You may distribute such Executable Form under the terms of this
181 | License, or sublicense it under different terms, provided that the
182 | license for the Executable Form does not attempt to limit or alter
183 | the recipients' rights in the Source Code Form under this License.
184 |
185 | 3.3. Distribution of a Larger Work
186 |
187 | You may create and distribute a Larger Work under terms of Your choice,
188 | provided that You also comply with the requirements of this License for
189 | the Covered Software. If the Larger Work is a combination of Covered
190 | Software with a work governed by one or more Secondary Licenses, and the
191 | Covered Software is not Incompatible With Secondary Licenses, this
192 | License permits You to additionally distribute such Covered Software
193 | under the terms of such Secondary License(s), so that the recipient of
194 | the Larger Work may, at their option, further distribute the Covered
195 | Software under the terms of either this License or such Secondary
196 | License(s).
197 |
198 | 3.4. Notices
199 |
200 | You may not remove or alter the substance of any license notices
201 | (including copyright notices, patent notices, disclaimers of warranty,
202 | or limitations of liability) contained within the Source Code Form of
203 | the Covered Software, except that You may alter any license notices to
204 | the extent required to remedy known factual inaccuracies.
205 |
206 | 3.5. Application of Additional Terms
207 |
208 | You may choose to offer, and to charge a fee for, warranty, support,
209 | indemnity or liability obligations to one or more recipients of Covered
210 | Software. However, You may do so only on Your own behalf, and not on
211 | behalf of any Contributor. You must make it absolutely clear that any
212 | such warranty, support, indemnity, or liability obligation is offered by
213 | You alone, and You hereby agree to indemnify every Contributor for any
214 | liability incurred by such Contributor as a result of warranty, support,
215 | indemnity or liability terms You offer. You may include additional
216 | disclaimers of warranty and limitations of liability specific to any
217 | jurisdiction.
218 |
219 | 4. Inability to Comply Due to Statute or Regulation
220 | ---------------------------------------------------
221 |
222 | If it is impossible for You to comply with any of the terms of this
223 | License with respect to some or all of the Covered Software due to
224 | statute, judicial order, or regulation then You must: (a) comply with
225 | the terms of this License to the maximum extent possible; and (b)
226 | describe the limitations and the code they affect. Such description must
227 | be placed in a text file included with all distributions of the Covered
228 | Software under this License. Except to the extent prohibited by statute
229 | or regulation, such description must be sufficiently detailed for a
230 | recipient of ordinary skill to be able to understand it.
231 |
232 | 5. Termination
233 | --------------
234 |
235 | 5.1. The rights granted under this License will terminate automatically
236 | if You fail to comply with any of its terms. However, if You become
237 | compliant, then the rights granted under this License from a particular
238 | Contributor are reinstated (a) provisionally, unless and until such
239 | Contributor explicitly and finally terminates Your grants, and (b) on an
240 | ongoing basis, if such Contributor fails to notify You of the
241 | non-compliance by some reasonable means prior to 60 days after You have
242 | come back into compliance. Moreover, Your grants from a particular
243 | Contributor are reinstated on an ongoing basis if such Contributor
244 | notifies You of the non-compliance by some reasonable means, this is the
245 | first time You have received notice of non-compliance with this License
246 | from such Contributor, and You become compliant prior to 30 days after
247 | Your receipt of the notice.
248 |
249 | 5.2. If You initiate litigation against any entity by asserting a patent
250 | infringement claim (excluding declaratory judgment actions,
251 | counter-claims, and cross-claims) alleging that a Contributor Version
252 | directly or indirectly infringes any patent, then the rights granted to
253 | You by any and all Contributors for the Covered Software under Section
254 | 2.1 of this License shall terminate.
255 |
256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all
257 | end user license agreements (excluding distributors and resellers) which
258 | have been validly granted by You or Your distributors under this License
259 | prior to termination shall survive termination.
260 |
261 | ************************************************************************
262 | * *
263 | * 6. Disclaimer of Warranty *
264 | * ------------------------- *
265 | * *
266 | * Covered Software is provided under this License on an "as is" *
267 | * basis, without warranty of any kind, either expressed, implied, or *
268 | * statutory, including, without limitation, warranties that the *
269 | * Covered Software is free of defects, merchantable, fit for a *
270 | * particular purpose or non-infringing. The entire risk as to the *
271 | * quality and performance of the Covered Software is with You. *
272 | * Should any Covered Software prove defective in any respect, You *
273 | * (not any Contributor) assume the cost of any necessary servicing, *
274 | * repair, or correction. This disclaimer of warranty constitutes an *
275 | * essential part of this License. No use of any Covered Software is *
276 | * authorized under this License except under this disclaimer. *
277 | * *
278 | ************************************************************************
279 |
280 | ************************************************************************
281 | * *
282 | * 7. Limitation of Liability *
283 | * -------------------------- *
284 | * *
285 | * Under no circumstances and under no legal theory, whether tort *
286 | * (including negligence), contract, or otherwise, shall any *
287 | * Contributor, or anyone who distributes Covered Software as *
288 | * permitted above, be liable to You for any direct, indirect, *
289 | * special, incidental, or consequential damages of any character *
290 | * including, without limitation, damages for lost profits, loss of *
291 | * goodwill, work stoppage, computer failure or malfunction, or any *
292 | * and all other commercial damages or losses, even if such party *
293 | * shall have been informed of the possibility of such damages. This *
294 | * limitation of liability shall not apply to liability for death or *
295 | * personal injury resulting from such party's negligence to the *
296 | * extent applicable law prohibits such limitation. Some *
297 | * jurisdictions do not allow the exclusion or limitation of *
298 | * incidental or consequential damages, so this exclusion and *
299 | * limitation may not apply to You. *
300 | * *
301 | ************************************************************************
302 |
303 | 8. Litigation
304 | -------------
305 |
306 | Any litigation relating to this License may be brought only in the
307 | courts of a jurisdiction where the defendant maintains its principal
308 | place of business and such litigation shall be governed by laws of that
309 | jurisdiction, without reference to its conflict-of-law provisions.
310 | Nothing in this Section shall prevent a party's ability to bring
311 | cross-claims or counter-claims.
312 |
313 | 9. Miscellaneous
314 | ----------------
315 |
316 | This License represents the complete agreement concerning the subject
317 | matter hereof. If any provision of this License is held to be
318 | unenforceable, such provision shall be reformed only to the extent
319 | necessary to make it enforceable. Any law or regulation which provides
320 | that the language of a contract shall be construed against the drafter
321 | shall not be used to construe this License against a Contributor.
322 |
323 | 10. Versions of the License
324 | ---------------------------
325 |
326 | 10.1. New Versions
327 |
328 | Mozilla Foundation is the license steward. Except as provided in Section
329 | 10.3, no one other than the license steward has the right to modify or
330 | publish new versions of this License. Each version will be given a
331 | distinguishing version number.
332 |
333 | 10.2. Effect of New Versions
334 |
335 | You may distribute the Covered Software under the terms of the version
336 | of the License under which You originally received the Covered Software,
337 | or under the terms of any subsequent version published by the license
338 | steward.
339 |
340 | 10.3. Modified Versions
341 |
342 | If you create software not governed by this License, and you want to
343 | create a new license for such software, you may create and use a
344 | modified version of this License if you rename the license and remove
345 | any references to the name of the license steward (except to note that
346 | such modified license differs from this License).
347 |
348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary
349 | Licenses
350 |
351 | If You choose to distribute Source Code Form that is Incompatible With
352 | Secondary Licenses under the terms of this version of the License, the
353 | notice described in Exhibit B of this License must be attached.
354 |
355 | Exhibit A - Source Code Form License Notice
356 | -------------------------------------------
357 |
358 | This Source Code Form is subject to the terms of the Mozilla Public
359 | License, v. 2.0. If a copy of the MPL was not distributed with this
360 | file, You can obtain one at https://mozilla.org/MPL/2.0/.
361 |
362 | If it is not possible or desirable to put the notice in a particular
363 | file, then You may include the notice in a location (such as a LICENSE
364 | file in a relevant directory) where a recipient would be likely to look
365 | for such a notice.
366 |
367 | You may add additional accurate notices of copyright ownership.
368 |
369 | Exhibit B - "Incompatible With Secondary Licenses" Notice
370 | ---------------------------------------------------------
371 |
372 | This Source Code Form is "Incompatible With Secondary Licenses", as
373 | defined by the Mozilla Public License, v. 2.0.
374 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Short commit hash written into include/version.h as COMMIT.
2 | # Falls back to "unknown" when building outside a git checkout.
3 | VERSION := $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown)
4 | .PHONY: all build install
5 |
6 | all: build install
7 |
8 | # Generate include/version.h, then configure and build with CMake in ./build.
9 | build:
10 | @echo "[CMake] Setting up configuration files..."
11 | @echo "#define COMMIT \"$(VERSION)\"" > include/version.h
12 | mkdir -p build
13 | cd build && cmake ..
14 | @echo "[CMake] Building..."
15 | cmake --build build
16 |
17 | # Symlink the built binary into /usr/bin unless something usable is already there.
18 | # -f lets ln replace a dangling link, which `[ ! -e ]` reports as missing.
19 | install:
20 | @echo "[Here] Creating symbolic link in /usr/bin (password may be required)..."
21 | @if [ ! -e "/usr/bin/uyb" ]; then sudo ln -sf "$(realpath build/uyb)" /usr/bin/uyb; fi
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
The UYB Compiler Backend
3 |
4 |
UYB is a from-scratch optimising compiler backend written in C, designed to be small and have fast compilation, while still being complete enough to be used for an actual compiler.
5 |
6 | [](https://github.com/UnmappedStack/UYB/blob/main/LICENSE)
7 | 
8 | 
9 |
10 |

11 |
12 |
13 | UYB is based heavily on QBE IR syntax and is almost fully instruction set compatible. The goal is self hosting through [cproc](https://github.com/michaelforney/cproc), which is a C compiler which targets QBE's IR.
14 |
15 | There's a Discord server for UYB which you can join for help setting up your language with UYB, helping to contribute, or just having a chat, which you can join [here](https://discord.gg/W5uYqPAJg5).
16 |
17 | ## Why not just the original QBE?
18 | I myself absolutely love QBE, and am a huge fan of the "80% of the performance in 10% of the code," however there are a few things that UYB improves upon (or may also just be a different use case rather than being better):
19 | - **QBE doesn't support inline assembly.** In most cases, this is fine, but when working on a very low level language where you simply need to interact with the CPU's instructions directly, a lack of inline assembly support can be unfortunate, and it's a massive Quality of Life feature to not need to put every single piece of assembly in a separate file.
20 | - **There's still room for QBE to be smaller.** UYB accepts slower runtime speeds of generated assembly, and uses fewer optimisations, in return for a smaller amount of code and faster compilation - the goal with UYB is more like 60% of the speed in 5% of the code.
21 | - **Debug symbols support.** Unfortunately, QBE doesn't support debug symbols, which means debugging generated programs with GDB is near impossible to do effectively.
22 |
23 | ## Support
24 | UYB supports every QBE instruction except for floating point instructions. UYB also supports:
25 |
26 | ### Optimisations
27 | - Folding
28 | - Copy elimination
29 | - Unused label removal
30 |
31 | ### Targets
32 | - x86_64 generic System-V
33 | - SSA IR
34 |
35 | ## Usage
36 | Since UYB is supposed to be based on QBE's IR, you can see [QBE's documentation](https://c9x.me/compile/doc/il.html) for a full IR reference.
37 |
38 | There are more example UYB programs in `/examples`, or try running this small "Hello World" program to test UYB like so:
39 | - Copy this code into a file named `test.ssa` or something similar:
40 | ```
41 | data $msg = {b "Hello, world!", b 10, b 0}
42 | export function w $main(l %argc, l %argv) {
43 | @start
44 | call $printf(l $msg)
45 | ret 0
46 | }
47 | ```
48 | - Compile the IR to x86_64 Assembly using the following command:
49 | ```sh
50 | $ uyb test.ssa -o out.S
51 | ```
52 | - Use a standard toolchain to assemble and link the generated Assembly to an executable program, then run it:
53 | ```sh
54 | $ gcc out.S -o out
55 | $ ./out
56 | Hello, world!
57 | ```
58 |
59 | **To use debug symbols**, you can use GAS-AT&T like syntax. To use the previous example program as an example:
60 | ```
61 | # Define the source file that this SSA was generated from.
62 | # The first argument is the index ID of this file (so if you have more files then they need to each
63 | # have a different ID) and the second argument is the filename.
64 | .file 1 "test.c"
65 |
66 | data $msg = {b "Hello, world!", b 10, b 0}
67 | export function w $main(l %argc, l %argv) {
68 | @start
69 | # The .loc pseudoinstruction specifies where in the file the following instructions are built from.
70 | # The first argument is the index of the file it came from (same as the ID for relevant .file),
71 | # and the next two arguments are the row and column, respectively.
72 | .loc 1 3 0
73 | call $printf(l $msg)
74 | .loc 1 4 0
75 | ret 0
76 | }
77 | ```
78 |
79 | **To use inline assembly**, you can use the following syntax. Note that inline assembly is not supported in the IR self-targeting target.
80 | ```
81 | asm("" : %inputValue | "", %inputValue2 | "" : %outputValue | "", %outputValue2 | "" : "", "")
82 | ```
83 | The types of inputs are split with colons (`:`):
84 |
85 | - The first input type is the raw assembly. It cannot contain any new lines within the source IL, however it may contain escape sequences such as `\t` and `\n`.
86 | - The second input type is the input list, split by commas. Each entry is in the format of `%label | "%rax"`, where the label contains the input value to pass in and `%rax` is replaced with the register that the input should be passed in. The label and the register must both be 64 bits.
87 | - The third input type is the output list, which follows the same format as the input list.
88 | - The fourth and final input type is the clobber list. This is a list of string literals containing register names split by commas, in the form of `"%rax", "%rbx"`. These shouldn't contain input or output registers, but they *can*. These are the registers that are used by the inline assembly, so that UYB knows to be careful with them since they may be messed up. They must be 64 bit general purpose registers.
89 |
90 | Note that checking of most inline assembly is left to the assembler and linker for the sake of keeping UYB lightweight, which means that programs containing inline assembly cannot be confirmed to work until they have been assembled and linked.
91 |
92 | You can use `uyb --help` to see all the command line options for UYB.
93 |
94 | ## Building
95 | To clone and build UYB, simply run:
96 | ```sh
97 | git clone https://github.com/UnmappedStack/UYB
98 | cd UYB
99 | make
100 | ```
101 | This will also install a symlink in your bin directory so that you can call UYB from anywhere. CMake is required.
102 |
103 | ## Thanks
104 | UYB uses [Tsoding's arena allocator](https://github.com/tsoding/arena) for quick allocations.
105 |
106 | ## License
107 | This project is under the Mozilla Public License 2.0, with the exclusion of `include/arena.h` which is from [this repo](https://github.com/tsoding/arena) and has a separate license. See `LICENSE` for more information.
108 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | UYB is currently still in a beta phase and thus cannot guarantee that everything is secure. Use at your own risk, and avoid using it in production.
4 |
5 | To report a vulnerability, create an [issue](https://github.com/UnmappedStack/UYB/issues/new) with the bug report template.
6 |
--------------------------------------------------------------------------------
/blackbox.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnmappedStack/UYB/fd288c4c4695d5682e1df006863ed68d44b2102e/blackbox.png
--------------------------------------------------------------------------------
/examples/rule110.ssa:
--------------------------------------------------------------------------------
1 | # Generated by lewc compiler, a compiler written by Dcraftbg: https://github.com/Dcraftbg
2 | # Target: Linux x86_64
3 | # main :: () -> i32
4 | export function w $main () {
5 | @start
6 | %board =l alloc16 120
7 | %.s1 =l copy %board
8 | %.s2 =w copy 0
9 | %.s3 =w copy 30
10 | %.s4 =w copy 4
11 | %.s5 =w mul %.s3, %.s4
12 | call $memset(l %.s1, w %.s2, w %.s5)
13 | %i =l alloc4 4
14 | %.s6 =w copy 0
15 | storew %.s6, %i
16 | %j =l alloc4 4
17 | %.s7 =w copy 0
18 | storew %.s7, %j
19 | %.s8 =l copy %board
20 | %.s9 =w copy 30
21 | %.s10 =w copy 2
22 | %.s11 =w sub %.s9, %.s10
23 | %.s12 =l extsw %.s11
24 | %.s13 =l mul %.s12, 4
25 | %.s14 =l add %.s8, %.s13
26 | %.s15 =w copy 1
27 | storew %.s15, %.s14
28 | @while_cond_16
29 | %.s17 =w loadsw %i
30 | %.s18 =w copy 30
31 | %.s19 =w copy 2
32 | %.s20 =w sub %.s18, %.s19
33 | %.s21 =w csltw %.s17, %.s20
34 | jnz %.s21, @while_body_16, @while_end_16
35 | @while_body_16
36 | %.s22 =l copy %j
37 | %.s23 =w copy 0
38 | storew %.s23, %.s22
39 | @while_cond_24
40 | %.s25 =w loadsw %j
41 | %.s26 =w copy 30
42 | %.s27 =w csltw %.s25, %.s26
43 | jnz %.s27, @while_body_24, @while_end_24
44 | @while_body_24
45 | %.s28 =l copy $.g0
46 | %.s29 =l copy %board
47 | %.s30 =w loadsw %j
48 | %.s31 =l extsw %.s30
49 | %.s32 =l mul %.s31, 4
50 | %.s33 =l add %.s29, %.s32
51 | %.s34 =w loadsw %.s33
52 | %.s35 =l extsw %.s34
53 | %.s37 =l add %.s28, %.s35
54 | %.s38 =w loadub %.s37
55 | call $putchar(w %.s38)
56 | %.s39 =l copy %j
57 | %.s40 =w loadsw %j
58 | %.s41 =w copy 1
59 | %.s42 =w add %.s40, %.s41
60 | storew %.s42, %.s39
61 | jmp @while_cond_24
62 | @while_end_24
63 | %.s43 =w copy 10
64 | call $putchar(w %.s43)
65 | %pattern =l alloc4 4
66 | %.s44 =l copy %board
67 | %.s45 =w copy 0
68 | %.s46 =l extsw %.s45
69 | %.s47 =l mul %.s46, 4
70 | %.s48 =l add %.s44, %.s47
71 | %.s49 =w loadsw %.s48
72 | %.s50 =w copy 1
73 | %.s51 =w shl %.s49, %.s50
74 | %.s52 =l copy %board
75 | %.s53 =w copy 1
76 | %.s54 =l extsw %.s53
77 | %.s55 =l mul %.s54, 4
78 | %.s56 =l add %.s52, %.s55
79 | %.s57 =w loadsw %.s56
80 | %.s58 =w or %.s51, %.s57
81 | storew %.s58, %pattern
82 | %.s59 =l copy %j
83 | %.s60 =w copy 1
84 | storew %.s60, %.s59
85 | @while_cond_61
86 | %.s62 =w loadsw %j
87 | %.s63 =w copy 30
88 | %.s64 =w copy 1
89 | %.s65 =w sub %.s63, %.s64
90 | %.s66 =w csltw %.s62, %.s65
91 | jnz %.s66, @while_body_61, @while_end_61
92 | @while_body_61
93 | %.s67 =l copy %pattern
94 | %.s68 =w loadsw %pattern
95 | %.s69 =w copy 1
96 | %.s70 =w shl %.s68, %.s69
97 | %.s71 =w copy 7
98 | %.s72 =w and %.s70, %.s71
99 | %.s73 =l copy %board
100 | %.s74 =w loadsw %j
101 | %.s75 =w copy 1
102 | %.s76 =w add %.s74, %.s75
103 | %.s77 =l extsw %.s76
104 | %.s78 =l mul %.s77, 4
105 | %.s79 =l add %.s73, %.s78
106 | %.s80 =w loadsw %.s79
107 | %.s81 =w or %.s72, %.s80
108 | storew %.s81, %.s67
109 | %.s82 =l copy %board
110 | %.s83 =w loadsw %j
111 | %.s84 =l extsw %.s83
112 | %.s85 =l mul %.s84, 4
113 | %.s86 =l add %.s82, %.s85
114 | %.s87 =w copy 110
115 | %.s88 =w loadsw %pattern
116 | %.s89 =w shr %.s87, %.s88
117 | %.s90 =w copy 1
118 | %.s91 =w and %.s89, %.s90
119 | storew %.s91, %.s86
120 | %.s92 =l copy %j
121 | %.s93 =w loadsw %j
122 | %.s94 =w copy 1
123 | %.s95 =w add %.s93, %.s94
124 | storew %.s95, %.s92
125 | jmp @while_cond_61
126 | @while_end_61
127 | %.s96 =l copy %i
128 | %.s97 =w loadsw %i
129 | %.s98 =w copy 1
130 | %.s99 =w add %.s97, %.s98
131 | storew %.s99, %.s96
132 | jmp @while_cond_16
133 | @while_end_16
134 | %.s100 =w copy 0
135 | ret %.s100
136 | }
137 | # extern putchar :: (c: u8)
138 | # extern memset :: (data: *i32, c: i32, n: i32)
139 | data $.g0 = {b 32, b 42, b 0 }
140 |
--------------------------------------------------------------------------------
/examples/sum.ssa:
--------------------------------------------------------------------------------
1 | # This example was taken directly from QBE's website: https://c9x.me/compile/
2 |
3 | function w $add(w %a, w %b) { # Define a function add
4 | @start
5 | %c =w add %a, %b # Adds the 2 arguments
6 | ret %c # Return the result
7 | }
8 | export function w $main() { # Main function
9 | @start
10 | %r =w call $add(w 1, w 1) # Call add(1, 1)
11 | call $printf(l $fmt, ..., w %r) # Show the result
12 | ret 0
13 | }
14 | data $fmt = { b "One and one make %d!\n", b 0 }
15 |
--------------------------------------------------------------------------------
/examples/vararg.ssa:
--------------------------------------------------------------------------------
1 | function w $varargtest(...) {
2 | @start
3 | %ap =l alloc8 32
4 | vastart %ap
5 | %val1 =l vaarg %ap
6 | %val2 =l vaarg %ap
7 | %val3 =l vaarg %ap
8 | %val4 =l vaarg %ap
9 | %val5 =l vaarg %ap
10 | %val6 =l vaarg %ap
11 | call $printf(l $fmt, l %val1, l %val2, l %val3, l %val4, l %val5, l %val6)
12 | ret 0
13 | }
14 |
15 | export function w $main() {
16 | @start
17 | call $varargtest(w 1, w 2, w 3, w 4, w 5, w 6)
18 | ret 0
19 | }
20 |
21 | data $fmt = { b "The numbers received from varargs are %zu, %zu, %zu, %zu, %zu, %zu.\n", b 0 } # printf format used by $varargtest
22 |
--------------------------------------------------------------------------------
/include/api.h:
--------------------------------------------------------------------------------
1 | /* This is the "library"/API file which is what is used to interact with the actual backend through
2 | * a non-textual representation.
3 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */
4 | #pragma once
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 |
11 | typedef enum {
12 | ADD,
13 | SUB,
14 | DIV,
15 | MUL,
16 | COPY,
17 | RET,
18 | CALL,
19 | JZ,
20 | NEG,
21 | UDIV,
22 | REM,
23 | UREM,
24 | AND,
25 | OR,
26 | XOR,
27 | SHL,
28 | SHR,
29 | STORE,
30 | LOAD,
31 | BLIT,
32 | ALLOC,
33 | EQ,
34 | NE,
35 | SLE, // less than or equal (signed)
36 | SLT, // less than (signed)
37 | SGE, // higher than or equal (signed)
38 | SGT, // higher than (signed)
39 | ULE, // less than or equal (unsigned)
40 | ULT, // less than (unsigned)
41 | UGE, // higher than or equal (unsigned)
42 | UGT, // higher than (unsigned)
43 | EXT,
44 | HLT,
45 | BLKLBL,
46 | JMP,
47 | JNZ,
48 | PHI,
49 | VASTART,
50 | VAARG,
51 | LOC,
52 | ASM,
53 | } Instruction;
54 |
55 | typedef enum {
56 | Bits8,
57 | Bits16,
58 | Bits32,
59 | Bits64,
60 | None,
61 | } Type;
62 |
63 | typedef enum {
64 | Label,
65 | Number,
66 | Str,
67 | StrLit,
68 | FunctionArgs,
69 | BlkLbl,
70 | PhiArg,
71 | InlineAssembly,
72 | Empty,
73 | } ValType;
74 |
75 | typedef struct {
76 | char **args;
77 | Type *arg_sizes;
78 | char **arg_struct_types;
79 | bool *args_are_structs;
80 | ValType *arg_types;
81 | size_t num_args;
82 | } FunctionArgList;
83 |
84 | typedef struct {
85 | char *section; // NULL if in data section
86 | char *name;
87 | ValType *types; // Can only be StrLit or number. Anything else should panic.
88 | Type *sizes;
89 | size_t *vals;
90 | size_t num_vals;
91 | size_t alignment; // default is 1
92 | } Global;
93 |
94 | typedef struct {
95 | char *label; // to store result in (NULL if none (only if it's a function or something))
96 | Instruction instruction;
97 | Type type;
98 | uint64_t vals[3];
99 | ValType val_types[3];
100 | } Statement;
101 |
102 | typedef struct {
103 | bool type_is_struct;
104 | union {
105 | Type type;
106 | char *type_struct;
107 | };
108 | char *label;
109 | } FunctionArgument;
110 |
111 | typedef struct {
112 | bool is_global;
113 | char *name;
114 | FunctionArgument *args;
115 | size_t num_args;
116 | bool ret_is_struct;
117 | union {
118 | Type return_type;
119 | char *return_struct;
120 | };
121 | Statement *statements;
122 | size_t num_statements;
123 | bool is_variadic;
124 | } Function;
125 |
126 | typedef struct {
127 | char *name;
128 | size_t alignment; // default is size of largest value
129 | size_t size_bytes;
130 | } AggregateType;
131 |
132 | typedef struct {
133 | char *blklbl_name;
134 | size_t val;
135 | ValType type;
136 | } PhiVal;
137 |
138 | typedef struct {
139 | char *fname;
140 | size_t id;
141 | } FileDbg;
142 |
143 | typedef struct {
144 | char *reg;
145 | char *label;
146 | ValType type;
147 | } InlineAsmIO;
148 |
149 | typedef struct {
150 | char *assembly;
151 | InlineAsmIO **inputs_vec;
152 | InlineAsmIO **outputs_vec;
153 | char* **clobbers_vec;
154 | } InlineAsm;
155 |
156 | // for each target
157 | void build_program_x86_64(Function *IR, size_t num_functions, Global *global_vars, size_t num_global_vars, AggregateType *aggtypes, size_t num_aggtypes, FileDbg *dbgfiles, size_t num_dbgfiles, FILE *outf);
158 | void build_program_IR(Function *IR, size_t num_functions, Global *global_vars, size_t num_global_vars, AggregateType *aggtypes, size_t num_aggtypes, FileDbg *dbgfiles, size_t num_dbgfiles, FILE *outf);
159 |
160 | extern void (*instructions_x86_64[41])(uint64_t[2], ValType[2], Statement, String*);
161 | extern void (*instructions_IR[])(uint64_t[2], ValType[2], Statement, FILE*);
162 | char *instruction_as_str(Instruction instr);
163 | char *type_as_str(Type type, char *struct_type, bool is_struct);
164 | void disasm_instr(String *fnbuf, Statement statement);
165 |
--------------------------------------------------------------------------------
/include/arena.h:
--------------------------------------------------------------------------------
1 | /* This file is not by me, but rather from the header-only C arena allocator library
2 | * which can be found here: https://github.com/tsoding/arena, with some modifications for simple macros to
3 | * make it easier to integrate with the rest of the project.
4 | *
5 | * This is not under the same license as the rest of UYB. */
6 |
7 | // Copyright 2022 Alexey Kutepov
8 |
9 | // Permission is hereby granted, free of charge, to any person obtaining
10 | // a copy of this software and associated documentation files (the
11 | // "Software"), to deal in the Software without restriction, including
12 | // without limitation the rights to use, copy, modify, merge, publish,
13 | // distribute, sublicense, and/or sell copies of the Software, and to
14 | // permit persons to whom the Software is furnished to do so, subject to
15 | // the following conditions:
16 |
17 | // The above copyright notice and this permission notice shall be
18 | // included in all copies or substantial portions of the Software.
19 |
20 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 | #pragma once
28 |
29 | #ifndef ARENA_H_
30 | #define ARENA_H_
31 |
32 | #define aalloc(bytes) arena_alloc(&arena, bytes)
33 | #define delete_arenas() arena_free(&arena)
34 |
35 | #include
36 | #include
37 |
38 | #ifndef ARENA_NOSTDIO
39 | #include
40 | #include
41 | #endif // ARENA_NOSTDIO
42 |
43 | #ifndef ARENA_ASSERT
44 | #include
45 | #define ARENA_ASSERT assert
46 | #endif
47 |
48 | #define ARENA_BACKEND_LIBC_MALLOC 0
49 | #define ARENA_BACKEND_LINUX_MMAP 1
50 | #define ARENA_BACKEND_WIN32_VIRTUALALLOC 2
51 | #define ARENA_BACKEND_WASM_HEAPBASE 3
52 |
53 | #ifndef ARENA_BACKEND
54 | #define ARENA_BACKEND ARENA_BACKEND_LIBC_MALLOC
55 | #endif // ARENA_BACKEND
56 |
57 | typedef struct Region Region;
58 |
59 | struct Region {
60 | Region *next;
61 | size_t count;
62 | size_t capacity;
63 | uintptr_t data[];
64 | };
65 |
66 | typedef struct {
67 | Region *begin, *end;
68 | } Arena;
69 |
70 | extern Arena arena; // defined in src/main.c
71 |
72 | typedef struct {
73 | Region *region;
74 | size_t count;
75 | } Arena_Mark;
76 |
77 | #ifndef ARENA_REGION_DEFAULT_CAPACITY
78 | #define ARENA_REGION_DEFAULT_CAPACITY (8*1024)
79 | #endif // ARENA_REGION_DEFAULT_CAPACITY
80 |
81 | Region *new_region(size_t capacity);
82 | void free_region(Region *r);
83 |
84 | void *arena_alloc(Arena *a, size_t size_bytes);
85 | void *arena_realloc(Arena *a, void *oldptr, size_t oldsz, size_t newsz);
86 | char *arena_strdup(Arena *a, const char *cstr);
87 | void *arena_memdup(Arena *a, void *data, size_t size);
88 | #ifndef ARENA_NOSTDIO
89 | char *arena_sprintf(Arena *a, const char *format, ...);
90 | #endif // ARENA_NOSTDIO
91 |
92 | Arena_Mark arena_snapshot(Arena *a);
93 | void arena_reset(Arena *a);
94 | void arena_rewind(Arena *a, Arena_Mark m);
95 | void arena_free(Arena *a);
96 | void arena_trim(Arena *a);
97 |
98 | #ifndef ARENA_DA_INIT_CAP
99 | #define ARENA_DA_INIT_CAP 256
100 | #endif // ARENA_DA_INIT_CAP
101 |
102 | #ifdef __cplusplus
103 | #define cast_ptr(ptr) (decltype(ptr))
104 | #else
105 | #define cast_ptr(...)
106 | #endif
107 |
108 | #define arena_da_append(a, da, item) \
109 | do { \
110 | if ((da)->count >= (da)->capacity) { \
111 | size_t new_capacity = (da)->capacity == 0 ? ARENA_DA_INIT_CAP : (da)->capacity*2; \
112 | (da)->items = cast_ptr((da)->items)arena_realloc( \
113 | (a), (da)->items, \
114 | (da)->capacity*sizeof(*(da)->items), \
115 | new_capacity*sizeof(*(da)->items)); \
116 | (da)->capacity = new_capacity; \
117 | } \
118 | \
119 | (da)->items[(da)->count++] = (item); \
120 | } while (0)
121 |
122 | // Append several items to a dynamic array
123 | #define arena_da_append_many(a, da, new_items, new_items_count) \
124 | do { \
125 | if ((da)->count + (new_items_count) > (da)->capacity) { \
126 | size_t new_capacity = (da)->capacity; \
127 | if (new_capacity == 0) new_capacity = ARENA_DA_INIT_CAP; \
128 | while ((da)->count + (new_items_count) > new_capacity) new_capacity *= 2; \
129 | (da)->items = cast_ptr((da)->items)arena_realloc( \
130 | (a), (da)->items, \
131 | (da)->capacity*sizeof(*(da)->items), \
132 | new_capacity*sizeof(*(da)->items)); \
133 | (da)->capacity = new_capacity; \
134 | } \
135 | arena_memcpy((da)->items + (da)->count, (new_items), (new_items_count)*sizeof(*(da)->items)); \
136 | (da)->count += (new_items_count); \
137 | } while (0)
138 |
139 | // Append a sized buffer to a string builder
140 | #define arena_sb_append_buf arena_da_append_many
141 |
142 | // Append a NULL-terminated string to a string builder
143 | #define arena_sb_append_cstr(a, sb, cstr) \
144 | do { \
145 | const char *s = (cstr); \
146 | size_t n = arena_strlen(s); \
147 | arena_da_append_many(a, sb, s, n); \
148 | } while (0)
149 |
150 | // Append a single NULL character at the end of a string builder, so that you can
151 | // use it as a NULL-terminated C string
152 | #define arena_sb_append_null(a, sb) arena_da_append(a, sb, 0)
153 |
154 | #endif // ARENA_H_
155 |
156 | #ifdef ARENA_IMPLEMENTATION
157 |
158 | #if ARENA_BACKEND == ARENA_BACKEND_LIBC_MALLOC
159 | #include
160 |
161 | // TODO: instead of accepting specific capacity new_region() should accept the size of the object we want to fit into the region
162 | // It should be up to new_region() to decide the actual capacity to allocate
163 | Region *new_region(size_t capacity)
164 | {
165 | size_t size_bytes = sizeof(Region) + sizeof(uintptr_t)*capacity;
166 | // TODO: it would be nice if we could guarantee that the regions are allocated by ARENA_BACKEND_LIBC_MALLOC are page aligned
167 | Region *r = (Region*)malloc(size_bytes);
168 | ARENA_ASSERT(r); // TODO: since ARENA_ASSERT is disableable go through all the places where we use it to check for failed memory allocation and return with NULL there.
169 | r->next = NULL;
170 | r->count = 0;
171 | r->capacity = capacity;
172 | return r;
173 | }
174 |
175 | void free_region(Region *r)
176 | {
177 | free(r);
178 | }
179 | #elif ARENA_BACKEND == ARENA_BACKEND_LINUX_MMAP
180 | #include
181 | #include
182 |
183 | Region *new_region(size_t capacity)
184 | {
185 | size_t size_bytes = sizeof(Region) + sizeof(uintptr_t) * capacity;
186 | Region *r = mmap(NULL, size_bytes, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
187 | ARENA_ASSERT(r != MAP_FAILED);
188 | r->next = NULL;
189 | r->count = 0;
190 | r->capacity = capacity;
191 | return r;
192 | }
193 |
194 | void free_region(Region *r)
195 | {
196 | size_t size_bytes = sizeof(Region) + sizeof(uintptr_t) * r->capacity;
197 | int ret = munmap(r, size_bytes);
198 | ARENA_ASSERT(ret == 0);
199 | }
200 |
201 | #elif ARENA_BACKEND == ARENA_BACKEND_WIN32_VIRTUALALLOC
202 |
203 | #if !defined(_WIN32)
204 | # error "Current platform is not Windows"
205 | #endif
206 |
207 | #define WIN32_LEAN_AND_MEAN
208 | #include
209 |
210 | #define INV_HANDLE(x) (((x) == NULL) || ((x) == INVALID_HANDLE_VALUE))
211 |
212 | Region *new_region(size_t capacity)
213 | {
214 | SIZE_T size_bytes = sizeof(Region) + sizeof(uintptr_t) * capacity;
215 | Region *r = VirtualAllocEx(
216 | GetCurrentProcess(), /* Allocate in current process address space */
217 | NULL, /* Unknown position */
218 | size_bytes, /* Bytes to allocate */
219 | MEM_COMMIT | MEM_RESERVE, /* Reserve and commit allocated page */
220 | PAGE_READWRITE /* Permissions ( Read/Write )*/
221 | );
222 | if (INV_HANDLE(r))
223 | ARENA_ASSERT(0 && "VirtualAllocEx() failed.");
224 |
225 | r->next = NULL;
226 | r->count = 0;
227 | r->capacity = capacity;
228 | return r;
229 | }
230 |
231 | void free_region(Region *r)
232 | {
233 | if (INV_HANDLE(r))
234 | return;
235 |
236 | BOOL free_result = VirtualFreeEx(
237 | GetCurrentProcess(), /* Deallocate from current process address space */
238 | (LPVOID)r, /* Address to deallocate */
239 | 0, /* Bytes to deallocate ( Unknown, deallocate entire page ) */
240 | MEM_RELEASE /* Release the page ( And implicitly decommit it ) */
241 | );
242 |
243 | if (FALSE == free_result)
244 | ARENA_ASSERT(0 && "VirtualFreeEx() failed.");
245 | }
246 |
247 | #elif ARENA_BACKEND == ARENA_BACKEND_WASM_HEAPBASE
248 |
249 | // Stolen from https://surma.dev/things/c-to-webassembly/
250 |
251 | extern unsigned char __heap_base;
252 | // Since ARENA_BACKEND_WASM_HEAPBASE entirely hijacks __heap_base it is expected that no other means of memory
253 | // allocation are used except the arenas.
254 | unsigned char* bump_pointer = &__heap_base;
255 | // TODO: provide a way to deallocate all the arenas at once by setting bump_pointer back to &__heap_base?
256 |
257 | // __builtin_wasm_memory_size and __builtin_wasm_memory_grow are defined in units of page sizes
258 | #define ARENA_WASM_PAGE_SIZE (64*1024)
259 | // Bump-allocates a region holding `capacity` uintptr_t words at bump_pointer, growing linear memory first when the region would not fit.
260 | Region *new_region(size_t capacity)
261 | {
262 |     size_t size_bytes = sizeof(Region) + sizeof(uintptr_t)*capacity;
263 |     Region *r = (void*)bump_pointer;
264 |
265 |     // grow memory brk() style
266 |     size_t current_memory_size = ARENA_WASM_PAGE_SIZE * __builtin_wasm_memory_size(0);
267 |     size_t desired_memory_size = (size_t) bump_pointer + size_bytes; // memory must reach the END of the new region, not just its start
268 |     if (desired_memory_size > current_memory_size) {
269 |         size_t delta_bytes = desired_memory_size - current_memory_size;
270 |         size_t delta_pages = (delta_bytes + (ARENA_WASM_PAGE_SIZE - 1))/ARENA_WASM_PAGE_SIZE; // round up to whole pages
271 |         if (__builtin_wasm_memory_grow(0, delta_pages) == (size_t) -1) { // failure is reported as -1; `< 0` is never true for an unsigned result
272 |             ARENA_ASSERT(0 && "memory.grow failed");
273 |             return NULL;
274 |         }
275 |     }
276 |
277 |     bump_pointer += size_bytes;
278 |
279 |     r->next = NULL;
280 |     r->count = 0;
281 |     r->capacity = capacity;
282 |     return r;
283 | }
284 |
285 | void free_region(Region *r)
286 | {
287 | // Since ARENA_BACKEND_WASM_HEAPBASE uses a primitive bump allocator to
288 | // allocate the regions, free_region() does nothing. It is generally
289 | // not recommended to free arenas anyway since it is better to keep
290 | // reusing already allocated memory with arena_reset().
291 | (void) r;
292 | }
293 |
294 | #else
295 | # error "Unknown Arena backend"
296 | #endif
297 |
298 | // TODO: add debug statistic collection mode for arena
299 | // Should collect things like:
300 | // - How many times new_region was called
301 | // - How many times existing region was skipped
302 | // - How many times allocation exceeded ARENA_REGION_DEFAULT_CAPACITY
303 | // Allocates size_bytes from arena `a`, rounded up to whole uintptr_t words, creating a new region when no existing region has room.
304 | void *arena_alloc(Arena *a, size_t size_bytes)
305 | {
306 | size_t size = (size_bytes + sizeof(uintptr_t) - 1)/sizeof(uintptr_t); // request size in uintptr_t words, rounded up
307 |
308 | if (a->end == NULL) { // arena has no regions yet: create the first one
309 | ARENA_ASSERT(a->begin == NULL);
310 | size_t capacity = ARENA_REGION_DEFAULT_CAPACITY;
311 | if (capacity < size) capacity = size; // a request larger than the default gets a region of exactly its size
312 | a->end = new_region(capacity);
313 | a->begin = a->end;
314 | }
315 |
316 | while (a->end->count + size > a->end->capacity && a->end->next != NULL) { // move past regions that cannot fit the request
317 | a->end = a->end->next;
318 | }
319 |
320 | if (a->end->count + size > a->end->capacity) { // the last region cannot fit it either: append a fresh one
321 | ARENA_ASSERT(a->end->next == NULL);
322 | size_t capacity = ARENA_REGION_DEFAULT_CAPACITY;
323 | if (capacity < size) capacity = size;
324 | a->end->next = new_region(capacity);
325 | a->end = a->end->next;
326 | }
327 |
328 | void *result = &a->end->data[a->end->count]; // first unused word of the chosen region
329 | a->end->count += size;
330 | return result;
331 | }
332 | // Grows an allocation: returns oldptr unchanged when newsz <= oldsz, otherwise allocates newsz bytes from `a` and copies the first oldsz bytes into the new block.
333 | void *arena_realloc(Arena *a, void *oldptr, size_t oldsz, size_t newsz)
334 | {
335 | if (newsz <= oldsz) return oldptr; // shrinking (or same size) keeps the existing block
336 | void *newptr = arena_alloc(a, newsz);
337 | char *newptr_char = (char*)newptr;
338 | char *oldptr_char = (char*)oldptr;
339 | for (size_t i = 0; i < oldsz; ++i) { // byte-wise copy of the old contents
340 | newptr_char[i] = oldptr_char[i];
341 | }
342 | return newptr;
343 | }
344 |
345 | size_t arena_strlen(const char *s)
346 | {
347 | size_t n = 0;
348 | while (*s++) n++;
349 | return n;
350 | }
351 |
352 | void *arena_memcpy(void *dest, const void *src, size_t n)
353 | {
354 | char *d = dest;
355 | const char *s = src;
356 | for (; n; n--) *d++ = *s++;
357 | return dest;
358 | }
359 |
360 | char *arena_strdup(Arena *a, const char *cstr)
361 | {
362 | size_t n = arena_strlen(cstr);
363 | char *dup = (char*)arena_alloc(a, n + 1);
364 | arena_memcpy(dup, cstr, n);
365 | dup[n] = '\0';
366 | return dup;
367 | }
368 |
369 | void *arena_memdup(Arena *a, void *data, size_t size)
370 | {
371 | return arena_memcpy(arena_alloc(a, size), data, size);
372 | }
373 |
374 | #ifndef ARENA_NOSTDIO
375 | char *arena_sprintf(Arena *a, const char *format, ...)
376 | {
377 | va_list args;
378 | va_start(args, format);
379 | int n = vsnprintf(NULL, 0, format, args);
380 | va_end(args);
381 |
382 | ARENA_ASSERT(n >= 0);
383 | char *result = (char*)arena_alloc(a, n + 1);
384 | va_start(args, format);
385 | vsnprintf(result, n + 1, format, args);
386 | va_end(args);
387 |
388 | return result;
389 | }
390 | #endif // ARENA_NOSTDIO
391 | // Captures the arena's current end region and that region's fill count in an Arena_Mark, which arena_rewind() can later restore.
392 | Arena_Mark arena_snapshot(Arena *a)
393 | {
394 | Arena_Mark m;
395 | if(a->end == NULL){ //snapshot of uninitialized arena
396 | ARENA_ASSERT(a->begin == NULL);
397 | m.region = a->end; // NULL here; arena_rewind() treats a NULL region as "reset everything"
398 | m.count = 0;
399 | }else{
400 | m.region = a->end;
401 | m.count = a->end->count;
402 | }
403 |
404 | return m;
405 | }
406 | // Marks every region of `a` as empty and points a->end back at the first region; no region is freed.
407 | void arena_reset(Arena *a)
408 | {
409 | for (Region *r = a->begin; r != NULL; r = r->next) {
410 | r->count = 0;
411 | }
412 |
413 | a->end = a->begin;
414 | }
415 | // Rolls arena `a` back to mark `m`: the marked region gets its recorded fill count back, every later region is emptied, and a->end returns to the marked region.
416 | void arena_rewind(Arena *a, Arena_Mark m)
417 | {
418 | if(m.region == NULL){ //snapshot of uninitialized arena
419 | arena_reset(a); //leave allocation
420 | return;
421 | }
422 |
423 | m.region->count = m.count; // restore the fill level stored in the mark
424 | for (Region *r = m.region->next; r != NULL; r = r->next) { // regions after the mark become empty but stay linked
425 | r->count = 0;
426 | }
427 |
428 | a->end = m.region;
429 | }
430 | // Releases every region of `a` through free_region() and leaves the arena with begin and end both NULL.
431 | void arena_free(Arena *a)
432 | {
433 | Region *r = a->begin;
434 | while (r) {
435 | Region *r0 = r; // remember the current region, because its next pointer is read before it is released
436 | r = r->next;
437 | free_region(r0);
438 | }
439 | a->begin = NULL;
440 | a->end = NULL;
441 | }
442 | // Frees every region that follows the current end region, so a->end becomes the last region. Does nothing on an arena that has no regions.
443 | void arena_trim(Arena *a){
444 |     if (a->end == NULL) return; // no regions yet: a->end->next below would dereference NULL
445 |     for (Region *r = a->end->next; r != NULL; ) {
446 |         Region *r0 = r; // read the next pointer before the region is released
447 |         r = r->next;
448 |         free_region(r0);
449 |     }
450 |     a->end->next = NULL;
451 | }
452 |
453 | #endif // ARENA_IMPLEMENTATION
454 |
--------------------------------------------------------------------------------
/include/lexer.h:
--------------------------------------------------------------------------------
1 | /* Header for ../src/lexer.c, the lexer for the textual IR for UYB.
2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */
3 | #pragma once
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | typedef enum {
10 | TokLabel, // %labelname
11 | TokRawStr, // value
12 | TokStrLit, // "value"
13 | TokBlockLabel,
14 | TokInteger,
15 | TokAssign, // =t (with t being the type, stored in val (types defined in api.h))
16 | TokEqu, // just =, no type
17 | TokLBrace, TokRBrace,
18 | TokLParen, TokRParen,
19 | TokColon,
20 | TokBar,
21 | TokComma,
22 | TokNewLine,
23 | TokTripleDot,
24 | TokAggType,
25 | TokFunction, TokExport, TokData, TokSection, TokAlign, TokType, TokFile, // keywords
26 | } TokenType;
27 |
28 | typedef struct {
29 | size_t line;
30 | TokenType type;
31 | uint64_t val;
32 | } Token;
33 |
34 | void lex_line(char *str, size_t line_num, Token **ret);
35 | Token **lex_file(FILE *f);
36 | char *token_to_str(TokenType ttype);
37 | Type char_to_type(char t_ch);
38 |
--------------------------------------------------------------------------------
/include/optimisation.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 |
5 | typedef struct {
6 | char *label;
7 | size_t val;
8 | ValType type;
9 | } CopyVal;
10 |
11 | void optimise(Function *IR, size_t num_functions);
12 |
13 | /* Specific optimisations */
14 | void opt_fold(Function *IR, size_t num_functions);
15 | void opt_copy_elim(Function *IR, size_t num_functions);
16 | void opt_unused_label_elim(Function *IR, size_t num_functions);
17 |
--------------------------------------------------------------------------------
/include/parser.h:
--------------------------------------------------------------------------------
1 | /* Header for ../src/parser.c, the parser for the textual IR for UYB.
2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */
3 | #pragma once
4 | #include
5 |
6 | Function **parse_program(Token **toks, Global ***globals_buf, AggregateType ***aggtypes_buf, FileDbg ***filesdbg_buf);
7 |
--------------------------------------------------------------------------------
/include/strslice.h:
--------------------------------------------------------------------------------
1 | /* Header file for string slice implementation for UYB.
2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */
3 | #pragma once
4 | #include
5 |
6 | typedef struct {
7 | char *data;
8 | size_t len;
9 | } String;
10 |
11 | String *string_from(char *from);
12 | void string_push(String *str, char *new);
13 | void string_push_fmt(String *str, char *fmt, ...);
14 |
--------------------------------------------------------------------------------
/include/target/x86_64/register.h:
--------------------------------------------------------------------------------
1 | /* Header file for src/target/x86_64/register.c, the register allocator for the UYB compiler backend.
2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */
3 | #pragma once
4 | #include
5 | #include
6 |
7 | #define update_regalloc() regalloc.statement_idx++
8 |
9 | static char *arg_regs[] = {
10 | "%rdi",
11 | "%rsi",
12 | "%rdx",
13 | "%rcx",
14 | "%r8",
15 | "%r9",
16 | };
17 |
18 | typedef struct {
19 | size_t bytes_rip_pad;
20 | char* **used_regs_vec;
21 | Function *current_fn;
22 | size_t statement_idx;
23 | size_t* **labels_as_offsets;
24 | } RegAlloc;
25 |
26 | extern RegAlloc regalloc;
27 |
28 | extern char *label_reg_tab[5][3];
29 | extern intptr_t reg_alloc_tab[5][3];
30 | void reg_init_fn(Function func);
31 | char *reg_alloc(char *label, Type reg_size);
32 | char *label_to_reg(size_t offset, char *label, bool allow_noexist);
33 | char *reg_as_size(char *reg, Type size);
34 | Type size_from_reg(char *reg);
35 | char *label_to_reg_noresize(size_t offset, char *label, bool allow_noexist);
36 | char *reg_alloc_noresize(char *label, Type reg_size);
37 | Type get_reg_size(char *reg, char *expected_label);
38 |
--------------------------------------------------------------------------------
/include/utils.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 | #include
5 |
6 | char size_as_char(Type type);
7 | char *get_full_char_str(bool is_struct, Type type, char *type_struct);
8 | int find_copyval(CopyVal **copyvals, char *label, CopyVal *val_buf);
9 | int find_sizet_in_copyvals(CopyVal **copyvals, char *label, size_t *val_buf);
10 | AggregateType *find_aggtype(char *name, AggregateType *aggtypes, size_t num_aggtypes);
11 | char *read_full_stdin();
12 |
--------------------------------------------------------------------------------
/include/vector.h:
--------------------------------------------------------------------------------
1 | /* Part of vector implementation for UYB compiler backend project, see ../src/vector.c for the
2 | * rest of the code.
3 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under the MPL2.0 license, see /LICENSE for more information. */
4 | #pragma once
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | typedef struct {
11 | size_t len;
12 | size_t capacity;
13 | size_t data_size;
14 | void *data;
15 | } __attribute__((packed)) Vec;
16 |
17 | void *vec_new(size_t data_size);
18 | size_t vec_size(void *vec_data);
19 | int vec_contains(void *vec_data, size_t val);
20 | /* Appends `val` to the vector behind `vec_data` (the pointer obtained from vec_new), doubling the capacity once the vector is full. Both arguments are parenthesized so arbitrary expressions expand safely. */
21 | #define vec_push(vec_data, val) \
22 |     do { \
23 |         Vec *vec_internal = (Vec*) ((uintptr_t) (vec_data) - (sizeof(Vec) - sizeof(void*))); \
24 |         ((typeof(val)*) vec_internal->data)[vec_internal->len] = (val); \
25 |         vec_internal->len++; \
26 |         if (vec_internal->capacity == vec_internal->len) { \
27 |             vec_internal->data = realloc(vec_internal->data, (vec_internal->len + 1) * sizeof(val) * 2); \
28 |             vec_internal->capacity *= 2; \
29 |         } \
30 |     } while (0)
31 |
32 | /* Usage of this header:
33 | * - To create a new vector, use vec_new():
34 | * data_type **vec = vec_new(sizeof(data_type));
35 | * (replace `data_type` with the type that the vector is for, for example uint64_t)
36 | * - To append an element to a vector, use vec_push():
37 | * vec_push(vec, new_value);
38 | * - To access elements of the vector, including writing/reading specific elements, access it like a normal array but dereference vec:
39 | * value = (*vec)[8];
40 | * - To get the length of a vector, use vec_size():
41 | * length_of_vector = vec_size(vec);
42 | */
43 |
--------------------------------------------------------------------------------
/out.:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnmappedStack/UYB/fd288c4c4695d5682e1df006863ed68d44b2102e/out.
--------------------------------------------------------------------------------
/src/lexer.c:
--------------------------------------------------------------------------------
1 | /* Textual IR lexer for the UYB compiler backend project.
2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | #define valid_label_char(ch) (ch == '.' || ch == '_' || isdigit(ch) || isalpha(ch))
11 |
12 | Type char_to_type(char t_ch) {
13 | if (t_ch == 'b') return Bits8;
14 | else if (t_ch == 'h') return Bits16;
15 | else if (t_ch == 'w') return Bits32;
16 | else if (t_ch == 'l') return Bits64;
17 | else {
18 | printf("Invalid type: %c\n", t_ch);
19 | exit(1);
20 | }
21 | }
22 | /* Returns the printable name of a token type, or "TokInvalid" for a value with no entry. The table is indexed by TokenType and covers every enumerator, including TokAssign, TokEqu and TokFile. */
23 | char *token_to_str(TokenType ttype) {
24 |     static char *const names[] = {
25 |         [TokLabel]      = "TokLabel",
26 |         [TokRawStr]     = "TokRawStr",
27 |         [TokStrLit]     = "TokStrLit",
28 |         [TokBlockLabel] = "TokBlkLbl",
29 |         [TokInteger]    = "TokInteger",
30 |         [TokAssign]     = "TokAssign",
31 |         [TokEqu]        = "TokEqu",
32 |         [TokLBrace]     = "TokLBrace",    [TokRBrace]  = "TokRBrace",
33 |         [TokLParen]     = "TokLParen",    [TokRParen]  = "TokRParen",
34 |         [TokColon]      = "TokColon",
35 |         [TokBar]        = "TokBar",
36 |         [TokComma]      = "TokComma",
37 |         [TokNewLine]    = "TokNewLine",
38 |         [TokTripleDot]  = "TokTripleDot",
39 |         [TokAggType]    = "TokAggType",
40 |         [TokFunction]   = "TokFunction",  [TokExport]  = "TokExport",
41 |         [TokData]       = "TokData",      [TokSection] = "TokSection",
42 |         [TokAlign]      = "TokAlign",     [TokType]    = "TokType",
43 |         [TokFile]       = "TokFile",
44 |     };
45 |     if ((size_t) ttype >= sizeof names / sizeof names[0] || names[ttype] == NULL) return "TokInvalid"; // out-of-range or unnamed value
46 |     return names[ttype];
47 | }
48 |
49 | // `ret` argument is a buffer for a vector which all the tokens will be pushed to.
50 | void lex_line(char *str, size_t line_num, Token **ret) {
51 | size_t len = strlen(str);
52 | for (size_t i = 0; i < len; i++) {
53 | if (str[i] == '\t' || str[i] == ' ' || str[i] == '\r' || str[i] == 0) continue;
54 | else if (str[i] == '#') break;
55 | else if (str[i] == '(') vec_push(ret, ((Token) {.line=line_num,.type=TokLParen,.val=0}));
56 | else if (str[i] == ')') vec_push(ret, ((Token) {.line=line_num,.type=TokRParen,.val=0}));
57 | else if (str[i] == '{') vec_push(ret, ((Token) {.line=line_num,.type=TokLBrace,.val=0}));
58 | else if (str[i] == '}') vec_push(ret, ((Token) {.line=line_num,.type=TokRBrace,.val=0}));
59 | else if (str[i] == ',') vec_push(ret, ((Token) {.line=line_num,.type=TokComma,.val=0}));
60 | else if (str[i] == ':') vec_push(ret, ((Token) {.line=line_num,.type=TokColon,.val=0}));
61 | else if (str[i] == '|') vec_push(ret, ((Token) {.line=line_num,.type=TokBar,.val=0}));
62 | else if (!memcmp(&str[i], "...", 3)) {
63 | vec_push(ret, ((Token) {.line=line_num,.type=TokTripleDot,.val=0}));
64 | i += 2;
65 | }
66 | else if (str[i] == '=' && isalpha(str[i + 1])) {
67 | vec_push(ret, ((Token) {.line=line_num,.type=TokAssign,.val=char_to_type(str[i+1])}));
68 | i++;
69 | } else if (str[i] == '=') {
70 | vec_push(ret, ((Token) {.line=line_num,.type=TokEqu,.val=0}));
71 | } else if (isdigit(str[i]) || str[i] == '-') {
72 | size_t dig = 0;
73 | for (; isdigit(str[i + dig]) || (str[i + dig] == '-' && dig == 0); dig++);
74 | char *buf = aalloc(dig + 1); // perhaps I should move this to a fixed size buffer?
75 | memcpy(buf, &str[i], dig);
76 | buf[dig] = 0;
77 | int negative_flag = 0;
78 | if (str[i] == '-') {
79 | negative_flag = true;
80 | buf++;
81 | }
82 | uint64_t val = strtoll(buf,NULL,10);
83 | if (negative_flag) {
84 | val = -val;
85 | }
86 | vec_push(ret, ((Token) {.line=line_num,.type=TokInteger,.val=val}));
87 | i += dig - 1;
88 | } else if (str[i] == '"') {
89 | size_t dig = 0;
90 | for (; !(str[i + dig] == '"' && dig); dig++);
91 | char *buf = aalloc(dig + 1);
92 | memcpy(buf, &str[i + 1], dig);
93 | buf[dig - 1] = 0;
94 | vec_push(ret, ((Token) {.line=line_num,.type=TokStrLit,.val=(uint64_t) buf}));
95 | i += dig;
96 | } else if (str[i] == '%' || str[i] == '$' || str[i] == '@' || str[i] == ':') {
97 | i++;
98 | size_t dig = 0;
99 | for (; valid_label_char(str[i + dig]); dig++);
100 | char *buf = aalloc(dig + 2);
101 | memcpy(buf, &str[i], dig + 1);
102 | buf[dig] = 0;
103 | if (str[i - 1] == '%')
104 | vec_push(ret, ((Token) {.line=line_num,.type=TokLabel,.val=(uint64_t) buf}));
105 | else if (str[i - 1] == '$')
106 | vec_push(ret, ((Token) {.line=line_num,.type=TokRawStr,.val=(uint64_t) buf}));
107 | else if (str[i - 1] == '@')
108 | vec_push(ret, ((Token) {.line=line_num,.type=TokBlockLabel,.val=(uint64_t) buf}));
109 | else if (str[i - 1] == ':')
110 | vec_push(ret, ((Token) {.line=line_num,.type=TokAggType,.val=(uint64_t) buf}));
111 | i += dig - 1;
112 | } else if (valid_label_char(str[i])) {
113 | size_t dig = 0;
114 | for (; valid_label_char(str[i + dig]); dig++);
115 | char *buf = aalloc(dig + 1);
116 | memcpy(buf, &str[i], dig);
117 | buf[dig] = 0;
118 | if (!strcmp(buf, "function")) {
119 | vec_push(ret, ((Token) {.line=line_num,.type=TokFunction,.val=0}));
120 | } else if (!strcmp(buf, "export")) {
121 | vec_push(ret, ((Token) {.line=line_num,.type=TokExport,.val=0}));
122 | } else if (!strcmp(buf, "data")) {
123 | vec_push(ret, ((Token) {.line=line_num,.type=TokData,.val=0}));
124 | } else if (!strcmp(buf, "section")) {
125 | vec_push(ret, ((Token) {.line=line_num,.type=TokSection,.val=0}));
126 | } else if (!strcmp(buf, "align")) {
127 | vec_push(ret, ((Token) {.line=line_num,.type=TokAlign,.val=0}));
128 | } else if (!strcmp(buf, "type")) {
129 | vec_push(ret, ((Token) {.line=line_num,.type=TokType,.val=0}));
130 | } else if (!strcmp(buf, ".file")) {
131 | vec_push(ret, ((Token) {.line=line_num,.type=TokFile,.val=0}));
132 | } else {
133 | vec_push(ret, ((Token) {.line=line_num,.type=TokRawStr,.val=(uint64_t) buf}));
134 | }
135 | i += dig - 1;
136 | } else {
137 | printf("Invalid token on line %zu: %c (%u)\n", line_num, str[i], str[i]);
138 | exit(1);
139 | }
140 | }
141 | }
142 |
143 | Token **lex_file(FILE *f) {
144 | ssize_t sz;
145 | char *contents;
146 | Token **ret = vec_new(sizeof(Token));
147 | size_t ln = 1;
148 | size_t start = 0;
149 | size_t end = 0;
150 | if (f == stdin) {
151 | contents = read_full_stdin();
152 | sz = strlen(contents);
153 | goto end_readfile;
154 | }
155 | fseek(f, 0, SEEK_END);
156 | if ((sz = ftell(f)) < 0) {
157 | printf("Failed to get file length (ftell error).\n");
158 | exit(1);
159 | }
160 | fseek(f, 0, SEEK_SET);
161 | contents = aalloc(sz + 1);
162 | if (!fread(contents, sz, 1, f)) {
163 | printf("Failed to read from file.\n");
164 | exit(1);
165 | }
166 | end_readfile:
167 | for (; end <= sz; end++) {
168 | if (contents[end] == '\n') {
169 | contents[end] = 0;
170 | lex_line(&contents[start], ln, ret);
171 | vec_push(ret, ((Token) {.line=ln,.type=TokNewLine,.val=0}));
172 | start = end + 1;
173 | ln++;
174 | }
175 | }
176 | if (f == stdin) free(contents);
177 | return ret;
178 | }
179 |
--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
1 | /* Main file of UYB for parsing command line arguments and calling the rest of the compiler.
2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */
3 | #define ARENA_IMPLEMENTATION
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 |
16 | Arena arena;
17 | int is_position_independent = 1;
18 |
19 | typedef enum {
20 | X86_64,
21 | IR,
22 | } Target;
23 |
24 | void (*targets[])(Function*, size_t, Global*, size_t, AggregateType*, size_t, FileDbg*, size_t, FILE*) = {
25 | build_program_x86_64,
26 | build_program_IR,
27 | };
28 |
/* Prints the command-line usage summary to stdout.
 * `cmd` is the name to show for the program (main passes argv[0]). */
void help(char *cmd) {
    printf("%s [options] <input file>\n", cmd);
    printf("Options:\n"
           "  --help            Display this information.\n"
           "  --version         Check the version of this copy of UYB.\n"
           "  --targets         List targets supported by UYB which the IR can be compiled to.\n"
           "  --no-pie          Ensure that the generated program is not position independent.\n"
           "  -o <output file>  Specify that the resulting assembly should be outputted to <output file>.\n"
           "  -t <target>       Specify that assembly should be generated specifically for <target>.\n");
}
39 |
/* Prints the list of supported targets and how to select one, to stdout. */
void targets_help(void) {
    printf("Use `-t <target>` to specify a target. Supported targets:\n");
    printf("  - x86_64\n"
           "  - IR\n");
}
45 |
46 | Target str_as_target(char *cmd, char *s) {
47 | if (!strcmp(s, "x86_64")) return X86_64;
48 | else if (!strcmp(s, "IR")) return IR;
49 | else {
50 | printf("No such target: %s. To list all targets, run:\n"
51 | "%s --targets\n", s, cmd);
52 | exit(1);
53 | }
54 | }
55 |
56 | void sigsegv_handler(int sig, siginfo_t *si, void *unused) {
57 | printf(":( Something went very wrong and UYB cannot continue (segmentation fault).\n\n"
58 | "Please report an issue for the bug on the GitHub repository (https://github.com/UnmappedStack) and describe what you did that caused this.\n"
59 | "Signal: %d, address: %p\n", sig, si->si_addr);
60 | exit(1);
61 | }
62 |
63 | void setup_sigsev() {
64 | struct sigaction sa;
65 | sa.sa_flags = SA_SIGINFO;
66 | sa.sa_sigaction = sigsegv_handler;
67 | sigaction(SIGSEGV, &sa, NULL);
68 | }
69 |
70 | int main(int argc, char **argv) {
71 | setup_sigsev();
72 | char *input_fname = NULL;
73 | char *output_fname = NULL;
74 | Target target = X86_64;
75 | for (size_t arg = 1; arg < argc; arg++) {
76 | if (argv[arg][0] != '-') {
77 | if (input_fname) {
78 | printf("More than one input file passed, not allowed.\n");
79 | return 1;
80 | }
81 | input_fname = argv[arg];
82 | continue;
83 | }
84 | if (argv[arg][1] == '-') argv[arg]++;
85 | if (!strcmp(argv[arg], "-o")) {
86 | if (output_fname) {
87 | printf("Output file provided more than once, not allowed.\n");
88 | return 1;
89 | }
90 | if (arg == argc - 1) {
91 | printf("Output file was expected to be provided after -o, got end of command instead.\n");
92 | return 1;
93 | }
94 | output_fname = argv[arg + 1];
95 | arg++;
96 | continue;
97 | } else if (!strcmp(argv[arg], "-t")) {
98 | if (argc == argc - 1) {
99 | printf("Target was expected to be provided after -t, got end of command instead.\n");
100 | return 1;
101 | }
102 | target = str_as_target(argv[0], argv[arg + 1]);
103 | arg++;
104 | } else if (!strcmp(argv[arg], "-targets")) {
105 | targets_help();
106 | return 0;
107 | } else if (!strcmp(argv[arg], "-no-pie")) {
108 | is_position_independent = 0;
109 | } else if (!strcmp(argv[arg], "-version")) {
110 | printf("UYB compiler backend version beta %s.\n"
111 | "Copyright (C) 2025 UnmappedStack (Jake Steinburger) under the Mozilla Public License 2.0.\n", COMMIT);
112 | return 0;
113 | } else if (!strcmp(argv[arg], "-help")) {
114 | help(argv[0]);
115 | return 0;
116 | } else {
117 | printf("Invalid argument: %s\n", argv[arg]);
118 | help(argv[0]);
119 | }
120 | }
121 | FILE *inf = stdin;
122 | if (input_fname) {
123 | inf = fopen(input_fname, "r");
124 | if (!inf) {
125 | printf("Failed to open %s\n", input_fname);
126 | return 1;
127 | }
128 | }
129 | Token **toks = lex_file(inf);
130 | fclose(inf);
131 | Global **globals;
132 | AggregateType **aggs;
133 | FileDbg **files_dbg;
134 | Function **functs = parse_program(toks, &globals, &aggs, &files_dbg);
135 | FILE *outf = stdout;
136 | if (output_fname) {
137 | outf = fopen(output_fname, "w");
138 | if (!outf) {
139 | printf("Failed to open out.S\n");
140 | exit(1);
141 | }
142 | }
143 | size_t num_functions = vec_size(functs);
144 | optimise(*functs, num_functions);
145 | // Assembly codegen
146 | targets[target](*functs, num_functions, *globals, vec_size(globals), *aggs, vec_size(aggs), *files_dbg, vec_size(files_dbg), outf);
147 | fclose(outf);
148 | delete_arenas();
149 | return 0;
150 | }
151 |
--------------------------------------------------------------------------------
/src/optimise/copyelim.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | void copy_elim_funct(Function *IR) {
8 | CopyVal val;
9 | Statement **statement_vec = vec_new(sizeof(Statement));
10 | CopyVal **copyvals = vec_new(sizeof(Statement));
11 | for (size_t s = 0; s < IR->num_statements; s++) {
12 | if (IR->statements[s].instruction == COPY) {
13 | vec_push(copyvals, ((CopyVal) {
14 | .label = IR->statements[s].label,
15 | .val = IR->statements[s].vals[0],
16 | .type = IR->statements[s].val_types[0],
17 | }));
18 | } else {
19 | if (IR->statements[s].instruction == CALL) {
20 | FunctionArgList *args = (FunctionArgList*) IR->statements[s].vals[1];
21 | for (size_t a = 0; a < args->num_args; a++) {
22 | if (args->arg_types[a] != Label) continue;
23 | if (!find_copyval(copyvals, (char*) args->args[a], &val)) continue;
24 | args->args[a] = (char*) val.val;
25 | args->arg_types[a] = val.type;
26 | }
27 | goto statement_end;
28 | } else if (IR->statements[s].instruction == ASM) {
29 | InlineAsm *info = (InlineAsm*) IR->statements[s].vals[0];
30 | for (size_t i = 0; i < vec_size(info->inputs_vec); i++) {
31 | if (!find_copyval(copyvals, (char*) (*info->inputs_vec)[i].label, &val)) continue;
32 | (*info->inputs_vec)[i].label = (char*) val.val;
33 | (*info->inputs_vec)[i].type = val.type;
34 | }
35 | }
36 | for (size_t i = 0; i < 2; i++) {
37 | if (IR->statements[s].val_types[i] != Label) continue;
38 | if (!find_copyval(copyvals, (char*) IR->statements[s].vals[i], &val)) continue;
39 | IR->statements[s].val_types[i] = val.type;
40 | IR->statements[s].vals[i] = val.val;
41 | }
42 | statement_end:
43 | vec_push(statement_vec, IR->statements[s]);
44 | }
45 | }
46 | IR->statements = *statement_vec;
47 | IR->num_statements = vec_size(statement_vec);
48 | }
49 |
50 | void opt_copy_elim(Function *IR, size_t num_functions) {
51 | for (size_t fn = 0; fn < num_functions; fn++) {
52 | copy_elim_funct(&IR[fn]);
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/src/optimise/folding.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 |
6 | size_t get_val(ValType type, size_t val, size_t label_val) {
7 | if (type == Number) return val;
8 | else if (type == Label) return label_val;
9 | else return 0;
10 | }
11 |
12 | void fold_funct(Function *fn) {
13 | CopyVal **copyvals = vec_new(sizeof(CopyVal));
14 | for (size_t s = 0; s < fn->num_statements; s++) {
15 | ValType *valtypes = fn->statements[s].val_types;
16 | size_t *vals = fn->statements[s].vals;
17 | Instruction instr = fn->statements[s].instruction;
18 | // If it's a COPY, save the value
19 | if (instr == COPY && valtypes[0] == Number) {
20 | vec_push(copyvals, ((CopyVal) {
21 | .label = fn->statements[s].label,
22 | .val = fn->statements[s].vals[0],
23 | }));
24 | continue;
25 | }
26 | size_t in_vals[2];
27 | if ((valtypes[0] == Str || valtypes[0] == BlkLbl || (valtypes[0] != Number && !(valtypes[0] == Label && find_sizet_in_copyvals(copyvals, (char*) vals[0], &in_vals[0]))) ||
28 | valtypes[1] == Str || valtypes[1] == BlkLbl || (valtypes[1] != Number && !(valtypes[1] == Label && find_sizet_in_copyvals(copyvals, (char*) vals[1], &in_vals[1])))) && valtypes[1] != Empty) {
29 | // it can't constant fold it if the values can't be found at compile time
30 | continue;
31 | }
32 | // Now solve for the value and replace it with a COPY.
33 | size_t params[] = {get_val(valtypes[0], vals[0], in_vals[0]), get_val(valtypes[1], vals[1], in_vals[1])};
34 | if (instr == ADD) {
35 | fn->statements[s].vals[0] = params[0] + params[1];
36 | } else if (instr == MUL) {
37 | fn->statements[s].vals[0] = params[0] * params[1];
38 | } else if (instr == DIV) {
39 | fn->statements[s].vals[0] = params[0] / params[1];
40 | } else if (instr == SUB) {
41 | fn->statements[s].vals[0] = params[0] - params[1];
42 | } else if (instr == SHL) {
43 | fn->statements[s].vals[0] = params[0] << params[1];
44 | } else if (instr == SHR) {
45 | fn->statements[s].vals[0] = params[0] >> params[1];
46 | } else if (instr == EQ) {
47 | fn->statements[s].vals[0] = params[0] == params[1];
48 | } else if (instr == NE) {
49 | fn->statements[s].vals[0] = params[0] != params[1];
50 | } else if (instr == OR) {
51 | fn->statements[s].vals[0] = params[0] | params[1];
52 | } else if (instr == AND) {
53 | fn->statements[s].vals[0] = params[0] & params[1];
54 | } else if (instr == XOR) {
55 | fn->statements[s].vals[0] = params[0] ^ params[1];
56 | } else if (instr == NEG) {
57 | fn->statements[s].vals[0] = -params[0];
58 | } else {
59 | continue;
60 | }
61 | fn->statements[s].instruction = COPY;
62 | fn->statements[s].val_types[0] = Number;
63 | fn->statements[s].val_types[1] = Empty;
64 | }
65 | }
66 |
67 | void opt_fold(Function *IR, size_t num_functions) {
68 | for (size_t fn = 0; fn < num_functions; fn++) {
69 | fold_funct(&IR[fn]);
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/src/optimise/optimisation.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | /* Takes a pointer to an array of Function structures and the number of functions in the IR.
4 | * Changes the statements in the given function to be more optimised. */
5 | void optimise(Function *IR, size_t num_functions) {
6 | /* Planned optimisations:
7 | * - Folding [DONE]
8 | * - Copy elimination [DONE]
9 | * - Unused label removal [DONE]
10 | * - Function inlining
11 | * - Loop unravelling(?) */
12 | opt_fold(IR, num_functions);
13 | opt_copy_elim(IR, num_functions);
14 | opt_unused_label_elim(IR, num_functions);
15 | }
16 |
--------------------------------------------------------------------------------
/src/optimise/unused_label_elim.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | void elim_unused_labels_fn(Function *IR) {
5 | char* **used_labels = vec_new(sizeof(char*));
6 | Statement **statement_vec = vec_new(sizeof(Statement));
7 | for (ssize_t s = IR->num_statements - 1; s >= 0; s--) {
8 | if (IR->statements[s].label && IR->statements[s].instruction != CALL) {
9 | for (size_t i = 0; i < vec_size(used_labels); i++) {
10 | if (!vec_contains(used_labels, (size_t) IR->statements[s].label)) continue;
11 | }
12 | }
13 | if (IR->statements[s].instruction == CALL) {
14 | FunctionArgList *args = (FunctionArgList*) IR->statements[s].vals[1];
15 | for (size_t a = 0; a < args->num_args; a++) {
16 | if (args->arg_types[a] == Label) vec_push(used_labels, args->args[a]);
17 | }
18 | } else {
19 | for (size_t i = 0; i < 3; i++) {
20 | if (IR->statements[s].val_types[i] == Label) vec_push(used_labels, IR->statements[s].vals[i]);
21 | }
22 | }
23 | vec_push(statement_vec, IR->statements[s]);
24 | }
25 | // reverse it cos the previous thing inserts statements backwards
26 | for (size_t i = 0; i < vec_size(statement_vec) / 2; i++) {
27 | Statement tmp = (*statement_vec)[i];
28 | (*statement_vec)[i] = (*statement_vec)[vec_size(statement_vec) - 1 - i];
29 | (*statement_vec)[vec_size(statement_vec) - 1 - i] = tmp;
30 | }
31 | IR->statements = *statement_vec;
32 | IR->num_statements = vec_size(statement_vec);
33 | }
34 |
35 | void opt_unused_label_elim(Function *IR, size_t num_functions) {
36 | for (size_t fn = 0; fn < num_functions; fn++) {
37 | elim_unused_labels_fn(&IR[fn]);
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/parser.c:
--------------------------------------------------------------------------------
1 | /* Textual IR parser for the UYB compiler backend project.
2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 |
// Converts every character of a NUL-terminated string to upper case.
// WARNING: Edits the original string
void str_toupper(char* str) {
    for (char *p = str; *p; p++) {
        // <ctype.h> functions need a value representable as unsigned char (or EOF).
        *p = (char) toupper((unsigned char) *p);
    }
}
19 |
20 | size_t bytes_from_size(Type sz) {
21 | switch (sz) {
22 | case Bits8: return 1;
23 | case Bits16: return 2;
24 | case Bits32: return 4;
25 | default: return 8;
26 | }
27 | }
28 |
29 | // really messy, there's probably a cleaner way to do this. Or at least, move it into another file.
30 | Instruction parse_instruction(char *instr, size_t line, Type *type) {
31 | str_toupper(instr);
32 | if (!strcmp(instr, "ADD" )) return ADD;
33 | else if (!strcmp(instr, "SUB" )) return SUB;
34 | else if (!strcmp(instr, "DIV" )) return DIV;
35 | else if (!strcmp(instr, "MUL" )) return MUL;
36 | else if (!strcmp(instr, "COPY" )) return COPY;
37 | else if (!strcmp(instr, "RET" )) return RET;
38 | else if (!strcmp(instr, "CALL" )) return CALL;
39 | else if (!strcmp(instr, "JZ" )) return JZ;
40 | else if (!strcmp(instr, "NEG" )) return NEG;
41 | else if (!strcmp(instr, "UDIV" )) return UDIV;
42 | else if (!memcmp(instr, "STORE", 5)) {
43 | if (strlen(instr) > 5)
44 | *type = char_to_type(tolower(instr[5]));
45 | return STORE;
46 | }
47 | else if (!memcmp(instr, "LOAD", 4)) {
48 | if (strlen(instr) > 5)
49 | *type = char_to_type(tolower(instr[5]));
50 | return LOAD;
51 | }
52 | else if (!strcmp(instr, "BLIT" )) return BLIT;
53 | else if (!strcmp(instr, "ALLOC" )) return ALLOC;
54 | else if (!memcmp(instr+1, "EQ", 2 )) return EQ;
55 | else if (!memcmp(instr+1, "NE", 2 )) return NE;
56 | else if (!memcmp(instr+1, "SGE", 3)) return SGE;
57 | else if (!memcmp(instr+1, "SGT", 3)) return SGT;
58 | else if (!memcmp(instr+1, "SLE", 3)) return SLE;
59 | else if (!memcmp(instr+1, "SLT", 3)) return SLT;
60 | else if (!memcmp(instr+1, "UGE", 3)) return UGE;
61 | else if (!memcmp(instr+1, "UGT", 3)) return UGT;
62 | else if (!memcmp(instr+1, "ULE", 3)) return ULE;
63 | else if (!memcmp(instr+1, "ULT", 3)) return ULT;
64 | else if (!memcmp(instr, "EXT", 3 )) return EXT;
65 | else if (!strcmp(instr, "HLT" )) return HLT;
66 | else if (!strcmp(instr, "BLKLBL" )) return BLKLBL;
67 | else if (!strcmp(instr, "JMP" )) return JMP;
68 | else if (!strcmp(instr, "JNZ" )) return JNZ;
69 | else if (!strcmp(instr, "SHL" )) return SHL;
70 | else if (!strcmp(instr, "SHR" )) return SHR;
71 | else if (!strcmp(instr, "OR" )) return OR;
72 | else if (!strcmp(instr, "AND" )) return AND;
73 | else if (!strcmp(instr, "PHI" )) return PHI;
74 | else if (!strcmp(instr, "VASTART" )) return VASTART;
75 | else if (!strcmp(instr, "VAARG" )) return VAARG;
76 | else if (!strcmp(instr, ".LOC" )) return LOC;
77 | else if (!strcmp(instr, "ASM" )) return ASM;
78 | else {
79 | printf("Invalid instruction on line %zu: %s\n", line, instr);
80 | exit(1);
81 | }
82 | }
83 |
84 | ValType tok_as_valtype(TokenType tok, size_t line) {
85 | if (tok == TokInteger) return Number;
86 | else if (tok == TokLabel) return Label;
87 | else if (tok == TokRawStr) return Str;
88 | else if (tok == TokBlockLabel) return BlkLbl;
89 | else if (tok == TokStrLit) return StrLit;
90 | else {
91 | printf("Token can't be converted to ValType: Invalid instruction value on line %zu\n", line);
92 | exit(1);
93 | }
94 | }
95 |
96 | void parse_statement_parameters(Token *toks, size_t at, Statement *ret) {
97 | size_t num_args = 0;
98 | size_t v = 0;
99 | for (size_t i = 0; v <= 3 && toks[at + i].type != TokNewLine; i++) {
100 | if (toks[at + i].type == TokComma) {
101 | continue;
102 | }
103 | ret->vals[v] = toks[at + i].val;
104 | ret->val_types[v] = tok_as_valtype(toks[at + i].type, toks[at + i].line);
105 | num_args++;
106 | v++;
107 | }
108 | for (size_t i = num_args; i < 3; i++) {
109 | ret->val_types[i] = Empty;
110 | }
111 | }
112 |
113 | void parse_phi_parameters(Token *toks, size_t at, Statement *ret) {
114 | if (toks[at].type != TokBlockLabel || toks[at + 3].type != TokBlockLabel) {
115 | printf("Phi instruction format is not correct, expected a block label on line %zu\n", toks->line);
116 | exit(1);
117 | }
118 | if (toks[at + 2].type != TokComma) {
119 | printf("Expected comma between phi node values on line %zu\n", toks->line);
120 | exit(1);
121 | }
122 | ret->vals[0] = (size_t) aalloc(sizeof(PhiVal));
123 | ret->vals[1] = (size_t) aalloc(sizeof(PhiVal));
124 | *((PhiVal*) ret->vals[0]) = (PhiVal) {
125 | .blklbl_name = (char*) toks[at].val,
126 | .val = toks[at + 1].val,
127 | .type = tok_as_valtype(toks[at + 1].type, toks[at + 1].line),
128 | };
129 | *((PhiVal*) ret->vals[1]) = (PhiVal) {
130 | .blklbl_name = (char*) toks[at + 3].val,
131 | .val = toks[at + 4].val,
132 | .type = tok_as_valtype(toks[at + 4].type, toks[at + 4].line),
133 | };
134 | ret->val_types[0] = PhiArg;
135 | ret->val_types[1] = PhiArg;
136 | ret->val_types[2] = Empty;
137 | }
138 |
139 | // returns number of tokens to skip
140 | size_t parse_asm_io(Token *toks, size_t at, InlineAsmIO ***io_vec_buf) {
141 | size_t at_start = at;
142 | at++;
143 | *io_vec_buf = vec_new(sizeof(InlineAsmIO));
144 | while (toks[at].type == TokLabel) {
145 | if (toks[at + 1].type != TokBar) {
146 | printf("Expected vertical bar (|) after label in I/O list for inline assembly on line %zu.\n", toks[at + 1].line);
147 | exit(1);
148 | }
149 | if (toks[at + 2].type != TokStrLit) {
150 | printf("Expected string literal referring to register in I/O list for inline assembly on line %zu.\n", toks[at + 2].line);
151 | exit(1);
152 | }
153 | vec_push(*io_vec_buf, ((InlineAsmIO) {
154 | .reg = (char*) toks[at + 2].val,
155 | .label = (char*) toks[at].val,
156 | .type = tok_as_valtype(toks[at].type, toks[at].line),
157 | }));
158 | at += 3;
159 | if (toks[at].type == TokComma) at++;
160 | }
161 | return at - at_start;
162 | }
163 |
164 | void parse_asm_clobbers(Token *toks, size_t at, char** **clobbers_buf_vec) {
165 | *clobbers_buf_vec = vec_new(sizeof(char*));
166 | at++;
167 | while (toks[at].type != TokRParen) {
168 | if (toks[at].type == TokComma) at++;
169 | else if (toks[at].type == TokStrLit)
170 | vec_push(*clobbers_buf_vec, (char*) toks[at++].val);
171 | else {
172 | printf("Invalid token in inline assembly clobber list, expected string literal or comma on line %zu.\n", toks[at].line);
173 | exit(1);
174 | }
175 | }
176 | }
177 |
178 | void parse_asm_parameters(Token *toks, size_t at, Statement *ret) {
179 | ret->val_types[0] = InlineAssembly;
180 | ret->val_types[1] = ret->val_types[2] = Empty;
181 | InlineAsm *buf = (InlineAsm*) malloc(sizeof(InlineAsm));
182 | // get the assembly itself
183 | if (toks[at].type != TokLParen) {
184 | printf("Expected left parenthesis after ASM instruction keyword on line %zu\n", toks[at].line);
185 | exit(1);
186 | }
187 | if (toks[at + 1].type != TokStrLit) {
188 | printf("Expected string literal after \"asm(\" on line %zu\n", toks[at + 1].line);
189 | exit(1);
190 | }
191 | buf->assembly = (char*) toks[at + 1].val;
192 | // replace instances of \t and \n with their correct values
193 | size_t len = strlen(buf->assembly);
194 | for (size_t c = 0; c < len; c++) {
195 | if (buf->assembly[c] != '\\') continue;
196 | if (buf->assembly[c + 1] == 'n')
197 | buf->assembly[c] = 10; // 10 is newline
198 | else if (buf->assembly[c + 1] == 't')
199 | buf->assembly[c] = 9; // 9 is carriage return
200 | else {
201 | printf("Unknown escape sequence (only \\t and \\n can be used in UYB)\n");
202 | exit(1);
203 | }
204 | memmove(&buf->assembly[c + 1], &buf->assembly[c + 2], len - c);
205 | c--;
206 | }
207 | at += 2;
208 | // get the inputs
209 | if (toks[at].type == TokColon)
210 | at += parse_asm_io(toks, at, &buf->inputs_vec);
211 | else
212 | goto end_asm_parse;
213 | // get the outputs
214 | if (toks[at].type == TokColon)
215 | at += parse_asm_io(toks, at, &buf->outputs_vec);
216 | else
217 | goto end_asm_parse;
218 | // get the clobbers
219 | if (toks[at].type == TokColon)
220 | parse_asm_clobbers(toks, at, &buf->clobbers_vec);
221 | end_asm_parse:
222 | ret->vals[0] = (uint64_t) buf;
223 | }
224 |
225 | void parse_call_parameters(Token *toks, size_t at, Statement *ret) {
226 | if (toks[at].type != TokRawStr) {
227 | printf("Expected function name after CALL instruction on line %zu.\n", toks[at].line);
228 | exit(1);
229 | }
230 | if (toks[at + 1].type != TokLParen) {
231 | printf("Expected function arguments within parenthesis for CALL instruction on line %zu.\n", toks[at + 1].line);
232 | exit(1);
233 | }
234 | ret->vals[0] = toks[at].val;
235 | at += 2;
236 | char* **args = vec_new(sizeof(char*));
237 | Type **arg_sizes = vec_new(sizeof(Type));
238 | char* **arg_struct_types = vec_new(sizeof(char*));
239 | bool **args_are_structs = vec_new(sizeof(bool));
240 | ValType **arg_types= vec_new(sizeof(ValType));
241 | while (toks[at].type != TokRParen) {
242 | if (toks[at].type == TokComma) {
243 | at++;
244 | continue;
245 | }
246 | if (toks[at].type == TokTripleDot) {
247 | at += 2;
248 | continue;
249 | }
250 | if ((toks[at].type != TokRawStr || ((char*) toks[at].val)[1] != 0) && toks[at].type != TokAggType) {
251 | printf("Expected argument type before argument in argument list in CALL instruction parameters on line %zu.\n", toks[at].line);
252 | exit(1);
253 | }
254 | if (toks[at + 1].type != TokLabel && toks[at + 1].type != TokRawStr && toks[at + 1].type != TokInteger) {
255 | printf("Expected label, integer literal, or global in argument list for CALL instruction on line %zu.\n", toks[at + 1].line);
256 | exit(1);
257 | }
258 | if (toks[at].type == TokRawStr) {
259 | vec_push(arg_sizes, char_to_type(((char*) toks[at].val)[0]));
260 | vec_push(arg_struct_types, 0);
261 | vec_push(args_are_structs, (bool) false);
262 | } else {
263 | vec_push(arg_sizes, 0);
264 | vec_push(arg_struct_types, (char*) toks[at].val);
265 | vec_push(args_are_structs, (bool) true);
266 | }
267 | vec_push(args, (char*) toks[at + 1].val);
268 | vec_push(arg_types, tok_as_valtype(toks[at + 1].type, toks[at + 1].line));
269 | at += 2;
270 | }
271 | ret->vals[1] = (uint64_t) aalloc(sizeof(FunctionArgList));
272 | *((FunctionArgList*) ret->vals[1]) = (FunctionArgList) {
273 | .args = *args,
274 | .arg_sizes = *arg_sizes,
275 | .arg_struct_types = *arg_struct_types,
276 | .args_are_structs = *args_are_structs,
277 | .arg_types = *arg_types,
278 | .num_args = vec_size(args),
279 | };
280 | ret->val_types[0] = Str;
281 | ret->val_types[1] = FunctionArgs;
282 | ret->val_types[2] = Empty;
283 | }
284 |
285 | Type instruction_remove_size(char *instr) {
286 | while (*instr) {
287 | if (*instr >= '0' && *instr <= '9') {
288 | *instr = 0;
289 | return Bits64;
290 | }
291 | instr++;
292 | }
293 | return 50;
294 | }
295 |
296 | // Expects tokens to end with TokNewLine
297 | Statement parse_statement(Token *toks) {
298 | if (toks[0].type == TokNewLine) toks++;
299 | if (toks[0].type == TokBlockLabel) {
300 | return (Statement) {
301 | .label = NULL,
302 | .instruction = BLKLBL,
303 | .vals = {toks[0].val},
304 | .val_types = {Str, Empty, Empty},
305 | };
306 | }
307 | Statement ret = {0};
308 | size_t at = 0;
309 | if (toks[0].type == TokLabel) {
310 | ret.label = (char*) toks[0].val;
311 | ret.type = toks[1].val;
312 | at = 2;
313 | } else {
314 | ret.label = NULL;
315 | }
316 | if (toks[at].type != TokRawStr) {
317 | printf("Expected instruction in statement on line %zu, got %s instead.\n", toks[at].line, token_to_str(toks[at].type));
318 | exit(1);
319 | }
320 | size_t new_size = instruction_remove_size((char*) toks[at].val);
321 | if (new_size != 50)
322 | ret.type = new_size;
323 | ret.instruction = parse_instruction((char*) toks[at].val, toks[at].line, &ret.type);
324 | at++;
325 | if (ret.instruction == CALL)
326 | parse_call_parameters(toks, at, &ret);
327 | else if (ret.instruction == PHI)
328 | parse_phi_parameters(toks, at, &ret);
329 | else if (ret.instruction == ASM)
330 | parse_asm_parameters(toks, at, &ret);
331 | else
332 | parse_statement_parameters(toks, at, &ret);
333 | return ret;
334 | }
335 |
336 | // returns number of tokens to skip
337 | size_t parse_function(Token **toks, size_t loc, Function *buf) {
338 | buf->is_global = (*toks)[loc].type == TokExport;
339 | size_t skip = 1 + loc;
340 | if ((*toks)[skip].type == TokNewLine) skip++;
341 | if (buf->is_global) skip++;
342 | if (((*toks)[skip].type != TokRawStr || ((char*) (*toks)[skip].val)[1])
343 | && (*toks)[skip].type != TokAggType) {
344 | printf("Not a valid function return type on line %zu.\n", (*toks)[skip].line);
345 | exit(1);
346 | }
347 | if ((*toks)[skip].type == TokRawStr) {
348 | buf->return_type = char_to_type(((char*) (*toks)[skip].val)[0]);
349 | buf->ret_is_struct = false;
350 | } else {
351 | buf->return_struct = (char*) (*toks)[skip].val;
352 | buf->ret_is_struct = true;
353 | }
354 | skip++;
355 | if ((*toks)[skip].type != TokRawStr) {
356 | printf("Expected function name on line %zu.\n", (*toks)[skip].line);
357 | exit(1);
358 | }
359 | buf->name = (char*) (*toks)[skip].val;
360 | if ((*toks)[skip + 1].type != TokLParen) {
361 | printf("Expected left parenthesis after function name in function definition on line %zu, got %s instead.\n", (*toks)[skip + 1].line, token_to_str((*toks)[skip + 1].type));
362 | exit(1);
363 | }
364 | skip += 2;
365 | FunctionArgument **args = vec_new(sizeof(FunctionArgument));
366 | buf->is_variadic = false;
367 | while ((*toks)[skip].type != TokRParen) {
368 | if ((*toks)[skip].type == TokComma) {
369 | skip++;
370 | continue;
371 | }
372 | if ((*toks)[skip].type == TokTripleDot) {
373 | buf->is_variadic = true;
374 | skip++;
375 | continue;
376 | }
377 | if (((*toks)[skip].type != TokRawStr || ((char*) (*toks)[skip].val)[1] != 0) && (*toks)[skip].type != TokAggType) {
378 | printf("Expected argument type as character (l,w,d,b), got something else instead on line %zu.\n", (*toks)[skip].line);
379 | exit(1);
380 | }
381 | if ((*toks)[skip + 1].type != TokLabel) {
382 | printf("Argument value isn't a label on line %zu.\n", (*toks)[skip + 1].line);
383 | exit(1);
384 | }
385 | FunctionArgument arg;
386 | arg.label = (char*) (*toks)[skip + 1].val;
387 | if ((*toks)[skip].type == TokRawStr) {
388 | arg.type_is_struct = false;
389 | arg.type = char_to_type(((char*) (*toks)[skip].val)[0]);
390 | } else {
391 | arg.type_is_struct = true;
392 | arg.type_struct = (char*) (*toks)[skip].val;
393 | }
394 | vec_push(args, arg);
395 | skip += 2;
396 | }
397 | buf->num_args = vec_size(args);
398 | buf->args = *args;
399 | skip++;
400 | if ((*toks)[skip].type != TokLBrace) {
401 | printf("Expected brace after function signature on line %zu\n", (*toks)[skip].line);
402 | exit(1);
403 | }
404 | skip++;
405 | if ((*toks)[skip].type != TokNewLine) {
406 | printf("Expected new line after left brace in function declaration on line %zu\n", (*toks)[skip].line);
407 | exit(1);
408 | }
409 | skip++;
410 | size_t depth = 1;
411 | size_t start = skip;
412 | Statement **statements = vec_new(sizeof(Statement));
413 | buf->num_statements = 0;
414 | for (;;) {
415 | if ((*toks)[skip].type == TokLBrace) {
416 | depth++;
417 | } else if ((*toks)[skip].type == TokRBrace) {
418 | depth--;
419 | if (!depth) {
420 | skip++;
421 | break;
422 | }
423 | } else if ((*toks)[skip].type == TokNewLine) {
424 | buf->num_statements++;
425 | vec_push(statements, parse_statement(&(*toks)[start]));
426 | start = skip;
427 | if ((*toks)[start + 1].type != TokRBrace) start++;
428 | }
429 | skip++;
430 | }
431 | buf->statements = *statements;
432 | return skip + 1 - loc;
433 | }
434 |
435 | // returns number of tokens to skip
436 | size_t parse_global(Token **toks, size_t loc, Global *buf) {
437 | size_t start_loc = loc;
438 | if ((*toks)[loc].type == TokSection) {
439 | loc++;
440 | if ((*toks)[loc].type != TokStrLit) {
441 | printf("Expected string literal after section keyword on line %zu\n", (*toks)[loc].line);
442 | exit(1);
443 | }
444 | buf->section = (char*) (*toks)[loc].val;
445 | loc += 2;
446 | } else {
447 | buf->section = NULL;
448 | }
449 | if ((*toks)[loc].type != TokData) {
450 | printf("Expected data global definition after section specification on line %zu\n", (*toks)[loc].line);
451 | exit(1);
452 | }
453 | if ((*toks)[loc + 1].type != TokRawStr) {
454 | printf("Expected name of global after data keyword on line %zu, got %s instead, data = %s\n", (*toks)[loc + 1].line, token_to_str((*toks)[loc + 1].type), (char*) (*toks)[loc + 1].val);
455 | exit(1);
456 | }
457 | buf->name = (char*) (*toks)[loc + 1].val;
458 | if ((*toks)[loc + 2].type != TokEqu) {
459 | printf("Expected = after global label name on line %zu\n", (*toks)[loc + 2].line);
460 | exit(1);
461 | }
462 | if ((*toks)[loc + 3].type == TokAlign) {
463 | if ((*toks)[loc + 4].type != TokInteger) {
464 | printf("Expected integer literal after Align token on line %zu\n", (*toks)[loc + 4].line);
465 | exit(1);
466 | }
467 | buf->alignment = (*toks)[loc + 4].val;
468 | loc += 2;
469 | } else
470 | buf->alignment = 1;
471 | if ((*toks)[loc + 3].type != TokLBrace) {
472 | printf("Expected left brace ({) after = on line %zu\n", (*toks)[loc + 3].line);
473 | exit(1);
474 | }
475 | loc += 4;
476 | Type **sizes = vec_new(sizeof(Type));
477 | size_t **vals = vec_new(sizeof(size_t));
478 | ValType **types= vec_new(sizeof(ValType));
479 | while ((*toks)[loc].type != TokRBrace) {
480 | if ((*toks)[loc].type == TokComma) {
481 | loc++;
482 | continue;
483 | }
484 | if ((*toks)[loc].type != TokRawStr || ((char*) (*toks)[loc].val)[1] != 0) {
485 | printf("Invalid type in global declaration on line %zu\n", (*toks)[loc].line);
486 | exit(1);
487 | }
488 | vec_push(sizes, char_to_type(((char*) (*toks)[loc].val)[0]));
489 | if ((*toks)[loc + 1].type == TokInteger) vec_push(types, Number);
490 | else if ((*toks)[loc + 1].type == TokStrLit) vec_push(types, StrLit);
491 | else {
492 | printf("Global values can only be a number or a strlit token on line %zu, got something else.\n", (*toks)[loc + 1].line);
493 | exit(1);
494 | }
495 | vec_push(vals, (*toks)[loc + 1].val);
496 | loc += 2;
497 | }
498 | buf->num_vals = vec_size(vals);
499 | buf->vals = *vals;
500 | buf->types = *types;
501 | buf->sizes = *sizes;
502 | return loc - start_loc;
503 | }
504 |
505 | size_t get_element_size(Token **toks, size_t *loc, AggregateType *buf) {
506 | if ((*toks)[*loc].type == TokRawStr && ((char*) (*toks)[*loc].val)[1] == 0) {
507 | // If it's a type element, like `l`
508 | size_t this_size = bytes_from_size(char_to_type(((char*) (*toks)[*loc].val)[0]));
509 | if (buf->alignment > this_size)
510 | return buf->alignment;
511 | else
512 | return this_size;
513 | } else if ((*toks)[*loc].type == TokInteger) {
514 | // If it's an opaque type just specifying the number of bytes, like `24`
515 | return (*toks)[*loc].val;
516 | } else if ((*toks)[*loc].type == TokLBrace) {
517 | // If it's an enum type, return the maximum size
518 | size_t max_size = 0;
519 | (*loc)++;
520 | while ((*toks)[*loc].type != TokRBrace) {
521 | if ((*toks)[*loc].type == TokComma) {
522 | (*loc)++;
523 | continue;
524 | }
525 | size_t this_size = get_element_size(toks, loc, buf); // eww recursion
526 | if (this_size > max_size)
527 | max_size = this_size;
528 | (*loc)++;
529 | }
530 | return max_size;
531 | } else {
532 | printf("Invalid element for aggregate type on line %zu.\n", (*toks)[*loc].line);
533 | exit(1);
534 | }
535 | }
536 |
537 | void parse_aggtype_size(Token **toks, size_t *loc, AggregateType *buf) {
538 | buf->size_bytes = 0;
539 | while ((*toks)[*loc].type != TokRBrace) {
540 | if ((*toks)[*loc].type == TokComma) {
541 | (*loc)++;
542 | continue;
543 | }
544 | buf->size_bytes += get_element_size(toks, loc, buf);
545 | (*loc)++;
546 | }
547 | }
548 |
549 | // return number of tokens to skip.
550 | size_t parse_aggtype(Token **toks, size_t loc, AggregateType *buf) {
551 | size_t start_loc = loc;
552 | if ((*toks)[loc + 1].type != TokAggType) {
553 | printf("Expected type name after type token, got something else on line %zu\n", (*toks)[loc + 1].line);
554 | exit(1);
555 | }
556 | buf->name = (char*) (*toks)[loc + 1].val;
557 | if ((*toks)[loc + 2].type != TokEqu) {
558 | printf("Equal sign expected after type name in aggregate type definiton, got something else on line %zu\n", (*toks)[loc + 2].line);
559 | exit(1);
560 | }
561 | if ((*toks)[loc + 3].type == TokAlign) {
562 | if ((*toks)[loc + 4].type != TokInteger) {
563 | printf("Expected integer literal after Align token on line %zu\n", (*toks)[loc + 4].line);
564 | exit(1);
565 | }
566 | buf->alignment = (*toks)[loc + 4].val;
567 | loc += 2;
568 | } else
569 | buf->alignment = 1;
570 | if ((*toks)[loc + 3].type != TokLBrace) {
571 | printf("Expected left brace in aggregate type definition on line %zu\n", (*toks)[loc + 3].line);
572 | exit(1);
573 | }
574 | loc += 4;
575 | parse_aggtype_size(toks, &loc, buf);
576 | return loc - start_loc;
577 | }
578 |
579 | // returns number of tokens to skip
580 | size_t parse_filedbg(Token **toks, size_t loc, FileDbg *filebuf) {
581 | size_t start_loc = loc;
582 | if ((*toks)[loc + 1].type != TokInteger) {
583 | printf("First argument of .file must be integer literal (file identification number)\n");
584 | exit(1);
585 | }
586 | if ((*toks)[loc + 2].type != TokStrLit) {
587 | printf("Second argument of .file must be string literal (file name)\n");
588 | exit(1);
589 | }
590 | filebuf->id = (*toks)[loc + 1].val;
591 | filebuf->fname = (char*) (*toks)[loc + 2].val;
592 | loc += 2;
593 | return loc - start_loc;
594 | }
595 |
596 | // Returns vector of functions
597 | Function **parse_program(Token **toks, Global ***globals_buf, AggregateType ***aggtypes_buf, FileDbg ***filesdbg_buf) {
598 | size_t num_toks = vec_size(toks);
599 | Function **functions = vec_new(sizeof(Function));
600 | *globals_buf = vec_new(sizeof(Global));
601 | *aggtypes_buf = vec_new(sizeof(AggregateType));
602 | *filesdbg_buf = vec_new(sizeof(FileDbg));
603 | for (size_t tok = 0; tok < num_toks; tok++) {
604 | if ((*toks)[tok].type == TokFunction || (*toks)[tok].type == TokExport) {
605 | Function fnbuf;
606 | tok += parse_function(toks, tok, &fnbuf) - 1;
607 | vec_push(functions, fnbuf);
608 | } else if ((*toks)[tok].type == TokNewLine) {
609 | continue;
610 | } else if ((*toks)[tok].type == TokData || (*toks)[tok].type == TokSection) {
611 | Global newglobal;
612 | tok += parse_global(toks, tok, &newglobal);
613 | vec_push(*globals_buf, newglobal);
614 | } else if ((*toks)[tok].type == TokType) {
615 | AggregateType newtype;
616 | tok += parse_aggtype(toks, tok, &newtype);
617 | vec_push(*aggtypes_buf, newtype);
618 | } else if ((*toks)[tok].type == TokFile) {
619 | FileDbg newfile;
620 | tok += parse_filedbg(toks, tok, &newfile);
621 | vec_push(*filesdbg_buf, newfile);
622 | } else {
623 | printf("Something was found outside of a function body which isn't a constant definition on line %zu: %s, token id %u, val %p\n", (*toks)[tok].line, token_to_str((*toks)[tok].type), (*toks)[tok].type, (void*) (*toks)[tok].val);
624 | exit(1);
625 | }
626 | }
627 | return functions;
628 | }
629 |
--------------------------------------------------------------------------------
/src/strslice.c:
--------------------------------------------------------------------------------
1 | /* String slice implementation for UYB.
2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | String *string_from(char *from) {
10 | String *str = (String*) malloc(sizeof(String));
11 | str->len = strlen(from);
12 | str->data = (char*) malloc(str->len + 1);
13 | strcpy(str->data, from);
14 | return str;
15 | }
16 |
17 | void string_push(String *str, char *new) {
18 | size_t new_len = str->len + strlen(new);
19 | str->data = realloc(str->data, new_len + 1);
20 | strcpy(str->data + str->len, new);
21 | str->len = new_len;
22 | }
23 |
24 | void string_push_fmt(String *str, char *fmt, ...) {
25 | va_list args;
26 | va_start(args, fmt);
27 | int length = vsnprintf(NULL, 0, fmt, args);
28 | va_end(args);
29 | size_t new_len = str->len + length;
30 | str->data = realloc(str->data, new_len + 1);
31 | va_start(args, fmt);
32 | vsnprintf(str->data + str->len, length + 1, fmt, args);
33 | va_end(args);
34 | str->len = new_len;
35 | }
36 |
--------------------------------------------------------------------------------
/src/target/IR/build.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 |
8 | void build_function(Function IR, FILE *outf) {
9 | char *rettype = get_full_char_str(IR.ret_is_struct, IR.return_type, IR.return_struct);
10 | fprintf(outf, "%sfunction %s $%s(", (IR.is_global) ? "export " : "", rettype, IR.name);
11 | for (size_t arg = 0; arg < IR.num_args; arg++) {
12 | char *argtype = get_full_char_str(IR.args[arg].type_is_struct, IR.args[arg].type, IR.args[arg].type_struct);
13 | fprintf(outf, "%s %%%s", argtype, IR.args[arg].label);
14 | if (!(arg == IR.num_args - 1 || IR.is_variadic))
15 | fprintf(outf, ", ");
16 | }
17 | if (IR.is_variadic) fprintf(outf, "...");
18 | fprintf(outf, ") {\n");
19 | for (size_t s = 0; s < IR.num_statements; s++) {
20 | if (IR.statements[s].label) {
21 | fprintf(outf, "\t%%%s =%c ", IR.statements[s].label, size_as_char(IR.statements[s].type));
22 | } else {
23 | fprintf(outf, "\t");
24 | }
25 | instructions_IR[IR.statements[s].instruction](IR.statements[s].vals, IR.statements[s].val_types, IR.statements[s], outf);
26 | }
27 | fprintf(outf, "}\n\n");
28 | }
29 |
30 | void build_globals(Global *global_vars, size_t num_global_vars, FILE *outf) {
31 | for (size_t g = 0; g < num_global_vars; g++) {
32 | if (global_vars[g].section) {
33 | fprintf(outf, "section \"%s\"\n", global_vars[g].section);
34 | }
35 | fprintf(outf, "data $%s = align %zu {", global_vars[g].name, global_vars[g].alignment);
36 | for (size_t v = 0; v < global_vars[g].num_vals; v++) {
37 | fprintf(outf, "%c ", size_as_char(global_vars[g].sizes[v]));
38 | if (global_vars[g].types[v] == Number)
39 | fprintf(outf, "%zu", global_vars[g].vals[v]);
40 | else if (global_vars[g].types[v] == StrLit)
41 | fprintf(outf, "\"%s\"", (char*) global_vars[g].vals[v]);
42 | else {
43 | printf("Type for global var must either be Number or StrLit.\n");
44 | exit(1);
45 | }
46 | if (v != global_vars[g].num_vals - 1)
47 | fprintf(outf, ", ");
48 | }
49 | fprintf(outf, "}\n");
50 | }
51 | }
52 |
53 | void build_aggtypes(AggregateType *aggtypes, size_t num_aggtypes, FILE *outf) {
54 | for (size_t i = 0; i < num_aggtypes; i++) {
55 | fprintf(outf, "type :%s = align %zu { %zu }\n", aggtypes[i].name, aggtypes[i].alignment, aggtypes[i].size_bytes);
56 | }
57 | }
58 |
59 | void build_filesdbg(FileDbg *filesdbg, size_t num_filesdbg, FILE *outf) {
60 | for (size_t f = 0; f < num_filesdbg; f++) {
61 | fprintf(outf, ".file %zu \"%s\"\n", filesdbg[f].id, filesdbg[f].fname);
62 | }
63 | }
64 |
65 | void build_program_IR(Function *IR, size_t num_functions, Global *global_vars, size_t num_global_vars, AggregateType *aggtypes, size_t num_aggtypes, FileDbg *dbgfiles, size_t num_dbgfiles, FILE *outf) {
66 | fprintf(outf, "# Generated by UYB for UYB IR\n\n");
67 | build_filesdbg(dbgfiles, num_dbgfiles, outf);
68 | build_globals(global_vars, num_global_vars, outf);
69 | build_aggtypes(aggtypes, num_aggtypes, outf);
70 | for (size_t f = 0; f < num_functions; f++) {
71 | build_function(IR[f], outf);
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/src/target/IR/instructions.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | char *get_full_char_str(bool is_struct, Type type, char *type_struct); // defined in build.c
5 |
6 | static void build_value(uint64_t val, ValType type, FILE *outf) {
7 | if (type == Number) fprintf(outf, "%zu", val);
8 | else if (type == BlkLbl) fprintf(outf, "@%s", (char*) val);
9 | else if (type == Label ) fprintf(outf, "%%%s", (char*) val);
10 | else if (type == Str ) fprintf(outf, "$%s", (char*) val);
11 | else if (type == PhiArg) {
12 | fprintf(outf, "@%s ", ((PhiVal*) val)->blklbl_name);
13 | build_value(((PhiVal*) val)->val, ((PhiVal*) val)->type, outf);
14 | }
15 | }
16 |
17 | static char size_as_char(Type type) {
18 | if (type == Bits8) return 'b';
19 | else if (type == Bits16) return 'h';
20 | else if (type == Bits32) return 'w';
21 | else return 'l';
22 | }
23 |
24 | static void add_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
25 | fprintf(outf, "add ");
26 | build_value(vals[0], types[0], outf);
27 | fprintf(outf, ", ");
28 | build_value(vals[1], types[1], outf);
29 | fprintf(outf, "\n");
30 | }
31 |
32 | static void sub_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
33 | fprintf(outf, "sub ");
34 | build_value(vals[0], types[0], outf);
35 | fprintf(outf, ", ");
36 | build_value(vals[1], types[1], outf);
37 | fprintf(outf, "\n");
38 | }
39 |
40 | static void div_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
41 | fprintf(outf, "div ");
42 | build_value(vals[0], types[0], outf);
43 | fprintf(outf, ", ");
44 | build_value(vals[1], types[1], outf);
45 | fprintf(outf, "\n");
46 | }
47 |
48 | static void udiv_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
49 | fprintf(outf, "udiv ");
50 | build_value(vals[0], types[0], outf);
51 | fprintf(outf, ", ");
52 | build_value(vals[1], types[1], outf);
53 | fprintf(outf, "\n");
54 | }
55 |
56 | static void rem_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
57 | fprintf(outf, "rem ");
58 | build_value(vals[0], types[0], outf);
59 | fprintf(outf, ", ");
60 | build_value(vals[1], types[1], outf);
61 | fprintf(outf, "\n");
62 | }
63 |
64 | static void urem_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
65 | fprintf(outf, "urem ");
66 | build_value(vals[0], types[0], outf);
67 | fprintf(outf, ", ");
68 | build_value(vals[1], types[1], outf);
69 | fprintf(outf, "\n");
70 | }
71 |
72 | static void mul_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
73 | fprintf(outf, "mul ");
74 | build_value(vals[0], types[0], outf);
75 | fprintf(outf, ", ");
76 | build_value(vals[1], types[1], outf);
77 | fprintf(outf, "\n");
78 | }
79 |
80 | static void copy_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
81 | fprintf(outf, "copy ");
82 | build_value(vals[0], types[0], outf);
83 | fprintf(outf, "\n");
84 | }
85 |
86 | static void ret_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
87 | fprintf(outf, "ret ");
88 | build_value(vals[0], types[0], outf);
89 | fprintf(outf, "\n");
90 | }
91 |
92 | static void jmp_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
93 | fprintf(outf, "jmp ");
94 | build_value(vals[0], types[0], outf);
95 | fprintf(outf, "\n");
96 | }
97 |
98 | static void jz_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
99 | fprintf(outf, "jz ");
100 | build_value(vals[0], types[0], outf);
101 | fprintf(outf, ", ");
102 | build_value(vals[1], types[1], outf);
103 | fprintf(outf, "\n");
104 | }
105 |
106 | static void and_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
107 | fprintf(outf, "and ");
108 | build_value(vals[0], types[0], outf);
109 | fprintf(outf, ", ");
110 | build_value(vals[1], types[1], outf);
111 | fprintf(outf, "\n");
112 | }
113 |
114 | static void or_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
115 | fprintf(outf, "or ");
116 | build_value(vals[0], types[0], outf);
117 | fprintf(outf, ", ");
118 | build_value(vals[1], types[1], outf);
119 | fprintf(outf, "\n");
120 | }
121 |
122 | static void blit_build(uint64_t vals[3], ValType types[3], Statement statement, FILE* outf) {
123 | fprintf(outf, "blit ");
124 | build_value(vals[0], types[0], outf);
125 | fprintf(outf, ", ");
126 | build_value(vals[1], types[1], outf);
127 | fprintf(outf, ", ");
128 | build_value(vals[2], types[2], outf);
129 | fprintf(outf, "\n");
130 | }
131 |
132 | static void jnz_build(uint64_t vals[3], ValType types[3], Statement statement, FILE* outf) {
133 | fprintf(outf, "jnz ");
134 | build_value(vals[0], types[0], outf);
135 | fprintf(outf, ", ");
136 | build_value(vals[1], types[1], outf);
137 | fprintf(outf, ", ");
138 | build_value(vals[2], types[2], outf);
139 | fprintf(outf, "\n");
140 | }
141 |
142 | static void xor_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
143 | fprintf(outf, "xor ");
144 | build_value(vals[0], types[0], outf);
145 | fprintf(outf, ", ");
146 | build_value(vals[1], types[1], outf);
147 | fprintf(outf, "\n");
148 | }
149 |
150 | static void shl_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
151 | fprintf(outf, "shl ");
152 | build_value(vals[0], types[0], outf);
153 | fprintf(outf, ", ");
154 | build_value(vals[1], types[1], outf);
155 | fprintf(outf, "\n");
156 | }
157 |
158 | static void shr_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
159 | fprintf(outf, "shr ");
160 | build_value(vals[0], types[0], outf);
161 | fprintf(outf, ", ");
162 | build_value(vals[1], types[1], outf);
163 | fprintf(outf, "\n");
164 | }
165 |
166 | static void store_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
167 | fprintf(outf, "store%c ", size_as_char(statement.type));
168 | build_value(vals[0], types[0], outf);
169 | fprintf(outf, ", ");
170 | build_value(vals[1], types[1], outf);
171 | fprintf(outf, "\n");
172 | }
173 |
174 | static void load_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
175 | fprintf(outf, "load%c ", size_as_char(statement.type));
176 | build_value(vals[0], types[0], outf);
177 | fprintf(outf, ", ");
178 | build_value(vals[1], types[1], outf);
179 | fprintf(outf, "\n");
180 | }
181 |
182 | static void neg_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
183 | fprintf(outf, "neg ");
184 | build_value(vals[0], types[0], outf);
185 | fprintf(outf, "\n");
186 | }
187 |
188 | static void alloc_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
189 | fprintf(outf, "alloc ");
190 | build_value(vals[0], types[0], outf);
191 | fprintf(outf, "\n");
192 | }
193 |
194 | static void call_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
195 | fprintf(outf, "call ");
196 | build_value(vals[0], types[0], outf);
197 | fprintf(outf, "(");
198 | FunctionArgList *args = (FunctionArgList*) vals[1];
199 | size_t num_args = args->num_args;
200 | for (size_t arg = 0; arg < num_args; arg++) {
201 | char *arg_type = get_full_char_str(args->args_are_structs[arg], args->arg_sizes[arg], args->arg_struct_types[arg]);
202 | fprintf(outf, "%s ", arg_type);
203 | build_value((uint64_t) args->args[arg], args->arg_types[arg], outf);
204 | if (arg != num_args - 1)
205 | fprintf(outf, ", ");
206 | }
207 | fprintf(outf, ")\n");
208 | }
209 |
210 | static void eq_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
211 | fprintf(outf, "ceq ");
212 | build_value(vals[0], types[0], outf);
213 | fprintf(outf, ", ");
214 | build_value(vals[1], types[1], outf);
215 | fprintf(outf, "\n");
216 | }
217 |
218 | static void ne_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
219 | fprintf(outf, "cne ");
220 | build_value(vals[0], types[0], outf);
221 | fprintf(outf, ", ");
222 | build_value(vals[1], types[1], outf);
223 | fprintf(outf, "\n");
224 | }
225 |
226 | static void sle_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
227 | fprintf(outf, "csle ");
228 | build_value(vals[0], types[0], outf);
229 | fprintf(outf, ", ");
230 | build_value(vals[1], types[1], outf);
231 | fprintf(outf, "\n");
232 | }
233 |
234 | static void slt_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
235 | fprintf(outf, "cslt ");
236 | build_value(vals[0], types[0], outf);
237 | fprintf(outf, ", ");
238 | build_value(vals[1], types[1], outf);
239 | fprintf(outf, "\n");
240 | }
241 |
242 | static void sge_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
243 | fprintf(outf, "csge ");
244 | build_value(vals[0], types[0], outf);
245 | fprintf(outf, ", ");
246 | build_value(vals[1], types[1], outf);
247 | fprintf(outf, "\n");
248 | }
249 |
250 | static void sgt_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
251 | fprintf(outf, "csgt ");
252 | build_value(vals[0], types[0], outf);
253 | fprintf(outf, ", ");
254 | build_value(vals[1], types[1], outf);
255 | fprintf(outf, "\n");
256 | }
257 |
258 | static void ule_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
259 | fprintf(outf, "cule ");
260 | build_value(vals[0], types[0], outf);
261 | fprintf(outf, ", ");
262 | build_value(vals[1], types[1], outf);
263 | fprintf(outf, "\n");
264 | }
265 |
266 | static void ult_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
267 | fprintf(outf, "cult ");
268 | build_value(vals[0], types[0], outf);
269 | fprintf(outf, ", ");
270 | build_value(vals[1], types[1], outf);
271 | fprintf(outf, "\n");
272 | }
273 |
274 | static void uge_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
275 | fprintf(outf, "cuge ");
276 | build_value(vals[0], types[0], outf);
277 | fprintf(outf, ", ");
278 | build_value(vals[1], types[1], outf);
279 | fprintf(outf, "\n");
280 | }
281 |
282 | static void ugt_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
283 | fprintf(outf, "cugt ");
284 | build_value(vals[0], types[0], outf);
285 | fprintf(outf, ", ");
286 | build_value(vals[1], types[1], outf);
287 | fprintf(outf, "\n");
288 | }
289 |
290 | static void ext_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
291 | fprintf(outf, "ext ");
292 | build_value(vals[0], types[0], outf);
293 | fprintf(outf, "\n");
294 | }
295 |
296 | static void hlt_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
297 | fprintf(outf, "hlt\n");
298 | }
299 |
300 | static void blklbl_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
301 | fprintf(outf, "@%s\n", (char*) vals[0]);
302 | }
303 |
304 | static void phi_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) {
305 | fprintf(outf, "phi ");
306 | build_value(vals[0], types[0], outf);
307 | fprintf(outf, ", ");
308 | build_value(vals[1], types[1], outf);
309 | fprintf(outf, "\n");
310 | }
311 |
312 | static void vastart_build(uint64_t vals[2], ValType types[2], Statement statement, FILE *outf) {
313 | fprintf(outf, "vastart ");
314 | build_value(vals[0], types[0], outf);
315 | fprintf(outf, "\n");
316 | }
317 |
318 | static void vaarg_build(uint64_t vals[2], ValType types[2], Statement statement, FILE *outf) {
319 | fprintf(outf, "vaarg ");
320 | build_value(vals[0], types[0], outf);
321 | fprintf(outf, "\n");
322 | }
323 |
324 | static void loc_build(uint64_t vals[3], ValType types[3], Statement statement, FILE *outf) {
325 | if (types[0] != Number || types[1] != Number || types[2] != Number) {
326 | printf("All arguments of .loc instruction must be an integer literal.\n");
327 | exit(1);
328 | }
329 | fprintf(outf, ".loc %zu %zu %zu\n", vals[0], vals[1], vals[2]);
330 | }
331 |
332 | static void asm_build(uint64_t vals[3], ValType types[3], Statement statement, FILE *outf) {
333 | printf("IR target does not support inline assembly statement in UYB. Please use an architecture-specific target for this feature.\n");
334 | exit(1);
335 | }
336 |
337 | void (*instructions_IR[])(uint64_t[2], ValType[2], Statement, FILE*) = {
338 | add_build, sub_build, div_build, mul_build, copy_build, ret_build, call_build, jz_build,
339 | neg_build, udiv_build, rem_build, urem_build, and_build, or_build, xor_build, shl_build, shr_build,
340 | store_build, load_build, blit_build, alloc_build, eq_build, ne_build, sle_build, slt_build, sge_build, sgt_build, ule_build, ult_build,
341 | uge_build, ugt_build, ext_build, hlt_build, blklbl_build, jmp_build, jnz_build, phi_build, vastart_build, vaarg_build,
342 | loc_build, asm_build,
343 | };
344 |
--------------------------------------------------------------------------------
/src/target/x86_64/build.c:
--------------------------------------------------------------------------------
1 | /* Main code generation file for UYB x86_64 target.
2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 |
/* Aggregate type table of the program currently being built. Both variables
 * are assigned by build_program_x86_64 and read by build_function when it
 * looks up struct arguments through find_aggtype. */
AggregateType *aggregate_types; /* TODO: Move all global vars (including those in register.c) */
size_t num_aggregate_types; /* into a single structure. */
13 |
14 | size_t type_to_size(Type type) {
15 | if (type == Bits8) return 1;
16 | else if (type == Bits8) return 2;
17 | else if (type == Bits16) return 4;
18 | else if (type == Bits64) return 8;
19 | return 0;
20 | }
21 |
/* Assembler data directives for numeric globals. build_program_x86_64 indexes
 * this table with the Type of each value when it writes the data section.
 * NOTE(review): presumably Type enumerates Bits8, Bits16, Bits32, Bits64 in
 * that order starting at 0 -- confirm against the enum definition. */
char *global_sizes[] = {
".byte", ".value", ".long", ".quad",
};
25 |
26 | char *type_as_str(Type type, char *struct_type, bool is_struct) {
27 | if (is_struct) {
28 | char *buf = aalloc(strlen(struct_type) + 2);
29 | sprintf(buf, ":%s", struct_type);
30 | return buf;
31 | }
32 | if (type == Bits8) return "byte";
33 | else if (type == Bits16) return "word";
34 | else if (type == Bits32) return "dword";
35 | else if (type == Bits64) return "qword";
36 | else {
37 | printf("Invalid type: %u\n", type);
38 | return "invalid_type";
39 | }
40 | }
41 |
42 | static String *build_function(Function IR) {
43 | reg_init_fn(IR);
44 | String *fnbuf0 = string_from("\n");
45 | string_push_fmt(fnbuf0, "// %s %s(", type_as_str(IR.return_type, IR.return_struct, IR.ret_is_struct), IR.name);
46 | for (size_t arg = 0; arg < IR.num_args; arg++) {
47 | string_push_fmt(fnbuf0, "%s %%%s", type_as_str(IR.args[arg].type, IR.args[arg].type_struct, IR.args[arg].type_is_struct), IR.args[arg].label);
48 | if (arg != IR.num_args - 1) string_push(fnbuf0, ", ");
49 | }
50 | string_push_fmt(fnbuf0, ") {\n%s", IR.name);
51 | String *fnbuf = string_from(":\n");
52 | String *structarg_buf = string_from("\n");
53 | size_t reg_arg_off = 0;
54 | for (size_t arg = 0; arg < IR.num_args; arg++) {
55 | if (IR.args[arg].type_is_struct) {
56 | if (arg > 4) {
57 | printf("Only the first 5 arguments accepted by a function can be structures. (TODO)\n");
58 | exit(1);
59 | }
60 | AggregateType *aggtype = find_aggtype(IR.args[arg].type_struct, aggregate_types, num_aggregate_types);
61 | char *label_loc = reg_alloc(IR.args[arg].label, Bits64);
62 | if (aggtype->size_bytes <= 16) {
63 | // allocate space on the stack for it
64 | regalloc.bytes_rip_pad += (aggtype->size_bytes <= 8) ? 1 : 2;
65 | string_push_fmt(structarg_buf, "\tlea -%llu(%rbp), %%rdi\n"
66 | "\tmov %%rdi, %s\n",
67 | regalloc.bytes_rip_pad, label_loc);
68 | // copy the data
69 | string_push_fmt(structarg_buf, "\tmov %s, (%s)\n", arg_regs[arg], label_loc);
70 | if (aggtype->size_bytes > 8) {
71 | // copy the second byte
72 | string_push_fmt(structarg_buf, "\tmov %s, 8(%s)\n", arg_regs[arg + 1], label_loc);
73 | }
74 | } else {
75 | regalloc.bytes_rip_pad += aggtype->size_bytes;
76 | // copy all the data
77 | string_push_fmt(structarg_buf, "\tmov %s, %%rsi\n", arg_regs[arg + 1]);
78 | string_push_fmt(structarg_buf, "\tmov %zu, %%rdi\n", regalloc.bytes_rip_pad);
79 | string_push_fmt(structarg_buf, "\tmov %zu, %%rcx\n", aggtype->size_bytes);
80 | string_push(structarg_buf, "\trep movsb\n");
81 | string_push_fmt(structarg_buf, "\tmov %zu, %s\n", regalloc.bytes_rip_pad, label_loc);
82 | }
83 | } else if (arg > 5) {
84 | // it's on the stack
85 | size_t *new_vec_val = aalloc(sizeof(size_t) * 2);
86 | new_vec_val[0] = (size_t) IR.args[arg].label;
87 | reg_arg_off += type_to_size(IR.args[arg].type);
88 | new_vec_val[1] = reg_arg_off + 8;
89 | vec_push(regalloc.labels_as_offsets, new_vec_val);
90 | } else {
91 | reg_alloc(IR.args[arg].label, IR.args[arg].type);
92 | for (size_t i = 0; i < sizeof(label_reg_tab) / sizeof(label_reg_tab[0]); i++) {
93 | if (label_reg_tab[i][1] && !strcmp(IR.args[arg].label, label_reg_tab[i][1])) reg_alloc_tab[i][1]++;
94 | }
95 | }
96 | }
97 | for (size_t s = 0; s < IR.num_statements; s++) {
98 | update_regalloc();
99 | disasm_instr(fnbuf, IR.statements[s]);
100 | // expects result in rax
101 | instructions_x86_64[IR.statements[s].instruction](IR.statements[s].vals, IR.statements[s].val_types, IR.statements[s], fnbuf);
102 | }
103 | size_t sz = vec_size(regalloc.used_regs_vec);
104 | if (((regalloc.bytes_rip_pad & 0b11111) != 0b10000) && regalloc.bytes_rip_pad) regalloc.bytes_rip_pad += 8;
105 | if (sz & 1) regalloc.bytes_rip_pad += 8;
106 | string_push(fnbuf, "// }\n");
107 | string_push(fnbuf0, ":\n");
108 | if (IR.is_variadic) {
109 | string_push(fnbuf0, "\t // Start pushing all variadic argument registers\n");
110 | for (ssize_t arg = sizeof(arg_regs) / sizeof(arg_regs[0]) - 1; arg >= 0; arg--)
111 | string_push_fmt(fnbuf0, "\tpush %s\n", arg_regs[arg]);
112 | string_push(fnbuf0, "\t // End var args\n");
113 | regalloc.bytes_rip_pad += 8;
114 | }
115 | string_push(fnbuf0, "\tpush %rbp\n\tmov %rsp, %rbp\n");
116 | if (regalloc.bytes_rip_pad)
117 | string_push_fmt(fnbuf0, "\tsub $%llu, %%rsp\n", regalloc.bytes_rip_pad);
118 | for (size_t i = 0; i < sz; i++)
119 | string_push_fmt(fnbuf0, "\tpush %s // used reg\n", (*regalloc.used_regs_vec)[i]);
120 | char **argregs_at = arg_regs;
121 | for (size_t arg = 0; arg < IR.num_args; arg++) {
122 | if (IR.args[arg].type_is_struct) {
123 | AggregateType *aggtype = find_aggtype(IR.args[arg].type_struct, aggregate_types, num_aggregate_types);
124 | if (aggtype->size_bytes <= 8 || aggtype->size_bytes > 16)
125 | argregs_at++;
126 | else
127 | argregs_at += 2;
128 | continue;
129 | }
130 | char *reg = label_to_reg(0, IR.args[arg].label, true);
131 | if (reg)
132 | string_push_fmt(fnbuf0, "\tmov %s, %s\n", reg_as_size(*argregs_at, IR.args[arg].type), reg); // TODO: fix with >6 args
133 | argregs_at++;
134 | }
135 | string_push(fnbuf0, structarg_buf->data + 1);
136 | string_push(fnbuf0, fnbuf->data + 2);
137 | return fnbuf0;
138 | }
139 |
140 | void build_program_x86_64(Function *IR, size_t num_functions, Global *global_vars, size_t num_global_vars, AggregateType *aggtypes, size_t num_aggtypes, FileDbg *dbgfiles, size_t num_dbgfiles, FILE *outf) {
141 | aggregate_types = aggtypes;
142 | num_aggregate_types = num_aggtypes;
143 | char* **globals = vec_new(sizeof(char*));
144 | String* **function_statements = vec_new(sizeof(String**));
145 | for (size_t f = 0; f < num_functions; f++) {
146 | if (IR[f].is_global) vec_push(globals, IR[f].name);
147 | vec_push(function_statements, build_function(IR[f]));
148 | }
149 | fprintf(outf, "// Generated by UYB for x86_64\n");
150 | for (size_t f = 0; f < num_dbgfiles; f++)
151 | fprintf(outf, ".file %zu \"%s\"\n", dbgfiles[f].id, dbgfiles[f].fname);
152 | fprintf(outf, ".data\n");
153 | for (size_t g = 0; g < num_global_vars; g++) {
154 | if (global_vars[g].section)
155 | fprintf(outf, ".section \"%s\"\n", global_vars[g].section);
156 | fprintf(outf, "%s:\n", global_vars[g].name);
157 | for (size_t i = 0; i < global_vars[g].num_vals; i++) {
158 | if (global_vars[g].alignment > 1)
159 | fprintf(outf, ".align %zu\n", global_vars[g].alignment);
160 | if (global_vars[g].types[i] == Number)
161 | fprintf(outf, "\t%s %zu\n", global_sizes[global_vars[g].sizes[i]], global_vars[g].vals[i]);
162 | else if (global_vars[g].types[i] == StrLit)
163 | fprintf(outf, "\t.ascii \"%s\"\n", (char*) global_vars[g].vals[i]);
164 | else {
165 | printf("Type for global var must either be Number or StrLit.\n");
166 | exit(1);
167 | }
168 | }
169 | if (global_vars[g].section)
170 | fprintf(outf, ".data\n");
171 | }
172 | fprintf(outf, "\n.text\n");
173 | for (size_t i = 0; i < vec_size(globals); i++)
174 | fprintf(outf, ".globl %s\n", (*globals)[i]);
175 | for (size_t i = 0; i < vec_size(function_statements); i++)
176 | fprintf(outf, "%s", (*function_statements)[i]->data);
177 | }
178 |
--------------------------------------------------------------------------------
/src/target/x86_64/instructions.c:
--------------------------------------------------------------------------------
1 | /* Individual instruction implementations for x86_64 target of UYB.
2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 |
13 | // defined in build.c
14 | extern AggregateType *aggregate_types;
15 | extern size_t num_aggregate_types;
16 |
17 | // defined in main.c
18 | extern int is_position_independent;
19 |
// AT&T operand-size suffixes; the code generators index this table with a value's type.
char sizes[] = { 'b', 'w', 'l', 'q' };
23 |
// Names of the accumulator register at each operand width (no leading '%'), kept as a
// table because rax is needed far more often than any other register.
char *rax_versions[] = {
    "al",
    "ax",
    "eax",
    "rax",
};
28 |
29 | char *instruction_as_str(Instruction instr) {
30 | if (instr == ADD ) return "ADD";
31 | else if (instr == SUB ) return "SUB";
32 | else if (instr == DIV ) return "DIV";
33 | else if (instr == MUL ) return "MUL";
34 | else if (instr == COPY ) return "COPY";
35 | else if (instr == RET ) return "RET";
36 | else if (instr == CALL ) return "CALL";
37 | else if (instr == JZ ) return "JZ";
38 | else if (instr == NEG ) return "NEG";
39 | else if (instr == UDIV ) return "UDIV";
40 | else if (instr == STORE ) return "STORE";
41 | else if (instr == LOAD ) return "LOAD";
42 | else if (instr == BLIT ) return "BLIT";
43 | else if (instr == ALLOC ) return "ALLOC";
44 | else if (instr == EQ ) return "EQ";
45 | else if (instr == NE ) return "NE";
46 | else if (instr == SGE ) return "SGE";
47 | else if (instr == SGT ) return "SGT";
48 | else if (instr == SLE ) return "SLE";
49 | else if (instr == SLT ) return "SLT";
50 | else if (instr == UGE ) return "UGE";
51 | else if (instr == UGT ) return "UGT";
52 | else if (instr == ULE ) return "ULE";
53 | else if (instr == ULT ) return "ULT";
54 | else if (instr == EXT ) return "EXT";
55 | else if (instr == HLT ) return "HLT";
56 | else if (instr == BLKLBL ) return "BLKLBL";
57 | else if (instr == JMP ) return "JMP";
58 | else if (instr == JNZ ) return "JNZ";
59 | else if (instr == SHR ) return "SHR";
60 | else if (instr == SHL ) return "SHL";
61 | else if (instr == AND ) return "AND";
62 | else if (instr == OR ) return "OR";
63 | else if (instr == PHI ) return "PHI";
64 | else if (instr == VASTART) return "VASTART";
65 | else if (instr == VAARG ) return "VAARG";
66 | else if (instr == LOC ) return "LOC";
67 | else if (instr == ASM ) return "ASM";
68 | else return "Unknown instruction";
69 | }
70 |
71 | static void print_val(String *fnbuf, uint64_t val, ValType type) {
72 | if (type == Number ) string_push_fmt(fnbuf, "$%llu", val);
73 | else if (type == Label ) string_push_fmt(fnbuf, "%%%s", (char*) val);
74 | else if (type == Str ) string_push_fmt(fnbuf, "$%s", (char*) val);
75 | else if (type == FunctionArgs ) string_push_fmt(fnbuf, "(function arguments)");
76 | else if (type == BlkLbl ) string_push_fmt(fnbuf, "@%s", (char*) val);
77 | else if (type == InlineAssembly ) string_push_fmt(fnbuf, "(inline assembly values)");
78 | else if (type == PhiArg) {
79 | string_push_fmt(fnbuf, "@%s ", ((PhiVal*) val)->blklbl_name);
80 | print_val(fnbuf, ((PhiVal*) val)->val, ((PhiVal*) val)->type);
81 | } else {
82 | printf("Invalid value type\n");
83 | exit(1);
84 | }
85 | }
86 |
87 | void disasm_instr(String *fnbuf, Statement statement) {
88 | if (statement.instruction == BLKLBL) return;
89 | string_push(fnbuf, "\t// ");
90 | if (statement.label) {
91 | string_push_fmt(fnbuf, "%%%s =%s ", statement.label, type_as_str(statement.type, 0, false));
92 | }
93 | string_push_fmt(fnbuf, "%s ", instruction_as_str(statement.instruction));
94 | if (statement.val_types[0] != Empty) print_val(fnbuf, statement.vals[0], statement.val_types[0]);
95 | if (statement.val_types[1] != Empty) {
96 | string_push(fnbuf, ", ");
97 | print_val(fnbuf, statement.vals[1], statement.val_types[1]);
98 | }
99 | if (statement.val_types[2] != Empty) {
100 | string_push(fnbuf, ", ");
101 | print_val(fnbuf, statement.vals[2], statement.val_types[2]);
102 | }
103 | string_push(fnbuf, "\n");
104 | }
105 |
106 | static void build_value_noresize(ValType type, uint64_t val, bool can_prepend_dollar, String *fnbuf) {
107 | if (type == Number) string_push_fmt(fnbuf, "$%llu", val);
108 | else if (type == BlkLbl) string_push_fmt(fnbuf, ".%s_%s", regalloc.current_fn->name, (char*) val);
109 | else if (type == Label ) string_push_fmt(fnbuf, "%s", label_to_reg_noresize(0, (char*) val, false));
110 | else if (type == Str ) {
111 | if (is_position_independent)
112 | string_push_fmt(fnbuf, "%s(%%rip)", (char*) val);
113 | else
114 | string_push_fmt(fnbuf, "%s%s", (can_prepend_dollar) ? "$" : "", (char*) val);
115 | }
116 | }
117 |
118 | static void build_value(ValType type, uint64_t val, bool can_prepend_dollar, String *fnbuf) {
119 | if (type == Number) string_push_fmt(fnbuf, "$%llu", val);
120 | else if (type == BlkLbl) string_push_fmt(fnbuf, ".%s_%s", regalloc.current_fn->name, (char*) val);
121 | else if (type == Label ) string_push_fmt(fnbuf, "%s", label_to_reg(0, (char*) val, false));
122 | else if (type == Str ) {
123 | if (is_position_independent)
124 | string_push_fmt(fnbuf, "%s(%%rip)", (char*) val);
125 | else
126 | string_push_fmt(fnbuf, "%s%s", (can_prepend_dollar) ? "$" : "", (char*) val);
127 | }
128 | }
129 |
130 | static void operation_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf, char *operation) {
131 | char *label_loc = reg_alloc(statement.label, statement.type);
132 | if (label_loc[0] != '%') { // label stored in memory address on stack
133 | string_push_fmt(fnbuf, "\t%s%c ", (types[0] == Str && is_position_independent) ? "lea" : "mov", sizes[statement.type]);
134 | build_value(types[0], vals[0], true, fnbuf);
135 | string_push_fmt(fnbuf, ", %%%s\n", rax_versions[statement.type]);
136 | string_push_fmt(fnbuf, "\t%s ", operation);
137 | build_value(types[1], vals[1], true, fnbuf);
138 | string_push_fmt(fnbuf, ", %%%s\n", rax_versions[statement.type]);
139 | string_push_fmt(fnbuf, "\tmov%c %%%s, %s\n", sizes[statement.type], rax_versions[statement.type], label_loc);
140 | } else { // stored in register
141 | string_push_fmt(fnbuf, "\t%s%c ", (types[0] == Str && is_position_independent) ? "lea" : "mov", sizes[statement.type]);
142 | build_value(types[0], vals[0], true, fnbuf);
143 | string_push_fmt(fnbuf, ", %s\n", label_loc);
144 | string_push_fmt(fnbuf, "\t%s%c ", operation, sizes[statement.type]);
145 | build_value(types[1], vals[1], true, fnbuf);
146 | string_push_fmt(fnbuf, ", %s\n", label_loc);
147 | }
148 | }
149 |
150 | static void add_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
151 | operation_build(vals, types, statement, fnbuf, "add");
152 | }
153 |
154 | static void sub_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
155 | operation_build(vals, types, statement, fnbuf, "sub");
156 | }
157 |
158 | static void and_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
159 | operation_build(vals, types, statement, fnbuf, "and");
160 | }
161 |
162 | static void or_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
163 | operation_build(vals, types, statement, fnbuf, "or");
164 | }
165 |
166 | static void xor_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
167 | operation_build(vals, types, statement, fnbuf, "xor");
168 | }
169 |
170 | static void div_both_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf, bool is_signed, bool get_remainder) {
171 | char *label_loc = reg_alloc(statement.label, statement.type);
172 | string_push_fmt(fnbuf, "\tmov%c ", sizes[statement.type]);
173 | build_value(types[0], vals[0], true, fnbuf);
174 | string_push_fmt(fnbuf, ", %%%s\n"
175 | "\txor %rdx, %rdx\n", rax_versions[statement.type]);
176 | string_push_fmt(fnbuf, "\t%s%c ", (is_signed) ? "idiv" : "div", sizes[statement.type]);
177 | build_value(types[1], vals[1], true, fnbuf);
178 | string_push(fnbuf, "\n");
179 | string_push_fmt(fnbuf, "\tmov %%%s, %s\n", (get_remainder) ? reg_as_size("%rdx", statement.type) : rax_versions[statement.type], label_loc);
180 | }
181 |
182 | static void div_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
183 | div_both_build(vals, types, statement, fnbuf, true, false);
184 | }
185 |
186 | static void udiv_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
187 | div_both_build(vals, types, statement, fnbuf, false, false);
188 | }
189 |
190 | static void rem_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
191 | div_both_build(vals, types, statement, fnbuf, true, true);
192 | }
193 |
194 | static void urem_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
195 | div_both_build(vals, types, statement, fnbuf, false, true);
196 | }
197 |
198 | static void mul_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
199 | char *label_loc = reg_alloc(statement.label, statement.type);
200 | bool is_imm = types[1] == Number || types[1] == Str;
201 | if (is_imm) {
202 | string_push_fmt(fnbuf, "\tmov%c ", sizes[statement.type]);
203 | build_value(types[1], vals[1], true, fnbuf);
204 | string_push_fmt(fnbuf, ", %s\n", reg_as_size("%rdi", statement.type));
205 | }
206 | string_push_fmt(fnbuf, "\tmov%c ", sizes[statement.type]);
207 | build_value(types[0], vals[0], true, fnbuf);
208 | string_push_fmt(fnbuf, ", %%%s\n", rax_versions[statement.type]);
209 | string_push_fmt(fnbuf, "\tmul%c ", sizes[statement.type]);
210 | if (is_imm)
211 | string_push_fmt(fnbuf, "%s", reg_as_size("%rdi", statement.type));
212 | else
213 | build_value(types[1], vals[1], true, fnbuf);
214 | string_push(fnbuf, "\n");
215 | string_push_fmt(fnbuf, "\tmov%c %%%s, %s\n", sizes[statement.type], rax_versions[statement.type], label_loc);
216 | }
217 |
218 | static void copy_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
219 | char *label_loc = reg_alloc(statement.label, statement.type);
220 | string_push_fmt(fnbuf, "\t%s%c ", (types[0] == Str && is_position_independent) ? "lea" : "mov", sizes[statement.type]);
221 | build_value(types[0], vals[0], true, fnbuf);
222 | if (label_loc[0] == '%') // stored in reg
223 | string_push_fmt(fnbuf, ", %s\n", label_loc);
224 | else { // stored in memory
225 | string_push_fmt(fnbuf, ", %%%s\n", rax_versions[statement.type]);
226 | string_push_fmt(fnbuf, "\tmov%c %%%s, %s\n", sizes[statement.type], rax_versions[statement.type], label_loc);
227 | }
228 | }
229 |
230 | static void ret_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
231 | if (types[0] == Empty || (types[0] == Number && !vals[0])) {
232 | string_push(fnbuf, "\txor %rax, %rax\n");
233 | } else {
234 | if (regalloc.current_fn->ret_is_struct) {
235 | if (types[0] != Label) {
236 | printf("Tried to return a non-struct value from a function meant to return a struct.\n");
237 | exit(1);
238 | }
239 | AggregateType *aggtype = find_aggtype(regalloc.current_fn->return_struct, aggregate_types, num_aggregate_types);
240 | if (aggtype->size_bytes > 8 && aggtype->size_bytes <= 16) {
241 | char *label = label_to_reg_noresize(0, (char*) vals[0], false);
242 | string_push_fmt(fnbuf, "\tmov %s, %%rdi\n", label);
243 | string_push(fnbuf, "\tmov (%rdi), %rax\n"); // save lower 8 bytes
244 | string_push(fnbuf, "\tmov 8(%rdi), %rdx\n"); // save higher 8 bytes
245 | goto end_save;
246 | } else if (aggtype->size_bytes <= 8) {
247 | char *label = label_to_reg_noresize(0, (char*) vals[0], false);
248 | string_push_fmt(fnbuf, "\tmov %s, %%rdi\n", label);
249 | string_push(fnbuf, "\tmov (%rdi), %rax\n"); // save lower 8 bytes
250 | goto end_save;
251 | }
252 | }
253 | string_push_fmt(fnbuf, "\t%s ", (types[0] == Str && is_position_independent) ? "lea" : "mov");
254 | build_value_noresize(types[0], vals[0], true, fnbuf);
255 | string_push(fnbuf, ", %rax\n");
256 | }
257 | end_save:
258 | if (regalloc.current_fn->is_variadic)
259 | string_push_fmt(fnbuf, "\tmov %rbp, %rsp\n\tpop %rbp\n\tadd $%zu, %rsp\n\tret\n", sizeof(arg_regs) / sizeof(arg_regs[0]) * 8);
260 | else
261 | string_push_fmt(fnbuf, "\tmov %rbp, %rsp\n\tpop %rbp\n\tret\n");
262 | }
263 |
264 | static void call_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
265 | size_t pop_bytes = 0;
266 | if (((FunctionArgList*) vals[1])->num_args > 6 && ((FunctionArgList*) vals[1])->num_args & 1) {
267 | string_push(fnbuf, "\tsub $8, %rsp\n");
268 | }
269 | char **argregs_at = arg_regs;
270 | for (size_t arg = 0; arg < ((FunctionArgList*) vals[1])->num_args; arg++) {
271 | char *label_loc = NULL;
272 | if (((FunctionArgList*) vals[1])->arg_types[arg] == Label &&
273 | ((FunctionArgList*) vals[1])->args_are_structs[arg]) {
274 | AggregateType *aggtype = find_aggtype(((FunctionArgList*) vals[1])->arg_struct_types[arg], aggregate_types, num_aggregate_types);
275 | if (aggtype->size_bytes > 16) {
276 | // Make sure it's 64 bit then just continue and let it be passed as a pointer
277 | ((FunctionArgList*) vals[1])->arg_sizes[arg] = Bits64;
278 | } else if (aggtype->size_bytes > 8) {
279 | // copy 16 bytes
280 | label_loc = label_to_reg_noresize(0, ((FunctionArgList*) vals[1])->args[arg], true);
281 | string_push_fmt(fnbuf, "\tmovq %s, %%rax\n", label_loc);
282 | string_push_fmt(fnbuf, "\tmovq (%%rax), %s\n", argregs_at[0]);
283 | string_push_fmt(fnbuf, "\tmovq 8(%%rax), %s\n", argregs_at[1]);
284 | argregs_at += 2;
285 | continue;
286 | } else {
287 | // copy 8 bytes
288 | label_loc = label_to_reg_noresize(0, ((FunctionArgList*) vals[1])->args[arg], true);
289 | string_push_fmt(fnbuf, "\tmovq %s, %%rax\n", label_loc);
290 | string_push_fmt(fnbuf, "\tmovq (%%rax), %s\n", *argregs_at);
291 | argregs_at++;
292 | continue;
293 | }
294 | }
295 | if (((FunctionArgList*) vals[1])->arg_types[arg] != Number) {
296 | label_loc = label_to_reg_noresize(0, ((FunctionArgList*) vals[1])->args[arg], true);
297 | if (label_loc && arg < 6 && !strcmp(label_loc, reg_as_size(*argregs_at, get_reg_size(label_loc, ((FunctionArgList*) vals[1])->args[arg])))) {
298 | argregs_at++;
299 | continue;
300 | }
301 | }
302 | if (arg < 6) {
303 | if (((FunctionArgList*) vals[1])->arg_types[arg] == Label && (label_loc && label_loc[0] == '%')) {
304 | if (((FunctionArgList*) vals[1])->arg_types[arg] != Label)
305 | label_to_reg_noresize(0, ((FunctionArgList*) vals[1])->args[arg], true);
306 | string_push_fmt(fnbuf, "\t%s%c ", (((FunctionArgList*) vals[1])->arg_types[arg] == Str && is_position_independent) ? "lea" : "mov", sizes[((FunctionArgList*) vals[1])->arg_sizes[arg]]);
307 | build_value(((FunctionArgList*) vals[1])->arg_types[arg], (uint64_t) ((FunctionArgList*) vals[1])->args[arg], true, fnbuf);
308 | string_push_fmt(fnbuf, ", %s // arg = %zu\n", reg_as_size(*argregs_at, ((FunctionArgList*) vals[1])->arg_sizes[arg]), arg);
309 | } else {
310 | string_push_fmt(fnbuf, "\t%s%c ", (((FunctionArgList*) vals[1])->arg_types[arg] == Str && is_position_independent) ? "lea" : "mov", sizes[((FunctionArgList*) vals[1])->arg_sizes[arg]]);
311 | build_value(((FunctionArgList*) vals[1])->arg_types[arg], (uint64_t) ((FunctionArgList*) vals[1])->args[arg], true, fnbuf);
312 | string_push_fmt(fnbuf, ", %s // arg = %zu\n", reg_as_size(*argregs_at, ((FunctionArgList*) vals[1])->arg_sizes[arg]), arg);
313 | }
314 | } else {
315 | pop_bytes += 8;
316 | string_push_fmt(fnbuf, "\tpush ");
317 | build_value(((FunctionArgList*) vals[1])->arg_types[arg], (uint64_t) ((FunctionArgList*) vals[1])->args[arg], true, fnbuf);
318 | string_push_fmt(fnbuf, " // arg = %zu\n", arg);
319 | }
320 | argregs_at++;
321 | }
322 | string_push(fnbuf, "\tcall ");
323 | if (types[0] == Str && is_position_independent)
324 | string_push_fmt(fnbuf, "%s", (char*) vals[0]);
325 | else
326 | build_value(types[0], vals[0], false, fnbuf);
327 | string_push(fnbuf, "\n");
328 | if (((FunctionArgList*) vals[1])->num_args > 6 && ((FunctionArgList*) vals[1])->num_args & 1)
329 | pop_bytes += 8;
330 | if (pop_bytes)
331 | string_push_fmt(fnbuf, "\tadd $%zu, %rsp\n", pop_bytes);
332 | if (statement.label) {
333 | char *label_loc = reg_alloc(statement.label, statement.type);
334 | string_push_fmt(fnbuf, "\tmov %%%s, %s\n", rax_versions[statement.type], label_loc);
335 | }
336 | }
337 |
338 | static void jz_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
339 | if (types[0] != Label) {
340 | printf("First value of JZ instruction must be a label.\n");
341 | exit(1);
342 | }
343 | string_push_fmt(fnbuf, "\tcmp $0, %s\n"
344 | "\tje ", label_to_reg(0, (char*) vals[0], false));
345 | build_value(types[1], vals[1], false, fnbuf);
346 | string_push_fmt(fnbuf, "\n");
347 | }
348 |
349 | static void jmp_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
350 | string_push_fmt(fnbuf, "\tjmp ");
351 | build_value(types[0], vals[0], false, fnbuf);
352 | string_push_fmt(fnbuf, "\n");
353 | }
354 |
355 | static void jnz_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
356 | if (types[1] == Empty || types[2] == Empty) {
357 | printf("Expected two labels in JNZ instruction.\n");
358 | exit(1);
359 | }
360 | if (types[0] == Number) {
361 | string_push_fmt(fnbuf, "\tmov ");
362 | build_value(types[0], vals[0], false, fnbuf);
363 | string_push_fmt(fnbuf, ", %%rdi\n\tcmpq $0, %%rdi");
364 | } else if (types[0] == Label) {
365 | char *loc = label_to_reg_noresize(0, (char*) vals[0], false);
366 | Type sz = get_reg_size(loc, (char*) vals[0]);
367 | string_push_fmt(fnbuf, "\tcmp%c $0, %s\n", sizes[sz], reg_as_size(loc, sz));
368 | } else {
369 | printf("First value of JNZ must be either a label or a number.\n");
370 | exit(1);
371 | }
372 | string_push_fmt(fnbuf, "\n\tjne ");
373 | build_value(types[1], vals[1], false, fnbuf);
374 | string_push_fmt(fnbuf, "\n\tjmp ");
375 | build_value(types[2], vals[2], false, fnbuf);
376 | string_push_fmt(fnbuf, "\n");
377 | }
378 |
379 | static void neg_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
380 | char *label_loc = reg_alloc(statement.label, statement.type);
381 | string_push(fnbuf, "\tmov ");
382 | build_value(types[0], vals[0], true, fnbuf);
383 | string_push_fmt(fnbuf, ", %s\n"
384 | "\tneg%c %s\n", sizes[statement.type], label_loc, label_loc);
385 | }
386 |
387 | static void shift_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf, char direction) {
388 | char *label_loc = reg_alloc(statement.label, statement.type);
389 | // first val
390 | string_push_fmt(fnbuf, "\tmov%c ", sizes[statement.type]);
391 | build_value(types[1], vals[1], true, fnbuf);
392 | string_push_fmt(fnbuf, ", %s\n", reg_as_size("%rcx", statement.type));
393 | // second val
394 | string_push_fmt(fnbuf, "\tmov%c ", sizes[statement.type]);
395 | build_value(types[0], vals[0], true, fnbuf);
396 | string_push_fmt(fnbuf, ", %s\n", reg_as_size("%rdi", statement.type));
397 | // shift
398 | string_push_fmt(fnbuf, "\tsh%c%c %%cl, %s\n"
399 | "\tmov %s, %s\n",
400 | direction, sizes[statement.type], reg_as_size("%rdi", statement.type),
401 | reg_as_size("%rdi", statement.type), label_loc);
402 | }
403 |
404 | static void shl_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
405 | shift_build(vals, types, statement, fnbuf, 'l');
406 | }
407 |
408 | static void shr_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
409 | shift_build(vals, types, statement, fnbuf, 'r');
410 | }
411 |
412 | static void store_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
413 | char *reg = label_to_reg(0, (char*) vals[1], false);
414 | if (reg[0] == '%') {
415 | string_push_fmt(fnbuf, "\tmov%c ", sizes[statement.type]);
416 | build_value(types[0], vals[0], true, fnbuf);
417 | string_push_fmt(fnbuf, ", %s\n", reg_as_size("%rdi", statement.type));
418 | string_push_fmt(fnbuf, "\tmov%c %s", sizes[statement.type], reg_as_size("%rdi", statement.type));
419 | string_push_fmt(fnbuf, ", (%s) // addr of %s\n", reg, (char*) vals[1]);
420 | } else {
421 | string_push_fmt(fnbuf, "\tmovq %s, %%rdi\n", reg);
422 | string_push_fmt(fnbuf, "\tmov%c ", sizes[statement.type]);
423 | build_value(types[0], vals[0], true, fnbuf);
424 | string_push_fmt(fnbuf, ", %s\n", reg_as_size("%rsi", statement.type));
425 | string_push_fmt(fnbuf, "\tmov%c %s, (%%rdi)\n", sizes[statement.type], reg_as_size("%rsi", statement.type));
426 | }
427 | }
428 |
429 | static void load_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
430 | char *label_loc = reg_alloc(statement.label, statement.type);
431 | char *addr = label_to_reg(0, (char*) vals[0], false);
432 | bool use_brackets = addr[0] == '%';
433 | if (use_brackets) {
434 | // is a register that stores the address
435 | string_push_fmt(fnbuf, "\tmovq (%s), %%rdi\n", addr);
436 | string_push_fmt(fnbuf, "\tmov%c %s, %s\n", sizes[statement.type], reg_as_size("%rdi", statement.type), label_loc);
437 | } else {
438 | // address is on the stack
439 | string_push_fmt(fnbuf, "\tmovq %s, %%rdi\n", addr);
440 | string_push_fmt(fnbuf, "\tmovq (%rdi), %%rdi\n");
441 | string_push_fmt(fnbuf, "\tmov%c %s, %s\n", sizes[statement.type], reg_as_size("%rdi", statement.type), label_loc);
442 | }
443 | }
444 |
445 | static void blit_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
446 | string_push(fnbuf, "\tmovq ");
447 | build_value(types[1], vals[1], true, fnbuf);
448 | string_push(fnbuf, ", %rdi\n");
449 | string_push(fnbuf, "\tmovq ");
450 | build_value(types[0], vals[0], true, fnbuf);
451 | string_push(fnbuf, ", %rsi\n");
452 | string_push(fnbuf, "\tmovq ");
453 | build_value(types[2], vals[2], true, fnbuf);
454 | string_push(fnbuf, ", %rcx\n");
455 | string_push(fnbuf, "\trep movsb\n");
456 | }
457 |
458 | static void alloc_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
459 | if (types[0] != Number) {
460 | printf("ALLOC's argument must be a number literal.\n");
461 | exit(1);
462 | }
463 | char *label_loc = reg_alloc(statement.label, statement.type);
464 | regalloc.bytes_rip_pad += vals[0];
465 | string_push_fmt(fnbuf, "\tlea -%llu(%rbp), %s\n"
466 | "\tmov %s, %s\n",
467 | regalloc.bytes_rip_pad, reg_as_size("%rdi", statement.type), reg_as_size("%rdi", statement.type), label_loc);
468 | }
469 |
470 | static void comparison_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf, char *instr) {
471 | char *label_loc = reg_alloc_noresize(statement.label, statement.type);
472 | string_push_fmt(fnbuf, "\tmov ");
473 | build_value(types[1], vals[1], true, fnbuf);
474 | string_push_fmt(fnbuf, ", %s\n"
475 | "\tcmp%c %s, ", reg_as_size("%rdi", statement.type), sizes[statement.type], reg_as_size("%rdi", statement.type));
476 | build_value(types[0], vals[0], true, fnbuf);
477 | string_push_fmt(fnbuf, "\n");
478 | if (label_loc[0] == '%') { // label in reg
479 | char *sized_label = reg_as_size(label_loc, Bits8);
480 | string_push_fmt(fnbuf, "\t%s %s\n", instr, sized_label);
481 | string_push_fmt(fnbuf, "\tmovzb%c %s, %s\n", sizes[statement.type], sized_label, reg_as_size(label_loc, statement.type));
482 | } else { // on stack
483 | string_push_fmt(fnbuf, "\t%s %%al\n", instr);
484 | string_push_fmt(fnbuf, "\tmovzb%c %%al, %%%s\n", sizes[statement.type], rax_versions[statement.type]);
485 | string_push_fmt(fnbuf, "\tmov%c %%%s, %s\n", sizes[statement.type], rax_versions[statement.type], label_loc);
486 | }
487 | }
488 |
489 | static void eq_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
490 | comparison_build(vals, types, statement, fnbuf, "sete");
491 | }
492 |
493 | static void ne_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
494 | comparison_build(vals, types, statement, fnbuf, "setne");
495 | }
496 |
497 | static void sge_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
498 | comparison_build(vals, types, statement, fnbuf, "setge");
499 | }
500 |
501 | static void sgt_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
502 | comparison_build(vals, types, statement, fnbuf, "setg");
503 | }
504 |
505 | static void sle_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
506 | comparison_build(vals, types, statement, fnbuf, "setle");
507 | }
508 |
509 | static void slt_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
510 | comparison_build(vals, types, statement, fnbuf, "setl");
511 | }
512 |
513 | static void uge_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
514 | comparison_build(vals, types, statement, fnbuf, "setae");
515 | }
516 |
517 | static void ugt_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
518 | comparison_build(vals, types, statement, fnbuf, "seta");
519 | }
520 |
521 | static void ule_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
522 | comparison_build(vals, types, statement, fnbuf, "setbe");
523 | }
524 |
525 | static void ult_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
526 | comparison_build(vals, types, statement, fnbuf, "setb");
527 | }
528 |
529 | static void blklbl_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
530 | if (types[0] != Str) {
531 | printf("Expected label to have value RawStr, got something else instead.\n");
532 | exit(1);
533 | }
534 | string_push_fmt(fnbuf, ".%s_%s:\n", regalloc.current_fn->name, (char*) vals[0]);
535 | /* Now it needs to do Phi stuff:
536 | * - Go through the rest of the statements in this function and find a Phi instruction with this label
537 | * - Once it finds one:
538 | * - If this block label is the first one specified in the phi instruction, allocate the register
539 | * - Set the label's value to the value it should be for this branch, as specified by phi
540 | */
541 | for (size_t s = 0; s < regalloc.current_fn->num_statements; s++) {
542 | Statement phi = regalloc.current_fn->statements[s];
543 | if (phi.instruction != PHI) continue;
544 | bool is_first = !strcmp(((PhiVal*) phi.vals[0])->blklbl_name, (char*) vals[0]);
545 | bool is_second = !strcmp(((PhiVal*) phi.vals[1])->blklbl_name, (char*) vals[0]);
546 | if (is_first || is_second) {
547 | char *label_loc;
548 | string_push_fmt(fnbuf, "\tmov%c ", sizes[phi.type]);
549 | if (is_first) {
550 | label_loc = reg_alloc(phi.label, phi.type);
551 | build_value(((PhiVal*) phi.vals[0])->type, ((PhiVal*) phi.vals[0])->val, true, fnbuf);
552 | } else {
553 | label_loc = label_to_reg(0, phi.label, false);
554 | build_value(((PhiVal*) phi.vals[1])->type, ((PhiVal*) phi.vals[1])->val, true, fnbuf);
555 | }
556 | string_push_fmt(fnbuf, ", %s\n", label_loc);
557 | }
558 | }
559 | }
560 |
561 | // second val dictates whether or not it's a signed operation (signed if true).
562 | static void ext_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
563 | char *label_loc = reg_alloc_noresize(statement.label, statement.type);
564 | string_push_fmt(fnbuf, "\t%s ", (types[0] == Label) ? "movsx" : "mov");
565 | build_value(types[0], vals[0], true, fnbuf);
566 | string_push_fmt(fnbuf, ", %s\n", reg_as_size("%rdx", statement.type));
567 | string_push_fmt(fnbuf, "\tmov%c %s, %s\n", sizes[statement.type], reg_as_size("%rdx", statement.type), label_loc);
568 | }
569 |
570 | static void hlt_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
571 | string_push(fnbuf, "\tjmp .\n");
572 | }
573 |
574 | static void phi_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
575 | /* Phi doesn't actually do anything in the instruction itself in generated assembly.
576 | * All of the generated assembly to do with the phi instruction is done within block label
577 | * compilation. */
578 | }
579 |
580 | static void vastart_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
581 | if (types[0] != Label) {
582 | printf("vastart expects argument to be a label, got something else instead.\n");
583 | exit(1);
584 | }
585 | char *addr = label_to_reg(0, (char*) vals[0], false);
586 | string_push_fmt(fnbuf, "\tmovw $0, (%s)\n", addr); // Set current vararg index (off = 0)
587 | string_push_fmt(fnbuf, "\tmovq %%rbp, %%rax\n"
588 | "\taddq $8, %%rax\n"
589 | "\tmovq %%rax, 2(%s)\n", addr); // set address of arguments start
590 | }
591 |
592 | /* vaarg: loads the next variadic argument into the location allocated for statement.label.
593 |  * vals[0] must be a label referring to a record with a 16-bit index at offset 0 followed by
594 |  * the 64-bit address of the first argument at offset 2. The emitted code computes
595 |  * base + index * 8, increments the stored index and copies the 8 bytes found there into the
596 |  * destination. It overwrites %rax, %rcx, %rdx (through mulq), %rsi and %rdi.
597 |  * Prints a message and exits if vals[0] is not a label. */
598 | static void vaarg_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
599 |     if (types[0] != Label) {
600 |         printf("vaarg expects argument to be a label, got something else instead.\n");
601 |         exit(1);
602 |     }
603 |     char *addr = label_to_reg(0, (char*) vals[0], false);
604 |     // get current index (rax is cleared first because only its low 16 bits are loaded)
605 |     string_push_fmt(fnbuf, "\txor %%rax, %%rax\n");
606 |     if (addr[0] == '%')
607 |         string_push_fmt(fnbuf, "\tmovw (%s), %%ax\n", addr);
608 |     else {
609 |         // NOTE(review): for a stack slot such as "-8(%rbp)" the "(%s)" patterns here and below
610 |         // expand to "(-8(%rbp))" - confirm that the assembler accepts this operand form.
611 |         string_push_fmt(fnbuf, "\tmov (%s), %%rax\n", addr);
612 |         string_push_fmt(fnbuf, "\tmovw (%%rax), %%ax\n");
613 |     }
614 |     // allocate the destination before formatting so the argument list matches the three %s below
615 |     char *dest = reg_alloc_noresize(statement.label, statement.type);
616 |     string_push_fmt(fnbuf, "\tmov $8, %%rsi\n"
617 |                            "\tmulq %%rsi\n"           // rax = index * 8
618 |                            "\tmov %s, %%rcx\n"
619 |                            "\tadd $2, %%rcx\n"
620 |                            "\tmovq (%%rcx), %%rcx\n"  // rcx = address of the first argument
621 |                            "\taddq %%rcx, %%rax\n"    // address of the value is now in rax
622 |                            "\taddw $1, (%s)\n"        // increase current index
623 |                            "\tmov (%%rax), %%rdi\n"   // fetch the argument
624 |                            "\tmov %%rdi, %s\n",       // store it in the destination
625 |                            addr, addr, dest);
626 | }
617 |
618 | /* .loc: forwards three integer literals to the assembler as a `.loc` directive.
619 |  * Prints a message and exits unless all three operands are Number values.
620 |  * NOTE(review): the parameters are declared with [2] but element 2 is read; this relies on
621 |  * the caller passing arrays with at least three elements - confirm at the call site. */
622 | static void loc_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
623 |     for (size_t operand = 0; operand < 3; operand++) {
624 |         if (types[operand] == Number) continue;
625 |         printf("All arguments of .loc instruction must be an integer literal.\n");
626 |         exit(1);
627 |     }
628 |     string_push_fmt(fnbuf, "\t.loc %zu %zu %zu\n", vals[0], vals[1], vals[2]);
629 | }
625 |
626 | /* Emits `op <reg>` for every inline-assembly input whose target register is listed in
627 |  * reg_alloc_tab with a non-zero reference count (i.e. the register currently holds a label
628 |  * that is still needed). Inputs are visited first-to-last, except when op is "pop": then they
629 |  * are visited last-to-first so that the pops mirror the pushes on the stack.
630 |  * NOTE(review): the reference counts are re-read on every call, so a count that changes
631 |  * between the push pass and the pop pass would unbalance the stack - confirm this cannot happen. */
632 | static void pushpop_inputs(InlineAsm *info, char *op, String *fnbuf) {
633 |     const size_t num_inputs = vec_size(info->inputs_vec);
634 |     const size_t num_regs = sizeof(reg_alloc_tab) / sizeof(reg_alloc_tab[0]);
635 |     const bool reversed = !strcmp(op, "pop");
636 |     for (size_t n = 0; n < num_inputs; n++) {
637 |         InlineAsmIO input = (*info->inputs_vec)[reversed ? num_inputs - 1 - n : n];
638 |         // check if the register is used to know if it needs to be saved/restored
639 |         for (size_t reg = 0; reg < num_regs; reg++) {
640 |             if (strcmp(input.reg, (char*) reg_alloc_tab[reg][0]) || !reg_alloc_tab[reg][1]) continue;
641 |             string_push_fmt(fnbuf, "\t%s %s\n", op, input.reg);
642 |             break;
643 |         }
644 |     }
645 | }
646 |
647 | /* Saves (is_push != 0) or restores (is_push == 0) the registers an inline assembly block may
648 |  * overwrite: the in-use input registers and every clobber.
649 |  * Saving pushes inputs then clobbers, each first-to-last. Because the stack is last-in
650 |  * first-out, restoring pops in exactly the opposite order: clobbers last-to-first, then inputs
651 |  * last-to-first. */
652 | static void pushpop_clobbers_and_inputs(InlineAsm *info, int is_push, String *fnbuf) {
653 |     const size_t num_clobbers = vec_size(info->clobbers_vec);
654 |     if (is_push) {
655 |         pushpop_inputs(info, "push", fnbuf);
656 |         for (size_t clobber = 0; clobber < num_clobbers; clobber++)
657 |             string_push_fmt(fnbuf, "\t%s %s\n", "push", (*info->clobbers_vec)[clobber]);
658 |         return;
659 |     }
660 |     for (size_t clobber = num_clobbers; clobber-- > 0;)
661 |         string_push_fmt(fnbuf, "\t%s %s\n", "pop", (*info->clobbers_vec)[clobber]);
662 |     pushpop_inputs(info, "pop", fnbuf);
663 | }
652 |
653 | /* Inline assembly: vals[0] carries an InlineAsm record (asserted through types[0]).
654 |  * Emission order: save input/clobber registers, move every input into its requested register,
655 |  * copy the user's assembly text verbatim, restore the saved registers, then move each output
656 |  * register into a freshly allocated 64-bit location for its label. */
657 | static void asm_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) {
658 |     assert(types[0] == InlineAssembly && "first type of inline assembly instruction must be an inline assembly value type");
659 |     InlineAsm *info = (InlineAsm*) vals[0];
660 |     const size_t num_inputs = vec_size(info->inputs_vec);
661 |
662 |     // save registers for later
663 |     pushpop_clobbers_and_inputs(info, 1, fnbuf);
664 |
665 |     // load every input (label or immediate value) into the register the block asked for
666 |     for (size_t i = 0; i < num_inputs; i++) {
667 |         InlineAsmIO in = (*info->inputs_vec)[i];
668 |         string_push_fmt(fnbuf, "\tmov ");
669 |         if (in.type != Label) {
670 |             build_value_noresize(in.type, (uint64_t) in.label, true, fnbuf);
671 |         } else {
672 |             size_t stack_offset = num_inputs + vec_size(info->clobbers_vec);
673 |             string_push_fmt(fnbuf, "%s", label_to_reg(stack_offset, in.label, false));
674 |         }
675 |         string_push_fmt(fnbuf, ", %s\n", in.reg);
676 |     }
677 |
678 |     // copy the assembly
679 |     string_push_fmt(fnbuf, "\t%s\n", info->assembly);
680 |
681 |     // restore clobbers and saved registers that were used for inputs
682 |     pushpop_clobbers_and_inputs(info, 0, fnbuf);
683 |
684 |     // now move the output registers into the labels associated
685 |     const size_t num_outputs = vec_size(info->outputs_vec);
686 |     for (size_t o = 0; o < num_outputs; o++) {
687 |         char *dest = reg_alloc((*info->outputs_vec)[o].label, Bits64);
688 |         string_push_fmt(fnbuf, "\tmov %s, %s\n", (*info->outputs_vec)[o].reg, dest);
689 |     }
690 | }
678 |
679 | void (*instructions_x86_64[])(uint64_t[2], ValType[2], Statement, String*) = { // table of x86_64 emitters, one per IR instruction; every entry takes (vals, types, statement, fnbuf)
680 | add_build, sub_build, div_build, mul_build,
681 | copy_build, ret_build, call_build, jz_build, neg_build,
682 | udiv_build, rem_build, urem_build, and_build, or_build, xor_build,
683 | shl_build, shr_build, store_build, load_build, blit_build, alloc_build,
684 | eq_build, ne_build, sle_build, slt_build, sge_build, sgt_build, ule_build, ult_build,
685 | uge_build, ugt_build, ext_build, hlt_build, blklbl_build, jmp_build, jnz_build, phi_build, vastart_build,
686 | vaarg_build, loc_build, asm_build,
687 | }; // NOTE(review): presumably indexed by the instruction enum value, so the entry order must match that enum - confirm
688 |
--------------------------------------------------------------------------------
/src/target/x86_64/register.c:
--------------------------------------------------------------------------------
1 | /* Register allocator for UYB project.
2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 |
11 | /* All the registers the allocator can hand out to labels, one row per register:
12 | * {reg_name, num_refs, reg_size}
13 | * num_refs is the number of references to the label corresponding to that register
14 | * *after* the current instruction; a row with num_refs == 0 is free (reg_init_fn() zeroes it for every function). */
15 | intptr_t reg_alloc_tab[5][3] = {
16 | {(uintptr_t) "%rbx", 0, 0},
17 | {(uintptr_t) "%r12", 0, 0},
18 | {(uintptr_t) "%r13", 0, 0},
19 | {(uintptr_t) "%r14", 0, 0},
20 | {(uintptr_t) "%r15", 0, 0},
21 | };
22 |
23 | // Left side is register, middle is assigned label (0 while unassigned), right is a counter kept in a char* slot: reg_alloc_noresize() bumps it when the label is allocated again and uses it as the ".N" suffix of the new name
24 | char *label_reg_tab[5][3] = {
25 | {"%rbx", 0, 0},
26 | {"%r12", 0, 0},
27 | {"%r13", 0, 0},
28 | {"%r14", 0, 0},
29 | {"%r15", 0, 0},
30 | };
31 |
32 | RegAlloc regalloc; // allocator state shared by the functions in this file; reg_init_fn() resets it for each function
33 |
34 | /* Returns true when `label` is the name of one of the current function's arguments. */
35 | bool check_label_in_args(char *label) {
36 |     size_t idx = 0;
37 |     while (idx < regalloc.current_fn->num_args) {
38 |         if (strcmp(label, regalloc.current_fn->args[idx].label) == 0)
39 |             return true;
40 |         idx++;
41 |     }
42 |     return false;
43 | }
40 |
41 | /* Converts a 64-bit register name to the name of its `size`-wide sub-register.
42 |  * `reg` must start with '%' and name a 64-bit register ("%rbx", "%r12", ...); the result has
43 |  * NO leading '%'. Numbered registers get a b/w/d suffix, legacy registers are renamed
44 |  * (rbx -> bl / bx / ebx). For Bits64 the input (minus '%') is returned unchanged.
45 |  * The result may point into `reg`, at a string literal or at newly allocated memory. */
46 | char *reg_as_size_inner(char *reg, Type size) {
47 |     reg++; // skip the '%'
48 |     if (reg[0] == 'r' && /* is digit: */ (reg[1] >= '0' && reg[1] <= '9')) {
49 |         String *str = string_from(reg);
50 |         if (size == Bits8) string_push(str, "b");
51 |         else if (size == Bits16) string_push(str, "w");
52 |         else if (size == Bits32) string_push(str, "d");
53 |         return str->data;
54 |     }
55 |     if (size == Bits8) {
56 |         // these four do not follow the "second letter + l" pattern used below
57 |         if (!strcmp(reg, "rsi")) return "sil";
58 |         if (!strcmp(reg, "rdi")) return "dil";
59 |         if (!strcmp(reg, "rbp")) return "bpl";
60 |         if (!strcmp(reg, "rsp")) return "spl";
61 |         char *buf = aalloc(4);
62 |         memcpy(buf, &reg[1], 3); // "rbx" -> "bx\0"
63 |         buf[1] = 'l';            //       -> "bl\0"
64 |         return buf;
65 |     } else if (size == Bits16) {
66 |         return &reg[1];          // "rbx" -> "bx"
67 |     } else if (size == Bits32) {
68 |         char *buf = aalloc(4);
69 |         memcpy(buf, &reg[0], 4); // "rbx\0"
70 |         buf[0] = 'e';            // -> "ebx"
71 |         return buf;
72 |     } else return reg;
73 | }
66 |
67 | /* Derives the operand width from a register name that starts with '%'.
68 |  * Numbered registers are classified by their b/w/d suffix, "e.." is 32-bit, "r.." is 64-bit,
69 |  * names ending in 'x', 'i' or 'p' (ax, si, bp, sp, ...) are 16-bit, everything else is 8-bit. */
70 | Type size_from_reg(char *reg) {
71 |     reg++; // skip the '%'
72 |     size_t len = strlen(reg);
73 |     char last = len ? reg[len - 1] : '\0'; // a bare "%" has no last character to inspect
74 |     if (reg[0] == 'r' && /* is digit: */ (reg[1] >= '0' && reg[1] <= '9')) {
75 |         if (last == 'b') return Bits8;
76 |         else if (last == 'w') return Bits16;
77 |         else if (last == 'd') return Bits32;
78 |         else return Bits64;
79 |     }
80 |     if (reg[0] == 'e') return Bits32;
81 |     if (reg[0] == 'r') return Bits64;
82 |     if (last == 'i' || last == 'x' || last == 'p') return Bits16;
83 |     return Bits8;
84 | }
81 |
82 | /* Returns `reg` renamed to its `size`-wide form, including the leading '%'.
83 |  * Anything that does not start with '%' (for example a stack slot) is returned untouched.
84 |  * The buffer is sized from the converted name: names such as "%r12d" need 6 bytes, one more
85 |  * than a fixed 5-byte buffer can hold. */
86 | char *reg_as_size(char *reg, Type size) {
87 |     if (reg[0] != '%') return reg;
88 |     char *resized = reg_as_size_inner(reg, size);
89 |     size_t name_len = strlen(resized);
90 |     char *buf = aalloc(name_len + 2); // '%' + name + terminating NUL
91 |     buf[0] = '%';
92 |     memcpy(&buf[1], resized, name_len + 1);
93 |     return buf;
94 | }
89 |
90 | /* Resets the allocator for a new function: clears every reference count in reg_alloc_tab,
91 |  * stores a copy of `func` as the current function and starts with empty bookkeeping vectors,
92 |  * zero stack padding and statement index 0. */
93 | void reg_init_fn(Function func) {
94 |     const size_t num_regs = sizeof(reg_alloc_tab) / sizeof(reg_alloc_tab[0]);
95 |     for (size_t reg = num_regs; reg-- > 0;) {
96 |         reg_alloc_tab[reg][1] = 0;
97 |     }
98 |
99 |     Function *fn_copy = (Function*) aalloc(sizeof(Function));
100 |     *fn_copy = func;
101 |
102 |     regalloc.bytes_rip_pad = 0;
103 |     regalloc.statement_idx = 0;
104 |     regalloc.current_fn = fn_copy;
105 |     regalloc.labels_as_offsets = vec_new(sizeof(size_t) * 3);
106 |     regalloc.used_regs_vec = vec_new(sizeof(char*));
107 | }
100 |
101 | char *reg_alloc_noresize(char *label, Type reg_size) { // picks a location for `label`: the first free row of reg_alloc_tab, otherwise a new 8-byte slot below %rbp; returns the 64-bit register name or a "-N(%rbp)" string
102 | for (size_t l = 0; l < sizeof(label_reg_tab) / sizeof(label_reg_tab[0]); l++) {
103 | if (!label_reg_tab[l][1] || strcmp(label_reg_tab[l][1], label)) continue; // only rows currently assigned to this very label
104 | size_t new_label_sz = strlen(label) + 5; // room for '.', up to three digits and the NUL; snprintf truncates anything longer
105 | char *new_label = (char*) aalloc(new_label_sz);
106 | label_reg_tab[l][2]++; // the char* slot doubles as an integer counter
107 | snprintf(new_label, new_label_sz, "%s.%zu", label, (size_t) label_reg_tab[l][2]);
108 | label = new_label; // from here on the allocation is recorded under the suffixed name
109 | }
110 | for (size_t i = 0; i < sizeof(reg_alloc_tab) / sizeof(reg_alloc_tab[0]); i++) {
111 | if (reg_alloc_tab[i][1]) continue; // non-zero reference count: register is taken
112 | for (size_t s = regalloc.statement_idx; s < regalloc.current_fn->num_statements; s++) { // count uses of the label from the current statement onwards
113 | if (regalloc.current_fn->statements[s].instruction == JMP || regalloc.current_fn->statements[s].instruction == JNZ) {
114 | reg_alloc_tab[i][1] = -1; // NOTE(review): a jump ends the scan and replaces the count with -1, presumably because uses cannot be counted across control flow - confirm
115 | break;
116 | }
117 | if (regalloc.current_fn->statements[s].val_types[1] == FunctionArgs) {
118 | for (size_t arg = 0; arg < ((FunctionArgList*) regalloc.current_fn->statements[s].vals[1])->num_args; arg++) {
119 | if (((FunctionArgList*) regalloc.current_fn->statements[s].vals[1])->arg_types[arg] != Number &&
120 | strcmp(label, ((FunctionArgList*) regalloc.current_fn->statements[s].vals[1])->args[arg])) continue; // NOTE(review): as written, Number arguments are not skipped and also add 2 below - confirm this is intended
121 | reg_alloc_tab[i][1] += 2;
122 | }
123 | }
124 | if (regalloc.current_fn->statements[s].instruction == ASM) {
125 | InlineAsm *info = (InlineAsm*) regalloc.current_fn->statements[s].vals[0];
126 | for (size_t in = 0; in < vec_size(info->inputs_vec); in++) {
127 | if (strcmp((*info->inputs_vec)[in].label, label)) continue;
128 | reg_alloc_tab[i][1]++; // one reference per inline-assembly input that names the label
129 | }
130 | }
131 | if ((regalloc.current_fn->statements[s].val_types[0] == Label && !strcmp((char*) regalloc.current_fn->statements[s].vals[0], label)) ||
132 | (regalloc.current_fn->statements[s].val_types[1] == Label && !strcmp((char*) regalloc.current_fn->statements[s].vals[1], label)) ||
133 | (regalloc.current_fn->statements[s].val_types[2] == Label && !strcmp((char*) regalloc.current_fn->statements[s].vals[2], label))) {
134 | reg_alloc_tab[i][1]++; // at most one reference per statement, however many operands match
135 | }
136 | }
137 | if (check_label_in_args(label) && reg_alloc_tab[i][1]) reg_alloc_tab[i][1]++; // labels that are function arguments get one extra reference
138 | label_reg_tab[i][1] = aalloc(strlen(label) + 1);
139 | strcpy(label_reg_tab[i][1], label); // remember which label owns this register
140 | size_t used_sz = vec_size(regalloc.used_regs_vec);
141 | bool do_push = true;
142 | for (size_t y = 0; y < used_sz; y++) {
143 | if (strcmp((*regalloc.used_regs_vec)[y], (char*) reg_alloc_tab[i][0])) continue; // NOTE(review): this loop has no effect, do_push is never cleared, so a register can be recorded more than once
144 | }
145 | if (reg_alloc_tab[i][1]) {
146 | if (do_push)
147 | vec_push(regalloc.used_regs_vec, (char*) reg_alloc_tab[i][0]);
148 | regalloc.bytes_rip_pad += 8;
149 | }
150 | reg_alloc_tab[i][2] = reg_size; // remembered so later lookups can resize the register name
151 | return (char*) reg_alloc_tab[i][0];
152 | }
153 | regalloc.bytes_rip_pad += 8; // no free register: give the label the next 8-byte slot below %rbp
154 | char *fmt = "-%llu(%%rbp)";
155 | size_t buf_sz = strlen("-(%rbp)") + 5; // 12 bytes: snprintf below keeps at most 11 characters, i.e. offsets of up to four digits
156 | char *buf = (char*) aalloc(buf_sz + 1);
157 | snprintf(buf, buf_sz, fmt, regalloc.bytes_rip_pad);
158 | size_t *new_vec_val = aalloc(sizeof(size_t) * 3); // record layout: {label pointer, offset, size}
159 | new_vec_val[0] = (size_t) label;
160 | new_vec_val[1] = regalloc.bytes_rip_pad;
161 | new_vec_val[2] = reg_size;
162 | vec_push(regalloc.labels_as_offsets, new_vec_val);
163 | return buf;
164 | }
165 |
166 | /* Allocates a location for `label` like reg_alloc_noresize(), but a register result is
167 |  * returned under its `reg_size`-wide name; stack slots are passed through unchanged. */
168 | char *reg_alloc(char *label, Type reg_size) {
169 |     char *loc = reg_alloc_noresize(label, reg_size);
170 |     const bool is_register = (loc[0] == '%');
171 |     return is_register ? reg_as_size((char*) loc, reg_size) : loc;
172 | }
173 |
174 | /* Looks up where `label` lives and returns the 64-bit register name or a "-N(%rbp)" operand.
175 |  * A register hit consumes one reference: the count is decremented and, once it reaches zero,
176 |  * the register is released from the label. For stack slots `offset` is added to the recorded
177 |  * offset. An unknown label yields NULL when allow_noexist is set; otherwise the program
178 |  * prints a message and exits. */
179 | char *label_to_reg_noresize(size_t offset, char *label, bool allow_noexist) {
180 |     for (size_t i = 0; i < sizeof(label_reg_tab) / sizeof(label_reg_tab[0]); i++) {
181 |         if (!label_reg_tab[i][1] || strcmp(label_reg_tab[i][1], label)) continue;
182 |         if (reg_alloc_tab[i][1])
183 |             reg_alloc_tab[i][1]--;
184 |         if (!reg_alloc_tab[i][1])
185 |             label_reg_tab[i][1] = 0;
186 |         return label_reg_tab[i][0];
187 |     }
188 |     size_t label_offset_list_len = vec_size(regalloc.labels_as_offsets);
189 |     for (size_t l = 0; l < label_offset_list_len; l++) {
190 |         if (strcmp((char*) (*regalloc.labels_as_offsets)[l][0], label)) continue;
191 |         // the cast makes the argument match %llu whatever the element type is
192 |         unsigned long long slot = (unsigned long long) ((*regalloc.labels_as_offsets)[l][1] + offset);
193 |         // measure first so that large offsets are never truncated into a malformed operand
194 |         int len = snprintf(NULL, 0, "-%llu(%%rbp)", slot);
195 |         if (len < 0) {
196 |             printf("Failed to format stack slot for label: %s\n", label);
197 |             exit(1);
198 |         }
199 |         char *buf = (char*) aalloc((size_t) len + 1);
200 |         snprintf(buf, (size_t) len + 1, "-%llu(%%rbp)", slot);
201 |         return buf;
202 |     }
203 |     if (allow_noexist) return NULL;
204 |     printf("Tried to use non-defined label: %s\n", label);
205 |     exit(1);
206 | }
196 |
197 | /* Returns the size recorded for a location: for a register listed in reg_alloc_tab the size
198 |  * stored in its row, otherwise the size stored with the stack slot of `expected_label`.
199 |  * Prints a message and exits when neither lookup succeeds. */
200 | Type get_reg_size(char *reg, char *expected_label) {
201 |     const size_t num_regs = sizeof(reg_alloc_tab) / sizeof(reg_alloc_tab[0]);
202 |     for (size_t r = 0; r < num_regs; r++) {
203 |         if (!strcmp(reg, (char*) reg_alloc_tab[r][0]))
204 |             return reg_alloc_tab[r][2];
205 |     }
206 |     const size_t num_slots = vec_size(regalloc.labels_as_offsets);
207 |     size_t slot = 0;
208 |     while (slot < num_slots) {
209 |         if (!strcmp(expected_label, (char*) (*regalloc.labels_as_offsets)[slot][0]))
210 |             return (Type) (*regalloc.labels_as_offsets)[slot][2];
211 |         slot++;
212 |     }
213 |     printf("Invalid register in get_reg_size: %s\n", reg);
214 |     exit(1);
215 | }
210 |
211 | /* Resolves `label` to its location, renaming a register to the size recorded for it.
212 |  * The lookup itself is always done with offset 0; the `offset` parameter is not forwarded.
213 |  * With allow_noexist set, NULL is returned when the label is unknown or when its register's
214 |  * reference count is zero after the lookup. Stack slots are returned unchanged.
215 |  * (Does a second linear scan over reg_alloc_tab, so it is somewhat slow.) */
216 | char *label_to_reg(size_t offset, char *label, bool allow_noexist) {
217 |     char *loc = label_to_reg_noresize(0, label, allow_noexist);
218 |     if (allow_noexist && !loc) return NULL;
219 |     const size_t num_regs = sizeof(reg_alloc_tab) / sizeof(reg_alloc_tab[0]);
220 |     size_t row = 0;
221 |     while (row < num_regs && strcmp(loc, (char*) reg_alloc_tab[row][0]) != 0)
222 |         row++;
223 |     if (row == num_regs) return loc; // not one of the allocatable registers
224 |     if (allow_noexist && !reg_alloc_tab[row][1]) return NULL;
225 |     return reg_as_size(loc, (Type) reg_alloc_tab[row][2]);
226 | }
222 |
--------------------------------------------------------------------------------
/src/utils.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 |
6 | /* Maps a value size to its one-letter type suffix: 'b' (8 bits), 'h' (16 bits),
7 |  * 'w' (32 bits); every other value yields 'l'.
8 |  * The mapping is done by comparing against the named constants, so an unexpected value can
9 |  * never index past the end of a lookup table. */
10 | char size_as_char(Type type) {
11 |     switch (type) {
12 |     case Bits8:  return 'b';
13 |     case Bits16: return 'h';
14 |     case Bits32: return 'w';
15 |     default:     return 'l';
16 |     }
17 | }
13 |
14 | /* Searches `copyvals` for an entry whose label equals `label`.
15 |  * Returns 1 when found (storing the entry's val in *val_buf unless val_buf is NULL), else 0. */
16 | int find_sizet_in_copyvals(CopyVal **copyvals, char *label, size_t *val_buf) {
17 |     size_t idx = 0;
18 |     while (idx < vec_size(copyvals)) {
19 |         if (strcmp((*copyvals)[idx].label, label) != 0) {
20 |             idx++;
21 |             continue;
22 |         }
23 |         if (val_buf != NULL)
24 |             *val_buf = (*copyvals)[idx].val;
25 |         return 1;
26 |     }
27 |     return 0;
28 | }
26 |
27 | /* Searches `copyvals` for an entry whose label equals `label`.
28 |  * Returns 1 when found (copying the whole entry into *val_buf unless val_buf is NULL), else 0. */
29 | int find_copyval(CopyVal **copyvals, char *label, CopyVal *val_buf) {
30 |     size_t idx = 0;
31 |     while (idx < vec_size(copyvals)) {
32 |         if (strcmp((*copyvals)[idx].label, label) != 0) {
33 |             idx++;
34 |             continue;
35 |         }
36 |         if (val_buf != NULL)
37 |             *val_buf = (*copyvals)[idx];
38 |         return 1;
39 |     }
40 |     return 0;
41 | }
37 |
38 | /* Builds the textual form of a type: ":<type_struct>" for aggregate types, otherwise the
39 |  * one-letter suffix from size_as_char(). The string is allocated with aalloc(). */
40 | char *get_full_char_str(bool is_struct, Type type, char *type_struct) {
41 |     if (!is_struct) {
42 |         char *suffix = (char*) aalloc(2);
43 |         suffix[0] = size_as_char(type);
44 |         suffix[1] = 0;
45 |         return suffix;
46 |     }
47 |     char *named = (char*) aalloc(strlen(type_struct) + 2); // ':' + name + NUL
48 |     named[0] = ':';
49 |     strcpy(named + 1, type_struct);
50 |     return named;
51 | }
50 |
51 | // Looks `name` up in an array of aggregate types and returns a pointer to the matching entry.
52 | // Prints a message and exits the program when no entry has that name.
53 | AggregateType *find_aggtype(char *name, AggregateType *aggtypes, size_t num_aggtypes) {
54 |     AggregateType *candidate = aggtypes;
55 |     for (size_t remaining = num_aggtypes; remaining > 0; remaining--) {
56 |         if (strcmp(name, candidate->name) == 0) return candidate;
57 |         candidate++;
58 |     }
59 |     printf("Tried to use undefined aggregate type.\n");
60 |     exit(1);
61 | }
59 |
60 | /* Caller is expected to free return value.
61 |  * Reads stdin until fread() returns no more data and returns it as one NUL-terminated string.
62 |  * Returns NULL (after reporting on stderr) when memory runs out. */
63 | char *read_full_stdin() {
64 |     size_t used = 0, cap = 1025;
65 |     char *prev = malloc(cap);
66 |     char *cur = prev;
67 |     while (cur != NULL) {
68 |         // always leave one byte free for the terminator
69 |         size_t got = fread(cur + used, 1, cap - used - 1, stdin);
70 |         if (got == 0) {
71 |             cur[used] = '\0';
72 |             return cur;
73 |         }
74 |         used += got;
75 |         if (cap - used < cap / 2)
76 |             cap += cap / 2 + cap / 8;
77 |         prev = cur; // keep the old block so it can be released if realloc fails
78 |         cur = realloc(prev, cap);
79 |     }
80 |     fprintf(stderr, "Not enough memory for %zu bytes in read_full_stdin()\n", cap);
81 |     free(prev);
82 |     return NULL;
83 | }
82 |
--------------------------------------------------------------------------------
/src/vector.c:
--------------------------------------------------------------------------------
1 | /* Part of the vector implementation for the UYB compiler backend project, see ../include/vector.h for the
2 | * rest of the code and an explanation on how to use the full thing.
3 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under the MPL2.0 license, see /LICENSE for more information. */
4 | #include
5 |
6 | /* Creates an empty vector for elements of `data_size` bytes with room for one element.
7 |  * The returned pointer is the address of the header's `data` member, not of the header. */
8 | void *vec_new(size_t data_size) {
9 |     Vec *header = (Vec*) malloc(sizeof(Vec));
10 |     Vec fresh = {0};
11 |     fresh.capacity = 1;
12 |     fresh.data_size = data_size;
13 |     fresh.data = (uint8_t*) malloc(data_size);
14 |     *header = fresh;
15 |     return &header->data;
16 | }
16 |
17 | /* Returns the number of elements stored in the vector. `vec_data` is the pointer handed out
18 |  * by vec_new(); the header is found by stepping back from it by sizeof(Vec) - sizeof(void*). */
19 | size_t vec_size(void *vec_data) {
20 |     const uint64_t header_addr = (uint64_t) vec_data - (sizeof(Vec) - sizeof(void*));
21 |     const Vec *header = (Vec*) header_addr;
22 |     return header->len;
23 | }
21 |
22 | /* Returns 1 if some element of the vector equals `val`, otherwise 0.
23 |  * Elements are read at a stride of data_size bytes. Only the low min(data_size, sizeof(size_t))
24 |  * bytes take part in the comparison, and `val` is masked to the same width. The mask is built
25 |  * without ever shifting by the full width of the type.
26 |  * Element bytes are combined least-significant first (little-endian layout). */
27 | int vec_contains(void *vec_data, size_t val) {
28 |     Vec *vec = (Vec*) ((uint64_t) vec_data - (sizeof(Vec) - sizeof(void*)));
29 |     const size_t elem_sz = vec->data_size;
30 |     const size_t cmp_sz = (elem_sz < sizeof(size_t)) ? elem_sz : sizeof(size_t);
31 |     const size_t mask = (cmp_sz < sizeof(size_t)) ? (((size_t) 1 << (8 * cmp_sz)) - 1) : ~(size_t) 0;
32 |     const size_t wanted = val & mask;
33 |     const uint8_t *bytes = *(uint8_t**) vec_data; // vec_data is the address of the header's data member
34 |     for (size_t i = 0; i < vec->len; i++) {
35 |         const uint8_t *elem = bytes + i * elem_sz;
36 |         size_t elem_val = 0;
37 |         for (size_t b = 0; b < cmp_sz; b++)
38 |             elem_val |= (size_t) elem[b] << (8 * b);
39 |         if (elem_val == wanted) return 1;
40 |     }
41 |     return 0;
42 | }
31 |
--------------------------------------------------------------------------------
/test.ssa:
--------------------------------------------------------------------------------
1 | # This is a simple program that I use for testing UYB.
2 | # Programs in this file often may not work. See the `examples` directory for tested and working
3 | # files.
4 |
5 | data $msg = {b "Hello, world!", b 10, b 0}
6 | export function w $main(l %argc, l %argv) {
7 | @start
8 | call $printf(l $msg)
9 | ret 0
10 | }
11 |
--------------------------------------------------------------------------------