├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ └── c-cpp.yml ├── .gitignore ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── blackbox.png ├── examples ├── rule110.ssa ├── sum.ssa └── vararg.ssa ├── include ├── api.h ├── arena.h ├── lexer.h ├── optimisation.h ├── parser.h ├── strslice.h ├── target │ └── x86_64 │ │ └── register.h ├── utils.h └── vector.h ├── out. ├── src ├── lexer.c ├── main.c ├── optimise │ ├── copyelim.c │ ├── folding.c │ ├── optimisation.c │ └── unused_label_elim.c ├── parser.c ├── strslice.c ├── target │ ├── IR │ │ ├── build.c │ │ └── instructions.c │ └── x86_64 │ │ ├── build.c │ │ ├── instructions.c │ │ └── register.c ├── utils.c └── vector.c └── test.ssa /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem 25 | 26 | **Device** 27 | Specify the device you're using, including the CPU architecture, operating system, and perhaps the compiler you used when building UYB. 28 | 29 | **Additional context** 30 | Add any other context about the problem here. 
31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. Specify if you'd like to be assigned to work on this. 21 | -------------------------------------------------------------------------------- /.github/workflows/c-cpp.yml: -------------------------------------------------------------------------------- 1 | name: C/C++ CI 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: make 17 | run: make 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | /uyb 3 | /out 4 | /out.S 5 | /build 6 | /include/version.h 7 | /out.ssa 8 | /test* 9 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(UYB) 3 | find_program(CLANG NAMES clang) 4 | if(CLANG) 5 | set(CMAKE_C_COMPILER ${CLANG}) 6 | 
endif() 7 | add_compile_options(-Wall -Werror -g) 8 | include_directories(include) 9 | file(GLOB_RECURSE SRC_FILES "src/*.c") 10 | add_executable(uyb ${SRC_FILES}) 11 | 12 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to UYB 2 | Contributions to UYB are welcome! While this is primarily a one person project, that doesn't by any means limit it to *only* one person. However, there are some ground rules that are important to set: 3 | - Keep commit messages short. 4 | - Pull requests should explain what change was made, how it improves the project, and if the change was complex enough, it should link to documentation on the topic or explain how it works in a short overview. 5 | - Don't majorly restructure the project. 6 | - Open an [issue](https://github.com/UnmappedStack/UYB/issues/new) before starting work on a pull request and ask to be assigned to work on it. Similar rules apply here to writing a pull request description. 7 | - Most tiny contributions, such as changing a single character in a documentation file, which are clearly just for the sake of being added to the contributor list, will be ignored. 8 | 9 | Now let's write a pretty nifty compiler backend! 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. 
"Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. 
"Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. 
Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. 
Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. 
If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. 
Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. 
If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. 
Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. 
Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at https://mozilla.org/MPL/2.0/. 
361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 374 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VERSION := $(shell git rev-parse --short HEAD) 2 | .PHONY: all build install 3 | 4 | all: build install 5 | 6 | build: 7 | @echo "[CMake] Setting up configuration files..." 8 | @echo "#define COMMIT \"$(VERSION)\"" > include/version.h 9 | mkdir -p build 10 | cd build; cmake .. 11 | @echo "[CMake] Building..." 12 | cmake --build build 13 | 14 | install: 15 | @echo "[Here] Creating symbolic link in /usr/bin (password may be required)..." 16 | @if [ ! -e "/usr/bin/uyb" ]; then sudo ln -s $(realpath build/uyb) /usr/bin/uyb; fi 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |

The UYB Compiler Backend

3 | 4 | UYB is a from-scratch optimising compiler backend written in C, designed to be small and have fast compilation, while still being complete enough to be used for an actual compiler. 5 | 6 | [![License badge](https://img.shields.io/badge/license-MPL_2.0-red?style=flat-square&label=license)](https://github.com/UnmappedStack/UYB/blob/main/LICENSE) 7 | ![Repo size badge](https://img.shields.io/github/repo-size/UnmappedStack/UYB?style=flat-square) 8 | ![Commit activity badge](https://img.shields.io/github/commit-activity/t/UnmappedStack/UYB?style=flat-square) 9 | 10 |
Black box icon 11 |
12 | 13 | UYB is based heavily on QBE IR syntax and is almost fully instruction set compatible. The goal is self hosting through [cproc](https://github.com/michaelforney/cproc), a C compiler that targets QBE's IR. 14 | 15 | There's a Discord server for UYB where you can get help setting up your language with UYB, help contribute, or just have a chat; you can join it [here](https://discord.gg/W5uYqPAJg5). 16 | 17 | ## Why not just the original QBE? 18 | I myself absolutely love QBE, and am a huge fan of its "80% of the performance in 10% of the code" philosophy. However, there are a few things that UYB improves upon (or that may simply reflect a different use case rather than being better): 19 | - **QBE doesn't support inline assembly.** In most cases, this is fine, but when working on a very low level language where you simply need to interact with the CPU's instructions directly, a lack of inline assembly support can be unfortunate, and it's a massive Quality of Life feature to not need to put every single piece of assembly in a separate file. 20 | - **There's still room for QBE to be smaller.** UYB accepts slower runtime speeds of generated assembly, and uses fewer optimisations, in return for a smaller amount of code and faster compilation - the goal with UYB is more like 60% of the speed in 5% of the code. 21 | - **Debug symbols support.** Unfortunately, QBE doesn't support debug symbols, which means debugging generated programs with GDB is nearly impossible to do effectively. 22 | 23 | ## Support 24 | UYB supports every QBE instruction except for floating point instructions. UYB also supports: 25 | 26 | ### Optimisations 27 | - Folding 28 | - Copy elimination 29 | - Unused label removal 30 | 31 | ### Targets 32 | - x86_64 generic System-V 33 | - SSA IR 34 | 35 | ## Usage 36 | Since UYB is supposed to be based on QBE's IR, you can see [QBE's documentation](https://c9x.me/compile/doc/il.html) for a full IR reference. 
37 | 38 | There are more example UYB programs in `/examples`, or try running this small "Hello World" program to test UYB like so: 39 | - Copy this code into a file named `test.ssa` or something similar: 40 | ``` 41 | data $msg = {b "Hello, world!", b 10, b 0} 42 | export function w $main(l %argc, l %argv) { 43 | @start 44 | call $printf(l $msg) 45 | ret 0 46 | } 47 | ``` 48 | - Compile the IR to x86_64 Assembly using the following command: 49 | ```sh 50 | $ uyb test.ssa -o out.S 51 | ``` 52 | - Use a standard toolchain to assemble and link the generated Assembly to an executable program, then run it: 53 | ```sh 54 | $ gcc out.S -o out 55 | $ ./out 56 | Hello, world! 57 | ``` 58 | 59 | **To use debug symbols**, you can use GAS (AT&T)-style syntax. Taking the previous program as an example: 60 | ``` 61 | # Define the source file that this SSA was generated from. 62 | # The first argument is the index ID of this file (so if you have more files then they need to each 63 | # have a different ID) and the second argument is the filename. 64 | .file 1 "test.c" 65 | 66 | data $msg = {b "Hello, world!", b 10, b 0} 67 | export function w $main(l %argc, l %argv) { 68 | @start 69 | # The .loc pseudoinstruction specifies where in the file the following instructions are built from. 70 | # The first argument is the index of the file it came from (same as the ID for relevant .file), 71 | # and the next two arguments are the row and column, respectively. 72 | .loc 1 3 0 73 | call $printf(l $msg) 74 | .loc 1 4 0 75 | ret 0 76 | } 77 | ``` 78 | 79 | **To use inline assembly**, you can use the following syntax. Note that inline assembly is not supported in the IR self-targeting target. 80 | ``` 81 | asm("" : %inputValue | "", %inputValue2 | "" : %outputValue | "", %outputValue2 | "" : "", "") 82 | ``` 83 | The input types are separated by colons (`:`): 84 | 85 | - The first input type is the raw assembly. 
It cannot contain any newlines within the source IL; however, it may contain escape sequences such as `\t` and `\n`. 86 | - The second input type is the input list, split by commas. Each entry is in the format of `%label | "%rax"`, where the label contains the input value to pass in and `%rax` is replaced with the register that the input should be passed in. The label and the register must both be 64 bits. 87 | - The third input type is the output list, which follows the same format as the input list. 88 | - The fourth and final input type is the clobber list. This is a list of string literals containing register names split by commas, in the form of `"%rax", "%rbx"`. These shouldn't contain input or output registers, but they *can*. These are the registers that are used by the inline assembly; they are listed so that UYB knows their contents may be overwritten and treats them with care. They must be 64-bit general-purpose registers. 89 | 90 | Note that checking of most inline assembly is left to the assembler and linker in order to keep UYB lightweight, which means that programs containing inline assembly cannot be confirmed to work until they have been assembled and linked. 91 | 92 | You can use `uyb --help` to see all the command line options for UYB. 93 | 94 | ## Building 95 | To clone and build UYB, simply run: 96 | ```sh 97 | git clone https://github.com/UnmappedStack/UYB 98 | cd UYB 99 | make 100 | ``` 101 | This will also install a symlink in your bin directory so that you can call UYB from anywhere. CMake is required. 102 | 103 | ## Thanks 104 | UYB uses [Tsoding's arena allocator](https://github.com/tsoding/arena) for quick allocations. 105 | 106 | ## License 107 | This project is under the Mozilla Public License 2.0, with the exclusion of `include/arena.h` which is from [this repo](https://github.com/tsoding/arena) and has a separate license. See `LICENSE` for more information. 
108 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | UYB is currently still in a beta phase and thus cannot guarantee that everything is secure. Use at your own risk, and avoid using for production situations. 4 | 5 | To report a vulnerability, create an [issue](https://github.com/UnmappedStack/UYB/issues/new) with the bug report template. 6 | -------------------------------------------------------------------------------- /blackbox.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnmappedStack/UYB/fd288c4c4695d5682e1df006863ed68d44b2102e/blackbox.png -------------------------------------------------------------------------------- /examples/rule110.ssa: -------------------------------------------------------------------------------- 1 | # Generated by lewc compiler, a compiler written by Dcraftbg: https://github.com/Dcraftbg 2 | # Target: Linux x86_64 3 | # main :: () -> i32 4 | export function w $main () { 5 | @start 6 | %board =l alloc16 120 7 | %.s1 =l copy %board 8 | %.s2 =w copy 0 9 | %.s3 =w copy 30 10 | %.s4 =w copy 4 11 | %.s5 =w mul %.s3, %.s4 12 | call $memset(l %.s1, w %.s2, w %.s5) 13 | %i =l alloc4 4 14 | %.s6 =w copy 0 15 | storew %.s6, %i 16 | %j =l alloc4 4 17 | %.s7 =w copy 0 18 | storew %.s7, %j 19 | %.s8 =l copy %board 20 | %.s9 =w copy 30 21 | %.s10 =w copy 2 22 | %.s11 =w sub %.s9, %.s10 23 | %.s12 =l extsw %.s11 24 | %.s13 =l mul %.s12, 4 25 | %.s14 =l add %.s8, %.s13 26 | %.s15 =w copy 1 27 | storew %.s15, %.s14 28 | @while_cond_16 29 | %.s17 =w loadsw %i 30 | %.s18 =w copy 30 31 | %.s19 =w copy 2 32 | %.s20 =w sub %.s18, %.s19 33 | %.s21 =w csltw %.s17, %.s20 34 | jnz %.s21, @while_body_16, @while_end_16 35 | @while_body_16 36 | %.s22 =l copy %j 37 | %.s23 =w copy 0 38 | storew %.s23, %.s22 39 | @while_cond_24 40 | 
%.s25 =w loadsw %j 41 | %.s26 =w copy 30 42 | %.s27 =w csltw %.s25, %.s26 43 | jnz %.s27, @while_body_24, @while_end_24 44 | @while_body_24 45 | %.s28 =l copy $.g0 46 | %.s29 =l copy %board 47 | %.s30 =w loadsw %j 48 | %.s31 =l extsw %.s30 49 | %.s32 =l mul %.s31, 4 50 | %.s33 =l add %.s29, %.s32 51 | %.s34 =w loadsw %.s33 52 | %.s35 =l extsw %.s34 53 | %.s37 =l add %.s28, %.s35 54 | %.s38 =w loadub %.s37 55 | call $putchar(w %.s38) 56 | %.s39 =l copy %j 57 | %.s40 =w loadsw %j 58 | %.s41 =w copy 1 59 | %.s42 =w add %.s40, %.s41 60 | storew %.s42, %.s39 61 | jmp @while_cond_24 62 | @while_end_24 63 | %.s43 =w copy 10 64 | call $putchar(w %.s43) 65 | %pattern =l alloc4 4 66 | %.s44 =l copy %board 67 | %.s45 =w copy 0 68 | %.s46 =l extsw %.s45 69 | %.s47 =l mul %.s46, 4 70 | %.s48 =l add %.s44, %.s47 71 | %.s49 =w loadsw %.s48 72 | %.s50 =w copy 1 73 | %.s51 =w shl %.s49, %.s50 74 | %.s52 =l copy %board 75 | %.s53 =w copy 1 76 | %.s54 =l extsw %.s53 77 | %.s55 =l mul %.s54, 4 78 | %.s56 =l add %.s52, %.s55 79 | %.s57 =w loadsw %.s56 80 | %.s58 =w or %.s51, %.s57 81 | storew %.s58, %pattern 82 | %.s59 =l copy %j 83 | %.s60 =w copy 1 84 | storew %.s60, %.s59 85 | @while_cond_61 86 | %.s62 =w loadsw %j 87 | %.s63 =w copy 30 88 | %.s64 =w copy 1 89 | %.s65 =w sub %.s63, %.s64 90 | %.s66 =w csltw %.s62, %.s65 91 | jnz %.s66, @while_body_61, @while_end_61 92 | @while_body_61 93 | %.s67 =l copy %pattern 94 | %.s68 =w loadsw %pattern 95 | %.s69 =w copy 1 96 | %.s70 =w shl %.s68, %.s69 97 | %.s71 =w copy 7 98 | %.s72 =w and %.s70, %.s71 99 | %.s73 =l copy %board 100 | %.s74 =w loadsw %j 101 | %.s75 =w copy 1 102 | %.s76 =w add %.s74, %.s75 103 | %.s77 =l extsw %.s76 104 | %.s78 =l mul %.s77, 4 105 | %.s79 =l add %.s73, %.s78 106 | %.s80 =w loadsw %.s79 107 | %.s81 =w or %.s72, %.s80 108 | storew %.s81, %.s67 109 | %.s82 =l copy %board 110 | %.s83 =w loadsw %j 111 | %.s84 =l extsw %.s83 112 | %.s85 =l mul %.s84, 4 113 | %.s86 =l add %.s82, %.s85 114 | %.s87 =w copy 110 115 | 
%.s88 =w loadsw %pattern 116 | %.s89 =w shr %.s87, %.s88 117 | %.s90 =w copy 1 118 | %.s91 =w and %.s89, %.s90 119 | storew %.s91, %.s86 120 | %.s92 =l copy %j 121 | %.s93 =w loadsw %j 122 | %.s94 =w copy 1 123 | %.s95 =w add %.s93, %.s94 124 | storew %.s95, %.s92 125 | jmp @while_cond_61 126 | @while_end_61 127 | %.s96 =l copy %i 128 | %.s97 =w loadsw %i 129 | %.s98 =w copy 1 130 | %.s99 =w add %.s97, %.s98 131 | storew %.s99, %.s96 132 | jmp @while_cond_16 133 | @while_end_16 134 | %.s100 =w copy 0 135 | ret %.s100 136 | } 137 | # extern putchar :: (c: u8) 138 | # extern memset :: (data: *i32, c: i32, n: i32) 139 | data $.g0 = {b 32, b 42, b 0 } 140 | -------------------------------------------------------------------------------- /examples/sum.ssa: -------------------------------------------------------------------------------- 1 | # This example was taken directly from QBE's website: https://c9x.me/compile/ 2 | 3 | function w $add(w %a, w %b) { # Define a function add 4 | @start 5 | %c =w add %a, %b # Adds the 2 arguments 6 | ret %c # Return the result 7 | } 8 | export function w $main() { # Main function 9 | @start 10 | %r =w call $add(w 1, w 1) # Call add(1, 1) 11 | call $printf(l $fmt, ..., w %r) # Show the result 12 | ret 0 13 | } 14 | data $fmt = { b "One and one make %d!\n", b 0 } 15 | -------------------------------------------------------------------------------- /examples/vararg.ssa: -------------------------------------------------------------------------------- 1 | function w $varargtest(...) 
{ 2 | @start 3 | %ap =l alloc8 32 4 | vastart %ap 5 | %val1 =l vaarg %ap 6 | %val2 =l vaarg %ap 7 | %val3 =l vaarg %ap 8 | %val4 =l vaarg %ap 9 | %val5 =l vaarg %ap 10 | %val6 =l vaarg %ap 11 | call $printf(l $fmt, l %val1, l %val2, l %val3, l %val4, l %val5, l %val6) 12 | ret 0 13 | } 14 | 15 | export function w $main() { 16 | @start 17 | call $varargtest(w 1, w 2, w 3, w 4, w 5, w 6) 18 | ret 0 19 | } 20 | 21 | data $fmt = { b "The numbers recieved from varargs are %zu, %zu, %zu, %zu, %zu, %zu.\n", b 0 } 22 | -------------------------------------------------------------------------------- /include/api.h: -------------------------------------------------------------------------------- 1 | /* This is the "library"/API file which is what is used to interact with the actual backend through 2 | * a non-textual representation. 3 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */ 4 | #pragma once 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | typedef enum { 12 | ADD, 13 | SUB, 14 | DIV, 15 | MUL, 16 | COPY, 17 | RET, 18 | CALL, 19 | JZ, 20 | NEG, 21 | UDIV, 22 | REM, 23 | UREM, 24 | AND, 25 | OR, 26 | XOR, 27 | SHL, 28 | SHR, 29 | STORE, 30 | LOAD, 31 | BLIT, 32 | ALLOC, 33 | EQ, 34 | NE, 35 | SLE, // less than or equal (signed) 36 | SLT, // less than (signed) 37 | SGE, // higher than or equal (signed) 38 | SGT, // higher than (signed) 39 | ULE, // less than or equal (unsigned) 40 | ULT, // less than (unsigned) 41 | UGE, // higher than or equal (unsigned) 42 | UGT, // higher than (unsigned) 43 | EXT, 44 | HLT, 45 | BLKLBL, 46 | JMP, 47 | JNZ, 48 | PHI, 49 | VASTART, 50 | VAARG, 51 | LOC, 52 | ASM, 53 | } Instruction; 54 | 55 | typedef enum { 56 | Bits8, 57 | Bits16, 58 | Bits32, 59 | Bits64, 60 | None, 61 | } Type; 62 | 63 | typedef enum { 64 | Label, 65 | Number, 66 | Str, 67 | StrLit, 68 | FunctionArgs, 69 | BlkLbl, 70 | PhiArg, 71 | InlineAssembly, 72 | Empty, 73 | } ValType; 74 | 75 | 
typedef struct { 76 | char **args; 77 | Type *arg_sizes; 78 | char **arg_struct_types; 79 | bool *args_are_structs; 80 | ValType *arg_types; 81 | size_t num_args; 82 | } FunctionArgList; 83 | 84 | typedef struct { 85 | char *section; // NULL if in data section 86 | char *name; 87 | ValType *types; // Can only be StrLit or number. Anything else should panic. 88 | Type *sizes; 89 | size_t *vals; 90 | size_t num_vals; 91 | size_t alignment; // default is 1 92 | } Global; 93 | 94 | typedef struct { 95 | char *label; // to store result in (NULL if none (only if it's a function or something)) 96 | Instruction instruction; 97 | Type type; 98 | uint64_t vals[3]; 99 | ValType val_types[3]; 100 | } Statement; 101 | 102 | typedef struct { 103 | bool type_is_struct; 104 | union { 105 | Type type; 106 | char *type_struct; 107 | }; 108 | char *label; 109 | } FunctionArgument; 110 | 111 | typedef struct { 112 | bool is_global; 113 | char *name; 114 | FunctionArgument *args; 115 | size_t num_args; 116 | bool ret_is_struct; 117 | union { 118 | Type return_type; 119 | char *return_struct; 120 | }; 121 | Statement *statements; 122 | size_t num_statements; 123 | bool is_variadic; 124 | } Function; 125 | 126 | typedef struct { 127 | char *name; 128 | size_t alignment; // default is size of largest value 129 | size_t size_bytes; 130 | } AggregateType; 131 | 132 | typedef struct { 133 | char *blklbl_name; 134 | size_t val; 135 | ValType type; 136 | } PhiVal; 137 | 138 | typedef struct { 139 | char *fname; 140 | size_t id; 141 | } FileDbg; 142 | 143 | typedef struct { 144 | char *reg; 145 | char *label; 146 | ValType type; 147 | } InlineAsmIO; 148 | 149 | typedef struct { 150 | char *assembly; 151 | InlineAsmIO **inputs_vec; 152 | InlineAsmIO **outputs_vec; 153 | char* **clobbers_vec; 154 | } InlineAsm; 155 | 156 | // for each target 157 | void build_program_x86_64(Function *IR, size_t num_functions, Global *global_vars, size_t num_global_vars, AggregateType *aggtypes, size_t 
num_aggtypes, FileDbg *dbgfiles, size_t num_dbgfiles, FILE *outf); 158 | void build_program_IR(Function *IR, size_t num_functions, Global *global_vars, size_t num_global_vars, AggregateType *aggtypes, size_t num_aggtypes, FileDbg *dbgfiles, size_t num_dbgfiles, FILE *outf); 159 | 160 | extern void (*instructions_x86_64[41])(uint64_t[2], ValType[2], Statement, String*); 161 | extern void (*instructions_IR[])(uint64_t[2], ValType[2], Statement, FILE*); 162 | char *instruction_as_str(Instruction instr); 163 | char *type_as_str(Type type, char *struct_type, bool is_struct); 164 | void disasm_instr(String *fnbuf, Statement statement); 165 | -------------------------------------------------------------------------------- /include/arena.h: -------------------------------------------------------------------------------- 1 | /* This file is not by me, but rather from the header-only C arena allocator library 2 | * which can be found here: https://github.com/tsoding/arena, with some modifications for simple macros to 3 | * make it easier to integrate with the rest of the project. 4 | * 5 | * This is not under the same license as the rest of UYB. */ 6 | 7 | // Copyright 2022 Alexey Kutepov 8 | 9 | // Permission is hereby granted, free of charge, to any person obtaining 10 | // a copy of this software and associated documentation files (the 11 | // "Software"), to deal in the Software without restriction, including 12 | // without limitation the rights to use, copy, modify, merge, publish, 13 | // distribute, sublicense, and/or sell copies of the Software, and to 14 | // permit persons to whom the Software is furnished to do so, subject to 15 | // the following conditions: 16 | 17 | // The above copyright notice and this permission notice shall be 18 | // included in all copies or substantial portions of the Software. 
19 | 20 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 21 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 22 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 23 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 24 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 25 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 26 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 27 | #pragma once 28 | 29 | #ifndef ARENA_H_ 30 | #define ARENA_H_ 31 | 32 | #define aalloc(bytes) arena_alloc(&arena, bytes) 33 | #define delete_arenas() arena_free(&arena) 34 | 35 | #include 36 | #include 37 | 38 | #ifndef ARENA_NOSTDIO 39 | #include 40 | #include 41 | #endif // ARENA_NOSTDIO 42 | 43 | #ifndef ARENA_ASSERT 44 | #include 45 | #define ARENA_ASSERT assert 46 | #endif 47 | 48 | #define ARENA_BACKEND_LIBC_MALLOC 0 49 | #define ARENA_BACKEND_LINUX_MMAP 1 50 | #define ARENA_BACKEND_WIN32_VIRTUALALLOC 2 51 | #define ARENA_BACKEND_WASM_HEAPBASE 3 52 | 53 | #ifndef ARENA_BACKEND 54 | #define ARENA_BACKEND ARENA_BACKEND_LIBC_MALLOC 55 | #endif // ARENA_BACKEND 56 | 57 | typedef struct Region Region; 58 | 59 | struct Region { 60 | Region *next; 61 | size_t count; 62 | size_t capacity; 63 | uintptr_t data[]; 64 | }; 65 | 66 | typedef struct { 67 | Region *begin, *end; 68 | } Arena; 69 | 70 | extern Arena arena; // defined in src/main.c 71 | 72 | typedef struct { 73 | Region *region; 74 | size_t count; 75 | } Arena_Mark; 76 | 77 | #ifndef ARENA_REGION_DEFAULT_CAPACITY 78 | #define ARENA_REGION_DEFAULT_CAPACITY (8*1024) 79 | #endif // ARENA_REGION_DEFAULT_CAPACITY 80 | 81 | Region *new_region(size_t capacity); 82 | void free_region(Region *r); 83 | 84 | void *arena_alloc(Arena *a, size_t size_bytes); 85 | void *arena_realloc(Arena *a, void *oldptr, size_t oldsz, size_t newsz); 86 | char *arena_strdup(Arena *a, const char *cstr); 87 | void 
*arena_memdup(Arena *a, void *data, size_t size); 88 | #ifndef ARENA_NOSTDIO 89 | char *arena_sprintf(Arena *a, const char *format, ...); 90 | #endif // ARENA_NOSTDIO 91 | 92 | Arena_Mark arena_snapshot(Arena *a); 93 | void arena_reset(Arena *a); 94 | void arena_rewind(Arena *a, Arena_Mark m); 95 | void arena_free(Arena *a); 96 | void arena_trim(Arena *a); 97 | 98 | #ifndef ARENA_DA_INIT_CAP 99 | #define ARENA_DA_INIT_CAP 256 100 | #endif // ARENA_DA_INIT_CAP 101 | 102 | #ifdef __cplusplus 103 | #define cast_ptr(ptr) (decltype(ptr)) 104 | #else 105 | #define cast_ptr(...) 106 | #endif 107 | 108 | #define arena_da_append(a, da, item) \ 109 | do { \ 110 | if ((da)->count >= (da)->capacity) { \ 111 | size_t new_capacity = (da)->capacity == 0 ? ARENA_DA_INIT_CAP : (da)->capacity*2; \ 112 | (da)->items = cast_ptr((da)->items)arena_realloc( \ 113 | (a), (da)->items, \ 114 | (da)->capacity*sizeof(*(da)->items), \ 115 | new_capacity*sizeof(*(da)->items)); \ 116 | (da)->capacity = new_capacity; \ 117 | } \ 118 | \ 119 | (da)->items[(da)->count++] = (item); \ 120 | } while (0) 121 | 122 | // Append several items to a dynamic array 123 | #define arena_da_append_many(a, da, new_items, new_items_count) \ 124 | do { \ 125 | if ((da)->count + (new_items_count) > (da)->capacity) { \ 126 | size_t new_capacity = (da)->capacity; \ 127 | if (new_capacity == 0) new_capacity = ARENA_DA_INIT_CAP; \ 128 | while ((da)->count + (new_items_count) > new_capacity) new_capacity *= 2; \ 129 | (da)->items = cast_ptr((da)->items)arena_realloc( \ 130 | (a), (da)->items, \ 131 | (da)->capacity*sizeof(*(da)->items), \ 132 | new_capacity*sizeof(*(da)->items)); \ 133 | (da)->capacity = new_capacity; \ 134 | } \ 135 | arena_memcpy((da)->items + (da)->count, (new_items), (new_items_count)*sizeof(*(da)->items)); \ 136 | (da)->count += (new_items_count); \ 137 | } while (0) 138 | 139 | // Append a sized buffer to a string builder 140 | #define arena_sb_append_buf arena_da_append_many 141 | 142 | // Append 
a NULL-terminated string to a string builder 143 | #define arena_sb_append_cstr(a, sb, cstr) \ 144 | do { \ 145 | const char *s = (cstr); \ 146 | size_t n = arena_strlen(s); \ 147 | arena_da_append_many(a, sb, s, n); \ 148 | } while (0) 149 | 150 | // Append a single NULL character at the end of a string builder. So then you can 151 | // use it a NULL-terminated C string 152 | #define arena_sb_append_null(a, sb) arena_da_append(a, sb, 0) 153 | 154 | #endif // ARENA_H_ 155 | 156 | #ifdef ARENA_IMPLEMENTATION 157 | 158 | #if ARENA_BACKEND == ARENA_BACKEND_LIBC_MALLOC 159 | #include 160 | 161 | // TODO: instead of accepting specific capacity new_region() should accept the size of the object we want to fit into the region 162 | // It should be up to new_region() to decide the actual capacity to allocate 163 | Region *new_region(size_t capacity) 164 | { 165 | size_t size_bytes = sizeof(Region) + sizeof(uintptr_t)*capacity; 166 | // TODO: it would be nice if we could guarantee that the regions are allocated by ARENA_BACKEND_LIBC_MALLOC are page aligned 167 | Region *r = (Region*)malloc(size_bytes); 168 | ARENA_ASSERT(r); // TODO: since ARENA_ASSERT is disableable go through all the places where we use it to check for failed memory allocation and return with NULL there. 
169 | r->next = NULL; 170 | r->count = 0; 171 | r->capacity = capacity; 172 | return r; 173 | } 174 | 175 | void free_region(Region *r) 176 | { 177 | free(r); 178 | } 179 | #elif ARENA_BACKEND == ARENA_BACKEND_LINUX_MMAP 180 | #include 181 | #include 182 | 183 | Region *new_region(size_t capacity) 184 | { 185 | size_t size_bytes = sizeof(Region) + sizeof(uintptr_t) * capacity; 186 | Region *r = mmap(NULL, size_bytes, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 187 | ARENA_ASSERT(r != MAP_FAILED); 188 | r->next = NULL; 189 | r->count = 0; 190 | r->capacity = capacity; 191 | return r; 192 | } 193 | 194 | void free_region(Region *r) 195 | { 196 | size_t size_bytes = sizeof(Region) + sizeof(uintptr_t) * r->capacity; 197 | int ret = munmap(r, size_bytes); 198 | ARENA_ASSERT(ret == 0); 199 | } 200 | 201 | #elif ARENA_BACKEND == ARENA_BACKEND_WIN32_VIRTUALALLOC 202 | 203 | #if !defined(_WIN32) 204 | # error "Current platform is not Windows" 205 | #endif 206 | 207 | #define WIN32_LEAN_AND_MEAN 208 | #include 209 | 210 | #define INV_HANDLE(x) (((x) == NULL) || ((x) == INVALID_HANDLE_VALUE)) 211 | 212 | Region *new_region(size_t capacity) 213 | { 214 | SIZE_T size_bytes = sizeof(Region) + sizeof(uintptr_t) * capacity; 215 | Region *r = VirtualAllocEx( 216 | GetCurrentProcess(), /* Allocate in current process address space */ 217 | NULL, /* Unknown position */ 218 | size_bytes, /* Bytes to allocate */ 219 | MEM_COMMIT | MEM_RESERVE, /* Reserve and commit allocated page */ 220 | PAGE_READWRITE /* Permissions ( Read/Write )*/ 221 | ); 222 | if (INV_HANDLE(r)) 223 | ARENA_ASSERT(0 && "VirtualAllocEx() failed."); 224 | 225 | r->next = NULL; 226 | r->count = 0; 227 | r->capacity = capacity; 228 | return r; 229 | } 230 | 231 | void free_region(Region *r) 232 | { 233 | if (INV_HANDLE(r)) 234 | return; 235 | 236 | BOOL free_result = VirtualFreeEx( 237 | GetCurrentProcess(), /* Deallocate from current process address space */ 238 | (LPVOID)r, /* Address to deallocate 
*/ 239 | 0, /* Bytes to deallocate ( Unknown, deallocate entire page ) */ 240 | MEM_RELEASE /* Release the page ( And implicitly decommit it ) */ 241 | ); 242 | 243 | if (FALSE == free_result) 244 | ARENA_ASSERT(0 && "VirtualFreeEx() failed."); 245 | } 246 | 247 | #elif ARENA_BACKEND == ARENA_BACKEND_WASM_HEAPBASE 248 | 249 | // Stolen from https://surma.dev/things/c-to-webassembly/ 250 | 251 | extern unsigned char __heap_base; 252 | // Since ARENA_BACKEND_WASM_HEAPBASE entirely hijacks __heap_base it is expected that no other means of memory 253 | // allocation are used except the arenas. 254 | unsigned char* bump_pointer = &__heap_base; 255 | // TODO: provide a way to deallocate all the arenas at once by setting bump_pointer back to &__heap_base? 256 | 257 | // __builtin_wasm_memory_size and __builtin_wasm_memory_grow are defined in units of page sizes 258 | #define ARENA_WASM_PAGE_SIZE (64*1024) 259 | 260 | Region *new_region(size_t capacity) 261 | { 262 | size_t size_bytes = sizeof(Region) + sizeof(uintptr_t)*capacity; 263 | Region *r = (void*)bump_pointer; 264 | 265 | // grow memory brk() style 266 | size_t current_memory_size = ARENA_WASM_PAGE_SIZE * __builtin_wasm_memory_size(0); 267 | size_t desired_memory_size = (size_t) bump_pointer; 268 | if (desired_memory_size > current_memory_size) { 269 | size_t delta_bytes = desired_memory_size - current_memory_size; 270 | size_t delta_pages = (delta_bytes + (ARENA_WASM_PAGE_SIZE - 1))/ARENA_WASM_PAGE_SIZE; 271 | if (__builtin_wasm_memory_grow(0, delta_pages) < 0) { 272 | ARENA_ASSERT(0 && "memory.grow failed"); 273 | return NULL; 274 | } 275 | } 276 | 277 | bump_pointer += size_bytes; 278 | 279 | r->next = NULL; 280 | r->count = 0; 281 | r->capacity = capacity; 282 | return r; 283 | } 284 | 285 | void free_region(Region *r) 286 | { 287 | // Since ARENA_BACKEND_WASM_HEAPBASE uses a primitive bump allocator to 288 | // allocate the regions, free_region() does nothing. 
It is generally 289 | // not recommended to free arenas anyway since it is better to keep 290 | // reusing already allocated memory with arena_reset(). 291 | (void) r; 292 | } 293 | 294 | #else 295 | # error "Unknown Arena backend" 296 | #endif 297 | 298 | // TODO: add debug statistic collection mode for arena 299 | // Should collect things like: 300 | // - How many times new_region was called 301 | // - How many times existing region was skipped 302 | // - How many times allocation exceeded ARENA_REGION_DEFAULT_CAPACITY 303 | 304 | void *arena_alloc(Arena *a, size_t size_bytes) 305 | { 306 | size_t size = (size_bytes + sizeof(uintptr_t) - 1)/sizeof(uintptr_t); 307 | 308 | if (a->end == NULL) { 309 | ARENA_ASSERT(a->begin == NULL); 310 | size_t capacity = ARENA_REGION_DEFAULT_CAPACITY; 311 | if (capacity < size) capacity = size; 312 | a->end = new_region(capacity); 313 | a->begin = a->end; 314 | } 315 | 316 | while (a->end->count + size > a->end->capacity && a->end->next != NULL) { 317 | a->end = a->end->next; 318 | } 319 | 320 | if (a->end->count + size > a->end->capacity) { 321 | ARENA_ASSERT(a->end->next == NULL); 322 | size_t capacity = ARENA_REGION_DEFAULT_CAPACITY; 323 | if (capacity < size) capacity = size; 324 | a->end->next = new_region(capacity); 325 | a->end = a->end->next; 326 | } 327 | 328 | void *result = &a->end->data[a->end->count]; 329 | a->end->count += size; 330 | return result; 331 | } 332 | 333 | void *arena_realloc(Arena *a, void *oldptr, size_t oldsz, size_t newsz) 334 | { 335 | if (newsz <= oldsz) return oldptr; 336 | void *newptr = arena_alloc(a, newsz); 337 | char *newptr_char = (char*)newptr; 338 | char *oldptr_char = (char*)oldptr; 339 | for (size_t i = 0; i < oldsz; ++i) { 340 | newptr_char[i] = oldptr_char[i]; 341 | } 342 | return newptr; 343 | } 344 | 345 | size_t arena_strlen(const char *s) 346 | { 347 | size_t n = 0; 348 | while (*s++) n++; 349 | return n; 350 | } 351 | 352 | void *arena_memcpy(void *dest, const void *src, size_t n) 
353 | { 354 | char *d = dest; 355 | const char *s = src; 356 | for (; n; n--) *d++ = *s++; 357 | return dest; 358 | } 359 | 360 | char *arena_strdup(Arena *a, const char *cstr) 361 | { 362 | size_t n = arena_strlen(cstr); 363 | char *dup = (char*)arena_alloc(a, n + 1); 364 | arena_memcpy(dup, cstr, n); 365 | dup[n] = '\0'; 366 | return dup; 367 | } 368 | 369 | void *arena_memdup(Arena *a, void *data, size_t size) 370 | { 371 | return arena_memcpy(arena_alloc(a, size), data, size); 372 | } 373 | 374 | #ifndef ARENA_NOSTDIO 375 | char *arena_sprintf(Arena *a, const char *format, ...) 376 | { 377 | va_list args; 378 | va_start(args, format); 379 | int n = vsnprintf(NULL, 0, format, args); 380 | va_end(args); 381 | 382 | ARENA_ASSERT(n >= 0); 383 | char *result = (char*)arena_alloc(a, n + 1); 384 | va_start(args, format); 385 | vsnprintf(result, n + 1, format, args); 386 | va_end(args); 387 | 388 | return result; 389 | } 390 | #endif // ARENA_NOSTDIO 391 | 392 | Arena_Mark arena_snapshot(Arena *a) 393 | { 394 | Arena_Mark m; 395 | if(a->end == NULL){ //snapshot of uninitialized arena 396 | ARENA_ASSERT(a->begin == NULL); 397 | m.region = a->end; 398 | m.count = 0; 399 | }else{ 400 | m.region = a->end; 401 | m.count = a->end->count; 402 | } 403 | 404 | return m; 405 | } 406 | 407 | void arena_reset(Arena *a) 408 | { 409 | for (Region *r = a->begin; r != NULL; r = r->next) { 410 | r->count = 0; 411 | } 412 | 413 | a->end = a->begin; 414 | } 415 | 416 | void arena_rewind(Arena *a, Arena_Mark m) 417 | { 418 | if(m.region == NULL){ //snapshot of uninitialized arena 419 | arena_reset(a); //leave allocation 420 | return; 421 | } 422 | 423 | m.region->count = m.count; 424 | for (Region *r = m.region->next; r != NULL; r = r->next) { 425 | r->count = 0; 426 | } 427 | 428 | a->end = m.region; 429 | } 430 | 431 | void arena_free(Arena *a) 432 | { 433 | Region *r = a->begin; 434 | while (r) { 435 | Region *r0 = r; 436 | r = r->next; 437 | free_region(r0); 438 | } 439 | a->begin = 
NULL; 440 | a->end = NULL; 441 | } 442 | 443 | void arena_trim(Arena *a){ 444 | Region *r = a->end->next; 445 | while (r) { 446 | Region *r0 = r; 447 | r = r->next; 448 | free_region(r0); 449 | } 450 | a->end->next = NULL; 451 | } 452 | 453 | #endif // ARENA_IMPLEMENTATION 454 | -------------------------------------------------------------------------------- /include/lexer.h: -------------------------------------------------------------------------------- 1 | /* Header for ../src/lexer.h, the lexer for the textual IR for UYB. 2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */ 3 | #pragma once 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | typedef enum { 10 | TokLabel, // %labelname 11 | TokRawStr, // value 12 | TokStrLit, // "value" 13 | TokBlockLabel, 14 | TokInteger, 15 | TokAssign, // =t (with t being the type, stored in val (types defined in api.h)) 16 | TokEqu, // just =, no type 17 | TokLBrace, TokRBrace, 18 | TokLParen, TokRParen, 19 | TokColon, 20 | TokBar, 21 | TokComma, 22 | TokNewLine, 23 | TokTripleDot, 24 | TokAggType, 25 | TokFunction, TokExport, TokData, TokSection, TokAlign, TokType, TokFile, // keywords 26 | } TokenType; 27 | 28 | typedef struct { 29 | size_t line; 30 | TokenType type; 31 | uint64_t val; 32 | } Token; 33 | 34 | void lex_line(char *str, size_t line_num, Token **ret); 35 | Token **lex_file(FILE *f); 36 | char *token_to_str(TokenType ttype); 37 | Type char_to_type(char t_ch); 38 | -------------------------------------------------------------------------------- /include/optimisation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | typedef struct { 6 | char *label; 7 | size_t val; 8 | ValType type; 9 | } CopyVal; 10 | 11 | void optimise(Function *IR, size_t num_functions); 12 | 13 | /* Specific optimisations */ 14 | void opt_fold(Function *IR, size_t num_functions); 15 | void 
opt_copy_elim(Function *IR, size_t num_functions); 16 | void opt_unused_label_elim(Function *IR, size_t num_functions); 17 | -------------------------------------------------------------------------------- /include/parser.h: -------------------------------------------------------------------------------- 1 | /* Header for ../src/parser.c, the parser for the textual IR for UYB. 2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */ 3 | #pragma once 4 | #include 5 | 6 | Function **parse_program(Token **toks, Global ***globals_buf, AggregateType ***aggtypes_buf, FileDbg ***filesdbg_buf); 7 | -------------------------------------------------------------------------------- /include/strslice.h: -------------------------------------------------------------------------------- 1 | /* Header file for string slice implementation for UYB. 2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */ 3 | #pragma once 4 | #include 5 | 6 | typedef struct { 7 | char *data; 8 | size_t len; 9 | } String; 10 | 11 | String *string_from(char *from); 12 | void string_push(String *str, char *new); 13 | void string_push_fmt(String *str, char *fmt, ...); 14 | -------------------------------------------------------------------------------- /include/target/x86_64/register.h: -------------------------------------------------------------------------------- 1 | /* Header file for ../src/register.c, the register allocator for the UYB compiler backend. 2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. 
*/ 3 | #pragma once 4 | #include 5 | #include 6 | 7 | #define update_regalloc() regalloc.statement_idx++ 8 | 9 | static char *arg_regs[] = { 10 | "%rdi", 11 | "%rsi", 12 | "%rdx", 13 | "%rcx", 14 | "%r8", 15 | "%r9", 16 | }; 17 | 18 | typedef struct { 19 | size_t bytes_rip_pad; 20 | char* **used_regs_vec; 21 | Function *current_fn; 22 | size_t statement_idx; 23 | size_t* **labels_as_offsets; 24 | } RegAlloc; 25 | 26 | extern RegAlloc regalloc; 27 | 28 | extern char *label_reg_tab[5][3]; 29 | extern intptr_t reg_alloc_tab[5][3]; 30 | void reg_init_fn(Function func); 31 | char *reg_alloc(char *label, Type reg_size); 32 | char *label_to_reg(size_t offset, char *label, bool allow_noexist); 33 | char *reg_as_size(char *reg, Type size); 34 | Type size_from_reg(char *reg); 35 | char *label_to_reg_noresize(size_t offset, char *label, bool allow_noexist); 36 | char *reg_alloc_noresize(char *label, Type reg_size); 37 | Type get_reg_size(char *reg, char *expected_label); 38 | -------------------------------------------------------------------------------- /include/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | char size_as_char(Type type); 7 | char *get_full_char_str(bool is_struct, Type type, char *type_struct); 8 | int find_copyval(CopyVal **copyvals, char *label, CopyVal *val_buf); 9 | int find_sizet_in_copyvals(CopyVal **copyvals, char *label, size_t *val_buf); 10 | AggregateType *find_aggtype(char *name, AggregateType *aggtypes, size_t num_aggtypes); 11 | char *read_full_stdin(); 12 | -------------------------------------------------------------------------------- /include/vector.h: -------------------------------------------------------------------------------- 1 | /* Part of vector implementationf for UYB compiler backend project, see ../src/vector.c for the 2 | * rest of the code. 
/* Bookkeeping record for a growable vector.
 * The struct is packed and `data` is its last member, so a pointer to the `data` member
 * (which is what the vec_* API passes around as `vec_data`) can be turned back into a
 * pointer to the whole record by subtracting the size of the preceding fields. */
typedef struct {
    size_t len;       // number of elements currently stored
    size_t capacity;  // growth threshold tracked by vec_push (doubled on every growth)
    size_t data_size; // size of one element, as passed to vec_new()
    void *data;       // heap buffer holding the elements
} __attribute__((packed)) Vec;

void *vec_new(size_t data_size);
size_t vec_size(void *vec_data);
int vec_contains(void *vec_data, size_t val);

/* Appends `val` to the vector referred to by `vec_data`.
 *  - `vec_data` is the handle returned by vec_new() (a pointer to the `data` member of a Vec).
 *  - `val` must have the vector's element type: it is stored through a pointer to
 *    __typeof__(val), and sizeof(val) is used when growing the buffer.
 * When the length reaches the tracked capacity the buffer is grown with realloc(). The
 * result of realloc() is checked before it replaces the old pointer, so an allocation
 * failure aborts the program instead of leaking the buffer and writing through NULL.
 * __typeof__ is used rather than typeof so that the macro also builds in strict
 * -std=c99/-std=c11 modes. */
#define vec_push(vec_data, val) \
    do { \
        Vec *vec_internal = (Vec*) ((uintptr_t) (vec_data) - (sizeof(Vec) - sizeof(void*))); \
        ((__typeof__(val)*) vec_internal->data)[vec_internal->len] = (val); \
        vec_internal->len++; \
        if (vec_internal->capacity == vec_internal->len) { \
            void *vec_grown = realloc(vec_internal->data, (vec_internal->len + 1) * sizeof(val) * 2); \
            if (vec_grown == NULL) abort(); \
            vec_internal->data = vec_grown; \
            vec_internal->capacity *= 2; \
        } \
    } while (0)

/* Usage of this header:
 *  - To create a new vector, use vec_new():
 *      data_type **vec = vec_new(sizeof(data_type));
 *    (replace `data_type` with the type that the vector is for, for example uint64_t)
 *  - To append an element to a vector, use vec_push():
 *      vec_push(vec, new_value);
 *  - To access elements of the vector, including writing/reading specific elements, access it like a normal array but dereference vec:
 *      value = (*vec)[8];
 *  - To get the length of a vector, use vec_size():
 *      length_of_vector = vec_size(vec);
 */
-------------------------------------------------------------------------------- /src/lexer.c: -------------------------------------------------------------------------------- 1 | /* Textual IR lexer for the UYB compiler backend project. 2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define valid_label_char(ch) (ch == '.' || ch == '_' || isdigit(ch) || isalpha(ch)) 11 | 12 | Type char_to_type(char t_ch) { 13 | if (t_ch == 'b') return Bits8; 14 | else if (t_ch == 'h') return Bits16; 15 | else if (t_ch == 'w') return Bits32; 16 | else if (t_ch == 'l') return Bits64; 17 | else { 18 | printf("Invalid type: %c\n", t_ch); 19 | exit(1); 20 | } 21 | } 22 | 23 | char *token_to_str(TokenType ttype) { 24 | if (ttype == TokFunction) return "TokFunction"; 25 | else if (ttype == TokExport) return "TokExport"; 26 | else if (ttype == TokNewLine) return "TokNewLine"; 27 | else if (ttype == TokLabel) return "TokLabel"; 28 | else if (ttype == TokStrLit) return "TokStrLit"; 29 | else if (ttype == TokRawStr) return "TokRawStr"; 30 | else if (ttype == TokInteger) return "TokInteger"; 31 | else if (ttype == TokLabel) return "TokLabel"; 32 | else if (ttype == TokLParen) return "TokLParen"; 33 | else if (ttype == TokRParen) return "TokRParen"; 34 | else if (ttype == TokLBrace) return "TokLBrace"; 35 | else if (ttype == TokRBrace) return "TokRBrace"; 36 | else if (ttype == TokData) return "TokData"; 37 | else if (ttype == TokSection) return "TokSection"; 38 | else if (ttype == TokBlockLabel) return "TokBlkLbl"; 39 | else if (ttype == TokTripleDot) return "TokTripleDot"; 40 | else if (ttype == TokAlign) return "TokAlign"; 41 | else if (ttype == TokAggType) return "TokAggType"; 42 | else if (ttype == TokType) return "TokType"; 43 | else if (ttype == TokComma) return "TokComma"; 44 | else if (ttype == TokColon) return "TokColon"; 45 | else if (ttype == 
TokBar) return "TokBar"; 46 | else return "TokInvalid"; 47 | } 48 | 49 | // `ret` argument is a buffer for a vector which all the tokens will be pushed to. 50 | void lex_line(char *str, size_t line_num, Token **ret) { 51 | size_t len = strlen(str); 52 | for (size_t i = 0; i < len; i++) { 53 | if (str[i] == '\t' || str[i] == ' ' || str[i] == '\r' || str[i] == 0) continue; 54 | else if (str[i] == '#') break; 55 | else if (str[i] == '(') vec_push(ret, ((Token) {.line=line_num,.type=TokLParen,.val=0})); 56 | else if (str[i] == ')') vec_push(ret, ((Token) {.line=line_num,.type=TokRParen,.val=0})); 57 | else if (str[i] == '{') vec_push(ret, ((Token) {.line=line_num,.type=TokLBrace,.val=0})); 58 | else if (str[i] == '}') vec_push(ret, ((Token) {.line=line_num,.type=TokRBrace,.val=0})); 59 | else if (str[i] == ',') vec_push(ret, ((Token) {.line=line_num,.type=TokComma,.val=0})); 60 | else if (str[i] == ':') vec_push(ret, ((Token) {.line=line_num,.type=TokColon,.val=0})); 61 | else if (str[i] == '|') vec_push(ret, ((Token) {.line=line_num,.type=TokBar,.val=0})); 62 | else if (!memcmp(&str[i], "...", 3)) { 63 | vec_push(ret, ((Token) {.line=line_num,.type=TokTripleDot,.val=0})); 64 | i += 2; 65 | } 66 | else if (str[i] == '=' && isalpha(str[i + 1])) { 67 | vec_push(ret, ((Token) {.line=line_num,.type=TokAssign,.val=char_to_type(str[i+1])})); 68 | i++; 69 | } else if (str[i] == '=') { 70 | vec_push(ret, ((Token) {.line=line_num,.type=TokEqu,.val=0})); 71 | } else if (isdigit(str[i]) || str[i] == '-') { 72 | size_t dig = 0; 73 | for (; isdigit(str[i + dig]) || (str[i + dig] == '-' && dig == 0); dig++); 74 | char *buf = aalloc(dig + 1); // perhaps I should move this to a fixed size buffer? 
75 | memcpy(buf, &str[i], dig); 76 | buf[dig] = 0; 77 | int negative_flag = 0; 78 | if (str[i] == '-') { 79 | negative_flag = true; 80 | buf++; 81 | } 82 | uint64_t val = strtoll(buf,NULL,10); 83 | if (negative_flag) { 84 | val = -val; 85 | } 86 | vec_push(ret, ((Token) {.line=line_num,.type=TokInteger,.val=val})); 87 | i += dig - 1; 88 | } else if (str[i] == '"') { 89 | size_t dig = 0; 90 | for (; !(str[i + dig] == '"' && dig); dig++); 91 | char *buf = aalloc(dig + 1); 92 | memcpy(buf, &str[i + 1], dig); 93 | buf[dig - 1] = 0; 94 | vec_push(ret, ((Token) {.line=line_num,.type=TokStrLit,.val=(uint64_t) buf})); 95 | i += dig; 96 | } else if (str[i] == '%' || str[i] == '$' || str[i] == '@' || str[i] == ':') { 97 | i++; 98 | size_t dig = 0; 99 | for (; valid_label_char(str[i + dig]); dig++); 100 | char *buf = aalloc(dig + 2); 101 | memcpy(buf, &str[i], dig + 1); 102 | buf[dig] = 0; 103 | if (str[i - 1] == '%') 104 | vec_push(ret, ((Token) {.line=line_num,.type=TokLabel,.val=(uint64_t) buf})); 105 | else if (str[i - 1] == '$') 106 | vec_push(ret, ((Token) {.line=line_num,.type=TokRawStr,.val=(uint64_t) buf})); 107 | else if (str[i - 1] == '@') 108 | vec_push(ret, ((Token) {.line=line_num,.type=TokBlockLabel,.val=(uint64_t) buf})); 109 | else if (str[i - 1] == ':') 110 | vec_push(ret, ((Token) {.line=line_num,.type=TokAggType,.val=(uint64_t) buf})); 111 | i += dig - 1; 112 | } else if (valid_label_char(str[i])) { 113 | size_t dig = 0; 114 | for (; valid_label_char(str[i + dig]); dig++); 115 | char *buf = aalloc(dig + 1); 116 | memcpy(buf, &str[i], dig); 117 | buf[dig] = 0; 118 | if (!strcmp(buf, "function")) { 119 | vec_push(ret, ((Token) {.line=line_num,.type=TokFunction,.val=0})); 120 | } else if (!strcmp(buf, "export")) { 121 | vec_push(ret, ((Token) {.line=line_num,.type=TokExport,.val=0})); 122 | } else if (!strcmp(buf, "data")) { 123 | vec_push(ret, ((Token) {.line=line_num,.type=TokData,.val=0})); 124 | } else if (!strcmp(buf, "section")) { 125 | vec_push(ret, 
((Token) {.line=line_num,.type=TokSection,.val=0})); 126 | } else if (!strcmp(buf, "align")) { 127 | vec_push(ret, ((Token) {.line=line_num,.type=TokAlign,.val=0})); 128 | } else if (!strcmp(buf, "type")) { 129 | vec_push(ret, ((Token) {.line=line_num,.type=TokType,.val=0})); 130 | } else if (!strcmp(buf, ".file")) { 131 | vec_push(ret, ((Token) {.line=line_num,.type=TokFile,.val=0})); 132 | } else { 133 | vec_push(ret, ((Token) {.line=line_num,.type=TokRawStr,.val=(uint64_t) buf})); 134 | } 135 | i += dig - 1; 136 | } else { 137 | printf("Invalid token on line %zu: %c (%u)\n", line_num, str[i], str[i]); 138 | exit(1); 139 | } 140 | } 141 | } 142 | 143 | Token **lex_file(FILE *f) { 144 | ssize_t sz; 145 | char *contents; 146 | Token **ret = vec_new(sizeof(Token)); 147 | size_t ln = 1; 148 | size_t start = 0; 149 | size_t end = 0; 150 | if (f == stdin) { 151 | contents = read_full_stdin(); 152 | sz = strlen(contents); 153 | goto end_readfile; 154 | } 155 | fseek(f, 0, SEEK_END); 156 | if ((sz = ftell(f)) < 0) { 157 | printf("Failed to get file length (ftell error).\n"); 158 | exit(1); 159 | } 160 | fseek(f, 0, SEEK_SET); 161 | contents = aalloc(sz + 1); 162 | if (!fread(contents, sz, 1, f)) { 163 | printf("Failed to read from file.\n"); 164 | exit(1); 165 | } 166 | end_readfile: 167 | for (; end <= sz; end++) { 168 | if (contents[end] == '\n') { 169 | contents[end] = 0; 170 | lex_line(&contents[start], ln, ret); 171 | vec_push(ret, ((Token) {.line=ln,.type=TokNewLine,.val=0})); 172 | start = end + 1; 173 | ln++; 174 | } 175 | } 176 | if (f == stdin) free(contents); 177 | return ret; 178 | } 179 | -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 | /* Main file of UYB for parsing command line arguments and calling the rest of the compiler. 2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. 
*/ 3 | #define ARENA_IMPLEMENTATION 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | Arena arena; 17 | int is_position_independent = 1; 18 | 19 | typedef enum { 20 | X86_64, 21 | IR, 22 | } Target; 23 | 24 | void (*targets[])(Function*, size_t, Global*, size_t, AggregateType*, size_t, FileDbg*, size_t, FILE*) = { 25 | build_program_x86_64, 26 | build_program_IR, 27 | }; 28 | 29 | void help(char *cmd) { 30 | printf("%s [options] \n", cmd); 31 | printf("Options:\n" 32 | " --help Display this information.\n" 33 | " --version Check the version of this copy of UYB.\n" 34 | " --targets List targets supported by UYB which the IR can be compiled to.\n" 35 | " --no-pie Ensure that the generated program is not position independent.\n" 36 | " -o Specify that the resulting assembly should be outputted to .\n" 37 | " -t Specify that assembly should be generated specifically for .\n"); 38 | } 39 | 40 | void targets_help() { 41 | printf("Use `-t ` to specify a target. Supported targets:\n"); 42 | printf(" - x86_64\n" 43 | " - IR\n"); 44 | } 45 | 46 | Target str_as_target(char *cmd, char *s) { 47 | if (!strcmp(s, "x86_64")) return X86_64; 48 | else if (!strcmp(s, "IR")) return IR; 49 | else { 50 | printf("No such target: %s. 
To list all targets, run:\n" 51 | "%s --targets\n", s, cmd); 52 | exit(1); 53 | } 54 | } 55 | 56 | void sigsegv_handler(int sig, siginfo_t *si, void *unused) { 57 | printf(":( Something went very wrong and UYB cannot continue (segmentation fault).\n\n" 58 | "Please report an issue for the bug on the GitHub repository (https://github.com/UnmappedStack) and describe what you did that caused this.\n" 59 | "Signal: %d, address: %p\n", sig, si->si_addr); 60 | exit(1); 61 | } 62 | 63 | void setup_sigsev() { 64 | struct sigaction sa; 65 | sa.sa_flags = SA_SIGINFO; 66 | sa.sa_sigaction = sigsegv_handler; 67 | sigaction(SIGSEGV, &sa, NULL); 68 | } 69 | 70 | int main(int argc, char **argv) { 71 | setup_sigsev(); 72 | char *input_fname = NULL; 73 | char *output_fname = NULL; 74 | Target target = X86_64; 75 | for (size_t arg = 1; arg < argc; arg++) { 76 | if (argv[arg][0] != '-') { 77 | if (input_fname) { 78 | printf("More than one input file passed, not allowed.\n"); 79 | return 1; 80 | } 81 | input_fname = argv[arg]; 82 | continue; 83 | } 84 | if (argv[arg][1] == '-') argv[arg]++; 85 | if (!strcmp(argv[arg], "-o")) { 86 | if (output_fname) { 87 | printf("Output file provided more than once, not allowed.\n"); 88 | return 1; 89 | } 90 | if (arg == argc - 1) { 91 | printf("Output file was expected to be provided after -o, got end of command instead.\n"); 92 | return 1; 93 | } 94 | output_fname = argv[arg + 1]; 95 | arg++; 96 | continue; 97 | } else if (!strcmp(argv[arg], "-t")) { 98 | if (argc == argc - 1) { 99 | printf("Target was expected to be provided after -t, got end of command instead.\n"); 100 | return 1; 101 | } 102 | target = str_as_target(argv[0], argv[arg + 1]); 103 | arg++; 104 | } else if (!strcmp(argv[arg], "-targets")) { 105 | targets_help(); 106 | return 0; 107 | } else if (!strcmp(argv[arg], "-no-pie")) { 108 | is_position_independent = 0; 109 | } else if (!strcmp(argv[arg], "-version")) { 110 | printf("UYB compiler backend version beta %s.\n" 111 | "Copyright 
(C) 2025 UnmappedStack (Jake Steinburger) under the Mozilla Public License 2.0.\n", COMMIT); 112 | return 0; 113 | } else if (!strcmp(argv[arg], "-help")) { 114 | help(argv[0]); 115 | return 0; 116 | } else { 117 | printf("Invalid argument: %s\n", argv[arg]); 118 | help(argv[0]); 119 | } 120 | } 121 | FILE *inf = stdin; 122 | if (input_fname) { 123 | inf = fopen(input_fname, "r"); 124 | if (!inf) { 125 | printf("Failed to open %s\n", input_fname); 126 | return 1; 127 | } 128 | } 129 | Token **toks = lex_file(inf); 130 | fclose(inf); 131 | Global **globals; 132 | AggregateType **aggs; 133 | FileDbg **files_dbg; 134 | Function **functs = parse_program(toks, &globals, &aggs, &files_dbg); 135 | FILE *outf = stdout; 136 | if (output_fname) { 137 | outf = fopen(output_fname, "w"); 138 | if (!outf) { 139 | printf("Failed to open out.S\n"); 140 | exit(1); 141 | } 142 | } 143 | size_t num_functions = vec_size(functs); 144 | optimise(*functs, num_functions); 145 | // Assembly codegen 146 | targets[target](*functs, num_functions, *globals, vec_size(globals), *aggs, vec_size(aggs), *files_dbg, vec_size(files_dbg), outf); 147 | fclose(outf); 148 | delete_arenas(); 149 | return 0; 150 | } 151 | -------------------------------------------------------------------------------- /src/optimise/copyelim.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | void copy_elim_funct(Function *IR) { 8 | CopyVal val; 9 | Statement **statement_vec = vec_new(sizeof(Statement)); 10 | CopyVal **copyvals = vec_new(sizeof(Statement)); 11 | for (size_t s = 0; s < IR->num_statements; s++) { 12 | if (IR->statements[s].instruction == COPY) { 13 | vec_push(copyvals, ((CopyVal) { 14 | .label = IR->statements[s].label, 15 | .val = IR->statements[s].vals[0], 16 | .type = IR->statements[s].val_types[0], 17 | })); 18 | } else { 19 | if (IR->statements[s].instruction == CALL) { 20 | FunctionArgList *args = 
(FunctionArgList*) IR->statements[s].vals[1]; 21 | for (size_t a = 0; a < args->num_args; a++) { 22 | if (args->arg_types[a] != Label) continue; 23 | if (!find_copyval(copyvals, (char*) args->args[a], &val)) continue; 24 | args->args[a] = (char*) val.val; 25 | args->arg_types[a] = val.type; 26 | } 27 | goto statement_end; 28 | } else if (IR->statements[s].instruction == ASM) { 29 | InlineAsm *info = (InlineAsm*) IR->statements[s].vals[0]; 30 | for (size_t i = 0; i < vec_size(info->inputs_vec); i++) { 31 | if (!find_copyval(copyvals, (char*) (*info->inputs_vec)[i].label, &val)) continue; 32 | (*info->inputs_vec)[i].label = (char*) val.val; 33 | (*info->inputs_vec)[i].type = val.type; 34 | } 35 | } 36 | for (size_t i = 0; i < 2; i++) { 37 | if (IR->statements[s].val_types[i] != Label) continue; 38 | if (!find_copyval(copyvals, (char*) IR->statements[s].vals[i], &val)) continue; 39 | IR->statements[s].val_types[i] = val.type; 40 | IR->statements[s].vals[i] = val.val; 41 | } 42 | statement_end: 43 | vec_push(statement_vec, IR->statements[s]); 44 | } 45 | } 46 | IR->statements = *statement_vec; 47 | IR->num_statements = vec_size(statement_vec); 48 | } 49 | 50 | void opt_copy_elim(Function *IR, size_t num_functions) { 51 | for (size_t fn = 0; fn < num_functions; fn++) { 52 | copy_elim_funct(&IR[fn]); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/optimise/folding.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | size_t get_val(ValType type, size_t val, size_t label_val) { 7 | if (type == Number) return val; 8 | else if (type == Label) return label_val; 9 | else return 0; 10 | } 11 | 12 | void fold_funct(Function *fn) { 13 | CopyVal **copyvals = vec_new(sizeof(CopyVal)); 14 | for (size_t s = 0; s < fn->num_statements; s++) { 15 | ValType *valtypes = fn->statements[s].val_types; 16 | size_t *vals = fn->statements[s].vals; 
17 | Instruction instr = fn->statements[s].instruction; 18 | // If it's a COPY, save the value 19 | if (instr == COPY && valtypes[0] == Number) { 20 | vec_push(copyvals, ((CopyVal) { 21 | .label = fn->statements[s].label, 22 | .val = fn->statements[s].vals[0], 23 | })); 24 | continue; 25 | } 26 | size_t in_vals[2]; 27 | if ((valtypes[0] == Str || valtypes[0] == BlkLbl || (valtypes[0] != Number && !(valtypes[0] == Label && find_sizet_in_copyvals(copyvals, (char*) vals[0], &in_vals[0]))) || 28 | valtypes[1] == Str || valtypes[1] == BlkLbl || (valtypes[1] != Number && !(valtypes[1] == Label && find_sizet_in_copyvals(copyvals, (char*) vals[1], &in_vals[1])))) && valtypes[1] != Empty) { 29 | // it can't constant fold it if the values can't be found at compile time 30 | continue; 31 | } 32 | // Now solve for the value and replace it with a COPY. 33 | size_t params[] = {get_val(valtypes[0], vals[0], in_vals[0]), get_val(valtypes[1], vals[1], in_vals[1])}; 34 | if (instr == ADD) { 35 | fn->statements[s].vals[0] = params[0] + params[1]; 36 | } else if (instr == MUL) { 37 | fn->statements[s].vals[0] = params[0] * params[1]; 38 | } else if (instr == DIV) { 39 | fn->statements[s].vals[0] = params[0] / params[1]; 40 | } else if (instr == SUB) { 41 | fn->statements[s].vals[0] = params[0] - params[1]; 42 | } else if (instr == SHL) { 43 | fn->statements[s].vals[0] = params[0] << params[1]; 44 | } else if (instr == SHR) { 45 | fn->statements[s].vals[0] = params[0] >> params[1]; 46 | } else if (instr == EQ) { 47 | fn->statements[s].vals[0] = params[0] == params[1]; 48 | } else if (instr == NE) { 49 | fn->statements[s].vals[0] = params[0] != params[1]; 50 | } else if (instr == OR) { 51 | fn->statements[s].vals[0] = params[0] | params[1]; 52 | } else if (instr == AND) { 53 | fn->statements[s].vals[0] = params[0] & params[1]; 54 | } else if (instr == XOR) { 55 | fn->statements[s].vals[0] = params[0] ^ params[1]; 56 | } else if (instr == NEG) { 57 | fn->statements[s].vals[0] = 
-params[0]; 58 | } else { 59 | continue; 60 | } 61 | fn->statements[s].instruction = COPY; 62 | fn->statements[s].val_types[0] = Number; 63 | fn->statements[s].val_types[1] = Empty; 64 | } 65 | } 66 | 67 | void opt_fold(Function *IR, size_t num_functions) { 68 | for (size_t fn = 0; fn < num_functions; fn++) { 69 | fold_funct(&IR[fn]); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/optimise/optimisation.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* Takes a pointer to an array of Function structures and the number of functions in the IR. 4 | * Changes the statements in the given function to be more optimised. */ 5 | void optimise(Function *IR, size_t num_functions) { 6 | /* Planned optimisations: 7 | * - Folding [DONE] 8 | * - Copy elimination [DONE] 9 | * - Unused label removal [DONE] 10 | * - Function inlining 11 | * - Loop unravelling(?) */ 12 | opt_fold(IR, num_functions); 13 | opt_copy_elim(IR, num_functions); 14 | opt_unused_label_elim(IR, num_functions); 15 | } 16 | -------------------------------------------------------------------------------- /src/optimise/unused_label_elim.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void elim_unused_labels_fn(Function *IR) { 5 | char* **used_labels = vec_new(sizeof(char*)); 6 | Statement **statement_vec = vec_new(sizeof(Statement)); 7 | for (ssize_t s = IR->num_statements - 1; s >= 0; s--) { 8 | if (IR->statements[s].label && IR->statements[s].instruction != CALL) { 9 | for (size_t i = 0; i < vec_size(used_labels); i++) { 10 | if (!vec_contains(used_labels, (size_t) IR->statements[s].label)) continue; 11 | } 12 | } 13 | if (IR->statements[s].instruction == CALL) { 14 | FunctionArgList *args = (FunctionArgList*) IR->statements[s].vals[1]; 15 | for (size_t a = 0; a < args->num_args; a++) { 16 | if (args->arg_types[a] == Label) 
vec_push(used_labels, args->args[a]); 17 | } 18 | } else { 19 | for (size_t i = 0; i < 3; i++) { 20 | if (IR->statements[s].val_types[i] == Label) vec_push(used_labels, IR->statements[s].vals[i]); 21 | } 22 | } 23 | vec_push(statement_vec, IR->statements[s]); 24 | } 25 | // reverse it cos the previous thing inserts statements backwards 26 | for (size_t i = 0; i < vec_size(statement_vec) / 2; i++) { 27 | Statement tmp = (*statement_vec)[i]; 28 | (*statement_vec)[i] = (*statement_vec)[vec_size(statement_vec) - 1 - i]; 29 | (*statement_vec)[vec_size(statement_vec) - 1 - i] = tmp; 30 | } 31 | IR->statements = *statement_vec; 32 | IR->num_statements = vec_size(statement_vec); 33 | } 34 | 35 | void opt_unused_label_elim(Function *IR, size_t num_functions) { 36 | for (size_t fn = 0; fn < num_functions; fn++) { 37 | elim_unused_labels_fn(&IR[fn]); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/parser.c: -------------------------------------------------------------------------------- 1 | /* Textual IR parser for the UYB compiler backend project. 2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | // WARNING: Edits the original string 13 | void str_toupper(char* str) { 14 | while (*str) { 15 | *str = toupper(*str); 16 | str++; 17 | } 18 | } 19 | 20 | size_t bytes_from_size(Type sz) { 21 | switch (sz) { 22 | case Bits8: return 1; 23 | case Bits16: return 2; 24 | case Bits32: return 4; 25 | default: return 8; 26 | } 27 | } 28 | 29 | // really messy, there's probably a cleaner way to do this. Or at least, move it into another file. 
30 | Instruction parse_instruction(char *instr, size_t line, Type *type) { 31 | str_toupper(instr); 32 | if (!strcmp(instr, "ADD" )) return ADD; 33 | else if (!strcmp(instr, "SUB" )) return SUB; 34 | else if (!strcmp(instr, "DIV" )) return DIV; 35 | else if (!strcmp(instr, "MUL" )) return MUL; 36 | else if (!strcmp(instr, "COPY" )) return COPY; 37 | else if (!strcmp(instr, "RET" )) return RET; 38 | else if (!strcmp(instr, "CALL" )) return CALL; 39 | else if (!strcmp(instr, "JZ" )) return JZ; 40 | else if (!strcmp(instr, "NEG" )) return NEG; 41 | else if (!strcmp(instr, "UDIV" )) return UDIV; 42 | else if (!memcmp(instr, "STORE", 5)) { 43 | if (strlen(instr) > 5) 44 | *type = char_to_type(tolower(instr[5])); 45 | return STORE; 46 | } 47 | else if (!memcmp(instr, "LOAD", 4)) { 48 | if (strlen(instr) > 5) 49 | *type = char_to_type(tolower(instr[5])); 50 | return LOAD; 51 | } 52 | else if (!strcmp(instr, "BLIT" )) return BLIT; 53 | else if (!strcmp(instr, "ALLOC" )) return ALLOC; 54 | else if (!memcmp(instr+1, "EQ", 2 )) return EQ; 55 | else if (!memcmp(instr+1, "NE", 2 )) return NE; 56 | else if (!memcmp(instr+1, "SGE", 3)) return SGE; 57 | else if (!memcmp(instr+1, "SGT", 3)) return SGT; 58 | else if (!memcmp(instr+1, "SLE", 3)) return SLE; 59 | else if (!memcmp(instr+1, "SLT", 3)) return SLT; 60 | else if (!memcmp(instr+1, "UGE", 3)) return UGE; 61 | else if (!memcmp(instr+1, "UGT", 3)) return UGT; 62 | else if (!memcmp(instr+1, "ULE", 3)) return ULE; 63 | else if (!memcmp(instr+1, "ULT", 3)) return ULT; 64 | else if (!memcmp(instr, "EXT", 3 )) return EXT; 65 | else if (!strcmp(instr, "HLT" )) return HLT; 66 | else if (!strcmp(instr, "BLKLBL" )) return BLKLBL; 67 | else if (!strcmp(instr, "JMP" )) return JMP; 68 | else if (!strcmp(instr, "JNZ" )) return JNZ; 69 | else if (!strcmp(instr, "SHL" )) return SHL; 70 | else if (!strcmp(instr, "SHR" )) return SHR; 71 | else if (!strcmp(instr, "OR" )) return OR; 72 | else if (!strcmp(instr, "AND" )) return AND; 73 | else if 
(!strcmp(instr, "PHI" )) return PHI; 74 | else if (!strcmp(instr, "VASTART" )) return VASTART; 75 | else if (!strcmp(instr, "VAARG" )) return VAARG; 76 | else if (!strcmp(instr, ".LOC" )) return LOC; 77 | else if (!strcmp(instr, "ASM" )) return ASM; 78 | else { 79 | printf("Invalid instruction on line %zu: %s\n", line, instr); 80 | exit(1); 81 | } 82 | } 83 | 84 | ValType tok_as_valtype(TokenType tok, size_t line) { 85 | if (tok == TokInteger) return Number; 86 | else if (tok == TokLabel) return Label; 87 | else if (tok == TokRawStr) return Str; 88 | else if (tok == TokBlockLabel) return BlkLbl; 89 | else if (tok == TokStrLit) return StrLit; 90 | else { 91 | printf("Token can't be converted to ValType: Invalid instruction value on line %zu\n", line); 92 | exit(1); 93 | } 94 | } 95 | 96 | void parse_statement_parameters(Token *toks, size_t at, Statement *ret) { 97 | size_t num_args = 0; 98 | size_t v = 0; 99 | for (size_t i = 0; v <= 3 && toks[at + i].type != TokNewLine; i++) { 100 | if (toks[at + i].type == TokComma) { 101 | continue; 102 | } 103 | ret->vals[v] = toks[at + i].val; 104 | ret->val_types[v] = tok_as_valtype(toks[at + i].type, toks[at + i].line); 105 | num_args++; 106 | v++; 107 | } 108 | for (size_t i = num_args; i < 3; i++) { 109 | ret->val_types[i] = Empty; 110 | } 111 | } 112 | 113 | void parse_phi_parameters(Token *toks, size_t at, Statement *ret) { 114 | if (toks[at].type != TokBlockLabel || toks[at + 3].type != TokBlockLabel) { 115 | printf("Phi instruction format is not correct, expected a block label on line %zu\n", toks->line); 116 | exit(1); 117 | } 118 | if (toks[at + 2].type != TokComma) { 119 | printf("Expected comma between phi node values on line %zu\n", toks->line); 120 | exit(1); 121 | } 122 | ret->vals[0] = (size_t) aalloc(sizeof(PhiVal)); 123 | ret->vals[1] = (size_t) aalloc(sizeof(PhiVal)); 124 | *((PhiVal*) ret->vals[0]) = (PhiVal) { 125 | .blklbl_name = (char*) toks[at].val, 126 | .val = toks[at + 1].val, 127 | .type = 
tok_as_valtype(toks[at + 1].type, toks[at + 1].line), 128 | }; 129 | *((PhiVal*) ret->vals[1]) = (PhiVal) { 130 | .blklbl_name = (char*) toks[at + 3].val, 131 | .val = toks[at + 4].val, 132 | .type = tok_as_valtype(toks[at + 4].type, toks[at + 4].line), 133 | }; 134 | ret->val_types[0] = PhiArg; 135 | ret->val_types[1] = PhiArg; 136 | ret->val_types[2] = Empty; 137 | } 138 | 139 | // returns number of tokens to skip 140 | size_t parse_asm_io(Token *toks, size_t at, InlineAsmIO ***io_vec_buf) { 141 | size_t at_start = at; 142 | at++; 143 | *io_vec_buf = vec_new(sizeof(InlineAsmIO)); 144 | while (toks[at].type == TokLabel) { 145 | if (toks[at + 1].type != TokBar) { 146 | printf("Expected vertical bar (|) after label in I/O list for inline assembly on line %zu.\n", toks[at + 1].line); 147 | exit(1); 148 | } 149 | if (toks[at + 2].type != TokStrLit) { 150 | printf("Expected string literal referring to register in I/O list for inline assembly on line %zu.\n", toks[at + 2].line); 151 | exit(1); 152 | } 153 | vec_push(*io_vec_buf, ((InlineAsmIO) { 154 | .reg = (char*) toks[at + 2].val, 155 | .label = (char*) toks[at].val, 156 | .type = tok_as_valtype(toks[at].type, toks[at].line), 157 | })); 158 | at += 3; 159 | if (toks[at].type == TokComma) at++; 160 | } 161 | return at - at_start; 162 | } 163 | 164 | void parse_asm_clobbers(Token *toks, size_t at, char** **clobbers_buf_vec) { 165 | *clobbers_buf_vec = vec_new(sizeof(char*)); 166 | at++; 167 | while (toks[at].type != TokRParen) { 168 | if (toks[at].type == TokComma) at++; 169 | else if (toks[at].type == TokStrLit) 170 | vec_push(*clobbers_buf_vec, (char*) toks[at++].val); 171 | else { 172 | printf("Invalid token in inline assembly clobber list, expected string literal or comma on line %zu.\n", toks[at].line); 173 | exit(1); 174 | } 175 | } 176 | } 177 | 178 | void parse_asm_parameters(Token *toks, size_t at, Statement *ret) { 179 | ret->val_types[0] = InlineAssembly; 180 | ret->val_types[1] = ret->val_types[2] = Empty; 
181 | InlineAsm *buf = (InlineAsm*) malloc(sizeof(InlineAsm)); 182 | // get the assembly itself 183 | if (toks[at].type != TokLParen) { 184 | printf("Expected left parenthesis after ASM instruction keyword on line %zu\n", toks[at].line); 185 | exit(1); 186 | } 187 | if (toks[at + 1].type != TokStrLit) { 188 | printf("Expected string literal after \"asm(\" on line %zu\n", toks[at + 1].line); 189 | exit(1); 190 | } 191 | buf->assembly = (char*) toks[at + 1].val; 192 | // replace instances of \t and \n with their correct values 193 | size_t len = strlen(buf->assembly); 194 | for (size_t c = 0; c < len; c++) { 195 | if (buf->assembly[c] != '\\') continue; 196 | if (buf->assembly[c + 1] == 'n') 197 | buf->assembly[c] = 10; // 10 is newline 198 | else if (buf->assembly[c + 1] == 't') 199 | buf->assembly[c] = 9; // 9 is carriage return 200 | else { 201 | printf("Unknown escape sequence (only \\t and \\n can be used in UYB)\n"); 202 | exit(1); 203 | } 204 | memmove(&buf->assembly[c + 1], &buf->assembly[c + 2], len - c); 205 | c--; 206 | } 207 | at += 2; 208 | // get the inputs 209 | if (toks[at].type == TokColon) 210 | at += parse_asm_io(toks, at, &buf->inputs_vec); 211 | else 212 | goto end_asm_parse; 213 | // get the outputs 214 | if (toks[at].type == TokColon) 215 | at += parse_asm_io(toks, at, &buf->outputs_vec); 216 | else 217 | goto end_asm_parse; 218 | // get the clobbers 219 | if (toks[at].type == TokColon) 220 | parse_asm_clobbers(toks, at, &buf->clobbers_vec); 221 | end_asm_parse: 222 | ret->vals[0] = (uint64_t) buf; 223 | } 224 | 225 | void parse_call_parameters(Token *toks, size_t at, Statement *ret) { 226 | if (toks[at].type != TokRawStr) { 227 | printf("Expected function name after CALL instruction on line %zu.\n", toks[at].line); 228 | exit(1); 229 | } 230 | if (toks[at + 1].type != TokLParen) { 231 | printf("Expected function arguments within parenthesis for CALL instruction on line %zu.\n", toks[at + 1].line); 232 | exit(1); 233 | } 234 | ret->vals[0] = 
toks[at].val; 235 | at += 2; 236 | char* **args = vec_new(sizeof(char*)); 237 | Type **arg_sizes = vec_new(sizeof(Type)); 238 | char* **arg_struct_types = vec_new(sizeof(char*)); 239 | bool **args_are_structs = vec_new(sizeof(bool)); 240 | ValType **arg_types= vec_new(sizeof(ValType)); 241 | while (toks[at].type != TokRParen) { 242 | if (toks[at].type == TokComma) { 243 | at++; 244 | continue; 245 | } 246 | if (toks[at].type == TokTripleDot) { 247 | at += 2; 248 | continue; 249 | } 250 | if ((toks[at].type != TokRawStr || ((char*) toks[at].val)[1] != 0) && toks[at].type != TokAggType) { 251 | printf("Expected argument type before argument in argument list in CALL instruction parameters on line %zu.\n", toks[at].line); 252 | exit(1); 253 | } 254 | if (toks[at + 1].type != TokLabel && toks[at + 1].type != TokRawStr && toks[at + 1].type != TokInteger) { 255 | printf("Expected label, integer literal, or global in argument list for CALL instruction on line %zu.\n", toks[at + 1].line); 256 | exit(1); 257 | } 258 | if (toks[at].type == TokRawStr) { 259 | vec_push(arg_sizes, char_to_type(((char*) toks[at].val)[0])); 260 | vec_push(arg_struct_types, 0); 261 | vec_push(args_are_structs, (bool) false); 262 | } else { 263 | vec_push(arg_sizes, 0); 264 | vec_push(arg_struct_types, (char*) toks[at].val); 265 | vec_push(args_are_structs, (bool) true); 266 | } 267 | vec_push(args, (char*) toks[at + 1].val); 268 | vec_push(arg_types, tok_as_valtype(toks[at + 1].type, toks[at + 1].line)); 269 | at += 2; 270 | } 271 | ret->vals[1] = (uint64_t) aalloc(sizeof(FunctionArgList)); 272 | *((FunctionArgList*) ret->vals[1]) = (FunctionArgList) { 273 | .args = *args, 274 | .arg_sizes = *arg_sizes, 275 | .arg_struct_types = *arg_struct_types, 276 | .args_are_structs = *args_are_structs, 277 | .arg_types = *arg_types, 278 | .num_args = vec_size(args), 279 | }; 280 | ret->val_types[0] = Str; 281 | ret->val_types[1] = FunctionArgs; 282 | ret->val_types[2] = Empty; 283 | } 284 | 285 | Type 
instruction_remove_size(char *instr) { 286 | while (*instr) { 287 | if (*instr >= '0' && *instr <= '9') { 288 | *instr = 0; 289 | return Bits64; 290 | } 291 | instr++; 292 | } 293 | return 50; 294 | } 295 | 296 | // Expects tokens to end with TokNewLine 297 | Statement parse_statement(Token *toks) { 298 | if (toks[0].type == TokNewLine) toks++; 299 | if (toks[0].type == TokBlockLabel) { 300 | return (Statement) { 301 | .label = NULL, 302 | .instruction = BLKLBL, 303 | .vals = {toks[0].val}, 304 | .val_types = {Str, Empty, Empty}, 305 | }; 306 | } 307 | Statement ret = {0}; 308 | size_t at = 0; 309 | if (toks[0].type == TokLabel) { 310 | ret.label = (char*) toks[0].val; 311 | ret.type = toks[1].val; 312 | at = 2; 313 | } else { 314 | ret.label = NULL; 315 | } 316 | if (toks[at].type != TokRawStr) { 317 | printf("Expected instruction in statement on line %zu, got %s instead.\n", toks[at].line, token_to_str(toks[at].type)); 318 | exit(1); 319 | } 320 | size_t new_size = instruction_remove_size((char*) toks[at].val); 321 | if (new_size != 50) 322 | ret.type = new_size; 323 | ret.instruction = parse_instruction((char*) toks[at].val, toks[at].line, &ret.type); 324 | at++; 325 | if (ret.instruction == CALL) 326 | parse_call_parameters(toks, at, &ret); 327 | else if (ret.instruction == PHI) 328 | parse_phi_parameters(toks, at, &ret); 329 | else if (ret.instruction == ASM) 330 | parse_asm_parameters(toks, at, &ret); 331 | else 332 | parse_statement_parameters(toks, at, &ret); 333 | return ret; 334 | } 335 | 336 | // returns number of tokens to skip 337 | size_t parse_function(Token **toks, size_t loc, Function *buf) { 338 | buf->is_global = (*toks)[loc].type == TokExport; 339 | size_t skip = 1 + loc; 340 | if ((*toks)[skip].type == TokNewLine) skip++; 341 | if (buf->is_global) skip++; 342 | if (((*toks)[skip].type != TokRawStr || ((char*) (*toks)[skip].val)[1]) 343 | && (*toks)[skip].type != TokAggType) { 344 | printf("Not a valid function return type on line %zu.\n", 
(*toks)[skip].line); 345 | exit(1); 346 | } 347 | if ((*toks)[skip].type == TokRawStr) { 348 | buf->return_type = char_to_type(((char*) (*toks)[skip].val)[0]); 349 | buf->ret_is_struct = false; 350 | } else { 351 | buf->return_struct = (char*) (*toks)[skip].val; 352 | buf->ret_is_struct = true; 353 | } 354 | skip++; 355 | if ((*toks)[skip].type != TokRawStr) { 356 | printf("Expected function name on line %zu.\n", (*toks)[skip].line); 357 | exit(1); 358 | } 359 | buf->name = (char*) (*toks)[skip].val; 360 | if ((*toks)[skip + 1].type != TokLParen) { 361 | printf("Expected left parenthesis after function name in function definition on line %zu, got %s instead.\n", (*toks)[skip + 1].line, token_to_str((*toks)[skip + 1].type)); 362 | exit(1); 363 | } 364 | skip += 2; 365 | FunctionArgument **args = vec_new(sizeof(FunctionArgument)); 366 | buf->is_variadic = false; 367 | while ((*toks)[skip].type != TokRParen) { 368 | if ((*toks)[skip].type == TokComma) { 369 | skip++; 370 | continue; 371 | } 372 | if ((*toks)[skip].type == TokTripleDot) { 373 | buf->is_variadic = true; 374 | skip++; 375 | continue; 376 | } 377 | if (((*toks)[skip].type != TokRawStr || ((char*) (*toks)[skip].val)[1] != 0) && (*toks)[skip].type != TokAggType) { 378 | printf("Expected argument type as character (l,w,d,b), got something else instead on line %zu.\n", (*toks)[skip].line); 379 | exit(1); 380 | } 381 | if ((*toks)[skip + 1].type != TokLabel) { 382 | printf("Argument value isn't a label on line %zu.\n", (*toks)[skip + 1].line); 383 | exit(1); 384 | } 385 | FunctionArgument arg; 386 | arg.label = (char*) (*toks)[skip + 1].val; 387 | if ((*toks)[skip].type == TokRawStr) { 388 | arg.type_is_struct = false; 389 | arg.type = char_to_type(((char*) (*toks)[skip].val)[0]); 390 | } else { 391 | arg.type_is_struct = true; 392 | arg.type_struct = (char*) (*toks)[skip].val; 393 | } 394 | vec_push(args, arg); 395 | skip += 2; 396 | } 397 | buf->num_args = vec_size(args); 398 | buf->args = *args; 399 | 
/* Parses one global data definition starting at token index `loc`:
 *
 *     [section "name" <tok>] data NAME = [align N] { TYPE VALUE, TYPE VALUE, ... }
 *
 * and fills `buf` with the section (NULL when absent), the name, the alignment
 * (1 when no `align` clause is present) and three parallel arrays describing
 * each value: its size type, whether it is a Number or a StrLit, and the raw
 * token value.
 *
 * Any syntax error prints a message and terminates the process with exit(1).
 *
 * Returns the number of tokens to skip: the distance from the starting index
 * to the closing brace (the brace itself is not counted). */
size_t parse_global(Token **toks, size_t loc, Global *buf) {
    size_t start_loc = loc;
    if ((*toks)[loc].type == TokSection) {
        loc++;
        if ((*toks)[loc].type != TokStrLit) {
            printf("Expected string literal after section keyword on line %zu\n", (*toks)[loc].line);
            exit(1);
        }
        buf->section = (char*) (*toks)[loc].val;
        // Skips the string literal AND the token after it without checking
        // what that token is (presumably a newline -- TODO confirm).
        loc += 2;
    } else {
        buf->section = NULL;
    }
    if ((*toks)[loc].type != TokData) {
        printf("Expected data global definition after section specification on line %zu\n", (*toks)[loc].line);
        exit(1);
    }
    if ((*toks)[loc + 1].type != TokRawStr) {
        // NOTE(review): this prints the token's val with %s although the token
        // is known NOT to be a TokRawStr here; if val is not a string pointer
        // for that token kind this is an invalid read -- confirm with the lexer.
        printf("Expected name of global after data keyword on line %zu, got %s instead, data = %s\n", (*toks)[loc + 1].line, token_to_str((*toks)[loc + 1].type), (char*) (*toks)[loc + 1].val);
        exit(1);
    }
    buf->name = (char*) (*toks)[loc + 1].val;
    if ((*toks)[loc + 2].type != TokEqu) {
        printf("Expected = after global label name on line %zu\n", (*toks)[loc + 2].line);
        exit(1);
    }
    if ((*toks)[loc + 3].type == TokAlign) {
        if ((*toks)[loc + 4].type != TokInteger) {
            printf("Expected integer literal after Align token on line %zu\n", (*toks)[loc + 4].line);
            exit(1);
        }
        buf->alignment = (*toks)[loc + 4].val;
        // Shift past `align N` so that the brace is found at loc + 3 below,
        // exactly as in the no-align case.
        loc += 2;
    } else
        buf->alignment = 1;
    if ((*toks)[loc + 3].type != TokLBrace) {
        printf("Expected left brace ({) after = on line %zu\n", (*toks)[loc + 3].line);
        exit(1);
    }
    // Step over `data NAME = {` to the first token inside the braces.
    loc += 4;
    Type **sizes = vec_new(sizeof(Type));
    size_t **vals = vec_new(sizeof(size_t));
    ValType **types= vec_new(sizeof(ValType));
    // Each entry is a one-character type token followed by its value; commas
    // are skipped wherever they appear. The loop has no end-of-input check, so
    // it relies on a closing brace being present.
    while ((*toks)[loc].type != TokRBrace) {
        if ((*toks)[loc].type == TokComma) {
            loc++;
            continue;
        }
        // The type must be a raw string of exactly one character.
        if ((*toks)[loc].type != TokRawStr || ((char*) (*toks)[loc].val)[1] != 0) {
            printf("Invalid type in global declaration on line %zu\n", (*toks)[loc].line);
            exit(1);
        }
        vec_push(sizes, char_to_type(((char*) (*toks)[loc].val)[0]));
        if ((*toks)[loc + 1].type == TokInteger) vec_push(types, Number);
        else if ((*toks)[loc + 1].type == TokStrLit) vec_push(types, StrLit);
        else {
            printf("Global values can only be a number or a strlit token on line %zu, got something else.\n", (*toks)[loc + 1].line);
            exit(1);
        }
        vec_push(vals, (*toks)[loc + 1].val);
        loc += 2;
    }
    buf->num_vals = vec_size(vals);
    buf->vals = *vals;
    buf->types = *types;
    buf->sizes = *sizes;
    return loc - start_loc;
}

/* Returns the number of bytes that the aggregate-type element at `*loc`
 * contributes to the aggregate described by `buf`:
 *   - a one-character type (like `l`): the larger of that type's byte size
 *     and buf->alignment;
 *   - an integer literal (like `24`): the literal itself (opaque byte count);
 *   - a brace-enclosed group: the largest size among its members.
 *
 * For the first two forms `*loc` is left on the element and the caller steps
 * past it; for a brace group `*loc` is advanced to the group's closing brace.
 * Anything else prints an error and exits with status 1. */
size_t get_element_size(Token **toks, size_t *loc, AggregateType *buf) {
    if ((*toks)[*loc].type == TokRawStr && ((char*) (*toks)[*loc].val)[1] == 0) {
        // If it's a type element, like `l`
        size_t this_size = bytes_from_size(char_to_type(((char*) (*toks)[*loc].val)[0]));
        // Never report less than the aggregate's alignment for a single field.
        if (buf->alignment > this_size)
            return buf->alignment;
        else
            return this_size;
    } else if ((*toks)[*loc].type == TokInteger) {
        // If it's an opaque type just specifying the number of bytes, like `24`
        return (*toks)[*loc].val;
    } else if ((*toks)[*loc].type == TokLBrace) {
        // Nested brace group: the members are alternatives, so the group is as
        // large as its largest member.
        size_t max_size = 0;
        (*loc)++;
        while ((*toks)[*loc].type != TokRBrace) {
            if ((*toks)[*loc].type == TokComma) {
                (*loc)++;
                continue;
            }
            // Recurses so that groups can nest to any depth.
            size_t this_size = get_element_size(toks, loc, buf);
            if (this_size > max_size)
                max_size = this_size;
            (*loc)++;
        }
        return max_size;
    } else {
        printf("Invalid element for aggregate type on line %zu.\n", (*toks)[*loc].line);
        exit(1);
    }
}

/* Sums the sizes of all elements up to the closing brace of an aggregate type
 * body into buf->size_bytes. `*loc` must point at the first token inside the
 * braces on entry and points at the closing brace on return. Commas between
 * elements are skipped. */
void parse_aggtype_size(Token **toks, size_t *loc, AggregateType *buf) {
    buf->size_bytes = 0;
    while ((*toks)[*loc].type != TokRBrace) {
        if ((*toks)[*loc].type == TokComma) {
            (*loc)++;
            continue;
        }
        buf->size_bytes += get_element_size(toks, loc, buf);
        (*loc)++;
    }
}
/* Parses an aggregate type definition of the form
 *
 *     type :NAME = [align N] { ELEMENT, ELEMENT, ... }
 *
 * where `loc` is the index of the `type` keyword token. Fills buf->name,
 * buf->alignment (1 when no `align` clause is given) and buf->size_bytes.
 * Syntax errors print a message and exit with status 1.
 *
 * Returns the number of tokens to skip: the distance from `loc` to the
 * closing brace of the definition. */
size_t parse_aggtype(Token **toks, size_t loc, AggregateType *buf) {
    size_t start_loc = loc;
    if ((*toks)[loc + 1].type != TokAggType) {
        printf("Expected type name after type token, got something else on line %zu\n", (*toks)[loc + 1].line);
        exit(1);
    }
    buf->name = (char*) (*toks)[loc + 1].val;
    if ((*toks)[loc + 2].type != TokEqu) {
        printf("Equal sign expected after type name in aggregate type definiton, got something else on line %zu\n", (*toks)[loc + 2].line);
        exit(1);
    }
    if ((*toks)[loc + 3].type == TokAlign) {
        if ((*toks)[loc + 4].type != TokInteger) {
            printf("Expected integer literal after Align token on line %zu\n", (*toks)[loc + 4].line);
            exit(1);
        }
        buf->alignment = (*toks)[loc + 4].val;
        // Shift past `align N` so the brace check below can use loc + 3 in
        // both the aligned and unaligned forms.
        loc += 2;
    } else
        buf->alignment = 1;
    if ((*toks)[loc + 3].type != TokLBrace) {
        printf("Expected left brace in aggregate type definition on line %zu\n", (*toks)[loc + 3].line);
        exit(1);
    }
    // Step over `type :NAME = {` to the first element.
    loc += 4;
    // Leaves loc on the closing brace.
    parse_aggtype_size(toks, &loc, buf);
    return loc - start_loc;
}

/* Parses a `.file <id> "<name>"` debug directive, where `loc` is the index of
 * the directive token. Stores the integer id and the file name in `filebuf`.
 * Exits with status 1 if either argument has the wrong token type.
 *
 * Returns the number of tokens to skip (always 2: the two arguments). */
size_t parse_filedbg(Token **toks, size_t loc, FileDbg *filebuf) {
    size_t start_loc = loc;
    if ((*toks)[loc + 1].type != TokInteger) {
        printf("First argument of .file must be integer literal (file identification number)\n");
        exit(1);
    }
    if ((*toks)[loc + 2].type != TokStrLit) {
        printf("Second argument of .file must be string literal (file name)\n");
        exit(1);
    }
    filebuf->id = (*toks)[loc + 1].val;
    filebuf->fname = (char*) (*toks)[loc + 2].val;
    loc += 2;
    return loc - start_loc;
}

/* Parses a whole token stream into its top-level items.
 *
 * Walks `toks` from the start and dispatches on the current token:
 * function definitions (optionally exported), data globals (optionally
 * preceded by a section), aggregate type definitions and `.file` debug
 * directives. Newlines between items are ignored; any other token at top
 * level is reported and the process exits with status 1.
 *
 * The three *_buf output parameters are set to freshly created vectors that
 * receive the globals, aggregate types and debug files respectively.
 *
 * Returns the vector of parsed functions. */
Function **parse_program(Token **toks, Global ***globals_buf, AggregateType ***aggtypes_buf, FileDbg ***filesdbg_buf) {
    size_t num_toks = vec_size(toks);
    Function **functions = vec_new(sizeof(Function));
    *globals_buf = vec_new(sizeof(Global));
    *aggtypes_buf = vec_new(sizeof(AggregateType));
    *filesdbg_buf = vec_new(sizeof(FileDbg));
    for (size_t tok = 0; tok < num_toks; tok++) {
        if ((*toks)[tok].type == TokFunction || (*toks)[tok].type == TokExport) {
            Function fnbuf;
            // The -1 compensates for the loop's own tok++.
            tok += parse_function(toks, tok, &fnbuf) - 1;
            vec_push(functions, fnbuf);
        } else if ((*toks)[tok].type == TokNewLine) {
            continue;
        } else if ((*toks)[tok].type == TokData || (*toks)[tok].type == TokSection) {
            Global newglobal;
            // The parse_* helpers below return the distance to the item's last
            // token; the loop's tok++ then steps past that token.
            tok += parse_global(toks, tok, &newglobal);
            vec_push(*globals_buf, newglobal);
        } else if ((*toks)[tok].type == TokType) {
            AggregateType newtype;
            tok += parse_aggtype(toks, tok, &newtype);
            vec_push(*aggtypes_buf, newtype);
        } else if ((*toks)[tok].type == TokFile) {
            FileDbg newfile;
            tok += parse_filedbg(toks, tok, &newfile);
            vec_push(*filesdbg_buf, newfile);
        } else {
            printf("Something was found outside of a function body which isn't a constant definition on line %zu: %s, token id %u, val %p\n", (*toks)[tok].line, token_to_str((*toks)[tok].type), (*toks)[tok].type, (void*) (*toks)[tok].val);
            exit(1);
        }
    }
    return functions;
}
*/ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | String *string_from(char *from) { 10 | String *str = (String*) malloc(sizeof(String)); 11 | str->len = strlen(from); 12 | str->data = (char*) malloc(str->len + 1); 13 | strcpy(str->data, from); 14 | return str; 15 | } 16 | 17 | void string_push(String *str, char *new) { 18 | size_t new_len = str->len + strlen(new); 19 | str->data = realloc(str->data, new_len + 1); 20 | strcpy(str->data + str->len, new); 21 | str->len = new_len; 22 | } 23 | 24 | void string_push_fmt(String *str, char *fmt, ...) { 25 | va_list args; 26 | va_start(args, fmt); 27 | int length = vsnprintf(NULL, 0, fmt, args); 28 | va_end(args); 29 | size_t new_len = str->len + length; 30 | str->data = realloc(str->data, new_len + 1); 31 | va_start(args, fmt); 32 | vsnprintf(str->data + str->len, length + 1, fmt, args); 33 | va_end(args); 34 | str->len = new_len; 35 | } 36 | -------------------------------------------------------------------------------- /src/target/IR/build.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | void build_function(Function IR, FILE *outf) { 9 | char *rettype = get_full_char_str(IR.ret_is_struct, IR.return_type, IR.return_struct); 10 | fprintf(outf, "%sfunction %s $%s(", (IR.is_global) ? 
"export " : "", rettype, IR.name); 11 | for (size_t arg = 0; arg < IR.num_args; arg++) { 12 | char *argtype = get_full_char_str(IR.args[arg].type_is_struct, IR.args[arg].type, IR.args[arg].type_struct); 13 | fprintf(outf, "%s %%%s", argtype, IR.args[arg].label); 14 | if (!(arg == IR.num_args - 1 || IR.is_variadic)) 15 | fprintf(outf, ", "); 16 | } 17 | if (IR.is_variadic) fprintf(outf, "..."); 18 | fprintf(outf, ") {\n"); 19 | for (size_t s = 0; s < IR.num_statements; s++) { 20 | if (IR.statements[s].label) { 21 | fprintf(outf, "\t%%%s =%c ", IR.statements[s].label, size_as_char(IR.statements[s].type)); 22 | } else { 23 | fprintf(outf, "\t"); 24 | } 25 | instructions_IR[IR.statements[s].instruction](IR.statements[s].vals, IR.statements[s].val_types, IR.statements[s], outf); 26 | } 27 | fprintf(outf, "}\n\n"); 28 | } 29 | 30 | void build_globals(Global *global_vars, size_t num_global_vars, FILE *outf) { 31 | for (size_t g = 0; g < num_global_vars; g++) { 32 | if (global_vars[g].section) { 33 | fprintf(outf, "section \"%s\"\n", global_vars[g].section); 34 | } 35 | fprintf(outf, "data $%s = align %zu {", global_vars[g].name, global_vars[g].alignment); 36 | for (size_t v = 0; v < global_vars[g].num_vals; v++) { 37 | fprintf(outf, "%c ", size_as_char(global_vars[g].sizes[v])); 38 | if (global_vars[g].types[v] == Number) 39 | fprintf(outf, "%zu", global_vars[g].vals[v]); 40 | else if (global_vars[g].types[v] == StrLit) 41 | fprintf(outf, "\"%s\"", (char*) global_vars[g].vals[v]); 42 | else { 43 | printf("Type for global var must either be Number or StrLit.\n"); 44 | exit(1); 45 | } 46 | if (v != global_vars[g].num_vals - 1) 47 | fprintf(outf, ", "); 48 | } 49 | fprintf(outf, "}\n"); 50 | } 51 | } 52 | 53 | void build_aggtypes(AggregateType *aggtypes, size_t num_aggtypes, FILE *outf) { 54 | for (size_t i = 0; i < num_aggtypes; i++) { 55 | fprintf(outf, "type :%s = align %zu { %zu }\n", aggtypes[i].name, aggtypes[i].alignment, aggtypes[i].size_bytes); 56 | } 57 | } 58 | 59 
| void build_filesdbg(FileDbg *filesdbg, size_t num_filesdbg, FILE *outf) { 60 | for (size_t f = 0; f < num_filesdbg; f++) { 61 | fprintf(outf, ".file %zu \"%s\"\n", filesdbg[f].id, filesdbg[f].fname); 62 | } 63 | } 64 | 65 | void build_program_IR(Function *IR, size_t num_functions, Global *global_vars, size_t num_global_vars, AggregateType *aggtypes, size_t num_aggtypes, FileDbg *dbgfiles, size_t num_dbgfiles, FILE *outf) { 66 | fprintf(outf, "# Generated by UYB for UYB IR\n\n"); 67 | build_filesdbg(dbgfiles, num_dbgfiles, outf); 68 | build_globals(global_vars, num_global_vars, outf); 69 | build_aggtypes(aggtypes, num_aggtypes, outf); 70 | for (size_t f = 0; f < num_functions; f++) { 71 | build_function(IR[f], outf); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/target/IR/instructions.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | char *get_full_char_str(bool is_struct, Type type, char *type_struct); // defined in build.c 5 | 6 | static void build_value(uint64_t val, ValType type, FILE *outf) { 7 | if (type == Number) fprintf(outf, "%zu", val); 8 | else if (type == BlkLbl) fprintf(outf, "@%s", (char*) val); 9 | else if (type == Label ) fprintf(outf, "%%%s", (char*) val); 10 | else if (type == Str ) fprintf(outf, "$%s", (char*) val); 11 | else if (type == PhiArg) { 12 | fprintf(outf, "@%s ", ((PhiVal*) val)->blklbl_name); 13 | build_value(((PhiVal*) val)->val, ((PhiVal*) val)->type, outf); 14 | } 15 | } 16 | 17 | static char size_as_char(Type type) { 18 | if (type == Bits8) return 'b'; 19 | else if (type == Bits16) return 'h'; 20 | else if (type == Bits32) return 'w'; 21 | else return 'l'; 22 | } 23 | 24 | static void add_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 25 | fprintf(outf, "add "); 26 | build_value(vals[0], types[0], outf); 27 | fprintf(outf, ", "); 28 | build_value(vals[1], types[1], outf); 29 
| fprintf(outf, "\n"); 30 | } 31 | 32 | static void sub_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 33 | fprintf(outf, "sub "); 34 | build_value(vals[0], types[0], outf); 35 | fprintf(outf, ", "); 36 | build_value(vals[1], types[1], outf); 37 | fprintf(outf, "\n"); 38 | } 39 | 40 | static void div_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 41 | fprintf(outf, "div "); 42 | build_value(vals[0], types[0], outf); 43 | fprintf(outf, ", "); 44 | build_value(vals[1], types[1], outf); 45 | fprintf(outf, "\n"); 46 | } 47 | 48 | static void udiv_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 49 | fprintf(outf, "udiv "); 50 | build_value(vals[0], types[0], outf); 51 | fprintf(outf, ", "); 52 | build_value(vals[1], types[1], outf); 53 | fprintf(outf, "\n"); 54 | } 55 | 56 | static void rem_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 57 | fprintf(outf, "rem "); 58 | build_value(vals[0], types[0], outf); 59 | fprintf(outf, ", "); 60 | build_value(vals[1], types[1], outf); 61 | fprintf(outf, "\n"); 62 | } 63 | 64 | static void urem_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 65 | fprintf(outf, "urem "); 66 | build_value(vals[0], types[0], outf); 67 | fprintf(outf, ", "); 68 | build_value(vals[1], types[1], outf); 69 | fprintf(outf, "\n"); 70 | } 71 | 72 | static void mul_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 73 | fprintf(outf, "mul "); 74 | build_value(vals[0], types[0], outf); 75 | fprintf(outf, ", "); 76 | build_value(vals[1], types[1], outf); 77 | fprintf(outf, "\n"); 78 | } 79 | 80 | static void copy_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 81 | fprintf(outf, "copy "); 82 | build_value(vals[0], types[0], outf); 83 | fprintf(outf, "\n"); 84 | } 85 | 86 | static void ret_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) 
{ 87 | fprintf(outf, "ret "); 88 | build_value(vals[0], types[0], outf); 89 | fprintf(outf, "\n"); 90 | } 91 | 92 | static void jmp_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 93 | fprintf(outf, "jmp "); 94 | build_value(vals[0], types[0], outf); 95 | fprintf(outf, "\n"); 96 | } 97 | 98 | static void jz_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 99 | fprintf(outf, "jz "); 100 | build_value(vals[0], types[0], outf); 101 | fprintf(outf, ", "); 102 | build_value(vals[1], types[1], outf); 103 | fprintf(outf, "\n"); 104 | } 105 | 106 | static void and_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 107 | fprintf(outf, "and "); 108 | build_value(vals[0], types[0], outf); 109 | fprintf(outf, ", "); 110 | build_value(vals[1], types[1], outf); 111 | fprintf(outf, "\n"); 112 | } 113 | 114 | static void or_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 115 | fprintf(outf, "or "); 116 | build_value(vals[0], types[0], outf); 117 | fprintf(outf, ", "); 118 | build_value(vals[1], types[1], outf); 119 | fprintf(outf, "\n"); 120 | } 121 | 122 | static void blit_build(uint64_t vals[3], ValType types[3], Statement statement, FILE* outf) { 123 | fprintf(outf, "blit "); 124 | build_value(vals[0], types[0], outf); 125 | fprintf(outf, ", "); 126 | build_value(vals[1], types[1], outf); 127 | fprintf(outf, ", "); 128 | build_value(vals[2], types[2], outf); 129 | fprintf(outf, "\n"); 130 | } 131 | 132 | static void jnz_build(uint64_t vals[3], ValType types[3], Statement statement, FILE* outf) { 133 | fprintf(outf, "jnz "); 134 | build_value(vals[0], types[0], outf); 135 | fprintf(outf, ", "); 136 | build_value(vals[1], types[1], outf); 137 | fprintf(outf, ", "); 138 | build_value(vals[2], types[2], outf); 139 | fprintf(outf, "\n"); 140 | } 141 | 142 | static void xor_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 143 | fprintf(outf, "xor 
"); 144 | build_value(vals[0], types[0], outf); 145 | fprintf(outf, ", "); 146 | build_value(vals[1], types[1], outf); 147 | fprintf(outf, "\n"); 148 | } 149 | 150 | static void shl_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 151 | fprintf(outf, "shl "); 152 | build_value(vals[0], types[0], outf); 153 | fprintf(outf, ", "); 154 | build_value(vals[1], types[1], outf); 155 | fprintf(outf, "\n"); 156 | } 157 | 158 | static void shr_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 159 | fprintf(outf, "shr "); 160 | build_value(vals[0], types[0], outf); 161 | fprintf(outf, ", "); 162 | build_value(vals[1], types[1], outf); 163 | fprintf(outf, "\n"); 164 | } 165 | 166 | static void store_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 167 | fprintf(outf, "store%c ", size_as_char(statement.type)); 168 | build_value(vals[0], types[0], outf); 169 | fprintf(outf, ", "); 170 | build_value(vals[1], types[1], outf); 171 | fprintf(outf, "\n"); 172 | } 173 | 174 | static void load_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 175 | fprintf(outf, "load%c ", size_as_char(statement.type)); 176 | build_value(vals[0], types[0], outf); 177 | fprintf(outf, ", "); 178 | build_value(vals[1], types[1], outf); 179 | fprintf(outf, "\n"); 180 | } 181 | 182 | static void neg_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 183 | fprintf(outf, "neg "); 184 | build_value(vals[0], types[0], outf); 185 | fprintf(outf, "\n"); 186 | } 187 | 188 | static void alloc_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 189 | fprintf(outf, "alloc "); 190 | build_value(vals[0], types[0], outf); 191 | fprintf(outf, "\n"); 192 | } 193 | 194 | static void call_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 195 | fprintf(outf, "call "); 196 | build_value(vals[0], types[0], outf); 197 | fprintf(outf, "("); 198 
| FunctionArgList *args = (FunctionArgList*) vals[1]; 199 | size_t num_args = args->num_args; 200 | for (size_t arg = 0; arg < num_args; arg++) { 201 | char *arg_type = get_full_char_str(args->args_are_structs[arg], args->arg_sizes[arg], args->arg_struct_types[arg]); 202 | fprintf(outf, "%s ", arg_type); 203 | build_value((uint64_t) args->args[arg], args->arg_types[arg], outf); 204 | if (arg != num_args - 1) 205 | fprintf(outf, ", "); 206 | } 207 | fprintf(outf, ")\n"); 208 | } 209 | 210 | static void eq_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 211 | fprintf(outf, "ceq "); 212 | build_value(vals[0], types[0], outf); 213 | fprintf(outf, ", "); 214 | build_value(vals[1], types[1], outf); 215 | fprintf(outf, "\n"); 216 | } 217 | 218 | static void ne_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 219 | fprintf(outf, "cne "); 220 | build_value(vals[0], types[0], outf); 221 | fprintf(outf, ", "); 222 | build_value(vals[1], types[1], outf); 223 | fprintf(outf, "\n"); 224 | } 225 | 226 | static void sle_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 227 | fprintf(outf, "csle "); 228 | build_value(vals[0], types[0], outf); 229 | fprintf(outf, ", "); 230 | build_value(vals[1], types[1], outf); 231 | fprintf(outf, "\n"); 232 | } 233 | 234 | static void slt_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 235 | fprintf(outf, "cslt "); 236 | build_value(vals[0], types[0], outf); 237 | fprintf(outf, ", "); 238 | build_value(vals[1], types[1], outf); 239 | fprintf(outf, "\n"); 240 | } 241 | 242 | static void sge_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 243 | fprintf(outf, "csge "); 244 | build_value(vals[0], types[0], outf); 245 | fprintf(outf, ", "); 246 | build_value(vals[1], types[1], outf); 247 | fprintf(outf, "\n"); 248 | } 249 | 250 | static void sgt_build(uint64_t vals[2], ValType types[2], Statement statement, 
FILE* outf) { 251 | fprintf(outf, "csgt "); 252 | build_value(vals[0], types[0], outf); 253 | fprintf(outf, ", "); 254 | build_value(vals[1], types[1], outf); 255 | fprintf(outf, "\n"); 256 | } 257 | 258 | static void ule_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 259 | fprintf(outf, "cule "); 260 | build_value(vals[0], types[0], outf); 261 | fprintf(outf, ", "); 262 | build_value(vals[1], types[1], outf); 263 | fprintf(outf, "\n"); 264 | } 265 | 266 | static void ult_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 267 | fprintf(outf, "cult "); 268 | build_value(vals[0], types[0], outf); 269 | fprintf(outf, ", "); 270 | build_value(vals[1], types[1], outf); 271 | fprintf(outf, "\n"); 272 | } 273 | 274 | static void uge_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 275 | fprintf(outf, "cuge "); 276 | build_value(vals[0], types[0], outf); 277 | fprintf(outf, ", "); 278 | build_value(vals[1], types[1], outf); 279 | fprintf(outf, "\n"); 280 | } 281 | 282 | static void ugt_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 283 | fprintf(outf, "cugt "); 284 | build_value(vals[0], types[0], outf); 285 | fprintf(outf, ", "); 286 | build_value(vals[1], types[1], outf); 287 | fprintf(outf, "\n"); 288 | } 289 | 290 | static void ext_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 291 | fprintf(outf, "ext "); 292 | build_value(vals[0], types[0], outf); 293 | fprintf(outf, "\n"); 294 | } 295 | 296 | static void hlt_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 297 | fprintf(outf, "hlt\n"); 298 | } 299 | 300 | static void blklbl_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 301 | fprintf(outf, "@%s\n", (char*) vals[0]); 302 | } 303 | 304 | static void phi_build(uint64_t vals[2], ValType types[2], Statement statement, FILE* outf) { 305 | fprintf(outf, "phi "); 306 | 
build_value(vals[0], types[0], outf); 307 | fprintf(outf, ", "); 308 | build_value(vals[1], types[1], outf); 309 | fprintf(outf, "\n"); 310 | } 311 | 312 | static void vastart_build(uint64_t vals[2], ValType types[2], Statement statement, FILE *outf) { 313 | fprintf(outf, "vastart "); 314 | build_value(vals[0], types[0], outf); 315 | fprintf(outf, "\n"); 316 | } 317 | 318 | static void vaarg_build(uint64_t vals[2], ValType types[2], Statement statement, FILE *outf) { 319 | fprintf(outf, "vaarg "); 320 | build_value(vals[0], types[0], outf); 321 | fprintf(outf, "\n"); 322 | } 323 | 324 | static void loc_build(uint64_t vals[3], ValType types[3], Statement statement, FILE *outf) { 325 | if (types[0] != Number || types[1] != Number || types[2] != Number) { 326 | printf("All arguments of .loc instruction must be an integer literal.\n"); 327 | exit(1); 328 | } 329 | fprintf(outf, ".loc %zu %zu %zu\n", vals[0], vals[1], vals[2]); 330 | } 331 | 332 | static void asm_build(uint64_t vals[3], ValType types[3], Statement statement, FILE *outf) { 333 | printf("IR target does not support inline assembly statement in UYB. 
Please use an architecture-specific target for this feature.\n"); 334 | exit(1); 335 | } 336 | 337 | void (*instructions_IR[])(uint64_t[2], ValType[2], Statement, FILE*) = { 338 | add_build, sub_build, div_build, mul_build, copy_build, ret_build, call_build, jz_build, 339 | neg_build, udiv_build, rem_build, urem_build, and_build, or_build, xor_build, shl_build, shr_build, 340 | store_build, load_build, blit_build, alloc_build, eq_build, ne_build, sle_build, slt_build, sge_build, sgt_build, ule_build, ult_build, 341 | uge_build, ugt_build, ext_build, hlt_build, blklbl_build, jmp_build, jnz_build, phi_build, vastart_build, vaarg_build, 342 | loc_build, asm_build, 343 | }; 344 | -------------------------------------------------------------------------------- /src/target/x86_64/build.c: -------------------------------------------------------------------------------- 1 | /* Main code generation file for UYB x86_64 target. 2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. */ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | AggregateType *aggregate_types; /* TODO: Move all global vars (including those in register.c) */ 12 | size_t num_aggregate_types; /* into a single structure. 
*/ 13 | 14 | size_t type_to_size(Type type) { 15 | if (type == Bits8) return 1; 16 | else if (type == Bits8) return 2; 17 | else if (type == Bits16) return 4; 18 | else if (type == Bits64) return 8; 19 | return 0; 20 | } 21 | 22 | char *global_sizes[] = { 23 | ".byte", ".value", ".long", ".quad", 24 | }; 25 | 26 | char *type_as_str(Type type, char *struct_type, bool is_struct) { 27 | if (is_struct) { 28 | char *buf = aalloc(strlen(struct_type) + 2); 29 | sprintf(buf, ":%s", struct_type); 30 | return buf; 31 | } 32 | if (type == Bits8) return "byte"; 33 | else if (type == Bits16) return "word"; 34 | else if (type == Bits32) return "dword"; 35 | else if (type == Bits64) return "qword"; 36 | else { 37 | printf("Invalid type: %u\n", type); 38 | return "invalid_type"; 39 | } 40 | } 41 | 42 | static String *build_function(Function IR) { 43 | reg_init_fn(IR); 44 | String *fnbuf0 = string_from("\n"); 45 | string_push_fmt(fnbuf0, "// %s %s(", type_as_str(IR.return_type, IR.return_struct, IR.ret_is_struct), IR.name); 46 | for (size_t arg = 0; arg < IR.num_args; arg++) { 47 | string_push_fmt(fnbuf0, "%s %%%s", type_as_str(IR.args[arg].type, IR.args[arg].type_struct, IR.args[arg].type_is_struct), IR.args[arg].label); 48 | if (arg != IR.num_args - 1) string_push(fnbuf0, ", "); 49 | } 50 | string_push_fmt(fnbuf0, ") {\n%s", IR.name); 51 | String *fnbuf = string_from(":\n"); 52 | String *structarg_buf = string_from("\n"); 53 | size_t reg_arg_off = 0; 54 | for (size_t arg = 0; arg < IR.num_args; arg++) { 55 | if (IR.args[arg].type_is_struct) { 56 | if (arg > 4) { 57 | printf("Only the first 5 arguments accepted by a function can be structures. (TODO)\n"); 58 | exit(1); 59 | } 60 | AggregateType *aggtype = find_aggtype(IR.args[arg].type_struct, aggregate_types, num_aggregate_types); 61 | char *label_loc = reg_alloc(IR.args[arg].label, Bits64); 62 | if (aggtype->size_bytes <= 16) { 63 | // allocate space on the stack for it 64 | regalloc.bytes_rip_pad += (aggtype->size_bytes <= 8) ? 
1 : 2; 65 | string_push_fmt(structarg_buf, "\tlea -%llu(%rbp), %%rdi\n" 66 | "\tmov %%rdi, %s\n", 67 | regalloc.bytes_rip_pad, label_loc); 68 | // copy the data 69 | string_push_fmt(structarg_buf, "\tmov %s, (%s)\n", arg_regs[arg], label_loc); 70 | if (aggtype->size_bytes > 8) { 71 | // copy the second byte 72 | string_push_fmt(structarg_buf, "\tmov %s, 8(%s)\n", arg_regs[arg + 1], label_loc); 73 | } 74 | } else { 75 | regalloc.bytes_rip_pad += aggtype->size_bytes; 76 | // copy all the data 77 | string_push_fmt(structarg_buf, "\tmov %s, %%rsi\n", arg_regs[arg + 1]); 78 | string_push_fmt(structarg_buf, "\tmov %zu, %%rdi\n", regalloc.bytes_rip_pad); 79 | string_push_fmt(structarg_buf, "\tmov %zu, %%rcx\n", aggtype->size_bytes); 80 | string_push(structarg_buf, "\trep movsb\n"); 81 | string_push_fmt(structarg_buf, "\tmov %zu, %s\n", regalloc.bytes_rip_pad, label_loc); 82 | } 83 | } else if (arg > 5) { 84 | // it's on the stack 85 | size_t *new_vec_val = aalloc(sizeof(size_t) * 2); 86 | new_vec_val[0] = (size_t) IR.args[arg].label; 87 | reg_arg_off += type_to_size(IR.args[arg].type); 88 | new_vec_val[1] = reg_arg_off + 8; 89 | vec_push(regalloc.labels_as_offsets, new_vec_val); 90 | } else { 91 | reg_alloc(IR.args[arg].label, IR.args[arg].type); 92 | for (size_t i = 0; i < sizeof(label_reg_tab) / sizeof(label_reg_tab[0]); i++) { 93 | if (label_reg_tab[i][1] && !strcmp(IR.args[arg].label, label_reg_tab[i][1])) reg_alloc_tab[i][1]++; 94 | } 95 | } 96 | } 97 | for (size_t s = 0; s < IR.num_statements; s++) { 98 | update_regalloc(); 99 | disasm_instr(fnbuf, IR.statements[s]); 100 | // expects result in rax 101 | instructions_x86_64[IR.statements[s].instruction](IR.statements[s].vals, IR.statements[s].val_types, IR.statements[s], fnbuf); 102 | } 103 | size_t sz = vec_size(regalloc.used_regs_vec); 104 | if (((regalloc.bytes_rip_pad & 0b11111) != 0b10000) && regalloc.bytes_rip_pad) regalloc.bytes_rip_pad += 8; 105 | if (sz & 1) regalloc.bytes_rip_pad += 8; 106 | 
string_push(fnbuf, "// }\n"); 107 | string_push(fnbuf0, ":\n"); 108 | if (IR.is_variadic) { 109 | string_push(fnbuf0, "\t // Start pushing all variadic argument registers\n"); 110 | for (ssize_t arg = sizeof(arg_regs) / sizeof(arg_regs[0]) - 1; arg >= 0; arg--) 111 | string_push_fmt(fnbuf0, "\tpush %s\n", arg_regs[arg]); 112 | string_push(fnbuf0, "\t // End var args\n"); 113 | regalloc.bytes_rip_pad += 8; 114 | } 115 | string_push(fnbuf0, "\tpush %rbp\n\tmov %rsp, %rbp\n"); 116 | if (regalloc.bytes_rip_pad) 117 | string_push_fmt(fnbuf0, "\tsub $%llu, %%rsp\n", regalloc.bytes_rip_pad); 118 | for (size_t i = 0; i < sz; i++) 119 | string_push_fmt(fnbuf0, "\tpush %s // used reg\n", (*regalloc.used_regs_vec)[i]); 120 | char **argregs_at = arg_regs; 121 | for (size_t arg = 0; arg < IR.num_args; arg++) { 122 | if (IR.args[arg].type_is_struct) { 123 | AggregateType *aggtype = find_aggtype(IR.args[arg].type_struct, aggregate_types, num_aggregate_types); 124 | if (aggtype->size_bytes <= 8 || aggtype->size_bytes > 16) 125 | argregs_at++; 126 | else 127 | argregs_at += 2; 128 | continue; 129 | } 130 | char *reg = label_to_reg(0, IR.args[arg].label, true); 131 | if (reg) 132 | string_push_fmt(fnbuf0, "\tmov %s, %s\n", reg_as_size(*argregs_at, IR.args[arg].type), reg); // TODO: fix with >6 args 133 | argregs_at++; 134 | } 135 | string_push(fnbuf0, structarg_buf->data + 1); 136 | string_push(fnbuf0, fnbuf->data + 2); 137 | return fnbuf0; 138 | } 139 | 140 | void build_program_x86_64(Function *IR, size_t num_functions, Global *global_vars, size_t num_global_vars, AggregateType *aggtypes, size_t num_aggtypes, FileDbg *dbgfiles, size_t num_dbgfiles, FILE *outf) { 141 | aggregate_types = aggtypes; 142 | num_aggregate_types = num_aggtypes; 143 | char* **globals = vec_new(sizeof(char*)); 144 | String* **function_statements = vec_new(sizeof(String**)); 145 | for (size_t f = 0; f < num_functions; f++) { 146 | if (IR[f].is_global) vec_push(globals, IR[f].name); 147 | 
vec_push(function_statements, build_function(IR[f])); 148 | } 149 | fprintf(outf, "// Generated by UYB for x86_64\n"); 150 | for (size_t f = 0; f < num_dbgfiles; f++) 151 | fprintf(outf, ".file %zu \"%s\"\n", dbgfiles[f].id, dbgfiles[f].fname); 152 | fprintf(outf, ".data\n"); 153 | for (size_t g = 0; g < num_global_vars; g++) { 154 | if (global_vars[g].section) 155 | fprintf(outf, ".section \"%s\"\n", global_vars[g].section); 156 | fprintf(outf, "%s:\n", global_vars[g].name); 157 | for (size_t i = 0; i < global_vars[g].num_vals; i++) { 158 | if (global_vars[g].alignment > 1) 159 | fprintf(outf, ".align %zu\n", global_vars[g].alignment); 160 | if (global_vars[g].types[i] == Number) 161 | fprintf(outf, "\t%s %zu\n", global_sizes[global_vars[g].sizes[i]], global_vars[g].vals[i]); 162 | else if (global_vars[g].types[i] == StrLit) 163 | fprintf(outf, "\t.ascii \"%s\"\n", (char*) global_vars[g].vals[i]); 164 | else { 165 | printf("Type for global var must either be Number or StrLit.\n"); 166 | exit(1); 167 | } 168 | } 169 | if (global_vars[g].section) 170 | fprintf(outf, ".data\n"); 171 | } 172 | fprintf(outf, "\n.text\n"); 173 | for (size_t i = 0; i < vec_size(globals); i++) 174 | fprintf(outf, ".globl %s\n", (*globals)[i]); 175 | for (size_t i = 0; i < vec_size(function_statements); i++) 176 | fprintf(outf, "%s", (*function_statements)[i]->data); 177 | } 178 | -------------------------------------------------------------------------------- /src/target/x86_64/instructions.c: -------------------------------------------------------------------------------- 1 | /* Individual instruction implementations for x86_64 target of UYB. 2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. 
*/ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | // defined in build.c 14 | extern AggregateType *aggregate_types; 15 | extern size_t num_aggregate_types; 16 | 17 | // defined in main.c 18 | extern int is_position_independent; 19 | 20 | char sizes[] = { 21 | 'b', 'w', 'l', 'q' 22 | }; 23 | 24 | // A quick alternative to reg_as_type since rax is used a lot 25 | char *rax_versions[] = { 26 | "al", "ax", "eax", "rax" 27 | }; 28 | 29 | char *instruction_as_str(Instruction instr) { 30 | if (instr == ADD ) return "ADD"; 31 | else if (instr == SUB ) return "SUB"; 32 | else if (instr == DIV ) return "DIV"; 33 | else if (instr == MUL ) return "MUL"; 34 | else if (instr == COPY ) return "COPY"; 35 | else if (instr == RET ) return "RET"; 36 | else if (instr == CALL ) return "CALL"; 37 | else if (instr == JZ ) return "JZ"; 38 | else if (instr == NEG ) return "NEG"; 39 | else if (instr == UDIV ) return "UDIV"; 40 | else if (instr == STORE ) return "STORE"; 41 | else if (instr == LOAD ) return "LOAD"; 42 | else if (instr == BLIT ) return "BLIT"; 43 | else if (instr == ALLOC ) return "ALLOC"; 44 | else if (instr == EQ ) return "EQ"; 45 | else if (instr == NE ) return "NE"; 46 | else if (instr == SGE ) return "SGE"; 47 | else if (instr == SGT ) return "SGT"; 48 | else if (instr == SLE ) return "SLE"; 49 | else if (instr == SLT ) return "SLT"; 50 | else if (instr == UGE ) return "UGE"; 51 | else if (instr == UGT ) return "UGT"; 52 | else if (instr == ULE ) return "ULE"; 53 | else if (instr == ULT ) return "ULT"; 54 | else if (instr == EXT ) return "EXT"; 55 | else if (instr == HLT ) return "HLT"; 56 | else if (instr == BLKLBL ) return "BLKLBL"; 57 | else if (instr == JMP ) return "JMP"; 58 | else if (instr == JNZ ) return "JNZ"; 59 | else if (instr == SHR ) return "SHR"; 60 | else if (instr == SHL ) return "SHL"; 61 | else if (instr == AND ) return "AND"; 62 | else if (instr == OR ) return "OR"; 63 | 
else if (instr == PHI ) return "PHI"; 64 | else if (instr == VASTART) return "VASTART"; 65 | else if (instr == VAARG ) return "VAARG"; 66 | else if (instr == LOC ) return "LOC"; 67 | else if (instr == ASM ) return "ASM"; 68 | else return "Unknown instruction"; 69 | } 70 | 71 | static void print_val(String *fnbuf, uint64_t val, ValType type) { 72 | if (type == Number ) string_push_fmt(fnbuf, "$%llu", val); 73 | else if (type == Label ) string_push_fmt(fnbuf, "%%%s", (char*) val); 74 | else if (type == Str ) string_push_fmt(fnbuf, "$%s", (char*) val); 75 | else if (type == FunctionArgs ) string_push_fmt(fnbuf, "(function arguments)"); 76 | else if (type == BlkLbl ) string_push_fmt(fnbuf, "@%s", (char*) val); 77 | else if (type == InlineAssembly ) string_push_fmt(fnbuf, "(inline assembly values)"); 78 | else if (type == PhiArg) { 79 | string_push_fmt(fnbuf, "@%s ", ((PhiVal*) val)->blklbl_name); 80 | print_val(fnbuf, ((PhiVal*) val)->val, ((PhiVal*) val)->type); 81 | } else { 82 | printf("Invalid value type\n"); 83 | exit(1); 84 | } 85 | } 86 | 87 | void disasm_instr(String *fnbuf, Statement statement) { 88 | if (statement.instruction == BLKLBL) return; 89 | string_push(fnbuf, "\t// "); 90 | if (statement.label) { 91 | string_push_fmt(fnbuf, "%%%s =%s ", statement.label, type_as_str(statement.type, 0, false)); 92 | } 93 | string_push_fmt(fnbuf, "%s ", instruction_as_str(statement.instruction)); 94 | if (statement.val_types[0] != Empty) print_val(fnbuf, statement.vals[0], statement.val_types[0]); 95 | if (statement.val_types[1] != Empty) { 96 | string_push(fnbuf, ", "); 97 | print_val(fnbuf, statement.vals[1], statement.val_types[1]); 98 | } 99 | if (statement.val_types[2] != Empty) { 100 | string_push(fnbuf, ", "); 101 | print_val(fnbuf, statement.vals[2], statement.val_types[2]); 102 | } 103 | string_push(fnbuf, "\n"); 104 | } 105 | 106 | static void build_value_noresize(ValType type, uint64_t val, bool can_prepend_dollar, String *fnbuf) { 107 | if (type == Number) 
string_push_fmt(fnbuf, "$%llu", val); 108 | else if (type == BlkLbl) string_push_fmt(fnbuf, ".%s_%s", regalloc.current_fn->name, (char*) val); 109 | else if (type == Label ) string_push_fmt(fnbuf, "%s", label_to_reg_noresize(0, (char*) val, false)); 110 | else if (type == Str ) { 111 | if (is_position_independent) 112 | string_push_fmt(fnbuf, "%s(%%rip)", (char*) val); 113 | else 114 | string_push_fmt(fnbuf, "%s%s", (can_prepend_dollar) ? "$" : "", (char*) val); 115 | } 116 | } 117 | 118 | static void build_value(ValType type, uint64_t val, bool can_prepend_dollar, String *fnbuf) { 119 | if (type == Number) string_push_fmt(fnbuf, "$%llu", val); 120 | else if (type == BlkLbl) string_push_fmt(fnbuf, ".%s_%s", regalloc.current_fn->name, (char*) val); 121 | else if (type == Label ) string_push_fmt(fnbuf, "%s", label_to_reg(0, (char*) val, false)); 122 | else if (type == Str ) { 123 | if (is_position_independent) 124 | string_push_fmt(fnbuf, "%s(%%rip)", (char*) val); 125 | else 126 | string_push_fmt(fnbuf, "%s%s", (can_prepend_dollar) ? "$" : "", (char*) val); 127 | } 128 | } 129 | 130 | static void operation_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf, char *operation) { 131 | char *label_loc = reg_alloc(statement.label, statement.type); 132 | if (label_loc[0] != '%') { // label stored in memory address on stack 133 | string_push_fmt(fnbuf, "\t%s%c ", (types[0] == Str && is_position_independent) ? 
"lea" : "mov", sizes[statement.type]); 134 | build_value(types[0], vals[0], true, fnbuf); 135 | string_push_fmt(fnbuf, ", %%%s\n", rax_versions[statement.type]); 136 | string_push_fmt(fnbuf, "\t%s ", operation); 137 | build_value(types[1], vals[1], true, fnbuf); 138 | string_push_fmt(fnbuf, ", %%%s\n", rax_versions[statement.type]); 139 | string_push_fmt(fnbuf, "\tmov%c %%%s, %s\n", sizes[statement.type], rax_versions[statement.type], label_loc); 140 | } else { // stored in register 141 | string_push_fmt(fnbuf, "\t%s%c ", (types[0] == Str && is_position_independent) ? "lea" : "mov", sizes[statement.type]); 142 | build_value(types[0], vals[0], true, fnbuf); 143 | string_push_fmt(fnbuf, ", %s\n", label_loc); 144 | string_push_fmt(fnbuf, "\t%s%c ", operation, sizes[statement.type]); 145 | build_value(types[1], vals[1], true, fnbuf); 146 | string_push_fmt(fnbuf, ", %s\n", label_loc); 147 | } 148 | } 149 | 150 | static void add_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 151 | operation_build(vals, types, statement, fnbuf, "add"); 152 | } 153 | 154 | static void sub_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 155 | operation_build(vals, types, statement, fnbuf, "sub"); 156 | } 157 | 158 | static void and_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 159 | operation_build(vals, types, statement, fnbuf, "and"); 160 | } 161 | 162 | static void or_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 163 | operation_build(vals, types, statement, fnbuf, "or"); 164 | } 165 | 166 | static void xor_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 167 | operation_build(vals, types, statement, fnbuf, "xor"); 168 | } 169 | 170 | static void div_both_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf, bool is_signed, bool get_remainder) { 171 | char *label_loc = reg_alloc(statement.label, 
statement.type); 172 | string_push_fmt(fnbuf, "\tmov%c ", sizes[statement.type]); 173 | build_value(types[0], vals[0], true, fnbuf); 174 | string_push_fmt(fnbuf, ", %%%s\n" 175 | "\txor %rdx, %rdx\n", rax_versions[statement.type]); 176 | string_push_fmt(fnbuf, "\t%s%c ", (is_signed) ? "idiv" : "div", sizes[statement.type]); 177 | build_value(types[1], vals[1], true, fnbuf); 178 | string_push(fnbuf, "\n"); 179 | string_push_fmt(fnbuf, "\tmov %%%s, %s\n", (get_remainder) ? reg_as_size("%rdx", statement.type) : rax_versions[statement.type], label_loc); 180 | } 181 | 182 | static void div_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 183 | div_both_build(vals, types, statement, fnbuf, true, false); 184 | } 185 | 186 | static void udiv_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 187 | div_both_build(vals, types, statement, fnbuf, false, false); 188 | } 189 | 190 | static void rem_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 191 | div_both_build(vals, types, statement, fnbuf, true, true); 192 | } 193 | 194 | static void urem_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 195 | div_both_build(vals, types, statement, fnbuf, false, true); 196 | } 197 | 198 | static void mul_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 199 | char *label_loc = reg_alloc(statement.label, statement.type); 200 | bool is_imm = types[1] == Number || types[1] == Str; 201 | if (is_imm) { 202 | string_push_fmt(fnbuf, "\tmov%c ", sizes[statement.type]); 203 | build_value(types[1], vals[1], true, fnbuf); 204 | string_push_fmt(fnbuf, ", %s\n", reg_as_size("%rdi", statement.type)); 205 | } 206 | string_push_fmt(fnbuf, "\tmov%c ", sizes[statement.type]); 207 | build_value(types[0], vals[0], true, fnbuf); 208 | string_push_fmt(fnbuf, ", %%%s\n", rax_versions[statement.type]); 209 | string_push_fmt(fnbuf, "\tmul%c ", 
sizes[statement.type]); 210 | if (is_imm) 211 | string_push_fmt(fnbuf, "%s", reg_as_size("%rdi", statement.type)); 212 | else 213 | build_value(types[1], vals[1], true, fnbuf); 214 | string_push(fnbuf, "\n"); 215 | string_push_fmt(fnbuf, "\tmov%c %%%s, %s\n", sizes[statement.type], rax_versions[statement.type], label_loc); 216 | } 217 | 218 | static void copy_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 219 | char *label_loc = reg_alloc(statement.label, statement.type); 220 | string_push_fmt(fnbuf, "\t%s%c ", (types[0] == Str && is_position_independent) ? "lea" : "mov", sizes[statement.type]); 221 | build_value(types[0], vals[0], true, fnbuf); 222 | if (label_loc[0] == '%') // stored in reg 223 | string_push_fmt(fnbuf, ", %s\n", label_loc); 224 | else { // stored in memory 225 | string_push_fmt(fnbuf, ", %%%s\n", rax_versions[statement.type]); 226 | string_push_fmt(fnbuf, "\tmov%c %%%s, %s\n", sizes[statement.type], rax_versions[statement.type], label_loc); 227 | } 228 | } 229 | 230 | static void ret_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 231 | if (types[0] == Empty || (types[0] == Number && !vals[0])) { 232 | string_push(fnbuf, "\txor %rax, %rax\n"); 233 | } else { 234 | if (regalloc.current_fn->ret_is_struct) { 235 | if (types[0] != Label) { 236 | printf("Tried to return a non-struct value from a function meant to return a struct.\n"); 237 | exit(1); 238 | } 239 | AggregateType *aggtype = find_aggtype(regalloc.current_fn->return_struct, aggregate_types, num_aggregate_types); 240 | if (aggtype->size_bytes > 8 && aggtype->size_bytes <= 16) { 241 | char *label = label_to_reg_noresize(0, (char*) vals[0], false); 242 | string_push_fmt(fnbuf, "\tmov %s, %%rdi\n", label); 243 | string_push(fnbuf, "\tmov (%rdi), %rax\n"); // save lower 8 bytes 244 | string_push(fnbuf, "\tmov 8(%rdi), %rdx\n"); // save higher 8 bytes 245 | goto end_save; 246 | } else if (aggtype->size_bytes <= 8) { 247 | char 
*label = label_to_reg_noresize(0, (char*) vals[0], false); 248 | string_push_fmt(fnbuf, "\tmov %s, %%rdi\n", label); 249 | string_push(fnbuf, "\tmov (%rdi), %rax\n"); // save lower 8 bytes 250 | goto end_save; 251 | } 252 | } 253 | string_push_fmt(fnbuf, "\t%s ", (types[0] == Str && is_position_independent) ? "lea" : "mov"); 254 | build_value_noresize(types[0], vals[0], true, fnbuf); 255 | string_push(fnbuf, ", %rax\n"); 256 | } 257 | end_save: 258 | if (regalloc.current_fn->is_variadic) 259 | string_push_fmt(fnbuf, "\tmov %rbp, %rsp\n\tpop %rbp\n\tadd $%zu, %rsp\n\tret\n", sizeof(arg_regs) / sizeof(arg_regs[0]) * 8); 260 | else 261 | string_push_fmt(fnbuf, "\tmov %rbp, %rsp\n\tpop %rbp\n\tret\n"); 262 | } 263 | 264 | static void call_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 265 | size_t pop_bytes = 0; 266 | if (((FunctionArgList*) vals[1])->num_args > 6 && ((FunctionArgList*) vals[1])->num_args & 1) { 267 | string_push(fnbuf, "\tsub $8, %rsp\n"); 268 | } 269 | char **argregs_at = arg_regs; 270 | for (size_t arg = 0; arg < ((FunctionArgList*) vals[1])->num_args; arg++) { 271 | char *label_loc = NULL; 272 | if (((FunctionArgList*) vals[1])->arg_types[arg] == Label && 273 | ((FunctionArgList*) vals[1])->args_are_structs[arg]) { 274 | AggregateType *aggtype = find_aggtype(((FunctionArgList*) vals[1])->arg_struct_types[arg], aggregate_types, num_aggregate_types); 275 | if (aggtype->size_bytes > 16) { 276 | // Make sure it's 64 bit then just continue and let it be passed as a pointer 277 | ((FunctionArgList*) vals[1])->arg_sizes[arg] = Bits64; 278 | } else if (aggtype->size_bytes > 8) { 279 | // copy 16 bytes 280 | label_loc = label_to_reg_noresize(0, ((FunctionArgList*) vals[1])->args[arg], true); 281 | string_push_fmt(fnbuf, "\tmovq %s, %%rax\n", label_loc); 282 | string_push_fmt(fnbuf, "\tmovq (%%rax), %s\n", argregs_at[0]); 283 | string_push_fmt(fnbuf, "\tmovq 8(%%rax), %s\n", argregs_at[1]); 284 | argregs_at += 2; 285 | 
continue; 286 | } else { 287 | // copy 8 bytes 288 | label_loc = label_to_reg_noresize(0, ((FunctionArgList*) vals[1])->args[arg], true); 289 | string_push_fmt(fnbuf, "\tmovq %s, %%rax\n", label_loc); 290 | string_push_fmt(fnbuf, "\tmovq (%%rax), %s\n", *argregs_at); 291 | argregs_at++; 292 | continue; 293 | } 294 | } 295 | if (((FunctionArgList*) vals[1])->arg_types[arg] != Number) { 296 | label_loc = label_to_reg_noresize(0, ((FunctionArgList*) vals[1])->args[arg], true); 297 | if (label_loc && arg < 6 && !strcmp(label_loc, reg_as_size(*argregs_at, get_reg_size(label_loc, ((FunctionArgList*) vals[1])->args[arg])))) { 298 | argregs_at++; 299 | continue; 300 | } 301 | } 302 | if (arg < 6) { 303 | if (((FunctionArgList*) vals[1])->arg_types[arg] == Label && (label_loc && label_loc[0] == '%')) { 304 | if (((FunctionArgList*) vals[1])->arg_types[arg] != Label) 305 | label_to_reg_noresize(0, ((FunctionArgList*) vals[1])->args[arg], true); 306 | string_push_fmt(fnbuf, "\t%s%c ", (((FunctionArgList*) vals[1])->arg_types[arg] == Str && is_position_independent) ? "lea" : "mov", sizes[((FunctionArgList*) vals[1])->arg_sizes[arg]]); 307 | build_value(((FunctionArgList*) vals[1])->arg_types[arg], (uint64_t) ((FunctionArgList*) vals[1])->args[arg], true, fnbuf); 308 | string_push_fmt(fnbuf, ", %s // arg = %zu\n", reg_as_size(*argregs_at, ((FunctionArgList*) vals[1])->arg_sizes[arg]), arg); 309 | } else { 310 | string_push_fmt(fnbuf, "\t%s%c ", (((FunctionArgList*) vals[1])->arg_types[arg] == Str && is_position_independent) ? 
"lea" : "mov", sizes[((FunctionArgList*) vals[1])->arg_sizes[arg]]); 311 | build_value(((FunctionArgList*) vals[1])->arg_types[arg], (uint64_t) ((FunctionArgList*) vals[1])->args[arg], true, fnbuf); 312 | string_push_fmt(fnbuf, ", %s // arg = %zu\n", reg_as_size(*argregs_at, ((FunctionArgList*) vals[1])->arg_sizes[arg]), arg); 313 | } 314 | } else { 315 | pop_bytes += 8; 316 | string_push_fmt(fnbuf, "\tpush "); 317 | build_value(((FunctionArgList*) vals[1])->arg_types[arg], (uint64_t) ((FunctionArgList*) vals[1])->args[arg], true, fnbuf); 318 | string_push_fmt(fnbuf, " // arg = %zu\n", arg); 319 | } 320 | argregs_at++; 321 | } 322 | string_push(fnbuf, "\tcall "); 323 | if (types[0] == Str && is_position_independent) 324 | string_push_fmt(fnbuf, "%s", (char*) vals[0]); 325 | else 326 | build_value(types[0], vals[0], false, fnbuf); 327 | string_push(fnbuf, "\n"); 328 | if (((FunctionArgList*) vals[1])->num_args > 6 && ((FunctionArgList*) vals[1])->num_args & 1) 329 | pop_bytes += 8; 330 | if (pop_bytes) 331 | string_push_fmt(fnbuf, "\tadd $%zu, %rsp\n", pop_bytes); 332 | if (statement.label) { 333 | char *label_loc = reg_alloc(statement.label, statement.type); 334 | string_push_fmt(fnbuf, "\tmov %%%s, %s\n", rax_versions[statement.type], label_loc); 335 | } 336 | } 337 | 338 | static void jz_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 339 | if (types[0] != Label) { 340 | printf("First value of JZ instruction must be a label.\n"); 341 | exit(1); 342 | } 343 | string_push_fmt(fnbuf, "\tcmp $0, %s\n" 344 | "\tje ", label_to_reg(0, (char*) vals[0], false)); 345 | build_value(types[1], vals[1], false, fnbuf); 346 | string_push_fmt(fnbuf, "\n"); 347 | } 348 | 349 | static void jmp_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 350 | string_push_fmt(fnbuf, "\tjmp "); 351 | build_value(types[0], vals[0], false, fnbuf); 352 | string_push_fmt(fnbuf, "\n"); 353 | } 354 | 355 | static void jnz_build(uint64_t 
vals[2], ValType types[2], Statement statement, String *fnbuf) { 356 | if (types[1] == Empty || types[2] == Empty) { 357 | printf("Expected two labels in JNZ instruction.\n"); 358 | exit(1); 359 | } 360 | if (types[0] == Number) { 361 | string_push_fmt(fnbuf, "\tmov "); 362 | build_value(types[0], vals[0], false, fnbuf); 363 | string_push_fmt(fnbuf, ", %%rdi\n\tcmpq $0, %%rdi"); 364 | } else if (types[0] == Label) { 365 | char *loc = label_to_reg_noresize(0, (char*) vals[0], false); 366 | Type sz = get_reg_size(loc, (char*) vals[0]); 367 | string_push_fmt(fnbuf, "\tcmp%c $0, %s\n", sizes[sz], reg_as_size(loc, sz)); 368 | } else { 369 | printf("First value of JNZ must be either a label or a number.\n"); 370 | exit(1); 371 | } 372 | string_push_fmt(fnbuf, "\n\tjne "); 373 | build_value(types[1], vals[1], false, fnbuf); 374 | string_push_fmt(fnbuf, "\n\tjmp "); 375 | build_value(types[2], vals[2], false, fnbuf); 376 | string_push_fmt(fnbuf, "\n"); 377 | } 378 | 379 | static void neg_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 380 | char *label_loc = reg_alloc(statement.label, statement.type); 381 | string_push(fnbuf, "\tmov "); 382 | build_value(types[0], vals[0], true, fnbuf); 383 | string_push_fmt(fnbuf, ", %s\n" 384 | "\tneg%c %s\n", sizes[statement.type], label_loc, label_loc); 385 | } 386 | 387 | static void shift_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf, char direction) { 388 | char *label_loc = reg_alloc(statement.label, statement.type); 389 | // first val 390 | string_push_fmt(fnbuf, "\tmov%c ", sizes[statement.type]); 391 | build_value(types[1], vals[1], true, fnbuf); 392 | string_push_fmt(fnbuf, ", %s\n", reg_as_size("%rcx", statement.type)); 393 | // second val 394 | string_push_fmt(fnbuf, "\tmov%c ", sizes[statement.type]); 395 | build_value(types[0], vals[0], true, fnbuf); 396 | string_push_fmt(fnbuf, ", %s\n", reg_as_size("%rdi", statement.type)); 397 | // shift 398 | 
string_push_fmt(fnbuf, "\tsh%c%c %%cl, %s\n" 399 | "\tmov %s, %s\n", 400 | direction, sizes[statement.type], reg_as_size("%rdi", statement.type), 401 | reg_as_size("%rdi", statement.type), label_loc); 402 | } 403 | 404 | static void shl_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 405 | shift_build(vals, types, statement, fnbuf, 'l'); 406 | } 407 | 408 | static void shr_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 409 | shift_build(vals, types, statement, fnbuf, 'r'); 410 | } 411 | 412 | static void store_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 413 | char *reg = label_to_reg(0, (char*) vals[1], false); 414 | if (reg[0] == '%') { 415 | string_push_fmt(fnbuf, "\tmov%c ", sizes[statement.type]); 416 | build_value(types[0], vals[0], true, fnbuf); 417 | string_push_fmt(fnbuf, ", %s\n", reg_as_size("%rdi", statement.type)); 418 | string_push_fmt(fnbuf, "\tmov%c %s", sizes[statement.type], reg_as_size("%rdi", statement.type)); 419 | string_push_fmt(fnbuf, ", (%s) // addr of %s\n", reg, (char*) vals[1]); 420 | } else { 421 | string_push_fmt(fnbuf, "\tmovq %s, %%rdi\n", reg); 422 | string_push_fmt(fnbuf, "\tmov%c ", sizes[statement.type]); 423 | build_value(types[0], vals[0], true, fnbuf); 424 | string_push_fmt(fnbuf, ", %s\n", reg_as_size("%rsi", statement.type)); 425 | string_push_fmt(fnbuf, "\tmov%c %s, (%%rdi)\n", sizes[statement.type], reg_as_size("%rsi", statement.type)); 426 | } 427 | } 428 | 429 | static void load_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 430 | char *label_loc = reg_alloc(statement.label, statement.type); 431 | char *addr = label_to_reg(0, (char*) vals[0], false); 432 | bool use_brackets = addr[0] == '%'; 433 | if (use_brackets) { 434 | // is a register that stores the address 435 | string_push_fmt(fnbuf, "\tmovq (%s), %%rdi\n", addr); 436 | string_push_fmt(fnbuf, "\tmov%c %s, %s\n", 
sizes[statement.type], reg_as_size("%rdi", statement.type), label_loc); 437 | } else { 438 | // address is on the stack 439 | string_push_fmt(fnbuf, "\tmovq %s, %%rdi\n", addr); 440 | string_push_fmt(fnbuf, "\tmovq (%rdi), %%rdi\n"); 441 | string_push_fmt(fnbuf, "\tmov%c %s, %s\n", sizes[statement.type], reg_as_size("%rdi", statement.type), label_loc); 442 | } 443 | } 444 | 445 | static void blit_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 446 | string_push(fnbuf, "\tmovq "); 447 | build_value(types[1], vals[1], true, fnbuf); 448 | string_push(fnbuf, ", %rdi\n"); 449 | string_push(fnbuf, "\tmovq "); 450 | build_value(types[0], vals[0], true, fnbuf); 451 | string_push(fnbuf, ", %rsi\n"); 452 | string_push(fnbuf, "\tmovq "); 453 | build_value(types[2], vals[2], true, fnbuf); 454 | string_push(fnbuf, ", %rcx\n"); 455 | string_push(fnbuf, "\trep movsb\n"); 456 | } 457 | 458 | static void alloc_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 459 | if (types[0] != Number) { 460 | printf("ALLOC's argument must be a number literal.\n"); 461 | exit(1); 462 | } 463 | char *label_loc = reg_alloc(statement.label, statement.type); 464 | regalloc.bytes_rip_pad += vals[0]; 465 | string_push_fmt(fnbuf, "\tlea -%llu(%rbp), %s\n" 466 | "\tmov %s, %s\n", 467 | regalloc.bytes_rip_pad, reg_as_size("%rdi", statement.type), reg_as_size("%rdi", statement.type), label_loc); 468 | } 469 | 470 | static void comparison_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf, char *instr) { 471 | char *label_loc = reg_alloc_noresize(statement.label, statement.type); 472 | string_push_fmt(fnbuf, "\tmov "); 473 | build_value(types[1], vals[1], true, fnbuf); 474 | string_push_fmt(fnbuf, ", %s\n" 475 | "\tcmp%c %s, ", reg_as_size("%rdi", statement.type), sizes[statement.type], reg_as_size("%rdi", statement.type)); 476 | build_value(types[0], vals[0], true, fnbuf); 477 | string_push_fmt(fnbuf, "\n"); 478 | if 
(label_loc[0] == '%') { // label in reg 479 | char *sized_label = reg_as_size(label_loc, Bits8); 480 | string_push_fmt(fnbuf, "\t%s %s\n", instr, sized_label); 481 | string_push_fmt(fnbuf, "\tmovzb%c %s, %s\n", sizes[statement.type], sized_label, reg_as_size(label_loc, statement.type)); 482 | } else { // on stack 483 | string_push_fmt(fnbuf, "\t%s %%al\n", instr); 484 | string_push_fmt(fnbuf, "\tmovzb%c %%al, %%%s\n", sizes[statement.type], rax_versions[statement.type]); 485 | string_push_fmt(fnbuf, "\tmov%c %%%s, %s\n", sizes[statement.type], rax_versions[statement.type], label_loc); 486 | } 487 | } 488 | 489 | static void eq_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 490 | comparison_build(vals, types, statement, fnbuf, "sete"); 491 | } 492 | 493 | static void ne_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 494 | comparison_build(vals, types, statement, fnbuf, "setne"); 495 | } 496 | 497 | static void sge_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 498 | comparison_build(vals, types, statement, fnbuf, "setge"); 499 | } 500 | 501 | static void sgt_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 502 | comparison_build(vals, types, statement, fnbuf, "setg"); 503 | } 504 | 505 | static void sle_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 506 | comparison_build(vals, types, statement, fnbuf, "setle"); 507 | } 508 | 509 | static void slt_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 510 | comparison_build(vals, types, statement, fnbuf, "setl"); 511 | } 512 | 513 | static void uge_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 514 | comparison_build(vals, types, statement, fnbuf, "setae"); 515 | } 516 | 517 | static void ugt_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 518 | comparison_build(vals, 
types, statement, fnbuf, "seta"); 519 | } 520 | 521 | static void ule_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 522 | comparison_build(vals, types, statement, fnbuf, "setbe"); 523 | } 524 | 525 | static void ult_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 526 | comparison_build(vals, types, statement, fnbuf, "setb"); 527 | } 528 | 529 | static void blklbl_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 530 | if (types[0] != Str) { 531 | printf("Expected label to have value RawStr, got something else instead.\n"); 532 | exit(1); 533 | } 534 | string_push_fmt(fnbuf, ".%s_%s:\n", regalloc.current_fn->name, (char*) vals[0]); 535 | /* Now it needs to do Phi stuff: 536 | * - Go through the rest of the statements in this function and find a Phi instruction with this label 537 | * - Once it finds one: 538 | * - If this block label is the first one specified in the phi instruction, allocate the register 539 | * - Set the label's value to the value it should be for this branch, as specified by phi 540 | */ 541 | for (size_t s = 0; s < regalloc.current_fn->num_statements; s++) { 542 | Statement phi = regalloc.current_fn->statements[s]; 543 | if (phi.instruction != PHI) continue; 544 | bool is_first = !strcmp(((PhiVal*) phi.vals[0])->blklbl_name, (char*) vals[0]); 545 | bool is_second = !strcmp(((PhiVal*) phi.vals[1])->blklbl_name, (char*) vals[0]); 546 | if (is_first || is_second) { 547 | char *label_loc; 548 | string_push_fmt(fnbuf, "\tmov%c ", sizes[phi.type]); 549 | if (is_first) { 550 | label_loc = reg_alloc(phi.label, phi.type); 551 | build_value(((PhiVal*) phi.vals[0])->type, ((PhiVal*) phi.vals[0])->val, true, fnbuf); 552 | } else { 553 | label_loc = label_to_reg(0, phi.label, false); 554 | build_value(((PhiVal*) phi.vals[1])->type, ((PhiVal*) phi.vals[1])->val, true, fnbuf); 555 | } 556 | string_push_fmt(fnbuf, ", %s\n", label_loc); 557 | } 558 | } 559 | } 560 | 
561 | // second val dictates whether or not it's a signed operation (signed if true). 562 | static void ext_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 563 | char *label_loc = reg_alloc_noresize(statement.label, statement.type); 564 | string_push_fmt(fnbuf, "\t%s ", (types[0] == Label) ? "movsx" : "mov"); 565 | build_value(types[0], vals[0], true, fnbuf); 566 | string_push_fmt(fnbuf, ", %s\n", reg_as_size("%rdx", statement.type)); 567 | string_push_fmt(fnbuf, "\tmov%c %s, %s\n", sizes[statement.type], reg_as_size("%rdx", statement.type), label_loc); 568 | } 569 | 570 | static void hlt_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 571 | string_push(fnbuf, "\tjmp .\n"); 572 | } 573 | 574 | static void phi_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 575 | /* Phi doesn't actually do anything in the instruction itself in generated assembly. 576 | * All of the generated assembly to do with the phi instruction is done within block label 577 | * compilation. 
*/ 578 | } 579 | 580 | static void vastart_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 581 | if (types[0] != Label) { 582 | printf("vastart expects argument to be a label, got something else instead.\n"); 583 | exit(1); 584 | } 585 | char *addr = label_to_reg(0, (char*) vals[0], false); 586 | string_push_fmt(fnbuf, "\tmovw $0, (%s)\n", addr); // Set current vararg index (off = 0) 587 | string_push_fmt(fnbuf, "\tmovq %%rbp, %%rax\n" 588 | "\taddq $8, %%rax\n" 589 | "\tmovq %%rax, 2(%s)\n", addr); // set address of arguments start 590 | } 591 | 592 | static void vaarg_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 593 | if (types[0] != Label) { 594 | printf("vastart expects argument to be a label, got something else instead.\n"); 595 | exit(1); 596 | } 597 | char *addr = label_to_reg(0, (char*) vals[0], false); 598 | // get current index 599 | string_push_fmt(fnbuf, "\txor %%rax, %%rax\n"); 600 | if (addr[0] == '%') 601 | string_push_fmt(fnbuf, "\tmovw (%s), %%ax\n", addr); 602 | else { 603 | string_push_fmt(fnbuf, "\tmov (%s), %%rax\n", addr); 604 | string_push_fmt(fnbuf, "\tmovw (%%rax), %%ax\n"); 605 | } 606 | string_push_fmt(fnbuf, "\tmov $8, %%rsi\n" 607 | "\tmulq %%rsi\n" 608 | "\tmov %s, %%rcx\n" 609 | "\tadd $2, %%rcx\n" 610 | "\tmovq (%%rcx), %%rcx\n" 611 | "\taddq %%rcx, %%rax\n" // offset of value is now in rax 612 | "\taddw $1, (%s)\n" 613 | "\tmov (%%rax), %%rdi\n" // increase current index 614 | "\tmov %%rdi, %s\n", // increase current index 615 | addr, addr, reg_alloc_noresize(statement.label, statement.type), addr); 616 | } 617 | 618 | static void loc_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 619 | if (types[0] != Number || types[1] != Number || types[2] != Number) { 620 | printf("All arguments of .loc instruction must be an integer literal.\n"); 621 | exit(1); 622 | } 623 | string_push_fmt(fnbuf, "\t.loc %zu %zu %zu\n", vals[0], vals[1], 
vals[2]); 624 | } 625 | 626 | static void pushpop_inputs(InlineAsm *info, char *op, String *fnbuf) { 627 | for (size_t i = 0; i < vec_size(info->inputs_vec); i++) { 628 | // check if the register is used to know if it needs to be pushed 629 | for (size_t reg = 0; reg < sizeof(reg_alloc_tab) / sizeof(reg_alloc_tab[0]); reg++) { 630 | InlineAsmIO input = (*info->inputs_vec)[i]; 631 | if (!strcmp(input.reg, (char*) reg_alloc_tab[reg][0]) && reg_alloc_tab[reg][1]) { // second value is a non-zero value, aka it still has 632 | string_push_fmt(fnbuf, "\t%s %s\n", op, input.reg); 633 | break; 634 | } 635 | } 636 | } 637 | } 638 | 639 | static void pushpop_clobbers_and_inputs(InlineAsm *info, int is_push, String *fnbuf) { 640 | char *op = (is_push) ? "push" : "pop"; 641 | // I don't love this solution the most but it should work fine 642 | if (is_push) { 643 | pushpop_inputs(info, op, fnbuf); 644 | for (size_t clobber = 0; clobber < vec_size(info->clobbers_vec); clobber++) 645 | string_push_fmt(fnbuf, "\t%s %s\n", op, (*info->clobbers_vec)[clobber]); 646 | } else { 647 | for (size_t clobber = 0; clobber < vec_size(info->clobbers_vec); clobber++) 648 | string_push_fmt(fnbuf, "\t%s %s\n", op, (*info->clobbers_vec)[clobber]); 649 | pushpop_inputs(info, op, fnbuf); 650 | } 651 | } 652 | 653 | static void asm_build(uint64_t vals[2], ValType types[2], Statement statement, String *fnbuf) { 654 | assert(types[0] == InlineAssembly && "first type of inline assembly instruction must be an inline assembly value type"); 655 | InlineAsm *info = (InlineAsm*) vals[0]; 656 | // save registers for later 657 | pushpop_clobbers_and_inputs(info, 1, fnbuf); 658 | // move the input labels specified into the correct registers 659 | for (size_t input = 0; input < vec_size(info->inputs_vec); input++) { 660 | string_push_fmt(fnbuf, "\tmov "); 661 | if ((*info->inputs_vec)[input].type == Label) { 662 | size_t stack_offset = vec_size(info->inputs_vec) + vec_size(info->clobbers_vec); 663 | 
string_push_fmt(fnbuf, "%s", label_to_reg(stack_offset, (*info->inputs_vec)[input].label, false)); 664 | } else { 665 | build_value_noresize((*info->inputs_vec)[input].type, (uint64_t) (*info->inputs_vec)[input].label, true, fnbuf); 666 | } 667 | string_push_fmt(fnbuf, ", %s\n", (*info->inputs_vec)[input].reg); 668 | } 669 | // copy the assembly 670 | string_push_fmt(fnbuf, "\t%s\n", info->assembly); 671 | // restore clobbers and saved registers that were used for inputs 672 | pushpop_clobbers_and_inputs(info, 0, fnbuf); 673 | // now move the output registers into the labels associated 674 | for (size_t out = 0; out < vec_size(info->outputs_vec); out++) { 675 | string_push_fmt(fnbuf, "\tmov %s, %s\n", (*info->outputs_vec)[out].reg, reg_alloc((*info->outputs_vec)[out].label, Bits64)); 676 | } 677 | } 678 | 679 | void (*instructions_x86_64[])(uint64_t[2], ValType[2], Statement, String*) = { 680 | add_build, sub_build, div_build, mul_build, 681 | copy_build, ret_build, call_build, jz_build, neg_build, 682 | udiv_build, rem_build, urem_build, and_build, or_build, xor_build, 683 | shl_build, shr_build, store_build, load_build, blit_build, alloc_build, 684 | eq_build, ne_build, sle_build, slt_build, sge_build, sgt_build, ule_build, ult_build, 685 | uge_build, ugt_build, ext_build, hlt_build, blklbl_build, jmp_build, jnz_build, phi_build, vastart_build, 686 | vaarg_build, loc_build, asm_build, 687 | }; 688 | -------------------------------------------------------------------------------- /src/target/x86_64/register.c: -------------------------------------------------------------------------------- 1 | /* Register allocator for UYB project. 2 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under MPL2.0, see /LICENSE for details. 
*/ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | /* all the scratch registers: 12 | * {reg_name, num_refs, reg_size} 13 | * num_refs is the number of references to the label corresponding to that register 14 | * *after* the current instruction. */ 15 | intptr_t reg_alloc_tab[5][3] = { 16 | {(uintptr_t) "%rbx", 0, 0}, 17 | {(uintptr_t) "%r12", 0, 0}, 18 | {(uintptr_t) "%r13", 0, 0}, 19 | {(uintptr_t) "%r14", 0, 0}, 20 | {(uintptr_t) "%r15", 0, 0}, 21 | }; 22 | 23 | // Left side is register, middle is assigned label, right is number of instances of that label 24 | char *label_reg_tab[5][3] = { 25 | {"%rbx", 0, 0}, 26 | {"%r12", 0, 0}, 27 | {"%r13", 0, 0}, 28 | {"%r14", 0, 0}, 29 | {"%r15", 0, 0}, 30 | }; 31 | 32 | RegAlloc regalloc; 33 | 34 | bool check_label_in_args(char *label) { 35 | for (size_t i = 0; i < regalloc.current_fn->num_args; i++) { 36 | if (!strcmp(label, regalloc.current_fn->args[i].label)) return true; 37 | } 38 | return false; 39 | } 40 | 41 | char *reg_as_size_inner(char *reg, Type size) { 42 | reg++; 43 | if (reg[0] == 'r' && /* is digit: */ (reg[1] >= '0' && reg[1] <= '9')) { 44 | String *str = string_from(reg); 45 | if (size == Bits8 ) string_push(str, "b"); 46 | else if (size == Bits16) string_push(str, "w"); 47 | else if (size == Bits32) string_push(str, "d"); 48 | return str->data; 49 | } 50 | if (size == Bits8) { 51 | if (!strcmp(reg, "rsi")) return "sil"; 52 | else if (!strcmp(reg, "rdi")) return "dil"; 53 | char *buf = aalloc(4); 54 | memcpy(buf, ®[1], 3); 55 | buf[1] = 'l'; 56 | return buf; 57 | } else if (size == Bits16) { 58 | return ®[1]; 59 | } else if (size == Bits32) { 60 | char *buf = aalloc(4); 61 | memcpy(buf, ®[0], 4); 62 | buf[0] = 'e'; 63 | return buf; 64 | } else return reg; 65 | } 66 | 67 | Type size_from_reg(char *reg) { 68 | reg++; 69 | char last = reg[strlen(reg) - 1]; 70 | if (reg[0] == 'r' && /* is digit: */ (reg[1] >= '0' && reg[1] <= '9')) { 71 | if (last == 'b') 
return Bits8; 72 | else if (last == 'w') return Bits16; 73 | else if (last == 'd') return Bits32; 74 | else return Bits64; 75 | } 76 | if (reg[0] == 'e') return Bits32; 77 | if (reg[0] == 'r') return Bits64; 78 | if (last == 'i' || last == 'x') return Bits16; 79 | return Bits8; 80 | } 81 | 82 | char *reg_as_size(char *reg, Type size) { 83 | if (reg[0] != '%') return reg; 84 | char *buf = aalloc(5); 85 | buf[0] = '%'; 86 | strcpy(&buf[1], reg_as_size_inner(reg, size)); 87 | return buf; 88 | } 89 | 90 | void reg_init_fn(Function func) { 91 | regalloc.bytes_rip_pad = 0; 92 | for (size_t i = 0; i < sizeof(reg_alloc_tab) / sizeof(reg_alloc_tab[0]); i++) 93 | reg_alloc_tab[i][1] = 0; 94 | regalloc.current_fn = (Function*) aalloc(sizeof(Function)); 95 | *regalloc.current_fn = func; 96 | regalloc.labels_as_offsets = vec_new(sizeof(size_t) * 3); 97 | regalloc.used_regs_vec = vec_new(sizeof(char*)); 98 | regalloc.statement_idx = 0; 99 | } 100 | 101 | char *reg_alloc_noresize(char *label, Type reg_size) { 102 | for (size_t l = 0; l < sizeof(label_reg_tab) / sizeof(label_reg_tab[0]); l++) { 103 | if (!label_reg_tab[l][1] || strcmp(label_reg_tab[l][1], label)) continue; 104 | size_t new_label_sz = strlen(label) + 5; 105 | char *new_label = (char*) aalloc(new_label_sz); 106 | label_reg_tab[l][2]++; 107 | snprintf(new_label, new_label_sz, "%s.%zu", label, (size_t) label_reg_tab[l][2]); 108 | label = new_label; 109 | } 110 | for (size_t i = 0; i < sizeof(reg_alloc_tab) / sizeof(reg_alloc_tab[0]); i++) { 111 | if (reg_alloc_tab[i][1]) continue; 112 | for (size_t s = regalloc.statement_idx; s < regalloc.current_fn->num_statements; s++) { 113 | if (regalloc.current_fn->statements[s].instruction == JMP || regalloc.current_fn->statements[s].instruction == JNZ) { 114 | reg_alloc_tab[i][1] = -1; 115 | break; 116 | } 117 | if (regalloc.current_fn->statements[s].val_types[1] == FunctionArgs) { 118 | for (size_t arg = 0; arg < ((FunctionArgList*) 
regalloc.current_fn->statements[s].vals[1])->num_args; arg++) { 119 | if (((FunctionArgList*) regalloc.current_fn->statements[s].vals[1])->arg_types[arg] != Number && 120 | strcmp(label, ((FunctionArgList*) regalloc.current_fn->statements[s].vals[1])->args[arg])) continue; 121 | reg_alloc_tab[i][1] += 2; 122 | } 123 | } 124 | if (regalloc.current_fn->statements[s].instruction == ASM) { 125 | InlineAsm *info = (InlineAsm*) regalloc.current_fn->statements[s].vals[0]; 126 | for (size_t in = 0; in < vec_size(info->inputs_vec); in++) { 127 | if (strcmp((*info->inputs_vec)[in].label, label)) continue; 128 | reg_alloc_tab[i][1]++; 129 | } 130 | } 131 | if ((regalloc.current_fn->statements[s].val_types[0] == Label && !strcmp((char*) regalloc.current_fn->statements[s].vals[0], label)) || 132 | (regalloc.current_fn->statements[s].val_types[1] == Label && !strcmp((char*) regalloc.current_fn->statements[s].vals[1], label)) || 133 | (regalloc.current_fn->statements[s].val_types[2] == Label && !strcmp((char*) regalloc.current_fn->statements[s].vals[2], label))) { 134 | reg_alloc_tab[i][1]++; 135 | } 136 | } 137 | if (check_label_in_args(label) && reg_alloc_tab[i][1]) reg_alloc_tab[i][1]++; 138 | label_reg_tab[i][1] = aalloc(strlen(label) + 1); 139 | strcpy(label_reg_tab[i][1], label); 140 | size_t used_sz = vec_size(regalloc.used_regs_vec); 141 | bool do_push = true; 142 | for (size_t y = 0; y < used_sz; y++) { 143 | if (strcmp((*regalloc.used_regs_vec)[y], (char*) reg_alloc_tab[i][0])) continue; 144 | } 145 | if (reg_alloc_tab[i][1]) { 146 | if (do_push) 147 | vec_push(regalloc.used_regs_vec, (char*) reg_alloc_tab[i][0]); 148 | regalloc.bytes_rip_pad += 8; 149 | } 150 | reg_alloc_tab[i][2] = reg_size; 151 | return (char*) reg_alloc_tab[i][0]; 152 | } 153 | regalloc.bytes_rip_pad += 8; 154 | char *fmt = "-%llu(%%rbp)"; 155 | size_t buf_sz = strlen("-(%rbp)") + 5; 156 | char *buf = (char*) aalloc(buf_sz + 1); 157 | snprintf(buf, buf_sz, fmt, regalloc.bytes_rip_pad); 158 | size_t 
*new_vec_val = aalloc(sizeof(size_t) * 3); 159 | new_vec_val[0] = (size_t) label; 160 | new_vec_val[1] = regalloc.bytes_rip_pad; 161 | new_vec_val[2] = reg_size; 162 | vec_push(regalloc.labels_as_offsets, new_vec_val); 163 | return buf; 164 | } 165 | 166 | char *reg_alloc(char *label, Type reg_size) { 167 | char *reg = reg_alloc_noresize(label, reg_size); 168 | if (reg[0] == '%') 169 | return reg_as_size((char*) reg, reg_size); 170 | else 171 | return reg; 172 | } 173 | 174 | char *label_to_reg_noresize(size_t offset, char *label, bool allow_noexist) { 175 | for (size_t i = 0; i < sizeof(label_reg_tab) / sizeof(label_reg_tab[1]); i++) { 176 | if (!label_reg_tab[i][1] || strcmp(label_reg_tab[i][1], label)) continue; 177 | if (reg_alloc_tab[i][1]) 178 | reg_alloc_tab[i][1]--; 179 | if (!reg_alloc_tab[i][1]) 180 | label_reg_tab[i][1] = 0; 181 | return label_reg_tab[i][0]; 182 | } 183 | size_t label_offset_list_len = vec_size(regalloc.labels_as_offsets); 184 | for (size_t l = 0; l < label_offset_list_len; l++) { 185 | if (strcmp((char*) (*regalloc.labels_as_offsets)[l][0], label)) continue; 186 | char *fmt = "-%llu(%%rbp)"; 187 | size_t buf_sz = strlen("-(%rbp)") + 5; 188 | char *buf = (char*) aalloc(buf_sz + 1); 189 | snprintf(buf, buf_sz, fmt, (*regalloc.labels_as_offsets)[l][1] + offset); 190 | return buf; 191 | } 192 | if (allow_noexist) return NULL; 193 | printf("Tried to use non-defined label: %s\n", label); 194 | exit(1); 195 | } 196 | 197 | Type get_reg_size(char *reg, char *expected_label) { 198 | for (size_t i = 0; i < sizeof(reg_alloc_tab) / sizeof(reg_alloc_tab[0]); i++) { 199 | if (strcmp(reg, (char*) reg_alloc_tab[i][0])) continue; 200 | return reg_alloc_tab[i][2]; 201 | } 202 | size_t len = vec_size(regalloc.labels_as_offsets); 203 | for (size_t i = 0; i < len; i++) { 204 | if (strcmp(expected_label, (char*) (*regalloc.labels_as_offsets)[i][0])) continue; 205 | return (Type) (*regalloc.labels_as_offsets)[i][2]; 206 | } 207 | printf("Invalid register in 
get_reg_size: %s\n", reg); 208 | exit(1); 209 | } 210 | 211 | // I think this is kinda slow 212 | char *label_to_reg(size_t offset, char *label, bool allow_noexist) { 213 | char *reg = label_to_reg_noresize(0, label, allow_noexist); 214 | if (!reg && allow_noexist) return NULL; 215 | for (size_t i = 0; i < sizeof(reg_alloc_tab) / sizeof(reg_alloc_tab[0]); i++) { 216 | if (strcmp(reg, (char*) reg_alloc_tab[i][0])) continue; 217 | if (!reg_alloc_tab[i][1] && allow_noexist) return NULL; 218 | return reg_as_size(reg, (Type) reg_alloc_tab[i][2]); 219 | } 220 | return reg; 221 | } 222 | -------------------------------------------------------------------------------- /src/utils.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | char size_as_char(Type type) { 7 | return ((char[]){'b', 'h', 'w', 'l'})[type]; 8 | if (type == Bits8) return 'b'; 9 | else if (type == Bits16) return 'h'; 10 | else if (type == Bits32) return 'w'; 11 | else return 'l'; 12 | } 13 | 14 | /* returns 1 or 0 depending on if it was found. 
if it was found it stores the result in val_buf unless 15 | * val_buf is null */ 16 | int find_sizet_in_copyvals(CopyVal **copyvals, char *label, size_t *val_buf) { 17 | for (size_t i = 0; i < vec_size(copyvals); i++) { 18 | if (!strcmp((*copyvals)[i].label, label)) { 19 | if (val_buf) 20 | *val_buf = (*copyvals)[i].val; 21 | return 1; 22 | } 23 | } 24 | return 0; 25 | } 26 | 27 | int find_copyval(CopyVal **copyvals, char *label, CopyVal *val_buf) { 28 | for (size_t i = 0; i < vec_size(copyvals); i++) { 29 | if (!strcmp((*copyvals)[i].label, label)) { 30 | if (val_buf) 31 | *val_buf = (*copyvals)[i]; 32 | return 1; 33 | } 34 | } 35 | return 0; 36 | } 37 | 38 | char *get_full_char_str(bool is_struct, Type type, char *type_struct) { 39 | char *rettype; 40 | if (is_struct) { 41 | rettype = (char*) aalloc(strlen(type_struct) + 2); 42 | sprintf(rettype, ":%s", type_struct); 43 | } else { 44 | rettype = (char*) aalloc(2); 45 | rettype[0] = size_as_char(type); 46 | rettype[1] = 0; 47 | } 48 | return rettype; 49 | } 50 | 51 | // Returns a pointer to an aggregate type from an array of aggregate types 52 | AggregateType *find_aggtype(char *name, AggregateType *aggtypes, size_t num_aggtypes) { 53 | for (size_t i = 0; i < num_aggtypes; i++) { 54 | if (!strcmp(name, aggtypes[i].name)) return &aggtypes[i]; 55 | } 56 | printf("Tried to use undefined aggregate type.\n"); 57 | exit(1); 58 | } 59 | 60 | /* Caller is expected to free return value. 61 | * Reads a full line of stdin. 
*/ 62 | char *read_full_stdin() { 63 | size_t pos = 0, size = 1025, nread; 64 | char *buf0 = malloc(size); 65 | char *buf = buf0; 66 | for (;;) { 67 | if (buf == NULL) { 68 | fprintf(stderr, "Not enough memory for %zu bytes in read_full_stdin()\n", size); 69 | free(buf0); 70 | return NULL; 71 | } 72 | nread = fread(buf + pos, 1, size - pos - 1, stdin); 73 | if (nread == 0) break; 74 | pos += nread; 75 | if (size - pos < size / 2) 76 | size += size / 2 + size / 8; 77 | buf = realloc(buf0 = buf, size); 78 | } 79 | buf[pos] = '\0'; 80 | return buf; 81 | } 82 | -------------------------------------------------------------------------------- /src/vector.c: -------------------------------------------------------------------------------- 1 | /* Part of vector implementationf for UYB compiler backend project, see ../include/vector.h for the 2 | * rest of the code and an explanation on how to use the full thing. 3 | * Copyright (C) 2025 Jake Steinburger (UnmappedStack) under the MPL2.0 license, see /LICENSE for more information. 
*/ 4 | #include 5 | 6 | void *vec_new(size_t data_size) { 7 | Vec *vec = (Vec*) malloc(sizeof(Vec)); 8 | *vec = (Vec) { 9 | .len = 0, 10 | .capacity = 1, 11 | .data_size = data_size, 12 | .data = (uint8_t*) malloc(data_size), 13 | }; 14 | return &vec->data; 15 | } 16 | 17 | size_t vec_size(void *vec_data) { 18 | Vec *vec = (Vec*) ((uint64_t) vec_data - (sizeof(Vec) - sizeof(void*))); 19 | return vec->len; 20 | } 21 | 22 | int vec_contains(void *vec_data, size_t val) { 23 | Vec *vec = (Vec*) ((uint64_t) vec_data - (sizeof(Vec) - sizeof(void*))); 24 | for (size_t i = 0; i < vec->len; i++) { 25 | size_t masked_vec = (*((size_t**) vec_data))[i] & ((1ULL << (8 * vec->data_size)) - 1); 26 | size_t masked_val = val & ((1ULL << (8 * vec->data_size)) - 1); 27 | if (masked_vec == masked_val) return 1; 28 | } 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /test.ssa: -------------------------------------------------------------------------------- 1 | # This is a simple program that I use for testing UYB. 2 | # Programs in this file often may not work. See the `examples` directory for tested and working 3 | # files. 4 | 5 | data $msg = {b "Hello, world!", b 10, b 0} 6 | export function w $main(l %argc, l %argv) { 7 | @start 8 | call $printf(l $msg) 9 | ret 0 10 | } 11 | --------------------------------------------------------------------------------