├── .clang-format ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── codeql-analysis.yml │ └── make.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── THANKS.txt ├── libjas ├── Makefile ├── buffer.c ├── codegen.c ├── encoder.c ├── endian.c ├── error.c ├── exe.c ├── include │ ├── buffer.h │ ├── codegen.h │ ├── encoder.h │ ├── endian.h │ ├── error.h │ ├── exe.h │ ├── instruction.h │ ├── jas.h │ ├── label.h │ ├── mode.h │ ├── operand.h │ ├── operand.hpp │ ├── parse.h │ ├── register.h │ └── rex.h ├── instruction.c ├── instructions.tbl ├── label.c ├── operand.c ├── operand.cpp ├── parse.c ├── pre.c ├── register.c └── scripts │ ├── compile.js │ └── tablegen.js ├── logo.png ├── test └── tests ├── Makefile ├── endian.c ├── example.c ├── instruction.c ├── mov.c ├── operand.c ├── parse.c └── test.h /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: LLVM 2 | UseTab: Never 3 | IndentWidth: 2 4 | TabWidth: 2 5 | BreakBeforeBraces: Attach 6 | AllowShortIfStatementsOnASingleLine: true 7 | IndentCaseLabels: false 8 | ColumnLimit: 0 9 | AccessModifierOffset: -4 10 | NamespaceIndentation: All 11 | FixNamespaceComments: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 
22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # TODO: Update to CodeQL version 3 before december 2024 2 | 3 | name: "CodeQL" 4 | 5 | on: 6 | push: 7 | branches: [main, dev] 8 | pull_request: 9 | branches: [main, dev] 10 | schedule: 11 | - cron: "43 11 * * 2" 12 | 13 | jobs: 14 | analyze: 15 | name: Analyze 16 | runs-on: macos-latest 17 | permissions: 18 | actions: read 19 | contents: read 20 | security-events: write 21 | 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | language: ["c"] 26 | 27 | steps: 28 | - name: Checkout repository 29 | uses: actions/checkout@v2 30 | 31 | - name: Initialize CodeQL 32 | uses: github/codeql-action/init@v2 33 | with: 34 | languages: ${{ matrix.language }} 35 | 36 | - name: Clean directory 37 | run: make clean 38 | 39 | - name: Build project 40 | run: make 41 | 42 | - name: Perform CodeQL Analysis 43 | uses: github/codeql-action/analyze@v2 44 | -------------------------------------------------------------------------------- /.github/workflows/make.yml: -------------------------------------------------------------------------------- 1 | name: Build project with Makefile 2 | 3 | on: 4 | push: 5 | branches: [main, dev] 6 | pull_request: 7 | branches: [main, dev] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: macos-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | 17 | - name: Clean project 18 | run: make clean 19 | 20 | - name: Build with `make` 21 | run: make 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | build/ 4 | 5 | # Object files 6 | *.o 7 | *.ko 8 | *.obj 9 | *.elf 10 | 11 | # Linker output 12 | *.ilk 13 | *.map 14 | *.exp 15 | 16 | # Precompiled Headers 17 | *.gch 18 | *.pch 19 | 20 | # 
Libraries 21 | *.lib 22 | *.a 23 | *.la 24 | *.lo 25 | 26 | # Shared objects (inc. Windows DLLs) 27 | *.dll 28 | *.so 29 | *.so.* 30 | *.dylib 31 | 32 | # Executables 33 | *.exe 34 | *.out 35 | *.app 36 | *.i*86 37 | *.x86_64 38 | *.hex 39 | 40 | # Debug files 41 | *.dSYM/ 42 | *.su 43 | *.idb 44 | *.pdb 45 | 46 | # Kernel Module Compile Results 47 | *.mod* 48 | *.cmd 49 | .tmp_versions/ 50 | modules.order 51 | Module.symvers 52 | Mkfile.old 53 | dkms.conf 54 | 55 | # Other 56 | .DS_Store 57 | .vscode/ 58 | .swp 59 | 60 | tabs.c -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | eventide1029+jas@gmail.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing to Jas 2 | 3 | Thank you for considering to contribute to the Jas assembler! The jas assembler is a project that is developed by a 4 | small group of people. We are always looking for new contributors to help us improve the assembler. This guide is 5 | for **DEVELOPERS looking to commit code to the Jas assembler**. 
If you are looking to report a bug or request a 6 | feature, please use the official mailing list: jas-assembler@google-groups.com. 7 | 8 | ### Getting started 9 | For beginners and those who are new to Git and GitHub, please see [here](https://guides.github.com/activities/hello-world/) 10 | for a quick tutorial on how to get started with Git and GitHub. The Jas project is hosted on this Github page and does not have any official mirrors on other sites at this point, 11 | you may create a un-official mirror on other sites and hosts if you wish, but all official development will occur 12 | here. 13 | 14 | First of all, clone and hop into the repository's directory: 15 | 16 | ```sh 17 | git clone https://github.com/cheng-alvin/jas/ 18 | cd jas 19 | ``` 20 | 21 | For the safety of already stable code, we'll use branches (Like everyone else) to develop software and squash bugs 22 | in parallel, before starting any development work, you must create a new branch and create a pull request with a 23 | detailed description of your work and changes you have committed. 24 | 25 | > Please note that branch and commit names shall be as short as possible, branch names should contain a one or 26 | > two word overview on the changes committed and commit messages shall show the added changes in another couple 27 | > words with details appearing in the description. 28 | 29 | To ensure the quality of the contributed code, all pull requests must be reviewed by a maintainer (which is most 30 | cases is me). Please direct all queries and concerns to eventide1029+jas@gmail.com as well as for any feedback 31 | on code and contributions to the Assembler. [See below](https://github.com/cheng-alvin/jas/blob/contributing-guide-changes/CONTRIBUTING.md#how-does-maintaining-work) 32 | 33 | ### Building and testing 34 | To build Jas, simply run `make` in the home or `libjas` directories (A debug binary must be built in `libjas`). 
35 | Tests can be added in the `tests` directory and built using `make tests` as well in the home directory, all C 36 | test files will be automatically built and run. You may include the Jas testing framework [here](https://github.com/cheng-alvin/jas/blob/main/tests/test.h) 37 | 38 | ### Code style 39 | Preferably, if you have `clang-format` installed on your system, you can simply run `make format` in the home 40 | directory to *automatically* format the source files to conform to our programming style as specified in 41 | [this](https://github.com/cheng-alvin/jas/blob/main/.clang-format) file. Otherwise you are welcome to write 42 | your patch in your own coding style and have it formatted automatically when merged into the main branch, 43 | just as long as the maintainers can read it, then it'll okay. 44 | 45 | As mentioned above, a Github action will automatically run when new code is pushed onto the main branch to 46 | automatically format the code using `clang-format`. You can ignore this behavior by adding a `clang-format off` 47 | and a corresponding `clang-format on` comment in your code for small snippets that may break or cause issues 48 | down the line if formatted automatically. 49 | 50 | ### Adding support for a instruction to the assembler 51 | A common addition for the Jas assembler, especially since how complex the Intel x64 instruction set is, is the 52 | addition of new instructions and instruction encoder identities, which can be done by creating a instruction 53 | encoder table, adding and/or registering the instruction encoder tables to the instruction list(s), and finally testing 54 | and writing unit tests. 55 | 56 | **First, define a instruction encode table:** 57 | 58 | A instruction encoder table describes the identity of the instruction and how each instance can be encoded 59 | in binary as well as some key meta data such as what modes the instruction support and operand extensions 60 | etc. 
(Details will appear in the [`instruction.h`](https://github.com/cheng-alvin/jas/blob/main/libjas/include/instruction.h) file) 61 | Each instruction encoder table includes *entries*, each entry defines the meta data that corresponds to a certain 62 | identity. For example, an MR identity (An identity with a m64 and r64) will be one entry and includes the 63 | opcode, and support status in different operating modes. 64 | 65 | **These entries are defined in the [`instructions.tbl`](https://github.com/cheng-alvin/jas/blob/main/libjas/instructions.tbl) file and are compiled using a script that turns them into native C structs that can be used and accessed by the other parts of the assembler.** 66 | 67 | **Constructing an entry** 68 | 69 | Jas encoder banks are now compiled from a text-based format. Every entry is line-separated and values are separated using `|`s, spaces are used to pad and align the values with the others (mainly for cosmetics) but they are eventually ignored by the compiler. Characters such as `-` are placeholders for `NULL` and will be replaced with `NULL` during compilation. To compile the instruction encoder table, make the `tabs.c` target in the `libjas` directory, this will invoke the Node.js script and automatically generate the `tabs.c` file. 70 | 71 | Here's a sample entry: 72 | 73 | ``` 74 | # name | identity | opcode extension | opcode | byte opcode | pre 75 | # ----------------------------------------------------------------------------------------------- 76 | cmc | zo | - | 0xF5 | - | no_operands 77 | ``` 78 | Note that lines prepended with a `#` will be ignored and removed from the output. 
79 | 80 | This example text-based entry depicts the sample `cmc` instruction provided by intel in *Chapter 3 (Instruction set reference A-L) Vol 2A 3-1*, and will be compiled to the following C structure (Note that formatting may differ): 81 | 82 | ``` c 83 | instr_encode_table_t cmc[] = { 84 | { 85 | .ident = OP_ZO, 86 | .opcode_ext = NULL, 87 | .opcode = {0xF5}, 88 | .byte_instr_opcode = NULL, 89 | .opcode_size = 1, 90 | .pre = &no_operands, 91 | .byte_opcode_size = 0, 92 | }, 93 | 94 | INSTR_TAB_NULL, 95 | }; 96 | ``` 97 | 98 | **FAQ:** What does `zo`, `OP_ZO` mean? What are they for? These are called *operand identies*, Intel calls them *Instruction Operand Encoding*(s), usually found below a instruction encoder table on the manual. Every enum is mapped to one of these *encoder funtions* and encodes the operands. (See [here](https://github.com/cheng-alvin/jas/edit/main/CONTRIBUTING.md#adding-a-new-encoder)) 99 | 100 | **Next, register the new instruction:** 101 | 102 | Even though you have the instruction encoder table already setup, currently the assembler has no indication that 103 | this instruction actually *exists*. Therefore, we'll need to indicate to the assembler that this instruction 104 | actually exists and there's actually a encoder table for it somewhere in the source code. (Which we wrote in the 105 | previous "chapter") 106 | 107 | So, register the instruction and the instruction encoder table to the assembler, we'll add the instruction's name 108 | in the `instructions` enum in the `instruction.h` header, the name should be prefixed with the prefix of `INSTR_` 109 | and followed with the instruction's name as shown on the Intel manual to maintain consistency. 110 | 111 | > Please ensure the instruction is not appended in the enum, but instead added **before** the directives section, 112 | > otherwise the instruction you are trying to add as a *instruction* will be interpreted as an assembler directive. 
(All assembler directives are prefixed as `INSTR_DIR_` instead of just `INSTR_`) 113 | 114 | After your shiny new instruction is registered to the `instructions` enum, you'll also need to register the instruction's 115 | encoder table to the general lookup table for instructions called `instr_table`, they must be placed in a array in **the same** 116 | order as the enum is in. This is very important since the assembler uses a lookup and uses the enum as a indexing tool 117 | and fetch whatever table is needed. 118 | 119 | ### Adding a new encoder 120 | Although most of the encoders used by 80% of all Intel x64 instructions, many are still unsupported and require 121 | you to add it into the assembler manually, especially many of the specialized floating point instructions and 122 | CPU specific stuff. All encoders live in the `libjas/encoder.c` file, to write a new encoder function, use the `DEFINE_ENCODER` 123 | macro defined in `libjas/include/encoder.h` and providing the name of the encoder as the argument. (The name should be in 124 | lowercase and match the naming scheme as shown on the Intel manual.) See example below: 125 | ```c 126 | // ... 127 | DEFINE_ENCODER(xx){ 128 | // ... 129 | } 130 | ``` 131 | 132 | The `DEFINE_ENCODER` macro already gives developers some helper arguments such as the instruction encoder table to reference, 133 | the operand array, more details appear in the `encoder.h` header, all "invokers" of these encoder function, including the 134 | assembler itself will conform to this structure. 135 | 136 | After defining a encoder that you are happy with, there is no way the assembler can have any indication of the *existence* of 137 | this encoder and cannot be invoked. All encoders should be *appended* to the `enc_ident` enum in the `libjas/include/encoder.h` 138 | header so that the order can match up to the encoders array in the `enc_lookup()` function. 
139 | 140 | > The `enc_ident` enum serves as an *indexer* for the `enc_lookup()` array and allows the lookup table to work properly. 141 | 142 | ### How does maintaining work? 143 | Code in Jas should be a collaborative project, there is no way that one person will have the ability to look after 144 | all the code in such a complicated project. Once a new block of code such as a function or a new file is added, it 145 | will be automatically assigned to the author(s) of that block. For example, when a new encoder is merged into the assembler, 146 | the people who initially wrote it will take care of the encoder's source, documentation and addressing issues. Their name(s) 147 | and email addresses as well as relevant contact details (preferably Github username) should be added on the documentation 148 | comment for the encoder similar to the one below: 149 | 150 | ```c 151 | // func.h 152 | 153 | /** 154 | * @author example 155 | * 156 | * ... 157 | */ 158 | void func(); 159 | ``` 160 | 161 | All questions, concerns and general inquiries related to this block of code should be directed to this maintainer, 162 | he or she should also review any related PRs and keep in constant communication with other maintainers and the 163 | contributors. 164 | 165 | > If you do contribute some code and **do not wish** to look after it long-term as a maintainer (Which is 100% okay) 166 | > you may drop support for it at any time, just drop an email to eventide1029@gmail.com and I (Alvin) will be keen to do my best to look after it. 167 | 168 | #### Becoming a maintainer 169 | Becoming a maintainer is easy! Everyone can do it! It's a great way to contribute to the community and help out 170 | with the constant stream of issues. To become a maintainer just drop in a quick email to me (Alvin) eventide1029@gmail.com 171 | and I'll be happy to let you look after that portion of code. 
172 | 173 | ### What to remember before submitting a PR 174 | Once you have completed your work, remember to submit pull requests that are organized and have a clear sense of 175 | purpose, any change from one line of code to a whole file is okay, it just has to have a purpose and a clear 176 | reason to be merged upstream. (but also remember to try and keep it small) Speaking of size, please also ensure 177 | that you properly and logically organize branches and pull requests, if changes don't seem to fit in one pull 178 | request logically, feel free to submit multiple, as long as it makes sense logically. 179 | 180 | ### Don't like Github? Don't worry! 181 | If you don't prefer to use Github as the platform for communication and merging code, patches may be submitted via the mailing list (Just like old-school), the maintainer taking care of that portion of the code will be responsible in checking for patches and reviewing them before applying them to the source code and merging it using a Pull request. 182 | 183 | Basically, once the patch has been received by the maintainer from the mailing list, the maintainer should apply and review the patch in their **own** branch before merging it in, labeling it as *patch* with the person who originally sent in the patch mentioned or quoted in the title. 184 | 185 | ### What now? 186 | After reading the instructions here, you should have a good understanding of how to contribute 187 | to the Jas assembler project! Write some code, drink some coffee, and have fun! 188 | 189 | If you have any questions, please feel free to email me at eventide1029+jas@gmail.com 190 | 191 | Happy hacking! 
192 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023-2024 Alvin Cheng 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | BUILD = build 2 | CC = clang 3 | 4 | CFLAGS_COMMON = -I libjas/include 5 | CFLAGS = $(CFLAGS_COMMON) 6 | 7 | .PHONY: all tests format clean debug pre_build 8 | 9 | all: pre_build $(BUILD)/lib/libjas.a 10 | # all: pre_build $(BUILD)/lib/libjas.a $(BUILD)/jas debug 11 | debug: clean $(BUILD)/lib/libjas_debug.a $(BUILD)/jas_debug 12 | 13 | format: 14 | @find ./ -type f \( -name "*.cpp" -o -name "*.c" -o -name "*.h" \) -exec clang-format --verbose -i {} + 15 | 16 | clean: 17 | @find . -name "*.o" -type f -delete 18 | 19 | pre_build: 20 | @find . -name "*.o" -type f -delete 21 | @find . -name "*.a" -type f -delete 22 | @rm -r -f $(BUILD) 23 | @mkdir -p $(BUILD)/include 24 | @mkdir -p $(BUILD)/lib 25 | @mkdir -p $(BUILD) 26 | @$(MAKE) -C tests clean 27 | @rm -rf libjas/tabs.c 28 | @cp libjas/include/*.h $(BUILD)/include 29 | @cp README.md $(BUILD) 30 | @cp LICENSE $(BUILD) 31 | @cp THANKS.txt $(BUILD) 32 | 33 | tests: 34 | $(MAKE) -C tests 35 | 36 | $(BUILD)/lib/libjas.a: 37 | $(MAKE) -C libjas tabs.c 38 | $(MAKE) -C libjas libjas.a 39 | mv $(BUILD)/libjas.a $(BUILD)/lib/libjas.a 40 | 41 | $(BUILD)/lib/libjas_debug.a: 42 | $(MAKE) -C libjas tabs.c 43 | $(MAKE) -C libjas libjas_debug.a 44 | mv $(BUILD)/libjas_debug.a $(BUILD)/lib/libjas_debug.a 45 | 46 | $(BUILD)/jas: CFLAGS = $(CFLAGS_COMMON) -O3 47 | $(BUILD)/jas: main.o $(BUILD)/lib/libjas.a 48 | $(CC) -o $@ $^ $(CFLAGS) 49 | 50 | $(BUILD)/jas_debug: CFLAGS = $(CFLAGS_COMMON) -g -O0 51 | $(BUILD)/jas_debug: main_debug.o $(BUILD)/lib/libjas_debug.a 52 | $(CC) -o $@ $^ $(CFLAGS) 53 | 54 | main_debug.o: CFLAGS = $(CFLAGS_COMMON) -g -O0 55 | main_debug.o: main.c 56 | $(CC) $(CFLAGS) -c $< -o $@ 57 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | ![Jas logo](https://github.com/cheng-alvin/jas/blob/main/logo.png) 2 |

3 | 4 | PRs Welcome 5 | GitHub code size in bytes 7 | GitHub License 8 | GitHub closed issues 9 |

10 | 11 |

Jas is a minimal, fast and simple zero-dependency assembler for the x64 family of processors. It comes packaged with tonnes of useful utility functions intended for embedding in compilers, operating systems, JIT interpreters and other types of utility applications that need compilation to ELF or just a plain encoded buffer.

12 | 13 | - Supports **60+** instructions! 14 | - Assembles x86, x64 or 16-bit code 15 | - Outputs **ELF object files** 16 | - Unparalleled size and efficiency! 17 | 18 | ### ⚡Quick start 19 | First of all, install/link against the binary releases [here](https://github.com/cheng-alvin/jas/releases)* or build it from source by following the instructions below. Jas takes instructions in an array in a struct form defined in [instruction.h](https://github.com/cheng-alvin/jas/blob/0faa905be7cb1238796af46552b3271a11b4e2dd/libjas/instruction.h) and passes it to an `assemble_instr()` function which generates the actual buffer of an array of `uint8_t` for you to process. (However, in this situation, we are using the `instr_gen()` function and operand generation macros to generate the instruction structure automatically without the janky C structure syntax) 20 | ```c 21 | #include <jas.h> 22 | #include <stdlib.h> 23 | 24 | int main(void) { 25 | instruction_t *instr = instr_gen(INSTR_MOV, 2, r64(REG_RAX), imm64(0)); 26 | buffer_t buf = assemble_instr(MODE_LONG, instr); 27 | 28 | /* Do something to `buf.data` - The uint8_t array */ 29 | 30 | instr_free(instr); 31 | free(buf.data); 32 | return 0; 33 | } 34 | ``` 35 | 36 | Yeah! that's it! Seriously! It's that simple, this little snippet of code generates and encodes a `mov rax, 0` instruction. 37 | 38 | > Since the returned buffer is in the form of a dynamically allocated pointer, care must be taken to `free` and manage the block of memory. 39 | 40 | ### 💡 Why Jas? 41 | Jas is a very versatile library, it opens the door for generating low level assembly and ELF object files to us average programmers. Suddenly, we can write our own compilers from scratch, without the complexities of LLVM or compiling down to a GCC backend. Jas is optimized for speed and complexity, as well as size, allowing it to be used in small-scale compilers and interpreters where compile time matters. 
42 | 43 | [brainfry](https://github.com/cheng-alvin/brainfry) is a project I wrote during the weekend and is a simple Brainf*ck compiler that compiles down to raw machine code; it performed well and only weighed in at a couple of KiB, leading to immensely quick compile times. Jas greatly simplified the development process as anything written using LLVM or GCC backends would incur long compile-times and huge binary sizes. 44 | 45 | ### 🏗️ Build and install 46 | The very first step in doing anything with jas is to build and link the library against your application code, since jas is a library. Ensure you have a C99 and C++17 compiler installed (Anything will do, preferably `clang`), a Javascript runner (such as `node`) for building the `instructions.tbl` file, as well as GNU `make` or another tool that can run the makefile build script. 47 | To obtain the Jas source files, run: 48 | ``` bash 49 | git clone https://github.com/cheng-alvin/jas/ 50 | ``` 51 | 52 | Now, you should have a folder named `jas` in your current directory. You can now simply run `make` within that directory like so: 53 | (Some directories and folders can be missing, you must run `make clean` before you actually build the library.) 54 | 55 | ``` bash 56 | cd jas # Change into the cloned directory 57 | make clean # Clean directory and setup 58 | make # Build source code 59 | ``` 60 | 61 | Voila! Now you have a built copy of Jas in the `build` folder, just simply copy and link against the archive to start building your dream compiler or assembler frontend! 62 | 63 | ### 📝 Licensing 64 | All the code under the Jas assembler project repository is licensed under the [MIT license](https://en.wikipedia.org/wiki/MIT_License) which is a popular open-source license for projects like this! Please see [LICENSE](https://github.com/cheng-alvin/jas/blob/main/LICENSE) for the details. 
65 | 66 | *No warranty of any kind is provided; please double-check official sources before deploying code to production.* 67 | 68 | ### ❤️ Contributing / reporting bugs 69 | No software is perfect, nor is Jas. As the assembler is written in C, many security issues may arise, causing issues across the program. Safeguarding Jas is a shared responsibility of all developers, contributors and users alike. Please report all security or any other concerns in a timely manner via email to the official mailing list: jas-assembler@google-groups.com. 70 | 71 | For developers interested in and passionate about assemblers and low-level programming, please see the [contributing guide](https://github.com/cheng-alvin/jas/blob/a02fea10d9d398ef63a9fc9419ce54d8b406c3a5/CONTRIBUTING.txt) in the home directory for more information on how to contribute code to the repository! 72 | 73 | All changes and reports are welcome, no matter how big or small your changes are :-) 74 | 75 | --- 76 | *Made with love by Alvin / the Jas crew and contributors ❤️ .* 77 | 78 | \**Note:* Only releases tagged with `v1.x`, or as otherwise indicated in the release notes, provide binary releases. 79 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Security and safety 2 | We value the security of our assembler. Nothing's perfect, and we acknowledge that there 3 | can be security vulnerabilities inside our code. Therefore, please follow the 4 | guidelines below to report vulnerabilities and critical security issues. We'll aim to get 5 | back to any security issue or vulnerability within 7 days of submission. 6 | 7 | ### How to report? 
8 | Some vulnerabilities are critical, and people may use them to their advantage to affect 9 | the stable versions. That means some vulnerabilities must be reported by means 10 | of email and other more private channels to prevent them from leaking into the hands of 11 | "bad actors". The table below outlines the methods of reporting security 12 | vulnerabilities to the Jas assembler. 13 | 14 | | Version | How to report | 15 | | ------- | --------------------------------- | 16 | | > 1.x.y | Report via private emailing | 17 | | 0.x.y | Report on the **mailing list** | 18 | 19 | If you are reporting for > 1.x.y versions, please send your emails to 20 | eventide+security@gmail.com. Write it up as if you are doing it for a GitHub 21 | issue for any other project. 22 | -------------------------------------------------------------------------------- /THANKS.txt: -------------------------------------------------------------------------------- 1 | Hey there! 2 | 3 | Thank you for choosing Jas as part of your development toolkit. We truly appreciate 4 | your interest and support. Whether you're using jas in a small script or a large- 5 | scale project, we're honored to be a part of your workflow. 6 | 7 | Your feedback, questions, and bug reports help us make jas better with each release. 8 | 9 | We hope jas makes your work easier, faster, and more efficient. If it does, consider 10 | sharing your experience or contributing back to the project in any way you can. 11 | 12 | With gratitude, 13 | - Alvin / the Jas crew and contributors ❤️ 14 | 15 | For more information, please see https://github.com/cheng-alvin/jas or the attached 16 | `README.md` file. If you should encounter any issues or difficulties, please don't 17 | hesitate to reach out to our development community via the mailing list at: 18 | jas-assembler@google-groups.com 19 | 20 | ___ 21 | __/_ `. .-"""-. 
22 | \_,` | \-' / )`-') 23 | "") `"` \ ((`"` 24 | ___Y , .'7 /| 25 | (_,___/...-` (_/_/ -------------------------------------------------------------------------------- /libjas/Makefile: -------------------------------------------------------------------------------- 1 | CC = clang 2 | 3 | CFLAGS_COMMON =-I include -Wno-incompatible-pointer-types -Wno-int-conversion -O3 4 | CFLAGS = $(CFLAGS_COMMON) 5 | 6 | C_SRC = $(wildcard *.c) # Includes only C source files 7 | OBJ = $(patsubst %.c, %.o, $(C_SRC)) operand_cpp.o 8 | BUILD = ../build 9 | 10 | libjas.a: $(OBJ) 11 | ar rcs $(BUILD)/$@ $^ 12 | 13 | libjas_debug.a: CFLAGS = $(CFLAGS_COMMON) -g -std=c99 -O0 14 | libjas_debug.a: $(OBJ) 15 | ar rcs $(BUILD)/$@ $^ 16 | 17 | tabs.c: instructions.tbl 18 | node ./scripts/compile.js $^ 19 | 20 | # Individual object files rules: 21 | operand_cpp.o: CFLAGS = $(CFLAGS_COMMON) -std=c++11 -O0 22 | operand_cpp.o: operand.cpp 23 | $(CC) $(CFLAGS) -c $< -o $@ 24 | -------------------------------------------------------------------------------- /libjas/buffer.c: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #include "buffer.h" 27 | #include <stdarg.h> 28 | #include <stdbool.h> 29 | #include <stddef.h> 30 | #include <stdint.h> 31 | #include <stdlib.h> 32 | /* Appends data_len bytes from data to buf, growing the heap block; if the allocation fails buf is left unchanged. */ 33 | void buf_write(buffer_t *buf, const uint8_t *data, const size_t data_len) { 34 | if (data_len < 1) 35 | return; 36 | 37 | uint8_t *grown = realloc(buf->data, buf->len + data_len); /* realloc(NULL, n) behaves like malloc(n) */ 38 | if (grown == NULL) return; 39 | buf->data = grown; 40 | for (size_t i = 0; i < data_len; i++) 41 | buf->data[buf->len + i] = data[i]; 42 | 43 | buf->len += data_len; 44 | } 45 | /* Appends a single byte to buf. */ 46 | void buf_write_byte(buffer_t *buf, const uint8_t data) { 47 | buf_write(buf, (uint8_t[]){data}, 1); 48 | } 49 | /* Removes the byte at index elem and shifts the tail down by one; an out-of-range index (including any index on an empty buffer) is ignored. */ 50 | void buf_remove(buffer_t *buf, const size_t elem) { 51 | if (elem >= buf->len) 52 | return; 53 | 54 | for (size_t i = elem; i < buf->len - 1; i++) 55 | buf->data[i] = buf->data[i + 1]; 56 | /* Release the block once empty instead of calling realloc with size 0; a failed shrink keeps the old block. */ 57 | if (--buf->len == 0) { free(buf->data); buf->data = NULL; return; } 58 | uint8_t *shrunk = realloc(buf->data, buf->len); 59 | if (shrunk != NULL) buf->data = shrunk; } 60 | /* Returns true when any byte stored in buf equals elem. */ 61 | bool buf_element_exists(buffer_t *buf, const uint8_t elem) { 62 | for (size_t i = 0; i < buf->len; i++) 63 | if (buf->data[i] == elem) 64 | return true; 65 | 66 | return false; 67 | } 68 | /* Appends the contents of `count` buffers, each passed as a buffer_t pointer, to buf in argument order. */ 69 | void buf_concat(buffer_t *buf, size_t count, ...) 
{ 70 | va_list args; 71 | va_start(args, count); 72 | /* Each variadic argument is read as a buffer_t pointer. */ 73 | for (size_t i = 0; i < count; i++) { 74 | buffer_t *other = va_arg(args, buffer_t *); 75 | buf_write(buf, other->data, other->len); 76 | } 77 | 78 | va_end(args); 79 | } -------------------------------------------------------------------------------- /libjas/codegen.c: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #include "codegen.h" 27 | #include "encoder.h" 28 | #include "error.h" 29 | #include "exe.h" 30 | #include "label.h" 31 | #include "operand.h" 32 | #include 33 | #include 34 | #include 35 | /* Calls free() on every pointer passed as an argument. */ 36 | #define FREE_ALL(...) 
\ 37 | do { \ 38 | void *pointers[] = {__VA_ARGS__}; \ 39 | for (size_t i = 0; i < sizeof(pointers) / sizeof(*pointers); ++i) { \ 40 | free(pointers[i]); \ 41 | } \ 42 | } while (0) 43 | /* Allocates one encoder-table entry per instruction (INSTR_TAB_NULL for labels); the caller frees the returned array. */ 44 | static instr_encode_table_t *get_instr_tabs(instruction_t *instr_arr, size_t arr_size) { 45 | instr_encode_table_t *tabs = malloc(sizeof(instr_encode_table_t) * arr_size); 46 | for (size_t i = 0; i < arr_size; i++) { 47 | if (IS_LABEL(instr_arr[i])) { 48 | tabs[i] = INSTR_TAB_NULL; 49 | continue; 50 | } 51 | tabs[i] = instr_get_tab(instr_arr[i]); 52 | } 53 | return tabs; 54 | } 55 | /* Forward declaration of the single-pass assembler; codegen() below copies the input, registers labels, runs a label pre-pass and then the final pass. */ 56 | static buffer_t assemble(enum modes mode, instruction_t *instr_arr, size_t arr_size, 57 | instr_encode_table_t *tabs, bool is_pre, label_t *label_table, size_t label_table_size); 58 | struct codegen_ret 59 | codegen(enum modes mode, instruction_t **instr_input, size_t arr_count) { 60 | label_t *label_table = NULL; 61 | size_t label_table_size = 0; 62 | 63 | /* Implementing a "wrapper" due to old baggage ~~(And partially out of lazy-ness)~~ */ 64 | const size_t arr_size = arr_count * sizeof(instruction_t); 65 | instruction_t *instr_arr = malloc(arr_size); 66 | /* Flatten the array of instruction pointers into one contiguous copy; arr_size is a byte count. */ 67 | for (size_t i = 0; i < arr_count; i++) 68 | instr_arr[i] = *instr_input[i]; 69 | /* Register every entry whose opcode is >= INSTR_DIR_LOCAL_LABEL and whose first operand carries data. */ 70 | for (size_t i = 0; i < arr_size / sizeof(instruction_t); i++) { 71 | if (instr_arr[i].instr >= INSTR_DIR_LOCAL_LABEL) { 72 | if (instr_arr[i].operands[0].data) 73 | label_create( 74 | &label_table, &label_table_size, 75 | instr_arr[i].operands[0].data, 76 | instr_arr[i].instr == INSTR_DIR_GLOBAL_LABEL, 77 | instr_arr[i].instr == INSTR_DIR_GLOBAL_LABEL, //!! 
78 | 0); 79 | } 80 | } 81 | /* Pre-pass: assemble once so label addresses get recorded, then discard the bytes it produced. */ 82 | const instr_encode_table_t *tabs = get_instr_tabs(instr_arr, arr_size / sizeof(instruction_t)); 83 | const uint8_t *pre_ret = assemble(mode, instr_arr, arr_size, tabs, true, label_table, label_table_size).data; 84 | if (pre_ret != NULL) free(pre_ret); 85 | /* Final pass using the label table filled in by the pre-pass. */ 86 | const buffer_t code = assemble(mode, instr_arr, arr_size, tabs, false, label_table, label_table_size); 87 | FREE_ALL(tabs, instr_arr); 88 | /* code.data and label_table are not freed here; they are handed back in the returned struct. */ 89 | const struct codegen_ret ret = {.code = code, .label_table = label_table, .label_table_size = label_table_size}; 90 | return ret; 91 | } 92 | /* Encodes instr_arr into a new buffer. arr_size arrives as a byte count and is converted to an element count below. When is_pre is set, label directives store the current buffer length as their address and the loop stops once all labels were seen. */ 93 | static buffer_t assemble(enum modes mode, instruction_t *instr_arr, size_t arr_size, 94 | instr_encode_table_t *tabs, bool is_pre, label_t *label_table, size_t label_table_size) { 95 | 96 | arr_size /= sizeof(instruction_t); 97 | buffer_t buf = BUF_NULL; 98 | size_t label_index = 0; 99 | 100 | for (size_t i = 0; i < arr_size; i++) { 101 | /* -- Sanity checks -- */ 102 | if (is_pre && label_table_size == 0) break; 103 | if (is_pre && label_index >= label_table_size) break; 104 | if (instr_arr[i].operands == NULL) continue; 105 | /* Directives emit no opcode: INSTR_DIR_WRT_BUF copies a raw buffer, labels only matter in the pre-pass. */ 106 | if (INSTR_DIRECTIVE(instr_arr[i].instr)) { 107 | if (instr_arr[i].instr == INSTR_DIR_WRT_BUF) { 108 | const buffer_t *data = (buffer_t *)instr_arr[i].operands[0].data; 109 | buf_write(&buf, data->data, data->len); 110 | } 111 | if (is_pre && IS_LABEL(instr_arr[i])) { 112 | for (size_t j = 0; j < label_table_size; j++) { 113 | label_t *tab = label_table; 114 | if (strcmp(tab[j].name, instr_arr[i].operands[0].data) == 0) { 115 | tab[j].address = buf.len; 116 | break; 117 | } 118 | } 119 | label_index++; 120 | } 121 | 122 | continue; 123 | } 124 | 125 | const instr_encode_table_t ref = tabs[i]; 126 | /* byte_opcode_size, when non-zero, overrides opcode_size. */ 127 | uint8_t opcode_sz = ref.opcode_size; 128 | if (ref.byte_opcode_size > 0) opcode_sz = ref.byte_opcode_size; 129 | 130 | instruction_t current = instr_arr[i]; 131 | if (ref.pre != NULL) ref.pre(current.operands, &buf, &ref, mode, label_table, label_table_size); 
132 | op_write_prefix(&buf, current.operands, mode); 133 | buf_write(&buf, op_write_opcode(current.operands, &ref), opcode_sz); 134 | const encoder_t function_ptr = enc_lookup(ref.ident); 135 | /* No encoder for this identity: reported as an error only when the first operand is not OP_NULL. */ 136 | if (function_ptr == NULL) { 137 | if (instr_arr[i].operands[0].type != OP_NULL) 138 | err("instruction unsupported or improper usage"); 139 | continue; 140 | } 141 | function_ptr(current.operands, &buf, &ref, mode, label_table, label_table_size); 142 | } 143 | 144 | return buf; 145 | } 146 | /* Assembles a single instruction through codegen() and returns only its code buffer. */ 147 | buffer_t assemble_instr(enum modes mode, instruction_t *instr) { 148 | return codegen(mode, &instr, 1).code; 149 | } -------------------------------------------------------------------------------- /libjas/encoder.c: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #include "encoder.h" 27 | #include "codegen.h" 28 | #include "error.h" 29 | #include "instruction.h" 30 | #include "label.h" 31 | #include "operand.h" 32 | #include "register.h" 33 | #include 34 | 35 | #define EMPTY_SIB 0x24 36 | 37 | /** 38 | * @brief 39 | * - A label is a memory address NOT a value! 40 | * therefore, a instruction like mov rax, label 41 | * should move the ADDRESS of the label to rax 42 | * NOT THE VALUE 43 | * 44 | * - A empty 0x25 SIB byte is written if we want a 45 | * base pointer register (rsp, sp or esp) to be used 46 | * with an absolute address. 47 | */ 48 | static void ref_label(operand_t *op_arr, buffer_t *buf, uint8_t index, label_t *label_table, size_t label_table_size) { 49 | const uint8_t rel_sz = op_sizeof(op_arr[index].type) / 8; 50 | label_t *label = label_lookup(&label_table, &label_table_size, (char *)op_arr[index].label); 51 | 52 | if (rel_sz > 32) goto label_error; 53 | if (label == NULL) goto label_error; 54 | 55 | int32_t rel_offset = label->address == 0 ? 
0 : label->address - (buf->len + rel_sz - 1) - 1; 56 | buf_write(buf, (uint8_t *)&rel_offset, rel_sz); 57 | return; 58 | 59 | label_error: 60 | err("invalid label"); 61 | } 62 | /* Writes the displacement for the given ModR/M mode: one byte for OP_MODRM_DISP8, four bytes for OP_MODRM_DISP32, nothing for any other mode. */ 63 | static void write_offset(uint8_t mode, buffer_t *buf, operand_t *op_arr, uint8_t index) { 64 | switch (mode) { 65 | case OP_MODRM_DISP8: 66 | buf_write_byte(buf, (int8_t)op_arr[index].offset); 67 | break; 68 | 69 | case OP_MODRM_DISP32: 70 | buf_write(buf, (uint8_t *)&(int32_t){op_arr[index].offset}, 4); 71 | break; 72 | } 73 | } 74 | /* Immediate-only identity: writes the immediate held in operand 1; OP_IMM64 is rejected. */ 75 | DEFINE_ENCODER(i) { 76 | if (op_arr[1].type == OP_IMM64) { 77 | err("operand type mismatch."); 78 | return; 79 | } 80 | 81 | const uint8_t imm_size = op_sizeof(op_arr[1].type) / 8; 82 | buf_write(buf, (uint8_t *)op_arr[1].data, imm_size); 83 | } 84 | /* Single r/m operand: ModR/M byte with the opcode extension in bits 3-5, then optional SIB, label reference and displacement. */ 85 | DEFINE_ENCODER(m) { 86 | const uint8_t opcode_extend = instr_ref->opcode_ext << 3; 87 | const uint8_t rm = reg_lookup_val(op_arr[0].data); 88 | 89 | const uint8_t mod = op_modrm_mode(op_arr[0]); 90 | buf_write_byte(buf, mod | opcode_extend | rm); 91 | 92 | if (op_m(op_arr[0].type) && rm == 4) 93 | buf_write_byte(buf, EMPTY_SIB); 94 | 95 | const enum registers deref_reg = (*(enum registers *)op_arr[0].data); 96 | if (deref_reg == REG_RIP || deref_reg == REG_EIP || deref_reg == REG_IP) 97 | ref_label(op_arr, buf, 0, label_table, label_table_size); 98 | 99 | if (rm == 5 && op_arr[0].offset == 0) 100 | buf_write_byte(buf, 0); 101 | 102 | write_offset(mod, buf, op_arr, 0); 103 | } 104 | /* Shared immediate writer used by the mi and oi encoders; same checks as the i encoder. */ 105 | static void i_common(operand_t *op_arr, buffer_t *buf, instr_encode_table_t *instr_ref, enum modes mode) { 106 | if (op_arr[1].type == OP_IMM64) { 107 | err("operand type mismatch."); 108 | return; 109 | } 110 | 111 | const uint8_t imm_size = op_sizeof(op_arr[1].type) / 8; 112 | uint8_t *imm = (uint8_t *)op_arr[1].data; 113 | buf_write(buf, imm, imm_size); 114 | } 115 | /* Label-relative identity: operand 0 names the label; 8-byte operand sizes are rejected before ref_label() is called. */ 116 | DEFINE_ENCODER(d) { 117 | /** 118 | * @brief This opcode identity should: 119 | * 1. Write the opcode to the buffer ✅ 120 | * 2. 
Calculate the relative offset of the label ✅ 121 | * 3. Write the relative offset to the buffer - kinda ✅ 122 | */ 123 | 124 | // IN BYTES!!! 125 | const uint8_t rel_sz = op_sizeof(op_arr[0].type) / 8; 126 | 127 | if (rel_sz == 8) { 128 | err("A relative address cannot be 64 bit-sized."); 129 | return; 130 | } 131 | 132 | // Calculate the relative offset of the label 133 | ref_label(op_arr, buf, 0, label_table, label_table_size); 134 | } 135 | /* r/m operand followed by an immediate: runs the m encoder, then i_common(). */ 136 | DEFINE_ENCODER(mi) { 137 | m(op_arr, buf, instr_ref, mode, label_table, label_table_size); 138 | i_common(op_arr, buf, instr_ref, mode); 139 | } 140 | 141 | // Note if an operand wants to use a RIP relative address, or a label, 142 | // it must be listed as such with the label put into `.label` 143 | 144 | static void rm_mr_common(operand_t *op_arr, buffer_t *buf, instr_encode_table_t *instr_ref, enum modes mode, enum enc_ident ident, label_t *label_table, size_t label_table_size) { 145 | /** 146 | * @brief This opcode identity is a "Common ground for MR and RM" 147 | * Since rm and mr just have to be flipped, we can just use a boolean 148 | * to determine which one to use, and then reference it. 149 | * 150 | * @note An empty SIB byte will be written if the register is 4 151 | * (A rsp, sp or esp) register that activates the SIB byte. 152 | */ 153 | 154 | const bool is_rm = ident == ENC_RM; 155 | 156 | // Register - Since this is accessed quite often 157 | register const uint8_t reg_idx = is_rm ? 0 : 1; 158 | register const uint8_t rm_idx = is_rm ? 1 : 0; 159 | 160 | const uint8_t reg = reg_lookup_val(op_arr[reg_idx].data); 161 | const uint8_t rm = reg_lookup_val(op_arr[rm_idx].data); 162 | 163 | const uint8_t mod = op_modrm_mode(op_arr[rm_idx]); 164 | buf_write_byte(buf, mod | (reg << 3) | rm); 165 | 166 | /** 167 | * @note 168 | * Some edge cases for the ModR/M byte: 169 | * 170 | * 1. 
If the register is RIP, EIP or IP, then the label will be referenced 171 | * through the relative offset via the `ref_label` function, see above. 172 | * 173 | * 2. If the register is a 4 (rsp, sp or esp), then an empty SIB byte will 174 | * be written to the buffer. Since the value 4 in the r/m field will auto 175 | * matically trigger the SIB to be expected. 176 | * 177 | * 3. If the register rbp is referenced, then the offset will be written 178 | * as 0 and the mode will be set to a 8 bit displacement. 179 | */ 180 | /* For RIP the operand type is temporarily set to OP_M32 around ref_label() and then set to OP_M64 afterwards. */ 181 | const enum registers deref_reg = (*(enum registers *)op_arr[rm_idx].data); 182 | if (deref_reg == REG_RIP || deref_reg == REG_EIP || deref_reg == REG_IP) { 183 | op_arr[rm_idx].type = deref_reg == REG_RIP ? OP_M32 : op_arr[rm_idx].type; 184 | ref_label(op_arr, buf, rm_idx, label_table, label_table_size); 185 | op_arr[rm_idx].type = deref_reg == REG_RIP ? OP_M64 : op_arr[rm_idx].type; 186 | } 187 | 188 | if (op_m(op_arr[rm_idx].type)) { 189 | if (rm == 4) { 190 | buf_write_byte(buf, EMPTY_SIB); 191 | } 192 | } 193 | 194 | if (rm == 5 && op_arr[rm_idx].offset == 0) 195 | buf_write_byte(buf, 0); 196 | 197 | write_offset(mod, buf, op_arr, rm_idx); 198 | } 199 | /* mr and rm only differ in which operand index supplies the reg field and which the r/m field. */ 200 | DEFINE_ENCODER(mr) { rm_mr_common(op_arr, buf, instr_ref, mode, ENC_MR, label_table, label_table_size); } 201 | DEFINE_ENCODER(rm) { rm_mr_common(op_arr, buf, instr_ref, mode, ENC_RM, label_table, label_table_size); } 202 | /* Register-in-opcode identity: after checking operands 1..3 match operand 0's size, adds operand 0's register value to the last byte already in buf. */ 203 | DEFINE_ENCODER(o) { 204 | for (uint8_t i = 1; i < 4; i++) { 205 | if (op_arr[i].type == OP_NULL) break; 206 | if (op_sizeof(op_arr[i].type) != op_sizeof(op_arr[0].type)) { 207 | err("operand type mismatch."); 208 | return; 209 | } 210 | } 211 | 212 | enum registers register_data = *(enum registers *)op_arr[0].data; 213 | if (!op_acc(register_data)) err("Invalid operand, unexpected accumulator used."); 214 | size_t offset = (buf->len - 1) * sizeof(uint8_t); 215 | buf->data[offset] += reg_lookup_val(op_arr[0].data); 216 | } 217 | /* Register-in-opcode followed by an immediate: runs the o encoder, then i_common(). */ 218 | DEFINE_ENCODER(oi) { 219 
| o(op_arr, buf, instr_ref, mode, label_table, label_table_size); 220 | i_common(op_arr, buf, instr_ref, mode); 221 | } 222 | /* Maps an encoder identity to its encoder function; returns NULL when the identity has no encoder or lies outside the table. */ 223 | encoder_t enc_lookup(enum enc_ident input) { 224 | encoder_t lookup[] = {NULL, &mr, &rm, &oi, &mi, &i, &m, NULL, &d, &o, NULL}; 225 | return (size_t)input < sizeof(lookup) / sizeof(*lookup) ? lookup[(size_t)input] : NULL; 226 | } -------------------------------------------------------------------------------- /libjas/endian.c: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #include "endian.h" 27 | #include 28 | #include 29 | 30 | // Note: this function seems only to be used in the `exe` module, may remove in the future 31 | 32 | uint8_t *endian(uint8_t *data, size_t data_size) { 33 | 34 | #ifdef __LITTLE_ENDIAN__ 35 | uint8_t *temp = (uint8_t *)malloc(data_size); 36 | memcpy(temp, data, data_size); 37 | 38 | for (size_t i = data_size; i > 0; i--) 39 | data[data_size - i] = temp[i - 1]; 40 | 41 | free(temp); 42 | #endif 43 | 44 | return data; 45 | } 46 | -------------------------------------------------------------------------------- /libjas/error.c: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #include "error.h" 27 | 28 | static err_callback_t err_callback; 29 | 30 | void err_add_callback(err_callback_t input) { 31 | err_callback = input; 32 | } 33 | 34 | void err(const char *msg) { 35 | if (err_callback) 36 | err_callback(msg); 37 | } 38 | -------------------------------------------------------------------------------- /libjas/exe.c: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #include "exe.h" 27 | #include "endian.h" 28 | #include "label.h" 29 | #include <stdlib.h> 30 | #include <string.h> 31 | 32 | #define STD_UNIT_SIZE (uint8_t[]){0x40, 0x00} 33 | /* Builds the 64-byte ELF64 file header. sect_start is the section header table offset, sect_count the number of section headers and sect_count_str the index of the section-name string table. The caller frees the returned data. */ 34 | buffer_t exe_header(size_t sect_start, uint16_t sect_count, uint16_t sect_count_str) { 35 | buffer_t ret = BUF_NULL; 36 | size_t long_pad = 0; 37 | int int_pad = 0; 38 | 39 | const uint8_t magic_num[] = {0x7f, 'E', 'L', 'F'}; 40 | buf_write(&ret, magic_num, sizeof(magic_num)); // Magic number 41 | 42 | /** 43 | * @note The following fields are assumed to be x64 linux specific 44 | * and are not portable across different operating systems or 45 | * architectures, due to the supported archs of the jas project. 46 | */ 47 | 48 | buf_write_byte(&ret, 2); // Operating mode 49 | buf_write_byte(&ret, 1); // Endian-ness 50 | buf_write_byte(&ret, 1); // ELF version 51 | buf_write_byte(&ret, 0x03); // Target ABI 52 | buf_write_byte(&ret, 0); // ABI version 53 | 54 | buf_write(&ret, (uint8_t *)&long_pad, 7); // Padding - Takes 7 bytes off 8 55 | 56 | buf_write(&ret, (uint8_t[]){0x01, 0x00}, 2); // Object file type 57 | buf_write(&ret, (uint8_t[]){0x3E, 0x00}, 2); // Machine ISA 58 | 59 | buf_write(&ret, (uint8_t[]){0x01, 0x00, 0x00, 0x00}, 4); // ELF version 60 | 61 | buf_write(&ret, (uint8_t *)&long_pad, 8); // Entry point 62 | buf_write(&ret, (uint8_t *)&long_pad, 8); // Program header table offset 63 | buf_write(&ret, (uint8_t *)&sect_start, 8); // Section header table offset 64 | 65 | buf_write(&ret, (uint8_t *)&int_pad, 4); // Flags - Not used 66 | buf_write(&ret, STD_UNIT_SIZE, 2); // ELF header size 67 | 68 | /** 69 | * @note The program header table is not used in this implementation of the ELF 70 | * file format, hence the program header table is only used when a program is 71 | * standalone and not a shared object or a relocatable file. 
72 | */ 73 | buf_write(&ret, (uint8_t *)&int_pad, 4); // Program header table entry size and count - Combined not used 74 | 75 | buf_write(&ret, STD_UNIT_SIZE, 2); // Section header table entry size 76 | buf_write(&ret, (uint8_t *)&sect_count, 2); // Section header table count 77 | buf_write(&ret, (uint8_t *)&sect_count_str, 2); // Section header table string table index 78 | 79 | return ret; 80 | } 81 | /* Builds one 64-byte section header and advances *off by sect_sz, so consecutive calls lay sections out back to back. The caller frees the returned data. */ 82 | buffer_t exe_sect_header(uint32_t str_offset, uint32_t type, uint64_t flags, uint64_t *off, uint64_t sect_sz, size_t info, uint32_t label_sect, uint64_t ent_size) { 83 | const uint64_t long_pad = 0; 84 | buffer_t ret = BUF_NULL; 85 | 86 | buf_write(&ret, (uint8_t *)&str_offset, 4); // String table name offset 87 | buf_write(&ret, (uint8_t *)&type, 4); // Section type 88 | buf_write(&ret, (uint8_t *)&flags, 8); // Section flags 89 | 90 | buf_write(&ret, (uint8_t *)&long_pad, 8); // Section address 91 | 92 | buf_write(&ret, (uint8_t *)off, 8); // Section file offset 93 | buf_write(&ret, (uint8_t *)&sect_sz, 8); // Section size 94 | *off += sect_sz; 95 | 96 | // Maybe padded: 97 | buf_write(&ret, (uint8_t *)&label_sect, 4); // Section link 98 | buf_write(&ret, (uint8_t *)&info, 4); // Section info 99 | buf_write(&ret, (uint8_t *)&long_pad, 8); // Section address alignment 100 | buf_write(&ret, (uint8_t *)&ent_size, 8); // Section entry size 101 | 102 | return ret; 103 | } 104 | /* Builds one 24-byte symbol table entry and appends name (with its terminator) to strtab; the recorded name offset is strtab's length before that append. The caller frees the returned data. */ 105 | buffer_t exe_sym_ent(char *name, uint64_t sym_val, uint16_t sect_idx, buffer_t *strtab, uint8_t info) { 106 | buffer_t symtab = BUF_NULL; 107 | uint64_t long_pad = 0; 108 | 109 | buf_write(&symtab, (uint8_t *)&strtab->len, 4); // Name offset 110 | buf_write_byte(&symtab, info); // Info 111 | buf_write_byte(&symtab, 0); // Other 112 | buf_write(&symtab, (uint8_t *)&sect_idx, 2); // Section index 113 | buf_write(&symtab, (uint8_t *)&sym_val, 8); // Value 114 | buf_write(&symtab, (uint8_t *)&long_pad, 8); // Size 115 | 116 | buf_write(strtab, (uint8_t *)name, strlen(name) + 1); // Add name to string table 117 | 118 | return symtab; 119 | } 
120 | /* Builds an ELF64 object image from codegen output: file header, one zeroed 0x40-byte slot (the first entry of the section header table, which starts at offset 0x40), four section headers (.shstrtab, .strtab, .symtab, .text) and then the section contents. The caller frees the returned data. */ 121 | buffer_t exe_generate(struct codegen_ret ret) { 122 | static const uint8_t p_padding[0x40] = {0}; 123 | buffer_t result_buffer = BUF_NULL; 124 | 125 | /// @note header size of 0x40 126 | size_t addr = 0x40 * 6; 127 | 128 | // Data has been HARD-CODED rather than messing around with 129 | // OOP and structs, we only need these for now, right? Add more. 130 | const char shstrtab[] = "\0.shstrtab\0.strtab\0.symtab\0.text\0"; 131 | buffer_t shstrtab_head = exe_sect_header(1, 3, 2, &addr, sizeof(shstrtab), 0, 0, 0); 132 | 133 | buffer_t strtab = BUF_NULL; 134 | buffer_t symtab = BUF_NULL; 135 | 136 | for (size_t i = 0; i < ret.label_table_size; i++) { 137 | // Only the first 24 bytes of the zeroed 64-byte padding block are written here. 138 | buf_write(&symtab, p_padding, 24); 139 | // NOTE(review): exe_sym_ent() below also appends the name to strtab, so each name appears to be stored twice - confirm this is intended. 140 | const char *name = ret.label_table[i].name; 141 | buf_write(&strtab, (const uint8_t *)name, strlen(ret.label_table[i].name) + 1); 142 | 143 | uint8_t binding = 0; 144 | if (ret.label_table[i].exported || ret.label_table[i].ext) binding = 1; 145 | buffer_t ent = 146 | exe_sym_ent(ret.label_table[i].name, ret.label_table[i].address, 4, &strtab, 147 | (((binding) << 4) + ((0) & 0xf))); 148 | // buf_concat() reads each variadic argument as a buffer_t pointer, so pass the address. 149 | buf_concat(&symtab, 1, &ent); 150 | free(ent.data); 151 | } 152 | 153 | buffer_t strtab_head = exe_sect_header(11, 3, 2, &addr, strtab.len, 0, 0, 0); 154 | buffer_t symtab_head = exe_sect_header(19, 2, 2, &addr, symtab.len, ret.label_table_size, 2, 24); 155 | 156 | const size_t info = ret.label_table_size + 1; 157 | buffer_t text_head = exe_sect_header(27, 1, 7, &addr, ret.code.len, 0, 0, 0); 158 | 159 | // Adjust the amount of sections as required. 160 | const buffer_t header = exe_header(0x40, 5, 1); 161 | buf_write(&result_buffer, header.data, header.len); 162 | buf_write(&result_buffer, p_padding, 0x40); 163 | 164 | #define FREE_ALL(...) 
\ 165 | do { \ 166 | void *pointers[] = {__VA_ARGS__}; \ 167 | for (size_t i = 0; i < sizeof(pointers) / sizeof(*pointers); ++i) { \ 168 | free(pointers[i]); \ 169 | } \ 170 | } while (0) 171 | // Section headers first, then section contents in the same order as the headers. 172 | buf_concat(&result_buffer, 4, &shstrtab_head, &strtab_head, &symtab_head, &text_head); 173 | buf_write(&result_buffer, (const uint8_t *)shstrtab, sizeof(shstrtab)); 174 | buf_concat(&result_buffer, 3, &strtab, &symtab, &ret.code); 175 | 176 | // p_padding has static storage, so there is nothing to free for it. 177 | 178 | // clang-format off 179 | 180 | FREE_ALL( 181 | shstrtab_head.data, strtab.data, symtab.data, strtab_head.data, 182 | symtab_head.data, text_head.data, header.data 183 | ); 184 | 185 | // clang-format on 186 | 187 | return result_buffer; 188 | } -------------------------------------------------------------------------------- /libjas/include/buffer.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #ifndef BUFFER_H 27 | #define BUFFER_H 28 | 29 | #include 30 | #include 31 | #include 32 | 33 | typedef struct { 34 | uint8_t *data; /* Buffer data */ 35 | size_t len; /* Length of buffer */ 36 | } buffer_t; 37 | 38 | void buf_write(buffer_t *buf, const uint8_t *data, const size_t data_len); 39 | void buf_write_byte(buffer_t *buf, const uint8_t data); 40 | void buf_remove(buffer_t *buf, const size_t elem); 41 | 42 | #define BUF_NULL \ 43 | (buffer_t) { \ 44 | .data = NULL, \ 45 | .len = 0 \ 46 | } 47 | 48 | bool buf_element_exists(buffer_t *buf, const uint8_t elem); 49 | void buf_concat(buffer_t *buf, size_t count, ...); 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /libjas/include/codegen.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #ifndef CODEGEN_H 27 | #define CODEGEN_H 28 | 29 | #include "buffer.h" 30 | #include "instruction.h" 31 | #include "mode.h" 32 | #include 33 | 34 | struct codegen_ret { 35 | buffer_t code; 36 | label_t *label_table; 37 | size_t label_table_size; 38 | }; 39 | 40 | /** 41 | * Codegen function that generates the machine code from the 42 | * instruction array. Basically a wrapper for a bunch of lib- 43 | * jas sub-modules function. 44 | * 45 | * @param mode The mode to generate the machine code in 46 | * @param instr_input The instruction array to generate the code from 47 | * @param arr_count The size of the instruction array (in elements) 48 | * 49 | * @return The `codegen_ret` struct containing the machine code and meta-data. 50 | */ 51 | struct codegen_ret codegen( 52 | enum modes mode, instruction_t **instr_input, size_t arr_count); 53 | 54 | /** 55 | * Wrapper function for the `codegen` function that gives boiler- 56 | * plate code to generate the binary of a single instruction given 57 | * in the instruction struct form, but not an array as seen in the 58 | * `codegen` function. 
59 | * 60 | * @param mode The mode to generate the machine code in 61 | * @param instr The instruction pointer to generate the code from 62 | * 63 | * @return The buffer struct containing the machine code 64 | * 65 | * @see `codegen` 66 | */ 67 | buffer_t assemble_instr(enum modes mode, instruction_t *instr); 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /libjas/include/encoder.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #ifndef ENCODER_H 27 | #define ENCODER_H 28 | 29 | #include "buffer.h" 30 | #include "label.h" 31 | #include "mode.h" 32 | 33 | // Forward declarations - See `instruction.h` and `operand.h` respectively 34 | typedef struct instr_encode_table instr_encode_table_t; 35 | typedef struct operand operand_t; 36 | 37 | /** 38 | * Enumeration for the different operand identifiers. 39 | * Used to look up the operand encoder functions. 40 | * 41 | * @note See identities and their corresponding encoder 42 | * methods in the main source file - `encoder.c`. 43 | */ 44 | enum enc_ident { 45 | ENC_NULL, 46 | ENC_MR, 47 | ENC_RM, 48 | ENC_OI, 49 | ENC_MI, 50 | ENC_I, 51 | ENC_M, 52 | ENC_ZO, 53 | ENC_D, 54 | ENC_O, 55 | ENC_IGN, /* Although **NOT** original Intel, this identity will 56 | void all operands, only writing the opcode (IGN meaning *ignore*) */ 57 | }; 58 | 59 | /** 60 | * Macro definition for the encoder function signature, 61 | * this function signature and its parameters are all 62 | * documented below. 63 | * 64 | * For very very special cases where you need to define 65 | * a custom encoder function, or if you need to reference 66 | * it in a different file, you can use this macro, instead 67 | * of using the `enc_lookup()` function. 68 | * 69 | * @see `encoder_t` 70 | */ 71 | #define DEFINE_ENCODER(ident, ...) \ 72 | void ident(operand_t *op_arr, buffer_t *buf, instr_encode_table_t *instr_ref, enum modes mode, label_t *label_table, size_t label_table_size) 73 | 74 | /** 75 | * Type wrapper for the instruction encoder function pointer. Where 76 | * each operand encoder function takes an array of operands and 77 | * a buffer to write the encoded instruction to. 78 | * 79 | * (Based on the operand identities like MR, RM, etc.)
80 | * 81 | * @param op_arr The array of operands to encode 82 | * @param buf The buffer to write the encoded instruction to 83 | * @param instr_ref The instruction reference table 84 | * @param mode The operating mode of the instruction 85 | * @param label_table The label table applicable to the current context. 86 | * @param label_table_size The size of the label table (as described) 87 | * 88 | * @note All encoder functions will conform to this signature. 89 | */ 90 | typedef void (*encoder_t)(operand_t *op_arr, buffer_t *buf, instr_encode_table_t *instr_ref, enum modes mode, label_t *label_table, size_t label_table_size); 91 | 92 | /** 93 | * Lookup table for the different instruction encoder functions. 94 | * The lookup table is indexed by the operand encoding identity 95 | * and the corresponding encoder function is returned. 96 | * 97 | * @see `encoder.c` 98 | * 99 | * @param input The instruction encoding identity 100 | * @return The instruction encoder function pointer 101 | */ 102 | encoder_t enc_lookup(enum enc_ident input); 103 | 104 | #endif -------------------------------------------------------------------------------- /libjas/include/endian.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #ifndef ENDIAN_H 27 | #define ENDIAN_H 28 | 29 | #include 30 | #include 31 | 32 | /** 33 | * Stupid little function for converting a byte array from one 34 | * endian to the opposite endian mode. (Big to little, little to big) 35 | * Endian basically is a reversed array that's used for immediate va- 36 | * lues, especially in x86 37 | * 38 | * @param data The byte array to convert. 39 | * @param data_size The size of the byte array. 40 | * 41 | * @return The converted byte array. 
42 | */ 43 | uint8_t *endian(uint8_t *data, size_t data_size); 44 | 45 | #endif -------------------------------------------------------------------------------- /libjas/include/error.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #ifndef ERROR_H 27 | #define ERROR_H 28 | 29 | /** 30 | * Type wrapper for a void dictated function pointer that 31 | * takes a const char pointer as an argument; Which represents 32 | * the function that would be used to handle errors in the jas 33 | * library. 34 | * 35 | * This function takes in an error message as a string and passes 36 | * it into the handler from the `err()` function. 37 | * 38 | * @param msg A string that represents the error message. 
39 | */ 40 | typedef void (*err_callback_t)(const char *msg); 41 | 42 | /** 43 | * Function for throwing an error into this "error pipeline", and 44 | * finally hand-balling it into the user-defined error callback, 45 | * registered using `err_add_callback`. (For the caller to handle 46 | * the error accordingly) 47 | * 48 | * @param msg The error message to be passed down to the callback. 49 | * 50 | * @see `err_callback_t` For error callback type information/formats 51 | * @see `err_add_callback` 52 | */ 53 | void err(const char *msg); 54 | 55 | /** 56 | * Function to add a callback function to the error handler, a bit like 57 | * a setter function for the very *object orientated* C language :) 58 | * 59 | * @param input The function pointer to the error handler. 60 | * @see `err_callback_t` 61 | */ 62 | void err_add_callback(err_callback_t input); 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /libjas/include/exe.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #ifndef EXE_H 27 | #define EXE_H 28 | 29 | #include "buffer.h" 30 | #include "codegen.h" 31 | #include 32 | #include 33 | 34 | /** 35 | * Function for generating the ELF header, targeted only towards x64 36 | * 64-bit ELF relocatable executables. Generates only the header of 37 | * the file and subsequent stuff can be found below. 38 | * 39 | * @param sect_start The starting address of the section 40 | * @param sect_count The number of sections in the ELF file 41 | * @param sect_count_str The index of the string table section 42 | * 43 | * @return The buffer containing the ELF header 44 | */ 45 | buffer_t exe_header(size_t sect_start, uint16_t sect_count, uint16_t sect_count_str); 46 | 47 | /** 48 | * Function for generating the section header of the ELF object file. 49 | * And can be used in conjunction with the `exe_header` function and 50 | * other utilities to generate the entire ELF object file. 51 | * 52 | * @param str_offset The offset of the string table 53 | * @param type The type of the section 54 | * @param flags The flags of the section 55 | * @param off The pointer to the offset of the section 56 | * @param sect_sz The size of the section 57 | * @param info Extra information (if applicable) 58 | * @param label_sect The section where the label table is located. 59 | * @param ent_size The size of the entries in the content of this section. 60 | * 61 | * @note Please note the `ent_size` is used to depict the size of the 62 | * entries of the section content, which could be applicable in some 63 | * contexts such as a symbol *table* where each table has a certain size.
64 | * 65 | * @note Values of `label_sect`, `info` and `ent_size` are 66 | * optional and can be set to 0 if not applicable for certain contexts. 67 | * 68 | * @return The buffer containing the section header 69 | * 70 | * @note Specific values and flags can be found in the linux source 71 | * tree as well as the POSIX ELF standard. 72 | * 73 | * @see https://github.com/torvalds/linux/blob/master/include/uapi/linux/elf.h 74 | * 75 | * @note The `off` pointer is used to keep track of the offset/size of the section 76 | * headers and helps the caller to keep track of the section headers. 77 | */ 78 | buffer_t exe_sect_header(uint32_t str_offset, uint32_t type, uint64_t flags, uint64_t *off, uint64_t sect_sz, size_t info, uint32_t label_sect, uint64_t ent_size); 79 | 80 | /** 81 | * Function for generating a symbol table entry in the ELF object file. 82 | * This function is used in conjunction with `exe_sect_header` function 83 | * for generating the entire symbol header and this is only used for 84 | * the individual symbol entries. 85 | * 86 | * @param name The name of the symbol 87 | * @param sym_val The value of the symbol (Which is the offset) 88 | * @param sect_idx The index of the section 89 | * @param strtab The string table buffer pointer 90 | * @param info The data for the corresponding info field 91 | * 92 | * @note The `strtab` buffer pointer is used as a descriptor for where the 93 | * symbol name shall be written and the index of the section is used to 94 | * determine the section the symbol is in. 
95 | */ 96 | buffer_t exe_sym_ent(char *name, uint64_t sym_val, uint16_t sect_idx, buffer_t *strtab, uint8_t info); 97 | 98 | buffer_t exe_generate(struct codegen_ret ret); 99 | 100 | #endif -------------------------------------------------------------------------------- /libjas/include/instruction.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #ifndef INSTRUCTION_H 27 | #define INSTRUCTION_H 28 | 29 | #include "encoder.h" 30 | #include 31 | #include 32 | 33 | // Forward declaration - see instr_encode_table 34 | typedef struct instr_encode_table instr_encode_table_t; 35 | 36 | /** 37 | * Lookup macro for checking if an instruction is a directive. 
38 | * Directives are instructions that are not executed by the CPU 39 | * but are used to provide information to the assembler. 40 | * 41 | * @param i The instruction to check 42 | * @example if (INSTR_DIRECTIVE(INSTR_MOV)) 43 | * 44 | * @note - Internal 45 | * Please update this macro if more directives and/or instructions 46 | * are added/supported in this enum list (In the future). 47 | */ 48 | #define INSTR_DIRECTIVE(i) ((uint8_t)(i) > (uint8_t)INSTR_DUMMY) 49 | 50 | enum instructions { 51 | INSTR_NOTHING, /* Note naming conflict below `INSTR_NULL` */ 52 | INSTR_MOV, 53 | INSTR_LEA, 54 | INSTR_ADD, 55 | INSTR_SUB, 56 | INSTR_MUL, 57 | INSTR_DIV, 58 | INSTR_AND, 59 | INSTR_OR, 60 | INSTR_XOR, 61 | INSTR_NOT, 62 | INSTR_INC, 63 | INSTR_DEC, 64 | INSTR_JMP, 65 | INSTR_JE, 66 | INSTR_JNE, 67 | INSTR_JZ, 68 | INSTR_JNZ, 69 | INSTR_CALL, 70 | INSTR_RET, 71 | INSTR_CMP, 72 | INSTR_PUSH, 73 | INSTR_POP, 74 | INSTR_IN, 75 | INSTR_OUT, 76 | INSTR_CLC, 77 | INSTR_STC, 78 | INSTR_CLI, 79 | INSTR_STI, 80 | INSTR_NOP, 81 | INSTR_HLT, 82 | INSTR_INT, 83 | INSTR_SYSCALL, 84 | INSTR_MOVZX, 85 | INSTR_MOVSX, 86 | INSTR_XCHG, 87 | INSTR_BSWAP, 88 | INSTR_CMOVA, 89 | INSTR_CMOVAE, 90 | INSTR_CMOVB, 91 | INSTR_CMOVBE, 92 | INSTR_CMOVE, 93 | INSTR_CMOVG, 94 | INSTR_CMOVGE, 95 | INSTR_CMOVL, 96 | INSTR_CMOVLE, 97 | INSTR_CMOVNA, 98 | INSTR_CMOVNAE, 99 | INSTR_CMOVNB, 100 | INSTR_CMOVNBE, 101 | INSTR_CMOVNE, 102 | INSTR_CMOVNG, 103 | INSTR_CMOVNGE, 104 | INSTR_CMOVNL, 105 | INSTR_CMOVNLE, 106 | INSTR_CMOVNO, 107 | INSTR_CMOVNP, 108 | INSTR_CMOVNS, 109 | INSTR_CMOVNZ, 110 | INSTR_CMOVO, 111 | INSTR_CMOVP, 112 | INSTR_CMOVPE, 113 | INSTR_CMOVPO, 114 | INSTR_CMOVS, 115 | INSTR_CMOVZ, 116 | 117 | INSTR_DUMMY, 118 | 119 | INSTR_DIR_WRT_BUF, 120 | 121 | INSTR_DIR_LOCAL_LABEL, 122 | INSTR_DIR_GLOBAL_LABEL, 123 | INSTR_DIR_EXTERN_LABEL, 124 | }; 125 | 126 | // Alias type for the encoder `encoder_t` function pointer.
- See `encoder.h` 127 | typedef encoder_t pre_encoder_t; 128 | 129 | struct instr_encode_table { 130 | enum enc_ident ident; /* Operand encoding identity */ 131 | uint8_t opcode_ext; /* Opcode extension */ 132 | uint8_t opcode[3]; /* Opcode of the instruction */ 133 | uint8_t byte_instr_opcode[3]; /* 8 bit opcode fallback of the instruction */ 134 | uint8_t opcode_size; /* Size of the opcode (max. 3 bytes)*/ 135 | pre_encoder_t pre; /* Pre-encoder processor function (Optional, null if not applicable) */ 136 | uint8_t byte_opcode_size; /* Size of the byte opcode (max. 3 bytes, may be left null) */ 137 | }; 138 | 139 | /** 140 | * The lookup table using the `instructions_t` enum as the index 141 | * to get the corresponding instruction operand encoder structs. 142 | */ 143 | extern instr_encode_table_t *instr_table[]; 144 | 145 | typedef struct instruction { 146 | enum instructions instr; /* Type of instruction */ 147 | operand_t *operands; /* Operands of the instruction */ 148 | } instruction_t; 149 | 150 | #define INSTR_TAB_NULL \ 151 | (instr_encode_table_t) { \ 152 | .ident = ENC_NULL, \ 153 | .opcode_ext = 0, \ 154 | .opcode = {0}, \ 155 | .byte_instr_opcode = {0}, \ 156 | .opcode_size = 0, \ 157 | .pre = NULL, \ 158 | .byte_opcode_size = 0, \ 159 | } 160 | 161 | #define INSTR_NULL \ 162 | (instruction_t) { .instr = INSTR_NOTHING, .operands = NULL } 163 | 164 | // Macro for checking if the instruction is a label and shall be handled 165 | #define IS_LABEL(x) \ 166 | ((uint8_t)(x).instr >= (uint8_t)INSTR_DIR_LOCAL_LABEL && \ 167 | (uint8_t)(x).instr <= (uint8_t)INSTR_DIR_EXTERN_LABEL) 168 | 169 | /** 170 | * Function for getting the instruction table based on the instruction 171 | * struct provided. The function will return a instruction table struct 172 | * as described above in this header file.
173 | * 174 | * @param instr The instruction struct to get the identifier from 175 | * @return The instruction table struct 176 | * 177 | * @see `instr_encode_table_t` 178 | * @see `instruction_t` 179 | */ 180 | instr_encode_table_t instr_get_tab(instruction_t instr); 181 | 182 | /** 183 | * Function for generating an instruction struct with the given 184 | * instruction type and operands. The function is used to create 185 | * a Jas buffer and write it into a instruction, similar to the 186 | * `db` and `dw` directives in NASM, but this uses the size and 187 | * variadic arguments to write the bytes into the buffer. 188 | * 189 | * @param data_sz The size of the data to write 190 | * @param ... The data to write into the buffer 191 | * 192 | * @return The instruction struct pointer 193 | * 194 | * @example The **Jas** function call of: 195 | * > instr_write_bytes(7, 0x48, 0x89, 0x80, 0xff, 0x00, 0x00, 0x00); 196 | * 197 | * Is equivalent to: (In NASM) 198 | * > db 0x48, 0x89, 0x80, 0xff, 0x00, 0x00, 0x00 199 | */ 200 | instruction_t *instr_write_bytes(size_t data_sz, ...); 201 | 202 | /** 203 | * Macros for defining the instruction operands in a more readable 204 | * form for passing into the `instr_gen` function. The macros are 205 | * used to define the operand type, register, immediate, memory 206 | * and relative operands. 207 | * 208 | * @example 209 | * We dont need to type in: `instr_gen(INSTR_MOV, 1, OP_R64, REG_RAX, 0);` 210 | * But instead, we can type in: `instr_gen(INSTR_MOV, 1, r64(REG_RAX));` 211 | */ 212 | 213 | #define r64(x) OP_R64, x, 0 214 | #define r32(x) OP_R32, x, 0 215 | #define r16(x) OP_R16, x, 0 216 | #define r8(x) OP_R8, x, 0 217 | 218 | // -- 219 | 220 | #define imm8(x) OP_IMM8, x, 0 221 | #define imm16(x) OP_IMM16, x, 0 222 | #define imm32(x) OP_IMM32, x, 0 223 | #define imm64(x) OP_IMM64, x, 0 224 | 225 | /** 226 | * These Relative macros are used to define the relative operand, 227 | * usually using labels. 
The relative operand is used in jump 228 | * instructions and other instructions that require a relative 229 | * offset to a label. 230 | * 231 | * @example rel("label", 10) 232 | */ 233 | 234 | #define rel8(x, off) OP_REL8, x, off 235 | #define rel32(x, off) OP_REL32, x, off 236 | 237 | // Note: offset must be provided - equivalent to: [eax + xyz] 238 | // (SIB bytes and another register for displacement not supported) 239 | 240 | #define m8(x, off) OP_M8, x, off 241 | #define m16(x, off) OP_M16, x, off 242 | #define m32(x, off) OP_M32, x, off 243 | #define m64(x, off) OP_M64, x, off 244 | 245 | /** 246 | * A function for easily defining an instruction in the `instruction_t` 247 | * form without having to use the struct initializer or mangle around 248 | * with void pointers and curly braces. This function is used to create 249 | * an instruction struct and its operands using variadic arguments 250 | * and grouping 3 arguments together and passed off into the function. 251 | * 252 | * @param instr The instruction type 253 | * @param operand_count The number of operands to pass 254 | * @param ... The operands to pass (Refer to below example) 255 | * 256 | * @return The instruction struct pointer 257 | * 258 | * @note Instruction pointers are `malloc`ed (aka dynamically allocated) 259 | * and shall be freed after use to prevent memory leaks, please use the 260 | * `instr_free()` function to free the memory allocated for the instruction 261 | * structs by this function and similar functions. 262 | */ 263 | instruction_t *instr_gen(enum instructions instr, uint8_t operand_count, ...); 264 | 265 | /** 266 | * Helper macro for freeing an array of instruction structs and 267 | * their operands.
The macro is used to free the memory allocated 268 | * using the help of the `instr_free()` function `free()` 269 | * 270 | * @param instr_arr The array of instruction structs 271 | * @param arr_size The size of the instruction array 272 | * 273 | * @note If the instruction array is dynamically allocated, the 274 | * memory allocated for the array *itself* must be taken care of 275 | * by the caller instead of this function/macro. 276 | */ 277 | #define instr_free_all(instr_arr, arr_size) \ 278 | do { \ 279 | for (size_t i = 0; i < (arr_size); i++) \ 280 | instr_free((instr_arr)[i]); \ 281 | } while (0) 282 | 283 | /** 284 | * Function for freeing the memory allocated for the instruction 285 | * struct and its operands. The function is used to prevent memory 286 | * leaks and free the memory allocated for the instruction structs 287 | * and the operand structs that are nested inside the instruction. 288 | * 289 | * @param instr The instruction struct to free 290 | * 291 | * @note This function should only be used to free instruction allocated 292 | * using built-in Jas instruction generation functions, unless you actually 293 | * know what you are doing.
294 | */ 295 | void instr_free(instruction_t *instr); 296 | 297 | #endif 298 | -------------------------------------------------------------------------------- /libjas/include/jas.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #ifndef JAS_H 27 | #define JAS_H 28 | 29 | /** 30 | * @author cheng-alvin 31 | * Copyright (c) 2023-2024 Alvin Cheng 32 | * 33 | * Jas is a minimal, fast and simple zero-dependency assembler for the x64 34 | * family of processors, jas not only aims to be fast and simple when using 35 | * it but also aims to be a learning resource for others to learn about 36 | * low-level system programming and the x64 instruction set. 
37 | * 38 | * @see README.md For more information on the project 39 | * @see `LICENSE` For the license information 40 | * @see https://github.com/cheng-alvin/jas 41 | * 42 | * Made with love by Alvin / the Jas crew and contributors ❤️ . 43 | */ 44 | 45 | #ifdef __cplusplus 46 | namespace jas { 47 | extern "C" { 48 | #endif 49 | 50 | #include "buffer.h" 51 | #include "codegen.h" 52 | #include "error.h" 53 | #include "exe.h" 54 | #include "instruction.h" 55 | #include "label.h" 56 | #include "operand.h" 57 | #include "register.h" 58 | 59 | #ifdef __cplusplus 60 | } 61 | } 62 | #endif 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /libjas/include/label.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #ifndef LABEL_H 27 | #define LABEL_H 28 | 29 | #include 30 | #include 31 | 32 | typedef struct instruction instruction_t; 33 | 34 | typedef struct { 35 | char *name; /* Name of the label in a string format */ 36 | bool exported; /* Boolean for whether the label is exported to the linker table or not */ 37 | bool ext; /* Boolean for whether the label is external or not (If a relocation table should be created) */ 38 | size_t address; /* Address of the label entry, can use `buf.len` */ 39 | } label_t; 40 | 41 | /** 42 | * A factory function for creating a label entry in the label table. 43 | * The label entry is a struct that contains the name of the label, 44 | * whether it is exported or not, whether it is external or not, and 45 | * the address of the label. This data is constructed and then stored 46 | * into the given table pointer. 47 | * 48 | * @param label_table The label table to store the label entry in. 49 | * @param label_table_size The size of the label table. 50 | * 51 | * @param name The name of the label. 52 | * @param exported Boolean for whether the label is exported or not. 53 | * @param ext Boolean for whether the label is external or not. 54 | * @param address The address of the label. 55 | */ 56 | void label_create( 57 | label_t **label_table, size_t *label_table_size, 58 | char *name, bool exported, bool ext, size_t address); 59 | 60 | /** 61 | * Function for destroying the label table, freeing the memory 62 | * allocated for the label names and the label entries itself. 63 | * 64 | * @param label_table The label table to destroy. 
 65 | * @param label_table_size The size of the label table 66 | */ 67 | void label_destroy_all(label_t **label_table, size_t *label_table_size); 68 | 69 | /** 70 | * Function for looking up a label in the label table, and retu- 71 | * rning the label entry if found; otherwise a `NULL` 72 | * pointer is returned back to the caller. 73 | * 74 | * @param label_table The label table to look up the label in. 75 | * @param label_table_size The size of the label table. 76 | * 77 | * @param name The name of the label to look up in the label table. 78 | * @return The pointer to the label entry if found, otherwise `NULL` 79 | * 80 | * @note The label table is **assumed** to be allocated and setup 81 | * correctly before calling this function, stored in the `label_table` 82 | * and `label_table_size` pointers (as shown above). 83 | */ 84 | label_t *label_lookup(label_t **label_table, size_t *label_table_size, char *name); 85 | 86 | /** 87 | * Enumeration for expressing the different types of labels used 88 | * in the assembler, such as local, global, and external labels. 89 | * 90 | * Determines the type of label to be created in the assembler. 91 | * (And if a label table should be generated) 92 | */ 93 | 94 | enum label_type { 95 | LABEL_LOCAL, 96 | LABEL_GLOBAL, 97 | LABEL_EXTERN, 98 | }; 99 | 100 | /** 101 | * Function similar to `instr_gen()` and `label_create()`, used to 102 | * generate a label instruction in the instruction array, and 103 | * return the instruction struct back to the caller. 104 | * 105 | * @param name The name of the label to be generated. 106 | * @param type The type of the label. @see `enum label_type` 107 | * 108 | * @return The instruction struct pointer of the label generated. 109 | * 110 | * @note The label name should not contain the `:` character, as 111 | * it is automatically added by the assembler.
(The `:` character 112 | * is a label terminator usually found in assembly languages, and 113 | * does not get used in this assembler, typing in `:` would result 114 | * in that carrying over to the output) 115 | */ 116 | instruction_t *label_gen(char *name, enum label_type type); 117 | #endif -------------------------------------------------------------------------------- /libjas/include/mode.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #pragma once 27 | 28 | /** 29 | * Enum representing all the different modes of the assembler, and also 30 | * the different modes of the CPU. The modes are used to determine the 31 | * type of machine code to generate, matching to the modes described by Intel. 
32 | */ 33 | enum modes { 34 | MODE_REAL, 35 | MODE_PROTECTED, 36 | MODE_LONG 37 | }; -------------------------------------------------------------------------------- /libjas/include/operand.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #ifndef OPERAND_H 27 | #define OPERAND_H 28 | 29 | #include "buffer.h" 30 | #include "codegen.h" 31 | #include "encoder.h" 32 | #include "mode.h" 33 | #include 34 | 35 | /** 36 | * Definitions to the macros used in the operand encoder functions. 37 | * to represent the addressing modes of the ModR/M byte. 
38 | * 39 | * Basically, the ModR/M byte, in a nutshell is a byte that gives 40 | * the addressing mode of the operand, register and/or memory 41 | * data associated with the current operation. 42 | * 43 | * They definitely did a way better job explaining it here: 44 | * @see https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM_Byte 45 | */ 46 | 47 | #define OP_MODRM_INDIRECT 0b00000000 48 | #define OP_MODRM_DISP8 0b01000000 49 | #define OP_MODRM_DISP32 0b10000000 50 | #define OP_MODRM_REG 0b11000000 51 | 52 | /** 53 | * Macro definition for the 16-bit operand override byte for supporting 54 | * word-sized operands and addresses in the x86 family. 55 | * 56 | * @see https://stackoverflow.com/questions/74954166/ 57 | */ 58 | #define OP_WORD_OVERRIDE 0x66 59 | #define OP_ADDR_OVERRIDE 0x67 60 | 61 | /** 62 | * Enumeration for the different types of operands and 63 | * operand sizes supported by the jas assembler. 64 | */ 65 | enum operands { 66 | OP_NULL, 67 | OP_MISC, 68 | OP_REL8, 69 | OP_REL16, 70 | OP_REL32, 71 | OP_R8, 72 | OP_R16, 73 | OP_R32, 74 | OP_R64, 75 | OP_IMM8, 76 | OP_IMM16, 77 | OP_IMM32, 78 | OP_IMM64, 79 | OP_M8, 80 | OP_M16, 81 | OP_M32, 82 | OP_M64, 83 | }; 84 | 85 | /** 86 | * Definitions for the different macros responsible of checking 87 | * if the operand is a relative, register, immediate, memory, 88 | * depending on it's specific enumeration. 89 | * 90 | * (In other words, it just "generalizes" the specific operand enums) 91 | * 92 | * Btw, there's also general macros for finding the size of the 93 | * operand in assembly! 94 | * 95 | * @note You can just slop it between an if statement like so: 96 | * @example if (op_rel(x)) { ... 
} 97 | */ 98 | 99 | #define op_rel(x) ((x) <= OP_REL32 && (x) >= OP_REL8) 100 | #define op_r(x) ((x) <= OP_R64 && (x) >= OP_R8) 101 | #define op_imm(x) ((x) <= OP_IMM64 && (x) >= OP_IMM8) 102 | #define op_m(x) ((x) <= OP_M64 && (x) >= OP_M8) 103 | 104 | // -- 105 | 106 | #define op_byte(x) (x == OP_REL8 || x == OP_R8 || x == OP_IMM8 || x == OP_M8) 107 | #define op_word(x) (x == OP_REL16 || x == OP_R16 || x == OP_IMM16 || x == OP_M16) 108 | #define op_dword(x) (x == OP_REL32 || x == OP_R32 || x == OP_IMM32 || x == OP_M32) 109 | #define op_qword(x) (x == OP_R64 || x == OP_IMM64 || x == OP_M64) 110 | 111 | // -- 112 | 113 | #define op_rm(x) (op_r(x) || op_m(x)) 114 | 115 | typedef struct operand { 116 | void *data; /* Data in the operand */ 117 | enum operands type; /* Type tied to the void pointer*/ 118 | uint32_t offset; /* The offset applied to the data (if applicable) */ 119 | char *label; /* The name of a referenced label (if applicable) */ 120 | } operand_t; 121 | 122 | /** 123 | * Function for setting the prefix of the operand based on the 124 | * size of the reference operand array, and writes it the 125 | * buffer provided in `buf`. 126 | * 127 | * @param buf The pointer to the buffer to write the prefixes to. 128 | * @param op_arr The operand array to base the prefixes from. 129 | * @param mode The current operating mode of the instruction 130 | */ 131 | void op_write_prefix(buffer_t *buf, const operand_t *op_arr, enum modes mode); 132 | 133 | /** 134 | * Function for identifying the operand identity, created using 135 | * a large c++ unordered_map. 136 | * 137 | * @param input The input operand list 138 | * @return The operand identity enumeration 139 | */ 140 | enum enc_ident op_ident_identify(enum operands *input, instr_encode_table_t *instr_ref); 141 | 142 | /** 143 | * Simple function for determining the ModR/M mode based on the 144 | * operand type and offset provided by the operand struct. 
145 | * (As defined below) 146 | * 147 | * @param input The operand struct to determine the mode from 148 | * @return The ModR/M mode 149 | * 150 | * @note Function also performs checks for RIP, ESP, IP instr- 151 | * uction pointers for offset and ModR/M bytes and modes. 152 | * 153 | * @note The function requires the offset value to be typed as 154 | * signed types to prevent confusion and to match with the Intel 155 | * -required specifications as outlined. 156 | * 157 | * @see `operand_t` 158 | */ 159 | uint8_t op_modrm_mode(operand_t input); 160 | 161 | /** 162 | * Function for finding the size of the operand type in bits, very 163 | * similar to the `sizeof` operator in C99. Used to determine 164 | * the size of the operand in the instruction. 165 | * 166 | * @see https://en.cppreference.com/w/c/types/sizeof 167 | * @see `enum operands` 168 | * 169 | * @param input The operand identifier in enum form 170 | * @return The size of the operand in bits 171 | * 172 | * @note returns 0 if the operand size is not recognized 173 | * 174 | * @note This function checks for the size of the OPERAND TYPE 175 | * NOT THE OPERAND DATA ITSELF! 176 | */ 177 | uint8_t op_sizeof(enum operands input); 178 | 179 | /** 180 | * Function for returning the opcode of the instruction based 181 | * on the instruction encoder table provided in the function 182 | * arguments as well as if a byte opcode is provided in the 183 | * the encoder table. 
184 | * 185 | * @param op_arr The operand array to base the opcode from 186 | * @param instr_ref The instruction reference table 187 | * @return The opcode of the instruction 188 | * 189 | * You can literally wrap this into a buffer write function 190 | * to quickly and easily write in the opcode like as shown: 191 | * 192 | * buf_write(buf, op_write_opcode(, ), ); 193 | * 194 | * @see buffer.h 195 | */ 196 | uint8_t *op_write_opcode(operand_t *op_arr, instr_encode_table_t *instr_ref); 197 | 198 | #define OP_NONE \ 199 | (operand_t) { \ 200 | .data = NULL, \ 201 | .type = OP_NULL, \ 202 | .offset = 0, \ 203 | } 204 | 205 | #endif 206 | -------------------------------------------------------------------------------- /libjas/include/operand.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #ifndef OPERAND_HPP 27 | #define OPERAND_HPP 28 | 29 | #include 30 | 31 | using namespace std; 32 | 33 | /** 34 | * Type wrapper for an unsigned char that represents the 35 | * hash of an operand identifier. Used as a value for comparison 36 | * against the operand identity encoder lookup table. The individual 37 | * bits of the byte represent the type of operands within the 38 | * operand identifier as follows: 39 | * 40 | * 0: Relative operand (rel8/16/32) 41 | * 1: Register operand (r8/16/32/64) 42 | * 2: Immediate operand (imm8/16/32/64) 43 | * 3: Memory operand (m8/16/32/64) 44 | * 4: -- Reserved for future use -- 45 | * 5: Accumulator register operand (acc8/16/32/64) 46 | * 47 | * 6-7: Reserved for future use 48 | * 49 | * A bit set to 1 indicates that the operand is of the corresponding 50 | * type, while a bit set to 0 indicates that the operand is not of the 51 | * corresponding type. 52 | * 53 | * @note Not to be confused with the operand types (`enum operands`) 54 | * 55 | * Also, there are macros below to help you! 56 | */ 57 | typedef uint8_t op_ident_hash_t; 58 | 59 | /** 60 | * Macro definitions for the different operand hash values. 61 | * Used to compare against the operand identity encoder lookup table. 
62 | * 63 | * @see `op_ident_hash_t` 64 | */ 65 | 66 | #define OP_HASH_REL 0b00000001 67 | #define OP_HASH_R 0b00000010 68 | #define OP_HASH_IMM 0b00000100 69 | #define OP_HASH_M 0b00001000 70 | 71 | #endif 72 | -------------------------------------------------------------------------------- /libjas/include/parse.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #include "instruction.h" 27 | #include 28 | #include 29 | 30 | /// @brief A bunch of utility functions where parsing of assembly 31 | /// instructions are applicable, such as in assemblers. 32 | 33 | /** 34 | * Function for parsing the instruction name and returning the 35 | * corresponding instruction enum. 
 This is done by having a set 36 | * of all instruction names in order of the enum and comparing 37 | * the input name with the names in the set. 38 | * 39 | * @param name The name of the instruction to parse 40 | * @return The instruction enum 41 | */ 42 | enum instructions parse_instr_name(char *name); 43 | 44 | /** 45 | * Converts a regular hex or base-10 string to a number that 46 | * C can interpret (a `uint64_t`). This is done by checking the 47 | * prefix of the string, such as `0x` or `h` for hex, 48 | * converting it using the C standard library function `strtoull` 49 | * and returning the result. 50 | * 51 | * @param name The string to convert to a number 52 | * @return The converted number 53 | */ 54 | uint64_t parse_str_num(char *name); 55 | 56 | /** 57 | * Function for parsing the register name and returning the 58 | * corresponding register enum. This is done by having a set 59 | * of all register names in order of the enum and comparing 60 | * the input name with the names in the set.
61 | * 62 | * @param name The name of the register to parse 63 | * @return The register enum 64 | * 65 | * @note Implementation is similar to `parse_instr_name` 66 | * @see `register.h` 67 | */ 68 | enum registers parse_reg(char *name); -------------------------------------------------------------------------------- /libjas/include/register.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #ifndef REGISTER_H 27 | #define REGISTER_H 28 | 29 | #include 30 | #include 31 | 32 | enum registers { 33 | REG_AL, 34 | REG_CL, 35 | REG_DL, 36 | REG_BL, 37 | REG_AH, 38 | REG_CH, 39 | REG_DH, 40 | REG_BH, 41 | 42 | REG_AX, 43 | REG_CX, 44 | REG_DX, 45 | REG_BX, 46 | REG_SP, 47 | REG_BP, 48 | REG_SI, 49 | REG_DI, 50 | 51 | REG_EAX, 52 | REG_ECX, 53 | REG_EDX, 54 | REG_EBX, 55 | REG_ESP, 56 | REG_EBP, 57 | REG_ESI, 58 | REG_EDI, 59 | 60 | REG_RAX, 61 | REG_RCX, 62 | REG_RDX, 63 | REG_RBX, 64 | REG_RSP, 65 | REG_RBP, 66 | REG_RSI, 67 | REG_RDI, 68 | 69 | REG_R8B, 70 | REG_R9B, 71 | REG_R10B, 72 | REG_R11B, 73 | REG_R12B, 74 | REG_R13B, 75 | REG_R14B, 76 | REG_R15B, 77 | 78 | REG_R8W, 79 | REG_R9W, 80 | REG_R10W, 81 | REG_R11W, 82 | REG_R12W, 83 | REG_R13W, 84 | REG_R14W, 85 | REG_R15W, 86 | 87 | REG_R8D, 88 | REG_R9D, 89 | REG_R10D, 90 | REG_R11D, 91 | REG_R12D, 92 | REG_R13D, 93 | REG_R14D, 94 | REG_R15D, 95 | 96 | REG_R8, 97 | REG_R9, 98 | REG_R10, 99 | REG_R11, 100 | REG_R12, 101 | REG_R13, 102 | REG_R14, 103 | REG_R15, 104 | 105 | REG_SPL, 106 | REG_BPL, 107 | REG_SIL, 108 | REG_DIL, 109 | 110 | REG_IP, 111 | REG_EIP, 112 | REG_RIP, 113 | }; 114 | 115 | /** 116 | * Macro definition of the register values. These are the values that 117 | * are used in the instruction encoding, unlike the enum values which 118 | * serves as a 'marker', or a key value pair. 
119 | * 120 | * @note Not to be confused with the enum `registers` 121 | */ 122 | 123 | #define REG_VALUE_AL 0 124 | #define REG_VALUE_CL 1 125 | #define REG_VALUE_DL 2 126 | #define REG_VALUE_BL 3 127 | #define REG_VALUE_AH 4 128 | #define REG_VALUE_CH 5 129 | #define REG_VALUE_DH 6 130 | #define REG_VALUE_BH 7 131 | 132 | #define REG_VALUE_AX 0 133 | #define REG_VALUE_CX 1 134 | #define REG_VALUE_DX 2 135 | #define REG_VALUE_BX 3 136 | #define REG_VALUE_SP 4 137 | #define REG_VALUE_BP 5 138 | #define REG_VALUE_SI 6 139 | #define REG_VALUE_DI 7 140 | 141 | #define REG_VALUE_EAX 0 142 | #define REG_VALUE_ECX 1 143 | #define REG_VALUE_EDX 2 144 | #define REG_VALUE_EBX 3 145 | #define REG_VALUE_ESP 4 146 | #define REG_VALUE_EBP 5 147 | #define REG_VALUE_ESI 6 148 | #define REG_VALUE_EDI 7 149 | 150 | #define REG_VALUE_RAX 0 151 | #define REG_VALUE_RCX 1 152 | #define REG_VALUE_RDX 2 153 | #define REG_VALUE_RBX 3 154 | #define REG_VALUE_RSP 4 155 | #define REG_VALUE_RBP 5 156 | #define REG_VALUE_RSI 6 157 | #define REG_VALUE_RDI 7 158 | 159 | // REX.B dictated registers: 160 | 161 | #define REG_VALUE_R8B 0 162 | #define REG_VALUE_R9B 1 163 | #define REG_VALUE_R10B 2 164 | #define REG_VALUE_R11B 3 165 | #define REG_VALUE_R12B 4 166 | #define REG_VALUE_R13B 5 167 | #define REG_VALUE_R14B 6 168 | #define REG_VALUE_R15B 7 169 | 170 | #define REG_VALUE_R8W 0 171 | #define REG_VALUE_R9W 1 172 | #define REG_VALUE_R10W 2 173 | #define REG_VALUE_R11W 3 174 | #define REG_VALUE_R12W 4 175 | #define REG_VALUE_R13W 5 176 | #define REG_VALUE_R14W 6 177 | #define REG_VALUE_R15W 7 178 | 179 | #define REG_VALUE_R8D 0 180 | #define REG_VALUE_R9D 1 181 | #define REG_VALUE_R10D 2 182 | #define REG_VALUE_R11D 3 183 | #define REG_VALUE_R12D 4 184 | #define REG_VALUE_R13D 5 185 | #define REG_VALUE_R14D 6 186 | #define REG_VALUE_R15D 7 187 | 188 | #define REG_VALUE_R8 0 189 | #define REG_VALUE_R9 1 190 | #define REG_VALUE_R10 2 191 | #define REG_VALUE_R11 3 192 | #define REG_VALUE_R12 4 193 
| #define REG_VALUE_R13 5 194 | #define REG_VALUE_R14 6 195 | #define REG_VALUE_R15 7 196 | 197 | #define REG_VALUE_SPL 4 198 | #define REG_VALUE_BPL 5 199 | #define REG_VALUE_SIL 6 200 | #define REG_VALUE_DIL 7 201 | 202 | // Misc and instruction pointer registers: 203 | #define REG_VALUE_IP 5 204 | #define REG_VALUE_EIP 5 205 | #define REG_VALUE_RIP 5 206 | 207 | /** 208 | * Lookup table wrapper for finding register values from a single 209 | * centralized enumeration value, as per the `registers` enum above. 210 | * 211 | * @param input enum registers' pointer value 212 | * @return uint8_t register value 213 | */ 214 | uint8_t reg_lookup_val(enum registers *input); 215 | 216 | /** 217 | * Function for checking whether a register needs to have a REX.B 218 | * prefix before the opcode; the REX.B prefix is used to access 219 | * more general-purpose registers such as the higher 8 registers 220 | * in 64-bit mode. 221 | * 222 | * @param input enum registers value 223 | * @return Whether the register needs a REX.B prefix when encoded 224 | * @see `registers` and rex.h 225 | */ 226 | bool reg_needs_rex(enum registers input); 227 | 228 | // Orphan macro for checking if the operand data is used as accumulator register.
229 | #define op_acc(x) ((x) == REG_AL || (x) == REG_AX || (x) == REG_EAX || (x) == REG_RAX) 230 | 231 | #endif 232 | -------------------------------------------------------------------------------- /libjas/include/rex.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #ifndef REX_H 27 | #define REX_H 28 | 29 | #include "buffer.h" 30 | #include 31 | 32 | #define REX_W 0x48 33 | #define REX_R 0x44 34 | #define REX_X 0x42 35 | #define REX_B 0x41 36 | 37 | #define REX_DEFAULT 0b01000000 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /libjas/instruction.c: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #include "instruction.h" 27 | #include "error.h" 28 | #include "register.h" 29 | #include "tabs.c" 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | // clang-format off 36 | 37 | instr_encode_table_t *instr_table[] = 38 | { 39 | NULL, mov, lea, add, sub, mul, _div, and, or, xor, _not, inc, 40 | dec, jmp, je, jne, jz, jnz, call, ret, cmp, push, pop, 41 | in, out, clc, stc, cli, sti, nop, hlt, _int, syscall, 42 | movzx, movsx, xchg, bswap, cmova, cmovae, cmovb, cmovbe, cmove, 43 | cmovg, cmovge, cmovl, cmovle, cmovna, cmovnae, cmovnb, cmovnbe, 44 | cmovne, cmovng, cmovnge, cmovnl, cmovnle, cmovno, cmovnp, cmovns, 45 | cmovnz, cmovo, cmovp, cmovpe, cmovpo, cmovs, cmovz, 46 | }; 47 | 48 | 49 | #define CURR_TABLE instr_table[instr.instr][j] 50 | 51 | instr_encode_table_t instr_get_tab(instruction_t instr) { 52 | if (instr.instr == INSTR_NOTHING && instr.operands == NULL) return INSTR_TAB_NULL; 53 | if (INSTR_DIRECTIVE(instr.instr)) return INSTR_TAB_NULL; // aka empty 54 | const enum operands operand_list[4] = { 55 | instr.operands[0].type, instr.operands[1].type, 56 | instr.operands[2].type, instr.operands[3].type, 57 | }; 58 | // clang-format on 59 | 60 | enum enc_ident ident = 61 | op_ident_identify(operand_list, instr_table[(size_t)instr.instr]); 62 | 63 | for (uint8_t j = 0; CURR_TABLE.opcode_size; j++) 64 | if (CURR_TABLE.ident == ident) return CURR_TABLE; 65 | 66 | // fall-through; no corresponding instruction opcode found 67 | err("No corrsponding instruction opcode found."); 68 | return INSTR_TAB_NULL; // aka empty 69 | } 70 | #undef CURR_TABLE 71 | 72 | #define alloc_operand_data(type) \ 73 | do { \ 74 | type *type##_ = malloc(sizeof(type)); \ 75 | *type##_ = (type)va_arg(args, uint64_t); \ 76 | data = (void *)type##_; \ 77 | } while (0); 78 | 79 | /* Stupid almost-stub implementation */ 80 | instruction_t *instr_gen(enum instructions instr, uint8_t operand_count, ...) 
{ 81 | va_list args; 82 | va_start(args, operand_count * 3); 83 | 84 | // Note, a temporary register type is used to prevent conflict 85 | // with the `enum registers` type by passing into `alloc_operand_data` 86 | typedef enum registers temp_reg; 87 | 88 | // clang-format off 89 | operand_t *operands = malloc(sizeof(operand_t) * 4); 90 | for (uint8_t i = 0; i < 4; i++) operands[i] = OP_NONE; 91 | // clang-format on 92 | 93 | for (uint8_t i = 0; i < operand_count; i++) { 94 | const enum operands type = va_arg(args, enum operands); 95 | char *label = ""; 96 | void *data; 97 | if (op_rel(type)) { 98 | char *lab = va_arg(args, char *); 99 | const size_t label_name_size = strlen(lab) + 1; 100 | char *copied_name = malloc(label_name_size); 101 | strcpy(copied_name, lab); 102 | 103 | label = copied_name; 104 | 105 | // clang-format off 106 | } else if (op_imm(type)) { 107 | switch (op_sizeof(type)) { 108 | case 8: alloc_operand_data(uint8_t); break; 109 | case 16: alloc_operand_data(uint16_t); break; 110 | case 32: alloc_operand_data(uint32_t); break; 111 | case 64: alloc_operand_data(uint64_t); break; 112 | default: 113 | err("Invalid operand size."); 114 | break; 115 | } 116 | // clang-format on 117 | } else { 118 | alloc_operand_data(temp_reg); /* Note braces as macro expands */ 119 | } 120 | const size_t off = va_arg(args, size_t); 121 | operands[i] = 122 | (operand_t){.type = type, .offset = off, .data = data, .label = label}; 123 | } 124 | 125 | va_end(args); 126 | instruction_t *instr_struct = malloc(sizeof(instruction_t)); 127 | *instr_struct = (instruction_t){.instr = instr, .operands = operands}; 128 | 129 | return instr_struct; 130 | } 131 | #undef alloc_data 132 | 133 | instruction_t *instr_write_bytes(size_t data_sz, ...) 
{ 134 | buffer_t *buffer_ptr = malloc(sizeof(buffer_t)); 135 | buffer_t data = BUF_NULL; 136 | va_list args; 137 | va_start(args, data_sz); 138 | 139 | for (size_t i = 0; i < data_sz; i++) { 140 | const uint8_t byte = va_arg(args, uint8_t); 141 | buf_write_byte(&data, byte); 142 | } 143 | 144 | va_end(args); 145 | 146 | instruction_t *instr_ret = malloc(sizeof(instruction_t)); 147 | memcpy(buffer_ptr, &data, sizeof(buffer_t)); 148 | 149 | operand_t *operands = calloc(4, sizeof(operand_t)); 150 | operands[0] = 151 | (operand_t){ 152 | .type = (enum operands)OP_MISC, 153 | .offset = 0, 154 | .data = buffer_ptr, 155 | .label = NULL, 156 | }; 157 | 158 | *instr_ret = (instruction_t){ 159 | .instr = INSTR_DIR_WRT_BUF, 160 | .operands = operands, 161 | }; 162 | 163 | return instr_ret; 164 | } 165 | 166 | void instr_free(instruction_t *instr) { 167 | for (uint8_t i = 0; i < 4; i++) { 168 | if (instr->operands[i].type == OP_NULL) break; 169 | if (instr->operands[i].type == OP_MISC && instr->instr == INSTR_DIR_WRT_BUF) { 170 | buffer_t *data = (buffer_t *)instr->operands[i].data; 171 | free(data->data); 172 | } 173 | 174 | if (strlen(instr->operands[i].label)) { 175 | free(instr->operands[i].label); 176 | continue; 177 | } 178 | 179 | if (instr->operands[i].type) free(instr->operands[i].data); 180 | } 181 | 182 | free(instr->operands); 183 | free(instr); 184 | } -------------------------------------------------------------------------------- /libjas/instructions.tbl: -------------------------------------------------------------------------------- 1 | # **Notice:** The Jas encoder bank along with the `compiler.js` script 2 | # is a part of the Jas assembler project, licensed under the MIT license. 3 | # Please consult the `LICENSE` file for more information. 
4 | 5 | # Contributors' list: (Add your own name here, if applicable) 6 | # Copyright (c) 2023-2024 Alvin Cheng 7 | 8 | # This is the Jas assembler instruction encoder bank, this file contains 9 | # the encoder tables for encoding the instructions into machine code, with 10 | # each one handcrafted and reviewed by actual humans, not a dumb script that 11 | # runs through the Intel Manual. 12 | 13 | # This is basically, (when boiled down) a successor to the `tabs.c` file as a 14 | # representation of structs by allowing a script to write boilerplate for us. 15 | # All columns are matching the values of the `instr_encode_table` struct, see 16 | # `instruction.h` for more information. (Data sizes and omitted fields are auto- 17 | # matically computed by the compiler script) 18 | 19 | # NOTE: All tables should be in order of the `instructions` enumeration. 20 | 21 | # name | identity | opcode extension | opcode | byte opcode | pre 22 | # ----------------------------------------------------------------------------------------------- 23 | mov | mr | - | 0x89 | 0x88 | - 24 | mov | rm | - | 0x8B | 0x8A | - 25 | mov | oi | - | 0xB8 | 0xB0 | - 26 | mov | mi | 0b10000000 | 0xC7 | 0xC6 | - 27 | 28 | lea | rm | - | 0x8D | - | - 29 | 30 | add | rm | - | 0x03 | 0x02 | - 31 | add | mr | - | 0x01 | 0x00 | - 32 | add | i | - | 0x05 | 0x04 | - 33 | add | mi | 0b10000000 | 0x81 | 0x80 | - 34 | 35 | sub | rm | - | 0x2b | 0x2a | - 36 | sub | mr | - | 0x29 | 0x28 | - 37 | sub | i | - | 0x2c | 0x2d | - 38 | sub | mi | 5 | 0x81 | 0x80 | - 39 | 40 | mul | m | 4 | 0xF7 | 0xF6 | - 41 | 42 | _div | m | 6 | 0xF7 | 0xF6 | - 43 | 44 | and | rm | - | 0x23 | 0x22 | - 45 | and | mr | - | 0x21 | 0x20 | - 46 | and | i | - | 0x25 | 0x24 | - 47 | and | mi | 1 | 0x81 | 0x80 | - 48 | 49 | or | rm | - | 0x0B | 0x0A | - 50 | or | mr | - | 0x09 | 0x08 | - 51 | or | i | - | 0x0d | 0x0c | - 52 | or | mi | 1 | 0x81 | 0x80 | - 53 | 54 | xor | rm | - | 0x33 | 0x32 | - 55 | xor | mr | - | 0x31 | 0x30 | - 56 | xor 
| i | - | 0x35 | 0x34 | - 57 | xor | mi | 6 | 0x81 | 0x80 | - 58 | 59 | _not | m | 2 | 0xF7 | 0xF6 | - 60 | 61 | inc | m | 0 | 0xFF | 0xFE | - 62 | dec | m | 1 | 0xFF | 0xFE | - 63 | 64 | jmp | d | - | 0xE9 | 0xEB | - 65 | jmp | m | 4 | 0xFF | - | - 66 | 67 | je | d | - | 0x0f, 0x84 | 0x74 | - 68 | jnz | d | - | 0x0f, 0x85 | 0x75 | - 69 | 70 | jne | d | - | 0x0f, 0x85 | - | - 71 | jz | d | - | 0x0f, 0x84 | - | - 72 | 73 | 74 | call | d | - | 0xE8 | 0xEB | - 75 | call | m | 2 | 0xFF | - | - 76 | 77 | ret | zo | - | 0xC3 | - | - 78 | ret | i | - | 0xC2 | - | - 79 | 80 | cmp | rm | - | 0x3B | 0x3A | - 81 | cmp | mr | - | 0x39 | 0x38 | - 82 | cmp | i | - | 0x3D | 0x3C | - 83 | cmp | mi | 8 | 0x81 | 0x80 | - 84 | 85 | push | m | 6 | 0xFF | - | - 86 | push | o | - | 0x50 | - | - 87 | push | i | - | 0x68 | 0x6A | - 88 | 89 | pop | m | 0 | 0x8F | - | - 90 | pop | o | - | 0x58 | - | - 91 | 92 | # Issue with `in` and `out`!!!! Note the non-standard imm8, AL/AX/EAX identity 93 | 94 | in | oi | - | - | 0xE4 | - 95 | 96 | out | oi | - | - | 0xE6 | - 97 | 98 | clc | zo | - | 0xF8 | - | - 99 | stc | zo | - | 0xF9 | - | - 100 | cli | zo | - | 0xFA | - | - 101 | sti | zo | - | 0xFB | - | - 102 | nop | zo | - | 0x90 | - | - 103 | hlt | zo | - | 0xF4 | - | - 104 | _int | i | - | - | 0xCD | - 105 | 106 | xchg | o | - | 0x90 | - | - 107 | xchg | mr | - | 0x87 | 0x86 | - 108 | xchg | rm | - | 0x87 | 0x86 | - 109 | 110 | # -------------------------------------------------------------------------------------------------- 111 | syscall | zo | - | 0x0f, 0x05 | - | - 112 | movzx | rm | - | 0x0F, 0xB7 | 0x0F, 0xB6 | pre_small_operands 113 | movsx | rm | - | 0x0F, 0xBF | 0x0F, 0xBE | pre_small_operands 114 | bswap | o | - | 0x0f, 0xC8 | - | - 115 | 116 | cmova | rm | - | 0x0F, 0x47 | - | - 117 | cmovae | rm | - | 0x0F, 0x43 | - | - 118 | cmovb | rm | - | 0x0F, 0x42 | - | - 119 | cmovbe | rm | - | 0x0F, 0x46 | - | - 120 | cmove | rm | - | 0x0F, 0x44 | - | - 121 | cmovg | rm | - | 0x0F, 0x4F | - 
| - 122 | cmovge | rm | - | 0x0F, 0x4D | - | - 123 | cmovl | rm | - | 0x0F, 0x4C | - | - 124 | cmovle | rm | - | 0x0F, 0x4E | - | - 125 | cmovna | rm | - | 0x0F, 0x46 | - | - 126 | cmovnae | rm | - | 0x0F, 0x42 | - | - 127 | cmovnb | rm | - | 0x0F, 0x43 | - | - 128 | cmovnbe | rm | - | 0x0F, 0x47 | - | - 129 | cmovne | rm | - | 0x0F, 0x45 | - | - 130 | cmovng | rm | - | 0x0F, 0x4E | - | - 131 | cmovnge | rm | - | 0x0F, 0x4C | - | - 132 | cmovnl | rm | - | 0x0F, 0x4D | - | - 133 | cmovnle | rm | - | 0x0F, 0x4F | - | - 134 | cmovno | rm | - | 0x0F, 0x41 | - | - 135 | cmovnp | rm | - | 0x0F, 0x4B | - | - 136 | cmovns | rm | - | 0x0F, 0x49 | - | - 137 | cmovnz | rm | - | 0x0F, 0x45 | - | - 138 | cmovo | rm | - | 0x0F, 0x40 | - | - 139 | cmovp | rm | - | 0x0F, 0x4A | - | - 140 | cmovpe | rm | - | 0x0F, 0x4A | - | - 141 | cmovpo | rm | - | 0x0F, 0x4B | - | - 142 | cmovs | rm | - | 0x0F, 0x48 | - | - 143 | cmovz | rm | - | 0x0F, 0x44 | - | - 144 | -------------------------------------------------------------------------------- /libjas/label.c: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #include "label.h" 27 | #include "codegen.h" 28 | #include "error.h" 29 | #include "operand.h" 30 | #include 31 | #include 32 | #include 33 | 34 | void label_create( 35 | label_t **label_table, size_t *label_table_size, 36 | char *name, bool exported, bool ext, size_t address) { 37 | 38 | if (label_lookup(label_table, label_table_size, name) != NULL) { 39 | err("Label conflict detected, a duplicate cannot be created."); 40 | return; 41 | } 42 | 43 | // clang-format off 44 | label_t label = 45 | {.name = name, .exported = exported, .ext = ext, 46 | .address = address, }; 47 | // clang-format on 48 | 49 | (*label_table_size)++; 50 | *label_table = (label_t *) 51 | realloc(*label_table, *label_table_size * sizeof(label_t)); 52 | 53 | *(label_table)[*label_table_size - 1] = label; 54 | } 55 | 56 | void label_destroy_all(label_t **label_table, size_t *label_table_size) { 57 | free(*label_table); 58 | 59 | label_table = NULL; 60 | *(label_table_size) = 0; 61 | } 62 | 63 | label_t *label_lookup(label_t **label_table, size_t *label_table_size, char *name) { 64 | const label_t *table_defref = *label_table; 65 | for (size_t i = 0; i < table_defref; i++) { 66 | if (table_defref[i].name == NULL) return NULL; 67 | if (strcmp(table_defref[i].name, name) == 0) return &table_defref[i]; 68 | } 69 | 70 | return NULL; 71 | } 72 | 73 | instruction_t *label_gen(char *name, enum label_type type) { 74 | enum instructions instr = 
INSTR_DIR_LOCAL_LABEL; 75 | 76 | // clang-format off 77 | switch (type) { 78 | case LABEL_LOCAL: instr = INSTR_DIR_LOCAL_LABEL; break; 79 | case LABEL_GLOBAL: instr = INSTR_DIR_GLOBAL_LABEL; break; 80 | case LABEL_EXTERN: instr = INSTR_DIR_EXTERN_LABEL; break; 81 | 82 | default: break; 83 | } 84 | // clang-format on 85 | 86 | const size_t label_name_size = strlen(name) + 1; 87 | char *copied_name = malloc(label_name_size); 88 | strcpy(copied_name, name); 89 | 90 | operand_t *operands = calloc(4, sizeof(operand_t)); 91 | operands[0] = 92 | (operand_t){ 93 | .type = OP_MISC, 94 | .offset = 0, 95 | .data = copied_name, 96 | .label = NULL, 97 | }; 98 | 99 | instruction_t *instr_ret = malloc(sizeof(instruction_t)); 100 | *instr_ret = (instruction_t){ 101 | .instr = instr, 102 | .operands = operands, 103 | }; 104 | 105 | return instr_ret; 106 | } -------------------------------------------------------------------------------- /libjas/operand.c: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #include "operand.h" 27 | #include "error.h" 28 | #include "instruction.h" 29 | #include "mode.h" 30 | #include "register.h" 31 | #include "rex.h" 32 | #include 33 | 34 | uint8_t op_modrm_mode(operand_t input) { 35 | register const enum registers deref_reg = (*(enum registers *)input.data); 36 | if (reg_lookup_val(input.data) == 5 && !reg_needs_rex(deref_reg)) { 37 | if (deref_reg == REG_RIP || deref_reg == REG_EIP || deref_reg == REG_IP) { 38 | if (op_r(input.type)) 39 | err("`rip`, `eip` or `ip` cannot be directly referenced"); 40 | return OP_MODRM_INDIRECT; 41 | } else if (input.offset == 0) 42 | return OP_MODRM_DISP8; 43 | } 44 | 45 | if (op_m(input.type) && input.offset == 0) 46 | return OP_MODRM_INDIRECT; 47 | 48 | if (input.offset != 0) { 49 | if ((intmax_t)input.offset > INT32_MAX) err("displacement too large"); 50 | if ((intmax_t)input.offset > INT8_MAX) 51 | return OP_MODRM_DISP32; 52 | 53 | return OP_MODRM_DISP8; 54 | } 55 | 56 | return OP_MODRM_REG; 57 | } 58 | 59 | uint8_t op_sizeof(enum operands input) { 60 | if (op_byte(input)) return 8; 61 | if (op_word(input)) return 16; 62 | if (op_dword(input)) return 32; 63 | if (op_qword(input)) return 64; 64 | 65 | return 0; 66 | } 67 | 68 | uint8_t *op_write_opcode(operand_t *op_arr, instr_encode_table_t *instr_ref) { 69 | if (op_byte(op_arr[0].type)) { 70 | if (instr_ref->byte_opcode_size == 0) goto op_e; 71 | return instr_ref->byte_instr_opcode; 72 | } 73 | if (!instr_ref->opcode_size) goto op_e; 74 | return instr_ref->opcode; 75 | 76 | op_e: 77 | err("operand type mismatch"); 78 | return NULL; 79 | } 80 | 81 | void op_write_prefix(buffer_t *buf, const operand_t *op_arr, enum 
modes mode) { 82 | uint8_t already_written = 0; 83 | uint8_t rex = REX_DEFAULT; 84 | 85 | for (uint8_t i = 0; i < 4; i++) { 86 | if (op_arr[i].type == OP_NULL) 87 | continue; 88 | 89 | const uint8_t size = op_sizeof(op_arr[i].type); 90 | uint8_t override = op_m(op_arr[i].type) ? OP_ADDR_OVERRIDE : OP_WORD_OVERRIDE; 91 | 92 | if (op_rm(op_arr[i].type)) 93 | rex |= reg_needs_rex(op_arr[i].data) ? REX_B : 0; 94 | 95 | switch (size) { 96 | case 16: 97 | if (mode == MODE_REAL) break; 98 | if (mode == MODE_LONG && op_m(op_arr[i].type)) { 99 | if (!buf_element_exists(buf, OP_WORD_OVERRIDE)) 100 | buf_write_byte(buf, OP_WORD_OVERRIDE); 101 | } 102 | 103 | goto override_write; 104 | 105 | case 32: 106 | if (mode == MODE_REAL) goto override_write; 107 | if (override != OP_ADDR_OVERRIDE || mode == MODE_PROTECTED) break; 108 | 109 | override_write: 110 | if (already_written == override) break; 111 | buf_write_byte(buf, override); 112 | already_written = override; 113 | break; 114 | 115 | case 64: 116 | if (mode != MODE_LONG) { 117 | err("64-bit operands not allowed"); 118 | break; 119 | } 120 | 121 | rex |= REX_W; 122 | break; 123 | } 124 | } 125 | 126 | if (rex != REX_DEFAULT) 127 | buf_write_byte(buf, rex); 128 | } 129 | 130 | /** 131 | * @brief 132 | * The 0x66 and 0x67 hex prefixes are the operand and address size 133 | * overrides respectively. They are used to change the size of the operand 134 | * or address in the instruction. Basically, if there's no prefix, the op- 135 | * erand as well as the address size is the same as the mode's bit size. 136 | * So long mode is 64 and so on. 137 | * 138 | * However, the override prefixes can change the size of the operand or 139 | * address to 16 bits in protected mode, or 32 bits in real mode. This is 140 | * useful for instructions that require a specific operand size. 
141 | * 142 | * mode | size | output 143 | * ----------------------------- 144 | * real | 32 | 0x66 145 | * real | 32 addr | 0x67 146 | * prot | 16 | 0x66 147 | * prot | 16 addr | 0x67 148 | * long | 16 | 0x66 149 | * long | 32 addr | 0x67 150 | * long | 16 addr | 0x67 + 0x66 151 | * 152 | * long | 64 | REX.W 153 | * 154 | * Additional: Register REX prefix for REX.B 155 | * 156 | * @note 64-bit operands are not allowed in any modes other than long mode 157 | * hence, if a 64-bit operand is present throw an error if other than 158 | * long mode 159 | * 160 | */ -------------------------------------------------------------------------------- /libjas/operand.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | // clang-format off 27 | 28 | extern "C" { 29 | #include "operand.h" 30 | #include "encoder.h" 31 | #include "error.h" 32 | #include "instruction.h" 33 | } 34 | 35 | // clang-format on 36 | 37 | #include "operand.hpp" 38 | #include 39 | #include 40 | #include 41 | 42 | #define OP_HASH_NONE (uint32_t)0b11111111 43 | 44 | static constexpr uint32_t __combine__(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { 45 | return ((a) << 24 | (b) << 16 | (c) << 8 | (d)); 46 | } 47 | 48 | static op_ident_hash_t op_hash(enum operands input) { 49 | if (op_rel(input)) return OP_HASH_REL; 50 | if (op_r(input)) return OP_HASH_R; 51 | if (op_imm(input)) return OP_HASH_IMM; 52 | if (op_m(input)) return OP_HASH_M; 53 | 54 | return OP_HASH_NONE; 55 | } 56 | 57 | static multimap master = { 58 | {ENC_MR, __combine__(OP_HASH_R, OP_HASH_R, OP_HASH_NONE, OP_HASH_NONE)}, 59 | {ENC_MR, __combine__(OP_HASH_M, OP_HASH_R, OP_HASH_NONE, OP_HASH_NONE)}, 60 | 61 | {ENC_RM, __combine__(OP_HASH_R, OP_HASH_M, OP_HASH_NONE, OP_HASH_NONE)}, 62 | {ENC_RM, __combine__(OP_HASH_M, OP_HASH_R, OP_HASH_NONE, OP_HASH_NONE)}, 63 | 64 | {ENC_MI, __combine__(OP_HASH_R, OP_HASH_IMM, OP_HASH_NONE, OP_HASH_NONE)}, 65 | {ENC_MI, __combine__(OP_HASH_M, OP_HASH_IMM, OP_HASH_NONE, OP_HASH_NONE)}, 66 | 67 | {ENC_ZO, __combine__(OP_HASH_NONE, OP_HASH_NONE, OP_HASH_NONE, OP_HASH_NONE)}, 68 | {ENC_O, __combine__(OP_HASH_R, OP_HASH_IMM, OP_HASH_NONE, OP_HASH_NONE)}, 69 | {ENC_I, __combine__(OP_HASH_IMM, OP_HASH_NONE, OP_HASH_NONE, OP_HASH_NONE)}, 70 | {ENC_D, __combine__(OP_HASH_REL, OP_HASH_NONE, OP_HASH_NONE, OP_HASH_NONE)}, 71 | 72 | {ENC_M, __combine__(OP_HASH_R, OP_HASH_NONE, OP_HASH_NONE, OP_HASH_NONE)}, 73 | {ENC_M, __combine__(OP_HASH_M, OP_HASH_NONE, OP_HASH_NONE, OP_HASH_NONE)}, 74 | 75 | {ENC_OI, __combine__(OP_HASH_R, OP_HASH_IMM, OP_HASH_NONE, OP_HASH_NONE)}, 76 | }; 77 | 78 | extern "C" enum enc_ident op_ident_identify(enum operands *input, instr_encode_table_t 
*instr_ref) { 79 | uint32_t hash_key = 0; 80 | 81 | for (uint8_t i = 0; i < 4; i++) 82 | hash_key |= (uint32_t)op_hash(input[i]) << (24 - i * 8); 83 | 84 | auto j = 0; 85 | multimap lookup_table; 86 | while (instr_ref[j].opcode_size) { 87 | auto range = master.equal_range(instr_ref[j].ident); 88 | for (auto it = range.first; it != range.second; it++) { 89 | lookup_table.insert({it->second, instr_ref[j].ident}); 90 | } 91 | j++; 92 | } 93 | 94 | if (lookup_table.find(hash_key) != lookup_table.end()) { 95 | return lookup_table.find(hash_key)->second; 96 | } else { 97 | uint8_t k = 0; 98 | while (instr_ref[k].opcode_size) { 99 | if (instr_ref[k].ident == ENC_IGN) return ENC_IGN; 100 | k++; 101 | } /* Fall though: */ 102 | err("No corresponding instruction opcode found."); 103 | return ENC_NULL; 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /libjas/parse.c: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * MIT License 4 | * Copyright (c) 2023-2024 Alvin Cheng 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | * 24 | * @see `LICENSE` 25 | */ 26 | 27 | #include "parse.h" 28 | #include "tabs.c" 29 | #include 30 | #include 31 | #include 32 | 33 | enum instructions parse_instr_name(char *name) { 34 | for (int i = 0; name[i]; i++) 35 | name[i] = (unsigned char)tolower((int)name[i]); 36 | 37 | /// @note `instr_tab_names` is defined in `tabs.c` 38 | /// depicting instruction names in order of enum. 39 | 40 | for (int i = 0; i < sizeof(instr_tab_names); i++) { 41 | if (instr_tab_names[i][0] != name[0]) continue; 42 | if (strcmp(instr_tab_names[i], name) == 0) 43 | return (enum instructions)i + 1; 44 | } 45 | 46 | return INSTR_NOTHING; 47 | } 48 | 49 | uint64_t parse_str_num(char *name) { 50 | const uint8_t len = strlen(name); 51 | int base = 10; 52 | 53 | if (name[len] == 'h') base = 16; 54 | if (name[0] == '0' && name[1] == 'x') { 55 | base = 16; 56 | name += 2; 57 | } 58 | 59 | if (len > 16 && base == 16) { 60 | err("Number is out of range"); 61 | return 0; 62 | } 63 | 64 | return strtoull(name, NULL, base); 65 | } 66 | 67 | // clang-format off 68 | 69 | /// @brief A list of all instruction names in order of the enum. 
70 | /// @see `registers` in `register.h` (Which this is in order of) 71 | 72 | const char *reg_names[] = { 73 | "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", 74 | "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", 75 | "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", 76 | "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", 77 | "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b", 78 | "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w", 79 | "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d", 80 | "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", 81 | "spl", "bpl", "sil", "dil", "ip", "eip", "rip", 82 | }; 83 | 84 | // clang-format on 85 | 86 | enum registers parse_reg(char *name) { 87 | for (int i = 0; name[i]; i++) 88 | name[i] = (unsigned char)tolower((int)name[i]); 89 | 90 | for (int i = 0; i < sizeof(reg_names); i++) 91 | if (strcmp(reg_names[i], name) == 0) return (enum registers)i; 92 | 93 | return NULL; 94 | } -------------------------------------------------------------------------------- /libjas/pre.c: -------------------------------------------------------------------------------- 1 | #include "error.h" 2 | #include "instruction.h" 3 | #include "mode.h" 4 | #include "operand.h" 5 | #include "register.h" 6 | 7 | // Notes on refactoring of these: 8 | // - Only MI and I encoders need to have different operand sizes. 9 | // The rules for previous `same_operand_sizes` can be applied to 10 | // MR and similar encoders, check `mr_rm_common`. 
11 | // 12 | 13 | #define DEFINE_PRE_ENCODER(name) \ 14 | static void name(operand_t *op_arr, buffer_t *buf, instr_encode_table_t *instr_ref, enum modes mode) 15 | 16 | DEFINE_PRE_ENCODER(pre_ret) { 17 | if (op_sizeof(op_arr[0].type) != 16) 18 | err("Other operand sizes cannot be used with this instruction."); 19 | } 20 | 21 | DEFINE_PRE_ENCODER(pre_small_operands) { 22 | if (op_sizeof(op_arr[1].type) < 16) 23 | err("Invalid operand size for MOVZX/MOVSX instruction"); 24 | } 25 | -------------------------------------------------------------------------------- /libjas/register.c: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * Copyright (c) 2023-2024 Alvin Cheng 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | * 23 | * @see `LICENSE` 24 | */ 25 | 26 | #include "register.h" 27 | #include 28 | #include 29 | 30 | uint8_t reg_lookup_val(enum registers *input) { 31 | uint8_t lookup[] = { 32 | REG_VALUE_AL, REG_VALUE_CL, REG_VALUE_DL, REG_VALUE_BL, REG_VALUE_AH, 33 | REG_VALUE_CH, REG_VALUE_DH, REG_VALUE_BH, REG_VALUE_AX, REG_VALUE_CX, REG_VALUE_DX, 34 | REG_VALUE_BX, REG_VALUE_SP, REG_VALUE_BP, REG_VALUE_SI, REG_VALUE_DI, REG_VALUE_EAX, 35 | REG_VALUE_ECX, REG_VALUE_EDX, REG_VALUE_EBX, REG_VALUE_ESP, REG_VALUE_EBP, REG_VALUE_ESI, 36 | REG_VALUE_EDI, REG_VALUE_RAX, REG_VALUE_RCX, REG_VALUE_RDX, REG_VALUE_RBX, REG_VALUE_RSP, 37 | REG_VALUE_RBP, REG_VALUE_RSI, REG_VALUE_RDI, REG_VALUE_R8B, REG_VALUE_R9B, REG_VALUE_R10B, 38 | REG_VALUE_R11B, REG_VALUE_R12B, REG_VALUE_R13B, REG_VALUE_R14B, REG_VALUE_R15B, REG_VALUE_R8W, 39 | REG_VALUE_R9W, REG_VALUE_R10W, REG_VALUE_R11W, REG_VALUE_R12W, REG_VALUE_R13W, REG_VALUE_R14W, 40 | REG_VALUE_R15W, REG_VALUE_R8D, REG_VALUE_R9D, REG_VALUE_R10D, REG_VALUE_R11D, REG_VALUE_R12D, 41 | REG_VALUE_R13D, REG_VALUE_R14D, REG_VALUE_R15D, REG_VALUE_R8, REG_VALUE_R9, REG_VALUE_R10, 42 | REG_VALUE_R11, REG_VALUE_R12, REG_VALUE_R13, REG_VALUE_R14, REG_VALUE_R15, REG_VALUE_SPL, 43 | REG_VALUE_BPL, REG_VALUE_SIL, REG_VALUE_DIL, REG_VALUE_IP, REG_VALUE_EIP, REG_VALUE_RIP}; 44 | 45 | return lookup[(unsigned int)*input]; 46 | } 47 | 48 | bool reg_needs_rex(enum registers input) { 49 | if (input > REG_RDI && input < REG_DIL) 50 | return true; 51 | 52 | return false; 53 | } 54 | -------------------------------------------------------------------------------- /libjas/scripts/compile.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const fs = require('fs'); 4 | const file = process.argv[2]; 5 | 6 | let valid_data = [] 7 | 8 | fs.readFileSync(file, 'utf8').split('\n').forEach((line) => { 9 | let valid_lines = []; 10 | if (line[0] === '#') return; 11 | if (line === '') return; 12 | 13 | 
valid_lines.push(line); 14 | 15 | valid_lines.forEach((line) => { 16 | line = line.replaceAll(' ', ''); 17 | let parts = line.split('|'); 18 | valid_data.push(parts); 19 | }) 20 | }); 21 | 22 | let groups = {}; 23 | 24 | valid_data.forEach((line) => { 25 | if (groups[line[0]] == undefined) groups[line[0]] = []; 26 | groups[line[0]].push(line); 27 | }); 28 | 29 | function addQuotes(str) { return `{${str}}`; }; 30 | function countComma(input) { return (input.match(/,/g) || []).length; } 31 | 32 | let namesList = []; 33 | let output = ""; 34 | 35 | for (const [key, instr] of Object.entries(groups)) { 36 | output = output.concat(`instr_encode_table_t ${key.toString()}[] = {\n`) 37 | instr.forEach((group) => { 38 | if (group[1] == "-") group[1] = "ign"; 39 | const ident = `ENC_${group[1].toUpperCase()}`; 40 | if (group[2] === "-") group[2] = "NULL"; 41 | const ext = group[2]; 42 | const opcode = addQuotes(group[3] == "-" ? "NULL" : group[3]); 43 | const opcode_size = countComma(group[3]) + 1; 44 | const byte_opcode = addQuotes(group[4] == "-" ? 
"NULL" : group[4]); 45 | let byte_opcode_size = countComma(group[4]) + 1; 46 | if (group[4] == "-") byte_opcode_size = 0; 47 | 48 | let pre; 49 | if (group[5] == "-") 50 | pre = "NULL"; 51 | else 52 | pre = `&${group[5]}`; 53 | 54 | // Putting it all together 55 | output = output.concat( 56 | ` {${ident}, ${ext}, ${opcode}, ${byte_opcode}, ${opcode_size}, ${pre}, ${byte_opcode_size}},\n`); 57 | }); 58 | 59 | output = output.concat(` INSTR_TAB_NULL,\n};\n`); 60 | namesList.push(`"${key.toString().toLowerCase().replaceAll("_", "")}"`); 61 | } 62 | 63 | const names = `char *instr_tab_names[] = {${namesList.join(', ')}};` 64 | const prepend = `#include "instruction.h" \n#include "pre.c"\n\n`; 65 | fs.writeFileSync('tabs.c', prepend + output + names); 66 | 67 | process.exit(0); -------------------------------------------------------------------------------- /libjas/scripts/tablegen.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | // Usage: node tablegen.js
 4 | // And you can use `>>` to append to a file, usually for the instructions.tbl file.
 5 | 
 6 | // Note: if argv[3] zo, just type n an opcode and everything will fall into place automatically.
 7 | 
 8 | const process = require('process');
 9 | 
10 | let name = process.argv[2];
11 | let ident = process.argv[3];
12 | let opcode_ext = process.argv[4];
13 | let opcode = process.argv[5];
14 | let byte_opcode = process.argv[6];
15 | let pre = process.argv[7];
16 | 
17 | if (process.argv[3] == 'zo') {
18 |   opcode = process.argv[4];
19 |   byte_opcode = "-";
20 |   pre = "no_operands";
21 |   opcode_ext = "-";
22 | }
23 | 
24 | let out = ""
25 | let len = 5 - name.length
26 | if (len < 0) len = 1;
27 | 
28 | out = out.concat(`  ${name}${" ".repeat(len)}|`);
29 | out = out.concat(` ${ident}${" ".repeat(9 - ident.length)}|`);
30 | out = out.concat(` ${opcode_ext}${" ".repeat(17 - opcode_ext.length)}|`);
31 | out = out.concat(` ${opcode}${" ".repeat(18 - opcode.length)}|`);
32 | out = out.concat(` ${byte_opcode}${" ".repeat(18 - byte_opcode.length)}|`);
33 | out = out.concat(` ${pre}\n`);
34 | 
35 | console.log(out);
36 | process.exit(0);
37 | 


--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cheng-alvin/jas/ac67b418ca006ed11b7b9253cc45d727f9544281/logo.png


--------------------------------------------------------------------------------
/test:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs every executable file in tests/ and stops at the first failing one.
 3 | cd tests || exit 1
 4 | 
 5 | for file in *; do
 6 |     if [[ -f "$file" && -x "$file" && "$file" != "test.sh" ]]; then
 7 |         # A non-zero exit status from any test fails the whole run.
 8 |         if ! ./"$file"; then
 9 |             echo "Test '$file' failed" >&2
10 |             exit 1
11 |         fi
12 |     fi
13 | done
14 | 


--------------------------------------------------------------------------------
/tests/Makefile:
--------------------------------------------------------------------------------
 1 | CC = clang
 2 | # Flags shared by every test build: libjas include paths, the C++ runtime, debug info and ASan.
 3 | CFLAGS_COMMON = -I ../libjas/include -I ../libjas/ -lstdc++ -g -O0 -fsanitize=address 
 4 | CFLAGS = $(CFLAGS_COMMON)
 5 | # One test executable per .c file in this directory.
 6 | TESTS = $(patsubst %.c, %, $(wildcard *.c)) 
 7 | 
 8 | all: CFLAGS = $(CFLAGS_COMMON)
 9 | all: $(TESTS)
10 | # Every test depends on the debug archive and is linked from its own object file.
11 | $(TESTS): libjas_debug.a
12 | $(TESTS): %: %.o 
13 | 	$(CC) -o $@ $< ../build/libjas_debug.a $(CFLAGS)
14 | # Delegates to the libjas Makefile: tabs.c first, then the debug archive.
15 | libjas_debug.a: 
16 | 	$(MAKE) -C ../libjas tabs.c
17 | 	$(MAKE) -C ../libjas libjas_debug.a
18 | # Deletes object files, archives and the built test executables.
19 | clean:
20 | 	@find . -name "*.o" -type f -delete
21 | 	@find . -name "*.a" -type f -delete
22 | 	@rm -rf $(TESTS)
23 | 
24 | .PHONY: clean all
25 | 


--------------------------------------------------------------------------------
/tests/endian.c:
--------------------------------------------------------------------------------
 1 | #include "endian.h"
 2 | #include "test.h"
 3 | /* Verifies that endian() returns the two input bytes in swapped order. */
 4 | Test(endian, endian) {
 5 |   uint8_t data[] = {0xBE, 0xEF};
 6 |   const uint8_t expected[] = {0xEF, 0xBE};
 7 | 
 8 |   uint8_t *result = endian(data, sizeof(data));
 9 | 
10 |   assert(result[0] == expected[0]);
11 |   assert(result[1] == expected[1]);
12 | }
13 | /* Runs the endian suite; a failed assertion exits with status 1 before the return is reached. */
14 | int main(void) {
15 |   TestSuite(endian);
16 |   RunTest(endian, endian);
17 | 
18 |   return 0;
19 | }
20 | 


--------------------------------------------------------------------------------
/tests/example.c:
--------------------------------------------------------------------------------
 1 | /* Extracted and modified from `README.md` */
 2 | #include 
 3 | #include 
 4 | #include 
 5 | #include 
 6 | /* Error callback registered in main(): prints the message to stderr and exits with status 1. */
 7 | void error_handler(const char *msg) {
 8 |   fprintf(stderr, "Error: %s\n", msg);
 9 |   exit(1);
10 | }
11 | /* Assembles a jump, a mov and the jump's target label in long mode, then prints the bytes as hex. */
12 | int main(void) {
13 |   err_add_callback(error_handler);
14 |   instruction_t *instr[4];
15 | 
16 |   instr[2] = label_gen("label", LABEL_LOCAL);
17 |   instr[1] = instr_gen(INSTR_MOV, 2, r64(REG_RAX), imm64(0));
18 |   instr[0] = instr_gen(INSTR_JMP, 1, rel32("label", 0));
19 |   /* Only slots 0..2 are filled; the 3 passed below is presumably the entry count. */
20 |   buffer_t buf = codegen(MODE_LONG, &instr, 3, CODEGEN_RAW);
21 |   /* size_t index: no signed/unsigned comparison against buf.len, as in the loop in test.h. */
22 |   for (size_t i = 0; i < buf.len; i++)
23 |     printf("%02X ", buf.data[i]);
24 | 
25 |   free(buf.data);
26 |   return 0;
27 | }


--------------------------------------------------------------------------------
/tests/instruction.c:
--------------------------------------------------------------------------------
 1 | #include "instruction.h"
 2 | #include "test.h"
 3 | /* Checks that instr_gen() records the mnemonic and operands and leaves unused slots as OP_NULL. */
 4 | Test(instr, instr_gen) {
 5 |   instruction_t *instr = instr_gen(INSTR_MOV, 2, r64(REG_RAX), imm64(0));
 6 |   assert_eq(instr->instr, INSTR_MOV);
 7 |   /* Operand 0: the RAX register. */
 8 |   assert_eq(instr->operands[0].type, OP_R64);
 9 |   assert_eq(*(enum registers *)instr->operands[0].data, REG_RAX);
10 |   assert_eq(instr->operands[0].offset, 0);
11 |   /* Operand 1: the 64-bit immediate 0. */
12 |   assert_eq(instr->operands[1].type, OP_IMM64);
13 |   assert_eq(*(uint64_t *)instr->operands[1].data, 0x0);
14 |   assert_eq(instr->operands[1].offset, 0);
15 |   /* The two remaining operand slots must be OP_NULL. */
16 |   assert_eq(instr->operands[2].type, OP_NULL);
17 |   assert_eq(instr->operands[3].type, OP_NULL);
18 | 
19 |   instr_free(instr);
20 |   /* Second case: a rel32 operand carries its target name in .label. */
21 |   instr = instr_gen(INSTR_MOV, 2, r32(REG_EAX), rel32("label", 0));
22 | 
23 |   assert_eq(instr->instr, INSTR_MOV);
24 | 
25 |   assert_eq(instr->operands[0].type, OP_R32);
26 |   assert_eq(*(enum registers *)instr->operands[0].data, REG_EAX);
27 |   assert_eq(instr->operands[0].offset, 0);
28 | 
29 |   assert_eq(instr->operands[1].type, OP_REL32);
30 |   assert_str_eq((char *)instr->operands[1].label, "label", "Expected label to be 'label'\n");
31 |   assert_eq(instr->operands[1].offset, 0);
32 | 
33 |   assert_eq(instr->operands[2].type, OP_NULL);
34 |   assert_eq(instr->operands[3].type, OP_NULL);
35 | 
36 |   instr_free(instr);
37 | }
38 | /* Runs the instr suite; a failed assertion exits with status 1 before the return is reached. */
39 | int main(void) {
40 |   TestSuite(instr);
41 | 
42 |   RunTest(instr, instr_gen);
43 |   return 0;
44 | }


--------------------------------------------------------------------------------
/tests/mov.c:
--------------------------------------------------------------------------------
 1 | #include "jas.h"
 2 | #include "test.h"
 3 | /* Expected encodings per operand form; no trailing `;` so each macro expands to a plain initializer. */
 4 | #define mr_bytes {0x48, 0x89, 0x80, 0xff, 0x00, 0x00, 0x00}
 5 | #define rm_bytes {0x48, 0x8B, 0x80, 0xff, 0x00, 0x00, 0x00}
 6 | #define oi_bytes {0x48, 0xB8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}
 7 | 
 8 | #define mi_bytes \
 9 |   {0x67, 0xC7, 0x80, 0xFF, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF}
10 | 
11 | // TODO: Encapsulate this into a global macro for all tests
12 | // Defines test mov::<name>: assembles `mov op1, op2` in long mode and compares the bytes with
13 | // `expected_bytes`. The operand array literal is used without `&` so it decays to `operand_t *`.
14 | #define MOV_TEST(name, op1, op2, expected_bytes)               \
15 |   Test(mov, name) {                                            \
16 |     err_add_callback(test_error_handler);                      \
17 |     const instruction_t instr = {                              \
18 |         .instr = INSTR_MOV,                                    \
19 |         .operands = (operand_t[]){op1, op2, OP_NONE, OP_NONE}, \
20 |     };                                                         \
21 |                                                                \
22 |     const unsigned char expected[] = expected_bytes;           \
23 |     const buffer_t buf = assemble_instr(MODE_LONG, &instr);    \
24 |                                                                \
25 |     assert_eq_buf_arr(buf, expected, sizeof(expected));        \
26 |     free(buf.data);                                            \
27 |   }
28 | /* One test per operand form: mem <- reg, reg <- mem, reg <- imm64 and mem <- imm32. */
29 | MOV_TEST(mr, m64, r64, mr_bytes);
30 | MOV_TEST(rm, r64, m64, rm_bytes);
31 | MOV_TEST(oi, r64, imm64, oi_bytes);
32 | MOV_TEST(mi, m32, imm32, mi_bytes);
33 | /* Runs the four mov encoding tests in the order they are defined above. */
34 | int main(void) {
35 |   TestSuite(mov);
36 |   RunTest(mov, mr);
37 |   RunTest(mov, rm);
38 |   RunTest(mov, oi);
39 |   RunTest(mov, mi);
40 | 
41 |   return 0;
42 | }
43 | 


--------------------------------------------------------------------------------
/tests/operand.c:
--------------------------------------------------------------------------------
  1 | #include "operand.h"
  2 | #include "buffer.h"
  3 | #include "encoder.h"
  4 | #include "instruction.h"
  5 | #include "rex.h"
  6 | #include "test.h"
  7 | /* Checks the prefix bytes that op_write_prefix() appends for each operand size and mode. */
  8 | Test(operand, write_prefix) {
  9 |   buffer_t buf = BUF_NULL;
 10 |   /* Register-first and memory-first operand pairs for each operand size. */
 11 |   const operand_t op_arr16[] = {r16, imm16, OP_NONE, OP_NONE};
 12 |   const operand_t op_addr16[] = {m16, imm16, OP_NONE, OP_NONE};
 13 |   const operand_t op_arr32[] = {r32, imm32, OP_NONE, OP_NONE};
 14 |   const operand_t op_addr32[] = {m32, imm32, OP_NONE, OP_NONE};
 15 |   const operand_t op_arr64[] = {r64, imm64, OP_NONE, OP_NONE};
 16 |   const operand_t op_addr64[] = {m64, imm64, OP_NONE, OP_NONE};
 17 |   /* Writes the prefixes for `operands`, then checks the byte at `index` of the shared buffer. */
 18 | #define RUN_TEST(operands, mode, index, expected) \
 19 |   op_write_prefix(&buf, operands, mode);          \
 20 |   assert_eq(buf.data[index], expected);
 21 |   /* `buf` is never reset, so the expected index advances past the bytes written so far. */
 22 |   RUN_TEST(op_arr16, MODE_PROTECTED, 0, 0x66);
 23 |   /* A 16-bit memory operand in protected mode is expected to add 0x67 followed by 0x66. */
 24 |   op_write_prefix(&buf, op_addr16, MODE_PROTECTED);
 25 |   assert_eq(buf.data[1], 0x67);
 26 |   assert_eq(buf.data[2], 0x66);
 27 |   /* The 32-bit cases in real mode expect the same bytes as the 16-bit ones above. */
 28 |   RUN_TEST(op_arr32, MODE_REAL, 3, 0x66);
 29 | 
 30 |   op_write_prefix(&buf, op_addr32, MODE_REAL);
 31 |   assert_eq(buf.data[4], 0x67);
 32 |   assert_eq(buf.data[5], 0x66);
 33 |   /* In long mode each 64-bit case is expected to add a single REX_W byte. */
 34 |   RUN_TEST(op_arr64, MODE_LONG, 6, REX_W);
 35 |   RUN_TEST(op_addr64, MODE_LONG, 7, REX_W);
 36 |   free(buf.data);
 37 | }
 38 | /* Checks that op_ident_identify() picks the encoding identity from the operand types. */
 39 | #define sample_tab instr_table[1] /* Corresponds to the `mov` table */
 40 | Test(operand, ident_identify) {
 41 |   const enum operands input[] = {OP_R8, OP_R16, OP_NULL, OP_NULL};
 42 |   const enum operands input2[] = {OP_R8, OP_M16, OP_NULL, OP_NULL};
 43 |   /* A register second operand must yield ENC_MR, a memory second operand ENC_RM. */
 44 |   assert_eq(op_ident_identify(input, sample_tab), ENC_MR);
 45 |   assert_eq(op_ident_identify(input2, sample_tab), ENC_RM);
 46 | }
 47 | 
 48 | // Temporary function since the removal of `op_construct_operand` from the
 49 | // real codebase in commit number c77ffeca71222c8e837fd01348aafcc484d5adf6.
 50 | // Please see the commit message of 6422cd1f5124f34c7e2575282a6df1046ca9efaa
 51 | // for more information.
 52 | 
 53 | // Please also see the commit message of 964d162c67f60daee870ade242439097425eda57
 54 | // for more information regarding the refactoring works of the operand.c
 55 | // file and the justification for the removal of  `op_construct_operand`.
 56 | /* Bundles the four arguments into an operand_t value; members not set here stay zeroed. */
 57 | operand_t op_construct_operand(enum operands type, size_t offset, void *data, char *label) {
 58 |   operand_t built = {0};
 59 | 
 60 |   built.type = type;
 61 |   built.offset = offset;
 62 |   built.data = data;
 63 |   built.label = label;
 64 |   return built;
 65 | }
 66 | /* Table-driven check of op_modrm_mode() for several register/offset combinations. */
 67 | Test(operand, modrm_mode) {
 68 |   struct {
 69 |     operand_t operand;
 70 |     int expected_mode;
 71 |   } test_cases[] = {
 72 |       {op_construct_operand(OP_M64, 0, &(enum registers){REG_RIP}, ""), OP_MODRM_INDIRECT},
 73 |       {op_construct_operand(OP_M64, 0, &(enum registers){REG_RAX}, ""), OP_MODRM_INDIRECT},
 74 |       {op_construct_operand(OP_M64, 8, &(enum registers){REG_RAX}, ""), OP_MODRM_DISP8},
 75 |       {op_construct_operand(OP_M64, 0, &(enum registers){REG_RBP}, ""), OP_MODRM_DISP8},
 76 |       {op_construct_operand(OP_M64, 0xFFFF, &(enum registers){REG_RAX}, ""), OP_MODRM_DISP32},
 77 |   };
 78 |   /* Every case must map to the mode listed next to it. */
 79 |   for (size_t i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) {
 80 |     assert_eq(op_modrm_mode(test_cases[i].operand), test_cases[i].expected_mode);
 81 |   }
 82 | }
 83 | /* Checks which opcode array of the table entry op_write_opcode() returns. */
 84 | Test(operand, write_opcode) {
 85 |   const operand_t op_arr[] = {r64, imm64, OP_NONE, OP_NONE};
 86 |   const instr_encode_table_t instr_ref = {ENC_MI, 0, {0xC7}, {0xC6}, 1, NULL, true};
 87 |   /* 64-bit operands must select the regular opcode array (compared by address). */
 88 |   uint8_t *out = op_write_opcode(op_arr, &instr_ref);
 89 |   assert_eq(out, instr_ref.opcode);
 90 |   /* 8-bit operands must select the byte-sized opcode array instead. */
 91 |   const operand_t op_arr_byte[] = {r8, imm8, OP_NONE, OP_NONE};
 92 |   out = op_write_opcode(op_arr_byte, &instr_ref);
 93 |   assert_eq(out, instr_ref.byte_instr_opcode);
 94 | }
 95 | 
 96 | /* Runs every test defined in this file; op_construct_operand() is a helper, not a test. */
 97 | int main(void) {
 98 |   TestSuite(operand);
 99 | 
100 |   RunTest(operand, write_prefix);
101 |   RunTest(operand, ident_identify);
102 |   RunTest(operand, modrm_mode);
103 |   RunTest(operand, write_opcode);
104 | 
105 |   return 0;
106 | }
107 | 


--------------------------------------------------------------------------------
/tests/parse.c:
--------------------------------------------------------------------------------
 1 | #include "parse.h"
 2 | #include "test.h"
 3 | /* Checks that parse_instr_name() maps the mnemonic "CMOVAE" to INSTR_CMOVAE. */
 4 | Test(parse, instr_name) {
 5 |   const char name[] = "CMOVAE"; // Sample instruction
 6 |   enum instructions instr = parse_instr_name(name);
 7 |   assert_eq(instr, INSTR_CMOVAE);
 8 | }
 9 | /* Checks that parse_str_num() reads both "0x1A" and "26" as the value 26. */
10 | Test(parse, str_num) {
11 |   const char hex[] = "0x1A";
12 |   const char dec[] = "26";
13 |   uint64_t hex_num = parse_str_num(hex);
14 |   uint64_t dec_num = parse_str_num(dec);
15 |   assert_eq(hex_num, 26);
16 |   assert_eq(dec_num, 26);
17 | }
18 | /* Checks that parse_reg() maps the name "RAX" to REG_RAX. */
19 | Test(parse, reg) {
20 |   const char name[] = "RAX";
21 |   enum registers reg = parse_reg(name);
22 |   assert_eq(reg, REG_RAX);
23 | }
24 | /* Runs the three parse tests; a failed assertion exits with status 1 before the return. */
25 | int main(void) {
26 |   TestSuite(parse);
27 |   RunTest(parse, instr_name);
28 |   RunTest(parse, str_num);
29 |   RunTest(parse, reg);
30 | 
31 |   return 0;
32 | }
33 | 


--------------------------------------------------------------------------------
/tests/test.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * MIT License
  3 |  * Copyright (c) 2023-2024 Alvin Cheng 
  4 |  *
  5 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  6 |  * of this software and associated documentation files (the "Software"), to deal
  7 |  * in the Software without restriction, including without limitation the rights
  8 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 |  * copies of the Software, and to permit persons to whom the Software is
 10 |  * furnished to do so, subject to the following conditions:
 11 |  *
 12 |  * The above copyright notice and this permission notice shall be included in all
 13 |  * copies or substantial portions of the Software.
 14 |  *
 15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 |  * SOFTWARE.
 22 |  *
 23 |  * ---
 24 |  *
 25 |  * Simple and dumb testing framework used internally for
 26 |  * testing the assembler's instruction encoding and other
 27 |  * chores. Feel free to copy and modify this for your own
 28 |  * testing and programming purposes. :)
 29 |  */
 30 | 
 31 | #ifndef TEST_H
 32 | #define TEST_H
 33 | 
 34 | #include "operand.h"
 35 | #include "register.h"
 36 | #include 
 37 | #include 
 38 | #include 
 39 | #include 
 40 | /* printf-style helper used by the macros below; `fmt` is const because it is only read. */
 41 | static inline void test_printf(const char *fmt, ...) {
 42 |   va_list args;
 43 |   va_start(args, fmt);
 44 |   vprintf(fmt, args);
 45 |   va_end(args);
 46 | }
 47 | /* Reports `x` and exits with status 1 when it compares equal to NULL. Expands to a bare `if`. */
 48 | #define assert_not_null(x)                               \
 49 |   if ((x) == NULL) {                                     \
 50 |     test_printf("\nAssertion failed: %s is NULL\n", #x); \
 51 |     exit(1);                                             \
 52 |   }
 53 | /* Calls test_<name>_<func>() and prints an OK line; one expression, so safe under an unbraced `if`. */
 54 | #define RunTest(name, func) \
 55 |   (test_##name##_##func(),  \
 56 |    test_printf("[ \033[32mOK\033[0m ] %s::%s\n", #name, #func))
 57 | /* Reports the expression and exits with status 1 when `x` is false. Expands to a bare `if`. */
 58 | #define assert(x)                                            \
 59 |   if (!(x)) {                                                \
 60 |     test_printf("\nAssertion failed: %s is not true\n", #x); \
 61 |     exit(1);                                                 \
 62 |   }
 63 | /* Reports both expressions and exits with status 1 when `x != y`. Expands to a bare `if`. */
 64 | #define assert_eq(x, y)                                    \
 65 |   if ((x) != (y)) {                                        \
 66 |     test_printf("\nAssertion failed: %s != %s\n", #x, #y); \
 67 |     exit(1);                                               \
 68 |   }
 69 | /* Prints `message` and exits with status 1 when strcmp() reports that `x` and `y` differ. */
 70 | #define assert_str_eq(x, y, message) \
 71 |   if (strcmp((x), (y)) != 0) {       \
 72 |     test_printf("\n%s", message);    \
 73 |     exit(1);                         \
 74 |   }
 75 | /* Opens the definition of test_<name>_<func>; `(void)` makes it a real prototype taking no arguments. */
 76 | #define Test(name, func) \
 77 |   void test_##name##_##func(void)
 78 | /* Reports `message` and exits with status 1; one expression, so `exit` cannot escape an unbraced `if`. */
 79 | #define fail(message)                              \
 80 |   (test_printf("\nTest failed: %s\n", (message)), \
 81 |    exit(1))
 82 | /* Expands to nothing; it only labels the suite at the top of each test main(). */
 83 | #define TestSuite(name)
 84 | /* Sample register operands, one per operand size (AL, AX, EAX, RAX). */
 85 | static operand_t r8 = (operand_t){.type = OP_R8, .offset = 0, .data = &(enum registers){REG_AL}};
 86 | static operand_t r16 = (operand_t){.type = OP_R16, .offset = 0, .data = &(enum registers){REG_AX}};
 87 | static operand_t r32 = (operand_t){.type = OP_R32, .offset = 0, .data = &(enum registers){REG_EAX}};
 88 | static operand_t r64 = (operand_t){.type = OP_R64, .offset = 0, .data = &(enum registers){REG_RAX}};
 89 | /* Sample memory operands: the same registers combined with an offset of 0xff. */
 90 | static operand_t m8 = (operand_t){.type = OP_M8, .offset = 0xff, .data = &(enum registers){REG_AL}};
 91 | static operand_t m16 = (operand_t){.type = OP_M16, .offset = 0xff, .data = &(enum registers){REG_AX}};
 92 | static operand_t m32 = (operand_t){.type = OP_M32, .offset = 0xff, .data = &(enum registers){REG_EAX}};
 93 | static operand_t m64 = (operand_t){.type = OP_M64, .offset = 0xff, .data = &(enum registers){REG_RAX}};
 94 | /* All-ones immediates; imm64 is backed by unsigned long long so it is 64 bits wide on every target. */
 95 | static operand_t imm8 = (operand_t){.type = OP_IMM8, .offset = 0, .data = &(unsigned char){0xFF}};
 96 | static operand_t imm16 = (operand_t){.type = OP_IMM16, .offset = 0, .data = &(unsigned short){0xFFFF}};
 97 | static operand_t imm32 = (operand_t){.type = OP_IMM32, .offset = 0, .data = &(unsigned int){0xFFFFFFFF}};
 98 | static operand_t imm64 = (operand_t){.type = OP_IMM64, .offset = 0, .data = &(unsigned long long){0xFFFFFFFFFFFFFFFF}};
 99 | /* Error callback for the tests: any reported error is turned into a test failure via fail(). */
100 | static void test_error_handler(const char *msg) {
101 |   fail(msg);
102 | }
103 | /* Fails unless `a` holds exactly the `arr_len` bytes of `b`; a length mismatch exits before any byte is read. */
104 | #define assert_eq_buf(a, b) assert_eq_buf_arr(a, b.data, b.len)
105 | #define assert_eq_buf_arr(a, b, arr_len)                                               \
106 |   if ((a).len != (arr_len))                                                            \
107 |     (test_printf("\nAssertion failed: %s is not the same as %s\n", #a, #b), exit(1)); \
108 |   for (size_t i = 0; i < (a).len; i++) {                                               \
109 |     if ((a).data[i] != (b)[i]) {                                                       \
110 |       test_printf("\nAssertion failed: `%s` is not the same as `%s`\n", #a, #b);       \
111 |       exit(1);                                                                         \
112 |     }                                                                                  \
113 |   }
114 | 
115 | #endif
116 | 


--------------------------------------------------------------------------------