├── .travis.yml ├── CMakeLists.txt ├── LICENSE.md ├── NEWS.md ├── README.md ├── appveyor.yml ├── riscv_core ├── codegen.c ├── decode.c ├── decode.h ├── riscv.c ├── riscv.h ├── riscv_common.c ├── riscv_conf.h ├── riscv_jit.c └── riscv_private.h ├── riscv_vm ├── args.cpp ├── elf.cpp ├── elf.h ├── file.h ├── main.cpp ├── memory.h ├── state.h ├── syscall.cpp └── syscall_sdl.cpp ├── tests ├── coremark │ └── coremark.elf ├── dhrystone │ ├── dhrystone.c │ ├── dhrystone.elf │ ├── dhrystone.h │ ├── dhrystone_main.c │ └── encoding.h ├── doom │ ├── README.md │ └── doom.elf ├── helloworld │ ├── helloworld │ └── main.c ├── linpack │ ├── bench.h │ ├── linpack.c │ └── linpack.elf ├── mandelbrot │ ├── mandelbrot.c │ └── mandelbrot.elf ├── multiply │ ├── dataset1.h │ ├── multiply.c │ └── multiply.elf ├── pi │ ├── pi.c │ └── pi.elf ├── puzzle │ ├── bench.h │ ├── puzzle.c │ └── puzzle.elf ├── quake │ ├── quake_320.elf │ └── quake_640.elf ├── rsort │ ├── dataset1.h │ ├── rsort.c │ └── rsort.elf ├── smallpt │ ├── smallpt.cpp │ └── smallpt.elf ├── towers │ ├── towers.c │ └── towers.elf └── whetstone │ ├── bench.h │ ├── whetstone.c │ └── whetstone.elf └── tinycg ├── tinycg.c └── tinycg.h /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | compiler: 3 | - gcc 4 | - clang 5 | 6 | script: 7 | - mkdir build 8 | - cd build 9 | - cmake .. 
10 | - make 11 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0) 2 | project(riscv) 3 | set(CMAKE_CXX_STANDARD 14) 4 | # Build options; the feature toggles below are forwarded to the sources as 0/1 RISCV_VM_* definitions. 5 | option(RVVM_STRICT "Enable strict compiler warnings" OFF) 6 | 7 | option(RVVM_X64_JIT "Enable x64 JIT" OFF) 8 | if (${RVVM_X64_JIT}) 9 | add_definitions(-DRISCV_VM_X64_JIT=1) 10 | else() 11 | add_definitions(-DRISCV_VM_X64_JIT=0) 12 | endif() 13 | 14 | option(RVVM_SUPPORT_RV32M "Enable RV32M ISA" ON) 15 | if (${RVVM_SUPPORT_RV32M}) 16 | add_definitions(-DRISCV_VM_SUPPORT_RV32M=1) 17 | else() 18 | add_definitions(-DRISCV_VM_SUPPORT_RV32M=0) 19 | endif() 20 | 21 | option(RVVM_SUPPORT_RV32F "Enable RV32F ISA" ON) 22 | if (${RVVM_SUPPORT_RV32F}) 23 | add_definitions(-DRISCV_VM_SUPPORT_RV32F=1) 24 | else() 25 | add_definitions(-DRISCV_VM_SUPPORT_RV32F=0) 26 | endif() 27 | 28 | option(RVVM_SUPPORT_RV32A "Enable RV32A ISA" ON) 29 | if (${RVVM_SUPPORT_RV32A}) 30 | add_definitions(-DRISCV_VM_SUPPORT_RV32A=1) 31 | else() 32 | add_definitions(-DRISCV_VM_SUPPORT_RV32A=0) 33 | endif() 34 | 35 | option(RVVM_SUPPORT_RV32Zicsr "Enable Zicsr ISA" ON) 36 | if (${RVVM_SUPPORT_RV32Zicsr}) 37 | add_definitions(-DRISCV_VM_SUPPORT_Zicsr=1) 38 | else() 39 | add_definitions(-DRISCV_VM_SUPPORT_Zicsr=0) 40 | endif() 41 | # Zifencei is gated on its own option (not on RVVM_SUPPORT_RV32M). 42 | option(RVVM_SUPPORT_RV32Zifencei "Enable Zifencei ISA" ON) 43 | if (${RVVM_SUPPORT_RV32Zifencei}) 44 | add_definitions(-DRISCV_VM_SUPPORT_Zifencei=1) 45 | else() 46 | add_definitions(-DRISCV_VM_SUPPORT_Zifencei=0) 47 | endif() 48 | 49 | option(RVVM_USE_SDL "Use SDL for video and input services" OFF) 50 | if (${RVVM_USE_SDL}) 51 | find_package(SDL REQUIRED) 52 | include_directories(${SDL_INCLUDE_DIR}) 53 | add_definitions(-DRISCV_VM_USE_SDL=1) 54 | else() 55 | add_definitions(-DRISCV_VM_USE_SDL=0) 56 | endif() 57 | # RVVM_STRICT raises the warning level and turns warnings into errors. 58 | if (${RVVM_STRICT}) 59 | if (MSVC) 60 | add_compile_options(/W4 /WX) 
61 | else() 62 | add_compile_options(-Wall -Wextra -pedantic -Werror) 63 | endif() 64 | endif() 65 | 66 | add_definitions(-D_CRT_SECURE_NO_WARNINGS) 67 | 68 | # Library linked into both riscv_vm and riscv_vmx. 69 | set(RISCV_COMMON_SRC 70 | "riscv_core/riscv.h" 71 | "riscv_core/riscv_conf.h" 72 | "riscv_core/riscv_private.h" 73 | "riscv_core/riscv_common.c" 74 | ) 75 | add_library(riscv_common ${RISCV_COMMON_SRC}) 76 | 77 | # Core linked into the riscv_vm executable. 78 | set(RISCV_CORE_SRC 79 | "riscv_core/riscv.c" 80 | ) 81 | add_library(riscv_core ${RISCV_CORE_SRC}) 82 | 83 | # JIT core linked into the riscv_vmx executable. 84 | set(RISCV_CORE_JIT_SRC 85 | "riscv_core/riscv_jit.c" 86 | "riscv_core/decode.h" 87 | "riscv_core/decode.c" 88 | "riscv_core/codegen.c" 89 | ) 90 | add_library(riscv_core_jit ${RISCV_CORE_JIT_SRC}) 91 | 92 | # Code generator library linked into riscv_vmx alongside the JIT core. 93 | set(TINYCG_SRC 94 | "tinycg/tinycg.c" 95 | "tinycg/tinycg.h" 96 | ) 97 | add_library(tinycg ${TINYCG_SRC}) 98 | 99 | # Frontend sources shared by the riscv_vm and riscv_vmx executables. 100 | set(DRV_SRC 101 | "riscv_vm/elf.h" 102 | "riscv_vm/elf.cpp" 103 | "riscv_vm/main.cpp" 104 | "riscv_vm/memory.h" 105 | "riscv_vm/syscall.cpp" 106 | "riscv_vm/state.h" 107 | "riscv_vm/args.cpp" 108 | "riscv_vm/syscall_sdl.cpp" 109 | ) 110 | 111 | add_executable(riscv_vm ${DRV_SRC}) 112 | target_link_libraries(riscv_vm riscv_common riscv_core) 113 | # riscv_vmx is only created when RVVM_X64_JIT is enabled. 114 | if (${RVVM_X64_JIT}) 115 | add_executable(riscv_vmx ${DRV_SRC}) 116 | target_link_libraries(riscv_vmx riscv_common riscv_core_jit tinycg) 117 | endif() 118 | # Link SDL into whichever executables exist. 119 | if (${RVVM_USE_SDL}) 120 | target_link_libraries(riscv_vm ${SDL_LIBRARY}) 121 | if (${RVVM_X64_JIT}) 122 | target_link_libraries(riscv_vmx ${SDL_LIBRARY}) 123 | endif() 124 | endif() 125 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Aidan Dodds 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without 
limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # RISCV-VM News 2 | 3 | ---- 4 | ### 18 Oct. 2020 5 | 6 | The last few days have been spent inserting an intermediate representation step into the JIT codepath. There is now an instruction decoder which takes in an instruction word and spits out a decoded instruction as a struct which can be passed onto the codegen step. The intention here was that I could decode a basic block entirely prior to the codegen phase thus giving the codegen more visibility about the block that is being translated. The most interesting metric to me is knowing if a block is a leaf function or not. 7 | 8 | An unexpected side effect was that it really improved the code quality, with a very clear partition between the JIT phases. 9 | 10 | I also instrumented the decoder to keep track of how many times it encounters instructions while decoding a basic block. Below is a list of the relative odds of encountering an instruction when decoding a basic block. 
This should not be confused with how many times an instruction will be executed, but it's closely related. Knowing this gives me an interesting insight into which instructions I should be looking to tighten up the generated code for. 11 | 12 | Instruction decode frequencies when booting Doom: 13 | ``` 14 | 23.6% addi 15.6% lw 12.3% sw 6.6% auipc 15 | 5.7% jal 4.7% add 4.4% slli 3.4% beq 16 | 2.5% lbu 2.1% or 2.1% bne 1.8% xor 17 | 1.7% jalr 1.7% srli 1.4% sub 1.3% sb 18 | 1.6% blt 1.0% bge .9% andi .7% srai 19 | .6% lui .5% and .4% lh .4% sh 20 | .3% bgeu .3% bltu .2% mul .1% lhu 21 | .1% div ... xori ... sll ... divu 22 | ... sltu ... sltiu ... remu ... slt 23 | ... sra ... lb ... ori ... ecall 24 | ... rem ... srl ... mulh 0 slti 25 | 0 fence 0 ebreak 0 mulhsu 0 mulhu 26 | ``` 27 | 28 | ---- 29 | ### 15 Oct. 2020 30 | 31 | I am still looking at improving some of the worst parts of the generated JIT code. Keeping track of the hit-count per translated basic block turned out to be really useful as I can make sure I'm analyzing the hottest basic blocks which should have the most impact. 
This morning I turned my attention to the code generated by the branch instructions: 32 | 33 | ``` 34 | // before 35 | 24: 8b 86 9c 00 00 00 mov eax,DWORD PTR [rsi+0x9c] ; load rs1 36 | 2a: 8b 56 70 mov edx,DWORD PTR [rsi+0x70] ; load rs2 37 | 2d: 39 d0 cmp eax,edx ; compare 38 | 2f: b8 28 ac 01 00 mov eax,0x1ac28 ; PC + 4 39 | 34: ba cc ab 01 00 mov edx,0x1abcc ; PC + imm 40 | 39: 0f 44 c2 cmove eax,edx ; select target 41 | 3c: 89 86 d0 00 00 00 mov DWORD PTR [rsi+0xd0],eax ; rv->PC = eax 42 | ``` 43 | 44 | While I may rethink how I handle this instruction, there is a quick improvement that can be made: 45 | 46 | ``` 47 | // after 48 | 24: 8b 86 9c 00 00 00 mov eax,DWORD PTR [rsi+0x9c] ; load rs1 49 | 2a: 8b 56 70 cmp eax,DWORD PTR [rsi+0x70] ; compare with rs2 50 | 2f: b8 28 ac 01 00 mov eax,0x1ac28 ; PC + 4 51 | 34: ba cc ab 01 00 mov edx,0x1abcc ; PC + imm 52 | 39: 0f 44 c2 cmove eax,edx ; select target 53 | 3c: 89 86 d0 00 00 00 mov DWORD PTR [rsi+0xd0],eax ; rv->PC = eax 54 | ``` 55 | 56 | Unfortunately `cmov` is fairly limited with regard to the operands that it can accept which makes the remaining code a little lacking. It still gets `Quake` hitting the 450 MIPS mark at some points which is cool. `Doom` still shows no change making me think there is a bottleneck elsewhere that I should be hunting down. 57 | 58 | A different strategy to handle branches would be as follows (in pseudo assembly): 59 | ``` 60 | ... 61 | mov eax, rv->X[rs1] 62 | cmp eax, rv->X[rs2] 63 | je taken 64 | mov rv->PC, not_taken 65 | ; insert prologue 66 | ret 67 | .taken 68 | ; continue translation from taken PC 69 | ... 70 | ``` 71 | 72 | Like this we could essentially fall out of a block if the branch is not taken, thus optimizing for the taken case. It is just an idea however and I doubt I'll be implementing it any time soon though. 73 | 74 | ---- 75 | ### 15 Oct. 
2020 76 | 77 | At this point I have achieved much more than I had in mind when I set out the original goals for the project. I did not expect to get Doom and Quake running, let alone to have JIT working. At this stage I have to decide which direction I want future work to focus on as I feel aimless development would be mostly wasted effort. There are two possible directions that the project could move; increasing the complexity of the simulation to the point where it can launch Linux for instance, and the other to focus on becoming a high performance user mode simulator. The second direction has the most narrow scope with a focus on speed, accuracy and compatibility. The former is more technically difficult, requiring MMU emulation, multiple hart support, etc. 78 | 79 | While putting off this decision, I have been taking stock of the codebase, cleaning things up and having a look at the generated code. I tried multiple different ways of producing intermediate code before the JIT but was ultimately unhappy with both of them. The first method generated something akin to SSA code for each basic block but my design was too inflexible to be practical. My second attempt was more flexible and generated something like expression trees for various instructions, but again it ended up not feeling like the right approach. A third attempt is forming in my mind but it is likely a ways off being committed to. 80 | 81 | One area that has improved a lot was that the JIT core has now become sufficiently fully featured that it doesn't need the emulation core to fall back on since they have almost the same level of support for instructions. Due to this, I was able to split the two cores entirely into separate modules with the ability to swap cores easily in CMake. 82 | 83 | Support for traps has also been added to the emulation core which can now pass the misaligned compliance tests. 
84 | 85 | I have done a small amount of performance tuning on the JIT code, dumping the generated basic blocks, disassembling them and looking for simple ways to make the code better. 86 | 87 | The RISC-V LUI instruction was fairly poor and has been improved: 88 | ``` 89 | // before 90 | mov eax,0x668b8 91 | mov DWORD PTR [rsi+0xd0],eax 92 | 93 | // after 94 | mov DWORD PTR [rsi+0xd0], 0x668b8 95 | ``` 96 | 97 | Lots of the branch and PC related code was also updated to not use an intermediate register, which had a positive impact on the execution times. 98 | 99 | For store instructions the value to be stored needs to be loaded into r8 before the store call, which could be improved: 100 | ``` 101 | // before 102 | mov eax, DWORD PTR [rsi+0x70] 103 | mov r8,rax 104 | 105 | // after 106 | movsx r8, DWORD PTR [rsi+0x70] 107 | ``` 108 | 109 | Another thing I have considered is that leaf functions don't require a stack frame, which could simplify the prologue and epilogue: 110 | ``` 111 | // before 112 | push rbp 113 | mov rbp,rsp 114 | sub rsp,0x40 115 | mov QWORD PTR [rsp+0x20],rsi 116 | ... 117 | mov rsi,QWORD PTR [rsp+0x20] 118 | mov rsp,rbp 119 | pop rbp 120 | ret 121 | 122 | // after (for leaf functions) 123 | push rsi 124 | ; align? 125 | ... 126 | ; pop align? 127 | pop rsi 128 | ret 129 | ``` 130 | 131 | To know if you are generating a leaf function however would require an IR representation, deferred prologue/epilogue generation or a lookahead decoder since you need to know if you will generate any call instructions. This optimization will have to wait for a future update however. 132 | 133 | Another improvement to make is to simply leave the `rv` structure in the RCX register and not move it into the RSI register. It would simplify the prologue/epilogue and simplify any call operations. I picked RSI mostly on a whim when I started writing the JIT and realise it was a poor choice now. 
134 | 135 | I also tried to add in dynamic next block prediction but successfully slowed down the core once again which was interesting. It is still a bit of a puzzle to me why tracking and updating the last successor block taken would be worse than keeping that prediction static. My profiling runs show that about 10% of execution time is spent searching for the next block to execute, with a near 100% hash map hit rate (no further probing required). The bulk of the time is likely spent computing the hash of the address which is used to index the hash map. 136 | 137 | Getting back to looking at the code, there are frequent fragments similar to the code below: 138 | ``` 139 | // before 1 140 | 99: 8b 86 88 00 00 00 mov eax,DWORD PTR [rsi+0x88] 141 | 9f: c1 e0 02 shl eax,0x2 142 | a2: 89 86 88 00 00 00 mov DWORD PTR [rsi+0x88],eax 143 | 144 | // before 2 145 | 1a: 8b 86 88 00 00 00 mov eax,DWORD PTR [rsi+0x88] 146 | 20: 05 88 f8 ff ff add eax,0xfffff888 147 | 25: 89 86 88 00 00 00 mov DWORD PTR [rsi+0x88],eax 148 | ``` 149 | 150 | These could be replaced with much shorter in-place operations: 151 | ``` 152 | // after 1 153 | shl dword ptr [rsi+0x88], 0x2 154 | 155 | // after 2 156 | add dword ptr [rsi+0x88], 0xfffff888 157 | ``` 158 | 159 | I suspect these fragments are generated by immediate operations when `rd` matches `rs1`. It may be possible to apply similar optimizations to the register-register ALU operations too. 160 | 161 | A further optimization I made was to treat constant jumps with no linking as not a branch and continue translation of the code stream. This seemed like the blocks would get larger and there would be fewer calls to `block_find`. It is perplexing however that this made no difference in terms of MIPS at all. 162 | 163 | After making these optimizations `Quake` is now running at around 440 MIPS which is a large improvement over where it started. 
Quite strange however is that `Doom` still sits at the 550 MIPS mark being relatively unaffected by these improvements. 164 | 165 | ---- 166 | ### 04 Oct. 2020 167 | 168 | Some time was spent today looking at the largest bottlenecks in the generated code which were in RISC-V register file access and callbacks when doing io and syscalls. Both of these operations would involve accessing the members of a structure which stores the state of the emulation. The DynRec would place the address of these members into a register and then perform a write or read from that address. It is two instructions yet it involves a large 64bit immediate for the member address. 169 | 170 | To optimize this, I store the emulation struct address in the RSI register, and any access to it can then be performed in one mov operation using base addressing. As RSI is callee save, I also had to implement prologue and epilogue code to save this register. That turned out to be a win too as it simplified the call handling code. This does have the benefit that I could reuse the same already converted code blocks when running another `hart` simply by passing in a new state structure. It is not directly supported yet, but nice to know its an option. 171 | 172 | At this point `DOOM` is running around 550 MIPS, which is a nice boost over the results I had this morning. While `Quake` is less consistent in speed it is still faster than it was previously at around 282 MIPS. This should increase a lot when I support `RV32F` in the DynRec. 173 | 174 | The lesson I learned here is that encoding everything as immediate data is not good for instruction count or code size. Also it pays to read the ABI spec closely, and I was caught out by a few issues such as shadow space, alignment, etc. 
175 | 176 | The list of things I am thinking about going forward: 177 | - Code cleanup 178 | - Linux support 179 | - Optimizing the generated code 180 | - Supporting RV32F in the DynRec 181 | - Testing 182 | - Adding user input to `Quake` and `Doom` 183 | 184 | Just a quick note on how I generated the code fragments. I made extensive use of an [Online Assembler](http://shell-storm.org/online/Online-Assembler-and-Disassembler) and the ever useful [Godbolt Compiler Explorer](https://godbolt.org/). Using Godbolt I could verify the assembly I thought my instruction emulations would lower to, which was mainly useful for navigating all the different forms of comparison instructions. The instruction strings themselves were generated using the online assembler and could quickly be inserted into the JIT code header. 185 | 186 | ---- 187 | ### 04 Oct. 2020 188 | 189 | I have managed to progress this project far beyond my initial targets. Over the last few days I have implemented a binary translation engine from RISC-V instructions into native x64 code. The result of this is a hefty increase in execution speed with `DOOM` now running at ~460 MIPS compared to the emulation only 117 MIPS. Interestingly `Quake` also sees a speed boost running at ~247 MIPS, a win over the emulation only ~103 MIPS. 190 | 191 | The reason for the large difference in MIPS between Quake and Doom is that the DynRec currently only supports the RV32I and some of the RV32IM instructions. Any basic blocks that make use of floating point instructions will fall back to pure emulation. As the DynRec is taught how to compile float instructions then this figure can be expected to rise significantly. 192 | 193 | At the time of writing the DynRec engine will only run on Windows X64 machines, because it makes use of the Windows API and follows the Windows x64 calling convention for generated code. 
I do plan to extend support to Linux in the future too, which would however require some thought about how to avoid increasing the code complexity to handle different host ABIs. 194 | 195 | The high level operation of the DynRec can be summarized as follows: 196 | - Look in a hashmap for a code block matching the current PC 197 | - if a block is found 198 | - execute this block 199 | - if a block is not found 200 | - allocate a new block 201 | - invoke the translator for this block 202 | - insert it into the hash map 203 | - execute this block 204 | 205 | Translation happens at the basic block level, so each block will end after a branch instruction has been translated. Currently there is no optimization pass of the generated code, it is entirely unaware of the code surrounding it and so it is fast to generate and slow to execute. 206 | 207 | Using the current DynRec gives us a speed boost due to the following reasons: 208 | - No instruction fetch 209 | - No instruction decode 210 | - Immediate values are baked into translated instructions 211 | - Values of 0 can be optimized 212 | - Zero register can be optimized 213 | - No lookup required 214 | - Writes are discarded 215 | - Reduced emulation loop overhead 216 | - Blocks can be chained based on previous branch pattern for faster lookup 217 | 218 | ---- 219 | ### 01 Oct. 2020 220 | 221 | Another fairly significant milestone has been achieved; I have compiled and successfully run `Quake` on the RISC-VM. What makes this milestone stand apart from `Doom` is that `Quake` uses floating point math in a lot of areas. For this reason it feels like a fairly good test of the RV32F support I have been working on. 222 | 223 | This time around I did not find any `Quake` port that would be as comparatively simple to port as `generic doom` was. I rolled up my sleeves and knocked out just such a port myself [here](https://github.com/bit-hack/portable_quake). 
This involved stripping out everything that was not part of the C standard library, all use of double, undefined behavior and a vast amount of cleanup. I used the same mechanism to display graphics that had worked for the `Doom` port, the twist being that quake used palettized graphics. 224 | 225 | What's really interesting is that the framerate feels around the same as for the `Doom` demo despite the generational step. The `Quake` demo runs around 103928000 IPS (~103 MIPS) on average which is a little lower than `Doom`. I have not investigated yet where the gap comes from or what I can learn from this. 226 | 227 | It is interesting to see lots of the floating point instructions light up and some stay strangely dormant, for instance: 228 | - The `sqrtf` calls do seem to get lowered to the sqrt instruction. 229 | - The sign swapping instructions do get called. 230 | - Min/Max never seem to get called. 231 | - None of the FMA variants get called. 232 | - fclass does not get called. 233 | 234 | ---- 235 | ### 30 Sept. 2020 236 | 237 | Since the last update I have managed to tidy up the SDL code used for the DOOM demo and it has been merged back into master. 238 | 239 | At this point I started work on floating point support, which by now is reasonably functional and somewhat tested. There is missing support for rounding and exceptions being raised in the `fcsr` register as well as verifying correct `NaN` handling. 240 | 241 | Something on my list to look at is the `RISCV tests` repository which seemed to contain more tests than the compliance test suite. Hopefully I can use this to iron out any bugs and missing features in the atomic and float implementations. 242 | 243 | To test out the float implementation I added a number of tests and benchmarks to the tests folder, such as the classic `whetstone` and `linpack`. There are some fun ones too such as the `mandelbrot` test from the always amusing `IOCCC`. 
244 | 245 | So next steps are looking like more testing, more code cleanups and perhaps another infamous (and more float oriented) demo to follow on from `Doom`. I also need to rewrite a lot of the README since many notes and build instructions are a little out of date now. 246 | 247 | ---- 248 | ### 28 Sept. 2020 249 | 250 | I hit a milestone of sorts yesterday evening. I was able to compile and run DOOM on the RISC-VM simulator! I was very surprised that it was quite so easy to get up and running. The execution speed is not quite high enough to maintain a decent framerate however. 251 | 252 | A round of profiling was done, which showed me that the instruction fetch step was a significant bottleneck. Some of the memory routines were then rewritten to improve their performance which brought the game up to interactive frame rates. 253 | 254 | I have added a MIPS counter to the project today which should give me an indication of instruction throughput. Currently when running DOOM in riscv-vm on my laptop (I7-6600U 2.60GHz) it reaches a fairly consistent 117955000 IPS (118 MIPS). The work done per instruction would be lower than a CISC design however, which is something to keep in mind. 255 | 256 | My work now is focused on cleaning up the code, merging my DOOM specific code back into the master branch in a clean and portable way. Once thats done I will think about optimizing as well as adding float support. 257 | 258 | ---- 259 | ### 27 Sept. 2020 260 | 261 | It has been just over a week since I started this project and it has come a long way in that time. I was fortunate that due to the well written ISA spec and its design that the emulator core came up fairly rapidly showing good initial correctness. Having the compliance test suite to hand was fantastic for validating the instructions as I added them. 262 | 263 | Once the core ISA emulator passed all of the compliance tests for the RV32I subset, I added support for the RV32M instructions. 
Sadly while I have added support for the Atomic instructions I cant seem to find any way to test them, as there are currently no compliance tests. 264 | 265 | With the core feeling fairly "stable" my focus has been on porting over executable programs to run so I can increase my confidence its working correctly. 266 | 267 | During this process, I fell down the rabbit hole of implementing syscalls so that the virtual machine could interact with the outside world. It is a very nice thing to see "Hello World!" printed by your own VM. 268 | 269 | I added some simple CI jobs to the project using travis and Appveyor to cover Linux and Visual Studio builds. A test suite of sorts will have to be created and integrated fairly soon. 270 | 271 | While bringing up `malloc` I learned a few things about the `brk` system call along the way which were interesting, as well as conflicting with most of the information I could find. A lot of time was spent poking through the newlib source code. 272 | 273 | Debugging issues so far has been a pain and has mostly consisted of dumping vast logs of PC values and then manually searching through the program disassembly to see which branches were taken. The situation improved a little when I added symbol names to the trace logs. At some stage I will certainly have to write an unwinder. 274 | 275 | Once malloc was working, I could move over to bringing up some of the basic `stdio` routines (`fopen`, `fwrite`, etc). Their implementation still leaves room for improvements but it works well as a start. 276 | 277 | I am happy so far with my design choice to keep the ISA core written in C for simplicity and ease of porting and defer to C++ for the more complex elements of the VM itself. 278 | 279 | For the next steps I will be thinking about: 280 | - Adding single precision `float` support. 281 | - Running a wider corpus of programs on the VM. 282 | - Producing a test suite to be executed by the CI jobs. 
283 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RISC-V Virtual Machine 2 | 3 | This is a RISC-V Virtual Machine and instruction set emulator implementing a 32 bit RISC-V processor model. I started this project as a learning exercise to get more familiar with the RISC-V ecosystem and have increased the scope of the project as it matures. The project itself is still very much in the early stages however. 4 | 5 | Features: 6 | - Support for RV32I and RV32M 7 | - Partial support for RV32F and RV32A 8 | - Syscall emulation and host passthrough 9 | - Emulation using [Dynamic Binary Translation](https://en.wikipedia.org/wiki/Binary_translation#Dynamic_binary_translation) 10 | - It can run Doom, Quake and SmallPT 11 | 12 | There are broadly two elements to this project: 13 | - An ISA emulator core written in C which presents a low level API for interfacing. 14 | - The VM frontend written in C++ which interfaces the ISA emulator core with the host computer. 15 | 16 | Note: The Binary Translation emulator is currently only available when building for x64 Windows. This is due to the generated code being tailored to that ABI, however in time Linux support for the code generator will follow. 17 | 18 | See [news](NEWS.md) for a development log and updates. 
19 | 20 | 21 | ---- 22 | ## Build Status 23 | [![Build Status](https://travis-ci.org/bit-hack/riscv-vm.svg?branch=master)](https://travis-ci.org/bit-hack/riscv-vm) 24 | [![Build status](https://ci.appveyor.com/api/projects/status/sxu9jv014g179mus/branch/master?svg=true)](https://ci.appveyor.com/project/8BitPimp/riscv-vm/branch/master) 25 | 26 | 27 | ---- 28 | ## Build requirements 29 | - C++ 14 compatible compiler (VisualStudio 15+, GCC 4.9+) 30 | - CMake 3.0 or above 31 | - SDL1.2 (if you want video support) 32 | 33 | 34 | ---- 35 | ## Compiling the project 36 | ``` 37 | mkdir build 38 | cd build 39 | cmake .. 40 | make 41 | ``` 42 | 43 | 44 | ---- 45 | ## Executing a basic program 46 | 47 | A simple RISC-V program (i.e. `main.c`) can be compiled and executed as follows: 48 | ``` 49 | riscv64-unknown-elf-gcc -march=rv32im -mabi=ilp32 main.c 50 | riscv_vm a.out 51 | ``` 52 | 53 | 54 | ---- 55 | ## Testing 56 | Please note that while the riscv-vm simulator is provided under the MIT license, any of the materials in the `tests` folder may not be. 57 | These tests were taken from a wide range of places to give the project a reasonable test coverage. 58 | If you are the author of any of these tests and are unhappy with its inclusion here then please get in touch and I will remove it from the project and its git history. 59 | 60 | 61 | ---- 62 | ## Notes 63 | 64 | - Written referencing the `20191214-draft` Volume I: Unprivileged ISA document 65 | - Complete RV32G compliance is the end goal 66 | - Testing is done currently using a private fork of the compliance test suite 67 | - As the project develops testing will become a key focus 68 | - It is enough that it can run doom, see [the README](tests/doom/README.md) 69 | 70 | 71 | ---- 72 | ## Newlib support 73 | 74 | There is rudimentary support for target programs that make use of the newlib library. 75 | Currently a number of syscalls have been implemented via the `ecall` instruction. 
76 | This is just enough to run a number of simpler programs and do some basic file io operations. 77 | 78 | Try the following example program: 79 | 80 | ```C 81 | #include <stdio.h> 82 | 83 | int main(int argc, const char **args) { 84 | printf("Hello World!\n"); 85 | return 0; 86 | } 87 | ``` 88 | 89 | Spoiler: 90 | ``` 91 | Hello World! 92 | ``` -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | version: '{build}' 2 | 3 | skip_tags: true 4 | 5 | install: [] 6 | 7 | image: 8 | - Visual Studio 2015 9 | 10 | platform: 11 | - Win32 12 | - x64 13 | 14 | configuration: 15 | - Debug 16 | - Release 17 | 18 | build_script: 19 | - md build 20 | - cd build 21 | - if "%PLATFORM%"=="x64" cmake -G "Visual Studio 14 2015" -A x64 .. 22 | - if "%PLATFORM%"=="Win32" cmake -G "Visual Studio 14 2015" -A Win32 .. 23 | - cmake --build . 24 | -------------------------------------------------------------------------------- /riscv_core/codegen.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "riscv_private.h" 5 | #include "decode.h" 6 | 7 | #include "../tinycg/tinycg.h" 8 | 9 | 10 | // byte offset from rv structure address to member address 11 | #define rv_offset(RV, MEMBER) ((int32_t)(((uintptr_t)&(RV.MEMBER)) - (uintptr_t)&RV)) 12 | 13 | // this struct is only used for offset calculations 14 | static struct riscv_t rv; 15 | 16 | static void get_reg(struct cg_state_t *cg, cg_r32_t dst, uint32_t src) { 17 | if (src == rv_reg_zero) { 18 | cg_xor_r32_r32(cg, dst, dst); 19 | } 20 | else { 21 | cg_mov_r32_r64disp(cg, dst, cg_rsi, rv_offset(rv, X[src])); 22 | } 23 | } 24 | 25 | static void set_reg(struct cg_state_t *cg, uint32_t dst, cg_r32_t src) { 26 | if (dst != rv_reg_zero) { 27 | cg_mov_r64disp_r32(cg, cg_rsi, rv_offset(rv, X[dst]), src); 28 | } 29 | } 30 | 31 | static void set_regi(struct 
cg_state_t *cg, uint32_t dst, int32_t imm) { 32 | if (dst != rv_reg_zero) { 33 | cg_mov_r64disp_i32(cg, cg_rsi, rv_offset(rv, X[dst]), imm); 34 | } 35 | } 36 | 37 | bool codegen(const struct rv_inst_t *i, struct cg_state_t *cg, uint32_t pc, uint32_t inst) { 38 | 39 | // skip instructions that would purely store to X0 40 | if (i->rd == rv_reg_zero) { 41 | // some instructions have rd == 0 but we process them anyway 42 | if (!inst_bypass_zero_store(i)) { 43 | return true; 44 | } 45 | } 46 | 47 | switch (i->opcode) { 48 | 49 | // ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 50 | // RV32I 51 | case rv_inst_lui: 52 | set_regi(cg, i->rd, i->imm); 53 | break; 54 | case rv_inst_auipc: 55 | set_regi(cg, i->rd, pc + i->imm); 56 | break; 57 | case rv_inst_jal: 58 | cg_mov_r64disp_i32(cg, cg_rsi, rv_offset(rv, PC), pc + i->imm); 59 | set_regi(cg, i->rd, pc + 4); 60 | break; 61 | case rv_inst_jalr: 62 | if (i->rs1 == rv_reg_zero) { 63 | cg_mov_r32_i32(cg, cg_eax, i->imm & 0xfffffffe); 64 | } 65 | else { 66 | get_reg(cg, cg_eax, i->rs1); 67 | if (i->imm) { 68 | cg_add_r32_i32(cg, cg_eax, i->imm); 69 | } 70 | cg_and_r32_i32(cg, cg_eax, 0xfffffffe); 71 | } 72 | cg_mov_r64disp_r32(cg, cg_rsi, rv_offset(rv, PC), cg_eax); // branch 73 | set_regi(cg, i->rd, pc + 4); // link 74 | break; 75 | case rv_inst_beq: 76 | case rv_inst_bne: 77 | case rv_inst_blt: 78 | case rv_inst_bge: 79 | case rv_inst_bltu: 80 | case rv_inst_bgeu: 81 | get_reg(cg, cg_eax, i->rs1); 82 | cg_cmp_r32_r64disp(cg, cg_eax, cg_rsi, rv_offset(rv, X[i->rs2])); 83 | cg_mov_r32_i32(cg, cg_eax, pc + 4); 84 | cg_mov_r32_i32(cg, cg_edx, pc + i->imm); 85 | switch (i->opcode) { 86 | case rv_inst_beq: 87 | cg_cmov_r32_r32(cg, cg_cc_eq, cg_eax, cg_edx); 88 | break; 89 | case rv_inst_bne: 90 | cg_cmov_r32_r32(cg, cg_cc_ne, cg_eax, cg_edx); 91 | break; 92 | case rv_inst_blt: 93 | cg_cmov_r32_r32(cg, cg_cc_lt, cg_eax, cg_edx); 94 | break; 95 | case rv_inst_bge: 96 | cg_cmov_r32_r32(cg, cg_cc_ge, 
cg_eax, cg_edx); 97 | break; 98 | case rv_inst_bltu: 99 | cg_cmov_r32_r32(cg, cg_cc_c, cg_eax, cg_edx); 100 | break; 101 | case rv_inst_bgeu: 102 | cg_cmov_r32_r32(cg, cg_cc_ae, cg_eax, cg_edx); 103 | break; 104 | } 105 | cg_mov_r64disp_r32(cg, cg_rsi, rv_offset(rv, PC), cg_eax); 106 | break; 107 | case rv_inst_lb: 108 | case rv_inst_lh: 109 | case rv_inst_lw: 110 | case rv_inst_lbu: 111 | case rv_inst_lhu: /* loads: rcx = rv pointer, edx = effective address, then call the memory-read callback stored in the rv struct; the loaded value is taken from eax */ 112 | cg_mov_r64_r64(cg, cg_rcx, cg_rsi); // rv 113 | if (i->rs1 == rv_reg_zero) { 114 | cg_mov_r32_i32(cg, cg_edx, i->imm); // addr 115 | } 116 | else { 117 | get_reg(cg, cg_edx, i->rs1); 118 | if (i->imm) { 119 | cg_add_r32_i32(cg, cg_edx, i->imm); // addr 120 | } 121 | } 122 | switch (i->opcode) { 123 | case rv_inst_lb: 124 | cg_call_r64disp(cg, cg_rsi, rv_offset(rv, io.mem_read_b)); 125 | cg_movsx_r32_r8(cg, cg_eax, cg_al); 126 | break; 127 | case rv_inst_lh: 128 | cg_call_r64disp(cg, cg_rsi, rv_offset(rv, io.mem_read_s)); 129 | cg_movsx_r32_r16(cg, cg_eax, cg_ax); 130 | break; 131 | case rv_inst_lw: 132 | cg_call_r64disp(cg, cg_rsi, rv_offset(rv, io.mem_read_w)); 133 | break; 134 | case rv_inst_lbu: 135 | cg_call_r64disp(cg, cg_rsi, rv_offset(rv, io.mem_read_b)); cg_movzx_r32_r8(cg, cg_eax, cg_al); /* zero-extend: mirror the explicit sign-extension done for lb so stale upper bits of eax never reach rd */ 136 | break; 137 | case rv_inst_lhu: 138 | cg_call_r64disp(cg, cg_rsi, rv_offset(rv, io.mem_read_s)); cg_and_r32_i32(cg, cg_eax, 0xffff); /* zero-extend the halfword by masking; mirrors the sign-extension done for lh */ 139 | break; 140 | } 141 | set_reg(cg, i->rd, cg_eax); 142 | break; 143 | case rv_inst_sb: 144 | case rv_inst_sh: 145 | case rv_inst_sw: 146 | cg_mov_r64_r64(cg, cg_rcx, cg_rsi); // rv 147 | if (i->rs1 == rv_reg_zero) { 148 | cg_mov_r32_i32(cg, cg_edx, i->imm); // addr 149 | } 150 | else { 151 | get_reg(cg, cg_edx, i->rs1); 152 | if (i->imm) { 153 | cg_add_r32_i32(cg, cg_edx, i->imm); // addr 154 | } 155 | } 156 | cg_movsx_r64_r64disp(cg, cg_r8, cg_rsi, rv_offset(rv, X[i->rs2])); // value 157 | switch (i->opcode) { 158 | case rv_inst_sb: 159 | cg_call_r64disp(cg, cg_rsi, rv_offset(rv, io.mem_write_b)); // store 160 | break; 161 | case rv_inst_sh: 162 | cg_call_r64disp(cg, cg_rsi, rv_offset(rv,
io.mem_write_s)); // store 163 | break; 164 | case rv_inst_sw: 165 | cg_call_r64disp(cg, cg_rsi, rv_offset(rv, io.mem_write_w)); // store 166 | break; 167 | } 168 | break; 169 | 170 | case rv_inst_addi: 171 | if (i->rd == i->rs1) { 172 | cg_add_r64disp_i32(cg, cg_rsi, rv_offset(rv, X[i->rd]), i->imm); 173 | } 174 | else { 175 | if (i->rs1 == rv_reg_zero) { 176 | set_regi(cg, i->rd, i->imm); 177 | } 178 | else { 179 | get_reg(cg, cg_eax, i->rs1); 180 | if (i->imm) { 181 | cg_add_r32_i32(cg, cg_eax, i->imm); 182 | } 183 | set_reg(cg, i->rd, cg_eax); 184 | } 185 | } 186 | break; 187 | case rv_inst_slti: 188 | cg_cmp_r64disp_i32(cg, cg_rsi, rv_offset(rv, X[i->rs1]), i->imm); 189 | cg_setcc_r8(cg, cg_cc_lt, cg_dl); 190 | cg_movzx_r32_r8(cg, cg_eax, cg_dl); 191 | set_reg(cg, i->rd, cg_eax); 192 | break; 193 | case rv_inst_sltiu: 194 | cg_cmp_r64disp_i32(cg, cg_rsi, rv_offset(rv, X[i->rs1]), i->imm); 195 | cg_setcc_r8(cg, cg_cc_c, cg_dl); 196 | cg_movzx_r32_r8(cg, cg_eax, cg_dl); 197 | set_reg(cg, i->rd, cg_eax); 198 | break; 199 | case rv_inst_xori: 200 | if (i->rd == i->rs1) { 201 | cg_xor_r64disp_i32(cg, cg_rsi, rv_offset(rv, X[i->rd]), i->imm); 202 | } 203 | else { 204 | get_reg(cg, cg_eax, i->rs1); 205 | cg_xor_r32_i32(cg, cg_eax, i->imm); 206 | set_reg(cg, i->rd, cg_eax); 207 | } 208 | break; 209 | case rv_inst_ori: 210 | if (i->rd == i->rs1) { 211 | cg_or_r64disp_i32(cg, cg_rsi, rv_offset(rv, X[i->rd]), i->imm); 212 | } 213 | else { 214 | get_reg(cg, cg_eax, i->rs1); 215 | cg_or_r32_i32(cg, cg_eax, i->imm); 216 | set_reg(cg, i->rd, cg_eax); 217 | } 218 | break; 219 | case rv_inst_andi: 220 | if (i->rd == i->rs1) { 221 | cg_and_r64disp_i32(cg, cg_rsi, rv_offset(rv, X[i->rd]), i->imm); 222 | } 223 | else { 224 | get_reg(cg, cg_eax, i->rs1); 225 | cg_and_r32_i32(cg, cg_eax, i->imm); 226 | set_reg(cg, i->rd, cg_eax); 227 | } 228 | break; 229 | case rv_inst_slli: 230 | if (i->rd == i->rs1) { 231 | cg_shl_r64disp_i8(cg, cg_rsi, rv_offset(rv, X[i->rd]), i->imm & 0x1f); 
232 | } 233 | else { 234 | get_reg(cg, cg_eax, i->rs1); 235 | cg_shl_r32_i8(cg, cg_eax, i->imm & 0x1f); 236 | set_reg(cg, i->rd, cg_eax); 237 | } 238 | break; 239 | case rv_inst_srli: 240 | if (i->rd == i->rs1) { 241 | cg_shr_r64disp_i8(cg, cg_rsi, rv_offset(rv, X[i->rd]), i->imm & 0x1f); 242 | } 243 | else { 244 | get_reg(cg, cg_eax, i->rs1); 245 | cg_shr_r32_i8(cg, cg_eax, i->imm & 0x1f); 246 | set_reg(cg, i->rd, cg_eax); 247 | } 248 | break; 249 | case rv_inst_srai: 250 | if (i->rd == i->rs1) { 251 | cg_sar_r64disp_i8(cg, cg_rsi, rv_offset(rv, X[i->rd]), i->imm & 0x1f); 252 | } 253 | else { 254 | get_reg(cg, cg_eax, i->rs1); 255 | cg_sar_r32_i8(cg, cg_eax, i->imm & 0x1f); 256 | set_reg(cg, i->rd, cg_eax); 257 | } 258 | break; 259 | 260 | case rv_inst_add: 261 | if (i->rs2 == rv_reg_zero) { 262 | get_reg(cg, cg_eax, i->rs1); 263 | set_reg(cg, i->rd, cg_eax); 264 | } 265 | else { 266 | get_reg(cg, cg_ecx, i->rs2); 267 | if (i->rs1 == i->rd) { 268 | cg_add_r64disp_r32(cg, cg_rsi, rv_offset(rv, X[i->rd]), cg_ecx); 269 | } 270 | else { 271 | if (i->rs1 == rv_reg_zero) { 272 | set_reg(cg, i->rd, cg_ecx); 273 | } 274 | else { 275 | get_reg(cg, cg_eax, i->rs1); 276 | cg_add_r32_r32(cg, cg_eax, cg_ecx); 277 | set_reg(cg, i->rd, cg_eax); 278 | } 279 | } 280 | } 281 | break; 282 | case rv_inst_sub: 283 | get_reg(cg, cg_ecx, i->rs2); 284 | if (i->rs1 == i->rd) { 285 | cg_sub_r64disp_r32(cg, cg_rsi, rv_offset(rv, X[i->rd]), cg_ecx); 286 | } 287 | else { 288 | get_reg(cg, cg_eax, i->rs1); 289 | cg_sub_r32_r32(cg, cg_eax, cg_ecx); 290 | set_reg(cg, i->rd, cg_eax); 291 | } 292 | break; 293 | case rv_inst_sll: 294 | get_reg(cg, cg_eax, i->rs1); 295 | get_reg(cg, cg_ecx, i->rs2); 296 | cg_and_r8_i8(cg, cg_cl, 0x1f); 297 | cg_shl_r32_cl(cg, cg_eax); 298 | set_reg(cg, i->rd, cg_eax); 299 | break; 300 | case rv_inst_slt: 301 | get_reg(cg, cg_eax, i->rs1); 302 | cg_cmp_r32_r64disp(cg, cg_eax, cg_rsi, rv_offset(rv, X[i->rs2])); 303 | cg_setcc_r8(cg, cg_cc_lt, cg_dl); 304 | 
cg_movzx_r32_r8(cg, cg_eax, cg_dl); 305 | set_reg(cg, i->rd, cg_eax); 306 | break; 307 | case rv_inst_sltu: 308 | get_reg(cg, cg_eax, i->rs1); 309 | cg_cmp_r32_r64disp(cg, cg_eax, cg_rsi, rv_offset(rv, X[i->rs2])); 310 | cg_setcc_r8(cg, cg_cc_c, cg_dl); 311 | cg_movzx_r32_r8(cg, cg_eax, cg_dl); 312 | set_reg(cg, i->rd, cg_eax); 313 | break; 314 | case rv_inst_xor: 315 | get_reg(cg, cg_ecx, i->rs2); 316 | if (i->rs1 == i->rd) { 317 | cg_xor_r64disp_r32(cg, cg_rsi, rv_offset(rv, X[i->rd]), cg_ecx); 318 | } 319 | else { 320 | get_reg(cg, cg_eax, i->rs1); 321 | cg_xor_r32_r32(cg, cg_eax, cg_ecx); 322 | set_reg(cg, i->rd, cg_eax); 323 | } 324 | break; 325 | case rv_inst_srl: 326 | get_reg(cg, cg_eax, i->rs1); 327 | get_reg(cg, cg_ecx, i->rs2); 328 | cg_and_r8_i8(cg, cg_cl, 0x1f); 329 | cg_shr_r32_cl(cg, cg_eax); 330 | set_reg(cg, i->rd, cg_eax); 331 | break; 332 | case rv_inst_sra: 333 | get_reg(cg, cg_eax, i->rs1); 334 | get_reg(cg, cg_ecx, i->rs2); 335 | cg_and_r8_i8(cg, cg_cl, 0x1f); 336 | cg_sar_r32_cl(cg, cg_eax); 337 | set_reg(cg, i->rd, cg_eax); 338 | break; 339 | case rv_inst_or: 340 | get_reg(cg, cg_ecx, i->rs2); 341 | if (i->rs1 == i->rd) { 342 | cg_or_r64disp_r32(cg, cg_rsi, rv_offset(rv, X[i->rd]), cg_ecx); 343 | } 344 | else { 345 | get_reg(cg, cg_eax, i->rs1); 346 | cg_or_r32_r32(cg, cg_eax, cg_ecx); 347 | set_reg(cg, i->rd, cg_eax); 348 | } 349 | break; 350 | case rv_inst_and: 351 | get_reg(cg, cg_ecx, i->rs2); 352 | if (i->rs1 == i->rd) { 353 | cg_and_r64disp_r32(cg, cg_rsi, rv_offset(rv, X[i->rd]), cg_ecx); 354 | } 355 | else { 356 | get_reg(cg, cg_eax, i->rs1); 357 | cg_and_r32_r32(cg, cg_eax, cg_ecx); 358 | set_reg(cg, i->rd, cg_eax); 359 | } 360 | break; 361 | 362 | case rv_inst_fence: 363 | break; 364 | 365 | case rv_inst_ecall: 366 | cg_mov_r64disp_i32(cg, cg_rsi, rv_offset(rv, PC), pc + 4); 367 | cg_mov_r64_r64(cg, cg_rcx, cg_rsi); 368 | cg_call_r64disp(cg, cg_rsi, rv_offset(rv, io.on_ecall)); 369 | break; 370 | case rv_inst_ebreak: 371 | 
cg_mov_r64disp_i32(cg, cg_rsi, rv_offset(rv, PC), pc + 4); 372 | cg_mov_r64_r64(cg, cg_rcx, cg_rsi); 373 | cg_call_r64disp(cg, cg_rsi, rv_offset(rv, io.on_ebreak)); 374 | break; 375 | 376 | // ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 377 | // RV32M 378 | case rv_inst_mul: 379 | get_reg(cg, cg_eax, i->rs1); 380 | cg_imul_r64disp(cg, cg_rsi, rv_offset(rv, X[i->rs2])); 381 | set_reg(cg, i->rd, cg_eax); 382 | break; 383 | case rv_inst_mulh: 384 | get_reg(cg, cg_eax, i->rs1); 385 | cg_imul_r64disp(cg, cg_rsi, rv_offset(rv, X[i->rs2])); 386 | set_reg(cg, i->rd, cg_edx); 387 | break; 388 | case rv_inst_mulhu: 389 | get_reg(cg, cg_eax, i->rs1); 390 | cg_mul_r64disp(cg, cg_rsi, rv_offset(rv, X[i->rs2])); 391 | set_reg(cg, i->rd, cg_edx); 392 | break; 393 | case rv_inst_mulhsu: 394 | case rv_inst_div: 395 | case rv_inst_divu: 396 | case rv_inst_rem: 397 | case rv_inst_remu: 398 | // offload to a specific instruction handler 399 | cg_mov_r64_r64(cg, cg_rcx, cg_rsi); // arg1 - rv 400 | cg_mov_r32_i32(cg, cg_edx, inst); // arg2 - inst 401 | cg_call_r64disp(cg, cg_rsi, rv_offset(rv, jit.handle_op_op)); 402 | break; 403 | 404 | // ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 405 | // RV32F 406 | case rv_inst_flw: 407 | cg_mov_r64_r64(cg, cg_rcx, cg_rsi); // rv 408 | get_reg(cg, cg_edx, i->rs1); 409 | if (i->imm) { 410 | cg_add_r32_i32(cg, cg_edx, i->imm); // addr 411 | } 412 | cg_call_r64disp(cg, cg_rsi, rv_offset(rv, io.mem_read_w)); // read 413 | cg_mov_r64disp_r32(cg, cg_rsi, rv_offset(rv, F[i->rd]), cg_eax); 414 | break; 415 | case rv_inst_fsw: 416 | cg_mov_r64_r64(cg, cg_rcx, cg_rsi); // rv 417 | if (i->rs1 == rv_reg_zero) { 418 | cg_mov_r32_i32(cg, cg_edx, i->imm); // addr 419 | } 420 | else { 421 | get_reg(cg, cg_edx, i->rs1); 422 | if (i->imm) { 423 | cg_add_r32_i32(cg, cg_edx, i->imm); // addr 424 | } 425 | } 426 | cg_movsx_r64_r64disp(cg, cg_r8, cg_rsi, rv_offset(rv, F[i->rs2])); // value 427 | 
cg_call_r64disp(cg, cg_rsi, rv_offset(rv, io.mem_write_w)); // write 428 | break; 429 | case rv_inst_fmadds: 430 | cg_movss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs1])); 431 | cg_mulss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs2])); 432 | cg_addss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs3])); 433 | cg_movss_r64disp_xmm(cg, cg_rsi, rv_offset(rv, F[i->rd]), cg_xmm0); 434 | break; 435 | case rv_inst_fmsubs: 436 | cg_movss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs1])); 437 | cg_mulss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs2])); 438 | cg_subss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs3])); 439 | cg_movss_r64disp_xmm(cg, cg_rsi, rv_offset(rv, F[i->rd]), cg_xmm0); 440 | break; 441 | case rv_inst_fnmsubs: 442 | // multiply 443 | cg_movss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs1])); 444 | cg_mulss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs2])); 445 | // negate 446 | cg_mov_r32_xmm(cg, cg_eax, cg_xmm0); 447 | cg_xor_r32_i32(cg, cg_eax, 0x80000000); 448 | cg_mov_xmm_r32(cg, cg_xmm0, cg_eax); 449 | // add 450 | cg_addss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs3])); 451 | // store 452 | cg_movss_r64disp_xmm(cg, cg_rsi, rv_offset(rv, F[i->rd]), cg_xmm0); 453 | break; 454 | case rv_inst_fnmadds: 455 | // multiply 456 | cg_movss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs1])); 457 | cg_mulss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs2])); 458 | // negate 459 | cg_mov_r32_xmm(cg, cg_eax, cg_xmm0); 460 | cg_xor_r32_i32(cg, cg_eax, 0x80000000); 461 | cg_mov_xmm_r32(cg, cg_xmm0, cg_eax); 462 | // subtract 463 | cg_subss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs3])); 464 | // store 465 | cg_movss_r64disp_xmm(cg, cg_rsi, rv_offset(rv, F[i->rd]), cg_xmm0); 466 | break; 467 | case rv_inst_fadds: 468 | cg_movss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs1])); 469 | cg_addss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, 
F[i->rs2])); 470 | cg_movss_r64disp_xmm(cg, cg_rsi, rv_offset(rv, F[i->rd]), cg_xmm0); 471 | break; 472 | case rv_inst_fsubs: 473 | cg_movss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs1])); 474 | cg_subss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs2])); 475 | cg_movss_r64disp_xmm(cg, cg_rsi, rv_offset(rv, F[i->rd]), cg_xmm0); 476 | break; 477 | case rv_inst_fmuls: 478 | cg_movss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs1])); 479 | cg_mulss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs2])); 480 | cg_movss_r64disp_xmm(cg, cg_rsi, rv_offset(rv, F[i->rd]), cg_xmm0); 481 | break; 482 | case rv_inst_fdivs: 483 | cg_movss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs1])); 484 | cg_divss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs2])); 485 | cg_movss_r64disp_xmm(cg, cg_rsi, rv_offset(rv, F[i->rd]), cg_xmm0); 486 | break; 487 | case rv_inst_fsqrts: 488 | cg_sqrtss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, F[i->rs1])); 489 | cg_movss_r64disp_xmm(cg, cg_rsi, rv_offset(rv, F[i->rd]), cg_xmm0); 490 | break; 491 | case rv_inst_fsgnjs: 492 | case rv_inst_fsgnjns: 493 | case rv_inst_fsgnjxs: 494 | case rv_inst_fmins: 495 | case rv_inst_fmaxs: 496 | case rv_inst_feqs: 497 | case rv_inst_flts: 498 | case rv_inst_fles: 499 | case rv_inst_fclasss: 500 | // defer to a handler function for these ones 501 | cg_mov_r64_r64(cg, cg_rcx, cg_rsi); // arg1 - rv 502 | cg_mov_r32_i32(cg, cg_edx, inst); // arg2 - inst 503 | cg_call_r64disp(cg, cg_rsi, rv_offset(rv, jit.handle_op_fp)); 504 | break; 505 | case rv_inst_fmvxw: 506 | cg_mov_r32_r64disp(cg, cg_eax, cg_rsi, rv_offset(rv, F[i->rs1])); 507 | cg_mov_r64disp_r32(cg, cg_rsi, rv_offset(rv, X[i->rd]), cg_eax); 508 | break; 509 | case rv_inst_fcvtws: 510 | case rv_inst_fcvtwus: 511 | cg_cvttss2si_r32_r64disp(cg, cg_eax, cg_rsi, rv_offset(rv, F[i->rs1])); 512 | cg_mov_r64disp_r32(cg, cg_rsi, rv_offset(rv, X[i->rd]), cg_eax); 513 | break; 514 | case rv_inst_fcvtsw: 515 | case 
rv_inst_fcvtswu: 516 | cg_cvtsi2ss_xmm_r64disp(cg, cg_xmm0, cg_rsi, rv_offset(rv, X[i->rs1])); 517 | cg_movss_r64disp_xmm(cg, cg_rsi, rv_offset(rv, F[i->rd]), cg_xmm0); 518 | break; 519 | case rv_inst_fmvwx: 520 | get_reg(cg, cg_eax, i->rs1); 521 | cg_mov_r64disp_r32(cg, cg_rsi, rv_offset(rv, F[i->rd]), cg_eax); 522 | break; 523 | 524 | // ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 525 | // RV32 Zicsr 526 | case rv_inst_csrrw: 527 | case rv_inst_csrrs: 528 | case rv_inst_csrrc: 529 | case rv_inst_csrrwi: 530 | case rv_inst_csrrsi: 531 | case rv_inst_csrrci: 532 | // offload to a specific instruction handler 533 | cg_mov_r64_r64(cg, cg_rcx, cg_rsi); // arg1 - rv 534 | cg_mov_r32_i32(cg, cg_edx, inst); // arg2 - inst 535 | cg_call_r64disp(cg, cg_rsi, rv_offset(rv, jit.handle_op_system)); 536 | break; 537 | 538 | // ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 539 | // RV32 Zifencei 540 | case rv_inst_fencei: 541 | break; 542 | 543 | // ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 544 | // RV32A 545 | case rv_inst_lrw: 546 | case rv_inst_scw: 547 | case rv_inst_amoswapw: 548 | case rv_inst_amoaddw: 549 | case rv_inst_amoxorw: 550 | case rv_inst_amoandw: 551 | case rv_inst_amoorw: 552 | case rv_inst_amominw: 553 | case rv_inst_amomaxw: 554 | case rv_inst_amominuw: 555 | case rv_inst_amomaxuw: 556 | break; 557 | 558 | default: 559 | assert(!"unreachable"); 560 | return false; 561 | } 562 | 563 | // success 564 | return true; 565 | } 566 | 567 | void codegen_prologue(struct cg_state_t *cg) { 568 | // new stack frame 569 | cg_push_r64(cg, cg_rbp); 570 | cg_mov_r64_r64(cg, cg_rbp, cg_rsp); 571 | cg_sub_r64_i32(cg, cg_rsp, 64); 572 | // save rsi 573 | cg_mov_r64disp_r64(cg, cg_rsp, 32, cg_rsi); 574 | // move rv struct pointer into rsi 575 | cg_mov_r64_r64(cg, cg_rsi, cg_rcx); 576 | } 577 | 578 | void codegen_epilogue(struct cg_state_t *cg) { 579 | // restore rsi 580 | 
cg_mov_r64_r64disp(cg, cg_rsi, cg_rsp, 32); 581 | // leave stack frame 582 | cg_mov_r64_r64(cg, cg_rsp, cg_rbp); 583 | cg_pop_r64(cg, cg_rbp); 584 | // return 585 | cg_ret(cg); 586 | } 587 | -------------------------------------------------------------------------------- /riscv_core/decode.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "riscv.h" 6 | #include "riscv_private.h" 7 | #include "decode.h" 8 | 9 | 10 | static bool op_load( uint32_t inst, struct rv_inst_t *ir) { 11 | 12 | // itype format 13 | const int32_t imm = dec_itype_imm(inst); 14 | const uint32_t rs1 = dec_rs1(inst); 15 | const uint32_t funct3 = dec_funct3(inst); 16 | const uint32_t rd = dec_rd(inst); 17 | 18 | ir->rd = rd; 19 | ir->imm = imm; 20 | ir->rs1 = rs1; 21 | 22 | switch (funct3) { 23 | case 0: // LB 24 | ir->opcode = rv_inst_lb; 25 | break; 26 | case 1: // LH 27 | ir->opcode = rv_inst_lh; 28 | break; 29 | case 2: // LW 30 | ir->opcode = rv_inst_lw; 31 | break; 32 | case 4: // LBU 33 | ir->opcode = rv_inst_lbu; 34 | break; 35 | case 5: // LHU 36 | ir->opcode = rv_inst_lhu; 37 | break; 38 | default: 39 | return false; 40 | } 41 | 42 | return true; 43 | } 44 | 45 | static bool op_op_imm( uint32_t inst, struct rv_inst_t *ir) { 46 | 47 | // i-type decode 48 | const int32_t imm = dec_itype_imm(inst); 49 | const uint32_t rd = dec_rd(inst); 50 | const uint32_t rs1 = dec_rs1(inst); 51 | const uint32_t funct3 = dec_funct3(inst); 52 | 53 | ir->rd = rd; 54 | ir->rs1 = rs1; 55 | ir->imm = imm; 56 | 57 | // dispatch operation type 58 | switch (funct3) { 59 | case 0: // ADDI 60 | ir->opcode = rv_inst_addi; 61 | break; 62 | case 1: // SLLI 63 | ir->opcode = rv_inst_slli; 64 | break; 65 | case 2: // SLTI 66 | ir->opcode = rv_inst_slti; 67 | break; 68 | case 3: // SLTIU 69 | ir->opcode = rv_inst_sltiu; 70 | break; 71 | case 4: // XORI 72 | ir->opcode = rv_inst_xori; 73 | break; 74 | case 5: 75 | if (imm & ~0x1f) { 76 
| // SRAI: the enclosing test only proves that some bit above the 5-bit shift amount is set; the sole valid pattern is imm[11:5] == 0b0100000 (0x400), so reject every other encoding instead of decoding it as SRAI 77 | if ((imm & ~0x1f) != 0x400) { return false; } ir->opcode = rv_inst_srai; 78 | } 79 | else { 80 | // SRLI 81 | ir->opcode = rv_inst_srli; 82 | } 83 | break; 84 | case 6: // ORI 85 | ir->opcode = rv_inst_ori; 86 | break; 87 | case 7: // ANDI 88 | ir->opcode = rv_inst_andi; 89 | break; 90 | default: 91 | return false; 92 | } 93 | 94 | return true; 95 | } 96 | 97 | // add upper immediate to pc 98 | static bool op_auipc( uint32_t inst, struct rv_inst_t *ir) { 99 | 100 | // u-type decode 101 | const uint32_t rd = dec_rd(inst); 102 | const uint32_t imm = dec_utype_imm(inst); 103 | 104 | ir->rd = rd; 105 | ir->imm = imm; 106 | 107 | ir->opcode = rv_inst_auipc; 108 | 109 | return true; 110 | } 111 | 112 | static bool op_store( uint32_t inst, struct rv_inst_t *ir) { 113 | 114 | // s-type format 115 | const int32_t imm = dec_stype_imm(inst); 116 | const uint32_t rs1 = dec_rs1(inst); 117 | const uint32_t rs2 = dec_rs2(inst); 118 | const uint32_t funct3 = dec_funct3(inst); 119 | 120 | ir->rd = rv_reg_zero; 121 | ir->imm = imm; 122 | ir->rs1 = rs1; 123 | ir->rs2 = rs2; 124 | 125 | switch (funct3) { 126 | case 0: // SB 127 | ir->opcode = rv_inst_sb; 128 | break; 129 | case 1: // SH 130 | ir->opcode = rv_inst_sh; 131 | break; 132 | case 2: // SW 133 | ir->opcode = rv_inst_sw; 134 | break; 135 | default: 136 | return false; 137 | } 138 | 139 | return true; 140 | } 141 | 142 | static bool op_op( uint32_t inst, struct rv_inst_t *ir) { 143 | 144 | // r-type decode 145 | const uint32_t rd = dec_rd(inst); 146 | const uint32_t funct3 = dec_funct3(inst); 147 | const uint32_t rs1 = dec_rs1(inst); 148 | const uint32_t rs2 = dec_rs2(inst); 149 | const uint32_t funct7 = dec_funct7(inst); 150 | 151 | ir->rd = rd; 152 | ir->rs1 = rs1; 153 | ir->rs2 = rs2; 154 | 155 | switch (funct7) { 156 | case 0b0000000: 157 | switch (funct3) { 158 | case 0b000: // ADD 159 | ir->opcode = rv_inst_add; 160 | break; 161 | case 0b001: // SLL 162 | ir->opcode = rv_inst_sll; 163 | break; 164 | case 0b010: // SLT 165 | ir->opcode = rv_inst_slt;
166 | break; 167 | case 0b011: // SLTU 168 | ir->opcode = rv_inst_sltu; 169 | break; 170 | case 0b100: // XOR 171 | ir->opcode = rv_inst_xor; 172 | break; 173 | case 0b101: // SRL 174 | ir->opcode = rv_inst_srl; 175 | break; 176 | case 0b110: // OR 177 | ir->opcode = rv_inst_or; 178 | break; 179 | case 0b111: // AND 180 | ir->opcode = rv_inst_and; 181 | break; 182 | default: 183 | return false; 184 | } 185 | break; 186 | case 0b0100000: 187 | switch (funct3) { 188 | case 0b000: // SUB 189 | ir->opcode = rv_inst_sub; 190 | break; 191 | case 0b101: // SRA 192 | ir->opcode = rv_inst_sra; 193 | break; 194 | default: 195 | return false; 196 | } 197 | break; 198 | #if RISCV_VM_SUPPORT_RV32M 199 | case 0b0000001: 200 | // RV32M instructions 201 | switch (funct3) { 202 | case 0b000: // MUL 203 | ir->opcode = rv_inst_mul; 204 | break; 205 | case 0b001: // MULH 206 | ir->opcode = rv_inst_mulh; 207 | break; 208 | case 0b011: // MULHU 209 | ir->opcode = rv_inst_mulhu; 210 | break; 211 | case 0b010: // MULHSU 212 | ir->opcode = rv_inst_mulhsu; 213 | break; 214 | case 0b100: // DIV 215 | ir->opcode = rv_inst_div; 216 | break; 217 | case 0b101: // DIVU 218 | ir->opcode = rv_inst_divu; 219 | break; 220 | case 0b110: // REM 221 | ir->opcode = rv_inst_rem; 222 | break; 223 | case 0b111: // REMU 224 | ir->opcode = rv_inst_remu; 225 | break; 226 | default: 227 | return false; 228 | } 229 | break; 230 | #endif // RISCV_VM_SUPPORT_RV32M 231 | default: 232 | return false; 233 | } 234 | 235 | return true; 236 | } 237 | 238 | static bool op_lui(uint32_t inst, struct rv_inst_t *ir) { 239 | 240 | // u-type decode 241 | const uint32_t rd = dec_rd(inst); 242 | const uint32_t val = dec_utype_imm(inst); 243 | 244 | ir->rd = rd; 245 | ir->imm = val; 246 | 247 | ir->opcode = rv_inst_lui; 248 | 249 | return true; 250 | } 251 | 252 | static bool op_branch(uint32_t inst, struct rv_inst_t *ir) { 253 | 254 | // b-type decode 255 | const uint32_t func3 = dec_funct3(inst); 256 | const int32_t imm = 
dec_btype_imm(inst); 257 | const uint32_t rs1 = dec_rs1(inst); 258 | const uint32_t rs2 = dec_rs2(inst); 259 | 260 | ir->rd = rv_reg_zero; 261 | ir->imm = imm; 262 | ir->rs1 = rs1; 263 | ir->rs2 = rs2; 264 | 265 | switch (func3) { 266 | case 0: // BEQ 267 | ir->opcode = rv_inst_beq; 268 | break; 269 | case 1: // BNE 270 | ir->opcode = rv_inst_bne; 271 | break; 272 | case 4: // BLT 273 | ir->opcode = rv_inst_blt; 274 | break; 275 | case 5: // BGE 276 | ir->opcode = rv_inst_bge; 277 | break; 278 | case 6: // BLTU 279 | ir->opcode = rv_inst_bltu; 280 | break; 281 | case 7: // BGEU 282 | ir->opcode = rv_inst_bgeu; 283 | break; 284 | default: 285 | return false; 286 | } 287 | 288 | return true; 289 | } 290 | 291 | static bool op_jalr(uint32_t inst, struct rv_inst_t *ir) { 292 | 293 | // i-type decode 294 | const uint32_t rd = dec_rd(inst); 295 | const uint32_t rs1 = dec_rs1(inst); 296 | const int32_t imm = dec_itype_imm(inst); 297 | 298 | ir->rd = rd; 299 | ir->rs1 = rs1; 300 | ir->imm = imm; 301 | 302 | ir->opcode = rv_inst_jalr; 303 | 304 | return true; 305 | } 306 | 307 | static bool op_jal(uint32_t inst, struct rv_inst_t *ir) { 308 | 309 | // j-type decode 310 | const uint32_t rd = dec_rd(inst); 311 | const int32_t rel = dec_jtype_imm(inst); 312 | 313 | ir->rd = rd; 314 | ir->imm = rel; 315 | 316 | ir->opcode = rv_inst_jal; 317 | 318 | return true; 319 | } 320 | 321 | static bool op_system(uint32_t inst, struct rv_inst_t *ir) { 322 | 323 | // i-type decode 324 | const int32_t imm = dec_itype_imm(inst); 325 | const int32_t csr = dec_csr(inst); 326 | const uint32_t funct3 = dec_funct3(inst); 327 | const uint32_t rs1 = dec_rs1(inst); 328 | const uint32_t rd = dec_rd(inst); 329 | 330 | ir->rd = rd; 331 | ir->rs1 = rs1; 332 | 333 | // dispatch by func3 field 334 | switch (funct3) { 335 | case 0: 336 | // dispatch from imm field 337 | switch (imm) { 338 | case 0: // ECALL 339 | ir->opcode = rv_inst_ecall; 340 | break; 341 | case 1: // EBREAK 342 | ir->opcode = 
rv_inst_ebreak; 343 | break; 344 | default: 345 | return false; 346 | } 347 | break; 348 | #if RISCV_VM_SUPPORT_Zicsr 349 | case 1: // CSRRW (Atomic Read/Write CSR) 350 | ir->opcode = rv_inst_csrrw; 351 | break; 352 | case 2: // CSRRS (Atomic Read and Set Bits in CSR) 353 | ir->opcode = rv_inst_csrrs; 354 | break; 355 | case 3: // CSRRC (Atomic Read and Clear Bits in CSR) 356 | ir->opcode = rv_inst_csrrc; 357 | break; 358 | case 5: // CSRRWI 359 | ir->opcode = rv_inst_csrrwi; 360 | break; 361 | case 6: // CSRRSI 362 | ir->opcode = rv_inst_csrrsi; 363 | break; 364 | case 7: // CSRRCI 365 | ir->opcode = rv_inst_csrrci; 366 | break; 367 | #endif // RISCV_VM_SUPPORT_Zicsr 368 | default: 369 | return false; 370 | } 371 | 372 | return true; 373 | } 374 | 375 | #if RISCV_VM_SUPPORT_RV32F 376 | static bool op_load_fp(uint32_t inst, struct rv_inst_t *ir) { 377 | 378 | const uint32_t rd = dec_rd(inst); 379 | const uint32_t rs1 = dec_rs1(inst); 380 | const int32_t imm = dec_itype_imm(inst); 381 | 382 | ir->rd = rd; 383 | ir->rs1 = rs1; 384 | ir->imm = imm; 385 | 386 | ir->opcode = rv_inst_flw; 387 | 388 | return true; 389 | } 390 | 391 | static bool op_store_fp(uint32_t inst, struct rv_inst_t *ir) { 392 | 393 | const uint32_t rs1 = dec_rs1(inst); 394 | const uint32_t rs2 = dec_rs2(inst); 395 | const int32_t imm = dec_stype_imm(inst); 396 | 397 | ir->rd = rv_reg_zero; 398 | ir->rs1 = rs1; 399 | ir->rs2 = rs2; 400 | ir->imm = imm; 401 | 402 | ir->opcode = rv_inst_fsw; 403 | 404 | return true; 405 | } 406 | 407 | static bool op_fp(uint32_t inst, struct rv_inst_t *ir) { 408 | 409 | const uint32_t rd = dec_rd(inst); 410 | const uint32_t rs1 = dec_rs1(inst); 411 | const uint32_t rs2 = dec_rs2(inst); 412 | const uint32_t rm = dec_funct3(inst); 413 | const uint32_t funct7 = dec_funct7(inst); 414 | 415 | ir->rd = rd; 416 | ir->rs1 = rs1; 417 | ir->rs2 = rs2; 418 | 419 | // dispatch based on func7 (low 2 bits are width) 420 | switch (funct7) { 421 | case 0b0000000: // FADD 422 | 
ir->opcode = rv_inst_fadds; 423 | break; 424 | case 0b0000100: // FSUB 425 | ir->opcode = rv_inst_fsubs; 426 | break; 427 | case 0b0001000: // FMUL 428 | ir->opcode = rv_inst_fmuls; 429 | break; 430 | case 0b0001100: // FDIV 431 | ir->opcode = rv_inst_fdivs; 432 | break; 433 | case 0b0101100: // FSQRT 434 | ir->opcode = rv_inst_fsqrts; 435 | break; 436 | case 0b1100000: 437 | switch (rs2) { 438 | case 0b00000: // FCVT.W.S 439 | ir->opcode = rv_inst_fcvtws; 440 | break; 441 | case 0b00001: // FCVT.WU.S 442 | ir->opcode = rv_inst_fcvtwus; 443 | break; 444 | default: 445 | return false; 446 | } 447 | break; 448 | case 0b1110000: 449 | switch (rm) { 450 | case 0b000: // FMV.X.W 451 | ir->opcode = rv_inst_fmvxw; 452 | break; 453 | case 0b001: // FCLASS.S 454 | ir->opcode = rv_inst_fclasss; 455 | break; 456 | default: 457 | return false; 458 | } 459 | break; 460 | case 0b1101000: 461 | switch (rs2) { 462 | case 0b00000: // FCVT.S.W 463 | ir->opcode = rv_inst_fcvtsw; 464 | break; 465 | case 0b00001: // FCVT.S.WU 466 | ir->opcode = rv_inst_fcvtswu; 467 | break; 468 | default: 469 | return false; 470 | } 471 | break; 472 | case 0b1111000: // FMV.W.X 473 | ir->opcode = rv_inst_fmvwx; 474 | break; 475 | case 0b0010000: // FSGNJ.S, FSGNJN.S, FSGNJX.S 476 | switch (rm) { 477 | case 0b000: // FSGNJ.S 478 | ir->opcode = rv_inst_fsgnjs; 479 | break; 480 | case 0b001: // FSGNJN.S 481 | ir->opcode = rv_inst_fsgnjns; 482 | break; 483 | case 0b010: // FSGNJX.S 484 | ir->opcode = rv_inst_fsgnjxs; 485 | break; 486 | default: 487 | return false; 488 | } 489 | break; 490 | case 0b0010100: // FMIN, FMAX 491 | switch (rm) { 492 | case 0b000: // FMIN 493 | ir->opcode = rv_inst_fmins; 494 | break; 495 | case 0b001: // FMAX 496 | ir->opcode = rv_inst_fmaxs; 497 | break; 498 | default: 499 | return false; 500 | } 501 | break; 502 | case 0b1010000: // FEQ.S, FLT.S, FLE.S 503 | switch (rm) { 504 | case 0b010: // FEQ.S 505 | ir->opcode = rv_inst_feqs; 506 | break; 507 | case 0b001: // FLT.S 508 | 
ir->opcode = rv_inst_flts; 509 | break; 510 | case 0b000: // FLE.S 511 | ir->opcode = rv_inst_fles; 512 | break; 513 | default: 514 | return false; 515 | } 516 | break; 517 | default: 518 | return false; 519 | } 520 | 521 | return true; 522 | } 523 | 524 | static bool op_madd(uint32_t inst, struct rv_inst_t *ir) { 525 | 526 | const uint32_t rd = dec_rd(inst); 527 | const uint32_t rm = dec_funct3(inst); // todo 528 | const uint32_t rs1 = dec_rs1(inst); 529 | const uint32_t rs2 = dec_rs2(inst); 530 | const uint32_t fmt = dec_r4type_fmt(inst); // unused 531 | const uint32_t rs3 = dec_r4type_rs3(inst); 532 | 533 | ir->rd = rd; 534 | ir->rs1 = rs1; 535 | ir->rs2 = rs2; 536 | ir->rs3 = rs3; 537 | 538 | ir->opcode = rv_inst_fmadds; 539 | 540 | return true; 541 | } 542 | 543 | static bool op_msub(uint32_t inst, struct rv_inst_t *ir) { 544 | 545 | const uint32_t rd = dec_rd(inst); 546 | const uint32_t rm = dec_funct3(inst); // todo 547 | const uint32_t rs1 = dec_rs1(inst); 548 | const uint32_t rs2 = dec_rs2(inst); 549 | const uint32_t fmt = dec_r4type_fmt(inst); // unused 550 | const uint32_t rs3 = dec_r4type_rs3(inst); 551 | 552 | ir->rd = rd; 553 | ir->rs1 = rs1; 554 | ir->rs2 = rs2; 555 | ir->rs3 = rs3; 556 | 557 | ir->opcode = rv_inst_fmsubs; 558 | 559 | return true; 560 | } 561 | 562 | static bool op_nmadd(uint32_t inst, struct rv_inst_t *ir) { 563 | 564 | const uint32_t rd = dec_rd(inst); 565 | const uint32_t rm = dec_funct3(inst); // todo 566 | const uint32_t rs1 = dec_rs1(inst); 567 | const uint32_t rs2 = dec_rs2(inst); 568 | const uint32_t fmt = dec_r4type_fmt(inst); // unused 569 | const uint32_t rs3 = dec_r4type_rs3(inst); 570 | 571 | ir->rd = rd; 572 | ir->rs1 = rs1; 573 | ir->rs2 = rs2; 574 | ir->rs3 = rs3; 575 | 576 | ir->opcode = rv_inst_fnmadds; 577 | 578 | return true; 579 | } 580 | 581 | static bool op_nmsub(uint32_t inst, struct rv_inst_t *ir) { 582 | 583 | const uint32_t rd = dec_rd(inst); 584 | const uint32_t rm = dec_funct3(inst); // todo 585 | const 
uint32_t rs1 = dec_rs1(inst); 586 | const uint32_t rs2 = dec_rs2(inst); 587 | const uint32_t fmt = dec_r4type_fmt(inst); // unused 588 | const uint32_t rs3 = dec_r4type_rs3(inst); 589 | 590 | ir->rd = rd; 591 | ir->rs1 = rs1; 592 | ir->rs2 = rs2; 593 | ir->rs3 = rs3; 594 | 595 | ir->opcode = rv_inst_fnmsubs; 596 | 597 | return true; 598 | } 599 | #else 600 | #define op_load_fp NULL 601 | #define op_store_fp NULL 602 | #define op_fp NULL 603 | #define op_madd NULL 604 | #define op_msub NULL 605 | #define op_nmadd NULL 606 | #define op_nmsub NULL 607 | #endif 608 | 609 | // opcode handler type 610 | typedef bool(*opcode_t)(uint32_t inst, struct rv_inst_t *ir); 611 | 612 | // opcode dispatch table 613 | static const opcode_t opcodes[] = { 614 | // 000 001 010 011 100 101 110 111 615 | op_load, op_load_fp, NULL, NULL, op_op_imm, op_auipc, NULL, NULL, // 00 616 | op_store, op_store_fp, NULL, NULL, op_op, op_lui, NULL, NULL, // 01 617 | op_madd, op_msub, op_nmsub, op_nmadd, op_fp, NULL, NULL, NULL, // 10 618 | op_branch, op_jalr, NULL, op_jal, op_system, NULL, NULL, NULL, // 11 619 | }; 620 | /* decode: translate one instruction word into *out. Returns true and advances *pc by 4 when a handler from the opcodes table accepts the word; returns false (leaving *pc untouched) when the table slot is NULL, the handler rejects the word, or the word is a compressed encoding. */ 621 | bool decode(uint32_t inst, struct rv_inst_t *out, uint32_t *pc) { 622 | 623 | // standard uncompressed 624 | if ((inst & 3) == 3) { 625 | const uint32_t index = (inst & INST_6_2) >> 2; 626 | // find translation function 627 | const opcode_t op = opcodes[index]; 628 | if (!op) { 629 | return false; 630 | } 631 | if (!op(inst, out)) { 632 | return false; 633 | } 634 | *pc += 4; 635 | } 636 | else { 637 | // compressed instruction 638 | // TODO 639 | assert(!"Unreachable"); return false; /* when assert is compiled out (NDEBUG) this path must still report failure: *out was never filled in and *pc was not advanced */ 640 | } 641 | 642 | // success 643 | return true; 644 | } 645 | -------------------------------------------------------------------------------- /riscv_core/decode.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #pragma once 6 | 7 | enum { 8 | // RV32I 9 | rv_inst_lui, 10 | rv_inst_auipc, 11 | rv_inst_jal, 12 | rv_inst_jalr,
13 | rv_inst_beq, 14 | rv_inst_bne, 15 | rv_inst_blt, 16 | rv_inst_bge, 17 | rv_inst_bltu, 18 | rv_inst_bgeu, 19 | rv_inst_lb, 20 | rv_inst_lh, 21 | rv_inst_lw, 22 | rv_inst_lbu, 23 | rv_inst_lhu, 24 | rv_inst_sb, 25 | rv_inst_sh, 26 | rv_inst_sw, 27 | rv_inst_addi, 28 | rv_inst_slti, 29 | rv_inst_sltiu, 30 | rv_inst_xori, 31 | rv_inst_ori, 32 | rv_inst_andi, 33 | rv_inst_slli, 34 | rv_inst_srli, 35 | rv_inst_srai, 36 | rv_inst_add, 37 | rv_inst_sub, 38 | rv_inst_sll, 39 | rv_inst_slt, 40 | rv_inst_sltu, 41 | rv_inst_xor, 42 | rv_inst_srl, 43 | rv_inst_sra, 44 | rv_inst_or, 45 | rv_inst_and, 46 | rv_inst_fence, 47 | rv_inst_ecall, 48 | rv_inst_ebreak, 49 | 50 | // RV32M 51 | rv_inst_mul, 52 | rv_inst_mulh, 53 | rv_inst_mulhsu, 54 | rv_inst_mulhu, 55 | rv_inst_div, 56 | rv_inst_divu, 57 | rv_inst_rem, 58 | rv_inst_remu, 59 | 60 | // RV32F 61 | rv_inst_flw, 62 | rv_inst_fsw, 63 | rv_inst_fmadds, 64 | rv_inst_fmsubs, 65 | rv_inst_fnmsubs, 66 | rv_inst_fnmadds, 67 | rv_inst_fadds, 68 | rv_inst_fsubs, 69 | rv_inst_fmuls, 70 | rv_inst_fdivs, 71 | rv_inst_fsqrts, 72 | rv_inst_fsgnjs, 73 | rv_inst_fsgnjns, 74 | rv_inst_fsgnjxs, 75 | rv_inst_fmins, 76 | rv_inst_fmaxs, 77 | rv_inst_fcvtws, 78 | rv_inst_fcvtwus, 79 | rv_inst_fmvxw, 80 | rv_inst_feqs, 81 | rv_inst_flts, 82 | rv_inst_fles, 83 | rv_inst_fclasss, 84 | rv_inst_fcvtsw, 85 | rv_inst_fcvtswu, 86 | rv_inst_fmvwx, 87 | 88 | // RV32 Zicsr 89 | rv_inst_csrrw, 90 | rv_inst_csrrs, 91 | rv_inst_csrrc, 92 | rv_inst_csrrwi, 93 | rv_inst_csrrsi, 94 | rv_inst_csrrci, 95 | 96 | // RV32 Zifencei 97 | rv_inst_fencei, 98 | 99 | // RV32A 100 | rv_inst_lrw, 101 | rv_inst_scw, 102 | rv_inst_amoswapw, 103 | rv_inst_amoaddw, 104 | rv_inst_amoxorw, 105 | rv_inst_amoandw, 106 | rv_inst_amoorw, 107 | rv_inst_amominw, 108 | rv_inst_amomaxw, 109 | rv_inst_amominuw, 110 | rv_inst_amomaxuw, 111 | }; 112 | 113 | struct rv_inst_t { 114 | uint8_t opcode; 115 | uint8_t rd, rs1, rs2; 116 | union { 117 | int32_t imm; 118 | uint8_t rs3; 119 | }; 120 
| }; 121 | 122 | static bool inst_is_branch(const struct rv_inst_t *ir) { 123 | switch (ir->opcode) { 124 | case rv_inst_jal: 125 | case rv_inst_jalr: 126 | case rv_inst_beq: 127 | case rv_inst_bne: 128 | case rv_inst_blt: 129 | case rv_inst_bge: 130 | case rv_inst_bltu: 131 | case rv_inst_bgeu: 132 | case rv_inst_ebreak: 133 | case rv_inst_ecall: 134 | return true; 135 | default: 136 | return false; 137 | } 138 | } 139 | 140 | static bool inst_is_rv32f(const struct rv_inst_t *ir) { 141 | switch (ir->opcode) { 142 | case rv_inst_flw: 143 | case rv_inst_fsw: 144 | case rv_inst_fmadds: 145 | case rv_inst_fmsubs: 146 | case rv_inst_fnmsubs: 147 | case rv_inst_fnmadds: 148 | case rv_inst_fadds: 149 | case rv_inst_fsubs: 150 | case rv_inst_fmuls: 151 | case rv_inst_fdivs: 152 | case rv_inst_fsqrts: 153 | case rv_inst_fsgnjs: 154 | case rv_inst_fsgnjns: 155 | case rv_inst_fsgnjxs: 156 | case rv_inst_fmins: 157 | case rv_inst_fmaxs: 158 | case rv_inst_fcvtws: 159 | case rv_inst_fcvtwus: 160 | case rv_inst_fmvxw: 161 | case rv_inst_feqs: 162 | case rv_inst_flts: 163 | case rv_inst_fles: 164 | case rv_inst_fclasss: 165 | case rv_inst_fcvtsw: 166 | case rv_inst_fcvtswu: 167 | case rv_inst_fmvwx: 168 | return true; 169 | } 170 | return false; 171 | } 172 | 173 | static bool inst_bypass_zero_store(const struct rv_inst_t *inst) { 174 | switch (inst->opcode) { 175 | case rv_inst_jal: 176 | case rv_inst_jalr: 177 | case rv_inst_beq: 178 | case rv_inst_bne: 179 | case rv_inst_blt: 180 | case rv_inst_bge: 181 | case rv_inst_bltu: 182 | case rv_inst_bgeu: 183 | case rv_inst_sb: 184 | case rv_inst_sh: 185 | case rv_inst_sw: 186 | case rv_inst_ecall: 187 | case rv_inst_ebreak: 188 | case rv_inst_flw: 189 | case rv_inst_fsw: 190 | case rv_inst_fmadds: 191 | case rv_inst_fmsubs: 192 | case rv_inst_fnmsubs: 193 | case rv_inst_fnmadds: 194 | case rv_inst_fadds: 195 | case rv_inst_fsubs: 196 | case rv_inst_fmuls: 197 | case rv_inst_fdivs: 198 | case rv_inst_fsqrts: 199 | case 
rv_inst_fsgnjs: 200 | case rv_inst_fsgnjns: 201 | case rv_inst_fsgnjxs: 202 | case rv_inst_fmins: 203 | case rv_inst_fmaxs: 204 | case rv_inst_fcvtws: 205 | case rv_inst_fcvtwus: 206 | case rv_inst_fmvxw: 207 | case rv_inst_feqs: 208 | case rv_inst_flts: 209 | case rv_inst_fles: 210 | case rv_inst_fclasss: 211 | case rv_inst_fcvtsw: 212 | case rv_inst_fcvtswu: 213 | case rv_inst_fmvwx: 214 | return true; 215 | } 216 | return false; 217 | } 218 | 219 | bool decode(uint32_t inst, struct rv_inst_t *out, uint32_t *pc); 220 | 221 | bool codegen(const struct rv_inst_t *ir, struct cg_state_t *cg, uint32_t pc, uint32_t inst); 222 | void codegen_prologue(struct cg_state_t *cg); 223 | void codegen_epilogue(struct cg_state_t *cg); 224 | -------------------------------------------------------------------------------- /riscv_core/riscv.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include "riscv_conf.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | // riscv register file enums 11 | enum { 12 | rv_reg_zero = 0, 13 | rv_reg_ra, 14 | rv_reg_sp, 15 | rv_reg_gp, 16 | rv_reg_tp, 17 | rv_reg_t0, 18 | rv_reg_t1, 19 | rv_reg_t2, 20 | rv_reg_s0, 21 | rv_reg_s1, 22 | rv_reg_a0, 23 | rv_reg_a1, 24 | rv_reg_a2, 25 | rv_reg_a3, 26 | rv_reg_a4, 27 | rv_reg_a5, 28 | rv_reg_a6, 29 | rv_reg_a7, 30 | rv_reg_s2, 31 | rv_reg_s3, 32 | rv_reg_s4, 33 | rv_reg_s5, 34 | rv_reg_s6, 35 | rv_reg_s7, 36 | rv_reg_s8, 37 | rv_reg_s9, 38 | rv_reg_s10, 39 | rv_reg_s11, 40 | rv_reg_t3, 41 | rv_reg_t4, 42 | rv_reg_t5, 43 | rv_reg_t6, 44 | }; 45 | 46 | struct riscv_t; 47 | typedef void *riscv_user_t; 48 | 49 | typedef uint32_t riscv_word_t; 50 | typedef uint16_t riscv_half_t; 51 | typedef uint8_t riscv_byte_t; 52 | typedef uint32_t riscv_exception_t; 53 | typedef float riscv_float_t; 54 | 55 | // memory read handlers 56 | typedef riscv_word_t (*riscv_mem_ifetch)(struct riscv_t *rv, riscv_word_t addr); 57 | typedef 
riscv_word_t (*riscv_mem_read_w)(struct riscv_t *rv, riscv_word_t addr); 58 | typedef riscv_half_t (*riscv_mem_read_s)(struct riscv_t *rv, riscv_word_t addr); 59 | typedef riscv_byte_t (*riscv_mem_read_b)(struct riscv_t *rv, riscv_word_t addr); 60 | 61 | // memory write handlers 62 | typedef void (*riscv_mem_write_w)(struct riscv_t *rv, riscv_word_t addr, riscv_word_t data); 63 | typedef void (*riscv_mem_write_s)(struct riscv_t *rv, riscv_word_t addr, riscv_half_t data); 64 | typedef void (*riscv_mem_write_b)(struct riscv_t *rv, riscv_word_t addr, riscv_byte_t data); 65 | 66 | // system instruction handlers 67 | typedef void (*riscv_on_ecall )(struct riscv_t *rv); 68 | typedef void (*riscv_on_ebreak)(struct riscv_t *rv); 69 | 70 | // riscv emulator io interface 71 | struct riscv_io_t { 72 | // memory read interface 73 | riscv_mem_ifetch mem_ifetch; 74 | riscv_mem_read_w mem_read_w; 75 | riscv_mem_read_s mem_read_s; 76 | riscv_mem_read_b mem_read_b; 77 | // memory write interface 78 | riscv_mem_write_w mem_write_w; 79 | riscv_mem_write_s mem_write_s; 80 | riscv_mem_write_b mem_write_b; 81 | // system commands 82 | riscv_on_ecall on_ecall; 83 | riscv_on_ebreak on_ebreak; 84 | }; 85 | 86 | // create a riscv emulator 87 | struct riscv_t *rv_create(const struct riscv_io_t *io, riscv_user_t user_data); 88 | 89 | // delete a riscv emulator 90 | void rv_delete(struct riscv_t *); 91 | 92 | // reset the riscv processor 93 | void rv_reset(struct riscv_t *, riscv_word_t pc); 94 | 95 | // step the riscv emulator 96 | void rv_step(struct riscv_t *, int32_t cycles); 97 | 98 | // get riscv user data bound to an emulator 99 | riscv_user_t rv_userdata(struct riscv_t *); 100 | 101 | // set the program counter of a riscv emulator 102 | bool rv_set_pc(struct riscv_t *rv, riscv_word_t pc); 103 | 104 | // get the program counter of a riscv emulator 105 | riscv_word_t rv_get_pc(struct riscv_t *rv); 106 | 107 | // set a register of the riscv emulator 108 | void rv_set_reg(struct riscv_t *, 
uint32_t reg, riscv_word_t in); 109 | 110 | // get a register of the riscv emulator 111 | riscv_word_t rv_get_reg(struct riscv_t *, uint32_t reg); 112 | 113 | // return the cycle counter 114 | uint64_t rv_get_csr_cycles(struct riscv_t *); 115 | 116 | // halt the core 117 | void rv_halt(struct riscv_t *); 118 | 119 | // return the halt state 120 | bool rv_has_halted(struct riscv_t *); 121 | 122 | #ifdef __cplusplus 123 | }; // ifdef __cplusplus 124 | #endif 125 | -------------------------------------------------------------------------------- /riscv_core/riscv_common.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "riscv.h" 8 | #include "riscv_private.h" 9 | 10 | 11 | riscv_user_t rv_userdata(struct riscv_t *rv) { 12 | assert(rv); 13 | return rv->userdata; 14 | } 15 | 16 | bool rv_set_pc(struct riscv_t *rv, riscv_word_t pc) { 17 | assert(rv); 18 | if (pc & 3) { 19 | return false; 20 | } 21 | rv->PC = pc; 22 | return true; 23 | } 24 | 25 | riscv_word_t rv_get_pc(struct riscv_t *rv) { 26 | assert(rv); 27 | return rv->PC; 28 | } 29 | 30 | void rv_set_reg(struct riscv_t *rv, uint32_t reg, riscv_word_t in) { 31 | assert(rv); 32 | if (reg < RV_NUM_REGS && reg != rv_reg_zero) { 33 | rv->X[reg] = in; 34 | } 35 | } 36 | 37 | riscv_word_t rv_get_reg(struct riscv_t *rv, uint32_t reg) { 38 | assert(rv); 39 | if (reg < RV_NUM_REGS) { 40 | return rv->X[reg]; 41 | } 42 | return ~0u; 43 | } 44 | 45 | uint64_t rv_get_csr_cycles(struct riscv_t *rv) { 46 | return rv->csr_cycle; 47 | } 48 | 49 | void rv_except_inst_misaligned(struct riscv_t *rv, uint32_t old_pc) { 50 | const uint32_t base = rv->csr_mtvec & ~0x3; 51 | const uint32_t mode = rv->csr_mtvec & 0x3; 52 | 53 | const uint32_t code = 0; // instruction address misaligned 54 | 55 | rv->csr_mepc = old_pc; 56 | rv->csr_mtval = rv->PC; 57 | 58 | switch (mode) { 59 | case 0: // DIRECT 60 | rv->PC = base; 61 | break; 62 
| case 1: // VECTORED 63 | rv->PC = base + 4 * code; 64 | break; 65 | } 66 | 67 | rv->csr_mcause = code; 68 | } 69 | 70 | void rv_except_load_misaligned(struct riscv_t *rv, uint32_t addr) { 71 | const uint32_t base = rv->csr_mtvec & ~0x3; 72 | const uint32_t mode = rv->csr_mtvec & 0x3; 73 | 74 | const uint32_t code = 4; // load address misaligned 75 | 76 | rv->csr_mepc = rv->PC; 77 | rv->csr_mtval = addr; 78 | 79 | switch (mode) { 80 | case 0: // DIRECT 81 | rv->PC = base; 82 | break; 83 | case 1: // VECTORED 84 | rv->PC = base + 4 * code; 85 | break; 86 | } 87 | 88 | rv->csr_mcause = code; 89 | } 90 | 91 | void rv_except_store_misaligned(struct riscv_t *rv, uint32_t addr) { 92 | const uint32_t base = rv->csr_mtvec & ~0x3; 93 | const uint32_t mode = rv->csr_mtvec & 0x3; 94 | 95 | const uint32_t code = 6; // store address misaligned 96 | 97 | rv->csr_mepc = rv->PC; 98 | rv->csr_mtval = addr; 99 | 100 | switch (mode) { 101 | case 0: // DIRECT 102 | rv->PC = base; 103 | break; 104 | case 1: // VECTORED 105 | rv->PC = base + 4 * code; 106 | break; 107 | } 108 | 109 | rv->csr_mcause = code; 110 | } 111 | 112 | void rv_except_illegal_inst(struct riscv_t *rv) { 113 | assert(!"illegal instruction"); 114 | } 115 | 116 | // get a pointer to a CSR 117 | static uint32_t *csr_get_ptr(struct riscv_t *rv, uint32_t csr) { 118 | switch (csr) { 119 | case CSR_CYCLE: 120 | return (uint32_t*)(&rv->csr_cycle) + 0; 121 | case CSR_CYCLEH: 122 | return (uint32_t*)(&rv->csr_cycle) + 1; 123 | case CSR_MSTATUS: 124 | return (uint32_t*)(&rv->csr_mstatus); 125 | case CSR_MTVEC: 126 | return (uint32_t*)(&rv->csr_mtvec); 127 | case CSR_MISA: 128 | return (uint32_t*)(&rv->csr_misa); 129 | case CSR_MSCRATCH: 130 | return (uint32_t*)(&rv->csr_mscratch); 131 | case CSR_MEPC: 132 | return (uint32_t*)(&rv->csr_mepc); 133 | case CSR_MCAUSE: 134 | return (uint32_t*)(&rv->csr_mcause); 135 | case CSR_MTVAL: 136 | return (uint32_t*)(&rv->csr_mtval); 137 | case CSR_MIP: 138 | return 
(uint32_t*)(&rv->csr_mip); 139 | #if RISCV_VM_SUPPORT_RV32F 140 | case CSR_FCSR: 141 | return (uint32_t*)(&rv->csr_fcsr); 142 | #endif 143 | default: 144 | return NULL; 145 | } 146 | } 147 | 148 | static bool csr_is_writable(uint32_t csr) { 149 | return csr < 0xc00; 150 | } 151 | 152 | // perform csrrw 153 | uint32_t csr_csrrw(struct riscv_t *rv, uint32_t csr, uint32_t val) { 154 | uint32_t *c = csr_get_ptr(rv, csr); 155 | if (!c) { 156 | return 0; 157 | } 158 | const uint32_t out = *c; 159 | if (csr_is_writable(csr)) { 160 | *c = val; 161 | } 162 | return out; 163 | } 164 | 165 | // perform csrrs (atomic read and set) 166 | uint32_t csr_csrrs(struct riscv_t *rv, uint32_t csr, uint32_t val) { 167 | uint32_t *c = csr_get_ptr(rv, csr); 168 | if (!c) { 169 | return 0; 170 | } 171 | const uint32_t out = *c; 172 | if (csr_is_writable(csr)) { 173 | *c |= val; 174 | } 175 | return out; 176 | } 177 | 178 | // perform csrrc (atomic read and clear) 179 | uint32_t csr_csrrc(struct riscv_t *rv, uint32_t csr, uint32_t val) { 180 | uint32_t *c = csr_get_ptr(rv, csr); 181 | if (!c) { 182 | return 0; 183 | } 184 | const uint32_t out = *c; 185 | if (csr_is_writable(csr)) { 186 | *c &= ~val; 187 | } 188 | return out; 189 | } 190 | 191 | struct riscv_t *rv_create(const struct riscv_io_t *io, riscv_user_t userdata) { 192 | assert(io); 193 | struct riscv_t *rv = (struct riscv_t *)malloc(sizeof(struct riscv_t)); 194 | memset(rv, 0, sizeof(struct riscv_t)); 195 | // copy over the IO interface 196 | memcpy(&rv->io, io, sizeof(struct riscv_io_t)); 197 | // copy over the userdata 198 | rv->userdata = userdata; 199 | // reset 200 | rv_reset(rv, 0u); 201 | // initalize jit engine 202 | rv_jit_init(rv); 203 | // return the rv structure 204 | return rv; 205 | } 206 | 207 | void rv_halt(struct riscv_t *rv) { 208 | rv->halt = true; 209 | } 210 | 211 | bool rv_has_halted(struct riscv_t *rv) { 212 | return rv->halt; 213 | } 214 | 215 | void rv_delete(struct riscv_t *rv) { 216 | assert(rv); 217 | // 
free any jit state 218 | rv_jit_free(rv); 219 | // free the rv structure 220 | free(rv); 221 | return; 222 | } 223 | 224 | void rv_reset(struct riscv_t *rv, riscv_word_t pc) { 225 | assert(rv); 226 | memset(rv->X, 0, sizeof(uint32_t) * RV_NUM_REGS); 227 | // set the reset address 228 | rv->PC = pc; 229 | // set the default stack pointer 230 | rv->X[rv_reg_sp] = DEFAULT_STACK_ADDR; 231 | // reset the csrs 232 | rv->csr_cycle = 0; 233 | rv->csr_mstatus = 0; 234 | // reset float registers 235 | #if RISCV_VM_SUPPORT_RV32F 236 | memset(rv->F, 0, sizeof(float) * RV_NUM_REGS); 237 | rv->csr_fcsr = 0; 238 | #endif 239 | rv->halt = false; 240 | } 241 | -------------------------------------------------------------------------------- /riscv_core/riscv_conf.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // enable RV32M 4 | #ifndef RISCV_VM_SUPPORT_RV32M 5 | #define RISCV_VM_SUPPORT_RV32M 1 6 | #endif 7 | // enable RV32 Zicsr 8 | #ifndef RISCV_VM_SUPPORT_Zicsr 9 | #define RISCV_VM_SUPPORT_Zicsr 1 10 | #endif 11 | // enable RV32 Zifencei 12 | #ifndef RISCV_VM_SUPPORT_Zifencei 13 | #define RISCV_VM_SUPPORT_Zifencei 1 14 | #endif 15 | // enable RV32A 16 | #ifndef RISCV_VM_SUPPORT_RV32A 17 | #define RISCV_VM_SUPPORT_RV32A 1 18 | #endif 19 | // enable RV32F 20 | #ifndef RISCV_VM_SUPPORT_RV32F 21 | #define RISCV_VM_SUPPORT_RV32F 1 22 | #endif 23 | // enable x64 JIT 24 | #ifndef RISCV_VM_X64_JIT 25 | #define RISCV_VM_X64_JIT 0 26 | #endif 27 | // enable machine mode support 28 | #ifndef RISCV_SUPPORT_MACHINE 29 | #define RISCV_SUPPORT_MACHINE 0 30 | #endif 31 | 32 | #define RISCV_DUMP_JIT_TRACE 0 33 | #define RISCV_JIT_PROFILE 0 34 | #define RISCV_DUMP_JIT_BLOCK 0 35 | 36 | // default top of stack address 37 | #define DEFAULT_STACK_ADDR (0xFFFFF000) 38 | -------------------------------------------------------------------------------- /riscv_core/riscv_jit.c: 
--------------------------------------------------------------------------------
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <windows.h>
#endif

#if __linux__
//#include <sys/cachectl.h>
#include <sys/mman.h>
#endif

#include "riscv.h"
#include "riscv_private.h"

#include "decode.h"


// note: one idea would be to keep track of how often a block is hit and then
//       when inserting it into the hash map, we swap the hot block closer to
//       its ideal hash location. it will then have a faster lookup.

// calling convention
//
//        windows     linux
// arg1   RCX         RDI
// arg2   RDX         RSI
// arg3   R8          RDX
// arg4   R9          RCX
// arg5               R8
// arg6               R9
//
// callee save
//    windows   RBX, RBP, RDI, RSI, R12, R13, R14, R15
//    linux     RBX, RBP, R12, R13, R14, R15
//
// caller save
//    windows   RAX, RCX, RDX, R8, R9, R10, R11
//    linux     RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11
//
// errata:
//    windows - caller must allocate 32 bytes of shadow space.
//    windows - stack must be 16 byte aligned.
//    linux   - no shadow space needed.
49 | 50 | 51 | // total size of the code block 52 | static const uint32_t code_size = 1024 * 1024 * 8; 53 | 54 | // total number of block map entries 55 | static const uint32_t map_size = 1024 * 64; 56 | 57 | 58 | // flush the instruction cache for a region 59 | static void sys_flush_icache(const void *start, size_t size) { 60 | #ifdef _WIN32 61 | if (!FlushInstructionCache(GetCurrentProcess(), start, size)) { 62 | // failed 63 | } 64 | #endif 65 | #ifdef __linux__ 66 | // if (cacheflush(start, size, ICACHE) == -1) { 67 | // // failed 68 | // } 69 | #endif 70 | } 71 | 72 | // allocate system executable memory 73 | static void *sys_alloc_exec_mem(uint32_t size) { 74 | #ifdef _WIN32 75 | return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE); 76 | #endif 77 | #ifdef __linux__ 78 | const int prot = PROT_READ | PROT_WRITE | PROT_EXEC; 79 | // mmap(addr, length, prot, flags, fd, offset) 80 | return mmap(NULL, size, prot, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); 81 | #endif 82 | } 83 | 84 | static void sys_free_exec_mem(void *ptr) { 85 | #ifdef _WIN32 86 | VirtualFree(ptr, 0, MEM_RELEASE); 87 | #endif 88 | #ifdef __linux__ 89 | assert(!"todo"); 90 | #endif 91 | } 92 | 93 | // this hash function is used when mapping addresses to indexes in the block map 94 | static uint32_t wang_hash(uint32_t a) { 95 | a = (a ^ 61) ^ (a >> 16); 96 | a = a + (a << 3); 97 | a = a ^ (a >> 4); 98 | a = a * 0x27d4eb2d; 99 | a = a ^ (a >> 15); 100 | return a; 101 | } 102 | 103 | // allocate a block map 104 | void block_map_alloc(struct block_map_t *map, uint32_t num_entries) { 105 | assert(0 == (num_entries & (num_entries - 1))); 106 | const uint32_t size = num_entries * sizeof(struct block_t *); 107 | void *ptr = malloc(size); 108 | memset(ptr, 0, size); 109 | map->map = (struct block_t**)ptr; 110 | map->num_entries = num_entries; 111 | } 112 | 113 | // free a block map 114 | static void block_map_free(struct block_map_t *map) { 115 | assert(map->map); 116 | free(map->map); 117 | 
map->map = NULL; 118 | } 119 | 120 | // clear all entries in the block map 121 | // note: will not clear the code buffer 122 | static void block_map_clear(struct block_map_t *map) { 123 | assert(map); 124 | memset(map->map, 0, map->num_entries * sizeof(struct block_t *)); 125 | } 126 | 127 | // insert a block into a blockmap 128 | static void block_map_insert(struct block_map_t *map, struct block_t *block) { 129 | assert(map->map && block); 130 | // insert into the block map 131 | const uint32_t mask = map->num_entries - 1; 132 | uint32_t index = wang_hash(block->pc_start); 133 | for (;; ++index) { 134 | if (map->map[index & mask] == NULL) { 135 | map->map[index & mask] = block; 136 | break; 137 | } 138 | } 139 | } 140 | 141 | // expand the block map by a factor of x2 142 | static void block_map_enlarge(struct riscv_jit_t *jit) { 143 | // allocate a new block map 144 | struct block_map_t new_map; 145 | block_map_alloc(&new_map, jit->block_map.num_entries * 2); 146 | // insert blocks into new map 147 | for (uint32_t i = 0; i < jit->block_map.num_entries; ++i) { 148 | struct block_t *block = jit->block_map.map[i]; 149 | if (block) { 150 | block_map_insert(&new_map, block); 151 | } 152 | } 153 | // release old map 154 | block_map_free(&jit->block_map); 155 | // use new map 156 | jit->block_map.map = new_map.map; 157 | jit->block_map.num_entries = new_map.num_entries; 158 | } 159 | 160 | // allocate a new code block 161 | static struct block_t *block_alloc(struct riscv_jit_t *jit) { 162 | // place a new block 163 | struct block_t *block = (struct block_t *)jit->code.head; 164 | struct cg_state_t *cg = &block->cg; 165 | // set the initial codegen write head 166 | cg_init(cg, block->code, jit->code.end); 167 | block->predict = NULL; 168 | #if RISCV_JIT_PROFILE 169 | block->hit_count = 0; 170 | #endif 171 | return block; 172 | } 173 | 174 | // dump the code from a block 175 | static void block_dump(struct block_t *block, FILE *fd) { 176 | fprintf(fd, "// %08x\n", 
block->pc_start); 177 | const uint8_t *ptr = block->cg.start; 178 | for (int i = 0; ptr + i < block->cg.head; ++i) { 179 | fprintf(fd, (i && i % 16 == 0) ? "\n%02x " : "%02x ", (int)ptr[i]); 180 | } 181 | fprintf(fd, "\n"); 182 | } 183 | 184 | // finialize a code block and insert into the block map 185 | static void block_finish(struct riscv_jit_t *jit, struct block_t *block) { 186 | assert(jit && block && jit->code.head && jit->block_map.map); 187 | struct cg_state_t *cg = &block->cg; 188 | // advance the block head ready for the next alloc 189 | jit->code.head = block->code + cg_size(cg); 190 | // insert into the block map 191 | block_map_insert(&jit->block_map, block); 192 | #if RISCV_DUMP_JIT_TRACE 193 | block_dump(block, stdout); 194 | #endif 195 | // flush the instructon cache for this block 196 | sys_flush_icache(block->code, cg_size(cg)); 197 | } 198 | 199 | // try to locate an already translated block in the block map 200 | static struct block_t *block_find(struct riscv_jit_t *jit, uint32_t addr) { 201 | assert(jit && jit->block_map.map); 202 | uint32_t index = wang_hash(addr); 203 | const uint32_t mask = jit->block_map.num_entries - 1; 204 | for (;; ++index) { 205 | struct block_t *block = jit->block_map.map[index & mask]; 206 | if (block == NULL) { 207 | return NULL; 208 | } 209 | if (block->pc_start == addr) { 210 | return block; 211 | } 212 | } 213 | } 214 | 215 | // callback for unhandled op_op instructions 216 | static void handle_op_op(struct riscv_t *rv, uint32_t inst) { 217 | // r-type decode 218 | const uint32_t rd = dec_rd(inst); 219 | const uint32_t funct3 = dec_funct3(inst); 220 | const uint32_t rs1 = dec_rs1(inst); 221 | const uint32_t rs2 = dec_rs2(inst); 222 | const uint32_t funct7 = dec_funct7(inst); 223 | 224 | switch (funct7) { 225 | case 0b0000001: 226 | // RV32M instructions 227 | switch (funct3) { 228 | case 0b010: // MULHSU 229 | { 230 | const int64_t a = (int32_t)rv->X[rs1]; 231 | const uint64_t b = rv->X[rs2]; 232 | rv->X[rd] = 
((uint64_t)(a * b)) >> 32; 233 | } 234 | break; 235 | case 0b100: // DIV 236 | { 237 | const int32_t dividend = (int32_t)rv->X[rs1]; 238 | const int32_t divisor = (int32_t)rv->X[rs2]; 239 | if (divisor == 0) { 240 | rv->X[rd] = ~0u; 241 | } 242 | else if (divisor == -1 && rv->X[rs1] == 0x80000000u) { 243 | rv->X[rd] = rv->X[rs1]; 244 | } 245 | else { 246 | rv->X[rd] = dividend / divisor; 247 | } 248 | } 249 | break; 250 | case 0b101: // DIVU 251 | { 252 | const uint32_t dividend = rv->X[rs1]; 253 | const uint32_t divisor = rv->X[rs2]; 254 | if (divisor == 0) { 255 | rv->X[rd] = ~0u; 256 | } 257 | else { 258 | rv->X[rd] = dividend / divisor; 259 | } 260 | } 261 | break; 262 | case 0b110: // REM 263 | { 264 | const int32_t dividend = rv->X[rs1]; 265 | const int32_t divisor = rv->X[rs2]; 266 | if (divisor == 0) { 267 | rv->X[rd] = dividend; 268 | } 269 | else if (divisor == -1 && rv->X[rs1] == 0x80000000u) { 270 | rv->X[rd] = 0; 271 | } 272 | else { 273 | rv->X[rd] = dividend % divisor; 274 | } 275 | } 276 | break; 277 | case 0b111: // REMU 278 | { 279 | const uint32_t dividend = rv->X[rs1]; 280 | const uint32_t divisor = rv->X[rs2]; 281 | if (divisor == 0) { 282 | rv->X[rd] = dividend; 283 | } 284 | else { 285 | rv->X[rd] = dividend % divisor; 286 | } 287 | } 288 | break; 289 | default: 290 | assert(!"unreachable"); 291 | } 292 | break; 293 | default: 294 | assert(!"unreachable"); 295 | } 296 | } 297 | 298 | static void handle_op_system(struct riscv_t *rv, uint32_t inst) { 299 | // i-type decode 300 | const int32_t imm = dec_itype_imm(inst); 301 | const int32_t csr = dec_csr(inst); 302 | const uint32_t funct3 = dec_funct3(inst); 303 | const uint32_t rs1 = dec_rs1(inst); 304 | const uint32_t rd = dec_rd(inst); 305 | 306 | uint32_t tmp; 307 | 308 | // dispatch by func3 field 309 | switch (funct3) { 310 | #if RISCV_VM_SUPPORT_Zicsr 311 | case 1: // CSRRW (Atomic Read/Write CSR) 312 | tmp = csr_csrrw(rv, csr, rv->X[rs1]); 313 | rv->X[rd] = rd ? 
tmp : rv->X[rd]; 314 | break; 315 | case 2: // CSRRS (Atomic Read and Set Bits in CSR) 316 | tmp = csr_csrrs(rv, csr, (rs1 == rv_reg_zero) ? 0u : rv->X[rs1]); 317 | rv->X[rd] = rd ? tmp : rv->X[rd]; 318 | break; 319 | case 3: // CSRRC (Atomic Read and Clear Bits in CSR) 320 | tmp = csr_csrrc(rv, csr, (rs1 == rv_reg_zero) ? ~0u : rv->X[rs1]); 321 | rv->X[rd] = rd ? tmp : rv->X[rd]; 322 | break; 323 | case 5: // CSRRWI 324 | tmp = csr_csrrc(rv, csr, rv->X[rs1]); 325 | rv->X[rd] = rd ? tmp : rv->X[rd]; 326 | break; 327 | case 6: // CSRRSI 328 | tmp = csr_csrrs(rv, csr, rs1); 329 | rv->X[rd] = rd ? tmp : rv->X[rd]; 330 | break; 331 | case 7: // CSRRCI 332 | tmp = csr_csrrc(rv, csr, rs1); 333 | rv->X[rd] = rd ? tmp : rv->X[rd]; 334 | break; 335 | #endif // RISCV_VM_SUPPORT_Zicsr 336 | default: 337 | assert(!"unreachable"); 338 | } 339 | } 340 | 341 | // callback for unhandled op_fp instructions 342 | static void handle_op_fp(struct riscv_t *rv, uint32_t inst) { 343 | const uint32_t rd = dec_rd(inst); 344 | const uint32_t rs1 = dec_rs1(inst); 345 | const uint32_t rs2 = dec_rs2(inst); 346 | const uint32_t rm = dec_funct3(inst); // TODO: rounding! 
347 | const uint32_t funct7 = dec_funct7(inst); 348 | // dispatch based on func7 (low 2 bits are width) 349 | switch (funct7) { 350 | case 0b0010000: 351 | { 352 | uint32_t f1, f2, res; 353 | memcpy(&f1, rv->F + rs1, 4); 354 | memcpy(&f2, rv->F + rs2, 4); 355 | switch (rm) { 356 | case 0b000: // FSGNJ.S 357 | res = (f1 & ~FMASK_SIGN) | (f2 & FMASK_SIGN); 358 | break; 359 | case 0b001: // FSGNJN.S 360 | res = (f1 & ~FMASK_SIGN) | (~f2 & FMASK_SIGN); 361 | break; 362 | case 0b010: // FSGNJX.S 363 | res = f1 ^ (f2 & FMASK_SIGN); 364 | break; 365 | default: 366 | assert(!"unreachable"); 367 | } 368 | memcpy(rv->F + rd, &res, 4); 369 | break; 370 | } 371 | case 0b0010100: 372 | switch (rm) { 373 | case 0b000: // FMIN 374 | rv->F[rd] = fminf(rv->F[rs1], rv->F[rs2]); 375 | break; 376 | case 0b001: // FMAX 377 | rv->F[rd] = fmaxf(rv->F[rs1], rv->F[rs2]); 378 | break; 379 | default: 380 | assert(!"unreachable"); 381 | } 382 | break; 383 | case 0b1110000: 384 | switch (rm) { 385 | case 0b001: // FCLASS.S 386 | { 387 | uint32_t bits; 388 | memcpy(&bits, rv->F + rs1, 4); 389 | rv->X[rd] = calc_fclass(bits); 390 | break; 391 | } 392 | default: 393 | assert(!"unreachable"); 394 | } 395 | break; 396 | case 0b1010000: 397 | switch (rm) { 398 | case 0b010: // FEQ.S 399 | rv->X[rd] = (rv->F[rs1] == rv->F[rs2]) ? 1 : 0; 400 | break; 401 | case 0b001: // FLT.S 402 | rv->X[rd] = (rv->F[rs1] < rv->F[rs2]) ? 1 : 0; 403 | break; 404 | case 0b000: // FLE.S 405 | rv->X[rd] = (rv->F[rs1] <= rv->F[rs2]) ? 
1 : 0; 406 | break; 407 | default: 408 | assert(!"unreachable"); 409 | } 410 | break; 411 | default: 412 | assert(!"unreachable"); 413 | } 414 | } 415 | 416 | static void rv_translate_block(struct riscv_t *rv, struct block_t *block) { 417 | assert(rv && block); 418 | 419 | struct cg_state_t *cg = &block->cg; 420 | 421 | // setup the basic block 422 | block->instructions = 0; 423 | block->pc_start = rv->PC; 424 | block->pc_end = rv->PC; 425 | 426 | // prologue 427 | codegen_prologue(cg); 428 | 429 | // translate the basic block 430 | for (;;) { 431 | // fetch the next instruction 432 | const uint32_t inst = rv->io.mem_ifetch(rv, block->pc_end); 433 | // decode 434 | struct rv_inst_t dec; 435 | uint32_t pc = block->pc_end; 436 | if (!decode(inst, &dec, &pc)) { 437 | assert(!"unreachable"); 438 | } 439 | // codegen 440 | if (!codegen(&dec, cg, block->pc_end, inst)) { 441 | assert(!"unreachable"); 442 | } 443 | ++block->instructions; 444 | block->pc_end = pc; 445 | // stop on branch 446 | if (inst_is_branch(&dec)) { 447 | break; 448 | } 449 | } 450 | 451 | // epilogue 452 | codegen_epilogue(cg); 453 | } 454 | 455 | static struct block_t *block_find_or_translate(struct riscv_t *rv, struct block_t *prev) { 456 | // lookup the next block in the block map 457 | struct block_t *next = block_find(&rv->jit, rv->PC); 458 | // translate if we didnt find one 459 | if (!next) { 460 | next = block_alloc(&rv->jit); 461 | assert(next); 462 | rv_translate_block(rv, next); 463 | block_finish(&rv->jit, next); 464 | // update the block predictor 465 | // note: if the block predictor gives us a win when we 466 | // translate a new block but gives us a huge penalty when 467 | // updated after we find a new block. didnt expect that. 
468 | if (prev) { 469 | prev->predict = next; 470 | } 471 | } 472 | assert(next); 473 | return next; 474 | } 475 | 476 | static void rv_jit_dump_stats(struct riscv_t *rv) { 477 | struct riscv_jit_t *jit = &rv->jit; 478 | 479 | uint32_t num_blocks = 0; 480 | uint32_t code_size = (uint32_t)(jit->code.head - jit->code.start); 481 | 482 | for (uint32_t i = 0; i < jit->block_map.num_entries; ++i) { 483 | struct block_t *block = jit->block_map.map[i]; 484 | if (!block) { 485 | continue; 486 | } 487 | ++num_blocks; 488 | 489 | #if RISCV_JIT_PROFILE 490 | if (block->hit_count > 1000) { 491 | block_dump(block, stdout); 492 | fprintf(stdout, "Hit count: %u\n", block->hit_count); 493 | } 494 | #endif 495 | } 496 | 497 | fprintf(stdout, "Number of blocks: %u\n", num_blocks); 498 | fprintf(stdout, "Code size: %u\n", code_size); 499 | } 500 | 501 | // flush the blockmap and code cache 502 | static void rv_jit_clear(struct riscv_t *rv) { 503 | struct riscv_jit_t *jit = &rv->jit; 504 | // clear the block map 505 | block_map_clear(&jit->block_map); 506 | // reset the code buffer write position 507 | jit->code.head = jit->code.start; 508 | } 509 | 510 | void rv_step(struct riscv_t *rv, int32_t cycles) { 511 | 512 | // find or translate a block for our starting PC 513 | struct block_t *block = block_find_or_translate(rv, NULL); 514 | assert(block); 515 | 516 | const uint64_t cycles_start = rv->csr_cycle; 517 | const uint64_t cycles_target = rv->csr_cycle + cycles; 518 | 519 | // loop until we hit out cycle target 520 | while (rv->csr_cycle < cycles_target && !rv->halt) { 521 | 522 | const uint32_t pc = rv->PC; 523 | 524 | // try to predict the next block 525 | // note: block predition gives us ~100 MIPS boost. 
526 | if (block->predict && block->predict->pc_start == pc) { 527 | block = block->predict; 528 | } 529 | else { 530 | // lookup the next block in the block map or translate a new block 531 | struct block_t *next = block_find_or_translate(rv, block); 532 | // move onto the next block 533 | block = next; 534 | } 535 | 536 | // we should have a block by now 537 | assert(block); 538 | 539 | // call the translated block 540 | typedef void(*call_block_t)(struct riscv_t *); 541 | #if RISCV_JIT_PROFILE 542 | block->hit_count++; 543 | #endif 544 | call_block_t c = (call_block_t)block->code; 545 | c(rv); 546 | 547 | // increment the cycles csr 548 | rv->csr_cycle += block->instructions; 549 | 550 | // if this block has no instructions we cant make forward progress so 551 | // must fallback to instruction emulation 552 | if (!block->instructions) { 553 | assert(!"unable to execute empty block"); 554 | } 555 | } 556 | } 557 | 558 | bool rv_jit_init(struct riscv_t *rv) { 559 | struct riscv_jit_t *jit = &rv->jit; 560 | 561 | // allocate the block map which maps address to blocks 562 | if (jit->block_map.map == NULL) { 563 | jit->block_map.num_entries = map_size; 564 | block_map_alloc(&jit->block_map, map_size); 565 | } 566 | 567 | // allocate block/code storage space 568 | if (jit->code.start == NULL) { 569 | void *ptr = sys_alloc_exec_mem(code_size); 570 | memset(ptr, 0xcc, code_size); 571 | jit->code.start = ptr; 572 | jit->code.head = ptr; 573 | jit->code.end = jit->code.start + code_size; 574 | } 575 | 576 | // setup nonjit instruction callbacks 577 | jit->handle_op_op = handle_op_op; 578 | jit->handle_op_fp = handle_op_fp; 579 | jit->handle_op_system = handle_op_system; 580 | 581 | return true; 582 | } 583 | 584 | void rv_jit_free(struct riscv_t *rv) { 585 | struct riscv_jit_t *jit = &rv->jit; 586 | 587 | #if RISCV_DUMP_JIT_BLOCK 588 | rv_jit_dump_stats(rv); 589 | #endif 590 | 591 | if (jit->block_map.map) { 592 | block_map_free(&jit->block_map); 593 | jit->block_map.map = 
// instruction decode masks
//
// Each constant selects one bit field of a 32 bit instruction word; the
// trailing comment gives the inclusive bit range the mask covers.
enum {
  INST_6_2     = 0x0000007C, // bits  6:2
  // r-type
  FR_OPCODE    = 0x0000007F, // bits  6:0
  FR_RD        = 0x00000F80, // bits 11:7
  FR_FUNCT3    = 0x00007000, // bits 14:12
  FR_RS1       = 0x000F8000, // bits 19:15
  FR_RS2       = 0x01F00000, // bits 24:20
  FR_FUNCT7    = 0xFE000000, // bits 31:25
  // i-type
  FI_IMM_11_0  = 0xFFF00000, // bits 31:20
  // s-type
  FS_IMM_4_0   = 0x00000F80, // bits 11:7
  FS_IMM_11_5  = 0xFE000000, // bits 31:25
  // b-type
  FB_IMM_11    = 0x00000080, // bit   7
  FB_IMM_4_1   = 0x00000F00, // bits 11:8
  FB_IMM_10_5  = 0x7E000000, // bits 30:25
  FB_IMM_12    = 0x80000000, // bit  31
  // u-type
  FU_IMM_31_12 = 0xFFFFF000, // bits 31:12
  // j-type
  FJ_IMM_19_12 = 0x000FF000, // bits 19:12
  FJ_IMM_11    = 0x00100000, // bit  20
  FJ_IMM_10_1  = 0x7FE00000, // bits 30:21
  FJ_IMM_20    = 0x80000000, // bit  31
  // r4-type
  FR4_FMT      = 0x06000000, // bits 26:25
  FR4_RS3      = 0xF8000000, // bits 31:27
};

// field masks for a 32 bit float held in an integer
enum {
  FMASK_SIGN = 0x80000000, // bit  31
  FMASK_EXPN = 0x7F800000, // bits 30:23
  FMASK_FRAC = 0x007FFFFF, // bits 22:0
};
*head; 120 | }; 121 | 122 | struct riscv_jit_t { 123 | // code buffer 124 | struct code_buffer_t code; 125 | // block hash map 126 | struct block_map_t block_map; 127 | // handler for non jitted op_op instructions 128 | void(*handle_op_op)(struct riscv_t *, uint32_t); 129 | void(*handle_op_fp)(struct riscv_t *, uint32_t); 130 | void(*handle_op_system)(struct riscv_t *, uint32_t); 131 | }; 132 | 133 | struct riscv_t { 134 | 135 | bool halt; 136 | 137 | // io interface 138 | struct riscv_io_t io; 139 | // integer registers 140 | riscv_word_t X[RV_NUM_REGS]; 141 | riscv_word_t PC; 142 | // user provided data 143 | riscv_user_t userdata; 144 | 145 | #if RISCV_VM_SUPPORT_RV32F 146 | // float registers 147 | riscv_float_t F[RV_NUM_REGS]; 148 | uint32_t csr_fcsr; 149 | #endif // RISCV_VM_SUPPORT_RV32F 150 | 151 | // csr registers 152 | uint64_t csr_cycle; 153 | uint32_t csr_mstatus; 154 | uint32_t csr_mtvec; 155 | uint32_t csr_misa; 156 | uint32_t csr_mtval; 157 | uint32_t csr_mcause; 158 | uint32_t csr_mscratch; 159 | uint32_t csr_mepc; 160 | uint32_t csr_mip; 161 | uint32_t csr_mbadaddr; 162 | 163 | // jit specific data 164 | struct riscv_jit_t jit; 165 | }; 166 | 167 | // decode rd field 168 | static inline uint32_t dec_rd(uint32_t inst) { 169 | return (inst & FR_RD) >> 7; 170 | } 171 | 172 | // decode rs1 field 173 | static inline uint32_t dec_rs1(uint32_t inst) { 174 | return (inst & FR_RS1) >> 15; 175 | } 176 | 177 | // decode rs2 field 178 | static inline uint32_t dec_rs2(uint32_t inst) { 179 | return (inst & FR_RS2) >> 20; 180 | } 181 | 182 | // decoded funct3 field 183 | static inline uint32_t dec_funct3(uint32_t inst) { 184 | return (inst & FR_FUNCT3) >> 12; 185 | } 186 | 187 | // decode funct7 field 188 | static inline uint32_t dec_funct7(uint32_t inst) { 189 | return (inst & FR_FUNCT7) >> 25; 190 | } 191 | 192 | // decode utype instruction immediate 193 | static inline uint32_t dec_utype_imm(uint32_t inst) { 194 | return inst & FU_IMM_31_12; 195 | } 196 | 197 
| // decode jtype instruction immediate 198 | static inline int32_t dec_jtype_imm(uint32_t inst) { 199 | uint32_t dst = 0; 200 | dst |= (inst & FJ_IMM_20); 201 | dst |= (inst & FJ_IMM_19_12) << 11; 202 | dst |= (inst & FJ_IMM_11) << 2; 203 | dst |= (inst & FJ_IMM_10_1) >> 9; 204 | // note: shifted to 2nd least significant bit 205 | return ((int32_t)dst) >> 11; 206 | } 207 | 208 | // decode itype instruction immediate 209 | static inline int32_t dec_itype_imm(uint32_t inst) { 210 | return ((int32_t)(inst & FI_IMM_11_0)) >> 20; 211 | } 212 | 213 | // decode r4type format field 214 | static inline uint32_t dec_r4type_fmt(uint32_t inst) { 215 | return (inst & FR4_FMT) >> 25; 216 | } 217 | 218 | // decode r4type rs3 field 219 | static inline uint32_t dec_r4type_rs3(uint32_t inst) { 220 | return (inst & FR4_RS3) >> 27; 221 | } 222 | 223 | // decode csr instruction immediate (same as itype, zero extend) 224 | static inline uint32_t dec_csr(uint32_t inst) { 225 | return ((uint32_t)(inst & FI_IMM_11_0)) >> 20; 226 | } 227 | 228 | // decode btype instruction immediate 229 | static inline int32_t dec_btype_imm(uint32_t inst) { 230 | uint32_t dst = 0; 231 | dst |= (inst & FB_IMM_12); 232 | dst |= (inst & FB_IMM_11) << 23; 233 | dst |= (inst & FB_IMM_10_5) >> 1; 234 | dst |= (inst & FB_IMM_4_1) << 12; 235 | // note: shifted to 2nd least significant bit 236 | return ((int32_t)dst) >> 19; 237 | } 238 | 239 | // decode stype instruction immediate 240 | static inline int32_t dec_stype_imm(uint32_t inst) { 241 | uint32_t dst = 0; 242 | dst |= (inst & FS_IMM_11_5); 243 | dst |= (inst & FS_IMM_4_0) << 13; 244 | return ((int32_t)dst) >> 20; 245 | } 246 | 247 | // sign extend a 16 bit value 248 | static inline uint32_t sign_extend_h(uint32_t x) { 249 | return (int32_t)((int16_t)x); 250 | } 251 | 252 | // sign extend an 8 bit value 253 | static inline uint32_t sign_extend_b(uint32_t x) { 254 | return (int32_t)((int8_t)x); 255 | } 256 | 257 | // compute the fclass result 258 | static 
inline uint32_t calc_fclass(uint32_t f) { 259 | const uint32_t sign = f & FMASK_SIGN; 260 | const uint32_t expn = f & FMASK_EXPN; 261 | const uint32_t frac = f & FMASK_FRAC; 262 | 263 | // note: this could be turned into a binary decision tree for speed 264 | 265 | uint32_t out = 0; 266 | // 0x001 rs1 is -INF 267 | out |= (f == 0xff800000) ? 0x001 : 0; 268 | // 0x002 rs1 is negative normal 269 | out |= (expn && expn < 0x78000000 && sign) ? 0x002 : 0; 270 | // 0x004 rs1 is negative subnormal 271 | out |= (!expn && frac && sign) ? 0x004 : 0; 272 | // 0x008 rs1 is -0 273 | out |= (f == 0x80000000) ? 0x008 : 0; 274 | // 0x010 rs1 is +0 275 | out |= (f == 0x00000000) ? 0x010 : 0; 276 | // 0x020 rs1 is positive subnormal 277 | out |= (!expn && frac && !sign) ? 0x020 : 0; 278 | // 0x040 rs1 is positive normal 279 | out |= (expn && expn < 0x78000000 && !sign) ? 0x040 : 0; 280 | // 0x080 rs1 is +INF 281 | out |= (f == 0x7f800000) ? 0x080 : 0; 282 | // 0x100 rs1 is a signaling NaN 283 | out |= (expn == FMASK_EXPN && (frac <= 0x7ff) && frac) ? 0x100 : 0; 284 | // 0x200 rs1 is a quiet NaN 285 | out |= (expn == FMASK_EXPN && (frac >= 0x800)) ? 
0x200 : 0; 286 | 287 | return out; 288 | } 289 | 290 | void rv_except_inst_misaligned(struct riscv_t *rv, uint32_t old_pc); 291 | void rv_except_load_misaligned(struct riscv_t *rv, uint32_t addr); 292 | void rv_except_store_misaligned(struct riscv_t *rv, uint32_t addr); 293 | void rv_except_illegal_inst(struct riscv_t *rv); 294 | 295 | uint32_t csr_csrrw(struct riscv_t *rv, uint32_t csr, uint32_t val); 296 | uint32_t csr_csrrs(struct riscv_t *rv, uint32_t csr, uint32_t val); 297 | uint32_t csr_csrrc(struct riscv_t *rv, uint32_t csr, uint32_t val); 298 | 299 | bool rv_jit_init(struct riscv_t *rv); 300 | void rv_jit_free(struct riscv_t *rv); 301 | -------------------------------------------------------------------------------- /riscv_vm/args.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | extern bool g_arg_trace; 6 | extern bool g_arg_compliance; 7 | extern bool g_arg_show_mips; 8 | extern bool g_fullscreen; 9 | extern bool g_no_jit; 10 | 11 | extern const char *g_arg_program; 12 | 13 | 14 | void print_usage(const char *filename) { 15 | fprintf(stderr, R"( 16 | Usage: %s [options] 17 | Option: | Description: 18 | ----------------+----------------------------------- 19 | program | RV32IM ELF file to execute 20 | --compliance | Generate a compliance signature 21 | --trace | Print execution trace 22 | --show-mips | Show MIPS throughput 23 | --fullscreen | Run in a fullscreen window 24 | )", filename); 25 | } 26 | 27 | bool parse_args(int argc, char **args) { 28 | // parse each argument in turn 29 | for (int i = 1; i < argc; ++i) { 30 | const char *arg = args[i]; 31 | // parse flags 32 | if (arg[0] == '-') { 33 | if (0 == strcmp(arg, "--help")) { 34 | return false; 35 | } 36 | if (0 == strcmp(arg, "--compliance")) { 37 | g_arg_compliance = true; 38 | continue; 39 | } 40 | if (0 == strcmp(arg, "--trace")) { 41 | g_arg_trace = true; 42 | continue; 43 | } 44 | if (0 == strcmp(arg, "--show-mips")) { 
45 | g_arg_show_mips = true; 46 | continue; 47 | } 48 | if (0 == strcmp(arg, "--fullscreen")) { 49 | g_fullscreen = true; 50 | continue; 51 | } 52 | // error 53 | fprintf(stderr, "Unknown argument '%s'\n", arg); 54 | return false; 55 | } 56 | // set the executable 57 | g_arg_program = arg; 58 | } 59 | // success 60 | return true; 61 | } 62 | -------------------------------------------------------------------------------- /riscv_vm/elf.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "elf.h" 6 | #include "memory.h" 7 | 8 | #include "../riscv_core/riscv.h" 9 | 10 | elf_t::elf_t() 11 | : hdr(nullptr) 12 | , raw_size(0) 13 | { 14 | } 15 | 16 | bool elf_t::upload(struct riscv_t *rv, memory_t &mem) const { 17 | // set the entry point 18 | rv_set_pc(rv, hdr->e_entry); 19 | // loop over all of the program headers 20 | for (int p = 0; p < hdr->e_phnum; ++p) { 21 | // find next program header 22 | uint32_t offset = hdr->e_phoff + (p * hdr->e_phentsize); 23 | const ELF::Elf32_Phdr *phdr = (const ELF::Elf32_Phdr*)(data() + offset); 24 | // check this section should be loaded 25 | if (phdr->p_type != ELF::PT_LOAD) { 26 | continue; 27 | } 28 | // memcpy required range 29 | const int to_copy = std::min(phdr->p_memsz, phdr->p_filesz); 30 | if (to_copy) { 31 | mem.write(phdr->p_vaddr, data() + phdr->p_offset, to_copy); 32 | } 33 | // zero fill required range 34 | const int to_zero = std::max(phdr->p_memsz, phdr->p_filesz) - to_copy; 35 | if (to_zero) { 36 | mem.fill(phdr->p_vaddr + to_copy, to_zero, 0); 37 | } 38 | } 39 | // success 40 | return true; 41 | } 42 | 43 | bool elf_t::load(const char *path) { 44 | // free previous memory 45 | if (raw_data) { 46 | release(); 47 | } 48 | // open the file handle 49 | FILE *fd = fopen(path, "rb"); 50 | if (!fd) { 51 | return false; 52 | } 53 | // get file size 54 | fseek(fd, 0, SEEK_END); 55 | raw_size = ftell(fd); 56 | fseek(fd, 0, SEEK_SET); 57 | 
if (raw_size == 0) { 58 | fclose(fd); 59 | return false; 60 | } 61 | // allocate memory 62 | raw_data.reset(new uint8_t[raw_size]); 63 | // read data into memory 64 | const size_t read = fread(raw_data.get(), 1, raw_size, fd); 65 | // close file handle 66 | fclose(fd); 67 | if (read != raw_size) { 68 | release(); 69 | return false; 70 | } 71 | // point to the header 72 | hdr = (const ELF::Elf32_Ehdr*)data(); 73 | // check it is a valid ELF file 74 | if (!is_valid()) { 75 | release(); 76 | return false; 77 | } 78 | // success 79 | return true; 80 | } 81 | 82 | void elf_t::fill_symbols() { 83 | // init the symbol table 84 | symbols.clear(); 85 | symbols[0] = "NULL"; 86 | // get the string table 87 | const char *strtab = get_strtab(); 88 | if (!strtab) { 89 | return; 90 | } 91 | // get the symbol table 92 | const ELF::Elf32_Shdr *shdr = get_section_header(".symtab"); 93 | if (!shdr) { 94 | return; 95 | } 96 | // find symbol table range 97 | const ELF::Elf32_Sym *sym = (const ELF::Elf32_Sym *)(data() + shdr->sh_offset); 98 | const ELF::Elf32_Sym *end = (const ELF::Elf32_Sym *)(data() + shdr->sh_offset + shdr->sh_size); 99 | // try to find the symbol 100 | for (; sym < end; ++sym) { 101 | const char *sym_name = strtab + sym->st_name; 102 | // add to the symbol table 103 | switch (ELF_ST_TYPE(sym->st_info)) { 104 | case ELF::STT_NOTYPE: 105 | case ELF::STT_OBJECT: 106 | case ELF::STT_FUNC: 107 | symbols[uint32_t(sym->st_value)] = sym_name; 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /riscv_vm/elf.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | 9 | namespace ELF { 10 | 11 | typedef uint32_t Elf32_Addr; 12 | typedef uint32_t Elf32_Off; 13 | typedef uint16_t Elf32_Half; 14 | typedef uint32_t Elf32_Word; 15 | 16 | enum { 17 | EI_MAG0 = 0, 18 | EI_MAG1 = 1, 19 | EI_MAG2 = 2, 20 | EI_MAG3 = 3, 21 
// upper nibble of a symbol's st_info: the symbol binding
#define ELF_ST_BIND(x) ((x) >> 4)
// lower nibble of a symbol's st_info: the symbol type (one of STT_*).
// The argument is parenthesised so the cast applies to the whole
// expression passed in rather than only to its first operand.
#define ELF_ST_TYPE(x) (((unsigned int)(x)) & 0xf)
Elf32_Half e_shentsize; 126 | Elf32_Half e_shnum; 127 | Elf32_Half e_shstrndx; 128 | }; 129 | 130 | struct Elf32_Phdr { 131 | Elf32_Word p_type; 132 | Elf32_Off p_offset; 133 | Elf32_Addr p_vaddr; 134 | Elf32_Addr p_paddr; 135 | Elf32_Word p_filesz; 136 | Elf32_Word p_memsz; 137 | Elf32_Word p_flags; 138 | Elf32_Word p_align; 139 | }; 140 | 141 | struct Elf32_Shdr { 142 | Elf32_Word sh_name; 143 | Elf32_Word sh_type; 144 | Elf32_Word sh_flags; 145 | Elf32_Addr sh_addr; 146 | Elf32_Off sh_offset; 147 | Elf32_Word sh_size; 148 | Elf32_Word sh_link; 149 | Elf32_Word sh_info; 150 | Elf32_Word sh_addralign; 151 | Elf32_Word sh_entsize; 152 | }; 153 | 154 | struct Elf32_Sym { 155 | Elf32_Word st_name; 156 | Elf32_Addr st_value; 157 | Elf32_Word st_size; 158 | uint8_t st_info; 159 | uint8_t st_other; 160 | Elf32_Half st_shndx; 161 | }; 162 | 163 | } // namespace ELF 164 | 165 | struct memory_t; 166 | 167 | // a very minimal ELF parser 168 | struct elf_t { 169 | 170 | elf_t(); 171 | 172 | // load an ELF file from disk 173 | bool load(const char *path); 174 | 175 | // release a loaded ELF file 176 | void release() { 177 | raw_data.reset(); 178 | raw_size = 0; 179 | hdr = nullptr; 180 | } 181 | 182 | // check the ELF file header is valid 183 | bool is_valid() const { 184 | // check for ELF magic 185 | if (hdr->e_ident[0] != 0x7f && 186 | hdr->e_ident[1] != 'E' && 187 | hdr->e_ident[2] != 'L' && 188 | hdr->e_ident[3] != 'F') { 189 | return false; 190 | } 191 | // must be 32bit ELF 192 | if (hdr->e_ident[ELF::EI_CLASS] != ELF::ELFCLASS32) { 193 | return false; 194 | } 195 | // check machine type is RISCV 196 | if (hdr->e_machine != ELF::EM_RISCV) { 197 | return false; 198 | } 199 | // success 200 | return true; 201 | } 202 | 203 | // get section header string table 204 | const char *get_sh_string(int index) const { 205 | uint32_t offset = hdr->e_shoff + hdr->e_shstrndx * hdr->e_shentsize; 206 | const ELF::Elf32_Shdr *shdr = (const ELF::Elf32_Shdr*)(data() + offset); 207 | 
return (const char*)(data() + shdr->sh_offset + index); 208 | } 209 | 210 | // get a section header 211 | const ELF::Elf32_Shdr *get_section_header(const char *name) const { 212 | for (int s = 0; s < hdr->e_shnum; ++s) { 213 | uint32_t offset = hdr->e_shoff + s * hdr->e_shentsize; 214 | const ELF::Elf32_Shdr *shdr = (const ELF::Elf32_Shdr*)(data() + offset); 215 | const char *sname = get_sh_string(shdr->sh_name); 216 | if (strcmp(name, sname) == 0) { 217 | return shdr; 218 | } 219 | } 220 | return nullptr; 221 | } 222 | 223 | // get the load range of a section 224 | bool get_data_section_range(uint32_t &start, uint32_t &end) const { 225 | const ELF::Elf32_Shdr *shdr = get_section_header(".data"); 226 | if (!shdr) { 227 | return false; 228 | } 229 | if (shdr->sh_type == ELF::SHT_NOBITS) { 230 | return false; 231 | } 232 | start = shdr->sh_addr; 233 | end = start + shdr->sh_size; 234 | return true; 235 | } 236 | 237 | // get the ELF string table 238 | const char *get_strtab() const { 239 | const ELF::Elf32_Shdr *shdr = get_section_header(".strtab"); 240 | if (!shdr) { 241 | return nullptr; 242 | } 243 | return (const char *)(data() + shdr->sh_offset); 244 | } 245 | 246 | // find a symbol entry 247 | const ELF::Elf32_Sym* get_symbol(const char *name) const { 248 | // get the string table 249 | const char *strtab = get_strtab(); 250 | if (!strtab) { 251 | return nullptr; 252 | } 253 | // get the symbol table 254 | const ELF::Elf32_Shdr *shdr = get_section_header(".symtab"); 255 | if (!shdr) { 256 | return nullptr; 257 | } 258 | // find symbol table range 259 | const ELF::Elf32_Sym *sym = (const ELF::Elf32_Sym *)(data() + shdr->sh_offset); 260 | const ELF::Elf32_Sym *end = (const ELF::Elf32_Sym *)(data() + shdr->sh_offset + shdr->sh_size); 261 | // try to find the symbol 262 | for (; sym < end; ++sym) { 263 | const char *sym_name = strtab + sym->st_name; 264 | if (strcmp(name, sym_name) == 0) { 265 | return sym; 266 | } 267 | } 268 | // cant find the symbol 269 | return 
// Owns the complete contents of a file read from disk.
struct file_t {

  // Replace the current contents with the file at `path`.
  //
  // Returns true when the whole file was read.  Returns false, leaving the
  // object empty, when the file cannot be opened, its size cannot be
  // determined, it is empty, or the read comes up short.
  bool load(const char *path) {

    // always start from the empty state so a failed load never reports
    // the size of a previous (or never loaded) file
    unload();

    FILE *fd = fopen(path, "rb");
    if (!fd) {
      return false;
    }

    // ftell reports failure as a negative value; it must not be stored
    // into the unsigned size unchecked
    long file_size = -1;
    if (fseek(fd, 0, SEEK_END) == 0) {
      file_size = ftell(fd);
    }
    if (file_size <= 0 || fseek(fd, 0, SEEK_SET) != 0) {
      fclose(fd);
      return false;
    }

    mem_size = size_t(file_size);
    mem.reset(new uint8_t[mem_size]);

    const size_t read = fread(mem.get(), 1, mem_size, fd);

    fclose(fd);
    if (read != mem_size) {
      unload();
      return false;
    }

    return true;
  }

  // drop the contents and return to the empty state
  void unload() {
    mem.reset();
    mem_size = 0;
  }

  // first byte of the contents, or nullptr when empty
  uint8_t *data() const { return mem.get(); }

  // number of bytes held, 0 when empty
  size_t size() const { return mem_size; }

protected:
  std::unique_ptr<uint8_t[]> mem;
  // initialised so size() is well defined before the first load()
  size_t mem_size = 0;
};
#include 3 | #include 4 | 5 | #include "elf.h" 6 | #include "file.h" 7 | #include "memory.h" 8 | 9 | #include "../riscv_core/riscv.h" 10 | #include "state.h" 11 | 12 | 13 | // enable program trace mode 14 | bool g_arg_trace = false; 15 | // enable compliance mode 16 | bool g_arg_compliance = false; 17 | // target executable 18 | const char *g_arg_program = "a.out"; 19 | // show MIPS 20 | bool g_arg_show_mips = false; 21 | // run in fullscreen 22 | bool g_fullscreen = false; 23 | // disable jit code generation 24 | bool g_no_jit = false; 25 | 26 | // main syscall handler 27 | void syscall_handler(struct riscv_t *); 28 | // arg parsing functions 29 | void print_usage(const char *filename); 30 | bool parse_args(int argc, char **args); 31 | 32 | namespace { 33 | 34 | riscv_word_t imp_mem_ifetch(struct riscv_t *rv, riscv_word_t addr) { 35 | state_t *s = (state_t*)rv_userdata(rv); 36 | return s->mem.read_ifetch(addr); 37 | } 38 | 39 | riscv_word_t imp_mem_read_w(struct riscv_t *rv, riscv_word_t addr) { 40 | state_t *s = (state_t*)rv_userdata(rv); 41 | return s->mem.read_w(addr); 42 | } 43 | 44 | riscv_half_t imp_mem_read_s(struct riscv_t *rv, riscv_word_t addr) { 45 | state_t *s = (state_t*)rv_userdata(rv); 46 | return s->mem.read_s(addr); 47 | } 48 | 49 | riscv_byte_t imp_mem_read_b(struct riscv_t *rv, riscv_word_t addr) { 50 | state_t *s = (state_t*)rv_userdata(rv); 51 | return s->mem.read_b(addr); 52 | } 53 | 54 | void imp_mem_write_w(struct riscv_t *rv, riscv_word_t addr, riscv_word_t data) { 55 | state_t *s = (state_t*)rv_userdata(rv); 56 | s->mem.write(addr, (uint8_t*)&data, sizeof(data)); 57 | } 58 | 59 | void imp_mem_write_s(struct riscv_t *rv, riscv_word_t addr, riscv_half_t data) { 60 | state_t *s = (state_t*)rv_userdata(rv); 61 | s->mem.write(addr, (uint8_t*)&data, sizeof(data)); 62 | } 63 | 64 | void imp_mem_write_b(struct riscv_t *rv, riscv_word_t addr, riscv_byte_t data) { 65 | state_t *s = (state_t*)rv_userdata(rv); 66 | s->mem.write(addr, (uint8_t*)&data, 
sizeof(data)); 67 | } 68 | 69 | void imp_on_ecall(struct riscv_t *rv) { 70 | // access userdata 71 | state_t *s = (state_t*)rv_userdata(rv); 72 | // in compliance testing it seems any `ecall` should abort 73 | if (g_arg_compliance) { 74 | rv_halt(rv); 75 | return; 76 | } 77 | // pass to the syscall handler 78 | syscall_handler(rv); 79 | } 80 | 81 | void imp_on_ebreak(struct riscv_t *rv) { 82 | state_t *s = (state_t*)rv_userdata(rv); 83 | rv_halt(rv); 84 | } 85 | 86 | // run the core - printing out an instruction trace 87 | void run_and_trace(riscv_t *rv, state_t *state, elf_t &elf) { 88 | static const uint32_t cycles_per_step = 1; 89 | // run until we see the flag that we are done 90 | for (; !rv_has_halted(rv);) { 91 | // trace execution 92 | uint32_t pc = rv_get_pc(rv); 93 | const char *sym = elf.find_symbol(pc); 94 | printf("%08x %s\n", pc, (sym ? sym : "")); 95 | // step instructions 96 | rv_step(rv, cycles_per_step); 97 | } 98 | } 99 | 100 | // run the core - showing MIPS throughput 101 | void run_and_show_mips(riscv_t *rv, state_t *state, elf_t &elf) { 102 | static const uint32_t cycles_per_step = 500; 103 | clock_t start = clock(); 104 | 105 | uint64_t cycles_base = rv_get_csr_cycles(rv); 106 | 107 | // run until we see the flag that we are done 108 | for (; !rv_has_halted(rv);) { 109 | // track instruction MIPS 110 | if ((clock() - start) >= CLOCKS_PER_SEC) { 111 | start += CLOCKS_PER_SEC; 112 | const uint64_t cycles = rv_get_csr_cycles(rv); 113 | printf("%d IPS\n", int(cycles - cycles_base)); 114 | cycles_base = cycles; 115 | } 116 | // step instructions 117 | rv_step(rv, cycles_per_step); 118 | } 119 | } 120 | 121 | // run the core 122 | void run(riscv_t *rv, state_t *state, elf_t &elf) { 123 | static const uint32_t cycles_per_step = 100; 124 | // run until we see the flag that we are done 125 | for (; !rv_has_halted(rv);) { 126 | // step instructions 127 | rv_step(rv, cycles_per_step); 128 | } 129 | } 130 | 131 | void print_signature(state_t *state, 
elf_t &elf) { 132 | uint32_t start = 0, end = 0; 133 | // use the entire .data section as a fallback 134 | elf.get_data_section_range(start, end); 135 | // try and access the exact signature range 136 | if (const ELF::Elf32_Sym *sym = elf.get_symbol("begin_signature")) { 137 | start = sym->st_value; 138 | } 139 | if (const ELF::Elf32_Sym *sym = elf.get_symbol("end_signature")) { 140 | end = sym->st_value; 141 | } 142 | // dump it word by word 143 | for (uint32_t i = start; i < end; i += 4) { 144 | uint32_t value = state->mem.read_w(i); 145 | printf("%08x\n", value); 146 | } 147 | } 148 | 149 | } // namespace {} 150 | 151 | 152 | int main(int argc, char **args) { 153 | 154 | // parse the program arguments 155 | if (!parse_args(argc, args)) { 156 | print_usage(args[0]); 157 | return 1; 158 | } 159 | 160 | // load the ELF file from disk 161 | elf_t elf; 162 | if (!elf.load(g_arg_program)) { 163 | fprintf(stderr, "Unable to load ELF file '%s'\n", g_arg_program); 164 | return 1; 165 | } 166 | 167 | // setup the IO handlers for the VM 168 | const riscv_io_t io = { 169 | imp_mem_ifetch, 170 | imp_mem_read_w, 171 | imp_mem_read_s, 172 | imp_mem_read_b, 173 | imp_mem_write_w, 174 | imp_mem_write_s, 175 | imp_mem_write_b, 176 | imp_on_ecall, 177 | imp_on_ebreak, 178 | }; 179 | 180 | auto state = std::make_unique(); 181 | state->break_addr = 0; 182 | state->fd_map[0] = stdin; 183 | state->fd_map[1] = stdout; 184 | state->fd_map[2] = stderr; 185 | 186 | // find the start of the heap 187 | if (const ELF::Elf32_Sym *end = elf.get_symbol("_end")) { 188 | state->break_addr = end->st_value; 189 | } 190 | 191 | // create the VM 192 | riscv_t *rv = rv_create(&io, state.get()); 193 | if (!rv) { 194 | fprintf(stderr, "Unable to create riscv emulator\n"); 195 | return 1; 196 | } 197 | 198 | // upload the ELF file into our memory abstraction 199 | if (!elf.upload(rv, state->mem)) { 200 | fprintf(stderr, "Unable to upload ELF file '%s'\n", args[1]); 201 | return 1; 202 | } 203 | 204 | // 
// Sparse 32 bit guest address space.
//
// The space is split into 64 KiB chunks selected by the upper 16 address
// bits.  A chunk is allocated, zero filled, the first time it is written;
// reading an address whose chunk was never written yields zero.
struct memory_t {

  // one chunk: large enough for every offset produced by `addr & 0xffff`
  struct chunk_t {
    std::array<uint8_t, 0x10000> data;
  };

  memory_t() {
    chunks.fill(nullptr);
  }

  ~memory_t() {
    clear();
  }

  // the chunk table holds owning raw pointers, so a copy would free every
  // chunk twice
  memory_t(const memory_t &) = delete;
  memory_t &operator=(const memory_t &) = delete;

  // Read a c-string starting at `addr`.  At most `max` bytes are stored in
  // `dst`, but scanning continues to the terminator; the return value is
  // the string length including that terminator.
  uint32_t read_str(uint8_t *dst, uint32_t addr, uint32_t max) {
    uint32_t len = 0;
    const uint8_t *end = dst + max;
    for (;; ++len, ++dst) {
      uint8_t ch = 0;
      read(&ch, addr + len, 1);
      if (dst < end) {
        *dst = ch;
      }
      if (ch == 0) {
        break;
      }
    }
    return len + 1;
  }

  // read an instruction from memory; asserts that the address is 4 byte
  // aligned and that its chunk exists
  uint32_t read_ifetch(uint32_t addr) {
    const uint32_t addr_lo = addr & mask_lo;
    assert((addr_lo & 3) == 0);
    const chunk_t *c = chunks[addr >> 16];
    assert(c);
    return load<uint32_t>(c, addr_lo);
  }

  // read a word from memory
  uint32_t read_w(uint32_t addr) {
    const uint32_t addr_lo = addr & mask_lo;
    // test if this is within one chunk
    if (addr_lo <= 0xfffc) {
      const chunk_t *c = chunks[addr >> 16];
      return c ? load<uint32_t>(c, addr_lo) : 0u;
    }
    uint32_t dst = 0;
    read((uint8_t*)&dst, addr, 4);
    return dst;
  }

  // read a short from memory
  uint16_t read_s(uint32_t addr) {
    const uint32_t addr_lo = addr & mask_lo;
    // test if this is within one chunk
    if (addr_lo <= 0xfffe) {
      const chunk_t *c = chunks[addr >> 16];
      return c ? load<uint16_t>(c, addr_lo) : uint16_t(0u);
    }
    uint16_t dst = 0;
    read((uint8_t*)&dst, addr, 2);
    return dst;
  }

  // read a byte from memory
  uint8_t read_b(uint32_t addr) {
    const chunk_t *c = chunks[addr >> 16];
    return c ? c->data[addr & mask_lo] : uint8_t(0u);
  }

  // Copy `size` bytes starting at `addr` into `dst`; unmapped bytes read
  // as zero.
  void read(uint8_t *dst, uint32_t addr, uint32_t size) {
    if (size == 0) {
      return;
    }
    // Compare the chunk of the first and LAST byte.  The previous test used
    // `addr + size` (one past the end), which sent reads ending exactly on
    // a chunk boundary down the slow path and, when the sum wrapped past
    // 2^32, could pass for a range far larger than one chunk.
    const uint32_t last = addr + (size - 1);
    const bool one_chunk =
        last >= addr && (addr & mask_hi) == (last & mask_hi);
    if (one_chunk) {
      if (const chunk_t *c = chunks[addr >> 16]) {
        memcpy(dst, c->data.data() + (addr & mask_lo), size);
      }
      else {
        memset(dst, 0, size);
      }
      return;
    }
    // the range spans chunks: naive byte by byte copy
    for (uint32_t i = 0; i < size; ++i) {
      const uint32_t p = addr + i;
      const chunk_t *c = chunks[p >> 16];
      dst[i] = c ? c->data[p & mask_lo] : 0;
    }
  }

  // copy `size` bytes from `src` to `addr`, allocating chunks as needed
  void write(uint32_t addr, const uint8_t *src, uint32_t size) {
    for (uint32_t i = 0; i < size; ++i) {
      const uint32_t p = addr + i;
      touch(p)->data[p & mask_lo] = src[i];
    }
  }

  // set `size` bytes starting at `addr` to `val`, allocating chunks as needed
  void fill(uint32_t addr, uint32_t size, uint8_t val) {
    for (uint32_t i = 0; i < size; ++i) {
      const uint32_t p = addr + i;
      touch(p)->data[p & mask_lo] = val;
    }
  }

  // free every chunk; all of memory reads as zero afterwards
  void clear() {
    for (chunk_t *c : chunks) {
      delete c;
    }
    chunks.fill(nullptr);
  }

protected:
  static const uint32_t mask_lo = 0xffff;
  static const uint32_t mask_hi = ~mask_lo;

  // load a T from `offset` inside chunk `c`; memcpy keeps the access well
  // defined when the offset is not aligned for T
  template <typename T>
  static T load(const chunk_t *c, uint32_t offset) {
    T value;
    memcpy(&value, c->data.data() + offset, sizeof(T));
    return value;
  }

  // return the chunk holding `addr`, creating it zero filled if absent
  chunk_t *touch(uint32_t addr) {
    chunk_t *&slot = chunks[addr >> 16];
    if (slot == nullptr) {
      slot = new chunk_t;
      slot->data.fill(0);
    }
    return slot;
  }

  // one slot per possible value of `addr >> 16`
  std::array<chunk_t *, 0x10000> chunks;
};
62, 18 | SYS_read = 63, 19 | SYS_write = 64, 20 | SYS_writev = 66, 21 | SYS_pread = 67, 22 | SYS_pwrite = 68, 23 | SYS_fstatat = 79, 24 | SYS_fstat = 80, 25 | SYS_exit = 93, 26 | SYS_exit_group = 94, 27 | SYS_kill = 129, 28 | SYS_rt_sigaction = 134, 29 | SYS_times = 153, 30 | SYS_uname = 160, 31 | SYS_gettimeofday = 169, 32 | SYS_getpid = 172, 33 | SYS_getuid = 174, 34 | SYS_geteuid = 175, 35 | SYS_getgid = 176, 36 | SYS_getegid = 177, 37 | SYS_brk = 214, 38 | SYS_munmap = 215, 39 | SYS_mremap = 216, 40 | SYS_mmap = 222, 41 | SYS_open = 1024, 42 | SYS_link = 1025, 43 | SYS_unlink = 1026, 44 | SYS_mkdir = 1030, 45 | SYS_access = 1033, 46 | SYS_stat = 1038, 47 | SYS_lstat = 1039, 48 | SYS_time = 1062, 49 | SYS_getmainvars = 2011, 50 | }; 51 | 52 | enum { 53 | O_RDONLY = 0, 54 | O_WRONLY = 1, 55 | O_RDWR = 2, 56 | O_ACCMODE = 3, 57 | }; 58 | 59 | // from syscall_sdl.cpp 60 | void syscall_draw_frame(struct riscv_t *rv); 61 | void syscall_draw_frame_pal(struct riscv_t *rv); 62 | 63 | namespace { 64 | 65 | int find_free_fd(struct state_t *s) { 66 | for (int i = 3; ; ++i) { 67 | auto itt = s->fd_map.find(i); 68 | if (s->fd_map.end() == itt) { 69 | return i; 70 | } 71 | } 72 | } 73 | 74 | const char *get_mode_str(uint32_t flags, uint32_t mode) { 75 | switch (flags & O_ACCMODE) { 76 | case O_RDONLY: 77 | return "rb"; 78 | case O_WRONLY: 79 | return "wb"; 80 | case O_RDWR: 81 | return "a+"; 82 | default: 83 | return nullptr; 84 | } 85 | } 86 | 87 | } // namespace 88 | 89 | void syscall_write(struct riscv_t *rv) { 90 | // access userdata 91 | state_t *s = (state_t*)rv_userdata(rv); 92 | // _write(handle, buffer, count) 93 | riscv_word_t handle = rv_get_reg(rv, rv_reg_a0); 94 | riscv_word_t buffer = rv_get_reg(rv, rv_reg_a1); 95 | riscv_word_t count = rv_get_reg(rv, rv_reg_a2); 96 | // read the string that we are printing 97 | uint8_t *temp = (uint8_t*)alloca(count); 98 | s->mem.read((uint8_t*)temp, buffer, count); 99 | // lookup the file descriptor 100 | auto itt = 
s->fd_map.find(int(handle)); 101 | if (itt != s->fd_map.end()) { 102 | // write out the data 103 | size_t written = fwrite(temp, 1, count, itt->second); 104 | // return number of bytes written 105 | rv_set_reg(rv, rv_reg_a0, (riscv_word_t)written); 106 | } 107 | else { 108 | // error 109 | rv_set_reg(rv, rv_reg_a0, -1); 110 | } 111 | } 112 | 113 | void syscall_exit(struct riscv_t *rv) { 114 | // access userdata 115 | state_t *s = (state_t*)rv_userdata(rv); 116 | rv_halt(rv); 117 | // _exit(code); 118 | riscv_word_t code = rv_get_reg(rv, rv_reg_a0); 119 | fprintf(stdout, "inferior exit code %d\n", (int)code); 120 | } 121 | 122 | void syscall_brk(struct riscv_t *rv) { 123 | // access userdata 124 | state_t *s = (state_t*)rv_userdata(rv); 125 | // get the increment parameter 126 | riscv_word_t increment = rv_get_reg(rv, rv_reg_a0); 127 | // note: this doesnt seem to be how it should operate but checking the 128 | // syscall source this is what it expects. 129 | // the return value doesnt seem to be documented this way. 
130 | if (increment) { 131 | s->break_addr = increment; 132 | } 133 | // return new break address 134 | rv_set_reg(rv, rv_reg_a0, s->break_addr); 135 | } 136 | 137 | void syscall_gettimeofday(struct riscv_t *rv) { 138 | #define FIXED_TIME_STEP 0 139 | #if FIXED_TIME_STEP 140 | static clock_t fake = 0; 141 | #endif 142 | // access userdata 143 | state_t *s = (state_t*)rv_userdata(rv); 144 | // get the parameters 145 | riscv_word_t tv = rv_get_reg(rv, rv_reg_a0); 146 | riscv_word_t tz = rv_get_reg(rv, rv_reg_a1); 147 | // return the clock time 148 | if (tv) { 149 | #if FIXED_TIME_STEP 150 | clock_t t = (fake += CLOCKS_PER_SEC / 10); 151 | #else 152 | clock_t t = clock(); 153 | #endif 154 | int32_t tv_sec = t / CLOCKS_PER_SEC; 155 | int32_t tv_usec = (t % CLOCKS_PER_SEC) * (1000000 / CLOCKS_PER_SEC); 156 | s->mem.write(tv + 0, (const uint8_t*)&tv_sec, 4); 157 | // note: I thought this was offset 4 (tv_sec is a long) but looking at the asm 158 | // its at offset 8. Even though it does just issue an lw to read it. 159 | s->mem.write(tv + 8, (const uint8_t*)&tv_usec, 4); 160 | } 161 | if (tz) { 162 | // note: This param is currently ignored by the syscall handler in newlib so it 163 | // would be useless to do anything here. 
164 | } 165 | // success 166 | rv_set_reg(rv, rv_reg_a0, 0); 167 | } 168 | 169 | void syscall_close(struct riscv_t *rv) { 170 | // access userdata 171 | state_t *s = (state_t*)rv_userdata(rv); 172 | // _close(fd); 173 | uint32_t fd = rv_get_reg(rv, rv_reg_a0); 174 | // lookup the file descriptor in question 175 | if (fd >= 3) { 176 | auto itt = s->fd_map.find(int(fd)); 177 | if (itt != s->fd_map.end()) { 178 | fclose(itt->second); 179 | s->fd_map.erase(itt); 180 | // success 181 | rv_set_reg(rv, rv_reg_a0, 0); 182 | } 183 | } 184 | // success 185 | rv_set_reg(rv, rv_reg_a0, 0); 186 | } 187 | 188 | void syscall_lseek(struct riscv_t *rv) { 189 | // access userdata 190 | state_t *s = (state_t*)rv_userdata(rv); 191 | // _lseek(fd, offset, whence); 192 | uint32_t fd = rv_get_reg(rv, rv_reg_a0); 193 | uint32_t offset = rv_get_reg(rv, rv_reg_a1); 194 | uint32_t whence = rv_get_reg(rv, rv_reg_a2); 195 | // find the file descriptor 196 | auto itt = s->fd_map.find(int(fd)); 197 | if (itt == s->fd_map.end()) { 198 | // error 199 | rv_set_reg(rv, rv_reg_a0, -1); 200 | return; 201 | } 202 | FILE *handle = itt->second; 203 | // perform the seek 204 | // note: the whence defines seems somewhat portable and doesnt require some 205 | // kind of mapping. 
206 | if (fseek(handle, offset, whence)) { 207 | // error 208 | rv_set_reg(rv, rv_reg_a0, -1); 209 | return; 210 | } 211 | // success 212 | rv_set_reg(rv, rv_reg_a0, 0); 213 | } 214 | 215 | void syscall_read(struct riscv_t *rv) { 216 | // access userdata 217 | state_t *s = (state_t*)rv_userdata(rv); 218 | // _read(fd, buf, count); 219 | uint32_t fd = rv_get_reg(rv, rv_reg_a0); 220 | uint32_t buf = rv_get_reg(rv, rv_reg_a1); 221 | uint32_t count = rv_get_reg(rv, rv_reg_a2); 222 | // lookup the file 223 | auto itt = s->fd_map.find(int(fd)); 224 | if (itt == s->fd_map.end()) { 225 | // error 226 | rv_set_reg(rv, rv_reg_a0, -1); 227 | return; 228 | } 229 | FILE *handle = itt->second; 230 | // read the file into VM memory 231 | uint8_t *temp = (uint8_t*)alloca(count); 232 | size_t read = fread(temp, 1, count, handle); 233 | s->mem.write(buf, temp, uint32_t(read)); 234 | // success 235 | rv_set_reg(rv, rv_reg_a0, uint32_t(read)); 236 | } 237 | 238 | void syscall_fstat(struct riscv_t *rv) { 239 | // access userdata 240 | state_t *s = (state_t*)rv_userdata(rv); 241 | // TODO 242 | } 243 | 244 | void syscall_open(struct riscv_t *rv) { 245 | // access userdata 246 | state_t *s = (state_t*)rv_userdata(rv); 247 | // _open(name, flags, mode); 248 | uint32_t name = rv_get_reg(rv, rv_reg_a0); 249 | uint32_t flags = rv_get_reg(rv, rv_reg_a1); 250 | uint32_t mode = rv_get_reg(rv, rv_reg_a2); 251 | // read name from VM memory 252 | std::array name_str = { '\0' }; 253 | uint32_t read = s->mem.read_str((uint8_t*)name_str.data(), name, uint32_t(name_str.size())); 254 | if (read > name_str.size()) { 255 | rv_set_reg(rv, rv_reg_a0, -1); 256 | return; 257 | } 258 | // open the file 259 | const char *mode_str = get_mode_str(flags, mode); 260 | if (!mode_str) { 261 | rv_set_reg(rv, rv_reg_a0, -1); 262 | return; 263 | } 264 | FILE *handle = fopen((const char *)name_str.data(), mode_str); 265 | if (!handle) { 266 | rv_set_reg(rv, rv_reg_a0, -1); 267 | return; 268 | } 269 | // find a free file 
descriptor 270 | const int fd = find_free_fd(s); 271 | // insert into the file descriptor map 272 | s->fd_map[fd] = handle; 273 | // return the file descriptor 274 | rv_set_reg(rv, rv_reg_a0, fd); 275 | } 276 | 277 | void syscall_handler(struct riscv_t *rv) { 278 | // access userdata 279 | state_t *s = (state_t*)rv_userdata(rv); 280 | // get the syscall number 281 | riscv_word_t syscall = rv_get_reg(rv, rv_reg_a7); 282 | // dispatch call type 283 | switch (syscall) { 284 | case SYS_close: 285 | syscall_close(rv); 286 | break; 287 | case SYS_lseek: 288 | syscall_lseek(rv); 289 | break; 290 | case SYS_read: 291 | syscall_read(rv); 292 | break; 293 | case SYS_write: 294 | syscall_write(rv); 295 | break; 296 | case SYS_fstat: 297 | syscall_fstat(rv); 298 | break; 299 | case SYS_brk: 300 | syscall_brk(rv); 301 | break; 302 | case SYS_exit: 303 | syscall_exit(rv); 304 | break; 305 | case SYS_gettimeofday: 306 | syscall_gettimeofday(rv); 307 | break; 308 | case SYS_open: 309 | syscall_open(rv); 310 | break; 311 | #if RISCV_VM_USE_SDL 312 | case 0xbeef: 313 | syscall_draw_frame(rv); 314 | break; 315 | case 0xbabe: 316 | syscall_draw_frame_pal(rv); 317 | break; 318 | #endif 319 | default: 320 | fprintf(stderr, "unknown syscall %d\n", int(syscall)); 321 | rv_halt(rv); 322 | break; 323 | } 324 | } 325 | -------------------------------------------------------------------------------- /riscv_vm/syscall_sdl.cpp: -------------------------------------------------------------------------------- 1 | #if RISCV_VM_USE_SDL 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "../riscv_core/riscv.h" 10 | #include "state.h" 11 | 12 | extern bool g_fullscreen; 13 | 14 | static SDL_Surface *g_video; 15 | 16 | 17 | static bool check_sdl(struct riscv_t *rv, uint32_t width, uint32_t height) { 18 | 19 | state_t *s = (struct state_t *)rv_userdata(rv); 20 | 21 | // check if video has been setup 22 | if (!g_video) { 23 | if (SDL_Init(SDL_INIT_VIDEO) != 0) { 24 | 
fprintf(stderr, "Failed to call SDL_Init()\n"); 25 | exit(1); 26 | } 27 | 28 | int flags = 0; 29 | if (g_fullscreen) { 30 | flags |= SDL_FULLSCREEN; 31 | } 32 | 33 | g_video = SDL_SetVideoMode(width, height, 32, flags); 34 | if (!g_video) { 35 | fprintf(stderr, "Failed to call SDL_SetVideoMode()\n"); 36 | exit(1); 37 | } 38 | SDL_WM_SetCaption("riscv-vm", nullptr); 39 | } 40 | // run a simple event handler 41 | SDL_Event event; 42 | while (SDL_PollEvent(&event)) { 43 | switch (event.type) { 44 | case SDL_QUIT: 45 | rv_halt(rv); 46 | break; 47 | case SDL_KEYDOWN: 48 | if (event.key.keysym.sym == SDLK_ESCAPE) { 49 | rv_halt(rv); 50 | break; 51 | } 52 | } 53 | } 54 | // success 55 | return true; 56 | } 57 | 58 | void syscall_draw_frame(struct riscv_t *rv) { 59 | // access userdata 60 | state_t *s = (state_t*)rv_userdata(rv); 61 | // draw(screen, width, height); 62 | const uint32_t screen = rv_get_reg(rv, rv_reg_a0); 63 | const uint32_t width = rv_get_reg(rv, rv_reg_a1); 64 | const uint32_t height = rv_get_reg(rv, rv_reg_a2); 65 | // check if we need to setup SDL 66 | if (!check_sdl(rv, width, height)) { 67 | return; 68 | } 69 | // read directly into video memory 70 | if (g_video) { 71 | s->mem.read((uint8_t*)g_video->pixels, screen, width * height * 4); 72 | SDL_Flip(g_video); 73 | } 74 | #if 1 75 | SDL_Delay(1); 76 | #endif 77 | } 78 | 79 | void syscall_draw_frame_pal(struct riscv_t *rv) { 80 | // access userdata 81 | state_t *s = (state_t*)rv_userdata(rv); 82 | // draw(screen, width, height); 83 | const uint32_t buf = rv_get_reg(rv, rv_reg_a0); 84 | const uint32_t pal = rv_get_reg(rv, rv_reg_a1); 85 | const uint32_t width = rv_get_reg(rv, rv_reg_a2); 86 | const uint32_t height = rv_get_reg(rv, rv_reg_a3); 87 | // check if we need to setup SDL 88 | if (!check_sdl(rv, width, height)) { 89 | return; 90 | } 91 | // read directly into video memory 92 | if (g_video) { 93 | 94 | uint8_t *i = (uint8_t*)alloca(width * height); 95 | uint8_t *j = (uint8_t*)alloca(256 * 3); 96 
| 97 | s->mem.read(i, buf, width * height); 98 | s->mem.read(j, pal, 256 * 3); 99 | 100 | uint32_t *d = (uint32_t*)g_video->pixels; 101 | const uint8_t *p = i; 102 | for (int y = 0; y < g_video->h; ++y) { 103 | for (int x = 0; x < g_video->w; ++x) { 104 | const uint8_t c = p[x]; 105 | const uint8_t *lut = j + (c * 3); 106 | d[x] = (lut[0] << 16) | (lut[1] << 8) | lut[2]; 107 | } 108 | p += g_video->w; 109 | d += g_video->w; 110 | } 111 | 112 | SDL_Flip(g_video); 113 | } 114 | #if 1 115 | SDL_Delay(1); 116 | #endif 117 | } 118 | 119 | #endif // RISCV_VM_USE_SDL 120 | -------------------------------------------------------------------------------- /tests/coremark/coremark.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/riscv-vm/d0cd7a43bdd58af3d57c98a7614689a55a4b9a60/tests/coremark/coremark.elf -------------------------------------------------------------------------------- /tests/dhrystone/dhrystone.c: -------------------------------------------------------------------------------- 1 | // See LICENSE for license details. 2 | 3 | #pragma GCC optimize ("no-inline") 4 | 5 | #include "dhrystone.h" 6 | 7 | #ifndef REG 8 | #define REG 9 | /* REG becomes defined as empty */ 10 | /* i.e. no register variables */ 11 | #else 12 | #undef REG 13 | #define REG register 14 | #endif 15 | 16 | extern int Int_Glob; 17 | extern char Ch_1_Glob; 18 | 19 | 20 | Proc_6 (Enum_Val_Par, Enum_Ref_Par) 21 | /*********************************/ 22 | /* executed once */ 23 | /* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */ 24 | 25 | Enumeration Enum_Val_Par; 26 | Enumeration *Enum_Ref_Par; 27 | { 28 | *Enum_Ref_Par = Enum_Val_Par; 29 | if (! 
Func_3 (Enum_Val_Par)) 30 | /* then, not executed */ 31 | *Enum_Ref_Par = Ident_4; 32 | switch (Enum_Val_Par) 33 | { 34 | case Ident_1: 35 | *Enum_Ref_Par = Ident_1; 36 | break; 37 | case Ident_2: 38 | if (Int_Glob > 100) 39 | /* then */ 40 | *Enum_Ref_Par = Ident_1; 41 | else *Enum_Ref_Par = Ident_4; 42 | break; 43 | case Ident_3: /* executed */ 44 | *Enum_Ref_Par = Ident_2; 45 | break; 46 | case Ident_4: break; 47 | case Ident_5: 48 | *Enum_Ref_Par = Ident_3; 49 | break; 50 | } /* switch */ 51 | } /* Proc_6 */ 52 | 53 | 54 | Proc_7 (Int_1_Par_Val, Int_2_Par_Val, Int_Par_Ref) 55 | /**********************************************/ 56 | /* executed three times */ 57 | /* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */ 58 | /* Int_Par_Ref becomes 7 */ 59 | /* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */ 60 | /* Int_Par_Ref becomes 17 */ 61 | /* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */ 62 | /* Int_Par_Ref becomes 18 */ 63 | One_Fifty Int_1_Par_Val; 64 | One_Fifty Int_2_Par_Val; 65 | One_Fifty *Int_Par_Ref; 66 | { 67 | One_Fifty Int_Loc; 68 | 69 | Int_Loc = Int_1_Par_Val + 2; 70 | *Int_Par_Ref = Int_2_Par_Val + Int_Loc; 71 | } /* Proc_7 */ 72 | 73 | 74 | Proc_8 (Arr_1_Par_Ref, Arr_2_Par_Ref, Int_1_Par_Val, Int_2_Par_Val) 75 | /*********************************************************************/ 76 | /* executed once */ 77 | /* Int_Par_Val_1 == 3 */ 78 | /* Int_Par_Val_2 == 7 */ 79 | Arr_1_Dim Arr_1_Par_Ref; 80 | Arr_2_Dim Arr_2_Par_Ref; 81 | int Int_1_Par_Val; 82 | int Int_2_Par_Val; 83 | { 84 | REG One_Fifty Int_Index; 85 | REG One_Fifty Int_Loc; 86 | 87 | Int_Loc = Int_1_Par_Val + 5; 88 | Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val; 89 | Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc]; 90 | Arr_1_Par_Ref [Int_Loc+30] = Int_Loc; 91 | for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index) 92 | Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc; 93 | Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1; 94 | Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] 
= Arr_1_Par_Ref [Int_Loc]; 95 | Int_Glob = 5; 96 | } /* Proc_8 */ 97 | 98 | 99 | Enumeration Func_1 (Ch_1_Par_Val, Ch_2_Par_Val) 100 | /*************************************************/ 101 | /* executed three times */ 102 | /* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */ 103 | /* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */ 104 | /* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */ 105 | 106 | Capital_Letter Ch_1_Par_Val; 107 | Capital_Letter Ch_2_Par_Val; 108 | { 109 | Capital_Letter Ch_1_Loc; 110 | Capital_Letter Ch_2_Loc; 111 | 112 | Ch_1_Loc = Ch_1_Par_Val; 113 | Ch_2_Loc = Ch_1_Loc; 114 | if (Ch_2_Loc != Ch_2_Par_Val) 115 | /* then, executed */ 116 | return (Ident_1); 117 | else /* not executed */ 118 | { 119 | Ch_1_Glob = Ch_1_Loc; 120 | return (Ident_2); 121 | } 122 | } /* Func_1 */ 123 | 124 | 125 | Boolean Func_2 (Str_1_Par_Ref, Str_2_Par_Ref) 126 | /*************************************************/ 127 | /* executed once */ 128 | /* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */ 129 | /* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */ 130 | 131 | Str_30 Str_1_Par_Ref; 132 | Str_30 Str_2_Par_Ref; 133 | { 134 | REG One_Thirty Int_Loc; 135 | Capital_Letter Ch_Loc; 136 | 137 | Int_Loc = 2; 138 | while (Int_Loc <= 2) /* loop body executed once */ 139 | if (Func_1 (Str_1_Par_Ref[Int_Loc], 140 | Str_2_Par_Ref[Int_Loc+1]) == Ident_1) 141 | /* then, executed */ 142 | { 143 | Ch_Loc = 'A'; 144 | Int_Loc += 1; 145 | } /* if, while */ 146 | if (Ch_Loc >= 'W' && Ch_Loc < 'Z') 147 | /* then, not executed */ 148 | Int_Loc = 7; 149 | if (Ch_Loc == 'R') 150 | /* then, not executed */ 151 | return (true); 152 | else /* executed */ 153 | { 154 | if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0) 155 | /* then, not executed */ 156 | { 157 | Int_Loc += 7; 158 | Int_Glob = Int_Loc; 159 | return (true); 160 | } 161 | else /* executed */ 162 | return (false); 163 | } /* if Ch_Loc */ 164 | } /* Func_2 */ 165 | 166 | 167 | Boolean Func_3 
(Enum_Par_Val) 168 | /***************************/ 169 | /* executed once */ 170 | /* Enum_Par_Val == Ident_3 */ 171 | Enumeration Enum_Par_Val; 172 | { 173 | Enumeration Enum_Loc; 174 | 175 | Enum_Loc = Enum_Par_Val; 176 | if (Enum_Loc == Ident_3) 177 | /* then, executed */ 178 | return (true); 179 | else /* not executed */ 180 | return (false); 181 | } /* Func_3 */ 182 | -------------------------------------------------------------------------------- /tests/dhrystone/dhrystone.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/riscv-vm/d0cd7a43bdd58af3d57c98a7614689a55a4b9a60/tests/dhrystone/dhrystone.elf -------------------------------------------------------------------------------- /tests/dhrystone/dhrystone_main.c: -------------------------------------------------------------------------------- 1 | // See LICENSE for license details. 2 | 3 | //************************************************************************** 4 | // Dhrystone bencmark 5 | //-------------------------------------------------------------------------- 6 | // 7 | // This is the classic Dhrystone synthetic integer benchmark. 8 | // 9 | 10 | #pragma GCC optimize ("no-inline") 11 | 12 | #include "dhrystone.h" 13 | 14 | #if 1 15 | #define debug_printf(...) 16 | #else 17 | #define debug_printf printf 18 | #endif 19 | 20 | #include 21 | 22 | /* Global Variables: */ 23 | 24 | Rec_Pointer Ptr_Glob, 25 | Next_Ptr_Glob; 26 | int Int_Glob; 27 | Boolean Bool_Glob; 28 | char Ch_1_Glob, 29 | Ch_2_Glob; 30 | int Arr_1_Glob [50]; 31 | int Arr_2_Glob [50] [50]; 32 | 33 | Enumeration Func_1 (); 34 | /* forward declaration necessary since Enumeration may not simply be int */ 35 | 36 | #ifndef REG 37 | Boolean Reg = false; 38 | #define REG 39 | /* REG becomes defined as empty */ 40 | /* i.e. 
no register variables */ 41 | #else 42 | Boolean Reg = true; 43 | #undef REG 44 | #define REG register 45 | #endif 46 | 47 | Boolean Done; 48 | 49 | long Begin_Time, 50 | End_Time, 51 | User_Time; 52 | long Microseconds, 53 | Dhrystones_Per_Second; 54 | 55 | /* end of variables for time measurement */ 56 | 57 | 58 | int main (int argc, char** argv) 59 | /*****/ 60 | /* main program, corresponds to procedures */ 61 | /* Main and Proc_0 in the Ada version */ 62 | { 63 | One_Fifty Int_1_Loc; 64 | REG One_Fifty Int_2_Loc; 65 | One_Fifty Int_3_Loc; 66 | REG char Ch_Index; 67 | Enumeration Enum_Loc; 68 | Str_30 Str_1_Loc; 69 | Str_30 Str_2_Loc; 70 | REG int Run_Index; 71 | REG int Number_Of_Runs; 72 | 73 | /* Arguments */ 74 | Number_Of_Runs = NUMBER_OF_RUNS; 75 | 76 | /* Initializations */ 77 | 78 | Next_Ptr_Glob = (Rec_Pointer) alloca (sizeof (Rec_Type)); 79 | Ptr_Glob = (Rec_Pointer) alloca (sizeof (Rec_Type)); 80 | 81 | Ptr_Glob->Ptr_Comp = Next_Ptr_Glob; 82 | Ptr_Glob->Discr = Ident_1; 83 | Ptr_Glob->variant.var_1.Enum_Comp = Ident_3; 84 | Ptr_Glob->variant.var_1.Int_Comp = 40; 85 | strcpy (Ptr_Glob->variant.var_1.Str_Comp, 86 | "DHRYSTONE PROGRAM, SOME STRING"); 87 | strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING"); 88 | 89 | Arr_2_Glob [8][7] = 10; 90 | /* Was missing in published program. Without this statement, */ 91 | /* Arr_2_Glob [8][7] would have an undefined value. */ 92 | /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */ 93 | /* overflow may occur for this array element. 
*/ 94 | 95 | debug_printf("\n"); 96 | debug_printf("Dhrystone Benchmark, Version %s\n", Version); 97 | if (Reg) 98 | { 99 | debug_printf("Program compiled with 'register' attribute\n"); 100 | } 101 | else 102 | { 103 | debug_printf("Program compiled without 'register' attribute\n"); 104 | } 105 | debug_printf("Using %s, HZ=%d\n", CLOCK_TYPE, HZ); 106 | debug_printf("\n"); 107 | 108 | Done = false; 109 | while (!Done) { 110 | debug_printf("Trying %d runs through Dhrystone:\n", Number_Of_Runs); 111 | 112 | /***************/ 113 | /* Start timer */ 114 | /***************/ 115 | 116 | // setStats(1); 117 | Start_Timer(); 118 | 119 | for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index) 120 | { 121 | 122 | Proc_5(); 123 | Proc_4(); 124 | /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */ 125 | Int_1_Loc = 2; 126 | Int_2_Loc = 3; 127 | strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING"); 128 | Enum_Loc = Ident_2; 129 | Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc); 130 | /* Bool_Glob == 1 */ 131 | while (Int_1_Loc < Int_2_Loc) /* loop body executed once */ 132 | { 133 | Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc; 134 | /* Int_3_Loc == 7 */ 135 | Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc); 136 | /* Int_3_Loc == 7 */ 137 | Int_1_Loc += 1; 138 | } /* while */ 139 | /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */ 140 | Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc); 141 | /* Int_Glob == 5 */ 142 | Proc_1 (Ptr_Glob); 143 | for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index) 144 | /* loop body executed twice */ 145 | { 146 | if (Enum_Loc == Func_1 (Ch_Index, 'C')) 147 | /* then, not executed */ 148 | { 149 | Proc_6 (Ident_1, &Enum_Loc); 150 | strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING"); 151 | Int_2_Loc = Run_Index; 152 | Int_Glob = Run_Index; 153 | } 154 | } 155 | /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */ 156 | Int_2_Loc = Int_2_Loc * Int_1_Loc; 157 | Int_1_Loc = Int_2_Loc / Int_3_Loc; 158 | Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) 
- Int_1_Loc; 159 | /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */ 160 | Proc_2 (&Int_1_Loc); 161 | /* Int_1_Loc == 5 */ 162 | 163 | } /* loop "for Run_Index" */ 164 | 165 | /**************/ 166 | /* Stop timer */ 167 | /**************/ 168 | 169 | Stop_Timer(); 170 | // setStats(0); 171 | 172 | User_Time = End_Time - Begin_Time; 173 | 174 | if (User_Time < Too_Small_Time) 175 | { 176 | printf("Measured time too small to obtain meaningful results\n"); 177 | Number_Of_Runs = Number_Of_Runs * 10; 178 | printf("\n"); 179 | } else Done = true; 180 | } 181 | 182 | debug_printf("Final values of the variables used in the benchmark:\n"); 183 | debug_printf("\n"); 184 | debug_printf("Int_Glob: %d\n", Int_Glob); 185 | debug_printf(" should be: %d\n", 5); 186 | debug_printf("Bool_Glob: %d\n", Bool_Glob); 187 | debug_printf(" should be: %d\n", 1); 188 | debug_printf("Ch_1_Glob: %c\n", Ch_1_Glob); 189 | debug_printf(" should be: %c\n", 'A'); 190 | debug_printf("Ch_2_Glob: %c\n", Ch_2_Glob); 191 | debug_printf(" should be: %c\n", 'B'); 192 | debug_printf("Arr_1_Glob[8]: %d\n", Arr_1_Glob[8]); 193 | debug_printf(" should be: %d\n", 7); 194 | debug_printf("Arr_2_Glob[8][7]: %d\n", Arr_2_Glob[8][7]); 195 | debug_printf(" should be: Number_Of_Runs + 10\n"); 196 | debug_printf("Ptr_Glob->\n"); 197 | debug_printf(" Ptr_Comp: %d\n", (long) Ptr_Glob->Ptr_Comp); 198 | debug_printf(" should be: (implementation-dependent)\n"); 199 | debug_printf(" Discr: %d\n", Ptr_Glob->Discr); 200 | debug_printf(" should be: %d\n", 0); 201 | debug_printf(" Enum_Comp: %d\n", Ptr_Glob->variant.var_1.Enum_Comp); 202 | debug_printf(" should be: %d\n", 2); 203 | debug_printf(" Int_Comp: %d\n", Ptr_Glob->variant.var_1.Int_Comp); 204 | debug_printf(" should be: %d\n", 17); 205 | debug_printf(" Str_Comp: %s\n", Ptr_Glob->variant.var_1.Str_Comp); 206 | debug_printf(" should be: DHRYSTONE PROGRAM, SOME STRING\n"); 207 | debug_printf("Next_Ptr_Glob->\n"); 208 | debug_printf(" Ptr_Comp: %d\n", (long) 
Next_Ptr_Glob->Ptr_Comp); 209 | debug_printf(" should be: (implementation-dependent), same as above\n"); 210 | debug_printf(" Discr: %d\n", Next_Ptr_Glob->Discr); 211 | debug_printf(" should be: %d\n", 0); 212 | debug_printf(" Enum_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp); 213 | debug_printf(" should be: %d\n", 1); 214 | debug_printf(" Int_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Int_Comp); 215 | debug_printf(" should be: %d\n", 18); 216 | debug_printf(" Str_Comp: %s\n", 217 | Next_Ptr_Glob->variant.var_1.Str_Comp); 218 | debug_printf(" should be: DHRYSTONE PROGRAM, SOME STRING\n"); 219 | debug_printf("Int_1_Loc: %d\n", Int_1_Loc); 220 | debug_printf(" should be: %d\n", 5); 221 | debug_printf("Int_2_Loc: %d\n", Int_2_Loc); 222 | debug_printf(" should be: %d\n", 13); 223 | debug_printf("Int_3_Loc: %d\n", Int_3_Loc); 224 | debug_printf(" should be: %d\n", 7); 225 | debug_printf("Enum_Loc: %d\n", Enum_Loc); 226 | debug_printf(" should be: %d\n", 1); 227 | debug_printf("Str_1_Loc: %s\n", Str_1_Loc); 228 | debug_printf(" should be: DHRYSTONE PROGRAM, 1'ST STRING\n"); 229 | debug_printf("Str_2_Loc: %s\n", Str_2_Loc); 230 | debug_printf(" should be: DHRYSTONE PROGRAM, 2'ND STRING\n"); 231 | debug_printf("\n"); 232 | 233 | 234 | Microseconds = ((User_Time / Number_Of_Runs) * Mic_secs_Per_Second) / HZ; 235 | Dhrystones_Per_Second = (HZ * Number_Of_Runs) / User_Time; 236 | 237 | printf("Microseconds for one run through Dhrystone: %ld\n", Microseconds); 238 | printf("Dhrystones per Second: %ld\n", Dhrystones_Per_Second); 239 | 240 | return 0; 241 | } 242 | 243 | 244 | Proc_1 (Ptr_Val_Par) 245 | /******************/ 246 | 247 | REG Rec_Pointer Ptr_Val_Par; 248 | /* executed once */ 249 | { 250 | REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp; 251 | /* == Ptr_Glob_Next */ 252 | /* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */ 253 | /* corresponds to "rename" in Ada, "with" in Pascal */ 254 | 255 | structassign (*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob); 
256 | Ptr_Val_Par->variant.var_1.Int_Comp = 5; 257 | Next_Record->variant.var_1.Int_Comp 258 | = Ptr_Val_Par->variant.var_1.Int_Comp; 259 | Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp; 260 | Proc_3 (&Next_Record->Ptr_Comp); 261 | /* Ptr_Val_Par->Ptr_Comp->Ptr_Comp 262 | == Ptr_Glob->Ptr_Comp */ 263 | if (Next_Record->Discr == Ident_1) 264 | /* then, executed */ 265 | { 266 | Next_Record->variant.var_1.Int_Comp = 6; 267 | Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp, 268 | &Next_Record->variant.var_1.Enum_Comp); 269 | Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp; 270 | Proc_7 (Next_Record->variant.var_1.Int_Comp, 10, 271 | &Next_Record->variant.var_1.Int_Comp); 272 | } 273 | else /* not executed */ 274 | structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp); 275 | } /* Proc_1 */ 276 | 277 | 278 | Proc_2 (Int_Par_Ref) 279 | /******************/ 280 | /* executed once */ 281 | /* *Int_Par_Ref == 1, becomes 4 */ 282 | 283 | One_Fifty *Int_Par_Ref; 284 | { 285 | One_Fifty Int_Loc; 286 | Enumeration Enum_Loc; 287 | 288 | Int_Loc = *Int_Par_Ref + 10; 289 | do /* executed once */ 290 | if (Ch_1_Glob == 'A') 291 | /* then, executed */ 292 | { 293 | Int_Loc -= 1; 294 | *Int_Par_Ref = Int_Loc - Int_Glob; 295 | Enum_Loc = Ident_1; 296 | } /* if */ 297 | while (Enum_Loc != Ident_1); /* true */ 298 | } /* Proc_2 */ 299 | 300 | 301 | Proc_3 (Ptr_Ref_Par) 302 | /******************/ 303 | /* executed once */ 304 | /* Ptr_Ref_Par becomes Ptr_Glob */ 305 | 306 | Rec_Pointer *Ptr_Ref_Par; 307 | 308 | { 309 | if (Ptr_Glob != Null) 310 | /* then, executed */ 311 | *Ptr_Ref_Par = Ptr_Glob->Ptr_Comp; 312 | Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp); 313 | } /* Proc_3 */ 314 | 315 | 316 | Proc_4 () /* without parameters */ 317 | /*******/ 318 | /* executed once */ 319 | { 320 | Boolean Bool_Loc; 321 | 322 | Bool_Loc = Ch_1_Glob == 'A'; 323 | Bool_Glob = Bool_Loc | Bool_Glob; 324 | Ch_2_Glob = 'B'; 325 | } /* Proc_4 */ 326 | 327 | 328 | Proc_5 () /* without parameters */ 329 | 
/*******/ 330 | /* executed once */ 331 | { 332 | Ch_1_Glob = 'A'; 333 | Bool_Glob = false; 334 | } /* Proc_5 */ 335 | -------------------------------------------------------------------------------- /tests/doom/README.md: -------------------------------------------------------------------------------- 1 | # RISCV-VM DOOM 2 | 3 | This is a highly experimental build of DOOM for the riscv-vm emulator. 4 | It is an interesting proof of concept; however, the emulator currently runs too slowly to play it. 5 | While DOOM will serve as a great testbed for profiling, it goes to show that some kind of dynamic recompilation (dynrec) will be needed in the future. 6 | 7 | Currently only rendering has been implemented, not user input, so you can only watch the recorded demo. 8 | I will surely add user input in time as I improve the emulator speed too. 9 | 10 | 11 | ### Prerequisites 12 | - SDL 1.2 library 13 | 14 | 15 | ### Build Instructions (with SDL) 16 | ``` 17 | mkdir build 18 | cd build 19 | cmake .. -DRVVM_USE_SDL=ON 20 | make 21 | ``` 22 | 23 | 24 | ### Instructions 25 | - Build the `riscv-vm` project with SDL support. 26 | - Download the shareware `doom1.wad` file. 27 | - Place `doom1.wad` into the same folder as `doom.elf`. 28 | - From the `tests/doom` folder run `./riscv-vm doom.elf`.
29 | -------------------------------------------------------------------------------- /tests/doom/doom.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/riscv-vm/d0cd7a43bdd58af3d57c98a7614689a55a4b9a60/tests/doom/doom.elf -------------------------------------------------------------------------------- /tests/helloworld/helloworld: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/riscv-vm/d0cd7a43bdd58af3d57c98a7614689a55a4b9a60/tests/helloworld/helloworld -------------------------------------------------------------------------------- /tests/helloworld/main.c: -------------------------------------------------------------------------------- 1 | // build using: 2 | // riscv64-unknown-elf-gcc -march=rv32i -mabi=ilp32 main.c -o helloworld 3 | 4 | #include 5 | 6 | int main(int argc, const char **args) { 7 | printf("Hello World!\n"); 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /tests/linpack/bench.h: -------------------------------------------------------------------------------- 1 | /* 2 | * provide timing and utility functions. 3 | * 4 | * identifiers starting with bm_ are reserved 5 | */ 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include 13 | 14 | /* return the last result of TIME_CYCLES. */ 15 | #define GET_CYCLES 1 /* bm_get_cycles () */ 16 | 17 | /* output the result of type double, higher is better. Never returns. */ 18 | #define RESULT(val) bm_set_result (val) 19 | 20 | /* output an error and kill the program. Never returns. */ 21 | #define ERROR(msg) bm_error (msg) 22 | 23 | /* make sure this value is calculated and not optimized away. */ 24 | #define USE(value) bm_eat((int)(value)) 25 | 26 | /* return a zero without the compiler knowing it. 
*/ 27 | #define ZERO bm_return_zero () 28 | 29 | /* tag the next result with the given string. */ 30 | #define TAG(s) bm_set_tag (s) 31 | 32 | #define TIME_CYCLES 33 | 34 | static int bm_return_zero (void) { return 0; } 35 | static void bm_eat (int val) { } 36 | static void bm_set_tag (const char *tag) { printf ("TAG %s\n", tag); } 37 | static void bm_error(char* msg) { printf("ERROR %s\n", msg); } 38 | static void bm_set_result(float val) { ; } 39 | static void bm_get_cycles() { ; } 40 | 41 | /* 42 | * declares the main function for you, use it like this: 43 | * BEGIN_MAIN 44 | * code; 45 | * more code; 46 | * END_MAIN 47 | */ 48 | #define BEGIN_MAIN \ 49 | int \ 50 | main (int argc, char **argv) \ 51 | { 52 | 53 | #define END_MAIN \ 54 | return 0; \ 55 | } 56 | 57 | /* 58 | * can be used instead of coding your own main(), if you 59 | * only have a single, simple test 60 | */ 61 | #define SINGLE_BENCHMARK(code) \ 62 | BEGIN_MAIN \ 63 | code; \ 64 | END_MAIN 65 | 66 | #ifdef __cplusplus 67 | } 68 | #endif 69 | 70 | -------------------------------------------------------------------------------- /tests/linpack/linpack.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/riscv-vm/d0cd7a43bdd58af3d57c98a7614689a55a4b9a60/tests/linpack/linpack.elf -------------------------------------------------------------------------------- /tests/mandelbrot/mandelbrot.c: -------------------------------------------------------------------------------- 1 | main(k){float i,j,r,x,y=-16;while(puts(""),y++<15)for(x 2 | =0;x++<84;putchar(" .:-;!/>)|&IH%*#"[k&15]))for(i=k=r=0; 3 | j=r*r-i*i-2+x/25,i=2*r*i+y/10,j*j+i*i<11&&k++<111;r=j);} 4 | -------------------------------------------------------------------------------- /tests/mandelbrot/mandelbrot.elf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bit-hack/riscv-vm/d0cd7a43bdd58af3d57c98a7614689a55a4b9a60/tests/mandelbrot/mandelbrot.elf -------------------------------------------------------------------------------- /tests/multiply/dataset1.h: -------------------------------------------------------------------------------- 1 | // See LICENSE for license details. 2 | 3 | 4 | #define DATA_SIZE 100 5 | 6 | int input_data1[DATA_SIZE] = 7 | { 8 | 41, 454, 833, 335, 564, 1, 187, 989, 749, 365, 350, 572, 132, 64, 949, 153, 584, 216, 805, 140, 9 | 621, 210, 6, 572, 931, 339, 890, 593, 392, 898, 694, 228, 961, 12, 110, 883, 116, 750, 296, 646, 10 | 426, 500, 314, 436, 659, 701, 774, 812, 319, 981, 678, 150, 875, 696, 376, 564, 474, 272, 938, 258, 11 | 539, 647, 569, 509, 203, 88, 280, 703, 759, 669, 606, 375, 511, 551, 657, 936, 195, 592, 81, 569, 12 | 267, 952, 229, 800, 337, 584, 944, 643, 902, 368, 241, 489, 913, 328, 826, 313, 933, 592, 985, 388 13 | }; 14 | 15 | int input_data2[DATA_SIZE] = 16 | { 17 | 195, 543, 960, 649, 566, 979, 350, 997, 649, 814, 657, 79, 181, 208, 111, 998, 859, 629, 65, 847, 18 | 288, 704, 349, 997, 141, 253, 905, 715, 886, 430, 264, 415, 576, 538, 979, 700, 761, 4, 241, 494, 19 | 478, 100, 499, 864, 403, 693, 222, 416, 444, 296, 721, 285, 676, 620, 317, 78, 224, 351, 937, 540, 20 | 288, 646, 119, 169, 615, 527, 606, 289, 389, 796, 351, 801, 455, 720, 278, 758, 367, 745, 358, 92, 21 | 584, 989, 62, 271, 985, 853, 403, 788, 346, 531, 517, 222, 559, 461, 908, 241, 775, 358, 255, 332 22 | }; 23 | 24 | int verify_data[DATA_SIZE] = 25 | { 26 | 7995, 246522, 799680, 217415, 319224, 979, 65450, 986033, 486101, 297110, 229950, 45188, 23892, 13312, 105339, 152694, 501656, 135864, 52325, 118580, 27 | 178848, 147840, 2094, 570284, 131271, 85767, 805450, 423995, 347312, 386140, 183216, 94620, 553536, 6456, 107690, 618100, 88276, 3000, 71336, 319124, 28 | 203628, 50000, 156686, 376704, 265577, 485793, 171828, 337792, 141636, 290376, 488838, 42750, 591500, 431520, 
119192, 43992, 106176, 95472, 878906, 139320, 29 | 155232, 417962, 67711, 86021, 124845, 46376, 169680, 203167, 295251, 532524, 212706, 300375, 232505, 396720, 182646, 709488, 71565, 441040, 28998, 52348, 30 | 155928, 941528, 14198, 216800, 331945, 498152, 380432, 506684, 312092, 195408, 124597, 108558, 510367, 151208, 750008, 75433, 723075, 211936, 251175, 128816 31 | }; 32 | 33 | -------------------------------------------------------------------------------- /tests/multiply/multiply.c: -------------------------------------------------------------------------------- 1 | // See LICENSE for license details. 2 | 3 | // ************************************************************************* 4 | // multiply filter bencmark 5 | // ------------------------------------------------------------------------- 6 | // 7 | // This benchmark tests the software multiply implemenation. The 8 | // input data (and reference data) should be generated using the 9 | // multiply_gendata.pl perl script and dumped to a file named 10 | // dataset1.h 11 | 12 | static int multiply( int x, int y ) 13 | { 14 | 15 | int i; 16 | int result = 0; 17 | 18 | for (i = 0; i < 32; i++) { 19 | if ((x & 0x1) == 1) 20 | result = result + y; 21 | 22 | x = x >> 1; 23 | y = y << 1; 24 | } 25 | 26 | return result; 27 | 28 | } 29 | 30 | static int verify(int n, const volatile int* test, const int* verify) 31 | { 32 | int i; 33 | // Unrolled for faster verification 34 | for (i = 0; i < n/2*2; i+=2) 35 | { 36 | int t0 = test[i], t1 = test[i+1]; 37 | int v0 = verify[i], v1 = verify[i+1]; 38 | if (t0 != v0) return i+1; 39 | if (t1 != v1) return i+2; 40 | } 41 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 42 | return n; 43 | return 0; 44 | } 45 | 46 | //-------------------------------------------------------------------------- 47 | // Input/Reference Data 48 | 49 | #include "dataset1.h" 50 | 51 | //-------------------------------------------------------------------------- 52 | // Main 53 | 54 | int main( int 
argc, char* argv[] ) 55 | { 56 | int i; 57 | int results_data[DATA_SIZE]; 58 | 59 | #if PREALLOCATE 60 | for (i = 0; i < DATA_SIZE; i++) 61 | { 62 | results_data[i] = multiply( input_data1[i], input_data2[i] ); 63 | } 64 | #endif 65 | 66 | // setStats(1); 67 | for (i = 0; i < DATA_SIZE; i++) 68 | { 69 | results_data[i] = multiply( input_data1[i], input_data2[i] ); 70 | } 71 | // setStats(0); 72 | 73 | // Check the results 74 | return verify( DATA_SIZE, results_data, verify_data ); 75 | } 76 | -------------------------------------------------------------------------------- /tests/multiply/multiply.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/riscv-vm/d0cd7a43bdd58af3d57c98a7614689a55a4b9a60/tests/multiply/multiply.elf -------------------------------------------------------------------------------- /tests/pi/pi.c: -------------------------------------------------------------------------------- 1 | /*--- pi.c PROGRAM RANPI 2 | * 3 | * Program to compute PI by probability. 4 | * By Mark Riordan 24-DEC-1986; 5 | * Original version apparently by Don Shull. 6 | * To be used as a CPU benchmark. 7 | * 8 | * Translated to C from FORTRAN 20 Nov 1993 9 | */ 10 | #include 11 | 12 | void 13 | myadd(float *sum,float *addend); 14 | 15 | int 16 | main() { 17 | float ztot, yran, ymult, ymod, x, y, z, pi, prod; 18 | long int low, ixran, itot, j, iprod; 19 | 20 | printf("Starting PI...\n"); 21 | ztot = 0.0; 22 | low = 1; 23 | ixran = 1907; 24 | yran = 5813.0; 25 | ymult = 1307.0; 26 | ymod = 5471.0; 27 | itot = 1200000; 28 | 29 | for(j=1; j<=itot; j++) { 30 | 31 | /* 32 | c X and Y are two uniform random numbers between 0 and 1. 33 | c They are computed using two linear congruential generators. 34 | c A mix of integer and real arithmetic is used to simulate a 35 | c real program. 
Magnitudes are kept small to prevent 32-bit 36 | c integer overflow and to allow full precision even with a 23-bit 37 | c mantissa. 38 | */ 39 | 40 | iprod = 27611 * ixran; 41 | ixran = iprod - 74383*(long int)(iprod/74383); 42 | x = (float)ixran / 74383.0; 43 | prod = ymult * yran; 44 | yran = (prod - ymod*(long int)(prod/ymod)); 45 | y = yran / ymod; 46 | z = x*x + y*y; 47 | myadd(&ztot,&z); 48 | if ( z <= 1.0 ) { 49 | low = low + 1; 50 | } 51 | } 52 | printf(" x=%8.5f y=%8.5f low=%7d j=%7d\n",x,y,low,j); 53 | pi = 4.0 * (float)low/(float)itot; 54 | printf("Pi = %9.6f ztot=%12.2f itot=%8d\n",pi,ztot,itot); 55 | 56 | return 0; 57 | } 58 | 59 | void 60 | myadd(float *sum,float *addend) { 61 | 62 | /* 63 | c Simple adding subroutine thrown in to allow subroutine 64 | c calls/returns to be factored in as part of the benchmark. 65 | */ 66 | *sum = *sum + *addend; 67 | } 68 | -------------------------------------------------------------------------------- /tests/pi/pi.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/riscv-vm/d0cd7a43bdd58af3d57c98a7614689a55a4b9a60/tests/pi/pi.elf -------------------------------------------------------------------------------- /tests/puzzle/bench.h: -------------------------------------------------------------------------------- 1 | /* 2 | * provide timing and utility functions. 3 | * 4 | * identifiers starting with bm_ are reserved 5 | */ 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include 13 | 14 | /* return the last result of TIME_CYCLES. */ 15 | #define GET_CYCLES 1 /* bm_get_cycles () */ 16 | 17 | /* output the result of type double, higher is better. Never returns. */ 18 | #define RESULT(val) bm_set_result (val) 19 | 20 | /* output an error and kill the program. Never returns. */ 21 | #define ERROR(msg) bm_error (msg) 22 | 23 | /* make sure this value is calculated and not optimized away. 
*/ 24 | #define USE(value) bm_eat((int)(value)) 25 | 26 | /* return a zero without the compiler knowing it. */ 27 | #define ZERO bm_return_zero () 28 | 29 | /* tag the next result with the given string. */ 30 | #define TAG(s) bm_set_tag (s) 31 | 32 | #define TIME_CYCLES 33 | 34 | static int bm_return_zero (void) { return 0; } 35 | static void bm_eat (int val) { } 36 | static void bm_set_tag (const char *tag) { printf ("TAG %s\n", tag); } 37 | static void bm_error(char* msg) { printf("ERROR %s\n", msg); } 38 | static void bm_set_result(float val) { ; } 39 | static void bm_get_cycles() { ; } 40 | 41 | /* 42 | * declares the main function for you, use it like this: 43 | * BEGIN_MAIN 44 | * code; 45 | * more code; 46 | * END_MAIN 47 | */ 48 | #define BEGIN_MAIN \ 49 | int \ 50 | main (int argc, char **argv) \ 51 | { 52 | 53 | #define END_MAIN \ 54 | return 0; \ 55 | } 56 | 57 | /* 58 | * can be used instead of coding your own main(), if you 59 | * only have a single, simple test 60 | */ 61 | #define SINGLE_BENCHMARK(code) \ 62 | BEGIN_MAIN \ 63 | code; \ 64 | END_MAIN 65 | 66 | #ifdef __cplusplus 67 | } 68 | #endif 69 | 70 | -------------------------------------------------------------------------------- /tests/puzzle/puzzle.c: -------------------------------------------------------------------------------- 1 | /* $Id: puzzle.c,v 1.2 1996/01/16 14:17:04 chris Exp $ */ 2 | #define size 511 3 | #define classmax 3 4 | #define typemax 12 5 | #define d 8 6 | #define true 1 7 | #define false 0 8 | 9 | #include "bench.h" 10 | 11 | /************************************************************* 12 | * puzzle.c 13 | * The standard puzzle benchmark 14 | */ 15 | 16 | char piececount[classmax + 1]; 17 | char class[typemax+1]; 18 | short piecemax[typemax+1]; 19 | char puzzle[size+1]; 20 | char p[typemax+1][size+1]; 21 | 22 | short m,n; 23 | int kount; 24 | 25 | int fit(i,j) 26 | int i,j; 27 | { 28 | short k; 29 | 30 | for (k = 0; k <= piecemax[i]; k++) 31 | if (p[i][k]) if 
(puzzle[j+k]) return (false); 32 | return(true); 33 | } 34 | 35 | int place(i,j) 36 | int i,j; 37 | { 38 | short k; 39 | for (k = 0; k <= piecemax[i]; k++) 40 | if (p[i][k]) puzzle[j+k] = true; 41 | piececount[class[i]] = piececount[class[i]] - 1; 42 | for (k = j; k <= size; k++) 43 | if ( ! puzzle[k] ) return(k); 44 | /*printf("puzzle filled\n");*/; 45 | return(0); 46 | } 47 | 48 | void remove_(int i, int j) 49 | { 50 | short k; 51 | 52 | for (k = 0; k <= piecemax[i]; k++) 53 | if (p[i][k]) puzzle[j+k] = false; 54 | piececount[class[i]] = piececount[class[i]] + 1; 55 | } 56 | 57 | int trial(j) 58 | int j; 59 | { 60 | int i,k; 61 | char trialval; 62 | 63 | for (i = 0; i <= typemax; i++) { 64 | if (piececount[class[i]]) 65 | if (fit(i,j)) { 66 | k = place(i,j); 67 | if (trial(k) || (k == 0)) { 68 | /*printf("piece %d at %d\n", i + 1, k + 1);*/; 69 | kount = kount + 1; 70 | return(true); 71 | } else 72 | remove_(i, j); 73 | } 74 | } 75 | kount = kount + 1; 76 | return(false); 77 | } 78 | 79 | void puzzle_(void) 80 | { 81 | int i,j,k; 82 | 83 | for (m = 0; m <= size; m++) 84 | puzzle[m] = 1; 85 | for (i = 1; i <= 5; i++) 86 | for (j = 1; j <= 5; j++) 87 | for (k = 1; k <= 5; k++) 88 | puzzle[i+8*(j+8*k)] = 0; 89 | for (i = 0; i <= 12; i++) 90 | for (m = 0; m <= 511; m++) 91 | p[i][m] = 0; 92 | for (i = 0; i <= 3; i++) 93 | for (j = 0; j <= 1; j++) 94 | for (k = 0; k <= 0; k++) 95 | p[0][i+8*(j+8*k)] = 1; 96 | class[0] = 0; 97 | piecemax[0] = 3 + 8 * 1 + 8 * 8 * 0; 98 | for (i = 0; i <= 1; i++) 99 | for (j = 0; j <= 0; j++) 100 | for (k = 0; k <= 3; k++) 101 | p[1][i+8*(j+8*k)] = 1; 102 | class[1] = 0; 103 | piecemax[1] = 1 + 8 * 0 + 8 * 8 * 3; 104 | for (i = 0; i <= 0; i++) 105 | for (j = 0; j <= 3; j++) 106 | for (k = 0; k <= 1; k++) 107 | p[2][i+8*(j+8*k)] = 1; 108 | class[2] = 0; 109 | piecemax[2] = 0 + 8 * 3 + 8 * 8 * 1; 110 | for (i = 0; i <= 1; i++) 111 | for (j = 0; j <= 3; j++) 112 | for (k = 0; k <= 0; k++) 113 | p[3][i+8*(j+8*k)] = 1; 114 | class[3] = 0; 115 
| piecemax[3] = 1 + 8 * 3 + 8 * 8 * 0; 116 | for (i = 0; i <= 3; i++) 117 | for (j = 0; j <= 0; j++) 118 | for (k = 0; k <= 1; k++) 119 | p[4][i+8*(j+8*k)] = 1; 120 | class[4] = 0; 121 | piecemax[4] = 3 + 8 * 0 + 8 * 8 * 1; 122 | for (i = 0; i <= 0; i++) 123 | for (j = 0; j <= 1; j++) 124 | for (k = 0; k <= 3; k++) 125 | p[5][i+8*(j+8*k)] = 1; 126 | class[5] = 0; 127 | piecemax[5] = 0 + 8 * 1 + 8 * 8 * 3; 128 | for (i = 0; i <= 2; i++) 129 | for (j = 0; j <= 0; j++) 130 | for (k = 0; k <= 0; k++) 131 | p[6][i+8*(j+8*k)] = 1; 132 | class[6] = 1; 133 | piecemax[6] = 2 + 8 * 0 + 8 * 8 * 0; 134 | for (i = 0; i <= 0; i++) 135 | for (j = 0; j <= 2; j++) 136 | for (k = 0; k <= 0; k++) 137 | p[7][i+8*(j+8*k)] = 1; 138 | class[7] = 1; 139 | piecemax[7] = 0 + 8 * 2 + 8 * 8 * 0; 140 | for (i = 0; i <= 0; i++) 141 | for (j = 0; j <= 0; j++) 142 | for (k = 0; k <= 2; k++) 143 | p[8][i+8*(j+8*k)] = 1; 144 | class[8] = 1; 145 | piecemax[8] = 0 + 8 * 0 + 8 * 8 * 2; 146 | for (i = 0; i <= 1; i++) 147 | for (j = 0; j <= 1; j++) 148 | for (k = 0; k <= 0; k++) 149 | p[9][i+8*(j+8*k)] = 1; 150 | class[9] = 2; 151 | piecemax[9] = 1 + 8 * 1 + 8 * 8 * 0; 152 | for (i = 0; i <= 1; i++) 153 | for (j = 0; j <= 0; j++) 154 | for (k = 0; k <= 1; k++) 155 | p[10][i+8*(j+8*k)] = 1; 156 | class[10] = 2; 157 | piecemax[10] = 1 + 8 * 0 + 8 * 8 * 1; 158 | for (i = 0; i <= 0; i++) 159 | for (j = 0; j <= 1; j++) 160 | for (k = 0; k <= 1; k++) 161 | p[11][i+8*(j+8*k)] = 1; 162 | class[11] = 2; 163 | piecemax[11] = 0 + 8 * 1 + 8 * 8 * 1; 164 | for (i = 0; i <= 1; i++) 165 | for (j = 0; j <= 1; j++) 166 | for (k = 0; k <= 1; k++) 167 | p[12][i+8*(j+8*k)] = 1; 168 | class[12] = 3; 169 | piecemax[12] = 1 + 8 * 1 + 8 * 8 * 1; 170 | piececount[0] = 13; 171 | piececount[1] = 3; 172 | piececount[2] = 1; 173 | piececount[3] = 1; 174 | m = 1 + 8 * (1 + 8 * 1); 175 | kount = 0; 176 | if (fit(0,m)) 177 | n = place(0,m); 178 | else { 179 | printf("error 1\n"); 180 | } 181 | if (trial(n)) { 182 | printf("success in 
%d trials\n", kount); 183 | } else { 184 | printf("failure\n"); 185 | } 186 | } 187 | 188 | SINGLE_BENCHMARK(puzzle_()) 189 | -------------------------------------------------------------------------------- /tests/puzzle/puzzle.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/riscv-vm/d0cd7a43bdd58af3d57c98a7614689a55a4b9a60/tests/puzzle/puzzle.elf -------------------------------------------------------------------------------- /tests/quake/quake_320.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/riscv-vm/d0cd7a43bdd58af3d57c98a7614689a55a4b9a60/tests/quake/quake_320.elf -------------------------------------------------------------------------------- /tests/quake/quake_640.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/riscv-vm/d0cd7a43bdd58af3d57c98a7614689a55a4b9a60/tests/quake/quake_640.elf -------------------------------------------------------------------------------- /tests/rsort/rsort.c: -------------------------------------------------------------------------------- 1 | // See LICENSE for license details. 2 | 3 | //************************************************************************** 4 | // Quicksort benchmark 5 | //-------------------------------------------------------------------------- 6 | // 7 | // This benchmark uses quicksort to sort an array of integers. The 8 | // implementation is largely adapted from Numerical Recipes for C. 
The 9 | // input data (and reference data) should be generated using the 10 | // qsort_gendata.pl perl script and dumped to a file named 11 | // dataset1.h 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; } 19 | 20 | static int verify(int n, const volatile int* test, const int* verify) 21 | { 22 | int i; 23 | // Unrolled for faster verification 24 | for (i = 0; i < n/2*2; i+=2) 25 | { 26 | int t0 = test[i], t1 = test[i+1]; 27 | int v0 = verify[i], v1 = verify[i+1]; 28 | if (t0 != v0) return i+1; 29 | if (t1 != v1) return i+2; 30 | } 31 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 32 | return n; 33 | return 0; 34 | } 35 | 36 | //-------------------------------------------------------------------------- 37 | // Input/Reference Data 38 | 39 | #define type unsigned int 40 | #include "dataset1.h" 41 | 42 | #define LOG_BASE 8 43 | #define BASE (1 << LOG_BASE) 44 | 45 | #if 0 46 | # define fetch_add(ptr, inc) __sync_fetch_and_add(ptr, inc) 47 | #else 48 | # define fetch_add(ptr, inc) ((*(ptr) += (inc)) - (inc)) 49 | #endif 50 | 51 | void sort(size_t n, type* arrIn, type* scratchIn) // radix sort, LOG_BASE bits per pass, least-significant digit first; arrIn and scratchIn (n elements each) alternate as source and destination, and the sorted result is left in arrIn 52 | { 53 | size_t log_exp = 0; 54 | size_t buckets[BASE]; 55 | size_t *bucket = buckets; 56 | asm("":"+r"(bucket)); 57 | type *arr = arrIn, *scratch = scratchIn, *p; 58 | size_t *b; 59 | 60 | while (log_exp < CHAR_BIT * sizeof(type)) 61 | { 62 | for (b = bucket; b < bucket + BASE; b++) 63 | *b = 0; 64 | 65 | for (p = arr; p < &arr[n-3]; p += 4) 66 | { 67 | type a0 = p[0]; 68 | type a1 = p[1]; 69 | type a2 = p[2]; 70 | type a3 = p[3]; 71 | fetch_add(&bucket[(a0 >> log_exp) % BASE], 1); 72 | fetch_add(&bucket[(a1 >> log_exp) % BASE], 1); 73 | fetch_add(&bucket[(a2 >> log_exp) % BASE], 1); 74 | fetch_add(&bucket[(a3 >> log_exp) % BASE], 1); 75 | } 76 | for ( ; p < &arr[n]; p++) 77 | bucket[(*p >> log_exp) % BASE]++; 78 | 79 | size_t prev = bucket[0]; 80 | prev += fetch_add(&bucket[1], prev); 81 | for (b = 
&bucket[2]; b < bucket + BASE; b += 2) 82 | { 83 | prev += fetch_add(&b[0], prev); 84 | prev += fetch_add(&b[1], prev); 85 | } 86 | static_assert(BASE % 2 == 0); 87 | 88 | for (p = &arr[n-1]; p >= &arr[3]; p -= 4) 89 | { 90 | type a0 = p[-0]; 91 | type a1 = p[-1]; 92 | type a2 = p[-2]; 93 | type a3 = p[-3]; 94 | size_t* pb0 = &bucket[(a0 >> log_exp) % BASE]; 95 | size_t* pb1 = &bucket[(a1 >> log_exp) % BASE]; 96 | size_t* pb2 = &bucket[(a2 >> log_exp) % BASE]; 97 | size_t* pb3 = &bucket[(a3 >> log_exp) % BASE]; 98 | type* s0 = scratch + fetch_add(pb0, -1); 99 | type* s1 = scratch + fetch_add(pb1, -1); 100 | type* s2 = scratch + fetch_add(pb2, -1); 101 | type* s3 = scratch + fetch_add(pb3, -1); 102 | s0[-1] = a0; 103 | s1[-1] = a1; 104 | s2[-1] = a2; 105 | s3[-1] = a3; 106 | } 107 | for ( ; p >= &arr[0]; p--) 108 | scratch[--bucket[(*p >> log_exp) % BASE]] = *p; 109 | 110 | type* tmp = arr; 111 | arr = scratch; 112 | scratch = tmp; 113 | 114 | log_exp += LOG_BASE; 115 | } 116 | if (arr != arrIn) // odd number of passes: the sorted data sits in scratchIn (== arr), so copy it back to the caller's array 117 | memcpy(arrIn, arr, n*sizeof(type)); 118 | } 119 | 120 | //-------------------------------------------------------------------------- 121 | // Main 122 | 123 | int main( int argc, char* argv[] ) 124 | { 125 | static type scratch[DATA_SIZE]; 126 | 127 | #if PREALLOCATE 128 | // If needed we preallocate everything in the caches 129 | sort(DATA_SIZE, verify_data, scratch); 130 | if (verify(DATA_SIZE, input_data, input_data)) 131 | return 1; 132 | #endif 133 | 134 | // Do the sort 135 | // setStats(1); 136 | sort(DATA_SIZE, input_data, scratch); 137 | // setStats(0); 138 | 139 | // Check the results 140 | return verify( DATA_SIZE, input_data, verify_data ); 141 | } 142 | -------------------------------------------------------------------------------- /tests/rsort/rsort.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/riscv-vm/d0cd7a43bdd58af3d57c98a7614689a55a4b9a60/tests/rsort/rsort.elf
-------------------------------------------------------------------------------- /tests/smallpt/smallpt.cpp: -------------------------------------------------------------------------------- 1 | #include // smallpt, a Path Tracer by Kevin Beason, 2008 2 | #include // Make : g++ -O3 -fopenmp smallpt.cpp -o smallpt 3 | #include // Remove "-fopenmp" for g++ version < 4.2 4 | 5 | typedef float type_t; 6 | 7 | struct Vec { // Usage: time ./smallpt 5000 && xv image.ppm 8 | type_t x, y, z; // position, also color (r,g,b) 9 | Vec(type_t x_=0, type_t y_=0, type_t z_=0){ x=x_; y=y_; z=z_; } 10 | Vec operator+(const Vec &b) const { return Vec(x+b.x,y+b.y,z+b.z); } 11 | Vec operator-(const Vec &b) const { return Vec(x-b.x,y-b.y,z-b.z); } 12 | Vec operator*(type_t b) const { return Vec(x*b,y*b,z*b); } 13 | Vec mult(const Vec &b) const { return Vec(x*b.x,y*b.y,z*b.z); } 14 | Vec& norm(){ return *this = *this * (1/sqrt(x*x+y*y+z*z)); } 15 | type_t dot(const Vec &b) const { return x*b.x+y*b.y+z*b.z; } // cross: 16 | Vec operator%(Vec&b){return Vec(y*b.z-z*b.y,z*b.x-x*b.z,x*b.y-y*b.x);} 17 | }; 18 | struct Ray { Vec o, d; Ray(Vec o_, Vec d_) : o(o_), d(d_) {} }; 19 | enum Refl_t { DIFF, SPEC, REFR }; // material types, used in radiance() 20 | struct Sphere { 21 | type_t rad; // radius 22 | Vec p, e, c; // position, emission, color 23 | Refl_t refl; // reflection type (DIFFuse, SPECular, REFRactive) 24 | Sphere(type_t rad_, Vec p_, Vec e_, Vec c_, Refl_t refl_): 25 | rad(rad_), p(p_), e(e_), c(c_), refl(refl_) {} 26 | type_t intersect(const Ray &r) const { // returns distance, 0 if nohit 27 | Vec op = p-r.o; // Solve t^2*d.d + 2*t*(o-p).d + (o-p).(o-p)-R^2 = 0 28 | type_t t, eps=1e-2, b=op.dot(r.d), det=b*b-op.dot(op)+rad*rad; 29 | if (det<0) return 0; else det=sqrt(det); 30 | return (t=b-det)>eps ? t : ((t=b+det)>eps ? 
t : 0); 31 | } 32 | }; 33 | Sphere spheres[] = {//Scene: radius, position, emission, color, material 34 | Sphere(1e4, Vec( 1e4+1,40.8,81.6), Vec(),Vec(.75,.25,.25),DIFF),//Left 35 | Sphere(1e4, Vec(-1e4+99,40.8,81.6),Vec(),Vec(.25,.25,.75),DIFF),//Rght 36 | Sphere(1e4, Vec(50,40.8, 1e4), Vec(),Vec(.75,.75,.75),DIFF),//Back 37 | Sphere(1e4, Vec(50,40.8,-1e4+170), Vec(),Vec(), DIFF),//Frnt 38 | Sphere(1e4, Vec(50, 1e4, 81.6), Vec(),Vec(.75,.75,.75),DIFF),//Botm 39 | Sphere(1e4, Vec(50,-1e4+81.6,81.6),Vec(),Vec(.75,.75,.75),DIFF),//Top 40 | Sphere(16.5,Vec(27,16.5,47), Vec(),Vec(1,1,1)*.999, SPEC),//Mirr 41 | Sphere(16.5,Vec(73,16.5,78), Vec(),Vec(1,1,1)*.999, REFR),//Glas 42 | Sphere(600, Vec(50,681.6-.27,81.6),Vec(12,12,12), Vec(), DIFF) //Lite 43 | }; 44 | inline type_t clamp(type_t x){ return x<0 ? 0 : x>1 ? 1 : x; } 45 | inline int toInt(type_t x){ return int(pow(clamp(x),1/2.2)*255+.5); } 46 | inline bool intersect(const Ray &r, type_t &t, int &id){ 47 | type_t n=sizeof(spheres)/sizeof(Sphere), d, inf=t=1e20; 48 | for(int i=int(n);i--;) if((d=spheres[i].intersect(r))&&df.y && f.x>f.z ? f.x : f.y>f.z ? f.y : f.z; // max refl 58 | if (++depth>5) if (erand48(Xi).1?Vec(0,1):Vec(1))%w).norm(), v=w%u; 62 | Vec d = (u*cos(r1)*r2s + v*sin(r1)*r2s + w*sqrt(1-r2)).norm(); 63 | return obj.e + f.mult(radiance(Ray(x,d),depth,Xi)); 64 | } else if (obj.refl == SPEC) // Ideal SPECULAR reflection 65 | return obj.e + f.mult(radiance(Ray(x,r.d-n*2*n.dot(r.d)),depth,Xi)); 66 | Ray reflRay(x, r.d-n*2*n.dot(r.d)); // Ideal dielectric REFRACTION 67 | bool into = n.dot(nl)>0; // Ray from outside going in? 
68 | type_t nc=1, nt=1.5, nnt=into?nc/nt:nt/nc, ddn=r.d.dot(nl), cos2t; 69 | if ((cos2t=1-nnt*nnt*(1-ddn*ddn))<0) // Total internal reflection 70 | return obj.e + f.mult(radiance(reflRay,depth,Xi)); 71 | Vec tdir = (r.d*nnt - n*((into?1:-1)*(ddn*nnt+sqrt(cos2t)))).norm(); 72 | type_t a=nt-nc, b=nt+nc, R0=a*a/(b*b), c = 1-(into?-ddn:tdir.dot(n)); 73 | type_t Re=R0+(1-R0)*c*c*c*c*c,Tr=1-Re,P=.25+.5*Re,RP=Re/P,TP=Tr/(1-P); 74 | return obj.e + f.mult(depth>2 ? (erand48(Xi)

size; 45 | } 46 | 47 | void list_init( struct List* list ) 48 | { 49 | list->size = 0; 50 | list->head = 0; 51 | } 52 | 53 | void list_push( struct List* list, int val ) 54 | { 55 | struct Node* newNode; 56 | 57 | // Pop the next free node off the free list 58 | newNode = g_nodeFreeList.head; 59 | g_nodeFreeList.head = g_nodeFreeList.head->next; 60 | 61 | // Push the new node onto the given list 62 | newNode->next = list->head; 63 | list->head = newNode; 64 | 65 | // Assign the value 66 | list->head->val = val; 67 | 68 | // Increment size 69 | list->size++; 70 | 71 | } 72 | 73 | int list_pop( struct List* list ) 74 | { 75 | struct Node* freedNode; 76 | int val; 77 | 78 | // Get the value from the->head of given list 79 | val = list->head->val; 80 | 81 | // Pop the head node off the given list 82 | freedNode = list->head; 83 | list->head = list->head->next; 84 | 85 | // Push the freed node onto the free list 86 | freedNode->next = g_nodeFreeList.head; 87 | g_nodeFreeList.head = freedNode; 88 | 89 | // Decrement size 90 | list->size--; 91 | 92 | return val; 93 | } 94 | 95 | void list_clear( struct List* list ) 96 | { 97 | while ( list_getSize(list) > 0 ) 98 | list_pop(list); 99 | } 100 | 101 | //-------------------------------------------------------------------------- 102 | // Tower data structure and functions 103 | 104 | struct Towers 105 | { 106 | int numDiscs; 107 | int numMoves; 108 | struct List pegA; 109 | struct List pegB; 110 | struct List pegC; 111 | }; 112 | 113 | void towers_init( struct Towers* this, int n ) 114 | { 115 | int i; 116 | 117 | this->numDiscs = n; 118 | this->numMoves = 0; 119 | 120 | list_init( &(this->pegA) ); 121 | list_init( &(this->pegB) ); 122 | list_init( &(this->pegC) ); 123 | 124 | for ( i = 0; i < n; i++ ) 125 | list_push( &(this->pegA), n-i ); 126 | 127 | } 128 | 129 | void towers_clear( struct Towers* this ) 130 | { 131 | 132 | list_clear( &(this->pegA) ); 133 | list_clear( &(this->pegB) ); 134 | list_clear( &(this->pegC) ); 135 
| 136 | towers_init( this, this->numDiscs ); 137 | 138 | } 139 | 140 | void towers_solve_h( struct Towers* this, int n, 141 | struct List* startPeg, 142 | struct List* tempPeg, 143 | struct List* destPeg ) 144 | { 145 | int val; 146 | 147 | if ( n == 1 ) { 148 | val = list_pop(startPeg); 149 | list_push(destPeg,val); 150 | this->numMoves++; 151 | } 152 | else { 153 | towers_solve_h( this, n-1, startPeg, destPeg, tempPeg ); 154 | towers_solve_h( this, 1, startPeg, tempPeg, destPeg ); 155 | towers_solve_h( this, n-1, tempPeg, startPeg, destPeg ); 156 | } 157 | 158 | } 159 | 160 | void towers_solve( struct Towers* this ) 161 | { 162 | towers_solve_h( this, this->numDiscs, &(this->pegA), &(this->pegB), &(this->pegC) ); 163 | } 164 | 165 | int towers_verify( struct Towers* this ) 166 | { 167 | struct Node* ptr; 168 | int numDiscs = 0; 169 | 170 | if ( list_getSize(&this->pegA) != 0 ) { 171 | return 2; 172 | } 173 | 174 | if ( list_getSize(&this->pegB) != 0 ) { 175 | return 3; 176 | } 177 | 178 | if ( list_getSize(&this->pegC) != this->numDiscs ) { 179 | return 4; 180 | } 181 | 182 | for ( ptr = this->pegC.head; ptr != 0; ptr = ptr->next ) { 183 | numDiscs++; 184 | if ( ptr->val != numDiscs ) { 185 | return 5; 186 | } 187 | } 188 | 189 | if ( this->numMoves != ((1 << this->numDiscs) - 1) ) { 190 | return 6; 191 | } 192 | 193 | return 0; 194 | } 195 | 196 | //-------------------------------------------------------------------------- 197 | // Main 198 | 199 | int main( int argc, char* argv[] ) 200 | { 201 | struct Towers towers; 202 | int i; 203 | 204 | // Initialize free list 205 | 206 | list_init( &g_nodeFreeList ); 207 | g_nodeFreeList.head = &(g_nodePool[0]); 208 | g_nodeFreeList.size = NUM_DISCS; 209 | g_nodePool[NUM_DISCS-1].next = 0; 210 | g_nodePool[NUM_DISCS-1].val = 99; 211 | for ( i = 0; i < (NUM_DISCS-1); i++ ) { 212 | g_nodePool[i].next = &(g_nodePool[i+1]); 213 | g_nodePool[i].val = i; 214 | } 215 | 216 | towers_init( &towers, NUM_DISCS ); 217 | 218 | // If 
needed we preallocate everything in the caches 219 | 220 | #if PREALLOCATE 221 | towers_solve( &towers ); 222 | #endif 223 | 224 | // Solve it 225 | 226 | towers_clear( &towers ); 227 | // setStats(1); 228 | towers_solve( &towers ); 229 | // setStats(0); 230 | 231 | // Check the results 232 | return towers_verify( &towers ); 233 | } 234 | 235 | -------------------------------------------------------------------------------- /tests/towers/towers.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/riscv-vm/d0cd7a43bdd58af3d57c98a7614689a55a4b9a60/tests/towers/towers.elf -------------------------------------------------------------------------------- /tests/whetstone/bench.h: -------------------------------------------------------------------------------- 1 | /* 2 | * provide timing and utility functions. 3 | * 4 | * identifiers starting with bm_ are reserved 5 | */ 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include 13 | 14 | /* return the last result of TIME_CYCLES. */ 15 | #define GET_CYCLES 1 /* bm_get_cycles () */ 16 | 17 | /* output the result of type double, higher is better. Never returns. */ 18 | #define RESULT(val) bm_set_result (val) 19 | 20 | /* output an error and kill the program. Never returns. */ 21 | #define ERROR(msg) bm_error (msg) 22 | 23 | /* make sure this value is calculated and not optimized away. */ 24 | #define USE(value) bm_eat((int)(value)) 25 | 26 | /* return a zero without the compiler knowing it. */ 27 | #define ZERO bm_return_zero () 28 | 29 | /* tag the next result with the given string. 
*/ 30 | #define TAG(s) bm_set_tag (s) 31 | 32 | #define TIME_CYCLES 33 | 34 | static int bm_return_zero (void) { return 0; } 35 | static void bm_eat (int val) { } 36 | static void bm_set_tag (const char *tag) { printf ("TAG %s\n", tag); } 37 | static void bm_error(char* msg) { printf("ERROR %s\n", msg); } 38 | static void bm_set_result(float val) { ; } 39 | static void bm_get_cycles() { ; } 40 | 41 | /* 42 | * declares the main function for you, use it like this: 43 | * BEGIN_MAIN 44 | * code; 45 | * more code; 46 | * END_MAIN 47 | */ 48 | #define BEGIN_MAIN \ 49 | int \ 50 | main (int argc, char **argv) \ 51 | { 52 | 53 | #define END_MAIN \ 54 | return 0; \ 55 | } 56 | 57 | /* 58 | * can be used instead of coding your own main(), if you 59 | * only have a single, simple test 60 | */ 61 | #define SINGLE_BENCHMARK(code) \ 62 | BEGIN_MAIN \ 63 | code; \ 64 | END_MAIN 65 | 66 | #ifdef __cplusplus 67 | } 68 | #endif 69 | 70 | -------------------------------------------------------------------------------- /tests/whetstone/whetstone.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Whetstone benchmark in C. This program is a translation of the 3 | * original Algol version in "A Synthetic Benchmark" by H.J. Curnow 4 | * and B.A. Wichman in Computer Journal, Vol 19 #1, February 1976. 5 | * 6 | * Used to test compiler optimization and floating point performance. 7 | * 8 | * Compile by: cc -O -s -o whet whet.c 9 | * or: cc -O -DPOUT -s -o whet whet.c 10 | * if output is desired. 
11 | */ 12 | 13 | #define ITERATIONS WORKLOAD /* (1000) 1 Million Whetstone instructions */ 14 | 15 | #include 16 | 17 | #include "bench.h" 18 | 19 | 20 | TYPE x1, x2, x3, x4, x, y, z, t, t1, t2; 21 | TYPE e1[4]; 22 | int i, j, k, l, n1, n2, n3, n4, n6, n7, n8, n9, n10, n11; 23 | float time0,time1; 24 | 25 | 26 | void whetstone(void) 27 | { 28 | printf ("Whetstone Benchmark - C language version\n\n"); 29 | cputime (&time0); 30 | 31 | /* initialize constants */ 32 | 33 | t = 0.499975; 34 | t1 = 0.50025; 35 | t2 = 2.0; 36 | 37 | /* set values of module weights */ 38 | 39 | n1 = 0 * ITERATIONS; 40 | n2 = 12 * ITERATIONS; 41 | n3 = 14 * ITERATIONS; 42 | n4 = 345 * ITERATIONS; 43 | n6 = 210 * ITERATIONS; 44 | n7 = 32 * ITERATIONS; 45 | n8 = 899 * ITERATIONS; 46 | n9 = 616 * ITERATIONS; 47 | n10 = 0 * ITERATIONS; 48 | n11 = 93 * ITERATIONS; 49 | 50 | /* MODULE 1: simple identifiers */ 51 | 52 | x1 = 1.0; 53 | x2 = x3 = x4 = -1.0; 54 | 55 | for(i = 1; i <= n1; i += 1) 56 | { 57 | x1 = ( x1 + x2 + x3 - x4 ) * t; 58 | x2 = ( x1 + x2 - x3 - x4 ) * t; 59 | x3 = ( x1 - x2 + x3 + x4 ) * t; 60 | x4 = (-x1 + x2 + x3 + x4 ) * t; 61 | } 62 | #ifdef POUT 63 | pout(n1, n1, n1, x1, x2, x3, x4); 64 | #endif 65 | 66 | 67 | /* MODULE 2: array elements */ 68 | 69 | e1[0] = 1.0; 70 | e1[1] = e1[2] = e1[3] = -1.0; 71 | 72 | for (i = 1; i <= n2; i +=1) 73 | { 74 | e1[0] = ( e1[0] + e1[1] + e1[2] - e1[3] ) * t; 75 | e1[1] = ( e1[0] + e1[1] - e1[2] + e1[3] ) * t; 76 | e1[2] = ( e1[0] - e1[1] + e1[2] + e1[3] ) * t; 77 | e1[3] = (-e1[0] + e1[1] + e1[2] + e1[3] ) * t; 78 | } 79 | #ifdef POUT 80 | pout(n2, n3, n2, e1[0], e1[1], e1[2], e1[3]); 81 | #endif 82 | 83 | /* MODULE 3: array as parameter */ 84 | 85 | for (i = 1; i <= n3; i += 1) 86 | pa(e1); 87 | #ifdef POUT 88 | 89 | pout(n3, n2, n2, e1[0], e1[1], e1[2], e1[3]); 90 | #endif 91 | 92 | /* MODULE 4: conditional jumps */ 93 | 94 | j = 1; 95 | for (i = 1; i <= n4; i += 1) 96 | { 97 | if (j == 1) 98 | j = 2; 99 | else 100 | j = 3; 101 | 102 | 
if (j > 2) 103 | j = 0; 104 | else 105 | j = 1; 106 | 107 | if (j < 1 ) 108 | j = 1; 109 | else 110 | j = 0; 111 | } 112 | #ifdef POUT 113 | pout(n4, j, j, x1, x2, x3, x4); 114 | #endif 115 | 116 | /* MODULE 5: omitted */ 117 | 118 | /* MODULE 6: integer arithmetic */ 119 | 120 | j = 1; 121 | k = 2; 122 | l = 3; 123 | 124 | for (i = 1; i <= n6; i += 1) 125 | { 126 | j = j * (k - j) * (l -k); 127 | k = l * k - (l - j) * k; 128 | l = (l - k) * (k + j); 129 | 130 | e1[l - 2] = j + k + l; /* C arrays are zero based */ 131 | e1[k - 2] = j * k * l; 132 | } 133 | #ifdef POUT 134 | pout(n6, j, k, e1[0], e1[1], e1[2], e1[3]); 135 | #endif 136 | 137 | /* MODULE 7: trig. functions */ 138 | 139 | x = y = 0.5; 140 | 141 | for(i = 1; i <= n7; i +=1) 142 | { 143 | x = t * atanf(t2*sinf(x)*cosf(x)/(cosf(x+y)+cosf(x-y)-1.0)); 144 | y = t * atanf(t2*sinf(y)*cosf(y)/(cosf(x+y)+cosf(x-y)-1.0)); 145 | } 146 | #ifdef POUT 147 | pout(n7, j, k, x, x, y, y); 148 | #endif 149 | 150 | /* MODULE 8: procedure calls */ 151 | 152 | x = y = z = 1.0; 153 | 154 | for (i = 1; i <= n8; i +=1) 155 | p3(x, y, &z); 156 | #ifdef POUT 157 | 158 | pout(n8, j, k, x, y, z, z); 159 | #endif 160 | 161 | /* MODULE9: array references */ 162 | 163 | j = 1; 164 | k = 2; 165 | l = 3; 166 | 167 | e1[0] = 1.0; 168 | e1[1] = 2.0; 169 | e1[2] = 3.0; 170 | 171 | for(i = 1; i <= n9; i += 1) 172 | p0(); 173 | #ifdef POUT 174 | pout(n9, j, k, e1[0], e1[1], e1[2], e1[3]); 175 | #endif 176 | 177 | /* MODULE10: integer arithmetic */ 178 | 179 | j = 2; 180 | k = 3; 181 | 182 | for(i = 1; i <= n10; i +=1) 183 | { 184 | j = j + k; 185 | k = j + k; 186 | j = k - j; 187 | k = k - j - j; 188 | } 189 | #ifdef POUT 190 | pout(n10, j, k, x1, x2, x3, x4); 191 | #endif 192 | 193 | /* MODULE11: standard functions */ 194 | 195 | x = 0.75; 196 | for(i = 1; i <= n11; i +=1) 197 | x = sqrtf( expf( logf(x) / t1)); 198 | 199 | #ifdef POUT 200 | 201 | pout(n11, j, k, x, x, x, x); 202 | #endif 203 | 204 | cputime (&time1); 205 | printf ("\n 
Single Precision C-languare Whetstones KIPS %8.2f\n",100*ITERATIONS/(time1-time0)); 206 | 207 | } 208 | 209 | pa(e) 210 | TYPE e[4]; 211 | { 212 | register int j; 213 | 214 | j = 0; 215 | lab: 216 | e[0] = ( e[0] + e[1] + e[2] - e[3] ) * t; 217 | e[1] = ( e[0] + e[1] - e[2] + e[3] ) * t; 218 | e[2] = ( e[0] - e[1] + e[2] + e[3] ) * t; 219 | e[3] = ( -e[0] + e[1] + e[2] + e[3] ) / t2; 220 | j += 1; 221 | if (j < 6) 222 | goto lab; 223 | } 224 | 225 | 226 | p3(x, y, z) 227 | TYPE x, y, *z; 228 | { 229 | x = t * (x + y); 230 | y = t * (x + y); 231 | *z = (x + y) /t2; 232 | } 233 | 234 | 235 | p0() 236 | { 237 | e1[j] = e1[k]; 238 | e1[k] = e1[l]; 239 | e1[l] = e1[j]; 240 | } 241 | 242 | #ifdef POUT 243 | pout(n, j, k, x1, x2, x3, x4) 244 | int n, j, k; 245 | TYPE x1, x2, x3, x4; 246 | { 247 | printf("%6d%6d%6d %5e %5e %5e %5e\n", 248 | n, j, k, x1, x2, x3, x4); 249 | } 250 | #endif 251 | 252 | cputime (secs) 253 | float *secs; 254 | { 255 | #include 256 | #include 257 | struct tms tms; 258 | 259 | times(&tms); 260 | *secs = tms.tms_utime/60.0; 261 | } 262 | 263 | SINGLE_BENCHMARK(whetstone()) 264 | 265 | -------------------------------------------------------------------------------- /tests/whetstone/whetstone.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/riscv-vm/d0cd7a43bdd58af3d57c98a7614689a55a4b9a60/tests/whetstone/whetstone.elf -------------------------------------------------------------------------------- /tinycg/tinycg.h: -------------------------------------------------------------------------------- 1 | // ___________.__ _________ ________ 2 | // \__ ___/|__| ____ ___.__.\_ ___ \ / _____/ 3 | // | | | |/ < | |/ \ \// \ ___ 4 | // | | | | | \___ |\ \___\ \_\ \ 5 | // |____| |__|___| / ____| \______ /\______ / 6 | // Tiny Code Gen X64 \/\/ \/ \/ 7 | // 8 | // https://github.com/bit-hack/tinycg 9 | // 10 | 11 | #pragma once 12 | #include 13 | 14 | 15 | typedef int cg_r8_t; 16 | 
typedef int cg_r16_t; 17 | typedef int cg_r32_t; 18 | typedef int cg_r64_t; 19 | typedef int cg_cc_t; 20 | typedef int cg_xmm_t; 21 | 22 | enum { 23 | cg_al, 24 | cg_cl, 25 | cg_dl, 26 | cg_bl, 27 | cg_ah, 28 | cg_ch, 29 | cg_dh, 30 | cg_bh, 31 | }; 32 | 33 | enum { 34 | cg_ax, 35 | cg_cx, 36 | cg_dx, 37 | cg_bx, 38 | cg_sp, 39 | cg_bp, 40 | cg_si, 41 | cg_di, 42 | }; 43 | 44 | enum { 45 | cg_eax, 46 | cg_ecx, 47 | cg_edx, 48 | cg_ebx, 49 | cg_esp, 50 | cg_ebp, 51 | cg_esi, 52 | cg_edi, 53 | }; 54 | 55 | enum { 56 | cg_rax, 57 | cg_rcx, 58 | cg_rdx, 59 | cg_rbx, 60 | cg_rsp, 61 | cg_rbp, 62 | cg_rsi, 63 | cg_rdi, 64 | // extended registers 65 | cg_r8, 66 | cg_r9, 67 | cg_r10, 68 | cg_r11, 69 | cg_r12, 70 | cg_r13, 71 | cg_r14, 72 | cg_r15, 73 | }; 74 | 75 | enum { 76 | cg_xmm0, 77 | cg_xmm1, 78 | cg_xmm2, 79 | cg_xmm3, 80 | cg_xmm4, 81 | cg_xmm5, 82 | cg_xmm6, 83 | cg_xmm7, 84 | }; 85 | 86 | enum cc_t { 87 | cg_cc_o = 0x0, // overflow JO (OF=1) 88 | cg_cc_no = 0x1, // not overflow JNO (OF=0) 89 | cg_cc_c = 0x2, // carry JC (CF=1) 90 | cg_cc_ae = 0x3, // above or equal JAE (CF=0) 91 | cg_cc_eq = 0x4, // equal JE (ZF=1) 92 | cg_cc_ne = 0x5, // not equal JNE (ZF=0) 93 | cg_cc_be = 0x6, // below or equal JBE (CF=1 or ZF=1) 94 | cg_cc_ab = 0x7, // above JA (CF=0 and ZF=0) 95 | cg_cc_s = 0x8, // sign JS (SF=1) 96 | cg_cc_ns = 0x9, // not sign JNS (SF=0) 97 | cg_cc_p = 0xa, // parity JP (PF=1) 98 | cg_cc_np = 0xb, // parity odd JNP (PF=0) 99 | cg_cc_lt = 0xc, // less JL (SF!=OF) 100 | cg_cc_ge = 0xd, // greater or equal JGE (SF=OF) 101 | cg_cc_le = 0xe, // less or equal JLE (ZF=1 or SF!=OF) 102 | cg_cc_gt = 0xf, // greater JG (ZF=0 and SF=OF) 103 | }; 104 | 105 | struct cg_state_t { 106 | // the start address of the buffer 107 | uint8_t *start; 108 | // the end address of the buffer 109 | const uint8_t *end; 110 | // the next writing location in the buffer 111 | uint8_t *head; 112 | }; 113 | 114 | // initalize the code generator 115 | void cg_init(struct cg_state_t *, 
uint8_t *start, uint8_t *end); 116 | 117 | // return number of bytes written to the code buffer 118 | uint32_t cg_size(struct cg_state_t *); 119 | 120 | // clear all bytes written to the code buffer 121 | void cg_reset(struct cg_state_t *); 122 | 123 | void cg_mov_r64_r64(struct cg_state_t *, cg_r64_t r1, cg_r64_t r2); 124 | void cg_mov_r32_r32(struct cg_state_t *, cg_r32_t r1, cg_r32_t r2); 125 | void cg_mov_r64_i32(struct cg_state_t *, cg_r64_t r1, int32_t imm); 126 | void cg_mov_r32_i32(struct cg_state_t *, cg_r32_t r1, uint32_t imm); 127 | 128 | void cg_ret(struct cg_state_t *); 129 | 130 | void cg_mov_r32_r64disp(struct cg_state_t *, cg_r32_t r1, cg_r64_t base, int32_t disp); 131 | void cg_mov_r64disp_r32(struct cg_state_t *, cg_r64_t base, int32_t disp, cg_r32_t r1); 132 | void cg_mov_r64_r64disp(struct cg_state_t *, cg_r64_t base, cg_r64_t r1, int32_t disp); 133 | void cg_mov_r64disp_r64(struct cg_state_t *, cg_r64_t base, int32_t disp, cg_r64_t r1); 134 | void cg_mov_r64disp_i32(struct cg_state_t *, cg_r64_t base, int32_t disp, int32_t imm); 135 | 136 | void cg_movsx_r32_r8(struct cg_state_t *, cg_r32_t r1, cg_r8_t r2); 137 | void cg_movsx_r32_r16(struct cg_state_t *, cg_r32_t r1, cg_r16_t r2); 138 | void cg_movsx_r64_r32(struct cg_state_t *, cg_r64_t dst, cg_r32_t src); 139 | void cg_movsx_r64_r64disp(struct cg_state_t *cg, cg_r32_t dst, cg_r64_t base, int32_t disp); 140 | 141 | void cg_movzx_r32_r8(struct cg_state_t *, cg_r32_t r1, cg_r8_t r2); 142 | void cg_movzx_r32_r16(struct cg_state_t *, cg_r32_t r1, cg_r16_t r2); 143 | 144 | void cg_add_r64_i32(struct cg_state_t *, cg_r64_t t1, int32_t imm); 145 | void cg_add_r32_i32(struct cg_state_t *, cg_r32_t r1, int32_t imm); 146 | void cg_add_r32_r32(struct cg_state_t *, cg_r32_t r1, cg_r32_t r2); 147 | void cg_add_r64disp_i32(struct cg_state_t *, cg_r64_t base, int32_t offset, int32_t imm); 148 | void cg_add_r64disp_r32(struct cg_state_t *, cg_r64_t base, int32_t offset, cg_r32_t src); 149 | 150 | void 
cg_and_r8_i8(struct cg_state_t *, cg_r8_t r1, uint8_t imm); 151 | void cg_and_r32_i32(struct cg_state_t *, cg_r32_t r1, uint32_t imm); 152 | void cg_and_r32_r32(struct cg_state_t *, cg_r32_t r1, cg_r32_t r2); 153 | void cg_and_r64disp_i32(struct cg_state_t *, cg_r64_t base, int32_t offset, int32_t imm); 154 | void cg_and_r64disp_r32(struct cg_state_t *, cg_r64_t base, int32_t offset, cg_r32_t src); 155 | 156 | void cg_sub_r64_i32(struct cg_state_t *, cg_r64_t r1, int32_t imm); 157 | void cg_sub_r32_i32(struct cg_state_t *, cg_r32_t r1, int32_t imm); 158 | void cg_sub_r32_r32(struct cg_state_t *, cg_r32_t r1, cg_r32_t r2); 159 | void cg_sub_r64disp_i32(struct cg_state_t *, cg_r64_t base, int32_t offset, int32_t imm); 160 | void cg_sub_r64disp_r32(struct cg_state_t *, cg_r64_t base, int32_t offset, cg_r32_t src); 161 | 162 | void cg_shl_r32_i8(struct cg_state_t *, cg_r32_t r1, uint8_t imm); 163 | void cg_shl_r32_cl(struct cg_state_t *, cg_r32_t r1); 164 | void cg_shl_r64disp_i8(struct cg_state_t *, cg_r64_t base, int32_t offset, uint8_t imm); 165 | 166 | void cg_sar_r32_i8(struct cg_state_t *, cg_r32_t r1, uint8_t imm); 167 | void cg_sar_r32_cl(struct cg_state_t *, cg_r32_t r1); 168 | void cg_sar_r64disp_i8(struct cg_state_t *, cg_r64_t base, int32_t offset, uint8_t imm); 169 | 170 | void cg_shr_r32_i8(struct cg_state_t *, cg_r32_t r1, uint8_t imm); 171 | void cg_shr_r32_cl(struct cg_state_t *, cg_r32_t r1); 172 | void cg_shr_r64disp_i8(struct cg_state_t *, cg_r64_t base, int32_t offset, uint8_t imm); 173 | 174 | void cg_xor_r64_r64(struct cg_state_t *, cg_r64_t r1, cg_r64_t r2); 175 | void cg_xor_r32_i32(struct cg_state_t *, cg_r32_t r1, uint32_t imm); 176 | void cg_xor_r32_r32(struct cg_state_t *, cg_r32_t r1, cg_r32_t r2); 177 | void cg_xor_r64disp_i32(struct cg_state_t *, cg_r64_t base, int32_t offset, int32_t imm); 178 | void cg_xor_r64disp_r32(struct cg_state_t *, cg_r64_t base, int32_t offset, cg_r32_t src); 179 | 180 | void cg_or_r32_i32(struct cg_state_t *, 
cg_r32_t r1, uint32_t imm); 181 | void cg_or_r32_r32(struct cg_state_t *, cg_r32_t r1, cg_r32_t r2); 182 | void cg_or_r64disp_i32(struct cg_state_t *, cg_r64_t base, int32_t offset, int32_t imm); 183 | void cg_or_r64disp_r32(struct cg_state_t *, cg_r64_t base, int32_t offset, cg_r32_t src); 184 | 185 | void cg_setcc_r8(struct cg_state_t *, cg_cc_t cc, cg_r8_t r1); 186 | 187 | void cg_cmp_r64_r64(struct cg_state_t *, cg_r64_t r1, cg_r64_t r2); 188 | void cg_cmp_r32_r32(struct cg_state_t *, cg_r32_t r1, cg_r32_t r2); 189 | void cg_cmp_r32_i32(struct cg_state_t *, cg_r32_t r1, uint32_t imm); 190 | void cg_cmp_r32_r64disp(struct cg_state_t *, cg_r32_t r1, cg_r64_t base, int32_t offset); 191 | void cg_cmp_r64disp_r32(struct cg_state_t *, cg_r64_t base, int32_t offset, cg_r32_t r1); 192 | void cg_cmp_r64disp_i32(struct cg_state_t *cg, cg_r64_t base, int32_t offset, int32_t imm); 193 | 194 | void cg_call_r64disp(struct cg_state_t *, cg_r64_t base, int32_t disp32); 195 | 196 | void cg_mul_r32(struct cg_state_t *, cg_r32_t r1); 197 | void cg_mul_r64disp(struct cg_state_t *, cg_r64_t base, int32_t offset); 198 | void cg_imul_r32(struct cg_state_t *, cg_r32_t r1); 199 | void cg_imul_r64disp(struct cg_state_t *, cg_r64_t base, int32_t offset); 200 | 201 | void cg_push_r64(struct cg_state_t *, cg_r64_t r1); 202 | void cg_pop_r64(struct cg_state_t *, cg_r64_t r1); 203 | 204 | void cg_nop(struct cg_state_t *); 205 | 206 | void cg_cmov_r32_r32(struct cg_state_t *, cg_cc_t cc, cg_r32_t r1, cg_r32_t r2); 207 | 208 | const char *cg_r64_str(cg_r32_t reg); 209 | const char *cg_r32_str(cg_r32_t reg); 210 | const char *cg_r16_str(cg_r32_t reg); 211 | const char *cg_r8_str(cg_r32_t reg); 212 | 213 | void cg_movss_xmm_r64disp(struct cg_state_t *, cg_xmm_t dst, cg_r64_t base, int32_t offset); 214 | void cg_movss_r64disp_xmm(struct cg_state_t *, cg_r64_t base, int32_t offset, cg_xmm_t dst); 215 | 216 | void cg_addss_xmm_r64disp(struct cg_state_t *, cg_xmm_t dst, cg_r64_t base, int32_t 
offset); 217 | void cg_subss_xmm_r64disp(struct cg_state_t *, cg_xmm_t dst, cg_r64_t base, int32_t offset); 218 | void cg_mulss_xmm_r64disp(struct cg_state_t *, cg_xmm_t dst, cg_r64_t base, int32_t offset); 219 | void cg_divss_xmm_r64disp(struct cg_state_t *, cg_xmm_t dst, cg_r64_t base, int32_t offset); 220 | 221 | void cg_sqrtss_xmm_r64disp(struct cg_state_t *, cg_xmm_t dst, cg_r64_t base, int32_t offset); 222 | 223 | void cg_cvttss2si_r32_r64disp(struct cg_state_t *, cg_r32_t dst, cg_r64_t base, int32_t offset); 224 | void cg_cvtsi2ss_xmm_r64disp(struct cg_state_t *, cg_xmm_t dst, cg_r64_t base, int32_t offset); 225 | 226 | void cg_mov_r32_xmm(struct cg_state_t *, cg_r32_t dst, cg_xmm_t src); 227 | void cg_mov_xmm_r32(struct cg_state_t *, cg_xmm_t dst, cg_r32_t src); 228 | --------------------------------------------------------------------------------