├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── LICENSE.txt
├── README.md
├── armulet.c
├── armulet.h
├── armulet_accessors.h
├── carmulet
│   ├── CMakeLists.txt
│   ├── carmulet.c
│   └── carmulet.h
├── pico_sdk_import.cmake
├── test
│   ├── CMakeLists.txt
│   ├── assembler.h
│   ├── instruction_test.cpp
│   ├── instruction_test2.cpp
│   ├── undef_inst_test.cpp
│   ├── varmulet_hooks_test.S
│   └── varmulet_hooks_test.h
└── varmulet
    ├── CMakeLists.txt
    ├── varmulet.S
    ├── varmulet.h
    ├── varmulet_armv6m_core.S
    ├── varmulet_hooks_default.S
    └── varmulet_macros.inc.S
/.gitignore: -------------------------------------------------------------------------------- 1 | cmake-* 2 | .idea 3 | *.o 4 | build 5 | build-* 6 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raspberrypi/armulet/4b3df60c4a2de3cf256c6bcc521f6af0daecdee5/.gitmodules -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.15) 2 | include(pico_sdk_import.cmake) 3 | project(armulet C CXX) 4 | pico_sdk_init() 5 | 6 | set(CMAKE_C_STANDARD 11) 7 | set(CMAKE_CXX_STANDARD 17) 8 | 9 | add_library(armulet INTERFACE) 10 | target_sources(armulet INTERFACE 11 | ${CMAKE_CURRENT_LIST_DIR}/armulet.c 12 | ) 13 | target_include_directories(armulet INTERFACE ${CMAKE_CURRENT_LIST_DIR}) 14 | 15 | if (PICO_RISCV) 16 | add_subdirectory(varmulet) 17 | target_link_libraries(armulet INTERFACE varmulet) 18 | else() 19 | add_subdirectory(carmulet) 20 | target_link_libraries(armulet INTERFACE carmulet) 21 | target_link_libraries(armulet INTERFACE pico_stdlib) 22 | endif() 23 | 24 | pico_is_top_level_project(ARMULET_TOP_LEVEL_PROJECT) 25 | 26 | if (ARMULET_TOP_LEVEL_PROJECT) 27 | add_subdirectory(test) 28 | endif() 29 | --------------------------------------------------------------------------------
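The CMakeLists.txt above builds `armulet` as an INTERFACE library and picks the backend per target architecture: the RISC-V assembly implementation (`varmulet`) when `PICO_RISCV` is set, otherwise the C implementation (`carmulet`). A minimal sketch of consuming it from an application follows; the `external/armulet` path and the `my_app` name are illustrative assumptions, not taken from this repo:

```cmake
# Hypothetical consumer CMakeLists.txt, assuming this repo is vendored
# (e.g. as a git submodule) under external/armulet.
cmake_minimum_required(VERSION 3.15)
include(external/armulet/pico_sdk_import.cmake)  # must run before project()
project(my_app C CXX ASM)
pico_sdk_init()

add_subdirectory(external/armulet)  # defines the armulet INTERFACE library

add_executable(my_app main.c)
# armulet transitively pulls in varmulet (RISC-V) or carmulet (ARM/host).
target_link_libraries(my_app armulet pico_stdlib)
```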
/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020-2024 Raspberry Pi (Trading) Ltd. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 4 | following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following 7 | disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following 10 | disclaimer in the documentation and/or other materials provided with the distribution. 11 | 12 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 16 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 18 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 19 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 20 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 21 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Armulet 2 | 3 | `ARMULET` is an ARMv6M / ARMv8M-baseline emulator written in C. 4 | 5 | `VARMULET` is an ARMv6M / ARMv8M-baseline emulator written in RISC-V assembly. 6 | The goals of `VARMULET`: 7 | 8 | * small code size (currently ~3K), so it can be placed in ROM 9 | * fast enough to run USB boot code under RISC-V (which will also be a non-secure ARMv6M binary under ARM). 10 | `VARMULET` currently seems to be about 3x faster than `ARMULET` on RISC-V for one particular printf-heavy 11 | use case. We still need to test with the boot code (and also SVC calls for things like memcpy, memset). 12 | * TODO: extensible by non-ROM code, with no ROM-specific functionality baked in (e.g. handling of privileged mode, IRQs, 13 | breakpoints, SVC etc.). It should basically be possible to use the emulator in any RISC-V program, and also to extend 14 | it, for example, to support other 32-bit instructions. 15 | 16 | --------------------------------------------------------------------------------
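To make the API concrete, here is a minimal sketch of driving the emulator by hand. It assumes a debug, host-style build of the C backend (`ARMULET_DEBUG=1`, so `armulet_single_step()` exists, and `ARMULET_USE_REAL_MEMORY` unset, so the emulated `ram_memory` array backs `A_RAM_START`); the two hand-assembled Thumb opcodes are `movs r0, #42` (0x202a) and `b .` (0xe7fe):

```c
#include <stdio.h>
#include "armulet.h"
#include "armulet_accessors.h"

int main(void) {
    armulet_cpu_t cpu;
    armulet_reset_cpu(&cpu);

    // Place two Thumb instructions at the base of emulated RAM.
    armulet_write_u16(A_RAM_START + 0, 0x202a);  // movs r0, #42
    armulet_write_u16(A_RAM_START + 2, 0xe7fe);  // b .  (spin in place)

    cpu.regs[ARM_REG_SP] = A_RAM_END;            // stack at top of emulated RAM
    armulet_update_pc(&cpu, A_RAM_START | 1);    // bit 0 = Thumb, stripped internally

    armulet_single_step(&cpu);                   // executes the movs
    printf("r0 = %lu\n", (unsigned long) cpu.regs[ARM_REG_R0]);  // prints 42
    return 0;
}
```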
/armulet.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <string.h> 4 | #include <stdint.h> 5 | 6 | #include "armulet_accessors.h" 7 | 8 | #if ARMULET_USE_ASM 9 | #include "varmulet.h" 10 | #else 11 | #include "carmulet.h" 12 | #endif 13 | 14 | void armulet_reset_cpu(armulet_cpu_t *cpu) { 15 | memset(cpu, 0, sizeof(armulet_cpu_t)); 16 | #if ARMULET_USE_LAZY_NZ 17 | cpu->lazy_nz_val = 1; 18 | #endif 19 | } 20 | 21 | #if ARMULET_DEBUG 22 | #if ARMULET_USE_ASM 23 | #include "varmulet.h" 24 | varmulet_asm_hooks_t single_step_asm_hooks; 25 | #endif 26 | 27 | void armulet_single_step(armulet_cpu_t *cpu) { 28 | #if ARMULET_FEATURE_STEP_STATUS 29 | cpu->step_status = ARMULET_IST_NORMAL; 30 | #endif 31 | #if ARMULET_USE_ASM 32 | varmulet_step(cpu, &single_step_asm_hooks); 33 | #else 34 | carmulet_single_step(cpu); 35 | #endif 36 | } 37 | 38 | 39 | #endif 40 | 41 | #if !ARMULET_USE_REAL_MEMORY 42 | void armulet_zap(uint8_t byte) { 43 | memset(rom_memory, byte, sizeof(rom_memory)); 44 | memset(ram_memory, byte, sizeof(ram_memory)); 45 | memset(flash_memory, byte, sizeof(flash_memory)); 46 | } 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /armulet.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ARMULET_FEATURE_PROFILER 4 | //#define ARMULET_FEATURE_PROFILER 1 5 | #endif 6 | // ability to call into ARMv6M code and have it return 7 | #ifndef ARMULET_FEATURE_CALL 8 | //#define ARMULET_FEATURE_CALL 1 9 | #endif 10 | #ifndef ARMULET_USE_LAZY_NZ 11 | #define ARMULET_USE_LAZY_NZ 1 12 | #endif 13 | #ifndef ARMULET_USE_LAZY_Z 14 | //#define ARMULET_USE_LAZY_Z 1 15 | #endif 16 | 17 | #if PICO_ON_DEVICE 18 | #define ARMULET_USE_REAL_MEMORY 1 19 | #endif 20 | 21 | #if !defined(ARMULET_FEATURE_ARMV8M_BASELINE_SDIV_UDIV) && defined(ARMULET_FEATURE_ARMV8M_BASELINE) 22 | #define ARMULET_FEATURE_ARMV8M_BASELINE_SDIV_UDIV 1 23 | #endif 24 | 25 | #if !defined(ARMULET_FEATURE_ARMV8M_BASELINE_MOVW_MOVT) && defined(ARMULET_FEATURE_ARMV8M_BASELINE) 26 | #define ARMULET_FEATURE_ARMV8M_BASELINE_MOVW_MOVT 1 27 | #endif 28 | 29 | #if !defined(ARMULET_FEATURE_ARMV8M_BASELINE_BW) && defined(ARMULET_FEATURE_ARMV8M_BASELINE) 30 | #define ARMULET_FEATURE_ARMV8M_BASELINE_BW 1 31 | #endif 32 | 33 | #if !defined(ARMULET_FEATURE_ARMV8M_BASELINE_CBZ_CBNZ) && defined(ARMULET_FEATURE_ARMV8M_BASELINE) 34 | #define ARMULET_FEATURE_ARMV8M_BASELINE_CBZ_CBNZ 1 35 | #endif 36 | 37 | #if !defined(ARMULET_FEATURE_ARMV8M_BASELINE_MSPLIM) && defined(ARMULET_FEATURE_ARMV8M_BASELINE) 38 | #define ARMULET_FEATURE_ARMV8M_BASELINE_MSPLIM 1 39 | #endif 40 | 41 | #define ARMULET_CALL_RETURN_ADDRESS 0xffa00000 42 | 43 | // todo move these out 44 | #if ARMULET_FEATURE_STEP_STATUS 45 | #define ARMULET_IST_NORMAL 0 46 | #define ARMULET_IST_BREAKPOINT 1 47 | #define ARMULET_IST_UNDEFINED16 2 48 | #define ARMULET_IST_UNDEFINED32 3 49 | #define ARMULET_IST_SVC 4 50 | #endif 51 | 52 | #ifndef __ASSEMBLER__ 53 | #include <stdint.h> 54 | #include <stdbool.h> 55 | #include <assert.h> 56 | #if ARMULET_FEATURE_CALL && !ARMULET_USE_ASM 57 | #include <setjmp.h> 58 | #endif 59 | #ifdef __cplusplus 60 | extern "C" { 61 | #endif 62 | 63 | enum M0PLUS_REGS { 64 | ARM_REG_R0=0, 65 | ARM_REG_R1, 66 | ARM_REG_R2, 67 | ARM_REG_R3, 68 | ARM_REG_R4, 69 | ARM_REG_R5, 70 | ARM_REG_R6, 71 | ARM_REG_R7, 72 | ARM_REG_R8, 73 | ARM_REG_R9, 74 | ARM_REG_R10, 75 | ARM_REG_R11, 76 | ARM_REG_R12, 77 | ARM_REG_SP, 78 | ARM_REG_LR, 79 | ARM_REG_PC, 80 | NUM_M0PLUS_REGS 81 | }; 82 | 83 | static_assert(NUM_M0PLUS_REGS == 16, ""); 84 | 85 | typedef struct armulet_cpu { 86 | uint32_t regs[NUM_M0PLUS_REGS]; 87 | #if ARMULET_USE_LAZY_NZ 88 | uint32_t lazy_nz_val; 89 | #else 90 | bool _N; // should not be accessed directly 91 | bool _Z; // should not be accessed directly 92 | #endif 93 | #if ARMULET_USE_LAZY_Z 94 | uint32_t lazy_v0; 95 | uint32_t lazy_v1; 96 | uint32_t lazy_v2; 97 | #else 98 | bool _V; 99 | #endif 100 | bool C; 101 | bool primask; 102 | uint8_t ipsr; 103 | #if ARMULET_FEATURE_ARMV8M_BASELINE_MSPLIM 104 | uint32_t splim; 105 | #endif 106 | #if ARMULET_FEATURE_ASM_HOOKS_IS_FUNCTION 107 | // state to be passed to the asm hook for this instance 108 | uint32_t asm_hook_param; 109 | #endif 110 | #if ARMULET_FEATURE_SVC_HANDLER 111 | uint32_t (*svc_handler)(uint32_t a, uint32_t b, uint32_t c, uint32_t d); 112 | #endif 113 | #if ARMULET_FEATURE_STEP_STATUS 114 | int step_status; 115 | #endif 116 | #if !ARMULET_USE_ASM 117 | uint32_t pc_delta; // used post instruction 118 | #if ARMULET_FEATURE_CALL 119 | bool armulet_call; 120 | jmp_buf jmpbuf; 121 | #endif 122 | #endif 123 | } armulet_cpu_t; 124 | 125 | void armulet_reset_cpu(armulet_cpu_t *cpu); 126 | 127 | #if !ARMULET_USE_ASM 128 | void __attribute__((noreturn)) armulet_jump(armulet_cpu_t *cpu, uint32_t addr); 129 | #if ARMULET_FEATURE_CALL 130 | uint32_t armulet_call(armulet_cpu_t *cpu, uint32_t addr); 131 | #endif 132 | #if ARMULET_FEATURE_SPECIAL_READ 133 | uint32_t armulet_cb_special_read(uint32_t addr, int size); 134 | void armulet_cb_special_write(uint32_t addr, int size, uint32_t value); 135 | #endif 136 | #endif 137 | 138 | #if ARMULET_DEBUG 139 | #if ARMULET_USE_ASM 140 | extern struct varmulet_asm_hooks single_step_asm_hooks; 141 | #endif 142 | void armulet_single_step(armulet_cpu_t *cpu); 143 | #if
!PICO_ON_DEVICE 144 | void armulet_zap(uint8_t byte); 145 | #endif 146 | #endif 147 | 148 | #ifdef __cplusplus 149 | } 150 | #endif 151 | #endif -------------------------------------------------------------------------------- /armulet_accessors.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "pico.h" 3 | #include "armulet.h" 4 | 5 | #define A_ROM_START 0x0000 6 | #define A_ROM_END 0x4000 7 | 8 | #define A_RAM_START 0x20000000 9 | //#if PICO_RP2350 10 | #define A_RAM_END 0x20082000 11 | //#else 12 | //#define A_RAM_END 0x20042000 13 | //#endif 14 | 15 | #define A_FLASH_START 0x10000000 16 | #define A_FLASH_END 0x10200000 17 | 18 | #if !ARMULET_USE_REAL_MEMORY 19 | extern uint8_t rom_memory[A_ROM_END - A_ROM_START]; 20 | extern uint8_t ram_memory[A_RAM_END - A_RAM_START]; 21 | extern uint8_t flash_memory[A_FLASH_END - A_FLASH_START]; 22 | #endif 23 | 24 | static inline uint32_t armulet_get_pc(const armulet_cpu_t *cpu) { 25 | assert(!(cpu->regs[ARM_REG_PC] & 1)); 26 | return cpu->regs[ARM_REG_PC]; 27 | } 28 | 29 | static inline bool armulet_get_N(const armulet_cpu_t *cpu) { 30 | #if ARMULET_USE_LAZY_NZ 31 | return ((int32_t)cpu->lazy_nz_val) < 0; 32 | #else 33 | return cpu->_N; 34 | #endif 35 | } 36 | 37 | static inline bool armulet_get_Z(const armulet_cpu_t *cpu) { 38 | #if ARMULET_USE_LAZY_NZ 39 | return !cpu->lazy_nz_val; 40 | #else 41 | return cpu->_Z; 42 | #endif 43 | } 44 | 45 | static inline void armulet_set_NZ(armulet_cpu_t *cpu, bool N, bool Z) { 46 | #if ARMULET_USE_LAZY_NZ 47 | if (Z == 1) { 48 | assert(!N); 49 | cpu->lazy_nz_val = 0; 50 | } else { 51 | cpu->lazy_nz_val = N ? 0xffffffffu : 1; 52 | } 53 | #else 54 | cpu->_N = N; 55 | cpu->_Z = Z; 56 | #endif 57 | } 58 | 59 | static inline bool armulet_get_V(armulet_cpu_t *cpu) { 60 | #if ARMULET_USE_LAZY_Z 61 | return (((int32_t) (cpu->lazy_v0 ^ cpu->lazy_v1)) < 0) && (((int32_t) (cpu->lazy_v2 ^ cpu->lazy_v1)) >= 0); 62 | #else 63 | return cpu->_V; 64 | #endif 65 | } 66 | 67 | static inline void armulet_set_V(armulet_cpu_t *cpu, bool V) { 68 | #if ARMULET_USE_LAZY_Z 69 | cpu->lazy_v0 = V ? 
-1 : 0; 70 | cpu->lazy_v1 = cpu->lazy_v2 = 0; 71 | #else 72 | cpu->_V = V; 73 | #endif 74 | } 75 | 76 | static inline bool armulet_is_rom(uint32_t pc) { 77 | #pragma GCC diagnostic push 78 | #pragma GCC diagnostic ignored "-Wtype-limits" 79 | return pc >= A_ROM_START && pc < A_ROM_END; 80 | #pragma GCC diagnostic pop 81 | } 82 | 83 | static inline bool armulet_is_ram(uint32_t pc) { 84 | return pc >= A_RAM_START && pc < A_RAM_END; 85 | } 86 | 87 | static inline bool armulet_is_flash(uint32_t pc) { 88 | return pc >= A_FLASH_START && pc < A_FLASH_END; 89 | } 90 | 91 | static inline bool armulet_is_valid_pc(uint32_t pc) { 92 | return armulet_is_rom(pc) || armulet_is_ram(pc) || armulet_is_flash(pc); 93 | } 94 | 95 | static inline void armulet_update_nz(armulet_cpu_t *cpu, uint32_t val) { 96 | #if ARMULET_USE_LAZY_NZ 97 | cpu->lazy_nz_val = val; 98 | #else 99 | cpu->_Z = !val; 100 | cpu->_N = ((int32_t) val) < 0; 101 | #endif 102 | } 103 | 104 | #if ARMULET_FEATURE_SPECIAL_READ 105 | static inline bool armulet_is_special_address(uint32_t addr) { 106 | return (addr >> 28u) == 0xeu; 107 | } 108 | #endif 109 | 110 | static inline void *armulet_resolve_address(uint32_t addr, int size) { 111 | #if ARMULET_USE_REAL_MEMORY 112 | (void)size; 113 | void *rc = (void*)addr; 114 | #else 115 | void *rc = 0; 116 | if (armulet_is_rom(addr)) rc = rom_memory + addr - A_ROM_START; 117 | else if (armulet_is_ram(addr)) rc = ram_memory + addr - A_RAM_START; 118 | else if (armulet_is_flash(addr)) rc = flash_memory + addr - A_FLASH_START; 119 | hard_assert(rc); 120 | hard_assert(!((size - 1) & (uintptr_t) addr)); // check alignment 121 | #endif 122 | return rc; 123 | } 124 | 125 | static inline uint8_t armulet_read_u8(uint32_t addr) { 126 | #if ARMULET_FEATURE_SPECIAL_READ 127 | if (armulet_is_special_address(addr)) return (uint8_t)armulet_cb_special_read(addr, 1); 128 | #endif 129 | void *mem = armulet_resolve_address(addr, 1); 130 | return *(uint8_t *) mem; 131 | } 132 | 133 | static inline uint16_t armulet_read_u16(uint32_t addr) { 134 | #if ARMULET_FEATURE_SPECIAL_READ 135 | if (armulet_is_special_address(addr)) return (uint16_t)armulet_cb_special_read(addr, 2); 136 | #endif 137 | void *mem = armulet_resolve_address(addr, 2); 138 | return *(uint16_t *) mem; 139 | } 140 | 141 | static inline uint32_t armulet_read_u32(uint32_t addr) { 142 | #if ARMULET_FEATURE_SPECIAL_READ 143 | if (armulet_is_special_address(addr)) return armulet_cb_special_read(addr, 4); 144 | #endif 145 | void *mem = armulet_resolve_address(addr, 4); 146 | return *(uint32_t *) mem; 147 | } 148 | 149 | static inline void armulet_write_u8(uint32_t addr, uint8_t val) { 150 | #if ARMULET_FEATURE_SPECIAL_READ 151 | if (armulet_is_special_address(addr)) { armulet_cb_special_write(addr, 1, val); return; } 152 | #endif 153 | void *mem = armulet_resolve_address(addr, 1); 154 | *(uint8_t *) mem = val; 155 | } 156 | 157 | static inline void armulet_write_u16(uint32_t addr, uint16_t val) { 158 | #if ARMULET_FEATURE_SPECIAL_READ 159 | if (armulet_is_special_address(addr)) { armulet_cb_special_write(addr, 2, val); return; } 160 | #endif 161 | void *mem = armulet_resolve_address(addr, 2); 162 | *(uint16_t *) mem = val; 163 | } 164 | 165 | static inline void armulet_write_u32(uint32_t addr, uint32_t val) { 166 | #if ARMULET_FEATURE_SPECIAL_READ 167 | if (armulet_is_special_address(addr)) { armulet_cb_special_write(addr, 4, val); return; } 168 | #endif 169 | void *mem = armulet_resolve_address(addr, 4); 170 | *(uint32_t *) mem = val; 171 | } 172 | 173 | static inline void armulet_update_pc(armulet_cpu_t *cpu, uint32_t new_pc) { 174 | hard_assert(new_pc & 1u); 175 | cpu->regs[ARM_REG_PC] = new_pc & ~1u; 176 | #if !ARMULET_USE_ASM 177 | cpu->pc_delta = 0; 178 | #endif 179 | } 180 | --------------------------------------------------------------------------------
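The lazy-flag scheme in the accessors above deserves a note: rather than computing N and Z after every flag-setting instruction, `armulet_update_nz()` simply stores the 32-bit result, and the flags are derived only when something asks (N is the stored sign bit, Z means the stored value is zero; `armulet_set_NZ()` re-encodes explicit flags into a value with the same properties). A small illustrative check of that invariant, assuming `ARMULET_USE_LAZY_NZ=1`:

```c
#include <assert.h>
#include "armulet.h"
#include "armulet_accessors.h"

static void lazy_nz_demo(armulet_cpu_t *cpu) {
    armulet_update_nz(cpu, 0xffffff85u);   // e.g. a subtraction that went negative
    assert(armulet_get_N(cpu) && !armulet_get_Z(cpu));

    armulet_update_nz(cpu, 0);             // e.g. cmp r0, r0
    assert(!armulet_get_N(cpu) && armulet_get_Z(cpu));

    // set_NZ keeps the encoding: Z stores 0, N stores a negative value,
    // "neither" stores a positive non-zero value (hence the reset value 1
    // in armulet_reset_cpu).
    armulet_set_NZ(cpu, true, false);
    assert(armulet_get_N(cpu) && !armulet_get_Z(cpu));
}
```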
/carmulet/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(carmulet INTERFACE) 2 | target_sources(carmulet INTERFACE 3 | ${CMAKE_CURRENT_LIST_DIR}/carmulet.c 4 | ) 5 | target_include_directories(carmulet INTERFACE ${CMAKE_CURRENT_LIST_DIR}) 6 | -------------------------------------------------------------------------------- /carmulet/carmulet.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include "armulet.h" 4 | #include "armulet_accessors.h" 5 | 6 | #if __clang__ 7 | #pragma clang diagnostic ignored "-Wgnu-binary-literal" 8 | #endif 9 | 10 | #if !ARMULET_USE_REAL_MEMORY 11 | uint8_t rom_memory[A_ROM_END - A_ROM_START]; 12 | uint8_t ram_memory[A_RAM_END - A_RAM_START]; 13 | uint8_t flash_memory[A_FLASH_END - A_FLASH_START]; 14 | #endif 15 | 16 | #if ARMULET_TRACE_PRINTF 17 | #define trace_printf printf 18 | #else 19 | #define trace_printf(fmt, ...) ((void)0) 20 | #endif 21 | #define debug_printf printf 22 | 23 | static void warn_unsupported(const char *msg) { 24 | #if ARMULET_DEBUG 25 | debug_printf("Skipping %s\n", msg); 26 | #else 27 | panic(msg); 28 | #endif 29 | } 30 | 31 | #if ARMULET_DEBUG 32 | 33 | static void unsupported_instruction(armulet_cpu_t *cpu, uint32_t op16) { 34 | #if ARMULET_FEATURE_STEP_STATUS 35 | cpu->step_status = ARMULET_IST_UNDEFINED16; 36 | cpu->regs[ARM_REG_PC] -= 2; 37 | #else 38 | panic("Unknown instruction at address %08x: %04x\n", armulet_get_pc(cpu), op16); 39 | #endif 40 | } 41 | 42 | static void unsupported_instruction32(armulet_cpu_t *cpu, uint32_t op16, uint32_t op16b) { 43 | #if ARMULET_FEATURE_STEP_STATUS 44 | cpu->step_status = ARMULET_IST_UNDEFINED32; 45 | cpu->regs[ARM_REG_PC] -= 2; 46 | #else 47 | panic("Unknown instruction at address %08x: %04x%04x\n", armulet_get_pc(cpu), op16, op16b); 48 | #endif 49 | } 50 | 51 | #else 52 | #define unsupported_instruction(cpu, op16) __breakpoint() 53 | #define unsupported_instruction32(cpu, op16, op16b) __breakpoint() 54 | #endif 55 | 56 | static uint32_t add_update_flags(armulet_cpu_t *cpu, uint32_t a, uint32_t b) { 57 | uint32_t result = a + b; 58 | cpu->C = result < a; 59 | #if ARMULET_USE_LAZY_Z 60 | cpu->lazy_v0 = result; 61 | cpu->lazy_v1 = a; 62 | cpu->lazy_v2 = b; 63 | #else 64 | cpu->_V = (((int32_t) (result ^ b)) < 0) && (((int32_t) (a ^ b)) >= 0); 65 | #endif 66 | armulet_update_nz(cpu, result); 67 | return result; 68 | } 69 | 70 | static uint32_t adc_update_flags(armulet_cpu_t *cpu, uint32_t a, uint32_t b, bool c) { 71 | uint32_t bc = b + c; 72 | uint32_t result = a + bc; 73 | cpu->C = result < a || (c && !bc); 74 | #if ARMULET_USE_LAZY_Z 75 | cpu->lazy_v0 = result; 76 | cpu->lazy_v1 = a; 77 | cpu->lazy_v2 = b; 78 | #else 79 | cpu->_V = (((int32_t) (result ^ b)) < 0) && (((int32_t) (a ^ b)) >= 0); 80 | #endif 81 | armulet_update_nz(cpu, result); 82 | return result; 83 | } 84 | 85 | static uint32_t sub_update_flags(armulet_cpu_t *cpu, uint32_t a, uint32_t b) { 86 | uint32_t result = a - b; 87 | cpu->C = a >= b; 88 | #if ARMULET_USE_LAZY_Z 89 | cpu->lazy_v0 = a; 90 | cpu->lazy_v1 = result; 91 | cpu->lazy_v2 = b; 92 | #else 93 | cpu->_V = (((int32_t) (a ^ b)) < 0) && (((int32_t) (result ^ b)) >= 0); 94 | #endif 95 |
armulet_update_nz(cpu, result); 96 | return result; 97 | } 98 | 99 | static void check_exec_return(armulet_cpu_t *cpu, uint32_t new_pc) { 100 | if (0xf == new_pc >> 28u) { 101 | #if ARMULET_FEATURE_CALL 102 | if (cpu->armulet_call && new_pc == ARMULET_CALL_RETURN_ADDRESS) { 103 | longjmp(cpu->jmpbuf, 1); 104 | } 105 | #endif 106 | panic("Exception Return"); 107 | } 108 | } 109 | 110 | static void do_shift(armulet_cpu_t *cpu, uint32_t type, uint32_t rd, uint32_t rm, uint32_t shift) { 111 | if (type == 0b000) { 112 | // lsls 113 | if (shift <= 32) { 114 | if (shift) cpu->C = (cpu->regs[rm] >> (32u - shift)) & 1u; 115 | cpu->regs[rd] = (shift < 32) ? cpu->regs[rm] << shift : 0; 116 | } else { 117 | cpu->regs[rd] = 0; 118 | cpu->C = 0; 119 | } 120 | } else if (type == 0b001) { 121 | // lsrs 122 | if (shift <= 32) { 123 | if (shift) cpu->C = (cpu->regs[rm] >> (shift - 1)) & 1u; 124 | cpu->regs[rd] = (shift < 32) ? cpu->regs[rm] >> shift : 0; 125 | } else { 126 | cpu->regs[rd] = 0; 127 | cpu->C = 0; 128 | } 129 | } else { 130 | static_assert((int32_t) (-1) >> 1 == (int32_t) (-1), ""); 131 | // asrs 132 | if (shift < 32) { 133 | if (shift) cpu->C = (cpu->regs[rm] >> (shift - 1)) & 1u; 134 | cpu->regs[rd] = (uint32_t) (((int32_t) cpu->regs[rm]) >> shift); 135 | } else { 136 | cpu->C = ((int32_t) cpu->regs[rm]) < 0; 137 | cpu->regs[rd] = (uint32_t) (((int32_t) cpu->regs[rm]) >> 31); 138 | } 139 | } 140 | armulet_update_nz(cpu, cpu->regs[rd]); 141 | } 142 | 143 | static void execute_00(armulet_cpu_t *cpu, uint32_t op16) { 144 | uint32_t prefix = (op16 >> 11u) & 0x7u; 145 | switch (prefix) { 146 | case 0b000: 147 | case 0b001: 148 | case 0b010: { 149 | // lsls, lsrs, asrs 150 | uint32_t rd = op16 & 0x7u; 151 | uint32_t rm = (op16 >> 3u) & 0x7u; 152 | uint32_t shift = (op16 >> 6u) & 0x1fu; 153 | if (prefix && !shift) shift = 32; 154 | do_shift(cpu, prefix, rd, rm, shift); 155 | break; 156 | } 157 | case 0b011: { 158 | uint32_t rn = (op16 >> 3u) & 0x7u; 159 | uint32_t rd = op16 & 0x7u; 160 | uint32_t value = op16 & 0x400u ? (op16 >> 6u) & 0x7u : cpu->regs[(op16 >> 6u) & 0x7u]; 161 | if (op16 & 0x200) { 162 | // subs 163 | cpu->regs[rd] = sub_update_flags(cpu, cpu->regs[rn], value); 164 | } else { 165 | // adds 166 | cpu->regs[rd] = add_update_flags(cpu, cpu->regs[rn], value); 167 | } 168 | break; 169 | } 170 | case 0b100: { 171 | // movs 172 | uint32_t rd = (op16 >> 8u) & 0x7u; 173 | cpu->regs[rd] = op16 & 0xffu; 174 | armulet_update_nz(cpu, cpu->regs[rd]); 175 | break; 176 | } 177 | case 0b101: { 178 | // cmp; 179 | uint32_t rn = (op16 >> 8u) & 0x7u; 180 | sub_update_flags(cpu, cpu->regs[rn], op16 & 0xffu); 181 | break; 182 | } 183 | case 0b110: 184 | case 0b111: { 185 | // adds, subs 186 | uint32_t rdn = (op16 >> 8u) & 0x7u; 187 | uint32_t imm8 = op16 & 0xffu; 188 | if (prefix & 1) { 189 | cpu->regs[rdn] = sub_update_flags(cpu, cpu->regs[rdn], imm8); 190 | } else { 191 | cpu->regs[rdn] = add_update_flags(cpu, cpu->regs[rdn], imm8); 192 | } 193 | break; 194 | } 195 | } 196 | } 197 | 198 | static void __noinline check_update_sp_pc(armulet_cpu_t *cpu, uint32_t reg) { 199 | if (reg >= ARM_REG_SP) { 200 | if (reg == ARM_REG_SP) { 201 | cpu->regs[ARM_REG_SP] &= ~3u; 202 | } else if (reg == ARM_REG_PC) { 203 | cpu->regs[ARM_REG_PC] &= ~1u; 204 | cpu->pc_delta = 0; 205 | } 206 | } 207 | } 208 | 209 | static uint32_t get_lo_hi_reg(armulet_cpu_t *cpu, uint32_t reg) { 210 | return cpu->regs[reg] + (reg == ARM_REG_PC ? 
4 : 0); 211 | } 212 | 213 | static void execute_0100(armulet_cpu_t *cpu, uint32_t op16) { 214 | switch ((op16 >> 10u) & 0x3u) { 215 | case 0b00: { 216 | uint32_t rdn = op16 & 0x7u; 217 | uint32_t rm = (op16 >> 3u) & 0x7u; 218 | 219 | uint32_t prefix = (op16 >> 6u) & 0xfu; 220 | switch (prefix) { 221 | case 0b0000: 222 | // ands 223 | cpu->regs[rdn] &= cpu->regs[rm]; 224 | break; 225 | case 0b0001: 226 | // eors 227 | cpu->regs[rdn] ^= cpu->regs[rm]; 228 | break; 229 | case 0b0010: 230 | case 0b0011: 231 | case 0b0100: { 232 | // lsls, lsrs, asrs 233 | uint32_t shift = cpu->regs[rm] & 0xffu; 234 | do_shift(cpu, prefix - 0b0010, rdn, rdn, shift); 235 | return; 236 | } 237 | case 0b0101: { 238 | // adcs 239 | cpu->regs[rdn] = adc_update_flags(cpu, cpu->regs[rdn], cpu->regs[rm], cpu->C); 240 | return; 241 | } 242 | case 0b0110: { 243 | // sbcs 244 | cpu->regs[rdn] = adc_update_flags(cpu, cpu->regs[rdn], ~cpu->regs[rm], cpu->C); 245 | return; 246 | } 247 | case 0b0111: { 248 | // lion goes... 249 | uint32_t shift = cpu->regs[rm] & 0xff; 250 | if (shift) { 251 | shift &= 0x1f; 252 | cpu->regs[rdn] = (cpu->regs[rdn] >> shift) | (cpu->regs[rdn] << (32 - shift)); 253 | cpu->C = cpu->regs[rdn] >> 31u; 254 | } 255 | break; 256 | } 257 | case 0b1000: { 258 | // tsts 259 | armulet_update_nz(cpu, cpu->regs[rdn] & cpu->regs[rm]); 260 | return; 261 | } 262 | case 0b1001: { 263 | // rsbs, #0 264 | cpu->regs[rdn] = sub_update_flags(cpu, 0, cpu->regs[rm]); 265 | break; 266 | } 267 | case 0b1010: { 268 | sub_update_flags(cpu, cpu->regs[rdn], cpu->regs[rm]); 269 | return; 270 | } 271 | case 0b1011: 272 | // cmn 273 | add_update_flags(cpu, cpu->regs[rdn], cpu->regs[rm]); 274 | return; 275 | case 0b1100: 276 | // orrs 277 | cpu->regs[rdn] |= cpu->regs[rm]; 278 | break; 279 | case 0b1101: 280 | // muls 281 | cpu->regs[rdn] *= cpu->regs[rm]; 282 | break; 283 | case 0b1110: 284 | // bics 285 | cpu->regs[rdn] &= ~cpu->regs[rm]; 286 | break; 287 | case 0b1111: 288 | // mvns 289 | cpu->regs[rdn] = ~cpu->regs[rm]; 290 | break; 291 | default: 292 | unsupported_instruction(cpu, op16); 293 | } 294 | armulet_update_nz(cpu, cpu->regs[rdn]); 295 | break; 296 | } 297 | case 0b01: { 298 | uint32_t prefix = (op16 >> 6u) & 0xfu; 299 | switch (prefix >> 2) { 300 | case 0b00: { 301 | // add 302 | uint32_t rdn = (op16 & 0x7u) + ((op16 >> 4u & 0x8u)); 303 | uint32_t rm = (op16 >> 3u) & 0xfu; 304 | uint32_t vdn = get_lo_hi_reg(cpu, rdn); 305 | uint32_t vm = get_lo_hi_reg(cpu, rm); 306 | cpu->regs[rdn] = vdn + vm; 307 | check_update_sp_pc(cpu, rdn); 308 | break; 309 | } 310 | case 0b01: { 311 | uint32_t rn = ((op16 >> 4u) & 0x8u) | (op16 & 0x7u); 312 | uint32_t rm = (op16 >> 3) & 0xfu; 313 | uint32_t vn = get_lo_hi_reg(cpu, rn); 314 | uint32_t vm = get_lo_hi_reg(cpu, rm); 315 | sub_update_flags(cpu, vn, vm); 316 | break; 317 | } 318 | case 0b10: { 319 | // mov 320 | uint32_t rd = ((op16 >> 4u) & 0x8u) | (op16 & 0x7u); 321 | uint32_t rm = (op16 >> 3) & 0xfu; 322 | cpu->regs[rd] = get_lo_hi_reg(cpu, rm); 323 | check_update_sp_pc(cpu, rd); 324 | break; 325 | } 326 | case 0b11: { 327 | // bx / blx 328 | uint32_t rm = (op16 >> 3u) & 0xfu; 329 | uint32_t next_pc = get_lo_hi_reg(cpu, rm); 330 | if (prefix & 2) { 331 | cpu->regs[ARM_REG_LR] = cpu->regs[ARM_REG_PC] + 3; 332 | } else { 333 | check_exec_return(cpu, next_pc); 334 | } 335 | armulet_update_pc(cpu, next_pc); 336 | break; 337 | } 338 | default: 339 | unsupported_instruction(cpu, op16); 340 | } 341 | break; 342 | } 343 | case 0b10: 344 | case 0b11: { 345 | // ldr (literal) 346 | 
uint32_t rt = (op16 >> 8u) & 0x7u; 347 | uint32_t imm8 = (op16 & 0xffu); 348 | uint32_t addr = ((cpu->regs[ARM_REG_PC] >> 2u) + 1u + imm8) << 2u; 349 | cpu->regs[rt] = armulet_read_u32(addr); 350 | break; 351 | } 352 | } 353 | } 354 | 355 | static void execute_0110_1000(armulet_cpu_t *cpu, uint32_t op16) { 356 | uint32_t imm5 = (op16 >> 6u) & 0x1fu; 357 | uint32_t rn = (op16 >> 3u) & 0x7u; 358 | uint32_t rt = op16 & 0x7u; 359 | bool load = op16 & 0x800u; 360 | switch (op16 >> 12) { 361 | case 0b0110: { 362 | // ldr/str imm5 363 | uint32_t addr = cpu->regs[rn] + (imm5 << 2u); 364 | if (load) { 365 | cpu->regs[rt] = armulet_read_u32(addr); 366 | } else { 367 | armulet_write_u32(addr, cpu->regs[rt]); 368 | } 369 | break; 370 | } 371 | case 0b0111: { 372 | // ldrb/strb imm5 373 | uint32_t addr = cpu->regs[rn] + imm5; 374 | if (load) { 375 | cpu->regs[rt] = armulet_read_u8(addr); 376 | } else { 377 | armulet_write_u8(addr, (uint8_t) cpu->regs[rt]); 378 | } 379 | break; 380 | } 381 | case 0b1000: { 382 | // ldrh/strh imm5 383 | uint32_t addr = cpu->regs[rn] + (imm5 << 1u); 384 | if (load) { 385 | cpu->regs[rt] = armulet_read_u16(addr); 386 | } else { 387 | armulet_write_u16(addr, (uint16_t) cpu->regs[rt]); 388 | } 389 | break; 390 | } 391 | } 392 | } 393 | 394 | static void execute_1001(armulet_cpu_t *cpu, uint32_t op16) { 395 | uint32_t rt = (op16 >> 8u) & 0x7u; 396 | uint32_t imm8 = (op16 & 0xffu); 397 | uint32_t addr = cpu->regs[ARM_REG_SP] + (imm8 << 2u); 398 | if (op16 & 0x800u) { 399 | // ldr sp 400 | cpu->regs[rt] = armulet_read_u32(addr); 401 | } else { 402 | // str sp 403 | armulet_write_u32(addr, cpu->regs[rt]); 404 | } 405 | } 406 | 407 | static void execute_1010(armulet_cpu_t *cpu, uint32_t op16) { 408 | uint32_t rd = (op16 >> 8u) & 0x7u; 409 | uint32_t imm8 = op16 & 0xffu; 410 | if (op16 & 0x800u) { 411 | cpu->regs[rd] = cpu->regs[ARM_REG_SP] + (imm8 << 2u); 412 | } else { 413 | cpu->regs[rd] = ((cpu->regs[ARM_REG_PC] + 4) & ~3u) + (imm8 << 2u); 414 | } 415 | } 416 | 417 | static void reflect_primask(const armulet_cpu_t *cpu) { 418 | #if PICO_ON_DEVICE && !ARMULET_DEBUG 419 | #if __riscv 420 | ((void)cpu); 421 | needs_love_riscv_skip(); 422 | #else 423 | if (cpu->primask) __asm volatile ("cpsid i"); 424 | else __asm volatile ("cpsie i"); 425 | #endif 426 | #endif 427 | } 428 | 429 | static void execute_1011(armulet_cpu_t *cpu, uint32_t op16) { 430 | uint32_t prefix = (op16 >> 8u) & 0xfu; 431 | switch (prefix) { 432 | case 0b0000: { 433 | uint32_t imm7 = (op16 & 0x7fu) << 2; 434 | if (op16 & 0x80u) 435 | cpu->regs[ARM_REG_SP] -= imm7; 436 | else 437 | cpu->regs[ARM_REG_SP] += imm7; 438 | break; 439 | } 440 | case 0b0010: { 441 | uint32_t rm = (op16 >> 3u) & 0x7u; 442 | uint32_t rd = op16 & 0x7u; 443 | switch ((op16 >> 6u) & 0x3u) { 444 | case 0b00: // sxth 445 | cpu->regs[rd] = (uint32_t) (int32_t) (int16_t) cpu->regs[rm]; 446 | break; 447 | case 0b01: // sxtb 448 | cpu->regs[rd] = (uint32_t) (int32_t) (int8_t) cpu->regs[rm]; 449 | break; 450 | case 0b10: // uxth 451 | cpu->regs[rd] = (uint32_t) (uint16_t) cpu->regs[rm]; 452 | break; 453 | case 0b11: // uxtb 454 | cpu->regs[rd] = (uint32_t) (uint8_t) cpu->regs[rm]; 455 | break; 456 | } 457 | break; 458 | } 459 | case 0b0100: 460 | case 0b0101: { 461 | // push 462 | // pre-decrement ARM_REG_SP as a thought towards interrupts (if they share the same stack) 463 | cpu->regs[ARM_REG_SP] -= (uint32_t) __builtin_popcount(op16 & 0x1ffu) << 2u; 464 | __compiler_memory_barrier(); 465 | uint32_t addr = cpu->regs[ARM_REG_SP]; 466 | for (uint i = 
0; i < 8; i++) { 467 | if (op16 & (1u << i)) { 468 | armulet_write_u32(addr, cpu->regs[i]); 469 | addr += 4; 470 | } 471 | } 472 | if (prefix & 1) { 473 | armulet_write_u32(addr, cpu->regs[ARM_REG_LR]); 474 | } 475 | break; 476 | } 477 | case 0b0110: { 478 | if ((op16 >> 5u) == 0b10110110011 && 2 == (op16 & 15)) { 479 | // cps 480 | cpu->primask = (bool) (op16 & 0x10u); 481 | reflect_primask(cpu); 482 | } else { 483 | unsupported_instruction(cpu, op16); 484 | } 485 | break; 486 | } 487 | case 0b1010: { 488 | uint32_t rm = (op16 >> 3u) & 0x7u; 489 | uint32_t rd = op16 & 0x7u; 490 | uint32_t src = cpu->regs[rm]; 491 | uint32_t result; 492 | switch ((op16 >> 6u) & 0x3u) { 493 | case 0b00: // rev 494 | result = __builtin_bswap32(src); 495 | break; 496 | case 0b01: // rev16 497 | result = (uint32_t) __builtin_bswap16((uint16_t) src) | 498 | ((uint32_t) __builtin_bswap16((uint16_t) (src >> 16)) << 16u); 499 | break; 500 | case 0b10: 501 | unsupported_instruction(cpu, op16); 502 | return; 503 | case 0b11: // revsh 504 | result = (uint32_t) (int32_t) (int16_t) __builtin_bswap16((uint16_t) src); 505 | break; 506 | } 507 | #pragma GCC diagnostic push 508 | #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" 509 | cpu->regs[rd] = result; 510 | #pragma GCC diagnostic pop 511 | break; 512 | } 513 | case 0b1100: 514 | case 0b1101: { 515 | // pop 516 | uint32_t addr = cpu->regs[ARM_REG_SP]; 517 | for (uint i = 0; i < 8; i++) { 518 | if (op16 & (1u << i)) { 519 | cpu->regs[i] = armulet_read_u32(addr); 520 | addr += 4; 521 | } 522 | } 523 | if (prefix & 1) { 524 | uint32_t next_pc = armulet_read_u32(addr); 525 | check_exec_return(cpu, next_pc); 526 | armulet_update_pc(cpu, next_pc); 527 | addr += 4; 528 | } 529 | __compiler_memory_barrier(); 530 | cpu->regs[ARM_REG_SP] = addr; 531 | break; 532 | } 533 | case 0b1110: { 534 | #if ARMULET_FEATURE_STEP_STATUS 535 | cpu->step_status = ARMULET_IST_BREAKPOINT; 536 | #endif 537 | #if ARMULET_DEBUG 538 | puts("ignoring BKPT"); 539 | #else 540 | // bkpt 541 | __breakpoint(); 542 | #endif 543 | cpu->pc_delta = 0; 544 | break; 545 | } 546 | case 0b1111: { 547 | // hint instructions 548 | if (op16 & 0xf) { 549 | unsupported_instruction(cpu, op16); 550 | } else { 551 | uint32_t opA = (op16 >> 4) & 0xf; 552 | switch (opA) { 553 | case 0b0010: 554 | #if PICO_ON_DEVICE 555 | // __wfe(); 556 | #endif 557 | break; 558 | case 0b0100: 559 | #if PICO_ON_DEVICE 560 | // __sev(); 561 | #endif 562 | break; 563 | default: 564 | warn_unsupported("wfi etc"); 565 | } 566 | } 567 | break; 568 | } 569 | default: 570 | unsupported_instruction(cpu, op16); 571 | } 572 | } 573 | 574 | static void execute_11(armulet_cpu_t *cpu, uint32_t op16) { 575 | uint32_t prefix = (op16 >> 11u) & 0x7u; 576 | switch (prefix) { 577 | case 0b000: { 578 | // stmia 579 | uint32_t rn = (op16 >> 8u) & 0x7u; 580 | uint32_t addr = cpu->regs[rn]; 581 | for (uint i = 0; i < 8; i++) { 582 | if (op16 & (1u << i)) { 583 | armulet_write_u32(addr, cpu->regs[i]); 584 | addr += 4; 585 | } 586 | } 587 | cpu->regs[rn] = addr; 588 | break; 589 | } 590 | case 0b001: { 591 | // ldmia 592 | uint32_t rn = (op16 >> 8u) & 0x7u; 593 | uint32_t addr = cpu->regs[rn]; 594 | for (uint i = 0; i < 8; i++) { 595 | if (op16 & (1u << i)) { 596 | cpu->regs[i] = armulet_read_u32(addr); 597 | addr += 4; 598 | } 599 | } 600 | if (!(op16 & (1u << rn))) cpu->regs[rn] = addr; 601 | break; 602 | } 603 | case 0b010: 604 | case 0b011: { 605 | // b condition 606 | uint32_t opcode = (op16 >> 8u) & 0xfu; 607 | bool take_branch; 608 | switch (opcode 
>> 1) { 609 | case 0b000: // eq / ne 610 | take_branch = armulet_get_Z(cpu); 611 | break; 612 | case 0b001: // cs / cc 613 | take_branch = cpu->C; 614 | break; 615 | case 0b010: // mi / pl 616 | take_branch = armulet_get_N(cpu); 617 | break; 618 | case 0b011: // vs / vc 619 | take_branch = armulet_get_V(cpu); 620 | break; 621 | case 0b100: // hi / ls 622 | take_branch = cpu->C && !armulet_get_Z(cpu); 623 | break; 624 | case 0b101: // ge / lt 625 | take_branch = armulet_get_N(cpu) == armulet_get_V(cpu); 626 | break; 627 | case 0b110: // gt / le 628 | take_branch = !armulet_get_Z(cpu) && armulet_get_N(cpu) == armulet_get_V(cpu); 629 | break; 630 | case 0b111: 631 | if (opcode == 0b1111) { 632 | // svc 633 | #if ARMULET_FEATURE_STEP_STATUS 634 | cpu->step_status = ARMULET_IST_SVC; 635 | #else 636 | unsupported_instruction(cpu, op16); 637 | #endif 638 | } else { 639 | // udf 640 | unsupported_instruction(cpu, op16); 641 | } 642 | return; 643 | } 644 | #pragma GCC diagnostic push 645 | #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" 646 | if (opcode & 1) take_branch = !take_branch; 647 | if (take_branch) { 648 | uint32_t delta = (uint32_t) ((((int32_t) op16) << 24u) >> 23u); 649 | cpu->pc_delta = 4 + delta; 650 | } 651 | #pragma GCC diagnostic pop 652 | break; 653 | } 654 | case 0b100: { 655 | // b 656 | uint32_t delta = (uint32_t) ((((int32_t) op16) << 21u) >> 20u); 657 | cpu->pc_delta = 4 + delta; 658 | break; 659 | } 660 | case 0b110: { 661 | uint32_t op16_2 = armulet_read_u16(cpu->regs[ARM_REG_PC] + 2); 662 | if (op16_2 & 0x8000u) { 663 | uint32_t op2 = (op16_2 >> 12u) & 0x7u; 664 | if ((op2 & 0x5u) == 0x5u) { 665 | // bl 666 | uint32_t s = (op16 >> 10u) & 1u; 667 | uint32_t j1 = (op16_2 >> 13u) & 1u; 668 | uint32_t j2 = (op16_2 >> 11u) & 1u; 669 | uint32_t i1 = (j1 ^ s) ? 0 : 2; 670 | uint32_t i2 = (j2 ^ s) ? 0 : 1; 671 | uint32_t offset = (uint32_t) (((int32_t) ((s * 4u | i1 | i2) << 29u)) >> 7u); 672 | offset |= (op16 & 0x3ffu) << 12; 673 | offset |= (op16_2 & 0x7ffu) << 1; 674 | cpu->regs[ARM_REG_LR] = cpu->regs[ARM_REG_PC] + 5; 675 | cpu->pc_delta = 4 + offset; 676 | } else { 677 | uint32_t op1 = (op16 >> 4u) & 0x7fu; 678 | if (op1 == 0b0111011) { 679 | // todo dmb etc. 
680 | cpu->pc_delta = 4; 681 | } else if (op1 == 0b0111110) { 682 | // mrs 683 | uint32_t SYSm = op16_2 & 0xffu; 684 | uint32_t rd = (op16_2 >> 8u) & 0xfu; 685 | uint32_t value = 0; 686 | switch (SYSm >> 3) { 687 | case 0: 688 | if (SYSm & 1) value = cpu->ipsr; 689 | if (SYSm < 4) { 690 | if (armulet_get_N(cpu)) value |= 0x80000000; 691 | if (armulet_get_Z(cpu)) value |= 0x40000000; 692 | if (cpu->C) value |= 0x20000000; 693 | if (armulet_get_V(cpu)) value |= 0x10000000; 694 | } 695 | break; 696 | case 1: 697 | value = cpu->regs[ARM_REG_SP]; // todo correct sp 698 | break; 699 | case 2: 700 | if (!(SYSm & 7u)) { 701 | value = cpu->primask; // todo unprivileged can't read 702 | } else { 703 | // warn_unsupported("mrs ctrl"); 704 | // todo need 705 | } 706 | break; 707 | default: 708 | unsupported_instruction32(cpu, op16, op16_2); 709 | break; 710 | } 711 | cpu->regs[rd] = value; 712 | cpu->pc_delta = 4; 713 | // check_update_sp_pc(cpu, rd); // unpredictable 714 | } else if (op1 == 0b0111000) { 715 | // msr 716 | uint32_t SYSm = op16_2 & 0xffu; 717 | uint32_t rd = op16 & 0xfu; 718 | uint32_t val = get_lo_hi_reg(cpu, rd); 719 | switch (SYSm >> 3) { 720 | case 0: 721 | if (SYSm < 4) { 722 | bool n = val & 0x80000000; 723 | bool z = val & 0x40000000; 724 | if (n && z) n = false; 725 | armulet_set_NZ(cpu, n, z); 726 | cpu->C = val & 0x20000000; 727 | armulet_set_V(cpu, val & 0x10000000); 728 | } 729 | break; 730 | case 1: 731 | // warn_unsupported("msr sp"); 732 | // todo MSP/PSP 733 | cpu->regs[ARM_REG_SP] = val & ~3u; // todo correct SP 734 | break; 735 | case 2: 736 | if (!(SYSm & 7u)) { 737 | cpu->primask = val & 1; 738 | reflect_primask(cpu); 739 | } else { 740 | warn_unsupported("msr ctrl"); 741 | } 742 | break; 743 | default: 744 | unsupported_instruction32(cpu, op16, op16_2); 745 | } 746 | cpu->pc_delta = 4; 747 | } else { 748 | unsupported_instruction32(cpu, op16, op16_2); 749 | } 750 | } 751 | } else { 752 | unsupported_instruction(cpu, op16); 753 | } 754 | break; 755 | } 756 | default: 757 | unsupported_instruction(cpu, op16); 758 | } 759 | } 760 | 761 | void execute_0101(armulet_cpu_t *cpu, uint32_t op16) { 762 | uint32_t opb = (op16 >> 9u) & 0x7u; 763 | uint32_t rm = (op16 >> 6u) & 0x7u; 764 | uint32_t rn = (op16 >> 3u) & 0x7u; 765 | uint32_t rt = op16 & 0x7u; 766 | uint32_t addr = cpu->regs[rn] + cpu->regs[rm]; 767 | switch (opb) { 768 | case 0b0000: 769 | // str 770 | armulet_write_u32(addr, cpu->regs[rt]); 771 | break; 772 | case 0b0001: 773 | // strh 774 | armulet_write_u16(addr, (uint16_t) cpu->regs[rt]); 775 | break; 776 | case 0b0010: 777 | // strb 778 | armulet_write_u8(addr, (uint8_t) cpu->regs[rt]); 779 | break; 780 | case 0b0011: 781 | // ldrsb 782 | cpu->regs[rt] = (uint32_t) (int32_t) (int8_t) armulet_read_u8(addr); 783 | break; 784 | case 0b0100: 785 | // ldr 786 | cpu->regs[rt] = armulet_read_u32(addr); 787 | break; 788 | case 0b0101: 789 | // ldrh 790 | cpu->regs[rt] = armulet_read_u16(addr); 791 | break; 792 | case 0b0110: 793 | // ldrb 794 | cpu->regs[rt] = armulet_read_u8(addr); 795 | break; 796 | case 0b0111: 797 | // ldrsh 798 | cpu->regs[rt] = (uint32_t) (int32_t) (int16_t) armulet_read_u16(addr); 799 | break; 800 | default: 801 | unsupported_instruction(cpu, op16); 802 | } 803 | } 804 | 805 | /* 806 | 807 | armulet_read_u16 address_reg, out_reg: 808 | bge \address_reg, water_mark_reg armulet_read_u16_special 809 | lh \out_reg, \address_reg 810 | 811 | finish_instruction: 812 | add r_pc, r_pc_delta 813 | step: 814 | armulet_read_u16 r_pc, r_inst 815 | li r_pc_delta, 2 816 | srli r_tmp0, r_inst, 12 817 | sh2add r_tmp1, r_main_decode, r_tmp0 818 | jr r_tmp1 819 | 820 | */ 821 |
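/*
 * The fragment above sketches varmulet's RISC-V inner loop: fetch a halfword,
 * take its top four bits, and dispatch through a 16-entry decode table
 * (sh2add scales the index by 4). carmulet_single_step() below is the C
 * equivalent written as a switch; a hypothetical table-driven C version,
 * closer in spirit to the assembly, would look like this (illustrative
 * sketch only, not part of this repo):
 *
 *   typedef void (*op_handler_t)(armulet_cpu_t *cpu, uint32_t op16);
 *   static const op_handler_t main_decode[16] = {
 *       execute_00, execute_00, execute_00, execute_00,            // 00xx
 *       execute_0100, execute_0101,                                // 0100, 0101
 *       execute_0110_1000, execute_0110_1000, execute_0110_1000,   // 0110..1000
 *       execute_1001, execute_1010, execute_1011,                  // 1001..1011
 *       execute_11, execute_11, execute_11, execute_11,            // 11xx
 *   };
 *   static void step_via_table(armulet_cpu_t *cpu) {
 *       uint32_t op16 = armulet_read_u16(armulet_get_pc(cpu));
 *       cpu->pc_delta = 2;
 *       main_decode[op16 >> 12](cpu, op16);
 *       cpu->regs[ARM_REG_PC] += cpu->pc_delta;
 *   }
 */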
822 | void carmulet_single_step(armulet_cpu_t *cpu) { 823 | uint32_t pc = armulet_get_pc(cpu); 824 | hard_assert(armulet_is_valid_pc(pc)); 825 | uint32_t op16 = armulet_read_u16(pc); 826 | uint32_t prefix = (op16 >> 12u) & 0xfu; 827 | cpu->pc_delta = 2; 828 | switch (prefix) { 829 | case 0b0000: 830 | case 0b0001: 831 | case 0b0010: 832 | case 0b0011: 833 | execute_00(cpu, op16); 834 | break; 835 | case 0b0100: 836 | execute_0100(cpu, op16); 837 | break; 838 | case 0b0101: 839 | execute_0101(cpu, op16); 840 | break; 841 | case 0b0110: 842 | case 0b0111: 843 | case 0b1000: 844 | execute_0110_1000(cpu, op16); 845 | break; 846 | case 0b1001: 847 | execute_1001(cpu, op16); 848 | break; 849 | case 0b1010: 850 | execute_1010(cpu, op16); 851 | break; 852 | case 0b1011: 853 | execute_1011(cpu, op16); 854 | break; 855 | case 0b1100: 856 | case 0b1101: 857 | case 0b1110: 858 | case 0b1111: 859 | execute_11(cpu, op16); 860 | break; 861 | } 862 | cpu->regs[ARM_REG_PC] += cpu->pc_delta; 863 | } 864 | 865 | void __attribute__((noreturn)) carmulet_run(armulet_cpu_t *cpu) { 866 | while (true) { 867 | carmulet_single_step(cpu); 868 | } 869 | } 870 | 871 | void carmulet_jump(armulet_cpu_t *cpu, uint32_t addr) { 872 | #if ARMULET_FEATURE_CALL 873 | cpu->armulet_call = false; 874 | #endif 875 | armulet_update_pc(cpu, addr); 876 | carmulet_run(cpu); 877 | } 878 | 879 | #if ARMULET_FEATURE_CALL 880 | uint32_t carmulet_call(armulet_cpu_t *cpu, uint32_t addr) { 881 | if (setjmp(cpu->jmpbuf)) { 882 | return cpu->regs[ARM_REG_R0]; 883 | } 884 | cpu->armulet_call = true; 885 | cpu->regs[ARM_REG_LR] = ARMULET_CALL_RETURN_ADDRESS; 886 | armulet_update_pc(cpu, addr); 887 | carmulet_run(cpu); 888 | } 889 | #endif 890 | 891 | 892 | -------------------------------------------------------------------------------- /carmulet/carmulet.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "armulet.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | void carmulet_single_step(armulet_cpu_t *cpu); 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | -------------------------------------------------------------------------------- /pico_sdk_import.cmake: -------------------------------------------------------------------------------- 1 | # This is a copy of <PICO_SDK_PATH>/external/pico_sdk_import.cmake 2 | 3 | # This can be dropped into an external project to help locate this SDK 4 | # It should be include()ed prior to project() 5 | 6 | if (DEFINED ENV{PICO_SDK_PATH} AND (NOT PICO_SDK_PATH)) 7 | set(PICO_SDK_PATH $ENV{PICO_SDK_PATH}) 8 | message("Using PICO_SDK_PATH from environment ('${PICO_SDK_PATH}')") 9 | endif () 10 | 11 | if (DEFINED ENV{PICO_SDK_FETCH_FROM_GIT} AND (NOT PICO_SDK_FETCH_FROM_GIT)) 12 | set(PICO_SDK_FETCH_FROM_GIT $ENV{PICO_SDK_FETCH_FROM_GIT}) 13 | message("Using PICO_SDK_FETCH_FROM_GIT from environment ('${PICO_SDK_FETCH_FROM_GIT}')") 14 | endif () 15 | 16 | if (DEFINED ENV{PICO_SDK_FETCH_FROM_GIT_PATH} AND (NOT PICO_SDK_FETCH_FROM_GIT_PATH)) 17 | set(PICO_SDK_FETCH_FROM_GIT_PATH $ENV{PICO_SDK_FETCH_FROM_GIT_PATH}) 18 | message("Using PICO_SDK_FETCH_FROM_GIT_PATH from environment ('${PICO_SDK_FETCH_FROM_GIT_PATH}')") 19 | endif () 20 | 21 | set(PICO_SDK_PATH "${PICO_SDK_PATH}" CACHE PATH "Path to the Raspberry Pi Pico SDK") 22 | set(PICO_SDK_FETCH_FROM_GIT "${PICO_SDK_FETCH_FROM_GIT}" CACHE BOOL "Set to ON to fetch copy of SDK from git if not
otherwise locatable") 23 | set(PICO_SDK_FETCH_FROM_GIT_PATH "${PICO_SDK_FETCH_FROM_GIT_PATH}" CACHE FILEPATH "location to download SDK") 24 | 25 | if (NOT PICO_SDK_PATH) 26 | if (PICO_SDK_FETCH_FROM_GIT) 27 | include(FetchContent) 28 | set(FETCHCONTENT_BASE_DIR_SAVE ${FETCHCONTENT_BASE_DIR}) 29 | if (PICO_SDK_FETCH_FROM_GIT_PATH) 30 | get_filename_component(FETCHCONTENT_BASE_DIR "${PICO_SDK_FETCH_FROM_GIT_PATH}" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR}") 31 | endif () 32 | # GIT_SUBMODULES_RECURSE was added in 3.17 33 | if (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.17.0") 34 | FetchContent_Declare( 35 | pico_sdk 36 | GIT_REPOSITORY https://github.com/raspberrypi/pico-sdk 37 | GIT_TAG master 38 | GIT_SUBMODULES_RECURSE FALSE 39 | ) 40 | else () 41 | FetchContent_Declare( 42 | pico_sdk 43 | GIT_REPOSITORY https://github.com/raspberrypi/pico-sdk 44 | GIT_TAG master 45 | ) 46 | endif () 47 | 48 | if (NOT pico_sdk) 49 | message("Downloading Raspberry Pi Pico SDK") 50 | FetchContent_Populate(pico_sdk) 51 | set(PICO_SDK_PATH ${pico_sdk_SOURCE_DIR}) 52 | endif () 53 | set(FETCHCONTENT_BASE_DIR ${FETCHCONTENT_BASE_DIR_SAVE}) 54 | else () 55 | message(FATAL_ERROR 56 | "SDK location was not specified. Please set PICO_SDK_PATH or set PICO_SDK_FETCH_FROM_GIT to on to fetch from git." 57 | ) 58 | endif () 59 | endif () 60 | 61 | get_filename_component(PICO_SDK_PATH "${PICO_SDK_PATH}" REALPATH BASE_DIR "${CMAKE_BINARY_DIR}") 62 | if (NOT EXISTS ${PICO_SDK_PATH}) 63 | message(FATAL_ERROR "Directory '${PICO_SDK_PATH}' not found") 64 | endif () 65 | 66 | set(PICO_SDK_INIT_CMAKE_FILE ${PICO_SDK_PATH}/pico_sdk_init.cmake) 67 | if (NOT EXISTS ${PICO_SDK_INIT_CMAKE_FILE}) 68 | message(FATAL_ERROR "Directory '${PICO_SDK_PATH}' does not appear to contain the Raspberry Pi Pico SDK") 69 | endif () 70 | 71 | set(PICO_SDK_PATH ${PICO_SDK_PATH} CACHE PATH "Path to the Raspberry Pi Pico SDK" FORCE) 72 | 73 | include(${PICO_SDK_INIT_CMAKE_FILE}) 74 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(instruction_test instruction_test.cpp 2 | $<$:varmulet_hooks_test.S> 3 | ) 4 | target_link_libraries(instruction_test armulet pico_stdlib) 5 | message("PICO_RISCV ${PICO_RISCV}") 6 | target_compile_definitions(instruction_test PRIVATE 7 | ARMULET_DEBUG=1 8 | ARMULET_FEATURE_STEP=1 9 | ARMULET_FEATURE_ARMV8M_BASELINE_MSPLIM=1 10 | ARMULET_FEATURE_ARMV8M_BASELINE=1 11 | $<$:ARMULET_USE_ASM=1> 12 | ) 13 | pico_add_extra_outputs(instruction_test) 14 | 15 | add_executable(instruction_test2 16 | instruction_test2.cpp 17 | $<$:varmulet_hooks_test.S> 18 | ) 19 | target_link_libraries(instruction_test2 armulet pico_stdlib) 20 | target_compile_definitions(instruction_test2 PRIVATE 21 | ARMULET_DEBUG=1 22 | ARMULET_FEATURE_STEP=1 23 | ARMULET_FEATURE_STEP_STATUS=1 24 | ARMULET_FEATURE_ARMV8M_BASELINE_MSPLIM=1 25 | ARMULET_FEATURE_ARMV8M_BASELINE=1 26 | VARMULET_USE_EARLY_INSTR_READ=0 # get two extra tests this way - todo revisit if we test access faults 27 | $<$:ARMULET_USE_ASM=1> 28 | EARLY_READ=0 29 | ) 30 | pico_add_extra_outputs(instruction_test2) 31 | 32 | add_executable(instruction_test_nf instruction_test.cpp) 33 | target_link_libraries(instruction_test_nf armulet pico_stdlib) 34 | if (PICO_RISCV) 35 | # pico_set_binary_type(instruction_test no_flash) 36 | # pico_set_binary_type(instruction_test2 no_flash) 37 | else() 38 | target_compile_definitions(instruction_test_nf PRIVATE 
ARMULET_DEBUG=1) 39 | pico_add_extra_outputs(instruction_test_nf) 40 | pico_set_binary_type(instruction_test_nf no_flash) 41 | endif() 42 | -------------------------------------------------------------------------------- /test/assembler.h: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | #include <stdbool.h> 3 | 4 | static inline uint32_t opcodeADCS(uint32_t Rdn, uint32_t Rm) { 5 | return (0b0100000101 << 6) | ((Rm & 7) << 3) | (Rdn & 7); 6 | } 7 | 8 | static inline uint32_t opcodeADDS1(uint32_t Rd, uint32_t Rn, uint32_t imm3) { 9 | return (0b0001110 << 9) | ((imm3 & 0x7) << 6) | ((Rn & 7) << 3) | (Rd & 7); 10 | } 11 | 12 | static inline uint32_t opcodeADDS2(uint32_t Rdn, uint32_t imm8) { 13 | return (0b00110 << 11) | ((Rdn & 7) << 8) | (imm8 & 0xff); 14 | } 15 | 16 | static inline uint32_t opcodeADDspPlusImm(uint32_t Rd, uint32_t imm8) { 17 | return (0b10101 << 11) | ((Rd & 7) << 8) | ((imm8 >> 2) & 0xff); 18 | } 19 | 20 | static inline uint32_t opcodeADDsp2(uint32_t imm) { 21 | return (0b101100000 << 7) | ((imm >> 2) & 0x7f); 22 | } 23 | 24 | static inline uint32_t opcodeADDSreg(uint32_t Rd, uint32_t Rn, uint32_t Rm) { 25 | return (0b0001100 << 9) | ((Rm & 0x7) << 6) | ((Rn & 7) << 3) | (Rd & 7); 26 | } 27 | 28 | static inline uint32_t opcodeADDreg(uint32_t Rdn, uint32_t Rm) { 29 | return (0b01000100 << 8) | ((Rdn & 0x8) << 4) | ((Rm & 0xf) << 3) | (Rdn & 0x7); 30 | } 31 | 32 | static inline uint32_t opcodeADR(uint32_t Rd, uint32_t imm8) { 33 | return (0b10100 << 11) | ((Rd & 7) << 8) | ((imm8 >> 2) & 0xff); 34 | } 35 | 36 | static inline uint32_t opcodeANDS(uint32_t Rn, uint32_t Rm) { 37 | return (0b0100000000 << 6) | ((Rm & 7) << 3) | (Rn & 0x7); 38 | } 39 | 40 | static inline uint32_t opcodeASRS(uint32_t Rd, uint32_t Rm, uint32_t imm5) { 41 | return (0b00010 << 11) | ((imm5 & 0x1f) << 6) | ((Rm & 0x7) << 3) | (Rd & 0x7); 42 | } 43 | 44 | static inline uint32_t opcodeASRSreg(uint32_t Rdn, uint32_t Rm) { 45 | return (0b0100000100 << 6) | ((Rm & 0x7) << 3) | (Rdn & 0x7); 46 | } 47 | 48 | static inline uint32_t opcodeBT1(uint32_t cond, uint32_t imm8) { 49 | return (0b1101 << 12) | ((cond & 0xf) << 8) | ((imm8 >> 1) & 0xff); 50 | } 51 | 52 | static inline uint32_t opcodeBT2(uint32_t imm11) { 53 | return (0b11100 << 11) | ((imm11 >> 1) & 0x7ff); 54 | } 55 | 56 | static inline uint32_t opcodeBICS(uint32_t Rdn, uint32_t Rm) { 57 | return (0b0100001110 << 6) | ((Rm & 7) << 3) | (Rdn & 7); 58 | } 59 | 60 | static inline uint32_t opcodeBKPT(uint32_t imm) { 61 | const uint32_t num = imm & 0xff; 62 | const uint32_t opcode = (0b10111110 << 8) | num; 63 | return opcode; 64 | } 65 |
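// The encoders in this header let the tests hand-assemble Thumb instructions
// into emulated memory. A hypothetical usage sketch (emit_demo is not part of
// this repo; it assumes armulet_accessors.h is also included):
//
//   static inline void emit_demo(uint32_t addr) {
//       armulet_write_u16(addr + 0, (uint16_t) opcodeMOVS(0, 7));   // movs r0, #7
//       armulet_write_u16(addr + 2, (uint16_t) opcodeADDS2(0, 1));  // adds r0, #1
//       armulet_write_u16(addr + 4, (uint16_t) opcodeBX(14));       // bx lr
//   }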
66 | static inline uint32_t opcodeBL(int32_t imm) { 67 | const uint32_t imm11 = (imm >> 1) & 0x7ff; 68 | const uint32_t imm10 = (imm >> 12) & 0x3ff; 69 | const uint32_t s = imm < 0 ? 1 : 0; 70 | const uint32_t j2 = 1 - (((imm >> 22) & 0x1) ^ s); 71 | const uint32_t j1 = 1 - (((imm >> 23) & 0x1) ^ s); 72 | const uint32_t opcode = 73 | (0b1101 << 28) | (j1 << 29) | (j2 << 27) | (imm11 << 16) | (0b11110 << 11) | (s << 10) | imm10; 74 | return opcode; 75 | } 76 | 77 | static inline uint32_t opcodeBLX(uint32_t Rm) { 78 | return (0b010001111 << 7) | (Rm << 3); 79 | } 80 | 81 | static inline uint32_t opcodeBX(uint32_t Rm) { 82 | return (0b010001110 << 7) | (Rm << 3); 83 | } 84 | 85 | static inline uint32_t opcodeCMN(uint32_t Rn, uint32_t Rm) { 86 | return (0b0100001011 << 6) | ((Rm & 0x7) << 3) | (Rn & 0x7); 87 | } 88 | 89 | static inline uint32_t opcodeCMPimm(uint32_t Rn, uint32_t Imm8) { 90 | return (0b00101 << 11) | ((Rn & 0x7) << 8) | (Imm8 & 0xff); 91 | } 92 | 93 | static inline uint32_t opcodeCMPregT1(uint32_t Rn, uint32_t Rm) { 94 | return (0b0100001010 << 6) | ((Rm & 0x7) << 3) | (Rn & 0x7); 95 | } 96 | 97 | static inline uint32_t opcodeCMPregT2(uint32_t Rn, uint32_t Rm) { 98 | return (0b01000101 << 8) | (((Rn >> 3) & 0x1) << 7) | ((Rm & 0xf) << 3) | (Rn & 0x7); 99 | } 100 | 101 | static inline uint32_t opcodeCPSID() { 102 | return 0b1011011001110010; 103 | } 104 | 105 | static inline uint32_t opcodeCPSIE() { 106 | return 0b1011011001100010; 107 | } 108 | 109 | static inline uint32_t opcodeDMBSY() { 110 | return 0x8f50f3bf; 111 | } 112 | 113 | static inline uint32_t opcodeDSBSY() { 114 | return 0x8f4ff3bf; 115 | } 116 | 117 | static inline uint32_t opcodeEORS(uint32_t Rdn, uint32_t Rm) { 118 | return (0b0100000001 << 6) | ((Rm & 0x7) << 3) | (Rdn & 0x7); 119 | } 120 | 121 | static inline uint32_t opcodeISBSY() { 122 | return 0x8f6ff3bf; 123 | } 124 | 125 | static inline uint32_t opcodeLDMIA(uint32_t Rn, uint32_t registers) { 126 | return (0b11001 << 11) | ((Rn & 0x7) << 8) | (registers & 0xff); 127 | } 128 | 129 | static inline uint32_t opcodeLDRreg(uint32_t Rt, uint32_t Rn, uint32_t Rm) { 130 | return (0b0101100 << 9) | ((Rm & 0x7) << 6) | ((Rn & 0x7) << 3) | (Rt & 0x7); 131 | } 132 | 133 | static inline uint32_t opcodeLDRimm(uint32_t Rt, uint32_t Rn, uint32_t imm5) { 134 | return (0b01101 << 11) | (((imm5 >> 2) & 0x1f) << 6) | ((Rn & 0x7) << 3) | (Rt & 0x7); 135 | } 136 | 137 | static inline uint32_t opcodeLDRlit(uint32_t Rt, uint32_t imm8) { 138 | return (0b01001 << 11) | ((imm8 >> 2) & 0xff) | ((Rt & 0x7) << 8); 139 | } 140 | 141 | static inline uint32_t opcodeLDRB(uint32_t Rt, uint32_t Rn, uint32_t imm5) { 142 | return (0b01111 << 11) | ((imm5 & 0x1f) << 6) | ((Rn & 0x7) << 3) | (Rt & 0x7); 143 | } 144 | 145 | static inline uint32_t opcodeLDRsp(uint32_t Rt, uint32_t imm8) { 146 | return (0b10011 << 11) | ((Rt & 7) << 8) | ((imm8 >> 2) & 0xff); 147 | } 148 | 149 | static inline uint32_t opcodeLDRBreg(uint32_t Rt, uint32_t Rn, uint32_t Rm) { 150 | return (0b0101110 << 9) | ((Rm & 0x7) << 6) | ((Rn & 0x7) << 3) | (Rt & 0x7); 151 | } 152 | 153 | static inline uint32_t opcodeLDRH(uint32_t Rt, uint32_t Rn, uint32_t imm5) { 154 | return (0b10001 << 11) | (((imm5 >> 1) & 0x1f) << 6) | ((Rn & 0x7) << 3) | (Rt & 0x7); 155 | } 156 | 157 | static inline uint32_t opcodeLDRHreg(uint32_t Rt, uint32_t Rn, uint32_t Rm) { 158 | return (0b0101101 << 9) | ((Rm & 0x7) << 6) | ((Rn & 0x7) << 3) | (Rt & 0x7); 159 | } 160 | 161 | static inline uint32_t opcodeLDRSB(uint32_t Rt, uint32_t Rn, uint32_t Rm) { 162 | return (0b0101011 << 9) | ((Rm & 0x7) << 6) | ((Rn & 0x7) << 3) | (Rt & 0x7); 163 | } 164 | 165 | static inline uint32_t opcodeLDRSH(uint32_t Rt, uint32_t Rn, uint32_t Rm) { 166 |
return (0b0101111 << 9) | ((Rm & 0x7) << 6) | ((Rn & 0x7) << 3) | (Rt & 0x7); 167 | } 168 | 169 | static inline uint32_t opcodeLSLSreg(uint32_t Rdn, uint32_t Rm) { 170 | return (0b0100000010 << 6) | ((Rm & 0x7) << 3) | (Rdn & 0x7); 171 | } 172 | 173 | static inline uint32_t opcodeLSLSimm(uint32_t Rd, uint32_t Rm, uint32_t Imm5) { 174 | return (0b00000 << 11) | ((Imm5 & 0x1f) << 6) | ((Rm & 0x7) << 3) | (Rd & 0x7); 175 | } 176 | 177 | static inline uint32_t opcodeLSRS(uint32_t Rd, uint32_t Rm, uint32_t imm5) { 178 | return (0b00001 << 11) | ((imm5 & 0x1f) << 6) | ((Rm & 0x7) << 3) | (Rd & 0x7); 179 | } 180 | 181 | static inline uint32_t opcodeLSRSreg(uint32_t Rdn, uint32_t Rm) { 182 | return (0b0100000011 << 6) | ((Rm & 0x7) << 3) | (Rdn & 0x7); 183 | } 184 | 185 | static inline uint32_t opcodeMOV(uint32_t Rd, uint32_t Rm) { 186 | return (0b01000110 << 8) | ((Rd & 0x8 ? 1 : 0) << 7) | (Rm << 3) | (Rd & 0x7); 187 | } 188 | 189 | static inline uint32_t opcodeMOVS(uint32_t Rd, uint32_t imm8) { 190 | return (0b00100 << 11) | ((Rd & 0x7) << 8) | (imm8 & 0xff); 191 | } 192 | 193 | static inline uint32_t opcodeMOVSreg(uint32_t Rd, uint32_t Rm) { 194 | return (0b000000000 << 6) | ((Rm & 0x7) << 3) | (Rd & 0x7); 195 | } 196 | static inline uint32_t opcodeMRS(uint32_t Rd, uint32_t specReg) { 197 | return ( 198 | ((0b1000 << 28) | ((Rd & 0xf) << 24) | ((specReg & 0xff) << 16) | 0b1111001111101111) 199 | ); 200 | } 201 | 202 | static inline uint32_t opcodeMSR(uint32_t specReg, uint32_t Rn) { 203 | return ((0b10001000 << 24) | ((specReg & 0xff) << 16) | (0b111100111000 << 4) | (Rn & 0xf)); 204 | } 205 | 206 | static inline uint32_t opcodeMULS(uint32_t Rn, uint32_t Rdm) { 207 | return (0b0100001101 << 6) | ((Rn & 7) << 3) | (Rdm & 7); 208 | } 209 | 210 | static inline uint32_t opcodeMVNS(uint32_t Rd, uint32_t Rm) { 211 | return (0b0100001111 << 6) | ((Rm & 7) << 3) | (Rd & 7); 212 | } 213 | 214 | static inline uint32_t opcodeNOP() { 215 | return 0b1011111100000000; 216 | } 217 | 218 | static inline uint32_t opcodeORRS(uint32_t Rn, uint32_t Rm) { 219 | return (0b0100001100 << 6) | ((Rm & 0x7) << 3) | (Rn & 0x7); 220 | } 221 | 222 | static inline uint32_t opcodePOP(bool P, uint32_t registerList) { 223 | return (0b1011110 << 9) | ((P ? 1 : 0) << 8) | registerList; 224 | } 225 | 226 | static inline uint32_t opcodePUSH(bool M, uint32_t registerList) { 227 | return (0b1011010 << 9) | ((M ? 
1 : 0) << 8) | registerList; 228 | } 229 | 230 | static inline uint32_t opcodeREV(uint32_t Rd, uint32_t Rn) { 231 | return (0b1011101000 << 6) | ((Rn & 0x7) << 3) | (Rd & 0x7); 232 | } 233 | 234 | static inline uint32_t opcodeREV16(uint32_t Rd, uint32_t Rn) { 235 | return (0b1011101001 << 6) | ((Rn & 0x7) << 3) | (Rd & 0x7); 236 | } 237 | 238 | static inline uint32_t opcodeREVSH(uint32_t Rd, uint32_t Rn) { 239 | return (0b1011101011 << 6) | ((Rn & 0x7) << 3) | (Rd & 0x7); 240 | } 241 | 242 | static inline uint32_t opcodeROR(uint32_t Rdn, uint32_t Rm) { 243 | return (0b0100000111 << 6) | ((Rm & 0x7) << 3) | (Rdn & 0x7); 244 | } 245 | 246 | static inline uint32_t opcodeRSBS(uint32_t Rd, uint32_t Rn) { 247 | return (0b0100001001 << 6) | ((Rn & 0x7) << 3) | (Rd & 0x7); 248 | } 249 | 250 | static inline uint32_t opcodeSBCS(uint32_t Rn, uint32_t Rm) { 251 | return (0b0100000110 << 6) | ((Rm & 0x7) << 3) | (Rn & 0x7); 252 | } 253 | 254 | static inline uint32_t opcodeSTMIA(uint32_t Rn, uint32_t registers) { 255 | return (0b11000 << 11) | ((Rn & 0x7) << 8) | (registers & 0xff); 256 | } 257 | 258 | static inline uint32_t opcodeSTR(uint32_t Rt, uint32_t Rm, uint32_t imm5) { 259 | return (0b01100 << 11) | (((imm5 >> 2) & 0x1f) << 6) | ((Rm & 0x7) << 3) | (Rt & 0x7); 260 | } 261 | 262 | static inline uint32_t opcodeSTRsp(uint32_t Rt, uint32_t imm8) { 263 | return (0b10010 << 11) | ((Rt & 7) << 8) | ((imm8 >> 2) & 0xff); 264 | } 265 | 266 | static inline uint32_t opcodeSTRreg(uint32_t Rt, uint32_t Rn, uint32_t Rm) { 267 | return (0b0101000 << 9) | ((Rm & 0x7) << 6) | ((Rn & 0x7) << 3) | (Rt & 0x7); 268 | } 269 | 270 | static inline uint32_t opcodeSTRB(uint32_t Rt, uint32_t Rm, uint32_t imm5) { 271 | return (0b01110 << 11) | ((imm5 & 0x1f) << 6) | ((Rm & 0x7) << 3) | (Rt & 0x7); 272 | } 273 | 274 | static inline uint32_t opcodeSTRBreg(uint32_t Rt, uint32_t Rn, uint32_t Rm) { 275 | return (0b0101010 << 9) | ((Rm & 0x7) << 6) | ((Rn & 0x7) << 3) | (Rt & 0x7); 276 | } 277 | 278 | static inline uint32_t opcodeSTRH(uint32_t Rt, uint32_t Rm, uint32_t imm5) { 279 | return (0b10000 << 11) | (((imm5 >> 1) & 0x1f) << 6) | ((Rm & 0x7) << 3) | (Rt & 0x7); 280 | } 281 | 282 | static inline uint32_t opcodeSTRHreg(uint32_t Rt, uint32_t Rn, uint32_t Rm) { 283 | return (0b0101001 << 9) | ((Rm & 0x7) << 6) | ((Rn & 0x7) << 3) | (Rt & 0x7); 284 | } 285 | 286 | static inline uint32_t opcodeSUBS1(uint32_t Rd, uint32_t Rn, uint32_t imm3) { 287 | return (0b0001111 << 9) | ((imm3 & 0x7) << 6) | ((Rn & 7) << 3) | (Rd & 7); 288 | } 289 | 290 | static inline uint32_t opcodeSUBS2(uint32_t Rdn, uint32_t imm8) { 291 | return (0b00111 << 11) | ((Rdn & 7) << 8) | (imm8 & 0xff); 292 | } 293 | 294 | static inline uint32_t opcodeSUBSreg(uint32_t Rd, uint32_t Rn, uint32_t Rm) { 295 | return (0b0001101 << 9) | ((Rm & 0x7) << 6) | ((Rn & 7) << 3) | (Rd & 7); 296 | } 297 | 298 | static inline uint32_t opcodeSUBsp(uint32_t imm) { 299 | return (0b101100001 << 7) | ((imm >> 2) & 0x7f); 300 | } 301 | 302 | static inline uint32_t opcodeSVC(uint32_t imm8) { 303 | return (0b11011111 << 8) | (imm8 & 0xff); 304 | } 305 | 306 | static inline uint32_t opcodeSXTB(uint32_t Rd, uint32_t Rm) { 307 | return (0b1011001001 << 6) | ((Rm & 7) << 3) | (Rd & 7); 308 | } 309 | 310 | static inline uint32_t opcodeSXTH(uint32_t Rd, uint32_t Rm) { 311 | return (0b1011001000 << 6) | ((Rm & 7) << 3) | (Rd & 7); 312 | } 313 | 314 | static inline uint32_t opcodeTST(uint32_t Rm, uint32_t Rn) { 315 | return (0b0100001000 << 6) | ((Rn & 7) << 3) | (Rm & 7); 316 | } 317 | 318 
| static inline uint32_t opcodeUXTB(uint32_t Rd, uint32_t Rm) { 319 | return (0b1011001011 << 6) | ((Rm & 7) << 3) | (Rd & 7); 320 | } 321 | 322 | static inline uint32_t opcodeUDF(uint32_t imm8) { 323 | return ((0b11011110 << 8) | (imm8 & 0xff)); 324 | } 325 | 326 | static inline uint32_t opcodeUDF2(uint32_t imm16) { 327 | const uint32_t imm12 = imm16 & 0xfff; 328 | const uint32_t imm4 = (imm16 >> 12) & 0xf; 329 | return ((0b111101111111 << 4) | imm4 | (0b1010 << 28) | (imm12 << 16)); 330 | } 331 | 332 | static inline uint32_t opcodeUXTH(uint32_t Rd, uint32_t Rm) { 333 | return (0b1011001010 << 6) | ((Rm & 7) << 3) | (Rd & 7); 334 | } 335 | 336 | static inline uint32_t opcodeWFI() { 337 | return 0b1011111100110000; 338 | } 339 | 340 | static inline uint32_t opcodeYIELD() { 341 | return 0b1011111100010000; 342 | } 343 | -------------------------------------------------------------------------------- /test/undef_inst_test.cpp: -------------------------------------------------------------------------------- 1 | #include <string> 2 | #include <vector> 3 | 4 | std::vector<std::string> undefined = { 5 | // 16 bit ================================================== 6 | 7 | // Shift (immediate), add, subtract, move, and compare ----- 8 | // 00xxxx 9 | 10 | // Data processing ----------------------------------------- 11 | // 010000 12 | 13 | // "010000 0100 " // unpredictable 14 | 15 | // Special data instructions and branch and exchange ------- 16 | // 010001 17 | 18 | // LDR (literal) ------------------------------------------- 19 | // 01001x 20 | 21 | // Load/store single data item ----------------------------- 22 | // 0101xx 23 | // 011xxx 24 | // 100xxx 25 | 26 | // ADR ----------------------------------------------------- 27 | // 10100x 28 | 29 | // ADD (SP plus immediate) --------------------------------- 30 | // 10101x 31 | 32 | // Miscellaneous 16-bit instructions ----------------------- 33 | // 1011 xx 34 | // 00000xx 35 | // 00001xx 36 | " 1011 0001xxx", 37 | // 001000x 38 | // 001001x 39 | // 001010x 40 | // 001011x 41 | " 1011 0011xxx", 42 | // 010xxxx 43 | " 1011 011000x", 44 | " 1011 0110010", 45 | // 0110011 46 | " 1011 01101xx", 47 | " 1011 0111xxx", 48 | " 1011 100xxxx", 49 | // 101000x 50 | // 1011 101001x 51 | " 1011 101010x", 52 | // 101011x 53 | " 1011 1011xxx", 54 | // 110xxxx 55 | // 1110xxx 56 | // 1111xxx 57 | 58 | // STM, STMIA, STMEA --------------------------------------- 59 | // 11000x 60 | 61 | // LDM, LDMIA, LDMFD --------------------------------------- 62 | // 11001x 63 | 64 | // Conditional branch, and Supervisor Call ----------------- 65 | // 1101 xx 66 | 67 | "1101 1110", // UDF 68 | 69 | // Unconditional branch ------------------------------------ 70 | // 11100x 71 | 72 | // 32 bit ================================================== 73 | // 11101 74 | // 11110 75 | // 11111 76 | }; -------------------------------------------------------------------------------- /test/varmulet_hooks_test.S: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022 Raspberry Pi (Trading) Ltd. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #if PICO_RP2350 8 | #include "pico.h" 9 | #endif 10 | 11 | #include "varmulet_macros.inc.S" 12 | 13 | .macro varmulet_step_status VAL, tmp 14 | #if ARMULET_FEATURE_STEP_STATUS 15 | li \tmp, \VAL 16 | sw \tmp, CPU_OFFSET_STEP_STATUS(r_cpu) 17 | #endif 18 | .endm 19 | 20 | .macro varmulet_step_status_ni VAL, tmp, delta 21 | #if ARMULET_FEATURE_STEP_STATUS 22 | li \tmp, \VAL 23 | sw \tmp, CPU_OFFSET_STEP_STATUS(r_cpu) 24 | addi r_pc, r_pc, -\delta 25 | next_instruction 26 | #endif 27 | .endm 28 | 29 | .global install_varmulet_test_hooks 30 | .text 31 | install_varmulet_test_hooks: 32 | #if ARMULET_FEATURE_ASM_HOOKS_IS_FUNCTION 33 | #error ARMULET_FEATURE_ASM_HOOKS_IS_FUNCTION not supported for tests 34 | #endif 35 | la a1, test_bkpt_instr 36 | sw a1, (4*VASM_HOOKS_INDEX_BKPT_INSTR)(a0) 37 | #if ARMULET_FEATURE_STEP_STATUS 38 | la a1, test_svc_instr 39 | sw a1, (4*VASM_HOOKS_INDEX_SVC_INSTR)(a0) 40 | la a1, test_undefined16 41 | sw a1, (4*VASM_HOOKS_INDEX_UNDEFINED16)(a0) 42 | la a1, test_undefined32 43 | sw a1, (4*VASM_HOOKS_INDEX_UNDEFINED32)(a0) 44 | #endif 45 | ret 46 | 47 | test_bkpt_instr: 48 | addi r_pc, r_pc, -2 49 | #if ARMULET_FEATURE_STEP_STATUS 50 | varmulet_step_status ARMULET_IST_BREAKPOINT, r_tmp0 51 | #endif 52 | next_instruction 53 | 54 | #if ARMULET_FEATURE_STEP_STATUS 55 | test_svc_instr: 56 | varmulet_step_status ARMULET_IST_SVC, r_tmp0 57 | next_instruction 58 | 59 | test_undefined16: 60 | varmulet_step_status_ni ARMULET_IST_UNDEFINED16, r_tmp0, 2 61 | next_instruction 62 | 63 | test_undefined32: 64 | varmulet_step_status_ni ARMULET_IST_UNDEFINED32, r_tmp1, 4 65 | next_instruction 66 | #endif -------------------------------------------------------------------------------- /test/varmulet_hooks_test.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "varmulet.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | 9 | #if ARMULET_USE_ASM 10 | void install_varmulet_test_hooks(struct varmulet_asm_hooks *hooks); 11 | #endif 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | -------------------------------------------------------------------------------- /varmulet/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | add_library(varmulet INTERFACE) 3 | target_sources(armulet INTERFACE 4 | ${CMAKE_CURRENT_LIST_DIR}/varmulet.S 5 | ${CMAKE_CURRENT_LIST_DIR}/varmulet_armv6m_core.S 6 | ${CMAKE_CURRENT_LIST_DIR}/varmulet_hooks_default.S 7 | ) 8 | target_include_directories(varmulet INTERFACE ${CMAKE_CURRENT_LIST_DIR}) 9 | -------------------------------------------------------------------------------- /varmulet/varmulet.S: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022 Raspberry Pi (Trading) Ltd. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #if PICO_RP2350 8 | #include "pico.h" 9 | #endif 10 | 11 | // function sections 12 | // try to standardize on args to hooks 13 | // in case we don't want to align 14 | #include "varmulet_macros.inc.S" 15 | 16 | #if ARMULET_FEATURE_PROFILER 17 | .section .data 18 | .global address_profile 19 | .p2align 2 20 | address_profile: 21 | .space 65536 22 | #endif 23 | 24 | .section .text.varmulet_step 25 | 26 | #if ARMULET_FEATURE_STEP 27 | .global varmulet_step 28 | // note: this is really a custom thing 29 | varmulet_step: 30 | addi sp, sp, -4 31 | sw ra, (sp) 32 | call_hook_enter_fn_trash_tmp2_work1 33 | #if VARMULET_USE_EARLY_INSTR_READ 34 | read_mem_u16_unchecked r_inst, r_pc 35 | #endif 36 | mv r_work0, r_next_inst 37 | la r_next_inst, varmulet_step_done // override whatever is in there 38 | jr r_work0 39 | varmulet_step_done: 40 | call_hook_exit_fn_trash_tmp2_work1 41 | lw ra, (sp) 42 | add sp, sp, 4 43 | ret 44 | #endif 45 | 46 | .section .text.varmulet_run 47 | .global varmulet_run 48 | varmulet_run: 49 | mv r_cpu, a0 50 | addi sp, sp, -4 51 | sw ra, (sp) 52 | call_hook_enter_fn_trash_tmp2_work1 53 | #if ARMULET_FEATURE_CALL 54 | // Store the address to go to when we return from Armv6-M on the stack 55 | // (note the register mapped as r_tmp0 should have been saved by 56 | // varmulet_hook_default_enter_fn or similar in the previous jalr call) 57 | addi sp, sp, -4 58 | lui r_tmp0, %hi(varmulet_run_return) 59 | addi r_tmp0, r_tmp0, %lo(varmulet_run_return) 60 | sw r_tmp0, 0(sp) 61 | #endif 62 | next_instruction 63 | 64 | varmulet_run_return: 65 | call_hook_exit_fn_trash_tmp2_work1 66 | lw a0, 0(r_cpu) 67 | lw ra, (sp) 68 | add sp, sp, 4 69 | ret 70 | 71 | // these should be linked in 72 | #if 0 && VARMULET_USE_WATERMARK 73 | read8_special: 74 | ebreak 75 | 76 | read16_special: 77 | ebreak 78 | 79 | read32_special: 80 | ebreak 81 | 82 | write8_special: 83 | ebreak 84 | 85 | write16_special: 86 | ebreak 87 | 88 | write32_special: 89 | ebreak 90 | #endif 91 | 92 | -------------------------------------------------------------------------------- /varmulet/varmulet.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #if PICO_RP2350 3 | #include "pico.h" 4 | #endif 5 | #include "armulet.h" 6 | 7 | #if !ARMULET_USE_REAL_MEMORY 8 | #error VARMULET requires ARMULET_USE_REAL_MEMORY 9 | #endif 10 | 11 | #ifndef VARMULET_USE_WATERMARK 12 | #define VARMULET_USE_WATERMARK ARMULET_FEATURE_CHECKED_MEMORY 13 | #else 14 | #if ARMULET_FEATURE_CHECKED_MEMORY & !VARMULET_USE_WATERMARK 15 | #error ARMULET_FEATURE_CHECKED_MEMORY requires VARMULET_USE_WATERMARK 16 | #endif 17 | #endif 18 | 19 | // Use multi-bit extract from Hazard3 for improved speed 20 | #ifndef VARMULET_USE_HAZARD3_CUSTOM 21 | #define VARMULET_USE_HAZARD3_CUSTOM 1 22 | #endif 23 | 24 | // the instruction implementations must read the next instruction into r_inst (note this 25 | // is automatically handled by the 'next_instruction' macro 26 | #ifndef VARMULET_USE_EARLY_INSTR_READ 27 | #define VARMULET_USE_EARLY_INSTR_READ 1 28 | #endif 29 | 30 | #define CPU_OFFSET_R0 (0 * 4) 31 | #define CPU_OFFSET_R1 (1 * 4) 32 | #define CPU_OFFSET_R2 (2 * 4) 33 | #define CPU_OFFSET_R3 (3 * 4) 34 | #define CPU_OFFSET_R12 (12 * 4) 35 | #define CPU_OFFSET_SP (13 * 4) 36 | #define CPU_OFFSET_LR (14 * 4) 37 | #define CPU_OFFSET_PC (15 * 4) 38 | #define CPU_OFFSET_LAZY_NZ (16 * 4) 39 | #define CPU_OFFSET_V 68 40 | #define CPU_OFFSET_C 69 41 | #define 
CPU_OFFSET_PRIMASK 70 42 | #define CPU_OFFSET_IPSR 71 43 | #define __CPU_BASE 72 44 | #if ARMULET_FEATURE_ARMV8M_BASELINE_MSPLIM 45 | #define CPU_OFFSET_SPLIM __CPU_BASE 46 | #define CPU_SPLIM_SIZE 4 47 | #else 48 | #define CPU_SPLIM_SIZE 0 49 | #endif 50 | #if ARMULET_FEATURE_ASM_HOOKS_IS_FUNCTION 51 | #define CPU_OFFSET_ASM_HOOK_PARAM (__CPU_BASE + CPU_SPLIM_SIZE) 52 | #define CPU_ASM_HOOK_PARAM_SIZE 4 53 | #else 54 | #define CPU_ASM_HOOK_PARAM_SIZE 0 55 | #endif 56 | #if ARMULET_FEATURE_STEP_STATUS 57 | #define CPU_OFFSET_STEP_STATUS (__CPU_BASE + CPU_SPLIM_SIZE + CPU_ASM_HOOK_PARAM_SIZE) 58 | #define CPU_STEP_STATUS_SIZE 4 59 | #else 60 | #define CPU_STEP_STATUS_SIZE 0 61 | #endif 62 | 63 | #ifndef __ASSEMBLER__ 64 | static_assert(offsetof(armulet_cpu_t, ipsr) == CPU_OFFSET_IPSR, ""); 65 | #if ARMULET_FEATURE_STEP_STATUS 66 | static_assert(offsetof(armulet_cpu_t, step_status) == CPU_OFFSET_STEP_STATUS, ""); 67 | #endif 68 | #endif 69 | 70 | #define VASM_HOOKS_INDEX_ENTER_FN 0 71 | #define VASM_HOOKS_INDEX_EXIT_FN 1 72 | // save regs to stack prior to calling into ABI function 73 | #define VASM_HOOKS_INDEX_SAVE_REGS_FN 2 74 | // restore regs from stack after calling into ABI function 75 | #define VASM_HOOKS_INDEX_RESTORE_REGS_FN 3 76 | // start executing next instruction (at PC)... note that if 77 | // VARMULET_USE_EARLY_INSTR_READ=1, then r_inst has been loaded from PC prior to jumping here 78 | #if !VARMULET_USE_ENTER_HOOK_TO_OVERRIDE_REGISTER_STORED_HOOKS 79 | #define VASM_HOOKS_INDEX_NEXT_INSTRUCTION 4 80 | #define VASM_HOOKS_INDEX_MAIN_DECODE_TABLE 5 81 | #define VASM_HOOKS_INDEX_DP_DECODE_TABLE 6 82 | // raw unknown 16 bit instruction, instruction in r_inst; should end with `next_instruction` 83 | #define VASM_HOOKS_INDEX_UNDEFINED16 7 84 | #else 85 | #define VASM_HOOKS_INDEX_UNDEFINED16 4 86 | #endif 87 | // raw unknown 32 bit instruction, instruction in r_inst : r_tmp0; should end with `next_instruction` 88 | #define VASM_HOOKS_INDEX_UNDEFINED32 (VASM_HOOKS_INDEX_UNDEFINED16+1) 89 | #define VASM_HOOKS_INDEX_BKPT_INSTR (VASM_HOOKS_INDEX_UNDEFINED16+2) 90 | #define VASM_HOOKS_INDEX_HINT_INSTR (VASM_HOOKS_INDEX_UNDEFINED16+3) 91 | // raw decoded svc_call, instruction in r_inst; SVC number is in low byte of r_inst, should end with `next_instruction` 92 | #define VASM_HOOKS_INDEX_SVC_INSTR (VASM_HOOKS_INDEX_UNDEFINED16+4) 93 | // raw decoded cps_call, instruction in r_inst; should end with `next_instruction` 94 | #define VASM_HOOKS_INDEX_CPS_INSTR (VASM_HOOKS_INDEX_UNDEFINED16+5) 95 | #define VASM_HOOKS_INDEX_MRS_INSTR (VASM_HOOKS_INDEX_UNDEFINED16+6) 96 | #define VASM_HOOKS_INDEX_MSR_INSTR (VASM_HOOKS_INDEX_UNDEFINED16+7) 97 | #define VASM_HOOKS_INDEX_MISC_CONTROL_INSTR (VASM_HOOKS_INDEX_UNDEFINED16+8) 98 | // primask updated; new value in r_work0 99 | #define VASM_HOOKS_INDEX_UPDATE_PRIMASK_FN (VASM_HOOKS_INDEX_UNDEFINED16+9) 100 | // raw decoded bx to register value >= 0xf0000000, instruction in r_inst, should end with `next_instruction` 101 | #define VASM_HOOKS_INDEX_EXC_RETURN (VASM_HOOKS_INDEX_UNDEFINED16+10) 102 | // if an EXC_RETURN is taken to ARMULET_CALL_RETURN_ADDRESS, jmp via here 103 | #define VASM_HOOKS_INDEX_CALL_RETURN (VASM_HOOKS_INDEX_UNDEFINED16+11) 104 | 105 | #ifndef __ASSEMBLER__ 106 | #ifdef __cplusplus 107 | extern "C" { 108 | #endif 109 | static_assert(offsetof(armulet_cpu_t, lazy_nz_val) == CPU_OFFSET_LAZY_NZ, ""); 110 | static_assert(offsetof(armulet_cpu_t, _V) == CPU_OFFSET_V, ""); 111 | static_assert(offsetof(armulet_cpu_t, C) == CPU_OFFSET_C, ""); 112 | 
static_assert(offsetof(armulet_cpu_t, primask) == CPU_OFFSET_PRIMASK, ""); 113 | #if ARMULET_FEATURE_SVC_HANDLER 114 | static_assert(offsetof(armulet_cpu_t, svc_handler) == CPU_OFFSET_SVC_HANDLER, ""); 115 | #endif 116 | #if ARMULET_FEATURE_ASM_HOOKS_IS_FUNCTION 117 | static_assert(offsetof(armulet_cpu_t, asm_hook_param) == CPU_OFFSET_ASM_HOOK_PARAM, ""); 118 | #endif 119 | 120 | #ifdef VARMULET_ASM_HOOK_TYPE 121 | typedef VARMULET_ASM_HOOK_TYPE asm_hook_t; 122 | #else 123 | typedef uintptr_t asm_hook_t; 124 | #endif 125 | typedef struct varmulet_asm_hooks { 126 | asm_hook_t enter_fn; 127 | asm_hook_t exit_fn; 128 | asm_hook_t save_regs_fn; 129 | asm_hook_t restore_regs_fn; 130 | #if !VARMULET_USE_ENTER_HOOK_TO_OVERRIDE_REGISTER_STORED_HOOKS 131 | asm_hook_t next_instruction; 132 | asm_hook_t main_decode_table; 133 | asm_hook_t dp_decode_table; 134 | #endif 135 | asm_hook_t undefined16; 136 | asm_hook_t undefined32; 137 | asm_hook_t bkpt_instr; 138 | asm_hook_t hint_instr; 139 | asm_hook_t svc_instr; 140 | asm_hook_t cps_instr; 141 | asm_hook_t mrs_instr; 142 | asm_hook_t msr_instr; 143 | asm_hook_t misc_control_instr; 144 | asm_hook_t update_primask_fn; 145 | asm_hook_t exc_return; 146 | asm_hook_t call_return; 147 | } varmulet_asm_hooks_t; 148 | 149 | static_assert(offsetof(varmulet_asm_hooks_t, exc_return) == VASM_HOOKS_INDEX_EXC_RETURN * sizeof(asm_hook_t), ""); 150 | static_assert(offsetof(varmulet_asm_hooks_t, update_primask_fn) == VASM_HOOKS_INDEX_UPDATE_PRIMASK_FN * sizeof(asm_hook_t), ""); 151 | static_assert(offsetof(varmulet_asm_hooks_t, call_return) == VASM_HOOKS_INDEX_CALL_RETURN * sizeof(asm_hook_t), ""); 152 | 153 | int varmulet_run(armulet_cpu_t *cpu, const varmulet_asm_hooks_t *hooks); 154 | void varmulet_step(armulet_cpu_t *cpu, const varmulet_asm_hooks_t *hooks); 155 | 156 | extern varmulet_asm_hooks_t varmulet_default_asm_hooks; 157 | 158 | #ifndef __cplusplus 159 | // declare these addresses all as const void, so there is no temptation to call them - note some compilers might not like this (e.g. C++) 160 | extern const void varmulet_hook_default_enter_fn; 161 | extern const void varmulet_hook_default_exit_fn; 162 | extern const void varmulet_hook_default_save_regs_fn; 163 | extern const void varmulet_hook_default_restore_regs_fn; 164 | extern const void varmulet_hook_default_execute_instruction; 165 | extern const void varmulet_main_decode_table; 166 | extern const void varmulet_dp_decode_table; 167 | extern const void varmulet_hook_default_bkpt_instr; 168 | extern const void varmulet_hook_default_svc_instr; 169 | extern const void varmulet_hook_default_hint_instr; 170 | extern const void varmulet_hook_default_cps_instr; 171 | extern const void varmulet_hook_default_mrs_instr; 172 | extern const void varmulet_hook_default_msr_instr; 173 | extern const void varmulet_hook_default_misc_control_instr; 174 | extern const void varmulet_hook_default_update_primask_fn; 175 | extern const void varmulet_hook_default_exc_return; 176 | extern const void varmulet_hook_default_call_return; 177 | extern const void varmulet_halt; 178 | #endif 179 | 180 | #ifdef __cplusplus 181 | } 182 | #endif 183 | #endif // __ASSEMBLER__ 184 | -------------------------------------------------------------------------------- /varmulet/varmulet_armv6m_core.S: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022 Raspberry Pi (Trading) Ltd. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #if PICO_RP2350 8 | #include "pico.h" 9 | #endif 10 | 11 | #include "varmulet_macros.inc.S" 12 | 13 | // speed vs space 14 | .macro main_decode_align 15 | .p2align 2 16 | .endm 17 | 18 | .global varmulet_main_decode_table 19 | .global varmulet_dp_decode_table 20 | .global vexecute_undefined // saves duplicating it elsewhere 21 | .global vexecute32_undefined // saves duplicating it elsewhere 22 | .global do_jmp_hook_exc_return 23 | .global do_jmp_hook_exc_return_tmp0 24 | 25 | #if VARMULET_USE_HWORD_MAIN_DECODE 26 | .section .text.varmulet_armv6m_core 27 | .macro main_decode_entry sym 28 | #if VARMULET_HWORD_MAIN_DECODE_ABSOLUTE_ADDR_OFFSET 29 | // Workaround for lack of 16-bit data relocations: put decode table at known 30 | // address so that we can generate the pointers using assembler arithmetic. 31 | .hword \sym - varmulet_main_decode_table + VARMULET_HWORD_MAIN_DECODE_ABSOLUTE_ADDR_OFFSET 32 | #else 33 | .hword \sym - varmulet_main_decode_table 34 | #endif 35 | .endm 36 | #else 37 | // Regular 32-bit function pointer with data relocation 38 | .section .rodata.varmulet_main_decode_table 39 | .macro main_decode_entry sym 40 | .word \sym 41 | .endm 42 | #endif 43 | 44 | .macro table_branch name 45 | jal v_table_branch 46 | .set \name\()_table_base,. 47 | .endm 48 | 49 | .macro table_branch_entry name,sym 50 | .byte \sym-\name\()_table_base 51 | .endm 52 | 53 | .macro enter_asm_hook_shared_if_smaller 54 | #if ARMULET_FEATURE_ASM_HOOKS_IS_FUNCTION 55 | // in this case should be a single 16-bit instruction 56 | tail_call_asm_hook_in_tmp2_trash_work1 57 | #else 58 | j shared_enter_asm_hook 59 | #endif 60 | .endm 61 | 62 | .p2align 2 63 | varmulet_main_decode_table: 64 | // Shift (immediate), add, subtract, move, and compare 65 | main_decode_entry vexecute_lslsi // 0000 00 66 | main_decode_entry vexecute_lslsi // 0000 01 67 | main_decode_entry vexecute_lsrsi // 0000 10 68 | main_decode_entry vexecute_lsrsi // 0000 11 69 | main_decode_entry vexecute_asrsi // 0001 00 70 | main_decode_entry vexecute_asrsi // 0001 01 71 | main_decode_entry vexecute_adds_subs // 0001 10 72 | main_decode_entry vexecute_addsi_subsi // 0001 11 73 | main_decode_entry vexecute_movsi // 0010 00 74 | main_decode_entry vexecute_movsi // 0010 01 75 | main_decode_entry vexecute_cmpi // 0010 10 76 | main_decode_entry vexecute_cmpi // 0010 11 77 | main_decode_entry vexecute_adds8i // 0011 00 78 | main_decode_entry vexecute_adds8i // 0011 01 79 | main_decode_entry vexecute_subs8i // 0011 10 80 | main_decode_entry vexecute_subs8i // 0011 11 81 | // Data processing 82 | main_decode_entry vexecute_dp // 0100 00 83 | // Special data instructions and branch and exchange 84 | main_decode_entry vexecute_010001 // 0100 01 85 | // Load from Literal Pool 86 | main_decode_entry vexecute_ldr_lit // 0100 10 87 | main_decode_entry vexecute_ldr_lit // 0100 11 88 | // Load/store single data item 89 | main_decode_entry vexecute_010100 // 0101 00 // str, strh 90 | main_decode_entry vexecute_010101 // 0101 01 // strb ldrsb 91 | main_decode_entry vexecute_010110 // 0101 10 // ldr, ldrh 92 | main_decode_entry vexecute_010111 // 0101 11 // ldrb, lrdsh 93 | 94 | main_decode_entry vexecute_stri // 0110 00 95 | main_decode_entry vexecute_stri // 0110 01 96 | main_decode_entry vexecute_ldri // 0110 10 97 | main_decode_entry vexecute_ldri // 0110 11 98 | 99 | main_decode_entry vexecute_strbi // 0111 00 100 | main_decode_entry vexecute_strbi // 0111 01 101 | main_decode_entry vexecute_ldrbi 
// 0111 10 102 | main_decode_entry vexecute_ldrbi // 0111 11 103 | 104 | main_decode_entry vexecute_strhi // 1000 00 105 | main_decode_entry vexecute_strhi // 1000 01 106 | main_decode_entry vexecute_ldrhi // 1000 10 107 | main_decode_entry vexecute_ldrhi // 1000 11 108 | 109 | main_decode_entry vexecute_strspi // 1001 00 110 | main_decode_entry vexecute_strspi // 1001 01 111 | main_decode_entry vexecute_ldrspi // 1001 10 112 | main_decode_entry vexecute_ldrspi // 1001 11 113 | 114 | // Generate PC-relative address 115 | main_decode_entry vexecute_adr // 1010 00 116 | main_decode_entry vexecute_adr // 1010 01 117 | 118 | // Generate SP-relative address 119 | main_decode_entry vexecute_add_sp // 1010 10 120 | main_decode_entry vexecute_add_sp // 1010 11 121 | 122 | // Miscellaneous 16-bit instructions 123 | main_decode_entry vexecute_101100 // 1011 00 124 | main_decode_entry vexecute_101101 // 1011 01 125 | main_decode_entry vexecute_101110 // 1011 10 126 | main_decode_entry vexecute_101111 // 1011 11 127 | 128 | // Store multiple registers 129 | main_decode_entry vexecute_stm // 1100 00 130 | main_decode_entry vexecute_stm // 1100 01 131 | 132 | // Load multiple registers 133 | main_decode_entry vexecute_ldm // 1100 10 134 | main_decode_entry vexecute_ldm // 1100 11 135 | 136 | // Conditional branch, and Supervisor Call 137 | main_decode_entry vexecute_1101xx // 1101 00 138 | main_decode_entry vexecute_1101xx // 1101 01 139 | main_decode_entry vexecute_1101xx // 1101 10 140 | main_decode_entry vexecute_1101xx // 1101 11 141 | 142 | // Unconditional Branch 143 | main_decode_entry vexecute_branch // 1110 00 144 | main_decode_entry vexecute_branch // 1110 01 145 | 146 | // 32 bit instructions from here on 147 | 148 | main_decode_entry vexecute32_prefix_undefined // 1110 10 149 | main_decode_entry vexecute32_prefix_undefined // 1110 11 150 | 151 | main_decode_entry vexecute32_11110 // 1111 00 152 | main_decode_entry vexecute32_11110 // 1111 01 153 | 154 | #if ARMULET_FEATURE_ARMV8M_BASELINE_SDIV_UDIV 155 | main_decode_entry vexecute32_111110 // 1111 10 156 | #else 157 | main_decode_entry vexecute32_prefix_undefined 158 | #endif 159 | main_decode_entry vexecute32_prefix_undefined // 1111 11 160 | main_decode_end: 161 | 162 | // These tables must be concatenated in order for the main_decode_entry 163 | // address arithmetic to work with 16-bit pointers across multiple tables: 164 | varmulet_dp_decode_table: 165 | main_decode_entry vdp_ands // 0000 166 | main_decode_entry vdp_eors // 0001 167 | main_decode_entry vdp_lsls // 0010 168 | main_decode_entry vdp_lsrs // 0011 169 | main_decode_entry vdp_asrs // 0100 170 | main_decode_entry vdp_adcs // 0101 171 | main_decode_entry vdp_sbcs // 0110 172 | main_decode_entry vdp_rors // 0111 173 | main_decode_entry vdp_tsts // 1000 174 | main_decode_entry vdp_rsbs // 1001 175 | main_decode_entry vdp_cmp // 1010 176 | main_decode_entry vdp_cmn // 1011 177 | main_decode_entry vdp_orrs // 1100 178 | main_decode_entry vdp_muls // 1101 179 | main_decode_entry vdp_bics // 1110 180 | main_decode_entry vdp_mvns // 1111 181 | 182 | .section .text.varmulet_armv6m_core 183 | 184 | v_table_branch: 185 | add r_tmp0, r_tmp0, ra 186 | lbu r_tmp0, (r_tmp0) 187 | bexti r_work2, r_inst, 8 // spare cycle so calculate for vcb_bxx 188 | add ra, ra, r_tmp0 189 | ret 190 | 191 | // in some cases the source register is in tmp0, and it is better to defer moving it to work0 until we've actually detected an exception return 
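// (do_jmp_hook_exc_return_tmp0 below does exactly that deferred move, then
// falls into the common hook dispatch)
//
// For orientation, the decode above amounts to roughly the following C -- an
// illustrative sketch only, with invented names, not code that is compiled
// anywhere: the top 6 bits of each 16-bit opcode index
// varmulet_main_decode_table, and the 010000 (data processing) group does a
// second lookup on bits [9:6] via varmulet_dp_decode_table:
//
//   extern void (*main_decode[64])(void);  // indexed by op16[15:10]
//   extern void (*dp_decode[16])(void);    // indexed by op16[9:6]
//
//   static void dispatch(uint16_t op16) {
//       main_decode[op16 >> 10]();         // 0x4000..0x43ff lands on vexecute_dp,
//   }                                      // which re-dispatches via dp_decode[(op16 >> 6) & 0xf]
//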
192 | do_jmp_hook_exc_return_tmp0: 193 | mv r_work0, r_tmp0 194 | do_jmp_hook_exc_return: 195 | jmp_hook_exc_return_trash_tmp2_work1 196 | 197 | // === BEGINNING OF INSTRUCTIONS 198 | // odd length 199 | vexecute_asrsi: // 0001 0x 200 | jal v_ldr_str_r2i_setup // work0=&Rd, work1=Rm, work2=immediate 201 | v_asr_entry: 202 | addi r_tmp1, r_work2, -1 203 | bext r_c, r_work1, r_tmp1 204 | beqz r_work2, 2f 205 | 1: 206 | sra r_lazy_nz, r_work1, r_work2 207 | store_nz_at_work0_next_instruction 208 | 2: 209 | // immediate of 0 is shift by 32 210 | li r_work2,31 211 | j 1b 212 | 213 | // === Shift (immediate), add, subtract, move, and compare 214 | main_decode_align 215 | vexecute_lslsi: // 0000 0x 216 | get_va_imm5_ra_rb r_lazy_nz, r_work1, r_work0, r_tmp0 // lazy_nz=operand/result, work1=temp, work0=dest, tmp0=shift amount 217 | v_lsl_entry: 218 | beqz r_tmp0, 1f 219 | neg r_tmp1, r_tmp0 220 | bext r_c, r_lazy_nz, r_tmp1 221 | sll r_lazy_nz, r_lazy_nz, r_tmp0 222 | 1: 223 | // immediate of 0 is movs rd, rm 224 | store_nz_at_work0_next_instruction 225 | 226 | main_decode_align 227 | vexecute_lsrsi: // 0000 1x 228 | get_va_imm5_ra_rb r_lazy_nz, r_work1, r_work0, r_tmp0 229 | v_lsr_entry: 230 | addi r_tmp1, r_tmp0, -1 231 | bext r_c, r_lazy_nz, r_tmp1 232 | beqz r_tmp0, 2f 233 | srl r_lazy_nz, r_lazy_nz, r_tmp0 234 | 1: 235 | store_nz_at_work0_next_instruction 236 | 2: 237 | // immediate of 0 is shift by 32 238 | li r_lazy_nz, 0 239 | j 1b 240 | 241 | // static counts in ROM text: 242 | // 3-reg adds: 168 243 | // 3-reg subs: 62 244 | // 2-reg addsi: 41 245 | // 2-reg subsi: 24 246 | // 1-reg addsi: 189 247 | // 1-reg subsi: 58 248 | // conclusion: 2-reg immediate instructions don't get the fast fall through 249 | 250 | main_decode_align 251 | vexecute_adds_subs: // 0001 10 252 | // adds/subs rd, rn, rm 253 | rlo_ptr_8_6 r_work2 // rm 254 | rlo_ptr_5_3 r_work1 // rn 255 | rlo_ptr_2_0 r_work0 // rd 256 | lw r_tmp0, (r_work2) // tmp0 = regs[rm] 257 | lw r_work2, (r_work1) // work2 = regs[rn] 258 | bexti r_tmp1, r_inst, 9 259 | bnez r_tmp1, vexecute_subs_common 260 | vexecute_adds_common: 261 | add_update_flags r_work2, r_tmp0 262 | store_nz_at_work0_next_instruction 263 | 264 | // .p2align 2 (uncommon) odd length 265 | vdp_muls: 266 | // cpu->regs[rdn] *= cpu->regs[rm]; 267 | mul r_lazy_nz, r_work1, r_work2 268 | j vdp_shared_store_nz_at_work0_next_instruction 269 | 270 | main_decode_align 271 | vexecute_addsi_subsi: // 0001 11 272 | // adds/subs rd, rn, #imm3 273 | rlo_ptr_5_3 r_work1 // rn 274 | rlo_ptr_2_0 r_work0 // rd 275 | lw r_work2, (r_work1) // work2 = regs[rn] 276 | h3.bextmi r_tmp0, r_inst, 6, 3 277 | bexti r_tmp1, r_inst, 9 278 | beqz r_tmp1, vexecute_adds_common 279 | j vexecute_subs_common 280 | 281 | main_decode_align 282 | vexecute_movsi: // 0010 0x 283 | rlo_ptr_10_8 r_work0 // rd 284 | zext.b r_lazy_nz, r_inst 285 | store_nz_at_work0_next_instruction 286 | 287 | main_decode_align 288 | vexecute_adds8i: // 0011 0x 289 | rlo_ptr_10_8 r_work0 // rdn 290 | lw r_work2, (r_work0) // work2 = regs[rdn] 291 | // Use destructive zext.b to get 16-bit encoding: 292 | zext.b r_inst, r_inst 293 | add_update_flags r_work2, r_inst 294 | store_nz_at_work0_next_instruction 295 | 296 | // odd length 297 | vdp_tsts: 298 | and r_lazy_nz, r_work1, r_work2 299 | next_instruction 300 | 301 | main_decode_align 302 | vexecute_subs8i: // 0011 1x 303 | rlo_ptr_10_8 r_work0 // rdn 304 | lw r_work2, (r_work0) // work2 = regs[rdn] 305 | zext.b r_tmp0, r_inst 306 | vexecute_subs_common: 307 | sub_update_flags 
r_work2, r_tmp0 308 | store_nz_at_work0_next_instruction 309 | 310 | // === Data processing 311 | main_decode_align 312 | vexecute_dp: // 0100 00 313 | rlo_ptr_2_0 r_work0 // rdn 314 | rlo_ptr_5_3 r_work1 // rm 315 | h3.bextmi r_tmp0, r_inst, 6, 4 316 | #if VARMULET_USE_HWORD_MAIN_DECODE 317 | sh1add r_tmp0, r_tmp0, r_dp_decode 318 | lhu r_tmp0, (r_tmp0) 319 | #if !defined(VARMULET_HWORD_MAIN_DECODE_ABSOLUTE_ADDR_OFFSET) 320 | // 16-bit table entry is relative to first main decode entry 321 | add r_tmp0, r_tmp0, r_main_decode 322 | #endif 323 | #else 324 | sh2add r_tmp0, r_tmp0, r_dp_decode 325 | lw r_tmp0, (r_tmp0) 326 | #endif 327 | lw r_work2, (r_work1) // work2 = regs[rm] 328 | lw r_work1, (r_work0) // work1 = regs[rdn] 329 | jr r_tmp0 330 | 331 | // .p2align 2 (uncommon) 332 | // odd length, tuck it in here 333 | vdp_rsbs: 334 | neg r_lazy_nz,r_work2 335 | seqz r_c,r_lazy_nz // C is set for 0 result, clear otherwise 336 | and r_topbit_v,r_lazy_nz,r_work2 // V is set for 0x80000000 (only case where both argument and result are negative) 337 | j vdp_shared_store_nz_at_work0_next_instruction 338 | 339 | .p2align 2 340 | vdp_ands: 341 | // cpu->regs[rdn] &= cpu->regs[rm]; 342 | and r_lazy_nz, r_work1, r_work2 343 | store_nz_at_work0_next_instruction 344 | 345 | .p2align 2 346 | vdp_eors: 347 | // cpu->regs[rdn] ^= cpu->regs[rm]; 348 | xor r_lazy_nz, r_work1, r_work2 349 | store_nz_at_work0_next_instruction 350 | 351 | // .p2align 2 // the mv is a compressed instruction 352 | vdp_lsls: 353 | mv r_lazy_nz,r_work1 354 | zext.b r_tmp0, r_work2 // shift amount 355 | li r_tmp1,32 356 | blt r_tmp0,r_tmp1,v_lsl_entry 357 | v_lsls_over_ret: 358 | slti r_c,r_tmp0,33 359 | and r_c,r_c,r_work1 360 | mv r_lazy_nz,zero 361 | j vdp_shared_store_nz_at_work0_next_instruction 362 | 363 | // .p2align 2 // the mv is a compressed instruction 364 | vdp_lsrs: 365 | mv r_lazy_nz,r_work1 366 | zext.b r_tmp0, r_work2 // shift amount 367 | beqz r_tmp0,vdp_shared_store_nz_at_work0_next_instruction // zero shift 368 | li r_tmp1,32 369 | blt r_tmp0,r_tmp1,v_lsr_entry 370 | srli r_work1,r_work1,31 371 | j v_lsls_over_ret 372 | 373 | // .p2align 2 // the mv is a compressed instruction 374 | vdp_asrs: 375 | mv r_lazy_nz,r_work1 376 | zext.b r_work2, r_work2 // shift amount 377 | beqz r_work2,vdp_shared_store_nz_at_work0_next_instruction // a zero shift amount is a no-op for shift-by-register (0 does not mean 32 here) 378 | li r_tmp1,31 379 | ble r_work2,r_tmp1,1f 380 | li r_work2,0 // a shift by >31 places is the same as a shift by 32, which is done by immediate shift operand of zero 381 | 1: 382 | j v_asr_entry 383 | 384 | .p2align 2 385 | vdp_sbcs: 386 | not r_work2, r_work2 387 | // fall thru 388 | vdp_adcs: 389 | add r_tmp0, r_work2, r_c // tmp0(bc) = b + c 390 | add r_lazy_nz, r_work1, r_tmp0 // result = a + bc 391 | 392 | // calculate C 393 | seqz r_tmp1, r_tmp0 // !bc 394 | and r_tmp1, r_tmp1, r_c 395 | sltu r_tmp0, r_lazy_nz, r_work1 396 | or r_c, r_tmp0, r_tmp1 397 | 398 | // calculate V 399 | xor r_tmp0, r_lazy_nz, r_work2 400 | xnor r_tmp1, r_work2, r_work1 401 | and r_topbit_v, r_tmp0, r_tmp1 402 | 403 | store_nz_at_work0_next_instruction 404 | 405 | // .p2align 2 (uncommon) 406 | vdp_rors: 407 | zext.b r_tmp0, r_work2 408 | ror r_lazy_nz, r_work1, r_tmp0 409 | // carry is unchanged if 8-bit shift count is 0: 410 | beqz r_tmp0, 1f 411 | srli r_c, r_lazy_nz, 31 412 | 1: 413 | j vdp_shared_store_nz_at_work0_next_instruction 414 | 415 | // .p2align 2 (uncommon) 416 | vdp_cmn: 417 | addi r_work0,r_cpu,CPU_OFFSET_LAZY_NZ // set result pointer to 
the in-memory lazy_nz slot, which is safe scratch: the live NZ value stays in r_lazy_nz and is only written back to memory on exit 418 | mv r_tmp0, r_work1 419 | j vexecute_adds_common 420 | 421 | main_decode_align 422 | vexecute_cmpi: // 0010 1x 423 | rlo_ptr_10_8 r_work0 // rdn 424 | #if 0 425 | // Maintain alignment to fall into vdp_cmp without an alignment nop 426 | .option push 427 | .option norvc 428 | lw r_work1, (r_work0) // work1 = regs[rdn] 429 | .option pop 430 | #else 431 | // Seems the C vs Zca confusion has broken .option norvc -- this is the 432 | // same instruction as above but has the requested size. 433 | .insn i 0x03, 0x2, r_work1, r_work0, 0 434 | #endif 435 | zext.b r_work2, r_inst 436 | // fall through 437 | vdp_cmp: 438 | // this is also entered from vexecute_add_cmp_mov_hi 439 | sub_update_flags r_work1, r_work2 440 | next_instruction 441 | 442 | // .p2align 2 (uncommon) 443 | vdp_mvns: 444 | // cpu->regs[rdn] = ~cpu->regs[rm]; 445 | not r_lazy_nz, r_work2 446 | j vdp_shared_store_nz_at_work0_next_instruction 447 | 448 | .p2align 2 449 | vdp_orrs: 450 | // cpu->regs[rdn] |= cpu->regs[rm]; 451 | or r_lazy_nz, r_work1, r_work2 452 | // uncommon instructions share this tail: 453 | vdp_shared_store_nz_at_work0_next_instruction: 454 | store_nz_at_work0_next_instruction 455 | 456 | .p2align 2 457 | vdp_bics: 458 | // cpu->regs[rdn] &= ~cpu->regs[rm]; 459 | andn r_lazy_nz, r_work1, r_work2 460 | store_nz_at_work0_next_instruction 461 | 462 | 463 | // === Special data instructions and branch and exchange 464 | main_decode_align 465 | vexecute_010001: // 0100 01 466 | h3.bextmi r_tmp0, r_inst, 8, 2 467 | addi r_tmp0, r_tmp0, -3 468 | beqz r_tmp0, vexecute_010001_11 469 | 470 | vexecute_add_cmp_mov_hi: 471 | // uint32_t rdn = (op16 & 0x7u) + ((op16 >> 4u & 0x8u)); 472 | bexti r_tmp0, r_inst, 7 473 | andi r_tmp1, r_inst, 7 474 | sh3add r_work0, r_tmp0, r_tmp1 475 | // uint32_t rm = (op16 >> 3u) & 0xfu; 476 | h3.bextmi r_work1, r_inst, 3, 4 477 | 478 | // uint32_t vm = get_lo_hi_reg(cpu, rm); 479 | r_lo_hi_value r_work2, r_work1, r_tmp0, r_tmp1 480 | // mov only requires value of rm 481 | bexti r_tmp0, r_inst, 9 482 | bnez r_tmp0, vexecute_mov_hi 483 | // uint32_t vdn = get_lo_hi_reg(cpu, rdn); 484 | r_lo_hi_value r_work1, r_work0, r_tmp0, r_tmp1 485 | 486 | bexti r_tmp0, r_inst, 8 487 | bnez r_tmp0, vdp_cmp 488 | vexecute_add_hi: 489 | add r_work2, r_work2, r_work1 490 | // fall through 491 | vexecute_mov_hi: 492 | // no flags, may update pc 493 | // adapted from written out version of: w_lo_hi_value_sp_pc r_work0, r_work2, r_tmp0, r_tmp1 494 | // r_work0: register to write to 0..15; r_work2: value to write 495 | sh2add r_tmp0, r_work0, r_cpu 496 | addi r_tmp1, r_work0, -13 497 | beqz r_tmp1, v_write_sp 498 | addi r_tmp1, r_tmp1, -2 499 | mv r_work0, r_work2 500 | beqz r_tmp1, v_write_pc_work0 501 | sw r_work2, (r_tmp0) 502 | next_instruction 503 | 504 | vexecute_010001_11: 505 | // branch to PC is undefined, so we don't care about getting it right 506 | // this is an unrolled register read 507 | h3.bextmi r_tmp0, r_inst, 3, 4 508 | sh2add r_tmp0, r_tmp0, r_cpu 509 | lw r_work0, (r_tmp0) 510 | 511 | bexti r_tmp0, r_inst, 7 512 | bnez r_tmp0, vexecute_blx 513 | vexecute_bx: 514 | check_exc_return_to_work0_trash_tmp2_work1 515 | j v_write_pc_work0 516 | 517 | // .p2align 2 odd length, unlikely to be in a tight loop 518 | vexecute_blx: 519 | add r_tmp1, r_pc, 1 // thumb bit 520 | sw r_tmp1, CPU_OFFSET_LR(r_cpu) 521 | v_write_pc_work0: 522 | andi r_pc, r_work0, ~1 523 | next_instruction 524 | 525 | // === Load from Literal Pool 526 | main_decode_align 527 | 
vexecute_ldr_lit: // 0100 1x 528 | // ldr (literal) 529 | // uint32_t rt = (op16 >> 8u) & 0x7u; 530 | // uint32_t imm8 = (op16 & 0xffu); 531 | // uint32_t addr = ((cpu->regs[PC] >> 2u) + 1u + imm8) << 2u; 532 | // cpu->regs[rt] = read_u32(addr); 533 | rlo_ptr_10_8 r_tmp2 534 | zext.b r_inst, r_inst 535 | 536 | addi r_tmp1, r_pc, 2 // + 2 to get (pc + 4) 537 | andi r_tmp1, r_tmp1, ~3 538 | sh2add r_inst, r_inst, r_tmp1 539 | read_mem_32_unchecked r_tmp1, r_inst // this is pc relative so no real reason to check 540 | sw r_tmp1, (r_tmp2) 541 | next_instruction 542 | 543 | v_ldr_str_r2i_setup: 544 | h3.bextmi r_work2, r_inst, 6, 5 545 | j 1f 546 | v_ldr_str_r3_setup: 547 | rlo_ptr_8_6 r_work2 // rm 548 | lw r_work2, (r_work2) 549 | 1: 550 | rlo_ptr_5_3 r_work1 // rn 551 | lw r_work1, (r_work1) 552 | rlo_ptr_2_0 r_work0 // rt 553 | ret 554 | 555 | // work0=&Rt, work1=effective address 556 | .macro ldrh_strh_r2i_setup 557 | jal v_ldr_str_r2i_setup 558 | sh1add r_work1, r_work2, r_work1 559 | .endm 560 | 561 | // work0=&Rt, work1=effective address 562 | .macro ldr_str_r3_setup 563 | jal v_ldr_str_r3_setup 564 | add r_work1, r_work1, r_work2 565 | .endm 566 | 567 | // === Load/store single data item 568 | 569 | main_decode_align 570 | vexecute_010100: // 0101 00 // str, strh 571 | ldr_str_r3_setup 572 | bexti r_tmp0, r_inst, 9 573 | bnez r_tmp0, vexecute_strh 574 | vexecute_str: 575 | lw r_tmp0, (r_work0) 576 | write_mem_32_at_work1_from_tmp0 577 | next_instruction 578 | vexecute_strh: 579 | lw r_tmp0, (r_work0) 580 | write_mem_16_at_work1_from_tmp0 581 | next_instruction 582 | vexecute_ldrsb: 583 | read_mem_s8_at_work1_to_tmp0 584 | // shared as uncommon 585 | j ld3_shared_write_tmp0_to_work0_next_instruction 586 | 587 | main_decode_align 588 | vexecute_010101: // 0101 01 // strb ldrsb 589 | ldr_str_r3_setup 590 | bexti r_tmp0, r_inst, 9 591 | bnez r_tmp0, vexecute_ldrsb 592 | vexecute_strb: 593 | lw r_tmp0, (r_work0) 594 | write_mem_8_at_work1_from_tmp0 595 | next_instruction 596 | 597 | main_decode_align 598 | vexecute_010110: // 0101 10 // ldr, ldrh 599 | ldr_str_r3_setup 600 | bexti r_tmp0, r_inst, 9 601 | bnez r_tmp0, vexecute_ldrh 602 | vexecute_ldr: 603 | read_mem_32_at_work1_to_tmp0 604 | sw r_tmp0, (r_work0) 605 | next_instruction 606 | 607 | // === Generate SP-relative address 608 | // main_decode_align: unlikely to be in a tight loop 609 | vexecute_add_sp: // 1010 1x 610 | lw r_tmp1, CPU_OFFSET_SP(r_cpu) 611 | j vexecute_address_gen 612 | 613 | main_decode_align 614 | vexecute_010111: // 0101 11 // ldrb, ldrsh 615 | ldr_str_r3_setup 616 | bexti r_tmp0, r_inst, 9 617 | bnez r_tmp0, vexecute_ldrsh 618 | vexecute_ldrb: 619 | read_mem_u8_at_work1_to_tmp0 620 | ld3_shared_write_tmp0_to_work0_next_instruction: 621 | sw r_tmp0, (r_work0) 622 | next_instruction 623 | vexecute_ldrsh: 624 | read_mem_s16_at_work1_to_tmp0 625 | // shared as uncommon 626 | j ld3_shared_write_tmp0_to_work0_next_instruction 627 | 628 | main_decode_align 629 | vexecute_stri: // 0110 0x 630 | get_va_imm5_ra_rb r_tmp0, r_work1, r_work0, r_tmp1 631 | sh2add r_work1, r_tmp1, r_tmp0 632 | lw r_tmp0, (r_work0) 633 | write_mem_32_at_work1_from_tmp0 634 | next_instruction 635 | 636 | main_decode_align 637 | vexecute_ldri: // 0110 1x 638 | get_va_imm5_ra_rb r_tmp1, r_work1, r_work0, r_tmp0 639 | sh2add r_work1, r_tmp0, r_tmp1 640 | read_mem_32_at_work1_to_tmp0 641 | sw r_tmp0, (r_work0) 642 | next_instruction 643 | 644 | main_decode_align 645 | vexecute_strbi: // 0111 0x 646 | get_va_imm5_ra_rb r_tmp0, r_work1, r_work0, 
r_tmp1 647 | add r_work1, r_tmp1, r_tmp0 648 | lw r_tmp0, (r_work0) 649 | write_mem_8_at_work1_from_tmp0 650 | next_instruction 651 | 652 | main_decode_align 653 | vexecute_ldrbi: // 0111 1x 654 | get_va_imm5_ra_rb r_tmp1, r_work1, r_work0, r_tmp0 655 | add r_work1, r_tmp0, r_tmp1 656 | read_mem_u8_at_work1_to_tmp0 657 | sw r_tmp0, (r_work0) 658 | next_instruction 659 | 660 | 661 | main_decode_align 662 | vexecute_strhi: // 1000 0x 663 | ldrh_strh_r2i_setup 664 | lw r_tmp0, (r_work0) 665 | write_mem_16_at_work1_from_tmp0 666 | next_instruction 667 | 668 | main_decode_align 669 | vexecute_ldrhi: // 1000 1x 670 | ldrh_strh_r2i_setup 671 | read_mem_u16_at_work1_to_tmp0 672 | sw r_tmp0, (r_work0) 673 | next_instruction 674 | 675 | 676 | // No .align required, starts with 16-bit 677 | //main_decode_align 678 | vexecute_strspi: // 1001 0x 679 | lw r_tmp1, CPU_OFFSET_SP(r_cpu) 680 | rlo_ptr_10_8 r_tmp2 681 | zext.b r_inst, r_inst 682 | sh2add r_work1, r_inst, r_tmp1 683 | lw r_tmp0, (r_tmp2) 684 | write_stack_32_at_work1_from_tmp0 685 | next_instruction 686 | 687 | // No .align required, starts with 16-bit 688 | //main_decode_align 689 | vexecute_ldrspi: // 1001 1x 690 | lw r_tmp1, CPU_OFFSET_SP(r_cpu) 691 | rlo_ptr_10_8 r_tmp2 692 | zext.b r_inst, r_inst 693 | sh2add r_work1, r_inst, r_tmp1 694 | read_stack_32_at_work1_to_tmp0 695 | sw r_tmp0, (r_tmp2) 696 | next_instruction 697 | 698 | // === Generate PC-relative address 699 | main_decode_align 700 | vexecute_adr: // 1010 0x 701 | addi r_tmp1, r_pc, 2 702 | andi r_tmp1, r_tmp1, ~3 703 | vexecute_address_gen: 704 | rlo_ptr_10_8 r_work0 // rd 705 | zext.b r_tmp0, r_inst 706 | sh2add r_tmp1, r_tmp0, r_tmp1 707 | sw r_tmp1, (r_work0) 708 | next_instruction 709 | 710 | // odd length 711 | vexecute_ldrh: 712 | read_mem_u16_at_work1_to_tmp0 713 | sw r_tmp0, (r_work0) 714 | next_instruction 715 | 716 | 717 | // === Miscellaneous 16-bit instructions 718 | main_decode_align 719 | vexecute_101100: // 1011 00 720 | bexti r_tmp0, r_inst, 8 721 | #if ARMULET_FEATURE_ARMV8M_BASELINE_CBZ_CBNZ 722 | bnez r_tmp0, vexecute_cbz_cbnz 723 | #else 724 | bnez r_tmp0, vexecute_undefined // todo more specific? 
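// (without ARMULET_FEATURE_ARMV8M_BASELINE_CBZ_CBNZ the encodings hit by this
// branch, 1011 0001 xxx and 1011 0011 xxx, are unallocated in ARMv6-M -- they
// are listed in test/undef_inst_test.cpp -- so routing them to the
// undefined16 hook is the correct fallback)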
725 | #endif 726 | bexti r_tmp0, r_inst, 9 727 | bnez r_tmp0, vexecute_extend 728 | vexecute_add_sub_sp: 729 | andi r_tmp0, r_inst, 0x7f 730 | lw r_work2, CPU_OFFSET_SP(r_cpu) 731 | 732 | bexti r_tmp1, r_inst, 7 // 1 for sub, 0 for add 733 | beqz r_tmp1, 1f 734 | neg r_tmp0, r_tmp0 735 | 1: 736 | sh2add r_work2, r_tmp0, r_work2 737 | v_write_sp: 738 | andi r_work2, r_work2, ~3 739 | sw r_work2, CPU_OFFSET_SP(r_cpu) 740 | check_splim r_work2, r_tmp0 741 | next_instruction 742 | 743 | vexecute_extend: 744 | rlo_ptr_5_3 r_work1 // rm 745 | rlo_ptr_2_0 r_work0 // rd 746 | lw r_work1, (r_work1) 747 | h3.bextmi r_tmp0, r_inst, 6, 2 748 | table_branch extend 749 | table_branch_entry extend,vexecute_extend_sxth 750 | table_branch_entry extend,vexecute_extend_sxtb 751 | table_branch_entry extend,vexecute_extend_uxth 752 | table_branch_entry extend,vexecute_extend_uxtb 753 | 754 | vexecute_extend_sxtb: // 1 755 | sext.b r_work1, r_work1 756 | vexecute_extend_sxth: // 0 757 | sext.h r_work1, r_work1 758 | j vexecute_extend_done 759 | vexecute_extend_uxtb: // 3 (most common) 760 | zext.b r_work1, r_work1 761 | j vexecute_extend_done // this is the commonest one, and this block is otherwise odd length anyway 762 | vexecute_extend_uxth: // 2 763 | zext.h r_work1, r_work1 764 | vexecute_extend_done: 765 | sw r_work1, (r_work0) 766 | v_next_instruction_0: 767 | next_instruction 768 | 769 | main_decode_align 770 | vexecute_101101: // 1011 01 771 | bexti r_tmp0, r_inst, 9 772 | bnez r_tmp0, vexecute_101101_1 773 | vexecute_push: 774 | // push 775 | // pre-decrement SP as a thought towards interrupts (if they share the same stack) 776 | // cpu->regs[SP] -= (uint32_t)__builtin_popcount(op16 & 0x1ffu) << 2u; 777 | // __compiler_memory_barrier(); 778 | lw r_work1, CPU_OFFSET_SP(r_cpu) 779 | andi r_tmp1, r_inst, 0x1ff 780 | cpop r_tmp0, r_tmp1 781 | slli r_tmp0, r_tmp0, 2 782 | sub r_work0, r_work1, r_tmp0 783 | sw r_work0, CPU_OFFSET_SP(r_cpu) 784 | check_splim r_work0, r_tmp0 785 | // uint32_t addr = cpu->regs[SP]; 786 | // uint32_t lo_bitmap = op16 & 0xff; 787 | // while (lo_bitmap) { 788 | // uint regnum = __builtin_ctz(lo_bitmap); 789 | // lo_bitmap &= ~(1u << regnum); 790 | // write_u32(addr, cpu_regs[i]); 791 | // addr += 4 792 | // } 793 | 794 | #if !ASSUME_STACK_SAFE // then write_stack_32_at_work1_from_tmp0 is the same as write_mem_32_at_work1_from_tmp0 and we can tail into stmia 795 | andi r_tmp2, r_inst, 0xff 796 | beq r_tmp1,r_tmp2,1f // r14 included? 
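// LR (bit 8) is in the register list: store it first, at the highest address
// of the pushed block (old SP - 4), then fall into the stmia loop for the low
// registers -- r_tmp1 (op16 & 0x1ff) can only differ from r_tmp2 (op16 & 0xff)
// in bit 8, hence the beq test above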
797 | lw r_tmp0, CPU_OFFSET_LR(r_cpu) 798 | addi r_work1,r_work1,-4 799 | write_mem_32_at_work1_from_tmp0 800 | 1: 801 | mv r_work1,r_work0 802 | beqz r_tmp2, v_next_instruction_0 // early skip for push {lr} 803 | li r_work2,0 // zero pointer to avoid writeback 804 | j v_stmia_entry 805 | 806 | #else 807 | andi r_tmp2, r_inst, 0xff 808 | beqz r_tmp2, 2f // early skip for push {lr} 809 | 1: 810 | ctz r_tmp1, r_tmp2 811 | bclr r_tmp2, r_tmp2, r_tmp1 812 | sh2add r_work0, r_tmp1, r_cpu 813 | lw r_tmp0, (r_work0) 814 | write_stack_32_at_work1_from_tmp0 815 | addi r_work1, r_work1, 4 816 | bnez r_tmp2, 1b 817 | 2: 818 | bexti r_tmp0, r_inst, 8 819 | beqz r_tmp0, 1f 820 | // if (prefix & 1) { 821 | // write_u32(addr, cpu->regs[LR]); 822 | // } 823 | lw r_tmp0, CPU_OFFSET_LR(r_cpu) 824 | write_stack_32_at_work1_from_tmp0 825 | 1: 826 | next_instruction 827 | #endif 828 | 829 | main_decode_align 830 | vexecute_101110: // 1011 10 831 | #if ARMULET_FEATURE_ARMV8M_BASELINE_CBZ_CBNZ 832 | bexti r_tmp0, r_inst, 8 833 | // Note cbz/cbnz are overwhelmingly more common than rev/rev16/revsh 834 | beqz r_tmp0, vexecute_rev16_rev_revsh 835 | vexecute_cbz_cbnz: 836 | rlo_ptr_2_0 r_work0 // rn 837 | lw r_work0, (r_work0) 838 | bexti r_work1, r_inst, 11 //r_work1 = do_branch_on_nonzero 839 | seqz r_work0, r_work0 // r_work0 = reg_was_zero 840 | beq r_work0, r_work1, 1f 841 | h3.bextmi r_tmp0, r_inst, 3, 5 842 | bexti r_tmp1, r_inst, 9 843 | slli r_tmp1, r_tmp1, 6 844 | sh1add r_pc, r_tmp0, r_pc 845 | add r_pc, r_pc, r_tmp1 846 | addi r_pc, r_pc, 2 847 | 1: 848 | next_instruction 849 | #endif 850 | vexecute_rev16_rev_revsh: 851 | rlo_ptr_5_3 r_work1 // rm 852 | rlo_ptr_2_0 r_work0 // rd 853 | lw r_tmp1, (r_work1) 854 | h3.bextmi r_tmp0, r_inst, 6, 4 855 | addi r_tmp0, r_tmp0, -8 856 | beqz r_tmp0, vexecute_rev // == 8 857 | addi r_tmp0, r_tmp0, -1 858 | beqz r_tmp0, vexecute_rev16 // == 9 859 | addi r_tmp0, r_tmp0, -2 860 | bnez r_tmp0, vexecute_undefined // != 11 861 | 862 | vexecute_revsh: 863 | rev8 r_tmp1, r_tmp1 864 | srai r_tmp1, r_tmp1, 16 865 | // Share tails as this is an uncommon instruction 866 | j 1f 867 | 868 | vexecute_rev16: 869 | rori r_tmp1, r_tmp1, 16 870 | // fall through 871 | vexecute_rev: 872 | rev8 r_tmp1, r_tmp1 873 | 1: 874 | sw r_tmp1, (r_work0) 875 | next_instruction 876 | 877 | main_decode_align 878 | vexecute_101111: // 1011 11 879 | bexti r_tmp0, r_inst, 9 880 | bnez r_tmp0, vexecute_101111_1 881 | vexecute_pop: 882 | // uint32_t addr = cpu->regs[SP]; 883 | addi r_work2,r_cpu,CPU_OFFSET_SP 884 | lw r_work1, (r_work2) 885 | // uint32_t lo_bitmap = op16 & 0xff; 886 | // while (lo_bitmap) { 887 | // uint regnum = __builtin_ctz(lo_bitmap); 888 | // lo_bitmap &= ~(1u << regnum); 889 | // cpu->regs[regnum] = read_u32(addr); 890 | // addr += 4; 891 | // } 892 | andi r_tmp2, r_inst, 0xff 893 | beqz r_tmp2, 2f // early skip for pop {pc} 894 | v_pop_entry: 895 | 1: 896 | ctz r_tmp1, r_tmp2 897 | bclr r_tmp2, r_tmp2, r_tmp1 898 | read_stack_32_at_work1_to_tmp0 899 | sh2add r_work0, r_tmp1, r_cpu 900 | sw r_tmp0, (r_work0) 901 | addi r_work1, r_work1, 4 902 | bnez r_tmp2, 1b 903 | 2: 904 | 905 | // if (prefix & 1) { 906 | // uint32_t next_pc = read_u32(addr); 907 | // check_exc_return(cpu, next_pc); 908 | // update_pc(cpu, next_pc); 909 | // addr += 4; 910 | // } 911 | bexti r_tmp0, r_inst, 8 912 | beqz r_tmp0, pop_no_pc 913 | read_stack_32_at_work1_to_tmp0 914 | addi r_work1, r_work1, 4 915 | sw r_work1, (r_work2) 916 | check_exc_return_to_tmp0_trash_tmp2_work1 917 | andi r_pc, r_tmp0, ~1 
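// (the popped value has already been vetted by check_exc_return; clearing
// bit 0 discards the Thumb bit to form the new PC)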
918 | next_instruction 919 | 920 | // __compiler_memory_barrier(); 921 | // cpu->regs[SP] = addr; 922 | pop_no_pc: 923 | beqz r_work2,1f 924 | sw r_work1, (r_work2) 925 | 1: 926 | next_instruction 927 | 928 | vexecute_dummy_bkpt: 929 | // Set r_inst=0xb000 so that it is a useful telltale in the handler (often 930 | // a native ebreak) -- we enter this path on an splim check fail 931 | lui r_inst, 0xb 932 | // should already be aligned, but just make sure: 933 | main_decode_align 934 | vexecute_101111_1: 935 | #if VASM_HOOKS_INDEX_HINT_INSTR != VASM_HOOKS_INDEX_BKPT_INSTR + 1 936 | #error "Uh oh" 937 | #endif 938 | bexti r_tmp2, r_inst, 8 939 | addi r_tmp2, r_tmp2, VASM_HOOKS_INDEX_BKPT_INSTR 940 | enter_asm_hook_shared_if_smaller 941 | 942 | // === Store multiple registers 943 | main_decode_align 944 | vexecute_stm: // 1100 0x 945 | // uint32_t rn = (op16 >> 8u) & 0x7u; 946 | rlo_ptr_10_8 r_work2 // rn 947 | // uint32_t addr = cpu->regs[rn]; 948 | lw r_work1, (r_work2) 949 | // uint32_t lo_bitmap = op16 & 0xffu; 950 | // while (lo_bitmap) { 951 | // uint regnum = __builtin_ctz(lo_bitmap); 952 | // lo_bitmap &= ~(1u << regnum); 953 | // write_u32(addr, cpu->regs[regnum]); 954 | // addr += 4; 955 | // } 956 | andi r_tmp2, r_inst, 0xff 957 | // UNPREDICTABLE if bitmap is all-zeroes: treated as {r0} (save a beqz) 958 | v_stmia_entry: 959 | 1: 960 | ctz r_tmp1, r_tmp2 961 | bclr r_tmp2, r_tmp2, r_tmp1 962 | sh2add r_work0, r_tmp1, r_cpu 963 | lw r_tmp0, (r_work0) 964 | write_mem_32_at_work1_from_tmp0 965 | addi r_work1, r_work1, 4 966 | bnez r_tmp2, 1b 967 | 2: 968 | beqz r_work2,1f // writeback pointer==0? don't write to it 969 | sw r_work1, (r_work2) 970 | 1: 971 | next_instruction 972 | 973 | // odd length 974 | vexecute_101101_1: 975 | li r_tmp0, 3 976 | h3.bextmi r_tmp1, r_inst, 5, 4 977 | bne r_tmp0, r_tmp1, vexecute_undefined 978 | 979 | // 101101_10011 980 | vexecute_cps: 981 | li r_tmp2, VASM_HOOKS_INDEX_CPS_INSTR 982 | enter_asm_hook_shared_if_smaller 983 | 984 | // === Load multiple registers 985 | main_decode_align 986 | vexecute_ldm: // 1100 1x // ldmia 987 | // uint32_t rn = (op16 >> 8u) & 0x7u; 988 | // This would be rlo_ptr_10_8 r_work2 but we also need to remember rn for later 989 | h3.bextmi r_work0, r_inst, 8, 3 990 | sh2add r_work2, r_work0, r_cpu 991 | // uint32_t addr = cpu->regs[rn]; 992 | lw r_work1, (r_work2) 993 | // uint32_t lo_bitmap = op16 & 0xffu; 994 | // while (lo_bitmap) { 995 | // uint regnum = __builtin_ctz(lo_bitmap); 996 | // lo_bitmap &= ~(1u << regnum); 997 | // cpu->regs[regnum] = read_u32(addr); 998 | // addr += 4; 999 | // } 1000 | #if !ASSUME_STACK_SAFE // then read_stack_32_at_work1_to_tmp0 is the same as read_mem_32_at_work1_to_tmp0 and we can tail into pop 1001 | zext.b r_inst, r_inst 1002 | mv r_tmp2,r_inst 1003 | bext r_tmp0, r_inst, r_work0 1004 | beqz r_tmp0, 1f 1005 | li r_work2,0 // in the list, so no writeback 1006 | 1: 1007 | j v_pop_entry 1008 | #else 1009 | // UNPREDICTABLE if bitmap is all-zeroes: treated as {r0}, except that rn 1010 | // is written back even if rn is r0 (save a beqz) 1011 | zext.b r_tmp2, r_inst 1012 | 1: 1013 | ctz r_tmp1, r_tmp2 1014 | bclr r_tmp2, r_tmp2, r_tmp1 1015 | read_mem_32_at_work1_to_tmp0 1016 | sh2add r_tmp1, r_tmp1, r_cpu 1017 | sw r_tmp0, (r_tmp1) 1018 | addi r_work1, r_work1, 4 1019 | bnez r_tmp2, 1b 1020 | 2: 1021 | // if (!(op16 & (1u << rn))) cpu->regs[rn] = addr; 1022 | bext r_tmp0, r_inst, r_work0 1023 | bnez r_tmp0, 1f 1024 | sw r_work1, (r_work2) 1025 | 1: 1026 | next_instruction 1027 | #endif 1028 | 1029 | 
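// The vcb_* handlers in the next section test the unpacked flag registers
// directly. As a hedged, illustrative C sketch of the same logic (names
// invented here; nothing below is compiled into the emulator): N is the sign
// bit of r_lazy_nz, Z is "r_lazy_nz == 0", C is r_c (0 or 1), and V is the
// sign bit of r_topbit_v; bits [11:9] of the opcode pick the condition pair
// and bit 8 selects the inverted form.
//
//   static bool take_branch(uint16_t op16, int32_t lazy_nz, uint32_t c, int32_t topbit_v) {
//       bool t;
//       switch ((op16 >> 9) & 0x7) {
//       case 0: t = lazy_nz == 0; break;                               // beq/bne
//       case 1: t = c != 0; break;                                     // bcs/bcc
//       case 2: t = lazy_nz < 0; break;                                // bmi/bpl
//       case 3: t = topbit_v < 0; break;                               // bvs/bvc
//       case 4: t = c != 0 && lazy_nz != 0; break;                     // bhi/bls
//       case 5: t = (lazy_nz ^ topbit_v) >= 0; break;                  // bge/blt (N == V)
//       case 6: t = lazy_nz != 0 && (lazy_nz ^ topbit_v) >= 0; break;  // bgt/ble
//       default: return false;                                         // svc/udf slot, not a branch
//       }
//       return t != (bool)((op16 >> 8) & 1);
//   }
//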
// === Conditional branch, and Supervisor Call 1030 | main_decode_align 1031 | vexecute_1101xx: // 1101 xx 1032 | h3.bextmi r_tmp0, r_inst, 9, 3 1033 | // eq/ne is the most common, make it faster 1034 | #define VCB_EQ_NE_FASTER 1 1035 | #if VCB_EQ_NE_FASTER 1036 | beqz r_tmp0,vcb_eq_ne 1037 | #endif 1038 | table_branch bxx 1039 | table_branch_entry bxx,vcb_eq_ne // 0000 1040 | table_branch_entry bxx,vcb_cs_cc // 0001 1041 | table_branch_entry bxx,vcb_mi_pl // 0010 1042 | table_branch_entry bxx,vcb_vs_vc // 0011 1043 | table_branch_entry bxx,vcb_hi_ls // 0100 1044 | table_branch_entry bxx,vcb_ge_lt // 0101 1045 | table_branch_entry bxx,vcb_gt_le // 0110 1046 | table_branch_entry bxx,vcb_svc_udf // 0111 1047 | 1048 | // NOTE r_work2 is reserved in all vcb_x routines 1049 | 1050 | // align both this common option, and the shared vcb_bxx_check 1051 | .p2align 2 1052 | vcb_eq_ne: // 0000 1053 | #if VCB_EQ_NE_FASTER 1054 | bexti r_work2, r_inst, 8 1055 | #endif 1056 | bnez r_lazy_nz,vcb_bxx_no 1057 | // fall through 1058 | vcb_bxx_yes: 1059 | // This functions as a logical not (assuming input is 0/1) with a 16-bit 1060 | // opcode, which keeps the next instruction aligned. 1061 | addi r_work2, r_work2, -1 1062 | vcb_bxx_no: 1063 | beqz r_work2, 1f 1064 | sext.b r_inst, r_inst 1065 | sh1add r_pc, r_inst, r_pc 1066 | addi r_pc, r_pc, 2 1067 | 1: 1068 | next_instruction 1069 | 1070 | // packed to give aligned entry points for more-common branch types when first 1071 | // instruction is 32-bit -- in particular bvs/bvc are incredibly rare, as is svc 1072 | 1073 | .p2align 2 1074 | vcb_cs_cc: // 0001 1075 | beqz r_c,vcb_bxx_no 1076 | j vcb_bxx_yes 1077 | 1078 | vcb_vs_vc: // 0011 1079 | bltz r_topbit_v,vcb_bxx_yes 1080 | j vcb_bxx_no 1081 | 1082 | vcb_hi_ls: // 0100 1083 | // take_branch = cpu->C && !get_Z(cpu); 1084 | beqz r_c,vcb_bxx_no 1085 | beqz r_lazy_nz,vcb_bxx_no 1086 | j vcb_bxx_yes 1087 | 1088 | vcb_mi_pl: // 0010 1089 | bltz r_lazy_nz,vcb_bxx_yes 1090 | j vcb_bxx_no 1091 | 1092 | vcb_gt_le: // 0110 1093 | // take_branch = !get_Z(cpu) && get_N(cpu) == get_V(cpu); 1094 | beqz r_lazy_nz,vcb_bxx_no 1095 | // fall thru 1096 | vcb_ge_lt: // 0101 1097 | // take_branch = get_N(cpu) == get_V(cpu); 1098 | xor r_work0,r_lazy_nz,r_topbit_v 1099 | bltz r_work0,vcb_bxx_no 1100 | j vcb_bxx_yes 1101 | 1102 | vcb_svc_udf: // 0111 1103 | beqz r_work2, vexecute_undefined 1104 | 1105 | vcb_svc: 1106 | li r_tmp2, VASM_HOOKS_INDEX_SVC_INSTR 1107 | enter_asm_hook_shared_if_smaller 1108 | 1109 | // === Unconditional Branch 1110 | // main_decode_align don't care, 16-bit 1111 | vexecute_branch: // 1110 0x 1112 | // uint32_t delta = (uint32_t)((((int32_t) op16) << 21u) >> 20u); 1113 | slli r_inst, r_inst, 21 1114 | srai r_inst, r_inst, 20 1115 | add r_pc, r_pc, r_inst 1116 | addi r_pc, r_pc, 2 1117 | next_instruction 1118 | 1119 | // === 32 bit instructions from here on 1120 | 1121 | main_decode_align 1122 | vexecute32_11110: // 1111 0x 1123 | read_mem_u16_unchecked r_work2, r_pc 1124 | add r_pc, r_pc, 2 1125 | #if ARMULET_FEATURE_ARMV8M_BASELINE_MOVW_MOVT 1126 | bexti r_tmp0, r_work2, 15 1127 | beqz r_tmp0, vexecute32_dp 1128 | #endif 1129 | bexti r_tmp0, r_work2, 14 1130 | beqz r_tmp0, vexecute32_misc 1131 | 1132 | vexecute32_11110x_and_11: 1133 | bexti r_tmp0, r_work2, 12 1134 | beqz r_tmp0, vexecute32_undefined 1135 | vexecute32_bl: // 11110x and 11x1 1136 | add r_tmp1, r_pc, 1 // thumb bit 1137 | sw r_tmp1, CPU_OFFSET_LR(r_cpu) 1138 | //fallthrough 1139 | 1140 | vexecute32_bw: 1141 | // r_inst[12:0] = 1 0 S 
imm10 1142 | addi r_tmp2, r_inst, -1024 // r_tmp2[12:0] = S !S !S imm10 1143 | slli r_tmp0, r_work2, 5 // concatenate imm11 to end (plus 5 incidental zeroes) 1144 | pack r_tmp2, r_tmp0, r_tmp2 // (it's called pack but I use it to unpack things???) 1145 | slli r_tmp2, r_tmp2, 3 // Sign-extend and scale by 2 overall (sll 5 + 3 - 7) 1146 | srai r_tmp2, r_tmp2, 7 // {{8{S}}, !S, !S, imm10, imm11, 1'b0} 1147 | 1148 | bexti r_tmp0, r_work2, 13 // J1 1149 | bexti r_tmp1, r_work2, 11 // J2 1150 | sh1add r_tmp0, r_tmp0, r_tmp1 // {J1, J2} 1151 | slli r_tmp0, r_tmp0, 22 1152 | 1153 | xor r_tmp0, r_tmp0, r_tmp2 // Mix the pasta and the sauce 1154 | add r_pc, r_pc, r_tmp0 1155 | next_instruction 1156 | 1157 | #if ARMULET_FEATURE_ARMV8M_BASELINE_MOVW_MOVT 1158 | main_decode_align 1159 | vexecute32_dp: 1160 | pack r_tmp1, r_work2, r_inst // xxx:imm4:0:imm3:xxx 1161 | srli r_tmp1, r_tmp1, 12 // xxx:imm4:0:imm3 1162 | packh r_tmp1, r_work2, r_tmp1 // imm4:0:imm3:imm8 1163 | andi r_tmp2, r_inst, 1 << 10 // i: zeroes 1164 | sh1add r_tmp1, r_tmp2, r_tmp1 // imm4:i:imm3:imm8 (imm16 complete) 1165 | r_ptr32_11_8 r_tmp0 //Rd 1166 | h3.bextmi r_tmp2, r_inst, 4, 6 //was it even an instr we support? 1167 | addi r_tmp2, r_tmp2, -0x24 1168 | bnez r_tmp2, 1f 1169 | // fallthrough to common case of movw 1170 | vexecute32_movw: 1171 | sw r_tmp1, (r_tmp0) 1172 | next_instruction 1173 | 1: 1174 | addi r_tmp2, r_tmp2, -0x08 1175 | bnez r_tmp2, vexecute32_undefined 1176 | // fallthrough 1177 | vexecute32_movt: 1178 | sh r_tmp1, 2(r_tmp0) 1179 | next_instruction 1180 | 1181 | #endif 1182 | 1183 | vexecute32_misc: // 11110x and 10 1184 | bexti r_tmp0, r_work2, 12 1185 | #if ARMULET_FEATURE_ARMV8M_BASELINE_BW 1186 | bnez r_tmp0, vexecute32_bw // op2 == 1 or op2 == 3 1187 | #else 1188 | bnez r_tmp0, vexecute32_undefined // op2 == 1 or op2 == 3 1189 | #endif 1190 | bexti r_tmp0, r_inst, 10 1191 | bnez r_tmp0, vexecute32_undefined // includes udf 1192 | andi r_tmp0, r_tmp0, 3 // op1 == 1xxxxxx 1193 | bnez r_tmp0, vexecute32_undefined // includes udf 1194 | // 11110x and 10x0 (op2 == 0) 1195 | h3.bextmi r_tmp0, r_inst, 5, 7 // op1 >> 1 1196 | addi r_tmp0, r_tmp0, -0x1c 1197 | beqz r_tmp0, vexecute32_msr // == 0x1c 1198 | addi r_tmp0, r_tmp0, -1 1199 | beqz r_tmp0, vexecute32_misc_control // == 0x1d 1200 | addi r_tmp0, r_tmp0, -2 1201 | bnez r_tmp0, vexecute32_undefined // != 0x1f 1202 | // fall thru 1203 | vexecute32_mrs: 1204 | li r_tmp2, VASM_HOOKS_INDEX_MRS_INSTR 1205 | enter_asm_hook_shared_if_smaller 1206 | vexecute32_msr: 1207 | li r_tmp2, VASM_HOOKS_INDEX_MSR_INSTR 1208 | enter_asm_hook_shared_if_smaller 1209 | vexecute32_misc_control: 1210 | li r_tmp2, VASM_HOOKS_INDEX_MISC_CONTROL_INSTR 1211 | // fall through 1212 | shared_enter_asm_hook: 1213 | tail_call_asm_hook_in_tmp2_trash_work1 1214 | 1215 | #if ARMULET_FEATURE_ARMV8M_BASELINE_SDIV_UDIV 1216 | vexecute32_111110: 1217 | read_mem_u16_unchecked r_work2, r_pc 1218 | add r_pc, r_pc, 2 1219 | r_ptr32_19_16 r_tmp0 1220 | lw r_tmp0, (r_tmp0) //Rn 1221 | r_ptr32_3_0 r_tmp1 1222 | lw r_tmp1, (r_tmp1) //Rm 1223 | r_ptr32_11_8 r_work0 //&Rd 1224 | h3.bextmi r_tmp2, r_inst, 4, 6 //was it even an instr we support? 
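// bits [9:4] of the first halfword identify the op: SDIV is encoded 0xfb9x
// (giving 0x39) and UDIV 0xfbbx (giving 0x3b), hence the -0x39 then -0x02
// subtractions below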
1225 | addi r_tmp2, r_tmp2, -0x39 1226 | beqz r_tmp2, vexecute32_sdiv 1227 | addi r_tmp2, r_tmp2, -0x02 1228 | bnez r_tmp2, vexecute32_undefined 1229 | //fallthrough 1230 | 1231 | vexecute32_udiv: 1232 | beqz r_tmp1, div_by_zero 1233 | divu r_tmp0, r_tmp0, r_tmp1 1234 | sw r_tmp0, (r_work0) 1235 | next_instruction 1236 | 1237 | vexecute32_sdiv: 1238 | beqz r_tmp1, div_by_zero 1239 | div r_tmp0, r_tmp0, r_tmp1 1240 | sw r_tmp0, (r_work0) 1241 | next_instruction 1242 | 1243 | div_by_zero: 1244 | sw r_tmp1, (r_work0) 1245 | next_instruction 1246 | #endif 1247 | 1248 | vexecute_undefined: 1249 | jmp_hook_undefined16_trash_tmp2_work1 1250 | 1251 | vexecute32_prefix_undefined: 1252 | read_mem_u16_unchecked r_work2, r_pc 1253 | addi r_pc, r_pc, 2 1254 | // fall thru 1255 | 1256 | vexecute32_undefined: 1257 | jmp_hook_undefined32_trash_tmp2_work1 1258 | 1259 | // === END OF INSTRUCTIONS 1260 | -------------------------------------------------------------------------------- /varmulet/varmulet_hooks_default.S: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #if PICO_RP2350 8 | #include "pico.h" 9 | #endif 10 | 11 | #include "varmulet_macros.inc.S" 12 | 13 | .macro j_possibly_short sym 14 | #if VARMULET_FORCE_SHORT_JUMPS_FROM_DEFAULT_HOOKS 15 | .reloc ., R_RISCV_RVC_JUMP, \sym 16 | .hword 0xa001 17 | #else 18 | j \sym 19 | #endif 20 | .endm 21 | 22 | .global varmulet_default_asm_hooks 23 | .global varmulet_halt 24 | 25 | // for use in static asm_hooks compiled against the code 26 | .macro public_hook name 27 | .global \name 28 | .word \name 29 | .endm 30 | 31 | code_label_with_section varmulet_halt 32 | ebreak 33 | j varmulet_halt 34 | 35 | .section .rodata.varmulet_default_asm_hooks 36 | .p2align 2 37 | varmulet_default_asm_hooks: 38 | public_hook varmulet_hook_default_enter_fn 39 | public_hook varmulet_hook_default_exit_fn 40 | public_hook varmulet_hook_default_save_regs_fn 41 | public_hook varmulet_hook_default_restore_regs_fn 42 | #if !VARMULET_USE_ENTER_HOOK_TO_OVERRIDE_REGISTER_STORED_HOOKS 43 | public_hook varmulet_hook_default_execute_instruction 44 | public_hook varmulet_main_decode_table 45 | public_hook varmulet_dp_decode_table 46 | #endif 47 | .word varmulet_halt // undefined16 48 | .word varmulet_halt // undefined32 49 | public_hook varmulet_hook_default_bkpt_instr 50 | public_hook varmulet_hook_default_hint_instr 51 | .word varmulet_halt // public_hook varmulet_hook_default_svc_instr 52 | public_hook varmulet_hook_default_cps_instr 53 | public_hook varmulet_hook_default_mrs_instr 54 | public_hook varmulet_hook_default_msr_instr 55 | public_hook varmulet_hook_default_misc_control_instr 56 | public_hook varmulet_hook_default_update_primask_fn 57 | public_hook varmulet_hook_default_exc_return 58 | #if ARMULET_FEATURE_CALL 59 | public_hook varmulet_hook_default_call_return 60 | #else 61 | .word varmulet_halt 62 | #endif 63 | 64 | code_label_with_section varmulet_hook_default_save_regs_fn 65 | .global varmulet_save_a0_a7_t0_t5 66 | varmulet_save_a0_a7_t0_t5: 67 | addi sp, sp, -56 68 | sw a0, 0(sp) 69 | sw a1, 4(sp) 70 | sw a2, 8(sp) 71 | sw a3, 12(sp) 72 | sw a4, 16(sp) 73 | sw a5, 20(sp) 74 | sw a6, 24(sp) 75 | sw a7, 28(sp) 76 | sw t0, 32(sp) 77 | sw t1, 36(sp) 78 | sw t2, 40(sp) 79 | sw t3, 44(sp) 80 | sw t4, 48(sp) 81 | sw t5, 52(sp) 82 | // note we don't save/restore t6 which is r_asm_hooks because we need it to call this method so 
it 83 | // is saved after this function 84 | // sw t6, 56(sp) 85 | ret 86 | 87 | code_label_with_section varmulet_hook_default_restore_regs_fn 88 | .global varmulet_restore_a0_a7_t0_t5 89 | varmulet_restore_a0_a7_t0_t5: 90 | lw a0, 0(sp) 91 | lw a1, 4(sp) 92 | lw a2, 8(sp) 93 | lw a3, 12(sp) 94 | lw a4, 16(sp) 95 | lw a5, 20(sp) 96 | lw a6, 24(sp) 97 | lw a7, 28(sp) 98 | lw t0, 32(sp) 99 | lw t1, 36(sp) 100 | lw t2, 40(sp) 101 | lw t3, 44(sp) 102 | lw t4, 48(sp) 103 | lw t5, 52(sp) 104 | // note we don't save/restore t6 which is r_asm_hooks because we need it to call this method so it 105 | // is restored before this function 106 | //lw t6, 56(sp) 107 | addi sp, sp, 56 108 | ret 109 | 110 | .macro la_codeaddr rd, sym 111 | // lui is expected to be 16-bit if we're using 16-bit pointers, so this should be smaller 112 | // (note `la` expands to `auipc; addi` even with medlow code model) 113 | #if VARMULET_USE_HWORD_MAIN_DECODE && defined(VARMULET_HWORD_MAIN_DECODE_ABSOLUTE_ADDR_OFFSET) 114 | lui \rd, %hi(\sym) 115 | addi \rd, \rd, %lo(\sym) 116 | #else 117 | la \rd, \sym 118 | #endif 119 | .endm 120 | 121 | code_label_with_section varmulet_hook_default_enter_fn 122 | #if __riscv_zcmp 123 | cm.push {ra, s0-s2}, -16 124 | #else 125 | addi sp, sp, -12 126 | sw s0, (sp) 127 | sw s1, 4(sp) 128 | sw s2, 8(sp) 129 | #endif 130 | #if VARMULET_USE_WATERMARK 131 | li r_watermark, 0xe0000000 132 | #endif 133 | li r_f0000000, 0xf0000000 134 | #if !VARMULET_USE_ENTER_HOOK_TO_OVERRIDE_REGISTER_STORED_HOOKS 135 | #if ARMULET_FEATURE_ASM_HOOKS_IS_FUNCTION 136 | #error ARMULET_FEATURE_ASM_HOOKS_IS_FUNCTION not supported without VARMULET_USE_ENTER_HOOK_TO_OVERRIDE_REGISTER_STORED_HOOKS 137 | #endif 138 | lw r_main_decode, (4*VASM_HOOKS_INDEX_MAIN_DECODE_TABLE)(r_asm_hooks) 139 | lw r_dp_decode, (4*VASM_HOOKS_INDEX_DP_DECODE_TABLE)(r_asm_hooks) 140 | lw r_next_inst, (4*VASM_HOOKS_INDEX_NEXT_INSTRUCTION)(r_asm_hooks) 141 | #else 142 | la_codeaddr r_main_decode, varmulet_main_decode_table 143 | la_codeaddr r_dp_decode, varmulet_dp_decode_table 144 | la_codeaddr r_next_inst, varmulet_hook_default_execute_instruction 145 | #endif 146 | lb r_c, CPU_OFFSET_C(r_cpu) 147 | lw r_pc, CPU_OFFSET_PC(r_cpu) 148 | lb r_topbit_v, CPU_OFFSET_V(r_cpu) 149 | slli r_topbit_v, r_topbit_v, 31 150 | lw r_lazy_nz, CPU_OFFSET_LAZY_NZ(r_cpu) 151 | ret 152 | 153 | code_label_with_section varmulet_hook_default_exit_fn 154 | sw r_pc, CPU_OFFSET_PC(r_cpu) 155 | sb r_c, CPU_OFFSET_C(r_cpu) 156 | sw r_lazy_nz, CPU_OFFSET_LAZY_NZ(r_cpu) 157 | srli r_tmp0, r_topbit_v, 31 158 | sb r_tmp0, CPU_OFFSET_V(r_cpu) 159 | #if __riscv_zcmp 160 | sw ra, 0(sp) // preserve ra on pop 161 | cm.pop {ra, s0-s2}, +16 162 | #else 163 | lw s0, (sp) 164 | lw s1, 4(sp) 165 | lw s2, 8(sp) 166 | addi sp, sp, 12 167 | #endif 168 | ret 169 | 170 | code_label_with_section varmulet_hook_default_execute_instruction 171 | .p2align 2 172 | #if !VARMULET_USE_EARLY_INSTR_READ 173 | // don't think there is any benefit in checking here - if people care they can replace this loop 174 | read_mem_u16_unchecked r_inst, r_pc 175 | add r_pc, r_pc, 2 176 | #endif 177 | #if ARMULET_FEATURE_PROFILER 178 | la r_tmp1, address_profile 179 | slli r_tmp0, r_pc, 17 180 | srli r_tmp0, r_tmp0, 16 181 | add r_tmp1, r_tmp1, r_tmp0 182 | lw r_tmp0, (r_tmp1) 183 | addi r_tmp0, r_tmp0, 1 184 | sw r_tmp0, (r_tmp1) 185 | #endif 186 | // get handler address based on the top 6 bits 187 | srli r_tmp0, r_inst, 10 188 | #if VARMULET_USE_HWORD_MAIN_DECODE 189 | sh1add r_tmp1, r_tmp0, r_main_decode 190 | lhu 
r_tmp1, (r_tmp1) 191 | #if !defined(VARMULET_HWORD_MAIN_DECODE_ABSOLUTE_ADDR_OFFSET) 192 | // 16-bit table entry is relative address 193 | add r_tmp1, r_tmp1, r_main_decode 194 | #endif 195 | #else 196 | sh2add r_tmp1, r_tmp0, r_main_decode 197 | lw r_tmp1, (r_tmp1) 198 | #endif 199 | #if VARMULET_USE_EARLY_INSTR_READ 200 | add r_pc, r_pc, 2 201 | #else 202 | // Hoisted from rlo_ptr_2_0 to save a cycle in some common instructions (plus some code size) 203 | andi r_work0, r_inst, 0x7 204 | #endif 205 | // jump to handler 206 | jr r_tmp1 207 | 208 | code_label_with_section varmulet_hook_default_bkpt_instr 209 | ebreak 210 | // fall thru 211 | varmulet_hook_default_hint_instr: // hint defaults to nop 212 | next_instruction 213 | 214 | code_label_with_section varmulet_hook_default_cps_instr 215 | andi r_tmp0, r_inst, 15 216 | li r_tmp1, 2 217 | beq r_tmp0, r_tmp1, 1f 218 | j_possibly_short vexecute_undefined 219 | 1: 220 | bexti r_work0, r_inst, 4 221 | call_hook_update_primask_fn_work0_trash_tmp2_work1 222 | next_instruction 223 | 224 | code_label_with_section varmulet_hook_default_update_primask_fn 225 | sb r_work0, CPU_OFFSET_PRIMASK(r_cpu) 226 | ret 227 | 228 | #if 0 229 | code_label_with_section varmulet_hook_default_svc_instr 230 | call_hook_save_regs_fn 231 | addi sp, sp, -4 232 | sw r_cpu, 0(sp) 233 | mv a4, r_cpu // not a0-a4 234 | zext.b a0, r_inst 235 | lw a1, 0(a4) // r0 236 | lw a2, 4(a4) // r1 237 | lw a3, 8(a4) // r2 238 | lw a5, CPU_OFFSET_SVC_HANDLER(a4) 239 | beqz a5, varmulet_halt 240 | 1: 241 | jalr a5 242 | lw a4, 0(sp) 243 | addi sp, sp, 4 244 | sw a0, 0(a4) // r0 245 | call_hook_restore_regs_fn 246 | next_instruction 247 | #endif 248 | 249 | code_label_with_section varmulet_hook_default_mrs_instr 250 | // uint32_t rd = (op16_2 >> 8u) & 0xfu; 251 | h3.bextmi r_tmp0, r_work2, 8, 4 252 | sh2add r_work0, r_tmp0, r_cpu 253 | // uint32_t SYSm = op16_2 & 0xffu; 254 | zext.b r_work1, r_work2 255 | addi r_work1, r_work1, -0b00001000 256 | bltz r_work1, vexecute_mrs_psr // < 0b00001000 257 | beqz r_work1, vexecute_mrs_msp // == 0b00001000 258 | addi r_work1, r_work1, 0b00001000 - 0b00001010 259 | #if ARMULET_FEATURE_ARMV8M_BASELINE_MSPLIM 260 | beqz r_work1, v_execute_mrs_msplim // == 0b00001010 261 | #endif 262 | addi r_work1, r_work1, 0b00001010 - 0b00010000 263 | beqz r_work1, vexecute_mrs_primask // == 0b00010000 264 | addi r_work1, r_work1, 0b00010000 - 0b00010100 265 | // manual branch relaxation to allow rvc branch 266 | beqz r_work1, 1f // == 0b00010100 267 | j_possibly_short vexecute32_undefined 268 | 1: 269 | // would be `zero`, but using known-zero r_work1 gets us a 16-bit encoding: 270 | sw r_work1, (r_work0) 271 | next_instruction 272 | 273 | vexecute_mrs_msp: 274 | lw r_tmp0, CPU_OFFSET_SP(r_cpu) 275 | j mrs_finish 276 | #if ARMULET_FEATURE_ARMV8M_BASELINE_MSPLIM 277 | v_execute_mrs_msplim: 278 | lw r_tmp0, CPU_OFFSET_SPLIM(r_cpu) 279 | j mrs_finish 280 | #endif 281 | vexecute_mrs_primask: 282 | lb r_tmp0, CPU_OFFSET_PRIMASK(r_cpu) 283 | 284 | mrs_finish: 285 | // note storing to SP, PC is unpredictable 286 | sw r_tmp0, (r_work0) 287 | next_instruction 288 | 289 | vexecute_mrs_psr: 290 | li r_tmp0, 0 291 | andi r_tmp1, r_work1, 1 292 | beqz r_tmp1, 1f 293 | lb r_tmp0, CPU_OFFSET_IPSR(r_cpu) 294 | 1: 295 | andi r_tmp1, r_work1, 4 296 | bnez r_tmp1, mrs_finish 297 | flags_to_apsr r_work1, r_tmp1, r_tmp2 298 | or r_tmp0, r_tmp0, r_work1 299 | j mrs_finish 300 | 301 | code_label_with_section varmulet_hook_default_msr_instr 302 | // uint32_t rd = op16 & 0xfu; 303 | andi 
r_tmp0, r_inst, 15 304 | sh2add r_tmp0, r_tmp0, r_cpu 305 | lw r_tmp0, (r_tmp0) 306 | // uint32_t SYSm = op16_2 & 0xffu; 307 | zext.b r_work1, r_work2 308 | addi r_work1, r_work1, -0b00001000 309 | bltz r_work1, vexecute_msr_psr // < 0b00001000 310 | beqz r_work1, vexecute_msr_msp // == 0b00001000 311 | addi r_work1, r_work1, 0b00001000 - 0b00001010 312 | #if ARMULET_FEATURE_ARMV8M_BASELINE_MSPLIM 313 | beqz r_work1, vexecute_msr_msplim // == 0b00001010 314 | #endif 315 | addi r_work1, r_work1, 0b00001010 - 0b00010000 316 | beqz r_work1, vexecute_msr_primask // == 0b00010000 317 | addi r_work1, r_work1, 0b00010000 - 0b00010100 318 | beqz r_work1, 1f // == 0b00010100 (nop - control) 319 | j_possibly_short vexecute32_undefined 320 | vexecute_msr_msp: 321 | andi r_tmp0, r_tmp0, ~3 322 | sw r_tmp0, CPU_OFFSET_SP(r_cpu) 323 | 1: 324 | next_instruction 325 | 326 | vexecute_msr_msplim: 327 | sw r_tmp0, CPU_OFFSET_SPLIM(r_cpu) 328 | next_instruction 329 | 330 | vexecute_msr_primask: 331 | andi r_work0, r_tmp0, 1 332 | call_hook_update_primask_fn_work0_trash_tmp2_work1 333 | next_instruction 334 | 335 | vexecute_msr_psr: 336 | andi r_tmp1, r_work1, 4 337 | bnez r_tmp1, 1b 338 | apsr_to_flags r_tmp0, r_tmp2 339 | next_instruction 340 | 341 | code_label_with_section varmulet_hook_default_misc_control_instr 342 | h3.bextmi r_tmp0, r_work2, 4, 4 343 | addi r_tmp0, r_tmp0, -4 344 | li r_tmp1, 3 345 | bltu r_tmp0, r_tmp1, 1f 346 | j_possibly_short vexecute32_undefined 347 | 1: 348 | // dsb, dmb, isb 349 | next_instruction 350 | 351 | #if ARMULET_FEATURE_CALL 352 | code_label_with_section varmulet_hook_default_call_return 353 | lw r_tmp0, (sp) 354 | addi sp, sp, 4 355 | jr r_tmp0 356 | #endif 357 | 358 | code_label_with_section varmulet_hook_default_exc_return 359 | #if ARMULET_FEATURE_CALL 360 | li r_tmp0, ARMULET_CALL_RETURN_ADDRESS 361 | beq r_tmp0, r_work0, varmulet_do_call_return 362 | #endif 363 | ebreak 364 | next_instruction 365 | 366 | #if ARMULET_FEATURE_CALL 367 | varmulet_do_call_return: 368 | tail_call_asm_hook_trash_tmp2_work1 VASM_HOOKS_INDEX_CALL_RETURN 369 | #endif 370 | -------------------------------------------------------------------------------- /varmulet/varmulet_hooks_metal_irq.S: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #if PICO_RP2350 8 | #include "pico.h" 9 | #endif 10 | 11 | #include "varmulet_macros.inc.S" 12 | 13 | // ------------------------------------------------------------------------------- 14 | // This is semi-incomplete example code (it assumes only one IRQ handler) for 15 | // handling an IRQ inside the ARM world, i.e. pushing an exception frame onto the 16 | // ARM stack and continuing in the original varmulet instance 17 | // ------------------------------------------------------------------------------- 18 | .global metal_irq_pending 19 | .global metal_irq_hook_exc_return 20 | 21 | metal_irq_pending: 22 | // the idea here is to disable IRQs for when we do an MRET (leaving the RISC-V IRQ pending) 23 | // and return from the exception with r_next_inst set to cause 'vtake_exception' to be called once 24 | // the currently executing instruction completes.
25 | // todo we assume here that we return directly back into varmulet code, and that we don't have stack to unwind, 26 | // so this is not very general code atm 27 | li r_next_inst, 0x80 28 | csrc mstatus, r_next_inst // set interrupt enable for mret to 0 29 | la r_next_inst, vtake_exception 30 | mret 31 | 32 | metal_take_exception: 33 | // create an ARMv6M exception stack frame 34 | lw r_tmp0, CPU_OFFSET_SP(r_cpu) 35 | addi r_tmp0, r_tmp0, -32 36 | sw r_tmp0, CPU_OFFSET_SP(r_cpu) 37 | 38 | lw r_tmp1, CPU_OFFSET_R0(r_cpu) 39 | sw r_tmp1, 0(r_tmp0) 40 | lw r_tmp1, CPU_OFFSET_R1(r_cpu) 41 | sw r_tmp1, 4(r_tmp0) 42 | lw r_tmp1, CPU_OFFSET_R2(r_cpu) 43 | sw r_tmp1, 8(r_tmp0) 44 | lw r_tmp1, CPU_OFFSET_R3(r_cpu) 45 | sw r_tmp1, 12(r_tmp0) 46 | lw r_tmp1, CPU_OFFSET_R12(r_cpu) 47 | sw r_tmp1, 16(r_tmp0) 48 | lw r_tmp1, CPU_OFFSET_LR(r_cpu) 49 | sw r_tmp1, 20(r_tmp0) 50 | sw r_pc, 24(r_tmp0) 51 | flags_to_apsr r_tmp1, r_work0, r_work1 52 | sw r_tmp1, 28(r_tmp0) // todo save flags and such 53 | 54 | li r_tmp1, 0xfffffff1 55 | sw r_tmp1, CPU_OFFSET_LR(r_cpu) 56 | 57 | // load our single ARM irq_handler 58 | la r_tmp1, metal_arm_irq_handler 59 | lw r_pc, (r_tmp1) 60 | andi r_pc, r_pc, ~1 61 | 62 | // continue executing. 63 | lw r_next_inst, VASM_HOOKS_OFFSET_NEXT_INSTRUCTION(r_asm_hooks) 64 | next_instruction 65 | 66 | metal_irq_hook_exc_return: 67 | #if ARMULET_FEATURE_CALL 68 | li r_tmp0, ARMULET_CALL_RETURN_ADDRESS 69 | // todo this should be user specified address 70 | beq r_tmp0, r_work0, varmulet_do_call_return 71 | #endif 72 | // pop the exception frame 73 | lw r_tmp0, CPU_OFFSET_SP(r_cpu) 74 | lw r_tmp1, 0(r_tmp0) 75 | sw r_tmp1, CPU_OFFSET_R0(r_cpu) 76 | lw r_tmp1, 4(r_tmp0) 77 | sw r_tmp1, CPU_OFFSET_R1(r_cpu) 78 | lw r_tmp1, 8(r_tmp0) 79 | sw r_tmp1, CPU_OFFSET_R2(r_cpu) 80 | lw r_tmp1, 12(r_tmp0) 81 | sw r_tmp1, CPU_OFFSET_R3(r_cpu) 82 | lw r_tmp1, 16(r_tmp0) 83 | sw r_tmp1, CPU_OFFSET_R12(r_cpu) 84 | lw r_tmp1, 20(r_tmp0) 85 | sw r_tmp1, CPU_OFFSET_LR(r_cpu) 86 | lw r_pc, 24(r_tmp0) 87 | lw r_tmp1, 28(r_tmp0) 88 | apsr_to_flags r_tmp1, r_tmp2 89 | 90 | addi r_tmp0, r_tmp0, 32 91 | sw r_tmp0, CPU_OFFSET_SP(r_cpu) 92 | // todo note we reenable IRQs, but we s 93 | csrsi mstatus, 0x8 94 | next_instruction 95 | -------------------------------------------------------------------------------- /varmulet/varmulet_macros.inc.S: -------------------------------------------------------------------------------- 1 | #include "varmulet.h" 2 | 3 | #if !ARMULET_USE_LAZY_NZ || ARMULET_USE_LAZY_Z 4 | #error varmulet requires ARMULET_USE_LAZY_NZ and not ARMULET_USE_LAZY_Z 5 | #endif 6 | 7 | 8 | // =================================================== 9 | // 10 | // s0 11 | // s1 12 | // 13 | // a0 14 | // a1 15 | // a2 16 | // a3 17 | // a4 18 | // a5 19 | // a6 20 | // a7 21 | 22 | // t0 23 | // t1 24 | // t2 25 | // t3 26 | // t4 27 | // t5 28 | // t6 29 | 30 | // r_tmp0->r_tmp2 are for local temporaries (they are not commonly set in setup code common to multiple instruction implementations) 31 | // r_work0->r_work2 are also temporary, but are used also for pre-instruction setup values (often register pointers/values) 32 | 33 | #define r_tmp0 s0 34 | #define r_tmp1 s1 35 | #define r_f0000000 s2 36 | 37 | #define r_pc a0 // ARM pc points at following instruction during instruction execution 38 | #define r_tmp2 a1 // note this is used as a temporary in function enter/exit so needs to be callee saved (and not any of the things we use below) 39 | #define r_work0 a2 40 | #define r_work1 a3 // note this is used as a temporary
in function enter/exit so needs to be callee saved (and not any of the things we use below) 41 | #define r_lazy_nz a4 42 | #define r_inst a5 // the current 16 bit instruction 43 | #define r_next_inst a6 // need to jump to this after each instruction 44 | #define r_c a7 // the carry 1/0 45 | 46 | #define r_main_decode t1 // main decoder table 47 | #define r_cpu t2 48 | #define r_work2 t3 49 | 50 | #define r_topbit_v t4 // 0b Vxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx 51 | #define r_dp_decode t5 52 | // this is the last one by definition because of save/restore regs (we must save it separately, 53 | // and don't want to leave a gap in registers the hook saves/restores) 54 | #define r_asm_hooks t6 55 | 56 | #if VARMULET_USE_WATERMARK 57 | #error todo need to assign an r_watermark register for this 58 | #endif 59 | 60 | // ---------------------------------------------------------------------------- 61 | // Hazard3 custom instructions 62 | 63 | // rd = (rs1 >> rs2[4:0]) & ~(-1 << nbits) 64 | .macro h3.bextm rd rs1 rs2 nbits 65 | .if (\nbits < 1) || (\nbits > 8) 66 | .err 67 | .endif 68 | #if VARMULET_USE_HAZARD3_CUSTOM 69 | .insn r 0x0b, 0x4, (((\nbits - 1) & 0x7 ) << 1), \rd, \rs1, \rs2 70 | #else 71 | srl \rd, \rs1, \rs2 72 | andi \rd, \rd, ((1 << \nbits) - 1) 73 | #endif 74 | .endm 75 | 76 | // rd = (rs1 >> shamt) & ~(-1 << nbits) 77 | .macro h3.bextmi rd rs1 shamt nbits 78 | .if (\nbits < 1) || (\nbits > 8) 79 | .err 80 | .endif 81 | .if (\shamt < 0) || (\shamt > 31) 82 | .err 83 | .endif 84 | #if VARMULET_USE_HAZARD3_CUSTOM 85 | .insn i 0x0b, 0x4, \rd, \rs1, (\shamt & 0x1f) | (((\nbits - 1) & 0x7 ) << 6) 86 | #else 87 | srli \rd, \rs1, \shamt 88 | andi \rd, \rd, ((1 << \nbits) - 1) 89 | #endif 90 | .endm 91 | 92 | // ---------------------------------------------------------------------------- 93 | 94 | .macro code_label_with_section l 95 | .section .text.\l 96 | \l: 97 | .endm 98 | 99 | // ---------------------------------------------------------------------------- 100 | // MEMORY access macros for the "ARM" address space 101 | // ---------------------------------------------------------------------------- 102 | 103 | // direct use of the _unchecked macros should only be for memory accesses that aren't done on behalf of an 104 | // ARM load/store/push/pop instruction (other than instruction fetch); for everything else use the non-"_unchecked" 105 | // macros which use fixed address and data RISC-V registers 106 | .macro read_mem_u16_unchecked target_reg, address_reg 107 | lhu \target_reg, 0(\address_reg) 108 | .endm 109 | 110 | .macro read_mem_32_unchecked target_reg, address_reg 111 | lw \target_reg, 0(\address_reg) 112 | .endm 113 | 114 | .macro write_mem_32_unchecked out_reg, address_reg 115 | sw \out_reg, (\address_reg) 116 | .endm 117 | 118 | // ---------------------------------------------------------------------------- 119 | // These remaining memory access macros have ARM address in r_work1 and data to/from r_tmp0 120 | // this allows read/writeN_special to know what registers to use when watermarking, and also potentially 121 | // makes it easy for an exception handler to virtualize an ARM address range based on memory protection 122 | .macro read_mem_u8_at_work1_to_tmp0 123 | #if VARMULET_USE_WATERMARK 124 | bgeu r_work1, r_watermark, read8_special 125 | #endif 126 | lbu r_tmp0, (r_work1) 127 | .endm 128 | 129 | .macro read_mem_s8_at_work1_to_tmp0 130 | #if VARMULET_USE_WATERMARK 131 | bgeu r_work1, r_watermark, read8_special 132 | #endif 133 | lb r_tmp0, (r_work1) 134 | .endm 135 | 136 |
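// ----------------------------------------------------------------------------
// A minimal sketch, assuming the register contract described above (this is an
// illustration, not one of the real hook implementations): in C terms each
// checked access behaves roughly like
//
//     uint32_t read_mem_32(uint32_t addr) {        // addr arrives in r_work1
//         if (addr >= watermark)                   // bgeu r_work1, r_watermark
//             return read32_special(addr);         // out-of-range hook path
//         return *(uint32_t *)addr;                // value ends up in r_tmp0
//     }
//
// except that the read/writeN_special labels are reached by a branch rather
// than a call, so a real handler must finish the interrupted instruction
// itself; the handlers are expected to be supplied by whatever build enables
// VARMULET_USE_WATERMARK, since nothing in this file defines them.
// ----------------------------------------------------------------------------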
.macro read_mem_u16_at_work1_to_tmp0 137 | #if VARMULET_USE_WATERMARK 138 | bgeu r_work1, r_watermark, read16_special 139 | #endif 140 | lhu r_tmp0, (r_work1) 141 | .endm 142 | 143 | .macro read_mem_s16_at_work1_to_tmp0 144 | #if VARMULET_USE_WATERMARK 145 | bgeu r_work1, r_watermark, read16_special 146 | #endif 147 | lh r_tmp0, (r_work1) 148 | .endm 149 | 150 | .macro read_mem_32_at_work1_to_tmp0 151 | #if VARMULET_USE_WATERMARK 152 | bgeu r_work1, r_watermark, read32_special 153 | #endif 154 | lw r_tmp0, (r_work1) 155 | .endm 156 | 157 | .macro read_stack_32_at_work1_to_tmp0 158 | #if ASSUME_STACK_SAFE 159 | read_mem_32_unchecked r_tmp0, r_work1 160 | #else 161 | read_mem_32_at_work1_to_tmp0 162 | #endif 163 | .endm 164 | 165 | .macro write_mem_32_at_work1_from_tmp0 166 | #if VARMULET_USE_WATERMARK 167 | bgeu r_work1, r_watermark, write32_special 168 | #endif 169 | sw r_tmp0, (r_work1) 170 | .endm 171 | 172 | .macro write_stack_32_at_work1_from_tmp0 173 | #if ASSUME_STACK_SAFE 174 | write_mem_32_unchecked r_tmp0, r_work1 175 | #else 176 | write_mem_32_at_work1_from_tmp0 177 | #endif 178 | .endm 179 | 180 | .macro write_mem_16_at_work1_from_tmp0 181 | #if VARMULET_USE_WATERMARK 182 | bgeu r_work1, r_watermark, write16_special 183 | #endif 184 | sh r_tmp0, (r_work1) 185 | .endm 186 | 187 | .macro write_mem_8_at_work1_from_tmp0 188 | #if VARMULET_USE_WATERMARK 189 | bgeu r_work1, r_watermark, write8_special 190 | #endif 191 | sb r_tmp0, (r_work1) 192 | .endm 193 | 194 | .macro rlo_ptr_10_8 reg 195 | h3.bextmi \reg, r_inst, 8, 3 196 | sh2add \reg, \reg, r_cpu 197 | .endm 198 | 199 | .macro rlo_ptr_8_6 reg 200 | h3.bextmi \reg, r_inst, 6, 3 201 | sh2add \reg, \reg, r_cpu 202 | .endm 203 | 204 | 205 | .macro rlo_ptr_2_0_should_use_a2_not_a2 206 | .endm 207 | .macro rlo_ptr_2_0 reg 208 | #if !VARMULET_USE_EARLY_INSTR_READ 209 | // Assert that only r_work0 (assumed to be a2) is used, to match the 210 | // hoisted `andi` in varmulet_hook_default_execute_instruction: 211 | rlo_ptr_2_0_should_use_a2_not_\reg 212 | // (note this assertion will need updating if you reallocate r_work0) 213 | #else 214 | andi \reg, r_inst, 0x7 215 | #endif 216 | sh2add \reg, \reg, r_cpu 217 | .endm 218 | 219 | .macro rlo_ptr_5_3 reg 220 | h3.bextmi \reg, r_inst, 3, 3 221 | sh2add \reg, \reg, r_cpu 222 | .endm 223 | 224 | #if ARMULET_FEATURE_ARMV8M_BASELINE 225 | .macro r_ptr32_11_8 reg 226 | h3.bextmi \reg, r_work2, 8, 4 227 | sh2add \reg, \reg, r_cpu 228 | .endm 229 | 230 | .macro r_ptr32_3_0 reg 231 | andi \reg, r_work2, 0xf 232 | sh2add \reg, \reg, r_cpu 233 | .endm 234 | 235 | .macro r_ptr32_19_16 reg 236 | andi \reg, r_inst, 0xf 237 | sh2add \reg, \reg, r_cpu 238 | .endm 239 | #endif 240 | 241 | // todo make a function 242 | // out and reg can be the same 243 | .macro r_lo_hi_value out, reg, temp1, temp2 244 | sh2add \temp1, \reg, r_cpu 245 | addi \temp2, \reg, -15 246 | lw \out, (\temp1) 247 | // Do not use \reg now that \out is trashed 248 | bnez \temp2, 1f 249 | addi \out, r_pc, 2 250 | 1: 251 | .endm 252 | 253 | .macro check_splim value, temp 254 | #if ARMULET_FEATURE_ARMV8M_BASELINE_MSPLIM 255 | lw \temp, CPU_OFFSET_SPLIM(r_cpu) 256 | #if 1 257 | bltu \value, \temp, vexecute_dummy_bkpt 258 | #else 259 | bgeu \value, \temp, 6f 260 | ebreak 261 | j vexecute_dummy_bkpt 262 | 6: 263 | #endif 264 | #endif 265 | 266 | .endm 267 | .macro w_lo_hi_value_sp_pc reg, value, temp1, temp2 268 | sh2add \temp1, \reg, r_cpu 269 | addi \temp2, \reg, -13 270 | bltz \temp2, 8f 271 | beqz \temp2, 7f 272 | addi \temp2, \temp2, -2 273
| bnez \temp2, 8f 274 | andi r_pc, \value, ~1 275 | j 9f 276 | 7: 277 | andi \value, \value, ~3 278 | sw \value, (\temp1) 279 | check_splim \value, \temp2 280 | 8: 281 | sw \value, (\temp1) 282 | 9: 283 | .endm 284 | 285 | .macro get_va_imm5_ra_rb out, a, b, imm 286 | rlo_ptr_5_3 \a 287 | rlo_ptr_2_0 \b 288 | lw \out, (\a) 289 | h3.bextmi \imm, r_inst, 6, 5 290 | .endm 291 | 292 | .macro get_z reg 293 | seqz \reg, r_lazy_nz 294 | .endm 295 | 296 | .macro get_not_z reg 297 | snez \reg, r_lazy_nz 298 | .endm 299 | 300 | .macro get_n reg 301 | sltz \reg, r_lazy_nz 302 | .endm 303 | 304 | .macro get_c reg 305 | mv \reg, r_c 306 | .endm 307 | 308 | .macro get_v reg 309 | srli \reg, r_topbit_v, 31 310 | .endm 311 | 312 | .macro flags_to_apsr apsr, tmpa, tmpb 313 | get_N \tmpa 314 | get_Z \tmpb 315 | sh1add \apsr, \tmpa, \tmpb 316 | get_V \tmpb 317 | sh1add \tmpa, r_c, \tmpb 318 | sh2add \apsr, \apsr, \tmpa 319 | slli \apsr, \apsr, 28 320 | .endm 321 | 322 | .macro apsr_to_flags apsr, tmp 323 | // N Z LNZ 324 | // ------- 325 | // 0 0 1 326 | // 0 1 0 327 | // 1 0 -1 328 | // 1 1 -2 (not ideal, but we don't have a valid answer) 329 | srli r_lazy_nz, \apsr, 30 330 | li \tmp, 1 331 | sub r_lazy_nz, \tmp, r_lazy_nz 332 | 333 | sll r_topbit_v, \apsr, 3 334 | bexti r_c, \apsr, 29 335 | .endm 336 | 337 | .macro next_instruction 338 | #if VARMULET_USE_EARLY_INSTR_READ 339 | read_mem_u16_unchecked r_inst, r_pc 340 | #endif 341 | jr r_next_inst 342 | .endm 343 | 344 | // have this in case we want to replace with a single jmp to common code 345 | .macro store_nz_at_work0_next_instruction 346 | sw r_lazy_nz, (r_work0) 347 | next_instruction 348 | .endm 349 | 350 | .macro add_update_flags r_a r_b 351 | add r_lazy_nz, \r_a, \r_b 352 | // calulate v 353 | xor r_topbit_v, r_lazy_nz, \r_a 354 | xnor r_c, \r_a, \r_b 355 | and r_topbit_v, r_topbit_v, r_c 356 | // calulate c 357 | sltu r_c, r_lazy_nz, \r_a 358 | .endm 359 | 360 | .macro sub_update_flags r_a r_b 361 | sub r_lazy_nz, \r_a, \r_b 362 | // calulate v 363 | xor r_topbit_v, \r_a, r_lazy_nz 364 | xnor r_c, r_lazy_nz, \r_b 365 | and r_topbit_v, r_topbit_v, r_c 366 | // calculate c 367 | sltu r_c, \r_a, \r_b 368 | xori r_c, r_c, 1 369 | .endm 370 | 371 | .macro call_asm_hook_fn_trash_tmp2_work1 FN_INDEX 372 | #if !ARMULET_FEATURE_ASM_HOOKS_IS_FUNCTION 373 | lw r_tmp2, (4 * \FN_INDEX)(r_asm_hooks) 374 | jalr r_tmp2 375 | #else 376 | li r_tmp2, \FN_INDEX 377 | jalr r_asm_hooks 378 | #endif 379 | .endm 380 | 381 | .macro tail_call_asm_hook_trash_tmp2_work1 HOOK_INDEX 382 | #if !ARMULET_FEATURE_ASM_HOOKS_IS_FUNCTION 383 | lw r_tmp2, (4 * \HOOK_INDEX)(r_asm_hooks) 384 | jr r_tmp2 385 | #else 386 | li r_tmp2, \HOOK_INDEX 387 | jalr r_asm_hooks 388 | #endif 389 | .endm 390 | 391 | .macro tail_call_asm_hook_in_tmp2_trash_work1 392 | #if !ARMULET_FEATURE_ASM_HOOKS_IS_FUNCTION 393 | sh2add r_tmp2, r_tmp2, r_asm_hooks 394 | lw r_tmp2, (r_tmp2) 395 | jr r_tmp2 396 | #else 397 | jr r_asm_hooks 398 | #endif 399 | .endm 400 | 401 | .macro call_hook_enter_fn_trash_tmp2_work1 402 | mv r_cpu, a0 403 | mv r_asm_hooks, a1 404 | // use tmp2 (a1) since we know it is unused atm 405 | call_asm_hook_fn_trash_tmp2_work1 VASM_HOOKS_INDEX_ENTER_FN 406 | .endm 407 | 408 | .macro call_hook_exit_fn_trash_tmp2_work1 409 | // use tmp2 (a1) since we know we don't care about it's value (we might about t0) 410 | call_asm_hook_fn_trash_tmp2_work1 VASM_HOOKS_INDEX_EXIT_FN 411 | .endm 412 | 413 | .macro call_hook_save_regs_fn_trash_tmp2_work1 414 | call_asm_hook_fn_trash_tmp0_work1 
VASM_HOOKS_INDEX_SAVE_REGS_FN 415 | // need to store r_asm_hooks at the end of the stack, so we can pop it before calling restore_regs_fn 416 | addi sp, sp, -4 417 | sw r_asm_hooks, (sp) 418 | .endm 419 | 420 | .macro call_hook_restore_regs_fn_trash_tmp2_work1 421 | lw r_asm_hooks, (sp) 422 | addi sp, sp, 4 423 | call_asm_hook_fn_trash_tmp2_work1 VASM_HOOKS_INDEX_RESTORE_REGS_FN 424 | .endm 425 | 426 | .macro call_hook_update_primask_fn_work0_trash_tmp2_work1 427 | call_asm_hook_fn_trash_tmp2_work1 VASM_HOOKS_INDEX_UPDATE_PRIMASK_FN 428 | .endm 429 | 430 | .macro jmp_hook_undefined16_trash_tmp2_work1 431 | tail_call_asm_hook_trash_tmp2_work1 VASM_HOOKS_INDEX_UNDEFINED16 432 | .endm 433 | 434 | .macro jmp_hook_undefined32_trash_tmp2_work1 435 | tail_call_asm_hook_trash_tmp2_work1 VASM_HOOKS_INDEX_UNDEFINED32 436 | .endm 437 | 438 | .macro jmp_hook_exc_return_trash_tmp2_work1 439 | tail_call_asm_hook_trash_tmp2_work1 VASM_HOOKS_INDEX_EXC_RETURN 440 | .endm 441 | 442 | .macro check_exc_return_to_work0_trash_tmp2_work1 443 | bgeu r_work0, r_f0000000, do_jmp_hook_exc_return 444 | .endm 445 | 446 | .macro check_exc_return_to_tmp0_trash_tmp2_work1 447 | bgeu r_tmp0, r_f0000000, do_jmp_hook_exc_return_tmp0 448 | .endm 449 | --------------------------------------------------------------------------------
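A minimal C model of the lazy flag scheme implemented by the flag macros in varmulet_macros.inc.S — an illustrative sketch only (the struct and function names are invented here; the real state lives in the RISC-V registers r_lazy_nz, r_c and r_topbit_v):

    #include <stdint.h>
    #include <stdbool.h>

    typedef struct {
        uint32_t lazy_nz;   // last flag-setting result: N = bit 31, Z = (value == 0)
        uint32_t c;         // carry as 0/1 (r_c)
        uint32_t topbit_v;  // overflow kept in bit 31 (r_topbit_v)
    } lazy_flags_t;

    static bool get_n(const lazy_flags_t *f) { return (int32_t)f->lazy_nz < 0; } // sltz
    static bool get_z(const lazy_flags_t *f) { return f->lazy_nz == 0; }         // seqz
    static bool get_v(const lazy_flags_t *f) { return f->topbit_v >> 31; }       // srli ..., 31

    // mirrors add_update_flags: V = (res ^ a) & ~(a ^ b) in bit 31, C = (res < a)
    static uint32_t model_add_update_flags(lazy_flags_t *f, uint32_t a, uint32_t b) {
        uint32_t res = a + b;
        f->topbit_v = (res ^ a) & ~(a ^ b);  // xor/xnor/and sequence
        f->c = res < a;                      // sltu
        f->lazy_nz = res;
        return res;
    }

    // mirrors sub_update_flags; ARM borrow convention: C = 1 means "no borrow"
    static uint32_t model_sub_update_flags(lazy_flags_t *f, uint32_t a, uint32_t b) {
        uint32_t res = a - b;
        f->topbit_v = (a ^ res) & ~(res ^ b);
        f->c = a >= b;                       // sltu + xori
        f->lazy_nz = res;
        return res;
    }

For example, model_add_update_flags(&f, 0x7fffffff, 1) leaves N and V set with C clear, matching what ARM ADDS produces for that overflow case; this is also why apsr_to_flags can only approximate the impossible N=1, Z=1 combination, as its comment table notes.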
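The call/tail-call hook macros above hide two dispatch conventions behind one interface. A hedged C sketch of the two modes (the typedef and function names are invented for illustration; the indices are the VASM_HOOKS_INDEX_* constants from varmulet.h):

    typedef void (*vasm_hook_t)(void);

    // table mode (!ARMULET_FEATURE_ASM_HOOKS_IS_FUNCTION): r_asm_hooks points at
    // a flat table of code pointers, i.e. lw r_tmp2, (4 * INDEX)(r_asm_hooks)
    // followed by jalr r_tmp2 (or jr r_tmp2 for a tail call)
    static inline void model_call_asm_hook(const vasm_hook_t *asm_hooks, unsigned index) {
        asm_hooks[index]();
    }

    // function mode (ARMULET_FEATURE_ASM_HOOKS_IS_FUNCTION): r_asm_hooks is a
    // single dispatcher function and the index travels in r_tmp2 instead
    typedef void (*vasm_hook_dispatcher_t)(unsigned index);
    static inline void model_call_asm_hook_fn(vasm_hook_dispatcher_t dispatcher, unsigned index) {
        dispatcher(index);
    }

This is why varmulet_default_asm_hooks in varmulet_hooks_default.S is laid out as a flat sequence of .word entries: in the default (table) mode the macros index straight into it by hook number.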