├── .gitignore ├── rosettaRuntime ├── X87State.cpp ├── SIMDGuard.cpp ├── X87Float80.cpp ├── X87StackRegister.cpp ├── main.cpp ├── Log.h ├── X87Float80.h ├── X87StackRegister.h ├── SIMDGuard.h ├── Log.cpp ├── Export.h ├── X87.h ├── X87State.h ├── Export.cpp └── X87.cpp ├── .gitmodules ├── loader ├── offset_finder.hpp ├── macho_loader.hpp ├── macho_loader.cpp ├── offset_finder.cpp └── main.cpp ├── LICENSE ├── ida ├── label.py └── chain_fixup.py ├── sample ├── cpuid.c └── math.c ├── CMakeLists.txt └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store -------------------------------------------------------------------------------- /rosettaRuntime/X87State.cpp: -------------------------------------------------------------------------------- 1 | #include "X87State.h" -------------------------------------------------------------------------------- /rosettaRuntime/SIMDGuard.cpp: -------------------------------------------------------------------------------- 1 | #include "SIMDGuard.h" -------------------------------------------------------------------------------- /rosettaRuntime/X87Float80.cpp: -------------------------------------------------------------------------------- 1 | #include "X87Float80.h" -------------------------------------------------------------------------------- /rosettaRuntime/X87StackRegister.cpp: -------------------------------------------------------------------------------- 1 | #include "X87StackRegister.h" -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/openlibm"] 2 | path = third_party/openlibm 3 | url = https://github.com/JuliaMath/openlibm 4 | -------------------------------------------------------------------------------- /rosettaRuntime/main.cpp: 
/// Holds the byte offsets inside the Rosetta `runtime` binary that the loader
/// needs, and the two ways of obtaining them.
struct OffsetFinder {

    /// Fills the offsets with the hard-coded values for one known runtime build.
    auto set_default_offsets() -> void;

    /// Scans the installed runtime binary for known byte patterns and, on
    /// success, overwrites the offsets with the positions found. On failure the
    /// members are left untouched.
    auto determine_offsets() -> void;

    // Zero-initialized so that a finder whose lookup failed (and for which the
    // defaults were never requested) holds a recognizable value instead of
    // indeterminate memory.
    std::uint64_t offset_loop_copy_func{0};  ///< Offset of the loop-copy helper function.
    std::uint64_t offset_svc_call_entry{0};  ///< Offset of the first instruction of the `svc` wrapper.
    std::uint64_t offset_svc_call_ret{0};    ///< Offset of the wrapper's return point (entry + 0xc).
};
// 10-byte, byte-packed storage for an 80-bit x87 value. The union offers three
// views of the same bytes: split mantissa/exponent fields, the raw byte array,
// and a double aliasing the first 8 bytes.
#pragma pack(push, 1)
struct X87Float80 {
  union {
    struct {
      uint64_t mantissa;  // bytes 0-7
      uint16_t exponent;  // bytes 8-9; presumably sign bit + 15-bit exponent as in the x87 extended format — TODO confirm
    };
    uint8_t bytes[10];  // raw view of all 10 bytes
    double ieee754; // 64 bits
  };
};
#pragma pack(pop)
// Lock the layout in: packing must leave no padding between or after the fields.
static_assert(sizeof(X87Float80) == 0x0A, "Invalid size for X87Float80");
static_assert(offsetof(X87Float80, mantissa) == 0, "Invalid offset for X87Float80::mantissa");
static_assert(offsetof(X87Float80, exponent) == 8, "Invalid offset for X87Float80::exponent");
static_assert(offsetof(X87StackRegister, ieee754) == 0, 35 | "Invalid offset for X87StackRegister::ieee754"); -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Lifeisawful 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# Names every function listed in the `exports` segment of the loaded binary and
# prints a C++ initializer line (`{&name, "mangled"},`) for each one.
#
# Layout read from the segment: the qword at +0x08 is the address of the export
# table and the qword at +0x10 is the number of entries. Each entry is 0x10
# bytes: function address followed by a pointer to the mangled name string.
import ida_segment
import ida_bytes
import ida_name
import idaapi

EXPORT_ENTRY_SIZE = 0x10

exports_segm = ida_segment.get_segm_by_name('exports')
assert exports_segm

exported_function_ea = ida_bytes.get_qword(exports_segm.start_ea + 0x08)
exported_function_count = ida_bytes.get_qword(exports_segm.start_ea + 0x10)

for i in range(exported_function_count):
    entry_ea = exported_function_ea + i * EXPORT_ENTRY_SIZE

    func_ea = ida_bytes.get_qword(entry_ea)
    func_name_ea = ida_bytes.get_qword(entry_ea + 0x08)

    # get_strlit_contents returns None when no string can be read at the address;
    # skip the entry instead of crashing on .decode().
    raw_name = ida_bytes.get_strlit_contents(func_name_ea, -1, idaapi.STRTYPE_C)
    if raw_name is None:
        print(f'skipping export {i}: no name string at 0x{func_name_ea:x}')
        continue
    func_name_mangled = raw_name.decode('utf-8')

    # demangle_name returns None for names it cannot demangle (e.g. plain C
    # symbols); fall back to the raw name so the entry is still labelled.
    func_name_demangled = ida_name.demangle_name(func_name_mangled, ida_name.MNG_LONG_FORM, ida_name.DQT_FULL)
    if func_name_demangled is None:
        func_name_demangled = func_name_mangled

    # strip the parameter list, then keep only the part after the last `::`
    func_name_split = func_name_demangled.split('(')[0]
    simple_func_name = func_name_split.split('::')[-1]

    # add full type as comment
    ida_bytes.set_cmt(func_ea, func_name_demangled, 0)
    # set function name
    ida_name.set_name(func_ea, func_name_split)

    print(f'{{&{simple_func_name}, "{func_name_mangled}"}},')
*((unsigned int*)vendor) = ebx; 18 | *((unsigned int*)(vendor + 4)) = edx; 19 | *((unsigned int*)(vendor + 8)) = ecx; 20 | vendor[12] = '\0'; 21 | 22 | printf("CPU Vendor: %s\n", vendor); 23 | 24 | // Get processor info and feature bits (EAX=1) 25 | __asm__ __volatile__ ( 26 | "cpuid" 27 | : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) 28 | : "a" (1) 29 | ); 30 | 31 | // Extract processor information from EAX 32 | unsigned int stepping = eax & 0xF; 33 | unsigned int model = (eax >> 4) & 0xF; 34 | unsigned int family = (eax >> 8) & 0xF; 35 | unsigned int type = (eax >> 12) & 0x3; 36 | unsigned int ext_model = (eax >> 16) & 0xF; 37 | unsigned int ext_family = (eax >> 20) & 0xFF; 38 | 39 | // Print processor information 40 | printf("CPU Info:\n"); 41 | printf(" Family: %d (0x%X)\n", family, family); 42 | printf(" Model: %d (0x%X)\n", model, model); 43 | printf(" Stepping: %d (0x%X)\n", stepping, stepping); 44 | printf(" Extended Family: %d (0x%X)\n", ext_family, ext_family); 45 | printf(" Extended Model: %d (0x%X)\n", ext_model, ext_model); 46 | 47 | // Print feature flags 48 | printf("Feature Flags:\n"); 49 | printf(" EDX: 0x%08X\n", edx); 50 | printf(" ECX: 0x%08X\n", ecx); 51 | 52 | // Print some common CPU features 53 | if (edx & (1 << 25)) printf(" - SSE: Supported\n"); 54 | if (edx & (1 << 26)) printf(" - SSE2: Supported\n"); 55 | if (ecx & (1 << 0)) printf(" - SSE3: Supported\n"); 56 | if (ecx & (1 << 28)) printf(" - AVX: Supported\n"); 57 | 58 | return 0; 59 | } -------------------------------------------------------------------------------- /rosettaRuntime/SIMDGuard.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // most x87 instruction handlers only use d0-d3 registers, so backing all up is 4 | // too much of a penalty. Only sin/cos/log2/exp require a full backup of the 5 | // simd registers. Anything above d7 handled by the compiler. 
// Scoped save/restore of SIMD registers q0-q3: the constructor stores them into
// `buf`, the destructor loads them back. With ENABLE_SIMD_GUARD undefined both
// become no-ops.
struct SIMDGuard {
  // NOTE(review): not referenced anywhere in this header, and 8 bytes wide while
  // the q registers stored below are 16 bytes each.
  using SIMDRegister_t = uint8_t[8];

  SIMDGuard() {
#if defined(ENABLE_SIMD_GUARD)
    // Save q0–q3 in pairs into buf
    asm volatile("stp q0, q1, [%0, # 0]\n\t"
                 "stp q2, q3, [%0, #32]\n\t"
                 : /* no outputs */
                 : "r"(buf)
                 : "memory");
#endif
  }

  ~SIMDGuard() {
#if defined(ENABLE_SIMD_GUARD)
    // Restore q0–q3 in reverse order
    asm volatile("ldp q2, q3, [%0, #32]\n\t"
                 "ldp q0, q1, [%0, # 0]\n\t"
                 :
                 : "r"(buf)
                 : "v0", "v1", "v2", "v3", "memory");
#endif
  }

  // 64 bytes, 16-byte aligned: room for the four 16-byte registers stored at
  // offsets 0 and 32 above.
  alignas(16) uint8_t buf[16][4];
};

// Same idea as SIMDGuard, but covers q0-q7 (128 bytes of storage).
struct SIMDGuardFull {
  // NOTE(review): unused here as well; see the remark on SIMDGuard::SIMDRegister_t.
  using SIMDRegister_t = uint8_t[8];

  SIMDGuardFull() {
#if defined(ENABLE_SIMD_GUARD)
    // Save q0–q7 in pairs into buf
    asm volatile("stp q0, q1, [%0, # 0]\n\t"
                 "stp q2, q3, [%0, #32]\n\t"
                 "stp q4, q5, [%0, #64]\n\t"
                 "stp q6, q7, [%0, #96]\n\t"
                 : /* no outputs */
                 : "r"(buf)
                 : "memory");
#endif
  }

  ~SIMDGuardFull() {
#if defined(ENABLE_SIMD_GUARD)
    // Restore q0–q7 in reverse order
    asm volatile("ldp q6, q7, [%0, #96]\n\t"
                 "ldp q4, q5, [%0, #64]\n\t"
                 "ldp q2, q3, [%0, #32]\n\t"
                 "ldp q0, q1, [%0, # 0]\n\t"
                 :
                 : "r"(buf)
                 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory");
#endif
  }

  // 128 bytes, 16-byte aligned: eight 16-byte registers at offsets 0..96.
  alignas(16) uint8_t buf[16][8];
};
= 2.0f; 14 | 15 | clock_t start = clock(); 16 | 17 | // Run addition many times to get measurable time 18 | float three; 19 | for(int i = 0; i < TIMES; i++) { 20 | three = one + two; 21 | } 22 | 23 | clock_t end = clock(); 24 | clock_t time_spent = (end - start); 25 | 26 | printf("Result: %x\n", *(uint32_t*)&three); 27 | return time_spent; 28 | } 29 | 30 | clock_t run_div() { 31 | float one = 1.0f; 32 | float two = 2.0f; 33 | 34 | clock_t start = clock(); 35 | 36 | // Run division many times to get measurable time 37 | float three; 38 | for(int i = 0; i < TIMES; i++) { 39 | three = one / two; 40 | } 41 | 42 | clock_t end = clock(); 43 | clock_t time_spent = (end - start); 44 | 45 | printf("Result: %x\n", *(uint32_t*)&three); 46 | return time_spent; 47 | } 48 | 49 | clock_t run_mul() { 50 | float one = 1.0f; 51 | float two = 2.0f; 52 | 53 | clock_t start = clock(); 54 | 55 | // Run division many times to get measurable time 56 | float three; 57 | for(int i = 0; i < TIMES; i++) { 58 | three = one * two; 59 | } 60 | 61 | clock_t end = clock(); 62 | clock_t time_spent = (end - start); 63 | 64 | printf("Result: %x\n", *(uint32_t*)&three); 65 | return time_spent; 66 | } 67 | 68 | clock_t run_fsqrt() { 69 | float sixteen = 16.0f; 70 | 71 | clock_t start = clock(); 72 | 73 | // Run fsqrt many times to get measurable time 74 | float four; 75 | for(int i = 0; i < TIMES; i++) { 76 | four = __builtin_sqrtf(sixteen); 77 | } 78 | 79 | clock_t end = clock(); 80 | clock_t time_spent = (end - start); 81 | 82 | printf("Result: %x\n", *(uint32_t*)&four); 83 | return time_spent; 84 | } 85 | 86 | int main() { 87 | clock_t times[RUNS]; 88 | clock_t sum = 0; 89 | 90 | printf("benchmark %s\n", STRINGIFY(METHOD)); 91 | 92 | // Perform multiple runs 93 | for(int i = 0; i < RUNS; i++) { 94 | times[i] = METHOD(); 95 | sum += times[i]; 96 | printf("Run %d time: %lu ticks\n", i+1, times[i]); 97 | } 98 | 99 | // Calculate average using integer math 100 | clock_t avg = sum / RUNS; 101 | 
printf("\nAverage time: %lu ticks\n", avg); 102 | 103 | return 0; 104 | } -------------------------------------------------------------------------------- /loader/macho_loader.cpp: -------------------------------------------------------------------------------- 1 | #include "macho_loader.hpp" 2 | 3 | #include 4 | #include 5 | 6 | auto MachoLoader::open(std::filesystem::path const& path) -> bool { 7 | if (!std::filesystem::exists(path)) { 8 | return false; 9 | } 10 | 11 | auto file = std::ifstream(path, std::ios::binary); 12 | 13 | if (!file.is_open()) { 14 | return false; 15 | } 16 | 17 | buffer_ = std::vector(std::istreambuf_iterator(file), std::istreambuf_iterator()); 18 | 19 | return buffer_.empty() == false; 20 | } 21 | auto MachoLoader::mach_header() const -> mach_header_64* { 22 | return (mach_header_64*) buffer_.data(); 23 | } 24 | 25 | auto MachoLoader::image_size() const -> size_t { 26 | auto header = mach_header(); 27 | 28 | size_t image_size = 0; 29 | 30 | load_command* cmd = (load_command*)(header + 1); 31 | 32 | for (auto i = 0; i < header->ncmds; i++) { 33 | if (cmd->cmd == LC_SEGMENT_64) { 34 | auto seg = (segment_command_64*) cmd; 35 | 36 | uint64_t seg_end = seg->vmaddr + seg->vmsize; 37 | if (seg_end > image_size) { 38 | image_size = seg_end; 39 | } 40 | } 41 | 42 | cmd = (load_command*)((uint8_t*) cmd + cmd->cmdsize); 43 | } 44 | 45 | image_size = (image_size + vm_page_size - 1) & ~(vm_page_size - 1); 46 | return image_size; 47 | } 48 | 49 | auto MachoLoader::get_section(const char* segment, const char* section) -> section_64* { 50 | auto header = mach_header(); 51 | 52 | load_command* cmd = (load_command*)(header + 1); 53 | 54 | for (auto i = 0; i < header->ncmds; i++) { 55 | if (cmd->cmd == LC_SEGMENT_64) { 56 | auto seg = (segment_command_64*) cmd; 57 | 58 | if (strcmp(seg->segname, segment) == 0) { 59 | section_64* sect = (section_64*)(seg + 1); 60 | 61 | for (auto j = 0; j < seg->nsects; j++) { 62 | if (strcmp(sect->sectname, section) == 0) 
{ 63 | return sect; 64 | } 65 | 66 | sect++; 67 | } 68 | } 69 | } 70 | 71 | cmd = (load_command*)((uint8_t*) cmd + cmd->cmdsize); 72 | } 73 | 74 | return nullptr; 75 | } 76 | 77 | auto MachoLoader::for_each_segment(std::function callback) -> void { 78 | auto header = mach_header(); 79 | 80 | load_command* cmd = (load_command*)(header + 1); 81 | 82 | for (auto i = 0; i < header->ncmds; i++) { 83 | if (cmd->cmd == LC_SEGMENT_64) { 84 | auto seg = (segment_command_64*) cmd; 85 | 86 | if (seg->nsects != 0) { 87 | callback(seg); 88 | } 89 | } 90 | 91 | cmd = (load_command*)((uint8_t*) cmd + cmd->cmdsize); 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /ida/chain_fixup.py: -------------------------------------------------------------------------------- 1 | # IDA fails at fixing up the chained fixups of the rosetta binary 2 | # This script is a workaround to fix the chained fixups 3 | 4 | import ida_segment 5 | import ida_bytes 6 | import idaapi 7 | import ida_offset 8 | 9 | ''' 10 | struct dyld_chained_starts_offsets 11 | { 12 | uint32_t pointer_format; // DYLD_CHAINED_PTR_32_FIRMWARE 13 | uint32_t starts_count; // number of starts in array 14 | uint32_t chain_starts[1]; // array chain start offsets 15 | }; 16 | 17 | DYLD_CHAINED_PTR_64_OFFSET = 6 18 | 19 | 20 | // DYLD_CHAINED_PTR_64/DYLD_CHAINED_PTR_64_OFFSET 21 | struct dyld_chained_ptr_64_rebase 22 | { 23 | uint64_t target : 36, // 64GB max image size (DYLD_CHAINED_PTR_64 => vmAddr, DYLD_CHAINED_PTR_64_OFFSET => runtimeOffset) 24 | high8 : 8, // top 8 bits set to this (DYLD_CHAINED_PTR_64 => after slide added, DYLD_CHAINED_PTR_64_OFFSET => before slide added) 25 | reserved : 7, // all zeros 26 | next : 12, // 4-byte stride 27 | bind : 1; // == 0 28 | }; 29 | 30 | ''' 31 | 32 | 33 | chain_starts_segm = ida_segment.get_segm_by_name('__chain_starts') 34 | assert chain_starts_segm 35 | 36 | class dyld_chained_starts_offsets: 37 | def __init__(self, ea): 38 | 
class dyld_chained_ptr_64_rebase:
    """One decoded DYLD_CHAINED_PTR_64 / DYLD_CHAINED_PTR_64_OFFSET rebase entry.

    Bit layout, least significant bit first (matches the struct quoted at the
    top of this script):
        target:36 | high8:8 | reserved:7 | next:12 | bind:1
    """

    def __init__(self, ea):
        # read the 64-bit entry once and slice the bit-fields out of it
        raw = ida_bytes.get_qword(ea)
        self.target = raw & 0xFFFFFFFFF          # bits 0-35
        self.high8 = (raw >> 36) & 0xFF          # bits 36-43
        self.reserved = (raw >> 44) & 0x7F       # bits 44-50
        self.next = (raw >> 51) & 0xFFF          # bits 51-62, in units of CHAIN_STRIDE bytes
        self.bind = (raw >> 63) & 0x1            # bit 63

    def __str__(self):
        return f'target: 0x{self.target:09x}, high8: 0x{self.high8:02x}, reserved: 0x{self.reserved:02x}, next: 0x{self.next:03x}, bind: 0x{self.bind:01x}'

imagebase = idaapi.get_imagebase()
chained_starts = dyld_chained_starts_offsets(chain_starts_segm.start_ea)

# `next` counts 4-byte units ("4-byte stride" in the struct definition).
CHAIN_STRIDE = 4

def walk_chain(ea):
    """Follow one fixup chain starting at `ea`, replacing every entry with target + imagebase."""
    print(f'walking chain at 0x{ea:08x}')
    while True:
        chained_ptr = dyld_chained_ptr_64_rebase(ea)

        print(f'  0x{ea:08x}: {chained_ptr}')

        # patch the address
        # NOTE(review): high8 is not folded into the patched value; confirm it is always 0 for this binary
        ida_bytes.patch_qword(ea, chained_ptr.target + imagebase)
        # make the address an offset
        ida_bytes.create_data(ea, ida_bytes.FF_QWORD, 8, idaapi.BADADDR)
        ida_offset.op_plain_offset(ea, 0, 0)

        # next == 0 marks the last entry of the chain
        if chained_ptr.next == 0:
            break
        ea += chained_ptr.next * CHAIN_STRIDE

# NOTE(review): chain starts are used as addresses without adding imagebase — verify for images not based at 0
for chain_start in chained_starts.chain_starts:
    walk_chain(chain_start)
set(CMAKE_CXX_STANDARD 20) 6 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 7 | 8 | add_executable(rosettax87 loader/main.cpp loader/macho_loader.cpp loader/offset_finder.cpp) 9 | 10 | add_executable(libRuntimeRosettax87 11 | rosettaRuntime/main.cpp 12 | rosettaRuntime/X87Float80.cpp 13 | rosettaRuntime/X87StackRegister.cpp 14 | rosettaRuntime/X87State.cpp 15 | rosettaRuntime/X87.cpp 16 | rosettaRuntime/Export.cpp 17 | rosettaRuntime/Log.cpp 18 | rosettaRuntime/SIMDGuard.cpp 19 | ) 20 | 21 | target_include_directories(libRuntimeRosettax87 PRIVATE 22 | rosettaRuntime 23 | ) 24 | 25 | # Add ExternalProject module 26 | include(ExternalProject) 27 | 28 | # Create directories before CMake references them 29 | file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/deps/lib) 30 | file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/deps/include) 31 | 32 | set(OPTIMIZATION_FLAGS 33 | "-fvisibility=hidden" 34 | "-O3" 35 | "-funroll-loops" 36 | "-march=native" 37 | "-mtune=native" 38 | "-fomit-frame-pointer" 39 | "-mcpu=apple-m1" 40 | "-fno-builtin-sin" 41 | "-ftree-vectorize" 42 | "-fvectorize" 43 | "-finline-functions" 44 | "-fno-stack-protector" 45 | "-fno-exceptions" 46 | "-fno-unwind-tables" 47 | "-fno-asynchronous-unwind-tables" 48 | ) 49 | 50 | string(REPLACE ";" " " OPTIMIZATION_FLAGS_STRING "${OPTIMIZATION_FLAGS}") 51 | 52 | # Build OpenLibm as a separate project with static library 53 | ExternalProject_Add( 54 | openlibm_build 55 | SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/openlibm 56 | CMAKE_ARGS 57 | -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/deps 58 | -DBUILD_SHARED_LIBS=OFF 59 | -DCMAKE_C_FLAGS=${OPTIMIZATION_FLAGS_STRING} 60 | # Skip running the default install step that has errors 61 | INSTALL_COMMAND "" 62 | BUILD_BYPRODUCTS ${CMAKE_BINARY_DIR}/openlibm_build-prefix/src/openlibm_build-build/libopenlibm.a 63 | ) 64 | 65 | # Add a custom step to copy the headers and static library manually 66 | ExternalProject_Add_Step(openlibm_build custom_install 67 | COMMENT "Installing OpenLibm 
files" 68 | COMMAND ${CMAKE_COMMAND} -E copy_directory 69 | ${CMAKE_CURRENT_SOURCE_DIR}/third_party/openlibm/include 70 | ${CMAKE_BINARY_DIR}/deps/include 71 | COMMAND ${CMAKE_COMMAND} -E copy 72 | ${CMAKE_BINARY_DIR}/openlibm_build-prefix/src/openlibm_build-build/libopenlibm.a 73 | ${CMAKE_BINARY_DIR}/deps/lib/ 74 | DEPENDEES build 75 | ) 76 | 77 | # Create an imported target for the static library 78 | add_library(openlibm STATIC IMPORTED) 79 | add_dependencies(openlibm openlibm_build) 80 | set_target_properties(openlibm PROPERTIES 81 | IMPORTED_LOCATION ${CMAKE_BINARY_DIR}/deps/lib/libopenlibm.a 82 | ) 83 | target_include_directories(openlibm INTERFACE ${CMAKE_BINARY_DIR}/deps/include) 84 | 85 | target_link_options(libRuntimeRosettax87 PRIVATE 86 | "-Wl,-no_compact_unwind" 87 | "-Wl,-no_pie" 88 | "-static" 89 | "-nostdlib" 90 | "-Wl,-segaddr,__TEXT,0x0" 91 | "-Wl,-segaddr,__DATA,0x10000" 92 | "-Wl,-pagezero_size,0x0" 93 | "-Wl,-headerpad,0" 94 | "-Wl,-e,_start" 95 | "-Wl,-segalign,10" 96 | ) 97 | 98 | target_compile_options(libRuntimeRosettax87 PRIVATE 99 | ${OPTIMIZATION_FLAGS} 100 | "-flto" 101 | ) 102 | 103 | target_link_libraries(libRuntimeRosettax87 PRIVATE 104 | openlibm 105 | ) 106 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RosettaHack x87 2 | 3 | ## Overview 4 | 5 | This is an experimental project that modifies Apple's Rosetta technology to use less precise but significantly faster x87 instruction handlers. The benchmarks show approximately 4-5x performance improvement for x87 floating-point operations. 6 | 7 | **⚠️ Warning: This is not ready for end-users. 
Use at your own risk.** 8 | 9 | ## Prerequisites 10 | 11 | - macOS 15.5 or compatible 12 | - C compiler (clang) 13 | - CMake 14 | - Administrator privileges (for running helper service) 15 | 16 | ## Building 17 | 18 | ### Main Project 19 | 20 | ``` 21 | cmake -B build 22 | cmake --build build 23 | ``` 24 | 25 | ### Sample Test Program 26 | 27 | ```clang -v -arch x86_64 -mno-sse -mfpmath=387 ./sample/math.c -o ./build/math``` 28 | 29 | ## Running 30 | 31 | This requires two terminal windows: 32 | 33 | 1. Run the helper service as root from the build folder: 34 | ``` 35 | sudo ./rosettax87 36 | ``` 37 | 38 | 2. Run the target program from the build folder: 39 | ``` 40 | ./rosettax87 ./math 41 | ``` 42 | 43 | ## Performance Benchmark 44 | 45 | ### Default Rosetta x87 handlers: 46 | ```log 47 | ╰─$ ./math 48 | benchmark run_fsqrt 49 | Result: 40800000 50 | Run 1 time: 246664 ticks 51 | Result: 40800000 52 | Run 2 time: 233811 ticks 53 | Result: 40800000 54 | Run 3 time: 227264 ticks 55 | Result: 40800000 56 | Run 4 time: 225907 ticks 57 | Result: 40800000 58 | Run 5 time: 228230 ticks 59 | Result: 40800000 60 | Run 6 time: 227629 ticks 61 | Result: 40800000 62 | Run 7 time: 226741 ticks 63 | Result: 40800000 64 | Run 8 time: 227249 ticks 65 | Result: 40800000 66 | Run 9 time: 227047 ticks 67 | Result: 40800000 68 | Run 10 time: 226771 ticks 69 | 70 | Average time: 229731 ticks 71 | ``` 72 | 73 | ### Custom rosetta x87 handlers: 74 | ```log 75 | ╰─$ ./rosettax87 ./math 76 | launching into program 77 | RosettaRuntimex87 built Apr 23 2025 18:43:47 78 | benchmark run_fsqrt 79 | Result: 40800000 80 | Run 1 time: 48682 ticks 81 | Result: 40800000 82 | Run 2 time: 48292 ticks 83 | Result: 40800000 84 | Run 3 time: 48306 ticks 85 | Result: 40800000 86 | Run 4 time: 48369 ticks 87 | Result: 40800000 88 | Run 5 time: 48230 ticks 89 | Result: 40800000 90 | Run 6 time: 48561 ticks 91 | Result: 40800000 92 | Run 7 time: 49638 ticks 93 | Result: 40800000 94 | Run 8 time: 48371 ticks 
95 | Result: 40800000 96 | Run 9 time: 48313 ticks 97 | Result: 40800000 98 | Run 10 time: 48410 ticks 99 | 100 | Average time: 48517 ticks 101 | ``` 102 | 103 | ## Technical Details 104 | 105 | ### Research Notes 106 | 107 | If you want to examine `runtime` and `libRosettaRuntime` using `IDA PRO`, you need to use `chain_fixup.py`. 108 | - `libRosettaRuntime` is located at `/Library/Apple/usr/libexec/oah/libRosettaRuntime`. 109 | - `runtime` is located at `/usr/libexec/rosetta/runtime`. 110 | 111 | ### Windows Applications Through Wine 112 | 113 | You can use the brew `wine@devel` cask with RosettaHack x87. It supports launching Windows applications through Wine with an environment variable `ROSETTA_X87_PATH`. 114 | 115 | 1. Install `wine@devel` using [Homebrew](https://brew.sh/) 116 | 117 | ```bash 118 | brew install --cask wine@devel 119 | ``` 120 | 121 | 2. To permanently set the environment variable, add the following to your `~/.bashrc` or `~/.zshrc` file: 122 | ```bash 123 | export ROSETTA_X87_PATH=/Path/To/rosettax87 124 | ``` 125 | 126 | 3. Run rosettax87 as sudo in a separate terminal 127 | ```bash 128 | sudo $ROSETTA_X87_PATH 129 | ``` 130 | 131 | 4. Run the Windows application 132 | ```bash 133 | wine PATH_TO_BINARY.exe 134 | ``` 135 | 136 | ## License 137 | 138 | This project is licensed under `MIT`. -------------------------------------------------------------------------------- /loader/offset_finder.cpp: -------------------------------------------------------------------------------- 1 | #include "offset_finder.hpp" 2 | 3 | auto OffsetFinder::set_default_offsets() -> void { 4 | //These are the default offsets for the rosetta runtime that matches MD5 hash: c6b7650638eaf4d15bd56b9aae282b11 5 | 6 | offset_loop_copy_func = 0x1289C; //Some kind of function that copies n values from param3 to param1 where n is defined by a value in param4. 
7 | offset_svc_call_entry = 0x3c58; //The entry point of a function that does a Supervisor Call instruction with the parameter 0x80 (the immediate used by XNU? This is what a quick google search tells me) 8 | offset_svc_call_ret = offset_svc_call_entry + 0xc; //The return point of the above function 9 | 10 | return; 11 | } 12 | 13 | auto OffsetFinder::determine_offsets() -> void{ 14 | //byte patterns in hex for the functions we need to find. 15 | //I really don't know if it's wise to check for the whole function block, but I'm not really sure how much these can change between versions 16 | const std::vector loop_copy_func = { 0x62, 0x06, 0x40, 0xf9, 0x63, 0x12, 0x40, 0xb9};//, 0xe0, 0x05, 0x0f, 0x10, 0x1f, 0x20, 0x03, 0xd5}; 17 | const std::vector svc_call = { 0xb0, 0x18, 0x80, 0xd2, 0x01, 0x10, 0x00, 0xd4, 0xe1, 0x37, 0x9f, 0x9a, 0xc0, 0x03, 0x5f, 0xd6}; 18 | //For svc_call we need to check where this bitpattern starts in the code and also where it ends (we can just add 0xc to the start to get the end) 19 | 20 | //Load rosetta runtime into an ifstream 21 | std::ifstream file{"/usr/libexec/rosetta/runtime", std::ios::binary}; 22 | 23 | //Check if we were successfully able to load the file, if not abort and use default offsets 24 | if(!file){ 25 | printf("Problem accessing rosetta runtime to determine offsets automatically.\nFalling back to macOS 15.4.1 defaults (This WILL crash your app if they are not correct!)\n"); 26 | return; 27 | } 28 | 29 | //Determine size of rosetta runtime file 30 | file.seekg(0, std::ios::end); 31 | std::streampos size = file.tellg(); 32 | file.seekg(0, std::ios::beg); 33 | 34 | //Set our buffer to the size of the file 35 | std::vector buffer(size); 36 | 37 | //read into the buffer 38 | if (!file.read(reinterpret_cast(buffer.data()), size)) { 39 | printf("Problem reading rosetta runtime to determine offsets automatically.\nFalling back to macOS 15.4.1 defaults (This WILL crash your app if they are not correct!)\n"); 40 | return; 41 | } 42 
| 43 | //Do the search and store the results 44 | std::vector results; 45 | for (const auto offset : {loop_copy_func, svc_call}) 46 | { 47 | const std::boyer_moore_searcher searcher(offset.begin(), offset.end()); 48 | const auto it = std::search(buffer.begin(), buffer.end(), searcher); 49 | if (it == buffer.end()){ 50 | std::cout << "Offset not found in rosetta runtime binary\n"; 51 | results.push_back(-1); 52 | } 53 | else{ 54 | std::cout << "Found offset @ " << std::format("{:x}",std::distance(buffer.begin(), it)) << '\n'; 55 | results.push_back((std::uint64_t)std::distance(buffer.begin(), it)); 56 | } 57 | } 58 | 59 | //If we've stored -1 in any offset, error out and fall back to non-accelerated x87 handles. 60 | if ((int)results[0] <= -1 || (int)results[1] <= -1){ 61 | printf("Problem searching rosetta runtime to determine offsets automatically.\nFalling back to macOS 15.4.1 defaults (This WILL crash your app if they are not correct!)\n"); 62 | return; 63 | } 64 | 65 | //Set the offsets to the results that we've found now that we know they're "correct". 
// Issues a raw write system call (number 397, SYS_write_nocancel) through
// `svc #0x80` and returns the value left in x0. When the carry flag is set
// after the trap the result is replaced by -1 (all ones).
//
// fd    - destination file descriptor
// buf   - bytes to write
// count - number of bytes to write
auto syscall_write(int fd, const char* buf, uint64_t count) -> uint64_t {
  register uint64_t x0 __asm__("x0") = fd;
  register uint64_t x1 __asm__("x1") = reinterpret_cast<uint64_t>(buf);
  register uint64_t x2 __asm__("x2") = count;
  register uint64_t x16 __asm__("x16") = 397;  // SYS_write_nocancel

  // x1 is overwritten by the `mov` below, so it has to be a read-write
  // operand; declaring it input-only lets the compiler assume it still holds
  // `buf` afterwards. The flags are both written by the trap and read by
  // `csel`, hence the "cc" clobber.
  asm volatile(
      "svc #0x80\n"
      "mov x1, #-1\n"
      "csel x0, x1, x0, cs\n"
      : "+r"(x0), "+r"(x1)
      : "r"(x2), "r"(x16)
      : "memory", "cc");

  return x0;
}

namespace {

// Write position inside a fixed buffer. `end` is the last usable slot, which
// is kept free for the terminating NUL.
struct OutCursor {
  char* pos;
  char* end;
};

// NOTE(review): the helpers carry the same attributes as simple_printf;
// presumably optnone keeps the byte loops from being turned into libc calls
// that are not available in this runtime - confirm before removing.

// Appends one character, silently dropping it once the buffer is full.
__attribute__((no_stack_protector, optnone)) void put_char(OutCursor& out, char c) {
  if (out.pos < out.end) {
    *out.pos++ = c;
  }
}

// Appends a NUL-terminated string (truncated if the buffer fills up).
__attribute__((no_stack_protector, optnone)) void put_str(OutCursor& out, const char* s) {
  while (*s != '\0') {
    put_char(out, *s++);
  }
}

// Appends `value` in the given base (10 or 16, lowercase digits), left-padded
// with '0' up to `min_digits`.
__attribute__((no_stack_protector, optnone)) void put_unsigned(OutCursor& out,
                                                                unsigned long long value,
                                                                unsigned base, int min_digits) {
  char digits[20];  // 2^64 - 1 has 20 decimal digits
  int count = 0;
  do {
    const unsigned digit = static_cast<unsigned>(value % base);
    digits[count++] = static_cast<char>(digit < 10 ? '0' + digit : 'a' + (digit - 10));
    value /= base;
  } while (value != 0);

  for (int pad = count; pad < min_digits; ++pad) {
    put_char(out, '0');
  }
  while (count > 0) {
    put_char(out, digits[--count]);
  }
}

// Appends a signed decimal number. The magnitude is computed in unsigned
// arithmetic so the most negative value does not overflow on negation.
__attribute__((no_stack_protector, optnone)) void put_signed(OutCursor& out, long long value) {
  unsigned long long magnitude = static_cast<unsigned long long>(value);
  if (value < 0) {
    put_char(out, '-');
    magnitude = 0ULL - magnitude;
  }
  put_unsigned(out, magnitude, 10, 1);
}

}  // namespace

// Minimal printf replacement that formats into a static 1024-byte buffer and
// writes the result to file descriptor 1 with syscall_write.
//
// Supported conversions:
//   %f  double, six truncated (not rounded) decimals; "nan", "inf"/"-inf",
//       and "ovf" for magnitudes of 2^64 or more
//   %s  NUL-terminated string ("(null)" for a null pointer)
//   %p  pointer as "0x" followed by 16 zero-padded lowercase hex digits
//   %d  int
//   %ld long long (read with va_arg(args, long long))
// Any other "%l?" sequence is swallowed without consuming an argument, any
// other "%?" sequence is echoed verbatim, and a trailing lone '%' is printed
// as-is. Output longer than the buffer is truncated. The buffer is static, so
// the function is not reentrant.
__attribute__((no_stack_protector, optnone)) void simple_printf(const char* format, ...) {
  static char buffer[1024];
  OutCursor out{buffer, buffer + sizeof(buffer) - 1};
  va_list args;
  va_start(args, format);

  for (const char* ptr = format; *ptr != '\0'; ++ptr) {
    // Plain character, or a '%' that is the last character of the format.
    if (*ptr != '%' || *(ptr + 1) == '\0') {
      put_char(out, *ptr);
      continue;
    }

    ++ptr;  // step onto the conversion character
    switch (*ptr) {
      case 'f': {
        double f = va_arg(args, double);

        if (std::isnan(f)) {
          put_str(out, "nan");
          break;
        }
        if (f < 0) {
          put_char(out, '-');
          f = -f;
        }
        if (std::isinf(f)) {
          put_str(out, "inf");
          break;
        }
        // Converting a value of 2^64 or more to an integer is undefined.
        if (f >= 18446744073709551616.0) {
          put_str(out, "ovf");
          break;
        }

        const unsigned long long integer_part = static_cast<unsigned long long>(f);
        double fractional_part = f - static_cast<double>(integer_part);

        put_unsigned(out, integer_part, 10, 1);
        put_char(out, '.');

        // Six decimal places, truncated.
        for (int i = 0; i < 6; ++i) {
          fractional_part *= 10;
          const int digit = static_cast<int>(fractional_part);
          put_char(out, static_cast<char>('0' + digit));
          fractional_part -= digit;
        }
        break;
      }
      case 's': {
        const char* str = va_arg(args, const char*);
        put_str(out, str != nullptr ? str : "(null)");
        break;
      }
      case 'p': {
        const uint64_t p = reinterpret_cast<uint64_t>(va_arg(args, void*));
        put_str(out, "0x");
        put_unsigned(out, p, 16, 16);
        break;
      }
      case 'd':
        put_signed(out, va_arg(args, int));
        break;
      case 'l':
        if (*(ptr + 1) == 'd') {
          ++ptr;
          put_signed(out, va_arg(args, long long));
        } else if (*(ptr + 1) != '\0') {
          // Unsupported length-modified conversion: skip its character.
          // Never step over the terminating NUL, otherwise the loop
          // increment would run past the end of the format string.
          ++ptr;
        }
        break;
      default:
        put_char(out, '%');
        put_char(out, *ptr);
        break;
    }
  }

  *out.pos = '\0';
  va_end(args);

  syscall_write(1, buffer, static_cast<uint64_t>(out.pos - buffer));
}
"Invalid size for Exports"); 23 | 24 | extern init_library_t orig_init_library; 25 | extern register_runtime_routine_offsets_t orig_register_runtime_routine_offsets; 26 | extern translator_use_t8027_codegen_t orig_translator_use_t8027_codegen; 27 | extern translator_reset_t orig_translator_reset; 28 | extern ir_create_bad_access_t orig_ir_create_bad_access; 29 | extern ir_create_t orig_ir_create; 30 | extern module_free_t orig_module_free; 31 | extern module_get_size_t orig_module_get_size; 32 | extern module_is_bad_access_t orig_module_is_bad_access; 33 | extern module_print_t orig_module_print; 34 | extern translator_translate_t orig_translator_translate; 35 | extern translator_free_t orig_translator_free; 36 | extern translator_get_data_t orig_translator_get_data; 37 | extern translator_get_size_t orig_translator_get_size; 38 | extern translator_get_branch_slots_offset_t orig_translator_get_branch_slots_offset; 39 | extern translator_get_branch_slots_count_t orig_translator_get_branch_slots_count; 40 | extern translator_get_branch_entries_t orig_translator_get_branch_entries; 41 | extern translator_get_instruction_offsets_t orig_translator_get_instruction_offsets; 42 | extern translator_apply_fixups_t orig_translator_apply_fixups; 43 | extern x87_init_t orig_x87_init; 44 | extern x87_state_from_x86_float_state_t orig_x87_state_from_x86_float_state; 45 | extern x87_state_to_x86_float_state_t orig_x87_state_to_x86_float_state; 46 | extern x87_pop_register_stack_t orig_x87_pop_register_stack; 47 | extern x87_f2xm1_t orig_x87_f2xm1; 48 | extern x87_fabs_t orig_x87_fabs; 49 | extern x87_fadd_ST_t orig_x87_fadd_ST; 50 | extern x87_fadd_f32_t orig_x87_fadd_f32; 51 | extern x87_fadd_f64_t orig_x87_fadd_f64; 52 | extern x87_fbld_t orig_x87_fbld; 53 | extern x87_fbstp_t orig_x87_fbstp; 54 | extern x87_fchs_t orig_x87_fchs; 55 | extern x87_fcmov_t orig_x87_fcmov; 56 | extern x87_fcom_ST_t orig_x87_fcom_ST; 57 | extern x87_fcom_f32_t orig_x87_fcom_f32; 58 | extern 
x87_fcom_f64_t orig_x87_fcom_f64; 59 | extern x87_fcomi_t orig_x87_fcomi; 60 | extern x87_fcos_t orig_x87_fcos; 61 | extern x87_fdecstp_t orig_x87_fdecstp; 62 | extern x87_fdiv_ST_t orig_x87_fdiv_ST; 63 | extern x87_fdiv_f32_t orig_x87_fdiv_f32; 64 | extern x87_fdiv_f64_t orig_x87_fdiv_f64; 65 | extern x87_fdivr_ST_t orig_x87_fdivr_ST; 66 | extern x87_fdivr_f32_t orig_x87_fdivr_f32; 67 | extern x87_fdivr_f64_t orig_x87_fdivr_f64; 68 | extern x87_ffree_t orig_x87_ffree; 69 | extern x87_fiadd_t orig_x87_fiadd; 70 | extern x87_ficom_t orig_x87_ficom; 71 | extern x87_fidiv_t orig_x87_fidiv; 72 | extern x87_fidivr_t orig_x87_fidivr; 73 | extern x87_fild_t orig_x87_fild; 74 | extern x87_fimul_t orig_x87_fimul; 75 | extern x87_fincstp_t orig_x87_fincstp; 76 | extern x87_fist_i16_t orig_x87_fist_i16; 77 | extern x87_fist_i32_t orig_x87_fist_i32; 78 | extern x87_fist_i64_t orig_x87_fist_i64; 79 | extern x87_fistt_i16_t orig_x87_fistt_i16; 80 | extern x87_fistt_i32_t orig_x87_fistt_i32; 81 | extern x87_fistt_i64_t orig_x87_fistt_i64; 82 | extern x87_fisub_t orig_x87_fisub; 83 | extern x87_fisubr_t orig_x87_fisubr; 84 | extern x87_fld_STi_t orig_x87_fld_STi; 85 | extern x87_fld_constant_t orig_x87_fld_constant; 86 | extern x87_fld_fp32_t orig_x87_fld_fp32; 87 | extern x87_fld_fp64_t orig_x87_fld_fp64; 88 | extern x87_fld_fp80_t orig_x87_fld_fp80; 89 | extern x87_fmul_ST_t orig_x87_fmul_ST; 90 | extern x87_fmul_f32_t orig_x87_fmul_f32; 91 | extern x87_fmul_f64_t orig_x87_fmul_f64; 92 | extern x87_fpatan_t orig_x87_fpatan; 93 | extern x87_fprem_t orig_x87_fprem; 94 | extern x87_fprem1_t orig_x87_fprem1; 95 | extern x87_fptan_t orig_x87_fptan; 96 | extern x87_frndint_t orig_x87_frndint; 97 | extern x87_fscale_t orig_x87_fscale; 98 | extern x87_fsin_t orig_x87_fsin; 99 | extern x87_fsincos_t orig_x87_fsincos; 100 | extern x87_fsqrt_t orig_x87_fsqrt; 101 | extern x87_fst_STi_t orig_x87_fst_STi; 102 | extern x87_fst_fp32_t orig_x87_fst_fp32; 103 | extern x87_fst_fp64_t 
orig_x87_fst_fp64; 104 | extern x87_fst_fp80_t orig_x87_fst_fp80; 105 | extern x87_fsub_ST_t orig_x87_fsub_ST; 106 | extern x87_fsub_f32_t orig_x87_fsub_f32; 107 | extern x87_fsub_f64_t orig_x87_fsub_f64; 108 | extern x87_fsubr_ST_t orig_x87_fsubr_ST; 109 | extern x87_fsubr_f32_t orig_x87_fsubr_f32; 110 | extern x87_fsubr_f64_t orig_x87_fsubr_f64; 111 | extern x87_fucom_t orig_x87_fucom; 112 | extern x87_fucomi_t orig_x87_fucomi; 113 | extern x87_fxam_t orig_x87_fxam; 114 | extern x87_fxch_t orig_x87_fxch; 115 | extern x87_fxtract_t orig_x87_fxtract; 116 | extern x87_fyl2x_t orig_x87_fyl2x; 117 | extern x87_fyl2xp1_t orig_x87_fyl2xp1; 118 | extern sse_pcmpestri_t orig_sse_pcmpestri; 119 | extern sse_pcmpestrm_t orig_sse_pcmpestrm; 120 | extern sse_pcmpistri_t orig_sse_pcmpistri; 121 | extern sse_pcmpistrm_t orig_sse_pcmpistrm; 122 | extern is_ldt_initialized_t orig_is_ldt_initialized; 123 | extern get_ldt_t orig_get_ldt; 124 | extern set_ldt_t orig_set_ldt; 125 | extern execution_mode_for_code_segment_selector_t orig_execution_mode_for_code_segment_selector; 126 | extern mov_segment_t orig_mov_segment; 127 | extern abi_for_address_t orig_abi_for_address; 128 | extern determine_state_recovery_action_t orig_determine_state_recovery_action; 129 | extern get_segment_limit_t orig_get_segment_limit; 130 | extern translator_set_variant_t orig_translator_set_variant; 131 | 132 | extern runtime_cpuid_t orig_runtime_cpuid; 133 | extern runtime_wide_udiv_64_t orig_runtime_wide_udiv_64; 134 | extern runtime_wide_sdiv_64_t orig_runtime_wide_sdiv_64; 135 | 136 | extern Exports kImports; 137 | 138 | extern auto exports_init() -> void; -------------------------------------------------------------------------------- /rosettaRuntime/X87.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "X87Float80.h" 6 | 7 | struct SymbolList {}; 8 | struct ThreadContextOffsets {}; 9 | enum BadAccessKind {}; 10 | 
enum TranslationMode {}; 11 | enum ExecutionMode {}; 12 | struct ModuleResult {}; 13 | struct TranslationResult {}; 14 | struct X86FloatState64 {}; 15 | enum X87Constant { 16 | kOne = 0, 17 | kZero = 1, 18 | kPi = 2, 19 | kLog2e = 3, 20 | kLoge2 = 4, 21 | kLog2t = 5, 22 | kLog102 = 6, 23 | }; 24 | 25 | struct SegmentRegisters {}; 26 | enum SegmentRegister {}; 27 | struct X87State; 28 | 29 | struct X87Float80StatusWordResult { 30 | uint64_t mantissa; 31 | uint16_t exponent; 32 | uint16_t status_word; 33 | }; 34 | 35 | struct X87ResultStatusWord { 36 | union { 37 | uint64_t result; 38 | int64_t signed_result; 39 | }; 40 | uint16_t status_word; 41 | }; 42 | static_assert(sizeof(X87ResultStatusWord) == 0x10); 43 | 44 | void* init_library(SymbolList const*, unsigned long long, ThreadContextOffsets const*); 45 | using init_library_t = decltype(&init_library); 46 | 47 | void register_runtime_routine_offsets(); 48 | using register_runtime_routine_offsets_t = decltype(®ister_runtime_routine_offsets); 49 | 50 | void translator_use_t8027_codegen(); 51 | using translator_use_t8027_codegen_t = decltype(&translator_use_t8027_codegen); 52 | 53 | void translator_reset(); 54 | using translator_reset_t = decltype(&translator_reset); 55 | 56 | void ir_create_bad_access(); 57 | using ir_create_bad_access_t = decltype(&ir_create_bad_access); 58 | 59 | void ir_create(); 60 | using ir_create_t = decltype(&ir_create); 61 | 62 | void module_free(); 63 | using module_free_t = decltype(&module_free); 64 | 65 | void module_get_size(); 66 | using module_get_size_t = decltype(&module_get_size); 67 | 68 | void module_is_bad_access(); 69 | using module_is_bad_access_t = decltype(&module_is_bad_access); 70 | 71 | void module_print(); 72 | using module_print_t = decltype(&module_print); 73 | 74 | void translator_translate(); 75 | using translator_translate_t = decltype(&translator_translate); 76 | 77 | void translator_free(); 78 | using translator_free_t = decltype(&translator_free); 79 | 80 | void 
translator_get_data(); 81 | using translator_get_data_t = decltype(&translator_get_data); 82 | 83 | void translator_get_size(); 84 | using translator_get_size_t = decltype(&translator_get_size); 85 | 86 | void translator_get_branch_slots_offset(); 87 | using translator_get_branch_slots_offset_t = decltype(&translator_get_branch_slots_offset); 88 | 89 | void translator_get_branch_slots_count(); 90 | using translator_get_branch_slots_count_t = decltype(&translator_get_branch_slots_count); 91 | 92 | void translator_get_branch_entries(); 93 | using translator_get_branch_entries_t = decltype(&translator_get_branch_entries); 94 | 95 | void translator_get_instruction_offsets(); 96 | using translator_get_instruction_offsets_t = decltype(&translator_get_instruction_offsets); 97 | 98 | void translator_apply_fixups(); 99 | using translator_apply_fixups_t = decltype(&translator_apply_fixups); 100 | 101 | void x87_init(X87State*); 102 | using x87_init_t = decltype(&x87_init); 103 | 104 | void x87_state_from_x86_float_state(); 105 | using x87_state_from_x86_float_state_t = decltype(&x87_state_from_x86_float_state); 106 | 107 | void x87_state_to_x86_float_state(); 108 | using x87_state_to_x86_float_state_t = decltype(&x87_state_to_x86_float_state); 109 | 110 | void x87_pop_register_stack(); 111 | using x87_pop_register_stack_t = decltype(&x87_pop_register_stack); 112 | 113 | void x87_f2xm1(X87State*); 114 | using x87_f2xm1_t = decltype(&x87_f2xm1); 115 | 116 | void x87_fabs(X87State*); 117 | using x87_fabs_t = decltype(&x87_fabs); 118 | 119 | void x87_fadd_ST(X87State*, unsigned int, unsigned int, bool); 120 | using x87_fadd_ST_t = decltype(&x87_fadd_ST); 121 | 122 | void x87_fadd_f32(X87State*, unsigned int); 123 | using x87_fadd_f32_t = decltype(&x87_fadd_f32); 124 | 125 | void x87_fadd_f64(X87State*, unsigned long long); 126 | using x87_fadd_f64_t = decltype(&x87_fadd_f64); 127 | 128 | void x87_fbld(X87State*, unsigned long long, unsigned long long); 129 | using x87_fbld_t = 
decltype(&x87_fbld); 130 | 131 | struct uint128_t { 132 | uint64_t low; 133 | uint64_t high; 134 | }; 135 | 136 | uint128_t x87_fbstp(X87State*); 137 | using x87_fbstp_t = decltype(&x87_fbstp); 138 | 139 | void x87_fchs(X87State*); 140 | using x87_fchs_t = decltype(&x87_fchs); 141 | 142 | void x87_fcmov(X87State*, unsigned int, unsigned int); 143 | using x87_fcmov_t = decltype(&x87_fcmov); 144 | 145 | void x87_fcom_ST(X87State*, unsigned int, unsigned int); 146 | using x87_fcom_ST_t = decltype(&x87_fcom_ST); 147 | 148 | void x87_fcom_f32(X87State*, unsigned int, bool); 149 | using x87_fcom_f32_t = decltype(&x87_fcom_f32); 150 | 151 | void x87_fcom_f64(X87State*, unsigned long long, bool); 152 | using x87_fcom_f64_t = decltype(&x87_fcom_f64); 153 | 154 | uint32_t x87_fcomi(X87State*, unsigned int, bool); 155 | using x87_fcomi_t = decltype(&x87_fcomi); 156 | 157 | void x87_fcos(X87State*); 158 | using x87_fcos_t = decltype(&x87_fcos); 159 | 160 | void x87_fdecstp(X87State*); 161 | using x87_fdecstp_t = decltype(&x87_fdecstp); 162 | 163 | void x87_fdiv_ST(X87State*, unsigned int, unsigned int, bool); 164 | using x87_fdiv_ST_t = decltype(&x87_fdiv_ST); 165 | 166 | void x87_fdiv_f32(X87State*, unsigned int); 167 | using x87_fdiv_f32_t = decltype(&x87_fdiv_f32); 168 | 169 | void x87_fdiv_f64(X87State*, unsigned long long); 170 | using x87_fdiv_f64_t = decltype(&x87_fdiv_f64); 171 | 172 | void x87_fdivr_ST(X87State*, unsigned int, unsigned int, bool); 173 | using x87_fdivr_ST_t = decltype(&x87_fdivr_ST); 174 | 175 | void x87_fdivr_f32(X87State*, unsigned int); 176 | using x87_fdivr_f32_t = decltype(&x87_fdivr_f32); 177 | 178 | void x87_fdivr_f64(X87State*, unsigned long long); 179 | using x87_fdivr_f64_t = decltype(&x87_fdivr_f64); 180 | 181 | void x87_ffree(X87State*, unsigned int); 182 | using x87_ffree_t = decltype(&x87_ffree); 183 | 184 | void x87_fiadd(X87State*, int); 185 | using x87_fiadd_t = decltype(&x87_fiadd); 186 | 187 | void x87_ficom(X87State*, int, bool); 
188 | using x87_ficom_t = decltype(&x87_ficom); 189 | 190 | void x87_fidiv(X87State*, int); 191 | using x87_fidiv_t = decltype(&x87_fidiv); 192 | 193 | void x87_fidivr(X87State*, int); 194 | using x87_fidivr_t = decltype(&x87_fidivr); 195 | 196 | void x87_fild(X87State*, int64_t); 197 | using x87_fild_t = decltype(&x87_fild); 198 | 199 | void x87_fimul(X87State*, int); 200 | using x87_fimul_t = decltype(&x87_fimul); 201 | 202 | void x87_fincstp(X87State*); 203 | using x87_fincstp_t = decltype(&x87_fincstp); 204 | 205 | X87ResultStatusWord x87_fist_i16(X87State const*); 206 | using x87_fist_i16_t = decltype(&x87_fist_i16); 207 | 208 | X87ResultStatusWord x87_fist_i32(X87State const*); 209 | using x87_fist_i32_t = decltype(&x87_fist_i32); 210 | 211 | X87ResultStatusWord x87_fist_i64(X87State const*); 212 | using x87_fist_i64_t = decltype(&x87_fist_i64); 213 | 214 | X87ResultStatusWord x87_fistt_i16(X87State const*); 215 | using x87_fistt_i16_t = decltype(&x87_fistt_i16); 216 | 217 | X87ResultStatusWord x87_fistt_i32(X87State const*); 218 | using x87_fistt_i32_t = decltype(&x87_fistt_i32); 219 | 220 | X87ResultStatusWord x87_fistt_i64(X87State const*); 221 | using x87_fistt_i64_t = decltype(&x87_fistt_i64); 222 | 223 | void x87_fisub(X87State*, int); 224 | using x87_fisub_t = decltype(&x87_fisub); 225 | 226 | void x87_fisubr(X87State*, int); 227 | using x87_fisubr_t = decltype(&x87_fisubr); 228 | 229 | void x87_fld_STi(X87State*, unsigned int); 230 | using x87_fld_STi_t = decltype(&x87_fld_STi); 231 | 232 | void x87_fld_constant(X87State*, X87Constant); 233 | using x87_fld_constant_t = decltype(&x87_fld_constant); 234 | 235 | void x87_fld_fp32(X87State*, unsigned int); 236 | using x87_fld_fp32_t = decltype(&x87_fld_fp32); 237 | 238 | void x87_fld_fp64(X87State*, unsigned long long); 239 | using x87_fld_fp64_t = decltype(&x87_fld_fp64); 240 | 241 | void x87_fld_fp80(X87State*, X87Float80); 242 | using x87_fld_fp80_t = decltype(&x87_fld_fp80); 243 | 244 | void 
x87_fmul_ST(X87State*, unsigned int, unsigned int, bool); 245 | using x87_fmul_ST_t = decltype(&x87_fmul_ST); 246 | 247 | void x87_fmul_f32(X87State*, unsigned int); 248 | using x87_fmul_f32_t = decltype(&x87_fmul_f32); 249 | 250 | void x87_fmul_f64(X87State*, unsigned long long); 251 | using x87_fmul_f64_t = decltype(&x87_fmul_f64); 252 | 253 | void x87_fpatan(X87State*); 254 | using x87_fpatan_t = decltype(&x87_fpatan); 255 | 256 | void x87_fprem(X87State*); 257 | using x87_fprem_t = decltype(&x87_fprem); 258 | 259 | void x87_fprem1(X87State*); 260 | using x87_fprem1_t = decltype(&x87_fprem1); 261 | 262 | void x87_fptan(X87State*); 263 | using x87_fptan_t = decltype(&x87_fptan); 264 | 265 | void x87_frndint(X87State*); 266 | using x87_frndint_t = decltype(&x87_frndint); 267 | 268 | void x87_fscale(X87State*); 269 | using x87_fscale_t = decltype(&x87_fscale); 270 | 271 | void x87_fsin(X87State*); 272 | using x87_fsin_t = decltype(&x87_fsin); 273 | 274 | void x87_fsincos(X87State*); 275 | using x87_fsincos_t = decltype(&x87_fsincos); 276 | 277 | void x87_fsqrt(X87State*); 278 | using x87_fsqrt_t = decltype(&x87_fsqrt); 279 | 280 | void x87_fst_STi(X87State*, unsigned int, bool); 281 | using x87_fst_STi_t = decltype(&x87_fst_STi); 282 | 283 | X87ResultStatusWord x87_fst_fp32(X87State const*); 284 | using x87_fst_fp32_t = decltype(&x87_fst_fp32); 285 | 286 | X87ResultStatusWord x87_fst_fp64(X87State const*); 287 | using x87_fst_fp64_t = decltype(&x87_fst_fp64); 288 | 289 | X87Float80StatusWordResult x87_fst_fp80(X87State const*); 290 | using x87_fst_fp80_t = decltype(&x87_fst_fp80); 291 | 292 | void x87_fsub_ST(X87State*, unsigned int, unsigned int, bool); 293 | using x87_fsub_ST_t = decltype(&x87_fsub_ST); 294 | 295 | void x87_fsub_f32(X87State*, unsigned int); 296 | using x87_fsub_f32_t = decltype(&x87_fsub_f32); 297 | 298 | void x87_fsub_f64(X87State*, unsigned long long); 299 | using x87_fsub_f64_t = decltype(&x87_fsub_f64); 300 | 301 | void 
x87_fsubr_ST(X87State*, unsigned int, unsigned int, bool); 302 | using x87_fsubr_ST_t = decltype(&x87_fsubr_ST); 303 | 304 | void x87_fsubr_f32(X87State*, unsigned int); 305 | using x87_fsubr_f32_t = decltype(&x87_fsubr_f32); 306 | 307 | void x87_fsubr_f64(X87State*, unsigned long long); 308 | using x87_fsubr_f64_t = decltype(&x87_fsubr_f64); 309 | 310 | void x87_fucom(X87State*, unsigned int, unsigned int); 311 | using x87_fucom_t = decltype(&x87_fucom); 312 | 313 | uint32_t x87_fucomi(X87State*, unsigned int, bool); 314 | using x87_fucomi_t = decltype(&x87_fucomi); 315 | 316 | void x87_fxam(X87State*); 317 | using x87_fxam_t = decltype(&x87_fxam); 318 | 319 | void x87_fxch(X87State*, unsigned int); 320 | using x87_fxch_t = decltype(&x87_fxch); 321 | 322 | void x87_fxtract(X87State*); 323 | using x87_fxtract_t = decltype(&x87_fxtract); 324 | 325 | void x87_fyl2x(X87State*); 326 | using x87_fyl2x_t = decltype(&x87_fyl2x); 327 | 328 | void x87_fyl2xp1(X87State*); 329 | using x87_fyl2xp1_t = decltype(&x87_fyl2xp1); 330 | 331 | void sse_pcmpestri(); 332 | using sse_pcmpestri_t = decltype(&sse_pcmpestri); 333 | 334 | void sse_pcmpestrm(); 335 | using sse_pcmpestrm_t = decltype(&sse_pcmpestrm); 336 | 337 | void sse_pcmpistri(); 338 | using sse_pcmpistri_t = decltype(&sse_pcmpistri); 339 | 340 | void sse_pcmpistrm(); 341 | using sse_pcmpistrm_t = decltype(&sse_pcmpistrm); 342 | 343 | void is_ldt_initialized(void); 344 | using is_ldt_initialized_t = decltype(&is_ldt_initialized); 345 | 346 | void get_ldt(); 347 | using get_ldt_t = decltype(&get_ldt); 348 | 349 | void set_ldt(); 350 | using set_ldt_t = decltype(&set_ldt); 351 | 352 | void execution_mode_for_code_segment_selector(); 353 | using execution_mode_for_code_segment_selector_t = decltype(&execution_mode_for_code_segment_selector); 354 | 355 | void mov_segment(); 356 | using mov_segment_t = decltype(&mov_segment); 357 | 358 | void abi_for_address(); 359 | using abi_for_address_t = decltype(&abi_for_address); 360 | 
361 | void determine_state_recovery_action(); 362 | using determine_state_recovery_action_t = decltype(&determine_state_recovery_action); 363 | 364 | void get_segment_limit(); 365 | using get_segment_limit_t = decltype(&get_segment_limit); 366 | 367 | void translator_set_variant(); 368 | using translator_set_variant_t = decltype(&translator_set_variant); 369 | 370 | void runtime_cpuid(); 371 | using runtime_cpuid_t = decltype(&runtime_cpuid); 372 | 373 | void runtime_wide_udiv_64(); 374 | using runtime_wide_udiv_64_t = decltype(&runtime_wide_udiv_64); 375 | 376 | void runtime_wide_sdiv_64(); 377 | using runtime_wide_sdiv_64_t = decltype(&runtime_wide_sdiv_64); 378 | -------------------------------------------------------------------------------- /rosettaRuntime/X87State.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "Log.h" 10 | #include "X87Float80.h" 11 | #include "X87StackRegister.h" 12 | 13 | // Option to convert 64 bit float to 80 bit x87 format to be compatible with 14 | // original rosetta x87 implementation This must be used if you want to 15 | // selectively enable x87 instructions. 
16 | // #define X87_CONVERT_TO_FP80 17 | 18 | enum X87StatusWordFlag : uint16_t { 19 | // Exception flags 20 | kInvalidOperation = 0x0001, // Invalid Operation Exception 21 | kDenormalizedOperand = 0x0002, // Denormalized Operand Exception 22 | kZeroDivide = 0x0004, // Zero Divide Exception 23 | kOverflow = 0x0008, // Overflow Exception– 24 | kUnderflow = 0x0010, // Underflow Exception 25 | kPrecision = 0x0020, // Precision Exception 26 | 27 | // Status flags 28 | kStackFault = 0x0040, // Stack Fault 29 | kErrorSummary = 0x0080, // Error Summary Status 30 | 31 | // Condition codes 32 | kConditionCode0 = 0x0100, // Condition Code 0 33 | kConditionCode1 = 0x0200, // Condition Code 1 34 | kConditionCode2 = 0x0400, // Condition Code 2 35 | kConditionCode3 = 0x4000, // Condition Code 3 36 | 37 | // Special flags 38 | kTopOfStack = 0x3800, // Top of Stack Pointer (bits 11-13) 39 | kBusy = 0x8000, // FPU Busy 40 | }; 41 | 42 | enum class X87TagState { 43 | kValid = 0, // 00: Valid non-zero value 44 | kZero = 1, // 01: Valid zero value 45 | kSpecial = 2, // 10: Special value (NaN, Infinity, Denormal) 46 | kEmpty = 3 // 11: Empty register 47 | }; 48 | 49 | enum X87ControlWord : uint16_t { 50 | // Exception Masks (1=masked) 51 | kInvalidOpMask = 0x0001, 52 | kDenormalMask = 0x0002, 53 | kZeroDivideMask = 0x0004, 54 | kOverflowMask = 0x0008, 55 | kUnderflowMask = 0x0010, 56 | kPrecisionMask = 0x0020, 57 | 58 | // Precision Control 59 | kPrecisionControl = 0x0300, 60 | kPrecision24Bit = 0x0000, // Single precision 61 | kPrecision53Bit = 0x0200, // Double precision 62 | kPrecision64Bit = 0x0300, // Extended precision 63 | 64 | // Rounding Control 65 | kRoundingControlMask = 0x0C00, 66 | kRoundToNearest = 0x0000, 67 | kRoundDown = 0x0400, 68 | kRoundUp = 0x0800, 69 | kRoundToZero = 0x0C00, 70 | 71 | // Infinity Control (only on 287) 72 | kInfinityControl = 0x1000 73 | }; 74 | 75 | #if defined(X87_CONVERT_TO_FP80) 76 | float inline ConvertX87RegisterToFloat32(X87Float80 x87, 77 | 
uint16_t *status_flags) { 78 | uint64_t mantissa = x87.mantissa; 79 | uint16_t biased_exp = x87.exponent & 0x7FFF; 80 | uint32_t sign = (x87.exponent & 0x8000) ? 0x80000000 : 0; 81 | uint32_t bits; 82 | 83 | if (biased_exp == 0 && mantissa == 0) { 84 | bits = sign; 85 | } else if (biased_exp == 0x7FFF) { 86 | // NaN or Infinity 87 | if ((mantissa & 0x7FFFFFFFFFFFFFFFULL) != 0) { 88 | if (status_flags) 89 | *status_flags |= X87StatusWordFlag::kInvalidOperation; 90 | bits = sign | 0x7FC00000; // Quiet NaN 91 | } else { 92 | bits = sign | 0x7F800000; // Infinity 93 | } 94 | } else { 95 | int32_t exp = static_cast(biased_exp) - 16383 + 127; 96 | uint64_t frac = mantissa & 0x7FFFFFFFFFFFFFFFULL; 97 | uint32_t significant = static_cast(frac >> 40); 98 | 99 | uint64_t round_bit = (frac >> 39) & 1; 100 | uint64_t sticky_bits = frac & 0x7FFFFFFFFF; 101 | 102 | // Underflow or subnormal 103 | if (exp <= 0) { 104 | if (status_flags) 105 | *status_flags |= 106 | (X87StatusWordFlag::kUnderflow | X87StatusWordFlag::kPrecision); 107 | if (exp < -23) { 108 | bits = sign; 109 | } else { 110 | int shift = 1 - exp; 111 | significant = static_cast((frac | 0x8000000000000000ULL) >> 112 | (40 + shift)); 113 | round_bit = (frac >> (39 + shift)) & 1; 114 | sticky_bits = frac & ((1ULL << (39 + shift)) - 1); 115 | exp = 0; 116 | } 117 | } 118 | 119 | // Precision exception 120 | bool inexact = (round_bit || sticky_bits); 121 | if (inexact && status_flags) 122 | *status_flags |= X87StatusWordFlag::kPrecision; 123 | 124 | // Round to nearest even and detect overflow 125 | if (round_bit && (sticky_bits || (significant & 1))) { 126 | significant++; 127 | if (significant == 0x800000) { 128 | significant = 0; 129 | exp++; 130 | if (exp >= 255) { 131 | if (status_flags) 132 | *status_flags |= X87StatusWordFlag::kOverflow; 133 | bits = sign | 0x7F800000; 134 | goto return_float32; 135 | } 136 | } 137 | } 138 | 139 | // Overflow after rounding 140 | if (exp >= 255) { 141 | if (status_flags) 142 | 
*status_flags |= X87StatusWordFlag::kOverflow; 143 | bits = sign | 0x7F800000; 144 | } else { 145 | bits = 146 | sign | (static_cast(exp) << 23) | (significant & 0x7FFFFF); 147 | } 148 | } 149 | 150 | return_float32: 151 | union { 152 | uint32_t u; 153 | float f; 154 | } result; 155 | result.u = bits; 156 | return result.f; 157 | } 158 | 159 | inline X87Float80 ConvertFloat64ToX87Register(double value, 160 | uint16_t *status_flags) { 161 | X87Float80 result; 162 | union { 163 | double v; 164 | uint64_t bits; 165 | } d{value}; 166 | uint64_t sign = (d.bits >> 63) & 0x1; 167 | uint64_t exp = (d.bits >> 52) & 0x7FF; 168 | uint64_t mantissa = d.bits & 0xFFFFFFFFFFFFFULL; 169 | 170 | // Zero 171 | if (exp == 0 && mantissa == 0) { 172 | result.exponent = static_cast(sign << 15); 173 | result.mantissa = 0; 174 | return result; 175 | } 176 | 177 | // NaN or Infinity 178 | if (exp == 0x7FF) { 179 | result.exponent = static_cast((sign << 15) | 0x7FFF); 180 | if (mantissa == 0) { 181 | result.mantissa = 0x8000000000000000ULL; 182 | } else { 183 | result.mantissa = 0xC000000000000000ULL | (mantissa << 11); 184 | if (status_flags) 185 | *status_flags |= X87StatusWordFlag::kInvalidOperation; 186 | } 187 | return result; 188 | } 189 | 190 | // Denormalized double 191 | if (exp == 0) { 192 | if (status_flags) 193 | *status_flags |= X87StatusWordFlag::kDenormalizedOperand; 194 | int shift = __builtin_clzll(mantissa) - 11; 195 | mantissa <<= (shift + 1); 196 | exp = 1 - shift; 197 | } 198 | 199 | // Normal 200 | uint16_t x87_exp = static_cast((exp - 1023) + 16383); 201 | result.exponent = static_cast((sign << 15) | x87_exp); 202 | result.mantissa = (mantissa << 11) | 0x8000000000000000ULL; 203 | return result; 204 | } 205 | 206 | #endif 207 | 208 | double inline ConvertX87RegisterToFloat64(X87Float80 x87, 209 | uint16_t *status_flags) { 210 | uint64_t mantissa = x87.mantissa; 211 | uint16_t biased_exp = x87.exponent & 0x7FFF; 212 | uint64_t sign = (x87.exponent & 0x8000) ? 
0x8000000000000000ULL : 0; 213 | union { 214 | uint64_t bits; 215 | double value; 216 | } result; 217 | 218 | // Zero 219 | if (mantissa == 0) { 220 | return (sign ? -0.0 : 0.0); 221 | } 222 | 223 | // NaN or Infinity 224 | if (biased_exp == 0x7FFF) { 225 | if (mantissa != 0x8000000000000000ULL) { 226 | if (status_flags) 227 | *status_flags |= X87StatusWordFlag::kInvalidOperation; 228 | result.bits = sign | 0x7FF8000000000000ULL; 229 | return result.value; 230 | } 231 | result.bits = sign | 0x7FF0000000000000ULL; 232 | return result.value; 233 | } 234 | 235 | int32_t exp = static_cast(biased_exp) - 16383 + 1023; 236 | 237 | // Denormalized / Underflow 238 | if (exp <= 0) { 239 | if (status_flags) 240 | *status_flags |= X87StatusWordFlag::kUnderflow; 241 | if (exp < -52) { 242 | return (sign ? -0.0 : 0.0); 243 | } 244 | // Denormalize 245 | mantissa >>= (1 - exp); 246 | exp = 0; 247 | } 248 | 249 | // Overflow 250 | if (exp >= 2047) { 251 | if (status_flags) 252 | *status_flags |= X87StatusWordFlag::kOverflow; 253 | result.bits = sign | 0x7FF0000000000000ULL; 254 | return result.value; 255 | } 256 | 257 | // Round to 52 bits 258 | uint64_t significant = (mantissa >> 11) & 0xFFFFFFFFFFFFFULL; 259 | uint64_t round_bit = (mantissa >> 10) & 1; 260 | uint64_t sticky_bits = (mantissa & ((1ULL << 10) - 1)) != 0; 261 | 262 | // Precision exception 263 | if ((round_bit || sticky_bits) && status_flags) { 264 | *status_flags |= X87StatusWordFlag::kPrecision; 265 | } 266 | 267 | // Round to nearest even 268 | if (round_bit && (sticky_bits || (significant & 1))) { 269 | significant++; 270 | if (significant == 0x10000000000000ULL) { 271 | significant = 0; 272 | exp++; 273 | if (exp >= 2047) { 274 | if (status_flags) 275 | *status_flags |= X87StatusWordFlag::kOverflow; 276 | result.bits = sign | 0x7FF0000000000000ULL; 277 | return result.value; 278 | } 279 | } 280 | } 281 | 282 | result.bits = sign | (static_cast(exp) << 52) | significant; 283 | return result.value; 284 | } 285 | 
286 | #pragma pack(push, 1) 287 | struct X87State { 288 | uint16_t control_word; 289 | uint16_t status_word; 290 | int16_t tag_word; 291 | 292 | #if defined(X87_CONVERT_TO_FP80) 293 | X87Float80 st[8]; 294 | #else 295 | uint8_t padding[2]; // Padding to align to 16 bytes 296 | X87StackRegister st[8]; 297 | #endif 298 | 299 | X87State() 300 | : control_word(0x037F), status_word(0x0000), 301 | tag_word(0xFFFF) // All registers marked empty (11) 302 | { 303 | // Initialize all registers to zero 304 | for (int i = 0; i < 8; i++) { 305 | st[i].ieee754 = 0.0; 306 | } 307 | } 308 | 309 | // Get index of top register 310 | auto top_index() const -> unsigned int { 311 | return (status_word >> 11) & 7; 312 | } // Get reference to top register 313 | 314 | // Get index of ST(i) register 315 | auto get_st_index(unsigned int st_offset) const -> unsigned int { 316 | return (st_offset + top_index()) & 7; 317 | } 318 | 319 | // Get value from register at ST(i). Checks tag bits for validity, returns 0.0 320 | // if empty. Updates status word. 
321 | __attribute__((always_inline)) auto get_st(unsigned int st_offset) -> double { 322 | const unsigned int reg_idx = get_st_index(st_offset); 323 | const auto tag = static_cast((tag_word >> (reg_idx * 2)) & 3); 324 | if (tag == X87TagState::kEmpty) { 325 | // FP_X_STK | FP_X_INV 326 | status_word |= 327 | X87StatusWordFlag::kStackFault | X87StatusWordFlag::kInvalidOperation; 328 | return std::numeric_limits::quiet_NaN(); 329 | } 330 | #if !defined(X87_CONVERT_TO_FP80) 331 | return st[reg_idx].ieee754; 332 | #else 333 | return ConvertX87RegisterToFloat64(st[reg_idx], &status_word); 334 | #endif 335 | } 336 | 337 | auto get_st_const(unsigned int st_offset) const 338 | -> std::pair { 339 | const unsigned int reg_idx = get_st_index(st_offset); 340 | const X87TagState tag = 341 | static_cast((tag_word >> (reg_idx * 2)) & 3); 342 | 343 | uint16_t new_status_word = 344 | status_word & ~(X87StatusWordFlag::kConditionCode1); 345 | if (tag == X87TagState::kEmpty) { 346 | // FP_X_STK | FP_X_INV 347 | // return nan 348 | return {std::numeric_limits::quiet_NaN(), 349 | new_status_word | X87StatusWordFlag::kStackFault | 350 | X87StatusWordFlag::kInvalidOperation}; 351 | } 352 | 353 | #if !defined(X87_CONVERT_TO_FP80) 354 | return {st[reg_idx].ieee754, new_status_word}; 355 | #else 356 | auto value = ConvertX87RegisterToFloat64(st[reg_idx], &new_status_word); 357 | return {value, new_status_word}; 358 | #endif 359 | } 360 | 361 | auto get_st_const32(unsigned int st_offset) const 362 | -> std::pair { 363 | const unsigned int reg_idx = get_st_index(st_offset); 364 | const X87TagState tag = 365 | static_cast((tag_word >> (reg_idx * 2)) & 3); 366 | 367 | uint16_t new_status_word = 368 | status_word & ~(X87StatusWordFlag::kConditionCode1); 369 | if (tag == X87TagState::kEmpty) { 370 | // FP_X_STK | FP_X_INV 371 | // return nan 372 | return {std::numeric_limits::quiet_NaN(), 373 | new_status_word | X87StatusWordFlag::kStackFault | 374 | X87StatusWordFlag::kInvalidOperation}; 375 | } 
376 | 377 | #if !defined(X87_CONVERT_TO_FP80) 378 | return {st[reg_idx].ieee754, new_status_word}; 379 | #else 380 | auto value = ConvertX87RegisterToFloat32(st[reg_idx], &new_status_word); 381 | return {value, new_status_word}; 382 | #endif 383 | } 384 | 385 | __attribute__((always_inline)) auto get_st_tag(unsigned int st_offset) const 386 | -> X87TagState { 387 | const unsigned int reg_idx = get_st_index(st_offset); 388 | return static_cast((tag_word >> (reg_idx * 2)) & 3); 389 | } 390 | 391 | // Push value to FPU stack 392 | auto push() -> void { 393 | const int current_top = top_index(); 394 | const int new_top = (current_top - 1) & 7; 395 | status_word = 396 | (status_word & ~X87StatusWordFlag::kTopOfStack) | (new_top << 11); 397 | // Clear tag bits (set to valid 00) for new register 398 | tag_word &= ~(3 << (new_top * 2)); 399 | } 400 | 401 | auto pop() -> void { 402 | const int current_top = top_index(); 403 | // Set tag bits to empty (11) for popped register 404 | tag_word |= (3 << (current_top * 2)); 405 | st[current_top].ieee754 = 0.0; 406 | status_word = (status_word & ~X87StatusWordFlag::kTopOfStack) | 407 | (((current_top + 1) & 7) << 11); 408 | } 409 | 410 | __attribute__((always_inline)) auto set_st(unsigned int st_offset, 411 | double value) -> void { 412 | auto st_idx = get_st_index(st_offset); 413 | 414 | #if !defined(X87_CONVERT_TO_FP80) 415 | st[st_idx].ieee754 = value; 416 | #else 417 | // Convert value to x87 format 418 | st[st_idx] = ConvertFloat64ToX87Register(value, &status_word); 419 | #endif 420 | X87TagState tag; 421 | if (value == 0.0) { 422 | tag = X87TagState::kZero; 423 | } else if (std::isnan(value) || std::isinf(value) || 424 | std::fpclassify(value) == FP_SUBNORMAL) { 425 | tag = X87TagState::kSpecial; 426 | } else { 427 | tag = X87TagState::kValid; 428 | } 429 | 430 | // Clear existing tag bits and set new state 431 | tag_word &= ~(3 << (st_idx * 2)); 432 | tag_word |= (static_cast(tag) << (st_idx * 2)); 433 | } 434 | 435 | 
__attribute__((always_inline)) auto set_st_fast(unsigned int st_offset, 436 | double value) -> void { 437 | const unsigned int idx = get_st_index(st_offset); 438 | 439 | #if !defined(X87_CONVERT_TO_FP80) 440 | // Direct IEEE-754 store 441 | st[idx].ieee754 = value; 442 | #else 443 | // Convert to FP80 format without modifying status_word 444 | st[idx] = ConvertFloat64ToX87Register(value, nullptr); 445 | #endif 446 | 447 | // Clear both tag bits → 00 (kValid) 448 | tag_word &= ~(0x3u << (idx * 2)); 449 | } 450 | 451 | // Fast path: bypass tag-checks, assume value valid 452 | __attribute__((always_inline)) auto get_st_fast(unsigned int st_offset) const 453 | -> double { 454 | // Compute absolute slot index 455 | const unsigned int idx = get_st_index(st_offset); 456 | #if !defined(X87_CONVERT_TO_FP80) 457 | // Direct IEEE-754 load 458 | return st[idx].ieee754; 459 | #else 460 | // If you still need FP80 support, convert without touching status_word 461 | return ConvertX87RegisterToFloat64(st[idx], nullptr); 462 | #endif 463 | } 464 | 465 | auto swap_registers(unsigned int reg_offset1, unsigned int reg_offset2) 466 | -> void { 467 | // Swap register contents 468 | auto reg_idx1 = get_st_index(reg_offset1); 469 | auto reg_idx2 = get_st_index(reg_offset2); 470 | 471 | auto temp = st[reg_idx1].ieee754; 472 | st[reg_idx1].ieee754 = st[reg_idx2].ieee754; 473 | st[reg_idx2].ieee754 = temp; 474 | 475 | // Get current tags 476 | const int tag1 = (tag_word >> (reg_idx1 * 2)) & 3; 477 | const int tag2 = (tag_word >> (reg_idx2 * 2)) & 3; 478 | 479 | // Clear both tags 480 | tag_word &= ~((3 << (reg_idx1 * 2)) | (3 << (reg_idx2 * 2))); 481 | 482 | // Set swapped tags 483 | tag_word |= (tag2 << (reg_idx1 * 2)) | (tag1 << (reg_idx2 * 2)); 484 | } 485 | 486 | auto print() const -> void { 487 | simple_printf("FPU state:\n"); 488 | simple_printf("Control word: %d\n", control_word); 489 | simple_printf("Status word: %d\n", status_word); 490 | simple_printf("Tag word: %d\n", tag_word); 
491 | simple_printf("Top index: %d\n", top_index()); 492 | simple_printf("\n"); 493 | } 494 | }; 495 | #pragma pack(pop) 496 | #if defined(X87_CONVERT_TO_FP80) 497 | static_assert(sizeof(X87State) == 0x56, "Invalid size for X87State"); 498 | #else 499 | static_assert(sizeof(X87State) == 0x48, "Invalid size for X87State"); 500 | #endif 501 | static_assert(offsetof(X87State, control_word) == 0, 502 | "Invalid offset for X87State::control_word"); 503 | static_assert(offsetof(X87State, status_word) == 2, 504 | "Invalid offset for X87State::status_word"); 505 | static_assert(offsetof(X87State, tag_word) == 4, 506 | "Invalid offset for X87State::tag_word"); 507 | 508 | #if defined(X87_CONVERT_TO_FP80) 509 | static_assert(offsetof(X87State, st) == 6, "Invalid offset for X87State::st0"); 510 | #else 511 | static_assert(offsetof(X87State, st) == 0x08, "Invalid offset for X87State::st0"); 512 | #endif -------------------------------------------------------------------------------- /rosettaRuntime/Export.cpp: -------------------------------------------------------------------------------- 1 | #include "Export.h" 2 | 3 | #include "X87.h" 4 | 5 | __attribute__((used)) init_library_t orig_init_library; 6 | __attribute__((used)) register_runtime_routine_offsets_t orig_register_runtime_routine_offsets; 7 | __attribute__((used)) translator_use_t8027_codegen_t orig_translator_use_t8027_codegen; 8 | __attribute__((used)) translator_reset_t orig_translator_reset; 9 | __attribute__((used)) ir_create_bad_access_t orig_ir_create_bad_access; 10 | __attribute__((used)) ir_create_t orig_ir_create; 11 | __attribute__((used)) module_free_t orig_module_free; 12 | __attribute__((used)) module_get_size_t orig_module_get_size; 13 | __attribute__((used)) module_is_bad_access_t orig_module_is_bad_access; 14 | __attribute__((used)) module_print_t orig_module_print; 15 | __attribute__((used)) translator_translate_t orig_translator_translate; 16 | __attribute__((used)) translator_free_t 
orig_translator_free; 17 | __attribute__((used)) translator_get_data_t orig_translator_get_data; 18 | __attribute__((used)) translator_get_size_t orig_translator_get_size; 19 | __attribute__((used)) translator_get_branch_slots_offset_t orig_translator_get_branch_slots_offset; 20 | __attribute__((used)) translator_get_branch_slots_count_t orig_translator_get_branch_slots_count; 21 | __attribute__((used)) translator_get_branch_entries_t orig_translator_get_branch_entries; 22 | __attribute__((used)) translator_get_instruction_offsets_t orig_translator_get_instruction_offsets; 23 | __attribute__((used)) translator_apply_fixups_t orig_translator_apply_fixups; 24 | __attribute__((used)) x87_init_t orig_x87_init; 25 | __attribute__((used)) x87_state_from_x86_float_state_t orig_x87_state_from_x86_float_state; 26 | __attribute__((used)) x87_state_to_x86_float_state_t orig_x87_state_to_x86_float_state; 27 | __attribute__((used)) x87_pop_register_stack_t orig_x87_pop_register_stack; 28 | __attribute__((used)) x87_f2xm1_t orig_x87_f2xm1; 29 | __attribute__((used)) x87_fabs_t orig_x87_fabs; 30 | __attribute__((used)) x87_fadd_ST_t orig_x87_fadd_ST; 31 | __attribute__((used)) x87_fadd_f32_t orig_x87_fadd_f32; 32 | __attribute__((used)) x87_fadd_f64_t orig_x87_fadd_f64; 33 | __attribute__((used)) x87_fbld_t orig_x87_fbld; 34 | __attribute__((used)) x87_fbstp_t orig_x87_fbstp; 35 | __attribute__((used)) x87_fchs_t orig_x87_fchs; 36 | __attribute__((used)) x87_fcmov_t orig_x87_fcmov; 37 | __attribute__((used)) x87_fcom_ST_t orig_x87_fcom_ST; 38 | __attribute__((used)) x87_fcom_f32_t orig_x87_fcom_f32; 39 | __attribute__((used)) x87_fcom_f64_t orig_x87_fcom_f64; 40 | __attribute__((used)) x87_fcomi_t orig_x87_fcomi; 41 | __attribute__((used)) x87_fcos_t orig_x87_fcos; 42 | __attribute__((used)) x87_fdecstp_t orig_x87_fdecstp; 43 | __attribute__((used)) x87_fdiv_ST_t orig_x87_fdiv_ST; 44 | __attribute__((used)) x87_fdiv_f32_t orig_x87_fdiv_f32; 45 | __attribute__((used)) 
x87_fdiv_f64_t orig_x87_fdiv_f64; 46 | __attribute__((used)) x87_fdivr_ST_t orig_x87_fdivr_ST; 47 | __attribute__((used)) x87_fdivr_f32_t orig_x87_fdivr_f32; 48 | __attribute__((used)) x87_fdivr_f64_t orig_x87_fdivr_f64; 49 | __attribute__((used)) x87_ffree_t orig_x87_ffree; 50 | __attribute__((used)) x87_fiadd_t orig_x87_fiadd; 51 | __attribute__((used)) x87_ficom_t orig_x87_ficom; 52 | __attribute__((used)) x87_fidiv_t orig_x87_fidiv; 53 | __attribute__((used)) x87_fidivr_t orig_x87_fidivr; 54 | __attribute__((used)) x87_fild_t orig_x87_fild; 55 | __attribute__((used)) x87_fimul_t orig_x87_fimul; 56 | __attribute__((used)) x87_fincstp_t orig_x87_fincstp; 57 | __attribute__((used)) x87_fist_i16_t orig_x87_fist_i16; 58 | __attribute__((used)) x87_fist_i32_t orig_x87_fist_i32; 59 | __attribute__((used)) x87_fist_i64_t orig_x87_fist_i64; 60 | __attribute__((used)) x87_fistt_i16_t orig_x87_fistt_i16; 61 | __attribute__((used)) x87_fistt_i32_t orig_x87_fistt_i32; 62 | __attribute__((used)) x87_fistt_i64_t orig_x87_fistt_i64; 63 | __attribute__((used)) x87_fisub_t orig_x87_fisub; 64 | __attribute__((used)) x87_fisubr_t orig_x87_fisubr; 65 | __attribute__((used)) x87_fld_STi_t orig_x87_fld_STi; 66 | __attribute__((used)) x87_fld_constant_t orig_x87_fld_constant; 67 | __attribute__((used)) x87_fld_fp32_t orig_x87_fld_fp32; 68 | __attribute__((used)) x87_fld_fp64_t orig_x87_fld_fp64; 69 | __attribute__((used)) x87_fld_fp80_t orig_x87_fld_fp80; 70 | __attribute__((used)) x87_fmul_ST_t orig_x87_fmul_ST; 71 | __attribute__((used)) x87_fmul_f32_t orig_x87_fmul_f32; 72 | __attribute__((used)) x87_fmul_f64_t orig_x87_fmul_f64; 73 | __attribute__((used)) x87_fpatan_t orig_x87_fpatan; 74 | __attribute__((used)) x87_fprem_t orig_x87_fprem; 75 | __attribute__((used)) x87_fprem1_t orig_x87_fprem1; 76 | __attribute__((used)) x87_fptan_t orig_x87_fptan; 77 | __attribute__((used)) x87_frndint_t orig_x87_frndint; 78 | __attribute__((used)) x87_fscale_t orig_x87_fscale; 79 | 
__attribute__((used)) x87_fsin_t orig_x87_fsin; 80 | __attribute__((used)) x87_fsincos_t orig_x87_fsincos; 81 | __attribute__((used)) x87_fsqrt_t orig_x87_fsqrt; 82 | __attribute__((used)) x87_fst_STi_t orig_x87_fst_STi; 83 | __attribute__((used)) x87_fst_fp32_t orig_x87_fst_fp32; 84 | __attribute__((used)) x87_fst_fp64_t orig_x87_fst_fp64; 85 | __attribute__((used)) x87_fst_fp80_t orig_x87_fst_fp80; 86 | __attribute__((used)) x87_fsub_ST_t orig_x87_fsub_ST; 87 | __attribute__((used)) x87_fsub_f32_t orig_x87_fsub_f32; 88 | __attribute__((used)) x87_fsub_f64_t orig_x87_fsub_f64; 89 | __attribute__((used)) x87_fsubr_ST_t orig_x87_fsubr_ST; 90 | __attribute__((used)) x87_fsubr_f32_t orig_x87_fsubr_f32; 91 | __attribute__((used)) x87_fsubr_f64_t orig_x87_fsubr_f64; 92 | __attribute__((used)) x87_fucom_t orig_x87_fucom; 93 | __attribute__((used)) x87_fucomi_t orig_x87_fucomi; 94 | __attribute__((used)) x87_fxam_t orig_x87_fxam; 95 | __attribute__((used)) x87_fxch_t orig_x87_fxch; 96 | __attribute__((used)) x87_fxtract_t orig_x87_fxtract; 97 | __attribute__((used)) x87_fyl2x_t orig_x87_fyl2x; 98 | __attribute__((used)) x87_fyl2xp1_t orig_x87_fyl2xp1; 99 | __attribute__((used)) sse_pcmpestri_t orig_sse_pcmpestri; 100 | __attribute__((used)) sse_pcmpestrm_t orig_sse_pcmpestrm; 101 | __attribute__((used)) sse_pcmpistri_t orig_sse_pcmpistri; 102 | __attribute__((used)) sse_pcmpistrm_t orig_sse_pcmpistrm; 103 | __attribute__((used)) is_ldt_initialized_t orig_is_ldt_initialized; 104 | __attribute__((used)) get_ldt_t orig_get_ldt; 105 | __attribute__((used)) set_ldt_t orig_set_ldt; 106 | __attribute__((used)) execution_mode_for_code_segment_selector_t orig_execution_mode_for_code_segment_selector; 107 | __attribute__((used)) mov_segment_t orig_mov_segment; 108 | __attribute__((used)) abi_for_address_t orig_abi_for_address; 109 | __attribute__((used)) determine_state_recovery_action_t orig_determine_state_recovery_action; 110 | __attribute__((used)) get_segment_limit_t 
orig_get_segment_limit; 111 | __attribute__((used)) translator_set_variant_t orig_translator_set_variant; 112 | 113 | __attribute__((used)) runtime_cpuid_t orig_runtime_cpuid; 114 | __attribute__((used)) runtime_wide_udiv_64_t orig_runtime_wide_udiv_64; 115 | __attribute__((used)) runtime_wide_sdiv_64_t orig_runtime_wide_sdiv_64; 116 | 117 | constexpr std::array kExportList{ 118 | Export{(void*)&init_library, 119 | "__ZN7rosetta7runtime7library12init_libraryEPKNS1_10SymbolListEyPKNS_20ThreadContextOffsetsE"}, 120 | Export{(void*)®ister_runtime_routine_offsets, 121 | "__ZN7rosetta7runtime7library32register_runtime_routine_offsetsEPKyPPKcm"}, 122 | Export{(void*)&translator_use_t8027_codegen, "__ZN7rosetta7runtime7library28translator_use_t8027_codegenEb"}, 123 | Export{(void*)&translator_reset, "__ZN7rosetta7runtime7library16translator_resetEv"}, 124 | Export{(void*)&ir_create_bad_access, "__ZN7rosetta7runtime7library20ir_create_bad_accessEy13BadAccessKind"}, 125 | Export{(void*)&ir_create, "__ZN7rosetta7runtime7library9ir_createEyjj15TranslationMode13ExecutionMode"}, 126 | Export{(void*)&module_free, "__ZN7rosetta7runtime7library11module_freeEPKNS1_12ModuleResultE"}, 127 | Export{(void*)&module_get_size, "__ZN7rosetta7runtime7library15module_get_sizeEPKNS1_12ModuleResultE"}, 128 | Export{(void*)&module_is_bad_access, "__ZN7rosetta7runtime7library20module_is_bad_accessEPKNS1_12ModuleResultE"}, 129 | Export{(void*)&module_print, "__ZN7rosetta7runtime7library12module_printEPKNS1_12ModuleResultEi"}, 130 | Export{(void*)&translator_translate, 131 | "__ZN7rosetta7runtime7library20translator_translateEPKNS1_12ModuleResultE15TranslationMode"}, 132 | Export{(void*)&translator_free, "__ZN7rosetta7runtime7library15translator_freeEPKNS1_17TranslationResultE"}, 133 | Export{(void*)&translator_get_data, "__ZN7rosetta7runtime7library19translator_get_dataEPKNS1_17TranslationResultE"}, 134 | Export{(void*)&translator_get_size, 
"__ZN7rosetta7runtime7library19translator_get_sizeEPKNS1_17TranslationResultE"}, 135 | Export{(void*)&translator_get_branch_slots_offset, 136 | "__ZN7rosetta7runtime7library34translator_get_branch_slots_offsetEPKNS1_17TranslationResultE"}, 137 | Export{(void*)&translator_get_branch_slots_count, 138 | "__ZN7rosetta7runtime7library33translator_get_branch_slots_countEPKNS1_17TranslationResultE"}, 139 | Export{(void*)&translator_get_branch_entries, 140 | "__ZN7rosetta7runtime7library29translator_get_branch_entriesEPKNS1_17TranslationResultE"}, 141 | Export{(void*)&translator_get_instruction_offsets, 142 | "__ZN7rosetta7runtime7library34translator_get_instruction_offsetsEPKNS1_17TranslationResultE"}, 143 | Export{(void*)&translator_apply_fixups, 144 | "__ZN7rosetta7runtime7library23translator_apply_fixupsEPNS1_17TranslationResultEPhy"}, 145 | Export{(void*)&x87_init, "__ZN7rosetta7runtime7library8x87_initEPNS1_8X87StateE"}, 146 | Export{(void*)&x87_state_from_x86_float_state, 147 | "__ZN7rosetta7runtime7library30x87_state_from_x86_float_stateEPNS1_8X87StateEPKNS0_15X86FloatState64E"}, 148 | Export{(void*)&x87_state_to_x86_float_state, 149 | "__ZN7rosetta7runtime7library28x87_state_to_x86_float_stateEPKNS1_8X87StateEPNS0_15X86FloatState64E"}, 150 | Export{(void*)&x87_pop_register_stack, "__ZN7rosetta7runtime7library22x87_pop_register_stackEPNS1_8X87StateE"}, 151 | Export{(void*)&x87_f2xm1, "__ZN7rosetta7runtime7library9x87_f2xm1EPNS1_8X87StateE"}, 152 | Export{(void*)&x87_fabs, "__ZN7rosetta7runtime7library8x87_fabsEPNS1_8X87StateE"}, 153 | Export{(void*)&x87_fadd_ST, "__ZN7rosetta7runtime7library11x87_fadd_STEPNS1_8X87StateEjjb"}, 154 | Export{(void*)&x87_fadd_f32, "__ZN7rosetta7runtime7library12x87_fadd_f32EPNS1_8X87StateEj"}, 155 | Export{(void*)&x87_fadd_f64, "__ZN7rosetta7runtime7library12x87_fadd_f64EPNS1_8X87StateEy"}, 156 | Export{(void*)&x87_fbld, "__ZN7rosetta7runtime7library8x87_fbldEPNS1_8X87StateEyy"}, 157 | Export{(void*)&x87_fbstp, 
"__ZN7rosetta7runtime7library9x87_fbstpEPKNS1_8X87StateE"}, 158 | Export{(void*)&x87_fchs, "__ZN7rosetta7runtime7library8x87_fchsEPNS1_8X87StateE"}, 159 | Export{(void*)&x87_fcmov, "__ZN7rosetta7runtime7library9x87_fcmovEPNS1_8X87StateEjj"}, 160 | Export{(void*)&x87_fcom_ST, "__ZN7rosetta7runtime7library11x87_fcom_STEPNS1_8X87StateEjj"}, 161 | Export{(void*)&x87_fcom_f32, "__ZN7rosetta7runtime7library12x87_fcom_f32EPNS1_8X87StateEjb"}, 162 | Export{(void*)&x87_fcom_f64, "__ZN7rosetta7runtime7library12x87_fcom_f64EPNS1_8X87StateEyb"}, 163 | Export{(void*)&x87_fcomi, "__ZN7rosetta7runtime7library9x87_fcomiEPNS1_8X87StateEjb"}, 164 | Export{(void*)&x87_fcos, "__ZN7rosetta7runtime7library8x87_fcosEPNS1_8X87StateE"}, 165 | Export{(void*)&x87_fdecstp, "__ZN7rosetta7runtime7library11x87_fdecstpEPNS1_8X87StateE"}, 166 | Export{(void*)&x87_fdiv_ST, "__ZN7rosetta7runtime7library11x87_fdiv_STEPNS1_8X87StateEjjb"}, 167 | Export{(void*)&x87_fdiv_f32, "__ZN7rosetta7runtime7library12x87_fdiv_f32EPNS1_8X87StateEj"}, 168 | Export{(void*)&x87_fdiv_f64, "__ZN7rosetta7runtime7library12x87_fdiv_f64EPNS1_8X87StateEy"}, 169 | Export{(void*)&x87_fdivr_ST, "__ZN7rosetta7runtime7library12x87_fdivr_STEPNS1_8X87StateEjjb"}, 170 | Export{(void*)&x87_fdivr_f32, "__ZN7rosetta7runtime7library13x87_fdivr_f32EPNS1_8X87StateEj"}, 171 | Export{(void*)&x87_fdivr_f64, "__ZN7rosetta7runtime7library13x87_fdivr_f64EPNS1_8X87StateEy"}, 172 | Export{(void*)&x87_ffree, "__ZN7rosetta7runtime7library9x87_ffreeEPNS1_8X87StateEj"}, 173 | Export{(void*)&x87_fiadd, "__ZN7rosetta7runtime7library9x87_fiaddEPNS1_8X87StateEi"}, 174 | Export{(void*)&x87_ficom, "__ZN7rosetta7runtime7library9x87_ficomEPNS1_8X87StateEib"}, 175 | Export{(void*)&x87_fidiv, "__ZN7rosetta7runtime7library9x87_fidivEPNS1_8X87StateEi"}, 176 | Export{(void*)&x87_fidivr, "__ZN7rosetta7runtime7library10x87_fidivrEPNS1_8X87StateEi"}, 177 | Export{(void*)&x87_fild, "__ZN7rosetta7runtime7library8x87_fildEPNS1_8X87StateEx"}, 178 | 
Export{(void*)&x87_fimul, "__ZN7rosetta7runtime7library9x87_fimulEPNS1_8X87StateEi"}, 179 | Export{(void*)&x87_fincstp, "__ZN7rosetta7runtime7library11x87_fincstpEPNS1_8X87StateE"}, 180 | Export{(void*)&x87_fist_i16, "__ZN7rosetta7runtime7library12x87_fist_i16EPKNS1_8X87StateE"}, 181 | Export{(void*)&x87_fist_i32, "__ZN7rosetta7runtime7library12x87_fist_i32EPKNS1_8X87StateE"}, 182 | Export{(void*)&x87_fist_i64, "__ZN7rosetta7runtime7library12x87_fist_i64EPKNS1_8X87StateE"}, 183 | Export{(void*)&x87_fistt_i16, "__ZN7rosetta7runtime7library13x87_fistt_i16EPKNS1_8X87StateE"}, 184 | Export{(void*)&x87_fistt_i32, "__ZN7rosetta7runtime7library13x87_fistt_i32EPKNS1_8X87StateE"}, 185 | Export{(void*)&x87_fistt_i64, "__ZN7rosetta7runtime7library13x87_fistt_i64EPKNS1_8X87StateE"}, 186 | Export{(void*)&x87_fisub, "__ZN7rosetta7runtime7library9x87_fisubEPNS1_8X87StateEi"}, 187 | Export{(void*)&x87_fisubr, "__ZN7rosetta7runtime7library10x87_fisubrEPNS1_8X87StateEi"}, 188 | Export{(void*)&x87_fld_STi, "__ZN7rosetta7runtime7library11x87_fld_STiEPNS1_8X87StateEj"}, 189 | Export{(void*)&x87_fld_constant, 190 | "__ZN7rosetta7runtime7library16x87_fld_constantEPNS1_8X87StateENS_10translator3x8711X87ConstantE"}, 191 | Export{(void*)&x87_fld_fp32, "__ZN7rosetta7runtime7library12x87_fld_fp32EPNS1_8X87StateEj"}, 192 | Export{(void*)&x87_fld_fp64, "__ZN7rosetta7runtime7library12x87_fld_fp64EPNS1_8X87StateEy"}, 193 | Export{(void*)&x87_fld_fp80, "__ZN7rosetta7runtime7library12x87_fld_fp80EPNS1_8X87StateENS1_10X87Float80E"}, 194 | Export{(void*)&x87_fmul_ST, "__ZN7rosetta7runtime7library11x87_fmul_STEPNS1_8X87StateEjjb"}, 195 | Export{(void*)&x87_fmul_f32, "__ZN7rosetta7runtime7library12x87_fmul_f32EPNS1_8X87StateEj"}, 196 | Export{(void*)&x87_fmul_f64, "__ZN7rosetta7runtime7library12x87_fmul_f64EPNS1_8X87StateEy"}, 197 | Export{(void*)&x87_fpatan, "__ZN7rosetta7runtime7library10x87_fpatanEPNS1_8X87StateE"}, 198 | Export{(void*)&x87_fprem, 
"__ZN7rosetta7runtime7library9x87_fpremEPNS1_8X87StateE"}, 199 | Export{(void*)&x87_fprem1, "__ZN7rosetta7runtime7library10x87_fprem1EPNS1_8X87StateE"}, 200 | Export{(void*)&x87_fptan, "__ZN7rosetta7runtime7library9x87_fptanEPNS1_8X87StateE"}, 201 | Export{(void*)&x87_frndint, "__ZN7rosetta7runtime7library11x87_frndintEPNS1_8X87StateE"}, 202 | Export{(void*)&x87_fscale, "__ZN7rosetta7runtime7library10x87_fscaleEPNS1_8X87StateE"}, 203 | Export{(void*)&x87_fsin, "__ZN7rosetta7runtime7library8x87_fsinEPNS1_8X87StateE"}, 204 | Export{(void*)&x87_fsincos, "__ZN7rosetta7runtime7library11x87_fsincosEPNS1_8X87StateE"}, 205 | Export{(void*)&x87_fsqrt, "__ZN7rosetta7runtime7library9x87_fsqrtEPNS1_8X87StateE"}, 206 | Export{(void*)&x87_fst_STi, "__ZN7rosetta7runtime7library11x87_fst_STiEPNS1_8X87StateEjb"}, 207 | Export{(void*)&x87_fst_fp32, "__ZN7rosetta7runtime7library12x87_fst_fp32EPKNS1_8X87StateE"}, 208 | Export{(void*)&x87_fst_fp64, "__ZN7rosetta7runtime7library12x87_fst_fp64EPKNS1_8X87StateE"}, 209 | Export{(void*)&x87_fst_fp80, "__ZN7rosetta7runtime7library12x87_fst_fp80EPKNS1_8X87StateE"}, 210 | Export{(void*)&x87_fsub_ST, "__ZN7rosetta7runtime7library11x87_fsub_STEPNS1_8X87StateEjjb"}, 211 | Export{(void*)&x87_fsub_f32, "__ZN7rosetta7runtime7library12x87_fsub_f32EPNS1_8X87StateEj"}, 212 | Export{(void*)&x87_fsub_f64, "__ZN7rosetta7runtime7library12x87_fsub_f64EPNS1_8X87StateEy"}, 213 | Export{(void*)&x87_fsubr_ST, "__ZN7rosetta7runtime7library12x87_fsubr_STEPNS1_8X87StateEjjb"}, 214 | Export{(void*)&x87_fsubr_f32, "__ZN7rosetta7runtime7library13x87_fsubr_f32EPNS1_8X87StateEj"}, 215 | Export{(void*)&x87_fsubr_f64, "__ZN7rosetta7runtime7library13x87_fsubr_f64EPNS1_8X87StateEy"}, 216 | Export{(void*)&x87_fucom, "__ZN7rosetta7runtime7library9x87_fucomEPNS1_8X87StateEjj"}, 217 | Export{(void*)&x87_fucomi, "__ZN7rosetta7runtime7library10x87_fucomiEPNS1_8X87StateEjb"}, 218 | Export{(void*)&x87_fxam, "__ZN7rosetta7runtime7library8x87_fxamEPNS1_8X87StateE"}, 219 | 
Export{(void*)&x87_fxch, "__ZN7rosetta7runtime7library8x87_fxchEPNS1_8X87StateEj"}, 220 | Export{(void*)&x87_fxtract, "__ZN7rosetta7runtime7library11x87_fxtractEPNS1_8X87StateE"}, 221 | Export{(void*)&x87_fyl2x, "__ZN7rosetta7runtime7library9x87_fyl2xEPNS1_8X87StateE"}, 222 | Export{(void*)&x87_fyl2xp1, "__ZN7rosetta7runtime7library11x87_fyl2xp1EPNS1_8X87StateE"}, 223 | Export{(void*)&sse_pcmpestri, "__ZN7rosetta7runtime7library13sse_pcmpestriEyyyyhxx"}, 224 | Export{(void*)&sse_pcmpestrm, "__ZN7rosetta7runtime7library13sse_pcmpestrmEyyyyhxx"}, 225 | Export{(void*)&sse_pcmpistri, "__ZN7rosetta7runtime7library13sse_pcmpistriEyyyyh"}, 226 | Export{(void*)&sse_pcmpistrm, "__ZN7rosetta7runtime7library13sse_pcmpistrmEyyyyh"}, 227 | Export{(void*)&is_ldt_initialized, "__ZN7rosetta7runtime7library18is_ldt_initializedEv"}, 228 | Export{(void*)&get_ldt, "__ZN7rosetta7runtime7library7get_ldtEjjPvj"}, 229 | Export{(void*)&set_ldt, "__ZN7rosetta7runtime7library7set_ldtEjjPKvj"}, 230 | Export{(void*)&execution_mode_for_code_segment_selector, 231 | "__ZN7rosetta7runtime7library40execution_mode_for_code_segment_selectorEjt"}, 232 | Export{(void*)&mov_segment, 233 | "__ZN7rosetta7runtime7library11mov_segmentEjPNS1_16SegmentRegistersENS1_15SegmentRegisterEt"}, 234 | Export{(void*)&abi_for_address, "__ZN7rosetta7runtime7library15abi_for_addressEy"}, 235 | Export{(void*)&determine_state_recovery_action, 236 | "__ZN7rosetta7runtime7library31determine_state_recovery_actionEPKjjj"}, 237 | Export{(void*)&get_segment_limit, "__ZN7rosetta7runtime7library17get_segment_limitEjt"}, 238 | Export{(void*)&translator_set_variant, "__ZN7rosetta7runtime7library22translator_set_variantEb"}, 239 | }; 240 | 241 | constexpr std::array kRuntimeExportList = { 242 | Export{(void*)&runtime_cpuid, "runtime_cpuid"}, 243 | Export{(void*)&runtime_wide_udiv_64, "runtime_wide_udiv_64"}, 244 | Export{(void*)&runtime_wide_sdiv_64, "runtime_wide_sdiv_64"}, 245 | }; 246 | 247 | 
__attribute__((section("__DATA,exports"), used)) Exports kExports = { 248 | 0x1560000000000, kExportList.data(), kExportList.size(), kRuntimeExportList.data(), kRuntimeExportList.size(), 249 | }; 250 | 251 | // this is filled in by loader with the exports of libRosettaRuntime 252 | __attribute__((section("__DATA,imports"), used)) Exports kImports = { 253 | 0x0, 0x0, 0x0, 0x0, 0x0, 254 | }; 255 | 256 | auto exports_init() -> void { 257 | // copy the exports from libRosettaRuntime to orig_* function pointers 258 | void** p = (void**)&orig_init_library; 259 | for (auto i = 0; i < kImports.x87_export_count; i++) { 260 | *p = kImports.x87_exports[i].address; 261 | p++; 262 | } 263 | 264 | p = (void**)&orig_runtime_cpuid; 265 | for (auto i = 0; i < kImports.runtime_export_count; i++) { 266 | *p = kImports.runtime_exports[i].address; 267 | p++; 268 | } 269 | } -------------------------------------------------------------------------------- /loader/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include // For _NSGetExecutablePath 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include // Add for uint64_t 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include // For chmod 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include "macho_loader.hpp" 26 | #include "offset_finder.hpp" 27 | 28 | // #define WINE 29 | 30 | static bool g_is_wine = false; 31 | static bool g_is_wine_preloader = false; 32 | 33 | typedef const struct dyld_process_info_base *dyld_process_info; 34 | extern "C" dyld_process_info 35 | _dyld_process_info_create(task_t task, uint64_t timestamp, 36 | kern_return_t *kernelError); 37 | 38 | extern "C" void _dyld_process_info_for_each_image( 39 | dyld_process_info info, 40 | void (^callback)(uint64_t machHeaderAddress, const uuid_t uuid, 41 | const char *path)); 42 | 43 | 
extern "C" void _dyld_process_info_release(dyld_process_info info); 44 | 45 | class MuhDebugger { 46 | private: 47 | pid_t childPid; 48 | task_t taskPort; 49 | static const unsigned int AARCH64_BREAKPOINT; // just declare here 50 | std::map breakpoints; // addr -> original instruction 51 | static const int MAX_WATCHPOINTS = 52 | 4; // AArch64 typically has 4 hardware watchpoints 53 | std::map watchpoints; // address -> debug register index 54 | bool waitForEvent(int *status) { 55 | if (waitpid(childPid, status, 0) == -1) { 56 | perror("waitpid"); 57 | return false; 58 | } 59 | if (WIFSTOPPED(*status)) { 60 | int signal = WSTOPSIG(*status); 61 | printf("Process stopped signal=%d\n", signal); 62 | 63 | if (signal == SIGBUS) { 64 | printf("accessing thread\n"); 65 | arm_thread_state64_t thread_state; 66 | 67 | thread_act_port_array_t thread_list; 68 | mach_msg_type_number_t thread_count = 0; // Initialize to 0 69 | // Get thread port 70 | if (task_threads(taskPort, &thread_list, &thread_count) == 71 | KERN_SUCCESS) { 72 | printf("task_threads succeeded, thread_count=%d\n", thread_count); 73 | 74 | // Get register state 75 | mach_msg_type_number_t state_count = ARM_THREAD_STATE64_COUNT; 76 | kern_return_t kr = 77 | thread_get_state(thread_list[0], ARM_THREAD_STATE64, 78 | (thread_state_t)&thread_state, &state_count); 79 | if (kr == KERN_SUCCESS) { 80 | printf("Thread state:\n"); 81 | uint64_t pc = thread_state.__pc; 82 | 83 | // Get VM region info 84 | vm_address_t region_address = pc; 85 | vm_size_t region_size; 86 | vm_region_basic_info_data_64_t info; 87 | mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64; 88 | mach_port_t object_name; 89 | 90 | if (vm_region_64(taskPort, ®ion_address, ®ion_size, 91 | VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info, 92 | &info_count, &object_name) == KERN_SUCCESS) { 93 | printf("SIGBUS Details:\n"); 94 | printf("-> Fault Address: 0x%llx\n", pc); 95 | printf("-> Region Start: 0x%llx\n", region_address); 96 | 
printf("-> Region End: 0x%llx\n", region_address + region_size); 97 | printf("-> Region Size: 0x%llx\n", region_size); 98 | printf("-> Region Permissions: %c%c%c\n", 99 | (info.protection & VM_PROT_READ) ? 'r' : '-', 100 | (info.protection & VM_PROT_WRITE) ? 'w' : '-', 101 | (info.protection & VM_PROT_EXECUTE) ? 'x' : '-'); 102 | printf("-> Region Alignment: 0x%x\n", 103 | (unsigned int)region_address % 16); 104 | 105 | // Print the instruction at the fault address 106 | uint32_t instruction; 107 | mach_vm_size_t size = sizeof(instruction); 108 | if (mach_vm_read_overwrite(taskPort, pc, size, 109 | (mach_vm_address_t)&instruction, 110 | &size) == KERN_SUCCESS) { 111 | printf("-> Instruction at Fault Address: 0x%x\n", instruction); 112 | } else { 113 | printf("-> Failed to read instruction at Fault Address\n"); 114 | } 115 | } 116 | } else { 117 | printf("thread_get_state failed: %s\n", mach_error_string(kr)); 118 | } 119 | mach_port_deallocate(mach_task_self(), thread_list[0]); 120 | } else { 121 | printf("task_threads failed\n"); 122 | } 123 | } 124 | } 125 | return true; 126 | } 127 | 128 | const char *findModuleForAddress(uint64_t address) { 129 | kern_return_t kr; 130 | auto process_info = _dyld_process_info_create(taskPort, 0, &kr); 131 | 132 | if (kr != KERN_SUCCESS) { 133 | printf("Failed to get dyld process info (error 0x%x: %s)\n", kr, 134 | mach_error_string(kr)); 135 | return NULL; 136 | } 137 | 138 | __block const char *modpath = nullptr; 139 | 140 | _dyld_process_info_for_each_image( 141 | process_info, 142 | ^(uint64_t machHeaderAddress, const uuid_t uuid, const char *path) { 143 | if (address == machHeaderAddress) { 144 | modpath = strdup(path); 145 | } 146 | }); 147 | 148 | return modpath ? 
modpath : strdup(""); 149 | } 150 | 151 | bool configureDebugRegisters(thread_act_port_array_t thread_list) { 152 | arm_debug_state64_t debug_state; 153 | mach_msg_type_number_t count = ARM_DEBUG_STATE64_COUNT; 154 | 155 | kern_return_t kr = thread_get_state(thread_list[0], ARM_DEBUG_STATE64, 156 | (thread_state_t)&debug_state, &count); 157 | 158 | if (kr != KERN_SUCCESS) { 159 | printf("Failed to get debug state (error 0x%x: %s)\n", kr, 160 | mach_error_string(kr)); 161 | return false; 162 | } 163 | 164 | // Enable monitor mode in MDSCR 165 | debug_state.__mdscr_el1 |= 0x8000; // Set MDE bit (bit 15) 166 | 167 | kr = 168 | thread_set_state(thread_list[0], ARM_DEBUG_STATE64, 169 | (thread_state_t)&debug_state, ARM_DEBUG_STATE64_COUNT); 170 | 171 | if (kr != KERN_SUCCESS) { 172 | printf("Failed to set debug state (error 0x%x: %s)\n", kr, 173 | mach_error_string(kr)); 174 | return false; 175 | } 176 | 177 | return true; 178 | } 179 | 180 | public: 181 | bool adjustMemoryProtection(uint64_t address, vm_prot_t protection, 182 | mach_vm_size_t size = 0) { 183 | mach_vm_address_t region = 184 | address & ~(vm_page_size - 1); // align to page boundary 185 | // align size to page boundary 186 | if (size == 0) { 187 | size = vm_page_size; 188 | } 189 | size = (size + vm_page_size - 1) & ~(vm_page_size - 1); 190 | 191 | printf("Adjusting memory protection at 0x%llx - 0x%llx\n", 192 | (unsigned long long)region, (unsigned long long)(region + size)); 193 | 194 | for (mach_vm_size_t offset = 0; offset < size; offset += vm_page_size) { 195 | mach_vm_address_t page = region + offset; 196 | kern_return_t kr = 197 | mach_vm_protect(taskPort, page, vm_page_size, FALSE, protection); 198 | if (kr != KERN_SUCCESS) { 199 | printf( 200 | "Failed to adjust memory protection at 0x%llx (error 0x%x: %s)\n", 201 | (unsigned long long)page, kr, mach_error_string(kr)); 202 | return false; 203 | } 204 | } 205 | return true; 206 | } 207 | bool attach(pid_t pid) { 208 | childPid = pid; 209 | 
printf("attempting to attach to %d\n", childPid); 210 | if (ptrace(PT_ATTACH, childPid, 0, 0) < 0) { 211 | perror("ptrace(PT_ATTACHEXC)"); 212 | return false; 213 | } 214 | 215 | // ::usleep(250000); 216 | 217 | printf("wait for event\n"); 218 | int status; 219 | waitForEvent(&status); 220 | 221 | printf("continued execution\n"); 222 | 223 | // Get task port for the child process 224 | if (task_for_pid(mach_task_self(), childPid, &taskPort) != KERN_SUCCESS) { 225 | printf("Failed to get task port for pid %d\n", childPid); 226 | return false; 227 | } 228 | 229 | auto attach_count = g_is_wine_preloader ? 2 : 1; 230 | 231 | for (auto i = 0; i < attach_count; i++) { 232 | 233 | printf("continuing..\n"); 234 | if (!continueExecution()) { 235 | printf("Failed to continue execution\n"); 236 | return false; 237 | } 238 | 239 | printf("waited..\n"); 240 | // Get task port for the child process 241 | if (task_for_pid(mach_task_self(), childPid, &taskPort) != KERN_SUCCESS) { 242 | printf("Failed to get task port for pid %d\n", childPid); 243 | return false; 244 | } 245 | 246 | printf("acquired task port\n"); 247 | } 248 | 249 | printf("Started debugging process %d\n", childPid); 250 | return true; 251 | } 252 | 253 | bool continueExecution() { 254 | if (ptrace(PT_CONTINUE, childPid, (caddr_t)1, 0) < 0) { 255 | perror("ptrace(PT_CONTINUE)"); 256 | return false; 257 | } 258 | 259 | // usleep(250000); 260 | 261 | printf("continueExecution waiting for event ..\n"); 262 | 263 | int status; 264 | return waitForEvent(&status); 265 | } 266 | 267 | bool singleStep() { 268 | if (ptrace(PT_STEP, childPid, (caddr_t)1, 0) < 0) { 269 | perror("ptrace(PT_STEP)"); 270 | return false; 271 | } 272 | return true; 273 | } 274 | 275 | bool detach() { 276 | if (ptrace(PT_DETACH, childPid, (caddr_t)1, 0) < 0) { 277 | perror("ptrace(PT_DETACH)"); 278 | return false; 279 | } 280 | return true; 281 | } 282 | 283 | struct ModuleInfo { 284 | uintptr_t address; 285 | std::string path; 286 | }; 287 | 288 
| auto getModuleList() -> std::vector { 289 | __block std::vector moduleList; 290 | kern_return_t kr; 291 | auto process_info = _dyld_process_info_create(taskPort, 0, &kr); 292 | 293 | if (kr != KERN_SUCCESS) { 294 | printf("Failed to get dyld process info (error 0x%x: %s)\n", kr, 295 | mach_error_string(kr)); 296 | return moduleList; 297 | } 298 | 299 | _dyld_process_info_for_each_image( 300 | process_info, ^(uint64_t address, const uuid_t uuid, const char *path) { 301 | moduleList.push_back({address, std::string(path)}); 302 | }); 303 | 304 | return moduleList; 305 | } 306 | 307 | auto find_runtime() -> uintptr_t { 308 | auto module_list = getModuleList(); 309 | 310 | auto runtime_it = std::find_if( 311 | module_list.begin(), module_list.end(), [](const ModuleInfo &module) { 312 | return module.path == "/usr/libexec/rosetta/runtime"; 313 | }); 314 | if (runtime_it != module_list.end()) { 315 | return runtime_it->address; 316 | } 317 | 318 | mach_vm_address_t address = 0; 319 | mach_vm_size_t size; 320 | vm_region_basic_info_data_64_t info; 321 | mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64; 322 | mach_port_t object_name; 323 | 324 | while (true) { 325 | if (mach_vm_region(taskPort, &address, &size, VM_REGION_BASIC_INFO_64, 326 | (vm_region_info_t)&info, &count, 327 | &object_name) != KERN_SUCCESS) { 328 | break; 329 | } 330 | 331 | if (info.protection & (VM_PROT_EXECUTE | VM_PROT_READ)) { 332 | if (std::find_if(module_list.begin(), module_list.end(), 333 | [address](const ModuleInfo &module) { 334 | return address == module.address; 335 | }) == module_list.end()) { 336 | uint32_t magic_bytes; 337 | if (readMemory(address, &magic_bytes, sizeof(magic_bytes)) && 338 | magic_bytes == MH_MAGIC_64) { 339 | return address; 340 | } 341 | } 342 | } 343 | 344 | address += size; 345 | } 346 | 347 | return 0; 348 | } 349 | 350 | uint64_t findModule(const char *moduleName) { 351 | kern_return_t kr; 352 | auto process_info = _dyld_process_info_create(taskPort, 
0, &kr); 353 | 354 | if (kr != KERN_SUCCESS) { 355 | printf("Failed to get dyld process info (error 0x%x: %s)\n", kr, 356 | mach_error_string(kr)); 357 | return 0; 358 | } 359 | 360 | __block uint64_t machHeaderAddress = 0; 361 | 362 | _dyld_process_info_for_each_image( 363 | process_info, ^(uint64_t address, const uuid_t uuid, const char *path) { 364 | if (strstr(path, moduleName)) { 365 | machHeaderAddress = address; 366 | } 367 | }); 368 | 369 | return machHeaderAddress; 370 | } 371 | 372 | bool setBreakpoint(uint64_t address) { 373 | // Verify address is in valid range 374 | if (address >= MACH_VM_MAX_ADDRESS) { 375 | printf("Invalid address 0x%llx\n", (unsigned long long)address); 376 | return false; 377 | } 378 | unsigned int original; 379 | mach_vm_size_t read_size; 380 | 381 | // Read the original instruction 382 | kern_return_t kr = 383 | mach_vm_read_overwrite(taskPort, address, sizeof(unsigned int), 384 | (mach_vm_address_t)&original, &read_size); 385 | if (kr != KERN_SUCCESS) { 386 | printf("Failed to read memory at 0x%llx (error 0x%x: %s)\n", 387 | (unsigned long long)address, kr, mach_error_string(kr)); 388 | return false; 389 | } 390 | 391 | // printf("read success\n"); 392 | // First, try to adjust memory protection 393 | if (!adjustMemoryProtection(address, 394 | VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY)) { 395 | return false; 396 | } 397 | 398 | // printf("adjustMemoryProtection success\n"); 399 | 400 | // Write breakpoint instruction 401 | kr = mach_vm_write(taskPort, address, (vm_offset_t)&AARCH64_BREAKPOINT, 402 | sizeof(unsigned int)); 403 | if (kr != KERN_SUCCESS) { 404 | printf("Failed to write breakpoint at 0x%llx (error 0x%x: %s)\n", 405 | (unsigned long long)address, kr, mach_error_string(kr)); 406 | return false; 407 | } 408 | // printf("write success\n"); 409 | if (!adjustMemoryProtection(address, VM_PROT_READ | VM_PROT_EXECUTE)) { 410 | return false; 411 | } 412 | 413 | // printf("adjustMemoryProtection success\n"); 414 | 
breakpoints[address] = original; 415 | printf("Breakpoint set at address 0x%llx\n", (unsigned long long)address); 416 | return true; 417 | } 418 | 419 | bool removeBreakpoint(uint64_t address) { 420 | auto it = breakpoints.find(address); 421 | if (it == breakpoints.end()) { 422 | printf("No breakpoint found at address 0x%llx\n", 423 | (unsigned long long)address); 424 | return false; 425 | } 426 | 427 | // First, try to adjust memory protection 428 | if (!adjustMemoryProtection(address, VM_PROT_READ | VM_PROT_WRITE)) { 429 | return false; 430 | } 431 | 432 | // Restore original instruction 433 | kern_return_t kr = mach_vm_write( 434 | taskPort, address, (vm_offset_t)&it->second, sizeof(unsigned int)); 435 | if (kr != KERN_SUCCESS) { 436 | printf( 437 | "Failed to restore original instruction at 0x%llx (error 0x%x: %s)\n", 438 | (unsigned long long)address, kr, mach_error_string(kr)); 439 | return false; 440 | } 441 | if (!adjustMemoryProtection(address, VM_PROT_READ | VM_PROT_EXECUTE)) { 442 | return false; 443 | } 444 | breakpoints.erase(it); 445 | printf("Breakpoint removed from address 0x%llx\n", 446 | (unsigned long long)address); 447 | return true; 448 | } 449 | 450 | bool isBreakpoint(uint64_t address) { 451 | return breakpoints.find(address) != breakpoints.end(); 452 | } 453 | 454 | bool listModules() { 455 | task_t task; 456 | if (task_for_pid(mach_task_self(), childPid, &task) != KERN_SUCCESS) { 457 | printf("Failed to get task for pid %d\n", childPid); 458 | return false; 459 | } 460 | 461 | mach_vm_address_t address = 0; 462 | mach_vm_size_t size; 463 | vm_region_basic_info_data_64_t info; 464 | mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64; 465 | mach_port_t object_name; 466 | 467 | printf("Memory regions:\n"); 468 | while (true) { 469 | if (mach_vm_region(task, &address, &size, VM_REGION_BASIC_INFO_64, 470 | (vm_region_info_t)&info, &count, 471 | &object_name) != KERN_SUCCESS) { 472 | break; 473 | } 474 | 475 | char permissions[4] = 
{(info.protection & VM_PROT_READ) ? 'r' : '-', 476 | (info.protection & VM_PROT_WRITE) ? 'w' : '-', 477 | (info.protection & VM_PROT_EXECUTE) ? 'x' : '-', 478 | '\0'}; 479 | 480 | const char *module = findModuleForAddress(address); 481 | printf("0x%llx - 0x%llx %s %s\n", (unsigned long long)address, 482 | (unsigned long long)(address + size), permissions, 483 | module ? module : ""); 484 | free((void *)module); 485 | 486 | address += size; 487 | } 488 | 489 | mach_port_deallocate(mach_task_self(), task); 490 | return true; 491 | } 492 | 493 | bool printRegisters() { 494 | thread_act_port_array_t thread_list; 495 | mach_msg_type_number_t thread_count; 496 | 497 | kern_return_t kr = task_threads(taskPort, &thread_list, &thread_count); 498 | if (kr != KERN_SUCCESS) { 499 | printf("Failed to get threads (error 0x%x: %s)\n", kr, 500 | mach_error_string(kr)); 501 | return false; 502 | } 503 | 504 | // We'll use the first thread 505 | arm_thread_state64_t state; 506 | mach_msg_type_number_t count = ARM_THREAD_STATE64_COUNT; 507 | kr = thread_get_state(thread_list[0], ARM_THREAD_STATE64, 508 | (thread_state_t)&state, &count); 509 | 510 | if (kr != KERN_SUCCESS) { 511 | printf("Failed to get thread state (error 0x%x: %s)\n", kr, 512 | mach_error_string(kr)); 513 | return false; 514 | } 515 | 516 | // Print x0 through x28 517 | for (int i = 0; i <= 28; i++) { 518 | printf("x%-2d = 0x%016llx\n", i, state.__x[i]); 519 | } 520 | 521 | // Print special registers 522 | printf("fp = 0x%016llx\n", state.__fp); 523 | printf("lr = 0x%016llx\n", state.__lr); 524 | printf("sp = 0x%016llx\n", state.__sp); 525 | printf("pc = 0x%016llx\n", state.__pc); 526 | printf("cpsr= 0x%08x\n", state.__cpsr); 527 | 528 | // Cleanup 529 | for (unsigned int i = 0; i < thread_count; i++) { 530 | mach_port_deallocate(mach_task_self(), thread_list[i]); 531 | } 532 | vm_deallocate(mach_task_self(), (vm_address_t)thread_list, 533 | sizeof(thread_t) * thread_count); 534 | 535 | return true; 536 | } 537 | 538 
| bool printMemory(uint64_t address, size_t size) { 539 | vm_offset_t buffer; 540 | mach_msg_type_number_t read_size = (mach_msg_type_number_t)size; 541 | 542 | kern_return_t kr = 543 | mach_vm_read(taskPort, address, size, &buffer, &read_size); 544 | 545 | if (kr != KERN_SUCCESS) { 546 | printf("Failed to read memory at 0x%llx (error 0x%x: %s)\n", 547 | (unsigned long long)address, kr, mach_error_string(kr)); 548 | return false; 549 | } 550 | 551 | // Print hexdump 552 | uint8_t *data = (uint8_t *)buffer; 553 | for (size_t i = 0; i < read_size; i += 16) { 554 | // Print address 555 | printf("%016llx: ", (unsigned long long)(address + i)); 556 | 557 | // Print hex bytes 558 | for (size_t j = 0; j < 16; j++) { 559 | if (i + j < read_size) { 560 | printf("%02x ", data[i + j]); 561 | } else { 562 | printf(" "); 563 | } 564 | if (j == 7) 565 | printf(" "); // Extra space between groups of 8 566 | } 567 | 568 | // Print ASCII representation 569 | printf(" |"); 570 | for (size_t j = 0; j < 16; j++) { 571 | if (i + j < read_size) { 572 | uint8_t c = data[i + j]; 573 | printf("%c", (c >= 32 && c <= 126) ? 
c : '.'); 574 | } else { 575 | printf(" "); 576 | } 577 | } 578 | printf("|\n"); 579 | } 580 | 581 | // Deallocate the buffer 582 | vm_deallocate(mach_task_self(), buffer, read_size); 583 | return true; 584 | } 585 | 586 | // Add this enum inside the class 587 | enum Register { 588 | X0, 589 | X1, 590 | X2, 591 | X3, 592 | X4, 593 | X5, 594 | X6, 595 | X7, 596 | X8, 597 | X9, 598 | X10, 599 | X11, 600 | X12, 601 | X13, 602 | X14, 603 | X15, 604 | X16, 605 | X17, 606 | X18, 607 | X19, 608 | X20, 609 | X21, 610 | X22, 611 | X23, 612 | X24, 613 | X25, 614 | X26, 615 | X27, 616 | X28, 617 | FP, 618 | LR, 619 | SP, 620 | PC, 621 | CPSR 622 | }; 623 | 624 | uint64_t readRegister(Register reg) { 625 | thread_act_port_array_t thread_list; 626 | mach_msg_type_number_t thread_count; 627 | 628 | kern_return_t kr = task_threads(taskPort, &thread_list, &thread_count); 629 | if (kr != KERN_SUCCESS) { 630 | printf("Failed to get threads (error 0x%x: %s)\n", kr, 631 | mach_error_string(kr)); 632 | return 0; 633 | } 634 | 635 | arm_thread_state64_t state; 636 | mach_msg_type_number_t count = ARM_THREAD_STATE64_COUNT; 637 | kr = thread_get_state(thread_list[0], ARM_THREAD_STATE64, 638 | (thread_state_t)&state, &count); 639 | 640 | if (kr != KERN_SUCCESS) { 641 | printf("Failed to get thread state (error 0x%x: %s)\n", kr, 642 | mach_error_string(kr)); 643 | return 0; 644 | } 645 | 646 | uint64_t value = 0; 647 | if (reg >= X0 && reg <= X28) { 648 | value = state.__x[reg]; 649 | } else { 650 | switch (reg) { 651 | case FP: 652 | value = state.__fp; 653 | break; 654 | case LR: 655 | value = state.__lr; 656 | break; 657 | case SP: 658 | value = state.__sp; 659 | break; 660 | case PC: 661 | value = state.__pc; 662 | break; 663 | case CPSR: 664 | value = state.__cpsr; 665 | break; 666 | default: { 667 | printf("Invalid register\n"); 668 | return 0; 669 | } 670 | } 671 | } 672 | 673 | // Cleanup 674 | for (unsigned int i = 0; i < thread_count; i++) { 675 | 
mach_port_deallocate(mach_task_self(), thread_list[i]); 676 | } 677 | vm_deallocate(mach_task_self(), (vm_address_t)thread_list, 678 | sizeof(thread_t) * thread_count); 679 | 680 | return value; 681 | } 682 | 683 | bool setRegister(Register reg, uint64_t value) { 684 | thread_act_port_array_t thread_list; 685 | mach_msg_type_number_t thread_count; 686 | 687 | kern_return_t kr = task_threads(taskPort, &thread_list, &thread_count); 688 | if (kr != KERN_SUCCESS) { 689 | printf("Failed to get threads (error 0x%x: %s)\n", kr, 690 | mach_error_string(kr)); 691 | return false; 692 | } 693 | 694 | arm_thread_state64_t state; 695 | mach_msg_type_number_t count = ARM_THREAD_STATE64_COUNT; 696 | kr = thread_get_state(thread_list[0], ARM_THREAD_STATE64, 697 | (thread_state_t)&state, &count); 698 | 699 | if (kr != KERN_SUCCESS) { 700 | printf("Failed to get thread state (error 0x%x: %s)\n", kr, 701 | mach_error_string(kr)); 702 | return false; 703 | } 704 | 705 | if (reg >= X0 && reg <= X28) { 706 | state.__x[reg] = value; 707 | } else { 708 | switch (reg) { 709 | case FP: 710 | state.__fp = value; 711 | break; 712 | case LR: 713 | state.__lr = value; 714 | break; 715 | case SP: 716 | state.__sp = value; 717 | break; 718 | case PC: 719 | state.__pc = value; 720 | break; 721 | case CPSR: 722 | state.__cpsr = value; 723 | break; 724 | default: { 725 | printf("Invalid register\n"); 726 | return false; 727 | } 728 | } 729 | } 730 | 731 | kr = thread_set_state(thread_list[0], ARM_THREAD_STATE64, 732 | (thread_state_t)&state, ARM_THREAD_STATE64_COUNT); 733 | if (kr != KERN_SUCCESS) { 734 | printf("Failed to set thread state (error 0x%x: %s)\n", kr, 735 | mach_error_string(kr)); 736 | return false; 737 | } 738 | 739 | // Cleanup 740 | for (unsigned int i = 0; i < thread_count; i++) { 741 | mach_port_deallocate(mach_task_self(), thread_list[i]); 742 | } 743 | vm_deallocate(mach_task_self(), (vm_address_t)thread_list, 744 | sizeof(thread_t) * thread_count); 745 | 746 | return true; 747 
| } 748 | 749 | bool readMemory(uint64_t address, void *buffer, size_t size) { 750 | mach_vm_size_t read_size; 751 | 752 | kern_return_t kr = mach_vm_read_overwrite( 753 | taskPort, address, size, (mach_vm_address_t)buffer, &read_size); 754 | 755 | if (kr != KERN_SUCCESS) { 756 | printf("Failed to read memory at 0x%llx (error 0x%x: %s)\n", 757 | (unsigned long long)address, kr, mach_error_string(kr)); 758 | return false; 759 | } 760 | 761 | return read_size == size; 762 | } 763 | 764 | bool writeMemory(uint64_t address, const void *buffer, size_t size) { 765 | kern_return_t kr = 766 | mach_vm_write(taskPort, address, (vm_offset_t)buffer, size); 767 | 768 | if (kr != KERN_SUCCESS) { 769 | printf("Failed to write memory at 0x%llx (error 0x%x: %s)\n", 770 | (unsigned long long)address, kr, mach_error_string(kr)); 771 | return false; 772 | } 773 | 774 | return true; 775 | } 776 | 777 | uint64_t allocateMemory(size_t size) { 778 | mach_vm_address_t address = 0; // Let system choose the address 779 | 780 | kern_return_t kr = 781 | mach_vm_allocate(taskPort, &address, size, VM_FLAGS_ANYWHERE); 782 | 783 | if (kr != KERN_SUCCESS) { 784 | printf("Failed to allocate memory (error 0x%x: %s)\n", kr, 785 | mach_error_string(kr)); 786 | return 0; 787 | } 788 | 789 | // Set memory protection to RWX 790 | if (!adjustMemoryProtection(address, VM_PROT_READ | VM_PROT_WRITE)) { 791 | // If protection fails, deallocate the memory 792 | mach_vm_deallocate(taskPort, address, size); 793 | return 0; 794 | } 795 | 796 | printf("Allocated %zu bytes at 0x%llx\n", size, 797 | (unsigned long long)address); 798 | return address; 799 | } 800 | 801 | bool copyThreadState(arm_thread_state64_t &state) { 802 | thread_act_port_array_t thread_list; 803 | mach_msg_type_number_t thread_count; 804 | 805 | kern_return_t kr = task_threads(taskPort, &thread_list, &thread_count); 806 | if (kr != KERN_SUCCESS) { 807 | printf("Failed to get threads (error 0x%x: %s)\n", kr, 808 | mach_error_string(kr)); 809 | 
return false; 810 | } 811 | 812 | mach_msg_type_number_t count = ARM_THREAD_STATE64_COUNT; 813 | kr = thread_get_state(thread_list[0], ARM_THREAD_STATE64, 814 | (thread_state_t)&state, &count); 815 | 816 | // Cleanup 817 | for (unsigned int i = 0; i < thread_count; i++) { 818 | mach_port_deallocate(mach_task_self(), thread_list[i]); 819 | } 820 | vm_deallocate(mach_task_self(), (vm_address_t)thread_list, 821 | sizeof(thread_t) * thread_count); 822 | 823 | if (kr != KERN_SUCCESS) { 824 | printf("Failed to get thread state (error 0x%x: %s)\n", kr, 825 | mach_error_string(kr)); 826 | return false; 827 | } 828 | 829 | return true; 830 | } 831 | 832 | bool restoreThreadState(const arm_thread_state64_t &state) { 833 | thread_act_port_array_t thread_list; 834 | mach_msg_type_number_t thread_count; 835 | 836 | kern_return_t kr = task_threads(taskPort, &thread_list, &thread_count); 837 | if (kr != KERN_SUCCESS) { 838 | printf("Failed to get threads (error 0x%x: %s)\n", kr, 839 | mach_error_string(kr)); 840 | return false; 841 | } 842 | 843 | kr = thread_set_state(thread_list[0], ARM_THREAD_STATE64, 844 | (thread_state_t)&state, ARM_THREAD_STATE64_COUNT); 845 | 846 | // Cleanup 847 | for (unsigned int i = 0; i < thread_count; i++) { 848 | mach_port_deallocate(mach_task_self(), thread_list[i]); 849 | } 850 | vm_deallocate(mach_task_self(), (vm_address_t)thread_list, 851 | sizeof(thread_t) * thread_count); 852 | 853 | if (kr != KERN_SUCCESS) { 854 | printf("Failed to set thread state (error 0x%x: %s)\n", kr, 855 | mach_error_string(kr)); 856 | return false; 857 | } 858 | 859 | return true; 860 | } 861 | 862 | bool printStackTrace() { 863 | arm_thread_state64_t state; 864 | if (!copyThreadState(state)) { 865 | return false; 866 | } 867 | 868 | uint64_t fp = state.__fp; 869 | uint64_t lr = state.__lr; 870 | uint64_t pc = state.__pc; 871 | 872 | printf("Stack trace:\n"); 873 | printf("#0 0x%llx\n", pc); 874 | 875 | int frame = 1; 876 | // Follow frame pointers until we hit NULL 
or an invalid address 877 | while (fp != 0 && 878 | frame < 32) { // Limit to 32 frames to prevent infinite loops 879 | uint64_t prev_fp; 880 | uint64_t prev_lr; 881 | 882 | // Read previous frame pointer and link register 883 | if (!readMemory(fp, &prev_fp, sizeof(prev_fp)) || 884 | !readMemory(fp + 8, &prev_lr, sizeof(prev_lr))) { 885 | printf("Failed to read stack frame at 0x%llx\n", fp); 886 | break; 887 | } 888 | 889 | // Print the frame 890 | const char *module = findModuleForAddress(prev_lr); 891 | printf("#%d 0x%llx %s\n", frame, prev_lr, module); 892 | free((void *)module); 893 | 894 | // Move to previous frame 895 | fp = prev_fp; 896 | frame++; 897 | } 898 | 899 | return true; 900 | } 901 | 902 | bool setWatchpoint(uint64_t address, size_t size) { 903 | if (watchpoints.size() >= MAX_WATCHPOINTS) { 904 | printf("No available hardware watchpoints\n"); 905 | return false; 906 | } 907 | 908 | // Size must be 1, 2, 4, or 8 bytes and address must be aligned 909 | if (size != 1 && size != 2 && size != 4 && size != 8) { 910 | printf("Invalid watchpoint size (must be 1, 2, 4, or 8)\n"); 911 | return false; 912 | } 913 | 914 | if (address & (size - 1)) { 915 | printf("Address must be aligned to size\n"); 916 | return false; 917 | } 918 | 919 | thread_act_port_array_t thread_list; 920 | mach_msg_type_number_t thread_count; 921 | 922 | kern_return_t kr = task_threads(taskPort, &thread_list, &thread_count); 923 | if (kr != KERN_SUCCESS) { 924 | printf("Failed to get threads (error 0x%x: %s)\n", kr, 925 | mach_error_string(kr)); 926 | return false; 927 | } 928 | 929 | arm_debug_state64_t debug_state; 930 | mach_msg_type_number_t count = ARM_DEBUG_STATE64_COUNT; 931 | 932 | kr = thread_get_state(thread_list[0], ARM_DEBUG_STATE64, 933 | (thread_state_t)&debug_state, &count); 934 | 935 | if (kr != KERN_SUCCESS) { 936 | printf("Failed to get debug state (error 0x%x: %s)\n", kr, 937 | mach_error_string(kr)); 938 | return false; 939 | } 940 | 941 | // Find first free debug 
register 942 | int reg_idx = -1; 943 | for (int i = 0; i < MAX_WATCHPOINTS; i++) { 944 | if ((debug_state.__wcr[i] & 1) == 0) { // Check if watchpoint is disabled 945 | reg_idx = i; 946 | break; 947 | } 948 | } 949 | 950 | if (reg_idx == -1) { 951 | printf("No available hardware watchpoints\n"); 952 | return false; 953 | } 954 | 955 | // Configure watchpoint 956 | debug_state.__wvr[reg_idx] = address; 957 | 958 | // WCR bits: 959 | // Bit 0: Enable 960 | // Bits 1-3: Size encoding (001=1 byte, 010=2 bytes, 011=4 bytes, 100=8 961 | // bytes) Bits 5-8: Access type (10 = write) Bits 20-28: Length mask 962 | uint64_t wcr = 1; // Enable bit 963 | switch (size) { 964 | case 1: 965 | wcr |= (1ULL << 1); 966 | break; 967 | case 2: 968 | wcr |= (2ULL << 1); 969 | break; 970 | case 4: 971 | wcr |= (3ULL << 1); 972 | break; 973 | case 8: 974 | wcr |= (4ULL << 1); 975 | break; 976 | } 977 | wcr |= (2ULL << 5); // Write access 978 | 979 | debug_state.__wcr[reg_idx] = wcr; 980 | 981 | // Enable monitor mode if not already enabled 982 | if ((debug_state.__mdscr_el1 & 0x8000) == 0) { 983 | debug_state.__mdscr_el1 |= 0x8000; 984 | } 985 | 986 | kr = 987 | thread_set_state(thread_list[0], ARM_DEBUG_STATE64, 988 | (thread_state_t)&debug_state, ARM_DEBUG_STATE64_COUNT); 989 | 990 | if (kr != KERN_SUCCESS) { 991 | printf("Failed to set debug state (error 0x%x: %s)\n", kr, 992 | mach_error_string(kr)); 993 | return false; 994 | } 995 | 996 | watchpoints[address] = reg_idx; 997 | printf("Watchpoint set at address 0x%llx (size=%zu)\n", address, size); 998 | 999 | // Cleanup 1000 | for (unsigned int i = 0; i < thread_count; i++) { 1001 | mach_port_deallocate(mach_task_self(), thread_list[i]); 1002 | } 1003 | vm_deallocate(mach_task_self(), (vm_address_t)thread_list, 1004 | sizeof(thread_t) * thread_count); 1005 | 1006 | return true; 1007 | } 1008 | 1009 | bool removeWatchpoint(uint64_t address) { 1010 | auto it = watchpoints.find(address); 1011 | if (it == watchpoints.end()) { 1012 | 
printf("No watchpoint found at address 0x%llx\n", address); 1013 | return false; 1014 | } 1015 | 1016 | thread_act_port_array_t thread_list; 1017 | mach_msg_type_number_t thread_count; 1018 | 1019 | kern_return_t kr = task_threads(taskPort, &thread_list, &thread_count); 1020 | if (kr != KERN_SUCCESS) { 1021 | printf("Failed to get threads (error 0x%x: %s)\n", kr, 1022 | mach_error_string(kr)); 1023 | return false; 1024 | } 1025 | 1026 | arm_debug_state64_t debug_state; 1027 | mach_msg_type_number_t count = ARM_DEBUG_STATE64_COUNT; 1028 | 1029 | kr = thread_get_state(thread_list[0], ARM_DEBUG_STATE64, 1030 | (thread_state_t)&debug_state, &count); 1031 | 1032 | if (kr != KERN_SUCCESS) { 1033 | printf("Failed to get debug state (error 0x%x: %s)\n", kr, 1034 | mach_error_string(kr)); 1035 | return false; 1036 | } 1037 | 1038 | // Disable the watchpoint 1039 | debug_state.__wcr[it->second] = 0; 1040 | 1041 | kr = 1042 | thread_set_state(thread_list[0], ARM_DEBUG_STATE64, 1043 | (thread_state_t)&debug_state, ARM_DEBUG_STATE64_COUNT); 1044 | 1045 | if (kr != KERN_SUCCESS) { 1046 | printf("Failed to set debug state (error 0x%x: %s)\n", kr, 1047 | mach_error_string(kr)); 1048 | return false; 1049 | } 1050 | 1051 | watchpoints.erase(it); 1052 | printf("Watchpoint removed from address 0x%llx\n", address); 1053 | 1054 | // Cleanup 1055 | for (unsigned int i = 0; i < thread_count; i++) { 1056 | mach_port_deallocate(mach_task_self(), thread_list[i]); 1057 | } 1058 | vm_deallocate(mach_task_self(), (vm_address_t)thread_list, 1059 | sizeof(thread_t) * thread_count); 1060 | 1061 | return true; 1062 | } 1063 | 1064 | ~MuhDebugger() { 1065 | if (taskPort != MACH_PORT_NULL) { 1066 | mach_port_deallocate(mach_task_self(), taskPort); 1067 | } 1068 | } 1069 | }; 1070 | 1071 | // Define the static constant outside the class 1072 | const unsigned int MuhDebugger::AARCH64_BREAKPOINT = 0xD4200000; 1073 | 1074 | struct Exports { 1075 | uint64_t version; // 0x1560000000000 1076 | uint64_t 
x87_exports; 1077 | uint64_t x87_export_count; 1078 | uint64_t runtime_exports; 1079 | uint64_t runtime_export_count; 1080 | }; 1081 | 1082 | struct Export { 1083 | uint64_t address; 1084 | uint64_t name; 1085 | }; 1086 | 1087 | int run_helper_mode() { 1088 | const char *socket_path = "/var/run/rosetta_helper.sock"; 1089 | 1090 | // Create Unix domain socket 1091 | int server_fd = socket(AF_UNIX, SOCK_STREAM, 0); 1092 | if (server_fd < 0) { 1093 | perror("socket"); 1094 | return 1; 1095 | } 1096 | 1097 | // Remove existing socket if present 1098 | unlink(socket_path); 1099 | 1100 | struct sockaddr_un addr; 1101 | memset(&addr, 0, sizeof(addr)); 1102 | addr.sun_family = AF_UNIX; 1103 | strncpy(addr.sun_path, socket_path, sizeof(addr.sun_path) - 1); 1104 | 1105 | if (bind(server_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { 1106 | perror("bind"); 1107 | close(server_fd); 1108 | return 1; 1109 | } 1110 | 1111 | // Set socket permissions to allow non-root users to connect 1112 | chmod(socket_path, 0666); 1113 | 1114 | if (listen(server_fd, 5) < 0) { 1115 | perror("listen"); 1116 | close(server_fd); 1117 | return 1; 1118 | } 1119 | 1120 | printf("Helper listening on %s\n", socket_path); 1121 | 1122 | while (true) { 1123 | int client_fd = accept(server_fd, NULL, NULL); 1124 | if (client_fd < 0) { 1125 | perror("accept"); 1126 | continue; 1127 | } 1128 | 1129 | // Read PID from client 1130 | pid_t target_pid; 1131 | ssize_t bytes_read = recv(client_fd, &target_pid, sizeof(target_pid), 0); 1132 | if (bytes_read != sizeof(target_pid)) { 1133 | perror("recv"); 1134 | close(client_fd); 1135 | continue; 1136 | } 1137 | 1138 | printf("Received request to debug PID: %d\n", target_pid); 1139 | 1140 | // Fork and execute new instance 1141 | pid_t child = fork(); 1142 | if (child == 0) { 1143 | // Child process 1144 | char pid_str[32]; 1145 | snprintf(pid_str, sizeof(pid_str), "%d", target_pid); 1146 | 1147 | // Get path to current executable 1148 | char path[4096]; 1149 | 
uint32_t path_size = sizeof(path); 1150 | if (_NSGetExecutablePath(path, &path_size) == 0) { 1151 | char *args[] = {path, pid_str, NULL}; 1152 | execv(path, args); 1153 | perror("execv"); 1154 | } 1155 | exit(1); 1156 | } 1157 | 1158 | // Send success response 1159 | uint8_t success = 1; 1160 | send(client_fd, &success, sizeof(success), 0); 1161 | close(client_fd); 1162 | } 1163 | 1164 | close(server_fd); 1165 | return 0; 1166 | } 1167 | std::string get_process_name(int pid) { 1168 | char name[PROC_PIDPATHINFO_MAXSIZE]; 1169 | if (proc_name(pid, name, sizeof(name)) <= 0) { 1170 | return ""; // Empty string if failed 1171 | } 1172 | return std::string(name); 1173 | } 1174 | #include 1175 | 1176 | std::string get_process_cmdline(int pid) { 1177 | int mib[4] = {CTL_KERN, KERN_PROCARGS2, pid, 0}; 1178 | size_t size = 0; 1179 | 1180 | // Get buffer size 1181 | if (sysctl(mib, 3, NULL, &size, NULL, 0) < 0) { 1182 | return ""; 1183 | } 1184 | 1185 | // Allocate buffer 1186 | char *buffer = (char *)malloc(size); 1187 | if (!buffer) { 1188 | return ""; 1189 | } 1190 | 1191 | // Get process args 1192 | if (sysctl(mib, 3, buffer, &size, NULL, 0) < 0) { 1193 | free(buffer); 1194 | return ""; 1195 | } 1196 | 1197 | // First int in buffer is argc 1198 | int argc = *(int *)buffer; 1199 | printf("process argc count: %d\n", argc); 1200 | 1201 | // Skip argc and executable path 1202 | char *p = buffer + sizeof(int); 1203 | while (*p != '\0') 1204 | p++; 1205 | p++; 1206 | 1207 | while (*p == '\0') 1208 | p++; 1209 | 1210 | // Build command line string 1211 | std::string cmdline; 1212 | for (int i = 0; i < argc && *p != '\0'; i++) { 1213 | if (i > 0) 1214 | cmdline += " "; 1215 | cmdline += p; 1216 | while (*p != '\0') 1217 | p++; 1218 | p++; 1219 | } 1220 | 1221 | free(buffer); 1222 | return cmdline; 1223 | } 1224 | 1225 | int main(int argc, char *argv[]) { 1226 | 1227 | // Set up offsets dynamically 1228 | OffsetFinder offset_finder; 1229 | // Set default offsets temporarily (or 
just in case we need to fall back) 1230 | offset_finder.set_default_offsets(); 1231 | // Search the rosetta runtime binary for offsets. 1232 | offset_finder.determine_offsets(); 1233 | 1234 | if (geteuid() != 0) { 1235 | if (argc < 2) { 1236 | printf("Usage: %s \n", argv[0]); 1237 | return 1; 1238 | } 1239 | // connect to helper tool socket and send PID 1240 | int client_fd = socket(AF_UNIX, SOCK_STREAM, 0); 1241 | if (client_fd < 0) { 1242 | perror("socket"); 1243 | return 1; 1244 | } 1245 | 1246 | struct sockaddr_un addr; 1247 | 1248 | memset(&addr, 0, sizeof(addr)); 1249 | 1250 | addr.sun_family = AF_UNIX; 1251 | strncpy(addr.sun_path, "/var/run/rosetta_helper.sock", 1252 | sizeof(addr.sun_path) - 1); 1253 | 1254 | if (connect(client_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { 1255 | perror("connect"); 1256 | close(client_fd); 1257 | return 1; 1258 | } 1259 | 1260 | pid_t pid = getpid(); 1261 | 1262 | ssize_t bytes_sent = send(client_fd, &pid, sizeof(pid), 0); 1263 | if (bytes_sent != sizeof(pid)) { 1264 | perror("send"); 1265 | close(client_fd); 1266 | return 1; 1267 | } 1268 | 1269 | uint8_t success; 1270 | 1271 | ssize_t bytes_read = recv(client_fd, &success, sizeof(success), 0); 1272 | 1273 | if (bytes_read != sizeof(success)) { 1274 | perror("recv"); 1275 | close(client_fd); 1276 | return 1; 1277 | } 1278 | 1279 | close(client_fd); 1280 | // Non-root code path 1281 | // raise(SIGSTOP); 1282 | usleep(50000000); 1283 | printf("launching into program\n"); 1284 | execv(argv[1], &argv[1]); 1285 | return 1; 1286 | } 1287 | 1288 | // Root code path 1289 | if (argc < 2) { 1290 | // Run as helper tool 1291 | return run_helper_mode(); 1292 | } 1293 | 1294 | // Debug specific process 1295 | int pid = atoi(argv[1]); 1296 | printf("Running as root, attaching to process %d\n", pid); 1297 | 1298 | MuhDebugger dbg; 1299 | 1300 | auto cmdline = get_process_cmdline(pid); 1301 | printf("cmdline: %s\n", cmdline.c_str()); 1302 | if (cmdline.find("wine") != 
std::string::npos) { 1303 | printf("Wine process detected\n"); 1304 | g_is_wine = true; 1305 | } 1306 | 1307 | if (!dbg.attach(pid)) { 1308 | printf("Failed to attach to process\n"); 1309 | return 1; 1310 | } 1311 | printf("Attached successfully\n"); 1312 | 1313 | auto module_list = dbg.getModuleList(); 1314 | 1315 | for (const auto &module : module_list) { 1316 | printf("address %llx, name %s\n", module.address, module.path.c_str()); 1317 | } 1318 | 1319 | const auto runtime_base = dbg.find_runtime(); 1320 | 1321 | printf("Rosetta runtime base: 0x%llx\n", runtime_base); 1322 | 1323 | if (runtime_base == 0) { 1324 | printf("Failed to find Rosetta runtime\n"); 1325 | return 1; 1326 | } 1327 | 1328 | dbg.setBreakpoint(runtime_base + offset_finder.offset_loop_copy_func); 1329 | dbg.continueExecution(); 1330 | 1331 | #if 0 1332 | // Uncomment to enable debug flags in Rosetta runtime, not recommended 1333 | // enable PRINT_IR 1334 | // dbg.writeMemory(runtime_base + 0x3B280, "\x01", 1); 1335 | 1336 | // enable PRINT_SEGMENTS 1337 | // dbg.writeMemory(runtime_base + 0x3B281, "\x01", 1); 1338 | 1339 | // enable DISABLE_EXCEPTIONS 1340 | // dbg.writeMemory(runtime_base + 0x3B27D, "\x01", 1); 1341 | 1342 | // enable DISABLE_SIGACTION 1343 | // dbg.writeMemory(runtime_base + 0x3B27E, "\x01", 1); 1344 | 1345 | // enable ALLOW_GUARD_PAGES 1346 | // dbg.writeMemory(runtime_base + 0x3B278, "\x01", 1); 1347 | 1348 | // // enable SCRIBBLE_TRANSLATIONS 1349 | // dbg.writeMemory(runtime_base + 0x3B282, "\x01", 1); 1350 | 1351 | // // enable AOT_ERRORS_ARE_FATAL 1352 | // dbg.writeMemory(runtime_base + 0x3B279, "\x01", 1); 1353 | #endif 1354 | 1355 | auto rosetta_runtime_exports_address = 1356 | dbg.readRegister(MuhDebugger::Register::X19); 1357 | printf("Rosetta runtime exports: 0x%llx\n", rosetta_runtime_exports_address); 1358 | 1359 | Exports exports; 1360 | dbg.readMemory(rosetta_runtime_exports_address, &exports, sizeof(exports)); 1361 | 1362 | printf("Rosetta version: %llx\n", 
exports.version); 1363 | 1364 | char path[PATH_MAX]; 1365 | uint32_t path_size = sizeof(path); 1366 | if (_NSGetExecutablePath(path, &path_size) != 0) { 1367 | printf("Failed to get executable path\n"); 1368 | return 1; 1369 | } 1370 | 1371 | // get the directory of the current executable 1372 | std::filesystem::path executable_path(path); 1373 | std::filesystem::path executable_dir = executable_path.parent_path(); 1374 | 1375 | MachoLoader macho_loader; 1376 | 1377 | if (!macho_loader.open(executable_dir / "libRuntimeRosettax87")) { 1378 | printf("Failed to open Mach-O file\n"); 1379 | return 1; 1380 | } 1381 | 1382 | // we need to call mmap to allocate the memory for our macho 1383 | 1384 | uint64_t macho_base = 0; // dbg.allocateMemory(macho_loader.image_size()); 1385 | 1386 | // first we store the original state of the thread 1387 | arm_thread_state64_t backup_thread_state; 1388 | dbg.copyThreadState(backup_thread_state); 1389 | 1390 | // setup a breakpoint after mmap syscall 1391 | dbg.setBreakpoint(runtime_base + offset_finder.offset_svc_call_ret); 1392 | 1393 | // now we prepare the registers for the mmap call 1394 | arm_thread_state64_t mmap_thread_state; 1395 | memcpy(&mmap_thread_state, &backup_thread_state, 1396 | sizeof(arm_thread_state64_t)); 1397 | 1398 | #define MAP_TRANSLATED_ALLOW_EXECUTE \ 1399 | 0x20000 /* allow execute in translated processes */ 1400 | 1401 | mmap_thread_state.__x[0] = 0x210000000LL; // addr 1402 | mmap_thread_state.__x[1] = macho_loader.image_size(); // size 1403 | mmap_thread_state.__x[2] = VM_PROT_READ | VM_PROT_WRITE; // prot 1404 | mmap_thread_state.__x[3] = 1405 | MAP_ANON | MAP_FIXED | MAP_TRANSLATED_ALLOW_EXECUTE; // flags 1406 | mmap_thread_state.__x[4] = -1; // fd 1407 | mmap_thread_state.__x[5] = 0; // offset 1408 | 1409 | mmap_thread_state.__pc = runtime_base + offset_finder.offset_svc_call_entry; 1410 | 1411 | dbg.restoreThreadState(mmap_thread_state); 1412 | dbg.continueExecution(); 1413 | 1414 | macho_base = 
dbg.readRegister(MuhDebugger::Register::X0); 1415 | 1416 | printf("Allocated memory at 0x%llx\n", macho_base); 1417 | 1418 | dbg.removeBreakpoint(runtime_base + offset_finder.offset_svc_call_ret); 1419 | dbg.restoreThreadState(backup_thread_state); 1420 | 1421 | macho_loader.for_each_segment([&](segment_command_64 *segm) { 1422 | auto dest = macho_base + segm->vmaddr; 1423 | auto size = segm->vmsize; 1424 | auto src = macho_loader.buffer_.data() + segm->fileoff; 1425 | 1426 | printf("Copying segment %s from 0x%llx to 0x%llx (%zx bytes)\n", 1427 | segm->segname, (unsigned long long)segm->fileoff, 1428 | (unsigned long long)dest, (unsigned long)size); 1429 | 1430 | dbg.writeMemory(dest, src, size); 1431 | 1432 | dbg.adjustMemoryProtection(dest, segm->initprot, segm->vmsize); 1433 | }); 1434 | 1435 | // fix up Exports segment of mapped macho 1436 | uint64_t macho_exports_address = 1437 | macho_base + macho_loader.get_section("__DATA", "exports")->addr; 1438 | Exports macho_exports; 1439 | 1440 | dbg.readMemory(macho_exports_address, &macho_exports, sizeof(macho_exports)); 1441 | macho_exports.x87_exports += macho_base; 1442 | macho_exports.runtime_exports += macho_base; 1443 | 1444 | std::vector x87_exports(macho_exports.x87_export_count); 1445 | std::vector runtime_exports(macho_exports.runtime_export_count); 1446 | 1447 | dbg.readMemory(macho_exports.x87_exports, x87_exports.data(), 1448 | x87_exports.size() * sizeof(Export)); 1449 | dbg.readMemory(macho_exports.runtime_exports, runtime_exports.data(), 1450 | runtime_exports.size() * sizeof(Export)); 1451 | 1452 | for (auto &exp : x87_exports) { 1453 | exp.address += macho_base; 1454 | exp.name += macho_base; 1455 | } 1456 | 1457 | for (auto &exp : runtime_exports) { 1458 | exp.address += macho_base; 1459 | exp.name += macho_base; 1460 | } 1461 | 1462 | dbg.writeMemory(macho_exports.x87_exports, x87_exports.data(), 1463 | x87_exports.size() * sizeof(Export)); 1464 | dbg.writeMemory(macho_exports.runtime_exports, 
runtime_exports.data(), 1465 | runtime_exports.size() * sizeof(Export)); 1466 | 1467 | printf("macho_exports_address: 0x%llx\n", macho_exports_address); 1468 | printf("macho_exports.x87_exports: 0x%llx\n", macho_exports.x87_exports); 1469 | printf("macho_exports.runtime_exports: 0x%llx\n", 1470 | macho_exports.runtime_exports); 1471 | 1472 | dbg.writeMemory(macho_exports_address, &macho_exports, sizeof(macho_exports)); 1473 | 1474 | // look up imports section of mapped macho 1475 | auto macho_imports_address = 1476 | macho_base + macho_loader.get_section("__DATA", "imports")->addr; 1477 | printf("macho_imports_address: 0x%llx\n", macho_imports_address); 1478 | 1479 | // read the exports from X19 register and copy them to the imports section of 1480 | // the mapped macho 1481 | auto lib_rosetta_runtime_exports_address = 1482 | dbg.readRegister(MuhDebugger::Register::X19); 1483 | printf("lib_rosetta_runtime_exports_address: 0x%llx\n", 1484 | lib_rosetta_runtime_exports_address); 1485 | 1486 | Exports lib_rosetta_runtime_exports; 1487 | dbg.readMemory(lib_rosetta_runtime_exports_address, 1488 | &lib_rosetta_runtime_exports, 1489 | sizeof(lib_rosetta_runtime_exports)); 1490 | 1491 | printf("lib_rosetta_runtime_exports.version = 0x%llx\n", 1492 | lib_rosetta_runtime_exports.version); 1493 | printf("lib_rosetta_runtime_exports.x87_exports = 0x%llx\n", 1494 | lib_rosetta_runtime_exports.x87_exports); 1495 | printf("lib_rosetta_runtime_exports.x87_export_count = 0x%llx\n", 1496 | lib_rosetta_runtime_exports.x87_export_count); 1497 | printf("lib_rosetta_runtime_exports.runtime_exports = 0x%llx\n", 1498 | lib_rosetta_runtime_exports.runtime_exports); 1499 | printf("lib_rosetta_runtime_exports.runtime_export_count = 0x%llx\n", 1500 | lib_rosetta_runtime_exports.runtime_export_count); 1501 | 1502 | dbg.writeMemory(macho_imports_address, &lib_rosetta_runtime_exports, 1503 | sizeof(lib_rosetta_runtime_exports)); 1504 | 1505 | dbg.removeBreakpoint(runtime_base + 
offset_finder.offset_loop_copy_func); 1506 | 1507 | // replace the exports in X19 register with the address of the mapped macho 1508 | dbg.setRegister(MuhDebugger::Register::X19, macho_exports_address); 1509 | 1510 | if (g_is_wine) 1511 | dbg.detach(); 1512 | else { 1513 | dbg.continueExecution(); 1514 | } 1515 | 1516 | #if 0 1517 | // Basic debugging loop example 1518 | printf("Debugger attached. Commands:\n"); 1519 | printf("b - Set breakpoint at address\n"); 1520 | printf("r - Remove breakpoint at address\n"); 1521 | printf("c - Continue execution\n"); 1522 | printf("m - List mapped modules\n"); 1523 | printf("p - Print CPU registers\n"); 1524 | printf("x - Examine memory\n"); 1525 | printf("q - Quit\n"); 1526 | 1527 | char cmd; 1528 | uint64_t addr; 1529 | size_t size; 1530 | int status; 1531 | while (true) { 1532 | printf("dbg> "); 1533 | scanf(" %c", &cmd); 1534 | 1535 | switch (cmd) { 1536 | case 'b': 1537 | scanf("%llx", &addr); 1538 | dbg.setBreakpoint(addr); 1539 | break; 1540 | case 'r': 1541 | scanf("%llx", &addr); 1542 | dbg.removeBreakpoint(addr); 1543 | break; 1544 | case 'c': 1545 | dbg.continueExecution(); 1546 | break; 1547 | case 'm': 1548 | dbg.listModules(); 1549 | break; 1550 | case 'p': 1551 | dbg.printRegisters(); 1552 | break; 1553 | case 't': 1554 | dbg.printStackTrace(); 1555 | break; 1556 | case 's': 1557 | dbg.singleStep(); 1558 | break; 1559 | case 'w': { 1560 | scanf("%llx %zx", &addr, &size); 1561 | dbg.setWatchpoint(addr, size); 1562 | } break; 1563 | case 'x': 1564 | scanf("%llx %zx", &addr, &size); 1565 | dbg.printMemory(addr, size); 1566 | break; 1567 | case 'q': 1568 | return 0; 1569 | } 1570 | } 1571 | #endif 1572 | return 0; 1573 | } 1574 | -------------------------------------------------------------------------------- /rosettaRuntime/X87.cpp: -------------------------------------------------------------------------------- 1 | #include "X87.h" 2 | 3 | #include "Export.h" 4 | #include "Log.h" 5 | #include "SIMDGuard.h" 6 | 
#include "X87State.h"
#include "openlibm_math.h"

// Feature switches, one per x87 instruction handler. Handlers later in this
// file that are wrapped in `#if defined(X87_<NAME>)` use the local
// implementation when the switch is defined and otherwise fall back to a
// trampoline into the original routine. Comment a line out to disable the
// corresponding local implementation.
#define X87_F2XM1
#define X87_FABS
#define X87_FADD_ST
#define X87_FADD_F32
#define X87_FADD_F64
#define X87_FBLD
#define X87_FBSTP
#define X87_FCHS
#define X87_FCMOV
#define X87_FCOM_ST
#define X87_FCOM_F32
#define X87_FCOM_F64
#define X87_FCOMI
#define X87_FCOS
#define X87_FDECSTP
#define X87_FDIV_ST
#define X87_FDIV_F32
#define X87_FDIV_F64
#define X87_FDIVR_ST
#define X87_FDIVR_F32
#define X87_FDIVR_F64
#define X87_FFREE
#define X87_FIADD
#define X87_FICOM
#define X87_FIDIV
#define X87_FIDIVR
#define X87_FILD
#define X87_FIMUL
#define X87_FINCSTP
#define X87_FIST_I16
#define X87_FIST_I32
#define X87_FIST_I64
#define X87_FISTT_I16
#define X87_FISTT_I32
#define X87_FISTT_I64
#define X87_FISUB
#define X87_FISUBR
#define X87_FLD_STI
#define X87_FLD_CONSTANT
#define X87_FLD_FP32
#define X87_FLD_FP64
#define X87_FLD_FP80
#define X87_FMUL_ST
#define X87_FMUL_F32
#define X87_FMUL_F64
#define X87_FPATAN
#define X87_FPREM
#define X87_FPREM1
#define X87_FPTAN
#define X87_FRNDINT
#define X87_FSCALE
#define X87_FSIN
#define X87_FSINCOS
#define X87_FSQRT
#define X87_FST_STI
#define X87_FST_FP32
#define X87_FST_FP64
#define X87_FST_FP80
#define X87_FSUB_ST
#define X87_FSUB_F32
#define X87_FSUB_F64
#define X87_FSUBR_ST
#define X87_FSUBR_F32
#define X87_FSUBR_F64
#define X87_FUCOM
#define X87_FUCOMI
#define X87_FXAM
#define X87_FXCH
#define X87_FXTRACT
#define X87_FYL2X
#define X87_FYL2XP1

// Defines a naked, argument-less function NAME whose whole body is a tail jump
// through the pointer stored in the symbol `orig_NAME`:
//   adrp REGISTER, _orig_NAME@PAGE              ; page of the pointer variable
//   ldr  REGISTER, [REGISTER, _orig_NAME@PAGEOFF] ; load the target address
//   br   REGISTER                               ; jump, never returns here
// The function is naked, so no prologue/epilogue is emitted and the incoming
// argument registers reach the target untouched. REGISTER is used as scratch.
#define X87_TRAMPOLINE(NAME, REGISTER)                                         \
  void __attribute__((naked, used)) NAME() {                                   \
    asm volatile("adrp " #REGISTER ", _orig_" #NAME "@PAGE\n"                  \
                 "ldr " #REGISTER ", [" #REGISTER ", _orig_" #NAME             \
                 "@PAGEOFF]\n"                                                 \
                 "br " #REGISTER);                                             \
  }

// Same tail jump as X87_TRAMPOLINE, but the generated function is declared
// with an explicit return type RETURN and parenthesised parameter list ARGS so
// that its signature matches the declaration it stands in for.
#define X87_TRAMPOLINE_ARGS(RETURN, NAME, ARGS, REGISTER)                      \
  RETURN __attribute__((naked, used)) NAME ARGS {                              \
    asm volatile("adrp " #REGISTER ", _orig_" #NAME "@PAGE\n"                  \
                 "ldr " #REGISTER ", [" #REGISTER ", _orig_" #NAME             \
                 "@PAGEOFF]\n"                                                 \
                 "br " #REGISTER);                                             \
  }

// Library entry hook: runs exports_init(), prints a build banner with the
// compile date/time, then forwards all three arguments unchanged to
// orig_init_library and returns its result. A SIMDGuardFull is held for the
// duration of the call.
void *init_library(SymbolList const *a1, unsigned long long a2,
                   ThreadContextOffsets const *a3) {
  SIMDGuardFull simd_guard;
  exports_init();

  simple_printf("RosettaRuntimex87 built %s"
                "\n",
                __DATE__ " " __TIME__);

  return orig_init_library(a1, a2, a3);
}

// Routines that are not re-implemented here: each one jumps straight to the
// corresponding orig_* pointer, using x9 as the scratch register.
X87_TRAMPOLINE(register_runtime_routine_offsets, x9)
X87_TRAMPOLINE(translator_use_t8027_codegen, x9)
X87_TRAMPOLINE(translator_reset, x9)
X87_TRAMPOLINE(ir_create_bad_access, x9)
X87_TRAMPOLINE(ir_create, x9)
X87_TRAMPOLINE(module_free, x9)
X87_TRAMPOLINE(module_get_size, x9)
X87_TRAMPOLINE(module_is_bad_access, x9)
X87_TRAMPOLINE(module_print, x9)
X87_TRAMPOLINE(translator_translate, x9)
X87_TRAMPOLINE(translator_free, x9)
X87_TRAMPOLINE(translator_get_data, x9)
X87_TRAMPOLINE(translator_get_size, x9)
X87_TRAMPOLINE(translator_get_branch_slots_offset, x9)
X87_TRAMPOLINE(translator_get_branch_slots_count, x9)
X87_TRAMPOLINE(translator_get_branch_entries, x9)
X87_TRAMPOLINE(translator_get_instruction_offsets, x9)
X87_TRAMPOLINE(translator_apply_fixups, x9)

// x87_init: resets *a1 to a default-constructed X87State. When
// X87_CONVERT_TO_FP80 is defined the original routine is used instead.
#if !defined(X87_CONVERT_TO_FP80)
void x87_init(X87State *a1) {
  SIMDGuardFull simd_guard;
  LOG(1, "x87_init\n", 9);
  *a1 = X87State();
}
#else
X87_TRAMPOLINE_ARGS(void, x87_init, (X87State * a1), x9);
#endif

X87_TRAMPOLINE(x87_state_from_x86_float_state, x9);
X87_TRAMPOLINE(x87_state_to_x86_float_state, x9);
X87_TRAMPOLINE(x87_pop_register_stack, x9); 141 | 142 | #if defined(X87_F2XM1) 143 | void x87_f2xm1(X87State *state) { 144 | SIMDGuard simd_guard; 145 | 146 | LOG(1, "x87_f2xm1\n", 10); 147 | // Get value from ST(0) 148 | auto x = state->get_st_fast(0); 149 | 150 | // // Check range [-1.0, +1.0] 151 | if (x < -1.0f || x > 1.0f) { 152 | // Set to NaN for undefined result 153 | state->set_st_fast(0, 0); 154 | return; 155 | } 156 | 157 | // Calculate 2^x - 1 using mmath::exp2 158 | auto result = exp2(x) - 1.0f; 159 | 160 | // Store result back in ST(0) 161 | state->set_st_fast(0, result); 162 | } 163 | #else 164 | X87_TRAMPOLINE_ARGS(void, x87_f2xm1, (X87State * state), x9); 165 | #endif 166 | 167 | // Clears the sign bit of ST(0) to create the absolute value of the operand. The 168 | // following table shows the results obtained when creating the absolute value 169 | // of various classes of numbers. C1 Set to 0. 170 | #if defined(X87_FABS) 171 | void x87_fabs(X87State *a1) { 172 | SIMDGuard simd_guard; 173 | 174 | LOG(1, "x87_fabs\n", 10); 175 | 176 | // Clear condition code 1 and exception flags 177 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 178 | 179 | // Get value in ST(0) 180 | auto value = a1->get_st_fast(0); 181 | 182 | // Set value in ST(0) to its absolute value 183 | a1->set_st_fast(0, std::abs(value)); 184 | } 185 | #else 186 | X87_TRAMPOLINE_ARGS(void, x87_fabs, (X87State * a1), x9); 187 | #endif 188 | 189 | #if defined(X87_FADD_ST) 190 | void x87_fadd_ST(X87State *a1, unsigned int st_offset_1, 191 | unsigned int st_offset_2, bool pop_stack) { 192 | SIMDGuard simd_guard; 193 | 194 | LOG(1, "x87_fadd_ST\n", 13); 195 | // Clear condition code 1 and exception flags 196 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 197 | 198 | // Get register indices and values 199 | const auto val1 = a1->get_st_fast(st_offset_1); 200 | const auto val2 = a1->get_st_fast(st_offset_2); 201 | 202 | // Perform addition and store result in ST(idx1) 203 | 
a1->set_st_fast(st_offset_1, val1 + val2); 204 | 205 | if (pop_stack) { 206 | a1->pop(); 207 | } 208 | } 209 | #else 210 | X87_TRAMPOLINE_ARGS(void, x87_fadd_ST, 211 | (X87State * a1, unsigned int st_offset_1, 212 | unsigned int st_offset_2, bool pop_stack), 213 | x9); 214 | #endif 215 | 216 | #if defined(X87_FADD_F32) 217 | void x87_fadd_f32(X87State *a1, unsigned int fp32) { 218 | SIMDGuard simd_guard; 219 | 220 | LOG(1, "x87_fadd_f32\n", 14); 221 | 222 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 223 | 224 | auto value = std::bit_cast(fp32); 225 | auto st0 = a1->get_st_fast(0); 226 | 227 | a1->set_st_fast(0, st0 + value); 228 | } 229 | #else 230 | X87_TRAMPOLINE_ARGS(void, x87_fadd_f32, (X87State * a1, unsigned int fp32), x9); 231 | #endif 232 | 233 | #if defined(X87_FADD_F64) 234 | void x87_fadd_f64(X87State *a1, unsigned long long a2) { 235 | SIMDGuard simd_guard; 236 | 237 | LOG(1, "x87_fadd_f64\n", 14); 238 | 239 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 240 | 241 | auto value = std::bit_cast(a2); 242 | auto st0 = a1->get_st_fast(0); 243 | 244 | a1->set_st_fast(0, st0 + value); 245 | } 246 | #else 247 | X87_TRAMPOLINE_ARGS(void, x87_fadd_f64, (X87State * a1, unsigned long long a2), 248 | x9); 249 | #endif 250 | 251 | double BCD2Double(uint8_t bcd[10]) { 252 | uint64_t tmp = 0; 253 | uint64_t mult = 1; 254 | uint8_t piece; 255 | 256 | for (int i = 0; i < 9; ++i) { 257 | piece = bcd[i]; 258 | tmp += mult * (piece & 0x0F); 259 | mult *= 10; 260 | tmp += mult * ((piece >> 4) & 0x0F); 261 | mult *= 10; 262 | } 263 | 264 | piece = bcd[9]; 265 | tmp += mult * (piece & 0x0F); 266 | 267 | double value = static_cast(tmp); 268 | 269 | if (piece & 0x80) { 270 | value = -value; 271 | } 272 | 273 | return value; 274 | } 275 | 276 | #if defined(X87_FBLD) 277 | void x87_fbld(X87State *a1, unsigned long long a2, unsigned long long a3) { 278 | SIMDGuard simd_guard; 279 | LOG(1, "x87_fbld\n", 10); 280 | 281 | // set C1 to 0 282 | a1->status_word &= 
~X87StatusWordFlag::kConditionCode1; 283 | 284 | uint8_t bcd[10]; 285 | memcpy(bcd, &a2, 8); // Copy 8 bytes from a2 286 | memcpy(bcd + 8, &a3, 2); // Copy 2 bytes from a3 287 | 288 | auto value = BCD2Double(bcd); 289 | 290 | // Add space on the stack and push the converted BCD 291 | a1->push(); 292 | a1->set_st(0, value); 293 | } 294 | #else 295 | X87_TRAMPOLINE_ARGS(void, x87_fbld, 296 | (X87State * a1, unsigned long long a2, 297 | unsigned long long a3), 298 | x9); 299 | #endif 300 | 301 | #if defined(X87_FBSTP) 302 | uint128_t x87_fbstp(X87State *a1) { 303 | LOG(1, "x87_fbstp\n", 11); 304 | 305 | auto st0 = a1->get_st(0); 306 | a1->pop(); 307 | 308 | // convert double to BCD 309 | uint8_t bcd[10] = {0}; // Initialize all bytes to 0 310 | 311 | // Handle sign 312 | bool is_negative = signbit(st0); 313 | 314 | // Handle special cases 315 | if (isnan(st0) || isinf(st0)) { 316 | // Set to indefinite BCD value 317 | memset(bcd, 0, 10); 318 | if (is_negative) { 319 | bcd[9] = 0x80; // Set sign bit if negative 320 | } 321 | } else { 322 | // Get absolute value 323 | double abs_value = fabs(st0); 324 | 325 | // Truncate to integer 326 | abs_value = trunc(abs_value); 327 | 328 | // Check if value is too large for BCD format (more than 18 decimal digits) 329 | if (abs_value > 999999999999999999.0) { 330 | // Handle overflow - set to maximum BCD value 331 | memset(bcd, 0x99, 9); // Set first 9 bytes to 0x99 (all digits = 9) 332 | bcd[9] = 0x09; // Set last digit to 9 333 | if (is_negative) { 334 | bcd[9] |= 0x80; // Set sign bit if negative 335 | } 336 | } else { 337 | // Convert to BCD representation 338 | uint64_t integer_part = static_cast(abs_value); 339 | 340 | // Process each byte (2 decimal digits per byte) 341 | for (int i = 0; i < 9; i++) { 342 | uint8_t digit1 = integer_part % 10; 343 | integer_part /= 10; 344 | uint8_t digit2 = integer_part % 10; 345 | integer_part /= 10; 346 | 347 | bcd[i] = digit1 | (digit2 << 4); 348 | } 349 | 350 | // Handle the 10th byte 
(contains 1 digit and sign) 351 | bcd[9] = integer_part % 10; 352 | if (is_negative) { 353 | bcd[9] |= 0x80; // Set sign bit if negative 354 | } 355 | } 356 | } 357 | 358 | return { 359 | .low = reinterpret_cast(bcd)[0], 360 | .high = reinterpret_cast(bcd)[1], 361 | }; 362 | } 363 | #else 364 | X87_TRAMPOLINE_ARGS(uint128_t, x87_fbstp, (X87State *a1), x9); 365 | #endif 366 | 367 | #if defined(X87_FCHS) 368 | void x87_fchs(X87State *a1) { 369 | SIMDGuard simd_guard; 370 | 371 | LOG(1, "x87_fchs\n", 10); 372 | // set C1 to 0 373 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 374 | 375 | // Negate value in ST(0) 376 | a1->set_st_fast(0, -a1->get_st_fast(0)); 377 | } 378 | #else 379 | X87_TRAMPOLINE_ARGS(void, x87_fchs, (X87State * a1), x9); 380 | #endif 381 | 382 | #if defined(X87_FCMOV) 383 | void x87_fcmov(X87State *state, unsigned int condition, 384 | unsigned int st_offset) { 385 | SIMDGuard simd_guard; 386 | 387 | LOG(1, "x87_fcmov\n", 11); 388 | 389 | // clear precision flag 390 | state->status_word &= ~X87StatusWordFlag::kConditionCode1; 391 | 392 | double value; 393 | 394 | auto st_tag_word = state->get_st_tag(st_offset); 395 | if (st_tag_word != X87TagState::kEmpty) { 396 | if (condition == 0) { 397 | return; 398 | } 399 | 400 | value = state->get_st(st_offset); 401 | } else { 402 | state->status_word |= 0x41; // Set invalid operation 403 | value = 0.0f; 404 | } 405 | 406 | state->set_st(0, value); // Perform the actual register move 407 | } 408 | #else 409 | X87_TRAMPOLINE_ARGS(void, x87_fcmov, 410 | (X87State * state, unsigned int condition, 411 | unsigned int st_offset), 412 | x9); 413 | #endif 414 | 415 | #if defined(X87_FCOM_ST) 416 | void x87_fcom_ST(X87State *a1, unsigned int st_offset, 417 | unsigned int number_of_pops) { 418 | SIMDGuard simd_guard; 419 | 420 | LOG(1, "x87_fcom_ST\n", 13); 421 | 422 | // Get values to compare 423 | auto st0 = a1->get_st(0); 424 | auto src = a1->get_st(st_offset); 425 | 426 | // Clear condition code bits C0, 
C2, C3 (bits 8, 9, 14) 427 | a1->status_word &= ~(kConditionCode0 | kConditionCode2 | kConditionCode3); 428 | 429 | // Set condition codes based on comparison 430 | if (st0 > src) { 431 | // Leave C0=C2=C3=0 432 | } else if (st0 < src) { 433 | a1->status_word |= kConditionCode0; // Set C0=1 434 | } else { // st0 == sti 435 | a1->status_word |= kConditionCode3; // Set C3=1 436 | } 437 | 438 | if ((a1->control_word & kInvalidOpMask) == kInvalidOpMask) { 439 | if (isnan(st0) || isnan(src)) { 440 | a1->status_word |= 441 | kConditionCode0 | kConditionCode2 | kConditionCode3; // Set C0=C2=C3=1 442 | } 443 | } 444 | 445 | // Handle pops if requested 446 | for (unsigned int i = 0; i < number_of_pops; i++) { 447 | a1->pop(); 448 | } 449 | } 450 | #else 451 | X87_TRAMPOLINE_ARGS(void, x87_fcom_ST, 452 | (X87State * a1, unsigned int st_offset, 453 | unsigned int number_of_pops), 454 | x9); 455 | #endif 456 | 457 | #if defined(X87_FCOM_F32) 458 | void x87_fcom_f32(X87State *a1, unsigned int fp32, bool pop) { 459 | SIMDGuard simd_guard; 460 | 461 | LOG(1, "x87_fcom_f32\n", 14); 462 | auto st0 = a1->get_st(0); 463 | auto src = std::bit_cast(fp32); 464 | 465 | a1->status_word &= 466 | ~(kConditionCode0 | kConditionCode1 | kConditionCode2 | kConditionCode3); 467 | 468 | if (st0 > src) { 469 | // Leave C0=C2=C3=0 470 | } else if (st0 < src) { 471 | a1->status_word |= kConditionCode0; // Set C0=1 472 | } else { // st0 == value 473 | a1->status_word |= kConditionCode3; // Set C3=1 474 | } 475 | 476 | if ((a1->control_word & kInvalidOpMask) == kInvalidOpMask) { 477 | if (isnan(st0) || isnan(src)) { 478 | a1->status_word |= 479 | kConditionCode0 | kConditionCode2 | kConditionCode3; // Set C0=C2=C3=1 480 | } 481 | } 482 | 483 | if (pop) { 484 | a1->pop(); 485 | } 486 | } 487 | #else 488 | X87_TRAMPOLINE_ARGS(void, x87_fcom_f32, 489 | (X87State * a1, unsigned int fp32, bool pop), x9); 490 | #endif 491 | 492 | #if defined(X87_FCOM_F64) 493 | void x87_fcom_f64(X87State *a1, unsigned long 
long fp64, bool pop) { 494 | SIMDGuard simd_guard; 495 | 496 | LOG(1, "x87_fcom_f64\n", 14); 497 | auto st0 = a1->get_st(0); 498 | auto src = std::bit_cast(fp64); 499 | 500 | a1->status_word &= ~(kConditionCode0 | kConditionCode2 | kConditionCode3); 501 | 502 | if (st0 > src) { 503 | // Leave C0=C2=C3=0 504 | } else if (st0 < src) { 505 | a1->status_word |= kConditionCode0; // Set C0=1 506 | } else { // st0 == value 507 | a1->status_word |= kConditionCode3; // Set C3=1 508 | } 509 | 510 | if ((a1->control_word & kInvalidOpMask) == kInvalidOpMask) { 511 | if (isnan(st0) || isnan(src)) { 512 | a1->status_word |= 513 | kConditionCode0 | kConditionCode2 | kConditionCode3; // Set C0=C2=C3=1 514 | } 515 | } 516 | 517 | if (pop) { 518 | a1->pop(); 519 | } 520 | } 521 | #else 522 | X87_TRAMPOLINE_ARGS(void, x87_fcom_f64, 523 | (X87State * a1, unsigned long long fp64, bool pop), x9); 524 | #endif 525 | 526 | #if defined(X87_FCOMI) 527 | uint32_t x87_fcomi(X87State *state, unsigned int st_offset, bool pop) { 528 | SIMDGuard simd_guard; 529 | 530 | LOG(1, "x87_fcomi\n", 11); 531 | state->status_word &= ~(kConditionCode0); 532 | 533 | auto st0_val = state->get_st(0); 534 | auto sti_val = state->get_st(st_offset); 535 | 536 | uint32_t flags = 0; 537 | /* 538 | Filters: fcomi 539 | Randomness seeded to: 3528984885 540 | x87_fcomi_less 541 | x87_fcomi result: 0x000000000000000 542 | x87_fcomi_greater 543 | x87_fcomi result: 0x000000020000000 544 | x87_fcomi_equal 545 | x87_fcomi result: 0x000000060000000 546 | */ 547 | 548 | if (st0_val < sti_val) { 549 | flags = 0x000000000000000; 550 | } else if (st0_val > sti_val) { 551 | flags = 0x000000020000000; 552 | } else { 553 | flags = 0x000000060000000; 554 | } 555 | 556 | if (pop) { 557 | state->pop(); 558 | } 559 | 560 | return flags; 561 | } 562 | #else 563 | X87_TRAMPOLINE_ARGS(uint32_t, x87_fcomi, 564 | (X87State * state, unsigned int st_offset, bool pop), x9); 565 | #endif 566 | 567 | #if defined(X87_FCOS) 568 | void 
x87_fcos(X87State *a1) { 569 | SIMDGuardFull simd_guard; 570 | 571 | LOG(1, "x87_fcos\n", 10); 572 | a1->status_word &= ~(kConditionCode1 | kConditionCode2); 573 | // Get ST(0) 574 | auto value = a1->get_st_fast(0); 575 | 576 | // Calculate cosine 577 | auto result = cos(value); 578 | 579 | // Store result back in ST(0) 580 | a1->set_st_fast(0, result); 581 | } 582 | #else 583 | X87_TRAMPOLINE_ARGS(void, x87_fcos, (X87State * a1), x9); 584 | #endif 585 | 586 | #if defined(X87_FDECSTP) 587 | void x87_fdecstp(X87State *a1) { 588 | LOG(1, "x87_fdecstp\n", 13); 589 | 590 | uint16_t current_top = 591 | (a1->status_word & X87StatusWordFlag::kTopOfStack) >> 11; 592 | 593 | // Decrement the top of stack pointer (wrapping from 0 to 7) 594 | uint16_t new_top = (current_top - 1) & 7; 595 | 596 | // Clear C1 597 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 598 | // Clear the top of stack bits and set the new value 599 | a1->status_word = 600 | (a1->status_word & ~X87StatusWordFlag::kTopOfStack) | (new_top << 11); 601 | } 602 | #else 603 | X87_TRAMPOLINE_ARGS(void, x87_fdecstp, (X87State * a1), x9); 604 | #endif 605 | 606 | #if defined(X87_FDIV_ST) 607 | void x87_fdiv_ST(X87State *a1, unsigned int st_offset_1, 608 | unsigned int st_offset_2, bool pop_stack) { 609 | SIMDGuard simd_guard; 610 | 611 | LOG(1, "x87_fdiv_ST\n", 13); 612 | // Clear condition code 1 and exception flags 613 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 614 | 615 | // Get register indices and values 616 | const auto val1 = a1->get_st_fast(st_offset_1); 617 | const auto val2 = a1->get_st_fast(st_offset_2); 618 | 619 | // Perform division and store result 620 | a1->set_st_fast(st_offset_1, val1 / val2); 621 | 622 | if (pop_stack) { 623 | a1->pop(); 624 | } 625 | } 626 | #else 627 | X87_TRAMPOLINE_ARGS(void, x87_fdiv_ST, 628 | (X87State * a1, unsigned int st_offset_1, 629 | unsigned int st_offset_2, bool pop_stack), 630 | x9); 631 | #endif 632 | 633 | #if defined(X87_FDIV_F32) 634 | 
void x87_fdiv_f32(X87State *a1, unsigned int a2) { 635 | SIMDGuard simd_guard; 636 | 637 | LOG(1, "x87_fdiv_f32\n", 14); 638 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 639 | 640 | auto value = std::bit_cast(a2); 641 | auto st0 = a1->get_st_fast(0); 642 | 643 | a1->set_st_fast(0, st0 / value); 644 | } 645 | #else 646 | X87_TRAMPOLINE_ARGS(void, x87_fdiv_f32, (X87State * a1, unsigned int a2), x9); 647 | #endif 648 | 649 | #if defined(X87_FDIV_F64) 650 | void x87_fdiv_f64(X87State *a1, unsigned long long a2) { 651 | SIMDGuard simd_guard; 652 | 653 | LOG(1, "x87_fdiv_f64\n", 14); 654 | 655 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 656 | 657 | auto value = std::bit_cast(a2); 658 | auto st0 = a1->get_st_fast(0); 659 | 660 | a1->set_st_fast(0, st0 / value); 661 | } 662 | #else 663 | X87_TRAMPOLINE_ARGS(void, x87_fdiv_f64, (X87State * a1, unsigned long long a2), 664 | x9); 665 | #endif 666 | 667 | #if defined(X87_FDIVR_ST) 668 | void x87_fdivr_ST(X87State *a1, unsigned int st_offset_1, 669 | unsigned int st_offset_2, bool pop_stack) { 670 | SIMDGuard simd_guard; 671 | 672 | LOG(1, "x87_fdivr_ST\n", 14); 673 | // Clear condition code 1 and exception flags 674 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 675 | 676 | // Get register indices and values 677 | const auto val1 = a1->get_st_fast(st_offset_1); 678 | const auto val2 = a1->get_st_fast(st_offset_2); 679 | 680 | // Perform reversed division and store result 681 | a1->set_st_fast(st_offset_1, val2 / val1); 682 | 683 | if (pop_stack) { 684 | a1->pop(); 685 | } 686 | } 687 | #else 688 | X87_TRAMPOLINE_ARGS(void, x87_fdivr_ST, 689 | (X87State * a1, unsigned int st_offset_1, 690 | unsigned int st_offset_2, bool pop_stack), 691 | x9); 692 | #endif 693 | 694 | #if defined(X87_FDIVR_F32) 695 | void x87_fdivr_f32(X87State *a1, unsigned int a2) { 696 | SIMDGuard simd_guard; 697 | 698 | LOG(1, "x87_fdivr_f32\n", 15); 699 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 700 | 
701 | auto value = std::bit_cast(a2); 702 | auto st0 = a1->get_st_fast(0); 703 | 704 | a1->set_st_fast(0, value / st0); 705 | } 706 | #else 707 | X87_TRAMPOLINE_ARGS(void, x87_fdivr_f32, (X87State * a1, unsigned int a2), x9); 708 | #endif 709 | 710 | #if defined(X87_FDIVR_F64) 711 | void x87_fdivr_f64(X87State *a1, unsigned long long a2) { 712 | SIMDGuard simd_guard; 713 | 714 | LOG(1, "x87_fdivr_f64\n", 15); 715 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 716 | 717 | auto value = std::bit_cast(a2); 718 | auto st0 = a1->get_st_fast(0); 719 | 720 | a1->set_st_fast(0, value / st0); 721 | } 722 | #else 723 | X87_TRAMPOLINE_ARGS(void, x87_fdivr_f64, (X87State * a1, unsigned long long a2), 724 | x9); 725 | #endif 726 | 727 | void x87_ffree(X87State *a1, unsigned int a2) { 728 | LOG(1, "x87_ffree\n", 11); 729 | orig_x87_ffree(a1, a2); 730 | } 731 | 732 | #if defined(X87_FIADD) 733 | void x87_fiadd(X87State *a1, int m32int) { 734 | SIMDGuard simd_guard; 735 | 736 | LOG(1, "x87_fiadd\n", 11); 737 | // simple_printf("m32int: %d\n", m32int); 738 | 739 | // Clear condition code 1 and exception flags 740 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 741 | 742 | // Get value in ST(0) 743 | auto st0 = a1->get_st(0); 744 | 745 | // Add integer value 746 | st0 += m32int; 747 | 748 | // Store result back in ST(0) 749 | a1->set_st(0, st0); 750 | } 751 | #else 752 | X87_TRAMPOLINE_ARGS(void, x87_fiadd, (X87State * a1, int m32int), x9); 753 | #endif 754 | 755 | #if defined(X87_FICOM) 756 | void x87_ficom(X87State *a1, int src, bool pop) { 757 | SIMDGuard simd_guard; 758 | LOG(1, "x87_ficom\n", 11); 759 | auto st0 = a1->get_st(0); 760 | 761 | // Clear condition code bits C0, C2, C3 (bits 8, 9, 14) 762 | a1->status_word &= ~(kConditionCode0 | kConditionCode2 | kConditionCode3); 763 | 764 | // Set condition codes based on comparison 765 | if (isnan(st0)) { 766 | a1->status_word |= 767 | kConditionCode0 | kConditionCode2 | kConditionCode3; // Set C0=C2=C3=1 768 | 
} else if (st0 > src) { 769 | // Leave C0=C2=C3=0 770 | } else if (st0 < src) { 771 | a1->status_word |= kConditionCode0; // Set C0=1 772 | } else { // st0 == src 773 | a1->status_word |= kConditionCode3; // Set C3=1 774 | } 775 | 776 | // Handle pops if requested 777 | if (pop) { 778 | a1->pop(); 779 | } 780 | } 781 | #else 782 | X87_TRAMPOLINE_ARGS(void, x87_ficom, (X87State * a1, int src, bool pop), x9); 783 | #endif 784 | 785 | #if defined(X87_FIDIV) 786 | void x87_fidiv(X87State *a1, int a2) { 787 | SIMDGuard simd_guard; 788 | 789 | LOG(1, "x87_fidiv\n", 11); 790 | // Clear condition code 1 and exception flags 791 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 792 | 793 | // Get value in ST(0) 794 | auto value = a1->get_st(0); 795 | 796 | // Divide by integer value 797 | value /= a2; 798 | 799 | // Store result back in ST(0) 800 | a1->set_st(0, value); 801 | } 802 | #else 803 | X87_TRAMPOLINE_ARGS(void, x87_fidiv, (X87State * a1, int a2), x9); 804 | #endif 805 | 806 | #if defined(X87_FIDIVR) 807 | void x87_fidivr(X87State *a1, int a2) { 808 | SIMDGuard simd_guard; 809 | 810 | LOG(1, "x87_fidivr\n", 12); 811 | // Clear condition code 1 and exception flags 812 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 813 | 814 | // Get value in ST(0) 815 | auto value = a1->get_st(0); 816 | 817 | // Divide integer value by value in ST(0) 818 | value = a2 / value; 819 | 820 | // Store result back in ST(0) 821 | a1->set_st(0, value); 822 | } 823 | #else 824 | X87_TRAMPOLINE_ARGS(void, x87_fidivr, (X87State * a1, int a2), x9); 825 | #endif 826 | 827 | #if defined(X87_FILD) 828 | void x87_fild(X87State *a1, int64_t value) { 829 | 830 | __asm__ volatile("" : : : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"); 831 | SIMDGuard simd_guard; 832 | LOG(1, "x87_fild\n", 10); 833 | 834 | a1->push(); 835 | a1->set_st(0, static_cast(value)); 836 | } 837 | #else 838 | X87_TRAMPOLINE_ARGS(void, x87_fild, (X87State * a1, int64_t value), x9); 839 | #endif 840 | 841 | 
#if defined(X87_FIMUL) 842 | void x87_fimul(X87State *a1, int a2) { 843 | SIMDGuard simd_guard; 844 | LOG(1, "x87_fimul\n", 11); 845 | // Clear condition code 1 and exception flags 846 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 847 | 848 | // Get value in ST(0) 849 | auto value = a1->get_st(0); 850 | 851 | // Multiply by integer value 852 | value *= a2; 853 | 854 | // Store result back in ST(0) 855 | a1->set_st(0, value); 856 | } 857 | #else 858 | X87_TRAMPOLINE_ARGS(void, x87_fimul, (X87State * a1, int a2), x9); 859 | #endif 860 | 861 | void x87_fincstp(X87State *a1) { 862 | LOG(1, "x87_fincstp\n", 13); 863 | 864 | // Clear condition code 1 (C1) 865 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 866 | 867 | // Extract the TOP field (bits 11-13) 868 | uint16_t top = (a1->status_word & X87StatusWordFlag::kTopOfStack) >> 11; 869 | 870 | // Increment TOP with wrap-around (values 0-7) 871 | top = (top + 1) & 0x7; 872 | 873 | // Clear old TOP value and set the new one 874 | a1->status_word &= ~X87StatusWordFlag::kTopOfStack; // Clear TOP field 875 | a1->status_word |= (top << 11); // Set new TOP value 876 | } 877 | 878 | #if defined(X87_FIST_I16) 879 | X87ResultStatusWord x87_fist_i16(X87State const *a1) { 880 | SIMDGuard simd_guard; 881 | 882 | LOG(1, "x87_fist_i16\n", 14); 883 | auto [value, status_word] = a1->get_st_const(0); 884 | X87ResultStatusWord result{0, status_word}; 885 | 886 | // Special case: value > INT16_MAX or infinity (changed from >=) 887 | if (value > static_cast(INT16_MAX)) { 888 | result.signed_result = INT16_MIN; // 0x8000 889 | result.status_word |= X87StatusWordFlag::kConditionCode1; 890 | return result; 891 | } 892 | 893 | // Special case: value <= INT16_MIN 894 | if (value <= static_cast(INT16_MIN)) { 895 | result.signed_result = INT16_MIN; 896 | result.status_word |= X87StatusWordFlag::kConditionCode1; 897 | return result; 898 | } 899 | 900 | // Normal case 901 | auto round_bits = a1->control_word & 
X87ControlWord::kRoundingControlMask; 902 | 903 | switch (round_bits) { 904 | case X87ControlWord::kRoundToNearest: { 905 | result.signed_result = static_cast(std::nearbyint(value)); 906 | } break; 907 | 908 | case X87ControlWord::kRoundDown: { 909 | result.signed_result = static_cast(std::floor(value)); 910 | return result; 911 | } break; 912 | case X87ControlWord::kRoundUp: { 913 | result.signed_result = static_cast(std::ceil(value)); 914 | return result; 915 | } break; 916 | 917 | case X87ControlWord::kRoundToZero: { 918 | result.signed_result = static_cast(value); 919 | return result; 920 | } break; 921 | } 922 | 923 | return result; 924 | } 925 | #else 926 | X87_TRAMPOLINE_ARGS(X87ResultStatusWord, x87_fist_i16, (X87State const *a1), 927 | x9); 928 | #endif 929 | 930 | #if defined(X87_FIST_I32) 931 | X87ResultStatusWord x87_fist_i32(X87State const *a1) { 932 | SIMDGuard simd_guard; 933 | 934 | LOG(1, "x87_fist_i32\n", 14); 935 | auto [value, status_word] = a1->get_st_const(0); 936 | X87ResultStatusWord result{0, status_word}; 937 | 938 | // Special case: value >= INT32_MAX or infinity 939 | if (value >= static_cast(INT32_MAX)) { 940 | result.signed_result = INT32_MIN; // 0x80000000 941 | result.status_word |= X87StatusWordFlag::kConditionCode1; 942 | return result; 943 | } 944 | 945 | // Special case: value <= INT32_MIN 946 | if (value <= static_cast(INT32_MIN)) { 947 | result.signed_result = INT32_MIN; 948 | result.status_word |= X87StatusWordFlag::kConditionCode1; 949 | return result; 950 | } 951 | 952 | auto round_bits = a1->control_word & X87ControlWord::kRoundingControlMask; 953 | 954 | switch (round_bits) { 955 | case X87ControlWord::kRoundToNearest: { 956 | result.signed_result = static_cast(std::nearbyint(value)); 957 | } break; 958 | 959 | case X87ControlWord::kRoundDown: { 960 | result.signed_result = static_cast(std::floor(value)); 961 | return result; 962 | } break; 963 | case X87ControlWord::kRoundUp: { 964 | result.signed_result = 
static_cast(std::ceil(value)); 965 | return result; 966 | } break; 967 | 968 | case X87ControlWord::kRoundToZero: { 969 | result.signed_result = static_cast(value); 970 | return result; 971 | } break; 972 | } 973 | 974 | return result; 975 | } 976 | #else 977 | X87_TRAMPOLINE_ARGS(X87ResultStatusWord, x87_fist_i32, (X87State const *a1), 978 | x9); 979 | #endif 980 | 981 | #if defined(X87_FIST_I64) 982 | X87ResultStatusWord x87_fist_i64(X87State const *a1) { 983 | SIMDGuard simd_guard; 984 | 985 | LOG(1, "x87_fist_i64\n", 14); 986 | // Get value in ST(0) 987 | auto [value, status_word] = a1->get_st_const(0); 988 | 989 | X87ResultStatusWord result{0, status_word}; 990 | 991 | // Special case: value >= INT64_MAX or infinity 992 | if (value >= static_cast(INT64_MAX)) { 993 | result.signed_result = INT64_MIN; // 0x8000000000000000 994 | result.status_word |= X87StatusWordFlag::kConditionCode1; 995 | return result; 996 | } 997 | 998 | // Special case: value <= INT64_MIN 999 | if (value <= static_cast(INT64_MIN)) { 1000 | result.signed_result = INT64_MIN; 1001 | result.status_word |= X87StatusWordFlag::kConditionCode1; 1002 | return result; 1003 | } 1004 | 1005 | // Normal case 1006 | 1007 | auto round_bits = a1->control_word & X87ControlWord::kRoundingControlMask; 1008 | 1009 | switch (round_bits) { 1010 | case X87ControlWord::kRoundToNearest: { 1011 | result.signed_result = static_cast(std::nearbyint(value)); 1012 | } break; 1013 | 1014 | case X87ControlWord::kRoundDown: { 1015 | result.signed_result = static_cast(std::floor(value)); 1016 | return result; 1017 | } break; 1018 | case X87ControlWord::kRoundUp: { 1019 | result.signed_result = static_cast(std::ceil(value)); 1020 | return result; 1021 | } break; 1022 | 1023 | case X87ControlWord::kRoundToZero: { 1024 | result.signed_result = static_cast(value); 1025 | return result; 1026 | } break; 1027 | } 1028 | 1029 | return result; 1030 | } 1031 | #else 1032 | X87_TRAMPOLINE_ARGS(X87ResultStatusWord, x87_fist_i64, 
(X87State const *a1), 1033 | x9); 1034 | #endif 1035 | 1036 | #if defined(X87_FISTT_I16) 1037 | X87ResultStatusWord x87_fistt_i16(X87State const *a1) { 1038 | SIMDGuard simd_guard; 1039 | 1040 | LOG(1, "x87_fistt_i16\n", 15); 1041 | // Get value in ST(0) 1042 | auto [value, status_word] = a1->get_st_const(0); 1043 | 1044 | return {.signed_result = static_cast(value), status_word}; 1045 | } 1046 | #else 1047 | X87_TRAMPOLINE_ARGS(X87ResultStatusWord, x87_fistt_i16, (X87State const *a1), 1048 | x9); 1049 | #endif 1050 | 1051 | #if defined(X87_FISTT_I32) 1052 | X87ResultStatusWord x87_fistt_i32(X87State const *a1) { 1053 | SIMDGuard simd_guard; 1054 | 1055 | LOG(1, "x87_fistt_i32\n", 15); 1056 | // Get value in ST(0) 1057 | auto [value, status_word] = a1->get_st_const(0); 1058 | 1059 | return {.signed_result = static_cast(value), status_word}; 1060 | } 1061 | #else 1062 | X87_TRAMPOLINE_ARGS(X87ResultStatusWord, x87_fistt_i32, (X87State const *a1), 1063 | x9); 1064 | #endif 1065 | 1066 | #if defined(X87_FISTT_I64) 1067 | X87ResultStatusWord x87_fistt_i64(X87State const *a1) { 1068 | SIMDGuard simd_guard; 1069 | 1070 | LOG(1, "x87_fistt_i64\n", 15); 1071 | // Get value in ST(0) 1072 | auto [value, status_word] = a1->get_st_const(0); 1073 | 1074 | return {.signed_result = static_cast(value), status_word}; 1075 | } 1076 | #else 1077 | X87_TRAMPOLINE_ARGS(X87ResultStatusWord, x87_fistt_i64, (X87State const *a1), 1078 | x9); 1079 | #endif 1080 | 1081 | #if defined(X87_FISUB) 1082 | void x87_fisub(X87State *a1, int a2) { 1083 | SIMDGuard simd_guard; 1084 | 1085 | LOG(1, "x87_fisub\n", 11); 1086 | // Clear condition code 1 1087 | a1->status_word &= ~(X87StatusWordFlag::kConditionCode1); 1088 | 1089 | // Get value in ST(0) 1090 | auto value = a1->get_st(0); 1091 | 1092 | // Subtract integer value 1093 | value -= a2; 1094 | 1095 | // Store result back in ST(0) 1096 | a1->set_st(0, value); 1097 | } 1098 | #else 1099 | X87_TRAMPOLINE_ARGS(void, x87_fisub, (X87State * a1, int 
a2), x9); 1100 | #endif 1101 | 1102 | #if defined(X87_FISUBR) 1103 | void x87_fisubr(X87State *a1, int a2) { 1104 | SIMDGuard simd_guard; 1105 | 1106 | LOG(1, "x87_fisubr\n", 12); 1107 | 1108 | // Clear condition code 1 1109 | a1->status_word &= ~(X87StatusWordFlag::kConditionCode1); 1110 | 1111 | // Get value in ST(0) 1112 | auto value = a1->get_st(0); 1113 | 1114 | // Subtract integer value 1115 | value = a2 - value; 1116 | 1117 | // Store result back in ST(0) 1118 | a1->set_st(0, value); 1119 | } 1120 | #else 1121 | X87_TRAMPOLINE_ARGS(void, x87_fisubr, (X87State * a1, int a2), x9); 1122 | #endif 1123 | 1124 | // Push ST(i) onto the FPU register stack. 1125 | #if defined(X87_FLD_STI) 1126 | void x87_fld_STi(X87State *a1, unsigned int st_offset) { 1127 | SIMDGuard simd_guard; 1128 | 1129 | LOG(1, "x87_fld_STi\n", 13); 1130 | a1->status_word &= ~0x200u; 1131 | 1132 | // Get index of ST(i) register 1133 | const auto value = a1->get_st(st_offset); 1134 | 1135 | // make room for new value 1136 | a1->push(); 1137 | 1138 | // Copy value from ST(i) to ST(0) 1139 | a1->set_st(0, value); 1140 | } 1141 | #else 1142 | X87_TRAMPOLINE_ARGS(void, x87_fld_STi, (X87State * a1, unsigned int st_offset), 1143 | x9); 1144 | #endif 1145 | 1146 | #if defined(X87_FLD_CONSTANT) 1147 | void x87_fld_constant(X87State *a1, X87Constant a2) { 1148 | SIMDGuard simd_guard; 1149 | 1150 | LOG(1, "x87_fld_constant\n", 18); 1151 | // simple_printf("x87_fld_constant %d\n", (int)a2); 1152 | switch (a2) { 1153 | case X87Constant::kOne: { // fld1 1154 | a1->push(); 1155 | a1->set_st(0, 1.0); 1156 | } break; 1157 | 1158 | case X87Constant::kZero: { // fldz 1159 | a1->push(); 1160 | a1->set_st(0, 0.0); 1161 | } break; 1162 | 1163 | case X87Constant::kPi: { // fldpi 1164 | // store_x87_extended_value(a1, {.ieee754 = 3.141592741f}); 1165 | a1->push(); 1166 | a1->set_st(0, 3.141592741f); 1167 | } break; 1168 | 1169 | case X87Constant::kLog2e: { // fldl2e 1170 | // store_x87_extended_value(a1, {.ieee754 = 
1.44269502f}); 1171 | a1->push(); 1172 | a1->set_st(0, 1.44269502f); 1173 | } break; 1174 | 1175 | case X87Constant::kLoge2: { // fldln2 1176 | // store_x87_extended_value(a1, {.ieee754 = 0.693147182f}); 1177 | a1->push(); 1178 | a1->set_st(0, 0.693147182f); 1179 | } break; 1180 | 1181 | case X87Constant::kLog2t: { // fldl2t 1182 | // store_x87_extended_value(a1, {.ieee754 = 3.321928f}); 1183 | a1->push(); 1184 | a1->set_st(0, 3.321928f); 1185 | } break; 1186 | 1187 | case X87Constant::kLog102: { // fldl2e 1188 | // store_x87_extended_value(a1, {.ieee754 = 0.301029987f}); 1189 | a1->push(); 1190 | a1->set_st(0, 0.301029987f); 1191 | } break; 1192 | 1193 | default: { 1194 | simple_printf("x87_fld_constant ERROR %d\n", (int)a2); 1195 | } break; 1196 | } 1197 | } 1198 | #else 1199 | X87_TRAMPOLINE_ARGS(void, x87_fld_constant, (X87State * a1, X87Constant a2), 1200 | x9); 1201 | #endif 1202 | 1203 | #if defined(X87_FLD_FP32) 1204 | void x87_fld_fp32(X87State *a1, unsigned int a2) { 1205 | SIMDGuard simd_guard; 1206 | 1207 | LOG(1, "x87_fld_fp32\n", 14); 1208 | 1209 | // Push new value onto stack, get reference to new top 1210 | a1->push(); 1211 | 1212 | a1->set_st(0, std::bit_cast(a2)); 1213 | } 1214 | #else 1215 | X87_TRAMPOLINE_ARGS(void, x87_fld_fp32, (X87State * a1, unsigned int a2), x9); 1216 | #endif 1217 | 1218 | #if defined(X87_FLD_FP64) 1219 | void x87_fld_fp64(X87State *a1, unsigned long long a2) { 1220 | SIMDGuard simd_guard; 1221 | 1222 | LOG(1, "x87_fld_fp64\n", 14); 1223 | 1224 | // Push new value onto stack, get reference to new top 1225 | a1->push(); 1226 | 1227 | a1->set_st(0, std::bit_cast(a2)); 1228 | } 1229 | #else 1230 | X87_TRAMPOLINE_ARGS(void, x87_fld_fp64, (X87State * a1, unsigned long long a2), 1231 | x9); 1232 | #endif 1233 | 1234 | #if defined(X87_FLD_FP80) 1235 | void x87_fld_fp80(X87State *a1, X87Float80 a2) { 1236 | LOG(1, "x87_fld_fp80\n", 14); 1237 | 1238 | auto ieee754 = ConvertX87RegisterToFloat64(a2, &a1->status_word); 1239 | 1240 | 
a1->push(); 1241 | a1->set_st(0, ieee754); 1242 | } 1243 | #else 1244 | X87_TRAMPOLINE_ARGS(void, x87_fld_fp80, (X87State * a1, X87Float80 a2), x9); 1245 | #endif 1246 | 1247 | #if defined(X87_FMUL_ST) 1248 | void x87_fmul_ST(X87State *a1, unsigned int st_offset_1, 1249 | unsigned int st_offset_2, bool pop_stack) { 1250 | SIMDGuard simd_guard; 1251 | 1252 | LOG(1, "x87_fmul_ST\n", 13); 1253 | 1254 | // Clear condition code 1 and exception flags 1255 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 1256 | 1257 | // Get register indices and values 1258 | const auto val1 = a1->get_st_fast(st_offset_1); 1259 | const auto val2 = a1->get_st_fast(st_offset_2); 1260 | 1261 | // Perform multiplication and store result 1262 | a1->set_st_fast(st_offset_1, val1 * val2); 1263 | 1264 | if (pop_stack) { 1265 | a1->pop(); 1266 | } 1267 | } 1268 | #else 1269 | X87_TRAMPOLINE_ARGS(void, x87_fmul_ST, 1270 | (X87State * a1, unsigned int st_offset_1, 1271 | unsigned int st_offset_2, bool pop_stack), 1272 | x9); 1273 | #endif 1274 | 1275 | #if defined(X87_FMUL_F32) 1276 | void x87_fmul_f32(X87State *a1, unsigned int fp32) { 1277 | SIMDGuard simd_guard; 1278 | 1279 | LOG(1, "x87_fmul_f32\n", 14); 1280 | 1281 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 1282 | 1283 | auto value = std::bit_cast(fp32); 1284 | auto st0 = a1->get_st_fast(0); 1285 | 1286 | a1->set_st_fast(0, st0 * value); 1287 | } 1288 | #else 1289 | X87_TRAMPOLINE_ARGS(void, x87_fmul_f32, (X87State * a1, unsigned int fp32), x9); 1290 | #endif 1291 | 1292 | #if defined(X87_FMUL_F64) 1293 | void x87_fmul_f64(X87State *a1, unsigned long long a2) { 1294 | SIMDGuard simd_guard; 1295 | 1296 | LOG(1, "x87_fmul_f64\n", 14); 1297 | 1298 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 1299 | 1300 | auto value = std::bit_cast(a2); 1301 | auto st0 = a1->get_st_fast(0); 1302 | 1303 | a1->set_st_fast(0, st0 * value); 1304 | } 1305 | #else 1306 | X87_TRAMPOLINE_ARGS(void, x87_fmul_f64, (X87State * a1, 
unsigned long long a2), 1307 | x9); 1308 | #endif 1309 | 1310 | // Replace ST(1) with arctan(ST(1)/ST(0)) and pop the register stack. 1311 | #if defined(X87_FPATAN) 1312 | void x87_fpatan(X87State *a1) { 1313 | SIMDGuard simd_guard; 1314 | 1315 | LOG(1, "x87_fpatan\n", 12); 1316 | 1317 | a1->status_word &= ~(X87StatusWordFlag::kConditionCode1); 1318 | 1319 | // Get values from ST(0) and ST(1) 1320 | auto st0 = a1->get_st(0); 1321 | auto st1 = a1->get_st(1); 1322 | 1323 | // Calculate arctan(ST(1)/ST(0)) 1324 | auto result = atan2(st1, st0); 1325 | 1326 | // Store result in ST(1) and pop the register stack 1327 | a1->set_st(1, result); 1328 | 1329 | a1->pop(); 1330 | } 1331 | #else 1332 | X87_TRAMPOLINE_ARGS(void, x87_fpatan, (X87State * a1), x9); 1333 | #endif 1334 | 1335 | 1336 | #if defined(X87_FPREM) 1337 | void x87_fprem(X87State *a1) { 1338 | SIMDGuard simd_guard; 1339 | LOG(1, "x87_fprem\n", 11); 1340 | 1341 | // 1) Clear CC0–CC3 1342 | a1->status_word &= 1343 | ~(kConditionCode0 | kConditionCode1 | kConditionCode2 | kConditionCode3); 1344 | 1345 | double st0 = a1->get_st(0); 1346 | double st1 = a1->get_st(1); 1347 | 1348 | // 2) Special cases: NaN/div0/∞ → #IA, ∞ divisor → pass through 1349 | if (isnan(st0) || isnan(st1) || isinf(st0) || st1 == 0.0) { 1350 | a1->set_st(0, std::numeric_limits::quiet_NaN()); 1351 | a1->status_word |= kInvalidOperation; 1352 | return; 1353 | } 1354 | if (isinf(st1)) { 1355 | // remainder = dividend; no exception 1356 | return; 1357 | } 1358 | 1359 | // 3) Compute truncated quotient and remainder 1360 | double rawDiv = st0 / st1; 1361 | double truncDiv = std::trunc(rawDiv); // Q = trunc(ST0/ST1) 1362 | int q = static_cast(truncDiv); 1363 | double rem = std::fmod(st0, st1); // rem = ST0 - Q*ST1 1364 | a1->set_st(0, rem); 1365 | 1366 | // 4) CC0, CC1, CC3 ← low bits of Q (Q2→CC0, Q0→CC1, Q1→CC3) 1367 | if (q & 0x4) 1368 | a1->status_word |= kConditionCode0; 1369 | if (q & 0x1) 1370 | a1->status_word |= kConditionCode1; 1371 | if 
(q & 0x2) 1372 | a1->status_word |= kConditionCode3; 1373 | 1374 | // 5) CC2 “incomplete” if exponent gap > 0 1375 | // D = E0 – E1; E = std::ilogb(x) 1376 | int e0 = std::ilogb(st0); 1377 | int e1 = std::ilogb(st1); 1378 | int D = e0 - e1; 1379 | if (D > 0) { 1380 | a1->status_word |= kConditionCode2; 1381 | // (optional) you could iterate: rem -= std::ldexp(trunc(rem/st1), D); 1382 | } 1383 | } 1384 | #else 1385 | X87_TRAMPOLINE_ARGS(void, x87_fprem, (X87State * a1), x9); 1386 | #endif 1387 | 1388 | 1389 | #if defined(X87_FPREM1) 1390 | void x87_fprem1(X87State *a1) { 1391 | SIMDGuard simd_guard; 1392 | LOG(1, "x87_fprem1\n", 12); 1393 | 1394 | // 1) clear condition-code bits CC0–CC3 1395 | a1->status_word &= 1396 | ~(kConditionCode0 | kConditionCode1 | kConditionCode2 | kConditionCode3); 1397 | 1398 | double st0 = a1->get_st(0); 1399 | double st1 = a1->get_st(1); 1400 | 1401 | // 2) special cases: NaN/div0/∞ → #IA or pass through 1402 | if (isnan(st0) || isnan(st1) || isinf(st0) || st1 == 0.0) { 1403 | a1->set_st(0, std::numeric_limits::quiet_NaN()); 1404 | a1->status_word |= kInvalidOperation; 1405 | return; 1406 | } 1407 | if (isinf(st1)) { 1408 | // remainder = dividend; no exception 1409 | return; 1410 | } 1411 | 1412 | // 3) IEEE-754 remainder with nearest-integer quotient 1413 | int q; 1414 | double rem = std::remquo(st0, st1, &q); 1415 | // rem = ST0 – q*ST1, where q = round-to-nearest(ST0/ST1), ties-to-even 1416 | a1->set_st(0, rem); 1417 | 1418 | // 4) CC0, CC1, CC3 from the three low bits of q: 1419 | // Q2→CC0, Q0→CC1, Q1→CC3 1420 | if (q & 0x4) 1421 | a1->status_word |= kConditionCode0; 1422 | if (q & 0x1) 1423 | a1->status_word |= kConditionCode1; 1424 | if (q & 0x2) 1425 | a1->status_word |= kConditionCode3; 1426 | 1427 | // 5) CC2 = “incomplete” flag based on exponent diff D = E0 – E1 1428 | int e0 = std::ilogb(st0); // unbiased exponent of st0 1429 | int e1 = std::ilogb(st1); // unbiased exponent of st1 1430 | int D = e0 - e1; 1431 | if (D >= 64) 
{ 1432 | a1->status_word |= kConditionCode2; 1433 | // (optional) do the “partial” reduction loop per spec if you want 1434 | // hardware-accurate step-wise remainder 1435 | } 1436 | // else D<64 ⇒ CC2 stays clear (complete reduction) 1437 | } 1438 | #else 1439 | X87_TRAMPOLINE_ARGS(void, x87_fprem1, (X87State * a1), x9); 1440 | #endif 1441 | 1442 | #if defined(X87_FPTAN) 1443 | void x87_fptan(X87State *a1) { 1444 | SIMDGuard simd_guard; 1445 | 1446 | LOG(1, "x87_fptan\n", 11); 1447 | 1448 | a1->status_word &= ~(X87StatusWordFlag::kConditionCode1 | 1449 | X87StatusWordFlag::kConditionCode2); 1450 | 1451 | // Get value from ST(0) 1452 | const auto value = a1->get_st(0); 1453 | 1454 | // Calculate tangent 1455 | auto tan_value = tan(value); 1456 | 1457 | // Store result in ST(0) 1458 | a1->set_st(0, tan_value); 1459 | 1460 | // Push 1.0 onto the FPU register stack 1461 | a1->push(); 1462 | a1->set_st(0, 1.0); 1463 | } 1464 | #else 1465 | X87_TRAMPOLINE_ARGS(void, x87_fptan, (X87State * a1), x9); 1466 | #endif 1467 | 1468 | #if defined(X87_FRNDINT) 1469 | void x87_frndint(X87State *a1) { 1470 | SIMDGuard simd_guard; 1471 | 1472 | LOG(1, "x87_frndint\n", 13); 1473 | 1474 | a1->status_word &= ~(X87StatusWordFlag::kConditionCode1); 1475 | 1476 | // Get current value and round it 1477 | double value = a1->get_st_fast(0); 1478 | double rounded; 1479 | auto round_bits = a1->control_word & X87ControlWord::kRoundingControlMask; 1480 | 1481 | switch (round_bits) { 1482 | case X87ControlWord::kRoundToNearest: { 1483 | rounded = std::nearbyint(value); 1484 | } break; 1485 | 1486 | case X87ControlWord::kRoundDown: { 1487 | rounded = std::floor(value); 1488 | } break; 1489 | case X87ControlWord::kRoundUp: { 1490 | rounded = std::ceil(value); 1491 | } break; 1492 | 1493 | case X87ControlWord::kRoundToZero: { 1494 | rounded = std::trunc(value); 1495 | } break; 1496 | } 1497 | 1498 | // Store rounded value and update tag 1499 | a1->set_st_fast(0, rounded); 1500 | } 1501 | #else 1502 
| X87_TRAMPOLINE_ARGS(void, x87_frndint, (X87State * a1), x9); 1503 | #endif 1504 | 1505 | #if defined(X87_FSCALE) 1506 | void x87_fscale(X87State *state) { 1507 | SIMDGuard simd_guard; 1508 | 1509 | LOG(1, "x87_fscale\n", 12); 1510 | 1511 | state->status_word &= ~(X87StatusWordFlag::kConditionCode1); 1512 | 1513 | // Get values from ST(0) and ST(1) 1514 | double st0 = state->get_st(0); 1515 | double st1 = state->get_st(1); 1516 | 1517 | // Round ST(1) to nearest integer 1518 | int scale = static_cast(st1); 1519 | 1520 | // Scale ST(0) by 2^scale using bit manipulation for integer powers 1521 | int32_t exponent = scale + 1023; // IEEE-754 bias 1522 | uint64_t scaleFactor = static_cast(exponent) << 52; 1523 | double factor = std::bit_cast(scaleFactor); 1524 | 1525 | // Multiply ST(0) by scale factor 1526 | double result = st0 * factor; 1527 | 1528 | // Store result back in ST(0) 1529 | state->set_st(0, result); 1530 | } 1531 | #else 1532 | X87_TRAMPOLINE_ARGS(void, x87_fscale, (X87State * state), x9); 1533 | #endif 1534 | 1535 | #if defined(X87_FSIN) 1536 | void x87_fsin(X87State *a1) { 1537 | SIMDGuardFull simd_guard; 1538 | 1539 | LOG(1, "x87_fsin\n", 10); 1540 | 1541 | a1->status_word &= ~(X87StatusWordFlag::kConditionCode1 | 1542 | X87StatusWordFlag::kConditionCode2); 1543 | 1544 | // Get current value from top register 1545 | const double value = a1->get_st_fast(0); 1546 | 1547 | // Store result and update tag 1548 | a1->set_st_fast(0, sin(value)); 1549 | } 1550 | #else 1551 | X87_TRAMPOLINE_ARGS(void, x87_fsin, (X87State * a1), x9); 1552 | #endif 1553 | 1554 | #if defined(X87_FSINCOS) 1555 | void x87_fsincos(X87State *a1) { 1556 | SIMDGuardFull simd_guard; 1557 | 1558 | LOG(1, "x87_fsincos\n", 13); 1559 | 1560 | a1->status_word &= ~(X87StatusWordFlag::kConditionCode1 | 1561 | X87StatusWordFlag::kConditionCode2); 1562 | 1563 | // Get value from ST(0) 1564 | const auto value = a1->get_st_fast(0); 1565 | 1566 | // Calculate sine and cosine 1567 | auto sin_value = 
sin(value); 1568 | auto cos_value = cos(value); 1569 | 1570 | // Store sine in ST(0) 1571 | a1->set_st_fast(0, sin_value); 1572 | 1573 | // Push cosine onto the FPU register stack 1574 | a1->push(); 1575 | a1->set_st_fast(0, cos_value); 1576 | 1577 | // Clear C2 condition code bit 1578 | a1->status_word &= ~X87StatusWordFlag::kConditionCode2; 1579 | } 1580 | #else 1581 | X87_TRAMPOLINE_ARGS(void, x87_fsincos, (X87State * a1), x9); 1582 | #endif 1583 | 1584 | // Computes square root of ST(0) and stores the result in ST(0). 1585 | #if defined(X87_FSQRT) 1586 | void x87_fsqrt(X87State *a1) { 1587 | SIMDGuard simd_guard; 1588 | 1589 | LOG(1, "x87_fsqrt\n", 11); 1590 | 1591 | a1->status_word &= ~(X87StatusWordFlag::kConditionCode1); 1592 | 1593 | // Get current value and calculate sqrt 1594 | const double value = a1->get_st_fast(0); 1595 | 1596 | a1->status_word |= X87StatusWordFlag::kPrecision; 1597 | 1598 | // Store result and update tag 1599 | a1->set_st_fast(0, sqrt(value)); 1600 | } 1601 | #else 1602 | X87_TRAMPOLINE_ARGS(void, x87_fsqrt, (X87State * a1), x9); 1603 | #endif 1604 | 1605 | #if defined(X87_FST_STI) 1606 | void x87_fst_STi(X87State *a1, unsigned int st_offset, bool pop) { 1607 | SIMDGuard simd_guard; 1608 | 1609 | LOG(1, "x87_fst_STi\n", 13); 1610 | 1611 | // Clear C1 condition code (bit 9) 1612 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 1613 | 1614 | // Copy ST(0) to ST(i) 1615 | a1->set_st(st_offset, a1->get_st(0)); 1616 | 1617 | // Pop if requested 1618 | if (pop) { 1619 | a1->pop(); 1620 | } 1621 | } 1622 | #else 1623 | X87_TRAMPOLINE_ARGS(void, x87_fst_STi, 1624 | (X87State * a1, unsigned int st_offset, bool pop), x9); 1625 | #endif 1626 | 1627 | #if defined(X87_FST_FP32) 1628 | X87ResultStatusWord x87_fst_fp32(X87State const *a1) { 1629 | SIMDGuard simd_guard; 1630 | 1631 | LOG(1, "x87_fst_fp32\n", 14); 1632 | 1633 | auto [value, status_word] = a1->get_st_const32(0); 1634 | float tmp = value; 1635 | return {std::bit_cast(tmp), 
status_word}; 1636 | } 1637 | #else 1638 | X87_TRAMPOLINE_ARGS(X87ResultStatusWord, x87_fst_fp32, (X87State const *a1), 1639 | x9); 1640 | #endif 1641 | 1642 | #if defined(X87_FST_FP64) 1643 | X87ResultStatusWord x87_fst_fp64(X87State const *a1) { 1644 | SIMDGuard simd_guard; 1645 | 1646 | LOG(1, "x87_fst_fp64\n", 14); 1647 | 1648 | // Create temporary double to ensure proper value representation 1649 | auto [value, status_word] = a1->get_st_const(0); 1650 | double tmp = value; 1651 | return {std::bit_cast(tmp), status_word}; 1652 | } 1653 | #else 1654 | X87_TRAMPOLINE_ARGS(X87ResultStatusWord, x87_fst_fp64, (X87State const *a1), 1655 | x9); 1656 | #endif 1657 | 1658 | #if defined(X87_FST_FP80) 1659 | X87Float80StatusWordResult x87_fst_fp80(X87State const *a1) { 1660 | SIMDGuard simd_guard; 1661 | 1662 | LOG(1, "x87_fst_fp80\n", 14); 1663 | 1664 | // Get value from ST(0) 1665 | auto [value, status_word] = a1->get_st_const(0); 1666 | 1667 | float tmp = value; 1668 | uint32_t float32 = std::bit_cast(tmp); 1669 | 1670 | // Extract components from float32 1671 | uint32_t mantissa = float32 & 0x7FFFFF; // 23 bits 1672 | uint8_t exp = (float32 >> 23) & 0xFF; // 8 bits 1673 | uint16_t sign = (float32 >> 31) << 15; // Move sign to bit 15 1674 | 1675 | X87Float80StatusWordResult result; 1676 | result.status_word = status_word; 1677 | 1678 | // Handle zero 1679 | if (exp == 0 && mantissa == 0) { 1680 | result.mantissa = 0; 1681 | result.exponent = sign; 1682 | return result; 1683 | } 1684 | 1685 | // Handle subnormal numbers 1686 | if (exp == 0) { 1687 | // Set denormal flag 1688 | 1689 | // Count leading zeros to normalize 1690 | int leading_zeros = 1691 | __builtin_clz(mantissa) - 8; // -8 because mantissa is in upper 23 bits 1692 | mantissa <<= leading_zeros; 1693 | 1694 | // Adjust exponent for normalization 1695 | exp = 1 - leading_zeros; 1696 | } 1697 | // Handle infinity or NaN 1698 | else if (exp == 255) { 1699 | // Set invalid operation flag if NaN 1700 | 1701 | 
result.mantissa = (uint64_t)mantissa << 40 | 0x8000000000000000ULL; 1702 | result.exponent = sign | 0x7FFF; // Maximum exponent 1703 | return result; 1704 | } 1705 | 1706 | // Normal numbers: Convert to x87 format 1707 | // Shift 23-bit mantissa to 64 bits and set explicit integer bit 1708 | result.mantissa = ((uint64_t)mantissa << 40) | 0x8000000000000000ULL; 1709 | 1710 | // Bias adjustment: IEEE 754 bias(127) to x87 bias(16383) 1711 | result.exponent = sign | (exp + 16383 - 127); 1712 | 1713 | return result; 1714 | } 1715 | #else 1716 | X87_TRAMPOLINE_ARGS(X87Float80, x87_fst_fp80, (X87State const *a1), x9); 1717 | #endif 1718 | 1719 | #if defined(X87_FSUB_ST) 1720 | void x87_fsub_ST(X87State *a1, unsigned int st_offset1, unsigned int st_offset2, 1721 | bool pop) { 1722 | SIMDGuard simd_guard; 1723 | 1724 | LOG(1, "x87_fsub_ST\n", 13); 1725 | 1726 | // Clear condition code 1 and exception flags 1727 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 1728 | 1729 | // Get register indices and values 1730 | const auto val1 = a1->get_st_fast(st_offset1); 1731 | const auto val2 = a1->get_st_fast(st_offset2); 1732 | 1733 | // Perform subtraction and store result in ST(st_offset1) 1734 | a1->set_st_fast(st_offset1, val1 - val2); 1735 | 1736 | if (pop) { 1737 | a1->pop(); 1738 | } 1739 | } 1740 | #else 1741 | X87_TRAMPOLINE_ARGS(void, x87_fsub_ST, 1742 | (X87State * a1, unsigned int st_offset1, 1743 | unsigned int st_offset2, bool pop), 1744 | x9); 1745 | #endif 1746 | 1747 | #if defined(X87_FSUB_F32) 1748 | void x87_fsub_f32(X87State *a1, unsigned int a2) { 1749 | SIMDGuard simd_guard; 1750 | 1751 | LOG(1, "x87_fsub_f32\n", 14); 1752 | 1753 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 1754 | 1755 | auto value = std::bit_cast(a2); 1756 | auto st0 = a1->get_st_fast(0); 1757 | 1758 | a1->set_st_fast(0, st0 - value); 1759 | } 1760 | #else 1761 | X87_TRAMPOLINE_ARGS(void, x87_fsub_f32, (X87State * a1, unsigned int a2), x9); 1762 | #endif 1763 | 1764 | 
#if defined(X87_FSUB_F64) 1765 | void x87_fsub_f64(X87State *a1, unsigned long long a2) { 1766 | SIMDGuard simd_guard; 1767 | 1768 | LOG(1, "x87_fsub_f64\n", 14); 1769 | 1770 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 1771 | 1772 | auto value = std::bit_cast(a2); 1773 | auto st0 = a1->get_st_fast(0); 1774 | 1775 | a1->set_st_fast(0, st0 - value); 1776 | } 1777 | #else 1778 | X87_TRAMPOLINE_ARGS(void, x87_fsub_f64, (X87State * a1, unsigned long long a2), 1779 | x9); 1780 | #endif 1781 | 1782 | #if defined(X87_FSUBR_ST) 1783 | void x87_fsubr_ST(X87State *a1, unsigned int st_offset1, 1784 | unsigned int st_offset2, bool pop) { 1785 | SIMDGuard simd_guard; 1786 | 1787 | LOG(1, "x87_fsubr_ST\n", 14); 1788 | 1789 | // Clear condition code 1 and exception flags 1790 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 1791 | 1792 | // Get register indices and values 1793 | const auto val1 = a1->get_st_fast(st_offset1); 1794 | const auto val2 = a1->get_st_fast(st_offset2); 1795 | 1796 | // Perform reversed subtraction and store result in ST(st_offset1) 1797 | a1->set_st_fast(st_offset1, val2 - val1); 1798 | 1799 | if (pop) { 1800 | a1->pop(); 1801 | } 1802 | } 1803 | #else 1804 | X87_TRAMPOLINE_ARGS(void, x87_fsubr_ST, 1805 | (X87State * a1, unsigned int st_offset1, 1806 | unsigned int st_offset2, bool pop), 1807 | x9); 1808 | #endif 1809 | 1810 | #if defined(X87_FSUBR_F32) 1811 | void x87_fsubr_f32(X87State *a1, unsigned int a2) { 1812 | SIMDGuard simd_guard; 1813 | 1814 | LOG(1, "x87_fsubr_f32\n", 15); 1815 | 1816 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 1817 | 1818 | auto value = std::bit_cast(a2); 1819 | auto st0 = a1->get_st_fast(0); 1820 | 1821 | a1->set_st_fast(0, value - st0); 1822 | } 1823 | #else 1824 | X87_TRAMPOLINE_ARGS(void, x87_fsubr_f32, (X87State * a1, unsigned int a2), x9); 1825 | #endif 1826 | 1827 | #if defined(X87_FSUBR_F64) 1828 | void x87_fsubr_f64(X87State *a1, unsigned long long a2) { 1829 | SIMDGuard 
simd_guard; 1830 | 1831 | LOG(1, "x87_fsubr_f64\n", 15); 1832 | 1833 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 1834 | 1835 | auto value = std::bit_cast(a2); 1836 | auto st0 = a1->get_st_fast(0); 1837 | 1838 | a1->set_st_fast(0, value - st0); 1839 | } 1840 | #else 1841 | X87_TRAMPOLINE_ARGS(void, x87_fsubr_f64, (X87State * a1, unsigned long long a2), 1842 | x9); 1843 | #endif 1844 | 1845 | #if defined(X87_FUCOM) 1846 | void x87_fucom(X87State *a1, unsigned int st_offset, unsigned int pop) { 1847 | SIMDGuard simd_guard; 1848 | 1849 | LOG(1, "x87_fucom\n", 11); 1850 | auto st0 = a1->get_st(0); 1851 | auto src = a1->get_st(st_offset); 1852 | 1853 | // Clear condition code bits C0, C2, C3 (bits 8, 9, 14) 1854 | a1->status_word &= ~(kConditionCode0 | kConditionCode2 | kConditionCode3); 1855 | 1856 | // Set condition codes based on comparison 1857 | if (isnan(st0) || isnan(src)) { 1858 | a1->status_word |= 1859 | kConditionCode0 | kConditionCode2 | kConditionCode3; // Set C0=C2=C3=1 1860 | } else if (st0 > src) { 1861 | // Leave C0=C2=C3=0 1862 | } else if (st0 < src) { 1863 | a1->status_word |= kConditionCode0; // Set C0=1 1864 | } else { // st0 == src 1865 | a1->status_word |= kConditionCode3; // Set C3=1 1866 | } 1867 | 1868 | // Handle pops if requested 1869 | for (auto i = 0; i < pop; ++i) { 1870 | a1->pop(); 1871 | } 1872 | } 1873 | #else 1874 | X87_TRAMPOLINE_ARGS(void, x87_fucom, 1875 | (X87State * a1, unsigned int st_offset, unsigned int pop), 1876 | x9); 1877 | #endif 1878 | 1879 | #if defined(X87_FUCOMI) 1880 | uint32_t x87_fucomi(X87State *state, unsigned int st_offset, bool pop_stack) { 1881 | SIMDGuard simd_guard; 1882 | 1883 | LOG(1, "x87_fucomi\n", 12); 1884 | 1885 | state->status_word &= ~X87StatusWordFlag::kConditionCode1; 1886 | 1887 | auto st0_val = state->get_st(0); 1888 | auto sti_val = state->get_st(st_offset); 1889 | 1890 | uint32_t flags = 0; 1891 | /* 1892 | Filters: fcomi 1893 | Randomness seeded to: 3528984885 1894 | 
x87_fcomi_less 1895 | x87_fcomi result: 0x000000000000000 1896 | x87_fcomi_greater 1897 | x87_fcomi result: 0x000000020000000 1898 | x87_fcomi_equal 1899 | x87_fcomi result: 0x000000060000000 1900 | */ 1901 | 1902 | if (st0_val < sti_val) { 1903 | flags = 0x000000000000000; 1904 | } else if (st0_val > sti_val) { 1905 | flags = 0x000000020000000; 1906 | } else { 1907 | flags = 0x000000060000000; 1908 | } 1909 | 1910 | if (pop_stack) { 1911 | state->pop(); 1912 | } 1913 | 1914 | return flags; 1915 | } 1916 | #else 1917 | X87_TRAMPOLINE_ARGS(uint32_t, x87_fucomi, 1918 | (X87State * state, unsigned int st_offset, bool pop_stack), 1919 | x9); 1920 | #endif 1921 | 1922 | #if defined(X87_FXAM) 1923 | void x87_fxam(X87State *a1) { 1924 | SIMDGuard simd_guard; 1925 | 1926 | LOG(1, "x87_fxam\n", 10); 1927 | 1928 | // Get tag state for ST(0) 1929 | X87TagState tag = a1->get_st_tag(0); 1930 | 1931 | // simple_printf("tag: %d\n", tag); 1932 | 1933 | static_assert((X87StatusWordFlag::kConditionCode0 | 1934 | X87StatusWordFlag::kConditionCode1 | 1935 | X87StatusWordFlag::kConditionCode2 | 1936 | X87StatusWordFlag::kConditionCode3) == 0x4700); 1937 | 1938 | // Clear C3,C2,C1,C0 bits 1939 | a1->status_word &= ~( 1940 | X87StatusWordFlag::kConditionCode0 | X87StatusWordFlag::kConditionCode1 | 1941 | X87StatusWordFlag::kConditionCode2 | X87StatusWordFlag::kConditionCode3); 1942 | 1943 | // Handle empty and zero based on tag word 1944 | if (tag == X87TagState::kEmpty) { 1945 | a1->status_word |= X87StatusWordFlag::kConditionCode3 | 1946 | X87StatusWordFlag::kConditionCode0; // C3=1, C0=1 (101) 1947 | return; 1948 | } 1949 | if (tag == X87TagState::kZero) { 1950 | a1->status_word |= X87StatusWordFlag::kConditionCode3; // C3=1 (100) 1951 | return; 1952 | } 1953 | 1954 | // Get actual value for other cases 1955 | auto value = a1->get_st(0); 1956 | 1957 | // Set C1 based on sign 1958 | if (signbit(value)) { 1959 | a1->status_word |= X87StatusWordFlag::kConditionCode1; 1960 | } 1961 | 1962 
| // Set C3,C2,C0 based on value type 1963 | if (isnan(value)) { 1964 | a1->status_word |= X87StatusWordFlag::kConditionCode0; // 001 1965 | } else if (isinf(value)) { 1966 | a1->status_word |= X87StatusWordFlag::kConditionCode2 | 1967 | X87StatusWordFlag::kConditionCode0; // 011 1968 | } else if (fpclassify(value) == FP_SUBNORMAL) { 1969 | a1->status_word |= X87StatusWordFlag::kConditionCode3 | 1970 | X87StatusWordFlag::kConditionCode2; // 110 1971 | } else { 1972 | a1->status_word |= X87StatusWordFlag::kConditionCode2; // 010 (normal) 1973 | } 1974 | } 1975 | #else 1976 | X87_TRAMPOLINE_ARGS(void, x87_fxam, (X87State * a1), x9); 1977 | #endif 1978 | 1979 | #if defined(X87_FXCH) 1980 | void x87_fxch(X87State *a1, unsigned int st_offset) { 1981 | SIMDGuard simd_guard; 1982 | 1983 | LOG(1, "x87_fxch\n", 10); 1984 | 1985 | // Clear condition code 1 1986 | a1->status_word &= ~X87StatusWordFlag::kConditionCode1; 1987 | 1988 | auto st0 = a1->get_st(0); 1989 | auto sti = a1->get_st(st_offset); 1990 | 1991 | a1->set_st(0, sti); 1992 | a1->set_st(st_offset, st0); 1993 | } 1994 | #else 1995 | X87_TRAMPOLINE_ARGS(void, x87_fxch, (X87State * a1, unsigned int st_offset), 1996 | x9); 1997 | #endif 1998 | 1999 | #if defined(X87_FXTRACT) 2000 | void x87_fxtract(X87State *a1) { 2001 | SIMDGuardFull simd_guard; 2002 | 2003 | LOG(1, "x87_fxtract\n", 13); 2004 | 2005 | auto st0 = a1->get_st(0); 2006 | 2007 | // If the floating-point zero-divide exception (#Z) is masked and the source 2008 | // operand is zero, an exponent value of –∞ is stored in register ST(1) and 0 2009 | // with the sign of the source operand is stored in register ST(0). 
2010 | if ((a1->control_word & X87ControlWord::kZeroDivideMask) != 0 && st0 == 0.0) { 2011 | a1->set_st(1, -std::numeric_limits::infinity()); 2012 | a1->set_st(0, copysign(0.0, st0)); 2013 | return; 2014 | } 2015 | 2016 | if (isinf(st0)) { 2017 | a1->set_st(0, st0); 2018 | a1->push(); 2019 | a1->set_st(0, std::numeric_limits::infinity()); 2020 | return; 2021 | } 2022 | 2023 | auto e = std::floor(log2(abs(st0))); 2024 | auto m = st0 / pow(2.0, e); 2025 | 2026 | a1->set_st(0, e); 2027 | 2028 | a1->push(); 2029 | a1->set_st(0, m); 2030 | } 2031 | #else 2032 | X87_TRAMPOLINE_ARGS(void, x87_fxtract, (X87State * a1), x9); 2033 | #endif 2034 | 2035 | void fyl2x_common(X87State *state, double constant) { 2036 | // Clear condition code 1 2037 | state->status_word &= ~X87StatusWordFlag::kConditionCode1; 2038 | 2039 | // Get x from ST(0) and y from ST(1) 2040 | auto st0 = state->get_st(0); 2041 | auto st1 = state->get_st(1); 2042 | 2043 | // Calculate y * log2(x) 2044 | auto result = st1 * (log2(st0 + constant)); 2045 | 2046 | // Pop ST(0) 2047 | state->pop(); 2048 | 2049 | // Store result in new ST(0) 2050 | state->set_st(0, result); 2051 | } 2052 | 2053 | // Replace ST(1) with (ST(1) ∗ log2ST(0)) and pop the register stack. 2054 | #if defined(X87_FYL2X) 2055 | void x87_fyl2x(X87State *state) { 2056 | SIMDGuardFull simd_guard; 2057 | LOG(1, "x87_fyl2x\n", 12); 2058 | 2059 | fyl2x_common(state, 0.0); 2060 | } 2061 | #else 2062 | X87_TRAMPOLINE_ARGS(void, x87_fyl2x, (X87State * state), x9); 2063 | #endif 2064 | 2065 | // Replace ST(1) with (ST(1) ∗ log2ST(0 + 1.0)) and pop the register stack. 
2066 | #if defined(X87_FYL2XP1) 2067 | void x87_fyl2xp1(X87State *state) { 2068 | SIMDGuardFull simd_guard; 2069 | LOG(1, "x87_fyl2xp1\n", 14); 2070 | 2071 | fyl2x_common(state, 1.0); 2072 | } 2073 | #else 2074 | X87_TRAMPOLINE_ARGS(void, x87_fyl2xp1, (X87State * state), x9); 2075 | #endif 2076 | 2077 | X87_TRAMPOLINE(sse_pcmpestri, x9) 2078 | X87_TRAMPOLINE(sse_pcmpestrm, x9) 2079 | X87_TRAMPOLINE(sse_pcmpistri, x9) 2080 | X87_TRAMPOLINE(sse_pcmpistrm, x9) 2081 | X87_TRAMPOLINE(is_ldt_initialized, x9) 2082 | X87_TRAMPOLINE(get_ldt, x9) 2083 | X87_TRAMPOLINE(set_ldt, x9) 2084 | X87_TRAMPOLINE(execution_mode_for_code_segment_selector, x9) 2085 | X87_TRAMPOLINE(mov_segment, x9) 2086 | X87_TRAMPOLINE(abi_for_address, x9) 2087 | 2088 | X87_TRAMPOLINE(determine_state_recovery_action, x9) 2089 | X87_TRAMPOLINE(get_segment_limit, x9) 2090 | X87_TRAMPOLINE(translator_set_variant, x9) 2091 | 2092 | X87_TRAMPOLINE(runtime_cpuid, x22) 2093 | X87_TRAMPOLINE(runtime_wide_udiv_64, x9) 2094 | X87_TRAMPOLINE(runtime_wide_sdiv_64, x9) --------------------------------------------------------------------------------