├── test
│   ├── empty.s
│   ├── riscv64
│   │   ├── exit.S
│   │   ├── meson.build
│   │   └── lrsc-loop.S
│   ├── aarch64
│   │   ├── exit.S
│   │   ├── meson.build
│   │   ├── recursion.S
│   │   └── fork.S
│   ├── x86_64
│   │   ├── exit.S
│   │   ├── call-pop.S
│   │   ├── nowrite.S
│   │   ├── call-ret-mismatch.S
│   │   ├── recursion.S
│   │   ├── stosb-call.S
│   │   ├── meson.build
│   │   └── fork.S
│   └── meson.build
├── .gitignore
├── server
│   ├── version.cc.in
│   ├── version.h
│   ├── config.cc
│   ├── config.h
│   ├── optimizer.h
│   ├── callconv.h
│   ├── codegenerator.h
│   ├── cache.h
│   ├── meson.build
│   ├── connection.h
│   ├── optimizer.cc
│   ├── codegenerator.cc
│   ├── cache.cc
│   ├── connection.cc
│   └── rewriteserver.cc
├── client
│   ├── emulate.h
│   ├── dispatch.h
│   ├── memory.h
│   ├── dispatcher-info.h
│   ├── elf-loader.h
│   ├── rtld.h
│   ├── plt.inc
│   ├── state.h
│   ├── meson.build
│   ├── translator.h
│   ├── memset-aarch64.S
│   ├── memory.c
│   ├── common.h
│   ├── translator.c
│   ├── main.c
│   ├── elf-loader.c
│   ├── math.c
│   ├── dispatch.c
│   ├── minilibc.c
│   └── rtld.c
├── .builds
│   ├── llvm15-x86_64-debian.yml
│   └── llvm17-x86_64-debian.yml
├── .gitmodules
├── shared
│   └── instrew-protocol.inc
├── meson.build
├── README.md
└── LICENSE
/test/empty.s:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /*.sublime-*
2 | /build*
3 | /install
4 |
--------------------------------------------------------------------------------
/test/riscv64/exit.S:
--------------------------------------------------------------------------------
1 | .text
2 | .global _start
3 | _start:
4 |     li a0, 0
5 |     li a7, 94
6 |     ecall
7 |     j .
8 |
--------------------------------------------------------------------------------
/test/aarch64/exit.S:
--------------------------------------------------------------------------------
1 | .text
2 | .global _start
3 | _start:
4 |     mov x0, #0
5 |     mov x8, #94
6 |     svc #0
7 |     udf #0
8 |
--------------------------------------------------------------------------------
/test/x86_64/exit.S:
--------------------------------------------------------------------------------
1 | .intel_syntax noprefix
2 | .text
3 | .global _start
4 | _start:
5 |     xor edi, edi
6 |     mov eax, 231
7 |     syscall
8 |     ud2
9 |
--------------------------------------------------------------------------------
/test/riscv64/meson.build:
--------------------------------------------------------------------------------
1 |
2 | triple = 'riscv64-linux-gnu'
3 | cases = [
4 |     {'name': 'exit', 'src': files('exit.S')},
5 |     {'name': 'lrsc-loop', 'src': files('lrsc-loop.S')},
6 | ]
7 |
--------------------------------------------------------------------------------
/server/version.cc.in:
--------------------------------------------------------------------------------
1 |
2 | #include "version.h"
3 |
4 | #include <string_view>
5 |
6 | using namespace std::literals;
7 |
8 | namespace instrew {
9 |
10 | const std::string_view instrewVersion = "@VCS_TAG@"sv;
11 |
12 | }
13 |
--------------------------------------------------------------------------------
/client/emulate.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef _INSTREW_EMULATE_H
3 | #define _INSTREW_EMULATE_H
4 |
5 | #include <stdint.h>
6 |
7 | struct State;
8 |
9 | void signal_init(struct State* state);
10 | void emulate_syscall(uint64_t* cpu_state);
11 |
12 | #endif
13 |
--------------------------------------------------------------------------------
/client/dispatch.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef _INSTREW_DISPATCH_H
3 | #define _INSTREW_DISPATCH_H
4 |
5 | #include
6 | #include
7 | #include
8 |
9 | const struct DispatcherInfo* dispatch_get(struct State* state);
10 |
11 | #endif
12 |
--------------------------------------------------------------------------------
/server/version.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef _INSTREW_SERVER_VERSION_H
3 | #define _INSTREW_SERVER_VERSION_H
4 |
5 | #include <string_view>
6 |
7 | namespace instrew {
8 |
9 | extern const std::string_view instrewVersion;
10 |
11 | } // namespace instrew
12 |
13 | #endif
14 |
--------------------------------------------------------------------------------
/test/x86_64/call-pop.S:
--------------------------------------------------------------------------------
1 | .intel_syntax noprefix
2 | .text
3 | .global _start
4 | _start:
5 |     mov eax, 0x1000000
6 | 1:  call 2f
7 | 2:  pop rcx
8 |     sub eax, 1
9 |     jnz 1b
10 |
11 |     xor edi, edi
12 |     mov eax, 231
13 |     syscall
14 |     ud2
15 |
--------------------------------------------------------------------------------
/server/config.cc:
--------------------------------------------------------------------------------
1 |
2 | #include "config.h"
3 |
4 | #include <llvm/Support/CommandLine.h>
5 |
6 |
7 | llvm::cl::OptionCategory InstrewCategory("Instrew Options");
8 | llvm::cl::OptionCategory CodeGenCategory("Instrew Code Generation Options", "Options affecting the translated code.");
9 |
--------------------------------------------------------------------------------
/test/x86_64/nowrite.S:
--------------------------------------------------------------------------------
1 | .intel_syntax noprefix
2 | .text
3 | .global _start
4 | _start:
5 |     mov byte ptr [rip + data], 0
6 |     // should not be reached
7 |     xor edi, edi
8 |     mov eax, 231
9 |     syscall
10 |
11 | .section .rodata
12 | data:
13 |     .byte 0xff
14 |
--------------------------------------------------------------------------------
/server/config.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef _INSTREW_SERVER_CONFIG_H
3 | #define _INSTREW_SERVER_CONFIG_H
4 |
5 |
6 | namespace llvm::cl {
7 | class OptionCategory;
8 | }
9 |
10 | extern llvm::cl::OptionCategory InstrewCategory;
11 | extern llvm::cl::OptionCategory CodeGenCategory;
12 |
13 | #endif
14 |
--------------------------------------------------------------------------------
/test/riscv64/lrsc-loop.S:
--------------------------------------------------------------------------------
1 | .text
2 | .global _start
3 | _start:
4 |     mv a0, sp
5 | 1:  ld a5, (a0)
6 | 2:  lr.d.aqrl a4, (a0)
7 |     bne a4, a5, 1b
8 |     sc.d.rl a2, a3, (a0)
9 |     bnez a2, 2b
10 |     bne a4, a5, 1b
11 |
12 |     li a0, 0
13 |     li a7, 94
14 |     ecall
15 |     j .
16 |
--------------------------------------------------------------------------------
/test/aarch64/meson.build:
--------------------------------------------------------------------------------
1 |
2 | triple = 'aarch64-linux-gnu'
3 | cases = [
4 |     {'name': 'exit', 'src': files('exit.S')},
5 |     {'name': 'fork', 'src': files('fork.S')},
6 |     {'name': 'recursion', 'src': files('recursion.S')},
7 |     {'name': 'recursion-callret', 'src': files('recursion.S'), 'instrew_args': ['-callret']},
8 | ]
9 |
--------------------------------------------------------------------------------
/test/x86_64/call-ret-mismatch.S:
--------------------------------------------------------------------------------
1 | .intel_syntax noprefix
2 | .text
3 | .global _start
4 | _start:
5 |     call foo
6 |
7 |     xor edi, edi
8 |     mov eax, 231
9 |     syscall
10 |     ud2
11 |
12 | foo:
13 |     push rax
14 |     call bar
15 |     pop rcx
16 |     ret
17 | bar:
18 |     add rsp, 0x10
19 |     ret
20 |
--------------------------------------------------------------------------------
/client/memory.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef _INSTREW_MEMORY_H
3 | #define _INSTREW_MEMORY_H
4 |
5 | #include <stddef.h>
6 |
7 | int mem_init(void);
8 |
9 | void* mem_alloc_data(size_t size, size_t alignment);
10 |
11 | void* mem_alloc_code(size_t size, size_t alignment);
12 | int mem_write_code(void* dst, const void* src, size_t size);
13 |
14 | #endif
15 |
--------------------------------------------------------------------------------
/client/dispatcher-info.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef _INSTREW_DISPATCHER_INFO_H
3 | #define _INSTREW_DISPATCHER_INFO_H
4 |
5 | #include <stdint.h>
6 |
7 | struct DispatcherInfo {
8 |     void (*loop_func)(uint64_t* cpu_regs);
9 |     uintptr_t quick_dispatch_func;
10 |     uintptr_t full_dispatch_func;
11 |
12 |     uint8_t patch_data_reg;
13 | };
14 |
15 | #endif
16 |
--------------------------------------------------------------------------------
/.builds/llvm15-x86_64-debian.yml:
--------------------------------------------------------------------------------
1 | image: debian/sid
2 | arch: amd64
3 | packages:
4 | - meson
5 | - llvm-15-dev
6 | - libssl-dev
7 | - pkg-config
8 | - clang
9 | - lld
10 | sources:
11 | - https://git.sr.ht/~aengelke/instrew
12 | tasks:
13 | - build: |
14 |     mkdir build
15 |     meson build instrew -Dprefix="$PWD"/install -Ddebug=true -Doptimization=3
16 |     ninja -C build
17 |     ninja -C build test
18 |     ninja -C build install
19 |
--------------------------------------------------------------------------------
/.builds/llvm17-x86_64-debian.yml:
--------------------------------------------------------------------------------
1 | image: debian/sid
2 | arch: amd64
3 | packages:
4 | - meson
5 | - llvm-17-dev
6 | - libssl-dev
7 | - pkg-config
8 | - clang
9 | - lld
10 | sources:
11 | - https://git.sr.ht/~aengelke/instrew
12 | tasks:
13 | - build: |
14 |     mkdir build
15 |     meson build instrew -Dprefix="$PWD"/install -Ddebug=true -Doptimization=3
16 |     ninja -C build
17 |     ninja -C build test
18 |     ninja -C build install
19 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "subprojects/rellume"]
2 | path = subprojects/rellume
3 | url = https://github.com/aengelke/rellume.git
4 | [submodule "subprojects/fadec"]
5 | path = subprojects/fadec
6 | url = https://github.com/aengelke/fadec.git
7 | [submodule "subprojects/frvdec"]
"subprojects/frvdec"] 8 | path = subprojects/frvdec 9 | url = https://git.sr.ht/~aengelke/frvdec 10 | [submodule "subprojects/farmdec"] 11 | path = subprojects/farmdec 12 | url = https://github.com/okitec/farmdec.git 13 | -------------------------------------------------------------------------------- /server/optimizer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _INSTREW_SERVER_OPTIMIZER_H 3 | #define _INSTREW_SERVER_OPTIMIZER_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | class Optimizer { 13 | public: 14 | void Optimize(llvm::Function* fn); 15 | 16 | /// Dump optimizer configuration into the buffer. 17 | void appendConfig(llvm::SmallVectorImpl& buffer) const; 18 | }; 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /test/x86_64/recursion.S: -------------------------------------------------------------------------------- 1 | .intel_syntax noprefix 2 | .text 3 | .global _start 4 | _start: 5 | mov edi, 38 6 | call fib 7 | xor edi, edi 8 | cmp rax, 39088169 9 | setne dil 10 | mov eax, 231 11 | syscall 12 | ud2 13 | 14 | fib: 15 | mov rax, rdi 16 | cmp rax, 1 17 | jbe 1f 18 | push rbx 19 | mov rbx, rax 20 | sub rdi, 1 21 | call fib 22 | mov rdi, rbx 23 | sub rdi, 2 24 | mov rbx, rax 25 | call fib 26 | add rax, rbx 27 | pop rbx 28 | 1: ret 29 | -------------------------------------------------------------------------------- /test/x86_64/stosb-call.S: -------------------------------------------------------------------------------- 1 | .intel_syntax noprefix 2 | .text 3 | .global _start 4 | _start: 5 | mov rdi, rsp 6 | mov eax, 0xdf 7 | mov ecx, 8 8 | rep stosb 9 | call foo 10 | .Laftercall: 11 | 12 | mov edi, 1 // exit status 13 | mov rsi, 0xdfdfdfdfdfdfdfdf 14 | cmp [rsp], rsi 15 | jne .Lexit 16 | lea rsi, [rip + .Laftercall] 17 | cmp [rsp-8], rsi 18 | jne .Lexit 19 | 20 | xor edi, edi 21 | .Lexit: 22 | mov eax, 231 23 | syscall 24 | ud2 25 | 26 | foo: 27 | ret 28 | -------------------------------------------------------------------------------- /test/aarch64/recursion.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global _start 3 | _start: 4 | mov x0, #38 5 | bl fib 6 | ldr x1, .Lexpected 7 | cmp x0, x1 8 | cset x0, ne 9 | mov x8, #94 10 | svc #0 11 | udf #0 12 | 13 | 14 | fib: 15 | cmp x0, #1 16 | b.ls 1f 17 | stp x19, lr, [sp, #-16]! 
18 |     sub x19, x0, 2
19 |     sub x0, x0, 1
20 |     bl fib
21 |     mov x1, x19
22 |     mov x19, x0
23 |     mov x0, x1
24 |     bl fib
25 |     add x0, x0, x19
26 |     ldp x19, lr, [sp], #16
27 | 1:  ret
28 |
29 | .section .rodata
30 | .Lexpected:
31 |     .8byte 39088169
32 | .previous
33 |
--------------------------------------------------------------------------------
/client/elf-loader.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef _INSTREW_ELF_LOADER_H
3 | #define _INSTREW_ELF_LOADER_H
4 |
5 | #include <elf.h>
6 | #include <stddef.h>
7 |
8 |
9 | #define Elf_Half Elf64_Half
10 | #define Elf_Ehdr Elf64_Ehdr
11 | #define Elf_Phdr Elf64_Phdr
12 | #define Elf_Shdr Elf64_Shdr
13 | #define Elf_Note Elf64_Note
14 |
15 | struct BinaryInfo {
16 |     void* exec_entry;
17 |     void* elf_entry;
18 |     Elf_Half machine;
19 |     Elf_Phdr* phdr;
20 |     size_t phnum;
21 |     size_t phent;
22 | };
23 |
24 | typedef struct BinaryInfo BinaryInfo;
25 |
26 | int load_elf_binary(const char* filename, BinaryInfo* out_info);
27 |
28 | #endif
29 |
--------------------------------------------------------------------------------
/server/callconv.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef _INSTREW_SERVER_CALLCONV_H
3 | #define _INSTREW_SERVER_CALLCONV_H
4 |
5 | #include <llvm/IR/Function.h>
6 |
7 |
8 | enum class CallConv {
9 |     CDECL,
10 | #if LL_LLVM_MAJOR < 17
11 |     // LLVM 17 dropped hhvmcc
12 |     HHVM,
13 |     RV64_X86_HHVM,
14 |     AARCH64_X86_HHVM,
15 | #endif
16 |     X86_X86_REGCALL,
17 |     RV64_X86_REGCALL,
18 |     AARCH64_X86_REGCALL,
19 |     X86_AARCH64_X,
20 |     AARCH64_AARCH64_X,
21 | };
22 |
23 | CallConv GetFastCC(int host_arch, int guest_arch);
24 | int GetCallConvClientNumber(CallConv cc);
25 | llvm::Function* ChangeCallConv(llvm::Function* fn, CallConv cc);
26 |
27 | #endif
28 |
--------------------------------------------------------------------------------
/server/codegenerator.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef _INSTREW_SERVER_CODE_GENERATOR_H
3 | #define _INSTREW_SERVER_CODE_GENERATOR_H
4 |
5 | #include <llvm/ADT/SmallVector.h>
6 | #include <memory>
7 |
8 |
9 | struct IWServerConfig;
10 |
11 | class CodeGenerator {
12 | public:
13 |     CodeGenerator(const IWServerConfig& server_config, bool pic,
14 |                   llvm::SmallVectorImpl<char> &o);
15 |     ~CodeGenerator();
16 |     void GenerateCode(llvm::Module* mod);
17 |
18 |     /// Dump code generator configuration into the buffer.
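    /// The dump currently consists of a version tag and the configured targetopt level.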
19 |     void appendConfig(llvm::SmallVectorImpl<char>& buffer) const;
20 |
21 | private:
22 |     class impl;
23 |     std::unique_ptr<impl> pimpl;
24 | };
25 |
26 | #endif
27 |
--------------------------------------------------------------------------------
/server/cache.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef _INSTREW_SERVER_CACHE_H
3 | #define _INSTREW_SERVER_CACHE_H
4 |
5 | #include
6 | #include
7 | #include
8 |
9 | namespace instrew {
10 |
11 | class Cache {
12 | public:
13 |     static constexpr size_t HASH_SIZE = 20;
14 |
15 |     Cache();
16 |     ~Cache();
17 |
18 |     std::pair<int, size_t> Get(const uint8_t* hash); // returns fd (or -1) + size
19 |     void Put(const uint8_t* hash, size_t bufsz, const char* buf);
20 |
21 | private:
22 |     std::filesystem::path FileName(const uint8_t* hash, std::string suffix = "");
23 |
24 |     bool allow_read;
25 |     bool allow_write;
26 |     std::filesystem::path path;
27 | };
28 |
29 | } // namespace instrew
30 |
31 | #endif
32 |
--------------------------------------------------------------------------------
/test/x86_64/meson.build:
--------------------------------------------------------------------------------
1 | triple = 'x86_64-linux-gnu'
2 | cases = [
3 |     {'name': 'exit', 'src': files('exit.S')},
4 |     {'name': 'call-pop', 'src': files('call-pop.S')},
5 |     {'name': 'call-ret-mismatch', 'src': files('call-ret-mismatch.S')},
6 |     {'name': 'call-ret-mismatch-callret', 'src': files('call-ret-mismatch.S'), 'instrew_args': ['-callret']},
7 |     {'name': 'nowrite', 'src': files('nowrite.S'), 'should_fail': true},
8 |     {'name': 'fork', 'src': files('fork.S')},
9 |     {'name': 'recursion', 'src': files('recursion.S')},
10 |     {'name': 'recursion-callret', 'src': files('recursion.S'), 'instrew_args': ['-callret']},
11 |     {'name': 'stosb-call', 'src': files('stosb-call.S')},
12 |     {'name': 'stosb-call-callret', 'src': files('stosb-call.S'), 'instrew_args': ['-callret']},
13 | ]
14 |
--------------------------------------------------------------------------------
/test/aarch64/fork.S:
--------------------------------------------------------------------------------
1 | .text
2 | .global _start
3 | _start:
4 |     mov x0, #17 // flags = SIGCHLD
5 |     mov x1, xzr // stack
6 |     mov x2, xzr // parent_tid
7 |     mov x3, xzr // tls
8 |     mov x4, xzr // child_tid
9 |     mov x8, #220 // __NR_clone
10 |     svc #0
11 |     tst x0, x0
12 |     b.mi .Lexit
13 |     b.eq .Lchild
14 |
15 |     mov x9, x0 // save pid to x9
16 |     // x0 is pid
17 |     sub x1, sp, #8 // wstatus
18 |     mov x2, xzr // options
19 |     mov x3, xzr // rusage
20 |     mov x8, #260 // __NR_wait4
21 |     svc #0
22 |     tst x0, x0
23 |     b.mi .Lexit
24 |     cmp x0, x9
25 |     mov x0, #1
26 |     b.ne .Lexit
27 |
28 |     ldr w1, [x1]
29 |     mov w2, #0x4200 // W_EXITCODE(0x42, 0)
30 |     cmp w1, w2
31 |     b.ne .Lexit
32 |     mov x0, #0
33 |
34 | .Lexit:
35 |     mov x8, #94 // __NR_exit_group
36 |     svc #0
37 |     udf #0
38 |
39 | .Lchild:
40 |     mov x0, #0x42
41 |     mov x8, #94 // __NR_exit_group
42 |     svc #0
43 |     udf #0
44 |
--------------------------------------------------------------------------------
/test/x86_64/fork.S:
--------------------------------------------------------------------------------
1 | .intel_syntax noprefix
2 | .text
3 | .global _start
4 | _start:
5 |     mov edi, 17 // flags = SIGCHLD
6 |     xor esi, esi // stack
7 |     xor edx, edx // parent_tid
8 |     xor r10, r10 // child_tid
9 |     xor r8, r8 // tls
10 |     mov eax, 56 // __NR_clone
11 |     syscall
12 |     test rax, rax
13 |     js .Lexit
14 |     jz .Lchild
15 |
16 |     mov rdi, rax // pid
17 |     lea rsi, [rsp - 0x8] // wstatus
18 |     xor edx, edx // options
19 | xor r10, r10 // rusage 20 | mov eax, 61 // __NR_wait4 21 | syscall 22 | test rax, rax 23 | js .Lexit // wait4 failed? 24 | cmp rax, rdi 25 | mov rax, 1 26 | jne .Lexit // wait4 should have returned pid 27 | 28 | mov esi, [rsi] 29 | cmp esi, 0x4200 // W_EXITCODE(0x42, 0) 30 | jne .Lexit 31 | xor eax, eax 32 | 33 | .Lexit: 34 | mov rdi, rax 35 | mov eax, 231 // __NR_exit_group 36 | syscall 37 | ud2 38 | 39 | .Lchild: 40 | mov edi, 0x42 41 | mov eax, 231 // __NR_exit_group 42 | syscall 43 | ud2 44 | -------------------------------------------------------------------------------- /client/rtld.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _INSTREW_RTLD_H 3 | #define _INSTREW_RTLD_H 4 | 5 | #include 6 | #include 7 | 8 | typedef struct RtldObject RtldObject; 9 | struct Rtld { 10 | int perfmap_fd; 11 | int perfdump_fd; 12 | const struct DispatcherInfo* disp_info; 13 | 14 | RtldObject* objects; 15 | size_t objects_idx; 16 | size_t objects_cap; 17 | 18 | void* plt; 19 | 20 | void* server_funcs[16]; 21 | }; 22 | typedef struct Rtld Rtld; 23 | 24 | struct RtldPatchData { 25 | uint64_t sym_addr; 26 | unsigned rel_type; 27 | unsigned rel_size; 28 | int64_t addend; 29 | uintptr_t patch_addr; 30 | }; 31 | 32 | int rtld_init(Rtld* r, const struct DispatcherInfo* disp_info); 33 | /// Init perf support, modes: 0=none, 1=map, 2=map+jitdump 34 | int rtld_perf_init(Rtld* r, int mode); 35 | int rtld_resolve(Rtld* r, uintptr_t addr, void** out_entry); 36 | 37 | int rtld_add_object(Rtld* r, void* obj_base, size_t obj_size, uint64_t skew); 38 | 39 | void rtld_patch(struct RtldPatchData* patch_data, void* sym); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /server/meson.build: -------------------------------------------------------------------------------- 1 | sources = files( 2 | 'cache.cc', 3 | 'callconv.cc', 4 | 'codegenerator.cc', 5 | 'config.cc', 6 | 'connection.cc', 7 | 'optimizer.cc', 8 | 'rewriteserver.cc', 9 | ) 10 | 11 | config_data = configuration_data() 12 | config_data.set_quoted('INSTREW_TOOL_PATH', get_option('prefix')/get_option('libdir')/'instrew') 13 | configure_file(configuration: config_data, output: 'instrew-server-config.h') 14 | 15 | python3 = find_program('python3') 16 | pytobytes = 'import sys;print(repr(list(open(sys.argv[1],"rb").read()))[1:-1])' 17 | client_bytes = custom_target('hex-client', output: 'client.inc', input: client, 18 | capture: true, 19 | command: [python3, '-c', pytobytes, '@INPUT@']) 20 | 21 | version = vcs_tag(input: 'version.cc.in', output: 'version.cc') 22 | 23 | instrew = executable('instrew', sources, version, client_bytes, 24 | include_directories: include_directories('.', '../shared'), 25 | dependencies: [librellume, libllvm, libcrypto], 26 | link_args: ['-ldl'], 27 | install: true) 28 | -------------------------------------------------------------------------------- /shared/instrew-protocol.inc: -------------------------------------------------------------------------------- 1 | 2 | #if defined(INSTREW_MESSAGE_ID) 3 | INSTREW_MESSAGE_ID(0, UNKNOWN) 4 | INSTREW_MESSAGE_ID(1, C_EXIT) 5 | INSTREW_MESSAGE_ID(2, C_TRANSLATE) 6 | INSTREW_MESSAGE_ID(3, S_MEMREQ) 7 | INSTREW_MESSAGE_ID(4, C_MEMBUF) 8 | INSTREW_MESSAGE_ID(5, S_OBJECT) 9 | INSTREW_MESSAGE_ID(6, C_CONFIG1) 10 | INSTREW_MESSAGE_ID(7, C_CONFIG2) 11 | INSTREW_MESSAGE_ID(8, S_CONFIG_STATUS) 12 | INSTREW_MESSAGE_ID(9, C_INIT) 13 | INSTREW_MESSAGE_ID(10, S_INIT) 14 | INSTREW_MESSAGE_ID(11, C_FORK) 15 
| INSTREW_MESSAGE_ID(12, S_FD) // encloses one fd and/or an error status 16 | #elif defined(INSTREW_SERVER_CONF) 17 | // INSTREW_SERVER_CONF_*(id, name, default) 18 | INSTREW_SERVER_CONF_INT32(0, guest_arch, 0) 19 | INSTREW_SERVER_CONF_INT32(0, host_arch, 0) 20 | INSTREW_SERVER_CONF_INT32(0, host_cpu_features, 0) 21 | INSTREW_SERVER_CONF_INT32(0, server_mode, 0) 22 | INSTREW_SERVER_CONF_INT32(0, stack_alignment, 0) 23 | #elif defined(INSTREW_CLIENT_CONF) 24 | // INSTREW_CLIENT_CONF_*(id, name); defaults to zero. 25 | INSTREW_CLIENT_CONF_INT32(1, callconv) 26 | INSTREW_CLIENT_CONF_INT32(1, profile) 27 | INSTREW_CLIENT_CONF_INT32(1, perf) 28 | INSTREW_CLIENT_CONF_INT32(1, print_trace) 29 | INSTREW_CLIENT_CONF_INT32(1, print_regs) 30 | #endif 31 | -------------------------------------------------------------------------------- /client/plt.inc: -------------------------------------------------------------------------------- 1 | PLT_ENTRY("syscall", emulate_syscall) // emulate.c 2 | PLT_ENTRY("syscall_rv64", emulate_rv64_syscall) // emulate.c 3 | PLT_ENTRY("syscall_aarch64", emulate_aarch64_syscall) // emulate.c 4 | PLT_ENTRY("cpuid", emulate_cpuid) // emulate.c 5 | PLT_ENTRY("__divti3", __divti3) // libgcc 6 | PLT_ENTRY("__udivti3", __udivti3) // libgcc 7 | PLT_ENTRY("__modti3", __modti3) // libgcc 8 | PLT_ENTRY("__umodti3", __umodti3) // libgcc 9 | PLT_ENTRY("floorf", floorf) // math.c 10 | PLT_ENTRY("floor", floor) // math.c 11 | PLT_ENTRY("ceilf", ceilf) // math.c 12 | PLT_ENTRY("ceil", ceil) // math.c 13 | PLT_ENTRY("roundf", roundf) // math.c 14 | PLT_ENTRY("round", round) // math.c 15 | PLT_ENTRY("truncf", truncf) // math.c 16 | PLT_ENTRY("trunc", trunc) // math.c 17 | PLT_ENTRY("fmaf", fmaf) // math.c 18 | PLT_ENTRY("fma", fma) // math.c 19 | PLT_ENTRY("instrew_tail_cdecl", dispatch_cdecl) // dispatch.c 20 | PLT_ENTRY("instrew_call_cdecl", dispatch_cdecl) // dispatch.c 21 | #if defined(__x86_64__) 22 | PLT_ENTRY("instrew_tail_hhvm", dispatch_hhvm_tail) // dispatch.c 23 | PLT_ENTRY("instrew_call_hhvm", dispatch_hhvm_tail) // dispatch.c 24 | #endif // defined(__x86_64__) 25 | PLT_ENTRY("memset", memset) // minilibc.c 26 | PLT_ENTRY("dprintf", dprintf) // minilibc.c 27 | -------------------------------------------------------------------------------- /client/state.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _INSTREW_STATE_H 3 | #define _INSTREW_STATE_H 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | 13 | struct State { 14 | Rtld rtld; 15 | Translator translator; 16 | 17 | uint64_t rew_time; 18 | 19 | struct sigaction sigact[_NSIG]; 20 | 21 | struct TranslatorServerConfig tsc; 22 | struct TranslatorConfig tc; 23 | }; 24 | 25 | #define QUICK_TLB_BITS 10 26 | 27 | struct CpuState { 28 | struct CpuState* self; 29 | struct State* state; 30 | uintptr_t _unused[6]; 31 | 32 | _Alignas(64) uint8_t regdata[0x400]; 33 | 34 | _Alignas(64) uint64_t quick_tlb[1 << QUICK_TLB_BITS][2]; 35 | 36 | _Atomic volatile int sigpending; 37 | sigset_t sigmask; 38 | stack_t sigaltstack; 39 | struct siginfo siginfo; 40 | }; 41 | 42 | #define CPU_STATE_REGDATA_OFFSET 0x40 43 | _Static_assert(offsetof(struct CpuState, regdata) == CPU_STATE_REGDATA_OFFSET, 44 | "CPU_STATE_REGDATA_OFFSET mismatch"); 45 | 46 | #define CPU_STATE_QTLB_OFFSET 0x440 47 | _Static_assert(offsetof(struct CpuState, quick_tlb) == CPU_STATE_QTLB_OFFSET, 48 | "CPU_STATE_QTLB_OFFSET mismatch"); 49 | 50 | #define CPU_STATE_FROM_REGS(regdata) ((struct CpuState*) 
\
51 |     ((char*) regdata - CPU_STATE_REGDATA_OFFSET))
52 |
53 | #endif
54 |
--------------------------------------------------------------------------------
/server/connection.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef _INSTREW_SERVER_CONNECTION_H
3 | #define _INSTREW_SERVER_CONNECTION_H
4 |
5 | #include <stdbool.h>
6 | #include <stddef.h>
7 | #include <stdint.h>
8 |
9 |
10 | struct IWServerConfig {
11 | #define INSTREW_SERVER_CONF
12 | #define INSTREW_SERVER_CONF_INT32(id, name, default) \
13 |     int32_t tsc_ ## name;
14 | #include "instrew-protocol.inc"
15 | #undef INSTREW_SERVER_CONF
16 | #undef INSTREW_SERVER_CONF_INT32
17 | } __attribute__((packed));
18 |
19 | struct IWClientConfig {
20 | #define INSTREW_CLIENT_CONF
21 | #define INSTREW_CLIENT_CONF_INT32(id, name) \
22 |     int32_t tc_ ## name = 0;
23 | #include "instrew-protocol.inc"
24 | #undef INSTREW_CLIENT_CONF
25 | #undef INSTREW_CLIENT_CONF_INT32
26 | } __attribute__((packed));
27 |
28 | typedef struct IWConnection IWConnection;
29 |
30 | const struct IWServerConfig* iw_get_sc(IWConnection* iwc);
31 | struct IWClientConfig* iw_get_cc(IWConnection* iwc);
32 | size_t iw_readmem(IWConnection* iwc, uintptr_t addr, size_t len, uint8_t* buf);
33 | bool iw_cache_probe(IWConnection* iwc, uintptr_t addr, const uint8_t* hash);
34 | void iw_sendobj(IWConnection* iwc, uintptr_t addr, const void* data, size_t size, const uint8_t* hash);
35 |
36 | typedef struct IWState IWState;
37 |
38 | struct IWFunctions {
39 |     struct IWState* (* init)(IWConnection* iwc);
40 |     void (* translate)(IWState* state, uintptr_t addr);
41 |     void (* finalize)(IWState* state);
42 | };
43 |
44 | int iw_run_server(const struct IWFunctions* fns, int argc, char** argv);
45 |
46 | #endif
47 |
--------------------------------------------------------------------------------
/client/meson.build:
--------------------------------------------------------------------------------
1 | sources = [
2 |     'dispatch.c',
3 |     'elf-loader.c',
4 |     'emulate.c',
5 |     'main.c',
6 |     'math.c',
7 |     'memory.c',
8 |     'minilibc.c',
9 |     'rtld.c',
10 |     'translator.c',
11 | ]
12 |
13 | if host_machine.cpu_family() == 'aarch64'
14 |     sources += [
15 |         'memset-aarch64.S'
16 |     ]
17 | endif
18 |
19 | client_c_args = ['-D_GNU_SOURCE', '-nostdlib', '-fno-builtin',
20 |                  '-fno-stack-protector', '-fomit-frame-pointer', '-fPIC']
21 | client_link_args = ['-nostdlib', '-nostartfiles', '-lgcc']
22 |
23 | cc = meson.get_compiler('c')
24 | if cc.has_argument('-static-pie')
25 |     client_link_args += ['-static-pie']
26 | else
27 |     client_link_args += ['-Wl,-static', '-Wl,-pie', '-Wl,--no-dynamic-linker', '-Wl,-z,text']
28 | endif
29 |
30 | if host_machine.cpu_family() == 'x86_64'
31 |     if cc.has_argument('-mpreferred-stack-boundary=3') # GCC
32 |         client_c_args += ['-mpreferred-stack-boundary=3']
33 |     elif cc.has_argument('-mstack-alignment=3') # Clang
34 |         client_c_args += ['-mstack-alignment=3']
35 |     else
36 |         error('could not lower stack alignment')
37 |     endif
38 | endif
39 |
40 | client = executable('instrew-client', sources,
41 |                     include_directories: include_directories('.', '../shared'),
42 |                     c_args: client_c_args,
43 |                     link_args: client_link_args,
44 |                     install: true,
45 |                     override_options: ['b_sanitize=none'])
46 |
47 | test('mathlib', executable('test_mathlib', 'math.c', c_args: ['-DTEST', '-fno-builtin']), protocol: 'tap')
48 |
--------------------------------------------------------------------------------
/test/meson.build:
--------------------------------------------------------------------------------
1 |
2 | clang = find_program('clang', native: true, required: false, disabler: true)
3 | if not clang.found()
4 |     warning('unable to find clang; disabling tests')
5 | endif
6 |
7 | # TODO: remove this once non-native page sizes are supported
8 | pagesize_cmd = run_command('getconf', 'PAGESIZE', check: false)
9 | pagesize_flag = []
10 | if pagesize_cmd.returncode() != 0
11 |     warning('unable to get host page size for test binaries')
12 | else
13 |     pagesize = pagesize_cmd.stdout().strip().to_int()
14 |     if pagesize > 4096
15 |         message('compiling tests for host PAGESIZE=@0@'.format(pagesize))
16 |         pagesize_flag = ['-Wl,-z,max-page-size=@0@'.format(pagesize)]
17 |     endif
18 | endif
19 |
20 | foreach arch : ['aarch64', 'riscv64', 'x86_64']
21 |     subdir(arch)
22 |
23 |     testcc = [clang, '--target=@0@'.format(triple), '-nostdlib', '-static', '-fuse-ld=lld'] + pagesize_flag
24 |     testrun = run_command(testcc + ['-o', '/dev/null', files('empty.s')], check: false)
25 |     if testrun.returncode() != 0
26 |         warning('non-functional @0@ Clang/LLD; disabling tests'.format(arch))
27 |         continue
28 |     endif
29 |
30 |     foreach case : cases
31 |         name = '@0@-@1@'.format(arch, case.get('name'))
32 |         exec = custom_target(name,
33 |             input: case.get('src'),
34 |             output: name,
35 |             depfile: name + '.d',
36 |             command: testcc + ['-MD', '-MF', '@DEPFILE@', '-o', '@OUTPUT@', '@INPUT@'] + case.get('compile_args', []))
37 |         test(name, instrew, suite: [arch],
38 |             args: case.get('instrew_args', []) + [exec] + case.get('args', []),
39 |             should_fail: case.get('should_fail', false))
40 |     endforeach
41 | endforeach
42 |
--------------------------------------------------------------------------------
/client/translator.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef _INSTREW_TRANSLATOR_H
3 | #define _INSTREW_TRANSLATOR_H
4 |
5 | struct TranslatorServerConfig {
6 | #define INSTREW_SERVER_CONF
7 | #define INSTREW_SERVER_CONF_BOOL(id, name, default) \
8 |     bool tsc_ ## name;
9 | #define INSTREW_SERVER_CONF_INT32(id, name, default) \
10 |     int32_t tsc_ ## name;
11 | #include "instrew-protocol.inc"
12 | #undef INSTREW_SERVER_CONF
13 | #undef INSTREW_SERVER_CONF_BOOL
14 | #undef INSTREW_SERVER_CONF_INT32
15 | };
16 |
17 | typedef struct TranslatorMsgHdr TranslatorMsgHdr;
18 | struct TranslatorMsgHdr {
19 |     uint32_t id;
20 |     int32_t sz;
21 | };
22 |
23 | struct Translator {
24 |     int socket;
25 |
26 |     size_t written_bytes;
27 |     TranslatorMsgHdr last_hdr;
28 |
29 |     void* recvbuf;
30 |     size_t recvbuf_sz;
31 | };
32 |
33 | typedef struct Translator Translator;
34 |
35 | int translator_init(Translator* t, const char* server_config,
36 |                     const struct TranslatorServerConfig* tsc);
37 | int translator_fini(Translator* t);
38 | int translator_get_object(Translator* t, void** out_obj, size_t* out_obj_size);
39 | int translator_get(Translator* t, uintptr_t addr, void** out_obj,
40 |                    size_t* out_obj_size);
41 |
42 | struct TranslatorConfig {
43 | #define INSTREW_CLIENT_CONF
44 | #define INSTREW_CLIENT_CONF_INT32(id, name) \
45 |     int32_t tc_ ## name;
46 | #include "instrew-protocol.inc"
47 | #undef INSTREW_CLIENT_CONF
48 | #undef INSTREW_CLIENT_CONF_INT32
49 | } __attribute__((packed));
50 |
51 | int translator_config_fetch(Translator* t, struct TranslatorConfig* cfg);
52 |
53 | // Fork server process and return new socket fd.
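// This uses the C_FORK/S_FD message pair from instrew-protocol.inc: the client
// requests the fork before forking itself and, in the child, switches over to
// the new socket via translator_fork_finalize().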
54 | int translator_fork_prepare(Translator* t); 55 | // Client fork succeeded, use forked translator from now on. 56 | int translator_fork_finalize(Translator* t, int fork_fd); 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /meson.build: -------------------------------------------------------------------------------- 1 | project('instrew', ['c', 'cpp'], 2 | default_options: [ 3 | 'buildtype=debugoptimized', 4 | 'default_library=static', 5 | 'warning_level=3', 6 | 'c_std=c11', 7 | 'cpp_std=c++17', 8 | 'b_ndebug=if-release', 9 | ], 10 | meson_version: '>=0.52') 11 | 12 | if host_machine.endian() != 'little' 13 | error('cannot compile for non-little-endian machine') 14 | endif 15 | 16 | add_project_arguments(['-Wmissing-field-initializers', 17 | '-Wunused-parameter', 18 | '-Wshadow', 19 | '-Wpointer-arith', 20 | '-Wwrite-strings', 21 | '-Winline', 22 | '-Wformat-nonliteral', 23 | '-Wformat-security', 24 | '-Wswitch-default', 25 | '-Wundef', 26 | '-Wno-cast-align', # we frequently do casts 27 | '-Werror=incompatible-pointer-types', 28 | '-Werror=implicit-function-declaration'], 29 | language: 'c') 30 | 31 | add_global_arguments(['-fno-rtti', '-fno-exceptions'], language: 'cpp') 32 | 33 | llvm_version = ['>=16', '<19'] 34 | # First, attempt to use the shared library. 35 | libllvm = dependency('llvm', version: llvm_version, static: false, 36 | method: 'config-tool', include_type: 'system', 37 | required: false) 38 | if not libllvm.found() 39 | # Try static libraries. 40 | libllvm = dependency('llvm', version: llvm_version, static: true, 41 | method: 'config-tool', include_type: 'system', 42 | modules: ['x86', 'aarch64', 'riscv', 'analysis', 'passes']) 43 | endif 44 | add_project_arguments(['-DLL_LLVM_MAJOR='+libllvm.version().split('.')[0]], language: 'cpp') 45 | 46 | libcrypto = dependency('libcrypto') 47 | 48 | rellume = subproject('rellume') 49 | librellume = rellume.get_variable('librellume') 50 | 51 | subdir('client') 52 | subdir('server') 53 | subdir('test') 54 | -------------------------------------------------------------------------------- /client/memset-aarch64.S: -------------------------------------------------------------------------------- 1 | // https://github.com/ARM-software/optimized-routines/blob/master/string/aarch64/memset.S 2 | // some macros were inlined 3 | 4 | /* 5 | * memset - fill memory with a constant byte 6 | * 7 | * Copyright (c) 2012-2021, Arm Limited. 8 | * SPDX-License-Identifier: MIT 9 | */ 10 | 11 | /* Assumptions: 12 | * 13 | * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. 14 | * 15 | */ 16 | 17 | #define ENTRY(name) \ 18 | .global name; \ 19 | .type name,%function; \ 20 | .align 6; \ 21 | name: \ 22 | .cfi_startproc; 23 | 24 | #define END(name) \ 25 | .cfi_endproc; \ 26 | .size name, .-name; 27 | 28 | #define L(l) .L ## l 29 | 30 | 31 | #define dstin x0 32 | #define val x1 33 | #define valw w1 34 | #define count x2 35 | #define dst x3 36 | #define dstend x4 37 | #define zva_val x5 38 | 39 | ENTRY (memset) 40 | dup v0.16B, valw 41 | add dstend, dstin, count 42 | 43 | cmp count, 96 44 | b.hi L(set_long) 45 | cmp count, 16 46 | b.hs L(set_medium) 47 | mov val, v0.D[0] 48 | 49 | /* Set 0..15 bytes. 
*/
50 |     tbz count, 3, 1f
51 |     str val, [dstin]
52 |     str val, [dstend, -8]
53 |     ret
54 |     .p2align 4
55 | 1:  tbz count, 2, 2f
56 |     str valw, [dstin]
57 |     str valw, [dstend, -4]
58 |     ret
59 | 2:  cbz count, 3f
60 |     strb valw, [dstin]
61 |     tbz count, 1, 3f
62 |     strh valw, [dstend, -2]
63 | 3:  ret
64 |
65 |     /* Set 17..96 bytes. */
66 | L(set_medium):
67 |     str q0, [dstin]
68 |     tbnz count, 6, L(set96)
69 |     str q0, [dstend, -16]
70 |     tbz count, 5, 1f
71 |     str q0, [dstin, 16]
72 |     str q0, [dstend, -32]
73 | 1:  ret
74 |
75 |     .p2align 4
76 |     /* Set 64..96 bytes. Write 64 bytes from the start and
77 |        32 bytes from the end. */
78 | L(set96):
79 |     str q0, [dstin, 16]
80 |     stp q0, q0, [dstin, 32]
81 |     stp q0, q0, [dstend, -32]
82 |     ret
83 |
84 |     .p2align 4
85 | L(set_long):
86 |     and valw, valw, 255
87 |     bic dst, dstin, 15
88 |     str q0, [dstin]
89 |     cmp count, 160
90 |     ccmp valw, 0, 0, hs
91 |     b.ne L(no_zva)
92 |
93 | #ifndef SKIP_ZVA_CHECK
94 |     mrs zva_val, dczid_el0
95 |     and zva_val, zva_val, 31
96 |     cmp zva_val, 4 /* ZVA size is 64 bytes. */
97 |     b.ne L(no_zva)
98 | #endif
99 |     str q0, [dst, 16]
100 |     stp q0, q0, [dst, 32]
101 |     bic dst, dst, 63
102 |     sub count, dstend, dst /* Count is now 64 too large. */
103 |     sub count, count, 128 /* Adjust count and bias for loop. */
104 |
105 |     .p2align 4
106 | L(zva_loop):
107 |     add dst, dst, 64
108 |     dc zva, dst
109 |     subs count, count, 64
110 |     b.hi L(zva_loop)
111 |     stp q0, q0, [dstend, -64]
112 |     stp q0, q0, [dstend, -32]
113 |     ret
114 |
115 | L(no_zva):
116 |     sub count, dstend, dst /* Count is 16 too large. */
117 |     sub dst, dst, 16 /* Dst is biased by -32. */
118 |     sub count, count, 64 + 16 /* Adjust count and bias for loop. */
119 | L(no_zva_loop):
120 |     stp q0, q0, [dst, 32]
121 |     stp q0, q0, [dst, 64]!
122 |     subs count, count, 64
123 |     b.hi L(no_zva_loop)
124 |     stp q0, q0, [dstend, -64]
125 |     stp q0, q0, [dstend, -32]
126 |     ret
127 |
128 | END (memset)
129 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Instrew — LLVM-based Dynamic Binary Translation
2 |
3 | [![builds.sr.ht status](https://builds.sr.ht/~aengelke/instrew/commits/master.svg)](https://builds.sr.ht/~aengelke/instrew/commits/master?)
4 |
5 | Instrew is a performance-targeted transparent dynamic binary translator (and instrumenter) based on LLVM. Currently supported source/guest architectures are x86-64, AArch64, and RISC-V64 (rv64imafdc); supported host architectures are x86-64 and AArch64. The original code is lifted to LLVM-IR using [Rellume](https://github.com/aengelke/rellume), where it can be modified and from which new machine code is generated using LLVM's JIT compiler.
6 |
7 | ### Using Instrew
8 |
9 | After cloning and checking out submodules, compile Instrew as follows:
10 |
11 | ```
12 | mkdir build
13 | meson build -Dbuildtype=release
14 | ninja -C build
15 | # optionally, run tests
16 | ninja -C build test
17 | ```
18 |
19 | Afterwards, you can run an application with Instrew. Statically linked applications often have a significantly lower translation time. Recent glibc versions tend to use new syscalls that are not yet supported, so warnings about missing system calls can sometimes be ignored.
20 |
21 | ```
22 | ./build/server/instrew /bin/ls -l
23 | ```
24 |
25 | You can also use some options to customize the translation:
26 |
27 | - `-profile`: print information about the time used for translation.
28 | - `-callret`: enable call-return optimization. Often gives higher run-time performance at the cost of higher translation time.
29 | - `-targetopt=n`: set LLVM optimization level, 0-3. Default is 2; use 0 for FastISel.
30 | - `-fastcc=0`: use C calling convention instead of architecture-specific optimized calling convention; primarily useful for debugging.
31 | - `-perf=n`: enable perf support. 1=generate memory map, 2=generate JITDUMP.
32 | - `-dumpir={lift,cc,opt,codegen}`: print IR after the specified stage. Generates lots of output.
33 | - `-dumpobj`: dump compiled code into object files in the current working directory.
34 | - `-help`/`-help-hidden` show more options.
35 |
36 | Example:
37 |
38 | ```
39 | ./build/server/instrew -profile -targetopt=0 /bin/ls -l
40 | ```
41 |
42 | ### Architecture
43 |
44 | Instrew implements a two-process client/server architecture: the lightweight client contains the guest address space as well as the code cache and controls execution, querying rewritten objects as necessary from the server. The server performs lifting (requesting instruction bytes from the client when required), instrumentation, and code generation and sends back an ELF object file. When receiving a new object file, the client resolves missing symbols and applies relocations.
45 |
46 | ### Publications
47 |
48 | - Alexis Engelke. Optimizing Performance Using Dynamic Code Generation. Dissertation. Technical University of Munich, Munich, 2021. ([Thesis](https://mediatum.ub.tum.de/doc/1614897/1614897.pdf))
49 | - Alexis Engelke, Dominik Okwieka, and Martin Schulz. Efficient LLVM-Based Dynamic Binary Translation. In 17th ACM SIGPLAN/SIGOPS International Conference on Virtual Execution Environments (VEE ’21), April 16, 2021. [Paper](https://home.in.tum.de/~engelke/pubs/2104-vee.pdf)
50 | - Alexis Engelke and Martin Schulz. Instrew: Leveraging LLVM for High Performance Dynamic Binary Instrumentation. In 16th ACM SIGPLAN/SIGOPS International Conference on Virtual Execution Environments (VEE ’20), March 17, 2020, Lausanne, Switzerland. [Paper](https://home.in.tum.de/~engelke/pubs/2003-vee.pdf) -- Please cite this paper when referring to Instrew in general.
51 |
52 | ### License
53 | Instrew is licensed under LGPLv2.1+.
54 |
--------------------------------------------------------------------------------
/server/optimizer.cc:
--------------------------------------------------------------------------------
1 |
2 | #include "optimizer.h"
3 |
4 | #include "config.h"
5 |
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include
19 | // #include
20 | #include
21 | // #include
22 | #include
23 | #include
24 | // #include
25 | #include
26 | #include
27 | #include
28 |
29 |
30 | void Optimizer::Optimize(llvm::Function* fn) {
31 |     llvm::PassBuilder pb;
32 |     llvm::FunctionPassManager fpm{};
33 |
34 |     llvm::LoopAnalysisManager lam{};
35 |     llvm::FunctionAnalysisManager fam{};
36 |     llvm::CGSCCAnalysisManager cgam{};
37 |     llvm::ModuleAnalysisManager mam{};
38 |     llvm::PassInstrumentationCallbacks pic{};
39 |     llvm::PrintIRInstrumentation pii{};
40 |
41 |     pii.registerCallbacks(pic);
42 |     fam.registerPass([&] { return llvm::PassInstrumentationAnalysis(&pic); });
43 |
44 |     // Register the AA manager first so that our version is the one used.
45 |     fam.registerPass([&] { return pb.buildDefaultAAPipeline(); });
46 |     // Register analysis passes...
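    // ...for all four IR units (module, CGSCC, function, loop), then
    // cross-register the proxies so function passes can query the other managers.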
47 |     pb.registerModuleAnalyses(mam);
48 |     pb.registerCGSCCAnalyses(cgam);
49 |     pb.registerFunctionAnalyses(fam);
50 |     pb.registerLoopAnalyses(lam);
51 |     pb.crossRegisterProxies(lam, fam, cgam, mam);
52 |
53 |     // fpm = pb.buildFunctionSimplificationPipeline(llvm::PassBuilder::O3, llvm::PassBuilder::ThinLTOPhase::None, false);
54 |
55 |     // fpm.addPass(llvm::ADCEPass());
56 |     fpm.addPass(llvm::DCEPass());
57 |     fpm.addPass(llvm::EarlyCSEPass(/*MemorySSA=*/false));
58 |     // fpm.addPass(llvm::NewGVNPass());
59 |     // fpm.addPass(llvm::DSEPass());
60 |
61 |     // This is tricky. For LLVM <=9, the parameter indicates "expensive
62 |     // combines" -- which is what we want to be false. For LLVM 11+, however,
63 |     // the parameter suddenly means NumIterations, and if we pass "false",
64 |     // InstCombine does zero iterations -- which is not what we want. So we go
65 |     // with the default option, which brings useless "expensive combines" (they
66 |     // were, thus, removed in later LLVM versions), but makes LLVM 11 work.
67 |     fpm.addPass(llvm::InstCombinePass());
68 |     // fpm.addPass(llvm::CorrelatedValuePropagationPass());
69 |     // if (instrew_cfg.extrainstcombine)
70 |     //     fpm.addPass(llvm::SimplifyCFGPass());
71 |     // fpm.addPass(llvm::AggressiveInstCombinePass());
72 |     // fpm.addPass(llvm::ReassociatePass());
73 |     // fpm.addPass(llvm::MergedLoadStoreMotionPass());
74 |     fpm.addPass(llvm::MemCpyOptPass());
75 |     // if (instrew_cfg.extrainstcombine)
76 |     //     fpm.addPass(llvm::InstCombinePass());
77 |     // fpm.addPass(llvm::SCCPPass());
78 |     // fpm.addPass(llvm::AAEvaluator());
79 |     fpm.run(*fn, fam);
80 | }
81 |
82 | void Optimizer::appendConfig(llvm::SmallVectorImpl<char>& buffer) const {
83 |     struct {
84 |         uint32_t version = 1;
85 |     } config;
86 |
87 |     std::size_t start = buffer.size();
88 |     buffer.resize_for_overwrite(buffer.size() + sizeof(config));
89 |     std::memcpy(&buffer[start], &config, sizeof(config));
90 | }
91 |
--------------------------------------------------------------------------------
/client/memory.c:
--------------------------------------------------------------------------------
1 |
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 |
8 | #include
9 |
10 |
11 | #define MEM_BASE ((void*) 0x0000400000000000ull)
12 | #define MEM_CODE_SIZE 0x40000000
13 | #define MEM_DATA_SIZE 0x01000000
14 |
15 | typedef struct Arena Arena;
16 | struct Arena {
17 |     char* start;
18 |     char* end;
19 |     char* brk;
20 |     char* brkp;
21 | };
22 |
23 | static int
24 | arena_init(Arena* arena, void* base, size_t size) {
25 |     void* mem = mmap(base, size, PROT_NONE,
26 |                      MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0);
27 |     if (BAD_ADDR(mem))
28 |         return (int) (uintptr_t) mem;
29 |
30 |     arena->start = mem;
31 |     arena->end = (char*) mem + size;
32 |     arena->brk = mem;
33 |     arena->brkp = mem;
34 |
35 |     return 0;
36 | }
37 |
38 | static void*
39 | arena_alloc(Arena* arena, size_t size, size_t alignment, bool exec) {
40 |     if (alignment < 0x40)
41 |         alignment = 0x40;
42 |     if (alignment & (alignment - 1))
43 |         return (void*) (uintptr_t) -EINVAL;
44 |     char* brk_al = (char*) ALIGN_UP((uintptr_t) arena->brk, alignment);
45 |     if (brk_al + size <= arena->brkp) { // easy case.
46 |         arena->brk = brk_al + size;
47 |         return brk_al;
48 |     }
49 |     size_t newpgsz = ALIGN_UP(size - (arena->brkp - brk_al), getpagesize());
50 |     if (arena->brk + newpgsz > arena->end)
51 |         return (void*) (uintptr_t) -ENOMEM;
52 |     int prot = PROT_READ|PROT_WRITE | (exec ? PROT_EXEC : 0);
53 |     int ret = mprotect(arena->brkp, newpgsz, prot);
54 |     if (ret < 0)
55 |         return (void*) (uintptr_t) ret;
56 |     arena->brkp += newpgsz;
57 |     arena->brk = brk_al + size;
58 |     return brk_al;
59 | }
60 |
61 | Arena main_arena_code;
62 | Arena main_arena_data;
63 |
64 | int
65 | mem_init(void) {
66 |     int ret = arena_init(&main_arena_data, MEM_BASE, MEM_DATA_SIZE);
67 |     if (ret)
68 |         return ret;
69 |     void* code_arena_base = (void*) ((uintptr_t) MEM_BASE + MEM_DATA_SIZE);
70 |     ret = arena_init(&main_arena_code, code_arena_base, MEM_CODE_SIZE);
71 |     if (ret)
72 |         return ret;
73 |     return 0;
74 | }
75 |
76 | void*
77 | mem_alloc_data(size_t size, size_t alignment) {
78 |     return arena_alloc(&main_arena_data, size, alignment, /*exec=*/false);
79 | }
80 |
81 | void*
82 | mem_alloc_code(size_t size, size_t alignment) {
83 |     return arena_alloc(&main_arena_code, size, alignment, /*exec=*/true);
84 | }
85 |
86 | int
87 | mem_write_code(void* dst, const void* src, size_t size) {
88 |     // Note: if W^X is enforced, the pages need to be mapped somewhere else for
89 |     // writing (e.g., using memfd).
90 |     memcpy(dst, src, size);
91 |
92 |     // Flush ICache, except for x86-64.
93 | #if defined(__x86_64__)
94 |     // Do nothing; x86-64 flushes ICache automatically.
95 | #elif defined(__aarch64__)
96 |     uintptr_t dstu = (uintptr_t) dst;
97 |     // Procedure from AArch64 Manual, B2.4.4
98 |     uint64_t ctr_el0 = 0;
99 |     __asm__("mrs %0, ctr_el0" : "=r"(ctr_el0));
100 |     // Encoding of cache line sizes is log2(#words), one word is 4 bytes.
101 |     size_t dc_line_sz = 4 << ((ctr_el0 >> 16) & 0xf); // DCache min line size
102 |     size_t ic_line_sz = 4 << ((ctr_el0 >> 0) & 0xf); // ICache min line size
103 |
104 |     if (!(ctr_el0 & (1 << 28))) { // IDC == 0 => DC invalidation required
105 |         for (uintptr_t p = dstu & ~(dc_line_sz - 1); p < dstu + size; p += dc_line_sz)
106 |             __asm__ volatile("dc cvau, %0" : : "r"(p));
107 |         __asm__ volatile("dsb ish");
108 |     }
109 |     if (!(ctr_el0 & (1 << 29))) { // DIC == 0 => IC invalidation required
110 |         for (uintptr_t p = dstu & ~(ic_line_sz - 1); p < dstu + size; p += ic_line_sz)
111 |             __asm__ volatile("ic ivau, %0" : : "r"(p));
112 |         __asm__ volatile("dsb ish");
113 |     }
114 |     __asm__ volatile("isb");
115 | #else
116 | #error "Implement ICache flush for unknown target"
117 | #endif
118 |     return 0;
119 | }
120 |
--------------------------------------------------------------------------------
/client/common.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef COMMON_H
3 | #define COMMON_H
4 |
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 |
18 | #ifndef __has_attribute
19 | #define __has_attribute(x) 0
20 | #endif
21 |
22 | #if __has_attribute(externally_visible)
23 | #define GNU_FORCE_EXTERN __attribute__((externally_visible))
24 | #else
25 | #define GNU_FORCE_EXTERN
26 | #endif
27 |
28 | typedef __kernel_ssize_t ssize_t;
29 | typedef __kernel_off_t off_t;
30 | typedef __kernel_pid_t pid_t;
31 |
32 | struct msghdr {
33 |     void* msg_name;
34 |     int msg_namelen;
35 |     struct iovec* msg_iov;
36 |     size_t msg_iovlen;
37 |     void* msg_control;
38 |     size_t msg_controllen;
39 |     unsigned msg_flags;
40 | };
41 |
42 | extern char **environ;
43 |
44 | long syscall(long, long, long, long, long, long, long);
45 |
46 | int set_thread_area(void* tp);
47 | void* get_thread_area(void);
48 |
49 | __attribute__((noreturn)) void _exit(int status);
50 | int getpid(void);
51 | int gettid(void);
52 |
53 | // time.h
54 | int clock_gettime(int clk_id, struct timespec* tp);
55 | int nanosleep(const struct timespec* req, struct timespec* rem);
56 |
57 | int open(const char* pathname, int flags, int mode);
58 | int openat(int dirfd, const char* pathname, int flags, int mode);
59 | off_t lseek(int fd, off_t offset, int whence);
60 | ssize_t read(int fd, void* buf, size_t count);
61 | ssize_t write(int fd, const void* buf, size_t count);
62 | int close(int fd);
63 |
64 | #define SCM_RIGHTS 0x1
65 | #define MSG_CMSG_CLOEXEC 0x40000000
66 | ssize_t recvmsg(int fd, struct msghdr* msg, int flags);
67 |
68 | ssize_t read_full(int fd, void* buf, size_t nbytes);
69 | ssize_t write_full(int fd, const void* buf, size_t nbytes);
70 |
71 | // sys/auxv.h
72 | unsigned long int getauxval(unsigned long int __type);
73 |
74 | // sys/mman.h
75 | void* mmap(void* addr, size_t length, int prot, int flags, int fd,
76 |            off_t offset);
77 | int munmap(void* addr, size_t length);
78 | int mprotect(void* addr, size_t len, int prot);
79 |
80 | // stdio.h
81 | int vsnprintf(char* str, size_t size, const char* restrict format, va_list args);
82 | int snprintf(char* str, size_t size, const char* restrict format, ...);
83 | int vdprintf(int fd, const char* restrict format, va_list args);
84 | int dprintf(int fd, const char* restrict format, ...);
85 | int printf(const char* restrict format, ...);
86 | int puts(const char* s);
87 |
88 | // string.h
89 | size_t strlen(const char* s);
90 | int strcmp(const char* s1, const char* s2);
91 | int strncmp(const char* s1, const char* s2, size_t n);
92 | char* strchr(const char* s, int c);
93 | void* memset(void* s, int c, size_t n);
94 | int memcmp(const void* s1, const void* s2, size_t n);
95 | void* memcpy(void* dest, const void* src, size_t n);
96 |
97 | // signal.h
98 | #ifndef _NSIG
99 | #define _NSIG 64
100 | #else
101 | _Static_assert(_NSIG == 64, "_NSIG mismatch");
102 | #endif
103 | int kill(pid_t pid, int sig);
104 | int sigemptyset(sigset_t* set);
105 | int sigfillset(sigset_t* set);
106 | int sigaddset(sigset_t* set, int signum);
107 | int sigdelset(sigset_t* set, int signum);
108 | int sigismember(const sigset_t* set, int signum);
109 | int sigaction(int signum, const struct sigaction* restrict act,
110 |               struct sigaction* restrict oact);
111 | int sigprocmask(int how, const sigset_t* restrict set, sigset_t* restrict old);
112 | int sigsuspend(const sigset_t* set);
113 |
114 | int execve(const char* filename, const char* const argv[], const char* const envp[]);
115 | int dup2(int oldfd, int newfd);
116 | int pipe2(int pipefd[2], int flags);
117 | int __clone(int (*func)(void *), void *stack, int flags, void *arg, ...);
118 |
119 | size_t getpagesize(void) __attribute__((const));
120 |
121 | #define STRINGIFY_ARG(x) #x
122 | #define STRINGIFY(x) STRINGIFY_ARG(x)
123 | #define STRINGIFY_VA_ARG(...) #__VA_ARGS__
124 | #define STRINGIFY_VA(...) STRINGIFY_VA_ARG(__VA_ARGS__)
125 |
126 | #define PASTE_ARGS(a,b) a ## b
127 | #define PASTE(a,b) PASTE_ARGS(a, b)
128 |
129 | #define ALIGN_DOWN(v,a) ((v) & ~((a)-1))
130 | #define ALIGN_UP(v,a) (((v) + ((a)-1)) & ~((a)-1))
131 |
132 | #define LIKELY(x) __builtin_expect((x), 1)
133 | #define UNLIKELY(x) __builtin_expect((x), 0)
134 |
135 | #define ASM_BLOCK(...) __asm__(STRINGIFY_VA(__VA_ARGS__))
136 |
137 | #if __SIZEOF_POINTER__ == 8
138 | #define BAD_ADDR(a) (((uintptr_t) (a)) > 0xfffffffffffff000ULL)
139 | #else
140 | #define BAD_ADDR(a) (((uintptr_t) (a)) > 0xfffff000UL)
141 | #endif
142 |
143 | #endif
144 |
--------------------------------------------------------------------------------
/server/codegenerator.cc:
--------------------------------------------------------------------------------
1 |
2 | #include "codegenerator.h"
3 |
4 | #include "connection.h"
5 | #include "config.h"
6 |
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | // LLVM < 13 has TargetRegistry.h in Support/
13 | #if __has_include(<llvm/MC/TargetRegistry.h>)
14 | #include <llvm/MC/TargetRegistry.h>
15 | #else
16 | #include <llvm/Support/TargetRegistry.h>
17 | #endif
18 | #include
19 | #include
20 |
21 | #include
22 | #include
23 | #include
24 |
25 |
26 | namespace {
27 |
28 | llvm::cl::opt<unsigned> targetopt("targetopt", llvm::cl::init(2),
29 |     llvm::cl::desc("Back-end optimization level (default: 2)"),
30 |     llvm::cl::value_desc("0/1/2/3"),
31 |     llvm::cl::cat(CodeGenCategory));
32 |
33 | } // end anonymous namespace
34 |
35 | class CodeGenerator::impl {
36 | private:
37 |     llvm::SmallVectorImpl<char>& obj_buffer;
38 |     llvm::raw_svector_ostream obj_stream;
39 |     llvm::MCContext* mc_ctx;
40 |     std::unique_ptr<llvm::TargetMachine> target;
41 |     llvm::legacy::PassManager mc_pass_manager;
42 |
43 | public:
44 |     impl(const IWServerConfig& server_config, bool pic,
45 |          llvm::SmallVectorImpl<char> &o)
46 |             : obj_buffer(o), obj_stream(o), mc_ctx(nullptr), mc_pass_manager() {
47 |         llvm::InitializeNativeTarget();
48 |         llvm::InitializeNativeTargetAsmPrinter();
49 |         llvm::InitializeNativeTargetAsmParser();
50 |
51 |         llvm::TargetOptions target_options;
52 |         target_options.EnableFastISel = targetopt == 0; // Use FastISel for CodeGenOpt::None
53 | #if LL_LLVM_MAJOR < 13
54 |         // In LLVM 13+, Module::setOverrideStackAlignment is used instead.
55 |         if (server_config.tsc_stack_alignment != 0)
56 |             target_options.StackAlignmentOverride = server_config.tsc_stack_alignment;
57 | #endif
58 |
59 |         std::string triple;
60 |         llvm::CodeModel::Model cm;
61 |         switch (server_config.tsc_host_arch) {
62 |         case EM_X86_64:
63 |             triple = "x86_64-unknown-linux-gnu";
64 |             cm = pic ? llvm::CodeModel::Medium : llvm::CodeModel::Small;
65 |             break;
66 |         case EM_AARCH64:
67 |             triple = "aarch64-unknown-linux-gnu";
68 |             // The AArch64 target doesn't support the medium code model.
69 |             cm = pic ? llvm::CodeModel::Large : llvm::CodeModel::Small;
70 |             break;
71 |         default:
72 |             std::cerr << "unknown host architecture" << std::endl;
73 |             abort();
74 |         }
75 |
76 |         llvm::Reloc::Model rm = llvm::Reloc::Static;
77 |         // For non-PIC code, we use the small code model. Since we don't link
78 |         // objects to 32-bit addresses, these must be addressed PC-relative.
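        // Reloc::PIC_ provides that: it forces PC-relative (RIP-relative/ADRP)
        // addressing even though the object is not otherwise position-independent.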
79 |         if (!pic)
80 |             rm = llvm::Reloc::PIC_;
81 |
82 |         std::string error;
83 |         const llvm::Target* the_target = llvm::TargetRegistry::lookupTarget(triple, error);
84 |         if (!the_target) {
85 |             std::cerr << "could not get target: " << error << std::endl;
86 |             abort();
87 |         }
88 |
89 |         target = std::unique_ptr<llvm::TargetMachine>(the_target->createTargetMachine(
90 |             /*TT=*/triple, /*CPU=*/"",
91 |             /*Features=*/"", /*Options=*/target_options,
92 |             /*RelocModel=*/rm,
93 |             /*CodeModel=*/cm,
94 | #if LL_LLVM_MAJOR < 18
95 |             /*OptLevel=*/static_cast<llvm::CodeGenOpt::Level>(unsigned(targetopt)),
96 | #else
97 |             /*OptLevel=*/llvm::CodeGenOpt::getLevel(targetopt).value_or(llvm::CodeGenOptLevel::Default),
98 | #endif
99 |             /*JIT=*/true
100 |         ));
101 |         if (!target) {
102 |             std::cerr << "could not allocate target machine" << std::endl;
103 |             abort();
104 |         }
105 |
106 |         if (target->addPassesToEmitMC(mc_pass_manager, mc_ctx, obj_stream,
107 |                                       /*DisableVerify=*/true)) {
108 |             std::cerr << "target doesn't support code gen" << std::endl;
109 |             abort();
110 |         }
111 |     }
112 |
113 |     void GenerateCode(llvm::Module* mod) {
114 |         mod->setDataLayout(target->createDataLayout());
115 |         obj_buffer.clear();
116 |         mc_pass_manager.run(*mod);
117 |     }
118 | };
119 |
120 | CodeGenerator::CodeGenerator(const IWServerConfig& sc, bool pic,
121 |                              llvm::SmallVectorImpl<char>& o)
122 |         : pimpl{std::make_unique<impl>(sc, pic, o)} {}
123 | CodeGenerator::~CodeGenerator() {}
124 | void CodeGenerator::GenerateCode(llvm::Module* m) { pimpl->GenerateCode(m); }
125 |
126 | void CodeGenerator::appendConfig(llvm::SmallVectorImpl<char>& buffer) const {
127 |     struct {
128 |         uint32_t version = 1;
129 |         uint32_t targetopt = ::targetopt;
130 |     } config;
131 |
132 |     std::size_t start = buffer.size();
133 |     buffer.resize_for_overwrite(buffer.size() + sizeof(config));
134 |     std::memcpy(&buffer[start], &config, sizeof(config));
135 | }
136 |
--------------------------------------------------------------------------------
/server/cache.cc:
--------------------------------------------------------------------------------
1 |
2 | #include "cache.h"
3 |
4 | #include "config.h"
5 |
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 |
18 | namespace instrew {
19 |
20 | // Cache implementation heavily inspired by MESA:
21 | // https://github.com/mesa3d/mesa/blob/main/src/util/disk_cache_os.c
22 |
23 | namespace {
24 |
25 | enum class CacheMode {
26 |     ReadOnly, WriteOnly, ReadWrite
27 | };
28 |
29 | llvm::cl::opt<bool> cacheEnabled("cache", llvm::cl::desc("Enable caching"), llvm::cl::cat(InstrewCategory));
30 | llvm::cl::opt<CacheMode> cacheMode("cachemode", llvm::cl::desc("Allowed cache accesses:"),
31 |     llvm::cl::init(CacheMode::ReadWrite),
32 |     llvm::cl::values(
33 |         clEnumValN(CacheMode::ReadWrite, "readwrite", "Read-write (default)"),
34 |         clEnumValN(CacheMode::ReadOnly, "readonly", "Read-only"),
35 |         clEnumValN(CacheMode::WriteOnly, "writeonly", "Write-only (rarely useful)")
36 |     ), llvm::cl::cat(InstrewCategory));
37 | llvm::cl::opt<std::string> cacheDir("cachedir", llvm::cl::desc("Cache directory"), llvm::cl::cat(InstrewCategory));
38 | llvm::cl::opt<bool> cacheVerbose("cacheverbose", llvm::cl::desc("Print cache operations"), llvm::cl::Hidden, llvm::cl::cat(InstrewCategory));
39 |
40 | struct HexBuffer {
41 |     const uint8_t* buf;
42 |     size_t size;
43 |     friend std::ostream& operator<<(std::ostream& os, HexBuffer const& self) {
44 |         os << std::hex << std::setfill('0');
45 |         for (size_t i = 0; i != self.size; i++)
46 |             os << std::setw(2) << static_cast<int>(self.buf[i]);
static_cast<unsigned>(self.buf[i]);
47 |         return os << std::dec;
48 |     }
49 | };
50 |
51 | static ssize_t write_full(int fd, const char* buf, size_t nbytes) {
52 |     size_t total_written = 0;
53 |     const char* buf_cp = buf;
54 |     while (total_written < nbytes) {
55 |         ssize_t bytes_written = write(fd, buf_cp + total_written, nbytes - total_written);
56 |         if (bytes_written < 0)
57 |             return bytes_written;
58 |         if (bytes_written == 0)
59 |             return -EIO;
60 |         total_written += bytes_written;
61 |     }
62 |     return total_written;
63 | }
64 |
65 | } // end namespace
66 |
67 | Cache::Cache() {
68 |     allow_read = false;
69 |     allow_write = false; // default to no cache -- it's not critical.
70 |     if (!cacheEnabled)
71 |         return;
72 |     if (geteuid() != getuid())
73 |         return;
74 |     if (!cacheDir.empty()) {
75 |         path = static_cast<std::string>(cacheDir);
76 |     } else {
77 |         passwd* pw = getpwuid(getuid());
78 |         path = pw->pw_dir;
79 |         path /= ".cache";
80 |         path /= "instrew";
81 |     }
82 |     std::error_code ec;
83 |     // Note: create_directories creates this with mode 0777; 0755 would be better.
84 |     // TODO: fix cache dir permissions
85 |     std::filesystem::create_directories(path, ec);
86 |     if (ec) {
87 |         std::cerr << "unable to create cache directory, disabling cache" << std::endl;
88 |         return;
89 |     }
90 |     allow_read = cacheMode != CacheMode::WriteOnly;
91 |     allow_write = cacheMode != CacheMode::ReadOnly;
92 | }
93 |
94 | Cache::~Cache() {
95 | }
96 |
97 | std::filesystem::path Cache::FileName(const uint8_t* hash, std::string suffix) {
98 |     std::stringstream fn;
99 |     fn << HexBuffer{hash, HASH_SIZE} << suffix;
100 |     return path / fn.str();
101 | }
102 |
103 | std::pair<int, size_t> Cache::Get(const uint8_t* hash) {
104 |     if (!allow_read)
105 |         return std::make_pair(-1, 0);
106 |
107 |     std::filesystem::path cachefile = FileName(hash);
108 |     int fd = open(cachefile.c_str(), O_RDONLY | O_CLOEXEC);
109 |     if (fd == -1)
110 |         return std::make_pair(-1, 0);
111 |     struct stat sb;
112 |     if (fstat(fd, &sb) == -1) {
113 |         close(fd);
114 |         return std::make_pair(-1, 0);
115 |     }
116 |     if (cacheVerbose)
117 |         std::cerr << "hitting " << cachefile << "\n";
118 |     return std::make_pair(fd, sb.st_size);
119 | }
120 |
121 | void Cache::Put(const uint8_t* hash, size_t bufsz, const char* buf) {
122 |     if (!allow_write)
123 |         return;
124 |
125 |     std::filesystem::path cachefile = FileName(hash, ".tmp");
126 |     std::string cachefile_tmp = cachefile; // copy
127 |     cachefile.replace_extension("");
128 |
129 |     struct flock lock{};
130 |
131 |     int fd_real = -1;
132 |     int fd = open(cachefile_tmp.c_str(), O_WRONLY | O_CLOEXEC | O_CREAT, 0644);
133 |     if (fd == -1)
134 |         goto close_fds;
135 |
136 |     lock.l_type = F_WRLCK;
137 |     lock.l_whence = SEEK_SET;
138 |     if (fcntl(fd, F_SETLK, &lock) < 0)
139 |         goto close_fds;
140 |
141 |     fd_real = open(cachefile.c_str(), O_RDONLY | O_CLOEXEC);
142 |     if (fd_real >= 0) { // someone else got it already, so nothing to do.
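        // (Writers racing on the same entry are serialized by the F_SETLK lock
        // on the shared <hash>.tmp file acquired above; since the file name is
        // derived from the content hash, a readable <hash> file at this point
        // necessarily holds identical contents, so dropping our tmp file loses
        // nothing.)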
143 | unlink(cachefile_tmp.c_str()); 144 | goto close_fds; 145 | } 146 | if (write_full(fd, buf, bufsz) < 0) { 147 | unlink(cachefile_tmp.c_str()); 148 | goto close_fds; 149 | } 150 | if (cacheVerbose) 151 | std::cerr << "writing to " << cachefile << "\n"; 152 | if (rename(cachefile_tmp.c_str(), cachefile.c_str()) < 0) { 153 | unlink(cachefile_tmp.c_str()); 154 | goto close_fds; 155 | } 156 | 157 | close_fds: 158 | if (fd_real != -1) 159 | close(fd_real); 160 | if (fd != -1) 161 | close(fd); 162 | } 163 | 164 | } // namespace instrew 165 | -------------------------------------------------------------------------------- /client/translator.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include 11 | 12 | 13 | enum MsgId { 14 | #define INSTREW_MESSAGE_ID(id, name) MSGID_ ## name = id, 15 | #include "instrew-protocol.inc" 16 | #undef INSTREW_MESSAGE_ID 17 | }; 18 | 19 | static int translator_hdr_send(Translator* t, uint32_t id, int32_t sz) { 20 | if (t->last_hdr.id != MSGID_UNKNOWN) 21 | return -EPROTO; 22 | int ret; 23 | TranslatorMsgHdr hdr = {id, sz}; 24 | if ((ret = write_full(t->socket, &hdr, sizeof(hdr))) != sizeof(hdr)) 25 | return ret; 26 | return 0; 27 | } 28 | 29 | static int32_t translator_hdr_recv(Translator* t, uint32_t id) { 30 | if (t->last_hdr.id == MSGID_UNKNOWN) { 31 | int ret = read_full(t->socket, &t->last_hdr, sizeof(t->last_hdr)); 32 | if (ret != sizeof(t->last_hdr)) 33 | return ret; 34 | } 35 | if (t->last_hdr.id != id) 36 | return -EPROTO; 37 | int32_t sz = t->last_hdr.sz; 38 | t->last_hdr = (TranslatorMsgHdr) {MSGID_UNKNOWN, 0}; 39 | return sz; 40 | } 41 | 42 | int translator_init(Translator* t, const char* server_config, 43 | const struct TranslatorServerConfig* tsc) { 44 | int socket = 0; 45 | for (size_t i = 0; server_config[i]; i++) 46 | socket = socket * 10 + server_config[i] - '0'; 47 | t->socket = socket; 48 | 49 | t->written_bytes = 0; 50 | t->last_hdr = (TranslatorMsgHdr) {MSGID_UNKNOWN, 0}; 51 | t->recvbuf = NULL; 52 | t->recvbuf_sz = 0; 53 | 54 | int ret; 55 | if ((ret = translator_hdr_send(t, MSGID_C_INIT, sizeof *tsc))) 56 | return ret; 57 | if ((ret = write_full(t->socket, tsc, sizeof *tsc)) != sizeof *tsc) 58 | return ret; 59 | 60 | return 0; 61 | } 62 | 63 | int translator_fini(Translator* t) { 64 | close(t->socket); 65 | return 0; 66 | } 67 | 68 | int translator_config_fetch(Translator* t, struct TranslatorConfig* cfg) { 69 | int32_t sz = translator_hdr_recv(t, MSGID_S_INIT); 70 | if (sz < 0) 71 | return sz; 72 | if (sz != sizeof *cfg) 73 | return -EPROTO; 74 | ssize_t ret = read_full(t->socket, cfg, sz); 75 | if (ret != (ssize_t) sz) 76 | return ret; 77 | return 0; 78 | } 79 | 80 | int translator_get_object(Translator* t, void** out_obj, size_t* out_obj_size) { 81 | int32_t sz = translator_hdr_recv(t, MSGID_S_OBJECT); 82 | if (sz < 0) 83 | return sz; 84 | 85 | if ((uint32_t) sz >= t->recvbuf_sz) { 86 | // TODO: free old buffer 87 | // int ret = mem_free(t->recvbuf); 88 | // if (ret) 89 | // return ret; 90 | size_t newsz = ALIGN_UP(sz, getpagesize()); 91 | t->recvbuf = mem_alloc_data(newsz, getpagesize()); 92 | if (BAD_ADDR(t->recvbuf)) 93 | return (int) (uintptr_t) t->recvbuf; 94 | t->recvbuf_sz = newsz; 95 | } 96 | int ret = read_full(t->socket, t->recvbuf, sz); 97 | if (ret != (ssize_t) sz) 98 | return ret; 99 | 100 | *out_obj = t->recvbuf; 101 | *out_obj_size = sz; 102 | 103 | return 0; 104 | } 105 | 106 | int 
translator_get(Translator* t, uintptr_t addr, void** out_obj, 107 | size_t* out_obj_size) { 108 | int ret; 109 | if ((ret = translator_hdr_send(t, MSGID_C_TRANSLATE, 8)) != 0) 110 | return ret; 111 | if ((ret = write_full(t->socket, &addr, sizeof(addr))) != sizeof(addr)) 112 | return ret; 113 | 114 | while (true) { 115 | int32_t sz = translator_hdr_recv(t, MSGID_S_MEMREQ); 116 | if (sz == -EPROTO) { 117 | return translator_get_object(t, out_obj, out_obj_size); 118 | } else if (sz < 0) { 119 | return sz; 120 | } 121 | 122 | // handle memory request 123 | struct { uint64_t addr; size_t buf_sz; } memrq; 124 | if (sz != sizeof(memrq)) 125 | return -1; 126 | if ((ret = read_full(t->socket, &memrq, sizeof(memrq))) != sizeof(memrq)) 127 | return ret; 128 | if (memrq.buf_sz > 0x1000) 129 | memrq.buf_sz = 0x1000; 130 | 131 | if ((ret = translator_hdr_send(t, MSGID_C_MEMBUF, memrq.buf_sz+1)) < 0) 132 | return ret; 133 | 134 | uint8_t failed = 0; 135 | if ((ret = write_full(t->socket, (void*) memrq.addr, memrq.buf_sz)) != (ssize_t) memrq.buf_sz) { 136 | // Gracefully handle reads from invalid addresses 137 | if (ret == -EFAULT) { 138 | failed = 1; 139 | // Send zero bytes as padding 140 | for (size_t i = 0; i < memrq.buf_sz; i++) 141 | if (write_full(t->socket, "", 1) != 1) 142 | return ret; 143 | } else { 144 | dprintf(2, "translator_get: failed writing from address 0x%lx\n", memrq.addr); 145 | return ret; 146 | } 147 | } 148 | 149 | if ((ret = write_full(t->socket, &failed, 1)) != 1) 150 | return ret; 151 | 152 | t->written_bytes += memrq.buf_sz; 153 | } 154 | } 155 | 156 | int 157 | translator_fork_prepare(Translator* t) { 158 | int ret; 159 | if ((ret = translator_hdr_send(t, MSGID_C_FORK, 0))) 160 | return ret; 161 | 162 | int32_t sz = translator_hdr_recv(t, MSGID_S_FD); 163 | if (sz != 4) 164 | return sz < 0 ? sz : -EPROTO; 165 | 166 | int error; 167 | struct iovec iov = {&error, sizeof(error)}; 168 | struct fd_cmsg { 169 | size_t cmsg_len; 170 | int cmsg_level; 171 | int cmsg_type; 172 | int fd; 173 | } cmsg; 174 | size_t cmsg_len = offsetof(struct fd_cmsg, fd) + sizeof(int); 175 | struct msghdr msg = { 176 | .msg_iov = &iov, 177 | .msg_iovlen = 1, 178 | .msg_control = &cmsg, 179 | .msg_controllen = cmsg_len, 180 | }; 181 | if ((ret = recvmsg(t->socket, &msg, MSG_CMSG_CLOEXEC)) != sizeof(error)) 182 | return ret < 0 ? ret : -EPROTO; 183 | if (error != 0) 184 | return error; 185 | if (cmsg.cmsg_type != SCM_RIGHTS || cmsg.cmsg_len != cmsg_len) 186 | return -EPROTO; 187 | 188 | return cmsg.fd; 189 | } 190 | 191 | int 192 | translator_fork_finalize(Translator* t, int fork_fd) { 193 | close(t->socket); // Forked process should not use parent translator. 194 | t->socket = fork_fd; 195 | return 0; 196 | } 197 | -------------------------------------------------------------------------------- /client/main.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #define PLATFORM_STRING "x86_64" 16 | 17 | 18 | int main(int argc, char** argv) { 19 | int i; 20 | int retval; 21 | 22 | BinaryInfo info = {0}; 23 | 24 | // Initialize state. 
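    // Rough order of events below: load the guest ELF (plus its interpreter,
    // if any), attach to the rewriting server through the inherited socket,
    // build an ABI-conformant initial stack (argc/argv/envp/auxv), seed the
    // guest CPU state, and enter the dispatch loop, which never returns here.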
25 | struct State state = {0}; 26 | 27 | if (argc < 3) { 28 | puts("usage: CONFSTR EXECUTABLE [ARGS...]"); 29 | return 1; 30 | } 31 | 32 | char* server_config = argv[1]; 33 | argc -= 2; 34 | argv += 2; 35 | 36 | signal_init(&state); 37 | 38 | retval = mem_init(); 39 | if (retval < 0) { 40 | puts("error: failed to initialize heap"); 41 | return retval; 42 | } 43 | 44 | // Load binary first, because we need to know the architecture. 45 | retval = load_elf_binary(argv[0], &info); 46 | if (retval != 0) { 47 | puts("error: could not load file"); 48 | return retval; 49 | } 50 | 51 | state.tsc.tsc_guest_arch = info.machine; 52 | #ifdef __x86_64__ 53 | state.tsc.tsc_host_arch = EM_X86_64; 54 | state.tsc.tsc_stack_alignment = 8; 55 | #elif defined(__aarch64__) 56 | state.tsc.tsc_host_arch = EM_AARCH64; 57 | #else 58 | #error "Unsupported architecture!" 59 | #endif 60 | 61 | retval = translator_init(&state.translator, server_config, &state.tsc); 62 | if (retval != 0) { 63 | puts("error: could not spawn rewriting server"); 64 | return retval; 65 | } 66 | 67 | retval = translator_config_fetch(&state.translator, &state.tc); 68 | if (retval != 0) { 69 | puts("error: could not fetch client configuration"); 70 | return 1; 71 | } 72 | 73 | const struct DispatcherInfo* disp_info = dispatch_get(&state); 74 | if (!disp_info || !disp_info->loop_func) { 75 | puts("error: unsupported calling convention"); 76 | return -EOPNOTSUPP; 77 | } 78 | 79 | // TODO: don't hardcode stack size 80 | // TODO: support execstack 81 | #define STACK_SIZE 0x1000000 82 | int stack_prot = PROT_READ|PROT_WRITE; 83 | int stack_flags = MAP_PRIVATE|MAP_ANONYMOUS|MAP_GROWSDOWN|MAP_STACK; 84 | void* stack = mmap(NULL, STACK_SIZE, stack_prot, stack_flags, -1, 0); 85 | if (BAD_ADDR(stack)) { 86 | puts("error: failed to allocate stack"); 87 | retval = (int) (uintptr_t) stack; 88 | goto out; 89 | } 90 | 91 | //memset(stack, 0xcc, STACK_SIZE); 92 | mprotect(stack, 0x1000, PROT_NONE); 93 | 94 | // Initialize stack according to ABI 95 | size_t* stack_top = (size_t*) stack + STACK_SIZE/sizeof(size_t); 96 | 97 | // Stack alignment 98 | int envc = 0; 99 | while (environ[envc]) 100 | envc++; 101 | stack_top -= (argc + envc) & 1; // auxv has even number of entries 102 | 103 | // Set auxiliary values 104 | *(--stack_top) = 0; // Null auxiliary vector entry 105 | 106 | *(--stack_top) = (uintptr_t) info.elf_entry; *(--stack_top) = AT_ENTRY; 107 | *(--stack_top) = (uintptr_t) info.phdr; *(--stack_top) = AT_PHDR; 108 | *(--stack_top) = info.phent; *(--stack_top) = AT_PHENT; 109 | *(--stack_top) = info.phnum; *(--stack_top) = AT_PHNUM; 110 | *(--stack_top) = (size_t) PLATFORM_STRING; *(--stack_top) = AT_PLATFORM; 111 | *(--stack_top) = getauxval(AT_RANDOM); *(--stack_top) = AT_RANDOM; 112 | *(--stack_top) = getauxval(AT_UID); *(--stack_top) = AT_UID; 113 | *(--stack_top) = getauxval(AT_EUID); *(--stack_top) = AT_EUID; 114 | *(--stack_top) = getauxval(AT_GID); *(--stack_top) = AT_GID; 115 | *(--stack_top) = getauxval(AT_EGID); *(--stack_top) = AT_EGID; 116 | *(--stack_top) = getauxval(AT_CLKTCK); *(--stack_top) = AT_CLKTCK; 117 | *(--stack_top) = getauxval(AT_PAGESZ); *(--stack_top) = AT_PAGESZ; 118 | *(--stack_top) = 0x8001; *(--stack_top) = AT_HWCAP; 119 | *(--stack_top) = 0; *(--stack_top) = AT_HWCAP2; 120 | *(--stack_top) = 0; *(--stack_top) = AT_SECURE; 121 | 122 | *(--stack_top) = 0; // End of environment pointers 123 | stack_top -= envc; 124 | for (i = 0; i < envc; i++) 125 | stack_top[i] = (uintptr_t) environ[i]; 126 | *(--stack_top) = 0; // End of 
argument pointers 127 | stack_top -= argc; 128 | for (i = 0; i < argc; i++) 129 | stack_top[i] = (size_t) argv[i]; 130 | *(--stack_top) = argc; // Argument Count 131 | 132 | retval = rtld_init(&state.rtld, disp_info); 133 | if (retval < 0) { 134 | puts("error: could not initialize runtime linker"); 135 | return retval; 136 | } 137 | 138 | retval = rtld_perf_init(&state.rtld, state.tc.tc_perf); 139 | if (retval < 0) { 140 | puts("warning: could not initialize perf support"); 141 | } 142 | 143 | void* initobj; 144 | size_t initobj_size; 145 | retval = translator_get_object(&state.translator, &initobj, &initobj_size); 146 | if (retval < 0) { 147 | puts("error: could not get initial object"); 148 | return retval; 149 | } 150 | if (initobj_size > 0) { 151 | retval = rtld_add_object(&state.rtld, initobj, initobj_size, 0); 152 | if (retval < 0) { 153 | puts("error: could not get initial object"); 154 | return retval; 155 | } 156 | } 157 | 158 | struct CpuState* cpu_state = mem_alloc_data(sizeof(struct CpuState), 159 | _Alignof(struct CpuState)); 160 | // TODO: check for BAD_ADDR(cpu_state) 161 | memset(cpu_state, 0, sizeof(*cpu_state)); 162 | cpu_state->self = cpu_state; 163 | cpu_state->state = &state; 164 | 165 | retval = set_thread_area(cpu_state); 166 | if (retval) { 167 | puts("error: could not set thread area"); 168 | return retval; 169 | } 170 | 171 | uint64_t* cpu_regs = (uint64_t*) &cpu_state->regdata; 172 | 173 | cpu_regs[0] = (uintptr_t) info.exec_entry; 174 | if (state.tsc.tsc_guest_arch == EM_X86_64) { 175 | cpu_regs[5] = (uintptr_t) stack_top; 176 | } else if (state.tsc.tsc_guest_arch == EM_RISCV) { 177 | cpu_regs[3] = (uintptr_t) stack_top; 178 | } else if (state.tsc.tsc_guest_arch == EM_AARCH64) { 179 | cpu_regs[33] = (uintptr_t) stack_top; 180 | } else { 181 | // well... 
-.- 182 | puts("error: unsupported architecture"); 183 | return -ENOEXEC; 184 | } 185 | 186 | disp_info->loop_func(cpu_regs); 187 | 188 | out: 189 | return retval; 190 | } 191 | -------------------------------------------------------------------------------- /client/elf-loader.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | 13 | static int 14 | elf_read(int fd, size_t off, void* buf, size_t nbytes) { 15 | if (lseek(fd, off, SEEK_SET) == -1) 16 | return -1; 17 | if (read_full(fd, buf, nbytes) == -1) 18 | return -1; 19 | return 0; 20 | } 21 | 22 | static Elf_Phdr* load_elf_phdrs(Elf_Ehdr* elf_ex, int fd) { 23 | Elf_Phdr* phdata = NULL; 24 | int err = -1; 25 | 26 | if (elf_ex->e_phentsize != sizeof(Elf_Phdr)) 27 | goto out; 28 | 29 | if (elf_ex->e_phnum < 1 || elf_ex->e_phnum > 65536U / sizeof(Elf_Phdr)) 30 | goto out; 31 | 32 | size_t size = sizeof(Elf_Phdr) * elf_ex->e_phnum; 33 | if (size > 0x1000) 34 | goto out; 35 | 36 | phdata = mmap(NULL, 0x1000, PROT_READ|PROT_WRITE, 37 | MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 38 | if (BAD_ADDR(phdata)) { 39 | phdata = NULL; 40 | goto out; 41 | } 42 | 43 | if (elf_read(fd, elf_ex->e_phoff, phdata, size) == -1) 44 | goto out; 45 | 46 | err = 0; 47 | 48 | out: 49 | if (err && phdata != NULL) { 50 | munmap(phdata, 0x1000); 51 | phdata = NULL; 52 | } 53 | 54 | return phdata; 55 | } 56 | 57 | static uintptr_t 58 | elf_determine_load_bias(size_t num_ph, const Elf_Phdr elf_phdata[num_ph]) { 59 | unsigned has_first = 0; 60 | uintptr_t start = 0; 61 | uintptr_t end = 0; 62 | for (size_t i = 0; i < num_ph; i++) { 63 | if (elf_phdata[i].p_type != PT_LOAD) 64 | continue; 65 | if (!has_first) { 66 | has_first = 1; 67 | start = ALIGN_DOWN(elf_phdata[i].p_vaddr, getpagesize()); 68 | } 69 | end = elf_phdata[i].p_vaddr + elf_phdata[i].p_memsz; 70 | } 71 | if (start >= end) 72 | return (uintptr_t) -ENOEXEC; 73 | 74 | size_t total_size = end - start; 75 | void* load_bias_ptr = mmap(NULL, total_size, PROT_NONE, 76 | MAP_PRIVATE|MAP_NORESERVE|MAP_ANONYMOUS, 77 | -1, 0); 78 | if (BAD_ADDR(load_bias_ptr)) 79 | return (uintptr_t) load_bias_ptr; 80 | munmap(load_bias_ptr, total_size); 81 | 82 | return (uintptr_t) load_bias_ptr - start; 83 | } 84 | 85 | static int 86 | elf_map(uintptr_t addr, const Elf_Phdr* elf_ppnt, int fd) { 87 | int retval; 88 | 89 | int prot = 0; 90 | if (elf_ppnt->p_flags & PF_R) 91 | prot |= PROT_READ; 92 | if (elf_ppnt->p_flags & PF_W) 93 | prot |= PROT_WRITE; 94 | if (elf_ppnt->p_flags & PF_X) { 95 | // Never map code as executable, since the host can't execute it. 96 | } 97 | 98 | size_t pagesz = getpagesize(); 99 | 100 | uintptr_t mapstart = ALIGN_DOWN(addr, pagesz); 101 | uintptr_t mapend = ALIGN_UP(addr + elf_ppnt->p_filesz, pagesz); 102 | uintptr_t dataend = addr + elf_ppnt->p_filesz; 103 | uintptr_t allocend = addr + elf_ppnt->p_memsz; 104 | // Note: technically, the ALIGN_UP() is not necessary; but some ld versions 105 | // generate faulty PT_LOAD entries where zero-ing up to the page end is 106 | // assumed. 
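    // Illustrative numbers (hypothetical segment, 4 KiB pages, load_bias 0):
    //   p_vaddr=0x1234, p_offset=0x234, p_filesz=0x2000, p_memsz=0x3000
    //   => mapstart=0x1000, mapend=0x4000 (file-backed), dataend=0x3234,
    //      allocend=0x4234 (rounded to 0x5000 below when writable); the BSS
    //      tail [0x3234,0x4000) is zeroed in place and [0x4000,0x5000) comes
    //      from the anonymous mapping further down.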
107 | if (prot & PROT_WRITE) 108 | allocend = ALIGN_UP(allocend, pagesz); 109 | uintptr_t mapoff = ALIGN_DOWN(elf_ppnt->p_offset, pagesz); 110 | 111 | if ((elf_ppnt->p_vaddr & (pagesz - 1)) != (elf_ppnt->p_offset & (pagesz - 1))) { 112 | printf("mapoff (%lx %lx pgsz=%lx)\n", elf_ppnt->p_vaddr, 113 | elf_ppnt->p_offset, pagesz); 114 | return -ENOEXEC; 115 | } 116 | 117 | if (mapend > mapstart) { 118 | void* mapret = mmap((void*) mapstart, mapend - mapstart, prot, 119 | MAP_PRIVATE|MAP_FIXED, fd, mapoff); 120 | if (BAD_ADDR(mapret)) { 121 | puts("map (file)"); 122 | retval = (int) (uintptr_t) mapret; 123 | goto out; 124 | } 125 | } 126 | 127 | if (allocend > dataend) 128 | { 129 | uintptr_t zeropage = ALIGN_UP(dataend, pagesz); 130 | if (allocend < zeropage) 131 | zeropage = allocend; 132 | 133 | if (zeropage > dataend) 134 | { 135 | // We have data at the last page of the segment that has to be 136 | // zeroed. If necessary, we have to give write privileges 137 | // temporarily. 138 | if ((prot & PROT_WRITE) == 0) { 139 | puts("zero (page end)"); 140 | retval = mprotect((void*) ALIGN_DOWN(dataend, pagesz), 141 | pagesz, prot | PROT_WRITE); 142 | if (retval < 0) 143 | goto out; 144 | } 145 | memset((void*) dataend, 0, zeropage - dataend); 146 | if ((prot & PROT_WRITE) == 0) { 147 | mprotect((void*) ALIGN_DOWN(dataend, pagesz), pagesz, 148 | prot); 149 | } 150 | } 151 | 152 | // We have entire pages that have to be zeroed. 153 | if (allocend > zeropage) { 154 | void* mapret = mmap((void*) zeropage, allocend - zeropage, prot, 155 | MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0); 156 | if (BAD_ADDR(mapret)) { 157 | puts("map (zero)"); 158 | retval = (int) (uintptr_t) mapret; 159 | goto out; 160 | } 161 | } 162 | } 163 | 164 | retval = 0; 165 | 166 | out: 167 | return retval; 168 | } 169 | 170 | static uintptr_t 171 | load_elf_interp(const Elf_Ehdr* interp_ehdr, const Elf_Phdr interp_phdata[], 172 | int interp_fd) { 173 | const Elf_Phdr* ppnt; 174 | int i; 175 | 176 | uintptr_t load_bias = 0; 177 | if (interp_ehdr->e_type == ET_DYN) { 178 | load_bias = elf_determine_load_bias(interp_ehdr->e_phnum, interp_phdata); 179 | if (BAD_ADDR(load_bias)) 180 | return load_bias; 181 | } 182 | 183 | for (i = 0, ppnt = interp_phdata; i < interp_ehdr->e_phnum; i++, ppnt++) { 184 | if (ppnt->p_type != PT_LOAD) 185 | continue; 186 | 187 | int retval = elf_map(load_bias + ppnt->p_vaddr, ppnt, interp_fd); 188 | if (retval < 0) 189 | return retval; 190 | } 191 | 192 | return load_bias; 193 | } 194 | 195 | int load_elf_binary(const char* filename, BinaryInfo* out_info) { 196 | int retval; 197 | int i; 198 | Elf_Phdr* elf_ppnt; 199 | 200 | int interp_fd = -1; 201 | int fd = open(filename, O_RDONLY, 0); 202 | if (fd < 0) { 203 | retval = fd; 204 | goto out; 205 | } 206 | 207 | Elf_Ehdr elfhdr_ex; 208 | retval = read_full(fd, &elfhdr_ex, sizeof(Elf_Ehdr)); 209 | if (retval < 0) 210 | goto out_close; 211 | 212 | retval = -ENOEXEC; 213 | if (memcmp(&elfhdr_ex, ELFMAG, SELFMAG) != 0) 214 | goto out_close; 215 | 216 | if (elfhdr_ex.e_ident[EI_CLASS] != ELFCLASS64) 217 | goto out_close; 218 | 219 | if (elfhdr_ex.e_type != ET_EXEC && elfhdr_ex.e_type != ET_DYN) 220 | goto out_close; 221 | 222 | Elf_Phdr* elf_phdata = load_elf_phdrs(&elfhdr_ex, fd); 223 | if (elf_phdata == NULL) { 224 | puts("Could not load phdata"); 225 | goto out_close; 226 | } 227 | 228 | Elf_Ehdr interp_ehdr; 229 | Elf_Phdr* interp_phdata = NULL; 230 | for (i = 0, elf_ppnt = elf_phdata; i < elfhdr_ex.e_phnum; i++, elf_ppnt++) { 231 | if (elf_ppnt->p_type != 
PT_INTERP) 232 | continue; 233 | if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2) 234 | goto out_free_ph; 235 | 236 | char interp_name[PATH_MAX]; 237 | retval = elf_read(fd, elf_ppnt->p_offset, interp_name, elf_ppnt->p_filesz); 238 | if (retval < 0) 239 | goto out_free_ph; 240 | if (interp_name[elf_ppnt->p_filesz - 1] != 0) { 241 | retval = -ENOEXEC; 242 | goto out_free_ph; 243 | } 244 | 245 | interp_fd = open(interp_name, O_RDONLY|O_CLOEXEC, 0); 246 | if (interp_fd < 0) { 247 | retval = interp_fd; 248 | goto out_free_ph; 249 | } 250 | retval = elf_read(interp_fd, 0, &interp_ehdr, sizeof(Elf_Ehdr)); 251 | if (retval < 0) 252 | goto out_free_ph; 253 | 254 | retval = -ELIBBAD; 255 | if (memcmp(&interp_ehdr, ELFMAG, SELFMAG) != 0) 256 | goto out_free_ph; 257 | if (interp_ehdr.e_ident[EI_CLASS] != ELFCLASS64) 258 | goto out_free_ph; 259 | if (interp_ehdr.e_type != ET_EXEC && interp_ehdr.e_type != ET_DYN) 260 | goto out_free_ph; 261 | if (interp_ehdr.e_machine != elfhdr_ex.e_machine) 262 | goto out_free_ph; 263 | 264 | interp_phdata = load_elf_phdrs(&interp_ehdr, interp_fd); 265 | if (interp_phdata == NULL) { 266 | puts("Could not load interp phdata"); 267 | goto out_free_ph; 268 | } 269 | 270 | break; 271 | } 272 | 273 | uintptr_t load_bias = 0; 274 | if (elfhdr_ex.e_type == ET_DYN) { 275 | load_bias = elf_determine_load_bias(elfhdr_ex.e_phnum, elf_phdata); 276 | if (BAD_ADDR(load_bias)) { 277 | retval = (int) load_bias; 278 | goto out_free_ph; 279 | } 280 | } 281 | 282 | // TODO: Support GNU_STACK and architecture specific program headers 283 | // TODO: Support executable stack 284 | 285 | uintptr_t load_addr = 0; 286 | unsigned load_addr_set = 0; 287 | for (i = 0, elf_ppnt = elf_phdata; i < elfhdr_ex.e_phnum; i++, elf_ppnt++) { 288 | if (elf_ppnt->p_type != PT_LOAD) 289 | continue; 290 | 291 | if (!load_addr_set) { 292 | load_addr_set = 1; 293 | load_addr = (elf_ppnt->p_vaddr-elf_ppnt->p_offset) + load_bias; 294 | } 295 | 296 | retval = elf_map(load_bias + elf_ppnt->p_vaddr, elf_ppnt, fd); 297 | if (retval < 0) 298 | goto out_free_ph; 299 | } 300 | 301 | uintptr_t elf_entry = load_bias + elfhdr_ex.e_entry; 302 | uintptr_t entry = elf_entry; 303 | 304 | if (interp_fd >= 0) { 305 | uintptr_t interp_load_bias = load_elf_interp(&interp_ehdr, interp_phdata, interp_fd); 306 | if (BAD_ADDR(interp_load_bias)) { 307 | retval = interp_load_bias; 308 | goto out_free_ph; 309 | } 310 | 311 | entry = interp_load_bias + interp_ehdr.e_entry; 312 | } 313 | 314 | if (out_info != NULL) { 315 | out_info->elf_entry = (void*) elf_entry; 316 | out_info->exec_entry = (void*) entry; 317 | out_info->machine = elfhdr_ex.e_machine; 318 | out_info->phdr = (Elf_Phdr*) (load_addr + elfhdr_ex.e_phoff); 319 | out_info->phnum = elfhdr_ex.e_phnum; 320 | out_info->phent = elfhdr_ex.e_phentsize; 321 | } 322 | 323 | retval = 0; 324 | 325 | out_free_ph: 326 | munmap(elf_phdata, 0x1000); 327 | if (interp_phdata) 328 | munmap(interp_phdata, 0x1000); 329 | 330 | out_close: 331 | close(fd); 332 | if (interp_fd >= 0) 333 | close(interp_fd); 334 | 335 | out: 336 | return retval; 337 | } 338 | -------------------------------------------------------------------------------- /client/math.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #define I32_AS_F32(vi) ((union { int32_t i; float f; }) { .i = vi }.f) 5 | #define I64_AS_F64(vi) ((union { int64_t i; double d; }) { .i = vi }.d) 6 | #define F32_AS_I32(v) ((union { int32_t i; float f; }) { .f = v }.i) 7 | #define 
F64_AS_I64(v) ((union { int64_t i; double d; }) { .d = v }.i)
8 | #define EXP_F32(vi) ((((vi) >> 23) & 0xff) - 0x7f)
9 | #define EXP_F64(vi) ((((vi) >> 52) & 0x7ff) - 0x3ff)
10 |
11 | float
12 | floorf(float v) {
13 |     int32_t vi = F32_AS_I32(v);
14 |     int exp = EXP_F32(vi);
15 |     if (exp < 0) { // less than one
16 |         // < 0 -> -1, else sign(v) * 0
17 |         return vi == INT32_MIN ? -0.0f : vi < 0 ? -1.0f : 0.0f;
18 |     } else if (exp < 23) {
19 |         int32_t msk = 0x7fffff >> exp;
20 |         return I32_AS_F32((vi + (vi < 0 ? msk : 0)) & ~msk);
21 |     } else { // integral, Inf, or NaN
22 |         return v;
23 |     }
24 | }
25 |
26 | double
27 | floor(double v) {
28 |     int64_t vi = F64_AS_I64(v);
29 |     int exp = EXP_F64(vi);
30 |     if (exp < 0) { // less than one
31 |         // < 0 -> -1, else sign(v) * 0
32 |         return vi == INT64_MIN ? -0.0 : vi < 0 ? -1.0 : 0.0;
33 |     } else if (exp < 52) {
34 |         int64_t msk = 0xfffffffffffff >> exp;
35 |         return I64_AS_F64((vi + (vi < 0 ? msk : 0)) & ~msk);
36 |     } else { // integral, Inf, or NaN
37 |         return v;
38 |     }
39 | }
40 |
41 | float
42 | ceilf(float v) {
43 |     int32_t vi = F32_AS_I32(v);
44 |     int exp = EXP_F32(vi);
45 |     if (exp < 0) { // less than one
46 |         // <= 0 -> sign(v) * 0, else 1
47 |         return vi < 0 ? -0.0f : vi == 0 ? 0.0f : 1.0f;
48 |     } else if (exp < 23) {
49 |         int32_t msk = 0x7fffff >> exp;
50 |         return I32_AS_F32((vi + (vi >= 0 ? msk : 0)) & ~msk);
51 |     } else { // integral, Inf, or NaN
52 |         return v;
53 |     }
54 | }
55 |
56 | double
57 | ceil(double v) {
58 |     int64_t vi = F64_AS_I64(v);
59 |     int exp = EXP_F64(vi);
60 |     if (exp < 0) { // less than one
61 |         // <= 0 -> sign(v) * 0, else 1
62 |         return vi < 0 ? -0.0 : vi == 0 ? 0.0 : 1.0;
63 |     } else if (exp < 52) {
64 |         int64_t msk = 0xfffffffffffff >> exp;
65 |         return I64_AS_F64((vi + (vi >= 0 ? msk : 0)) & ~msk);
66 |     } else { // integral, Inf, or NaN
67 |         return v;
68 |     }
69 | }
70 |
71 | float
72 | roundf(float v) {
73 |     int32_t vi = F32_AS_I32(v);
74 |     int exp = EXP_F32(vi);
75 |     if (exp < -1) { // less than 0.5
76 |         return I32_AS_F32(vi & 0x80000000);
77 |     } else if (exp == -1) { // between 0.5 and 1
78 |         return vi < 0 ? -1.0f : 1.0f;
79 |     } else if (exp < 23) {
80 |         int32_t msk = 0x7fffff >> exp;
81 |         if (vi & msk) // v is not integral
82 |             return I32_AS_F32((vi + (1 + (msk >> 1))) & ~msk);
83 |         return v;
84 |     } else { // integral, Inf, or NaN
85 |         return v;
86 |     }
87 | }
88 |
89 | double
90 | round(double v) {
91 |     int64_t vi = F64_AS_I64(v);
92 |     int exp = EXP_F64(vi);
93 |     if (exp < -1) { // less than 0.5
94 |         return I64_AS_F64(vi & 0x8000000000000000);
95 |     } else if (exp == -1) { // between 0.5 and 1
96 |         return vi < 0 ? -1.0 : 1.0;
97 |     } else if (exp < 52) {
98 |         int64_t msk = 0xfffffffffffff >> exp;
99 |         if (vi & msk) // v is not integral
100 |             return I64_AS_F64((vi + (1 + (msk >> 1))) & ~msk);
101 |         return v;
102 |     } else { // integral, Inf, or NaN
103 |         return v;
104 |     }
105 | }
106 |
107 | float
108 | truncf(float v) {
109 |     int32_t vi = F32_AS_I32(v);
110 |     int exp = EXP_F32(vi);
111 |     if (exp < 0) { // less than one
112 |         // result is sign(v) * 0
113 |         return vi < 0 ? -0.0f : 0.0f;
114 |     } else if (exp < 23) {
115 |         int32_t msk = 0x7fffff >> exp;
116 |         return I32_AS_F32(vi & ~msk);
117 |     } else { // integral, Inf, or NaN
118 |         return v;
119 |     }
120 | }
121 |
122 | double
123 | trunc(double v) {
124 |     int64_t vi = F64_AS_I64(v);
125 |     int exp = EXP_F64(vi);
126 |     if (exp < 0) { // less than one
127 |         // result is sign(v) * 0
128 |         return vi < 0 ?
-0.0 : 0.0; 129 | } else if (exp < 52) { 130 | int64_t msk = 0xfffffffffffff >> exp; 131 | return I64_AS_F64(vi & ~msk); 132 | } else { // integral, Inf, or NaN 133 | return v; 134 | } 135 | } 136 | 137 | float 138 | fmaf(float x, float y, float z) { 139 | return (x * y) + z; // TODO: actually perform fused multiply-add 140 | } 141 | 142 | double 143 | fma(double x, double y, double z) { 144 | return (x * y) + z; // TODO: actually perform fused multiply-add 145 | } 146 | 147 | #ifdef TEST 148 | #include 149 | #include 150 | 151 | int 152 | main(void) { 153 | unsigned count = 0; 154 | unsigned failed = 0; 155 | #define CASE(FLOAT_TY, F_AS_I, PRIxN, a, b) do { \ 156 | count++; \ 157 | FLOAT_TY _a = (a), _b = (b); \ 158 | unsigned ok = F_AS_I(_a) == F_AS_I(_b); \ 159 | printf("%s %d - " #a " == " #b "\n", &"not ok"[4*ok], count); \ 160 | if (!ok) { \ 161 | printf("# %lf (%"PRIxN") != %lf (%"PRIxN")\n", \ 162 | _a, F_AS_I(_a), _b, F_AS_I(_b)); \ 163 | failed++; \ 164 | } \ 165 | } while (0) 166 | #define CASE_F32(a, b) CASE(float, F32_AS_I32, PRIx32, a, b) 167 | #define CASE_F64(a, b) CASE(double, F64_AS_I64, PRIx64, a, b) 168 | 169 | CASE_F32(floorf(+0.0f), +0.0f); 170 | CASE_F32(floorf(-0.0f), -0.0f); 171 | CASE_F32(floorf(-0.5f), -1.0f); 172 | CASE_F32(floorf(+0.1f), +0.0f); 173 | CASE_F32(floorf(-1.0f), -1.0f); 174 | CASE_F32(floorf(+1.125f), +1.0f); 175 | CASE_F32(floorf(-1.125f), -2.0f); 176 | CASE_F32(floorf(+63.0f), +63.0f); 177 | CASE_F32(floorf(-63.0f), -63.0f); 178 | CASE_F32(floorf(+63.5f), +63.0f); 179 | CASE_F32(floorf(-63.5f), -64.0f); 180 | CASE_F32(floorf((int32_t) 1 << 24), (int32_t) 1 << 24); 181 | CASE_F32(floorf(((int32_t) 1 << 24) + 2), ((int32_t) 1 << 24) + 2); 182 | 183 | CASE_F64(floor(+0.0), +0.0); 184 | CASE_F64(floor(-0.0), -0.0); 185 | CASE_F64(floor(-0.5), -1.0); 186 | CASE_F64(floor(+0.1), +0.0); 187 | CASE_F64(floor(-1.0), -1.0); 188 | CASE_F64(floor(+1.125), +1.0); 189 | CASE_F64(floor(-1.125), -2.0); 190 | CASE_F64(floor(+63.0), +63.0); 191 | CASE_F64(floor(-63.0), -63.0); 192 | CASE_F64(floor(+63.5), +63.0); 193 | CASE_F64(floor(-63.5), -64.0); 194 | CASE_F64(floor((int64_t) 1 << 53), (int64_t) 1 << 53); 195 | CASE_F64(floor(((int64_t) 1 << 53) + 2), ((int64_t) 1 << 53) + 2); 196 | 197 | CASE_F32(ceilf(+0.0f), +0.0f); 198 | CASE_F32(ceilf(-0.0f), -0.0f); 199 | CASE_F32(ceilf(-0.5f), -0.0f); 200 | CASE_F32(ceilf(+0.1f), +1.0f); 201 | CASE_F32(ceilf(-1.0f), -1.0f); 202 | CASE_F32(ceilf(+1.125f), +2.0f); 203 | CASE_F32(ceilf(-1.125f), -1.0f); 204 | CASE_F32(ceilf(+63.0f), +63.0f); 205 | CASE_F32(ceilf(-63.0f), -63.0f); 206 | CASE_F32(ceilf(+63.5f), +64.0f); 207 | CASE_F32(ceilf(-63.5f), -63.0f); 208 | CASE_F32(ceilf((int32_t) 1 << 24), (int32_t) 1 << 24); 209 | CASE_F32(ceilf(((int32_t) 1 << 24) + 2), ((int32_t) 1 << 24) + 2); 210 | 211 | CASE_F64(ceil(+0.0), +0.0); 212 | CASE_F64(ceil(-0.0), -0.0); 213 | CASE_F64(ceil(-0.5), -0.0); 214 | CASE_F64(ceil(+0.1), +1.0); 215 | CASE_F64(ceil(-1.0), -1.0); 216 | CASE_F64(ceil(+1.125), +2.0); 217 | CASE_F64(ceil(-1.125), -1.0); 218 | CASE_F64(ceil(+63.0), +63.0); 219 | CASE_F64(ceil(-63.0), -63.0); 220 | CASE_F64(ceil(+63.5), +64.0); 221 | CASE_F64(ceil(-63.5), -63.0); 222 | CASE_F64(ceil((int64_t) 1 << 53), (int64_t) 1 << 53); 223 | CASE_F64(ceil(((int64_t) 1 << 53) + 2), ((int64_t) 1 << 53) + 2); 224 | 225 | CASE_F32(roundf(+0.0f), +0.0f); 226 | CASE_F32(roundf(-0.0f), -0.0f); 227 | CASE_F32(roundf(+0.25f), +0.0f); 228 | CASE_F32(roundf(-0.25f), -0.0f); 229 | CASE_F32(roundf(+0.5f), +1.0f); 230 | 
CASE_F32(roundf(-0.5f), -1.0f); 231 | CASE_F32(roundf(+1.0f), +1.0f); 232 | CASE_F32(roundf(-1.0f), -1.0f); 233 | CASE_F32(roundf(+1.125f), +1.0f); 234 | CASE_F32(roundf(-1.125f), -1.0f); 235 | CASE_F32(roundf(+1.5f), +2.0f); 236 | CASE_F32(roundf(-1.5f), -2.0f); 237 | CASE_F32(roundf(+1.625f), +2.0f); 238 | CASE_F32(roundf(-1.625f), -2.0f); 239 | CASE_F32(roundf(+63.0f), +63.0f); 240 | CASE_F32(roundf(-63.0f), -63.0f); 241 | CASE_F32(roundf(+63.5f), +64.0f); 242 | CASE_F32(roundf(-63.5f), -64.0f); 243 | CASE_F32(roundf((int32_t) 1 << 24), (int32_t) 1 << 24); 244 | CASE_F32(roundf(((int32_t) 1 << 24) + 2), ((int32_t) 1 << 24) + 2); 245 | 246 | CASE_F64(round(+0.0), +0.0); 247 | CASE_F64(round(-0.0), -0.0); 248 | CASE_F64(round(+0.25), +0.0); 249 | CASE_F64(round(-0.25), -0.0); 250 | CASE_F64(round(+0.5), +1.0); 251 | CASE_F64(round(-0.5), -1.0); 252 | CASE_F64(round(+1.0), +1.0); 253 | CASE_F64(round(-1.0), -1.0); 254 | CASE_F64(round(+1.125), +1.0); 255 | CASE_F64(round(-1.125), -1.0); 256 | CASE_F64(round(+1.5), +2.0); 257 | CASE_F64(round(-1.5), -2.0); 258 | CASE_F64(round(+1.625), +2.0); 259 | CASE_F64(round(-1.625), -2.0); 260 | CASE_F64(round(+63.0), +63.0); 261 | CASE_F64(round(-63.0), -63.0); 262 | CASE_F64(round(+63.5), +64.0); 263 | CASE_F64(round(-63.5), -64.0); 264 | CASE_F64(round((int64_t) 1 << 53), (int64_t) 1 << 53); 265 | CASE_F64(round(((int64_t) 1 << 53) + 2), ((int64_t) 1 << 53) + 2); 266 | 267 | CASE_F32(truncf(+0.0f), +0.0f); 268 | CASE_F32(truncf(-0.0f), -0.0f); 269 | CASE_F32(truncf(+0.25f), +0.0f); 270 | CASE_F32(truncf(-0.25f), -0.0f); 271 | CASE_F32(truncf(+0.5f), +0.0f); 272 | CASE_F32(truncf(-0.5f), -0.0f); 273 | CASE_F32(truncf(+1.0f), +1.0f); 274 | CASE_F32(truncf(-1.0f), -1.0f); 275 | CASE_F32(truncf(+1.125f), +1.0f); 276 | CASE_F32(truncf(-1.125f), -1.0f); 277 | CASE_F32(truncf(+1.5f), +1.0f); 278 | CASE_F32(truncf(-1.5f), -1.0f); 279 | CASE_F32(truncf(+1.625f), +1.0f); 280 | CASE_F32(truncf(-1.625f), -1.0f); 281 | CASE_F32(truncf(+63.0f), +63.0f); 282 | CASE_F32(truncf(-63.0f), -63.0f); 283 | CASE_F32(truncf(+63.5f), +63.0f); 284 | CASE_F32(truncf(-63.5f), -63.0f); 285 | CASE_F32(truncf((int32_t) 1 << 24), (int32_t) 1 << 24); 286 | CASE_F32(truncf(((int32_t) 1 << 24) + 2), ((int32_t) 1 << 24) + 2); 287 | 288 | CASE_F64(trunc(+0.0), +0.0); 289 | CASE_F64(trunc(-0.0), -0.0); 290 | CASE_F64(trunc(+0.25), +0.0); 291 | CASE_F64(trunc(-0.25), -0.0); 292 | CASE_F64(trunc(+0.5), +0.0); 293 | CASE_F64(trunc(-0.5), -0.0); 294 | CASE_F64(trunc(+1.0), +1.0); 295 | CASE_F64(trunc(-1.0), -1.0); 296 | CASE_F64(trunc(+1.125), +1.0); 297 | CASE_F64(trunc(-1.125), -1.0); 298 | CASE_F64(trunc(+1.5), +1.0); 299 | CASE_F64(trunc(-1.5), -1.0); 300 | CASE_F64(trunc(+1.625), +1.0); 301 | CASE_F64(trunc(-1.625), -1.0); 302 | CASE_F64(trunc(+63.0), +63.0); 303 | CASE_F64(trunc(-63.0), -63.0); 304 | CASE_F64(trunc(+63.5), +63.0); 305 | CASE_F64(trunc(-63.5), -63.0); 306 | CASE_F64(trunc((int64_t) 1 << 53), (int64_t) 1 << 53); 307 | CASE_F64(trunc(((int64_t) 1 << 53) + 2), ((int64_t) 1 << 53) + 2); 308 | 309 | printf("1..%u\n", count); 310 | return failed != 0; 311 | } 312 | #endif 313 | -------------------------------------------------------------------------------- /server/connection.cc: -------------------------------------------------------------------------------- 1 | 2 | #include "connection.h" 3 | 4 | #include "cache.h" 5 | #include "config.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 
#include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | 26 | namespace { 27 | 28 | llvm::cl::opt dumpObjects("dumpobj", llvm::cl::desc("Dump compiled object files"), llvm::cl::Hidden, llvm::cl::cat(InstrewCategory)); 29 | llvm::cl::opt Stub("stub", llvm::cl::desc("Path for instrew client stub (default: built-in stub). Only useful for debugging."), llvm::cl::value_desc("instrew-client"), llvm::cl::Hidden, llvm::cl::cat(InstrewCategory)); 30 | llvm::cl::opt Program(llvm::cl::Positional, llvm::cl::desc(""), llvm::cl::Required); 31 | llvm::cl::list ProgramArgs(llvm::cl::ConsumeAfter, llvm::cl::desc("...")); 32 | 33 | struct HexBuffer { 34 | const uint8_t* buf; 35 | size_t size; 36 | friend std::ostream& operator<<(std::ostream& os, HexBuffer const& self) { 37 | os << std::hex << std::setfill('0'); 38 | for (size_t i = 0; i != self.size; i++) 39 | os << std::setw(2) << static_cast(self.buf[i]); 40 | return os << std::dec; 41 | } 42 | }; 43 | 44 | namespace Msg { 45 | enum Id { 46 | #define INSTREW_MESSAGE_ID(id, name) name = id, 47 | #include "instrew-protocol.inc" 48 | #undef INSTREW_MESSAGE_ID 49 | }; 50 | struct Hdr { 51 | uint32_t id; 52 | int32_t sz; 53 | } __attribute__((packed)); 54 | } 55 | 56 | class Conn { 57 | private: 58 | std::FILE* file; 59 | Msg::Hdr wr_hdr; 60 | Msg::Hdr recv_hdr; 61 | 62 | Conn(const Conn&) = delete; 63 | void operator=(const Conn& x) = delete; 64 | 65 | public: 66 | Conn(int fd) : file(fdopen(fd, "rb+")), recv_hdr{} {} 67 | ~Conn() { 68 | fclose(file); 69 | } 70 | 71 | Conn& operator=(Conn&& x) { 72 | if (this != &x) 73 | std::swap(file, x.file); 74 | return *this; 75 | } 76 | 77 | Msg::Id RecvMsg() { 78 | assert(recv_hdr.sz == 0 && "unread message parts"); 79 | if (!std::fread(&recv_hdr, sizeof(recv_hdr), 1, file)) 80 | return Msg::C_EXIT; // probably EOF 81 | return static_cast(recv_hdr.id); 82 | } 83 | 84 | void Read(void* buf, size_t size) { 85 | if (static_cast(recv_hdr.sz) < size) 86 | assert(false && "message too small"); 87 | if (!std::fread(buf, size, 1, file)) 88 | assert(false && "unable to read msg content"); 89 | recv_hdr.sz -= size; 90 | } 91 | template 92 | T Read() { 93 | T t; 94 | Read(&t, sizeof(t)); 95 | return t; 96 | } 97 | 98 | void SendMsgHdr(Msg::Id id, size_t size) { 99 | assert(size <= INT32_MAX); 100 | wr_hdr = Msg::Hdr{ id, static_cast(size) }; 101 | if (!std::fwrite(&wr_hdr, sizeof(wr_hdr), 1, file)) 102 | assert(false && "unable to write msg hdr"); 103 | } 104 | void Write(const void* buf, size_t size) { 105 | if (size > 0 && !std::fwrite(buf, size, 1, file)) 106 | assert(false && "unable to write msg content"); 107 | wr_hdr.sz -= size; 108 | if (wr_hdr.sz == 0) 109 | std::fflush(file); 110 | } 111 | void Sendfile(int fd, size_t size) { 112 | std::fflush(file); 113 | while (size) { 114 | ssize_t cnt = sendfile(fileno(file), fd, nullptr, size); 115 | if (cnt < 0) 116 | assert(false && "unable to write msg content (sendfile)"); 117 | size -= cnt; 118 | } 119 | wr_hdr.sz -= size; 120 | } 121 | template 122 | void SendMsg(Msg::Id id, const T& val) { 123 | SendMsgHdr(id, sizeof(T)); 124 | Write(&val, sizeof(T)); 125 | } 126 | 127 | template 128 | void SendMsgWithFd(Msg::Id id, const T& val, int sendfd) { 129 | SendMsgHdr(id, sizeof(T)); 130 | std::fflush(file); 131 | 132 | alignas(alignof(struct cmsghdr)) char cmsgbuf[CMSG_SPACE(sizeof(int))]; 133 | struct msghdr msg{}; 134 | void* vptr = reinterpret_cast(const_cast(&val)); 135 | struct iovec msgiov = {vptr, sizeof(T)}; 136 | 
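        // The struct travels as an ordinary iovec payload; the file descriptor
        // itself goes out-of-band as SCM_RIGHTS ancillary data, so the kernel
        // installs a duplicate of it in the receiving process.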
msg.msg_iov = &msgiov; 137 | msg.msg_iovlen = 1; 138 | msg.msg_control = &cmsgbuf; 139 | msg.msg_controllen = sizeof(cmsgbuf); 140 | 141 | struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); 142 | cmsg->cmsg_level = SOL_SOCKET; 143 | cmsg->cmsg_type = SCM_RIGHTS; 144 | cmsg->cmsg_len = CMSG_LEN(sizeof(int)); 145 | std::memcpy(CMSG_DATA(cmsg), &sendfd, sizeof(int)); 146 | 147 | ssize_t ret = sendmsg(fileno(file), &msg, 0); 148 | if (ret != sizeof(T)) { 149 | perror("sendmsg"); 150 | assert(false && "unable to send socket fd"); 151 | } 152 | } 153 | }; 154 | 155 | int CreateChild(char* argv0) { 156 | int fds[2]; 157 | int ret = socketpair(AF_UNIX, SOCK_STREAM, 0, &fds[0]); 158 | if (ret < 0) { 159 | perror("socketpair"); 160 | std::exit(1); 161 | } 162 | 163 | std::string client_config = std::to_string(fds[1]); 164 | 165 | std::vector exec_args; 166 | exec_args.reserve(ProgramArgs.size() + 4); 167 | exec_args.push_back(argv0); 168 | exec_args.push_back(client_config.c_str()); 169 | exec_args.push_back(Program.c_str()); 170 | for (auto& uarg : ProgramArgs) 171 | exec_args.push_back(uarg.c_str()); 172 | exec_args.push_back(nullptr); 173 | 174 | int memfd = -1; 175 | if (Stub.empty()) { 176 | static const unsigned char instrew_stub[] = { 177 | #include "client.inc" 178 | }; 179 | 180 | memfd = memfd_create("instrew_stub", MFD_CLOEXEC); 181 | if (memfd < 0) { 182 | perror("memfd_create"); 183 | std::exit(1); 184 | } 185 | size_t written = 0; 186 | size_t total = sizeof(instrew_stub); 187 | while (written < total) { 188 | auto wres = write(memfd, instrew_stub + written, total - written); 189 | if (wres < 0) { 190 | perror("write"); 191 | std::exit(1); 192 | } 193 | written += wres; 194 | } 195 | } 196 | 197 | pid_t forkres = fork(); 198 | if (forkres < 0) { 199 | perror("fork"); 200 | std::exit(1); 201 | } else if (forkres > 0) { 202 | close(fds[0]); 203 | if (memfd >= 0) 204 | fexecve(memfd, const_cast(&exec_args[0]), environ); 205 | else 206 | execve(Stub.c_str(), const_cast(&exec_args[0]), environ); 207 | perror("fexecve"); 208 | std::exit(1); 209 | } 210 | if (memfd >= 0) 211 | close(memfd); 212 | close(fds[1]); 213 | return fds[0]; 214 | } 215 | 216 | class RemoteMemory { 217 | private: 218 | const static size_t PG_SIZE = 0x1000; 219 | using Page = std::array; 220 | std::unordered_map> page_cache; 221 | Conn& conn; 222 | 223 | public: 224 | RemoteMemory(Conn& c) : conn(c) {} 225 | 226 | private: 227 | Page* GetPage(size_t page_addr) { 228 | const auto& page_it = page_cache.find(page_addr); 229 | if (page_it != page_cache.end()) 230 | return page_it->second.get(); 231 | 232 | struct { uint64_t addr; size_t buf_sz; } send_buf{page_addr, PG_SIZE}; 233 | conn.SendMsg(Msg::S_MEMREQ, send_buf); 234 | 235 | Msg::Id msgid = conn.RecvMsg(); 236 | if (msgid != Msg::C_MEMBUF) 237 | return nullptr; 238 | 239 | auto page = std::make_unique(); 240 | conn.Read(page->data(), page->size()); 241 | uint8_t failed = conn.Read(); 242 | if (failed) 243 | return nullptr; 244 | 245 | page_cache[page_addr] = std::move(page); 246 | 247 | return page_cache[page_addr].get(); 248 | }; 249 | 250 | public: 251 | size_t Get(size_t start, size_t end, uint8_t* buf) { 252 | size_t start_page = start & ~(PG_SIZE - 1); 253 | size_t end_page = end & ~(PG_SIZE - 1); 254 | size_t bytes_written = 0; 255 | for (size_t cur = start_page; cur <= end_page; cur += PG_SIZE) { 256 | Page* page = GetPage(cur); 257 | if (!page) 258 | break; 259 | size_t start_off = cur < start ? 
(start & (PG_SIZE - 1)) : 0; 260 | size_t end_off = cur + PG_SIZE > end ? (end & (PG_SIZE - 1)) : PG_SIZE; 261 | std::copy(page->data() + start_off, page->data() + end_off, buf + bytes_written); 262 | bytes_written += end_off - start_off; 263 | } 264 | return bytes_written; 265 | } 266 | }; 267 | 268 | } // end namespace 269 | 270 | struct IWConnection { 271 | const struct IWFunctions* fns; 272 | Conn& conn; 273 | 274 | IWServerConfig iwsc; 275 | IWClientConfig iwcc; 276 | bool need_iwcc; 277 | 278 | RemoteMemory remote_memory; 279 | instrew::Cache cache; 280 | 281 | IWConnection(const struct IWFunctions* fns, Conn& conn) 282 | : fns(fns), conn(conn), remote_memory(conn) {} 283 | 284 | private: 285 | FILE* OpenObjDump(uint64_t addr) { 286 | if (!dumpObjects) 287 | return nullptr; 288 | std::stringstream debug_out1_name; 289 | debug_out1_name << std::hex << "func_" << addr << ".elf"; 290 | return std::fopen(debug_out1_name.str().c_str(), "wb"); 291 | } 292 | 293 | public: 294 | bool CacheProbe(uint64_t addr, const uint8_t* hash) { 295 | (void) addr; 296 | auto res = cache.Get(hash); 297 | if (res.first < 0) 298 | return false; 299 | conn.SendMsgHdr(Msg::S_OBJECT, res.second); 300 | conn.Sendfile(res.first, res.second); 301 | close(res.first); 302 | return true; 303 | } 304 | 305 | void SendObject(uint64_t addr, const void* data, size_t size, 306 | const uint8_t* hash) { 307 | if (need_iwcc) { 308 | conn.SendMsg(Msg::S_INIT, iwcc); 309 | need_iwcc = false; 310 | } 311 | conn.SendMsgHdr(Msg::S_OBJECT, size); 312 | conn.Write(data, size); 313 | if (FILE* df = OpenObjDump(addr)) { 314 | std::fwrite(data, size, 1, df); 315 | std::fclose(df); 316 | } 317 | if (hash) 318 | cache.Put(hash, size, static_cast(data)); 319 | } 320 | 321 | int Run() { 322 | if (conn.RecvMsg() != Msg::C_INIT) { 323 | std::cerr << "error: expected C_INIT message" << std::endl; 324 | return 1; 325 | } 326 | iwsc = conn.Read(); 327 | // In mode 0, we need to respond with a client config. 
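        // (Handshake recap: the client's C_INIT carries the IWServerConfig read
        // above; in mode 0 the server must answer with an S_INIT holding the
        // IWClientConfig, which SendObject() above prepends lazily to the first
        // S_OBJECT it sends.)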
328 | need_iwcc = iwsc.tsc_server_mode == 0; 329 | 330 | IWState* state = fns->init(this); 331 | if (need_iwcc) 332 | SendObject(0, "", 0, nullptr); // this will send the client config 333 | 334 | while (true) { 335 | Msg::Id msgid = conn.RecvMsg(); 336 | if (msgid == Msg::C_EXIT) { 337 | fns->finalize(state); 338 | state = nullptr; 339 | return 0; 340 | } else if (msgid == Msg::C_TRANSLATE) { 341 | auto addr = conn.Read(); 342 | fns->translate(state, addr); 343 | } else if (msgid == Msg::C_FORK) { 344 | int child_fds[2]; 345 | int ret = socketpair(AF_UNIX, SOCK_STREAM, 0, &child_fds[0]); 346 | if (ret < 0) { 347 | int err = errno; 348 | perror("socketpair"); 349 | conn.SendMsg(Msg::S_FD, -err); 350 | continue; 351 | } 352 | 353 | pid_t pid = fork(); 354 | if (pid < 0) { 355 | conn.SendMsg(Msg::S_FD, -errno); 356 | close(child_fds[0]); 357 | close(child_fds[1]); 358 | } else if (pid == 0) { 359 | conn = child_fds[0]; 360 | close(child_fds[1]); 361 | } else { 362 | conn.SendMsgWithFd(Msg::S_FD, 0, child_fds[1]); 363 | close(child_fds[0]); 364 | close(child_fds[1]); 365 | } 366 | } else { 367 | std::cerr << "unexpected msg " << msgid << std::endl; 368 | return 1; 369 | } 370 | } 371 | } 372 | }; 373 | 374 | const struct IWServerConfig* iw_get_sc(IWConnection* iwc) { 375 | return &iwc->iwsc; 376 | } 377 | struct IWClientConfig* iw_get_cc(IWConnection* iwc) { 378 | return &iwc->iwcc; 379 | } 380 | size_t iw_readmem(IWConnection* iwc, uintptr_t addr, uintptr_t end, uint8_t* buf) { 381 | return iwc->remote_memory.Get(addr, end, buf); 382 | } 383 | bool iw_cache_probe(IWConnection* iwc, uintptr_t addr, const uint8_t* hash) { 384 | return iwc->CacheProbe(addr, hash); 385 | } 386 | void iw_sendobj(IWConnection* iwc, uintptr_t addr, const void* data, 387 | size_t size, const uint8_t* hash) { 388 | iwc->SendObject(addr, data, size, hash); 389 | } 390 | 391 | int iw_run_server(const struct IWFunctions* fns, int argc, char** argv) { 392 | Conn conn(CreateChild(argv[0])); 393 | IWConnection iwc{fns, conn}; 394 | return iwc.Run(); 395 | } 396 | -------------------------------------------------------------------------------- /server/rewriteserver.cc: -------------------------------------------------------------------------------- 1 | 2 | #include "callconv.h" 3 | #include "codegenerator.h" 4 | #include "config.h" 5 | #include "connection.h" 6 | #include "instrew-server-config.h" 7 | #include "optimizer.h" 8 | #include "version.h" 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | 36 | namespace { 37 | 38 | enum class DumpIR { 39 | Lift, CC, Opt, CodeGen, 40 | }; 41 | 42 | llvm::cl::opt enableProfiling("profile", llvm::cl::desc("Profile translation"), llvm::cl::cat(InstrewCategory)); 43 | llvm::cl::opt enableTracing("trace", llvm::cl::desc("Trace execution (lots of logs)"), llvm::cl::cat(InstrewCategory)); 44 | llvm::cl::opt perfSupport("perf", llvm::cl::desc("Enable perf support:"), 45 | llvm::cl::values( 46 | clEnumVal(0, "disabled"), 47 | clEnumVal(1, "write perf memory map"), 48 | clEnumVal(2, "write jitdump file") 49 | ), 50 | llvm::cl::cat(InstrewCategory)); 51 | llvm::cl::opt verifyLiftedIR("verify-lifted", llvm::cl::desc("Verify lifted IR"), llvm::cl::cat(InstrewCategory)); 52 | llvm::cl::bits dumpIR("dumpir", 
llvm::cl::desc("Dump IR after:"), 53 | llvm::cl::values( 54 | clEnumValN(DumpIR::Lift, "lift", "lifting"), 55 | clEnumValN(DumpIR::CC, "cc", "calling convention"), 56 | clEnumValN(DumpIR::Opt, "opt", "optimizations"), 57 | clEnumValN(DumpIR::CodeGen, "codegen", "code generation") 58 | ), llvm::cl::cat(InstrewCategory)); 59 | llvm::cl::opt safeCallRet("safe-call-ret", llvm::cl::desc("Don't clobber flags on call/ret instructions"), llvm::cl::cat(CodeGenCategory)); 60 | llvm::cl::opt enableCallret("callret", llvm::cl::desc("Enable call-ret lifting"), llvm::cl::cat(CodeGenCategory)); 61 | llvm::cl::opt enableFastcc("fastcc", llvm::cl::desc("Enable register-based calling convention (default: true)"), llvm::cl::init(true), llvm::cl::cat(CodeGenCategory)); 62 | llvm::cl::opt enablePIC("pic", llvm::cl::desc("Compile code position-independent"), llvm::cl::cat(CodeGenCategory)); 63 | 64 | } // end anonymous namespace 65 | 66 | #define SPTR_ADDR_SPACE 1 67 | 68 | static llvm::Function* CreateFunc(llvm::LLVMContext& ctx, 69 | const std::string name) { 70 | llvm::Type* sptr = llvm::PointerType::get(ctx, SPTR_ADDR_SPACE); 71 | llvm::Type* void_ty = llvm::Type::getVoidTy(ctx); 72 | auto* fn_ty = llvm::FunctionType::get(void_ty, {sptr}, false); 73 | auto linkage = llvm::GlobalValue::ExternalLinkage; 74 | return llvm::Function::Create(fn_ty, linkage, name); 75 | } 76 | 77 | static llvm::GlobalVariable* CreatePcBase(llvm::LLVMContext& ctx) { 78 | llvm::Type* i64 = llvm::Type::getInt64Ty(ctx); 79 | auto* pc_base_var = new llvm::GlobalVariable(i64, false, 80 | llvm::GlobalValue::ExternalLinkage); 81 | pc_base_var->setName("instrew_baseaddr"); 82 | llvm::Constant* lim_val = llvm::ConstantInt::get(i64, -1); 83 | llvm::Metadata* lim = llvm::ConstantAsMetadata::get(lim_val); 84 | llvm::MDNode* node = llvm::MDNode::get(ctx, {lim, lim}); 85 | pc_base_var->setMetadata("absolute_symbol", node); 86 | return pc_base_var; 87 | } 88 | 89 | struct IWState { 90 | private: 91 | IWConnection* iwc; 92 | const IWServerConfig* iwsc = nullptr; 93 | IWClientConfig* iwcc = nullptr; 94 | CallConv instrew_cc = CallConv::CDECL; 95 | 96 | LLConfig* rlcfg; 97 | llvm::LLVMContext ctx; 98 | llvm::Constant* pc_base; 99 | llvm::SmallVector helper_fns; 100 | std::unique_ptr mod; 101 | 102 | Optimizer optimizer; 103 | llvm::SmallVector obj_buffer; 104 | CodeGenerator codegen; 105 | 106 | llvm::SmallVector hashBuffer; 107 | 108 | std::chrono::steady_clock::duration dur_predecode{}; 109 | std::chrono::steady_clock::duration dur_lifting{}; 110 | std::chrono::steady_clock::duration dur_instrument{}; 111 | std::chrono::steady_clock::duration dur_llvm_opt{}; 112 | std::chrono::steady_clock::duration dur_llvm_codegen{}; 113 | 114 | void appendConfig(llvm::SmallVectorImpl& buffer) const { 115 | struct { 116 | uint32_t version = 2; 117 | uint8_t safeCallRet = safeCallRet; 118 | uint8_t enableCallret = enableCallret; 119 | uint8_t enableFastcc = enableFastcc; 120 | uint8_t enablePIC = enablePIC; 121 | 122 | uint32_t guestArch; 123 | uint32_t hostArch; 124 | uint32_t stackAlignment; 125 | } config; 126 | config.guestArch = iwsc->tsc_guest_arch; 127 | config.hostArch = iwsc->tsc_host_arch; 128 | config.stackAlignment = iwsc->tsc_stack_alignment; 129 | 130 | std::size_t start = buffer.size(); 131 | buffer.resize_for_overwrite(buffer.size() + sizeof(config)); 132 | std::memcpy(&buffer[start], &config, sizeof(config)); 133 | } 134 | 135 | public: 136 | 137 | IWState(IWConnection* iwc) 138 | : codegen(*iw_get_sc(iwc), enablePIC, obj_buffer) { 139 | 
        this->iwc = iwc;
140 |         iwsc = iw_get_sc(iwc);
141 |         iwcc = iw_get_cc(iwc);
142 |
143 | #ifdef NDEBUG
144 |         // Discard non-global value names in release builds.
145 |         ctx.setDiscardValueNames(true);
146 | #endif
147 |
148 |         rlcfg = ll_config_new();
149 |         ll_config_enable_verify_ir(rlcfg, verifyLiftedIR);
150 |         ll_config_set_call_ret_clobber_flags(rlcfg, !safeCallRet);
151 |         ll_config_set_sptr_addrspace(rlcfg, SPTR_ADDR_SPACE);
152 |         ll_config_enable_overflow_intrinsics(rlcfg, false);
153 |         if (enableCallret) {
154 |             auto call_fn = CreateFunc(ctx, "instrew_call_cdecl");
155 |             helper_fns.push_back(call_fn);
156 |             ll_config_set_tail_func(rlcfg, llvm::wrap(call_fn));
157 |             ll_config_set_call_func(rlcfg, llvm::wrap(call_fn));
158 |         }
159 |         if (iwsc->tsc_guest_arch == EM_X86_64) {
160 |             ll_config_set_architecture(rlcfg, "x86-64");
161 |
162 |             auto syscall_fn = CreateFunc(ctx, "syscall");
163 |             helper_fns.push_back(syscall_fn);
164 |             ll_config_set_syscall_impl(rlcfg, llvm::wrap(syscall_fn));
165 |
166 |             // cpuinfo function is CPUID on x86-64.
167 |             llvm::Type* i32 = llvm::Type::getInt32Ty(ctx);
168 |             llvm::Type* i64 = llvm::Type::getInt64Ty(ctx);
169 |             auto i64_i64 = llvm::StructType::get(i64, i64);
170 |             auto cpuinfo_fn_ty = llvm::FunctionType::get(i64_i64, {i32, i32}, false);
171 |             auto linkage = llvm::GlobalValue::ExternalLinkage;
172 |             auto cpuinfo_fn = llvm::Function::Create(cpuinfo_fn_ty, linkage, "cpuid");
173 |             helper_fns.push_back(cpuinfo_fn);
174 |             ll_config_set_cpuinfo_func(rlcfg, llvm::wrap(cpuinfo_fn));
175 |         } else if (iwsc->tsc_guest_arch == EM_RISCV) {
176 |             ll_config_set_architecture(rlcfg, "rv64");
177 |
178 |             auto syscall_fn = CreateFunc(ctx, "syscall_rv64");
179 |             helper_fns.push_back(syscall_fn);
180 |             ll_config_set_syscall_impl(rlcfg, llvm::wrap(syscall_fn));
181 |         } else if (iwsc->tsc_guest_arch == EM_AARCH64) {
182 |             ll_config_set_architecture(rlcfg, "aarch64");
183 |
184 |             auto syscall_fn = CreateFunc(ctx, "syscall_aarch64");
185 |             helper_fns.push_back(syscall_fn);
186 |             ll_config_set_syscall_impl(rlcfg, llvm::wrap(syscall_fn));
187 |         } else {
188 |             std::cerr << "error: unsupported architecture" << std::endl;
189 |             abort();
190 |         }
191 |
192 |         // Backward compatibility -- only one fast CC per guest--host pair now.
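        // GetFastCC() picks the one register-based convention the client's
        // dispatcher implements for this host/guest pair (e.g. the HHVM-based
        // convention used by the x86-64 dispatcher); CDECL remains the
        // portable fallback.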
193 | if (enableFastcc) 194 | instrew_cc = GetFastCC(iwsc->tsc_host_arch, iwsc->tsc_guest_arch); 195 | else 196 | instrew_cc = CallConv::CDECL; 197 | iwcc->tc_callconv = GetCallConvClientNumber(instrew_cc); 198 | iwcc->tc_profile = enableProfiling; 199 | iwcc->tc_perf = perfSupport; 200 | iwcc->tc_print_trace = enableTracing; 201 | 202 | llvm::GlobalVariable* pc_base_var = CreatePcBase(ctx); 203 | pc_base = llvm::ConstantExpr::getPtrToInt(pc_base_var, 204 | llvm::Type::getInt64Ty(ctx)); 205 | 206 | mod = std::make_unique("mod", ctx); 207 | #if LL_LLVM_MAJOR >= 13 208 | if (iwsc->tsc_stack_alignment != 0) 209 | mod->setOverrideStackAlignment(iwsc->tsc_stack_alignment); 210 | #endif 211 | #if LL_LLVM_MAJOR >= 19 212 | mod->setIsNewDbgInfoFormat(true); 213 | #endif 214 | 215 | #if LL_LLVM_MAJOR < 17 216 | mod->getGlobalList().push_back(pc_base_var); 217 | #else 218 | mod->insertGlobalVariable(pc_base_var); 219 | #endif 220 | for (const auto& helper_fn : helper_fns) 221 | mod->getFunctionList().push_back(helper_fn); 222 | 223 | llvm::SmallVector used; 224 | used.push_back(pc_base_var); 225 | 226 | for (llvm::Function& fn : mod->functions()) 227 | used.push_back(&fn); 228 | 229 | llvm::Type* ptrTy = llvm::PointerType::get(ctx, 0); 230 | llvm::ArrayType* used_ty = llvm::ArrayType::get(ptrTy, used.size()); 231 | llvm::GlobalVariable* llvm_used = new llvm::GlobalVariable( 232 | *mod, used_ty, /*const=*/false, 233 | llvm::GlobalValue::AppendingLinkage, 234 | llvm::ConstantArray::get(used_ty, used), "llvm.used"); 235 | llvm_used->setSection("llvm.metadata"); 236 | 237 | codegen.GenerateCode(mod.get()); 238 | iw_sendobj(iwc, 0, obj_buffer.data(), obj_buffer.size(), nullptr); 239 | 240 | for (llvm::Function& fn : mod->functions()) 241 | if (fn.hasExternalLinkage() && !fn.empty()) 242 | fn.deleteBody(); 243 | 244 | appendConfig(hashBuffer); 245 | optimizer.appendConfig(hashBuffer); 246 | codegen.appendConfig(hashBuffer); 247 | } 248 | ~IWState() { 249 | if (enableProfiling) { 250 | std::cerr << "Server profile: " << std::dec 251 | << std::chrono::duration_cast(dur_predecode).count() 252 | << "ms predecode; " 253 | << std::chrono::duration_cast(dur_lifting).count() 254 | << "ms lifting; " 255 | << std::chrono::duration_cast(dur_instrument).count() 256 | << "ms instrumentation; " 257 | << std::chrono::duration_cast(dur_llvm_opt).count() 258 | << "ms llvm_opt; " 259 | << std::chrono::duration_cast(dur_llvm_codegen).count() 260 | << "ms llvm_codegen" 261 | << std::endl; 262 | } 263 | llvm::reportAndResetTimings(&llvm::errs()); 264 | ll_config_free(rlcfg); 265 | } 266 | 267 | void Translate(uintptr_t addr) { 268 | auto time_predecode_start = std::chrono::steady_clock::now(); 269 | 270 | // Optionally generate position-independent code, where the offset 271 | // can be adjusted using relocations. 
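        // With PIC enabled, lifted code addresses guest memory relative to the
        // instrew_baseaddr symbol (pc_base), so final placement is deferred to
        // client-side relocation; this is also why the cache hash below may
        // use address 0 for PIC objects, making them reusable across runs.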
272 | if (enablePIC) 273 | ll_config_set_pc_base(rlcfg, addr, llvm::wrap(pc_base)); 274 | 275 | LLFunc* rlfn = ll_func_new(llvm::wrap(mod.get()), rlcfg); 276 | RellumeMemAccessCb accesscb = [](size_t addr, uint8_t* buf, size_t bufsz, void* user_arg) { 277 | auto iwc = reinterpret_cast<IWConnection*>(user_arg); 278 | return iw_readmem(iwc, addr, addr + bufsz, buf); 279 | }; 280 | int fail = ll_func_decode_cfg(rlfn, addr, accesscb, reinterpret_cast<void*>(iwc)); 281 | if (fail) { 282 | std::cerr << "error: decode failed 0x" << std::hex << addr << std::endl; 283 | ll_func_dispose(rlfn); 284 | iw_sendobj(iwc, addr, nullptr, 0, nullptr); 285 | return; 286 | } 287 | 288 | size_t hashConfigEnd = hashBuffer.size(); 289 | 290 | // Store the address only for non-PIC code; other addresses are relative. 291 | uint64_t hashAddr = enablePIC ? 0 : addr; 292 | hashBuffer.resize_for_overwrite(hashConfigEnd + sizeof(hashAddr)); 293 | memcpy(&hashBuffer[hashConfigEnd], &hashAddr, sizeof(hashAddr)); 294 | 295 | const struct RellumeCodeRange* ranges = ll_func_ranges(rlfn); 296 | for (; ranges->start || ranges->end; ranges++) { 297 | uint64_t rel_start = ranges->start - addr; 298 | uint64_t size = ranges->end - ranges->start; 299 | 300 | size_t bufEnd = hashBuffer.size(); 301 | hashBuffer.resize_for_overwrite(bufEnd + 2 * sizeof(uint64_t) + size); 302 | memcpy(&hashBuffer[bufEnd], &rel_start, sizeof(uint64_t)); 303 | memcpy(&hashBuffer[bufEnd + sizeof(uint64_t)], &size, sizeof(uint64_t)); 304 | iw_readmem(iwc, ranges->start, ranges->end, &hashBuffer[bufEnd + 2 * sizeof(uint64_t)]); 305 | } 306 | 307 | uint8_t hash[SHA_DIGEST_LENGTH]; 308 | SHA1(hashBuffer.data(), hashBuffer.size(), hash); 309 | hashBuffer.truncate(hashConfigEnd); 310 | 311 | if (iw_cache_probe(iwc, addr, hash)) { 312 | ll_func_dispose(rlfn); 313 | if (enableProfiling) 314 | dur_predecode += std::chrono::steady_clock::now() - time_predecode_start; 315 | return; 316 | } 317 | 318 | auto time_lifting_start = std::chrono::steady_clock::now(); 319 | LLVMValueRef fn_wrapped = ll_func_lift(rlfn); 320 | if (!fn_wrapped) { 321 | std::cerr << "error: lift failed 0x" << std::hex << addr << "\n"; 322 | ll_func_dispose(rlfn); 323 | iw_sendobj(iwc, addr, nullptr, 0, nullptr); 324 | return; 325 | } 326 | 327 | llvm::Function* fn = llvm::unwrap<llvm::Function>(fn_wrapped); 328 | fn->setName("S0_" + llvm::Twine::utohexstr(addr)); 329 | ll_func_dispose(rlfn); 330 | 331 | if (dumpIR.isSet(DumpIR::Lift)) 332 | mod->print(llvm::errs(), nullptr); 333 | 334 | auto time_instrument_start = std::chrono::steady_clock::now(); 335 | fn = ChangeCallConv(fn, instrew_cc); 336 | if (dumpIR.isSet(DumpIR::CC)) 337 | mod->print(llvm::errs(), nullptr); 338 | 339 | auto time_llvm_opt_start = std::chrono::steady_clock::now(); 340 | optimizer.Optimize(fn); 341 | if (dumpIR.isSet(DumpIR::Opt)) 342 | mod->print(llvm::errs(), nullptr); 343 | 344 | auto time_llvm_codegen_start = std::chrono::steady_clock::now(); 345 | codegen.GenerateCode(mod.get()); 346 | if (dumpIR.isSet(DumpIR::CodeGen)) 347 | mod->print(llvm::errs(), nullptr); 348 | 349 | iw_sendobj(iwc, addr, obj_buffer.data(), obj_buffer.size(), hash); 350 | 351 | // Remove unused functions and dead prototypes. Having many prototypes 352 | // causes some compile-time overhead.
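// llvm::make_early_inc_range increments the iterator before yielding each
// function, so eraseFromParent() below cannot invalidate the loop; functions
// referenced by llvm.used are never use_empty() and thus survive.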
353 | for (auto& glob_fn : llvm::make_early_inc_range(*mod)) 354 | if (glob_fn.use_empty()) 355 | glob_fn.eraseFromParent(); 356 | 357 | if (enableProfiling) { 358 | dur_predecode += time_lifting_start - time_predecode_start; 359 | dur_lifting += time_instrument_start - time_lifting_start; 360 | dur_instrument += time_llvm_opt_start - time_instrument_start; 361 | dur_llvm_opt += time_llvm_codegen_start - time_llvm_opt_start; 362 | dur_llvm_codegen += std::chrono::steady_clock::now() - time_llvm_codegen_start; 363 | } 364 | } 365 | }; 366 | 367 | 368 | int main(int argc, char** argv) { 369 | llvm::cl::HideUnrelatedOptions({&InstrewCategory, &CodeGenCategory}); 370 | auto& optionMap = llvm::cl::getRegisteredOptions(); 371 | optionMap["time-passes"]->setHiddenFlag(llvm::cl::Hidden); 372 | llvm::cl::SetVersionPrinter([](llvm::raw_ostream& os) { 373 | os << "Instrew " << instrew::instrewVersion << "\n"; 374 | }); 375 | llvm::cl::ParseCommandLineOptions(argc, argv); 376 | 377 | static const IWFunctions iwf = { 378 | /*.init=*/[](IWConnection* iwc) { 379 | return new IWState(iwc); 380 | }, 381 | /*.translate=*/[](IWState* state, uintptr_t addr) { 382 | state->Translate(addr); 383 | }, 384 | /*.finalize=*/[](IWState* state) { 385 | delete state; 386 | }, 387 | }; 388 | 389 | return iw_run_server(&iwf, argc, argv); 390 | } 391 | -------------------------------------------------------------------------------- /client/dispatch.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | 15 | // Prototype to make compilers happy. This is used in the assembly HHVM 16 | // dispatcher on x86-64 below. 17 | uintptr_t resolve_func(struct CpuState*, uintptr_t, struct RtldPatchData*); 18 | 19 | static void 20 | print_trace(struct CpuState* cpu_state, uintptr_t addr) { 21 | uint64_t* cpu_regs = (uint64_t*) cpu_state->regdata; 22 | dprintf(2, "Trace 0x%lx\n", addr); 23 | if (cpu_state->state->tc.tc_print_regs) { 24 | dprintf(2, "RAX=%lx RBX=%lx RCX=%lx RDX=%lx\n", cpu_regs[1], cpu_regs[4], cpu_regs[2], cpu_regs[3]); 25 | dprintf(2, "RSI=%lx RDI=%lx RBP=%lx RSP=%lx\n", cpu_regs[7], cpu_regs[8], cpu_regs[6], cpu_regs[5]); 26 | dprintf(2, "R8 =%lx R9 =%lx R10=%lx R11=%lx\n", cpu_regs[9], cpu_regs[10], cpu_regs[11], cpu_regs[12]); 27 | dprintf(2, "R12=%lx R13=%lx R14=%lx R15=%lx\n", cpu_regs[13], cpu_regs[14], cpu_regs[15], cpu_regs[16]); 28 | dprintf(2, "RIP=%lx\n", addr); 29 | dprintf(2, "XMM0=%lx:%lx XMM1=%lx:%lx\n", cpu_regs[18], cpu_regs[19], cpu_regs[20], cpu_regs[21]); 30 | dprintf(2, "XMM2=%lx:%lx XMM3=%lx:%lx\n", cpu_regs[22], cpu_regs[23], cpu_regs[24], cpu_regs[25]); 31 | dprintf(2, "XMM4=%lx:%lx XMM5=%lx:%lx\n", cpu_regs[26], cpu_regs[27], cpu_regs[28], cpu_regs[29]); 32 | dprintf(2, "XMM6=%lx:%lx XMM7=%lx:%lx\n", cpu_regs[30], cpu_regs[31], cpu_regs[32], cpu_regs[33]); 33 | } 34 | } 35 | 36 | #define QUICK_TLB_BITS 10 37 | #define QUICK_TLB_BITOFF 4 // must be either 1, 2, 3, or 4 38 | // Clang's inline assembly doesn't support expressions for index scale. 
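// (the SIB scale in [base + scale*index] must be the literal 1, 2, 4, or 8),
// so the intended definition below is expanded by hand in the #if chain: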
39 | // #define QUICK_TLB_IDXSCALE (1 << (4-QUICK_TLB_BITOFF)) 40 | #if QUICK_TLB_BITOFF == 4 41 | #define QUICK_TLB_IDXSCALE 1 42 | #elif QUICK_TLB_BITOFF == 3 43 | #define QUICK_TLB_IDXSCALE 2 44 | #elif QUICK_TLB_BITOFF == 2 45 | #define QUICK_TLB_IDXSCALE 4 46 | #elif QUICK_TLB_BITOFF == 1 47 | #define QUICK_TLB_IDXSCALE 8 48 | #else 49 | #error "invalid QUICK_TLB_BITOFF" 50 | #endif 51 | #define QUICK_TLB_HASH(addr) (((addr) >> QUICK_TLB_BITOFF) & ((1 << QUICK_TLB_BITS) - 1)) 52 | 53 | GNU_FORCE_EXTERN 54 | uintptr_t 55 | resolve_func(struct CpuState* cpu_state, uintptr_t addr, 56 | struct RtldPatchData* patch_data) { 57 | struct State* state = cpu_state->state; 58 | 59 | if (patch_data) 60 | addr = patch_data->sym_addr; 61 | 62 | void* func; 63 | int retval = rtld_resolve(&state->rtld, addr, &func); 64 | if (UNLIKELY(retval < 0)) { 65 | struct timespec start_time; 66 | struct timespec end_time; 67 | if (UNLIKELY(state->tc.tc_profile)) 68 | clock_gettime(CLOCK_MONOTONIC, &start_time); 69 | 70 | void* obj_base; 71 | size_t obj_size; 72 | retval = translator_get(&state->translator, addr, &obj_base, &obj_size); 73 | if (retval < 0) 74 | goto error; 75 | 76 | retval = rtld_add_object(&state->rtld, obj_base, obj_size, addr); 77 | if (retval < 0) 78 | goto error; 79 | retval = rtld_resolve(&state->rtld, addr, &func); 80 | if (retval < 0) 81 | goto error; 82 | 83 | if (UNLIKELY(state->tc.tc_profile)) { 84 | clock_gettime(CLOCK_MONOTONIC, &end_time); 85 | size_t time_ns = (end_time.tv_sec - start_time.tv_sec) * 1000000000 86 | + (end_time.tv_nsec - start_time.tv_nsec); 87 | state->rew_time += time_ns; 88 | } 89 | } 90 | 91 | // If we want a trace, don't update quick TLB. This forces a full resolve on 92 | // every dispatch, yielding a complete trace. Tracing is slow anyway, so we 93 | // don't care about performance when tracing is active. 94 | if (LIKELY(!state->tc.tc_print_trace)) { 95 | // If possible, patch code which caused us to get here. 96 | rtld_patch(patch_data, func); 97 | 98 | // Update quick TLB 99 | uintptr_t hash = QUICK_TLB_HASH(addr); 100 | cpu_state->quick_tlb[hash][0] = addr; 101 | cpu_state->quick_tlb[hash][1] = (uintptr_t) func; 102 | } else { 103 | print_trace(cpu_state, addr); 104 | } 105 | 106 | return (uintptr_t) func; 107 | 108 | error: 109 | dprintf(2, "error resolving address %lx: %u\n", addr, -retval); 110 | _exit(retval); 111 | } 112 | 113 | // Used for PLT. 
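// The non-inline declaration makes the C99 inline definition below the
// externally visible definition of dispatch_cdecl, giving the PLT stub
// (presumably plt.inc) a real symbol to call.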
114 | void dispatch_cdecl(uint64_t*); 115 | 116 | inline void dispatch_cdecl(uint64_t* cpu_regs) { 117 | struct CpuState* cpu_state = CPU_STATE_FROM_REGS(cpu_regs); 118 | 119 | uintptr_t addr = cpu_regs[0]; 120 | uintptr_t hash = QUICK_TLB_HASH(addr); 121 | 122 | uintptr_t func = cpu_state->quick_tlb[hash][1]; 123 | if (UNLIKELY(cpu_state->quick_tlb[hash][0] != addr)) 124 | func = resolve_func(cpu_state, addr, NULL); 125 | 126 | void(* func_p)(void*); 127 | *((void**) &func_p) = (void*) func; 128 | func_p(cpu_regs); 129 | } 130 | 131 | static void 132 | dispatch_cdecl_loop(uint64_t* cpu_regs) { 133 | while (true) 134 | dispatch_cdecl(cpu_regs); 135 | } 136 | 137 | #ifdef __x86_64__ 138 | 139 | __attribute__((noreturn)) extern void dispatch_hhvm(uint64_t* cpu_state); 140 | void dispatch_hhvm_tail(); 141 | void dispatch_hhvm_fullresolve(); 142 | 143 | __attribute__((noreturn)) extern void dispatch_regcall(uint64_t* cpu_state); 144 | void dispatch_regcall_tail(); 145 | void dispatch_regcall_fullresolve(); 146 | 147 | #define QUICK_TLB_OFFSET_ASM(dest_reg, addr_reg) \ 148 | lea dest_reg, [addr_reg * 4]; \ 149 | and dest_reg, ((1 << QUICK_TLB_BITS) - 1) << (2 + QUICK_TLB_BITOFF); 150 | 151 | ASM_BLOCK( 152 | .intel_syntax noprefix; 153 | 154 | // Stores result in r14, preserves all other registers 155 | .align 16; 156 | .type dispatch_hhvm_fullresolve, @function; 157 | dispatch_hhvm_fullresolve: // stack alignment: cdecl 158 | // Save all cdecl caller-saved registers. 159 | push rax; 160 | push rcx; 161 | push rdx; 162 | push rsi; 163 | push rdi; 164 | push r8; 165 | push r9; 166 | push r10; 167 | push r11; 168 | mov rdi, [r12 - CPU_STATE_REGDATA_OFFSET]; // cpu_state 169 | mov rsi, rbx; // addr 170 | mov rdx, r14; // patch data 171 | call resolve_func; 172 | mov r14, rax; // return value 173 | // Restore caller-saved registers.
174 | pop r11; 175 | pop r10; 176 | pop r9; 177 | pop r8; 178 | pop rdi; 179 | pop rsi; 180 | pop rdx; 181 | pop rcx; 182 | pop rax; 183 | jmp r14; 184 | .size dispatch_hhvm_fullresolve, .-dispatch_hhvm_fullresolve; 185 | 186 | .align 16; 187 | .global dispatch_hhvm_tail; 188 | .type dispatch_hhvm_tail, @function; 189 | dispatch_hhvm_tail: // stack alignment: cdecl 190 | mov r14, rbx; 191 | and r14, ((1 << QUICK_TLB_BITS) - 1) << QUICK_TLB_BITOFF; 192 | cmp rbx, [r12 + QUICK_TLB_IDXSCALE*r14 - CPU_STATE_REGDATA_OFFSET + CPU_STATE_QTLB_OFFSET]; 193 | jne 1f; 194 | jmp [r12 + QUICK_TLB_IDXSCALE*r14 - CPU_STATE_REGDATA_OFFSET + CPU_STATE_QTLB_OFFSET + 8]; 195 | .align 16; 196 | 1: xor r14, r14; // zero patch data 197 | jmp dispatch_hhvm_fullresolve; 198 | .size dispatch_hhvm_tail, .-dispatch_hhvm_tail; 199 | 200 | .align 16; 201 | .global dispatch_hhvm_call; 202 | .type dispatch_hhvm_call, @function; 203 | dispatch_hhvm_call: // stack alignment: hhvm 204 | mov r14, rbx; 205 | and r14, ((1 << QUICK_TLB_BITS) - 1) << QUICK_TLB_BITOFF; 206 | cmp rbx, [r12 + QUICK_TLB_IDXSCALE*r14 - CPU_STATE_REGDATA_OFFSET + CPU_STATE_QTLB_OFFSET]; 207 | jne 1f; 208 | call [r12 + QUICK_TLB_IDXSCALE*r14 - CPU_STATE_REGDATA_OFFSET + CPU_STATE_QTLB_OFFSET + 8]; 209 | ret; 210 | .align 16; 211 | 1: xor r14, r14; // zero patch data 212 | call dispatch_hhvm_fullresolve; 213 | ret; 214 | .size dispatch_hhvm_call, .-dispatch_hhvm_call; 215 | 216 | .align 16; 217 | .global dispatch_hhvm; 218 | .type dispatch_hhvm, @function; 219 | dispatch_hhvm: 220 | mov r12, rdi; // cpu_regs 221 | // Load HHVM registers 222 | mov rbx, [r12 + 0 * 8]; 223 | mov rax, [r12 + 1 * 8]; 224 | mov rcx, [r12 + 2 * 8]; 225 | mov rdx, [r12 + 3 * 8]; 226 | mov rbp, [r12 + 4 * 8]; 227 | mov r15, [r12 + 5 * 8]; 228 | mov r13, [r12 + 6 * 8]; 229 | mov rsi, [r12 + 7 * 8]; 230 | mov rdi, [r12 + 8 * 8]; 231 | mov r8, [r12 + 9 * 8]; 232 | mov r9, [r12 + 10 * 8]; 233 | mov r10, [r12 + 11 * 8]; 234 | mov r11, [r12 + 12 * 8]; 235 | 236 | jmp 4f; 237 | 238 | .align 16; 239 | // This is the quick_tlb hot loop. 240 | 2: call [r12 + QUICK_TLB_IDXSCALE*r14 - CPU_STATE_REGDATA_OFFSET + CPU_STATE_QTLB_OFFSET + 8]; 241 | 3: mov r14, rbx; 242 | and r14, ((1 << QUICK_TLB_BITS) - 1) << QUICK_TLB_BITOFF; 243 | cmp rbx, [r12 + QUICK_TLB_IDXSCALE*r14 - CPU_STATE_REGDATA_OFFSET + CPU_STATE_QTLB_OFFSET]; 244 | je 2b; 245 | 246 | // This code isn't exactly cold, but it should not run often. 247 | // If we don't have addr in the quick_tlb, do a full resolve. 248 | 4: xor r14, r14; // zero patch data 249 | call dispatch_hhvm_fullresolve; 250 | jmp 3b; 251 | .size dispatch_hhvm, .-dispatch_hhvm; 252 | 253 | .att_syntax; 254 | ); 255 | 256 | ASM_BLOCK( 257 | .intel_syntax noprefix; 258 | 259 | // Stores result in r10, preserves all other registers 260 | .align 16; 261 | .type dispatch_regcall_fullresolve, @function; 262 | dispatch_regcall_fullresolve: 263 | // Save all cdecl caller-saved registers. 264 | push rax; 265 | push rcx; 266 | push rdx; 267 | push rsi; 268 | push rdi; 269 | push r8; 270 | push r9; 271 | mov rdi, [rax - CPU_STATE_REGDATA_OFFSET]; // cpu_state 272 | mov rsi, rcx; // addr 273 | mov rdx, r10; // patch data 274 | call resolve_func; 275 | mov r10, rax; // return value 276 | // Restore caller-saved registers.
277 | pop r9; 278 | pop r8; 279 | pop rdi; 280 | pop rsi; 281 | pop rdx; 282 | pop rcx; 283 | pop rax; 284 | jmp r10; 285 | .size dispatch_regcall_fullresolve, .-dispatch_regcall_fullresolve; 286 | 287 | .align 16; 288 | .global dispatch_regcall_tail; 289 | .type dispatch_regcall_tail, @function; 290 | dispatch_regcall_tail: 291 | mov r10, rcx; 292 | and r10, ((1 << QUICK_TLB_BITS) - 1) << QUICK_TLB_BITOFF; 293 | cmp rcx, [rax + QUICK_TLB_IDXSCALE*r10 - CPU_STATE_REGDATA_OFFSET + CPU_STATE_QTLB_OFFSET]; 294 | jne 1f; 295 | jmp [rax + QUICK_TLB_IDXSCALE*r10 - CPU_STATE_REGDATA_OFFSET + CPU_STATE_QTLB_OFFSET + 8]; 296 | .align 16; 297 | 1: xor r10, r10; // zero patch data 298 | jmp dispatch_regcall_fullresolve; 299 | .size dispatch_regcall_tail, .-dispatch_regcall_tail; 300 | 301 | .align 16; 302 | .global dispatch_regcall; 303 | .type dispatch_regcall, @function; 304 | dispatch_regcall: 305 | mov rax, rdi; // cpu_regs 306 | // Load regcall registers 307 | mov rcx, [rax + 0 * 8]; 308 | mov rdx, [rax + 1 * 8]; 309 | mov rdi, [rax + 2 * 8]; 310 | mov rsi, [rax + 3 * 8]; 311 | mov r8, [rax + 4 * 8]; 312 | mov r9, [rax + 5 * 8]; 313 | mov r12, [rax + 7 * 8]; 314 | mov r13, [rax + 8 * 8]; 315 | mov r14, [rax + 9 * 8]; 316 | mov r15, [rax + 10 * 8]; 317 | 318 | jmp 4f; 319 | 320 | .align 16; 321 | // This is the quick_tlb hot loop. 322 | 2: call [rax + QUICK_TLB_IDXSCALE*r10 - CPU_STATE_REGDATA_OFFSET + CPU_STATE_QTLB_OFFSET + 8]; 323 | 3: mov r10, rcx; 324 | and r10, ((1 << QUICK_TLB_BITS) - 1) << QUICK_TLB_BITOFF; 325 | cmp rcx, [rax + QUICK_TLB_IDXSCALE*r10 - CPU_STATE_REGDATA_OFFSET + CPU_STATE_QTLB_OFFSET]; 326 | je 2b; 327 | 328 | // This code isn't exactly cold, but it should not run often. 329 | // If we don't have addr in the quick_tlb, do a full resolve.
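// Note: fullresolve ends by jumping straight into the resolved code, so the
// return address pushed by the call below leads back to the TLB check at 3:
// once the translated chunk returns.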
330 | 4: xor r10, r10; // zero patch data 331 | call dispatch_regcall_fullresolve; 332 | jmp 3b; 333 | .size dispatch_regcall, .-dispatch_regcall; 334 | 335 | .att_syntax; 336 | ); 337 | 338 | #endif // defined(__x86_64__) 339 | 340 | #if defined(__aarch64__) 341 | 342 | void dispatch_aapcsx(); 343 | void dispatch_aapcsx_fullresolve(); 344 | void dispatch_aapcsx_loop(); 345 | 346 | ASM_BLOCK( 347 | .align 16; 348 | .global dispatch_aapcsx; 349 | .type dispatch_aapcsx, @function; 350 | dispatch_aapcsx: 351 | add x17, x20, -CPU_STATE_REGDATA_OFFSET + CPU_STATE_QTLB_OFFSET; 352 | and x16, x0, ((1 << QUICK_TLB_BITS) - 1) << QUICK_TLB_BITOFF; 353 | add x17, x17, x16, lsl (4-QUICK_TLB_BITOFF); 354 | ldp x16, x17, [x17]; 355 | cmp x16, x0; 356 | b.ne 1f; 357 | br x17; 358 | 1: mov x16, xzr; // zero dispatch data 359 | b dispatch_aapcsx_fullresolve; 360 | .size dispatch_aapcsx, .-dispatch_aapcsx; 361 | 362 | .align 16; 363 | .type dispatch_aapcsx_loop, @function; 364 | dispatch_aapcsx_loop: 365 | mov x20, x0; // reg_data 366 | ldr x0, [x0]; // addr 367 | ldr x1, [x20, 0x8]; 368 | ldr x2, [x20, 0x10]; 369 | ldr x3, [x20, 0x18]; 370 | ldr x4, [x20, 0x20]; 371 | ldr x5, [x20, 0x28]; 372 | ldr x6, [x20, 0x38]; 373 | ldr x7, [x20, 0x40]; 374 | b 2f; 375 | 376 | .align 16; 377 | 1: blr x17; 378 | 2: add x17, x20, -CPU_STATE_REGDATA_OFFSET + CPU_STATE_QTLB_OFFSET; 379 | and x16, x0, ((1 << QUICK_TLB_BITS) - 1) << QUICK_TLB_BITOFF; 380 | add x17, x17, x16, lsl (4-QUICK_TLB_BITOFF); 381 | ldp x16, x17, [x17]; 382 | cmp x16, x0; 383 | b.eq 1b; 384 | 385 | mov x16, xzr; // zero patch data 386 | bl dispatch_aapcsx_fullresolve; 387 | b 2b; 388 | .size dispatch_aapcsx_loop, .-dispatch_aapcsx_loop; 389 | 390 | .align 16; 391 | .global dispatch_aapcsx_fullresolve; 392 | .type dispatch_aapcsx_fullresolve, @function; 393 | dispatch_aapcsx_fullresolve: 394 | sub sp, sp, 0x2b0; 395 | stp x18, x30, [sp]; 396 | stp x0, x1, [sp, 0x10]; 397 | stp x2, x3, [sp, 0x20]; 398 | stp x4, x5, [sp, 0x30]; 399 | stp x6, x7, [sp, 0x40]; 400 | stp x8, x9, [sp, 0x50]; 401 | stp x10, x11, [sp, 0x60]; 402 | stp x12, x13, [sp, 0x70]; 403 | stp x14, x15, [sp, 0x80]; 404 | stp q0, q1, [sp, 0x90]; 405 | stp q2, q3, [sp, 0xb0]; 406 | stp q4, q5, [sp, 0xd0]; 407 | stp q6, q7, [sp, 0xf0]; 408 | stp q6, q7, [sp, 0x110]; 409 | stp q8, q9, [sp, 0x130]; 410 | stp q10, q11, [sp, 0x150]; 411 | stp q12, q13, [sp, 0x170]; 412 | stp q14, q15, [sp, 0x190]; 413 | stp q16, q17, [sp, 0x1b0]; 414 | stp q18, q19, [sp, 0x1d0]; 415 | stp q20, q21, [sp, 0x1f0]; 416 | stp q22, q23, [sp, 0x210]; 417 | stp q24, q25, [sp, 0x230]; 418 | stp q26, q27, [sp, 0x250]; 419 | stp q28, q29, [sp, 0x270]; 420 | stp q30, q31, [sp, 0x290]; 421 | 422 | mov x1, x0; // addr 423 | sub x0, x20, CPU_STATE_REGDATA_OFFSET; // cpu_state 424 | mov x2, x16; // patch_data 425 | bl resolve_func; 426 | mov x16, x0; 427 | 428 | ldp x18, x30, [sp]; 429 | ldp x0, x1, [sp, 0x10]; 430 | ldp x2, x3, [sp, 0x20]; 431 | ldp x4, x5, [sp, 0x30]; 432 | ldp x6, x7, [sp, 0x40]; 433 | ldp x8, x9, [sp, 0x50]; 434 | ldp x10, x11, [sp, 0x60]; 435 | ldp x12, x13, [sp, 0x70]; 436 | ldp x14, x15, [sp, 0x80]; 437 | ldp q0, q1, [sp, 0x90]; 438 | ldp q2, q3, [sp, 0xb0]; 439 | ldp q4, q5, [sp, 0xd0]; 440 | ldp q6, q7, [sp, 0xf0]; 441 | ldp q6, q7, [sp, 0x110]; 442 | ldp q8, q9, [sp, 0x130]; 443 | ldp q10, q11, [sp, 0x150]; 444 | ldp q12, q13, [sp, 0x170]; 445 | ldp q14, q15, [sp, 0x190]; 446 | ldp q16, q17, [sp, 0x1b0]; 447 | ldp q18, q19, [sp, 0x1d0]; 448 | ldp q20, q21, [sp, 0x1f0]; 449 | ldp q22, q23, [sp, 
0x210]; 450 | ldp q24, q25, [sp, 0x230]; 451 | ldp q26, q27, [sp, 0x250]; 452 | ldp q28, q29, [sp, 0x270]; 453 | ldp q30, q31, [sp, 0x290]; 454 | add sp, sp, 0x2b0; 455 | br x16; 456 | .size dispatch_aapcsx_fullresolve, .-dispatch_aapcsx_fullresolve; 457 | ); 458 | 459 | #endif // defined(__aarch64__) 460 | 461 | const struct DispatcherInfo* 462 | dispatch_get(struct State* state) { 463 | static const struct DispatcherInfo infos[] = { 464 | [0] = { 465 | .loop_func = dispatch_cdecl_loop, 466 | .quick_dispatch_func = (uintptr_t) dispatch_cdecl, 467 | .full_dispatch_func = (uintptr_t) dispatch_cdecl, 468 | .patch_data_reg = 6, // rsi 469 | }, 470 | #if defined(__x86_64__) 471 | [1] = { 472 | .loop_func = dispatch_hhvm, 473 | .quick_dispatch_func = (uintptr_t) dispatch_hhvm_tail, 474 | .full_dispatch_func = (uintptr_t) dispatch_hhvm_fullresolve, 475 | .patch_data_reg = 14, // r14 476 | }, 477 | [2] = { 478 | .loop_func = dispatch_regcall, 479 | .quick_dispatch_func = (uintptr_t) dispatch_regcall_tail, 480 | .full_dispatch_func = (uintptr_t) dispatch_regcall_fullresolve, 481 | .patch_data_reg = 10, // r10 482 | }, 483 | #endif // defined(__x86_64__) 484 | #if defined(__aarch64__) 485 | [3] = { 486 | .loop_func = dispatch_aapcsx_loop, 487 | .quick_dispatch_func = (uintptr_t) dispatch_aapcsx, 488 | .full_dispatch_func = (uintptr_t) dispatch_aapcsx_fullresolve, 489 | .patch_data_reg = 16, // x16 490 | }, 491 | #endif // defined(__aarch64__) 492 | }; 493 | 494 | unsigned callconv = state->tc.tc_callconv; 495 | if (callconv < sizeof infos / sizeof infos[0]) 496 | return &infos[callconv]; 497 | return NULL; 498 | } 499 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 2.1, February 1999 3 | 4 | Copyright (C) 1991, 1999 Free Software Foundation, Inc. 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | [This is the first released version of the Lesser GPL. It also counts 10 | as the successor of the GNU Library Public License, version 2, hence 11 | the version number 2.1.] 12 | 13 | Preamble 14 | 15 | The licenses for most software are designed to take away your 16 | freedom to share and change it. By contrast, the GNU General Public 17 | Licenses are intended to guarantee your freedom to share and change 18 | free software--to make sure the software is free for all its users. 19 | 20 | This license, the Lesser General Public License, applies to some 21 | specially designated software packages--typically libraries--of the 22 | Free Software Foundation and other authors who decide to use it. You 23 | can use it too, but we suggest you first think carefully about whether 24 | this license or the ordinary General Public License is the better 25 | strategy to use in any particular case, based on the explanations below. 26 | 27 | When we speak of free software, we are referring to freedom of use, 28 | not price. Our General Public Licenses are designed to make sure that 29 | you have the freedom to distribute copies of free software (and charge 30 | for this service if you wish); that you receive source code or can get 31 | it if you want it; that you can change the software and use pieces of 32 | it in new free programs; and that you are informed that you can do 33 | these things. 
34 | 35 | To protect your rights, we need to make restrictions that forbid 36 | distributors to deny you these rights or to ask you to surrender these 37 | rights. These restrictions translate to certain responsibilities for 38 | you if you distribute copies of the library or if you modify it. 39 | 40 | For example, if you distribute copies of the library, whether gratis 41 | or for a fee, you must give the recipients all the rights that we gave 42 | you. You must make sure that they, too, receive or can get the source 43 | code. If you link other code with the library, you must provide 44 | complete object files to the recipients, so that they can relink them 45 | with the library after making changes to the library and recompiling 46 | it. And you must show them these terms so they know their rights. 47 | 48 | We protect your rights with a two-step method: (1) we copyright the 49 | library, and (2) we offer you this license, which gives you legal 50 | permission to copy, distribute and/or modify the library. 51 | 52 | To protect each distributor, we want to make it very clear that 53 | there is no warranty for the free library. Also, if the library is 54 | modified by someone else and passed on, the recipients should know 55 | that what they have is not the original version, so that the original 56 | author's reputation will not be affected by problems that might be 57 | introduced by others. 58 | 59 | Finally, software patents pose a constant threat to the existence of 60 | any free program. We wish to make sure that a company cannot 61 | effectively restrict the users of a free program by obtaining a 62 | restrictive license from a patent holder. Therefore, we insist that 63 | any patent license obtained for a version of the library must be 64 | consistent with the full freedom of use specified in this license. 65 | 66 | Most GNU software, including some libraries, is covered by the 67 | ordinary GNU General Public License. This license, the GNU Lesser 68 | General Public License, applies to certain designated libraries, and 69 | is quite different from the ordinary General Public License. We use 70 | this license for certain libraries in order to permit linking those 71 | libraries into non-free programs. 72 | 73 | When a program is linked with a library, whether statically or using 74 | a shared library, the combination of the two is legally speaking a 75 | combined work, a derivative of the original library. The ordinary 76 | General Public License therefore permits such linking only if the 77 | entire combination fits its criteria of freedom. The Lesser General 78 | Public License permits more lax criteria for linking other code with 79 | the library. 80 | 81 | We call this license the "Lesser" General Public License because it 82 | does Less to protect the user's freedom than the ordinary General 83 | Public License. It also provides other free software developers Less 84 | of an advantage over competing non-free programs. These disadvantages 85 | are the reason we use the ordinary General Public License for many 86 | libraries. However, the Lesser license provides advantages in certain 87 | special circumstances. 88 | 89 | For example, on rare occasions, there may be a special need to 90 | encourage the widest possible use of a certain library, so that it becomes 91 | a de-facto standard. To achieve this, non-free programs must be 92 | allowed to use the library. A more frequent case is that a free 93 | library does the same job as widely used non-free libraries. 
In this 94 | case, there is little to gain by limiting the free library to free 95 | software only, so we use the Lesser General Public License. 96 | 97 | In other cases, permission to use a particular library in non-free 98 | programs enables a greater number of people to use a large body of 99 | free software. For example, permission to use the GNU C Library in 100 | non-free programs enables many more people to use the whole GNU 101 | operating system, as well as its variant, the GNU/Linux operating 102 | system. 103 | 104 | Although the Lesser General Public License is Less protective of the 105 | users' freedom, it does ensure that the user of a program that is 106 | linked with the Library has the freedom and the wherewithal to run 107 | that program using a modified version of the Library. 108 | 109 | The precise terms and conditions for copying, distribution and 110 | modification follow. Pay close attention to the difference between a 111 | "work based on the library" and a "work that uses the library". The 112 | former contains code derived from the library, whereas the latter must 113 | be combined with the library in order to run. 114 | 115 | GNU LESSER GENERAL PUBLIC LICENSE 116 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 117 | 118 | 0. This License Agreement applies to any software library or other 119 | program which contains a notice placed by the copyright holder or 120 | other authorized party saying it may be distributed under the terms of 121 | this Lesser General Public License (also called "this License"). 122 | Each licensee is addressed as "you". 123 | 124 | A "library" means a collection of software functions and/or data 125 | prepared so as to be conveniently linked with application programs 126 | (which use some of those functions and data) to form executables. 127 | 128 | The "Library", below, refers to any such software library or work 129 | which has been distributed under these terms. A "work based on the 130 | Library" means either the Library or any derivative work under 131 | copyright law: that is to say, a work containing the Library or a 132 | portion of it, either verbatim or with modifications and/or translated 133 | straightforwardly into another language. (Hereinafter, translation is 134 | included without limitation in the term "modification".) 135 | 136 | "Source code" for a work means the preferred form of the work for 137 | making modifications to it. For a library, complete source code means 138 | all the source code for all modules it contains, plus any associated 139 | interface definition files, plus the scripts used to control compilation 140 | and installation of the library. 141 | 142 | Activities other than copying, distribution and modification are not 143 | covered by this License; they are outside its scope. The act of 144 | running a program using the Library is not restricted, and output from 145 | such a program is covered only if its contents constitute a work based 146 | on the Library (independent of the use of the Library in a tool for 147 | writing it). Whether that is true depends on what the Library does 148 | and what the program that uses the Library does. 149 | 150 | 1. 
You may copy and distribute verbatim copies of the Library's 151 | complete source code as you receive it, in any medium, provided that 152 | you conspicuously and appropriately publish on each copy an 153 | appropriate copyright notice and disclaimer of warranty; keep intact 154 | all the notices that refer to this License and to the absence of any 155 | warranty; and distribute a copy of this License along with the 156 | Library. 157 | 158 | You may charge a fee for the physical act of transferring a copy, 159 | and you may at your option offer warranty protection in exchange for a 160 | fee. 161 | 162 | 2. You may modify your copy or copies of the Library or any portion 163 | of it, thus forming a work based on the Library, and copy and 164 | distribute such modifications or work under the terms of Section 1 165 | above, provided that you also meet all of these conditions: 166 | 167 | a) The modified work must itself be a software library. 168 | 169 | b) You must cause the files modified to carry prominent notices 170 | stating that you changed the files and the date of any change. 171 | 172 | c) You must cause the whole of the work to be licensed at no 173 | charge to all third parties under the terms of this License. 174 | 175 | d) If a facility in the modified Library refers to a function or a 176 | table of data to be supplied by an application program that uses 177 | the facility, other than as an argument passed when the facility 178 | is invoked, then you must make a good faith effort to ensure that, 179 | in the event an application does not supply such function or 180 | table, the facility still operates, and performs whatever part of 181 | its purpose remains meaningful. 182 | 183 | (For example, a function in a library to compute square roots has 184 | a purpose that is entirely well-defined independent of the 185 | application. Therefore, Subsection 2d requires that any 186 | application-supplied function or table used by this function must 187 | be optional: if the application does not supply it, the square 188 | root function must still compute square roots.) 189 | 190 | These requirements apply to the modified work as a whole. If 191 | identifiable sections of that work are not derived from the Library, 192 | and can be reasonably considered independent and separate works in 193 | themselves, then this License, and its terms, do not apply to those 194 | sections when you distribute them as separate works. But when you 195 | distribute the same sections as part of a whole which is a work based 196 | on the Library, the distribution of the whole must be on the terms of 197 | this License, whose permissions for other licensees extend to the 198 | entire whole, and thus to each and every part regardless of who wrote 199 | it. 200 | 201 | Thus, it is not the intent of this section to claim rights or contest 202 | your rights to work written entirely by you; rather, the intent is to 203 | exercise the right to control the distribution of derivative or 204 | collective works based on the Library. 205 | 206 | In addition, mere aggregation of another work not based on the Library 207 | with the Library (or with a work based on the Library) on a volume of 208 | a storage or distribution medium does not bring the other work under 209 | the scope of this License. 210 | 211 | 3. You may opt to apply the terms of the ordinary GNU General Public 212 | License instead of this License to a given copy of the Library. 
To do 213 | this, you must alter all the notices that refer to this License, so 214 | that they refer to the ordinary GNU General Public License, version 2, 215 | instead of to this License. (If a newer version than version 2 of the 216 | ordinary GNU General Public License has appeared, then you can specify 217 | that version instead if you wish.) Do not make any other change in 218 | these notices. 219 | 220 | Once this change is made in a given copy, it is irreversible for 221 | that copy, so the ordinary GNU General Public License applies to all 222 | subsequent copies and derivative works made from that copy. 223 | 224 | This option is useful when you wish to copy part of the code of 225 | the Library into a program that is not a library. 226 | 227 | 4. You may copy and distribute the Library (or a portion or 228 | derivative of it, under Section 2) in object code or executable form 229 | under the terms of Sections 1 and 2 above provided that you accompany 230 | it with the complete corresponding machine-readable source code, which 231 | must be distributed under the terms of Sections 1 and 2 above on a 232 | medium customarily used for software interchange. 233 | 234 | If distribution of object code is made by offering access to copy 235 | from a designated place, then offering equivalent access to copy the 236 | source code from the same place satisfies the requirement to 237 | distribute the source code, even though third parties are not 238 | compelled to copy the source along with the object code. 239 | 240 | 5. A program that contains no derivative of any portion of the 241 | Library, but is designed to work with the Library by being compiled or 242 | linked with it, is called a "work that uses the Library". Such a 243 | work, in isolation, is not a derivative work of the Library, and 244 | therefore falls outside the scope of this License. 245 | 246 | However, linking a "work that uses the Library" with the Library 247 | creates an executable that is a derivative of the Library (because it 248 | contains portions of the Library), rather than a "work that uses the 249 | library". The executable is therefore covered by this License. 250 | Section 6 states terms for distribution of such executables. 251 | 252 | When a "work that uses the Library" uses material from a header file 253 | that is part of the Library, the object code for the work may be a 254 | derivative work of the Library even though the source code is not. 255 | Whether this is true is especially significant if the work can be 256 | linked without the Library, or if the work is itself a library. The 257 | threshold for this to be true is not precisely defined by law. 258 | 259 | If such an object file uses only numerical parameters, data 260 | structure layouts and accessors, and small macros and small inline 261 | functions (ten lines or less in length), then the use of the object 262 | file is unrestricted, regardless of whether it is legally a derivative 263 | work. (Executables containing this object code plus portions of the 264 | Library will still fall under Section 6.) 265 | 266 | Otherwise, if the work is a derivative of the Library, you may 267 | distribute the object code for the work under the terms of Section 6. 268 | Any executables containing that work also fall under Section 6, 269 | whether or not they are linked directly with the Library itself. 270 | 271 | 6. 
As an exception to the Sections above, you may also combine or 272 | link a "work that uses the Library" with the Library to produce a 273 | work containing portions of the Library, and distribute that work 274 | under terms of your choice, provided that the terms permit 275 | modification of the work for the customer's own use and reverse 276 | engineering for debugging such modifications. 277 | 278 | You must give prominent notice with each copy of the work that the 279 | Library is used in it and that the Library and its use are covered by 280 | this License. You must supply a copy of this License. If the work 281 | during execution displays copyright notices, you must include the 282 | copyright notice for the Library among them, as well as a reference 283 | directing the user to the copy of this License. Also, you must do one 284 | of these things: 285 | 286 | a) Accompany the work with the complete corresponding 287 | machine-readable source code for the Library including whatever 288 | changes were used in the work (which must be distributed under 289 | Sections 1 and 2 above); and, if the work is an executable linked 290 | with the Library, with the complete machine-readable "work that 291 | uses the Library", as object code and/or source code, so that the 292 | user can modify the Library and then relink to produce a modified 293 | executable containing the modified Library. (It is understood 294 | that the user who changes the contents of definitions files in the 295 | Library will not necessarily be able to recompile the application 296 | to use the modified definitions.) 297 | 298 | b) Use a suitable shared library mechanism for linking with the 299 | Library. A suitable mechanism is one that (1) uses at run time a 300 | copy of the library already present on the user's computer system, 301 | rather than copying library functions into the executable, and (2) 302 | will operate properly with a modified version of the library, if 303 | the user installs one, as long as the modified version is 304 | interface-compatible with the version that the work was made with. 305 | 306 | c) Accompany the work with a written offer, valid for at 307 | least three years, to give the same user the materials 308 | specified in Subsection 6a, above, for a charge no more 309 | than the cost of performing this distribution. 310 | 311 | d) If distribution of the work is made by offering access to copy 312 | from a designated place, offer equivalent access to copy the above 313 | specified materials from the same place. 314 | 315 | e) Verify that the user has already received a copy of these 316 | materials or that you have already sent this user a copy. 317 | 318 | For an executable, the required form of the "work that uses the 319 | Library" must include any data and utility programs needed for 320 | reproducing the executable from it. However, as a special exception, 321 | the materials to be distributed need not include anything that is 322 | normally distributed (in either source or binary form) with the major 323 | components (compiler, kernel, and so on) of the operating system on 324 | which the executable runs, unless that component itself accompanies 325 | the executable. 326 | 327 | It may happen that this requirement contradicts the license 328 | restrictions of other proprietary libraries that do not normally 329 | accompany the operating system. Such a contradiction means you cannot 330 | use both them and the Library together in an executable that you 331 | distribute. 332 | 333 | 7. 
You may place library facilities that are a work based on the 334 | Library side-by-side in a single library together with other library 335 | facilities not covered by this License, and distribute such a combined 336 | library, provided that the separate distribution of the work based on 337 | the Library and of the other library facilities is otherwise 338 | permitted, and provided that you do these two things: 339 | 340 | a) Accompany the combined library with a copy of the same work 341 | based on the Library, uncombined with any other library 342 | facilities. This must be distributed under the terms of the 343 | Sections above. 344 | 345 | b) Give prominent notice with the combined library of the fact 346 | that part of it is a work based on the Library, and explaining 347 | where to find the accompanying uncombined form of the same work. 348 | 349 | 8. You may not copy, modify, sublicense, link with, or distribute 350 | the Library except as expressly provided under this License. Any 351 | attempt otherwise to copy, modify, sublicense, link with, or 352 | distribute the Library is void, and will automatically terminate your 353 | rights under this License. However, parties who have received copies, 354 | or rights, from you under this License will not have their licenses 355 | terminated so long as such parties remain in full compliance. 356 | 357 | 9. You are not required to accept this License, since you have not 358 | signed it. However, nothing else grants you permission to modify or 359 | distribute the Library or its derivative works. These actions are 360 | prohibited by law if you do not accept this License. Therefore, by 361 | modifying or distributing the Library (or any work based on the 362 | Library), you indicate your acceptance of this License to do so, and 363 | all its terms and conditions for copying, distributing or modifying 364 | the Library or works based on it. 365 | 366 | 10. Each time you redistribute the Library (or any work based on the 367 | Library), the recipient automatically receives a license from the 368 | original licensor to copy, distribute, link with or modify the Library 369 | subject to these terms and conditions. You may not impose any further 370 | restrictions on the recipients' exercise of the rights granted herein. 371 | You are not responsible for enforcing compliance by third parties with 372 | this License. 373 | 374 | 11. If, as a consequence of a court judgment or allegation of patent 375 | infringement or for any other reason (not limited to patent issues), 376 | conditions are imposed on you (whether by court order, agreement or 377 | otherwise) that contradict the conditions of this License, they do not 378 | excuse you from the conditions of this License. If you cannot 379 | distribute so as to satisfy simultaneously your obligations under this 380 | License and any other pertinent obligations, then as a consequence you 381 | may not distribute the Library at all. For example, if a patent 382 | license would not permit royalty-free redistribution of the Library by 383 | all those who receive copies directly or indirectly through you, then 384 | the only way you could satisfy both it and this License would be to 385 | refrain entirely from distribution of the Library. 386 | 387 | If any portion of this section is held invalid or unenforceable under any 388 | particular circumstance, the balance of the section is intended to apply, 389 | and the section as a whole is intended to apply in other circumstances. 
390 | 391 | It is not the purpose of this section to induce you to infringe any 392 | patents or other property right claims or to contest validity of any 393 | such claims; this section has the sole purpose of protecting the 394 | integrity of the free software distribution system which is 395 | implemented by public license practices. Many people have made 396 | generous contributions to the wide range of software distributed 397 | through that system in reliance on consistent application of that 398 | system; it is up to the author/donor to decide if he or she is willing 399 | to distribute software through any other system and a licensee cannot 400 | impose that choice. 401 | 402 | This section is intended to make thoroughly clear what is believed to 403 | be a consequence of the rest of this License. 404 | 405 | 12. If the distribution and/or use of the Library is restricted in 406 | certain countries either by patents or by copyrighted interfaces, the 407 | original copyright holder who places the Library under this License may add 408 | an explicit geographical distribution limitation excluding those countries, 409 | so that distribution is permitted only in or among countries not thus 410 | excluded. In such case, this License incorporates the limitation as if 411 | written in the body of this License. 412 | 413 | 13. The Free Software Foundation may publish revised and/or new 414 | versions of the Lesser General Public License from time to time. 415 | Such new versions will be similar in spirit to the present version, 416 | but may differ in detail to address new problems or concerns. 417 | 418 | Each version is given a distinguishing version number. If the Library 419 | specifies a version number of this License which applies to it and 420 | "any later version", you have the option of following the terms and 421 | conditions either of that version or of any later version published by 422 | the Free Software Foundation. If the Library does not specify a 423 | license version number, you may choose any version ever published by 424 | the Free Software Foundation. 425 | 426 | 14. If you wish to incorporate parts of the Library into other free 427 | programs whose distribution conditions are incompatible with these, 428 | write to the author to ask for permission. For software which is 429 | copyrighted by the Free Software Foundation, write to the Free 430 | Software Foundation; we sometimes make exceptions for this. Our 431 | decision will be guided by the two goals of preserving the free status 432 | of all derivatives of our free software and of promoting the sharing 433 | and reuse of software generally. 434 | 435 | NO WARRANTY 436 | 437 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 438 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 439 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 440 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 441 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 442 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 443 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 444 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 445 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 446 | 447 | 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 448 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 449 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 450 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 451 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 452 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 453 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 454 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 455 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 456 | DAMAGES. 457 | -------------------------------------------------------------------------------- /client/minilibc.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | #include 6 | #if defined(__x86_64__) 7 | #include 8 | #endif 9 | 10 | 11 | extern int main(int argc, char** argv); 12 | void __start_main(const size_t* initial_stack, const size_t* dynv); 13 | void __restore(void); 14 | 15 | #if UINTPTR_MAX == 0xffffffff 16 | #define ELF_R_TYPE ELF32_R_TYPE 17 | #define Elf_Phdr Elf32_Phdr 18 | #else 19 | #define ELF_R_TYPE ELF64_R_TYPE 20 | #define Elf_Phdr Elf64_Phdr 21 | #endif 22 | 23 | #if defined(__x86_64__) 24 | #define R_RELATIVE R_X86_64_RELATIVE 25 | 26 | ASM_BLOCK( 27 | .intel_syntax noprefix; 28 | .weak _DYNAMIC; 29 | .hidden _DYNAMIC; 30 | .global _start; 31 | _start: 32 | xor ebp, ebp; 33 | mov rdi, rsp; 34 | lea rsi, [rip+_DYNAMIC]; 35 | and rsp, 0xfffffffffffffff0; 36 | call __start_main; 37 | .att_syntax; 38 | ); 39 | 40 | ASM_BLOCK( 41 | .intel_syntax noprefix; 42 | .global __clone; 43 | .type __clone, @function; 44 | __clone: 45 | and rsi, -16; // store arg on child stack 46 | sub rsi, 8; 47 | mov [rsi], rcx; 48 | mov r11, rdi; // temporarily store func in r11 49 | 50 | mov rdi, rdx; // flags 51 | // rsi is stack 52 | mov rdx, r8; // ptid 53 | mov r10, [rsp + 8]; // newtls 54 | mov r8, r9; // ctid 55 | 56 | mov r9, r11; // r11 is clobbered by the syscall instruction 57 | 58 | mov eax, 56; // __NR_clone 59 | syscall; 60 | test eax, eax; 61 | jnz 1f; 62 | 63 | pop rdi; // in child 64 | call r9; 65 | mov edi, eax; 66 | mov eax, 60; // __NR_exit 67 | syscall; 68 | 69 | 1: ret; 70 | .att_syntax; 71 | ); 72 | 73 | ASM_BLOCK( 74 | .intel_syntax noprefix; 75 | __restore: 76 | mov rax, __NR_rt_sigreturn; 77 | syscall; 78 | .att_syntax; 79 | ); 80 | 81 | static size_t syscall0(int syscall_number) { 82 | size_t retval = syscall_number; 83 | __asm__ volatile("syscall" : "+a"(retval) : : "memory", "rcx", "r11"); 84 | return retval; 85 | } 86 | 87 | static size_t syscall1(int syscall_number, size_t a1) { 88 | size_t retval = syscall_number; 89 | __asm__ volatile("syscall" : "+a"(retval) : 90 | "D"(a1) : 91 | "memory", "rcx", "r11"); 92 | return retval; 93 | } 94 | 95 | static size_t syscall2(int syscall_number, size_t a1, size_t a2) { 96 | size_t retval = syscall_number; 97 | __asm__ volatile("syscall" : "+a"(retval) : 98 | "D"(a1), "S"(a2) : 99 | "memory", "rcx", "r11"); 100 | return retval; 101 | } 102 | 103 | static size_t syscall3(int syscall_number, size_t a1, size_t a2, size_t a3) { 104 | size_t retval = syscall_number; 105 | __asm__ volatile("syscall" : "+a"(retval) : 106 | "D"(a1), "S"(a2), "d"(a3) : 107 | "memory", "rcx", "r11"); 108 | return retval; 109 | } 110 | 111 | static size_t syscall4(int syscall_number, size_t a1, size_t a2, size_t a3, 112 | size_t a4) { 113 | 
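// a4 must travel in r10: the syscall instruction clobbers rcx (and r11), so
// the Linux x86-64 syscall ABI uses r10 in place of the fourth user-space
// argument register.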
size_t retval = syscall_number; 114 | register size_t r10 __asm__("r10") = a4; 115 | __asm__ volatile("syscall" : "+a"(retval) : 116 | "D"(a1),"S"(a2),"d"(a3),"r"(r10) : 117 | "memory","rcx","r11"); 118 | return retval; 119 | } 120 | 121 | static size_t syscall6(int syscall_number, size_t a1, size_t a2, size_t a3, 122 | size_t a4, size_t a5, size_t a6) { 123 | size_t retval = syscall_number; 124 | register size_t r8 __asm__("r8") = a5; 125 | register size_t r9 __asm__("r9") = a6; 126 | register size_t r10 __asm__("r10") = a4; 127 | __asm__ volatile("syscall" : "+a"(retval) : 128 | "D"(a1),"S"(a2),"d"(a3),"r"(r8),"r"(r9),"r"(r10) : 129 | "memory","rcx","r11"); 130 | return retval; 131 | } 132 | 133 | int 134 | set_thread_area(void* tp) { 135 | return syscall2(__NR_arch_prctl, ARCH_SET_FS, (uintptr_t) tp); 136 | } 137 | 138 | void* 139 | get_thread_area(void) { 140 | void* tp; 141 | __asm__("mov %%fs:0, %0" : "=r"(tp)); 142 | return tp; 143 | } 144 | 145 | #elif defined(__aarch64__) 146 | #define R_RELATIVE R_AARCH64_RELATIVE 147 | 148 | ASM_BLOCK( 149 | .weak _DYNAMIC; 150 | .hidden _DYNAMIC; 151 | .global _start; 152 | _start: 153 | mov x0, sp; 154 | adrp x1, _DYNAMIC; 155 | add x1, x1, #:lo12:_DYNAMIC; 156 | and sp, x0, #0xfffffffffffffff0; 157 | bl __start_main; 158 | ); 159 | 160 | ASM_BLOCK( 161 | .global __clone; 162 | .type __clone, @function; 163 | __clone: 164 | and x1, x1, -16; // store arg on child stack 165 | stp x0, x3, [x1, -16]!; 166 | 167 | uxtw x0, w2; 168 | mov x2, x4; 169 | mov x3, x5; 170 | mov x4, x6; 171 | mov x8, 220; // __NR_clone 172 | svc 0; 173 | cbnz x0, 1f; 174 | 175 | ldp x1, x0, [sp], 16; 176 | blr x1; 177 | mov x8, 93; // __NR_exit 178 | svc 0; 179 | 180 | 1: ret; 181 | ); 182 | 183 | ASM_BLOCK( 184 | __restore: 185 | mov x8, __NR_rt_sigreturn; 186 | svc 0; 187 | ); 188 | 189 | static 190 | size_t 191 | syscall0(int syscall_number) 192 | { 193 | register size_t num __asm__("x8") = syscall_number; 194 | register size_t retval __asm__("x0"); 195 | __asm__ volatile("svc #0" : "=r"(retval) : "r"(num) : "memory"); 196 | return retval; 197 | } 198 | 199 | static 200 | size_t 201 | syscall1(int syscall_number, size_t arg1) 202 | { 203 | register size_t p0 __asm__("x0") = arg1; 204 | register size_t num __asm__("x8") = syscall_number; 205 | register size_t retval __asm__("x0"); 206 | __asm__ volatile("svc #0" : "=r"(retval) : "r"(num),"r"(p0) : "memory"); 207 | return retval; 208 | } 209 | 210 | static 211 | size_t 212 | syscall2(int syscall_number, size_t arg1, size_t arg2) 213 | { 214 | register size_t p0 __asm__("x0") = arg1; 215 | register size_t p1 __asm__("x1") = arg2; 216 | register size_t num __asm__("x8") = syscall_number; 217 | register size_t retval __asm__("x0"); 218 | __asm__ volatile("svc #0" : "=r"(retval) : 219 | "r"(num),"r"(p0),"r"(p1) : "memory"); 220 | return retval; 221 | } 222 | 223 | static 224 | size_t 225 | syscall3(int syscall_number, size_t arg1, size_t arg2, size_t arg3) 226 | { 227 | register size_t p0 __asm__("x0") = arg1; 228 | register size_t p1 __asm__("x1") = arg2; 229 | register size_t p2 __asm__("x2") = arg3; 230 | register size_t num __asm__("x8") = syscall_number; 231 | register size_t retval __asm__("x0"); 232 | __asm__ volatile("svc #0" : "=r"(retval) : 233 | "r"(num),"r"(p0),"r"(p1),"r"(p2) : "memory"); 234 | return retval; 235 | } 236 | 237 | static 238 | size_t 239 | syscall4(int syscall_number, size_t arg1, size_t arg2, size_t arg3, 240 | size_t arg4) 241 | { 242 | register size_t p0 __asm__("x0") = arg1; 243 | register 
size_t p1 __asm__("x1") = arg2; 244 | register size_t p2 __asm__("x2") = arg3; 245 | register size_t p3 __asm__("x3") = arg4; 246 | register size_t num __asm__("x8") = syscall_number; 247 | register size_t retval __asm__("x0"); 248 | __asm__ volatile("svc #0" : "=r"(retval) : 249 | "r"(num),"r"(p0),"r"(p1),"r"(p2),"r"(p3) : "memory"); 250 | return retval; 251 | } 252 | 253 | static 254 | size_t 255 | syscall6(int syscall_number, size_t arg1, size_t arg2, size_t arg3, 256 | size_t arg4, size_t arg5, size_t arg6) 257 | { 258 | register size_t p0 __asm__("x0") = arg1; 259 | register size_t p1 __asm__("x1") = arg2; 260 | register size_t p2 __asm__("x2") = arg3; 261 | register size_t p3 __asm__("x3") = arg4; 262 | register size_t p4 __asm__("x4") = arg5; 263 | register size_t p5 __asm__("x5") = arg6; 264 | register size_t num __asm__("x8") = syscall_number; 265 | register size_t retval __asm__("x0"); 266 | __asm__ volatile("svc #0" : "=r"(retval) : 267 | "r"(num),"r"(p0),"r"(p1),"r"(p2),"r"(p3),"r"(p4),"r"(p5) : 268 | "memory"); 269 | return retval; 270 | } 271 | 272 | int 273 | set_thread_area(void* tp) { 274 | __asm__ volatile("msr tpidr_el0, %0" :: "r"(tp) : "memory"); 275 | return 0; 276 | } 277 | 278 | void* 279 | get_thread_area(void) { 280 | void* tp; 281 | __asm__("mrs %0, tpidr_el0" : "=r"(tp)); 282 | return tp; 283 | } 284 | 285 | #else 286 | #error 287 | #endif 288 | 289 | char** environ; 290 | static const size_t* __auxvptr; 291 | static size_t pagesize; 292 | 293 | long syscall(long number, long a1, long a2, long a3, long a4, long a5, 294 | long a6) { 295 | return syscall6(number, a1, a2, a3, a4, a5, a6); 296 | } 297 | 298 | int getpid(void) { 299 | return syscall0(__NR_getpid); 300 | } 301 | int gettid(void) { 302 | return syscall0(__NR_gettid); 303 | } 304 | int open(const char* pathname, int flags, int mode) { 305 | return openat(AT_FDCWD, pathname, flags, mode); 306 | } 307 | int openat(int dirfd, const char* pathname, int flags, int mode) { 308 | return syscall4(__NR_openat, dirfd, (size_t) pathname, flags, mode); 309 | } 310 | off_t lseek(int fd, off_t offset, int whence) { 311 | return syscall3(__NR_lseek, fd, offset, whence); 312 | } 313 | ssize_t read(int fd, void* buf, size_t count) { 314 | return syscall3(__NR_read, fd, (size_t) buf, count); 315 | } 316 | ssize_t write(int fd, const void* buf, size_t count) { 317 | return syscall3(__NR_write, fd, (size_t) buf, count); 318 | } 319 | int close(int fd) { 320 | return syscall1(__NR_close, fd); 321 | } 322 | 323 | ssize_t recvmsg(int fd, struct msghdr* msg, int flags) { 324 | return syscall3(__NR_recvmsg, fd, (size_t) msg, flags); 325 | } 326 | 327 | ssize_t read_full(int fd, void* buf, size_t nbytes) { 328 | size_t total_read = 0; 329 | uint8_t* buf_cp = buf; 330 | while (total_read < nbytes) { 331 | ssize_t bytes_read = read(fd, buf_cp + total_read, nbytes - total_read); 332 | if (bytes_read < 0) 333 | return bytes_read; 334 | if (bytes_read == 0) 335 | return -EIO; 336 | total_read += bytes_read; 337 | } 338 | return total_read; 339 | } 340 | ssize_t write_full(int fd, const void* buf, size_t nbytes) { 341 | size_t total_written = 0; 342 | const uint8_t* buf_cp = buf; 343 | while (total_written < nbytes) { 344 | ssize_t bytes_written = write(fd, buf_cp + total_written, nbytes - total_written); 345 | if (bytes_written < 0) 346 | return bytes_written; 347 | if (bytes_written == 0) 348 | return -EIO; 349 | total_written += bytes_written; 350 | } 351 | return total_written; 352 | } 353 | 354 | void* 355 | mmap(void* addr, 
size_t length, int prot, int flags, int fd, off_t offset) { 356 | #if __SIZEOF_POINTER__ == 8 357 | return (void*) syscall6(__NR_mmap, (size_t) addr, length, prot, flags, fd, 358 | offset); 359 | #else 360 | return (void*) syscall6(__NR_mmap2, (size_t) addr, length, prot, flags, fd, 361 | offset >> 12); 362 | #endif 363 | } 364 | int mprotect(void* addr, size_t len, int prot) { 365 | return syscall3(__NR_mprotect, (size_t) addr, len, prot); 366 | } 367 | int munmap(void* addr, size_t length) { 368 | return syscall2(__NR_munmap, (size_t) addr, length); 369 | } 370 | 371 | int clock_gettime(int clk_id, struct timespec* tp) { 372 | return syscall2(__NR_clock_gettime, clk_id, (size_t) tp); 373 | } 374 | 375 | int nanosleep(const struct timespec* req, struct timespec* rem) { 376 | return syscall2(__NR_nanosleep, (uintptr_t) req, (uintptr_t) rem); 377 | } 378 | 379 | __attribute__((noreturn)) 380 | void _exit(int status) { 381 | syscall1(__NR_exit, status); 382 | __builtin_unreachable(); 383 | } 384 | 385 | int execve(const char* filename, const char* const argv[], const char* const envp[]) { 386 | return syscall3(__NR_execve, (uintptr_t) filename, (uintptr_t) argv, 387 | (uintptr_t) envp); 388 | } 389 | int dup2(int oldfd, int newfd) { 390 | if (oldfd == newfd) { 391 | int ret = syscall2(__NR_fcntl, oldfd, F_GETFL); 392 | return ret < 0 ? ret : newfd; 393 | } 394 | return syscall3(__NR_dup3, oldfd, newfd, 0); 395 | } 396 | int pipe2(int pipefd[2], int flags) { 397 | return syscall2(__NR_pipe2, (uintptr_t) pipefd, flags); 398 | } 399 | 400 | size_t strlen(const char* s) { 401 | size_t len = 0; 402 | for (; *s != '\0'; ++len, ++s); 403 | return len; 404 | } 405 | int strcmp(const char* s1, const char* s2) { 406 | for (; *s1 && *s1 == *s2; s1++, s2++); 407 | return *(const unsigned char*) s1 - *(const unsigned char*) s2; 408 | } 409 | int strncmp(const char* s1, const char* s2, size_t n) { 410 | if (n > 0) { 411 | for (; --n && *s1 && *s1 == *s2; s1++, s2++); 412 | return *(const unsigned char*) s1 - *(const unsigned char*) s2; 413 | } 414 | return 0; 415 | } 416 | char* strchr(const char* s, int c) { 417 | for (; *s != '\0' && *s != c; s++); 418 | return *s == c ? 
(char*) s : NULL; 419 | } 420 | 421 | int puts(const char* s) { 422 | write(1, s, strlen(s)); 423 | write(1, "\n", 1); 424 | return 0; 425 | } 426 | 427 | typedef void (*PrintfWriteFunc)(void*, const char*, size_t); 428 | 429 | static 430 | size_t 431 | printf_driver(PrintfWriteFunc write_func, void* data, const char* format, 432 | va_list args) { 433 | size_t bytes_written = 0; 434 | 435 | char buffer[32] = {0}; 436 | int buflen; 437 | 438 | while (*format != '\0') { 439 | char* next_format = strchr(format, '%'); 440 | if (next_format == NULL) { 441 | int len = strlen(format); 442 | write_func(data, format, len); 443 | bytes_written += len; 444 | format += len; 445 | break; 446 | } 447 | else if (next_format != format) { 448 | write_func(data, format, next_format - format); 449 | bytes_written += next_format - format; 450 | format = next_format; 451 | } 452 | 453 | // Skip '%' 454 | format++; 455 | 456 | char format_spec = *format; 457 | if (format_spec == '\0') { 458 | write_func(data, "%", 1); 459 | bytes_written++; 460 | break; 461 | } 462 | 463 | format++; 464 | if (format_spec == 's') { 465 | const char* str = va_arg(args, const char*); 466 | size_t len = strlen(str); 467 | write_func(data, str, len); 468 | bytes_written += len; 469 | } 470 | else if (format_spec == 'c') { 471 | int chr = va_arg(args, int); 472 | write_func(data, (char*) &chr, 1); 473 | bytes_written += 1; 474 | } 475 | else if (format_spec == 'p') { 476 | uintptr_t value = va_arg(args, uintptr_t); 477 | if (value == 0) { 478 | write_func(data, "(nil)", 5); 479 | bytes_written += 5; 480 | continue; 481 | } 482 | 483 | buffer[0] = '0'; 484 | buffer[1] = 'x'; 485 | buflen = 2; 486 | 487 | int highest_bit = 8 * sizeof(uintptr_t) - __builtin_clzl(value); 488 | int nibbles = (highest_bit + 3) >> 2; 489 | for (int i = nibbles - 1; i >= 0; i--) { 490 | uint8_t nibble = (value >> (4 * i)) & 0xf; 491 | buffer[buflen++] = "0123456789abcdef"[nibble]; 492 | } 493 | 494 | write_func(data, buffer, buflen); 495 | bytes_written += buflen; 496 | } 497 | else if (format_spec == 'u') { 498 | uint32_t value = va_arg(args, uint32_t); 499 | size_t buf_idx = sizeof(buffer) - 1; 500 | if (value == 0) { 501 | buffer[buf_idx] = '0'; 502 | } 503 | else { 504 | while (value > 0) { 505 | uint32_t digit = value % 10; 506 | buffer[buf_idx--] = '0' + digit; 507 | value /= 10; 508 | } 509 | buf_idx++; 510 | } 511 | write_func(data, buffer + buf_idx, sizeof(buffer) - buf_idx); 512 | bytes_written += sizeof(buffer) - buf_idx; 513 | } 514 | else if (format_spec == 'x') { 515 | uint32_t value = va_arg(args, uint32_t); 516 | int nibbles = 1; 517 | if (value != 0) { 518 | int highest_bit = 8 * sizeof(uint32_t) - __builtin_clz(value); 519 | nibbles = (highest_bit + 3) >> 2; 520 | } 521 | buflen = 0; 522 | for (int i = nibbles - 1; i >= 0; i--) { 523 | uint8_t nibble = (value >> (4 * i)) & 0xf; 524 | buffer[buflen++] = "0123456789abcdef"[nibble]; 525 | } 526 | 527 | write_func(data, buffer, buflen); 528 | bytes_written += buflen; 529 | } 530 | else if (format_spec == 'l' && *format == 'x') { 531 | format++; 532 | 533 | size_t value = va_arg(args, size_t); 534 | int nibbles = 1; 535 | if (value != 0) { 536 | int highest_bit = 8 * sizeof(size_t) - __builtin_clzl(value); 537 | nibbles = (highest_bit + 3) >> 2; 538 | } 539 | buflen = 0; 540 | for (int i = nibbles - 1; i >= 0; i--) { 541 | uint8_t nibble = (value >> (4 * i)) & 0xf; 542 | buffer[buflen++] = "0123456789abcdef"[nibble]; 543 | } 544 | 545 | write_func(data, buffer, buflen); 546 | bytes_written += 
buflen; 547 | } 548 | } 549 | 550 | return bytes_written; 551 | 552 | } 553 | 554 | struct SPrintfHelperData { 555 | char* buf; 556 | size_t len; 557 | }; 558 | 559 | static 560 | void 561 | sprintf_helper(struct SPrintfHelperData* data, const char* buf, size_t len) 562 | { 563 | if (data->len == 0) 564 | { 565 | memcpy(data->buf, buf, len); 566 | data->buf += len; 567 | } 568 | else if (data->len > len) 569 | { 570 | memcpy(data->buf, buf, len); 571 | data->buf += len; 572 | data->len -= len; 573 | } 574 | else 575 | { 576 | memcpy(data->buf, buf, data->len - 1); 577 | data->buf += data->len - 1; 578 | data->len = 1; 579 | } 580 | } 581 | 582 | int 583 | vsnprintf(char* str, size_t size, const char* format, va_list args) 584 | { 585 | struct SPrintfHelperData data = { str, size }; 586 | 587 | int result = printf_driver((PrintfWriteFunc) sprintf_helper, &data, format, 588 | args); 589 | 590 | *data.buf = '\0'; 591 | 592 | return result; 593 | } 594 | 595 | GNU_FORCE_EXTERN 596 | int 597 | snprintf(char* str, size_t size, const char* format, ...) 598 | { 599 | va_list args; 600 | va_start(args, format); 601 | 602 | int result = vsnprintf(str, size, format, args); 603 | 604 | va_end(args); 605 | 606 | return result; 607 | } 608 | 609 | struct DPrintfHelperData { 610 | int fd; 611 | size_t off; 612 | char buf[1024]; 613 | }; 614 | 615 | static 616 | void 617 | dprintf_helper(struct DPrintfHelperData* data, const void* buf, size_t count) 618 | { 619 | if (count <= sizeof(data->buf) - data->off) { 620 | memcpy(data->buf + data->off, buf, count); 621 | data->off += count; 622 | } else { 623 | write(data->fd, data->buf, data->off); 624 | if (count <= sizeof(data->buf)) { 625 | memcpy(data->buf, buf, count); 626 | data->off = count; 627 | } else { 628 | write(data->fd, buf, count); 629 | data->off = 0; 630 | } 631 | } 632 | } 633 | 634 | int 635 | vdprintf(int fd, const char* format, va_list args) 636 | { 637 | // Intentionally leave data.buf unassigned. 638 | struct DPrintfHelperData data; 639 | data.fd = fd; 640 | data.off = 0; 641 | 642 | int result = printf_driver((PrintfWriteFunc) dprintf_helper, &data, format, 643 | args); 644 | 645 | if (data.off) 646 | write(data.fd, data.buf, data.off); 647 | 648 | return result; 649 | } 650 | 651 | int 652 | dprintf(int fd, const char* format, ...) 653 | { 654 | va_list args; 655 | va_start(args, format); 656 | 657 | int result = vdprintf(fd, format, args); 658 | 659 | va_end(args); 660 | 661 | return result; 662 | } 663 | 664 | int 665 | printf(const char* format, ...) 666 | { 667 | va_list args; 668 | va_start(args, format); 669 | 670 | int result = vdprintf(1, format, args); 671 | 672 | va_end(args); 673 | 674 | return result; 675 | } 676 | 677 | // Hack: ensure that sigset_t is actually an unsigned long. 
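// The rt_* wrappers below hand the kernel an explicit sigset size of _NSIG/8
// bytes (8 bytes for the usual _NSIG of 64), so sigset_t must match the
// kernel ABI exactly: a bare unsigned long on x86-64, otherwise a struct
// wrapping an unsigned long sig[] array (hence the two sigsetptr variants
// below). As a rough usage sketch, assuming the usual SIG_BLOCK/SIGUSR1
// constants: sigemptyset(&set); sigaddset(&set, SIGUSR1);
// sigprocmask(SIG_BLOCK, &set, NULL); boils down to
// syscall4(__NR_rt_sigprocmask, SIG_BLOCK, &set, NULL, _NSIG/8).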
678 | _Static_assert(sizeof(sigset_t) == _NSIG/8, "sigset_t size mismatch"); 679 | 680 | static unsigned long* 681 | sigsetptr(sigset_t* set, int signum) { 682 | #if defined(__x86_64__) 683 | _Static_assert(_Generic((sigset_t) 0, unsigned long: 1, default: 0), 684 | "sigset_t should be an unsigned long"); 685 | (void) signum; 686 | return set; 687 | #else 688 | return &set->sig[signum / 8 / sizeof *set->sig]; 689 | #endif 690 | } 691 | 692 | static unsigned long 693 | sigsetmsk(int signum) { 694 | #if defined(__x86_64__) 695 | return 1ul << (signum - 1); 696 | #else 697 | return 1ul << ((signum - 1) & (8 * sizeof(*((sigset_t*) NULL)->sig) - 1)); 698 | #endif 699 | } 700 | 701 | int 702 | sigemptyset(sigset_t* set) { 703 | memset(set, 0, sizeof *set); 704 | return 0; 705 | } 706 | 707 | int 708 | sigfillset(sigset_t* set) { 709 | memset(set, 0xff, sizeof *set); 710 | return 0; 711 | } 712 | 713 | int 714 | sigaddset(sigset_t* set, int signum) { 715 | if (signum <= 0 || signum > _NSIG) 716 | return -EINVAL; 717 | *sigsetptr(set, signum) |= sigsetmsk(signum); 718 | return 0; 719 | } 720 | 721 | int 722 | sigdelset(sigset_t* set, int signum) { 723 | if (signum <= 0 || signum > _NSIG) 724 | return -EINVAL; 725 | *sigsetptr(set, signum) &= ~sigsetmsk(signum); 726 | return 0; 727 | } 728 | 729 | int 730 | sigismember(const sigset_t* set, int signum) { 731 | if (signum <= 0 || signum > _NSIG) 732 | return -EINVAL; 733 | return (*sigsetptr((sigset_t*) set, signum) & sigsetmsk(signum)) != 0; 734 | } 735 | 736 | int 737 | sigaction(int num, const struct sigaction* restrict act, 738 | struct sigaction* restrict oact) { 739 | struct sigaction kact; 740 | if (act) { 741 | kact = *act; 742 | kact.sa_flags |= SA_RESTORER; 743 | kact.sa_restorer = __restore; 744 | act = &kact; 745 | } 746 | return syscall4(__NR_rt_sigaction, num, (uintptr_t) act, (uintptr_t) oact, 747 | _NSIG/8); 748 | } 749 | 750 | int 751 | sigprocmask(int how, const sigset_t* restrict set, sigset_t* restrict old) { 752 | return syscall4(__NR_rt_sigprocmask, how, (uintptr_t) set, (uintptr_t) old, 753 | _NSIG/8); 754 | } 755 | 756 | int 757 | sigsuspend(const sigset_t* set) { 758 | return syscall2(__NR_rt_sigsuspend, (uintptr_t) set, _NSIG/8); 759 | } 760 | 761 | int kill(pid_t pid, int sig) { 762 | return syscall2(__NR_kill, pid, sig); 763 | } 764 | 765 | unsigned long getauxval(unsigned long type) { 766 | for (const size_t* aux = __auxvptr; *aux != 0; aux += 2) 767 | if (*aux == type) 768 | return aux[1]; 769 | return 0; 770 | } 771 | 772 | size_t getpagesize(void) { 773 | return pagesize; 774 | } 775 | 776 | // May be overridden by an architecture-specific implementation.
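// (The weak attribute makes this byte-wise loop a fallback only: any strong
// definition of memset elsewhere in the build, e.g. a hypothetical optimized
// memset-foo.S providing ".global memset; memset: ...", wins at link time
// without further configuration.)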
777 | __attribute__((weak)) GNU_FORCE_EXTERN 778 | void* memset(void* s, int c, size_t n) { 779 | unsigned char* sptr = s; 780 | for (; n > 0; n--, sptr++) 781 | *sptr = c; 782 | return s; 783 | } 784 | 785 | int memcmp(const void* s1, const void* s2, size_t n) { 786 | const uint8_t* s1ptr = s1; 787 | const uint8_t* s2ptr = s2; 788 | for (; n > 0; n--, s1ptr++, s2ptr++) 789 | if (*s1ptr != *s2ptr) 790 | return *s1ptr - *s2ptr; 791 | return 0; 792 | } 793 | 794 | GNU_FORCE_EXTERN 795 | void* memcpy(void* dest, const void* src, size_t n) { 796 | uint8_t* s1ptr = dest; 797 | const uint8_t* s2ptr = src; 798 | for (; n > 0; n--) 799 | *(s1ptr++) = *(s2ptr++); 800 | return dest; 801 | } 802 | 803 | __attribute__((noreturn)) 804 | GNU_FORCE_EXTERN 805 | void 806 | __start_main(const size_t* initial_stack, const size_t* dynv) 807 | { 808 | int argc = (int) initial_stack[0]; 809 | char** local_environ = (char**) &initial_stack[argc + 2]; 810 | 811 | const size_t* aux = &initial_stack[argc + 2]; 812 | for (; *aux != 0; ++aux) {} 813 | __auxvptr = (const size_t*) ++aux; 814 | 815 | pagesize = getauxval(AT_PAGESZ); 816 | 817 | // Process relocations, if present. 818 | if (dynv) { 819 | uintptr_t base = 0; 820 | 821 | size_t phnum = getauxval(AT_PHNUM); 822 | size_t phent = getauxval(AT_PHENT); 823 | Elf_Phdr* phdr = (void*) getauxval(AT_PHDR); 824 | if (sizeof(*phdr) != phent) 825 | _exit(-ENOEXEC); 826 | for (unsigned i = 0; i != phnum; i++) { 827 | if (phdr[i].p_type == PT_DYNAMIC) { 828 | base = (uintptr_t) dynv - phdr[i].p_vaddr; 829 | break; 830 | } 831 | } 832 | 833 | size_t* rel = NULL, * rela = NULL; 834 | size_t relsz = 0, relasz = 0; 835 | for (; dynv[0]; dynv += 2) { 836 | switch (dynv[0]) { 837 | case DT_REL: rel = (void*) (base + dynv[1]); break; 838 | case DT_RELA: rela = (void*) (base + dynv[1]); break; 839 | case DT_RELSZ: relsz = dynv[1]; break; 840 | case DT_RELASZ: relasz = dynv[1]; break; 841 | default: break; 842 | } 843 | } 844 | 845 | for (; relsz; rel += 2, relsz -= 2*sizeof(size_t)) { 846 | if (ELF_R_TYPE(rel[1]) != R_RELATIVE) 847 | _exit(-ENOEXEC); 848 | *((size_t*) (base + rel[0])) += base; 849 | } 850 | for (; relasz; rela += 3, relasz -= 3*sizeof(size_t)) { 851 | if (ELF_R_TYPE(rela[1]) != R_RELATIVE) 852 | _exit(-ENOEXEC); 853 | *((size_t*) (base + rela[0])) = base + rela[2]; 854 | } 855 | } 856 | 857 | __asm__ volatile("" ::: "memory"); // memory barrier for compiler 858 | environ = local_environ; 859 | 860 | int retval = main(initial_stack[0], (char**) (initial_stack + 1)); 861 | _exit(retval); 862 | } 863 | -------------------------------------------------------------------------------- /client/rtld.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include 13 | 14 | 15 | // Old elf.h doesn't include unwind sections 16 | #if !defined(SHT_X86_64_UNWIND) 17 | #define SHT_X86_64_UNWIND 0x70000001 18 | #endif // !defined(SHT_X86_64_UNWIND) 19 | 20 | #if defined(__x86_64__) 21 | #define EM_CURRENT EM_X86_64 22 | #elif defined(__aarch64__) 23 | #define EM_CURRENT EM_AARCH64 24 | #endif 25 | #define elf_check_arch(x) ((x)->e_machine == EM_CURRENT) 26 | 27 | #define CHECK_SIGNED_BITS(val,bits) \ 28 | ((val) >= -(1ll << (bits-1)) && (val) < (1ll << (bits-1))-1) 29 | #define CHECK_UNSIGNED_BITS(val,bits) ((val) < (1ull << (bits))-1) 30 | 31 | static bool 32 | rtld_elf_signed_range(int64_t val, unsigned bits, const char* 
relinfo) { 33 | if (!CHECK_SIGNED_BITS(val, bits)) { 34 | dprintf(2, "relocation offset out of range (%s): %lx\n", relinfo, val); 35 | return false; 36 | } 37 | return true; 38 | } 39 | 40 | static bool 41 | rtld_elf_unsigned_range(uint64_t val, unsigned bits, const char* relinfo) { 42 | if (!CHECK_UNSIGNED_BITS(val, bits)) { 43 | dprintf(2, "relocation offset out of range (%s): %lx\n", relinfo, val); 44 | return false; 45 | } 46 | return true; 47 | } 48 | 49 | static void 50 | rtld_blend(void* tgt, uint64_t mask, uint64_t data) { 51 | if (mask > UINT32_MAX) 52 | *(uint64_t*) tgt = (data & mask) | (*(uint64_t*) tgt & ~mask); 53 | else if (mask > UINT16_MAX) 54 | *(uint32_t*) tgt = (data & mask) | (*(uint32_t*) tgt & ~mask); 55 | else if (mask > UINT8_MAX) 56 | *(uint16_t*) tgt = (data & mask) | (*(uint16_t*) tgt & ~mask); 57 | else 58 | *(uint8_t*) tgt = (data & mask) | (*(uint8_t*) tgt & ~mask); 59 | } 60 | 61 | #define RTLD_HASH_BITS 17 62 | #define RTLD_HASH_MASK ((1 << RTLD_HASH_BITS) - 1) 63 | #define RTLD_HASH(addr) (((addr >> 2)) & RTLD_HASH_MASK) 64 | 65 | struct PltEntry { 66 | const char* name; 67 | uintptr_t func; 68 | }; 69 | 70 | // Declare functions, but avoid name collision in C. 71 | #define PLT_ENTRY(name, func) \ 72 | extern void PASTE(rtld_plt_, func)() __asm__(STRINGIFY(func)); 73 | #include "plt.inc" 74 | #undef PLT_ENTRY 75 | 76 | static const struct PltEntry plt_entries[] = { 77 | { "instrew_quick_dispatch", 0 }, // dynamically set below 78 | { "instrew_full_dispatch", 0 }, // dynamically set below 79 | #define PLT_ENTRY(name, func) { name, (uintptr_t) &(PASTE(rtld_plt_, func)) }, 80 | #include "plt.inc" 81 | #undef PLT_ENTRY 82 | { NULL, 0 } 83 | }; 84 | 85 | #if defined(__x86_64__) 86 | #define PLT_FUNC_SIZE 8 87 | #elif defined(__aarch64__) 88 | #define PLT_FUNC_SIZE 8 89 | #else 90 | #error "currently unsupported architecture" 91 | #endif 92 | 93 | static int 94 | plt_create(const struct DispatcherInfo* disp_info, void** out_plt) { 95 | size_t plt_entry_count = sizeof(plt_entries) / sizeof(plt_entries[0]) - 1; 96 | size_t code_size = plt_entry_count * PLT_FUNC_SIZE; 97 | size_t data_offset = ALIGN_UP(code_size, 0x40u); 98 | size_t data_size = plt_entry_count * sizeof(uintptr_t); 99 | size_t plt_size = data_offset + data_size; 100 | 101 | uintptr_t plt[ALIGN_UP(plt_size, sizeof(uintptr_t)) / sizeof(uintptr_t)]; 102 | 103 | for (size_t i = 0; i < plt_entry_count; i++) { 104 | void* code_ptr = (uint8_t*) plt + i * PLT_FUNC_SIZE; 105 | uintptr_t* data_ptr = &plt[data_offset / sizeof(uintptr_t) + i]; 106 | ptrdiff_t offset = (char*) data_ptr - (char*) code_ptr; 107 | 108 | if (i == 0) 109 | *data_ptr = disp_info->quick_dispatch_func; 110 | else if (i == 1) 111 | *data_ptr = disp_info->full_dispatch_func; 112 | else 113 | *data_ptr = plt_entries[i].func; 114 | #if defined(__x86_64__) 115 | // This is: "jmp [rip + offset]; ud2" 116 | *((uint64_t*) code_ptr) = 0x0b0f0000000025ff | ((offset - 6) << 16); 117 | #elif defined(__aarch64__) 118 | *((uint32_t*) code_ptr+0) = 0x58000011 | (offset << 3); // ldr x17, [pc+off] 119 | *((uint32_t*) code_ptr+1) = 0xd61f0220; // br x17 120 | #else 121 | #error 122 | #endif // defined(__x86_64__) 123 | } 124 | 125 | void* pltcode = mem_alloc_code(sizeof(plt), 0x40); 126 | if (BAD_ADDR(pltcode)) 127 | return (int) (uintptr_t) pltcode; 128 | int ret = mem_write_code(pltcode, plt, sizeof(plt)); 129 | if (ret < 0) 130 | return ret; 131 | *out_plt = pltcode; 132 | 133 | return 0; 134 | } 135 | 136 | static int 137 | 
rtld_patch_create_stub(Rtld* rtld, const struct RtldPatchData* patch_data, 138 | uintptr_t* out_stub) { 139 | _Static_assert(_Alignof(struct RtldPatchData) <= 0x10, 140 | "patch data alignment too big"); 141 | _Alignas(0x10) uint8_t stcode[0x10 + sizeof(*patch_data)]; 142 | 143 | void* stub = mem_alloc_code(sizeof(stcode), 0x40); 144 | if (BAD_ADDR(stub)) 145 | return (int) (uintptr_t) stub; 146 | 147 | uintptr_t jmptgt = (uintptr_t) rtld->plt + 1 * PLT_FUNC_SIZE; 148 | ptrdiff_t jmptgtdiff = jmptgt - (uintptr_t) stub; 149 | unsigned pdr = rtld->disp_info->patch_data_reg; 150 | 151 | #if defined(__x86_64__) 152 | uint8_t tmpl[] = { 153 | 0x48 + 4*(pdr>=8), 0x8d, 5+((pdr&7)<<3), 9, 0, 0, 0, // lea rXX, [rip+9] 154 | 0xe9, // jmp ... 155 | }; 156 | memcpy(stcode, tmpl, sizeof tmpl); 157 | *(uint32_t*) (stcode + 8) = jmptgtdiff - 12; 158 | *(uint32_t*) (stcode + 12) = 0x0b0f0b0f; // ud2 159 | #elif defined(__aarch64__) 160 | *(uint32_t*) (stcode) = 0x10000080 + pdr; // ADR xXX, pc + 0x10 161 | *(uint32_t*) (stcode + 4) = 0x14000000; // B ... 162 | if (!rtld_elf_signed_range(jmptgtdiff - 4, 28, "R_AARCH64_JUMP26")) 163 | return -EINVAL; 164 | rtld_blend(stcode + 4, 0x03ffffff, (jmptgtdiff - 4) >> 2); 165 | #else 166 | #error "missing patch stub" 167 | #endif 168 | 169 | memcpy(stcode+sizeof(stcode)-sizeof(*patch_data), patch_data, sizeof(*patch_data)); 170 | 171 | int ret = mem_write_code(stub, stcode, sizeof(stcode)); 172 | if (ret < 0) 173 | return ret; 174 | 175 | *out_stub = (uintptr_t) stub; 176 | return 0; 177 | } 178 | 179 | 180 | struct RtldObject { 181 | _Atomic uintptr_t addr; 182 | void* entry; 183 | void* base; 184 | size_t size; 185 | }; 186 | 187 | struct RtldElf { 188 | uint8_t* base; 189 | size_t size; 190 | uint64_t skew; 191 | Elf64_Ehdr* re_ehdr; 192 | Elf64_Shdr* re_shdr; 193 | 194 | // Global PLT 195 | Rtld* rtld; 196 | }; 197 | typedef struct RtldElf RtldElf; 198 | 199 | static int 200 | rtld_elf_init(RtldElf* re, void* obj_base, size_t obj_size, uint64_t skew, 201 | Rtld* rtld) { 202 | if (obj_size < sizeof(Elf64_Ehdr)) 203 | goto err; 204 | 205 | re->base = obj_base; 206 | re->size = obj_size; 207 | re->skew = skew; 208 | re->re_ehdr = obj_base; 209 | re->rtld = rtld; 210 | 211 | if (memcmp(re->re_ehdr, ELFMAG, SELFMAG) != 0) 212 | goto err; 213 | if (re->re_ehdr->e_type != ET_REL) 214 | goto err; 215 | if (re->re_ehdr->e_ident[EI_CLASS] != ELFCLASS64) 216 | goto err; 217 | if (!elf_check_arch(re->re_ehdr)) 218 | goto err; 219 | 220 | if (re->re_ehdr->e_shentsize != sizeof(Elf64_Shdr)) 221 | goto err; 222 | if (obj_size < re->re_ehdr->e_shoff + re->re_ehdr->e_shentsize * re->re_ehdr->e_shnum) 223 | goto err; 224 | 225 | re->re_shdr = (Elf64_Shdr*) ((uint8_t*) obj_base + re->re_ehdr->e_shoff); 226 | 227 | return 0; 228 | 229 | err: 230 | return -EINVAL; 231 | } 232 | 233 | static int 234 | rtld_elf_decode_name(RtldElf* re, const char* name, uintptr_t* out_addr) { 235 | uintptr_t addr = 0; 236 | if (name[0] != 'Z' && name[0] != 'S') 237 | return -EINVAL; 238 | for (unsigned k = 1; name[k] && name[k] != '_'; k++) { 239 | if (name[k] < '0' || name[k] >= '8') 240 | return 0; 241 | addr = (addr << 3) | (name[k] - '0'); 242 | } 243 | if (name[0] == 'S') 244 | addr += re->skew; 245 | *out_addr = addr; 246 | return 0; 247 | } 248 | 249 | static int 250 | rtld_elf_resolve_str(RtldElf* re, size_t strtab_idx, size_t str_idx, const char** out_addr) { 251 | if (strtab_idx == 0 || strtab_idx >= re->re_ehdr->e_shnum) 252 | return -EINVAL; 253 | Elf64_Shdr* str_shdr = re->re_shdr + 
strtab_idx; 254 | if (str_shdr->sh_type != SHT_STRTAB) 255 | return -EINVAL; 256 | if (str_idx >= str_shdr->sh_size) 257 | return -EINVAL; 258 | 259 | *out_addr = (const char*) (re->base + str_shdr->sh_offset) + str_idx; 260 | 261 | return 0; 262 | } 263 | 264 | static int 265 | rtld_elf_resolve_sym(RtldElf* re, size_t symtab_idx, size_t sym_idx, 266 | struct RtldPatchData* patch_data, uintptr_t* out_addr) { 267 | if (symtab_idx == 0 || symtab_idx >= re->re_ehdr->e_shnum) 268 | return -EINVAL; 269 | Elf64_Shdr* sym_shdr = re->re_shdr + symtab_idx; 270 | if (sym_shdr->sh_type != SHT_SYMTAB) 271 | return -EINVAL; 272 | if (sym_shdr->sh_entsize != sizeof(Elf64_Sym)) 273 | return -EINVAL; 274 | if (sym_idx == 0 || sym_idx >= sym_shdr->sh_size / sizeof(Elf64_Sym)) 275 | return -EINVAL; 276 | 277 | Elf64_Sym* sym = (Elf64_Sym*) (re->base + sym_shdr->sh_offset) + sym_idx; 278 | if (sym->st_shndx == SHN_UNDEF) { 279 | const char* name = ""; 280 | rtld_elf_resolve_str(re, sym_shdr->sh_link, sym->st_name, &name); 281 | if (!strncmp(name, "glob_", 5)) { 282 | dprintf(2, "undefined symbol reference to %s\n", name); 283 | return -EINVAL; 284 | } else if (!strcmp(name, "instrew_baseaddr")) { 285 | *out_addr = re->skew; 286 | return 0; 287 | } else { 288 | uintptr_t addr = 0; 289 | if (!rtld_elf_decode_name(re, name, &addr)) { 290 | if (!rtld_resolve(re->rtld, addr, (void**) out_addr)) 291 | return 0; // we got it already 292 | // Create a stub. We cannot use the normal dispatcher, as the 293 | // target address is not necessarily set. 294 | patch_data->sym_addr = addr; 295 | return rtld_patch_create_stub(re->rtld, patch_data, out_addr); 296 | } 297 | 298 | // Search through PLT 299 | for (size_t i = 0; plt_entries[i].name; i++) { 300 | if (!strcmp(name, plt_entries[i].name)) { 301 | *out_addr = (uintptr_t) re->rtld->plt + i * PLT_FUNC_SIZE; 302 | return 0; 303 | } 304 | } 305 | 306 | dprintf(2, "undefined symbol reference to %s\n", name); 307 | return -EINVAL; 308 | } 309 | } else if (sym->st_shndx == SHN_ABS) { 310 | *out_addr = sym->st_value; 311 | } else if (sym->st_shndx < re->re_ehdr->e_shnum) { 312 | Elf64_Shdr* tgt_shdr = re->re_shdr + sym->st_shndx; 313 | *out_addr = tgt_shdr->sh_addr + sym->st_value; 314 | } else { 315 | return -EINVAL; 316 | } 317 | 318 | return 0; 319 | } 320 | 321 | #if defined(__aarch64__) 322 | static int 323 | rtld_elf_add_stub(uintptr_t sym, uintptr_t* out_stub) { 324 | uint32_t stcode[] = { 325 | 0xd2800010 | (((sym >> 0) & 0xffff) << 5), // movz x16, ... 
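// (together with the three movk instructions below, this materializes the
// full 64-bit target address in the intra-procedure-call scratch register
// x16, 16 bits per instruction, before the final br x16)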
326 | 0xf2a00010 | (((sym >> 16) & 0xffff) << 5), // movk x16, ..., lsl 16 327 | 0xf2c00010 | (((sym >> 32) & 0xffff) << 5), // movk x16, ..., lsl 32 328 | 0xf2e00010 | (((sym >> 48) & 0xffff) << 5), // movk x16, ..., lsl 48 329 | 0xd61f0200, // br x16 330 | }; 331 | 332 | void* stub = mem_alloc_code(sizeof(stcode), 0x40); 333 | if (BAD_ADDR(stub)) 334 | return (int) (uintptr_t) stub; 335 | int ret = mem_write_code(stub, stcode, sizeof(stcode)); 336 | if (ret < 0) 337 | return ret; 338 | 339 | *out_stub = (uintptr_t) stub; 340 | return 0; 341 | } 342 | #endif 343 | 344 | static int 345 | rtld_reloc_at(const struct RtldPatchData* patch_data, void* tgt, void* sym) { 346 | uint64_t syma = (uintptr_t) sym + patch_data->addend; 347 | uint64_t pc = patch_data->patch_addr; 348 | int64_t prel_syma = syma - (int64_t) pc; 349 | 350 | switch (patch_data->rel_type) { 351 | #if defined(__x86_64__) 352 | case R_X86_64_PC64: 353 | rtld_blend(tgt, UINT64_MAX, prel_syma); 354 | break; 355 | case R_X86_64_64: 356 | rtld_blend(tgt, UINT64_MAX, syma); 357 | break; 358 | case R_X86_64_PC32: 359 | if (!rtld_elf_signed_range(prel_syma, 32, "R_X86_64_PC32")) 360 | return -EINVAL; 361 | rtld_blend(tgt, 0xffffffff, prel_syma); 362 | break; 363 | case R_X86_64_PLT32: 364 | if (!rtld_elf_signed_range(prel_syma, 32, "R_X86_64_PLT32")) 365 | return -EINVAL; 366 | rtld_blend(tgt, 0xffffffff, prel_syma); 367 | break; 368 | case R_X86_64_32S: 369 | if (!rtld_elf_signed_range(syma, 32, "R_X86_64_32S")) 370 | return -EINVAL; 371 | rtld_blend(tgt, 0xffffffff, syma); 372 | break; 373 | case R_X86_64_32: 374 | if (!rtld_elf_unsigned_range(syma, 32, "R_X86_64_32")) 375 | return -EINVAL; 376 | rtld_blend(tgt, 0xffffffff, syma); 377 | break; 378 | #elif defined(__aarch64__) 379 | case R_AARCH64_PREL64: 380 | rtld_blend(tgt, UINT64_MAX, prel_syma); 381 | break; 382 | case R_AARCH64_PREL32: 383 | if (!rtld_elf_signed_range(prel_syma, 32, "R_AARCH64_PREL32")) 384 | return -EINVAL; 385 | rtld_blend(tgt, 0xffffffff, prel_syma); 386 | break; 387 | case R_AARCH64_JUMP26: 388 | case R_AARCH64_CALL26: 389 | if (!CHECK_SIGNED_BITS(prel_syma, 28)) { 390 | // Ok, let's create a stub. 
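// (A direct B/BL on AArch64 encodes a signed 28-bit byte offset, i.e. a
// reach of PC +/- 128 MiB; targets beyond that are redirected through the
// movz/movk/br veneer built by rtld_elf_add_stub above.)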
391 | // TODO: make stubs more compact/efficient 392 | uintptr_t stub = 0; 393 | int ret = rtld_elf_add_stub(syma, &stub); 394 | if (ret < 0) 395 | return ret; 396 | prel_syma = stub - pc; 397 | } 398 | if (!rtld_elf_signed_range(prel_syma, 28, "R_AARCH64_JUMP26")) 399 | return -EINVAL; 400 | rtld_blend(tgt, 0x03ffffff, prel_syma >> 2); 401 | break; 402 | case R_AARCH64_ADR_PREL_PG_HI21: 403 | prel_syma = ALIGN_DOWN(syma, 1<<12) - ALIGN_DOWN(pc, 1<<12); 404 | prel_syma >>= 12; 405 | if (!rtld_elf_signed_range(prel_syma, 21, "R_AARCH64_PG_HI21")) 406 | return -EINVAL; 407 | rtld_blend(tgt, 0x60ffffe0, ((prel_syma & 3) << 29) | 408 | (((prel_syma >> 2) & 0x7ffff) << 5)); 409 | break; 410 | case R_AARCH64_ADD_ABS_LO12_NC: 411 | rtld_blend(tgt, 0xfff << 10, syma << 10); 412 | break; 413 | case R_AARCH64_LDST8_ABS_LO12_NC: 414 | rtld_blend(tgt, 0xfff << 10, syma << 10); 415 | break; 416 | case R_AARCH64_LDST16_ABS_LO12_NC: 417 | rtld_blend(tgt, 0xfff >> 1 << 10, syma >> 1 << 10); 418 | break; 419 | case R_AARCH64_LDST32_ABS_LO12_NC: 420 | rtld_blend(tgt, 0xfff >> 2 << 10, syma >> 2 << 10); 421 | break; 422 | case R_AARCH64_LDST64_ABS_LO12_NC: 423 | rtld_blend(tgt, 0xfff >> 3 << 10, syma >> 3 << 10); 424 | break; 425 | case R_AARCH64_LDST128_ABS_LO12_NC: 426 | rtld_blend(tgt, 0xfff >> 4 << 10, syma >> 4 << 10); 427 | break; 428 | case R_AARCH64_MOVW_UABS_G0_NC: 429 | rtld_blend(tgt, 0xffff << 5, syma >> 0 << 5); 430 | break; 431 | case R_AARCH64_MOVW_UABS_G1_NC: 432 | rtld_blend(tgt, 0xffff << 5, syma >> 16 << 5); 433 | break; 434 | case R_AARCH64_MOVW_UABS_G2_NC: 435 | rtld_blend(tgt, 0xffff << 5, syma >> 32 << 5); 436 | break; 437 | case R_AARCH64_MOVW_UABS_G3: 438 | rtld_blend(tgt, 0xffff << 5, syma >> 48 << 5); 439 | break; 440 | #endif 441 | default: 442 | dprintf(2, "unhandled relocation %u\n", patch_data->rel_type); 443 | return -EINVAL; 444 | } 445 | 446 | return 0; 447 | } 448 | 449 | static int 450 | rtld_elf_process_rela(RtldElf* re, int rela_idx) { 451 | if (rela_idx == 0 || rela_idx >= re->re_ehdr->e_shnum) 452 | return -EINVAL; 453 | Elf64_Shdr* rela_shdr = re->re_shdr + rela_idx; 454 | if (rela_shdr->sh_type != SHT_RELA) 455 | return -EINVAL; 456 | if (rela_shdr->sh_entsize != sizeof(Elf64_Rela)) 457 | return -EINVAL; 458 | 459 | Elf64_Rela* elf_rela = (Elf64_Rela*) ((uint8_t*) re->base + rela_shdr->sh_offset); 460 | Elf64_Rela* elf_rela_end = elf_rela + rela_shdr->sh_size / sizeof(Elf64_Rela); 461 | 462 | if (rela_shdr->sh_info == 0 || rela_shdr->sh_info >= re->re_ehdr->e_shnum) 463 | return -EINVAL; 464 | Elf64_Shdr* tgt_shdr = &re->re_shdr[rela_shdr->sh_info]; 465 | if (!(tgt_shdr->sh_flags & SHF_ALLOC)) 466 | return -EINVAL; 467 | 468 | uint8_t* sec_write_addr = re->base + tgt_shdr->sh_offset; 469 | 470 | unsigned symtab_idx = rela_shdr->sh_link; 471 | 472 | for (; elf_rela != elf_rela_end; ++elf_rela) { 473 | // TODO: ensure that size doesn't overflow 474 | if (elf_rela->r_offset >= tgt_shdr->sh_size) 475 | return -EINVAL; 476 | 477 | struct RtldPatchData reloc_patch = { 478 | .rel_type = ELF64_R_TYPE(elf_rela->r_info), 479 | .rel_size = 8, // TODO: be more accurate 480 | .addend = elf_rela->r_addend, 481 | .patch_addr = tgt_shdr->sh_addr + elf_rela->r_offset, 482 | }; 483 | 484 | unsigned sym_idx = ELF64_R_SYM(elf_rela->r_info); 485 | uint64_t sym; 486 | if (rtld_elf_resolve_sym(re, symtab_idx, sym_idx, &reloc_patch, &sym) < 0) 487 | return -EINVAL; 488 | 489 | uint8_t* tgt = sec_write_addr + elf_rela->r_offset; 490 | int retval = rtld_reloc_at(&reloc_patch, tgt, (void*) 
sym); 491 | if (retval < 0) 492 | return retval; 493 | } 494 | 495 | return 0; 496 | } 497 | 498 | static int rtld_set(Rtld* r, uintptr_t addr, void* entry, void* obj_base, 499 | size_t obj_size) { 500 | // Note: not thread-safe. We first find a spot, then populate the data, and 501 | // then write the address, so that concurrent readers only ever see valid 502 | // data. However, a concurrent call to add an entry might cause them to 503 | // write to the same spot. 504 | 505 | if (!addr) // 0 is reserved for "empty" 506 | return -EINVAL; 507 | size_t hash = RTLD_HASH(addr); 508 | for (size_t i = 0; i <= RTLD_HASH_MASK; i++) { 509 | RtldObject* obj = &r->objects[(hash + i) & RTLD_HASH_MASK]; 510 | uintptr_t obj_addr = atomic_load_explicit(&obj->addr, memory_order_relaxed); 511 | if (obj_addr == addr) 512 | return -EEXIST; 513 | if (!obj_addr) { 514 | obj->entry = entry; 515 | obj->base = obj_base; 516 | obj->size = obj_size; 517 | atomic_store_explicit(&obj->addr, addr, memory_order_release); 518 | return 0; 519 | } 520 | } 521 | 522 | return -ENOSPC; 523 | } 524 | 525 | // Perf support for simple maps and jitdump files. 526 | // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt 527 | // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt 528 | 529 | struct RtldPerfJitHeader { 530 | uint32_t magic; 531 | uint32_t version; 532 | uint32_t total_size; 533 | uint32_t elf_mach; 534 | uint32_t pad1; 535 | uint32_t pid; 536 | uint64_t timestamp; 537 | uint64_t flags; 538 | }; 539 | 540 | struct RtldPerfJitRecordHeader { 541 | uint32_t id; 542 | uint32_t total_size; 543 | uint64_t timestamp; 544 | }; 545 | 546 | struct RtldPerfJitRecordCodeLoad { 547 | struct RtldPerfJitRecordHeader header; 548 | uint32_t pid; 549 | uint32_t tid; 550 | uint64_t vma; 551 | uint64_t code_addr; 552 | uint64_t code_size; 553 | uint64_t code_index; 554 | }; 555 | 556 | static uint64_t 557 | rtld_perf_timestamp(void) { 558 | struct timespec ts; 559 | if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) 560 | return (uint64_t) ts.tv_sec * 1000000000 + (uint64_t) ts.tv_nsec; 561 | return 0; 562 | } 563 | 564 | static void 565 | rtld_perf_notify(Rtld* r, uintptr_t addr, void* entry, size_t codesize, 566 | const char* name) { 567 | if (UNLIKELY(r->perfmap_fd >= 0)) { 568 | dprintf(r->perfmap_fd, "%lx %lx %lx_%s\n", 569 | (uintptr_t) entry, codesize, addr, name); 570 | } 571 | 572 | if (UNLIKELY(r->perfdump_fd >= 0)) { 573 | char namebuf[64]; 574 | size_t namelen = snprintf(namebuf, sizeof(namebuf), "%lx_%s", addr, name); 575 | if (namelen >= sizeof(namebuf)) 576 | namelen = sizeof(namebuf) - 1; 577 | 578 | struct RtldPerfJitRecordCodeLoad record = { 579 | .header = { 580 | .id = 0 /* JIT_CODE_LOAD */, 581 | .total_size = (uint32_t) (sizeof(record) + namelen + 1 + codesize), 582 | .timestamp = rtld_perf_timestamp(), 583 | }, 584 | .pid = (uint32_t) getpid(), 585 | .tid = (uint32_t) gettid(), 586 | .vma = (uint64_t) entry, 587 | .code_addr = (uint64_t) entry, 588 | .code_size = codesize, 589 | .code_index = addr, // unique index, use guest virtual address 590 | }; 591 | write_full(r->perfdump_fd, &record, sizeof(record)); 592 | write_full(r->perfdump_fd, namebuf, namelen + 1); 593 | write_full(r->perfdump_fd, entry, codesize); 594 | } 595 | } 596 | 597 | int 598 | rtld_perf_init(Rtld* r, int mode) { 599 | if (mode < 1) 600 | return 0; 601 | 602 | int pid = getpid(); 603 | 604 | char name[32]; 605 | 
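/* perf(1) picks up JIT-generated symbols from /tmp/perf-<pid>.map, one
 * "STARTADDR SIZE NAME" line per function (all in hex), which is exactly
 * what rtld_perf_notify writes above; the jitdump file additionally enables
 * the richer "perf inject --jit" workflow. */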
snprintf(name, sizeof(name), "/tmp/perf-%u.map", pid); 606 | int map_fd = open(name, O_CREAT|O_TRUNC|O_NOFOLLOW|O_WRONLY|O_CLOEXEC, 0600); 607 | if (map_fd < 0) 608 | return map_fd; 609 | 610 | r->perfmap_fd = map_fd; 611 | 612 | if (mode < 2) 613 | return 0; 614 | 615 | snprintf(name, sizeof(name), "/tmp/jit-%u.dump", pid); 616 | int dump_fd = open(name, O_CREAT|O_TRUNC|O_NOFOLLOW|O_RDWR|O_CLOEXEC, 0600); 617 | if (dump_fd < 0) 618 | return dump_fd; 619 | 620 | struct RtldPerfJitHeader header = { 621 | .magic = 0x4A695444, // ASCII "JiTD" 622 | .version = 1, 623 | .total_size = sizeof(header), 624 | .elf_mach = EM_CURRENT, 625 | .pad1 = 0, 626 | .pid = (uint32_t) getpid(), 627 | .timestamp = rtld_perf_timestamp(), 628 | .flags = 0, 629 | }; 630 | if (write_full(dump_fd, &header, sizeof(header)) < 0) { 631 | close(dump_fd); 632 | return -EIO; 633 | } 634 | void* marker = mmap(NULL, getpagesize(), PROT_READ|PROT_EXEC, MAP_PRIVATE, 635 | dump_fd, 0); 636 | if (BAD_ADDR(marker)) { 637 | close(dump_fd); 638 | return -EIO; 639 | } 640 | 641 | r->perfdump_fd = dump_fd; 642 | 643 | return 0; 644 | } 645 | 646 | int rtld_add_object(Rtld* r, void* obj_base, size_t obj_size, uint64_t skew) { 647 | int retval; 648 | 649 | RtldElf re; 650 | if ((retval = rtld_elf_init(&re, obj_base, obj_size, skew, r)) < 0) 651 | goto out; 652 | 653 | int i; 654 | Elf64_Shdr* elf_shnt; 655 | 656 | // First, check flags and determine total allocation size and alignment 657 | size_t totsz = 0; 658 | size_t totalign = 1; 659 | for (i = 0, elf_shnt = re.re_shdr; i < re.re_ehdr->e_shnum; i++, elf_shnt++) { 660 | // We don't support more flags 661 | if (elf_shnt->sh_flags & ~(SHF_ALLOC|SHF_EXECINSTR|SHF_MERGE|SHF_STRINGS|SHF_INFO_LINK)) { 662 | dprintf(2, "unsupported section flags\n"); 663 | return -EINVAL; 664 | } 665 | if (elf_shnt->sh_flags & SHF_ALLOC) { 666 | totsz = ALIGN_UP(totsz, elf_shnt->sh_addralign); 667 | elf_shnt->sh_addr = totsz; // keep offset into allocation 668 | totsz += elf_shnt->sh_size; 669 | if (totalign < elf_shnt->sh_addralign) 670 | totalign = elf_shnt->sh_addralign; 671 | } 672 | } 673 | 674 | char* base = mem_alloc_code(totsz, totalign); 675 | if (BAD_ADDR(base)) 676 | return (int) (uintptr_t) base; 677 | 678 | for (i = 0, elf_shnt = re.re_shdr; i < re.re_ehdr->e_shnum; i++, elf_shnt++) 679 | if (elf_shnt->sh_flags & SHF_ALLOC) 680 | elf_shnt->sh_addr += (uintptr_t) base; 681 | 682 | // Second pass to resolve relocations, now that all sections are allocated. 683 | for (i = 0, elf_shnt = re.re_shdr; i < re.re_ehdr->e_shnum; i++, elf_shnt++) { 684 | if (elf_shnt->sh_type != SHT_RELA) 685 | continue; 686 | retval = rtld_elf_process_rela(&re, i); 687 | if (retval < 0) 688 | goto out; 689 | } 690 | 691 | // Third pass to actually copy code into target allocation 692 | for (i = 0, elf_shnt = re.re_shdr; i < re.re_ehdr->e_shnum; i++, elf_shnt++) { 693 | if (elf_shnt->sh_type != SHT_PROGBITS) 694 | continue; 695 | uint8_t* src = re.base + elf_shnt->sh_offset; 696 | void* dst = (void*) elf_shnt->sh_addr; 697 | if ((retval = mem_write_code(dst, src, elf_shnt->sh_size)) < 0) 698 | goto out; 699 | } 700 | 701 | // Last pass to store final addresses in the hash table. This is done after 702 | // the code is put into its final place to avoid storing invalid addresses. 
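// rtld_set publishes each entry with a release store on obj->addr, pairing
// with the acquire load in rtld_resolve, so a reader that observes the
// address also observes the entry/base/size fields written before it.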
703 | for (i = 0, elf_shnt = re.re_shdr; i < re.re_ehdr->e_shnum; i++, elf_shnt++) { 704 | if (elf_shnt->sh_type != SHT_SYMTAB) 705 | continue; 706 | 707 | retval = -EINVAL; 708 | if (elf_shnt->sh_entsize != sizeof(Elf64_Sym)) 709 | goto out; 710 | Elf64_Sym* elf_sym = (Elf64_Sym*) ((uint8_t*) obj_base + elf_shnt->sh_offset); 711 | Elf64_Sym* elf_sym_end = elf_sym + elf_shnt->sh_size / sizeof(Elf64_Sym); 712 | for (; elf_sym != elf_sym_end; elf_sym++) { 713 | if (ELF64_ST_BIND(elf_sym->st_info) != STB_GLOBAL) 714 | continue; 715 | if (ELF64_ST_TYPE(elf_sym->st_info) != STT_FUNC) 716 | continue; 717 | if (ELF64_ST_VISIBILITY(elf_sym->st_other) != STV_DEFAULT) 718 | continue; 719 | if (elf_sym->st_shndx == SHN_UNDEF) 720 | continue; 721 | retval = -EINVAL; 722 | if (elf_sym->st_shndx >= re.re_ehdr->e_shnum) 723 | goto out; 724 | uintptr_t entry = re.re_shdr[elf_sym->st_shndx].sh_addr + elf_sym->st_value; 725 | 726 | // Determine address from name, encoded as Z<octal-addr>_<ignored> 727 | const char* name = NULL; 728 | rtld_elf_resolve_str(&re, elf_shnt->sh_link, elf_sym->st_name, &name); 729 | if (!name) 730 | goto out; 731 | uintptr_t addr = 0; 732 | retval = rtld_elf_decode_name(&re, name, &addr); 733 | if (retval < 0 || addr == 0) { 734 | dprintf(2, "invalid function name %s\n", name); 735 | goto out; 736 | } 737 | retval = rtld_set(r, addr, (void*) entry, obj_base, obj_size); 738 | if (retval < 0) 739 | goto out; 740 | 741 | rtld_perf_notify(r, addr, (void*) entry, elf_sym->st_size, name); 742 | } 743 | } 744 | 745 | return 0; 746 | 747 | out: 748 | // TODO: deallocate memory on failure 749 | return retval; 750 | } 751 | 752 | int 753 | rtld_init(Rtld* r, const struct DispatcherInfo* disp_info) { 754 | size_t table_size = sizeof(RtldObject) * (1 << RTLD_HASH_BITS); 755 | RtldObject* objects = mem_alloc_data(table_size, getpagesize()); 756 | if (BAD_ADDR(objects)) 757 | return (int) (uintptr_t) objects; 758 | 759 | r->objects = objects; 760 | r->perfmap_fd = -1; 761 | r->perfdump_fd = -1; 762 | r->disp_info = disp_info; 763 | 764 | int retval = plt_create(disp_info, &r->plt); 765 | if (retval < 0) 766 | return retval; 767 | 768 | return 0; 769 | } 770 | 771 | int 772 | rtld_resolve(Rtld* r, uintptr_t addr, void** out_entry) { 773 | if (!addr) // 0 is reserved for "empty" 774 | return -ENOENT; 775 | size_t hash = RTLD_HASH(addr); 776 | for (size_t i = 0; i <= RTLD_HASH_MASK; i++) { 777 | RtldObject* obj = &r->objects[(hash + i) & RTLD_HASH_MASK]; 778 | uintptr_t obj_addr = atomic_load_explicit(&obj->addr, memory_order_acquire); 779 | if (!obj_addr) 780 | break; 781 | if (obj_addr == addr) { 782 | *out_entry = obj->entry; 783 | return 0; 784 | } 785 | } 786 | 787 | return -ENOENT; 788 | } 789 | 790 | void 791 | rtld_patch(struct RtldPatchData* patch_data, void* sym) { 792 | // Ignore relocation failures and cases where there is nothing to patch. 793 | char reloc_buf[8]; 794 | if (!patch_data) 795 | return; 796 | if (patch_data->rel_size > sizeof reloc_buf) 797 | return; 798 | memcpy(reloc_buf, (void*) patch_data->patch_addr, patch_data->rel_size); 799 | (void) rtld_reloc_at(patch_data, reloc_buf, sym); 800 | mem_write_code((void*) patch_data->patch_addr, reloc_buf, patch_data->rel_size); 801 | } 802 | --------------------------------------------------------------------------------