├── .gitmodules ├── CMakeLists.txt ├── LICENSE.md ├── README.md ├── external └── external.cmake ├── src ├── CMakeLists.txt ├── allocators.cpp ├── allocators.h ├── arch │ ├── arch.h │ ├── definitions.h │ ├── generic │ │ ├── codegen_generic.cpp │ │ ├── codegen_generic.h │ │ ├── runtime_context_generic.cpp │ │ └── runtime_context_generic.h │ ├── ppc64le │ │ ├── codegen │ │ │ ├── abi.h │ │ │ ├── assembler.cpp │ │ │ ├── assembler.h │ │ │ ├── codegen_fixed_helpers.cpp │ │ │ ├── codegen_ppc64le.cpp │ │ │ ├── codegen_ppc64le.h │ │ │ ├── codegen_ppc64le_internal.h │ │ │ ├── codegen_target_x86_64.cpp │ │ │ ├── codegen_types.cpp │ │ │ ├── codegen_types.h │ │ │ ├── register_allocator.cpp │ │ │ └── register_allocator.h │ │ ├── cpu_context_ppc64le.h │ │ ├── llir │ │ │ └── llir_registers_ppc64le.h │ │ ├── runtime_context_ppc64le.cpp │ │ ├── runtime_context_ppc64le.h │ │ ├── syscalls.cpp │ │ └── syscalls.h │ ├── runtime_context_dispatcher.cpp │ ├── runtime_context_dispatcher.h │ ├── target_environment.h │ └── x86_64 │ │ ├── cpu_context_x86_64.h │ │ ├── llir │ │ ├── llir_lifter_x86_64.cpp │ │ ├── llir_lifter_x86_64.h │ │ ├── llir_operands_x86_64.h │ │ └── llir_registers_x86_64.h │ │ ├── syscalls.h │ │ ├── target_environment.cpp │ │ └── target_environment.h ├── codegen.cpp ├── codegen.h ├── disassembler.cpp ├── disassembler.h ├── dynamic_recompiler.cpp ├── dynamic_recompiler.h ├── elf_loader.cpp ├── elf_loader.h ├── execution_context.cpp ├── execution_context.h ├── instruction_stream.h ├── llir.h ├── main.cpp ├── mapped_file.cpp ├── mapped_file.h ├── platform │ ├── generic_syscalls.cpp │ ├── generic_syscalls.h │ ├── syscall_emulator.cpp │ ├── syscall_emulator.h │ └── syscall_types.h ├── process_memory_map.cpp ├── process_memory_map.h ├── util │ ├── magic.h │ ├── staticvector.h │ ├── util.cpp │ └── util.h ├── virtual_address_mapper.cpp └── virtual_address_mapper.h └── test ├── Makefile ├── asm ├── .addressing_modes.bin.expected ├── .hello_sse.bin.expected ├── .loadstore.bin.expected 
├── .pushpop.bin.expected ├── .stos.bin.expected ├── addressing_modes.S ├── callret.S ├── gentest.py ├── hello.S ├── hello.asm ├── hello_sse.S ├── jump.S ├── jump.asm ├── jump_sf.S ├── jump_zf.S ├── loadstore.S ├── mov.S ├── pushpop.S └── stos.S ├── c ├── .hello.fs.bin.expected ├── .print_args.fs.bin.env ├── .print_args.fs.bin.expected ├── auxval.fs.c ├── cpuid.fs.c ├── hello.fs.c ├── print_args.fs.c ├── start.h └── syscall.h └── runtests.py /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "external/capstone"] 2 | path = external/capstone 3 | url = https://github.com/aquynh/capstone 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(retrec) 3 | 4 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 5 | # Detect the host architecture from `uname -m`; CMake strips the trailing newline itself 6 | execute_process(COMMAND uname -m OUTPUT_VARIABLE HOST_ARCH OUTPUT_STRIP_TRAILING_WHITESPACE) 7 | message(STATUS "Detected host arch: ${HOST_ARCH}") 8 | 9 | if(NOT CMAKE_BUILD_TYPE) 10 | set(CMAKE_BUILD_TYPE Release) 11 | endif() 12 | 13 | set(RETREC_DEBUG_BUILD 0) 14 | if(CMAKE_BUILD_TYPE STREQUAL "Debug") 15 | set(RETREC_DEBUG_BUILD 1) 16 | endif() 17 | 18 | # Compile external dependencies 19 | include(external/external.cmake) 20 | 21 | # 22 | # User configurable options 23 | # 24 | set(MIN_LOG_LEVEL AUTO CACHE STRING "Set minimum log level {AUTO, 0, 1, 2, 3}") 25 | set_property(CACHE MIN_LOG_LEVEL PROPERTY STRINGS "AUTO" "0" "1" "2" "3") 26 | if("${MIN_LOG_LEVEL}" MATCHES "AUTO") 27 | if (CMAKE_BUILD_TYPE STREQUAL "Debug") 28 | # Set min log level to lowest for debug builds 29 | set(MIN_LOG_LEVEL 0) 30 | else() 31 | # Otherwise set it to WARN 32 | set(MIN_LOG_LEVEL 2) 33 | endif() 34 | endif() 35 | message(STATUS "Minimum log level is: ${MIN_LOG_LEVEL}") # Set with 'cmake -DMIN_LOG_LEVEL=<0,1,2,3>' 36 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} 
-DRETREC_MINIMUM_LOG_LEVEL=${MIN_LOG_LEVEL}") 37 | 38 | # 39 | # Compiler flags 40 | # 41 | set(CMAKE_CXX_STANDARD 17) 42 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIE -fno-exceptions -Wall -Wimplicit-fallthrough -Wextra -Wpessimizing-move -Wno-psabi") 43 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DRETREC_DEBUG_BUILD=${RETREC_DEBUG_BUILD}") 44 | set(CMAKE_SOURCE_DIR "src") 45 | set(CMAKE_CXX_FLAGS_DEBUG "") 46 | set(CMAKE_CXX_FLAGS_RELEASE "") 47 | 48 | message(STATUS "Build type is: ${CMAKE_BUILD_TYPE}") 49 | if(CMAKE_BUILD_TYPE STREQUAL "Debug") 50 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Og -g -Werror=switch") 51 | elseif(CMAKE_BUILD_TYPE STREQUAL "ReleaseDebug") 52 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -mcpu=power9 -g -DNDEBUG -Wno-unused-variable -Wno-unused-parameter") 53 | elseif(CMAKE_BUILD_TYPE STREQUAL "Release") 54 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -Wno-unused-variable -Wno-unused-parameter") 55 | else() 56 | message(FATAL_ERROR "Unknown CMAKE_BUILD_TYPE: Choices are 'Release', 'ReleaseDebug', 'Debug'") 57 | endif() 58 | 59 | # Compiler-specific flags 60 | if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") 61 | if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10.0) 62 | # GCC >=10 has non-broken -Wconversion, so we can enable it 63 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion -Wno-sign-conversion") 64 | endif() 65 | elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") 66 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion -Wno-sign-conversion") 67 | endif() 68 | 69 | set(CODEGEN_PPC64LE 0) 70 | set(CODEGEN_GENERIC 1) 71 | set(CODEGEN_BACKENDS "generic") 72 | 73 | # Architecture-specific flags (HOST_ARCH is quoted so an empty value selects the generic fallback) 74 | if("${HOST_ARCH}" MATCHES "ppc64") 75 | set(CODEGEN_PPC64LE 1) 76 | set(CODEGEN_BACKENDS "${CODEGEN_BACKENDS}, ppc64le") 77 | else() 78 | message(STATUS "No codegen implemented for your architecture - falling back to generic interpreter!") 79 | endif() 80 | 81 | message(STATUS "Codegen backends enabled: ${CODEGEN_BACKENDS}") 82 | 83 | # Define 
detected architecture/codegen flags 84 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \ 85 | -DRETREC_CODEGEN_GENERIC=${CODEGEN_GENERIC} \ 86 | -DRETREC_CODEGEN_PPC64LE=${CODEGEN_PPC64LE}" 87 | ) 88 | 89 | # 90 | # Dependencies 91 | # 92 | find_package(PkgConfig REQUIRED) 93 | pkg_check_modules(LIBELF REQUIRED IMPORTED_TARGET libelf) 94 | 95 | # Main source directory 96 | add_subdirectory(src) 97 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ### GNU LESSER GENERAL PUBLIC LICENSE 2 | 3 | Version 3, 29 June 2007 4 | 5 | Copyright (C) 2007 Free Software Foundation, Inc. 6 | 7 | 8 | Everyone is permitted to copy and distribute verbatim copies of this 9 | license document, but changing it is not allowed. 10 | 11 | This version of the GNU Lesser General Public License incorporates the 12 | terms and conditions of version 3 of the GNU General Public License, 13 | supplemented by the additional permissions listed below. 14 | 15 | #### 0. Additional Definitions. 16 | 17 | As used herein, "this License" refers to version 3 of the GNU Lesser 18 | General Public License, and the "GNU GPL" refers to version 3 of the 19 | GNU General Public License. 20 | 21 | "The Library" refers to a covered work governed by this License, other 22 | than an Application or a Combined Work as defined below. 23 | 24 | An "Application" is any work that makes use of an interface provided 25 | by the Library, but which is not otherwise based on the Library. 26 | Defining a subclass of a class defined by the Library is deemed a mode 27 | of using an interface provided by the Library. 28 | 29 | A "Combined Work" is a work produced by combining or linking an 30 | Application with the Library. The particular version of the Library 31 | with which the Combined Work was made is also called the "Linked 32 | Version". 
33 | 34 | The "Minimal Corresponding Source" for a Combined Work means the 35 | Corresponding Source for the Combined Work, excluding any source code 36 | for portions of the Combined Work that, considered in isolation, are 37 | based on the Application, and not on the Linked Version. 38 | 39 | The "Corresponding Application Code" for a Combined Work means the 40 | object code and/or source code for the Application, including any data 41 | and utility programs needed for reproducing the Combined Work from the 42 | Application, but excluding the System Libraries of the Combined Work. 43 | 44 | #### 1. Exception to Section 3 of the GNU GPL. 45 | 46 | You may convey a covered work under sections 3 and 4 of this License 47 | without being bound by section 3 of the GNU GPL. 48 | 49 | #### 2. Conveying Modified Versions. 50 | 51 | If you modify a copy of the Library, and, in your modifications, a 52 | facility refers to a function or data to be supplied by an Application 53 | that uses the facility (other than as an argument passed when the 54 | facility is invoked), then you may convey a copy of the modified 55 | version: 56 | 57 | - a) under this License, provided that you make a good faith effort 58 | to ensure that, in the event an Application does not supply the 59 | function or data, the facility still operates, and performs 60 | whatever part of its purpose remains meaningful, or 61 | - b) under the GNU GPL, with none of the additional permissions of 62 | this License applicable to that copy. 63 | 64 | #### 3. Object Code Incorporating Material from Library Header Files. 65 | 66 | The object code form of an Application may incorporate material from a 67 | header file that is part of the Library. 
You may convey such object 68 | code under terms of your choice, provided that, if the incorporated 69 | material is not limited to numerical parameters, data structure 70 | layouts and accessors, or small macros, inline functions and templates 71 | (ten or fewer lines in length), you do both of the following: 72 | 73 | - a) Give prominent notice with each copy of the object code that 74 | the Library is used in it and that the Library and its use are 75 | covered by this License. 76 | - b) Accompany the object code with a copy of the GNU GPL and this 77 | license document. 78 | 79 | #### 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, taken 82 | together, effectively do not restrict modification of the portions of 83 | the Library contained in the Combined Work and reverse engineering for 84 | debugging such modifications, if you also do each of the following: 85 | 86 | - a) Give prominent notice with each copy of the Combined Work that 87 | the Library is used in it and that the Library and its use are 88 | covered by this License. 89 | - b) Accompany the Combined Work with a copy of the GNU GPL and this 90 | license document. 91 | - c) For a Combined Work that displays copyright notices during 92 | execution, include the copyright notice for the Library among 93 | these notices, as well as a reference directing the user to the 94 | copies of the GNU GPL and this license document. 95 | - d) Do one of the following: 96 | - 0) Convey the Minimal Corresponding Source under the terms of 97 | this License, and the Corresponding Application Code in a form 98 | suitable for, and under terms that permit, the user to 99 | recombine or relink the Application with a modified version of 100 | the Linked Version to produce a modified Combined Work, in the 101 | manner specified by section 6 of the GNU GPL for conveying 102 | Corresponding Source. 103 | - 1) Use a suitable shared library mechanism for linking with 104 | the Library. 
A suitable mechanism is one that (a) uses at run 105 | time a copy of the Library already present on the user's 106 | computer system, and (b) will operate properly with a modified 107 | version of the Library that is interface-compatible with the 108 | Linked Version. 109 | - e) Provide Installation Information, but only if you would 110 | otherwise be required to provide such information under section 6 111 | of the GNU GPL, and only to the extent that such information is 112 | necessary to install and execute a modified version of the 113 | Combined Work produced by recombining or relinking the Application 114 | with a modified version of the Linked Version. (If you use option 115 | 4d0, the Installation Information must accompany the Minimal 116 | Corresponding Source and Corresponding Application Code. If you 117 | use option 4d1, you must provide the Installation Information in 118 | the manner specified by section 6 of the GNU GPL for conveying 119 | Corresponding Source.) 120 | 121 | #### 5. Combined Libraries. 122 | 123 | You may place library facilities that are a work based on the Library 124 | side by side in a single library together with other library 125 | facilities that are not Applications and are not covered by this 126 | License, and convey such a combined library under terms of your 127 | choice, if you do both of the following: 128 | 129 | - a) Accompany the combined library with a copy of the same work 130 | based on the Library, uncombined with any other library 131 | facilities, conveyed under the terms of this License. 132 | - b) Give prominent notice with the combined library that part of it 133 | is a work based on the Library, and explaining where to find the 134 | accompanying uncombined form of the same work. 135 | 136 | #### 6. Revised Versions of the GNU Lesser General Public License. 137 | 138 | The Free Software Foundation may publish revised and/or new versions 139 | of the GNU Lesser General Public License from time to time. 
Such new 140 | versions will be similar in spirit to the present version, but may 141 | differ in detail to address new problems or concerns. 142 | 143 | Each version is given a distinguishing version number. If the Library 144 | as you received it specifies that a certain numbered version of the 145 | GNU Lesser General Public License "or any later version" applies to 146 | it, you have the option of following the terms and conditions either 147 | of that published version or of any later version published by the 148 | Free Software Foundation. If the Library as you received it does not 149 | specify a version number of the GNU Lesser General Public License, you 150 | may choose any version of the GNU Lesser General Public License ever 151 | published by the Free Software Foundation. 152 | 153 | If the Library as you received it specifies that a proxy can decide 154 | whether future versions of the GNU Lesser General Public License shall 155 | apply, that proxy's public statement of acceptance of any version is 156 | permanent authorization for you to choose that version for the 157 | Library. 158 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | retrec 2 | ====== 3 | 4 | retrec is a retargetable dynamic recompiler for Linux userspace binaries that 5 | currently targets x86\_64 binaries on ppc64le (ISA 3.0B+) hosts. Support for other 6 | host ISAs (riscv64, aarch64) is planned. 7 | 8 | Unlike other retargetable userspace binary translators like [qemu-user](https://www.qemu.org/docs/master/user/main.html), 9 | retrec trades simplicity and portability for performance. While QEMU's code generator (TCG) 10 | is designed to be easy to port, retrec's code generation is designed to enable the most efficient 11 | translation possible, at the cost of requiring more complex architecture-specific backends. 
12 | 13 | An example of the design differences between the two can be seen in the intermediate representations 14 | (IR) used by each. All QEMU target ISAs are lowered to a lowest-common-denominator IR (TCGops), 15 | whereas retrec's IR (llir) aims to provide a lossless 1:1 representation for any source ISA 16 | instruction. This means that while llir codegen backends must necessarily be more complex 17 | than TCG backends, they are also potentially able to emit more optimized code since no 18 | lossy conversions to/from representations have occurred. 19 | 20 | In the future, retrec also aims to provide support for invoking host library routines from translated 21 | processes, much like the [box86](https://github.com/ptitSeb/box86) project. Unlike box86, though, 22 | retrec only targets 64-bit ISAs and has a retargetable code generator instead of an ARM-only one. 23 | 24 | Status 25 | ------ 26 | retrec is under heavy development and currently only implements a small fraction of the x86\_64 27 | ISA and only supports ppc64le (ISA 3.0B+) hosts. Currently only very basic statically-linked C programs run. 28 | 29 | Roadmap: 30 | 31 | - [x] Basic ELF loader 32 | - [x] Support for basic x86\_64 instructions (integer ALU, branch, etc.) (in progress) 33 | - [x] ppc64le codegen backend (in progress) 34 | - [ ] Support for more x86\_64 instructions (FPU, misc.) (in progress) 35 | - [ ] Support for more syscalls 36 | - [ ] Thread support 37 | - [ ] Dynamically linked binary support 38 | - [ ] SIMD (SSE, AVX) 39 | - [ ] aarch64 codegen backend 40 | - [ ] riscv64 codegen backend 41 | - [ ] JIT cache 42 | - [ ] Optimization passes 43 | - [ ] Support for calling into host libraries 44 | - [ ] Potential LLVM integration for translating hot routines? 45 | - [ ] ??? 46 | 47 | If you would like to contribute to retrec's development, don't hesitate to reach out! 
48 | 49 | Building 50 | -------- 51 | retrec is still in very early stages, so building is currently only useful for those interested 52 | in contributing to its development. The only dependencies are cmake, libelf, and a C++17 compiler. 53 | 54 | ``` 55 | $ mkdir build && cd build 56 | $ cmake .. -DCMAKE_BUILD_TYPE=Debug 57 | $ make 58 | ``` 59 | 60 | Afterwards, you can run the test suite. Note that this requires an `x86_64-unknown-linux-gnu` toolchain. 61 | ``` 62 | $ cd ../test 63 | $ make 64 | $ ./runtests.py ../build/src/retrec 65 | ``` 66 | 67 | License 68 | ------- 69 | retrec is licensed under the GNU Lesser General Public License (LGPL), version 3 or later. See LICENSE.md. 70 | -------------------------------------------------------------------------------- /external/external.cmake: -------------------------------------------------------------------------------- 1 | set(ROOT ${PROJECT_SOURCE_DIR}) 2 | 3 | # Clone capstone if necessary 4 | if(NOT EXISTS "${ROOT}/external/capstone/CMakeLists.txt") 5 | message(STATUS "Cloning capstone git") 6 | execute_process(COMMAND git submodule update --init -- external/capstone WORKING_DIRECTORY "${ROOT}") 7 | endif() 8 | 9 | # For Release/ReleaseDebug, enable DIET mode for a large performance boost 10 | if(CMAKE_BUILD_TYPE MATCHES "Release") 11 | set(CAPSTONE_BUILD_DIET ON CACHE BOOL "") 12 | else() 13 | set(CAPSTONE_BUILD_DIET OFF CACHE BOOL "") 14 | endif() 15 | 16 | set(CAPSTONE_BUILD_STATIC ON CACHE BOOL "") 17 | set(CAPSTONE_BUILD_SHARED OFF CACHE BOOL "") 18 | set(CAPSTONE_INSTALL OFF CACHE BOOL "") 19 | set(CAPSTONE_ARCHITECTURE_DEFAULT OFF CACHE BOOL "") 20 | set(CAPSTONE_X86_SUPPORT ON CACHE BOOL "") 21 | # Configure capstone as a Release build (build type is restored afterwards); its build output goes in the build tree, not the source tree 22 | set(CMAKE_BUILD_TYPE_OLD ${CMAKE_BUILD_TYPE}) 23 | set(CMAKE_BUILD_TYPE Release) 24 | add_subdirectory(external/capstone "${PROJECT_BINARY_DIR}/capstone" EXCLUDE_FROM_ALL) 25 | set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE_OLD}) 26 | 27 | set(CapstoneGit_INCLUDE "${ROOT}/external/capstone/include") 28 | set(CapstoneGit_LIBS 
capstone-static) 29 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Cleaner file name macro for use in logging 2 | # https://stackoverflow.com/a/16658858 3 | string(LENGTH "${CMAKE_SOURCE_DIR}/" SOURCE_PATH_SIZE) 4 | add_definitions("-DSOURCE_PATH_SIZE=${SOURCE_PATH_SIZE}") 5 | 6 | # 7 | # Common source files 8 | # 9 | set(RETREC_SOURCES 10 | allocators.cpp 11 | allocators.h 12 | arch/arch.h 13 | arch/definitions.h 14 | arch/generic/codegen_generic.cpp 15 | arch/generic/codegen_generic.h 16 | arch/generic/runtime_context_generic.cpp 17 | arch/generic/runtime_context_generic.h 18 | arch/ppc64le/codegen/abi.h 19 | arch/ppc64le/codegen/assembler.h 20 | arch/ppc64le/codegen/codegen_ppc64le.h 21 | arch/ppc64le/codegen/codegen_ppc64le_internal.h 22 | arch/ppc64le/codegen/codegen_types.h 23 | arch/ppc64le/codegen/register_allocator.h 24 | arch/ppc64le/cpu_context_ppc64le.h 25 | arch/ppc64le/llir/llir_registers_ppc64le.h 26 | arch/ppc64le/runtime_context_ppc64le.h 27 | arch/ppc64le/syscalls.h 28 | arch/runtime_context_dispatcher.cpp 29 | arch/runtime_context_dispatcher.h 30 | arch/target_environment.h 31 | arch/x86_64/cpu_context_x86_64.h 32 | arch/x86_64/llir/llir_lifter_x86_64.cpp 33 | arch/x86_64/llir/llir_lifter_x86_64.h 34 | arch/x86_64/llir/llir_operands_x86_64.h 35 | arch/x86_64/llir/llir_registers_x86_64.h 36 | arch/x86_64/syscalls.h 37 | arch/x86_64/target_environment.cpp 38 | arch/x86_64/target_environment.h 39 | codegen.cpp 40 | codegen.h 41 | disassembler.cpp 42 | disassembler.h 43 | dynamic_recompiler.cpp 44 | dynamic_recompiler.h 45 | elf_loader.cpp 46 | elf_loader.h 47 | execution_context.cpp 48 | execution_context.h 49 | instruction_stream.h 50 | llir.h 51 | main.cpp 52 | mapped_file.cpp 53 | mapped_file.h 54 | platform/generic_syscalls.cpp 55 | platform/generic_syscalls.h 56 | platform/syscall_emulator.cpp 57 | 
platform/syscall_emulator.h 58 | platform/syscall_types.h 59 | process_memory_map.cpp 60 | process_memory_map.h 61 | util/magic.h 62 | util/staticvector.h 63 | util/util.cpp 64 | util/util.h 65 | virtual_address_mapper.cpp 66 | virtual_address_mapper.h 67 | ) 68 | 69 | # 70 | # Architecture-dependent source files 71 | # 72 | if(HOST_ARCH MATCHES "ppc64") 73 | list(APPEND RETREC_SOURCES 74 | arch/ppc64le/codegen/assembler.cpp 75 | arch/ppc64le/codegen/codegen_fixed_helpers.cpp 76 | arch/ppc64le/codegen/codegen_ppc64le.cpp 77 | arch/ppc64le/codegen/codegen_target_x86_64.cpp 78 | arch/ppc64le/codegen/codegen_types.cpp 79 | arch/ppc64le/codegen/register_allocator.cpp 80 | arch/ppc64le/runtime_context_ppc64le.cpp 81 | arch/ppc64le/syscalls.cpp 82 | ) 83 | endif() 84 | 85 | add_executable(retrec ${RETREC_SOURCES}) 86 | # Link libelf through the imported target requested via pkg_check_modules(... IMPORTED_TARGET) in the top-level CMakeLists.txt 87 | target_link_libraries(retrec PUBLIC PkgConfig::LIBELF ${CapstoneGit_LIBS}) 88 | target_include_directories(retrec PUBLIC ${LIBELF_INCLUDE_DIRS} ${CapstoneGit_INCLUDE}) 89 | 90 | target_include_directories(retrec PRIVATE .) 91 | 92 | -------------------------------------------------------------------------------- /src/allocators.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 
15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #include 21 | 22 | using namespace retrec; 23 | 24 | void simple_placement_allocator::init(void *region_, size_t region_size_) { 25 | region = region_; 26 | region_size = region_size_; 27 | } 28 | 29 | void *simple_placement_allocator::allocate(size_t size) { 30 | if (size > region_size - used) { 31 | return nullptr; 32 | } else { 33 | void *start = (void *)((uint8_t *)region + used); 34 | used += size; 35 | return start; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/allocators.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | namespace retrec { 31 | 32 | class simple_placement_allocator { 33 | void *region; 34 | size_t region_size; 35 | size_t used { 0 }; 36 | 37 | public: 38 | void init(void *region_, size_t region_size_); 39 | 40 | void *allocate(size_t size); 41 | void free([[maybe_unused]] void *buffer) { /* Placement allocators can't free */ } 42 | }; 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/arch/arch.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | /** 21 | * This file defines architecture-specific definitions used for compile-time feature selection. 
22 | */ 23 | 24 | #pragma once 25 | 26 | #define HOST_ARCH_AARCH64 0 27 | #define HOST_ARCH_PPC64LE 0 28 | #define HOST_ARCH_X86_64 0 29 | 30 | // 31 | // Arch detection and dependant inclusion 32 | // 33 | #if defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && defined(_CALL_ELF) && (_CALL_ELF == 2) 34 | 35 | #undef HOST_ARCH_PPC64LE 36 | #define HOST_ARCH_PPC64LE 1 37 | 38 | #include 39 | 40 | #elif defined(__x86_64__) 41 | 42 | #undef HOST_ARCH_X86_64 43 | #define HOST_ARCH_X86_64 1 44 | 45 | #elif defined(__aarch64__) 46 | 47 | #undef HOST_ARCH_AARCH64 48 | #define HOST_ARCH_AARCH64 1 49 | 50 | #else 51 | #error "Unsupported host architecture!" 52 | #endif 53 | -------------------------------------------------------------------------------- /src/arch/definitions.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | /** 21 | * This file contains common definitions for architecture-specific code 22 | */ 23 | 24 | #pragma once 25 | 26 | #include 27 | 28 | // Entry/exit function pointers emitted by arch-specific code 29 | extern void (*arch_enter_translated_code_ptr)(void *runtime_context); 30 | extern void (*arch_leave_translated_code_ptr)(); 31 | 32 | // 128 bit register type 33 | struct reg128 { 34 | union { 35 | struct { 36 | int64_t lo, hi; 37 | } le; 38 | 39 | struct { 40 | int64_t hi, lo; 41 | } be; 42 | }; 43 | }; 44 | 45 | -------------------------------------------------------------------------------- /src/arch/generic/codegen_generic.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | #include 21 | 22 | using namespace retrec; 23 | 24 | status_code codegen_generic::init() { 25 | TODO(); 26 | } 27 | 28 | status_code codegen_generic::translate(const lifted_llir_block &insns, std::optional &out) { 29 | (void)insns; 30 | (void)out; 31 | TODO(); 32 | } 33 | 34 | uint64_t codegen_generic::get_last_untranslated_access(void *rctx) { 35 | (void)rctx; 36 | TODO(); 37 | } 38 | 39 | status_code codegen_generic::patch_translated_access(void *rctx, uint64_t resolved_haddr) { 40 | (void)rctx; 41 | (void)resolved_haddr; 42 | TODO(); 43 | } 44 | 45 | -------------------------------------------------------------------------------- /src/arch/generic/codegen_generic.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | /** 21 | * Class definition for generic (interpreter) codegen backend. 
22 | */ 23 | 24 | #include 25 | 26 | namespace retrec { 27 | 28 | class codegen_generic : public codegen { 29 | public: 30 | status_code init(); 31 | status_code translate(const lifted_llir_block &insns, std::optional &out); 32 | uint64_t get_last_untranslated_access(void *rctx); 33 | status_code patch_translated_access(void *rctx, uint64_t resolved_haddr); 34 | }; 35 | 36 | static inline std::unique_ptr make_codegen_generic(Architecture, execution_context &, virtual_address_mapper *) { 37 | TODO(); 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/arch/generic/runtime_context_generic.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | #include 21 | 22 | using namespace retrec; 23 | // Set up the generic runtime context -- not implemented yet; all parameters are unnamed and the body is only a TODO() placeholder. 24 | status_code runtime_context_generic::init(Architecture, void *, void *, 25 | virtual_address_mapper *, syscall_emulator *) { 26 | TODO(); 27 | } 28 | // Run the target program -- not implemented yet (TODO() placeholder). 29 | status_code runtime_context_generic::execute() { 30 | TODO(); 31 | } 32 | -------------------------------------------------------------------------------- /src/arch/generic/runtime_context_generic.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | /** 21 | * Definition for the generic interpreter backend's runtime context 22 | */ 23 | #pragma once 24 | #include 25 | 26 | namespace retrec { 27 | 28 | // Forward declarations (only pointers to these types are used below) 29 | class syscall_emulator; 30 | class virtual_address_mapper; 31 | // Runtime context of the generic backend. It carries no state yet and only declares the init()/execute() entry points. 32 | struct runtime_context_generic { 33 | runtime_context_generic() {} 34 | status_code init(Architecture target_arch, void *entry, void *stack, virtual_address_mapper *vam_, 35 | syscall_emulator *syscall_emu_); 36 | status_code execute(); 37 | }; 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/arch/ppc64le/codegen/codegen_ppc64le_internal.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | /** 21 | * This file defines internal macros/helpers for ppc64le codegen routines 22 | */ 23 | 24 | #pragma once 25 | 26 | /** 27 | * Helper macros for using relocations with local labels. Each one expands to a ctx.stream->add_aux(...) call, so a variable named `ctx` must be in scope where the macro is used. 28 | */ 29 | #define RELOC_DECLARE_LABEL(name) \ 30 | do ctx.stream->add_aux(true, relocation{1, relocation::declare_label{name}}); while (0) 31 | #define RELOC_DECLARE_LABEL_AFTER(name) \ 32 | do ctx.stream->add_aux(true, relocation{1, relocation::declare_label_after{name}}); while (0) 33 | #define RELOC_FIXUP_LABEL(name, pos) \ 34 | do ctx.stream->add_aux(true, relocation{1, relocation::imm_rel_label_fixup{name, LabelPosition::pos}}); while (0) 35 | 36 | // x-macro for all targets supported by the ppc64le backend; invokes x once per traits type and forwards the extra arguments 37 | #define PPC64LE_ENUMERATE_SUPPORTED_TARGET_TRAITS(x, ...) \ 38 | x(TargetTraitsX86_64, __VA_ARGS__) 39 | 40 | // macro to instantiate codegen class for all traits 41 | #define PPC64LE_INSTANTIATE_CODEGEN_FOR_ALL_TRAITS() \ 42 | PPC64LE_ENUMERATE_SUPPORTED_TARGET_TRAITS(PPC64LE_INSTANTIATE_CODEGEN_FOR_TRAITS, _) 43 | #define PPC64LE_INSTANTIATE_CODEGEN_FOR_TRAITS(x, ...) \ 44 | template class retrec::codegen_ppc64le; 45 | 46 | // macro to instantiate a single method for all traits. NOTE(review): the per-traits helper below is spelled CODGEN (not CODEGEN); both of its uses here agree, so it works as written. 47 | #define PPC64LE_INSTANTIATE_CODGEN_MEMBER_(x, ret, name, ...) \ 48 | template ret codegen_ppc64le::name(__VA_ARGS__); 49 | #define PPC64LE_INSTANTIATE_CODEGEN_MEMBER(ret, name, ...) \ 50 | PPC64LE_ENUMERATE_SUPPORTED_TARGET_TRAITS(PPC64LE_INSTANTIATE_CODGEN_MEMBER_, ret, name, __VA_ARGS__) 51 | -------------------------------------------------------------------------------- /src/arch/ppc64le/codegen/codegen_types.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 
5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #include 21 | 22 | const char *retrec::ppc64le::operation_names[] = { 23 | #define OPERATION_NAME(op, ...) "Operation::" #op, 24 | PPC64LE_ENUMERATE_OPERATIONS(OPERATION_NAME) 25 | #undef OPERATION_NAME 26 | }; 27 | 28 | -------------------------------------------------------------------------------- /src/arch/ppc64le/codegen/codegen_types.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | /** 29 | * This is an x-macro that defines each supported cpu operation, along with a corresponding 30 | * `assembler` method that emits it. In cases where multiple methods can emit an instruction, 31 | * only the primary one is defined here. 32 | * 33 | * This macro is used later down to define the entries of the `Operation` enum. For this, only 34 | * the first field is actually used. NOTE(review): EQV below is paired with &assembler::creqv, the same method listed for CREQV -- confirm whether a distinct eqv emitter was intended. 35 | */ 36 | #define PPC64LE_ENUMERATE_OPERATIONS(x) \ 37 | x(ADD, &assembler::add) \ 38 | x(ADDI, &assembler::addi) \ 39 | x(ADDIS, &assembler::addis) \ 40 | x(ADDPCIS, &assembler::addpcis) \ 41 | x(AND, &assembler::_and) \ 42 | x(ANDI_, &assembler::andi_) \ 43 | x(B, assembler::b_type) \ 44 | x(BC, assembler::bc_type) \ 45 | x(BCCTR, assembler::bcctr_type) \ 46 | x(BCLR, assembler::bclr_type) \ 47 | x(CMP, &assembler::cmp) \ 48 | x(CMPI, &assembler::cmpi) \ 49 | x(CMPL, &assembler::cmpl) \ 50 | x(CMPLI, &assembler::cmpli) \ 51 | x(CRAND, &assembler::crand) \ 52 | x(CRANDC, &assembler::crandc) \ 53 | x(CREQV, &assembler::creqv) \ 54 | x(CRNAND, &assembler::crnand) \ 55 | x(CRNOR, &assembler::crnor) \ 56 | x(CROR, &assembler::cror) \ 57 | x(CRORC, &assembler::crorc) \ 58 | x(CRXOR, &assembler::crxor) \ 59 | x(DCBST, &assembler::dcbst) \ 60 | x(EQV, &assembler::creqv) \ 61 | x(EXTSB, &assembler::extsb) \ 62 | x(EXTSH, &assembler::extsh) \ 63 | x(EXTSW, &assembler::extsw) \ 64 | x(ICBI, &assembler::icbi) \ 65 | x(ISEL, &assembler::isel) \ 66 | x(ISYNC, &assembler::isync) \ 67 | x(LBZ, &assembler::lbz) \ 68 | x(LBZU, &assembler::lbzu) \ 69 | x(LBZUX, &assembler::lbzux) \ 70 | x(LBZX, &assembler::lbzx) \ 71 | x(LD, &assembler::ld) \ 72 | x(LDU, &assembler::ldu) \ 73 | x(LDUX, &assembler::ldux) \ 74 | x(LDX, &assembler::ldx) \ 75 | x(LFD, &assembler::lfd) \ 76 | x(LHA, &assembler::lha) \ 77 | x(LHAU, &assembler::lhau) \ 78 | x(LHAUX, &assembler::lhaux) \ 79 | x(LHAX, 
&assembler::lhax) \ 80 | x(LHZ, &assembler::lhz) \ 81 | x(LHZU, &assembler::lhzu) \ 82 | x(LHZUX, &assembler::lhzux) \ 83 | x(LHZX, &assembler::lhzx) \ 84 | x(LWA, &assembler::lwa) \ 85 | x(LWAUX, &assembler::lwaux) \ 86 | x(LWAX, &assembler::lwax) \ 87 | x(LWZ, &assembler::lwz) \ 88 | x(LWZU, &assembler::lwzu) \ 89 | x(LWZUX, &assembler::lwzux) \ 90 | x(LWZX, &assembler::lwzx) \ 91 | x(LXSIWZX, &assembler::lxsiwzx) \ 92 | x(LXV, &assembler::lxv) \ 93 | x(LXVX, &assembler::lxvx) \ 94 | x(MCRF, &assembler::mcrf) \ 95 | x(MCRXRX, &assembler::mcrxrx) \ 96 | x(MFCR, &assembler::mfcr) \ 97 | x(MFOCRF, &assembler::mfocrf) \ 98 | x(MFSPR, &assembler::mfspr) \ 99 | x(MFVSRD, &assembler::mfvsrd) \ 100 | x(MFVSRLD, &assembler::mfvsrld) \ 101 | x(MTCRF, &assembler::mtcrf) \ 102 | x(MTOCRF, &assembler::mtocrf) \ 103 | x(MTSPR, &assembler::mtspr) \ 104 | x(MTVSRDD, &assembler::mtvsrdd) \ 105 | x(MULHD, &assembler::mulhd) \ 106 | x(MULHDU, &assembler::mulhdu) \ 107 | x(MULHW, &assembler::mulhw) \ 108 | x(MULLD, &assembler::mulld) \ 109 | x(MULLW, &assembler::mullw) \ 110 | x(NAND, &assembler::nand) \ 111 | x(NEG, &assembler::neg) \ 112 | x(OR, &assembler::_or) \ 113 | x(ORI, &assembler::ori) \ 114 | x(ORIS, &assembler::oris) \ 115 | x(RLDCL, &assembler::rldcl) \ 116 | x(RLDICL, &assembler::rldicl) \ 117 | x(RLDICR, &assembler::rldicr) \ 118 | x(RLDIMI, &assembler::rldimi) \ 119 | x(RLWIMI, &assembler::rlwimi) \ 120 | x(RLWINM, &assembler::rlwinm) \ 121 | x(SC, &assembler::sc) \ 122 | x(SETB, &assembler::setb) \ 123 | x(SLD, &assembler::sld) \ 124 | x(SLDI, &assembler::sldi) \ 125 | x(SRAD, &assembler::srad) \ 126 | x(SRADI, &assembler::sradi) \ 127 | x(SRAWI, &assembler::srawi) \ 128 | x(SRD, &assembler::srd) \ 129 | x(SRDI, &assembler::srdi) \ 130 | x(STB, &assembler::stb) \ 131 | x(STBU, &assembler::stbu) \ 132 | x(STBUX, &assembler::stbux) \ 133 | x(STBX, &assembler::stbx) \ 134 | x(STD, &assembler::std) \ 135 | x(STDU, &assembler::stdu) \ 136 | x(STDUX, &assembler::stdux) \ 
137 | x(STDX, &assembler::stdx) \ 138 | x(STFD, &assembler::stfd) \ 139 | x(STH, &assembler::sth) \ 140 | x(STHU, &assembler::sthu) \ 141 | x(STHUX, &assembler::sthux) \ 142 | x(STHX, &assembler::sthx) \ 143 | x(STW, &assembler::stw) \ 144 | x(STWU, &assembler::stwu) \ 145 | x(STWUX, &assembler::stwux) \ 146 | x(STWX, &assembler::stwx) \ 147 | x(STXV, &assembler::stxv) \ 148 | x(STXVX, &assembler::stxvx) \ 149 | x(SUB, &assembler::sub) \ 150 | x(SUBC, &assembler::subc) \ 151 | x(SUBE, &assembler::sube) \ 152 | x(SYNC, &assembler::sync) \ 153 | x(XOR, &assembler::_xor) \ 154 | x(XXLOR, &assembler::xxlor) \ 155 | x(INVALID, &assembler::invalid) \ 156 | x(U32, &assembler::u32) 157 | 158 | namespace retrec { 159 | 160 | namespace ppc64le { 161 | 162 | constexpr int INSN_SIZE = 4; // ISA 3.1 be damned 163 | 164 | // 165 | // Types used by the assembler and related code 166 | // 167 | 168 | class assembler; 169 | 170 | // A list of all Operation types. See PPC64LE_ENUMERATE_OPERATIONS above. 171 | enum class Operation { 172 | #define OPERATION(op, ...) op, 173 | PPC64LE_ENUMERATE_OPERATIONS(OPERATION) 174 | #undef OPERATION 175 | SIZE 176 | }; 177 | 178 | // A list of strings for all Operation types 179 | extern const char *operation_names[(std::underlying_type_t)Operation::SIZE]; 180 | 181 | enum class BO : uint8_t { 182 | ALWAYS = 0b10100, // Branch unconditionally 183 | FIELD_CLR = 0b00100, // Branch if given CR field is clear (0) 184 | FIELD_SET = 0b01100 // Branch if given CR Field is set (1) 185 | }; 186 | 187 | enum class SPR : uint16_t { 188 | XER = 1, 189 | DSCR = 3, 190 | LR = 8, 191 | CTR = 9 192 | }; 193 | 194 | // Annotated types for assembler operands. Allows inspection code (like relocation) to determine 195 | // parameter uses without hardcoding table of per-instruction meanings. 196 | // 197 | // To ensure that the new types are distinct from their underlying types (for use in std::variant), 198 | // they are declared as enums. 
A typedef/using declaration would allow implicit conversion and make 199 | // it difficult to store the types in std::variants that can also contain the underlying type. 200 | enum BI : uint8_t {}; // Branch CR field 201 | enum AA : bool {}; // Branch absolute address toggle 202 | enum LK : bool {}; // Branch linkage toggle 203 | enum rel_off_26bit : int32_t {}; // 26-bit relative offset (e.g. B) 204 | enum rel_off_16bit : int16_t {}; // 16-bit relative offset (e.g. BC) 205 | 206 | class instruction_stream; 207 | 208 | // 209 | // Types used by codegen_ppc64le and related higher-level code 210 | // 211 | 212 | enum class LabelPosition { 213 | BEFORE, 214 | AFTER 215 | }; 216 | 217 | struct relocation { 218 | // Fill in the relative offset to an absolute target virtual address 219 | struct imm_rel_vaddr_fixup { uint64_t vaddr; }; 220 | 221 | // Helpers for declaring labels and referencing them 222 | struct imm_rel_label_fixup { std::string label_name; LabelPosition position; }; 223 | struct declare_label { std::string label_name; }; 224 | struct declare_label_after { std::string label_name; }; 225 | 226 | // Emit a direct call to a given virtual address 227 | struct imm_rel_direct_call { uint64_t vaddr; }; 228 | 229 | // Emit a direct jmp to a given virtual address 230 | struct imm_rel_direct_jmp { uint64_t vaddr; }; 231 | 232 | using DataT = std::variant; 234 | 235 | size_t insn_cnt; // Number of instructions reserved for this Relocation 236 | DataT data; // Relocation-specific data 237 | }; 238 | 239 | // Auxiliary data that can be attached to an instruction stream entry 240 | struct instruction_aux { 241 | bool always_keep; // Whether we should never let this instruction be optimized away 242 | std::optional relocation; 243 | 244 | instruction_aux(bool always_keep_, decltype(relocation) relocation_) 245 | : always_keep(always_keep_), relocation(std::move(relocation_)) {} 246 | }; 247 | 248 | // Guaranteed to hold an immediate relative offset 249 | using rel_off_t 
= int32_t; 250 | 251 | }; // namespace ppc64le 252 | 253 | }; // namespace retrec 254 | -------------------------------------------------------------------------------- /src/arch/ppc64le/codegen/register_allocator.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | #include 21 | #include 22 | 23 | using namespace retrec; 24 | using namespace retrec::ppc64le; 25 | 26 | // 27 | // Static allocation manager 28 | // 29 | 30 | template 31 | typename register_allocator::static_allocation_set register_allocator::static_allocations; 32 | 33 | template 34 | register_allocator::static_allocation_set::static_allocation_set() { 35 | for (size_t i=0; i::fixed_regs) { 40 | allocations[reserved_index(pair.target)] = pair.host; 41 | } 42 | } 43 | 44 | template 45 | size_t register_allocator::static_allocation_set::reserved_index(const llir::Register &) { 46 | static_assert(!std::is_same_v, "Unimplemented static_allocation_set for this target"); 47 | return 0; 48 | } 49 | 50 | template <> 51 | size_t register_allocator::static_allocation_set::reserved_index(const llir::Register ®) { 52 | return (size_t)reg.x86_64 - 1; // Subtract 1 to account for first INVALID element 53 | } 54 | 55 | template 56 | bool register_allocator::static_allocation_set::is_reserved(llir::PPC64Register reg) { 57 | for (size_t i=0; i 68 | register_allocator::register_allocator() { 69 | for (size_t i=0; i::non_volatile_regs) { 74 | regs[(size_t)reg - 1] = { RegisterInfo::State::RESERVED }; 75 | } 76 | } 77 | 78 | template 79 | register_allocator::~register_allocator() {} 80 | 81 | template 82 | typename register_allocator::AllocatedRegT register_allocator::allocate_gpr() { 83 | constexpr size_t FIRST_GPR_INDEX = (size_t)llir::PPC64Register::R0 - 1; 84 | constexpr size_t LAST_GPR_INDEX = (size_t)llir::PPC64Register::R31 - 1; 85 | for (size_t i = FIRST_GPR_INDEX + 1 /* skip GPR0 which is sometimes useless */; i <= LAST_GPR_INDEX; i++) { 86 | if (regs[i].state == RegisterInfo::State::FREE) { 87 | regs[i].state = RegisterInfo::State::ALLOCATED; 88 | return register_allocator::AllocatedRegT((llir::PPC64Register)(i + 1), *this); 89 | } 90 | } 91 | 92 | ASSERT_NOT_REACHED(); // No free registers 93 | } 94 | 95 | template 96 | typename 
register_allocator::AllocatedRegT register_allocator::get_fixed_reg(const llir::Register ®) { 97 | auto ret = static_allocations.allocations[static_allocations.reserved_index(reg)]; 98 | assert(ret != llir::PPC64Register::INVALID); 99 | return register_allocator::AllocatedRegT(ret, *this); 100 | } 101 | 102 | template 103 | typename register_allocator::AllocatedRegT register_allocator::get_fixed_reg(typename T::RegisterT reg) { 104 | auto ret = static_allocations.allocations[static_allocations.reserved_index(reg)]; 105 | assert(ret != llir::PPC64Register::INVALID); 106 | return register_allocator::AllocatedRegT(ret, *this); 107 | } 108 | 109 | template 110 | void register_allocator::free_reg(llir::PPC64Register reg) { 111 | assert(reg != llir::PPC64Register::INVALID); 112 | if (regs[(size_t)reg - 1].state == RegisterInfo::State::RESERVED) 113 | return; 114 | assert(regs[(size_t)reg - 1].state == RegisterInfo::State::ALLOCATED); 115 | regs[(size_t)reg - 1].state = RegisterInfo::State::FREE; 116 | } 117 | 118 | // Explicitly instantiate for all supported target traits 119 | template class ppc64le::register_allocator; 120 | -------------------------------------------------------------------------------- /src/arch/ppc64le/codegen/register_allocator.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 
15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | 25 | namespace retrec { 26 | namespace ppc64le { 27 | 28 | using gpr_t = uint8_t; 29 | using vsr_t = uint8_t; 30 | static constexpr gpr_t GPR_INVALID = (gpr_t)-1; 31 | 32 | // RAII wrapper returned by register allocations: the destructor hands the register back to its allocator (if any). Wrappers built by from_host_register() have no allocator and are marked `fixed` instead. 33 | template 34 | class allocated_reg { 35 | llir::PPC64Register reg; 36 | RegisterAllocatorT *allocator { nullptr }; 37 | bool fixed { false }; 38 | 39 | allocated_reg(llir::PPC64Register reg, RegisterAllocatorT &allocator) 40 | : reg(reg), allocator(&allocator) {} 41 | allocated_reg(llir::PPC64Register reg) 42 | : reg(reg), fixed(true) {} 43 | 44 | public: 45 | allocated_reg() : reg(llir::PPC64Register::INVALID), allocator(nullptr) {} 46 | static allocated_reg from_host_register(llir::Register host_reg) { 47 | assert(host_reg.arch == Architecture::ppc64le); 48 | return allocated_reg { host_reg.ppc64 }; 49 | } 50 | friend RegisterAllocatorT; 51 | 52 | gpr_t gpr() const { assert(fixed || allocator); return llir::PPC64RegisterGPRIndex(reg); } 53 | vsr_t vsr() const { assert(fixed || allocator); return llir::PPC64RegisterVSRIndex(reg); } 54 | explicit operator bool() const { return !!allocator; } 55 | 56 | // Only allow moves. Moves must carry `fixed` along with reg/allocator, otherwise a moved from_host_register() wrapper would trip the asserts in gpr()/vsr(). 57 | ~allocated_reg() { if (allocator) allocator->free_reg(reg); } 58 | allocated_reg(const allocated_reg &) = delete; 59 | allocated_reg &operator= (const allocated_reg &) = delete; 60 | allocated_reg(allocated_reg &&other) 61 | : reg(other.reg), allocator(std::exchange(other.allocator, nullptr)), fixed(other.fixed) {} 62 | allocated_reg &operator= (allocated_reg &&other) { 63 | std::swap(reg, other.reg); 64 | std::swap(allocator, other.allocator); std::swap(fixed, other.fixed); 65 | return *this; 66 | } 67 | }; 68 | 69 | /** 70 | * Register allocator for the ppc64le backend, parameterized on the traits of the translated target 71 | */ 72 | template 73 | class register_allocator { 74 | // Allocation state of each host register (FREE, ALLOCATED or RESERVED). 
75 | struct RegisterInfo { 76 | enum class State { 77 | FREE, 78 | ALLOCATED, 79 | RESERVED 80 | } state; 81 | } regs[(size_t)llir::PPC64Register::MAXIMUM - 1]; 82 | 83 | // Statically allocated GPRs 84 | static struct static_allocation_set { 85 | static_allocation_set(); 86 | using TargetRegisterT = typename TargetTraits::RegisterT; 87 | 88 | // Maps a given x86_64 register to a reserved ppc64 register, if available 89 | llir::PPC64Register allocations[(size_t)TargetRegisterT::MAXIMUM - 1]; 90 | 91 | // allocations doesn't reserve space for the invalid register index 0, so subtract 1 to get index 92 | //size_t reserved_index(const llir::Register ®) { return (size_t)reg.x86_64 - 1; /* FIXME: not hardcoded to x86_64 */ } 93 | size_t reserved_index(const llir::Register ®); 94 | size_t reserved_index(TargetRegisterT reg) { return (size_t)reg - 1; } 95 | 96 | bool is_reserved(llir::PPC64Register reg); 97 | } static_allocations; 98 | 99 | public: 100 | using AllocatedRegT = allocated_reg>; 101 | friend AllocatedRegT; 102 | 103 | register_allocator(); 104 | ~register_allocator(); 105 | DISABLE_COPY_AND_MOVE(register_allocator) 106 | 107 | AllocatedRegT allocate_gpr(); 108 | AllocatedRegT get_fixed_reg(const llir::Register ®); 109 | AllocatedRegT get_fixed_reg(typename TargetTraits::RegisterT reg); 110 | 111 | private: 112 | void free_reg(llir::PPC64Register reg); 113 | }; 114 | 115 | }; 116 | }; 117 | -------------------------------------------------------------------------------- /src/arch/ppc64le/cpu_context_ppc64le.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 
10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | 28 | namespace retrec { 29 | // Register save area for a ppc64le CPU; the struct itself is 16-byte aligned via alignas. 30 | struct alignas(16) cpu_context_ppc64le { 31 | int64_t gprs[32] { 0 }; 32 | int64_t lr { 0 }; 33 | int64_t cr { 0 }; 34 | int64_t nip { 0 }; 35 | // Explicit pad: the 35 int64_t fields above occupy 280 bytes, so 8 more bytes put vsr on a 16-byte boundary. 36 | int64_t _pad0; 37 | reg128 vsr[64] { { .le = { 0, 0 } } }; 38 | int32_t vrsave { 0 }; 39 | }; 40 | // Compile-time check that the padding above keeps the vsr array quadword (16-byte) aligned within the struct. 41 | static_assert(offsetof(cpu_context_ppc64le, vsr) % 16 == 0, "vsr registers not quadword aligned!\n"); 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/arch/ppc64le/llir/llir_registers_ppc64le.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #ifndef LLIR_ALLOW_INTERNAL_INCLUDE 24 | #error "Don't include this directly! Use llir.h" 25 | #endif 26 | 27 | enum class PPC64Register : uint8_t { 28 | INVALID, 29 | 30 | R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, 31 | R10, R11, R12, R13, R14, R15, R16, R17, 32 | R18, R19, R20, R21, R22, R23, R24, R25, 33 | R26, R27, R28, R29, R30, R31, 34 | 35 | LR, 36 | CR, 37 | CTR, 38 | XER, 39 | 40 | F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, 41 | F10, F11, F12, F13, F14, F15, F16, F17, 42 | F18, F19, F20, F21, F22, F23, F24, F25, 43 | F26, F27, F28, F29, F30, F31, 44 | 45 | FPSCR, 46 | 47 | VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9, 48 | VR10, VR11, VR12, VR13, VR14, VR15, VR16, VR17, 49 | VR18, VR19, VR20, VR21, VR22, VR23, VR24, VR25, 50 | VR26, VR27, VR28, VR29, VR30, VR31, 51 | 52 | VSR0, VSR1, VSR2, VSR3, VSR4, VSR5, VSR6, VSR7, VSR8, 53 | VSR9, VSR10, VSR11, VSR12, VSR13, VSR14, VSR15, VSR16, 54 | VSR17, VSR18, VSR19, VSR20, VSR21, VSR22, VSR23, VSR24, 55 | VSR25, VSR26, VSR27, VSR28, VSR29, VSR30, VSR31, 56 | 57 | VSCR, 58 | VRSAVE, 59 | 60 | MAXIMUM 61 | }; 62 | 63 | enum class PPC64RegisterType { 64 | INVALID, 65 | GPR, 66 | SPECIAL, 67 | FPR, 68 | VR, 69 | VSR 70 | }; 71 | 72 | static constexpr inline PPC64RegisterType PPC64RegisterGetType(PPC64Register reg) { 73 | auto reg_int = enum_cast(reg); 74 | if (reg_int >= enum_cast(PPC64Register::R0) && reg_int <= enum_cast(PPC64Register::R31)) 75 | return PPC64RegisterType::GPR; 76 | else if (reg_int >= enum_cast(PPC64Register::LR) && reg_int <= enum_cast(PPC64Register::XER)) 77 | return PPC64RegisterType::SPECIAL; 78 | else if (reg_int >= enum_cast(PPC64Register::F0) && reg_int <= enum_cast(PPC64Register::F31)) 79 | return PPC64RegisterType::FPR; 80 | else if (reg_int >= enum_cast(PPC64Register::VR0) && reg_int <= enum_cast(PPC64Register::VR31)) 81 | return PPC64RegisterType::VR; 82 | else if (reg_int >= enum_cast(PPC64Register::VSR0) && reg_int <= 
enum_cast(PPC64Register::VSR31)) 83 | return PPC64RegisterType::VSR; 84 | else 85 | return PPC64RegisterType::INVALID; 86 | } 87 | 88 | static constexpr inline std::underlying_type_t PPC64RegisterGPRIndex(PPC64Register reg) { 89 | assert(PPC64RegisterGetType(reg) == PPC64RegisterType::GPR); 90 | return enum_cast(reg) - enum_cast(PPC64Register::R0); 91 | } 92 | 93 | static constexpr inline std::underlying_type_t PPC64RegisterFPRIndex(PPC64Register reg) { 94 | assert(PPC64RegisterGetType(reg) == PPC64RegisterType::FPR); 95 | return enum_cast(reg) - enum_cast(PPC64Register::F0); 96 | } 97 | 98 | static constexpr inline std::underlying_type_t PPC64RegisterVRIndex(PPC64Register reg) { 99 | assert(PPC64RegisterGetType(reg) == PPC64RegisterType::VR); 100 | return enum_cast(reg) - enum_cast(PPC64Register::VR0); 101 | } 102 | 103 | static constexpr inline std::underlying_type_t PPC64RegisterVSRIndex(PPC64Register reg) { 104 | assert(PPC64RegisterGetType(reg) == PPC64RegisterType::VSR); 105 | return enum_cast(reg) - enum_cast(PPC64Register::VSR0); 106 | } 107 | -------------------------------------------------------------------------------- /src/arch/ppc64le/runtime_context_ppc64le.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 
15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | namespace retrec { 30 | 31 | class syscall_emulator; // Forward 32 | 33 | /** 34 | * Data accessed by translated code and retrec runtime 35 | */ 36 | struct runtime_context_ppc64le { 37 | // 38 | // State of translated CPU 39 | // 40 | cpu_context_ppc64le host_native_context; // Host CPU context when in native code 41 | cpu_context_ppc64le host_translated_context; // Host CPU context when in translated code 42 | bool flush_icache { false }; // Whether to invalidate icache before jumping to translated code 43 | 44 | // 45 | // Storage used for communication between translated and native code 46 | // 47 | void (*leave_translated_code_ptr)(void) { nullptr }; // Function pointer to arch_leave_translated_code thunk 48 | 49 | // Pointers to virtual_address_mapper for use with things like indirect call resolution 50 | virtual_address_mapper *vam { nullptr }; 51 | uint64_t (virtual_address_mapper::* vam_lookup_and_update_call_cache)(uint64_t, uint64_t, uint64_t) { nullptr }; 52 | uint64_t (virtual_address_mapper::* vam_lookup_check_call_cache)(uint64_t) { nullptr }; 53 | 54 | // If the translated code wishes to call into native code, it will set the target here 55 | enum class NativeTarget : uint16_t /* fit in an instruction immediate field */ { 56 | INVALID, 57 | SYSCALL, // Execute a syscall 58 | CALL, // Emulate a CALL instruction 59 | JUMP, // Emulate a JUMP instruction 60 | PATCH_CALL, // Patch in a direct CALL 61 | PATCH_JUMP, // Patch in a direct JUMP 62 | } native_function_call_target { NativeTarget::INVALID }; 63 | 64 | // Target CPU emulated context 65 | Architecture arch; 66 | union { 67 | cpu_context_x86_64 x86_64_ucontext; 68 | }; 69 | 70 | bool should_exit { false }; 71 | int exit_code { 0 }; 72 | 73 | // Pointer to 
syscall emulator, used by native code 74 | syscall_emulator *syscall_emu { nullptr }; 75 | 76 | // 77 | // Initialization and accessor functions 78 | // 79 | runtime_context_ppc64le() {} 80 | 81 | status_code init(Architecture target_arch, void *entry, void *stack, virtual_address_mapper *vam_, 82 | syscall_emulator *syscall_emu_); 83 | status_code execute(); 84 | void dump_emulated_machine_state(); 85 | }; 86 | static_assert(std::is_standard_layout::value, "Runtime context must have standard layout, since we access it manually from emitted ASM."); 87 | static_assert(sizeof(runtime_context_ppc64le) <= 32768, "Runtime context must be accessible with signed 16-bit displacements!"); 88 | 89 | class translated_code_region; 90 | 91 | } 92 | -------------------------------------------------------------------------------- /src/arch/runtime_context_dispatcher.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | using namespace retrec; 25 | // Store the context alternative matching `backend`; a backend that was compiled out falls through to the default assert. 26 | runtime_context_dispatcher::runtime_context_dispatcher(CodegenBackend backend) { 27 | switch (backend) { 28 | #if RETREC_CODEGEN_PPC64LE 29 | case CodegenBackend::PowerPC64LE: 30 | context = runtime_context_ppc64le {}; 31 | break; 32 | #endif 33 | 34 | #if RETREC_CODEGEN_GENERIC 35 | case CodegenBackend::Generic: 36 | context = runtime_context_generic {}; 37 | break; 38 | #endif 39 | default: 40 | ASSERT_NOT_REACHED(); 41 | } 42 | } 43 | // Forward init() to whichever context alternative is currently held by the variant. 44 | status_code runtime_context_dispatcher::init(Architecture target_arch, void *entry, void *stack, virtual_address_mapper *vam, 45 | syscall_emulator *syscall_emu) { 46 | 47 | return std::visit([=](auto &rc) -> status_code { 48 | if constexpr (!types_are_same_v>) 49 | return rc.init(target_arch, entry, stack, vam, syscall_emu); 50 | else 51 | ASSERT_NOT_REACHED(); 52 | }, context); 53 | } 54 | // Forward execute() to the held context alternative. 55 | status_code runtime_context_dispatcher::execute() { 56 | return std::visit([](auto &rc) -> status_code { 57 | if constexpr (!types_are_same_v>) 58 | return rc.execute(); 59 | else 60 | ASSERT_NOT_REACHED(); 61 | }, context); 62 | } 63 | // Return the address of the held context object as an untyped pointer. 64 | void *runtime_context_dispatcher::get_data() { 65 | return std::visit([](auto &rc) -> void * { 66 | if constexpr (!types_are_same_v>) 67 | return &rc; 68 | else 69 | ASSERT_NOT_REACHED(); 70 | }, context); 71 | } 72 | -------------------------------------------------------------------------------- /src/arch/runtime_context_dispatcher.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 
10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | namespace retrec { 30 | // Holds the runtime context of one codegen backend in a std::variant and forwards init/execute/get_data to it. 31 | class runtime_context_dispatcher { 32 | std::variant< 33 | #if RETREC_CODEGEN_GENERIC 34 | runtime_context_generic, 35 | #endif 36 | #if RETREC_CODEGEN_PPC64LE 37 | runtime_context_ppc64le, 38 | #endif 39 | Sentinel<0> /* final alternative without a trailing comma, so the list is well-formed whichever backends are enabled */ 40 | > context; 41 | 42 | public: 43 | explicit runtime_context_dispatcher(CodegenBackend backend); 44 | 45 | status_code init(Architecture target_arch, void *entry, void *stack, virtual_address_mapper *vam, 46 | syscall_emulator *syscall_emu); 47 | status_code execute(); 48 | void *get_data(); 49 | }; 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/arch/target_environment.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 
15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | 29 | namespace retrec { 30 | // Dispatch target-stack setup to the implementation for `target`; only X86_64 is handled, any other value reaches TODO(). 31 | static inline void *initialize_target_stack(Architecture target, void *stack, 32 | const std::vector &argv, 33 | const std::vector &envp, 34 | const elf_loader &elf_loader) { 35 | switch (target) { 36 | case Architecture::X86_64: 37 | return x86_64::initialize_target_stack(stack, argv, envp, elf_loader); 38 | default: 39 | TODO(); 40 | } 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/arch/x86_64/cpu_context_x86_64.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | 28 | namespace retrec { 29 | 30 | struct cpu_context_x86_64 { 31 | int64_t gprs[16] { 0 }; 32 | int64_t segments[6] { 0 }; // Actually only 16-bit, but made 64-bit for get_reg 33 | int64_t rip { 0 }; 34 | 35 | // x87/MMX registers 36 | struct x87_reg { 37 | uint64_t lo { 0 }; // Low 64 bits of x87 register, aliased to MMX MM0-7 38 | uint16_t hi { 0 }; 39 | uint16_t pad[3] { 0 }; 40 | }; 41 | x87_reg x87[8]; 42 | uint16_t x87_control { 0 }; 43 | uint16_t x87_status { 0 }; 44 | uint16_t x87_tag { 0 }; 45 | uint64_t x87_last_ip { 0 }; 46 | uint64_t x87_last_data_ptr { 0 }; 47 | uint16_t x87_opcode { 0 }; 48 | 49 | // Pseudo-register for storing the offset from x87[0] where the stack TOP is, in bytes. 50 | uint16_t st_top_offset { 0 }; 51 | static constexpr uint16_t st_offset_mask = 0b1110000; // Bits 4-6: eight slots, 16 bytes apart (x87_reg is 8 + 2 + 3*2 bytes) 52 | 53 | // SSE registers 54 | reg128 xmm[16]; 55 | uint32_t mxcsr { 0 }; 56 | // Return a typed pointer to the storage backing `reg`. Array slots are indexed by the distance from the first enumerator of the group, so each group must be contiguous in X86_64Register. 57 | template 58 | T *get_reg(llir::X86_64Register reg) { 59 | switch (reg) { 60 | case llir::X86_64Register::RAX: 61 | case llir::X86_64Register::RBX: 62 | case llir::X86_64Register::RCX: 63 | case llir::X86_64Register::RDX: 64 | case llir::X86_64Register::RSP: 65 | case llir::X86_64Register::RBP: 66 | case llir::X86_64Register::RSI: 67 | case llir::X86_64Register::RDI: 68 | case llir::X86_64Register::R8: 69 | case llir::X86_64Register::R9: 70 | case llir::X86_64Register::R10: 71 | case llir::X86_64Register::R11: 72 | case llir::X86_64Register::R12: 73 | case llir::X86_64Register::R13: 74 | case llir::X86_64Register::R14: 75 | case llir::X86_64Register::R15: 76 | if constexpr (types_are_same_v) 77 | return &gprs[(size_t)reg - (size_t)llir::X86_64Register::RAX]; 78 | break; 79 | 80 | case llir::X86_64Register::FR0: 81 | case llir::X86_64Register::FR1: 82 | case llir::X86_64Register::FR2: 83 | case llir::X86_64Register::FR3: 84 | case llir::X86_64Register::FR4: 85 | case llir::X86_64Register::FR5: 86 | 
case llir::X86_64Register::FR6: 87 | case llir::X86_64Register::FR7: 88 | if constexpr (types_are_same_v) 89 | return &x87[(size_t)reg - (size_t)llir::X86_64Register::FR0]; 90 | break; 91 | 92 | case llir::X86_64Register::XMM0: 93 | case llir::X86_64Register::XMM1: 94 | case llir::X86_64Register::XMM2: 95 | case llir::X86_64Register::XMM3: 96 | case llir::X86_64Register::XMM4: 97 | case llir::X86_64Register::XMM5: 98 | case llir::X86_64Register::XMM6: 99 | case llir::X86_64Register::XMM7: 100 | case llir::X86_64Register::XMM8: 101 | case llir::X86_64Register::XMM9: 102 | case llir::X86_64Register::XMM10: 103 | case llir::X86_64Register::XMM11: 104 | case llir::X86_64Register::XMM12: 105 | case llir::X86_64Register::XMM13: 106 | case llir::X86_64Register::XMM14: 107 | case llir::X86_64Register::XMM15: 108 | if constexpr (types_are_same_v) 109 | return &xmm[(size_t)reg - (size_t)llir::X86_64Register::XMM0]; 110 | break; 111 | case llir::X86_64Register::MXCSR: 112 | if constexpr (types_are_same_v) 113 | return &mxcsr; 114 | break; 115 | 116 | case llir::X86_64Register::RIP: 117 | if constexpr (types_are_same_v) 118 | return &rip; 119 | break; 120 | 121 | case llir::X86_64Register::FS: 122 | case llir::X86_64Register::GS: 123 | case llir::X86_64Register::CS: 124 | case llir::X86_64Register::SS: 125 | case llir::X86_64Register::DS: 126 | case llir::X86_64Register::ES: 127 | if constexpr (types_are_same_v) 128 | return &segments[(size_t)reg - (size_t)llir::X86_64Register::FS]; 129 | break; 130 | 131 | default: 132 | break; 133 | } 134 | 135 | // Unsupported register/mismatched type provided 136 | ASSERT_NOT_REACHED(); 137 | } 138 | }; 139 | 140 | } 141 | -------------------------------------------------------------------------------- /src/arch/x86_64/llir/llir_lifter_x86_64.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 
5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | // x86_64 implementation of the llir_lifter interface; lift() takes one Capstone cs_insn and reports its result through `out`. 24 | namespace retrec { 25 | class llir_lifter_x86_64 final : public llir_lifter { 26 | static llir::Operand::Width get_width(uint8_t width); 27 | void fill_operand(cs_x86_op &op, llir::Operand &out); 28 | llir::Register get_reg(x86_reg reg); 29 | llir::Operand get_reg_op(x86_reg reg); 30 | 31 | public: 32 | llir_lifter_x86_64() {} 33 | ~llir_lifter_x86_64(); 34 | status_code lift(cs_insn *insn, std::vector &out) override; 35 | }; 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/arch/x86_64/llir/llir_operands_x86_64.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #ifndef LLIR_ALLOW_INTERNAL_INCLUDE 23 | #error "Don't include this directly! Use llir.h" 24 | #endif 25 | // x86_64 memory operand; presumably addresses segment:[base + index * scale + disp] - TODO confirm against the lifter. Only disp_sign_from_df has a default initializer. 26 | struct X86_64MemOp { 27 | Register segment; 28 | Register base; 29 | Register index; 30 | uint8_t scale; 31 | int64_t disp; 32 | 33 | // Whether or not disp's sign is determined by the direction flag 34 | bool disp_sign_from_df { false }; 35 | }; 36 | -------------------------------------------------------------------------------- /src/arch/x86_64/llir/llir_registers_x86_64.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #ifndef LLIR_ALLOW_INTERNAL_INCLUDE 23 | #error "Don't include this directly! 
Use llir.h" 24 | #endif 25 | // X-macro: applies x() to every x86_64 register name, in the order listed. 26 | #define LLIR_ENUMERATE_X86_64_REGISTERS(x) \ 27 | x(INVALID) \ 28 | /* GPRs */\ 29 | x(RAX) \ 30 | x(RBX) \ 31 | x(RCX) \ 32 | x(RDX) \ 33 | x(RSP) \ 34 | x(RBP) \ 35 | x(RSI) \ 36 | x(RDI) \ 37 | x(R8) \ 38 | x(R9) \ 39 | x(R10) \ 40 | x(R11) \ 41 | x(R12) \ 42 | x(R13) \ 43 | x(R14) \ 44 | x(R15) \ 45 | /* x87 regs (absolute address) */\ 46 | x(FR0) \ 47 | x(FR1) \ 48 | x(FR2) \ 49 | x(FR3) \ 50 | x(FR4) \ 51 | x(FR5) \ 52 | x(FR6) \ 53 | x(FR7) \ 54 | /* X87 regs (relative to TOP)*/\ 55 | x(ST0) \ 56 | x(ST1) \ 57 | x(ST2) \ 58 | x(ST3) \ 59 | x(ST4) \ 60 | x(ST5) \ 61 | x(ST6) \ 62 | x(ST7) \ 63 | /* MMX regs */\ 64 | x(MM0) \ 65 | x(MM1) \ 66 | x(MM2) \ 67 | x(MM3) \ 68 | x(MM4) \ 69 | x(MM5) \ 70 | x(MM6) \ 71 | x(MM7) \ 72 | /* SSE regs */\ 73 | x(XMM0) \ 74 | x(XMM1) \ 75 | x(XMM2) \ 76 | x(XMM3) \ 77 | x(XMM4) \ 78 | x(XMM5) \ 79 | x(XMM6) \ 80 | x(XMM7) \ 81 | x(XMM8) \ 82 | x(XMM9) \ 83 | x(XMM10) \ 84 | x(XMM11) \ 85 | x(XMM12) \ 86 | x(XMM13) \ 87 | x(XMM14) \ 88 | x(XMM15) \ 89 | x(MXCSR) \ 90 | /* Instruction pointer */ \ 91 | x(RIP) \ 92 | /* Segments */ \ 93 | x(FS) \ 94 | x(GS) \ 95 | x(CS) \ 96 | x(SS) \ 97 | x(DS) \ 98 | x(ES) \ 99 | x(MAXIMUM) 100 | 101 | // One enumerator per entry of the list above, so enumerator values follow list position. 102 | enum class X86_64Register { 103 | #define declare_enum(x) x, 104 | LLIR_ENUMERATE_X86_64_REGISTERS(declare_enum) 105 | #undef declare_enum 106 | }; 107 | 108 | -------------------------------------------------------------------------------- /src/arch/x86_64/syscalls.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 
10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | 24 | #include 25 | 26 | namespace retrec { 27 | // One enumerator per entry of ENUMERATE_GENERIC_LINUX_SYSCALLS, each assigned the value given in that list. 28 | enum class SyscallLinuxX86_64 : int64_t { 29 | #define declare_enum(name, val) \ 30 | name = val, 31 | 32 | ENUMERATE_GENERIC_LINUX_SYSCALLS(declare_enum) 33 | #undef declare_enum 34 | }; 35 | 36 | struct SyscallDetailsLinuxX86_64 { 37 | using SyscallNumberT = SyscallLinuxX86_64; 38 | 39 | // 40 | // Definitions 41 | // 42 | 43 | // Define architecture-specific type mappings 44 | #define enumerate_type_mappings(x) \ 45 | x(char, u8_le) /* NOTE(review): plain char is signed in the x86_64 SysV ABI; confirm the unsigned mapping is intended */ \ 46 | x(short, s16_le) \ 47 | x(int, s32_le) \ 48 | x(long, s64_le) \ 49 | x(long long, s64_le) \ 50 | x(unsigned char, u8_le) \ 51 | x(unsigned short, u16_le) \ 52 | x(unsigned int, u32_le) \ 53 | x(unsigned long, u64_le) \ 54 | x(unsigned long long, u64_le) \ 55 | x(void *, ptr64) \ 56 | /* Declare aliases for agnostic types */ \ 57 | ENUMERATE_SYSCALL_ARG_TYPES(x) 58 | 59 | // Define signatures of all supported syscalls 60 | #define enumerate_syscalls(x) \ 61 | /* Enumerate common syscalls first */ \ 62 | ENUMERATE_COMMON_SYSCALL_SIGNATURES(x) 63 | 64 | #define access_type_a(a, _) a, 65 | #define access_type_b(_, b) sc_types::b, 66 | MAGIC_GEN_TYPE_TO_TYPE_LOOKUP(enumerate_type_mappings, arch_types, access_type_a, access_type_b) 67 | #undef access_type_a 68 | #undef access_type_b 69 | 70 | #define access_enum(e, ...) SyscallLinuxX86_64::e, 71 | #define access_sig(name, ret, ...) 
SyscallSignature, 72 | MAGIC_GEN_ENUM_TO_TYPE_LOOKUP(enumerate_syscalls, signatures_lut, access_enum, access_sig, SyscallNumberT) 73 | #undef access_enum 74 | #undef access_sig 75 | 76 | // 77 | // Accessors 78 | // 79 | 80 | // Accessor for retrieving the signature of a syscall 81 | template 82 | using signature_from_syscall = signatures_lut_look_up_type; 83 | 84 | // Accessor for retrieving the corresponding agnostic type for a given architecture-specific type 85 | template 86 | using agnostic_type_from_type = arch_types_look_up_type_b; 87 | 88 | // 89 | // Run-time helpers 90 | // 91 | static constexpr int64_t get_generic_syscall_number(int64_t x86_64_syscall_number) { return x86_64_syscall_number; } // Identity: the x86_64 number is returned unchanged 92 | 93 | #undef enumerate_syscalls 94 | #undef enumerate_type_mappings 95 | }; 96 | 97 | } 98 | -------------------------------------------------------------------------------- /src/arch/x86_64/target_environment.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | /** 21 | * Definitions and helpers for the X86_64 target binary environment 22 | */ 23 | #pragma once 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | 31 | namespace retrec { 32 | namespace x86_64 { 33 | 34 | /** 35 | * Initialize a stack with the given argv/envp. 36 | * Returns the decremented stack pointer that should be passed to translated runtime. 37 | */ 38 | void *initialize_target_stack(void *stack, const std::vector &argv, 39 | const std::vector &envp, const elf_loader &elf_loader); 40 | 41 | // The four 32-bit registers that make up one CPUID result. 42 | struct CpuidResult { 43 | uint32_t eax; 44 | uint32_t ebx; 45 | uint32_t ecx; 46 | uint32_t edx; 47 | }; 48 | 49 | /** 50 | * Provides the CPUID(func, subfunc) result for the target CPU through the res out-parameter (the function itself returns void) 51 | */ 52 | void get_cpuid(uint32_t func, uint32_t subfunc, CpuidResult *res); 53 | 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/codegen.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | using namespace retrec; 27 | // Create the codegen for `backend`; a backend that was compiled out (or an unknown value) ends in ASSERT_NOT_REACHED(). 28 | std::unique_ptr retrec::make_codegen(CodegenBackend backend, Architecture target_arch, execution_context &econtext, 29 | virtual_address_mapper *vam) { 30 | switch (backend) { 31 | case CodegenBackend::PowerPC64LE: 32 | if constexpr (RETREC_CODEGEN_PPC64LE) 33 | return make_codegen_ppc64le(target_arch, econtext, vam); 34 | break; 35 | 36 | case CodegenBackend::Generic: 37 | if constexpr (RETREC_CODEGEN_GENERIC) 38 | return make_codegen_generic(target_arch, econtext, vam); 39 | break; 40 | } 41 | 42 | ASSERT_NOT_REACHED(); 43 | } 44 | 45 | -------------------------------------------------------------------------------- /src/codegen.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | namespace retrec { 31 | 32 | class virtual_address_mapper; // Forward 33 | // A sequence of lifted instructions plus flags describing where it came from. 34 | class lifted_llir_block { 35 | public: 36 | // Declared using struct+enum instead of C++11 scoped enums to allow more 37 | // ergonomic usage as a bit-field. 
38 | struct Flags { 39 | enum Type : uint32_t { 40 | NONE, 41 | FULL_FUNCTION = (1 << 0), // Block is a full function lifted from the target executable 42 | }; 43 | }; 44 | // Takes ownership of insns_. A named rvalue reference is an lvalue, so std::move is needed to move rather than copy. 45 | lifted_llir_block(std::vector &&insns_, Flags::Type flags_) : insns(std::move(insns_)), flags(flags_) {} 46 | 47 | const std::vector &get_insns() const { return insns; } 48 | Flags::Type get_flags() const { return flags; } 49 | 50 | private: 51 | std::vector insns; 52 | Flags::Type flags; 53 | }; 54 | // Non-owning (pointer, size) description of a buffer of translated code. 55 | class translated_code_region { 56 | void *code_buffer; 57 | size_t code_buffer_size; 58 | 59 | public: 60 | translated_code_region(void *code_buffer_, size_t code_buffer_size_) 61 | : code_buffer(code_buffer_), code_buffer_size(code_buffer_size_) {} 62 | 63 | void *code() { return code_buffer; } 64 | size_t size() const { return code_buffer_size; } 65 | }; 66 | 67 | enum class CodegenBackend { 68 | Generic, 69 | PowerPC64LE, 70 | }; 71 | // Prefer the PPC64LE backend when it is compiled in, otherwise Generic. With neither enabled the lambda returns void and this fails to compile. 72 | constexpr CodegenBackend default_codegen_backend = []{ 73 | if constexpr (RETREC_CODEGEN_PPC64LE) 74 | return CodegenBackend::PowerPC64LE; 75 | else if constexpr (RETREC_CODEGEN_GENERIC) 76 | return CodegenBackend::Generic; 77 | }(); 78 | // Abstract interface implemented by each codegen backend. 79 | class codegen { 80 | public: 81 | virtual status_code init() = 0; 82 | virtual status_code translate(const lifted_llir_block& insns, std::optional &out) = 0; 83 | virtual uint64_t get_last_untranslated_access(void *rctx) = 0; 84 | virtual status_code patch_translated_access(void *rctx, uint64_t resolved_haddr) = 0; 85 | virtual ~codegen() {} 86 | }; 87 | 88 | std::unique_ptr make_codegen(CodegenBackend backend, Architecture target_arch, execution_context &econtext, 89 | virtual_address_mapper *vam); 90 | 91 | } 92 | -------------------------------------------------------------------------------- /src/disassembler.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 
5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | 26 | using namespace retrec; 27 | // Render arr[0..len) as "{a, b, c}" using std::to_string on each element; an empty array gives "{}". 28 | template 29 | std::string array_to_string(T arr[], size_t len) { 30 | std::string ret = "{"; 31 | if (len) { 32 | for (size_t i = 0; i < len - 1; i++) { 33 | ret += std::to_string(arr[i]) + ", "; 34 | } 35 | ret += std::to_string(arr[len - 1]) + "}"; 36 | } else { 37 | ret += "}"; 38 | } 39 | return ret; 40 | }; 41 | // Specialization for Capstone operands: prints each operand's `type` field. 42 | template <> 43 | std::string array_to_string(cs_x86_op arr[], size_t len) { 44 | std::string ret = "{"; 45 | if (len) { 46 | for (size_t i = 0; i < len - 1; i++) { 47 | ret += std::to_string(arr[i].type) + ", "; 48 | } 49 | ret += std::to_string(arr[len - 1].type) + "}"; 50 | } else { 51 | ret += "}"; 52 | } 53 | return ret; 54 | }; 55 | // Close the Capstone handle only if init() completed. 56 | disassembler::~disassembler() { 57 | if (init_done) 58 | cs_close(&capstone_handle); 59 | } 60 | // Open a Capstone handle and create the lifter for the loader's target architecture; returns BADARCH for anything but X86_64. 61 | status_code disassembler::init() { 62 | cs_arch capstone_arch; 63 | cs_mode capstone_mode; 64 | switch(loader.target_arch()) { 65 | case Architecture::X86_64: 66 | capstone_arch = CS_ARCH_X86; 67 | capstone_mode = CS_MODE_64; 68 | 69 | if (cs_open(capstone_arch, capstone_mode, &capstone_handle) != CS_ERR_OK) 70 | return status_code::NOMEM; 71 | 72 | lifter = std::make_unique(); 73 | break; 74 | 75 | default: 76 | pr_error("Unsupported 
architecture %d!\n", (int)loader.target_arch()); /* report the value that was switched on; the `arch` member is never assigned */ 77 | return status_code::BADARCH; 78 | } 79 | 80 | cs_option(capstone_handle, CS_OPT_DETAIL, CS_OPT_ON); 81 | 82 | init_done = true; 83 | return status_code::SUCCESS; 84 | } 85 | // Disassemble and lift instructions from `code` (at most max_length bytes, starting at address ip) into llir_out; PARTIAL mode stops after the first branch. 86 | status_code disassembler::disassemble_region(const void *code, size_t max_length, uint64_t ip, 87 | std::vector &llir_out, Mode mode) { 88 | cs_insn *cur = cs_malloc(capstone_handle); 89 | unique_cs_insn_arr insns(cur, cs_insn_deleter(1)); 90 | std::vector llir_insns; 91 | if (!cur) return status_code::NOMEM; // cs_malloc failed; do not hand a null buffer to cs_disasm_iter 92 | while (cs_disasm_iter(capstone_handle, (const uint8_t **)&code, &max_length, &ip, cur)) { 93 | cs_detail *detail = cur->detail; 94 | assert(detail); 95 | 96 | pr_debug("0x%zx: %s %s, operands: %s, groups: %s\n", cur->address, cur->mnemonic, cur->op_str, 97 | array_to_string(detail->x86.operands, detail->x86.op_count).c_str(), 98 | array_to_string(detail->groups, detail->groups_count).c_str()); 99 | 100 | // Lift to LLIR 101 | status_code res = lifter->lift(cur, llir_insns); 102 | if (res != status_code::SUCCESS) { 103 | pr_error("Failed to lift instruction!\n"); 104 | return res; 105 | } 106 | 107 | if (mode == Mode::PARTIAL) { 108 | // In partial mode, we need to stop whenever a branch is encountered 109 | auto last_insn = llir_insns.end() - 1; 110 | if (last_insn->iclass() == llir::Insn::Class::BRANCH) 111 | break; 112 | } 113 | 114 | pr_debug("LLIR: %s\n", llir::to_string(*(llir_insns.end() - 1)).c_str()); 115 | } 116 | 117 | llir_out = std::move(llir_insns); 118 | return status_code::SUCCESS; 119 | } 120 | -------------------------------------------------------------------------------- /src/disassembler.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 
5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | 28 | #include 29 | 30 | namespace retrec { 31 | 32 | // 33 | // Unique Pointer for cs_insn array 34 | // 35 | struct cs_insn_deleter { 36 | size_t count; 37 | cs_insn_deleter(size_t count_) : count(count_) {} 38 | void operator()(cs_insn *insn) { cs_free(insn, count); } 39 | }; 40 | using unique_cs_insn_arr = std::unique_ptr; 41 | // Interface for turning one Capstone instruction into LLIR, reported through `out`. 42 | class llir_lifter { 43 | public: 44 | virtual status_code lift(cs_insn *insn, std::vector &out) = 0; 45 | virtual ~llir_lifter() {}; 46 | }; 47 | // Wraps a Capstone handle and an llir_lifter; init() must succeed before disassemble_region() is used. 48 | class disassembler { 49 | elf_loader &loader; 50 | 51 | bool init_done = false; 52 | Architecture arch; // NOTE(review): not initialized by the constructor below 53 | csh capstone_handle; 54 | std::unique_ptr lifter; 55 | public: 56 | DISABLE_COPY_AND_MOVE(disassembler) 57 | explicit disassembler(elf_loader &loader_) : 58 | loader(loader_) {} 59 | ~disassembler(); 60 | 61 | enum class Mode { 62 | FULL_FUNCTION, // Disassemble an entire function 63 | PARTIAL, // Disassemble until the first branch insn 64 | }; 65 | 66 | status_code init(); 67 | status_code disassemble_region(const void *code, size_t max_length, uint64_t ip, 68 | std::vector &llir_out, Mode mode); 69 | }; 70 | 71 | } 72 | 73 | -------------------------------------------------------------------------------- 
/src/dynamic_recompiler.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #include 21 | 22 | using namespace retrec; 23 | 24 | status_code dynamic_recompiler::init() { 25 | auto ret = econtext.init(); 26 | if (ret != status_code::SUCCESS) 27 | return ret; 28 | 29 | ret = loader.init(); 30 | if (ret != status_code::SUCCESS) 31 | return ret; 32 | 33 | ret = loader.load_all(); 34 | if (ret != status_code::SUCCESS) 35 | return ret; 36 | 37 | ret = disasm.init(); 38 | if (ret != status_code::SUCCESS) 39 | return ret; 40 | 41 | // We have to wait until here to initialize the codegen with 42 | // the correct architecture detected by the elf loader. 
43 | gen = make_codegen(backend, loader.target_arch(), econtext, &vam); 44 | 45 | ret = gen->init(); 46 | if (ret != status_code::SUCCESS) 47 | return ret; 48 | 49 | syscall_emu = std::make_unique(loader.target_arch()); 50 | 51 | return status_code::SUCCESS; 52 | } 53 | 54 | status_code dynamic_recompiler::execute() { 55 | /** 56 | * Translate entrypoint 57 | * Jump to entrypoint 58 | */ 59 | 60 | // Lookup entrypoint's symbol 61 | auto *entry_symbol = loader.lookup(loader.entrypoint(), loader.text_section_index(), elf_loader::Symbol::Bind::GLOBAL, 62 | elf_loader::LookupPolicy::EXACT); 63 | if (!entry_symbol) { 64 | pr_error("Failed to find entrypoint symbol!\n"); 65 | return status_code::BADELF; 66 | } 67 | 68 | // Translate function 69 | auto res = translate_elf_function(*entry_symbol); 70 | if (res != status_code::SUCCESS) 71 | return res; 72 | auto &code = *translated_regions.begin(); 73 | 74 | // Initialize runtime context with entrypoint as target 75 | res = econtext.initialize_runtime_context(loader.target_arch(), code.code(), &vam, syscall_emu.get()); 76 | if (res != status_code::SUCCESS) { 77 | pr_error("Failed to initialize runtime context for translated code!\n"); 78 | return res; 79 | } 80 | 81 | // Code execution loop 82 | for (;;) { 83 | status_code res = econtext.enter_translated_code(); 84 | switch (res) { 85 | case status_code::HALT: 86 | // Translated code gracefully exited 87 | return status_code::SUCCESS; 88 | 89 | case status_code::UNTRANSLATED: 90 | { 91 | res = runtime_handle_untranslated_access(); 92 | if (res != status_code::SUCCESS) { 93 | pr_error("Failed to handle untranslated access: %s\n", status_code_str(res)); 94 | return res; 95 | } 96 | break; 97 | } 98 | 99 | default: 100 | // Other status - return it 101 | return res; 102 | } 103 | } 104 | } 105 | 106 | status_code dynamic_recompiler::translate_elf_function(const elf_loader::Symbol &symbol) { 107 | // Determine length of target routine 108 | uint64_t func_len = 
loader.get_symbol_size(symbol); 109 | if (func_len == 0) { 110 | // The size attribute isn't present, probably due to hand-written assembly 111 | // missing a .size directive. 112 | return translate_raw_code_block(symbol.value); 113 | } 114 | 115 | pr_debug("function length: %zu\n", func_len); 116 | const void *code_ptr = loader.get_symbol_data_ptr(symbol); 117 | if (!code_ptr) { 118 | pr_error("Failed to get symbol data ptr!\n"); 119 | return status_code::NOMEM; 120 | } 121 | 122 | // Disassemble 123 | std::vector lifted_insns; 124 | status_code res = disasm.disassemble_region(code_ptr, func_len, symbol.value, lifted_insns, 125 | disassembler::Mode::FULL_FUNCTION); 126 | if (res != status_code::SUCCESS) { 127 | pr_error("Failed to disassemble region!\n"); 128 | return res; 129 | } 130 | 131 | lifted_llir_block block(std::move(lifted_insns), lifted_llir_block::Flags::FULL_FUNCTION); 132 | 133 | // Translate the routine 134 | std::optional translated_code; 135 | auto ret = gen->translate(block, translated_code); 136 | if (ret != status_code::SUCCESS) { 137 | pr_error("Failed to translate routine!\n"); 138 | return ret; 139 | } 140 | translated_regions.push_back(*translated_code); 141 | 142 | return status_code::SUCCESS; 143 | } 144 | /* Translate the code that starts at `vaddr` when no sized ELF symbol covers it. The disassembler runs in PARTIAL mode and is bounded by the number of bytes from `vaddr` to the end of the run of contiguous mappings that contains it. Returns BADACCESS when no mapping contains `vaddr`, otherwise the disassembler/codegen status. */ 145 | status_code dynamic_recompiler::translate_raw_code_block(uint64_t vaddr) { 146 | // Find the Mapping that the vaddr lies within 147 | size_t mapping_index; 148 | auto mapping_opt = econtext.map().find(vaddr, 1, &mapping_index, process_memory_map::FindPolicy::CONTAINS); 149 | if (!mapping_opt) { 150 | pr_debug("Unable to find mapping containing target vaddr 0x%lx\n", vaddr); 151 | return status_code::BADACCESS; 152 | } 153 | auto mapping = *mapping_opt; 154 | 155 | size_t max_size = 0; 156 | // Determine the maximum length of the code buffer by walking the memory map 157 | // and adding the size of all contiguous memory regions. 
158 | max_size = mapping.end - vaddr; /* bytes left in the mapping that contains vaddr */ 159 | for (size_t i = mapping_index + 1; i < econtext.map().size(); i++) { 160 | auto &cur = econtext.map()[i]; 161 | auto &prev = econtext.map()[i-1]; 162 | if (prev.end != cur.start) { 163 | // Discontinuity, stop increasing size 164 | break; 165 | } else { 166 | max_size += cur.end - cur.start; /* add this mapping's length, not its absolute end address */ 167 | } 168 | } 169 | 170 | pr_debug("Translating raw code region of max size: %zu\n", max_size); 171 | 172 | // Disassemble 173 | std::vector lifted_insns; 174 | status_code res = disasm.disassemble_region((void *)vaddr, max_size, vaddr, lifted_insns, 175 | disassembler::Mode::PARTIAL); 176 | if (res != status_code::SUCCESS) { 177 | pr_error("Failed to disassemble region!\n"); 178 | return res; 179 | } 180 | 181 | lifted_llir_block block(std::move(lifted_insns), lifted_llir_block::Flags::NONE); 182 | 183 | // Translate the partial routine 184 | std::optional translated_code; 185 | auto ret = gen->translate(block, translated_code); 186 | if (ret != status_code::SUCCESS) { 187 | pr_error("Failed to translate routine!\n"); 188 | return ret; 189 | } 190 | translated_regions.push_back(*translated_code); 191 | 192 | return status_code::SUCCESS; 193 | } 194 | 195 | /** 196 | * Translate the code block present at the specified target virtual address. 197 | * Automatically dispatches to translate_elf_function or translate_raw_code_block 198 | * as necessary. 
199 | */ 200 | status_code dynamic_recompiler::translate_referenced_address(uint64_t address, uint64_t *resolved_out) { 201 | // See if address is contained within a function in the original ELF binary 202 | const auto *func_sym = loader.lookup(address, loader.text_section_index(), elf_loader::Symbol::Bind::_ANY, 203 | elf_loader::LookupPolicy::CONTAINS); 204 | if (func_sym) { 205 | // Translate the whole function containing the target vaddr 206 | status_code res = translate_elf_function(*func_sym); 207 | if (res != status_code::SUCCESS) 208 | return res; 209 | } else { 210 | // The branch target doesn't lie within a function in the original binary, or it 211 | // does but the function isn't marked with a .size attribute. Treat it as a raw region and 212 | // lift until the first branch. 213 | status_code res = translate_raw_code_block(address); 214 | if (res != status_code::SUCCESS) 215 | return res; 216 | } 217 | 218 | // Ensure that the virtual address mapper can now resolve the vaddr 219 | uint64_t resolved = vam.lookup(address); /* a result of 0 is treated as "not resolved" */ 220 | if (!resolved) { 221 | pr_debug("Couldn't resolve virtual address 0x%lx even after function translation! Bailing out.\n", address); 222 | return status_code::BADACCESS; 223 | } 224 | *resolved_out = resolved; /* only written on success */ 225 | 226 | return status_code::SUCCESS; 227 | } 228 | 229 | /** 230 | * Handle an access by the translated code to untranslated instructions by 231 | * first translating the address and then calling into codegen code to patch 232 | * the access. 
233 | */ 234 | status_code dynamic_recompiler::runtime_handle_untranslated_access() { 235 | void *rctx = econtext.runtime_ctx(); 236 | uint64_t referenced_vaddr = gen->get_last_untranslated_access(rctx); 237 | 238 | pr_info("Translating access to virtual address 0x%lx\n", referenced_vaddr); 239 | 240 | // Translate code at referenced address if it isn't already translated 241 | uint64_t resolved = vam.lookup(referenced_vaddr); 242 | if (!resolved) { 243 | status_code res = translate_referenced_address(referenced_vaddr, &resolved); 244 | if (res != status_code::SUCCESS) { 245 | pr_error("Failed to resolve reference to virtual address: 0x%lx\n", referenced_vaddr); 246 | return status_code::BADACCESS; /* the specific status in `res` is not propagated; callers always see BADACCESS here */ 247 | } 248 | } 249 | 250 | // Patch code buffer with reference to newly translated address 251 | return gen->patch_translated_access(rctx, resolved); 252 | } 253 | -------------------------------------------------------------------------------- /src/dynamic_recompiler.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | namespace retrec { 38 | /* Top-level driver: owns the target environment, execution context, ELF loader, disassembler, codegen and syscall emulator, and exposes init() / execute(). Members are initialized in declaration order; econtext and loader only store references to each other, so passing `loader` to econtext before loader is constructed is fine as long as it is not used there. */ 39 | class dynamic_recompiler { 40 | target_environment target_env; 41 | execution_context econtext; 42 | elf_loader loader; 43 | disassembler disasm; 44 | CodegenBackend backend = default_codegen_backend; 45 | 46 | std::unique_ptr gen; 47 | std::list translated_regions; 48 | virtual_address_mapper vam; 49 | std::unique_ptr syscall_emu; 50 | 51 | // 52 | // Translation helpers 53 | // 54 | status_code translate_elf_function(const elf_loader::Symbol &symbol); 55 | status_code translate_raw_code_block(uint64_t vaddr); 56 | status_code translate_referenced_address(uint64_t address, uint64_t *resolved_out); 57 | status_code runtime_handle_untranslated_access(); 58 | 59 | public: /* Takes ownership of target_env_ by moving it into the target_env member. */ 60 | dynamic_recompiler(target_environment target_env_) : 61 | target_env(std::move(target_env_)), 62 | econtext(target_env, loader), 63 | loader(econtext, target_env.binary), /* bind to the member: target_env_ was moved from above and is destroyed when the constructor returns, so a reference to target_env_.binary would dangle */ 64 | disasm(loader) 65 | { 66 | } 67 | 68 | // 69 | // Public functions 70 | // 71 | status_code init(); 72 | status_code execute(); 73 | }; 74 | 75 | } 76 | 77 | -------------------------------------------------------------------------------- /src/elf_loader.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 
10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #include 31 | #include 32 | 33 | namespace retrec { 34 | /* ELF front-end over a mapped_file: exposes the ELF header, symbol table, entrypoint, text section index, base load address and target architecture. Holds references to the execution_context and mapped_file passed to the constructor, so both must outlive this object. */ 35 | class elf_loader { 36 | execution_context &econtext; 37 | mapped_file &file; 38 | Elf *elf = nullptr; 39 | GElf_Ehdr ehdr; /* no default initializer here; presumably filled in by init() - confirm in elf_loader.cpp */ 40 | Architecture arch; /* no default initializer here; presumably filled in by init() - confirm in elf_loader.cpp */ 41 | 42 | uint64_t text_shndx { 0 }; 43 | uint64_t base_load_address { 0 }; 44 | GElf_Shdr text_shdr; 45 | public: 46 | DISABLE_COPY_AND_MOVE(elf_loader) 47 | elf_loader(execution_context &econtext_, mapped_file &file_) : 48 | econtext(econtext_), file(file_) {} 49 | ~elf_loader(); 50 | 51 | status_code init(); 52 | status_code load_all(); 53 | /* One symbol-table entry as returned by lookup() and symbol_table(). */ 54 | struct Symbol { 55 | std::string name; 56 | uint8_t info; 57 | uint8_t other; 58 | uint64_t shndx; 59 | uint64_t value; 60 | uint64_t size; 61 | enum class Bind { 62 | LOCAL = 0, 63 | GLOBAL = 1, 64 | WEAK = 2, 65 | NUM = 3, 66 | GNU_UNIQUE = 10, 67 | 68 | _ANY = 255 /* passed to lookup() by callers that do not filter on binding; presumably a wildcard - confirm in lookup() */ 69 | } bind; 70 | }; 71 | 72 | enum class LookupPolicy { 73 | EXACT, // Exact matches only 74 | CONTAINS, // addr is within symbol start + size 75 | }; 76 | /* lookup() returns a pointer to a Symbol or a null pointer; callers in this project test the result for null before use. */ 77 | [[nodiscard]] const Symbol *lookup(uint64_t addr, uint64_t shndx, Symbol::Bind bind, LookupPolicy policy) const; 78 | [[nodiscard]] uint64_t get_symbol_size(const Symbol &sym) const; 79 | const void *get_symbol_data_ptr(const elf_loader::Symbol &sym); 80 | 81 | Architecture target_arch() const { return arch; } 82 | uint64_t entrypoint() const { return ehdr.e_entry; } 83 | const std::vector &symbol_table() const { return symbols; } 84 | uint64_t text_section_index() const { 
return text_shndx; } 85 | const auto &get_ehdr() const { return ehdr; } 86 | auto get_base_address() const { return base_load_address; } 87 | 88 | private: 89 | std::vector symbols; 90 | }; 91 | 92 | } 93 | -------------------------------------------------------------------------------- /src/execution_context.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | using namespace retrec; 29 | /* Stores references to the target environment and loader; records the host page size and seeds the address-space map with the current pid. */ 30 | execution_context::execution_context(const target_environment &target_env_, elf_loader &loader_) 31 | : vaddr_map(getpid()), page_size(sysconf(_SC_PAGESIZE)), 32 | target_env(target_env_), loader(loader_) {} 33 | 34 | execution_context::~execution_context() {} 35 | /* Initialize the address-space map and reserve the RWX code buffer handed to code_allocator. */ 36 | status_code execution_context::init() { 37 | // Setup virtual address space allocator 38 | status_code res = vaddr_map.init(); 39 | if (res != status_code::SUCCESS) 40 | return res; 41 | 42 | // Allocate code buffer 43 | void *code_start; 44 | res = allocate_and_map_vaddr(HIGH_MEM_RANGE, CODE_REGION_MAX_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, &code_start); 45 | if (res != status_code::SUCCESS) 46 | return res; 47 | 48 | code_allocator.init(code_start, CODE_REGION_MAX_SIZE); 49 | 50 | return status_code::SUCCESS; 51 | } 52 | /* Reserve `size` bytes inside `range` and back them with anonymous private memory; returns NOMEM if either the reservation or the mmap fails (the reservation is released in the latter case). */ 53 | status_code execution_context::allocate_and_map_vaddr(process_memory_map::Range range, size_t size, int prot, void **region_out) { 54 | uint64_t vaddr = vaddr_map.allocate_vaddr_in_range(size, range); 55 | if (!vaddr) 56 | return status_code::NOMEM; 57 | 58 | // Map the allocated address space 59 | void *mem = mmap((void *)vaddr, size, prot, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); 60 | if (mem == (void *)-1) { 61 | pr_debug("mmap failed at %p: %m\n", (void *)vaddr); 62 | vaddr_map.free(vaddr, size); 63 | return status_code::NOMEM; 64 | } 65 | 66 | *region_out = mem; 67 | return status_code::SUCCESS; 68 | } 69 | /* Allocate a stack of at least `size` bytes plus one guard page at its lowest address; on success *stack_out is the address one past the end of the allocation (the initial top of stack). */ 70 | status_code execution_context::allocate_new_stack(size_t size, void **stack_out) { 71 | // Determine the number of pages to allocate 72 | size_t allocation_size = align_to(size, page_size) + 1*page_size /* guard page */; 73 | assert(allocation_size >= 2); 74 | 75 | // Allocate at the end of the address space 76 | void *stack; 77 | auto res = allocate_and_map_vaddr(HIGH_MEM_RANGE, allocation_size, PROT_READ | 
PROT_WRITE, &stack); 78 | if (res != status_code::SUCCESS) 79 | return res; 80 | 81 | // Mark the guard page as !R, !W, !X 82 | if (mprotect((void *)stack, page_size, PROT_NONE) != 0) { /* without the guard page a stack overflow would go undetected, so undo the allocation and fail */ pr_error("Failed to protect stack guard page: %m\n"); munmap(stack, allocation_size); vaddr_map.free((uint64_t)stack, allocation_size); return status_code::NOMEM; } 83 | 84 | *stack_out = (void *)((char *)stack + allocation_size); 85 | return status_code::SUCCESS; 86 | } 87 | /* Map `len` bytes at the fixed, page-aligned address `start` and record the mapping; returns BADALIGN for an unaligned start, OVERLAP when the range is already tracked, NOMEM when mmap fails. region_out may be null. */ 88 | status_code execution_context::allocate_region(uint64_t start, size_t len, int prot, void **region_out, 89 | process_memory_map::Mapping::Type type) { 90 | if (start % page_size != 0) 91 | return status_code::BADALIGN; 92 | 93 | if (vaddr_map.find(start, len, nullptr, process_memory_map::FindPolicy::CONTAINS)) 94 | return status_code::OVERLAP; 95 | 96 | 97 | // Just try to map the region with mmap 98 | void *region = mmap((void *)start, len, prot, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); 99 | if (region == (void *)-1) 100 | return status_code::NOMEM; 101 | 102 | if ((uint64_t)region != start) { 103 | pr_info("Kernel didn't map pages at requested address!\n"); 104 | munmap(region, len); 105 | return status_code::NOMEM; 106 | } 107 | 108 | // Mark region as allocated 109 | vaddr_map.mark_allocated({start, start+len, type, prot}); pr_info("allocated region at 0x%zx\n", start); /* logged only once the mapping really exists */ 110 | 111 | if (region_out) 112 | *region_out = region; 113 | return status_code::SUCCESS; 114 | } 115 | /* Return `ptr` as a host pointer if the map tracks sizeof(ptr) bytes starting there, otherwise nullptr. */ 116 | void *execution_context::get_region_ptr(uint64_t ptr) { 117 | if (!vaddr_map.find(ptr, sizeof(ptr), nullptr, process_memory_map::FindPolicy::CONTAINS)) 118 | return nullptr; 119 | 120 | return (void *)ptr; 121 | } 122 | /* Allocate the initial stack, populate it with argv/envp for the target, and create + initialize the runtime context that will start at `entry`. */ 123 | status_code execution_context::initialize_runtime_context(Architecture target_arch, void *entry, virtual_address_mapper *vam, 124 | syscall_emulator *syscall_emu) { 125 | // Allocate an initial stack + guard page 126 | void *new_stack; 127 | auto res = allocate_new_stack(DEFAULT_STACK_SIZE, &new_stack); 128 | if (res != status_code::SUCCESS) { 129 | pr_error("Failed to allocate stack for translated code: %s\n", status_code_str(res)); 130 | return res; 131 | } 132 | 133 | 
// Initialize the stack with program arguments 134 | void *sp = initialize_target_stack(target_arch, new_stack, target_env.argv, target_env.envp, loader); 135 | 136 | // Call host-architecture-specific function to populate the runtime context 137 | runtime_context = std::make_unique(default_codegen_backend); 138 | res = runtime_context->init(target_arch, entry, sp, vam, syscall_emu); 139 | if (res != status_code::SUCCESS) 140 | return res; 141 | 142 | return status_code::SUCCESS; 143 | } 144 | /* Enter translated code; requires initialize_runtime_context() to have succeeded first. */ 145 | status_code execution_context::enter_translated_code() { 146 | assert(runtime_context); 147 | return runtime_context->execute(); 148 | } 149 | /* Change the protection of a tracked region; returns NOMEM when the region is not tracked and BADACCESS when the kernel rejects the change. */ 150 | status_code execution_context::protect_region(uint64_t start, uint64_t len, int prot) { 151 | auto mapping = vaddr_map.find(start, len, nullptr); 152 | if (!mapping) 153 | return status_code::NOMEM; 154 | 155 | if (mprotect((void *)start, len, prot) != 0) { /* keep the recorded protection in sync with what the kernel actually applied */ pr_debug("mprotect failed at %p: %m\n", (void *)start); return status_code::BADACCESS; } 156 | mapping->prot = prot; 157 | 158 | return status_code::SUCCESS; 159 | } 160 | /* Opaque pointer to the runtime context's data; requires an initialized runtime context. */ 161 | void *execution_context::runtime_ctx() { 162 | assert(runtime_context); 163 | return runtime_context->get_data(); 164 | } 165 | -------------------------------------------------------------------------------- /src/execution_context.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 
15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | namespace retrec { 36 | 37 | // Forward-declare translated_code_region since #including results in cyclic includes. 38 | class translated_code_region; 39 | class runtime_context_dispatcher; 40 | 41 | // Forward-declare elf_loader 42 | class elf_loader; 43 | 44 | // 45 | // Configuration of target environment 46 | // 47 | struct target_environment { 48 | mapped_file binary; /* the target executable */ 49 | std::vector argv; /* arguments placed on the target's initial stack */ 50 | std::vector envp; /* environment placed on the target's initial stack */ 51 | }; 52 | 53 | // 54 | // A simple execution context for running in the current process' address space. 55 | // 56 | class execution_context { 57 | process_memory_map vaddr_map; 58 | long page_size; /* host page size, read with sysconf(_SC_PAGESIZE) by the constructor */ 59 | const target_environment &target_env; /* reference, not a copy: the target_environment must outlive this object */ 60 | elf_loader &loader; /* reference only: the loader must outlive this object */ 61 | simple_placement_allocator code_allocator; 62 | 63 | std::unique_ptr runtime_context; /* null until initialize_runtime_context() creates it */ 64 | 65 | static constexpr size_t CODE_REGION_MAX_SIZE = 0x10000 * 32; // 2M ought to be enough for anybody :) 66 | static constexpr size_t DEFAULT_STACK_SIZE = 0x10000; // 64K default stack 67 | 68 | public: 69 | DISABLE_COPY_AND_MOVE(execution_context) 70 | execution_context(const target_environment &target_env_, elf_loader &loader_); 71 | ~execution_context(); 72 | status_code init(); 73 | 74 | enum class VaddrLocation { 75 | LOW, // 0x1000+ 76 | HIGH, // 0x3fff+ 77 | }; 78 | /* Address windows handed to allocate_and_map_vaddr(); the code buffer and stacks are placed in HIGH_MEM_RANGE. */ 79 | static constexpr process_memory_map::Range HIGH_MEM_RANGE = {0x3fff00000000, 0x7fffffffffff}; 80 | static constexpr process_memory_map::Range LOW_MEM_RANGE = {0x10000, 0xfffeffff}; 81 | 82 | // 83 | // Accessors 84 | // 85 | process_memory_map &map() { return vaddr_map; } 86 | simple_placement_allocator &get_code_allocator() { return code_allocator; } 87 | void *get_region_ptr(uint64_t ptr); 
88 | void *runtime_ctx(); /* asserts that the runtime context has been created */ 89 | 90 | // 91 | // Functions 92 | // 93 | status_code allocate_and_map_vaddr(process_memory_map::Range range, size_t size, int prot, void **region_out); 94 | status_code allocate_new_stack(size_t size, void **stack_out); /* *stack_out receives the address one past the end of the allocation; the lowest page is the guard page */ 95 | status_code allocate_region(uint64_t start, size_t len, int prot, void **region_out, 96 | process_memory_map::Mapping::Type type = process_memory_map::Mapping::Type::USER); /* region_out may be null */ 97 | status_code protect_region(uint64_t start, size_t len, int prot); 98 | status_code initialize_runtime_context(Architecture target_arch, void *entry, virtual_address_mapper *vam, 99 | syscall_emulator *syscall_emu); 100 | status_code enter_translated_code(); /* asserts that the runtime context has been created */ 101 | }; 102 | 103 | } 104 | -------------------------------------------------------------------------------- /src/instruction_stream.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | namespace retrec { 35 | 36 | /** 37 | * A stream of instructions emitted by an assembler. 
Each instruction object can be called to 38 | * emit the instruction to a provided buffer. 39 | * 40 | * Traits expected members: 41 | * (Type) AssemblerT - Opaque type for assembler 42 | * (Type) InsnT - Type for instruction (see below) 43 | * constexpr size_t calculate_code_size(const InsnT *insn_buf, size_t count) 44 | * - Function to calculate the total code size that will be emitted for a given instruction buffer. 45 | * 46 | * InsnT is the type of each entry in this instruction stream. Its expected members are: 47 | * Public constructor that can be called through a perfect forwarding template 48 | * Move constructor 49 | * status_code operator()(assembler*) - Method to emit the instruction using the provided assembler 50 | * void add_aux(...) - Method to construct an auxiliary data structure in-place to store with the instruction 51 | */ 52 | template 53 | class instruction_stream { 54 | public: 55 | instruction_stream(typename Traits::AssemblerT &assembler_) 56 | : insns(), assembler(assembler_) {} 57 | DISABLE_COPY_AND_MOVE(instruction_stream) 58 | /* Construct a new instruction in place at the end of the stream and return a reference to it. */ 59 | template 60 | auto &emplace_back(Ts&&... params) { return insns.emplace_back(std::forward(params)...); } 61 | 62 | /** 63 | * Append auxiliary data to the last instruction emitted. 64 | */ 65 | template 66 | void add_aux(Ts&&... args) { 67 | assert(!insns.empty()); /* there must be an instruction to attach the data to */ 68 | insns.back().add_aux(std::forward(args)...); 69 | } 70 | 71 | /** 72 | * Emit all instructions in this stream to the provided code buffer. 
73 | */ 74 | status_code emit_all_to_buf(uint8_t *buf, size_t size) { 75 | out_buf = buf; 76 | buf_size = size; 77 | offset = 0; /* every call restarts writing at the beginning of buf */ 78 | 79 | for (auto &insn : insns) { 80 | status_code res = insn(&assembler); 81 | if (res != status_code::SUCCESS) 82 | return res; /* stop at the first instruction that fails to emit */ 83 | } 84 | 85 | return status_code::SUCCESS; 86 | } 87 | 88 | // The total size the contained code will take once emitted 89 | size_t code_size() const { return Traits::calculate_code_size(insns.data(), insns.size()); } /* data() is valid on an empty vector, whereas &insns[0] is undefined behaviour there */ 90 | 91 | // Accessors for internal insn_data vec 92 | size_t size() const { return insns.size(); } 93 | typename Traits::InsnT &operator[](size_t i) { return insns[i]; } 94 | 95 | // Accessors for output buffer 96 | uint8_t *buf() const { return out_buf; } 97 | 98 | friend typename Traits::AssemblerT; 99 | 100 | private: /* Store one 32-bit word at the current offset and advance by 4; returns OVERFLOW when fewer than 4 bytes remain in the buffer. */ 101 | status_code write32(uint32_t x) { 102 | if (offset+4 > buf_size) 103 | return status_code::OVERFLOW; 104 | 105 | *(uint32_t *)(out_buf + offset) = x; 106 | offset += 4; 107 | 108 | return status_code::SUCCESS; 109 | }; 110 | 111 | std::vector insns {}; 112 | typename Traits::AssemblerT &assembler; 113 | 114 | uint8_t *out_buf { nullptr }; 115 | size_t buf_size { 0 }; 116 | size_t offset { 0 }; 117 | }; 118 | 119 | } // namespace retrec 120 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | using namespace retrec; 26 | /* Copy argv[start] .. argv[argc-1] into a vector. */ 27 | std::vector build_argv_vec(int start, int argc, char **argv) { 28 | // Insert arguments starting at argv[start] 29 | return {&argv[start], &argv[argc]}; 30 | } 31 | /* Copy the null-terminated envp array (terminator excluded) into a vector. */ 32 | std::vector build_envp_vec(char **envp) { 33 | // Pass through host envp 34 | size_t len; 35 | for (len = 0; envp[len]; len++) 36 | ; 37 | return {&envp[0], &envp[len]}; 38 | } 39 | /* Entry point: map the binary named by argv[1], build the target environment (target argv starts at the binary path, envp is the host environment), then initialize and run the recompiler. Exits with 1 on any failure, 0 otherwise. */ 40 | int main(int argc, char **argv, char **envp) { 41 | if (argc < 2) { 42 | fprintf(stderr, "Usage: %s \n", argv[0]); 43 | return 1; 44 | } 45 | const char *binary_path = argv[1]; 46 | 47 | // Map the user provided binary 48 | mapped_file binary(binary_path, true); 49 | if (binary.map() != status_code::SUCCESS) { 50 | pr_error("Failed to open binary: %s\n", binary_path); 51 | return 1; 52 | } 53 | 54 | // Initialize the dynamic recompiler and target environment 55 | target_environment env = { 56 | .binary = std::move(binary), 57 | .argv = build_argv_vec(1, argc, argv), 58 | .envp = build_envp_vec(envp) 59 | }; 60 | dynamic_recompiler rec(std::move(env)); 61 | status_code res = rec.init(); 62 | if (res != status_code::SUCCESS) { 63 | pr_error("Failed to init dynamic recompiler: %s\n", status_code_str(res)); 64 | return 1; 65 | } 66 | 67 | res = rec.execute(); if (res != status_code::SUCCESS) { /* do not report success to the shell when translation or execution failed */ pr_error("Execution failed: %s\n", status_code_str(res)); return 1; } 68 | 69 | return 0; 70 | } 71 | -------------------------------------------------------------------------------- /src/mapped_file.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 
5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "mapped_file.h" 27 | 28 | using namespace retrec; 29 | /* Unmap the file, but only if map() succeeded on this object (or on the object it was moved from). */ 30 | mapped_file::~mapped_file() { 31 | if (valid) 32 | munmap(data_region, data_length); 33 | } 34 | /* Open `path` (read-only or read-write according to `readonly`) and map the whole file MAP_SHARED. Returns BADFILE if the open, fstat or mmap fails; the file descriptor is closed on every path. */ 35 | status_code mapped_file::map() { 36 | int flags = O_CLOEXEC | (readonly ? O_RDONLY : O_RDWR); 37 | int fd = open(path.c_str(), flags); 38 | if (fd < 0) 39 | return status_code::BADFILE; 40 | 41 | struct stat statbuf; 42 | if (fstat(fd, &statbuf) < 0) { 43 | close(fd); 44 | return status_code::BADFILE; 45 | } 46 | data_length = statbuf.st_size; 47 | int prot = PROT_READ | (readonly ? 0 : PROT_WRITE); 48 | 49 | data_region = mmap(nullptr, data_length, prot, MAP_SHARED, fd, 0); 50 | close(fd); /* the mapping stays valid after the descriptor is closed, so release it on success as well as on failure */ 51 | if (data_region == (void *)-1) { 52 | data_region = nullptr; return status_code::BADFILE; 53 | } 54 | 55 | valid = true; 56 | return status_code::SUCCESS; 57 | } -------------------------------------------------------------------------------- /src/mapped_file.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 
5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | namespace retrec { 29 | /* Owns a memory mapping of a file on disk. Movable but not copyable; the mapping is released by the destructor of whichever object currently has valid == true. */ 30 | class mapped_file { 31 | std::string path; 32 | bool readonly; 33 | 34 | bool valid = false; /* true only while this object owns a live mapping */ 35 | void *data_region = nullptr; 36 | size_t data_length = 0; 37 | public: 38 | mapped_file(std::string path_, bool readonly_): path(path_), readonly(readonly_) {} 39 | 40 | ~mapped_file(); 41 | 42 | // Disable copy construction, allow move construction 43 | mapped_file(const mapped_file &) = delete; 44 | 45 | mapped_file &operator=(const mapped_file &) = delete; 46 | /* Move construction transfers ownership of the mapping; `other` is left with valid == false so its destructor does not unmap. */ 47 | mapped_file(mapped_file &&other): 48 | path(std::move(other.path)), readonly(other.readonly), /* previously left uninitialized in the moved-to object */ 49 | valid(std::exchange(other.valid, false)), 50 | data_region(other.data_region), data_length(other.data_length) {} 51 | /* Move assignment swaps all state, so the previous mapping of *this (if any) is released when `other` is destroyed. */ 52 | mapped_file &operator=(mapped_file &&other) { 53 | std::swap(path, other.path); std::swap(readonly, other.readonly); 54 | std::swap(valid, other.valid); 55 | std::swap(data_region, other.data_region); 56 | std::swap(data_length, other.data_length); 57 | return *this; 58 | } 59 | 60 | status_code map(); 61 | /* Typed view of the mapping's base address; T must be a pointer type. Returns a null pointer of type T until map() has succeeded. */ 62 | template 63 | T data() { static_assert(std::is_pointer_v); return static_cast(data_region); } 64 | 65 | size_t length() const { return data_length; }; 66 | }; 67 | 68 | } 69 | 
-------------------------------------------------------------------------------- /src/platform/generic_syscalls.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #include 21 | 22 | using namespace retrec; 23 | /* Return the enumerator name of `number` as a string literal. One `case` per syscall is generated by expanding ENUMERATE_GENERIC_LINUX_SYSCALLS with the local declare_case macro, which stringizes the enumerator name; any value not produced by that list reaches UNREACHABLE(). */ 24 | const char *retrec::generic_linux_syscall_name(SyscallLinuxGeneric number) { 25 | switch (number) { 26 | #define declare_case(name, _) \ 27 | case SyscallLinuxGeneric::name: return #name; 28 | 29 | ENUMERATE_GENERIC_LINUX_SYSCALLS(declare_case) 30 | #undef declare_case 31 | default: UNREACHABLE(); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/platform/syscall_emulator.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 
10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>. 18 | */ 19 | 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | 29 | using namespace retrec; 30 | 31 | std::unique_ptr make_syscall_rewriter(Architecture target_arch) { 32 | if constexpr (HOST_ARCH_PPC64LE) { 33 | switch (target_arch) { 34 | #define declare_case(arch, details) \ 35 | case arch: return std::make_unique>(); 36 | 37 | ENUMERATE_ALL_LINUX_SYSCALL_DETAILS(declare_case) 38 | #undef declare_case 39 | default: UNREACHABLE(); 40 | } 41 | } else { 42 | TODO(); 43 | } 44 | } 45 | 46 | syscall_emulator::syscall_emulator(Architecture target_arch_) 47 | : target_arch(target_arch_), 48 | rewriter(make_syscall_rewriter(target_arch)) {} 49 | 50 | 51 | std::variant syscall_emulator::emulate_syscall(int64_t target_number, 52 | const SyscallParameters &parameters) { 53 | return rewriter->invoke_syscall(target_number, parameters); 54 | } 55 | 56 | -------------------------------------------------------------------------------- /src/platform/syscall_emulator.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version.
10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>. 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | #include 28 | 29 | namespace retrec { 30 | 31 | class syscall_emulator { 32 | public: 33 | syscall_emulator(Architecture target_arch_); 34 | 35 | std::variant emulate_syscall(int64_t target_number, 36 | const SyscallParameters &parameters); 37 | private: 38 | Architecture target_arch; 39 | std::unique_ptr rewriter; 40 | }; 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/platform/syscall_types.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>. 18 | */ 19 | 20 | #pragma once 21 | 22 | /** 23 | * Definitions of and helpers for architecture-agnostic types, useful for representing 24 | * syscall parameters in a cross-platform way.
25 | */ 26 | 27 | namespace retrec::sc_types { 28 | 29 | #define ENUMERATE_SYSCALL_ARG_TYPES(x) \ 30 | x(sc_types::u8_le, u8_le) \ 31 | x(sc_types::u16_le, u16_le) \ 32 | x(sc_types::u32_le, u32_le) \ 33 | x(sc_types::u64_le, u64_le) \ 34 | x(sc_types::s8_le, s8_le) \ 35 | x(sc_types::s16_le, s16_le) \ 36 | x(sc_types::s32_le, s32_le) \ 37 | x(sc_types::s64_le, s64_le) \ 38 | x(sc_types::u8_be, u8_be) \ 39 | x(sc_types::u16_be, u16_be) \ 40 | x(sc_types::u32_be, u32_be) \ 41 | x(sc_types::u64_be, u64_be) \ 42 | x(sc_types::s8_be, s8_be) \ 43 | x(sc_types::s16_be, s16_be) \ 44 | x(sc_types::s32_be, s32_be) \ 45 | x(sc_types::s64_be, s64_be) \ 46 | x(sc_types::ptr32, ptr32) \ 47 | x(sc_types::ptr64, ptr64) 48 | 49 | #define declare_type(_, x) struct x {}; 50 | ENUMERATE_SYSCALL_ARG_TYPES(declare_type) 51 | #undef declare_type 52 | 53 | }; 54 | 55 | -------------------------------------------------------------------------------- /src/process_memory_map.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | #include 21 | 22 | #include 23 | 24 | using namespace retrec; 25 | 26 | 27 | process_memory_map::process_memory_map(pid_t pid_) : pid(pid_), page_size(sysconf(_SC_PAGESIZE)) {} 28 | 29 | status_code process_memory_map::init() { 30 | std::string path = std::string{"/proc/"} + std::to_string(pid) + "/maps"; 31 | std::fstream maps(path, std::ios::in); 32 | if (!maps.is_open()) { 33 | pr_error("Failed to open %s!\n", path.c_str()); 34 | return status_code::BADFILE; 35 | } 36 | 37 | std::string cur_line; 38 | while (std::getline(maps, cur_line)) { 39 | // Extract address range 40 | std::string range = cur_line.substr(0, cur_line.find(' ')); 41 | std::string start_str = range.substr(0, range.find('-')); 42 | std::string end_str = range.substr(range.find('-') + 1, range.size()); 43 | 44 | // Convert range to u64 45 | uint64_t start = std::stoull(start_str, 0, 16); 46 | uint64_t end = std::stoull(end_str, 0, 16); 47 | 48 | map.emplace_back(start, end, Mapping::Type::SYSTEM); 49 | pr_debug("added mapping: 0x%016lx-0x%016lx\n", start, end); 50 | } 51 | 52 | return status_code::SUCCESS; 53 | } 54 | 55 | uint64_t process_memory_map::allocate_vaddr_in_range(size_t size, Range range) { 56 | if (size % page_size != 0) 57 | return 0; 58 | if (range.low % page_size != 0) 59 | return 0; 60 | if (range.high - range.low < size) 61 | return 0; 62 | 63 | // Scan through address space at start of new account 64 | uint64_t cur_start = range.low; 65 | while (cur_start + size <= range.high) { 66 | // See if mapping already exists at this address 67 | auto mapping_opt = find(cur_start, size, nullptr, FindPolicy::CONTAINS); 68 | if (mapping_opt) { 69 | // It does - skip to the match's end 70 | cur_start = mapping_opt->end; 71 | } else { 72 | // It doesn't - we can use this region 73 | uint64_t cur_end = cur_start + size; 74 | map.emplace_back(cur_start, cur_end, Mapping::Type::USER); 75 | assert(cur_end % page_size == 0); 76 | sort(); 77 | return cur_start; 78 | } 79 | } 80 
| 81 | return 0; 82 | } 83 | 84 | void process_memory_map::sort() { 85 | std::sort(map.begin(), map.end(), [](auto &a, auto &b) { 86 | return a.start < b.start; 87 | }); 88 | } 89 | 90 | void process_memory_map::mark_allocated(Mapping entry) { 91 | map.push_back(entry); 92 | sort(); 93 | } 94 | 95 | std::optional process_memory_map::find(uint64_t addr, uint64_t len, size_t *index_out, 96 | FindPolicy policy) { 97 | size_t i = 0; 98 | for (auto &cur : map) { 99 | switch (policy) { 100 | case FindPolicy::EXACT: 101 | if (cur.start == addr && cur.end == len + addr) { 102 | if (index_out) 103 | *index_out = i; 104 | return cur; 105 | } 106 | break; 107 | 108 | case FindPolicy::CONTAINS: 109 | if (cur.start <= addr && (addr + len) <= cur.end) { 110 | if (index_out) 111 | *index_out = i; 112 | return cur; 113 | } 114 | break; 115 | } 116 | 117 | ++i; 118 | } 119 | 120 | return std::nullopt; 121 | } 122 | 123 | void process_memory_map::free(uint64_t addr, uint64_t len) { 124 | size_t mapping_index; 125 | auto mapping = find(addr, len, &mapping_index); 126 | assert(mapping); 127 | 128 | map.erase(map.begin() + mapping_index); 129 | } 130 | 131 | -------------------------------------------------------------------------------- /src/process_memory_map.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 
15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include 31 | 32 | namespace retrec { 33 | 34 | class process_memory_map { 35 | public: 36 | struct Mapping { 37 | uint64_t start; // inclusive 38 | uint64_t end; // exclusive 39 | 40 | enum class Type { 41 | SYSTEM, // Allocated by the system/runtime 42 | USER, // Allocated by us 43 | ELF, // Part of the mapped target ELF 44 | } type; 45 | 46 | int prot; 47 | 48 | Mapping(uint64_t start_, uint64_t end_, Type type_) 49 | : start(start_), end(end_), type(type_) {} 50 | Mapping(uint64_t start_, uint64_t end_, Type type_, int prot_) 51 | : start(start_), end(end_), type(type_), prot(prot_) {} 52 | }; 53 | 54 | struct Range { 55 | uint64_t low; // inclusive 56 | uint64_t high; // exclusive 57 | }; 58 | 59 | enum class FindPolicy { 60 | EXACT, // Exact matches only 61 | CONTAINS, // addr is within [start, end) 62 | }; 63 | 64 | explicit process_memory_map(pid_t pid_); 65 | status_code init(); 66 | 67 | // Accessors for internal map 68 | const auto &operator[](size_t i) const { return map[i]; } 69 | auto size() const { return map.size(); } 70 | 71 | uint64_t allocate_vaddr_in_range(size_t size, Range range); 72 | void mark_allocated(Mapping entry); 73 | std::optional find(uint64_t addr, uint64_t len, size_t *index_out, FindPolicy = FindPolicy::EXACT); 74 | void free(uint64_t addr, uint64_t len); 75 | 76 | private: 77 | pid_t pid; 78 | long page_size; 79 | std::vector map; 80 | 81 | void sort(); 82 | }; 83 | 84 | } 85 | -------------------------------------------------------------------------------- /src/util/staticvector.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 
5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | namespace retrec { 30 | 31 | /** 32 | * A simple vector class with fixed static storage 33 | */ 34 | template 35 | class StaticVector { 36 | std::array arr; 37 | size_t count { 0 }; 38 | using UnderlyingArr = std::array; 39 | 40 | template 41 | constexpr StaticVector(const ArrT &in_arr, size_t n, std::index_sequence) 42 | : arr({in_arr[I]...}), count(n) {} 43 | public: 44 | // Construct from an array of Ts 45 | template 46 | constexpr StaticVector(const ArrT (&in_arr)[N]) 47 | : StaticVector(in_arr, N, std::make_index_sequence{}) { 48 | static_assert(N <= MAX_SIZE); 49 | } 50 | 51 | // Construct from Ts 52 | template 53 | constexpr StaticVector(ElemTs... 
elements) : arr({elements...}), count(N) { 54 | static_assert(N <= MAX_SIZE); 55 | } 56 | 57 | constexpr StaticVector() : arr() {} 58 | constexpr StaticVector(const StaticVector &other) : 59 | arr(other.arr), count(other.count) {} 60 | constexpr StaticVector(StaticVector &&other) : 61 | arr(std::move(other.arr)), count(other.count) {} 62 | constexpr StaticVector &operator=(const StaticVector &other) { 63 | arr = other.arr; 64 | count = other.count; 65 | return *this; 66 | } 67 | constexpr StaticVector &operator=(StaticVector &&other) { 68 | arr = std::move(other.arr); 69 | count = other.count; 70 | return *this; 71 | } 72 | 73 | const T &operator[](size_t i) const { 74 | assert(i < count); 75 | return arr[i]; 76 | } 77 | 78 | bool operator==(const StaticVector &other) const { 79 | if (count != other.count) 80 | return false; 81 | return arr == other.arr; 82 | } 83 | 84 | void push_back(const T &val) { 85 | assert(count < MAX_SIZE); 86 | arr[count++] = val; 87 | } 88 | 89 | void push_back(T &&val) { 90 | assert(count < MAX_SIZE); 91 | arr[count++] = std::forward(val); 92 | } 93 | 94 | void remove(size_t i) { 95 | assert(i < count); 96 | // Shift all elements after i back one 97 | for (size_t j = i; i < count; i++) { 98 | arr[j] = std::move(arr[j + 1]); 99 | } 100 | count -= 1; 101 | } 102 | 103 | // Return new array of elements present in this and not other 104 | StaticVector difference(const StaticVector &other) const { 105 | std::array ret; 106 | size_t ret_size = 0; 107 | for (auto &elem : *this) { 108 | if (!contains(other, elem)) 109 | ret[ret_size++] = elem; 110 | } 111 | return ret; 112 | } 113 | 114 | size_t size() const { return count; } 115 | const T *cbegin() const { return &arr[0]; } 116 | const T *cend() const { return &arr[count]; } 117 | const T *begin() const { return &arr[0]; } 118 | const T *end() const { return &arr[count]; } 119 | }; 120 | 121 | } // namespace retrec 122 | 
-------------------------------------------------------------------------------- /src/util/util.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | using namespace retrec; 26 | 27 | const char *retrec::status_code_str(status_code code) { 28 | switch (code) { 29 | case status_code::SUCCESS: 30 | return "Success"; 31 | case status_code::BADACCESS: 32 | return "Bad memory access"; 33 | case status_code::BADALIGN: 34 | return "Bad alignment"; 35 | case status_code::BADARCH: 36 | return "Bad architecture"; 37 | case status_code::BADBRANCH: 38 | return "Unable to resolve branch target"; 39 | case status_code::BADELF: 40 | return "Bad ELF file"; 41 | case status_code::BADFILE: 42 | return "Bad file"; 43 | case status_code::DEFER: 44 | return "Operation should be tried again later"; 45 | case status_code::HALT: 46 | return "Translated code execution requested to halt"; 47 | case status_code::NOMEM: 48 | return "No memory available"; 49 | case status_code::OVERFLOW: 50 | return "Overflow"; 51 | case status_code::OVERLAP: 52 | return "Operation would result in memory region overlap"; 53 | case status_code::UNIMPL_INSN: 54 | 
return "Unimplemented instruction"; 55 | case status_code::UNIMPL_SYSCALL: 56 | return "Unimplemented syscall"; 57 | case status_code::UNTRANSLATED: 58 | return "Attempt to reference untranslated code"; 59 | } 60 | UNREACHABLE(); 61 | } 62 | 63 | const char *log_level_names[] = { 64 | "[DEBUG]", 65 | "[INFO]", 66 | "[WARN]", 67 | "[ERROR]" 68 | }; 69 | 70 | void retrec::log_impl(log_level level, const char *file, int line, const char *fmt, ...) { 71 | va_list args; 72 | va_start(args, fmt); 73 | 74 | fprintf(stderr, "%s %s:%d: ", log_level_names[level], file, line); 75 | vfprintf(stderr, fmt, args); 76 | 77 | va_end(args); 78 | } 79 | -------------------------------------------------------------------------------- /src/util/util.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 
18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include 31 | 32 | #define __weak __attribute__((weak)) 33 | 34 | namespace retrec { 35 | 36 | // 37 | // General definitions 38 | // 39 | 40 | enum class status_code { 41 | SUCCESS, 42 | BADACCESS, 43 | BADALIGN, 44 | BADARCH, 45 | BADBRANCH, 46 | BADELF, 47 | BADFILE, 48 | DEFER, 49 | HALT, 50 | NOMEM, 51 | OVERFLOW, 52 | OVERLAP, 53 | UNIMPL_INSN, 54 | UNIMPL_SYSCALL, 55 | UNTRANSLATED, 56 | }; 57 | const char *status_code_str(status_code code); 58 | 59 | enum class Architecture { 60 | X86_64, 61 | ppc64le 62 | }; 63 | 64 | // 65 | // Misc. Helpers 66 | // 67 | 68 | #define ARRAY_SIZE(x) (sizeof((x)) / sizeof(*(x))) 69 | 70 | // Useful for declaring comma-separated lists with x-macros 71 | #define X_LIST(x, ...) x, 72 | 73 | // Template for creating dummy/sentinel types 74 | template 75 | class Sentinel {}; 76 | 77 | template 78 | std::enable_if_t<(std::is_pointer_v || std::is_integral_v), ValT> 79 | align_to(ValT val, AlignT alignment) { 80 | return (ValT)((uintptr_t)val & ~(alignment - 1)); 81 | } 82 | 83 | template 84 | bool contains(const ContainerT &container, ValT val) { 85 | return std::find(container.cbegin(), container.cend(), val) != container.cend(); 86 | } 87 | 88 | template 89 | bool contains_any(const ContainerT &container, const ValListT &val_list) { 90 | for (auto &val : val_list) { 91 | if (contains(container, val)) 92 | return true; 93 | } 94 | return false; 95 | } 96 | 97 | template 98 | bool contains_all(const ContainerT &container, const ValListT &val_list) { 99 | for (auto &val : val_list) { 100 | if (!contains(container, val)) 101 | return false; 102 | } 103 | return true; 104 | } 105 | 106 | template struct Overloaded : Ts... { using Ts::operator()...; }; 107 | template Overloaded(Ts...) 
-> Overloaded; 108 | 109 | template 110 | constexpr std::remove_reference_t ref_cast(T val) { 111 | return static_cast>(val); 112 | } 113 | 114 | template 115 | constexpr std::underlying_type_t enum_cast(EnumT val) { 116 | return static_cast>(val); 117 | } 118 | 119 | #define DISABLE_COPY_AND_MOVE(classname) \ 120 | classname(const classname &other) = delete; \ 121 | classname& operator=(const classname &other) = delete; \ 122 | classname(classname &&other) = delete; \ 123 | classname& operator=(classname &&other) = delete; 124 | 125 | template 126 | constexpr bool types_are_same() { 127 | return std::is_same_v< 128 | std::remove_reference_t< 129 | std::remove_cv_t< 130 | A 131 | > 132 | >, 133 | std::remove_reference_t< 134 | std::remove_cv_t< 135 | B 136 | > 137 | > 138 | >; 139 | } 140 | 141 | template 142 | constexpr bool types_are_same_v = types_are_same(); 143 | 144 | template 145 | uint32_t clz(T) { 146 | static_assert(!std::is_same_v, "Unimplemented clz for this type"); 147 | return 0; 148 | } 149 | template <> 150 | inline uint32_t clz(unsigned short val) { return __builtin_clz(val) - 16; } 151 | template <> 152 | inline uint32_t clz(unsigned int val) { return __builtin_clz(val); } 153 | template <> 154 | inline uint32_t clz(unsigned long val) { return __builtin_clzl(val); } 155 | 156 | template 157 | std::enable_if_t, std::string> to_hex_string(T number) { 158 | std::stringstream ss; 159 | ss << std::hex << number; 160 | return ss.str(); 161 | } 162 | 163 | // 164 | // Logging 165 | // 166 | 167 | enum log_level { 168 | #define _LOGL_DEBUG 0 169 | LOGL_DEBUG = _LOGL_DEBUG, 170 | #define _LOGL_INFO 1 171 | LOGL_INFO = _LOGL_INFO, 172 | #define _LOGL_WARN 2 173 | LOGL_WARN = _LOGL_WARN, 174 | #define _LOGL_ERROR 3 175 | LOGL_ERROR = _LOGL_ERROR, 176 | }; 177 | 178 | __attribute__((format (printf, 4, 5))) 179 | void log_impl(log_level level, const char *file, int line, const char *fmt, ...); 180 | 181 | } // namespace retrec 182 | 183 | #define TODO() do { \ 
184 | pr_error("Unimplemented code path hit!\n"); \ 185 | abort(); \ 186 | } while(0) 187 | 188 | #define ASSERT_NOT_REACHED() do { \ 189 | pr_error("Assert not reached!\n"); \ 190 | abort(); \ 191 | } while (0) 192 | 193 | #define ALLOW_IMPLICIT_INT_CONVERSION() do { \ 194 | _Pragma("GCC diagnostic push"); \ 195 | _Pragma("GCC diagnostic ignored \"-Wconversion\""); \ 196 | } while (0) 197 | 198 | #define DISALLOW_IMPLICIT_INT_CONVERSION() do { \ 199 | _Pragma("GCC diagnostic pop"); \ 200 | } while (0) 201 | 202 | #define UNREACHABLE() __builtin_unreachable() 203 | 204 | #ifndef RETREC_MINIMUM_LOG_LEVEL 205 | #error "RETREC_MINIMUM_LOG_LEVEL not defined! Broken build system?" 206 | #elif (RETREC_MINIMUM_LOG_LEVEL < _LOGL_DEBUG) || (RETREC_MINIMUM_LOG_LEVEL > _LOGL_ERROR) 207 | #error "Invalid MINIMUM_LOG_LEVEL specified!" 208 | #endif 209 | 210 | /** 211 | * Only define logging macros if the minimum log level is <= to it. 212 | */ 213 | #if RETREC_MINIMUM_LOG_LEVEL <= _LOGL_DEBUG 214 | #define pr_debug(fmt, ...) retrec::log_impl(LOGL_DEBUG, &__FILE__[SOURCE_PATH_SIZE], __LINE__, fmt, ##__VA_ARGS__) 215 | #else 216 | #define pr_debug(...) 217 | #endif 218 | 219 | #if RETREC_MINIMUM_LOG_LEVEL <= _LOGL_INFO 220 | #define pr_info(fmt, ...) retrec::log_impl(LOGL_INFO, &__FILE__[SOURCE_PATH_SIZE], __LINE__, fmt, ##__VA_ARGS__) 221 | #else 222 | #define pr_info(...) 223 | #endif 224 | 225 | #if RETREC_MINIMUM_LOG_LEVEL <= _LOGL_WARN 226 | #define pr_warn(fmt, ...) retrec::log_impl(LOGL_WARN, &__FILE__[SOURCE_PATH_SIZE], __LINE__, fmt, ##__VA_ARGS__) 227 | #else 228 | #define pr_warn(...) 229 | #endif 230 | 231 | // Always define PR_ERROR 232 | #define pr_error(fmt, ...) 
retrec::log_impl(LOGL_ERROR, &__FILE__[SOURCE_PATH_SIZE], __LINE__, fmt, ##__VA_ARGS__) 233 | -------------------------------------------------------------------------------- /src/virtual_address_mapper.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #include 21 | #include 22 | 23 | using namespace retrec; 24 | 25 | virtual_address_mapper::virtual_address_mapper() {} 26 | 27 | /** 28 | * insert - Register a {vaddr : haddr} mapping 29 | */ 30 | void virtual_address_mapper::insert(VAddrT vaddr, HAddrT haddr) { 31 | map.insert({vaddr, haddr}); 32 | } 33 | 34 | /** 35 | * Lookup - Find the corresponding haddr for a given vaddr 36 | */ 37 | auto virtual_address_mapper::lookup(VAddrT vaddr) -> HAddrT { 38 | auto pair_it = map.find(vaddr); 39 | if (pair_it == map.end()) 40 | return 0; 41 | return pair_it->second; 42 | } 43 | 44 | /** 45 | * lookup_and_update_call_cache - Find the corresponding haddr for a given vaddr, 46 | * and update the call cache with the given (vaddr, haddr) pair so it can be quickly 47 | * looked up by future RETs. 48 | * 49 | * Useful for implementing "CALL". 
50 | */ 51 | auto virtual_address_mapper::lookup_and_update_call_cache(VAddrT target, VAddrT ret_vaddr, 52 | HAddrT ret_haddr) -> HAddrT { 53 | // Try to insert return's address into the call cache 54 | if (free_cache_entries > 0) { 55 | for (auto &entry : call_cache) { 56 | if (!entry.valid) { 57 | free_cache_entries--; 58 | entry.valid = true; 59 | entry.vaddr = ret_vaddr; 60 | entry.haddr = ret_haddr; 61 | break; 62 | } 63 | } 64 | } 65 | 66 | // Find the target host address and return it 67 | auto pair_it = map.find(target); 68 | if (pair_it == map.end()) 69 | return 0; 70 | return pair_it->second; 71 | } 72 | 73 | /** 74 | * lookup_check_call_cache - Find the corresponding haddr for a given vaddr, 75 | * checking the call cache first then falling back to the map. 76 | * 77 | * Useful for implementing "RET". 78 | */ 79 | auto virtual_address_mapper::lookup_check_call_cache(VAddrT target) -> HAddrT { 80 | // Check the call cache 81 | if (free_cache_entries != CALL_CACHE_SIZE) { 82 | for (auto &entry : call_cache) { 83 | if (entry.valid && entry.vaddr == target) { 84 | // Invalidate the entry and return the haddr. In the future the 85 | // cache should probably be more clever. 86 | entry.valid = false; 87 | free_cache_entries++; 88 | return entry.haddr; 89 | } 90 | } 91 | } 92 | 93 | // Nothing in the call cache, check the map 94 | auto pair_it = map.find(target); 95 | if (pair_it == map.end()) 96 | return 0; 97 | return pair_it->second; 98 | } 99 | -------------------------------------------------------------------------------- /src/virtual_address_mapper.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2021 Shawn Anastasio. 3 | * 4 | * This file is part of retrec. 
5 | * 6 | * retrec is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Lesser General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * retrec is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with retrec. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | namespace retrec { 27 | 28 | /** 29 | * virtual_address_mapper - Collection of known target virtual address to host address mappings. 30 | * 31 | * Entries are inserted by host codegen routines and looked up by translated code, either through 32 | * native function calls to member functions or via direct member access in emitted code. 
33 | */ 34 | class virtual_address_mapper { 35 | public: 36 | using VAddrT = uint64_t; // Target virtual address 37 | using HAddrT = uint64_t; // Host address 38 | 39 | virtual_address_mapper(); 40 | 41 | void insert(VAddrT vaddr, HAddrT haddr); 42 | HAddrT lookup(VAddrT vaddr); 43 | HAddrT lookup_and_update_call_cache(VAddrT target, VAddrT ret_vaddr, HAddrT ret_haddr); 44 | HAddrT lookup_check_call_cache(VAddrT target); 45 | 46 | struct call_cache_entry { 47 | uint64_t valid; // Boolean but 64 bits for easy access from assembly 48 | VAddrT vaddr; 49 | HAddrT haddr; 50 | }; 51 | static constexpr size_t CALL_CACHE_SIZE = 32; 52 | 53 | // 54 | // Member variables 55 | // 56 | 57 | // Map of all known vaddr:haddr pairs 58 | std::unordered_map<VAddrT, HAddrT> map; 59 | 60 | // Cache of vaddr:haddr pairs used for quick call cache resolution 61 | size_t free_cache_entries { CALL_CACHE_SIZE }; 62 | call_cache_entry call_cache[CALL_CACHE_SIZE] = {{0, 0, 0}}; 63 | }; 64 | 65 | } // namespace retrec 66 | -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | ASM_SRCS:=$(shell find . -name "*.S") 2 | ASM_OBJS:=$(ASM_SRCS:.S=.o) 3 | ASM_TARGETS:=$(ASM_SRCS:.S=.bin) 4 | GENERATED_ASM_TARGETS= \ 5 | asm/zf.gen.bin \ 6 | asm/sf.gen.bin \ 7 | asm/cf.gen.bin \ 8 | asm/of.gen.bin \ 9 | asm/above.gen.bin \ 10 | asm/greater_eq.gen.bin \ 11 | asm/greater.gen.bin \ 12 | asm/alu.gen.bin \ 13 | asm/load.gen.bin \ 14 | asm/setcc.gen.bin \ 15 | asm/x87loadstore.gen.bin 16 | GENERATED_ASM_SRCS=$(GENERATED_ASM_TARGETS:.bin=.S) 17 | 18 | # Freestanding (no libc) C targets 19 | FS_C_SRCS:=$(shell find . -name "*.fs.c") 20 | FS_C_TARGETS:=$(FS_C_SRCS:.c=.bin) 21 | FS_CFLAGS:=-static -nostdlib -nostdinc -ffreestanding -Wall -O2 -std=gnu99 22 | 23 | # Keep intermediate objects 24 | .SECONDARY: 25 | 26 | # Tell LD to account for a max page size of 64k.
# This lets us easily test
# our binaries on ppc64le 64k hosts. Real binaries will need to be run on a
# 4k host.
#
# --no-relax prevents the linker from optimizing away things like RIP-relative
# addressing which we want to explicitly keep for testing
LDFLAGS:=-z max-page-size=65536 --no-relax
C_LDFLAGS:=-Wl,-z,max-page-size=65536

# Prefix of the x86_64 cross toolchain used to build the test binaries.
# Override on the command line to use a differently named toolchain, e.g.
#   make CROSS_COMPILE=x86_64-linux-gnu-
CROSS_COMPILE ?= x86_64-unknown-linux-gnu-

# Neither of these names a file, so never let a stray file called "all" or
# "clean" shadow them
.PHONY: all clean

all: $(ASM_TARGETS) $(FS_C_TARGETS) $(GENERATED_ASM_TARGETS)

# Generate a test source; the test name is the file name without ".gen.S"
%.gen.S: asm/gentest.py
	$< $(notdir $(@:.gen.S=)) $@

%.bin: %.o
	$(CROSS_COMPILE)ld $(LDFLAGS) $< -o $@

%.o: %.S
	$(CROSS_COMPILE)as $< -o $@

%.fs.bin: %.fs.c
	$(CROSS_COMPILE)gcc $(C_LDFLAGS) $(FS_CFLAGS) $< -o $@

# Objects are written next to their sources (e.g. under asm/), so remove them
# by name as well; "*.o" alone only matches this directory.
clean:
	rm -rf $(ASM_TARGETS)
	rm -rf *.o $(ASM_OBJS) $(GENERATED_ASM_TARGETS:.bin=.o)
	rm -rf $(GENERATED_ASM_TARGETS)
	rm -rf $(GENERATED_ASM_SRCS)
	rm -rf $(FS_C_TARGETS)

--------------------------------------------------------------------------------
/test/asm/.addressing_modes.bin.expected:
--------------------------------------------------------------------------------
PASS: DISP Load/Store
PASS: BASE Load/Store
PASS: BASE_IDX Load/Store
PASS: BASE_IDX_DISP Load/Store
PASS: BASE_IDX_SCALE Load/Store
PASS: IDX_SCALE_DISP Load/Store
PASS: BASE_IDX_SCALE_DISP Load/Store
PASS: RIPREL Load/Store
PASS: STORE_IMM Load/Store

--------------------------------------------------------------------------------
/test/asm/.hello_sse.bin.expected:
--------------------------------------------------------------------------------
Hello, world!!!
Hello, world!!!
Hello, world!!!
Hello, world!!!

--------------------------------------------------------------------------------
/test/asm/.loadstore.bin.expected:
--------------------------------------------------------------------------------
PASS: BYTE memcpy worked!
PASS: WORD memcpy worked!
PASS: DWORD memcpy worked!!
4 | PASS: QWORD memcpy worked....!! 5 | PASS: Load to aliased register (ah) worked! 6 | PASS: Load to aliased register (al) worked! 7 | PASS: Load to aliased register (ax) worked! 8 | PASS: NEGATIVE_INDEX loadstore worked! 9 | PASS: NEGATIVE_INDEX32 loadstore worked! 10 | PASS: ALIASED_BASE loadstore worked! 11 | -------------------------------------------------------------------------------- /test/asm/.pushpop.bin.expected: -------------------------------------------------------------------------------- 1 | PASS: PUSH_IMM! 2 | PASS: PUSH_REG! 3 | PASS: PUSH_MEM! 4 | -------------------------------------------------------------------------------- /test/asm/.stos.bin.expected: -------------------------------------------------------------------------------- 1 | STOSB_EDI: should be '11': 11 2 | STOSB_FORWARD: should be '11': 11 3 | STOSB_BACKWARD: should be '11': 11 4 | REP_STOSB: should be '1110': 1110 5 | STOSW_FORWARD: should be '1111': 1111 6 | STOSW_BACKWARD: should be '1111': 1111 7 | STOSD_FORWARD: should be '11111111': 11111111 8 | STOSD_BACKWARD: should be '11111111': 11111111 9 | STOSQ_FORWARD: should be '1111111111111111': 1111111111111111 10 | STOSQ_BACKWARD: should be '1111111111111111': 1111111111111111 11 | -------------------------------------------------------------------------------- /test/asm/addressing_modes.S: -------------------------------------------------------------------------------- 1 | .intel_syntax noprefix 2 | 3 | .data 4 | PASS_STR: .ascii "PASS" 5 | 6 | DISP_STR: .ascii "FAIL: DISP Load/Store\n" 7 | DISP_STR_LEN = . - DISP_STR 8 | 9 | BASE_STR: .ascii "FAIL: BASE Load/Store\n" 10 | BASE_STR_LEN = . - BASE_STR 11 | 12 | BASE_IDX_STR: .ascii "FAIL: BASE_IDX Load/Store\n" 13 | BASE_IDX_STR_LEN = . - BASE_IDX_STR 14 | 15 | BASE_IDX_DISP_STR: .ascii "FAIL: BASE_IDX_DISP Load/Store\n" 16 | BASE_IDX_DISP_STR_LEN = . - BASE_IDX_DISP_STR 17 | 18 | BASE_IDX_SCALE_STR: .ascii "FAIL: BASE_IDX_SCALE Load/Store\n" 19 | BASE_IDX_SCALE_STR_LEN = . 
- BASE_IDX_SCALE_STR 20 | 21 | IDX_SCALE_DISP_STR: .ascii "FAIL: IDX_SCALE_DISP Load/Store\n" 22 | IDX_SCALE_DISP_STR_LEN = . - IDX_SCALE_DISP_STR 23 | 24 | BASE_IDX_SCALE_DISP_STR: .ascii "FAIL: BASE_IDX_SCALE_DISP Load/Store\n" 25 | BASE_IDX_SCALE_DISP_STR_LEN = . - BASE_IDX_SCALE_DISP_STR 26 | 27 | RIPREL_STR: .ascii "FAIL: RIPREL Load/Store\n" 28 | RIPREL_STR_LEN = . - RIPREL_STR 29 | 30 | STORE_IMM_STR: .ascii "FAIL: STORE_IMM Load/Store\n" 31 | STORE_IMM_STR_LEN = . - STORE_IMM_STR 32 | .lcomm PASS_BUF,4 33 | 34 | .text 35 | 36 | .global _start 37 | _start: 38 | # Displacement 39 | mov eax, dword ptr [PASS_STR] 40 | mov dword ptr [DISP_STR], eax 41 | 42 | mov rax, 1 # SYS_write 43 | mov rdi, 1 44 | mov rsi, OFFSET DISP_STR 45 | mov edx, DISP_STR_LEN 46 | syscall 47 | 48 | # Base 49 | mov eax, OFFSET PASS_STR 50 | mov eax, [eax] 51 | mov ebx, OFFSET BASE_STR 52 | mov [ebx], eax 53 | 54 | mov rax, 1 # SYS_write 55 | mov rdi, 1 56 | mov rsi, OFFSET BASE_STR 57 | mov edx, BASE_STR_LEN 58 | syscall 59 | 60 | # Base+Index 61 | mov eax, OFFSET PASS_STR-8 62 | mov ecx, OFFSET 8 63 | mov eax, [eax+ecx] 64 | 65 | mov ebx, OFFSET BASE_IDX_STR-8 66 | mov [ebx + ecx], eax 67 | 68 | mov rax, 1 # SYS_write 69 | mov rdi, 1 70 | mov rsi, OFFSET BASE_IDX_STR 71 | mov edx, BASE_IDX_STR_LEN 72 | syscall 73 | 74 | # Base+Index+Displacement 75 | mov eax, OFFSET PASS_STR-12 76 | mov ecx, OFFSET 8 77 | mov eax, [eax + ecx + 4] 78 | 79 | mov ebx, OFFSET BASE_IDX_DISP_STR-12 80 | mov [ebx + ecx + 4], eax 81 | 82 | mov rax, 1 # SYS_write 83 | mov rdi, 1 84 | mov rsi, OFFSET BASE_IDX_DISP_STR 85 | mov edx, BASE_IDX_DISP_STR_LEN 86 | syscall 87 | 88 | # Base+Index*Scale 89 | mov eax, OFFSET PASS_STR-8 90 | mov ecx, OFFSET 4 91 | mov eax, [eax + ecx*2] 92 | 93 | mov ebx, OFFSET BASE_IDX_SCALE_STR-8 94 | mov [ebx + ecx*2], eax 95 | 96 | mov rax, 1 # SYS_write 97 | mov rdi, 1 98 | mov rsi, OFFSET BASE_IDX_SCALE_STR 99 | mov edx, BASE_IDX_SCALE_STR_LEN 100 | syscall 101 | 102 | # 
Index*Scale+Displacement 103 | mov eax, OFFSET PASS_STR 104 | mov eax, [eax*2 - 0x421018] # 0x421018 - address of PASS_STR 105 | 106 | mov ebx, OFFSET IDX_SCALE_DISP_STR 107 | mov [ebx*2 - 0x4210a1], eax # 0x4210a1 - address of IDX_SCALE_DISP_STR 108 | 109 | mov rax, 1 # SYS_write 110 | mov rdi, 1 111 | mov rsi, OFFSET IDX_SCALE_DISP_STR 112 | mov edx, IDX_SCALE_DISP_STR_LEN 113 | syscall 114 | 115 | # Base + Index*Scale + Displacement 116 | mov eax, OFFSET PASS_STR - 9 117 | mov ecx, 4 118 | mov eax, [eax + ecx*2 + 1] 119 | 120 | mov ebx, OFFSET BASE_IDX_SCALE_DISP_STR - 9 121 | mov [ebx + ecx*2 + 1], eax 122 | 123 | mov rax, 1 # SYS_write 124 | mov rdi, 1 125 | mov rsi, OFFSET BASE_IDX_SCALE_DISP_STR 126 | mov edx, BASE_IDX_SCALE_DISP_STR_LEN 127 | syscall 128 | 129 | # RIP-relative 130 | mov eax, dword ptr [rip + PASS_STR@GOTPCREL] 131 | mov eax, [eax] 132 | 133 | mov ebx, [rip + RIPREL_STR@GOTPCREL] 134 | mov [ebx], eax 135 | 136 | mov rax, 1 # SYS_write 137 | mov rdi, 1 138 | mov rsi, OFFSET RIPREL_STR 139 | mov edx, RIPREL_STR_LEN 140 | syscall 141 | 142 | # Store immediate 143 | mov dword ptr [PASS_BUF], 0x53534150 # 'PASS' 144 | mov eax, [PASS_BUF] 145 | mov [STORE_IMM_STR], eax 146 | 147 | mov rax, 1 # SYS_write 148 | mov rdi, 1 149 | mov rsi, OFFSET STORE_IMM_STR 150 | mov edx, STORE_IMM_STR_LEN 151 | syscall 152 | 153 | # Exit 154 | mov rax, 60 # SYS_exit 155 | mov rdi, 0 156 | syscall 157 | -------------------------------------------------------------------------------- /test/asm/callret.S: -------------------------------------------------------------------------------- 1 | .intel_syntax noprefix 2 | 3 | .data 4 | CALL_ADDR_PASS_STR: .ascii "PASS: CALL pushed correct address onto the stack\n" 5 | CALL_ADDR_PASS_STR_LEN = . - CALL_ADDR_PASS_STR 6 | CALL_ADDR_FAIL_STR: .ascii "FAIL: CALL pushed wrong address onto the stack\n" 7 | CALL_ADDR_FAIL_STR_LEN = . 
- CALL_ADDR_FAIL_STR 8 | 9 | CALL_RET_IMM_PASS_STR: .ascii "PASS: call(imm)+ret executed successfully\n" 10 | CALL_RET_IMM_PASS_STR_LEN = . - CALL_RET_IMM_PASS_STR 11 | CALL_RET_IMM_FAIL_STR: .ascii "FAIL: call(imm)+ret executed incorrectly\n" 12 | CALL_RET_IMM_FAIL_STR_LEN = . - CALL_RET_IMM_FAIL_STR 13 | 14 | CALL_RET_REG_PASS_STR: .ascii "PASS: call(reg)+ret executed successfully\n" 15 | CALL_RET_REG_PASS_STR_LEN = . - CALL_RET_REG_PASS_STR 16 | CALL_RET_REG_FAIL_STR: .ascii "FAIL: call(reg)+ret executed incorrectly\n" 17 | CALL_RET_REG_FAIL_STR_LEN = . - CALL_RET_REG_FAIL_STR 18 | 19 | CALL_RET_IMM_BEFORE_PASS_STR: .ascii "PASS: call(imm)+ret (before) executed successfully\n" 20 | CALL_RET_IMM_BEFORE_PASS_STR_LEN = . - CALL_RET_IMM_BEFORE_PASS_STR 21 | CALL_RET_IMM_BEFORE_FAIL_STR: .ascii "FAIL: call(imm)+ret (before) executed incorrectly\n" 22 | CALL_RET_IMM_BEFORE_FAIL_STR_LEN = . - CALL_RET_IMM_BEFORE_FAIL_STR 23 | 24 | CALL_RET_IMM_GLOBAL_PASS_STR: .ascii "PASS: call(imm)+ret to global function executed successfully\n" 25 | CALL_RET_IMM_GLOBAL_PASS_STR_LEN = . - CALL_RET_IMM_GLOBAL_PASS_STR 26 | CALL_RET_IMM_GLOBAL_FAIL_STR: .ascii "FAIL: call(imm)+ret to global function executed incorrectly\n" 27 | CALL_RET_IMM_GLOBAL_FAIL_STR_LEN = . 
- CALL_RET_IMM_GLOBAL_FAIL_STR 28 | .text 29 | 30 | .type _start, @function 31 | .global _start 32 | _start: 33 | # 34 | # Test 1: Check return address pushed to stack on CALL 35 | # 36 | call 1f 37 | 38 | mov rax, 60 # SYS_exit 39 | mov rdi, 1 # FAIL: Unreachable 40 | syscall 41 | 42 | 1: 43 | # Check whether the return address matches expected 44 | pop rax 45 | cmp rax, 0x410005 # expected return address 46 | je 2f 47 | 48 | #no match 49 | mov rax, 1 # SYS_write 50 | mov rdi, 1 51 | mov rsi, OFFSET CALL_ADDR_FAIL_STR 52 | mov edx, CALL_ADDR_FAIL_STR_LEN 53 | syscall 54 | jmp 10f 55 | 56 | 2: #match 57 | mov rax, 1 # SYS_write 58 | mov rdi, 1 59 | mov rsi, OFFSET CALL_ADDR_PASS_STR 60 | mov edx, CALL_ADDR_PASS_STR_LEN 61 | syscall 62 | 63 | # 64 | # Test 2 : Check CALL (REL_IMM) and RET 65 | # 66 | 10: 67 | mov rax, 0 68 | call 2f 69 | 70 | cmp rax, 1 71 | jne 1f 72 | 73 | #pass 74 | mov rax, 1 # SYS_write 75 | mov rdi, 1 76 | mov rsi, OFFSET CALL_RET_IMM_PASS_STR 77 | mov edx, CALL_RET_IMM_PASS_STR_LEN 78 | syscall 79 | jmp 10f 80 | 81 | #fail 82 | 1: 83 | mov rax, 1 # SYS_write 84 | mov rdi, 1 85 | mov rsi, OFFSET CALL_RET_IMM_FAIL_STR 86 | mov edx, CALL_RET_IMM_FAIL_STR_LEN 87 | syscall 88 | jmp 10f 89 | 90 | 91 | 2: mov rax, 1 92 | ret 93 | mov rax, 60 # SYS_exit 94 | mov rdi, 1 95 | syscall 96 | 97 | # 98 | # Test 3 : Check CALL (REG) and RET 99 | # 100 | 10: 101 | mov rax, 0 102 | lea rdi, [rip+2f] 103 | call rdi 104 | 105 | cmp rax, 1 106 | jne 1f 107 | 108 | #pass 109 | mov rax, 1 # SYS_write 110 | mov rdi, 1 111 | mov rsi, OFFSET CALL_RET_REG_PASS_STR 112 | mov edx, CALL_RET_REG_PASS_STR_LEN 113 | syscall 114 | jmp 10f 115 | 116 | #fail 117 | 1: 118 | mov rax, 1 # SYS_write 119 | mov rdi, 1 120 | mov rsi, OFFSET CALL_RET_REG_FAIL_STR 121 | mov edx, CALL_RET_REG_FAIL_STR_LEN 122 | syscall 123 | jmp 10f 124 | 125 | 2: 126 | mov rax, 1 127 | ret 128 | 129 | # 130 | # Test 4: Check CALL (IMM) and RET when caller occurs AFTER destination 131 | # 132 | 10: 133 
| jmp 2f 134 | 135 | 1: #destination 136 | mov rax, 1 137 | ret 138 | 139 | 2: #test entry 140 | mov rax, 0 141 | call 1b 142 | cmp rax, 1 143 | jne 1f 144 | 145 | #pass 146 | mov rax, 1 # SYS_write 147 | mov rdi, 1 148 | mov rsi, OFFSET CALL_RET_IMM_BEFORE_PASS_STR 149 | mov edx, CALL_RET_IMM_BEFORE_PASS_STR_LEN 150 | syscall 151 | jmp 10f 152 | 153 | #fail 154 | 1: 155 | mov rax, 1 # SYS_write 156 | mov rdi, 1 157 | mov rsi, OFFSET CALL_RET_IMM_BEFORE_FAIL_STR 158 | mov edx, CALL_RET_IMM_BEFORE_FAIL_STR_LEN 159 | syscall 160 | # 161 | # Test 5: CALL (IMM) global function 162 | # 163 | 10: 164 | mov rax, 0 165 | call test5_func 166 | cmp rax, 1 167 | jne 1f 168 | 169 | #pass 170 | mov rax, 1 # SYS_write 171 | mov rdi, 1 172 | mov rsi, OFFSET CALL_RET_IMM_GLOBAL_PASS_STR 173 | mov edx, CALL_RET_IMM_GLOBAL_PASS_STR_LEN 174 | syscall 175 | jmp 10f 176 | 177 | #fail 178 | 1: 179 | mov rax, 1 # SYS_write 180 | mov rdi, 1 181 | mov rsi, OFFSET CALL_RET_IMM_GLOBAL_FAIL_STR 182 | mov edx, CALL_RET_IMM_GLOBAL_FAIL_STR_LEN 183 | syscall 184 | 185 | # 186 | # End 187 | # 188 | 10: 189 | mov rax, 60 # SYS_exit 190 | mov rdi, 0 191 | syscall 192 | .size _start, .-_start 193 | 194 | 195 | # 196 | # Function called by test 5 197 | # 198 | .type test5_func, @function 199 | .global test5_func 200 | test5_func: 201 | mov rax, 1 202 | ret 203 | .size test5_func, .-test5_func 204 | -------------------------------------------------------------------------------- /test/asm/hello.S: -------------------------------------------------------------------------------- 1 | .intel_syntax noprefix 2 | 3 | .data 4 | msg: 5 | .ascii "Hello, world!\n" 6 | len = . 
- msg 7 | 8 | .text 9 | 10 | .global _start 11 | _start: 12 | mov rax, 1 # SYS_write 13 | mov rdi, 1 14 | mov rsi, OFFSET msg 15 | mov edx, len 16 | syscall 17 | 18 | mov rax, 60 # SYS_exit 19 | mov rdi, 0 20 | syscall 21 | -------------------------------------------------------------------------------- /test/asm/hello.asm: -------------------------------------------------------------------------------- 1 | section .data 2 | HELLO_STR: db 'Hello, World', 10 3 | HELLO_STR_LEN: equ $-HELLO_STR 4 | 5 | section .text 6 | 7 | global _start 8 | _start: 9 | mov rax, 1 ; SYS_write 10 | mov rdi, 1 11 | mov rsi, HELLO_STR 12 | mov edx, HELLO_STR_LEN 13 | syscall 14 | 15 | mov rax, 60 ; SYS_exit 16 | mov rdi, 0 17 | syscall 18 | -------------------------------------------------------------------------------- /test/asm/hello_sse.S: -------------------------------------------------------------------------------- 1 | .intel_syntax noprefix 2 | 3 | .data 4 | hello_str: .ascii "Hello, world!!!\n" 5 | 6 | .align 16 7 | .lcomm hello_buf, 16 8 | .text 9 | 10 | .align 4 11 | .global _start 12 | _start: 13 | # Test 1: load+store from/to aligned buffer 14 | mov rdx, OFFSET hello_str 15 | movapd xmm0, [rdx] 16 | mov rdx, OFFSET hello_buf 17 | movapd [rdx], xmm0 18 | 19 | mov rax, 1 # SYS_write 20 | mov rdi, 1 21 | mov rsi, OFFSET hello_buf 22 | mov edx, 16 23 | syscall 24 | 25 | # Test 2: load+store from/to unaligned buffer 26 | mov rdx, OFFSET hello_str - 1 27 | movupd xmm0, [rdx + 1] 28 | mov rdx, OFFSET hello_buf - 1 29 | movupd [rdx + 1], xmm0 30 | 31 | mov rax, 1 # SYS_write 32 | mov rdi, 1 33 | mov rsi, OFFSET hello_buf 34 | mov edx, 16 35 | syscall 36 | 37 | # Test 3: load+store with intermediate reg-reg mov 38 | mov rdx, OFFSET hello_str 39 | movapd xmm0, [rdx] 40 | movapd xmm1, xmm0 41 | mov rdx, OFFSET hello_buf 42 | movapd [rdx], xmm1 43 | 44 | mov rax, 1 # SYS_write 45 | mov rdi, 1 46 | mov rsi, OFFSET hello_buf 47 | mov edx, 16 48 | syscall 49 | 50 | # Test 4: MOVD 51 | movd 
xmm0, dword ptr [hello_str] 52 | movd dword ptr [hello_buf], xmm0 53 | movd xmm0, dword ptr [hello_str + 4] 54 | movd dword ptr [hello_buf + 4], xmm0 55 | movd xmm0, dword ptr [hello_str + 8] 56 | movd dword ptr [hello_buf + 8], xmm0 57 | movd xmm0, dword ptr [hello_str + 12] 58 | movd dword ptr [hello_buf + 12], xmm0 59 | 60 | mov rax, 1 # SYS_write 61 | mov rdi, 1 62 | mov rsi, OFFSET hello_buf 63 | mov edx, 16 64 | syscall 65 | 66 | # exit(0) 67 | mov rax, 60 # SYS_exit 68 | mov rdi, 0 69 | syscall 70 | -------------------------------------------------------------------------------- /test/asm/jump.S: -------------------------------------------------------------------------------- 1 | .intel_syntax noprefix 2 | 3 | .data 4 | JUMP_1_STR: .ascii "Jump 1 taken\n" 5 | JUMP_1_STR_LEN = . - JUMP_1_STR 6 | JUMP_2_STR: .ascii "Jump 2 taken\n" 7 | JUMP_2_STR_LEN = . - JUMP_2_STR 8 | 9 | .text 10 | 11 | .global _start 12 | _start: 13 | jmp 1f 14 | 2: 15 | mov eax, 1 # SYS_write 16 | mov edi, 1 17 | mov esi, OFFSET JUMP_2_STR 18 | mov edx, JUMP_2_STR_LEN 19 | syscall 20 | jmp 3f 21 | 22 | 1: 23 | mov eax, 1 # SYS_write 24 | mov edi, 1 25 | mov esi, OFFSET JUMP_1_STR 26 | mov edx, JUMP_1_STR_LEN 27 | syscall 28 | jmp 2b 29 | 30 | 3: 31 | mov rax, 60 # SYS_exit 32 | mov edi, 0 33 | syscall 34 | -------------------------------------------------------------------------------- /test/asm/jump.asm: -------------------------------------------------------------------------------- 1 | section .data 2 | JUMP_1_STR: db 'Jump 1 taken', 10 3 | JUMP_1_STR_LEN: equ $-JUMP_1_STR 4 | JUMP_2_STR: db 'Jump 2 taken', 10 5 | JUMP_2_STR_LEN: equ $-JUMP_2_STR 6 | 7 | section .text 8 | 9 | global _start 10 | _start: 11 | jmp 1f 12 | ..@j2: 13 | mov rax, 1 ; SYS_write 14 | mov rdi, 1 15 | mov rsi, JUMP_2_STR 16 | mov edx, JUMP_2_STR_LEN 17 | syscall 18 | jmp ..@exit 19 | 20 | 1: 21 | mov rax, 1 ; SYS_write 22 | mov rdi, 1 23 | mov rsi, JUMP_1_STR 24 | mov edx, JUMP_1_STR_LEN 25 | syscall 26 | jmp 
..@j2 27 | 28 | ..@exit: 29 | mov rax, 60 ; SYS_exit 30 | mov rdi, 0 31 | syscall 32 | -------------------------------------------------------------------------------- /test/asm/jump_sf.S: -------------------------------------------------------------------------------- 1 | .intel_syntax noprefix 2 | 3 | .data 4 | TEST_1_STR_PASS: .ascii "PASS: (100-110) -> SF=1\n" 5 | TEST_1_STR_PASS_LEN = . - TEST_1_STR_PASS 6 | TEST_1_STR_FAIL: .ascii "FAIL: (100-110) -> SF=0\n" 7 | TEST_1_STR_FAIL_LEN = . - TEST_1_STR_FAIL 8 | 9 | TEST_2_STR_PASS: .ascii "PASS: (100-99) -> SF=0\n" 10 | TEST_2_STR_PASS_LEN = . - TEST_2_STR_PASS 11 | TEST_2_STR_FAIL: .ascii "FAIL: (100-99) -> SF=1\n" 12 | TEST_2_STR_FAIL_LEN = . - TEST_2_STR_FAIL 13 | 14 | .text 15 | 16 | .macro print str len 17 | mov eax, 1 # SYS_write 18 | mov edi, 1 19 | mov esi, OFFSET \str 20 | mov edx, OFFSET \len 21 | syscall 22 | .endm 23 | 24 | .global _start 25 | _start: 26 | 0: # Test 1: SF set (negative result) 27 | mov eax, OFFSET 100 28 | cmp eax, OFFSET 110 29 | jns 2f # Bad - 100-110 > 0 30 | js 1f # Good - 100-110 < 0 31 | jmp 2f # Bad - Unreachable 32 | 1: # Test 1 PASS 33 | print TEST_1_STR_PASS, TEST_1_STR_PASS_LEN 34 | jmp 10f 35 | 2: #Test 1 FAIL 36 | print TEST_1_STR_FAIL, TEST_1_STR_FAIL_LEN 37 | jmp 1000f #exit 38 | 39 | 10: # TEST 2: SF clear (positive result) 40 | mov eax, OFFSET 100 41 | cmp eax, 99 42 | js 2f # Bad - 100-99 < 0 43 | jns 1f # Good - 100-99 > 0 44 | jmp 2f # Bad - Unreachable 45 | 1: # Test 2 PASS 46 | print TEST_2_STR_PASS, TEST_2_STR_PASS_LEN 47 | jmp 20f 48 | 2: #Test 2 FAIL 49 | print TEST_2_STR_FAIL, TEST_2_STR_FAIL_LEN 50 | jmp 1000f #exit 51 | 52 | 20: 53 | 54 | 1000: 55 | mov rax, 60 # SYS_exit 56 | mov edi, 0 57 | syscall 58 | -------------------------------------------------------------------------------- /test/asm/jump_zf.S: -------------------------------------------------------------------------------- 1 | .intel_syntax noprefix 2 | 3 | .data 4 | TEST_1_STR_PASS: .ascii "PASS: (100-10) -> ZF=0\n" 5 | 
TEST_1_STR_PASS_LEN = . - TEST_1_STR_PASS 6 | TEST_1_STR_FAIL: .ascii "FAIL: (100-10) -> ZF=1\n" 7 | TEST_1_STR_FAIL_LEN = . - TEST_1_STR_FAIL 8 | 9 | TEST_2_STR_PASS: .ascii "PASS: (100-100) -> ZF=1\n" 10 | TEST_2_STR_PASS_LEN = . - TEST_2_STR_PASS 11 | TEST_2_STR_FAIL: .ascii "FAIL: (100-100) -> ZF=0\n" 12 | TEST_2_STR_FAIL_LEN = . - TEST_2_STR_FAIL 13 | 14 | TEST_3_STR_PASS: .ascii "PASS: ((u32)0xDEADBEEFCAFEBABA-(u32)0xCAFEBABA) -> ZF=1\n" 15 | TEST_3_STR_PASS_LEN = . - TEST_3_STR_PASS 16 | TEST_3_STR_FAIL: .ascii "FAIL: ((u32)0xDEADBEEFCAFEBABA-(u32)0xCAFEBABA) -> ZF=0\n" 17 | TEST_3_STR_FAIL_LEN = . - TEST_3_STR_FAIL 18 | 19 | TEST_3a_STR_PASS: .ascii "PASS: (0xDEADBEEFCAFEBABA-0xCAFEBABA) -> ZF=0\n" 20 | TEST_3a_STR_PASS_LEN = . - TEST_3a_STR_PASS 21 | TEST_3a_STR_FAIL: .ascii "FAIL: (0xDEADBEEFCAFEBABA-0xCAFEBABA) -> ZF=1\n" 22 | TEST_3a_STR_FAIL_LEN = . - TEST_3a_STR_FAIL 23 | 24 | .text 25 | 26 | .macro print str len 27 | mov eax, 1 # SYS_write 28 | mov edi, 1 29 | mov esi, OFFSET \str 30 | mov edx, OFFSET \len 31 | syscall 32 | .endm 33 | 34 | .global _start 35 | _start: 36 | 0: # Test 1: !ZF (32-bit) 37 | mov eax, OFFSET 100 38 | cmp eax, OFFSET 10 39 | jz 2f # Bad - 100-10 == 0 40 | jnz 1f # Good - 100-10 != 0 41 | jmp 2f # Bad - Unreachable 42 | 1: # Test 1 PASS 43 | print TEST_1_STR_PASS, TEST_1_STR_PASS_LEN 44 | jmp 10f 45 | 2: #Test 1 FAIL 46 | print TEST_1_STR_FAIL, TEST_1_STR_FAIL_LEN 47 | jmp 1000f #exit 48 | 49 | 10: # TEST 2: ZF 50 | mov eax, OFFSET 100 51 | cmp eax, 100 52 | jnz 2f # Bad - 100-100 != 0 53 | jz 1f # Good - 100-100 == 0 54 | jmp 2f # Bad - Unreachable 55 | 1: # Test 2 PASS 56 | print TEST_2_STR_PASS, TEST_2_STR_PASS_LEN 57 | jmp 20f 58 | 2: #Test 2 FAIL 59 | print TEST_2_STR_FAIL, TEST_2_STR_FAIL_LEN 60 | jmp 1000f #exit 61 | 62 | 63 | 20: # Test 3: !ZF (64-bit vs 32-bit) 64 | mov rax, OFFSET 0xDEADBEEFCAFEBABA 65 | mov rbx, OFFSET 0xCAFEBABA 66 | # for 32-bit, rbx == rax 67 | # for 64-bit, rax != rbx 68 | cmp eax, ebx 69 | jnz 
2f # Bad 70 | jz 1f # Good 71 | jmp 2f # Bad 72 | 2: 73 | print TEST_3_STR_FAIL, TEST_3_STR_FAIL_LEN 74 | jmp 1000f #exit 75 | 76 | 1: # Now try 64-bit 77 | cmp rax, rbx 78 | print TEST_3_STR_PASS, TEST_3_STR_PASS_LEN 79 | jz 2f # Bad 80 | jnz 1f # Good 81 | jmp 2f # Bad 82 | 2: 83 | print TEST_3a_STR_FAIL, TEST_3a_STR_FAIL_LEN 84 | jmp 1000f 85 | 1: 86 | print TEST_3a_STR_PASS, TEST_3a_STR_PASS_LEN 87 | 88 | 1000: 89 | mov rax, 60 # SYS_exit 90 | mov edi, 0 91 | syscall 92 | -------------------------------------------------------------------------------- /test/asm/loadstore.S: -------------------------------------------------------------------------------- 1 | .intel_syntax noprefix 2 | 3 | .data 4 | BYTE_STR: .ascii "PASS: BYTE memcpy worked!\n" 5 | BYTE_STR_LEN = . - BYTE_STR 6 | 7 | WORD_STR: .ascii "PASS: WORD memcpy worked!\n" 8 | WORD_STR_LEN = . - WORD_STR # 26 9 | 10 | DWORD_STR: .ascii "PASS: DWORD memcpy worked!!\n" 11 | DWORD_STR_LEN = . - DWORD_STR # 28 12 | 13 | QWORD_STR: .ascii "PASS: QWORD memcpy worked....!!\n" 14 | QWORD_STR_LEN = . - QWORD_STR # 32 15 | 16 | ALIAS_PASS_STR: .ascii "PASS: Load to aliased register (ah) worked!\n" 17 | ALIAS_PASS_STR_LEN = . - ALIAS_PASS_STR 18 | ALIAS_FAIL_STR: .ascii "FAIL: Load to ailased register (ah) failed!\n" 19 | ALIAS_FAIL_STR_LEN = . - ALIAS_FAIL_STR 20 | 21 | ALIAS2_PASS_STR: .ascii "PASS: Load to aliased register (al) worked!\n" 22 | ALIAS2_PASS_STR_LEN = . - ALIAS2_PASS_STR 23 | ALIAS2_FAIL_STR: .ascii "FAIL: Load to ailased register (al) failed!\n" 24 | ALIAS2_FAIL_STR_LEN = . - ALIAS2_FAIL_STR 25 | 26 | ALIAS3_PASS_STR: .ascii "PASS: Load to aliased register (ax) worked!\n" 27 | ALIAS3_PASS_STR_LEN = . - ALIAS3_PASS_STR 28 | ALIAS3_FAIL_STR: .ascii "FAIL: Load to ailased register (ax) failed!\n" 29 | ALIAS3_FAIL_STR_LEN = . - ALIAS3_FAIL_STR 30 | 31 | NEGATIVE_INDEX_STR: .ascii "FAIL: NEGATIVE_INDEX loadstore worked!\n" 32 | NEGATIVE_INDEX_STR_LEN = . 
- NEGATIVE_INDEX_STR 33 | 34 | NEGATIVE_INDEX32_STR: .ascii "FAIL: NEGATIVE_INDEX32 loadstore worked!\n" 35 | NEGATIVE_INDEX32_STR_LEN = . - NEGATIVE_INDEX32_STR 36 | 37 | ALIASED_BASE_STR: .ascii "FAIL: ALIASED_BASE loadstore worked!\n" 38 | ALIASED_BASE_STR_LEN = . - ALIASED_BASE_STR 39 | 40 | .lcomm buf, 1024 41 | zero_byte: .byte 0 42 | zero_word: .word 0 43 | .text 44 | 45 | .global _start 46 | _start: 47 | # Copy BYTE_STR one byte at a time 48 | mov rax, OFFSET BYTE_STR 49 | mov rbx, 0 50 | mov rcx, OFFSET buf 51 | 0: 52 | cmp rbx, BYTE_STR_LEN 53 | jge 1f 54 | 55 | lea rdx, BYTE PTR[rax+rbx] 56 | mov dh, BYTE PTR[rdx] 57 | mov BYTE PTR[rcx+rbx], dh 58 | inc rbx 59 | 60 | jmp 0b 61 | 62 | 1: # Done copying BYTE_STR 63 | mov rax, 1 # SYS_write 64 | mov rdi, 1 65 | mov rsi, OFFSET buf 66 | mov edx, BYTE_STR_LEN 67 | syscall 68 | 69 | # Copy WORD_STR one word at a time 70 | mov rax, OFFSET WORD_STR 71 | mov rbx, 0 72 | mov rcx, OFFSET buf 73 | 0: 74 | cmp rbx, WORD_STR_LEN/2 75 | jge 1f 76 | 77 | mov dx, WORD PTR[rax+rbx*2] 78 | mov WORD PTR[rcx+rbx*2], dx 79 | inc rbx 80 | 81 | jmp 0b 82 | 1: # Done copying WORD_STR 83 | mov rax, 1 # SYS_write 84 | mov rdi, 1 85 | mov rsi, OFFSET buf 86 | mov edx, WORD_STR_LEN 87 | syscall 88 | 89 | # Copy DWORD_STR one dword at a time 90 | mov rax, OFFSET DWORD_STR 91 | mov rbx, 0 92 | mov rcx, OFFSET buf 93 | 0: 94 | cmp rbx, DWORD_STR_LEN/4 95 | jge 1f 96 | 97 | mov edx, DWORD PTR[rax+rbx*4] 98 | mov DWORD PTR[rcx+rbx*4], edx 99 | inc rbx 100 | 101 | jmp 0b 102 | 1: # Done copying DWORD_STR 103 | mov rax, 1 # SYS_write 104 | mov rdi, 1 105 | mov rsi, OFFSET buf 106 | mov edx, DWORD_STR_LEN 107 | syscall 108 | 109 | # Copy QWORD_STR one qword at a time 110 | mov rax, OFFSET QWORD_STR 111 | mov rbx, 0 112 | mov rcx, OFFSET buf 113 | 0: 114 | cmp rbx, QWORD_STR_LEN/8 115 | jge 1f 116 | 117 | mov rdx, QWORD PTR[rax+rbx*8] 118 | mov QWORD PTR[rcx+rbx*8], rdx 119 | inc rbx 120 | 121 | jmp 0b 122 | 1: # Done copying QWORD_STR 123 | 
mov rax, 1 # SYS_write 124 | mov rdi, 1 125 | mov rsi, OFFSET buf 126 | mov edx, QWORD_STR_LEN 127 | syscall 128 | 129 | # Confirm loads of aliased registers work 130 | mov rax, -1 131 | mov ah, [zero_byte] 132 | mov rbx, 0xFFFFFFFFFFFF00FF 133 | cmp rax, rbx 134 | jne 1f 135 | 136 | mov rax, 1 # SYS_write 137 | mov rdi, 1 138 | mov rsi, OFFSET ALIAS_PASS_STR 139 | mov edx, ALIAS_PASS_STR_LEN 140 | syscall 141 | jmp 10f 142 | 143 | 1: #fail 144 | mov rax, 1 # SYS_write 145 | mov rdi, 1 146 | mov rsi, OFFSET ALIAS_FAIL_STR 147 | mov edx, ALIAS_FAIL_STR_LEN 148 | syscall 149 | 150 | 10: 151 | mov rax, -1 152 | mov al, [zero_byte] 153 | mov rbx, 0xFFFFFFFFFFFFFF00 154 | cmp rax, rbx 155 | jne 1f 156 | 157 | mov rax, 1 # SYS_write 158 | mov rdi, 1 159 | mov rsi, OFFSET ALIAS2_PASS_STR 160 | mov edx, ALIAS2_PASS_STR_LEN 161 | syscall 162 | jmp 10f 163 | 164 | 1: #fail 165 | mov rax, 1 # SYS_write 166 | mov rdi, 1 167 | mov rsi, OFFSET ALIAS2_FAIL_STR 168 | mov edx, ALIAS2_FAIL_STR_LEN 169 | syscall 170 | 171 | 10: 172 | mov rax, -1 173 | mov ax, [zero_word] 174 | mov rbx, 0xFFFFFFFFFFFF0000 175 | cmp rax, rbx 176 | jne 1f 177 | 178 | mov rax, 1 # SYS_write 179 | mov rdi, 1 180 | mov rsi, OFFSET ALIAS3_PASS_STR 181 | mov edx, ALIAS3_PASS_STR_LEN 182 | syscall 183 | jmp 10f 184 | 185 | 1: #fail 186 | mov rax, 1 # SYS_write 187 | mov rdi, 1 188 | mov rsi, OFFSET ALIAS3_FAIL_STR 189 | mov edx, ALIAS3_FAIL_STR_LEN 190 | syscall 191 | 192 | 10: # Negative index 193 | mov rax, OFFSET NEGATIVE_INDEX_STR+4 194 | mov rbx, -4 195 | mov dword ptr [rax+rbx], 0x53534150 # 'PASS' 196 | 197 | mov rax, 1 # SYS_write 198 | mov rdi, 1 199 | mov rsi, OFFSET NEGATIVE_INDEX_STR 200 | mov edx, NEGATIVE_INDEX_STR_LEN 201 | syscall 202 | 203 | 10: # Negative 32-bit index 204 | mov rax, OFFSET NEGATIVE_INDEX32_STR+4 205 | mov rbx, 0x80000000fffffffc # -4 as an i32, garbage as an i64 206 | mov dword ptr [eax+ebx], 0x53534150 # 'PASS' 207 | 208 | mov rax, 1 # SYS_write 209 | mov rdi, 1 210 | mov 
rsi, OFFSET NEGATIVE_INDEX32_STR 211 | mov edx, NEGATIVE_INDEX32_STR_LEN 212 | syscall 213 | 214 | 10: # Aliased base 215 | mov rax, OFFSET ALIASED_BASE_STR 216 | mov rbx, 0x8000000000000000 217 | or rax, rbx 218 | mov dword ptr [eax], 0x53534150 # 'PASS' 219 | 220 | mov rax, 1 # SYS_write 221 | mov rdi, 1 222 | mov rsi, OFFSET ALIASED_BASE_STR 223 | mov edx, ALIASED_BASE_STR_LEN 224 | syscall 225 | 226 | 99: 227 | mov rax, 60 # SYS_exit 228 | mov rdi, 0 229 | syscall 230 | .size _start, .-_start 231 | -------------------------------------------------------------------------------- /test/asm/mov.S: -------------------------------------------------------------------------------- 1 | 2 | .intel_syntax noprefix 3 | 4 | .macro print str len 5 | mov eax, 1 # SYS_write 6 | mov edi, 1 7 | mov esi, OFFSET \str 8 | mov edx, OFFSET \len 9 | syscall 10 | .endm 11 | 12 | .data 13 | TEST_0_STR_PASS: .ascii "PASS: (cmp (u16)0x7FFF, (u16)-1) -> jo\n" 14 | TEST_0_STR_PASS_LEN = . - TEST_0_STR_PASS 15 | TEST_0_STR_FAIL: .ascii "FAIL: (cmp (u16)0x7FFF, (u16)-1) -> jno\n" 16 | TEST_0_STR_FAIL_LEN = . - TEST_0_STR_FAIL 17 | TEST_0_STR_UNREACHABLE: .ascii "FAIL: (cmp (u16)0x7FFF, (u16)-1) -> UNREACHABLE!\n" 18 | TEST_0_STR_UNREACHABLE_LEN = . - TEST_0_STR_UNREACHABLE 19 | 20 | TEST_1_STR_PASS: .ascii "PASS: (cmp (u16)0x8000, (u16)1) -> jo\n" 21 | TEST_1_STR_PASS_LEN = . - TEST_1_STR_PASS 22 | TEST_1_STR_FAIL: .ascii "FAIL: (cmp (u16)0x8000, (u16)1) -> jno\n" 23 | TEST_1_STR_FAIL_LEN = . - TEST_1_STR_FAIL 24 | TEST_1_STR_UNREACHABLE: .ascii "FAIL: (cmp (u16)0x8000, (u16)1) -> UNREACHABLE!\n" 25 | TEST_1_STR_UNREACHABLE_LEN = . - TEST_1_STR_UNREACHABLE 26 | 27 | TEST_2_STR_PASS: .ascii "PASS: (cmp (u16)0x8000, (u16)1) -> jo\n" 28 | TEST_2_STR_PASS_LEN = . - TEST_2_STR_PASS 29 | TEST_2_STR_FAIL: .ascii "FAIL: (cmp (u16)0x8000, (u16)1) -> jno\n" 30 | TEST_2_STR_FAIL_LEN = . 
- TEST_2_STR_FAIL 31 | TEST_2_STR_UNREACHABLE: .ascii "FAIL: (cmp (u16)0x8000, (u16)1) -> UNREACHABLE!\n" 32 | TEST_2_STR_UNREACHABLE_LEN = . - TEST_2_STR_UNREACHABLE 33 | 34 | .text 35 | .global _start 36 | _start: 37 | mov ax, OFFSET 0x8000 38 | mov rax, 60 # SYS_exit 39 | mov edi, 0 40 | syscall 41 | -------------------------------------------------------------------------------- /test/asm/pushpop.S: -------------------------------------------------------------------------------- 1 | .intel_syntax noprefix 2 | 3 | .data 4 | PUSH_IMM_STR: .ascii "FAIL: PUSH_IMM!\n" 5 | PUSH_IMM_STR_LEN = . - PUSH_IMM_STR 6 | 7 | PUSH_REG_STR: .ascii "FAIL: PUSH_REG!\n" 8 | PUSH_REG_STR_LEN = . - PUSH_REG_STR 9 | 10 | PUSH_MEM_STR: .ascii "FAIL: PUSH_MEM!\n" 11 | PUSH_MEM_STR_LEN = . - PUSH_MEM_STR 12 | 13 | .lcomm tmp, 4 14 | 15 | .text 16 | 17 | .global _start 18 | _start: 19 | # Push Immediate 20 | push 0x53534150 # 'PASS' 21 | # Push Register 22 | mov eax, 0x53534150 23 | push rax 24 | # Push Memory 25 | mov dword ptr [tmp], 0x53534150 26 | push qword ptr [tmp] 27 | 28 | # Pop Memory 29 | pop rax 30 | mov dword ptr [PUSH_MEM_STR], eax 31 | # Pop Register 32 | pop rax 33 | mov dword ptr [PUSH_REG_STR], eax 34 | # Pop Immediate 35 | pop rax 36 | mov dword ptr [PUSH_IMM_STR], eax 37 | 38 | # Print all 39 | mov rax, 1 # SYS_write 40 | mov rdi, 1 41 | mov rsi, OFFSET PUSH_IMM_STR 42 | mov edx, PUSH_IMM_STR_LEN 43 | syscall 44 | 45 | mov rax, 1 # SYS_write 46 | mov rdi, 1 47 | mov rsi, OFFSET PUSH_REG_STR 48 | mov edx, PUSH_REG_STR_LEN 49 | syscall 50 | 51 | mov rax, 1 # SYS_write 52 | mov rdi, 1 53 | mov rsi, OFFSET PUSH_MEM_STR 54 | mov edx, PUSH_MEM_STR_LEN 55 | syscall 56 | 57 | mov rax, 60 # SYS_exit 58 | mov rdi, 0 59 | syscall 60 | -------------------------------------------------------------------------------- /test/asm/stos.S: -------------------------------------------------------------------------------- 1 | .intel_syntax noprefix 2 | 3 | .data 4 | STOSB_EDI_STR: .ascii 
"STOSB_EDI: should be '11': 00\n" 5 | STOSB_EDI_STR_LEN = . - STOSB_EDI_STR 6 | 7 | STOSB_FORWARD_STR: .ascii "STOSB_FORWARD: should be '11': 00\n" 8 | STOSB_FORWARD_STR_LEN = . - STOSB_FORWARD_STR 9 | STOSB_BACKWARD_STR: .ascii "STOSB_BACKWARD: should be '11': 00\n" 10 | STOSB_BACKWARD_STR_LEN = . - STOSB_BACKWARD_STR 11 | 12 | REP_STOSB_STR: .ascii "REP_STOSB: should be '1110': 0000\n" 13 | REP_STOSB_STR_LEN = . - REP_STOSB_STR 14 | 15 | STOSW_FORWARD_STR: .ascii "STOSW_FORWARD: should be '1111': 0000\n" 16 | STOSW_FORWARD_STR_LEN = . - STOSW_FORWARD_STR 17 | STOSW_BACKWARD_STR: .ascii "STOSW_BACKWARD: should be '1111': 0000\n" 18 | STOSW_BACKWARD_STR_LEN = . - STOSW_BACKWARD_STR 19 | 20 | STOSD_FORWARD_STR: .ascii "STOSD_FORWARD: should be '11111111': 00000000\n" 21 | STOSD_FORWARD_STR_LEN = . - STOSD_FORWARD_STR 22 | STOSD_BACKWARD_STR: .ascii "STOSD_BACKWARD: should be '11111111': 00000000\n" 23 | STOSD_BACKWARD_STR_LEN = . - STOSD_BACKWARD_STR 24 | 25 | STOSQ_FORWARD_STR: .ascii "STOSQ_FORWARD: should be '1111111111111111': 0000000000000000\n" 26 | STOSQ_FORWARD_STR_LEN = . - STOSQ_FORWARD_STR 27 | STOSQ_BACKWARD_STR: .ascii "STOSQ_BACKWARD: should be '1111111111111111': 0000000000000000\n" 28 | STOSQ_BACKWARD_STR_LEN = . 
- STOSQ_BACKWARD_STR 29 | .text 30 | 31 | .global _start 32 | _start: 33 | # STOSB, DF=0, edi 34 | cld # clear direction flag 35 | 36 | mov rdi, OFFSET STOSB_EDI_STR + STOSB_EDI_STR_LEN - 3 37 | mov al, 0x31 # ASCII '1' 38 | mov r9, 0x8000000000000000 39 | or rdi, r9 40 | stosb [edi] 41 | stosb [edi] 42 | 43 | mov rax, 1 # SYS_write 44 | mov rdi, 1 45 | mov rsi, OFFSET STOSB_EDI_STR 46 | mov edx, STOSB_EDI_STR_LEN 47 | syscall 48 | 49 | # STOSB, DF=0 50 | cld # clear direction flag 51 | 52 | mov rdi, OFFSET STOSB_FORWARD_STR + STOSB_FORWARD_STR_LEN - 3 53 | mov al, 0x31 # ASCII '1' 54 | stosb [rdi] 55 | stosb [rdi] 56 | 57 | mov rax, 1 # SYS_write 58 | mov rdi, 1 59 | mov rsi, OFFSET STOSB_FORWARD_STR 60 | mov edx, STOSB_FORWARD_STR_LEN 61 | syscall 62 | 63 | #STOSB, DF=1 64 | std # set direction flag 65 | 66 | mov rdi, OFFSET STOSB_BACKWARD_STR + STOSB_BACKWARD_STR_LEN - 2 67 | mov al, 0x31 # ASCII '1' 68 | stosb [rdi] 69 | stosb [rdi] 70 | 71 | mov rax, 1 # SYS_write 72 | mov rdi, 1 73 | mov rsi, OFFSET STOSB_BACKWARD_STR 74 | mov edx, STOSB_BACKWARD_STR_LEN 75 | syscall 76 | 77 | # REP STOSB, DF=0 78 | cld # clear direction flag 79 | 80 | mov rdi, OFFSET REP_STOSB_STR + REP_STOSB_STR_LEN - 5 81 | mov al, 0x31 # ASCII '1' 82 | mov ecx, 3 83 | rep stosb [edi] 84 | 85 | mov rax, 1 # SYS_write 86 | mov rdi, 1 87 | mov rsi, OFFSET REP_STOSB_STR 88 | mov edx, REP_STOSB_STR_LEN 89 | syscall 90 | 91 | # STOSW, DF=0 92 | cld # clear direction flag 93 | 94 | mov rdi, OFFSET STOSW_FORWARD_STR + STOSW_FORWARD_STR_LEN - 5 95 | mov ax, 0x3131 # ASCII '11' 96 | stosw [rdi] 97 | stosw [rdi] 98 | 99 | mov rax, 1 # SYS_write 100 | mov rdi, 1 101 | mov rsi, OFFSET STOSW_FORWARD_STR 102 | mov edx, STOSW_FORWARD_STR_LEN 103 | syscall 104 | 105 | #STOSW, DF=1 106 | std # set direction flag 107 | 108 | mov rdi, OFFSET STOSW_BACKWARD_STR + STOSW_BACKWARD_STR_LEN - 3 109 | mov ax, 0x3131 # ASCII '11' 110 | stosw [rdi] 111 | stosw [rdi] 112 | 113 | mov rax, 1 # SYS_write 114 | mov rdi, 1 
115 | mov rsi, OFFSET STOSW_BACKWARD_STR 116 | mov edx, STOSW_BACKWARD_STR_LEN 117 | syscall 118 | 119 | # STOSD, DF=0 120 | cld # clear direction flag 121 | 122 | mov rdi, OFFSET STOSD_FORWARD_STR + STOSD_FORWARD_STR_LEN - 9 123 | mov eax, 0x31313131 # ASCII '1111' 124 | stosd [rdi] 125 | stosd [rdi] 126 | 127 | mov rax, 1 # SYS_write 128 | mov rdi, 1 129 | mov rsi, OFFSET STOSD_FORWARD_STR 130 | mov edx, STOSD_FORWARD_STR_LEN 131 | syscall 132 | 133 | #STOSD, DF=1 134 | std # set direction flag 135 | 136 | mov rdi, OFFSET STOSD_BACKWARD_STR + STOSD_BACKWARD_STR_LEN - 5 137 | mov eax, 0x31313131 # ASCII '1111' 138 | stosd [rdi] 139 | stosd [rdi] 140 | 141 | mov rax, 1 # SYS_write 142 | mov rdi, 1 143 | mov rsi, OFFSET STOSD_BACKWARD_STR 144 | mov edx, STOSD_BACKWARD_STR_LEN 145 | syscall 146 | 147 | # STOSQ, DF=0 148 | cld # clear direction flag 149 | 150 | mov rdi, OFFSET STOSQ_FORWARD_STR + STOSQ_FORWARD_STR_LEN - 17 151 | mov rax, 0x3131313131313131 # ASCII '11111111' 152 | stosq [rdi] 153 | stosq [rdi] 154 | 155 | mov rax, 1 # SYS_write 156 | mov rdi, 1 157 | mov rsi, OFFSET STOSQ_FORWARD_STR 158 | mov edx, STOSQ_FORWARD_STR_LEN 159 | syscall 160 | 161 | #STOSQ, DF=1 162 | std # set direction flag 163 | 164 | mov rdi, OFFSET STOSQ_BACKWARD_STR + STOSQ_BACKWARD_STR_LEN - 9 165 | mov rax, 0x3131313131313131 # ASCII '11111111' 166 | stosq [rdi] 167 | stosq [rdi] 168 | 169 | mov rax, 1 # SYS_write 170 | mov rdi, 1 171 | mov rsi, OFFSET STOSQ_BACKWARD_STR 172 | mov edx, STOSQ_BACKWARD_STR_LEN 173 | syscall 174 | 175 | #exit 176 | mov rax, 60 # SYS_exit 177 | mov rdi, 0 178 | syscall 179 | -------------------------------------------------------------------------------- /test/c/.hello.fs.bin.expected: -------------------------------------------------------------------------------- 1 | Hello from C! 
2 | -------------------------------------------------------------------------------- /test/c/.print_args.fs.bin.env: -------------------------------------------------------------------------------- 1 | { 2 | "_" : "print_args.fs.bin", 3 | "SHELL" : "/bin/bash", 4 | "RETREC_ENVIRONMENT_WORKS" : "YES" 5 | } 6 | -------------------------------------------------------------------------------- /test/c/.print_args.fs.bin.expected: -------------------------------------------------------------------------------- 1 | argc=1 2 | ------- 3 | argv: 4 | c/print_args.fs.bin 5 | ------- 6 | ------- 7 | envp: 8 | _=print_args.fs.bin 9 | SHELL=/bin/bash 10 | RETREC_ENVIRONMENT_WORKS=YES 11 | ------- 12 | -------------------------------------------------------------------------------- /test/c/auxval.fs.c: -------------------------------------------------------------------------------- 1 | #include "syscall.h" 2 | #include "start.h" 3 | 4 | #define ARRAY_SIZE(x) (sizeof((x)) / sizeof(*(x))) 5 | 6 | unsigned long strlen(const char *str) { 7 | unsigned long ret = 0; 8 | while (*(str++)) 9 | ++ret; 10 | return ret; 11 | } 12 | 13 | void print(const char *str) { 14 | __syscall4(1 /* sys_write */, 1 /* stdout */, (long)str, strlen(str), 0); 15 | } 16 | 17 | char *itoa64(unsigned long val, char *out_buf, unsigned long out_buf_len) { 18 | out_buf[out_buf_len-1] = '\0'; 19 | unsigned long i = out_buf_len - 1; 20 | while (i-- > 0) { 21 | out_buf[i] = '0' + (val % 10); 22 | val /= 10; 23 | if (!val) 24 | break; 25 | } 26 | return out_buf + i; 27 | } 28 | 29 | char hexchr(unsigned int digit) { 30 | switch (digit) { 31 | case 0x0: return '0'; 32 | case 0x1: return '1'; 33 | case 0x2: return '2'; 34 | case 0x3: return '3'; 35 | case 0x4: return '4'; 36 | case 0x5: return '5'; 37 | case 0x6: return '6'; 38 | case 0x7: return '7'; 39 | case 0x8: return '8'; 40 | case 0x9: return '9'; 41 | case 0xA: return 'A'; 42 | case 0xB: return 'B'; 43 | case 0xC: return 'C'; 44 | case 0xD: return 'D'; 45 | 
case 0xE: return 'E'; 46 | case 0xF: return 'F'; 47 | } 48 | return '?'; 49 | } 50 | 51 | char *itoa64_hex(unsigned long val, char *out_buf, unsigned long buf_len) { 52 | // Null terminate buffer 53 | unsigned long i = buf_len - 1; 54 | out_buf[i] = '\0'; 55 | 56 | // Add digits in reverse 57 | unsigned long curval = val; 58 | unsigned int written = 0; 59 | do { 60 | unsigned int digit = curval % 16; 61 | out_buf[--i] = hexchr(digit); 62 | curval /= 16; 63 | written++; 64 | 65 | if (!i) 66 | break; 67 | } while (curval); 68 | 69 | // Pad to 16 digits 70 | while (written < 16 && (i - 1)) { 71 | out_buf[--i] = '0'; 72 | written++; 73 | } 74 | 75 | return out_buf + i; 76 | } 77 | 78 | 79 | // Lifted from /usr/include/elf.h 80 | typedef struct 81 | { 82 | unsigned long a_type; /* Entry type */ 83 | union 84 | { 85 | unsigned long a_val; /* Integer value */ 86 | } a_un; 87 | } Elf64_auxv_t; 88 | 89 | int main(int argc, char **argv, char **envp) { 90 | char buf[256]; 91 | char **auxv_ptr = envp; 92 | while (*auxv_ptr++); /* increment auxv to end of envp */ 93 | 94 | Elf64_auxv_t *auxv; 95 | for (auxv = (Elf64_auxv_t *)auxv_ptr; auxv->a_type != 0 /* AT_NULL */; auxv++) { 96 | print("auxv type: "); 97 | print(itoa64(auxv->a_type, buf, sizeof(buf))); 98 | print(", value: "); 99 | switch (auxv->a_type) { 100 | case 15 /* AT_PLATFORM */: 101 | case 31 /* AT_EXECFN */: 102 | print((char *)auxv->a_un.a_val); 103 | break; 104 | case 25 /* AT_RANDOM */: 105 | { 106 | unsigned long *rand = (void *)auxv->a_un.a_val; 107 | print(itoa64_hex(rand[0], buf, sizeof(buf))); 108 | print(itoa64_hex(rand[1], buf, sizeof(buf))); 109 | break; 110 | } 111 | 112 | default: 113 | print(itoa64_hex(auxv->a_un.a_val, buf, sizeof(buf))); 114 | } 115 | print("\n"); 116 | } 117 | 118 | return 0; 119 | } 120 | -------------------------------------------------------------------------------- /test/c/cpuid.fs.c: -------------------------------------------------------------------------------- 1 | #include 
"syscall.h" 2 | #include "start.h" 3 | 4 | #define ARRAY_SIZE(x) (sizeof((x)) / sizeof(*(x))) 5 | 6 | unsigned long strlen(const char *str) { 7 | unsigned long ret = 0; 8 | while (*(str++)) 9 | ++ret; 10 | return ret; 11 | } 12 | 13 | void print(const char *str) { 14 | __syscall4(1 /* sys_write */, 1 /* stdout */, (long)str, strlen(str), 0); 15 | } 16 | 17 | char hexchr(unsigned int digit) { 18 | switch (digit) { 19 | case 0x0: return '0'; 20 | case 0x1: return '1'; 21 | case 0x2: return '2'; 22 | case 0x3: return '3'; 23 | case 0x4: return '4'; 24 | case 0x5: return '5'; 25 | case 0x6: return '6'; 26 | case 0x7: return '7'; 27 | case 0x8: return '8'; 28 | case 0x9: return '9'; 29 | case 0xA: return 'A'; 30 | case 0xB: return 'B'; 31 | case 0xC: return 'C'; 32 | case 0xD: return 'D'; 33 | case 0xE: return 'E'; 34 | case 0xF: return 'F'; 35 | } 36 | return '?'; 37 | } 38 | 39 | char *itoa_hex(unsigned int val, char *out_buf, unsigned long buf_len) { 40 | // Null terminate buffer 41 | unsigned long i = buf_len - 1; 42 | out_buf[i] = '\0'; 43 | 44 | // Add digits in reverse 45 | unsigned int curval = val; 46 | unsigned int written = 0; 47 | do { 48 | unsigned int digit = curval % 16; 49 | out_buf[--i] = hexchr(digit); 50 | curval /= 16; 51 | written++; 52 | 53 | if (!i) 54 | break; 55 | } while (curval); 56 | 57 | // Pad to 8 digits 58 | while (written < 8 && (i - 1)) { 59 | out_buf[--i] = '0'; 60 | written++; 61 | } 62 | 63 | return out_buf + i; 64 | } 65 | 66 | struct cpuid_result { 67 | unsigned int eax, ebx, ecx, edx; 68 | }; 69 | 70 | void get_cpuid(int func, int sub_func, struct cpuid_result *out); 71 | asm( 72 | ".intel_syntax noprefix\n" 73 | 74 | ".global get_cpuid\n" 75 | "get_cpuid:\n" 76 | "push rbx\n" 77 | "mov eax, edi\n" 78 | "mov ecx, esi\n" 79 | "mov r8, rdx\n" 80 | "cpuid\n" 81 | "mov dword ptr [r8], eax\n" 82 | "mov dword ptr [r8 + 4], ebx\n" 83 | "mov dword ptr [r8 + 8], ecx\n" 84 | "mov dword ptr [r8 + 12], edx\n" 85 | "pop rbx\n" 86 | "ret\n" 87 
| 88 | ".att_syntax\n" 89 | ); 90 | 91 | int main(int argc, char **argv, char **envp) { 92 | #define BUF_SIZE 128 93 | char buf[255]; 94 | static const struct { 95 | unsigned int func, sub_func; 96 | } cpuid_funcs[] = { 97 | {0x0, 0}, 98 | {0x1, 0}, 99 | {0x2, 0}, 100 | {0x3, 0}, 101 | {0x4, 0}, 102 | {0x4, 1}, 103 | {0x4, 2}, 104 | {0x4, 3}, 105 | {0x4, 4}, 106 | {0x5, 0}, 107 | {0x6, 0}, 108 | {0x7, 0}, 109 | {0x7, 1}, 110 | {0x7, 2}, 111 | {0x7, 3}, 112 | {0x7, 4}, 113 | {0x9, 0}, 114 | {0xA, 0}, 115 | {0xB, 0}, 116 | {0xD, 0}, 117 | {0xD, 1}, 118 | {0xD, 2}, 119 | {0xD, 3}, 120 | {0xD, 4}, 121 | {0xF, 0}, 122 | {0xF, 1}, 123 | {0x10, 0}, 124 | {0x10, 1}, 125 | {0x10, 2}, 126 | {0x12, 0}, 127 | {0x12, 1}, 128 | {0x12, 2}, 129 | {0x14, 0}, 130 | {0x14, 1}, 131 | {0x15, 0}, 132 | {0x16, 0}, 133 | {0x17, 0}, 134 | {0x17, 1}, 135 | {0x17, 2}, 136 | {0x17, 3}, 137 | {0x80000000, 0}, 138 | {0x80000001, 0}, 139 | {0x80000002, 0}, 140 | {0x80000003, 0}, 141 | {0x80000004, 0}, 142 | {0x80000005, 0}, 143 | {0x80000006, 0}, 144 | {0x80000007, 0}, 145 | {0x80000008, 0}, 146 | }; 147 | 148 | for (int i = 0; i < ARRAY_SIZE(cpuid_funcs); i++) { 149 | struct cpuid_result result; 150 | get_cpuid(cpuid_funcs[i].func, cpuid_funcs[i].sub_func, &result); 151 | 152 | print("CPUID("); 153 | print(itoa_hex(cpuid_funcs[i].func, buf, BUF_SIZE)); 154 | print(", "); 155 | print(itoa_hex(cpuid_funcs[i].sub_func, buf, BUF_SIZE)); 156 | print(")\n"); 157 | 158 | print("eax="); 159 | print(itoa_hex(result.eax, buf, BUF_SIZE)); 160 | print("\n"); 161 | 162 | print("ebx="); 163 | print(itoa_hex(result.ebx, buf, BUF_SIZE)); 164 | print("\n"); 165 | 166 | print("ecx="); 167 | print(itoa_hex(result.ecx, buf, BUF_SIZE)); 168 | print("\n"); 169 | 170 | print("edx="); 171 | print(itoa_hex(result.edx, buf, BUF_SIZE)); 172 | print("\n\n"); 173 | } 174 | 175 | return 0; 176 | } 177 | -------------------------------------------------------------------------------- /test/c/hello.fs.c: 
-------------------------------------------------------------------------------- 1 | #include "syscall.h" 2 | #include "start.h" 3 | 4 | unsigned long strlen(const char *str) { 5 | unsigned long ret = 0; 6 | while (*(str++)) 7 | ++ret; 8 | return ret; 9 | } 10 | 11 | void print(const char *str) { 12 | __syscall3(1 /* sys_write */, 1 /* stdout */, (long)str, strlen(str)); 13 | } 14 | 15 | int main(int argc, char **argv, char **envp) { 16 | print("Hello from C!\n"); 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /test/c/print_args.fs.c: -------------------------------------------------------------------------------- 1 | #include "syscall.h" 2 | #include "start.h" 3 | 4 | unsigned long strlen(const char *str) { 5 | unsigned long ret = 0; 6 | while (*(str++)) 7 | ++ret; 8 | return ret; 9 | } 10 | 11 | void print(const char *str) { 12 | __syscall3(1 /* sys_write */, 1 /* stdout */, (long)str, strlen(str)); 13 | } 14 | 15 | char *itoa(int val, char *out_buf, unsigned long out_buf_len) { 16 | out_buf[out_buf_len-1] = '\0'; 17 | unsigned long i = out_buf_len - 1; 18 | while (i-- > 0) { 19 | out_buf[i] = '0' + (val % 10); 20 | val /= 10; 21 | if (!val) 22 | break; 23 | } 24 | return out_buf + i; 25 | } 26 | 27 | int main(int argc, char **argv, char **envp) { 28 | char buf[255]; 29 | print("argc="); 30 | print(itoa(argc, buf, sizeof(buf))); 31 | print("\n"); 32 | 33 | print("-------\n"); 34 | print("argv:\n"); 35 | while (*argv) { 36 | print(*(argv++)); 37 | print("\n"); 38 | } 39 | print("-------\n"); 40 | 41 | print("-------\n"); 42 | print("envp:\n"); 43 | while (*envp) { 44 | print(*(envp++)); 45 | print("\n"); 46 | } 47 | print("-------\n"); 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /test/c/start.h: -------------------------------------------------------------------------------- 1 | // [[noreturn]] _start(void) { exit(main(argc, argv, 
#pragma once

/**
 * syscall helpers borrowed from musl libc:
 * https://git.musl-libc.org/cgit/musl/tree/arch/x86_64/syscall_arch.h
 *
 * Copyright © 2005-2020 Rich Felker, et al.
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * Each __syscallN helper executes the x86-64 `syscall` instruction with the
 * syscall number `n` and N arguments, and returns whatever value comes back
 * in rax.
 *
 * Register placement, as expressed by the asm constraints below:
 *   "a" -> rax (syscall number in, result out)
 *   "D" -> rdi (a1), "S" -> rsi (a2), "d" -> rdx (a3)
 *   a4/a5/a6 are pinned to r10/r8/r9 with explicit register variables and
 *   then passed as plain "r" operands.
 *
 * rcx and r11 are declared clobbered in every helper, and "memory" is
 * listed so the compiler does not cache memory values across the call.
 */

/* Invoke syscall `n` with no arguments. */
static __inline long __syscall0(long n)
{
	unsigned long ret;
	__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n) : "rcx", "r11", "memory");
	return ret;
}

/* Invoke syscall `n` with one argument (a1 in rdi). */
static __inline long __syscall1(long n, long a1)
{
	unsigned long ret;
	__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1) : "rcx", "r11", "memory");
	return ret;
}

/* Invoke syscall `n` with two arguments (rdi, rsi). */
static __inline long __syscall2(long n, long a1, long a2)
{
	unsigned long ret;
	__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2)
						  : "rcx", "r11", "memory");
	return ret;
}

/* Invoke syscall `n` with three arguments (rdi, rsi, rdx). */
static __inline long __syscall3(long n, long a1, long a2, long a3)
{
	unsigned long ret;
	__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
						  "d"(a3) : "rcx", "r11", "memory");
	return ret;
}

/* Invoke syscall `n` with four arguments (rdi, rsi, rdx, r10). */
static __inline long __syscall4(long n, long a1, long a2, long a3, long a4)
{
	unsigned long ret;
	/* Bind a4 to r10 explicitly so the "r" operand lands in that register. */
	register long r10 __asm__("r10") = a4;
	__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
						  "d"(a3), "r"(r10): "rcx", "r11", "memory");
	return ret;
}

/* Invoke syscall `n` with five arguments (rdi, rsi, rdx, r10, r8). */
static __inline long __syscall5(long n, long a1, long a2, long a3, long a4, long a5)
{
	unsigned long ret;
	register long r10 __asm__("r10") = a4;
	register long r8 __asm__("r8") = a5;
	__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
						  "d"(a3), "r"(r10), "r"(r8) : "rcx", "r11", "memory");
	return ret;
}

/* Invoke syscall `n` with six arguments (rdi, rsi, rdx, r10, r8, r9). */
static __inline long __syscall6(long n, long a1, long a2, long a3, long a4, long a5, long a6)
{
	unsigned long ret;
	register long r10 __asm__("r10") = a4;
	register long r8 __asm__("r8") = a5;
	register long r9 __asm__("r9") = a6;
	__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
						  "d"(a3), "r"(r10), "r"(r8), "r"(r9) : "rcx", "r11", "memory");
	return ret;
}
#!/usr/bin/env python3
"""Run every ``*.bin`` test below the current directory through retrec.

For a test ``dir/name.bin`` two optional sidecar files are honoured:

* ``dir/.name.bin.env``      - JSON object used as the complete environment
                               of the child process.
* ``dir/.name.bin.expected`` - exact stdout the test must produce.

Lines of the form ``PASS:...`` / ``FAIL:...`` in a test's stdout are counted
as individual passes / failures. The script exits with status 1 when any
failure was recorded, so callers can detect a failing run.
"""

import sys
import glob
import re
import subprocess
import os
import json

COLOR_RESET = "\u001b[0m"
COLOR_YELLOW = "\u001b[33m"
COLOR_GREEN = "\u001b[32m"
COLOR_RED = "\u001b[31m"
CARGS = {'reset': COLOR_RESET, 'yellow': COLOR_YELLOW, 'green': COLOR_GREEN, 'red': COLOR_RED}


def _sidecar_path(test, suffix):
    """Return the hidden sidecar file path ``<dir>/.<basename><suffix>``.

    ``os.path.join`` is used so that a test without a directory component
    resolves to ``.<basename><suffix>`` in the current directory rather than
    to a path at the filesystem root.
    """
    return os.path.join(os.path.dirname(test), "." + os.path.basename(test) + suffix)


def run_test(retrec, test):
    """Run one test binary under ``retrec`` and collect its results.

    Args:
        retrec: path of the retrec executable.
        test: path of the test binary passed to retrec as its only argument.

    Returns:
        ``(failures, passes)`` - two lists of strings. ``failures`` holds
        every ``FAIL:...`` line plus messages for a non-zero exit code,
        undecodable output, or a mismatch against the ``.expected`` file.
        ``passes`` holds every ``PASS:...`` line.
    """
    env_path = _sidecar_path(test, ".env")
    if os.path.isfile(env_path):
        with open(env_path) as f:
            env = json.load(f)
    else:
        # None makes subprocess inherit the runner's own environment.
        env = None

    failures = []
    passes = []

    result = subprocess.run([retrec, test], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, env=env)
    try:
        output = result.stdout.decode("UTF-8")
    except UnicodeDecodeError as e:
        failures.append("Unable to parse program output: {}".format(e))
        return (failures, passes)

    print(output.rstrip())

    failures += re.findall("(FAIL:.*)", output)
    passes += re.findall("PASS:.*", output)

    if result.returncode != 0:
        # Output of a crashed run is not compared against the expected file.
        failures.append("Process exited with code {}".format(result.returncode))
        return (failures, passes)

    expected_results_path = _sidecar_path(test, ".expected")
    if os.path.isfile(expected_results_path):
        # Compare output to expected
        with open(expected_results_path, "r") as f:
            expected = f.read()

        if output != expected:
            failures.append("Output doesn't match {}".format(expected_results_path))

    return (failures, passes)


def main():
    """Discover and run all tests, print a summary, return the exit status.

    Returns 0 when no failure was recorded and 1 otherwise. Exits with
    status 1 directly when the command line is malformed.
    """
    if len(sys.argv) != 2:
        print("Usage: {} <path/to/retrec>".format(sys.argv[0]))
        sys.exit(1)

    retrec = sys.argv[1]

    # Enumerate tests. recursive=True is required for "**" to descend into
    # subdirectories; sorting makes the run order reproducible.
    tests = sorted(glob.glob("**/*.bin", recursive=True))
    fails = []
    total_passes = 0
    for test in tests:
        print("--- RUNNING {} ---".format(test))
        (failures, passes) = run_test(retrec, test)
        if len(failures):
            fails += [(test, x) for x in failures]
        total_passes += len(passes)
        print("--- {}: {} failures, {} passes ---".format(test, len(failures), len(passes)))

    print("==============================")
    print("TOTAL: {red}{} failures{reset}, {green}{} passes{reset}".format(len(fails), total_passes, **CARGS))
    print("==============================")

    for fail in fails:
        print("{}: {red}{}{reset}".format(fail[0], fail[1], **CARGS))

    return 1 if fails else 0


if __name__ == "__main__":
    sys.exit(main())