├── .gitmodules
├── CMakeLists.txt
├── LICENSE.md
├── README.md
├── external
└── external.cmake
├── src
├── CMakeLists.txt
├── allocators.cpp
├── allocators.h
├── arch
│ ├── arch.h
│ ├── definitions.h
│ ├── generic
│ │ ├── codegen_generic.cpp
│ │ ├── codegen_generic.h
│ │ ├── runtime_context_generic.cpp
│ │ └── runtime_context_generic.h
│ ├── ppc64le
│ │ ├── codegen
│ │ │ ├── abi.h
│ │ │ ├── assembler.cpp
│ │ │ ├── assembler.h
│ │ │ ├── codegen_fixed_helpers.cpp
│ │ │ ├── codegen_ppc64le.cpp
│ │ │ ├── codegen_ppc64le.h
│ │ │ ├── codegen_ppc64le_internal.h
│ │ │ ├── codegen_target_x86_64.cpp
│ │ │ ├── codegen_types.cpp
│ │ │ ├── codegen_types.h
│ │ │ ├── register_allocator.cpp
│ │ │ └── register_allocator.h
│ │ ├── cpu_context_ppc64le.h
│ │ ├── llir
│ │ │ └── llir_registers_ppc64le.h
│ │ ├── runtime_context_ppc64le.cpp
│ │ ├── runtime_context_ppc64le.h
│ │ ├── syscalls.cpp
│ │ └── syscalls.h
│ ├── runtime_context_dispatcher.cpp
│ ├── runtime_context_dispatcher.h
│ ├── target_environment.h
│ └── x86_64
│ │ ├── cpu_context_x86_64.h
│ │ ├── llir
│ │ ├── llir_lifter_x86_64.cpp
│ │ ├── llir_lifter_x86_64.h
│ │ ├── llir_operands_x86_64.h
│ │ └── llir_registers_x86_64.h
│ │ ├── syscalls.h
│ │ ├── target_environment.cpp
│ │ └── target_environment.h
├── codegen.cpp
├── codegen.h
├── disassembler.cpp
├── disassembler.h
├── dynamic_recompiler.cpp
├── dynamic_recompiler.h
├── elf_loader.cpp
├── elf_loader.h
├── execution_context.cpp
├── execution_context.h
├── instruction_stream.h
├── llir.h
├── main.cpp
├── mapped_file.cpp
├── mapped_file.h
├── platform
│ ├── generic_syscalls.cpp
│ ├── generic_syscalls.h
│ ├── syscall_emulator.cpp
│ ├── syscall_emulator.h
│ └── syscall_types.h
├── process_memory_map.cpp
├── process_memory_map.h
├── util
│ ├── magic.h
│ ├── staticvector.h
│ ├── util.cpp
│ └── util.h
├── virtual_address_mapper.cpp
└── virtual_address_mapper.h
└── test
├── Makefile
├── asm
├── .addressing_modes.bin.expected
├── .hello_sse.bin.expected
├── .loadstore.bin.expected
├── .pushpop.bin.expected
├── .stos.bin.expected
├── addressing_modes.S
├── callret.S
├── gentest.py
├── hello.S
├── hello.asm
├── hello_sse.S
├── jump.S
├── jump.asm
├── jump_sf.S
├── jump_zf.S
├── loadstore.S
├── mov.S
├── pushpop.S
└── stos.S
├── c
├── .hello.fs.bin.expected
├── .print_args.fs.bin.env
├── .print_args.fs.bin.expected
├── auxval.fs.c
├── cpuid.fs.c
├── hello.fs.c
├── print_args.fs.c
├── start.h
└── syscall.h
└── runtests.py
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "external/capstone"]
2 | path = external/capstone
3 | url = https://github.com/aquynh/capstone
4 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 | project(retrec)
3 |
4 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
5 | # Detect the host architecture; CMake strips the trailing newline itself, no external 'tr' needed
6 | execute_process(COMMAND uname -m OUTPUT_VARIABLE HOST_ARCH OUTPUT_STRIP_TRAILING_WHITESPACE)
7 | message(STATUS "Detected host arch: ${HOST_ARCH}")
8 | # Default to a Release build when no build type was requested
9 | if(NOT CMAKE_BUILD_TYPE)
10 |     set(CMAKE_BUILD_TYPE Release)
11 | endif()
12 | # RETREC_DEBUG_BUILD is 1 only for Debug builds; it is passed to the compiler as a -D flag below
13 | set(RETREC_DEBUG_BUILD 0)
14 | if(CMAKE_BUILD_TYPE STREQUAL "Debug")
15 |     set(RETREC_DEBUG_BUILD 1)
16 | endif()
17 |
18 | # Compile external dependencies
19 | include(external/external.cmake)
20 |
21 | #
22 | # User configurable options (MIN_LOG_LEVEL: AUTO resolves to 0 for Debug builds and 2 otherwise)
23 | #
24 | set(MIN_LOG_LEVEL AUTO CACHE STRING "Set minimum log level {AUTO, 0, 1, 2, 3}")
25 | set_property(CACHE MIN_LOG_LEVEL PROPERTY STRINGS "AUTO" "0" "1" "2" "3")
26 | if("${MIN_LOG_LEVEL}" STREQUAL "AUTO") # quoted: safe when the value is empty or names another variable
27 |     if(CMAKE_BUILD_TYPE STREQUAL "Debug")
28 |         # Set min log level to lowest for debug builds
29 |         set(MIN_LOG_LEVEL 0)
30 |     else()
31 |         # Otherwise set it to WARN
32 |         set(MIN_LOG_LEVEL 2)
33 |     endif()
34 | endif()
35 | message(STATUS "Minimum log level is: ${MIN_LOG_LEVEL}") # Set with 'cmake -DMIN_LOG_LEVEL=<0,1,2,3>'
36 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DRETREC_MINIMUM_LOG_LEVEL=${MIN_LOG_LEVEL}")
37 |
38 | #
39 | # Compiler flags
40 | #
41 | set(CMAKE_CXX_STANDARD 17)
42 | string(APPEND CMAKE_CXX_FLAGS " -fPIE -fno-exceptions -Wall -Wimplicit-fallthrough -Wextra -Wpessimizing-move -Wno-psabi")
43 | string(APPEND CMAKE_CXX_FLAGS " -DRETREC_DEBUG_BUILD=${RETREC_DEBUG_BUILD}")
44 | set(CMAKE_SOURCE_DIR "src") # NOTE(review): overrides a CMake-provided variable - confirm this is intended
45 | set(CMAKE_CXX_FLAGS_DEBUG "")
46 | set(CMAKE_CXX_FLAGS_RELEASE "")
47 | # The per-config defaults were cleared above; optimisation/debug flags are chosen by hand per build type
48 | message(STATUS "Build type is: ${CMAKE_BUILD_TYPE}")
49 | if(CMAKE_BUILD_TYPE STREQUAL "Debug")
50 |     string(APPEND CMAKE_CXX_FLAGS " -Og -g -Werror=switch")
51 | elseif(CMAKE_BUILD_TYPE STREQUAL "ReleaseDebug")
52 |     string(APPEND CMAKE_CXX_FLAGS " -O2 -mcpu=power9 -g -DNDEBUG -Wno-unused-variable -Wno-unused-parameter")
53 | elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
54 |     string(APPEND CMAKE_CXX_FLAGS " -O2 -Wno-unused-variable -Wno-unused-parameter")
55 | else()
56 |     message(FATAL_ERROR "Unknown CMAKE_BUILD_TYPE: Choices are 'Release', 'ReleaseDebug', 'Debug'")
57 | endif()
58 |
59 | # Compiler-specific flags:
60 | # -Wconversion (without sign-conversion warnings) is enabled for Clang, and for
61 | # GCC only from version 10 on, since GCC >=10 has non-broken -Wconversion.
62 | # Other compilers get no extra flags here.
63 | if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang"
64 |    OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
65 |        AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10.0))
66 |     string(APPEND CMAKE_CXX_FLAGS " -Wconversion -Wno-sign-conversion")
67 | endif()
68 | # Codegen backend selection: the generic backend is always on, ppc64le only on matching hosts
69 | set(CODEGEN_PPC64LE 0)
70 | set(CODEGEN_GENERIC 1)
71 | set(CODEGEN_BACKENDS "generic")
72 |
73 | # Architecture-specific flags (HOST_ARCH is quoted so an empty detection result cannot break the if())
74 | if("${HOST_ARCH}" MATCHES "ppc64")
75 |     set(CODEGEN_PPC64LE 1)
76 |     set(CODEGEN_BACKENDS "${CODEGEN_BACKENDS}, ppc64le")
77 | else()
78 |     message(STATUS "No codegen implemented for your architecture - falling back to generic interpreter!")
79 | endif()
80 |
81 | message(STATUS "Codegen backends enabled: ${CODEGEN_BACKENDS}")
82 |
83 | # Define detected architecture/codegen flags
84 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
85 | -DRETREC_CODEGEN_GENERIC=${CODEGEN_GENERIC} \
86 | -DRETREC_CODEGEN_PPC64LE=${CODEGEN_PPC64LE}"
87 | )
88 |
89 | #
90 | # Dependencies: libelf, located through pkg-config (configuration fails if either is missing)
91 | #
92 | find_package(PkgConfig REQUIRED)
93 | pkg_check_modules(LIBELF REQUIRED IMPORTED_TARGET libelf)
94 |
95 | # Main source directory (src/CMakeLists.txt defines the retrec executable)
96 | add_subdirectory(src)
97 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | ### GNU LESSER GENERAL PUBLIC LICENSE
2 |
3 | Version 3, 29 June 2007
4 |
5 | Copyright (C) 2007 Free Software Foundation, Inc.
6 | <https://fsf.org/>
7 |
8 | Everyone is permitted to copy and distribute verbatim copies of this
9 | license document, but changing it is not allowed.
10 |
11 | This version of the GNU Lesser General Public License incorporates the
12 | terms and conditions of version 3 of the GNU General Public License,
13 | supplemented by the additional permissions listed below.
14 |
15 | #### 0. Additional Definitions.
16 |
17 | As used herein, "this License" refers to version 3 of the GNU Lesser
18 | General Public License, and the "GNU GPL" refers to version 3 of the
19 | GNU General Public License.
20 |
21 | "The Library" refers to a covered work governed by this License, other
22 | than an Application or a Combined Work as defined below.
23 |
24 | An "Application" is any work that makes use of an interface provided
25 | by the Library, but which is not otherwise based on the Library.
26 | Defining a subclass of a class defined by the Library is deemed a mode
27 | of using an interface provided by the Library.
28 |
29 | A "Combined Work" is a work produced by combining or linking an
30 | Application with the Library. The particular version of the Library
31 | with which the Combined Work was made is also called the "Linked
32 | Version".
33 |
34 | The "Minimal Corresponding Source" for a Combined Work means the
35 | Corresponding Source for the Combined Work, excluding any source code
36 | for portions of the Combined Work that, considered in isolation, are
37 | based on the Application, and not on the Linked Version.
38 |
39 | The "Corresponding Application Code" for a Combined Work means the
40 | object code and/or source code for the Application, including any data
41 | and utility programs needed for reproducing the Combined Work from the
42 | Application, but excluding the System Libraries of the Combined Work.
43 |
44 | #### 1. Exception to Section 3 of the GNU GPL.
45 |
46 | You may convey a covered work under sections 3 and 4 of this License
47 | without being bound by section 3 of the GNU GPL.
48 |
49 | #### 2. Conveying Modified Versions.
50 |
51 | If you modify a copy of the Library, and, in your modifications, a
52 | facility refers to a function or data to be supplied by an Application
53 | that uses the facility (other than as an argument passed when the
54 | facility is invoked), then you may convey a copy of the modified
55 | version:
56 |
57 | - a) under this License, provided that you make a good faith effort
58 | to ensure that, in the event an Application does not supply the
59 | function or data, the facility still operates, and performs
60 | whatever part of its purpose remains meaningful, or
61 | - b) under the GNU GPL, with none of the additional permissions of
62 | this License applicable to that copy.
63 |
64 | #### 3. Object Code Incorporating Material from Library Header Files.
65 |
66 | The object code form of an Application may incorporate material from a
67 | header file that is part of the Library. You may convey such object
68 | code under terms of your choice, provided that, if the incorporated
69 | material is not limited to numerical parameters, data structure
70 | layouts and accessors, or small macros, inline functions and templates
71 | (ten or fewer lines in length), you do both of the following:
72 |
73 | - a) Give prominent notice with each copy of the object code that
74 | the Library is used in it and that the Library and its use are
75 | covered by this License.
76 | - b) Accompany the object code with a copy of the GNU GPL and this
77 | license document.
78 |
79 | #### 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that, taken
82 | together, effectively do not restrict modification of the portions of
83 | the Library contained in the Combined Work and reverse engineering for
84 | debugging such modifications, if you also do each of the following:
85 |
86 | - a) Give prominent notice with each copy of the Combined Work that
87 | the Library is used in it and that the Library and its use are
88 | covered by this License.
89 | - b) Accompany the Combined Work with a copy of the GNU GPL and this
90 | license document.
91 | - c) For a Combined Work that displays copyright notices during
92 | execution, include the copyright notice for the Library among
93 | these notices, as well as a reference directing the user to the
94 | copies of the GNU GPL and this license document.
95 | - d) Do one of the following:
96 | - 0) Convey the Minimal Corresponding Source under the terms of
97 | this License, and the Corresponding Application Code in a form
98 | suitable for, and under terms that permit, the user to
99 | recombine or relink the Application with a modified version of
100 | the Linked Version to produce a modified Combined Work, in the
101 | manner specified by section 6 of the GNU GPL for conveying
102 | Corresponding Source.
103 | - 1) Use a suitable shared library mechanism for linking with
104 | the Library. A suitable mechanism is one that (a) uses at run
105 | time a copy of the Library already present on the user's
106 | computer system, and (b) will operate properly with a modified
107 | version of the Library that is interface-compatible with the
108 | Linked Version.
109 | - e) Provide Installation Information, but only if you would
110 | otherwise be required to provide such information under section 6
111 | of the GNU GPL, and only to the extent that such information is
112 | necessary to install and execute a modified version of the
113 | Combined Work produced by recombining or relinking the Application
114 | with a modified version of the Linked Version. (If you use option
115 | 4d0, the Installation Information must accompany the Minimal
116 | Corresponding Source and Corresponding Application Code. If you
117 | use option 4d1, you must provide the Installation Information in
118 | the manner specified by section 6 of the GNU GPL for conveying
119 | Corresponding Source.)
120 |
121 | #### 5. Combined Libraries.
122 |
123 | You may place library facilities that are a work based on the Library
124 | side by side in a single library together with other library
125 | facilities that are not Applications and are not covered by this
126 | License, and convey such a combined library under terms of your
127 | choice, if you do both of the following:
128 |
129 | - a) Accompany the combined library with a copy of the same work
130 | based on the Library, uncombined with any other library
131 | facilities, conveyed under the terms of this License.
132 | - b) Give prominent notice with the combined library that part of it
133 | is a work based on the Library, and explaining where to find the
134 | accompanying uncombined form of the same work.
135 |
136 | #### 6. Revised Versions of the GNU Lesser General Public License.
137 |
138 | The Free Software Foundation may publish revised and/or new versions
139 | of the GNU Lesser General Public License from time to time. Such new
140 | versions will be similar in spirit to the present version, but may
141 | differ in detail to address new problems or concerns.
142 |
143 | Each version is given a distinguishing version number. If the Library
144 | as you received it specifies that a certain numbered version of the
145 | GNU Lesser General Public License "or any later version" applies to
146 | it, you have the option of following the terms and conditions either
147 | of that published version or of any later version published by the
148 | Free Software Foundation. If the Library as you received it does not
149 | specify a version number of the GNU Lesser General Public License, you
150 | may choose any version of the GNU Lesser General Public License ever
151 | published by the Free Software Foundation.
152 |
153 | If the Library as you received it specifies that a proxy can decide
154 | whether future versions of the GNU Lesser General Public License shall
155 | apply, that proxy's public statement of acceptance of any version is
156 | permanent authorization for you to choose that version for the
157 | Library.
158 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | retrec
2 | ======
3 |
4 | retrec is a retargetable dynamic recompiler for Linux userspace binaries that
5 | currently targets x86\_64 binaries on ppc64le (ISA 3.0B+) hosts. Support for other
6 | host ISAs (riscv64, aarch64) is planned.
7 |
8 | Unlike other retargetable userspace binary translators like [qemu-user](https://www.qemu.org/docs/master/user/main.html),
9 | retrec trades simplicity and portability for performance. While QEMU's code generator (TCG)
10 | is designed to be easy to port, retrec's code generation is designed to enable the most efficient
11 | translation possible, at the cost of requiring more complex architecture-specific backends.
12 |
13 | An example of the design differences between the two can be seen in the intermediate representations
14 | (IR) used by each. All QEMU target ISAs are lowered to a lowest-common-denominator IR (TCGops),
15 | whereas retrec's IR (llir) aims to provide a lossless 1:1 representation for any source ISA
16 | instruction. This means that while llir codegen backends must necessarily be more complex
17 | than TCG backends, they are also potentially able to emit more optimized code since no
18 | lossy conversions to/from representations have occurred.
19 |
20 | In the future, retrec also aims to provide support for invoking host library routines from translated
21 | processes, much like the [box86](https://github.com/ptitSeb/box86) project. Unlike box86, though,
22 | retrec only targets 64-bit ISAs and has a retargetable code generator instead of an ARM-only one.
23 |
24 | Status
25 | ------
26 | retrec is under heavy development and currently only implements a small fraction of the X86\_64
27 | ISA and only supports ppc64le (ISA 3.0B+) hosts. Currently only very basic statically-linked C programs run.
28 |
29 | Roadmap:
30 |
31 | - [x] Basic ELF loader
32 | - [x] Support for basic x86\_64 instructions (integer ALU, branch, etc.) (in progress)
33 | - [x] ppc64le codegen backend (in progress)
34 | - [ ] Support for more x86\_64 instructions (FPU, misc.) (in progress)
35 | - [ ] Support for more syscalls
36 | - [ ] Thread support
37 | - [ ] Dynamically linked binary support
38 | - [ ] SIMD (SSE, AVX)
39 | - [ ] aarch64 codegen backend
40 | - [ ] riscv64 codegen backend
41 | - [ ] JIT cache
42 | - [ ] Optimization passes
43 | - [ ] Support for calling into host libraries
44 | - [ ] Potential LLVM integration for translating hot routines?
45 | - [ ] ???
46 |
47 | If you would like to contribute to retrec's development, don't hesitate to reach out!
48 |
49 | Building
50 | --------
51 | retrec is still in very early stages, so building is currently only useful for those interested
52 | in contributing to its development. The only dependencies are cmake, libelf, and a C++17 compiler.
53 |
54 | ```
55 | $ mkdir build && cd build
56 | $ cmake .. -DCMAKE_BUILD_TYPE=Debug
57 | $ make
58 | ```
59 |
60 | Afterwards, you can run the test suite. Note that this requires an `x86_64-unknown-linux-gnu` toolchain.
61 | ```
62 | $ cd ../test
63 | $ make
64 | $ ./runtests.py ../build/src/retrec
65 | ```
66 |
67 | License
68 | -------
69 | retrec is licensed under the GNU Lesser General Public License (LGPL), version 3 or later. See LICENSE.md.
70 |
--------------------------------------------------------------------------------
/external/external.cmake:
--------------------------------------------------------------------------------
1 | # Source root of this project; the capstone submodule lives in external/capstone below it
2 | set(ROOT ${PROJECT_SOURCE_DIR})
3 |
4 | # Clone capstone if necessary
5 | if(NOT EXISTS "${ROOT}/external/capstone/CMakeLists.txt")
6 |     message(STATUS "Cloning capstone git")
7 |     execute_process(
8 |         COMMAND git submodule update --init -- external/capstone
9 |         WORKING_DIRECTORY "${ROOT}"
10 |         RESULT_VARIABLE CAPSTONE_CLONE_RESULT
11 |     )
12 |     # Stop with a clear message here instead of failing later inside add_subdirectory()
13 |     if(NOT CAPSTONE_CLONE_RESULT EQUAL 0)
14 |         message(FATAL_ERROR "Failed to clone capstone (git submodule update returned: ${CAPSTONE_CLONE_RESULT})")
15 |     endif()
16 | endif()
17 |
18 | # For Release/ReleaseDebug, enable DIET mode for a large performance boost
19 | if(CMAKE_BUILD_TYPE MATCHES "Release")
20 |     set(CAPSTONE_BUILD_DIET ON CACHE BOOL "")
21 | else()
22 |     set(CAPSTONE_BUILD_DIET OFF CACHE BOOL "")
23 | endif()
24 |
25 | # Capstone build options: static library only, no install rules,
26 | # default architecture set off with x86 support switched on
27 | set(CAPSTONE_BUILD_STATIC ON CACHE BOOL "")
28 | set(CAPSTONE_BUILD_SHARED OFF CACHE BOOL "")
29 | set(CAPSTONE_INSTALL OFF CACHE BOOL "")
30 | set(CAPSTONE_ARCHITECTURE_DEFAULT OFF CACHE BOOL "")
31 | set(CAPSTONE_X86_SUPPORT ON CACHE BOOL "")
32 |
33 | # Capstone is always configured with CMAKE_BUILD_TYPE=Release; our own build type is restored afterwards.
34 | # Its build tree lives under the binary directory so that nothing is generated inside the
35 | # source tree and separate build directories do not share capstone objects.
36 | set(CMAKE_BUILD_TYPE_OLD ${CMAKE_BUILD_TYPE})
37 | set(CMAKE_BUILD_TYPE Release)
38 | add_subdirectory(external/capstone "${PROJECT_BINARY_DIR}/external/capstone" EXCLUDE_FROM_ALL)
39 | set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE_OLD})
40 |
41 | # Include path and library target consumed by the rest of the build
42 | set(CapstoneGit_INCLUDE "${ROOT}/external/capstone/include")
43 | set(CapstoneGit_LIBS capstone-static)
44 |
--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Cleaner file name macro for use in logging: SOURCE_PATH_SIZE is the length of "${CMAKE_SOURCE_DIR}/"
2 | # https://stackoverflow.com/a/16658858
3 | string(LENGTH "${CMAKE_SOURCE_DIR}/" SOURCE_PATH_SIZE)
4 | add_definitions("-DSOURCE_PATH_SIZE=${SOURCE_PATH_SIZE}")
5 |
6 | #
7 | # Common source files (listed for every host; host-specific .cpp files are appended further down)
8 | #
9 | set(RETREC_SOURCES
10 | allocators.cpp
11 | allocators.h
12 | arch/arch.h
13 | arch/definitions.h
14 | arch/generic/codegen_generic.cpp
15 | arch/generic/codegen_generic.h
16 | arch/generic/runtime_context_generic.cpp
17 | arch/generic/runtime_context_generic.h
18 | arch/ppc64le/codegen/abi.h
19 | arch/ppc64le/codegen/assembler.h
20 | arch/ppc64le/codegen/codegen_ppc64le.h
21 | arch/ppc64le/codegen/codegen_ppc64le_internal.h
22 | arch/ppc64le/codegen/codegen_types.h
23 | arch/ppc64le/codegen/register_allocator.h
24 | arch/ppc64le/cpu_context_ppc64le.h
25 | arch/ppc64le/llir/llir_registers_ppc64le.h
26 | arch/ppc64le/runtime_context_ppc64le.h
27 | arch/ppc64le/syscalls.h
28 | arch/runtime_context_dispatcher.cpp
29 | arch/runtime_context_dispatcher.h
30 | arch/target_environment.h
31 | arch/x86_64/cpu_context_x86_64.h
32 | arch/x86_64/llir/llir_lifter_x86_64.cpp
33 | arch/x86_64/llir/llir_lifter_x86_64.h
34 | arch/x86_64/llir/llir_operands_x86_64.h
35 | arch/x86_64/llir/llir_registers_x86_64.h
36 | arch/x86_64/syscalls.h
37 | arch/x86_64/target_environment.cpp
38 | arch/x86_64/target_environment.h
39 | codegen.cpp
40 | codegen.h
41 | disassembler.cpp
42 | disassembler.h
43 | dynamic_recompiler.cpp
44 | dynamic_recompiler.h
45 | elf_loader.cpp
46 | elf_loader.h
47 | execution_context.cpp
48 | execution_context.h
49 | instruction_stream.h
50 | llir.h
51 | main.cpp
52 | mapped_file.cpp
53 | mapped_file.h
54 | platform/generic_syscalls.cpp
55 | platform/generic_syscalls.h
56 | platform/syscall_emulator.cpp
57 | platform/syscall_emulator.h
58 | platform/syscall_types.h
59 | process_memory_map.cpp
60 | process_memory_map.h
61 | util/magic.h
62 | util/staticvector.h
63 | util/util.cpp
64 | util/util.h
65 | virtual_address_mapper.cpp
66 | virtual_address_mapper.h
67 | )
68 |
69 | #
70 | # Architecture-dependent source files (ppc64le backend sources, added on ppc64 hosts only)
71 | #
72 | if(HOST_ARCH MATCHES "ppc64")
73 |     list(APPEND RETREC_SOURCES
74 |         arch/ppc64le/codegen/assembler.cpp
75 |         arch/ppc64le/codegen/codegen_fixed_helpers.cpp
76 |         arch/ppc64le/codegen/codegen_ppc64le.cpp
77 |         arch/ppc64le/codegen/codegen_target_x86_64.cpp
78 |         arch/ppc64le/codegen/codegen_types.cpp
79 |         arch/ppc64le/codegen/register_allocator.cpp
80 |         arch/ppc64le/runtime_context_ppc64le.cpp
81 |         arch/ppc64le/syscalls.cpp
82 |     )
83 | endif()
84 |
85 | # The retrec executable, built from the source list assembled above
86 | add_executable(retrec ${RETREC_SOURCES})
87 |
88 | # libelf is linked through the imported target created by pkg_check_modules(... IMPORTED_TARGET),
89 | # so its include directories, library search paths and extra flags are applied together;
90 | # the bare ${LIBELF_LIBRARIES} names carry no search path.
91 | target_link_libraries(retrec PUBLIC PkgConfig::LIBELF ${CapstoneGit_LIBS})
92 | target_include_directories(retrec PUBLIC ${CapstoneGit_INCLUDE})
93 |
94 | # Make this directory (src/) an include root for the target
95 | target_include_directories(retrec PRIVATE .)
96 |
--------------------------------------------------------------------------------
/src/allocators.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 |
22 | using namespace retrec;
23 | // Point the allocator at a caller-provided region of region_size_ bytes (the `used` counter is left untouched).
24 | void simple_placement_allocator::init(void *region_, size_t region_size_) {
25 |     this->region_size = region_size_;
26 |     this->region = region_;
27 | }
28 | // Hand out the next `size` bytes of the region, or nullptr when fewer than `size` bytes remain.
29 | void *simple_placement_allocator::allocate(size_t size) {
30 |     const size_t remaining = region_size - used;
31 |     if (size > remaining)
32 |         return nullptr;
33 |     uint8_t *const base = static_cast<uint8_t *>(region);
34 |     void *const start = base + used;
35 |     used += size;
36 |     return start;
37 | }
38 |
--------------------------------------------------------------------------------
/src/allocators.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 | #include
28 | #include
29 |
30 | namespace retrec {
31 | // Allocator over a caller-provided memory region; individual allocations cannot be released.
32 | class simple_placement_allocator {
33 |     void *region { nullptr };  // Start of the backing region; null until init() is called
34 |     size_t region_size { 0 };  // Size of the backing region in bytes; 0 until init() is called
35 |     size_t used { 0 };         // Number of bytes already handed out
36 |
37 | public:
38 |     void init(void *region_, size_t region_size_);
39 |
40 |     void *allocate(size_t size);
41 |     void free([[maybe_unused]] void *buffer) { /* Placement allocators can't free */ }
42 | };
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/src/arch/arch.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | /**
21 | * This file defines architecture-specific definitions used for compile-time feature selection.
22 | */
23 |
24 | #pragma once
25 | // Every HOST_ARCH_* macro defaults to 0; the detection chain below redefines one of them to 1.
26 | #define HOST_ARCH_AARCH64 0
27 | #define HOST_ARCH_PPC64LE 0
28 | #define HOST_ARCH_X86_64 0
29 |
30 | //
31 | // Arch detection and dependent inclusion (unrecognised hosts stop the build with #error)
32 | //
33 | #if defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && defined(_CALL_ELF) && (_CALL_ELF == 2)
34 |
35 | #undef HOST_ARCH_PPC64LE
36 | #define HOST_ARCH_PPC64LE 1
37 |
38 | #include
39 |
40 | #elif defined(__x86_64__)
41 |
42 | #undef HOST_ARCH_X86_64
43 | #define HOST_ARCH_X86_64 1
44 |
45 | #elif defined(__aarch64__)
46 |
47 | #undef HOST_ARCH_AARCH64
48 | #define HOST_ARCH_AARCH64 1
49 |
50 | #else
51 | #error "Unsupported host architecture!"
52 | #endif
53 |
--------------------------------------------------------------------------------
/src/arch/definitions.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | /**
21 | * This file contains common definitions for architecture-specific code
22 | */
23 |
24 | #pragma once
25 |
26 | #include
27 |
28 | // Entry/exit function pointers emitted by arch-specific code (only declared here; entry takes the runtime context)
29 | extern void (*arch_enter_translated_code_ptr)(void *runtime_context);
30 | extern void (*arch_leave_translated_code_ptr)();
31 |
32 | // 128 bit register type: two int64_t halves, viewable in lo/hi member order (le) or hi/lo member order (be)
33 | struct reg128 {
34 | union {
35 | struct {
36 | int64_t lo, hi;
37 | } le;
38 |
39 | struct {
40 | int64_t hi, lo;
41 | } be;
42 | };
43 | };
44 |
45 |
--------------------------------------------------------------------------------
/src/arch/generic/codegen_generic.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 |
22 | using namespace retrec;
23 | // Initialization of the generic backend; the body currently consists only of TODO().
24 | status_code codegen_generic::init() {
25 | TODO();
26 | }
27 | // Translation entry point of the generic backend; both arguments are ignored and the body only invokes TODO().
28 | status_code codegen_generic::translate(const lifted_llir_block &insns, std::optional &out) {
29 | (void)insns;
30 | (void)out;
31 | TODO();
32 | }
33 | // Generic-backend stub: rctx is ignored and the body only invokes TODO().
34 | uint64_t codegen_generic::get_last_untranslated_access(void *rctx) {
35 | (void)rctx;
36 | TODO();
37 | }
38 | // Generic-backend stub: rctx and resolved_haddr are ignored and the body only invokes TODO().
39 | status_code codegen_generic::patch_translated_access(void *rctx, uint64_t resolved_haddr) {
40 | (void)rctx;
41 | (void)resolved_haddr;
42 | TODO();
43 | }
44 |
45 |
--------------------------------------------------------------------------------
/src/arch/generic/codegen_generic.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | /**
21 | * Class definition for generic (interpreter) codegen backend.
22 | */
23 |
24 | #pragma once
25 |
26 | #include
27 |
28 | namespace retrec {
29 |
30 | // Generic (interpreter) codegen backend, derived from the codegen base class.
31 | class codegen_generic : public codegen {
32 | public:
33 |     status_code init();
34 |     status_code translate(const lifted_llir_block &insns, std::optional &out);
35 |     uint64_t get_last_untranslated_access(void *rctx);
36 |     status_code patch_translated_access(void *rctx, uint64_t resolved_haddr);
37 | };
38 |
39 | // Factory for the generic backend; all three arguments are unnamed and the body only invokes TODO().
40 | static inline std::unique_ptr make_codegen_generic(Architecture, execution_context &, virtual_address_mapper *) {
41 |     TODO();
42 | }
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/src/arch/generic/runtime_context_generic.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see .
18 | */
19 |
20 | #include
21 |
22 | using namespace retrec;
23 |
24 | status_code runtime_context_generic::init(Architecture, void *, void *, // Stub: every parameter is unnamed and unused
25 | virtual_address_mapper *, syscall_emulator *) {
26 | TODO(); // Not implemented yet
27 | }
28 |
29 | status_code runtime_context_generic::execute() { // Stub: the generic backend cannot run code yet
30 | TODO();
31 | }
32 |
--------------------------------------------------------------------------------
/src/arch/generic/runtime_context_generic.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see .
18 | */
19 |
20 | /**
21 | * Definition for the generic interpreter backend's runtime context
22 | */
23 |
24 | #include
25 |
26 | namespace retrec {
27 |
28 | // Forward
29 | class syscall_emulator;
30 | class virtual_address_mapper;
31 |
32 | struct runtime_context_generic { // Runtime context of the generic interpreter backend; declares no data members yet
33 | runtime_context_generic() {}
34 | status_code init(Architecture target_arch, void *entry, void *stack, virtual_address_mapper *vam_,
35 | syscall_emulator *syscall_emu_); // Same parameter list as runtime_context_ppc64le::init
36 | status_code execute();
37 | };
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/src/arch/ppc64le/codegen/codegen_ppc64le_internal.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see .
18 | */
19 |
20 | /**
21 | * This file defines internal macros/helpers for ppc64le codegen routines
22 | */
23 |
24 | #pragma once
25 |
26 | /**
27 | * Helper macros for using relocations with local labels
28 | */
29 | #define RELOC_DECLARE_LABEL(name) \
30 | do ctx.stream->add_aux(true, relocation{1, relocation::declare_label{name}}); while (0)
31 | #define RELOC_DECLARE_LABEL_AFTER(name) \
32 | do ctx.stream->add_aux(true, relocation{1, relocation::declare_label_after{name}}); while (0)
33 | #define RELOC_FIXUP_LABEL(name, pos) \
34 | do ctx.stream->add_aux(true, relocation{1, relocation::imm_rel_label_fixup{name, LabelPosition::pos}}); while (0)
35 |
36 | // x-macro for all targets supported by the ppc64le backend
37 | #define PPC64LE_ENUMERATE_SUPPORTED_TARGET_TRAITS(x, ...) \
38 | x(TargetTraitsX86_64, __VA_ARGS__)
39 |
40 | // macro to instantiate codegen class for all traits
41 | #define PPC64LE_INSTANTIATE_CODEGEN_FOR_ALL_TRAITS() \
42 | PPC64LE_ENUMERATE_SUPPORTED_TARGET_TRAITS(PPC64LE_INSTANTIATE_CODEGEN_FOR_TRAITS, _)
43 | #define PPC64LE_INSTANTIATE_CODEGEN_FOR_TRAITS(x, ...) \
44 | template class retrec::codegen_ppc64le;
45 |
46 | // macro to instantiate a single method for all traits
47 | #define PPC64LE_INSTANTIATE_CODGEN_MEMBER_(x, ret, name, ...) \
48 | template ret codegen_ppc64le::name(__VA_ARGS__);
49 | #define PPC64LE_INSTANTIATE_CODEGEN_MEMBER(ret, name, ...) \
50 | PPC64LE_ENUMERATE_SUPPORTED_TARGET_TRAITS(PPC64LE_INSTANTIATE_CODGEN_MEMBER_, ret, name, __VA_ARGS__)
51 |
--------------------------------------------------------------------------------
/src/arch/ppc64le/codegen/codegen_types.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see .
18 | */
19 |
20 | #include
21 |
22 | const char *retrec::ppc64le::operation_names[] = {
23 | #define OPERATION_NAME(op, ...) "Operation::" #op,
24 | PPC64LE_ENUMERATE_OPERATIONS(OPERATION_NAME)
25 | #undef OPERATION_NAME
26 | };
27 |
28 |
--------------------------------------------------------------------------------
/src/arch/ppc64le/codegen/codegen_types.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see .
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 |
28 | /**
29 | * This is an x-macro that defines each supported cpu operation, along with a corresponding
30 | * `assembler` method that emits it. In cases where multiple methods can emit an instruction,
31 | * only the primary one is defined here.
32 | *
33 | * This macro is used later down to define the entries of the `Operation` enum. For this, only
34 | * the first field is actually used.
35 | */
36 | #define PPC64LE_ENUMERATE_OPERATIONS(x) \
37 | x(ADD, &assembler::add) \
38 | x(ADDI, &assembler::addi) \
39 | x(ADDIS, &assembler::addis) \
40 | x(ADDPCIS, &assembler::addpcis) \
41 | x(AND, &assembler::_and) \
42 | x(ANDI_, &assembler::andi_) \
43 | x(B, assembler::b_type) \
44 | x(BC, assembler::bc_type) \
45 | x(BCCTR, assembler::bcctr_type) \
46 | x(BCLR, assembler::bclr_type) \
47 | x(CMP, &assembler::cmp) \
48 | x(CMPI, &assembler::cmpi) \
49 | x(CMPL, &assembler::cmpl) \
50 | x(CMPLI, &assembler::cmpli) \
51 | x(CRAND, &assembler::crand) \
52 | x(CRANDC, &assembler::crandc) \
53 | x(CREQV, &assembler::creqv) \
54 | x(CRNAND, &assembler::crnand) \
55 | x(CRNOR, &assembler::crnor) \
56 | x(CROR, &assembler::cror) \
57 | x(CRORC, &assembler::crorc) \
58 | x(CRXOR, &assembler::crxor) \
59 | x(DCBST, &assembler::dcbst) \
60 | x(EQV, &assembler::creqv) /* NOTE(review): same method as the CREQV entry above; looks like it should name an eqv emitter - confirm against assembler.h */ \
61 | x(EXTSB, &assembler::extsb) \
62 | x(EXTSH, &assembler::extsh) \
63 | x(EXTSW, &assembler::extsw) \
64 | x(ICBI, &assembler::icbi) \
65 | x(ISEL, &assembler::isel) \
66 | x(ISYNC, &assembler::isync) \
67 | x(LBZ, &assembler::lbz) \
68 | x(LBZU, &assembler::lbzu) \
69 | x(LBZUX, &assembler::lbzux) \
70 | x(LBZX, &assembler::lbzx) \
71 | x(LD, &assembler::ld) \
72 | x(LDU, &assembler::ldu) \
73 | x(LDUX, &assembler::ldux) \
74 | x(LDX, &assembler::ldx) \
75 | x(LFD, &assembler::lfd) \
76 | x(LHA, &assembler::lha) \
77 | x(LHAU, &assembler::lhau) \
78 | x(LHAUX, &assembler::lhaux) \
79 | x(LHAX, &assembler::lhax) \
80 | x(LHZ, &assembler::lhz) \
81 | x(LHZU, &assembler::lhzu) \
82 | x(LHZUX, &assembler::lhzux) \
83 | x(LHZX, &assembler::lhzx) \
84 | x(LWA, &assembler::lwa) \
85 | x(LWAUX, &assembler::lwaux) \
86 | x(LWAX, &assembler::lwax) \
87 | x(LWZ, &assembler::lwz) \
88 | x(LWZU, &assembler::lwzu) \
89 | x(LWZUX, &assembler::lwzux) \
90 | x(LWZX, &assembler::lwzx) \
91 | x(LXSIWZX, &assembler::lxsiwzx) \
92 | x(LXV, &assembler::lxv) \
93 | x(LXVX, &assembler::lxvx) \
94 | x(MCRF, &assembler::mcrf) \
95 | x(MCRXRX, &assembler::mcrxrx) \
96 | x(MFCR, &assembler::mfcr) \
97 | x(MFOCRF, &assembler::mfocrf) \
98 | x(MFSPR, &assembler::mfspr) \
99 | x(MFVSRD, &assembler::mfvsrd) \
100 | x(MFVSRLD, &assembler::mfvsrld) \
101 | x(MTCRF, &assembler::mtcrf) \
102 | x(MTOCRF, &assembler::mtocrf) \
103 | x(MTSPR, &assembler::mtspr) \
104 | x(MTVSRDD, &assembler::mtvsrdd) \
105 | x(MULHD, &assembler::mulhd) \
106 | x(MULHDU, &assembler::mulhdu) \
107 | x(MULHW, &assembler::mulhw) \
108 | x(MULLD, &assembler::mulld) \
109 | x(MULLW, &assembler::mullw) \
110 | x(NAND, &assembler::nand) \
111 | x(NEG, &assembler::neg) \
112 | x(OR, &assembler::_or) \
113 | x(ORI, &assembler::ori) \
114 | x(ORIS, &assembler::oris) \
115 | x(RLDCL, &assembler::rldcl) \
116 | x(RLDICL, &assembler::rldicl) \
117 | x(RLDICR, &assembler::rldicr) \
118 | x(RLDIMI, &assembler::rldimi) \
119 | x(RLWIMI, &assembler::rlwimi) \
120 | x(RLWINM, &assembler::rlwinm) \
121 | x(SC, &assembler::sc) \
122 | x(SETB, &assembler::setb) \
123 | x(SLD, &assembler::sld) \
124 | x(SLDI, &assembler::sldi) \
125 | x(SRAD, &assembler::srad) \
126 | x(SRADI, &assembler::sradi) \
127 | x(SRAWI, &assembler::srawi) \
128 | x(SRD, &assembler::srd) \
129 | x(SRDI, &assembler::srdi) \
130 | x(STB, &assembler::stb) \
131 | x(STBU, &assembler::stbu) \
132 | x(STBUX, &assembler::stbux) \
133 | x(STBX, &assembler::stbx) \
134 | x(STD, &assembler::std) \
135 | x(STDU, &assembler::stdu) \
136 | x(STDUX, &assembler::stdux) \
137 | x(STDX, &assembler::stdx) \
138 | x(STFD, &assembler::stfd) \
139 | x(STH, &assembler::sth) \
140 | x(STHU, &assembler::sthu) \
141 | x(STHUX, &assembler::sthux) \
142 | x(STHX, &assembler::sthx) \
143 | x(STW, &assembler::stw) \
144 | x(STWU, &assembler::stwu) \
145 | x(STWUX, &assembler::stwux) \
146 | x(STWX, &assembler::stwx) \
147 | x(STXV, &assembler::stxv) \
148 | x(STXVX, &assembler::stxvx) \
149 | x(SUB, &assembler::sub) \
150 | x(SUBC, &assembler::subc) \
151 | x(SUBE, &assembler::sube) \
152 | x(SYNC, &assembler::sync) \
153 | x(XOR, &assembler::_xor) \
154 | x(XXLOR, &assembler::xxlor) \
155 | x(INVALID, &assembler::invalid) \
156 | x(U32, &assembler::u32)
157 |
158 | namespace retrec {
159 |
160 | namespace ppc64le {
161 |
162 | constexpr int INSN_SIZE = 4; // ISA 3.1 be damned
163 |
164 | //
165 | // Types used by the assembler and related code
166 | //
167 |
168 | class assembler;
169 |
170 | // A list of all Operation types. See PPC64LE_ENUMERATE_OPERATIONS above.
171 | enum class Operation {
172 | #define OPERATION(op, ...) op,
173 | PPC64LE_ENUMERATE_OPERATIONS(OPERATION)
174 | #undef OPERATION
175 | SIZE
176 | };
177 |
178 | // A list of strings for all Operation types
179 | extern const char *operation_names[(std::underlying_type_t)Operation::SIZE];
180 |
181 | enum class BO : uint8_t {
182 | ALWAYS = 0b10100, // Branch unconditionally
183 | FIELD_CLR = 0b00100, // Branch if given CR field is clear (0)
184 | FIELD_SET = 0b01100 // Branch if given CR Field is set (1)
185 | };
186 |
187 | enum class SPR : uint16_t {
188 | XER = 1,
189 | DSCR = 3,
190 | LR = 8,
191 | CTR = 9
192 | };
193 |
194 | // Annotated types for assembler operands. Allows inspection code (like relocation) to determine
195 | // parameter uses without hardcoding table of per-instruction meanings.
196 | //
197 | // To ensure that the new types are distinct from their underlying types (for use in std::variant),
198 | // they are declared as enums. A typedef/using declaration would allow implicit conversion and make
199 | // it difficult to store the types in std::variants that can also contain the underlying type.
200 | enum BI : uint8_t {}; // Branch CR field
201 | enum AA : bool {}; // Branch absolute address toggle
202 | enum LK : bool {}; // Branch linkage toggle
203 | enum rel_off_26bit : int32_t {}; // 26-bit relative offset (e.g. B)
204 | enum rel_off_16bit : int16_t {}; // 16-bit relative offset (e.g. BC)
205 |
206 | class instruction_stream;
207 |
208 | //
209 | // Types used by codegen_ppc64le and related higher-level code
210 | //
211 |
212 | enum class LabelPosition {
213 | BEFORE,
214 | AFTER
215 | };
216 |
217 | struct relocation {
218 | // Fill in the relative offset to an absolute target virtual address
219 | struct imm_rel_vaddr_fixup { uint64_t vaddr; };
220 |
221 | // Helpers for declaring labels and referencing them
222 | struct imm_rel_label_fixup { std::string label_name; LabelPosition position; };
223 | struct declare_label { std::string label_name; };
224 | struct declare_label_after { std::string label_name; };
225 |
226 | // Emit a direct call to a given virtual address
227 | struct imm_rel_direct_call { uint64_t vaddr; };
228 |
229 | // Emit a direct jmp to a given virtual address
230 | struct imm_rel_direct_jmp { uint64_t vaddr; };
231 |
232 | using DataT = std::variant;
234 |
235 | size_t insn_cnt; // Number of instructions reserved for this Relocation
236 | DataT data; // Relocation-specific data
237 | };
238 |
239 | // Auxiliary data that can be attached to an instruction stream entry
240 | struct instruction_aux {
241 | bool always_keep; // Whether we should never let this instruction be optimized away
242 | std::optional relocation;
243 |
244 | instruction_aux(bool always_keep_, decltype(relocation) relocation_)
245 | : always_keep(always_keep_), relocation(std::move(relocation_)) {}
246 | };
247 |
248 | // Guaranteed to hold an immediate relative offset
249 | using rel_off_t = int32_t;
250 |
251 | }; // namespace ppc64le
252 |
253 | }; // namespace retrec
254 |
--------------------------------------------------------------------------------
/src/arch/ppc64le/codegen/register_allocator.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see .
18 | */
19 |
20 | #include
21 | #include
22 |
23 | using namespace retrec;
24 | using namespace retrec::ppc64le;
25 |
26 | //
27 | // Static allocation manager
28 | //
29 |
30 | template
31 | typename register_allocator::static_allocation_set register_allocator::static_allocations;
32 |
33 | template
34 | register_allocator::static_allocation_set::static_allocation_set() {
35 | for (size_t i=0; i::fixed_regs) {
40 | allocations[reserved_index(pair.target)] = pair.host;
41 | }
42 | }
43 |
44 | template
45 | size_t register_allocator::static_allocation_set::reserved_index(const llir::Register &) {
46 | static_assert(!std::is_same_v, "Unimplemented static_allocation_set for this target");
47 | return 0;
48 | }
49 |
50 | template <> // x86_64 specialization: index allocations[] by the register's x86_64 enumerator
51 | size_t register_allocator<TargetTraitsX86_64>::static_allocation_set::reserved_index(const llir::Register &reg) {
52 |     return (size_t)reg.x86_64 - 1; // Subtract 1 to account for first INVALID element
53 | }
54 |
55 | template
56 | bool register_allocator::static_allocation_set::is_reserved(llir::PPC64Register reg) {
57 | for (size_t i=0; i
68 | register_allocator::register_allocator() {
69 | for (size_t i=0; i::non_volatile_regs) {
74 | regs[(size_t)reg - 1] = { RegisterInfo::State::RESERVED };
75 | }
76 | }
77 |
78 | template
79 | register_allocator::~register_allocator() {}
80 |
81 | template
82 | typename register_allocator::AllocatedRegT register_allocator::allocate_gpr() { // Hands out the lowest-numbered FREE GPR in R1..R31
83 | constexpr size_t FIRST_GPR_INDEX = (size_t)llir::PPC64Register::R0 - 1; // regs[] has no slot for INVALID, hence the -1
84 | constexpr size_t LAST_GPR_INDEX = (size_t)llir::PPC64Register::R31 - 1;
85 | for (size_t i = FIRST_GPR_INDEX + 1 /* skip GPR0 which is sometimes useless */; i <= LAST_GPR_INDEX; i++) {
86 | if (regs[i].state == RegisterInfo::State::FREE) {
87 | regs[i].state = RegisterInfo::State::ALLOCATED; // Stays ALLOCATED until free_reg() resets it
88 | return register_allocator::AllocatedRegT((llir::PPC64Register)(i + 1), *this); // i + 1 turns the regs[] index back into a PPC64Register value
89 | }
90 | }
91 |
92 | ASSERT_NOT_REACHED(); // No free registers
93 | }
94 |
95 | template <typename T>
96 | typename register_allocator<T>::AllocatedRegT register_allocator<T>::get_fixed_reg(const llir::Register &reg) {
97 |     auto ret = static_allocations.allocations[static_allocations.reserved_index(reg)]; // Host register statically mapped to this target register
98 |     assert(ret != llir::PPC64Register::INVALID); // INVALID means no host register is mapped to reg
99 |     return register_allocator<T>::AllocatedRegT(ret, *this);
100 | }
101 |
102 | template
103 | typename register_allocator::AllocatedRegT register_allocator::get_fixed_reg(typename T::RegisterT reg) { // Overload taking the target's own register enum
104 | auto ret = static_allocations.allocations[static_allocations.reserved_index(reg)]; // Host register statically mapped to this target register
105 | assert(ret != llir::PPC64Register::INVALID); // INVALID means no host register is mapped to reg
106 | return register_allocator::AllocatedRegT(ret, *this);
107 | }
108 |
109 | template
110 | void register_allocator::free_reg(llir::PPC64Register reg) {
111 |     assert(reg != llir::PPC64Register::INVALID);
112 |     auto &slot_state = regs[(size_t)reg - 1].state; // regs[] has no entry for INVALID, hence the -1
113 |     if (slot_state == RegisterInfo::State::RESERVED) return; // Reserved registers are never released
114 |     assert(slot_state == RegisterInfo::State::ALLOCATED);
115 |     slot_state = RegisterInfo::State::FREE;
116 | }
117 |
118 | // Explicitly instantiate for all supported target traits
119 | template class ppc64le::register_allocator;
120 |
--------------------------------------------------------------------------------
/src/arch/ppc64le/codegen/register_allocator.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see .
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #include
24 |
25 | namespace retrec {
26 | namespace ppc64le {
27 |
28 | using gpr_t = uint8_t;
29 | using vsr_t = uint8_t;
30 | static constexpr gpr_t GPR_INVALID = (gpr_t)-1;
31 |
32 | // RAII wrapper returned by GPR allocations. Move-only: the owning allocator gets the register back on destruction.
33 | template <typename RegisterAllocatorT>
34 | class allocated_reg {
35 |     llir::PPC64Register reg;
36 |     RegisterAllocatorT *allocator { nullptr }; // Allocator to hand reg back to; null for empty and fixed handles
37 |     bool fixed { false };                      // Set for host registers wrapped by from_host_register()
38 |
39 |     allocated_reg(llir::PPC64Register reg, RegisterAllocatorT &allocator)
40 |         : reg(reg), allocator(&allocator) {}
41 |     allocated_reg(llir::PPC64Register reg) : reg(reg), fixed(true) {}
42 |
43 | public:
44 |     allocated_reg() : reg(llir::PPC64Register::INVALID), allocator(nullptr) {}
45 |     static allocated_reg from_host_register(llir::Register host_reg) {
46 |         assert(host_reg.arch == Architecture::ppc64le);
47 |         return allocated_reg { host_reg.ppc64 };
48 |     }
49 |     friend RegisterAllocatorT;
50 |
51 |     gpr_t gpr() const { assert(fixed || allocator); return llir::PPC64RegisterGPRIndex(reg); }
52 |     vsr_t vsr() const { assert(fixed || allocator); return llir::PPC64RegisterVSRIndex(reg); }
53 |     explicit operator bool() const { return !!allocator; } // True only while an allocator owns the register
54 |
55 |     // Only allow moves. Moves carry `fixed` along so a moved fixed handle still passes the asserts in gpr()/vsr().
56 |     ~allocated_reg() { if (allocator) allocator->free_reg(reg); }
57 |     allocated_reg(const allocated_reg &) = delete;
58 |     allocated_reg &operator= (const allocated_reg &) = delete;
59 |     allocated_reg(allocated_reg &&other)
60 |         : reg(other.reg), allocator(std::exchange(other.allocator, nullptr)), fixed(other.fixed) {}
61 |     allocated_reg &operator= (allocated_reg &&other) {
62 |         std::swap(reg, other.reg);
63 |         std::swap(allocator, other.allocator); // other's destructor then frees whatever *this held before
64 |         std::swap(fixed, other.fixed);
65 |         return *this;
66 |     }
67 | };
68 |
69 | /**
70 |  * Host register allocator for the ppc64le backend, parameterized on the target's traits
71 |  */
72 | template <typename TargetTraits>
73 | class register_allocator {
74 |     // Allocation state of every host register, indexed by (PPC64Register value - 1)
75 |     struct RegisterInfo {
76 |         enum class State {
77 |             FREE,
78 |             ALLOCATED,
79 |             RESERVED
80 |         } state;
81 |     } regs[(size_t)llir::PPC64Register::MAXIMUM - 1];
82 |
83 |     // Statically allocated GPRs
84 |     static struct static_allocation_set {
85 |         static_allocation_set();
86 |         using TargetRegisterT = typename TargetTraits::RegisterT;
87 |
88 |         // Maps a given target register to a reserved ppc64 register, if available
89 |         llir::PPC64Register allocations[(size_t)TargetRegisterT::MAXIMUM - 1];
90 |
91 |         // allocations doesn't reserve space for the invalid register index 0, so subtract 1 to get index
92 |         // The llir::Register overload is defined per target in register_allocator.cpp
93 |         size_t reserved_index(const llir::Register &reg);
94 |         size_t reserved_index(TargetRegisterT reg) { return (size_t)reg - 1; }
95 |
96 |         bool is_reserved(llir::PPC64Register reg);
97 |     } static_allocations;
98 |
99 | public:
100 |     using AllocatedRegT = allocated_reg<register_allocator<TargetTraits>>;
101 |     friend AllocatedRegT; // Lets the RAII handle call the private free_reg()
102 |
103 |     register_allocator();
104 |     ~register_allocator();
105 |     DISABLE_COPY_AND_MOVE(register_allocator)
106 |
107 |     AllocatedRegT allocate_gpr();
108 |     AllocatedRegT get_fixed_reg(const llir::Register &reg);
109 |     AllocatedRegT get_fixed_reg(typename TargetTraits::RegisterT reg);
110 |
111 | private:
112 |     void free_reg(llir::PPC64Register reg);
113 | };
114 |
115 | };
116 | };
117 |
--------------------------------------------------------------------------------
/src/arch/ppc64le/cpu_context_ppc64le.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see .
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #include
24 |
25 | #include
26 | #include
27 |
28 | namespace retrec {
29 |
30 | struct alignas(16) cpu_context_ppc64le { // Saved ppc64le register state; the struct as a whole is 16-byte aligned
31 | int64_t gprs[32] { 0 };
32 | int64_t lr { 0 };
33 | int64_t cr { 0 };
34 | int64_t nip { 0 };
35 |
36 | int64_t _pad0; // Rounds the 35 preceding 8-byte fields (280 bytes) up to 288 so vsr starts 16-byte aligned; left uninitialized
37 | reg128 vsr[64] { { .le = { 0, 0 } } };
38 | int32_t vrsave { 0 };
39 | };
40 |
41 | static_assert(offsetof(cpu_context_ppc64le, vsr) % 16 == 0, "vsr registers not quadword aligned!\n");
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/src/arch/ppc64le/llir/llir_registers_ppc64le.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see .
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #ifndef LLIR_ALLOW_INTERNAL_INCLUDE
24 | #error "Don't include this directly! Use llir.h"
25 | #endif
26 |
27 | enum class PPC64Register : uint8_t {
28 | INVALID,
29 |
30 | R0, R1, R2, R3, R4, R5, R6, R7, R8, R9,
31 | R10, R11, R12, R13, R14, R15, R16, R17,
32 | R18, R19, R20, R21, R22, R23, R24, R25,
33 | R26, R27, R28, R29, R30, R31,
34 |
35 | LR,
36 | CR,
37 | CTR,
38 | XER,
39 |
40 | F0, F1, F2, F3, F4, F5, F6, F7, F8, F9,
41 | F10, F11, F12, F13, F14, F15, F16, F17,
42 | F18, F19, F20, F21, F22, F23, F24, F25,
43 | F26, F27, F28, F29, F30, F31,
44 |
45 | FPSCR,
46 |
47 | VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9,
48 | VR10, VR11, VR12, VR13, VR14, VR15, VR16, VR17,
49 | VR18, VR19, VR20, VR21, VR22, VR23, VR24, VR25,
50 | VR26, VR27, VR28, VR29, VR30, VR31,
51 |
52 | VSR0, VSR1, VSR2, VSR3, VSR4, VSR5, VSR6, VSR7, VSR8,
53 | VSR9, VSR10, VSR11, VSR12, VSR13, VSR14, VSR15, VSR16,
54 | VSR17, VSR18, VSR19, VSR20, VSR21, VSR22, VSR23, VSR24,
55 | VSR25, VSR26, VSR27, VSR28, VSR29, VSR30, VSR31,
56 |
57 | VSCR,
58 | VRSAVE,
59 |
60 | MAXIMUM
61 | };
62 |
63 | enum class PPC64RegisterType {
64 | INVALID,
65 | GPR,
66 | SPECIAL,
67 | FPR,
68 | VR,
69 | VSR
70 | };
71 |
72 | static constexpr inline PPC64RegisterType PPC64RegisterGetType(PPC64Register reg) {
73 |     struct class_span { PPC64Register first, last; PPC64RegisterType type; }; // One inclusive enumerator range per register class
74 |     constexpr class_span spans[] = {
75 |         { PPC64Register::R0,   PPC64Register::R31,   PPC64RegisterType::GPR },
76 |         { PPC64Register::LR,   PPC64Register::XER,   PPC64RegisterType::SPECIAL },
77 |         { PPC64Register::F0,   PPC64Register::F31,   PPC64RegisterType::FPR },
78 |         { PPC64Register::VR0,  PPC64Register::VR31,  PPC64RegisterType::VR },
79 |         { PPC64Register::VSR0, PPC64Register::VSR31, PPC64RegisterType::VSR },
80 |     };
81 |     const auto value = enum_cast(reg);
82 |     for (const auto &span : spans)
83 |         if (value >= enum_cast(span.first) && value <= enum_cast(span.last))
84 |             return span.type;
85 |     return PPC64RegisterType::INVALID; // Every enumerator outside the ranges above
86 | }
87 |
88 | static constexpr inline std::underlying_type_t PPC64RegisterGPRIndex(PPC64Register reg) { // Zero-based GPR number: R0 -> 0
89 | assert(PPC64RegisterGetType(reg) == PPC64RegisterType::GPR); // Caller must pass one of R0..R31
90 | return enum_cast(reg) - enum_cast(PPC64Register::R0);
91 | }
92 |
93 | static constexpr inline std::underlying_type_t PPC64RegisterFPRIndex(PPC64Register reg) { // Zero-based FPR number: F0 -> 0
94 | assert(PPC64RegisterGetType(reg) == PPC64RegisterType::FPR); // Caller must pass one of F0..F31
95 | return enum_cast(reg) - enum_cast(PPC64Register::F0);
96 | }
97 |
98 | static constexpr inline std::underlying_type_t PPC64RegisterVRIndex(PPC64Register reg) { // Zero-based VR number: VR0 -> 0
99 | assert(PPC64RegisterGetType(reg) == PPC64RegisterType::VR); // Caller must pass one of VR0..VR31
100 | return enum_cast(reg) - enum_cast(PPC64Register::VR0);
101 | }
102 |
103 | static constexpr inline std::underlying_type_t PPC64RegisterVSRIndex(PPC64Register reg) { // Zero-based VSR number: VSR0 -> 0
104 | assert(PPC64RegisterGetType(reg) == PPC64RegisterType::VSR); // Caller must pass one of VSR0..VSR31
105 | return enum_cast(reg) - enum_cast(PPC64Register::VSR0);
106 | }
107 |
--------------------------------------------------------------------------------
/src/arch/ppc64le/runtime_context_ppc64le.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see .
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #include
24 | #include
25 | #include
26 |
27 | #include
28 |
29 | namespace retrec {
30 |
31 | class syscall_emulator; // Forward
32 |
33 | /**
34 | * Data accessed by translated code and retrec runtime
35 | */
36 | struct runtime_context_ppc64le {
37 | //
38 | // State of translated CPU
39 | //
40 | cpu_context_ppc64le host_native_context; // Host CPU context when in native code
41 | cpu_context_ppc64le host_translated_context; // Host CPU context when in translated code
42 | bool flush_icache { false }; // Whether to invalidate icache before jumping to translated code
43 |
44 | //
45 | // Storage used for communication between translated and native code
46 | //
47 | void (*leave_translated_code_ptr)(void) { nullptr }; // Function pointer to arch_leave_translated_code thunk
48 |
49 | // Pointers to virtual_address_mapper for use with things like indirect call resolution
50 | virtual_address_mapper *vam { nullptr };
51 | uint64_t (virtual_address_mapper::* vam_lookup_and_update_call_cache)(uint64_t, uint64_t, uint64_t) { nullptr };
52 | uint64_t (virtual_address_mapper::* vam_lookup_check_call_cache)(uint64_t) { nullptr };
53 |
54 | // If the translated code wishes to call into native code, it will set the target here
55 | enum class NativeTarget : uint16_t /* fit in an instruction immediate field */ {
56 | INVALID,
57 | SYSCALL, // Execute a syscall
58 | CALL, // Emulate a CALL instruction
59 | JUMP, // Emulate a JUMP instruction
60 | PATCH_CALL, // Patch in a direct CALL
61 | PATCH_JUMP, // Patch in a direct JUMP
62 | } native_function_call_target { NativeTarget::INVALID };
63 |
64 | // Target CPU emulated context
65 | Architecture arch;
66 | union {
67 | cpu_context_x86_64 x86_64_ucontext;
68 | };
69 |
70 | bool should_exit { false };
71 | int exit_code { 0 };
72 |
73 | // Pointer to syscall emulator, used by native code
74 | syscall_emulator *syscall_emu { nullptr };
75 |
76 | //
77 | // Initialization and accessor functions
78 | //
79 | runtime_context_ppc64le() {}
80 |
81 | status_code init(Architecture target_arch, void *entry, void *stack, virtual_address_mapper *vam_,
82 | syscall_emulator *syscall_emu_);
83 | status_code execute();
84 | void dump_emulated_machine_state();
85 | };
86 | static_assert(std::is_standard_layout::value, "Runtime context must have standard layout, since we access it manually from emitted ASM.");
87 | static_assert(sizeof(runtime_context_ppc64le) <= 32768, "Runtime context must be accessible with signed 16-bit displacements!");
88 |
89 | class translated_code_region;
90 |
91 | }
92 |
--------------------------------------------------------------------------------
/src/arch/runtime_context_dispatcher.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 | #include
22 | #include
23 |
24 | using namespace retrec;
25 |
26 | runtime_context_dispatcher::runtime_context_dispatcher(CodegenBackend backend) {
27 | switch (backend) {
28 | #if RETREC_CODEGEN_PPC64LE
29 | case CodegenBackend::PowerPC64LE:
30 | context = runtime_context_ppc64le {};
31 | break;
32 | #endif
33 |
34 | #if RETREC_CODEGEN_GENERIC
35 | case CodegenBackend::Generic:
36 | context = runtime_context_generic {};
37 | break;
38 | #endif
39 | default:
40 | ASSERT_NOT_REACHED();
41 | }
42 | }
43 |
44 | status_code runtime_context_dispatcher::init(Architecture target_arch, void *entry, void *stack, virtual_address_mapper *vam,
45 | syscall_emulator *syscall_emu) {
46 |
47 | return std::visit([=](auto &rc) -> status_code {
48 | if constexpr (!types_are_same_v>)
49 | return rc.init(target_arch, entry, stack, vam, syscall_emu);
50 | else
51 | ASSERT_NOT_REACHED();
52 | }, context);
53 | }
54 |
55 | status_code runtime_context_dispatcher::execute() {
56 | return std::visit([](auto &rc) -> status_code {
57 | if constexpr (!types_are_same_v>)
58 | return rc.execute();
59 | else
60 | ASSERT_NOT_REACHED();
61 | }, context);
62 | }
63 |
64 | void *runtime_context_dispatcher::get_data() {
65 | return std::visit([](auto &rc) -> void * {
66 | if constexpr (!types_are_same_v>)
67 | return &rc;
68 | else
69 | ASSERT_NOT_REACHED();
70 | }, context);
71 | }
72 |
--------------------------------------------------------------------------------
/src/arch/runtime_context_dispatcher.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #include
24 | #include
25 | #include
26 |
27 | #include
28 |
29 | namespace retrec {
30 |
31 | class runtime_context_dispatcher {
32 | std::variant<
33 | #if RETREC_CODEGEN_GENERIC
34 | runtime_context_generic,
35 | #endif
36 | #if RETREC_CODEGEN_PPC64LE
37 | runtime_context_ppc64le,
38 | #endif
39 | Sentinel<0>
40 | > context;
41 |
42 | public:
43 | explicit runtime_context_dispatcher(CodegenBackend backend);
44 |
45 | status_code init(Architecture target_arch, void *entry, void *stack, virtual_address_mapper *vam,
46 | syscall_emulator *syscall_emu);
47 | status_code execute();
48 | void *get_data();
49 | };
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/src/arch/target_environment.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #include
24 | #include
25 |
26 | #include
27 | #include
28 |
29 | namespace retrec {
30 |
31 | static inline void *initialize_target_stack(Architecture target, void *stack,
32 | const std::vector &argv,
33 | const std::vector &envp,
34 | const elf_loader &elf_loader) {
35 | switch (target) {
36 | case Architecture::X86_64:
37 | return x86_64::initialize_target_stack(stack, argv, envp, elf_loader);
38 | default:
39 | TODO();
40 | }
41 | }
42 |
43 | };
44 |
--------------------------------------------------------------------------------
/src/arch/x86_64/cpu_context_x86_64.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #include
24 |
25 | #include
26 | #include
27 |
28 | namespace retrec {
29 |
30 | struct cpu_context_x86_64 {
31 | int64_t gprs[16] { 0 };
32 | int64_t segments[6] { 0 }; // Acutally only 16-bit, but made 64-bit for get_reg
33 | int64_t rip { 0 };
34 |
35 | // x86/MMX registers
36 | struct x87_reg {
37 | uint64_t lo { 0 }; // Low 64 bits of x87 register, aliased to MMX MM0-7
38 | uint16_t hi { 0 };
39 | uint16_t pad[3] { 0 };
40 | };
41 | x87_reg x87[8];
42 | uint16_t x87_control { 0 };
43 | uint16_t x87_status { 0 };
44 | uint16_t x87_tag { 0 };
45 | uint64_t x87_last_ip { 0 };
46 | uint64_t x87_last_data_ptr { 0 };
47 | uint16_t x87_opcode { 0 };
48 |
49 | // Pseudo-register for storing the offset from x87[0] where the stack TOP is, in bytes.
50 | uint16_t st_top_offset { 0 };
51 | static constexpr uint16_t st_offset_mask = 0b1110000;
52 |
53 | // SSE registers
54 | reg128 xmm[16];
55 | uint32_t mxcsr { 0 };
56 |
57 | template
58 | T *get_reg(llir::X86_64Register reg) {
59 | switch (reg) {
60 | case llir::X86_64Register::RAX:
61 | case llir::X86_64Register::RBX:
62 | case llir::X86_64Register::RCX:
63 | case llir::X86_64Register::RDX:
64 | case llir::X86_64Register::RSP:
65 | case llir::X86_64Register::RBP:
66 | case llir::X86_64Register::RSI:
67 | case llir::X86_64Register::RDI:
68 | case llir::X86_64Register::R8:
69 | case llir::X86_64Register::R9:
70 | case llir::X86_64Register::R10:
71 | case llir::X86_64Register::R11:
72 | case llir::X86_64Register::R12:
73 | case llir::X86_64Register::R13:
74 | case llir::X86_64Register::R14:
75 | case llir::X86_64Register::R15:
76 | if constexpr (types_are_same_v)
77 | return &gprs[(size_t)reg - (size_t)llir::X86_64Register::RAX];
78 | break;
79 |
80 | case llir::X86_64Register::FR0:
81 | case llir::X86_64Register::FR1:
82 | case llir::X86_64Register::FR2:
83 | case llir::X86_64Register::FR3:
84 | case llir::X86_64Register::FR4:
85 | case llir::X86_64Register::FR5:
86 | case llir::X86_64Register::FR6:
87 | case llir::X86_64Register::FR7:
88 | if constexpr (types_are_same_v)
89 | return &x87[(size_t)reg - (size_t)llir::X86_64Register::FR0];
90 | break;
91 |
92 | case llir::X86_64Register::XMM0:
93 | case llir::X86_64Register::XMM1:
94 | case llir::X86_64Register::XMM2:
95 | case llir::X86_64Register::XMM3:
96 | case llir::X86_64Register::XMM4:
97 | case llir::X86_64Register::XMM5:
98 | case llir::X86_64Register::XMM6:
99 | case llir::X86_64Register::XMM7:
100 | case llir::X86_64Register::XMM8:
101 | case llir::X86_64Register::XMM9:
102 | case llir::X86_64Register::XMM10:
103 | case llir::X86_64Register::XMM11:
104 | case llir::X86_64Register::XMM12:
105 | case llir::X86_64Register::XMM13:
106 | case llir::X86_64Register::XMM14:
107 | case llir::X86_64Register::XMM15:
108 | if constexpr (types_are_same_v)
109 | return &xmm[(size_t)reg - (size_t)llir::X86_64Register::XMM0];
110 | break;
111 | case llir::X86_64Register::MXCSR:
112 | if constexpr (types_are_same_v)
113 | return &mxcsr;
114 | break;
115 |
116 | case llir::X86_64Register::RIP:
117 | if constexpr (types_are_same_v)
118 | return &rip;
119 | break;
120 |
121 | case llir::X86_64Register::FS:
122 | case llir::X86_64Register::GS:
123 | case llir::X86_64Register::CS:
124 | case llir::X86_64Register::SS:
125 | case llir::X86_64Register::DS:
126 | case llir::X86_64Register::ES:
127 | if constexpr (types_are_same_v)
128 | return &segments[(size_t)reg - (size_t)llir::X86_64Register::FS];
129 | break;
130 |
131 | default:
132 | break;
133 | }
134 |
135 | // Unsupported register/mismatched type provided
136 | ASSERT_NOT_REACHED();
137 | }
138 | };
139 |
140 | }
141 |
--------------------------------------------------------------------------------
/src/arch/x86_64/llir/llir_lifter_x86_64.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 |
24 | namespace retrec {
25 | class llir_lifter_x86_64 final : public llir_lifter {
26 | static llir::Operand::Width get_width(uint8_t width);
27 | void fill_operand(cs_x86_op &op, llir::Operand &out);
28 | llir::Register get_reg(x86_reg reg);
29 | llir::Operand get_reg_op(x86_reg reg);
30 |
31 | public:
32 | llir_lifter_x86_64() {}
33 | ~llir_lifter_x86_64();
34 | status_code lift(cs_insn *insn, std::vector &out) override;
35 | };
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/src/arch/x86_64/llir/llir_operands_x86_64.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #ifndef LLIR_ALLOW_INTERNAL_INCLUDE
23 | #error "Don't include this directly! Use llir.h"
24 | #endif
25 |
26 | struct X86_64MemOp {
27 | Register segment;
28 | Register base;
29 | Register index;
30 | uint8_t scale;
31 | int64_t disp;
32 |
33 | // Whether or not disp's sign is determined by the direction flag
34 | bool disp_sign_from_df { false };
35 | };
36 |
--------------------------------------------------------------------------------
/src/arch/x86_64/llir/llir_registers_x86_64.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #ifndef LLIR_ALLOW_INTERNAL_INCLUDE
23 | #error "Don't include this directly! Use llir.h"
24 | #endif
25 |
// X-macro listing the x86_64 registers known to LLIR: x(NAME) is expanded once per
// entry, in the order given here. cpu_context_x86_64::get_reg() derives array indices
// from an enumerator's distance to the first entry of its group (RAX, FR0, XMM0, FS),
// so entries within those groups need to stay contiguous and in this order.
// INVALID is the first entry and MAXIMUM the last.
#define LLIR_ENUMERATE_X86_64_REGISTERS(x) \
    x(INVALID) \
    /* GPRs */\
    x(RAX) \
    x(RBX) \
    x(RCX) \
    x(RDX) \
    x(RSP) \
    x(RBP) \
    x(RSI) \
    x(RDI) \
    x(R8) \
    x(R9) \
    x(R10) \
    x(R11) \
    x(R12) \
    x(R13) \
    x(R14) \
    x(R15) \
    /* x87 regs (absolute address) */\
    x(FR0) \
    x(FR1) \
    x(FR2) \
    x(FR3) \
    x(FR4) \
    x(FR5) \
    x(FR6) \
    x(FR7) \
    /* X87 regs (relative to TOP)*/\
    x(ST0) \
    x(ST1) \
    x(ST2) \
    x(ST3) \
    x(ST4) \
    x(ST5) \
    x(ST6) \
    x(ST7) \
    /* MMX regs */\
    x(MM0) \
    x(MM1) \
    x(MM2) \
    x(MM3) \
    x(MM4) \
    x(MM5) \
    x(MM6) \
    x(MM7) \
    /* SSE regs */\
    x(XMM0) \
    x(XMM1) \
    x(XMM2) \
    x(XMM3) \
    x(XMM4) \
    x(XMM5) \
    x(XMM6) \
    x(XMM7) \
    x(XMM8) \
    x(XMM9) \
    x(XMM10) \
    x(XMM11) \
    x(XMM12) \
    x(XMM13) \
    x(XMM14) \
    x(XMM15) \
    x(MXCSR) \
    /* Instruction pointer */ \
    x(RIP) \
    /* Segments */ \
    x(FS) \
    x(GS) \
    x(CS) \
    x(SS) \
    x(DS) \
    x(ES) \
    x(MAXIMUM)
100 |
101 |
102 | enum class X86_64Register {
103 | #define declare_enum(x) x,
104 | LLIR_ENUMERATE_X86_64_REGISTERS(declare_enum)
105 | #undef declare_enum
106 | };
107 |
108 |
--------------------------------------------------------------------------------
/src/arch/x86_64/syscalls.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 |
24 | #include
25 |
26 | namespace retrec {
27 |
28 | enum class SyscallLinuxX86_64 : int64_t {
29 | #define declare_enum(name, val) \
30 | name = val,
31 |
32 | ENUMERATE_GENERIC_LINUX_SYSCALLS(declare_enum)
33 | #undef declare_enum
34 | };
35 |
// Compile-time description of the Linux x86_64 syscall ABI: the syscall number type,
// the mapping from C types to architecture-agnostic types, and the lookup of syscall
// signatures. The lookup tables are generated by the MAGIC_GEN_* macros below.
struct SyscallDetailsLinuxX86_64 {
    using SyscallNumberT = SyscallLinuxX86_64;

    //
    // Definitions
    //

    // Define architecture-specific type mappings
    // Each x(A, B) entry pairs the C type A with the agnostic type sc_types::B
    // (see access_type_a/access_type_b below).
    #define enumerate_type_mappings(x) \
        x(char, u8_le) \
        x(short, s16_le) \
        x(int, s32_le) \
        x(long, s64_le) \
        x(long long, s64_le) \
        x(unsigned char, u8_le) \
        x(unsigned short, u16_le) \
        x(unsigned int, u32_le) \
        x(unsigned long, u64_le) \
        x(unsigned long long, u64_le) \
        x(void *, ptr64) \
        /* Declare aliases for agnostic types */ \
        ENUMERATE_SYSCALL_ARG_TYPES(x)

    // Define signatures of all supported syscalls
    #define enumerate_syscalls(x) \
        /* Enumerate common syscalls first */ \
        ENUMERATE_COMMON_SYSCALL_SIGNATURES(x)

    // Generate the type-to-type lookup named arch_types from the mappings above
    #define access_type_a(a, _) a,
    #define access_type_b(_, b) sc_types::b,
    MAGIC_GEN_TYPE_TO_TYPE_LOOKUP(enumerate_type_mappings, arch_types, access_type_a, access_type_b)
    #undef access_type_a
    #undef access_type_b

    // Generate the enum-to-type lookup named signatures_lut, keyed by SyscallNumberT
    // NOTE(review): SyscallSignature appears to have lost its template argument list here - confirm against upstream.
    #define access_enum(e, ...) SyscallLinuxX86_64::e,
    #define access_sig(name, ret, ...) SyscallSignature,
    MAGIC_GEN_ENUM_TO_TYPE_LOOKUP(enumerate_syscalls, signatures_lut, access_enum, access_sig, SyscallNumberT)
    #undef access_enum
    #undef access_sig

    //
    // Accessors
    //

    // Accessor for retrieving the signature of a syscall
    // NOTE(review): the template parameter list and the lookup's argument appear to be missing - confirm against upstream.
    template
    using signature_from_syscall = signatures_lut_look_up_type;

    // Accessor for retrieving the corresponding agnostic type for a given architecture-specific type
    // NOTE(review): the template parameter list and the lookup's argument appear to be missing - confirm against upstream.
    template
    using agnostic_type_from_type = arch_types_look_up_type_b;

    //
    // Run-time helpers
    //

    // x86_64 syscall numbers are returned unchanged as the generic syscall number
    static constexpr int64_t get_generic_syscall_number(int64_t x86_64_syscall_number) { return x86_64_syscall_number; }

    // The enumeration helpers are only needed while generating the lookups above
    #undef enumerate_syscalls
    #undef enumerate_type_mappings
};
96 |
97 | }
98 |
--------------------------------------------------------------------------------
/src/arch/x86_64/target_environment.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | /**
21 | * Definitions and helpers for the X86_64 target binary environment
22 | */
23 | #pragma once
24 |
25 | #include
26 | #include
27 | #include
28 |
29 | #include
30 |
31 | namespace retrec {
32 | namespace x86_64 {
33 |
34 | /**
35 | * Initialize a stack with the given argv/envp.
36 | * Returns the decremented stack pointer that should be passed to translated runtime.
37 | */
38 | void *initialize_target_stack(void *stack, const std::vector &argv,
39 | const std::vector &envp, const elf_loader &elf_loader);
40 |
41 |
42 | struct CpuidResult {
43 | uint32_t eax;
44 | uint32_t ebx;
45 | uint32_t ecx;
46 | uint32_t edx;
47 | };
48 |
49 | /**
50 | * Returns the CPUID(func, subfunc) result for the target CPU
51 | */
52 | void get_cpuid(uint32_t func, uint32_t subfunc, CpuidResult *res);
53 |
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/src/codegen.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 |
26 | using namespace retrec;
27 |
28 | std::unique_ptr retrec::make_codegen(CodegenBackend backend, Architecture target_arch, execution_context &econtext,
29 | virtual_address_mapper *vam) {
30 | switch (backend) {
31 | case CodegenBackend::PowerPC64LE:
32 | if constexpr (RETREC_CODEGEN_PPC64LE)
33 | return make_codegen_ppc64le(target_arch, econtext, vam);
34 | break;
35 |
36 | case CodegenBackend::Generic:
37 | if constexpr (RETREC_CODEGEN_GENERIC)
38 | return make_codegen_generic(target_arch, econtext, vam);
39 | break;
40 | }
41 |
42 | ASSERT_NOT_REACHED();
43 | }
44 |
45 |
--------------------------------------------------------------------------------
/src/codegen.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
#include
#include
#include

#include
#include
#include
#include <utility>
29 |
30 | namespace retrec {
31 |
32 | class virtual_address_mapper; // Forward
33 |
34 | class lifted_llir_block {
35 | public:
36 | // Declared using struct+enum instead of C++11 scoped enums to allow more
37 | // ergonomic usage as a bit-field.
38 | struct Flags {
39 | enum Type : uint32_t {
40 | NONE,
41 | FULL_FUNCTION = (1 << 0), // Block is a full function lifted from the target executable
42 | };
43 | };
44 |
45 | lifted_llir_block(std::vector &&insns_, Flags::Type flags_) : insns(insns_), flags(flags_) {}
46 |
47 | const std::vector &get_insns() const { return insns; }
48 | Flags::Type get_flags() const { return flags; }
49 |
50 | private:
51 | std::vector insns;
52 | Flags::Type flags;
53 | };
54 |
/**
 * Describes a buffer of translated code by its address and size.
 * The class only records these values; it performs no allocation or deallocation.
 */
class translated_code_region {
    void *buffer_start;
    size_t buffer_length;

public:
    translated_code_region(void *code_buffer_, size_t code_buffer_size_)
        : buffer_start { code_buffer_ }, buffer_length { code_buffer_size_ } {}

    // Address of the code buffer
    void *code() { return buffer_start; }

    // Size of the code buffer as given at construction
    size_t size() const { return buffer_length; }
};
66 |
// Code generation backends that make_codegen() can be asked for.
enum class CodegenBackend {
    Generic,
    PowerPC64LE,
};

// Backend used by default: PowerPC64LE when RETREC_CODEGEN_PPC64LE is enabled,
// otherwise Generic when RETREC_CODEGEN_GENERIC is enabled.
// NOTE(review): with neither switch enabled the lambda returns nothing, which
// presumably fails to compile on purpose - confirm.
constexpr CodegenBackend default_codegen_backend = []{
    if constexpr (RETREC_CODEGEN_PPC64LE)
        return CodegenBackend::PowerPC64LE;
    else if constexpr (RETREC_CODEGEN_GENERIC)
        return CodegenBackend::Generic;
}();
78 |
79 | class codegen {
80 | public:
81 | virtual status_code init() = 0;
82 | virtual status_code translate(const lifted_llir_block& insns, std::optional &out) = 0;
83 | virtual uint64_t get_last_untranslated_access(void *rctx) = 0;
84 | virtual status_code patch_translated_access(void *rctx, uint64_t resolved_haddr) = 0;
85 | virtual ~codegen() {}
86 | };
87 |
88 | std::unique_ptr make_codegen(CodegenBackend backend, Architecture target_arch, execution_context &econtext,
89 | virtual_address_mapper *vam);
90 |
91 | }
92 |
--------------------------------------------------------------------------------
/src/disassembler.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 | #include
22 |
23 | #include
24 | #include
25 |
26 | using namespace retrec;
27 |
/**
 * Format the first `len` elements of `arr` as "{a, b, c}" using std::to_string.
 * An empty range yields "{}".
 */
template <typename T>
std::string array_to_string(T arr[], size_t len) {
    std::string ret = "{";
    for (size_t i = 0; i < len; i++) {
        // Separator goes between elements only
        if (i != 0)
            ret += ", ";
        ret += std::to_string(arr[i]);
    }
    ret += "}";
    return ret;
}
41 |
42 | template <>
43 | std::string array_to_string(cs_x86_op arr[], size_t len) {
44 | std::string ret = "{";
45 | if (len) {
46 | for (size_t i = 0; i < len - 1; i++) {
47 | ret += std::to_string(arr[i].type) + ", ";
48 | }
49 | ret += std::to_string(arr[len - 1].type) + "}";
50 | } else {
51 | ret += "}";
52 | }
53 | return ret;
54 | };
55 |
56 | disassembler::~disassembler() {
57 | if (init_done)
58 | cs_close(&capstone_handle);
59 | }
60 |
61 | status_code disassembler::init() {
62 | cs_arch capstone_arch;
63 | cs_mode capstone_mode;
64 | switch(loader.target_arch()) {
65 | case Architecture::X86_64:
66 | capstone_arch = CS_ARCH_X86;
67 | capstone_mode = CS_MODE_64;
68 |
69 | if (cs_open(capstone_arch, capstone_mode, &capstone_handle) != CS_ERR_OK)
70 | return status_code::NOMEM;
71 |
72 | lifter = std::make_unique();
73 | break;
74 |
75 | default:
76 | pr_error("Unsupported architecture %d!\n", (int)arch);
77 | return status_code::BADARCH;
78 | }
79 |
80 | cs_option(capstone_handle, CS_OPT_DETAIL, CS_OPT_ON);
81 |
82 | init_done = true;
83 | return status_code::SUCCESS;
84 | }
85 |
86 | status_code disassembler::disassemble_region(const void *code, size_t max_length, uint64_t ip,
87 | std::vector &llir_out, Mode mode) {
88 | cs_insn *cur = cs_malloc(capstone_handle);
89 | unique_cs_insn_arr insns(cur, cs_insn_deleter(1));
90 | std::vector llir_insns;
91 |
92 | while (cs_disasm_iter(capstone_handle, (const uint8_t **)&code, &max_length, &ip, cur)) {
93 | cs_detail *detail = cur->detail;
94 | assert(detail);
95 |
96 | pr_debug("0x%zx: %s %s, operands: %s, groups: %s\n", cur->address, cur->mnemonic, cur->op_str,
97 | array_to_string(detail->x86.operands, detail->x86.op_count).c_str(),
98 | array_to_string(detail->groups, detail->groups_count).c_str());
99 |
100 | // Lift to LLIR
101 | status_code res = lifter->lift(cur, llir_insns);
102 | if (res != status_code::SUCCESS) {
103 | pr_error("Failed to lift instruction!\n");
104 | return res;
105 | }
106 |
107 | if (mode == Mode::PARTIAL) {
108 | // In partial mode, we need to stop whenever a branch is encountered
109 | auto last_insn = llir_insns.end() - 1;
110 | if (last_insn->iclass() == llir::Insn::Class::BRANCH)
111 | break;
112 | }
113 |
114 | pr_debug("LLIR: %s\n", llir::to_string(*(llir_insns.end() - 1)).c_str());
115 | }
116 |
117 | llir_out = std::move(llir_insns);
118 | return status_code::SUCCESS;
119 | }
120 |
--------------------------------------------------------------------------------
/src/disassembler.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #include
24 |
25 | #include
26 | #include
27 |
28 | #include
29 |
30 | namespace retrec {
31 |
32 | //
33 | // Unique Pointer for cs_insn array
34 | //
35 | struct cs_insn_deleter {
36 | size_t count;
37 | cs_insn_deleter(size_t count_) : count(count_) {}
38 | void operator()(cs_insn *insn) { cs_free(insn, count); }
39 | };
40 | using unique_cs_insn_arr = std::unique_ptr;
41 |
42 | class llir_lifter {
43 | public:
44 | virtual status_code lift(cs_insn *insn, std::vector &out) = 0;
45 | virtual ~llir_lifter() {};
46 | };
47 |
48 | class disassembler {
49 | elf_loader &loader;
50 |
51 | bool init_done = false;
52 | Architecture arch;
53 | csh capstone_handle;
54 | std::unique_ptr lifter;
55 | public:
56 | DISABLE_COPY_AND_MOVE(disassembler)
57 | explicit disassembler(elf_loader &loader_) :
58 | loader(loader_) {}
59 | ~disassembler();
60 |
61 | enum class Mode {
62 | FULL_FUNCTION, // Disassemble an entire function
63 | PARTIAL, // Disassemble until the first branch insn
64 | };
65 |
66 | status_code init();
67 | status_code disassemble_region(const void *code, size_t max_length, uint64_t ip,
68 | std::vector &llir_out, Mode mode);
69 | };
70 |
71 | }
72 |
73 |
--------------------------------------------------------------------------------
/src/dynamic_recompiler.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 |
22 | using namespace retrec;
23 | /**
24 |  * Bring up every component the recompiler depends on.
25 |  *
26 |  * Steps run in a fixed order: execution context, ELF loader (init, then
27 |  * load_all), disassembler, codegen, syscall emulator.
28 |  *
29 |  * @return status_code::SUCCESS, or the first non-SUCCESS status reported by a
30 |  *         step; later steps are skipped in that case.
31 |  */
32 | status_code dynamic_recompiler::init() {
33 |     if (auto ret = econtext.init(); ret != status_code::SUCCESS)
34 |         return ret;
35 |     if (auto ret = loader.init(); ret != status_code::SUCCESS)
36 |         return ret;
37 |     if (auto ret = loader.load_all(); ret != status_code::SUCCESS)
38 |         return ret;
39 |     if (auto ret = disasm.init(); ret != status_code::SUCCESS)
40 |         return ret;
41 |
42 |     // We have to wait until here to initialize the codegen with
43 |     // the correct architecture detected by the elf loader.
44 |     gen = make_codegen(backend, loader.target_arch(), econtext, &vam);
45 |     if (auto ret = gen->init(); ret != status_code::SUCCESS)
46 |         return ret;
47 |
48 |     // The emulator is constructed for the same target architecture.
49 |     syscall_emu = std::make_unique<syscall_emulator>(loader.target_arch());
50 |
51 |     return status_code::SUCCESS;
52 | }
53 | /**
54 |  * Translate the ELF entrypoint, then run translated code until it halts.
55 |  *
56 |  * @return SUCCESS once translated code reports HALT; BADELF when no GLOBAL
57 |  *         symbol exactly matches the entrypoint; otherwise the status of the
58 |  *         step that failed.
59 |  */
60 | status_code dynamic_recompiler::execute() {
61 |     // Lookup entrypoint's symbol
62 |     const auto *entry_symbol = loader.lookup(loader.entrypoint(), loader.text_section_index(),
63 |                                              elf_loader::Symbol::Bind::GLOBAL, elf_loader::LookupPolicy::EXACT);
64 |     if (!entry_symbol) {
65 |         pr_error("Failed to find entrypoint symbol!\n");
66 |         return status_code::BADELF;
67 |     }
68 |
69 |     // Translate function
70 |     status_code res = translate_elf_function(*entry_symbol);
71 |     if (res != status_code::SUCCESS)
72 |         return res;
73 |
74 |     // Every successful translation appends its region, so the entrypoint's code
75 |     // is the newest element; begin() would pick up a stale region if anything
76 |     // had been translated earlier.
77 |     auto &code = translated_regions.back();
78 |
79 |     // Initialize runtime context with entrypoint as target
80 |     res = econtext.initialize_runtime_context(loader.target_arch(), code.code(), &vam, syscall_emu.get());
81 |     if (res != status_code::SUCCESS) {
82 |         pr_error("Failed to initialize runtime context for translated code!\n");
83 |         return res;
84 |     }
85 |
86 |     // Code execution loop
87 |     for (;;) {
88 |         res = econtext.enter_translated_code();
89 |         if (res == status_code::HALT) {
90 |             // Translated code gracefully exited
91 |             return status_code::SUCCESS;
92 |         }
93 |         if (res != status_code::UNTRANSLATED) {
94 |             // Other status - return it
95 |             return res;
96 |         }
97 |
98 |         res = runtime_handle_untranslated_access();
99 |         if (res != status_code::SUCCESS) {
100 |             pr_error("Failed to handle untranslated access: %s\n", status_code_str(res));
101 |             return res;
102 |         }
103 |     }
104 | }
105 | // Translate the whole function described by `symbol`; on success its code is appended to translated_regions.
106 | status_code dynamic_recompiler::translate_elf_function(const elf_loader::Symbol &symbol) {
107 |     // Determine length of target routine
108 |     const uint64_t func_len = loader.get_symbol_size(symbol);
109 |     if (!func_len) {
110 |         // No size is recorded for the symbol (hand-written assembly often omits
111 |         // the .size directive), so fall back to raw block translation.
112 |         return translate_raw_code_block(symbol.value);
113 |     }
114 |     pr_debug("function length: %zu\n", func_len);
115 |
116 |     const void *code_ptr = loader.get_symbol_data_ptr(symbol);
117 |     if (code_ptr == nullptr) {
118 |         pr_error("Failed to get symbol data ptr!\n");
119 |         return status_code::NOMEM;
120 |     }
121 |
122 |     // Disassemble
123 |     std::vector lifted_insns; // NOTE(review): element type looks lost in extraction
124 |     const status_code disasm_res = disasm.disassemble_region(
125 |         code_ptr, func_len, symbol.value, lifted_insns, disassembler::Mode::FULL_FUNCTION);
126 |     if (disasm_res != status_code::SUCCESS) {
127 |         pr_error("Failed to disassemble region!\n");
128 |         return disasm_res;
129 |     }
130 |
131 |     lifted_llir_block block(std::move(lifted_insns), lifted_llir_block::Flags::FULL_FUNCTION);
132 |
133 |     // Translate the routine
134 |     std::optional translated_code; // NOTE(review): template argument looks lost in extraction
135 |     const auto translate_res = gen->translate(block, translated_code);
136 |     if (translate_res != status_code::SUCCESS) {
137 |         pr_error("Failed to translate routine!\n");
138 |         return translate_res;
139 |     }
140 |     translated_regions.push_back(*translated_code);
141 |
142 |     return status_code::SUCCESS;
143 | }
144 | /**
145 |  * Translate the code at `vaddr` up to the first branch (Mode::PARTIAL) and append it to
146 |  * translated_regions. Returns BADACCESS when no mapping contains `vaddr`.
147 |  */
148 | status_code dynamic_recompiler::translate_raw_code_block(uint64_t vaddr) {
149 |     // Find the Mapping that the vaddr lies within
150 |     size_t mapping_index;
151 |     auto mapping_opt = econtext.map().find(vaddr, 1, &mapping_index, process_memory_map::FindPolicy::CONTAINS);
152 |     if (!mapping_opt) {
153 |         pr_debug("Unable to find mapping containing target vaddr 0x%lx\n", vaddr);
154 |         return status_code::BADACCESS;
155 |     }
156 |
157 |     // Determine the maximum length of the code buffer: the rest of the containing
158 |     // mapping plus the size of every mapping that follows it without a gap.
159 |     auto &map = econtext.map();
160 |     size_t max_size = mapping_opt->end - vaddr;
161 |     for (size_t i = mapping_index + 1; i < map.size(); i++) {
162 |         if (map[i - 1].end != map[i].start)
163 |             break; // Discontinuity, stop increasing size
164 |
165 |         // Add the mapping's length. Adding its absolute end address instead
166 |         // would inflate max_size far beyond the bytes that are actually mapped.
167 |         max_size += map[i].end - map[i].start;
168 |     }
169 |
170 |     pr_debug("Translating raw code region of max size: %zu\n", max_size);
171 |
172 |     // Disassemble
173 |     std::vector lifted_insns; // NOTE(review): element type looks lost in extraction
174 |     status_code res = disasm.disassemble_region((void *)vaddr, max_size, vaddr, lifted_insns,
175 |                                                 disassembler::Mode::PARTIAL);
176 |     if (res != status_code::SUCCESS) {
177 |         pr_error("Failed to disassemble region!\n");
178 |         return res;
179 |     }
180 |
181 |     lifted_llir_block block(std::move(lifted_insns), lifted_llir_block::Flags::NONE);
182 |
183 |     // Translate the partial routine
184 |     std::optional translated_code; // NOTE(review): template argument looks lost in extraction
185 |     res = gen->translate(block, translated_code);
186 |     if (res != status_code::SUCCESS) {
187 |         pr_error("Failed to translate routine!\n");
188 |         return res;
189 |     }
190 |     translated_regions.push_back(*translated_code);
191 |
192 |     return status_code::SUCCESS;
193 | }
194 |
195 | /**
196 |  * Translate the code block present at the specified target virtual address.
197 |  * Dispatches to translate_elf_function when a symbol in the ELF text section
198 |  * contains the address, and to translate_raw_code_block otherwise.
199 |  *
200 |  * @param address      target virtual address to translate
201 |  * @param resolved_out receives the mapper's lookup result for `address` on success
202 |  * @return SUCCESS, the failing translation status, or BADACCESS if the address
203 |  *         still cannot be resolved afterwards
204 |  */
205 | status_code dynamic_recompiler::translate_referenced_address(uint64_t address, uint64_t *resolved_out) {
206 |     // See if address is contained within a function in the original ELF binary
207 |     const auto *func_sym = loader.lookup(address, loader.text_section_index(), elf_loader::Symbol::Bind::_ANY,
208 |                                          elf_loader::LookupPolicy::CONTAINS);
209 |
210 |     // With a symbol, translate the whole function containing the target vaddr.
211 |     // Without one, the branch target doesn't lie within a function in the original
212 |     // binary, or it does but the function isn't marked with a .size attribute:
213 |     // treat it as a raw region and lift until the first branch.
214 |     const status_code translate_res = func_sym ? translate_elf_function(*func_sym)
215 |                                                : translate_raw_code_block(address);
216 |     if (translate_res != status_code::SUCCESS)
217 |         return translate_res;
218 |
219 |     // Ensure that the virtual address mapper can now resolve the vaddr
220 |     const uint64_t resolved = vam.lookup(address);
221 |     if (resolved == 0) {
222 |         pr_debug("Couldn't resolve virtual address 0x%lx even after function translation! Bailing out.\n", address);
223 |         return status_code::BADACCESS;
224 |     }
225 |     *resolved_out = resolved;
226 |     return status_code::SUCCESS;
227 | }
228 |
229 | /**
230 |  * Service an UNTRANSLATED exit from translated code: make sure the address it
231 |  * referenced has a translation, then call into the codegen to patch the access.
232 |  * Any failure to translate the address is reported to the caller as BADACCESS.
233 |  */
234 | status_code dynamic_recompiler::runtime_handle_untranslated_access() {
235 |     void *const rctx = econtext.runtime_ctx();
236 |     const uint64_t referenced_vaddr = gen->get_last_untranslated_access(rctx);
237 |
238 |     pr_info("Translating access to virtual address 0x%lx\n", referenced_vaddr);
239 |
240 |     // Translate code at referenced address if it isn't already translated
241 |     uint64_t resolved = vam.lookup(referenced_vaddr);
242 |     const bool needs_translation = (resolved == 0);
243 |     if (needs_translation &&
244 |         translate_referenced_address(referenced_vaddr, &resolved) != status_code::SUCCESS) {
245 |         pr_error("Failed to resolve reference to virtual address: 0x%lx\n", referenced_vaddr);
246 |         return status_code::BADACCESS;
247 |     }
248 |
249 |     // Patch code buffer with reference to newly translated address
250 |     return gen->patch_translated_access(rctx, resolved);
251 | }
252 |
253 |
--------------------------------------------------------------------------------
/src/dynamic_recompiler.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 | #include
28 | #include
29 | #include
30 |
31 | #include
32 | #include
33 | #include
34 | #include
35 | #include
36 |
37 | namespace retrec {
38 | // Top-level driver: owns the environment, loader, disassembler and codegen, and runs the translate/execute loop.
39 | class dynamic_recompiler {
40 |     // Declaration order matters: members are constructed in this order, and
41 |     // econtext, loader and disasm store references to the members passed to them.
42 |     target_environment target_env;
43 |     execution_context econtext;
44 |     elf_loader loader;
45 |     disassembler disasm;
46 |     CodegenBackend backend = default_codegen_backend;
47 |
48 |     // NOTE(review): template arguments of gen, translated_regions and syscall_emu look lost in extraction.
49 |     std::unique_ptr gen;
50 |     std::list translated_regions;
51 |     virtual_address_mapper vam;
52 |     std::unique_ptr syscall_emu;
53 |
54 |     // Translation helpers
55 |     status_code translate_elf_function(const elf_loader::Symbol &symbol);
56 |     status_code translate_raw_code_block(uint64_t vaddr);
57 |     status_code translate_referenced_address(uint64_t address, uint64_t *resolved_out);
58 |     status_code runtime_handle_untranslated_access();
59 |
60 | public:
61 |     // Takes ownership of the environment. The loader is bound to the member's
62 |     // binary: target_env_ has been moved from and dies when the constructor
63 |     // returns, so a reference into it would dangle.
64 |     dynamic_recompiler(target_environment target_env_) :
65 |         target_env(std::move(target_env_)),
66 |         econtext(target_env, loader),
67 |         loader(econtext, target_env.binary),
68 |         disasm(loader) {}
69 |
70 |     // Public functions
71 |     status_code init();
72 |     status_code execute();
73 | };
74 |
75 | }
76 |
77 |
--------------------------------------------------------------------------------
/src/elf_loader.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #include
24 | #include
25 |
26 | #include
27 | #include
28 | #include
29 |
30 | #include
31 | #include
32 |
33 | namespace retrec {
34 | // ELF reader bound to an execution_context and a mapped_file; exposes header fields and symbol lookup.
35 | class elf_loader {
36 | execution_context &econtext;
37 | mapped_file &file;
38 | Elf *elf = nullptr;
39 | GElf_Ehdr ehdr;
40 | Architecture arch;
41 | // ehdr and arch above have no initializer; the accessors below return them as-is.
42 | uint64_t text_shndx { 0 };
43 | uint64_t base_load_address { 0 };
44 | GElf_Shdr text_shdr;
45 | public:
46 | DISABLE_COPY_AND_MOVE(elf_loader)
47 | elf_loader(execution_context &econtext_, mapped_file &file_) :
48 | econtext(econtext_), file(file_) {}
49 | ~elf_loader();
50 | // dynamic_recompiler::init() calls init() and then load_all(), stopping at the first failure.
51 | status_code init();
52 | status_code load_all();
53 | // One symbol table entry. Presumably filled from the ELF symbol table -- confirm in elf_loader.cpp.
54 | struct Symbol {
55 | std::string name;
56 | uint8_t info;
57 | uint8_t other;
58 | uint64_t shndx;
59 | uint64_t value;
60 | uint64_t size;
61 | enum class Bind {
62 | LOCAL = 0,
63 | GLOBAL = 1,
64 | WEAK = 2,
65 | NUM = 3,
66 | GNU_UNIQUE = 10,
67 | // Passed to lookup() by dynamic_recompiler; presumably a match-any wildcard -- confirm
68 | _ANY = 255
69 | } bind;
70 | };
71 | // Matching rule applied by lookup().
72 | enum class LookupPolicy {
73 | EXACT, // Exact matches only
74 | CONTAINS, // addr is within symbol start + size
75 | };
76 | // lookup() returns a pointer that callers test against null (see dynamic_recompiler.cpp).
77 | [[nodiscard]] const Symbol *lookup(uint64_t addr, uint64_t shndx, Symbol::Bind bind, LookupPolicy policy) const;
78 | [[nodiscard]] uint64_t get_symbol_size(const Symbol &sym) const;
79 | const void *get_symbol_data_ptr(const elf_loader::Symbol &sym);
80 | // Inline accessors for state held by the loader.
81 | Architecture target_arch() const { return arch; }
82 | uint64_t entrypoint() const { return ehdr.e_entry; }
83 | const std::vector &symbol_table() const { return symbols; }
84 | uint64_t text_section_index() const { return text_shndx; }
85 | const auto &get_ehdr() const { return ehdr; }
86 | auto get_base_address() const { return base_load_address; }
87 | // NOTE(review): the element type of the symbol vectors looks lost in extraction -- confirm
88 | private:
89 | std::vector symbols;
90 | };
91 |
92 | }
93 |
--------------------------------------------------------------------------------
/src/execution_context.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 | #include
22 | #include
23 |
24 | #include
25 | #include
26 | #include
27 |
28 | using namespace retrec;
29 | // Build the address map for this process and read the page size; the environment and loader are held by reference.
30 | execution_context::execution_context(const target_environment &target_env_, elf_loader &loader_)
31 |     : vaddr_map(getpid()),
32 |       page_size(sysconf(_SC_PAGESIZE)),
33 |       target_env(target_env_), loader(loader_) {}
34 | execution_context::~execution_context() = default;
35 | // Set up the address-space map, then reserve an RWX code buffer in HIGH_MEM_RANGE for the code allocator.
36 | status_code execution_context::init() {
37 |     // Setup virtual address space allocator
38 |     if (status_code map_res = vaddr_map.init(); map_res != status_code::SUCCESS)
39 |         return map_res;
40 |
41 |     // Allocate code buffer
42 |     constexpr int code_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
43 |     void *code_start;
44 |     const status_code alloc_res =
45 |         allocate_and_map_vaddr(HIGH_MEM_RANGE, CODE_REGION_MAX_SIZE, code_prot, &code_start);
46 |     if (alloc_res != status_code::SUCCESS)
47 |         return alloc_res;
48 |
49 |     code_allocator.init(code_start, CODE_REGION_MAX_SIZE);
50 |     return status_code::SUCCESS;
51 | }
52 | // Reserve `size` bytes of address space inside `range` and back them with anonymous private memory.
53 | status_code execution_context::allocate_and_map_vaddr(process_memory_map::Range range, size_t size, int prot, void **region_out) {
54 |     const uint64_t vaddr = vaddr_map.allocate_vaddr_in_range(size, range);
55 |     if (vaddr == 0)
56 |         return status_code::NOMEM;
57 |     // Map the allocated address space
58 |     void *mem = mmap((void *)vaddr, size, prot, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
59 |     if (mem != MAP_FAILED) {
60 |         *region_out = mem;
61 |         return status_code::SUCCESS;
62 |     }
63 |
64 |     // Mapping failed: report it, then release the reserved range again
65 |     pr_debug("mmap failed at %p: %m\n", (void *)vaddr);
66 |     vaddr_map.free(vaddr, size);
67 |     return status_code::NOMEM;
68 | }
69 | // Allocate a read/write stack of `size` bytes (rounded up to pages) plus a guard page at its lowest address; *stack_out receives the top address.
70 | status_code execution_context::allocate_new_stack(size_t size, void **stack_out) {
71 |     const size_t guard_size = page_size;
72 |     const size_t allocation_size = align_to(size, page_size) + guard_size;
73 |     assert(allocation_size >= 2 * guard_size); // guard page plus at least one usable page
74 |     void *stack;
75 |     auto res = allocate_and_map_vaddr(HIGH_MEM_RANGE, allocation_size, PROT_READ | PROT_WRITE, &stack);
76 |     if (res != status_code::SUCCESS)
77 |         return res;
78 |     // Mark the guard page as !R, !W, !X. A stack without its guard is not handed
79 |     // out: undo the mapping and the reservation instead.
80 |     if (mprotect(stack, guard_size, PROT_NONE) != 0) {
81 |         munmap(stack, allocation_size);
82 |         vaddr_map.free((uint64_t)stack, allocation_size);
83 |         return status_code::NOMEM;
84 |     }
85 |     *stack_out = (void *)((char *)stack + allocation_size);
86 |     return status_code::SUCCESS;
87 | }
87 | // Map `len` bytes of anonymous memory at exactly `start` and record the mapping with the given type.
88 | // Returns BADALIGN for an unaligned start, OVERLAP when find() matches the range, NOMEM if mapping fails.
89 | status_code execution_context::allocate_region(uint64_t start, size_t len, int prot, void **region_out,
90 |                                                process_memory_map::Mapping::Type type) {
91 |     if (start % page_size != 0)
92 |         return status_code::BADALIGN;
93 |
94 |     if (vaddr_map.find(start, len, nullptr, process_memory_map::FindPolicy::CONTAINS))
95 |         return status_code::OVERLAP;
96 |
97 |     // Just try to map the region with mmap
98 |     void *region = mmap((void *)start, len, prot, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
99 |     if (region == MAP_FAILED)
100 |         return status_code::NOMEM;
101 |     if ((uint64_t)region != start) {
102 |         pr_info("Kernel didn't map pages at requested address!\n");
103 |         munmap(region, len);
104 |         return status_code::NOMEM;
105 |     }
106 |
107 |     // Mark region as allocated, and only now announce it: logging before mmap
108 |     // would also report regions that were never mapped.
109 |     vaddr_map.mark_allocated({start, start+len, type, prot});
110 |     pr_info("allocated region at 0x%lx\n", start);
111 |     if (region_out)
112 |         *region_out = region;
113 |     return status_code::SUCCESS;
114 | }
115 | // Return `ptr` as a host pointer when find() matches sizeof(ptr) bytes at that address, otherwise nullptr.
116 | void *execution_context::get_region_ptr(uint64_t ptr) {
117 |     const bool mapped =
118 |         !!vaddr_map.find(ptr, sizeof(ptr), nullptr, process_memory_map::FindPolicy::CONTAINS);
119 |
120 |     return mapped ? (void *)ptr : nullptr;
121 | }
122 | // Allocate the initial stack, populate it with argv/envp, then create and initialize the runtime context.
123 | status_code execution_context::initialize_runtime_context(Architecture target_arch, void *entry, virtual_address_mapper *vam,
124 |                                                           syscall_emulator *syscall_emu) {
125 |     // Allocate an initial stack + guard page
126 |     void *new_stack;
127 |     auto res = allocate_new_stack(DEFAULT_STACK_SIZE, &new_stack);
128 |     if (res != status_code::SUCCESS) {
129 |         pr_error("Failed to allocate stack for translated code: %s\n", status_code_str(res));
130 |         return res;
131 |     }
132 |
133 |     // Initialize the stack with program arguments
134 |     void *sp = initialize_target_stack(target_arch, new_stack, target_env.argv, target_env.envp, loader);
135 |
136 |     // Call host-architecture-specific function to populate the runtime context.
137 |     // The dispatcher type is the one forward-declared in execution_context.h.
138 |     runtime_context = std::make_unique<runtime_context_dispatcher>(default_codegen_backend);
139 |
140 |     // init() yields either SUCCESS or the failure to report, so its result is
141 |     // returned directly.
142 |     return runtime_context->init(target_arch, entry, sp, vam, syscall_emu);
143 | }
144 | // Run translated code through the runtime context, which must already exist (asserted).
145 | status_code execution_context::enter_translated_code() {
146 |     assert(runtime_context);
147 |     return (*runtime_context).execute();
148 | }
149 | // Change the protection of `len` bytes at `start` and record it; NOMEM if find() has no match, BADACCESS if mprotect fails.
150 | status_code execution_context::protect_region(uint64_t start, uint64_t len, int prot) {
151 |     auto mapping = vaddr_map.find(start, len, nullptr);
152 |     if (!mapping)
153 |         return status_code::NOMEM;
154 |     // Only record the new protection once the kernel has actually applied it
155 |     if (mprotect((void *)start, len, prot) != 0)
156 |         return status_code::BADACCESS;
157 |     mapping->prot = prot; // NOTE(review): if find() returns a copy this does not update the map -- confirm
158 |     return status_code::SUCCESS;
159 | }
160 | // Data pointer of the runtime context, which must already exist (asserted); dynamic_recompiler hands it to the codegen.
161 | void *execution_context::runtime_ctx() {
162 |     assert(runtime_context);
163 |     return (*runtime_context).get_data();
164 | }
165 |
--------------------------------------------------------------------------------
/src/execution_context.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 | #include
28 | #include
29 |
30 | #include
31 | #include
32 | #include
33 | #include
34 |
35 | namespace retrec {
36 |
37 | // Forward-declare translated_code_region since #including its header results in cyclic includes.
38 | class translated_code_region;
39 | class runtime_context_dispatcher;
40 |
41 | // Forward-declare elf_loader
42 | class elf_loader;
43 |
44 | //
45 | // Configuration of target environment: the binary to run plus its arguments and environment
46 | //
47 | struct target_environment {
48 | mapped_file binary; // The mapped target executable (main.cpp maps argv[1])
49 | std::vector argv; // NOTE(review): element type looks lost in extraction -- confirm
50 | std::vector envp; // Built from the host envp in main.cpp
51 | };
52 |
53 | //
54 | // A simple execution context for running in the current process' address space.
55 | //
56 | class execution_context {
57 | process_memory_map vaddr_map;
58 | long page_size;
59 | const target_environment &target_env;
60 | elf_loader &loader;
61 | simple_placement_allocator code_allocator;
62 | // Created by initialize_runtime_context(). NOTE(review): template argument looks lost in extraction
63 | std::unique_ptr runtime_context;
64 |
65 | static constexpr size_t CODE_REGION_MAX_SIZE = 0x10000 * 32; // 2M ought to be enough for anybody :)
66 | static constexpr size_t DEFAULT_STACK_SIZE = 0x10000; // 64K default stack
67 |
68 | public:
69 | DISABLE_COPY_AND_MOVE(execution_context)
70 | execution_context(const target_environment &target_env_, elf_loader &loader_);
71 | ~execution_context();
72 | status_code init();
73 | // NOTE(review): VaddrLocation is not referenced by the code shown alongside this header.
74 | enum class VaddrLocation {
75 | LOW, // Low addresses; compare LOW_MEM_RANGE below (starts at 0x10000)
76 | HIGH, // High addresses; compare HIGH_MEM_RANGE below (starts at 0x3fff00000000)
77 | };
78 | // Ranges passed to allocate_and_map_vaddr(); presumably {start, end} -- confirm against process_memory_map::Range
79 | static constexpr process_memory_map::Range HIGH_MEM_RANGE = {0x3fff00000000, 0x7fffffffffff};
80 | static constexpr process_memory_map::Range LOW_MEM_RANGE = {0x10000, 0xfffeffff};
81 |
82 | //
83 | // Accessors
84 | //
85 | process_memory_map &map() { return vaddr_map; }
86 | simple_placement_allocator &get_code_allocator() { return code_allocator; }
87 | void *get_region_ptr(uint64_t ptr);
88 | void *runtime_ctx();
89 |
90 | //
91 | // Functions (each reports failure through its status_code return value)
92 | //
93 | status_code allocate_and_map_vaddr(process_memory_map::Range range, size_t size, int prot, void **region_out);
94 | status_code allocate_new_stack(size_t size, void **stack_out);
95 | status_code allocate_region(uint64_t start, size_t len, int prot, void **region_out,
96 | process_memory_map::Mapping::Type type = process_memory_map::Mapping::Type::USER);
97 | status_code protect_region(uint64_t start, size_t len, int prot);
98 | status_code initialize_runtime_context(Architecture target_arch, void *entry, virtual_address_mapper *vam,
99 | syscall_emulator *syscall_emu);
100 | status_code enter_translated_code();
101 | };
102 |
103 | }
104 |
--------------------------------------------------------------------------------
/src/instruction_stream.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #include
24 |
25 | #include
26 | #include
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include
32 | #include
33 |
34 | namespace retrec {
35 |
36 | /**
37 |  * A stream of instructions emitted by an assembler. Each instruction object can be called to
38 |  * emit the instruction to a provided buffer.
39 |  *
40 |  * Traits expected members:
41 |  *   (Type) AssemblerT - Opaque type for assembler
42 |  *   (Type) InsnT - Type for instruction (see below)
43 |  *   constexpr size_t calculate_code_size(const InsnT *insn_buf, size_t count)
44 |  *     - Function to calculate the total code size that will be emitted for a given instruction buffer.
45 |  *
46 |  * InsnT is the type of each entry in this instruction stream. Its expected members are:
47 |  *   Public constructor that can be called through a perfect forwarding template
48 |  *   Move constructor
49 |  *   status_code operator()(assembler*) - Method to emit the instruction using the provided assembler
50 |  *   void add_aux(...) - Method to construct an auxiliary data structure in-place to store with the instruction
51 |  */
52 | template <typename Traits>
53 | class instruction_stream {
54 | public:
55 |     instruction_stream(typename Traits::AssemblerT &assembler_)
56 |         : insns(), assembler(assembler_) {}
57 |     DISABLE_COPY_AND_MOVE(instruction_stream)
58 |
59 |     // Construct a new instruction in place at the end of the stream; returns a reference to it.
60 |     template <typename... Ts>
61 |     auto &emplace_back(Ts&&... params) { return insns.emplace_back(std::forward<Ts>(params)...); }
62 |
63 |     /**
64 |      * Append auxiliary data to the last instruction emitted.
65 |      */
66 |     template <typename... Ts>
67 |     void add_aux(Ts&&... args) {
68 |         assert(insns.size());
69 |         insns.back().add_aux(std::forward<Ts>(args)...);
70 |     }
71 |
72 |     /**
73 |      * Emit all instructions in this stream to the provided code buffer.
74 |      */
75 |     status_code emit_all_to_buf(uint8_t *buf, size_t size) {
76 |         out_buf = buf;
77 |         buf_size = size;
78 |         offset = 0;
79 |
80 |         for (auto &insn : insns) {
81 |             status_code res = insn(&assembler);
82 |             if (res != status_code::SUCCESS)
83 |                 return res;
84 |         }
85 |         return status_code::SUCCESS;
86 |     }
87 |
88 |     // The total size the contained code will take once emitted.
89 |     // data() rather than &insns[0]: indexing an empty vector is undefined behaviour.
90 |     size_t code_size() const { return Traits::calculate_code_size(insns.data(), insns.size()); }
91 |
92 |     // Accessors for internal insn_data vec
93 |     size_t size() const { return insns.size(); }
94 |     typename Traits::InsnT &operator[](size_t i) { return insns[i]; }
95 |
96 |     // Accessors for output buffer
97 |     uint8_t *buf() const { return out_buf; }
98 |
99 |     friend typename Traits::AssemblerT;
100 |
101 | private:
102 |     // Store one 32-bit word at the current offset and advance by 4; OVERFLOW if it would not fit.
103 |     status_code write32(uint32_t x) {
104 |         if (offset+4 > buf_size)
105 |             return status_code::OVERFLOW;
106 |         *(uint32_t *)(out_buf + offset) = x;
107 |         offset += 4;
108 |         return status_code::SUCCESS;
109 |     }
110 |
111 |     std::vector<typename Traits::InsnT> insns {};
112 |     typename Traits::AssemblerT &assembler;
113 |
114 |     uint8_t *out_buf { nullptr };
115 |     size_t buf_size { 0 };
116 |     size_t offset { 0 };
117 | };
118 |
119 | } // namespace retrec
120 |
--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 |
22 | #include
23 | #include
24 |
25 | using namespace retrec;
26 | // Copy argv[start] .. argv[argc-1] into a vector. NOTE(review): the vector's element type looks lost in extraction.
27 | std::vector build_argv_vec(int start, int argc, char **argv) {
28 |     char **first = argv + start;
29 |     return {first, argv + argc};
30 | }
31 | // Pass through host envp: copy every entry up to (not including) the terminating null pointer.
32 | std::vector build_envp_vec(char **envp) {
33 |     char **end = envp;
34 |     while (*end)
35 |         ++end;
36 |
37 |     return {envp, end};
38 | }
39 | // Entry point: map the binary named by argv[1], then initialize and run the recompiler on it.
40 | int main(int argc, char **argv, char **envp) {
41 |     if (argc < 2) {
42 |         fprintf(stderr, "Usage: %s <binary> [args...]\n", argv[0]);
43 |         return 1;
44 |     }
45 |     // Map the user provided binary
46 |     const char *binary_path = argv[1];
47 |     mapped_file binary(binary_path, true);
48 |     if (binary.map() != status_code::SUCCESS) {
49 |         pr_error("Failed to open binary: %s\n", binary_path);
50 |         return 1;
51 |     }
52 |
53 |     // Initialize the dynamic recompiler and target environment
54 |     target_environment env = {
55 |         .binary = std::move(binary),
56 |         .argv = build_argv_vec(1, argc, argv),
57 |         .envp = build_envp_vec(envp)
58 |     };
59 |     dynamic_recompiler rec(std::move(env));
60 |     status_code res = rec.init();
61 |     if (res != status_code::SUCCESS) {
62 |         pr_error("Failed to init dynamic recompiler: %s\n", status_code_str(res));
63 |         return 1;
64 |     }
65 |     // Run the target; a failed run is reported and reflected in the exit status
66 |     res = rec.execute();
67 |     if (res != status_code::SUCCESS)
68 |         pr_error("Execution failed: %s\n", status_code_str(res));
69 |     return res == status_code::SUCCESS ? 0 : 1;
70 | }
71 |
--------------------------------------------------------------------------------
/src/mapped_file.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 |
26 | #include "mapped_file.h"
27 |
28 | using namespace retrec;
29 | // Release the mapping, but only when this object still owns one (`valid`).
30 | mapped_file::~mapped_file() {
31 |     if (!valid) return;
32 |     munmap(data_region, data_length);
33 | }
34 | // Open `path` and map its whole contents (read-only or read-write per `readonly`); BADFILE on any failure.
35 | status_code mapped_file::map() {
36 |     int flags = O_CLOEXEC | (readonly ? O_RDONLY : O_RDWR);
37 |     int fd = open(path.c_str(), flags);
38 |     if (fd < 0)
39 |         return status_code::BADFILE;
40 |     struct stat statbuf;
41 |     if (fstat(fd, &statbuf) < 0) {
42 |         close(fd);
43 |         return status_code::BADFILE;
44 |     }
45 |
46 |     int prot = PROT_READ | (readonly ? 0 : PROT_WRITE);
47 |     // The mapping remains usable after its descriptor is closed, so release the
48 |     // descriptor on every path instead of leaking it when mmap succeeds.
49 |     void *region = mmap(nullptr, statbuf.st_size, prot, MAP_SHARED, fd, 0);
50 |     close(fd);
51 |     if (region == MAP_FAILED)
52 |         return status_code::BADFILE;
53 |     data_region = region;
54 |     data_length = statbuf.st_size;
55 |     valid = true;
56 |     return status_code::SUCCESS;
57 | }
--------------------------------------------------------------------------------
/src/mapped_file.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 |
24 | #include
25 | #include
26 | #include
27 |
28 | namespace retrec {
29 |
30 | class mapped_file {
31 | std::string path;
32 | bool readonly;
33 |
34 | bool valid = false;
35 | void *data_region = nullptr;
36 | size_t data_length = 0;
37 | public:
38 | mapped_file(std::string path_, bool readonly_): path(path_), readonly(readonly_) {}
39 |
40 | ~mapped_file();
41 |
42 | // Disable copy construction, allow move construction
43 | mapped_file(const mapped_file &) = delete;
44 |
45 | mapped_file &operator=(const mapped_file &) = delete;
46 |
47 | mapped_file(mapped_file &&other):
48 | path(std::move(other.path)),
49 | valid(std::exchange(other.valid, false)),
50 | data_region(other.data_region), data_length(other.data_length) {}
51 |
52 | mapped_file &operator=(mapped_file &&other) {
53 | std::swap(path, other.path);
54 | std::swap(valid, other.valid);
55 | std::swap(data_region, other.data_region);
56 | std::swap(data_length, other.data_length);
57 | return *this;
58 | }
59 |
60 | status_code map();
61 |
62 | template
63 | T data() { static_assert(std::is_pointer_v); return static_cast(data_region); }
64 |
65 | size_t length() const { return data_length; };
66 | };
67 |
68 | }
69 |
--------------------------------------------------------------------------------
/src/platform/generic_syscalls.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 |
22 | using namespace retrec;
23 |
24 | const char *retrec::generic_linux_syscall_name(SyscallLinuxGeneric number) {
25 | switch (number) {
26 | #define declare_case(name, _) \
27 | case SyscallLinuxGeneric::name: return #name;
28 |
29 | ENUMERATE_GENERIC_LINUX_SYSCALLS(declare_case)
30 | #undef declare_case
31 | default: UNREACHABLE();
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/src/platform/syscall_emulator.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020-2021 Shawn Anastasio.
3 | *
4 | * This file is part of retrec.
5 | *
6 | * retrec is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * retrec is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Lesser General Public License
17 | * along with retrec. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 | #include
22 |
23 | #include
24 | #include
25 |
26 | #include
27 | #include