├── .gitattributes ├── .github ├── FUNDING.yml └── workflows │ └── docker-image.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── Dockerfile ├── LICENSE ├── README.md ├── Zydis-4.1.0 └── Zydis │ ├── Zydis.c │ └── Zydis.h ├── cmake.toml ├── cmake ├── FindIced-Wrapper.cmake ├── FindLLVM-Wrapper.cmake ├── cmkr.cmake └── cmkr.cmake; ├── cmkr.cmake ├── docs ├── BUILDING.md ├── themida_output.ll └── themida_output_lazy_fix.ll ├── icpped_rust ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── build.rs └── src │ └── lib.rs ├── images ├── branches_0.png ├── branches_1.png ├── graph.png ├── mergen_dec.png ├── mergen_disass.png ├── org_decomp.png ├── org_disass.png ├── run_mergen.PNG ├── running_on_themida.png ├── themida_disas_b.png ├── themida_disas_v.png ├── themida_vm_v.png ├── themidavm.png ├── themidavm_settings.png ├── themidavm_vms.png ├── vmp_settings1.png └── vmp_settings2.png ├── lifter ├── .clang-format ├── CommonDisassembler.hpp ├── CommonMnemonics.h ├── CommonRegisters.h ├── CustomPasses.hpp ├── FunctionSignatures.cpp ├── FunctionSignatures.hpp ├── GEPTracker.cpp ├── GEPTracker.h ├── GEPTracker.ipp ├── OperandUtils.h ├── OperandUtils.ipp ├── PathSolver.cpp ├── PathSolver.h ├── PathSolver.ipp ├── Semantics.h ├── Semantics.ipp ├── ZydisDisassembler.hpp ├── ZydisDisassembler_mnemonics.h ├── ZydisDisassembler_registers.h ├── icedDisassembler.hpp ├── icedDisassembler_mnemonics.h ├── icedDisassembler_registers.h ├── includes.h ├── lifter.cpp ├── lifterClass.hpp ├── test_instructions.cpp ├── test_instructions.h ├── tester.hpp ├── utils.cpp └── utils.h └── testcases ├── CommutativeOrAssociative.asm ├── bench_add.asm ├── bench_add_concretized.asm ├── bench_add_mem.asm ├── bench_add_mem_concretized.asm ├── teb_test.asm ├── test_branch_mem.asm ├── test_branch_sf.asm ├── test_branch_zf.asm ├── test_branches.asm ├── test_div.asm ├── test_idiv.asm ├── test_indirect_mem.asm ├── test_indirect_mem2.asm ├── test_invalid_mem.asm ├── test_memory.asm └── test_reallocate.asm 
/.gitattributes: -------------------------------------------------------------------------------- 1 | # cmkr 2 | /**/CMakeLists.txt linguist-generated 3 | /**/cmkr.cmake linguist-vendored 4 | # Zydis 5 | /**/Zydis/Zydis.c linguist-vendored 6 | /**/Zydis/Zydis.h linguist-vendored 7 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: NaC-L # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar username 13 | buy_me_a_coffee: # Replace with a single Buy Me a Coffee username 14 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 15 | -------------------------------------------------------------------------------- /.github/workflows/docker-image.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | pull_request: 7 | branches: ["main"] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | variant: [iced, zydis] 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Build the Docker image 20 | run: | 21 | TAG=my-image-name:${{ matrix.variant }} 22 | # decide whether to 
install Rust: only for "iced" 23 | if [ "${{ matrix.variant }}" = "iced" ]; then 24 | RUST_ARG=true 25 | else 26 | RUST_ARG=false 27 | fi 28 | docker build \ 29 | --build-arg INSTALL_RUST=$RUST_ARG \ 30 | --file Dockerfile \ 31 | --tag $TAG \ 32 | . 33 | 34 | - name: Extract the lifter binary 35 | run: | 36 | TAG=my-image-name:${{ matrix.variant }} 37 | docker create --name extract-container "$TAG" 38 | mkdir -p ./output 39 | docker cp extract-container:/root/Mergen/build/lifter \ 40 | ./output/lifter-${{ matrix.variant }} 41 | docker rm extract-container 42 | 43 | - name: Upload the extracted binary as an artifact 44 | uses: actions/upload-artifact@v4 45 | with: 46 | name: lifter-${{ matrix.variant }} 47 | path: ./output/lifter-${{ matrix.variant }} 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.suo 2 | *.db* 3 | *.ipch 4 | *.exe 5 | *.ilk 6 | *.tlog 7 | *.recipe 8 | *.log 9 | *.obj 10 | *.idb 11 | *.pdb 12 | *.ipdb 13 | *.iobj 14 | *\x64* 15 | *.exe* 16 | 17 | # cmkr 18 | build*/ 19 | cmake-build*/ 20 | CMakerLists.txt 21 | CMakeLists.txt.user 22 | /.cache 23 | /.vscode 24 | 25 | # clion 26 | .idea*/ 27 | 28 | # output files 29 | output.ll 30 | output_finalnoopt.ll 31 | /icpped_rust 32 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "linux-pe"] 2 | path = linux-pe 3 | url = https://github.com/can1357/linux-pe 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # This file is automatically generated from cmake.toml - DO NOT EDIT 2 | # See https://github.com/build-cpp/cmkr for more information 3 | 4 | cmake_minimum_required(VERSION 3.16) 5 | 6 | 
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR) 7 | message(FATAL_ERROR "In-tree builds are not supported. Run CMake from a separate directory: cmake -B build") 8 | endif() 9 | 10 | set(CMKR_ROOT_PROJECT OFF) 11 | if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) 12 | set(CMKR_ROOT_PROJECT ON) 13 | 14 | # Bootstrap cmkr and automatically regenerate CMakeLists.txt 15 | include("cmake/cmkr.cmake;" OPTIONAL RESULT_VARIABLE CMKR_INCLUDE_RESULT) 16 | if(CMKR_INCLUDE_RESULT) 17 | cmkr() 18 | endif() 19 | 20 | # Enable folder support 21 | set_property(GLOBAL PROPERTY USE_FOLDERS ON) 22 | 23 | # Create a configure-time dependency on cmake.toml to improve IDE support 24 | configure_file(cmake.toml cmake.toml COPYONLY) 25 | endif() 26 | 27 | # Variables 28 | set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") 29 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -gfull") 30 | 31 | project(Mergen) 32 | 33 | # Packages 34 | find_package(Iced-Wrapper REQUIRED) 35 | 36 | find_package(LLVM-Wrapper REQUIRED) 37 | 38 | include(FetchContent) 39 | 40 | # Fix warnings about DOWNLOAD_EXTRACT_TIMESTAMP 41 | if(POLICY CMP0135) 42 | cmake_policy(SET CMP0135 NEW) 43 | endif() 44 | message(STATUS "Fetching linux-pe (be6d1f6)...") 45 | FetchContent_Declare(linux-pe SYSTEM 46 | GIT_REPOSITORY 47 | "https://github.com/can1357/linux-pe" 48 | GIT_TAG 49 | be6d1f6 50 | ) 51 | FetchContent_MakeAvailable(linux-pe) 52 | 53 | if(ICED_NOT_FOUND) # NOTE: unnamed condition 54 | message(STATUS "Fetching Zydis (v4.1.0)...") 55 | FetchContent_Declare(Zydis SYSTEM 56 | GIT_REPOSITORY 57 | "https://github.com/zyantific/zydis" 58 | GIT_TAG 59 | v4.1.0 60 | ) 61 | FetchContent_MakeAvailable(Zydis) 62 | 63 | endif() 64 | message(STATUS "Fetching magic_enum (a413fcc)...") 65 | FetchContent_Declare(magic_enum SYSTEM 66 | GIT_REPOSITORY 67 | "https://github.com/Neargye/magic_enum" 68 | GIT_TAG 69 | a413fcc 70 | ) 71 | FetchContent_MakeAvailable(magic_enum) 72 | 73 | # Target: lifter 74 | set(lifter_SOURCES 75 | 
"lifter/FunctionSignatures.cpp" 76 | "lifter/GEPTracker.cpp" 77 | "lifter/PathSolver.cpp" 78 | "lifter/lifter.cpp" 79 | "lifter/test_instructions.cpp" 80 | "lifter/utils.cpp" 81 | "lifter/CommonMnemonics.h" 82 | "lifter/CommonRegisters.h" 83 | "lifter/GEPTracker.h" 84 | "lifter/OperandUtils.h" 85 | "lifter/PathSolver.h" 86 | "lifter/Semantics.h" 87 | "lifter/ZydisDisassembler_mnemonics.h" 88 | "lifter/ZydisDisassembler_registers.h" 89 | "lifter/icedDisassembler_mnemonics.h" 90 | "lifter/icedDisassembler_registers.h" 91 | "lifter/includes.h" 92 | "lifter/test_instructions.h" 93 | "lifter/utils.h" 94 | "lifter/CommonDisassembler.hpp" 95 | "lifter/CustomPasses.hpp" 96 | "lifter/FunctionSignatures.hpp" 97 | "lifter/ZydisDisassembler.hpp" 98 | "lifter/icedDisassembler.hpp" 99 | "lifter/lifterClass.hpp" 100 | "lifter/tester.hpp" 101 | "lifter/GEPTracker.ipp" 102 | "lifter/OperandUtils.ipp" 103 | "lifter/PathSolver.ipp" 104 | "lifter/Semantics.ipp" 105 | cmake.toml 106 | ) 107 | 108 | add_executable(lifter) 109 | 110 | target_sources(lifter PRIVATE ${lifter_SOURCES}) 111 | source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${lifter_SOURCES}) 112 | 113 | if(DEFINED MERGEN_TEST) # testmode 114 | target_compile_definitions(lifter PRIVATE 115 | MERGEN_TEST 116 | ) 117 | endif() 118 | 119 | target_compile_features(lifter PRIVATE 120 | cxx_std_20 121 | ) 122 | 123 | target_link_libraries(lifter PRIVATE 124 | LLVM-Wrapper 125 | linux-pe 126 | magic_enum 127 | ) 128 | 129 | if(ICED_NOT_FOUND) # NOTE: unnamed condition 130 | target_link_libraries(lifter PRIVATE 131 | Zydis 132 | ) 133 | endif() 134 | 135 | if(ICED_FOUND) # NOTE: unnamed condition 136 | target_link_libraries(lifter PRIVATE 137 | Iced_Wrapper 138 | ) 139 | endif() 140 | 141 | if(WIN32) # windows 142 | target_link_libraries(lifter PRIVATE 143 | Ws2_32 144 | ) 145 | endif() 146 | 147 | get_directory_property(CMKR_VS_STARTUP_PROJECT DIRECTORY ${PROJECT_SOURCE_DIR} DEFINITION VS_STARTUP_PROJECT) 148 | if(NOT 
CMKR_VS_STARTUP_PROJECT) 149 | set_property(DIRECTORY ${PROJECT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT lifter) 150 | endif() 151 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ 4 | apt-get update && apt-get install -y \ 5 | lsb-release \ 6 | wget \ 7 | software-properties-common \ 8 | gnupg \ 9 | cmake \ 10 | git \ 11 | curl 12 | 13 | RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ 14 | wget https://apt.llvm.org/llvm.sh \ 15 | && chmod +x llvm.sh \ 16 | && ./llvm.sh 18 \ 17 | && rm llvm.sh 18 | 19 | ARG INSTALL_RUST=true 20 | 21 | RUN if [ "$INSTALL_RUST" = "true" ]; then \ 22 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ; \ 23 | fi 24 | # rustup installs cargo into /root/.cargo/bin; expose it so CMake's find_program(cargo) can see it 25 | ENV PATH="/root/.cargo/bin:${PATH}" 26 | COPY . /root/Mergen 27 | # clang++ must point at the C++ driver (clang++-18), not the preprocessor driver (clang-cpp-18) 28 | RUN ln -s /usr/bin/clang-18 /usr/bin/clang \ 29 | && ln -s /usr/bin/clang-cpp-18 /usr/bin/clang-cpp \ 30 | && ln -s /usr/bin/clang++-18 /usr/bin/clang++ 31 | 32 | ENV CC=/usr/bin/clang 33 | ENV CXX=/usr/bin/clang++ 34 | 35 | RUN mkdir -p /root/Mergen/build 36 | WORKDIR /root/Mergen/build 37 | RUN cmake .. && cmake --build . -j $(nproc) 38 | 39 | # Provide the built binary path as the default output for the container 40 | WORKDIR /root/Mergen/build 41 | CMD ["cp", "/root/Mergen/build/lifter", "/output/lifter"] 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Project Overview: 2 | Mergen is a tool engineered to convert Assembly code into LLVM Intermediate Representation (IR). 
3 | This tool is designed for: 4 | - The deobfuscation or devirtualization of obfuscated binary code 5 | - The enhancement of the reverse engineering process, making it more efficient and effective, especially for complex software systems. 6 | 7 | ## Guide to build & run 8 | 9 | To build and run the project, take a look at [**docs/BUILDING.md**](https://github.com/NaC-L/Mergen/blob/main/docs/BUILDING.md). 10 | 11 | ## Core Objectives: 12 | 13 | - ### Deobfuscation 14 | 15 | - ### Devirtualization 16 | 17 | - ### Optimization 18 | 19 | ## How does it work? 20 | 21 | We symbolically execute (or symbolically lift) the target; the idea here is not to lift individual instructions, but to lift a whole function. We don't expect an instruction or a basic block to behave the same way each time; instead, we treat them as if they can be, and are, used for different purposes each time. We try to keep the generated IR as simple and optimizable as possible. We also have different needs than a usual compiler. We use analysis to evaluate control flow. We can't depend on LLVM for all of our analysis, because its analyses were created for different goals and could be suboptimal for our use case. 22 | 23 | ![image](images/graph.png) 24 | 25 | ## Examples 26 | 27 | These are practical examples that illustrate how Mergen deals with virtualized programs. 28 | 29 | 1. [VMProtect](#example-1-vmprotect) 30 | 2. [Branches/Jumptables](#example-2-branchesjumptables) 31 | 3. [Themida 3.1.6.0 LION64 (Red)](#example-3-themida-3160-lion64-red) 32 | 33 | ### Example #1 (VMProtect) 34 | 35 | This is our target program: 36 | 37 | ```cpp 38 | struct test { 39 | int a; 40 | int b; 41 | int c; 42 | }; 43 | 44 | int maths(test a, int b, int c) { 45 | return a.a + b - c; 46 | } 47 | ``` 48 | ![image](images/org_disass.png) 49 | 50 | ![image](images/org_decomp.png) 51 | 52 | VMProtect settings: everything is turned off, and we virtualize the function on the ultra setting. 
(Tested versions 3.4.0-3.6.0 3.8.1) 53 | 54 | ![image](images/vmp_settings1.png) 55 | 56 | ![image](images/vmp_settings2.png) 57 | 58 | Here, we run Mergen. The first argument is the name of the file and the second argument is the address of the function. Look how simple it is to run. We can then compile the output and explore it using our favorite decompiler. 59 | 60 | ![image](images/run_mergen.PNG) 61 | 62 | ```llvm 63 | ; ModuleID = 'my_lifting_module' 64 | source_filename = "my_lifting_module" 65 | 66 | ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) 67 | define i64 @main(i64 %rax, i64 %rcx, i64 %rdx, i64 %rbx, i64 %0, i64 %rbp, i64 %rsi, i64 %rdi, i64 %r8, i64 %r9, i64 %r10, i64 %r11, i64 %r12, i64 %r13, i64 %r14, i64 %r15, ptr nocapture readonly %memory) local_unnamed_addr #0 { 68 | entry: 69 | %stackmemory = alloca i128, i128 13758960, align 8 70 | %1 = trunc i64 %r8 to i32 71 | %2 = trunc i64 %rdx to i32 72 | %GEPLoadxd-5369456437- = getelementptr i8, ptr %memory, i64 %rcx 73 | %3 = load i32, ptr %GEPLoadxd-5369456437-, align 4 74 | %adc-temp-5370242400- = sub i32 %2, %1 75 | %realnot-5369532059- = add i32 %adc-temp-5370242400-, %3 76 | %stackmemory10243.sroa.55.1375304.insert.ext10255 = zext i32 %realnot-5369532059- to i64 77 | ret i64 %stackmemory10243.sroa.55.1375304.insert.ext10255 78 | } 79 | 80 | attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) } 81 | ``` 82 | 83 | After compiling: 84 | 85 | ![image](images/mergen_disass.png) 86 | 87 | ![image](images/mergen_dec.png) 88 | 89 | Now you might notice the registers are a little bit off. This is because we don't follow the calling convention; if we were to follow it, the function signature would look like this: 90 | ```llvm 91 | define i64 @main(i64 %rcx, i64 %rdx, i64 %r8, i64 %r9 ...) 92 | ``` 93 | So, we just adjust the function signature to look normal. 
If you have more questions about this part, I suggest you research [calling conventions](https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170#parameter-passing) and [ABI](https://learn.microsoft.com/en-us/cpp/build/x64-software-conventions?view=msvc-170&source=recommendations#register-volatility-and-preservation). 94 | 95 | ### Example #2 (Branches/Jumptables) 96 | So, let's say we have this code. VMs will take the code below and turn it into an indirect jump, which is slightly more inconvenient for the reverser. 97 | ```cpp 98 | int maths(int a, int b, int c) { 99 | if (a > b) 100 | return a + b + c; 101 | else 102 | return a - b - c; 103 | } 104 | ``` 105 | 106 | 107 | ``` 108 | next_handler = xxx; 109 | if ( a-b > 0 ) 110 | next_handler = yyy; 111 | jump next_handler; 112 | ``` 113 | 114 | We try to always analyze values and keep track of them. This allows us to understand control flow. 115 | [For jumptable-like branches](https://github.com/NaC-L/Mergen/blob/experimental-pattern-matching/testcases/test_branches.asm) 116 | Optimized output would be a simple 117 | ```llvm 118 | define i64 @main(i64 %rax, i64 %rcx, i64 %rdx, i64 %rbx, i64 %rsp, i64 %rbp, i64 %rsi, i64 %rdi, i64 %r8, i64 %r9, i64 %r10, i64 %r11, i64 %r12, i64 %r13, i64 %r14, i64 %r15, ptr nocapture readnone %TEB, ptr nocapture readnone %memory) local_unnamed_addr #0 { 119 | fake_ret: 120 | %0 = lshr i64 %rcx, 62 121 | %common.ret.op = and i64 %0, 2 122 | ret i64 %common.ret.op 123 | } 124 | ``` 125 | Unoptimized output. 
(DCE'd for readability) 126 | ```llvm 127 | source_filename = "my_lifting_module" 128 | 129 | define i64 @main(i64 %rax, i64 %rcx, i64 %rdx, i64 %rbx, i64 %rsp, i64 %rbp, i64 %rsi, i64 %rdi, i64 %r8, i64 %r9, i64 %r10, i64 %r11, i64 %r12, i64 %r13, i64 %r14, i64 %r15, ptr %TEB, ptr %memory) { 130 | %lsb = and i64 %rcx, 255 131 | %pf1 = mul i64 %lsb, 72340172838076673 132 | %pf2 = and i64 %pf1, -9205322385119247871 133 | %pf3 = urem i64 %pf2, 511 134 | %pf4 = and i64 %pf3, 1 135 | %pf5 = icmp eq i64 0, %pf4 136 | %0 = zext i1 %pf5 to i64 137 | %createrflag2 = shl i64 %0, 2 138 | %creatingrflag = or i64 2, %createrflag2 139 | %zeroflag = icmp eq i64 %rcx, 0 140 | %1 = zext i1 %zeroflag to i64 141 | %createrflag21 = shl i64 %1, 6 142 | %creatingrflag2 = or i64 %creatingrflag, %createrflag21 143 | %signflag = icmp slt i64 %rcx, 0 144 | %2 = zext i1 %signflag to i64 145 | %createrflag23 = shl i64 %2, 7 146 | %creatingrflag4 = or i64 %creatingrflag2, %createrflag23 147 | %GEPSTORE-5368713221- = getelementptr i8, ptr %memory, i64 1376032 148 | store i64 %creatingrflag4, ptr %GEPSTORE-5368713221-, align 4 149 | %realand-5368713229- = and i64 %creatingrflag4, 128 150 | %shr-lshr-5368713233- = lshr i64 %realand-5368713229-, 7 151 | %3 = mul i64 %shr-lshr-5368713233-, 4 152 | %bvalue_indexvalue = add i64 5368713249, %3 153 | %4 = icmp eq i64 %bvalue_indexvalue, 5368713253 154 | %lolb- = select i1 %4, i64 5368713264, i64 5368713257 155 | %GEPSTORE-5368713248- = getelementptr i8, ptr %memory, i64 1376032 156 | store i64 %lolb-, ptr %GEPSTORE-5368713248-, align 4 157 | br i1 %4, label %real_ret, label %real_ret41 158 | 159 | real_ret: ; preds = %fake_ret 160 | %inc-5368713273- = add i64 %shr-lshr-5368713233-, 1 161 | ret i64 %inc-5368713273- 162 | 163 | real_ret41: ; preds = %fake_ret 164 | ret i64 %shr-lshr-5368713233- 165 | } 166 | ``` 167 | Notice this part 168 | ``` 169 | %realand-5368713229- = and i64 %creatingrflag4, 128 170 | %shr-lshr-5368713233- = lshr i64 
%realand-5368713229-, 7 171 | ``` 172 | We get the flags, then we extract bit 7, which is the Sign Flag, and then we use the Sign Flag to calculate an address. Through analysis, we determine that the address can be one of two values, `5368713257` or `5368713264`, and we turn that into a comparison. If the address is `5368713257`, take one branch; otherwise, take the other. When doing this, it is also important to mark the condition with the appropriate value, because later we might need to calculate another jump with the exact same value. 173 | 174 | Even though we solve indirect jumps, jumps with more than 2 possible locations are not supported. This is because the analysis for them is not implemented yet. This allows us to solve VM-style branches, but we still have problems with real-life jump tables. 175 | 176 | ### Example #3 (Themida 3.1.6.0 LION64 (Red)) 177 | Our target program: 178 | 179 | ![image](images/themida_disas_b.png) 180 | 181 | Themida settings (we only care about VMs at the moment): 182 | 183 | ![image](images/themida_vm_v.png) 184 | 185 | ![image](images/themidavm.png) 186 | 187 | ![image](images/themidavm_settings.png) 188 | 189 | After VM: 190 | 191 | ![image](images/themida_disas_v.png) 192 | 193 | Running Mergen: 194 | 195 | ![image](images/running_on_themida.png) 196 | 197 | Output code: [click here](docs/themida_output.ll) 198 | So, why is our result not as successful as when lifting a binary that's protected by VMProtect? 199 | 200 | Themida actively writes to the .themida section. Unlike the stack, we can't disregard these writes, because these values might be read later by other code. 201 | 202 | But we have a temporary solution to that: remove all stores into the .themida section. 
Since our program doesn't write into memory, [I just commented out all the stores.](docs/themida_output_lazy_fix.ll) Now we are left with this: 203 | 204 | ```llvm 205 | source_filename = "my_lifting_module" 206 | 207 | define i64 @main(i64 %rax, i64 %rcx, i64 %rdx, i64 %rbx, i64 %rsp, i64 %rbp, i64 %rsi, i64 %rdi, i64 %r8, i64 %r9, i64 %r10, i64 %r11, i64 %r12, i64 %r13, i64 %r14, i64 %r15, ptr writeonly %memory) local_unnamed_addr #0 { 208 | %trunc = trunc i64 %r8 to i32 209 | %trunc1 = trunc i64 %rdx to i32 210 | %trunc2 = trunc i64 %rcx to i32 211 | %realadd-5369771371- = add i32 %trunc1, %trunc2 212 | %realadd-5369582686- = add i32 %realadd-5369771371-, %trunc 213 | %trunc457139 = zext i32 %realadd-5369582686- to i64 214 | ret i64 %trunc457139 215 | } 216 | 217 | attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) } 218 | ``` 219 | # Technical challenges 220 | - Loops 221 | - Self-modifying code (especially with conditional modification) 222 | - Being in a universe where "outlining" and "unrolling" passes don't exist. 223 | 224 | 225 | # Getting in touch 226 | Join our [Mergen Discord Server](https://discord.gg/e3eftYguqB) to trade ideas or just chat in general. 
227 | -------------------------------------------------------------------------------- /cmake.toml: -------------------------------------------------------------------------------- 1 | [cmake] 2 | version = "3.16" 3 | cmkr-include = "cmake/cmkr.cmake;" 4 | 5 | [project] 6 | name = "Mergen" 7 | 8 | [conditions] 9 | windows = "WIN32" 10 | testmode = "DEFINED MERGEN_TEST" 11 | 12 | 13 | [variables] 14 | CMAKE_MODULE_PATH = "${CMAKE_CURRENT_SOURCE_DIR}/cmake" 15 | CMAKE_CXX_FLAGS_DEBUG = "-O0 -gfull" 16 | 17 | [fetch-content.linux-pe] 18 | git = "https://github.com/can1357/linux-pe" 19 | tag = "be6d1f6" 20 | 21 | 22 | [find-package.Iced-Wrapper] 23 | 24 | [fetch-content.Zydis] 25 | condition = "ICED_NOT_FOUND" 26 | git = "https://github.com/zyantific/zydis" 27 | tag = "v4.1.0" 28 | 29 | [fetch-content.magic_enum] 30 | git = "https://github.com/Neargye/magic_enum" 31 | tag = "a413fcc" 32 | 33 | 34 | [find-package.LLVM-Wrapper] 35 | 36 | 37 | [target.lifter] 38 | type = "executable" 39 | sources = ["lifter/*.cpp"] 40 | headers = ["lifter/*.h", "lifter/*.hpp", "lifter/*.ipp"] 41 | link-libraries = ["LLVM-Wrapper", "linux-pe", "magic_enum"] 42 | ICED_NOT_FOUND.link-libraries = ["Zydis"] 43 | ICED_FOUND.link-libraries = ["Iced_Wrapper"] 44 | windows.link-libraries = ["Ws2_32"] 45 | compile-features = ["cxx_std_20"] 46 | testmode.compile-definitions = ["MERGEN_TEST"] 47 | -------------------------------------------------------------------------------- /cmake/FindIced-Wrapper.cmake: -------------------------------------------------------------------------------- 1 | if (BUILD_WITH_ZYDIS) 2 | message(STATUS "BUILD_WITH_ZYDIS=ON; forcing Zydis backend and skipping Cargo lookup") 3 | set(ICED_NOT_FOUND TRUE) # the ICED_NOT_FOUND condition gates the Zydis fetch and link; the compile definition alone does not set it 4 | set(ICED_FOUND FALSE) # shadow an ICED_FOUND value cached by an earlier Iced configure 5 | add_compile_definitions(ICED_NOT_FOUND) 6 | return() 7 | endif() 8 | 9 | find_program(CARGO_EXECUTABLE cargo) 10 | 11 | if (NOT CARGO_EXECUTABLE) 12 | message("Cargo not found. 
Default to Zydis.") 13 | set(ICED_NOT_FOUND TRUE CACHE BOOL "Rust/Cargo not found => building with the Zydis backend") 14 | add_compile_definitions(ICED_NOT_FOUND) 15 | 16 | return() 17 | endif() 18 | 19 | message("Cargo found. Default to Iced.") 20 | 21 | include(FetchContent) 22 | 23 | FetchContent_Declare( 24 | Corrosion 25 | GIT_REPOSITORY https://github.com/NaC-L/corrosion.git # some issue with linker, i forgot, enabling flag should help?, hopefully my patch doesnt break anything 26 | GIT_TAG 8b991b7 # Optionally specify a commit hash, version tag or branch here 27 | ) 28 | 29 | FetchContent_MakeAvailable(Corrosion) 30 | 31 | corrosion_import_crate(MANIFEST_PATH icpped_rust/Cargo.toml) 32 | 33 | 34 | set(ICED_FOUND TRUE CACHE BOOL "Rust/Cargo found => building with the Iced (Rust) backend") 35 | add_compile_definitions(ICED_FOUND) 36 | -------------------------------------------------------------------------------- /cmake/FindLLVM-Wrapper.cmake: -------------------------------------------------------------------------------- 1 | # This is an INTERFACE target for LLVM, usage: 2 | # target_link_libraries(${PROJECT_NAME} LLVM-Wrapper) 3 | # The include directories and compile definitions will be properly handled. 
4 | 5 | if(LLVM-Wrapper_FOUND OR TARGET LLVM-Wrapper) 6 | return() 7 | endif() 8 | 9 | set(CMAKE_FOLDER_LLVM "${CMAKE_FOLDER}") 10 | if(CMAKE_FOLDER) 11 | set(CMAKE_FOLDER "${CMAKE_FOLDER}/LLVM") 12 | else() 13 | set(CMAKE_FOLDER "LLVM") 14 | endif() 15 | 16 | # Extract the arguments passed to find_package 17 | # Documentation: https://cmake.org/cmake/help/latest/manual/cmake-developer.7.html#find-modules 18 | list(APPEND FIND_ARGS "${LLVM-Wrapper_FIND_VERSION}") 19 | if(LLVM-Wrapper_FIND_QUIETLY) 20 | list(APPEND FIND_ARGS "QUIET") 21 | endif() 22 | if(LLVM-Wrapper_FIND_REQUIRED) 23 | list(APPEND FIND_ARGS "REQUIRED") 24 | endif() 25 | 26 | # Find LLVM 27 | find_package(LLVM ${FIND_ARGS}) 28 | 29 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 30 | message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") 31 | 32 | # Split the definitions properly (https://weliveindetail.github.io/blog/post/2017/07/17/notes-setup.html) 33 | separate_arguments(LLVM_DEFINITIONS) 34 | 35 | # Some diagnostics (https://stackoverflow.com/a/17666004/1806760) 36 | message(STATUS "LLVM libraries: ${LLVM_AVAILABLE_LIBS}") 37 | message(STATUS "LLVM includes: ${LLVM_INCLUDE_DIRS}") 38 | message(STATUS "LLVM definitions: ${LLVM_DEFINITIONS}") 39 | message(STATUS "LLVM tools: ${LLVM_TOOLS_BINARY_DIR}") 40 | 41 | add_library(LLVM-Wrapper INTERFACE IMPORTED) 42 | target_include_directories(LLVM-Wrapper SYSTEM INTERFACE ${LLVM_INCLUDE_DIRS}) 43 | target_compile_definitions(LLVM-Wrapper INTERFACE ${LLVM_DEFINITIONS}) 44 | 45 | if(WIN32) 46 | target_compile_definitions(LLVM-Wrapper INTERFACE NOMINMAX) 47 | endif() 48 | 49 | # https://github.com/JonathanSalwan/Triton/issues/1082#issuecomment-1030826696 50 | if(LLVM_LINK_LLVM_DYLIB) 51 | target_link_libraries(LLVM-Wrapper INTERFACE LLVM) 52 | else() 53 | target_link_libraries(LLVM-Wrapper INTERFACE ${LLVM_AVAILABLE_LIBS}) 54 | endif() 55 | 56 | # In LLVM 10 (and potentially below) there is a full path to diaguids.lib embedded in the installation 
57 | if(WIN32 AND TARGET LLVMDebugInfoPDB) 58 | get_target_property(LLVMDebugInfoPDB_LIBS LLVMDebugInfoPDB INTERFACE_LINK_LIBRARIES) 59 | foreach(LLVMDebugInfoPDB_LIB ${LLVMDebugInfoPDB_LIBS}) 60 | if(LLVMDebugInfoPDB_LIB MATCHES "diaguids.lib") 61 | list(REMOVE_ITEM LLVMDebugInfoPDB_LIBS "${LLVMDebugInfoPDB_LIB}") 62 | list(APPEND LLVMDebugInfoPDB_LIBS "diaguids.lib") 63 | break() 64 | endif() 65 | endforeach() 66 | set_target_properties(LLVMDebugInfoPDB PROPERTIES 67 | INTERFACE_LINK_LIBRARIES "${LLVMDebugInfoPDB_LIBS}" 68 | ) 69 | unset(LLVMDebugInfoPDB_LIBS) 70 | endif() 71 | 72 | set(CMAKE_FOLDER "${CMAKE_FOLDER_LLVM}") 73 | unset(CMAKE_FOLDER_LLVM) 74 | 75 | set(LLVM-Wrapper_FOUND ON) 76 | -------------------------------------------------------------------------------- /cmake/cmkr.cmake: -------------------------------------------------------------------------------- 1 | include_guard() 2 | 3 | # Change these defaults to point to your infrastructure if desired 4 | set(CMKR_REPO "https://github.com/build-cpp/cmkr" CACHE STRING "cmkr git repository" FORCE) 5 | set(CMKR_TAG "v0.2.29" CACHE STRING "cmkr git tag (this needs to be available forever)" FORCE) 6 | set(CMKR_COMMIT_HASH "" CACHE STRING "cmkr git commit hash (optional)" FORCE) 7 | 8 | # To bootstrap/generate a cmkr project: cmake -P cmkr.cmake 9 | if(CMAKE_SCRIPT_MODE_FILE) 10 | set(CMAKE_BINARY_DIR "${CMAKE_BINARY_DIR}/build") 11 | set(CMAKE_CURRENT_BINARY_DIR "${CMAKE_BINARY_DIR}") 12 | file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}") 13 | endif() 14 | 15 | # Set these from the command line to customize for development/debugging purposes 16 | set(CMKR_EXECUTABLE "" CACHE FILEPATH "cmkr executable") 17 | set(CMKR_SKIP_GENERATION OFF CACHE BOOL "skip automatic cmkr generation") 18 | set(CMKR_BUILD_TYPE "Debug" CACHE STRING "cmkr build configuration") 19 | mark_as_advanced(CMKR_REPO CMKR_TAG CMKR_COMMIT_HASH CMKR_EXECUTABLE CMKR_SKIP_GENERATION CMKR_BUILD_TYPE) 20 | 21 | # Disable cmkr if generation is disabled 
22 | if(DEFINED ENV{CI} OR CMKR_SKIP_GENERATION OR CMKR_BUILD_SKIP_GENERATION) 23 | message(STATUS "[cmkr] Skipping automatic cmkr generation") 24 | unset(CMKR_BUILD_SKIP_GENERATION CACHE) 25 | macro(cmkr) 26 | endmacro() 27 | return() 28 | endif() 29 | 30 | # Disable cmkr if no cmake.toml file is found 31 | if(NOT CMAKE_SCRIPT_MODE_FILE AND NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/cmake.toml") 32 | message(AUTHOR_WARNING "[cmkr] Not found: ${CMAKE_CURRENT_SOURCE_DIR}/cmake.toml") 33 | macro(cmkr) 34 | endmacro() 35 | return() 36 | endif() 37 | 38 | # Convert a Windows native path to CMake path 39 | if(CMKR_EXECUTABLE MATCHES "\\\\") 40 | string(REPLACE "\\" "/" CMKR_EXECUTABLE_CMAKE "${CMKR_EXECUTABLE}") 41 | set(CMKR_EXECUTABLE "${CMKR_EXECUTABLE_CMAKE}" CACHE FILEPATH "" FORCE) 42 | unset(CMKR_EXECUTABLE_CMAKE) 43 | endif() 44 | 45 | # Helper macro to execute a process (COMMAND_ERROR_IS_FATAL ANY is 3.19 and higher) 46 | function(cmkr_exec) 47 | execute_process(COMMAND ${ARGV} RESULT_VARIABLE CMKR_EXEC_RESULT) 48 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 49 | message(FATAL_ERROR "cmkr_exec(${ARGV}) failed (exit code ${CMKR_EXEC_RESULT})") 50 | endif() 51 | endfunction() 52 | 53 | # Windows-specific hack (CMAKE_EXECUTABLE_PREFIX is not set at the moment) 54 | if(WIN32) 55 | set(CMKR_EXECUTABLE_NAME "cmkr.exe") 56 | else() 57 | set(CMKR_EXECUTABLE_NAME "cmkr") 58 | endif() 59 | 60 | # Use cached cmkr if found 61 | if(DEFINED ENV{CMKR_CACHE}) 62 | set(CMKR_DIRECTORY_PREFIX "$ENV{CMKR_CACHE}") 63 | string(REPLACE "\\" "/" CMKR_DIRECTORY_PREFIX "${CMKR_DIRECTORY_PREFIX}") 64 | if(NOT CMKR_DIRECTORY_PREFIX MATCHES "\\/$") 65 | set(CMKR_DIRECTORY_PREFIX "${CMKR_DIRECTORY_PREFIX}/") 66 | endif() 67 | # Build in release mode for the cache 68 | set(CMKR_BUILD_TYPE "Release") 69 | else() 70 | set(CMKR_DIRECTORY_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/_cmkr_") 71 | endif() 72 | set(CMKR_DIRECTORY "${CMKR_DIRECTORY_PREFIX}${CMKR_TAG}") 73 | set(CMKR_CACHED_EXECUTABLE 
"${CMKR_DIRECTORY}/bin/${CMKR_EXECUTABLE_NAME}") 74 | 75 | # Helper function to check if a string starts with a prefix 76 | # Cannot use MATCHES, see: https://github.com/build-cpp/cmkr/issues/61 77 | function(cmkr_startswith str prefix result) 78 | string(LENGTH "${prefix}" prefix_length) 79 | string(LENGTH "${str}" str_length) 80 | if(prefix_length LESS_EQUAL str_length) 81 | string(SUBSTRING "${str}" 0 ${prefix_length} str_prefix) 82 | if(prefix STREQUAL str_prefix) 83 | set("${result}" ON PARENT_SCOPE) 84 | return() 85 | endif() 86 | endif() 87 | set("${result}" OFF PARENT_SCOPE) 88 | endfunction() 89 | 90 | # Handle upgrading logic 91 | if(CMKR_EXECUTABLE AND NOT CMKR_CACHED_EXECUTABLE STREQUAL CMKR_EXECUTABLE) 92 | cmkr_startswith("${CMKR_EXECUTABLE}" "${CMAKE_CURRENT_BINARY_DIR}/_cmkr" CMKR_STARTSWITH_BUILD) 93 | cmkr_startswith("${CMKR_EXECUTABLE}" "${CMKR_DIRECTORY_PREFIX}" CMKR_STARTSWITH_CACHE) 94 | if(CMKR_STARTSWITH_BUILD) 95 | if(DEFINED ENV{CMKR_CACHE}) 96 | message(AUTHOR_WARNING "[cmkr] Switching to cached cmkr: '${CMKR_CACHED_EXECUTABLE}'") 97 | if(EXISTS "${CMKR_CACHED_EXECUTABLE}") 98 | set(CMKR_EXECUTABLE "${CMKR_CACHED_EXECUTABLE}" CACHE FILEPATH "Full path to cmkr executable" FORCE) 99 | else() 100 | unset(CMKR_EXECUTABLE CACHE) 101 | endif() 102 | else() 103 | message(AUTHOR_WARNING "[cmkr] Upgrading '${CMKR_EXECUTABLE}' to '${CMKR_CACHED_EXECUTABLE}'") 104 | unset(CMKR_EXECUTABLE CACHE) 105 | endif() 106 | elseif(DEFINED ENV{CMKR_CACHE} AND CMKR_STARTSWITH_CACHE) 107 | message(AUTHOR_WARNING "[cmkr] Upgrading cached '${CMKR_EXECUTABLE}' to '${CMKR_CACHED_EXECUTABLE}'") 108 | unset(CMKR_EXECUTABLE CACHE) 109 | endif() 110 | endif() 111 | 112 | if(CMKR_EXECUTABLE AND EXISTS "${CMKR_EXECUTABLE}") 113 | message(VERBOSE "[cmkr] Found cmkr: '${CMKR_EXECUTABLE}'") 114 | elseif(CMKR_EXECUTABLE AND NOT CMKR_EXECUTABLE STREQUAL CMKR_CACHED_EXECUTABLE) 115 | message(FATAL_ERROR "[cmkr] '${CMKR_EXECUTABLE}' not found") 116 | elseif(NOT CMKR_EXECUTABLE 
AND EXISTS "${CMKR_CACHED_EXECUTABLE}") 117 | set(CMKR_EXECUTABLE "${CMKR_CACHED_EXECUTABLE}" CACHE FILEPATH "Full path to cmkr executable" FORCE) 118 | message(STATUS "[cmkr] Found cached cmkr: '${CMKR_EXECUTABLE}'") 119 | else() 120 | set(CMKR_EXECUTABLE "${CMKR_CACHED_EXECUTABLE}" CACHE FILEPATH "Full path to cmkr executable" FORCE) 121 | message(VERBOSE "[cmkr] Bootstrapping '${CMKR_EXECUTABLE}'") 122 | 123 | message(STATUS "[cmkr] Fetching cmkr...") 124 | if(EXISTS "${CMKR_DIRECTORY}") 125 | cmkr_exec("${CMAKE_COMMAND}" -E rm -rf "${CMKR_DIRECTORY}") 126 | endif() 127 | find_package(Git QUIET REQUIRED) 128 | cmkr_exec("${GIT_EXECUTABLE}" 129 | clone 130 | --config advice.detachedHead=false 131 | --branch ${CMKR_TAG} 132 | --depth 1 133 | ${CMKR_REPO} 134 | "${CMKR_DIRECTORY}" 135 | ) 136 | if(CMKR_COMMIT_HASH) 137 | execute_process( 138 | COMMAND "${GIT_EXECUTABLE}" checkout -q "${CMKR_COMMIT_HASH}" 139 | RESULT_VARIABLE CMKR_EXEC_RESULT 140 | WORKING_DIRECTORY "${CMKR_DIRECTORY}" 141 | ) 142 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 143 | message(FATAL_ERROR "Tag '${CMKR_TAG}' hash is not '${CMKR_COMMIT_HASH}'") 144 | endif() 145 | endif() 146 | message(STATUS "[cmkr] Building cmkr (using system compiler)...") 147 | cmkr_exec("${CMAKE_COMMAND}" 148 | --no-warn-unused-cli 149 | "${CMKR_DIRECTORY}" 150 | "-B${CMKR_DIRECTORY}/build" 151 | "-DCMAKE_BUILD_TYPE=${CMKR_BUILD_TYPE}" 152 | "-DCMAKE_UNITY_BUILD=ON" 153 | "-DCMAKE_INSTALL_PREFIX=${CMKR_DIRECTORY}" 154 | "-DCMKR_GENERATE_DOCUMENTATION=OFF" 155 | ) 156 | cmkr_exec("${CMAKE_COMMAND}" 157 | --build "${CMKR_DIRECTORY}/build" 158 | --config "${CMKR_BUILD_TYPE}" 159 | --parallel 160 | ) 161 | cmkr_exec("${CMAKE_COMMAND}" 162 | --install "${CMKR_DIRECTORY}/build" 163 | --config "${CMKR_BUILD_TYPE}" 164 | --prefix "${CMKR_DIRECTORY}" 165 | --component cmkr 166 | ) 167 | if(NOT EXISTS ${CMKR_EXECUTABLE}) 168 | message(FATAL_ERROR "[cmkr] Failed to bootstrap '${CMKR_EXECUTABLE}'") 169 | endif() 170 | 
cmkr_exec("${CMKR_EXECUTABLE}" version) 171 | message(STATUS "[cmkr] Bootstrapped ${CMKR_EXECUTABLE}") 172 | endif() 173 | execute_process(COMMAND "${CMKR_EXECUTABLE}" version 174 | RESULT_VARIABLE CMKR_EXEC_RESULT 175 | ) 176 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 177 | message(FATAL_ERROR "[cmkr] Failed to get version, try clearing the cache and rebuilding") 178 | endif() 179 | 180 | # Use cmkr.cmake as a script 181 | if(CMAKE_SCRIPT_MODE_FILE) 182 | if(NOT EXISTS "${CMAKE_SOURCE_DIR}/cmake.toml") 183 | execute_process(COMMAND "${CMKR_EXECUTABLE}" init 184 | RESULT_VARIABLE CMKR_EXEC_RESULT 185 | ) 186 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 187 | message(FATAL_ERROR "[cmkr] Failed to bootstrap cmkr project. Please report an issue: https://github.com/build-cpp/cmkr/issues/new") 188 | else() 189 | message(STATUS "[cmkr] Modify cmake.toml and then configure using: cmake -B build") 190 | endif() 191 | else() 192 | execute_process(COMMAND "${CMKR_EXECUTABLE}" gen 193 | RESULT_VARIABLE CMKR_EXEC_RESULT 194 | ) 195 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 196 | message(FATAL_ERROR "[cmkr] Failed to generate project.") 197 | else() 198 | message(STATUS "[cmkr] Configure using: cmake -B build") 199 | endif() 200 | endif() 201 | endif() 202 | 203 | # This is the macro that contains black magic 204 | macro(cmkr) 205 | # When this macro is called from the generated file, fake some internal CMake variables 206 | get_source_file_property(CMKR_CURRENT_LIST_FILE "${CMAKE_CURRENT_LIST_FILE}" CMKR_CURRENT_LIST_FILE) 207 | if(CMKR_CURRENT_LIST_FILE) 208 | set(CMAKE_CURRENT_LIST_FILE "${CMKR_CURRENT_LIST_FILE}") 209 | get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" DIRECTORY) 210 | endif() 211 | 212 | # File-based include guard (include_guard is not documented to work) 213 | get_source_file_property(CMKR_INCLUDE_GUARD "${CMAKE_CURRENT_LIST_FILE}" CMKR_INCLUDE_GUARD) 214 | if(NOT CMKR_INCLUDE_GUARD) 215 | set_source_files_properties("${CMAKE_CURRENT_LIST_FILE}" 
PROPERTIES CMKR_INCLUDE_GUARD TRUE) 216 | 217 | file(SHA256 "${CMAKE_CURRENT_LIST_FILE}" CMKR_LIST_FILE_SHA256_PRE) 218 | 219 | # Generate CMakeLists.txt 220 | cmkr_exec("${CMKR_EXECUTABLE}" gen 221 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" 222 | ) 223 | 224 | file(SHA256 "${CMAKE_CURRENT_LIST_FILE}" CMKR_LIST_FILE_SHA256_POST) 225 | 226 | # Delete the temporary file if it was left for some reason 227 | set(CMKR_TEMP_FILE "${CMAKE_CURRENT_SOURCE_DIR}/CMakerLists.txt") 228 | if(EXISTS "${CMKR_TEMP_FILE}") 229 | file(REMOVE "${CMKR_TEMP_FILE}") 230 | endif() 231 | 232 | if(NOT CMKR_LIST_FILE_SHA256_PRE STREQUAL CMKR_LIST_FILE_SHA256_POST) 233 | # Copy the now-generated CMakeLists.txt to CMakerLists.txt 234 | # This is done because you cannot include() a file you are currently in 235 | configure_file(CMakeLists.txt "${CMKR_TEMP_FILE}" COPYONLY) 236 | 237 | # Add the macro required for the hack at the start of the cmkr macro 238 | set_source_files_properties("${CMKR_TEMP_FILE}" PROPERTIES 239 | CMKR_CURRENT_LIST_FILE "${CMAKE_CURRENT_LIST_FILE}" 240 | ) 241 | 242 | # 'Execute' the newly-generated CMakeLists.txt 243 | include("${CMKR_TEMP_FILE}") 244 | 245 | # Delete the generated file 246 | file(REMOVE "${CMKR_TEMP_FILE}") 247 | 248 | # Do not execute the rest of the original CMakeLists.txt 249 | return() 250 | endif() 251 | # Resume executing the unmodified CMakeLists.txt 252 | endif() 253 | endmacro() 254 | -------------------------------------------------------------------------------- /cmake/cmkr.cmake;: -------------------------------------------------------------------------------- 1 | include_guard() 2 | 3 | # Change these defaults to point to your infrastructure if desired 4 | set(CMKR_REPO "https://github.com/build-cpp/cmkr" CACHE STRING "cmkr git repository" FORCE) 5 | set(CMKR_TAG "v0.2.29" CACHE STRING "cmkr git tag (this needs to be available forever)" FORCE) 6 | set(CMKR_COMMIT_HASH "" CACHE STRING "cmkr git commit hash (optional)" FORCE) 7 | 8 | 
# To bootstrap/generate a cmkr project: cmake -P cmkr.cmake 9 | if(CMAKE_SCRIPT_MODE_FILE) 10 | set(CMAKE_BINARY_DIR "${CMAKE_BINARY_DIR}/build") 11 | set(CMAKE_CURRENT_BINARY_DIR "${CMAKE_BINARY_DIR}") 12 | file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}") 13 | endif() 14 | 15 | # Set these from the command line to customize for development/debugging purposes 16 | set(CMKR_EXECUTABLE "" CACHE FILEPATH "cmkr executable") 17 | set(CMKR_SKIP_GENERATION OFF CACHE BOOL "skip automatic cmkr generation") 18 | set(CMKR_BUILD_TYPE "Debug" CACHE STRING "cmkr build configuration") 19 | mark_as_advanced(CMKR_REPO CMKR_TAG CMKR_COMMIT_HASH CMKR_EXECUTABLE CMKR_SKIP_GENERATION CMKR_BUILD_TYPE) 20 | 21 | # Disable cmkr if generation is disabled 22 | if(DEFINED ENV{CI} OR CMKR_SKIP_GENERATION OR CMKR_BUILD_SKIP_GENERATION) 23 | message(STATUS "[cmkr] Skipping automatic cmkr generation") 24 | unset(CMKR_BUILD_SKIP_GENERATION CACHE) 25 | macro(cmkr) 26 | endmacro() 27 | return() 28 | endif() 29 | 30 | # Disable cmkr if no cmake.toml file is found 31 | if(NOT CMAKE_SCRIPT_MODE_FILE AND NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/cmake.toml") 32 | message(AUTHOR_WARNING "[cmkr] Not found: ${CMAKE_CURRENT_SOURCE_DIR}/cmake.toml") 33 | macro(cmkr) 34 | endmacro() 35 | return() 36 | endif() 37 | 38 | # Convert a Windows native path to CMake path 39 | if(CMKR_EXECUTABLE MATCHES "\\\\") 40 | string(REPLACE "\\" "/" CMKR_EXECUTABLE_CMAKE "${CMKR_EXECUTABLE}") 41 | set(CMKR_EXECUTABLE "${CMKR_EXECUTABLE_CMAKE}" CACHE FILEPATH "" FORCE) 42 | unset(CMKR_EXECUTABLE_CMAKE) 43 | endif() 44 | 45 | # Helper macro to execute a process (COMMAND_ERROR_IS_FATAL ANY is 3.19 and higher) 46 | function(cmkr_exec) 47 | execute_process(COMMAND ${ARGV} RESULT_VARIABLE CMKR_EXEC_RESULT) 48 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 49 | message(FATAL_ERROR "cmkr_exec(${ARGV}) failed (exit code ${CMKR_EXEC_RESULT})") 50 | endif() 51 | endfunction() 52 | 53 | # Windows-specific hack (CMAKE_EXECUTABLE_PREFIX is not set at the 
moment) 54 | if(WIN32) 55 | set(CMKR_EXECUTABLE_NAME "cmkr.exe") 56 | else() 57 | set(CMKR_EXECUTABLE_NAME "cmkr") 58 | endif() 59 | 60 | # Use cached cmkr if found 61 | if(DEFINED ENV{CMKR_CACHE}) 62 | set(CMKR_DIRECTORY_PREFIX "$ENV{CMKR_CACHE}") 63 | string(REPLACE "\\" "/" CMKR_DIRECTORY_PREFIX "${CMKR_DIRECTORY_PREFIX}") 64 | if(NOT CMKR_DIRECTORY_PREFIX MATCHES "\\/$") 65 | set(CMKR_DIRECTORY_PREFIX "${CMKR_DIRECTORY_PREFIX}/") 66 | endif() 67 | # Build in release mode for the cache 68 | set(CMKR_BUILD_TYPE "Release") 69 | else() 70 | set(CMKR_DIRECTORY_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/_cmkr_") 71 | endif() 72 | set(CMKR_DIRECTORY "${CMKR_DIRECTORY_PREFIX}${CMKR_TAG}") 73 | set(CMKR_CACHED_EXECUTABLE "${CMKR_DIRECTORY}/bin/${CMKR_EXECUTABLE_NAME}") 74 | 75 | # Helper function to check if a string starts with a prefix 76 | # Cannot use MATCHES, see: https://github.com/build-cpp/cmkr/issues/61 77 | function(cmkr_startswith str prefix result) 78 | string(LENGTH "${prefix}" prefix_length) 79 | string(LENGTH "${str}" str_length) 80 | if(prefix_length LESS_EQUAL str_length) 81 | string(SUBSTRING "${str}" 0 ${prefix_length} str_prefix) 82 | if(prefix STREQUAL str_prefix) 83 | set("${result}" ON PARENT_SCOPE) 84 | return() 85 | endif() 86 | endif() 87 | set("${result}" OFF PARENT_SCOPE) 88 | endfunction() 89 | 90 | # Handle upgrading logic 91 | if(CMKR_EXECUTABLE AND NOT CMKR_CACHED_EXECUTABLE STREQUAL CMKR_EXECUTABLE) 92 | cmkr_startswith("${CMKR_EXECUTABLE}" "${CMAKE_CURRENT_BINARY_DIR}/_cmkr" CMKR_STARTSWITH_BUILD) 93 | cmkr_startswith("${CMKR_EXECUTABLE}" "${CMKR_DIRECTORY_PREFIX}" CMKR_STARTSWITH_CACHE) 94 | if(CMKR_STARTSWITH_BUILD) 95 | if(DEFINED ENV{CMKR_CACHE}) 96 | message(AUTHOR_WARNING "[cmkr] Switching to cached cmkr: '${CMKR_CACHED_EXECUTABLE}'") 97 | if(EXISTS "${CMKR_CACHED_EXECUTABLE}") 98 | set(CMKR_EXECUTABLE "${CMKR_CACHED_EXECUTABLE}" CACHE FILEPATH "Full path to cmkr executable" FORCE) 99 | else() 100 | unset(CMKR_EXECUTABLE CACHE) 101 | 
endif() 102 | else() 103 | message(AUTHOR_WARNING "[cmkr] Upgrading '${CMKR_EXECUTABLE}' to '${CMKR_CACHED_EXECUTABLE}'") 104 | unset(CMKR_EXECUTABLE CACHE) 105 | endif() 106 | elseif(DEFINED ENV{CMKR_CACHE} AND CMKR_STARTSWITH_CACHE) 107 | message(AUTHOR_WARNING "[cmkr] Upgrading cached '${CMKR_EXECUTABLE}' to '${CMKR_CACHED_EXECUTABLE}'") 108 | unset(CMKR_EXECUTABLE CACHE) 109 | endif() 110 | endif() 111 | 112 | if(CMKR_EXECUTABLE AND EXISTS "${CMKR_EXECUTABLE}") 113 | message(VERBOSE "[cmkr] Found cmkr: '${CMKR_EXECUTABLE}'") 114 | elseif(CMKR_EXECUTABLE AND NOT CMKR_EXECUTABLE STREQUAL CMKR_CACHED_EXECUTABLE) 115 | message(FATAL_ERROR "[cmkr] '${CMKR_EXECUTABLE}' not found") 116 | elseif(NOT CMKR_EXECUTABLE AND EXISTS "${CMKR_CACHED_EXECUTABLE}") 117 | set(CMKR_EXECUTABLE "${CMKR_CACHED_EXECUTABLE}" CACHE FILEPATH "Full path to cmkr executable" FORCE) 118 | message(STATUS "[cmkr] Found cached cmkr: '${CMKR_EXECUTABLE}'") 119 | else() 120 | set(CMKR_EXECUTABLE "${CMKR_CACHED_EXECUTABLE}" CACHE FILEPATH "Full path to cmkr executable" FORCE) 121 | message(VERBOSE "[cmkr] Bootstrapping '${CMKR_EXECUTABLE}'") 122 | 123 | message(STATUS "[cmkr] Fetching cmkr...") 124 | if(EXISTS "${CMKR_DIRECTORY}") 125 | cmkr_exec("${CMAKE_COMMAND}" -E rm -rf "${CMKR_DIRECTORY}") 126 | endif() 127 | find_package(Git QUIET REQUIRED) 128 | cmkr_exec("${GIT_EXECUTABLE}" 129 | clone 130 | --config advice.detachedHead=false 131 | --branch ${CMKR_TAG} 132 | --depth 1 133 | ${CMKR_REPO} 134 | "${CMKR_DIRECTORY}" 135 | ) 136 | if(CMKR_COMMIT_HASH) 137 | execute_process( 138 | COMMAND "${GIT_EXECUTABLE}" checkout -q "${CMKR_COMMIT_HASH}" 139 | RESULT_VARIABLE CMKR_EXEC_RESULT 140 | WORKING_DIRECTORY "${CMKR_DIRECTORY}" 141 | ) 142 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 143 | message(FATAL_ERROR "Tag '${CMKR_TAG}' hash is not '${CMKR_COMMIT_HASH}'") 144 | endif() 145 | endif() 146 | message(STATUS "[cmkr] Building cmkr (using system compiler)...") 147 | cmkr_exec("${CMAKE_COMMAND}" 148 | 
--no-warn-unused-cli 149 | "${CMKR_DIRECTORY}" 150 | "-B${CMKR_DIRECTORY}/build" 151 | "-DCMAKE_BUILD_TYPE=${CMKR_BUILD_TYPE}" 152 | "-DCMAKE_UNITY_BUILD=ON" 153 | "-DCMAKE_INSTALL_PREFIX=${CMKR_DIRECTORY}" 154 | "-DCMKR_GENERATE_DOCUMENTATION=OFF" 155 | ) 156 | cmkr_exec("${CMAKE_COMMAND}" 157 | --build "${CMKR_DIRECTORY}/build" 158 | --config "${CMKR_BUILD_TYPE}" 159 | --parallel 160 | ) 161 | cmkr_exec("${CMAKE_COMMAND}" 162 | --install "${CMKR_DIRECTORY}/build" 163 | --config "${CMKR_BUILD_TYPE}" 164 | --prefix "${CMKR_DIRECTORY}" 165 | --component cmkr 166 | ) 167 | if(NOT EXISTS ${CMKR_EXECUTABLE}) 168 | message(FATAL_ERROR "[cmkr] Failed to bootstrap '${CMKR_EXECUTABLE}'") 169 | endif() 170 | cmkr_exec("${CMKR_EXECUTABLE}" version) 171 | message(STATUS "[cmkr] Bootstrapped ${CMKR_EXECUTABLE}") 172 | endif() 173 | execute_process(COMMAND "${CMKR_EXECUTABLE}" version 174 | RESULT_VARIABLE CMKR_EXEC_RESULT 175 | ) 176 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 177 | message(FATAL_ERROR "[cmkr] Failed to get version, try clearing the cache and rebuilding") 178 | endif() 179 | 180 | # Use cmkr.cmake as a script 181 | if(CMAKE_SCRIPT_MODE_FILE) 182 | if(NOT EXISTS "${CMAKE_SOURCE_DIR}/cmake.toml") 183 | execute_process(COMMAND "${CMKR_EXECUTABLE}" init 184 | RESULT_VARIABLE CMKR_EXEC_RESULT 185 | ) 186 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 187 | message(FATAL_ERROR "[cmkr] Failed to bootstrap cmkr project. 
Please report an issue: https://github.com/build-cpp/cmkr/issues/new") 188 | else() 189 | message(STATUS "[cmkr] Modify cmake.toml and then configure using: cmake -B build") 190 | endif() 191 | else() 192 | execute_process(COMMAND "${CMKR_EXECUTABLE}" gen 193 | RESULT_VARIABLE CMKR_EXEC_RESULT 194 | ) 195 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 196 | message(FATAL_ERROR "[cmkr] Failed to generate project.") 197 | else() 198 | message(STATUS "[cmkr] Configure using: cmake -B build") 199 | endif() 200 | endif() 201 | endif() 202 | 203 | # This is the macro that contains black magic 204 | macro(cmkr) 205 | # When this macro is called from the generated file, fake some internal CMake variables 206 | get_source_file_property(CMKR_CURRENT_LIST_FILE "${CMAKE_CURRENT_LIST_FILE}" CMKR_CURRENT_LIST_FILE) 207 | if(CMKR_CURRENT_LIST_FILE) 208 | set(CMAKE_CURRENT_LIST_FILE "${CMKR_CURRENT_LIST_FILE}") 209 | get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" DIRECTORY) 210 | endif() 211 | 212 | # File-based include guard (include_guard is not documented to work) 213 | get_source_file_property(CMKR_INCLUDE_GUARD "${CMAKE_CURRENT_LIST_FILE}" CMKR_INCLUDE_GUARD) 214 | if(NOT CMKR_INCLUDE_GUARD) 215 | set_source_files_properties("${CMAKE_CURRENT_LIST_FILE}" PROPERTIES CMKR_INCLUDE_GUARD TRUE) 216 | 217 | file(SHA256 "${CMAKE_CURRENT_LIST_FILE}" CMKR_LIST_FILE_SHA256_PRE) 218 | 219 | # Generate CMakeLists.txt 220 | cmkr_exec("${CMKR_EXECUTABLE}" gen 221 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" 222 | ) 223 | 224 | file(SHA256 "${CMAKE_CURRENT_LIST_FILE}" CMKR_LIST_FILE_SHA256_POST) 225 | 226 | # Delete the temporary file if it was left for some reason 227 | set(CMKR_TEMP_FILE "${CMAKE_CURRENT_SOURCE_DIR}/CMakerLists.txt") 228 | if(EXISTS "${CMKR_TEMP_FILE}") 229 | file(REMOVE "${CMKR_TEMP_FILE}") 230 | endif() 231 | 232 | if(NOT CMKR_LIST_FILE_SHA256_PRE STREQUAL CMKR_LIST_FILE_SHA256_POST) 233 | # Copy the now-generated CMakeLists.txt to CMakerLists.txt 
234 | # This is done because you cannot include() a file you are currently in 235 | configure_file(CMakeLists.txt "${CMKR_TEMP_FILE}" COPYONLY) 236 | 237 | # Add the macro required for the hack at the start of the cmkr macro 238 | set_source_files_properties("${CMKR_TEMP_FILE}" PROPERTIES 239 | CMKR_CURRENT_LIST_FILE "${CMAKE_CURRENT_LIST_FILE}" 240 | ) 241 | 242 | # 'Execute' the newly-generated CMakeLists.txt 243 | include("${CMKR_TEMP_FILE}") 244 | 245 | # Delete the generated file 246 | file(REMOVE "${CMKR_TEMP_FILE}") 247 | 248 | # Do not execute the rest of the original CMakeLists.txt 249 | return() 250 | endif() 251 | # Resume executing the unmodified CMakeLists.txt 252 | endif() 253 | endmacro() -------------------------------------------------------------------------------- /cmkr.cmake: -------------------------------------------------------------------------------- 1 | include_guard() 2 | 3 | # Change these defaults to point to your infrastructure if desired 4 | set(CMKR_REPO "https://github.com/build-cpp/cmkr" CACHE STRING "cmkr git repository" FORCE) 5 | set(CMKR_TAG "v0.2.29" CACHE STRING "cmkr git tag (this needs to be available forever)" FORCE) 6 | set(CMKR_COMMIT_HASH "" CACHE STRING "cmkr git commit hash (optional)" FORCE) 7 | 8 | # To bootstrap/generate a cmkr project: cmake -P cmkr.cmake 9 | if(CMAKE_SCRIPT_MODE_FILE) 10 | set(CMAKE_BINARY_DIR "${CMAKE_BINARY_DIR}/build") 11 | set(CMAKE_CURRENT_BINARY_DIR "${CMAKE_BINARY_DIR}") 12 | file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}") 13 | endif() 14 | 15 | # Set these from the command line to customize for development/debugging purposes 16 | set(CMKR_EXECUTABLE "" CACHE FILEPATH "cmkr executable") 17 | set(CMKR_SKIP_GENERATION OFF CACHE BOOL "skip automatic cmkr generation") 18 | set(CMKR_BUILD_TYPE "Debug" CACHE STRING "cmkr build configuration") 19 | mark_as_advanced(CMKR_REPO CMKR_TAG CMKR_COMMIT_HASH CMKR_EXECUTABLE CMKR_SKIP_GENERATION CMKR_BUILD_TYPE) 20 | 21 | # Disable cmkr if generation is 
disabled 22 | if(DEFINED ENV{CI} OR CMKR_SKIP_GENERATION OR CMKR_BUILD_SKIP_GENERATION) 23 | message(STATUS "[cmkr] Skipping automatic cmkr generation") 24 | unset(CMKR_BUILD_SKIP_GENERATION CACHE) 25 | macro(cmkr) 26 | endmacro() 27 | return() 28 | endif() 29 | 30 | # Disable cmkr if no cmake.toml file is found 31 | if(NOT CMAKE_SCRIPT_MODE_FILE AND NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/cmake.toml") 32 | message(AUTHOR_WARNING "[cmkr] Not found: ${CMAKE_CURRENT_SOURCE_DIR}/cmake.toml") 33 | macro(cmkr) 34 | endmacro() 35 | return() 36 | endif() 37 | 38 | # Convert a Windows native path to CMake path 39 | if(CMKR_EXECUTABLE MATCHES "\\\\") 40 | string(REPLACE "\\" "/" CMKR_EXECUTABLE_CMAKE "${CMKR_EXECUTABLE}") 41 | set(CMKR_EXECUTABLE "${CMKR_EXECUTABLE_CMAKE}" CACHE FILEPATH "" FORCE) 42 | unset(CMKR_EXECUTABLE_CMAKE) 43 | endif() 44 | 45 | # Helper macro to execute a process (COMMAND_ERROR_IS_FATAL ANY is 3.19 and higher) 46 | function(cmkr_exec) 47 | execute_process(COMMAND ${ARGV} RESULT_VARIABLE CMKR_EXEC_RESULT) 48 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 49 | message(FATAL_ERROR "cmkr_exec(${ARGV}) failed (exit code ${CMKR_EXEC_RESULT})") 50 | endif() 51 | endfunction() 52 | 53 | # Windows-specific hack (CMAKE_EXECUTABLE_PREFIX is not set at the moment) 54 | if(WIN32) 55 | set(CMKR_EXECUTABLE_NAME "cmkr.exe") 56 | else() 57 | set(CMKR_EXECUTABLE_NAME "cmkr") 58 | endif() 59 | 60 | # Use cached cmkr if found 61 | if(DEFINED ENV{CMKR_CACHE}) 62 | set(CMKR_DIRECTORY_PREFIX "$ENV{CMKR_CACHE}") 63 | string(REPLACE "\\" "/" CMKR_DIRECTORY_PREFIX "${CMKR_DIRECTORY_PREFIX}") 64 | if(NOT CMKR_DIRECTORY_PREFIX MATCHES "\\/$") 65 | set(CMKR_DIRECTORY_PREFIX "${CMKR_DIRECTORY_PREFIX}/") 66 | endif() 67 | # Build in release mode for the cache 68 | set(CMKR_BUILD_TYPE "Release") 69 | else() 70 | set(CMKR_DIRECTORY_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/_cmkr_") 71 | endif() 72 | set(CMKR_DIRECTORY "${CMKR_DIRECTORY_PREFIX}${CMKR_TAG}") 73 | set(CMKR_CACHED_EXECUTABLE 
"${CMKR_DIRECTORY}/bin/${CMKR_EXECUTABLE_NAME}") 74 | 75 | # Helper function to check if a string starts with a prefix 76 | # Cannot use MATCHES, see: https://github.com/build-cpp/cmkr/issues/61 77 | function(cmkr_startswith str prefix result) 78 | string(LENGTH "${prefix}" prefix_length) 79 | string(LENGTH "${str}" str_length) 80 | if(prefix_length LESS_EQUAL str_length) 81 | string(SUBSTRING "${str}" 0 ${prefix_length} str_prefix) 82 | if(prefix STREQUAL str_prefix) 83 | set("${result}" ON PARENT_SCOPE) 84 | return() 85 | endif() 86 | endif() 87 | set("${result}" OFF PARENT_SCOPE) 88 | endfunction() 89 | 90 | # Handle upgrading logic 91 | if(CMKR_EXECUTABLE AND NOT CMKR_CACHED_EXECUTABLE STREQUAL CMKR_EXECUTABLE) 92 | cmkr_startswith("${CMKR_EXECUTABLE}" "${CMAKE_CURRENT_BINARY_DIR}/_cmkr" CMKR_STARTSWITH_BUILD) 93 | cmkr_startswith("${CMKR_EXECUTABLE}" "${CMKR_DIRECTORY_PREFIX}" CMKR_STARTSWITH_CACHE) 94 | if(CMKR_STARTSWITH_BUILD) 95 | if(DEFINED ENV{CMKR_CACHE}) 96 | message(AUTHOR_WARNING "[cmkr] Switching to cached cmkr: '${CMKR_CACHED_EXECUTABLE}'") 97 | if(EXISTS "${CMKR_CACHED_EXECUTABLE}") 98 | set(CMKR_EXECUTABLE "${CMKR_CACHED_EXECUTABLE}" CACHE FILEPATH "Full path to cmkr executable" FORCE) 99 | else() 100 | unset(CMKR_EXECUTABLE CACHE) 101 | endif() 102 | else() 103 | message(AUTHOR_WARNING "[cmkr] Upgrading '${CMKR_EXECUTABLE}' to '${CMKR_CACHED_EXECUTABLE}'") 104 | unset(CMKR_EXECUTABLE CACHE) 105 | endif() 106 | elseif(DEFINED ENV{CMKR_CACHE} AND CMKR_STARTSWITH_CACHE) 107 | message(AUTHOR_WARNING "[cmkr] Upgrading cached '${CMKR_EXECUTABLE}' to '${CMKR_CACHED_EXECUTABLE}'") 108 | unset(CMKR_EXECUTABLE CACHE) 109 | endif() 110 | endif() 111 | 112 | if(CMKR_EXECUTABLE AND EXISTS "${CMKR_EXECUTABLE}") 113 | message(VERBOSE "[cmkr] Found cmkr: '${CMKR_EXECUTABLE}'") 114 | elseif(CMKR_EXECUTABLE AND NOT CMKR_EXECUTABLE STREQUAL CMKR_CACHED_EXECUTABLE) 115 | message(FATAL_ERROR "[cmkr] '${CMKR_EXECUTABLE}' not found") 116 | elseif(NOT CMKR_EXECUTABLE 
AND EXISTS "${CMKR_CACHED_EXECUTABLE}") 117 | set(CMKR_EXECUTABLE "${CMKR_CACHED_EXECUTABLE}" CACHE FILEPATH "Full path to cmkr executable" FORCE) 118 | message(STATUS "[cmkr] Found cached cmkr: '${CMKR_EXECUTABLE}'") 119 | else() 120 | set(CMKR_EXECUTABLE "${CMKR_CACHED_EXECUTABLE}" CACHE FILEPATH "Full path to cmkr executable" FORCE) 121 | message(VERBOSE "[cmkr] Bootstrapping '${CMKR_EXECUTABLE}'") 122 | 123 | message(STATUS "[cmkr] Fetching cmkr...") 124 | if(EXISTS "${CMKR_DIRECTORY}") 125 | cmkr_exec("${CMAKE_COMMAND}" -E rm -rf "${CMKR_DIRECTORY}") 126 | endif() 127 | find_package(Git QUIET REQUIRED) 128 | cmkr_exec("${GIT_EXECUTABLE}" 129 | clone 130 | --config advice.detachedHead=false 131 | --branch ${CMKR_TAG} 132 | --depth 1 133 | ${CMKR_REPO} 134 | "${CMKR_DIRECTORY}" 135 | ) 136 | if(CMKR_COMMIT_HASH) 137 | execute_process( 138 | COMMAND "${GIT_EXECUTABLE}" checkout -q "${CMKR_COMMIT_HASH}" 139 | RESULT_VARIABLE CMKR_EXEC_RESULT 140 | WORKING_DIRECTORY "${CMKR_DIRECTORY}" 141 | ) 142 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 143 | message(FATAL_ERROR "Tag '${CMKR_TAG}' hash is not '${CMKR_COMMIT_HASH}'") 144 | endif() 145 | endif() 146 | message(STATUS "[cmkr] Building cmkr (using system compiler)...") 147 | cmkr_exec("${CMAKE_COMMAND}" 148 | --no-warn-unused-cli 149 | "${CMKR_DIRECTORY}" 150 | "-B${CMKR_DIRECTORY}/build" 151 | "-DCMAKE_BUILD_TYPE=${CMKR_BUILD_TYPE}" 152 | "-DCMAKE_UNITY_BUILD=ON" 153 | "-DCMAKE_INSTALL_PREFIX=${CMKR_DIRECTORY}" 154 | "-DCMKR_GENERATE_DOCUMENTATION=OFF" 155 | ) 156 | cmkr_exec("${CMAKE_COMMAND}" 157 | --build "${CMKR_DIRECTORY}/build" 158 | --config "${CMKR_BUILD_TYPE}" 159 | --parallel 160 | ) 161 | cmkr_exec("${CMAKE_COMMAND}" 162 | --install "${CMKR_DIRECTORY}/build" 163 | --config "${CMKR_BUILD_TYPE}" 164 | --prefix "${CMKR_DIRECTORY}" 165 | --component cmkr 166 | ) 167 | if(NOT EXISTS ${CMKR_EXECUTABLE}) 168 | message(FATAL_ERROR "[cmkr] Failed to bootstrap '${CMKR_EXECUTABLE}'") 169 | endif() 170 | 
cmkr_exec("${CMKR_EXECUTABLE}" version) 171 | message(STATUS "[cmkr] Bootstrapped ${CMKR_EXECUTABLE}") 172 | endif() 173 | execute_process(COMMAND "${CMKR_EXECUTABLE}" version 174 | RESULT_VARIABLE CMKR_EXEC_RESULT 175 | ) 176 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 177 | message(FATAL_ERROR "[cmkr] Failed to get version, try clearing the cache and rebuilding") 178 | endif() 179 | 180 | # Use cmkr.cmake as a script 181 | if(CMAKE_SCRIPT_MODE_FILE) 182 | if(NOT EXISTS "${CMAKE_SOURCE_DIR}/cmake.toml") 183 | execute_process(COMMAND "${CMKR_EXECUTABLE}" init 184 | RESULT_VARIABLE CMKR_EXEC_RESULT 185 | ) 186 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 187 | message(FATAL_ERROR "[cmkr] Failed to bootstrap cmkr project. Please report an issue: https://github.com/build-cpp/cmkr/issues/new") 188 | else() 189 | message(STATUS "[cmkr] Modify cmake.toml and then configure using: cmake -B build") 190 | endif() 191 | else() 192 | execute_process(COMMAND "${CMKR_EXECUTABLE}" gen 193 | RESULT_VARIABLE CMKR_EXEC_RESULT 194 | ) 195 | if(NOT CMKR_EXEC_RESULT EQUAL 0) 196 | message(FATAL_ERROR "[cmkr] Failed to generate project.") 197 | else() 198 | message(STATUS "[cmkr] Configure using: cmake -B build") 199 | endif() 200 | endif() 201 | endif() 202 | 203 | # This is the macro that contains black magic 204 | macro(cmkr) 205 | # When this macro is called from the generated file, fake some internal CMake variables 206 | get_source_file_property(CMKR_CURRENT_LIST_FILE "${CMAKE_CURRENT_LIST_FILE}" CMKR_CURRENT_LIST_FILE) 207 | if(CMKR_CURRENT_LIST_FILE) 208 | set(CMAKE_CURRENT_LIST_FILE "${CMKR_CURRENT_LIST_FILE}") 209 | get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" DIRECTORY) 210 | endif() 211 | 212 | # File-based include guard (include_guard is not documented to work) 213 | get_source_file_property(CMKR_INCLUDE_GUARD "${CMAKE_CURRENT_LIST_FILE}" CMKR_INCLUDE_GUARD) 214 | if(NOT CMKR_INCLUDE_GUARD) 215 | set_source_files_properties("${CMAKE_CURRENT_LIST_FILE}" 
PROPERTIES CMKR_INCLUDE_GUARD TRUE) 216 | 217 | file(SHA256 "${CMAKE_CURRENT_LIST_FILE}" CMKR_LIST_FILE_SHA256_PRE) 218 | 219 | # Generate CMakeLists.txt 220 | cmkr_exec("${CMKR_EXECUTABLE}" gen 221 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" 222 | ) 223 | 224 | file(SHA256 "${CMAKE_CURRENT_LIST_FILE}" CMKR_LIST_FILE_SHA256_POST) 225 | 226 | # Delete the temporary file if it was left for some reason 227 | set(CMKR_TEMP_FILE "${CMAKE_CURRENT_SOURCE_DIR}/CMakerLists.txt") 228 | if(EXISTS "${CMKR_TEMP_FILE}") 229 | file(REMOVE "${CMKR_TEMP_FILE}") 230 | endif() 231 | 232 | if(NOT CMKR_LIST_FILE_SHA256_PRE STREQUAL CMKR_LIST_FILE_SHA256_POST) 233 | # Copy the now-generated CMakeLists.txt to CMakerLists.txt 234 | # This is done because you cannot include() a file you are currently in 235 | configure_file(CMakeLists.txt "${CMKR_TEMP_FILE}" COPYONLY) 236 | 237 | # Add the macro required for the hack at the start of the cmkr macro 238 | set_source_files_properties("${CMKR_TEMP_FILE}" PROPERTIES 239 | CMKR_CURRENT_LIST_FILE "${CMAKE_CURRENT_LIST_FILE}" 240 | ) 241 | 242 | # 'Execute' the newly-generated CMakeLists.txt 243 | include("${CMKR_TEMP_FILE}") 244 | 245 | # Delete the generated file 246 | file(REMOVE "${CMKR_TEMP_FILE}") 247 | 248 | # Do not execute the rest of the original CMakeLists.txt 249 | return() 250 | endif() 251 | # Resume executing the unmodified CMakeLists.txt 252 | endif() 253 | endmacro() 254 | -------------------------------------------------------------------------------- /docs/BUILDING.md: -------------------------------------------------------------------------------- 1 | # Docker 2 | 3 | To build Mergen in Docker run the following commands: 4 | 5 | ## Build image 6 | 7 | ```bash 8 | docker build . -t mergen 9 | ``` 10 | --- 11 | 12 | ## Run 13 | 14 | Place target binary in the Mergen's root dir, then run following command. 
15 | 16 | Replace `target.exe` with the name of your binary and `0x123456789` with the address of the obfuscated function. 17 | 18 | ```bash 19 | # PowerShell 20 | docker run --rm -v ${PWD}:/data mergen target.exe 0x123456789 21 | 22 | # command prompt 23 | docker run --rm -v %cd%:/data mergen target.exe 0x123456789 24 | 25 | # bash 26 | docker run --rm -v $PWD:/data mergen target.exe 0x123456789 27 | ``` 28 | --- 29 | 30 | # Windows 31 | 32 | This guide explains how to set up your environment to build LLVM 18.1.0 on Windows with Ninja, and then how to build Mergen against it using Clang. 33 | 34 | --- 35 | 36 | # Building LLVM 18.1.0 from Scratch on Windows 37 | 38 | To set up and build LLVM 18.1.0 from scratch on Windows, follow these steps. This guide includes instructions on installing the necessary tools, setting up Visual Studio and the correct SDK, configuring paths, and building with Ninja. 39 | 40 | ### Prerequisites 41 | 42 | --- 43 | 44 | 1. **Download and Install LLVM 18.1.0** 45 | - Download the LLVM 18.1.0 pre-built installer for Windows from this link: [LLVM-18.1.0-win64.exe](https://github.com/llvm/llvm-project/releases/download/llvmorg-18.1.0/LLVM-18.1.0-win64.exe). 46 | - Run the installer and follow the on-screen instructions. 47 | - During installation, choose the option to add LLVM to the `PATH` environment variable for either: 48 | - **All users** or **Current user only**, depending on your preference. 49 | - This makes `clang` and `clang++` directly accessible from any command prompt or terminal. 50 | 51 | 2. **Download LLVM Source** 52 | - Download the LLVM 18.1.0 source from the [official release page](https://github.com/llvm/llvm-project/releases/tag/llvmorg-18.1.0). 53 | - Direct link to the source: [llvmorg-18.1.0.zip](https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-18.1.0.zip) 54 | - Extract the archive to a directory of your choice (e.g., `C:\llvm-project`). 55 | 56 | 3.
**Install Visual Studio 2022** 57 | - Download and install **Visual Studio 2022** (Community edition is sufficient). 58 | - During installation, ensure you select: 59 | - **Desktop development with C++** workload. 60 | - Under Individual Components, check: 61 | - **MSVC v143 - VS 2022 C++ x64/x86 build tools** 62 | - **C++ CMake tools for Windows** 63 | - **Windows 10 SDK (10.0.19041.0)** or newer. 64 | - Make a note of the installation path, typically: 65 | - `C:\Program Files\Microsoft Visual Studio\2022\Community` 66 | 67 | 4. **Set Up System Environment Variables** 68 | - Open the Environment Variables settings in Windows and add the following paths to your `Path` variable: 69 | - **Ninja** (if not already installed): 70 | - Download Ninja from [Ninja GitHub](https://github.com/ninja-build/ninja/releases) and add the path to `ninja.exe` to your `Path`. 71 | - **CMake**: 72 | - If CMake is not installed, download it from [CMake's website](https://cmake.org/download/) and add it to your `Path`. 73 | - **LLVM tools** (once LLVM is built, add the installation directory to `Path` as needed). 74 | 75 | 5. **Additional CMake Variables for LLVM and Visual Studio Paths** 76 | - Note the `CMAKE_C_COMPILER` and `CMAKE_CXX_COMPILER` paths to ensure LLVM uses the correct compiler (these are CMake variables passed with `-D` in the configure step below; adjust the MSVC version number to match your installation): 77 | - `CMAKE_C_COMPILER="C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.41.34120/bin/Hostx64/x64/cl.exe"` 78 | - `CMAKE_CXX_COMPILER="C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.41.34120/bin/Hostx64/x64/cl.exe"` 79 | - Note the Windows Kit paths, specifically `RC` and `MT` (adjust the SDK version number to match your installation): 80 | - `CMAKE_RC_COMPILER="C:/Program Files (x86)/Windows Kits/10/bin/10.0.19041.0/x64/rc.exe"` 81 | - `CMAKE_MT="C:/Program Files (x86)/Windows Kits/10/bin/10.0.19041.0/x64/mt.exe"` 82 | - Choose the installation directory, which is passed as `CMAKE_INSTALL_PREFIX`: 83 | - Example: `CMAKE_INSTALL_PREFIX="C:\llvm_stuff"` 84 | 85 | ### Building LLVM with Ninja 86 | 87 | 1.
**Open a Developer Command Prompt** 88 | - Open a command prompt configured for Visual Studio: 89 | - Navigate to **Start Menu > Visual Studio 2022 > Developer Command Prompt for Visual Studio 2022**. 90 | 91 | 2. **Configure the Build with CMake** 92 | - Navigate to the root of the LLVM source directory: 93 | ```bash 94 | cd C:\llvm-project 95 | ``` 96 | - Run CMake with the following configuration: 97 | ```bash 98 | cmake -G "Ninja" -S llvm -B build -DCMAKE_BUILD_TYPE=Release -DLLVM_TARGETS_TO_BUILD="X86" -DCMAKE_INSTALL_PREFIX="C:\llvm_stuff" -DLLVM_HOST_TRIPLE=x86_64-pc-windows-msvc -DCMAKE_C_COMPILER="C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.41.34120/bin/Hostx64/x64/cl.exe" -DCMAKE_CXX_COMPILER="C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.41.34120/bin/Hostx64/x64/cl.exe" -DCMAKE_RC_COMPILER="C:/Program Files (x86)/Windows Kits/10/bin/10.0.19041.0/x64/rc.exe" -DCMAKE_MT="C:/Program Files (x86)/Windows Kits/10/bin/10.0.19041.0/x64/mt.exe" 99 | ``` 100 | 101 | 3. **Build and Install LLVM** 102 | - Build LLVM and install it to the specified installation directory (the `install` target builds everything first): 103 | ```bash 104 | ninja -C build install 105 | ``` 106 | 107 | The next section covers cloning the Mergen repository (recursively, so that submodules are included) and building it against this LLVM installation. 108 | 109 | --- 110 | 111 | ### Setting Up Mergen Build 112 | 113 | With LLVM successfully built and installed, you can configure Mergen to use the newly built LLVM. 114 | 115 | 1. **Set the `LLVM_DIR` Environment Variable** 116 | - To ensure Mergen can locate the LLVM CMake configuration files, set `LLVM_DIR` as a system environment variable.
117 | - Open Command Prompt as Administrator and run the following command: 118 | ```cmd 119 | setx LLVM_DIR "C:\llvm_stuff\lib\cmake\llvm" /M 120 | ``` 121 | - Alternatively, in PowerShell (also as Administrator), use: 122 | ```powershell 123 | [System.Environment]::SetEnvironmentVariable("LLVM_DIR", "C:\llvm_stuff\lib\cmake\llvm", "Machine") 124 | ``` 125 | - This makes `LLVM_DIR` available system-wide, allowing CMake to locate LLVM when building Mergen. Restart any command prompt or terminal session to ensure the environment variable is recognized. 126 | 127 | 2. **Clone the Mergen Repository** (recursively, to include submodules): 128 | ```bash 129 | git clone --recursive https://github.com/NaC-L/Mergen.git 130 | cd Mergen 131 | ``` 132 | 133 | 3. **Run CMake for Mergen Build** 134 | - Configure CMake to use Clang as the compiler: 135 | ```bash 136 | cmake -G Ninja -S . -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER="clang++" -DCMAKE_C_COMPILER="clang" 137 | ``` 138 | 139 | 4. **Build Mergen with Ninja** 140 | ```bash 141 | ninja -C build 142 | ``` 143 | 144 | --- 145 | 146 | -------------------------------------------------------------------------------- /icpped_rust/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /icpped_rust/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "Iced-Wrapper" 7 | version = "0.1.0" 8 | dependencies = [ 9 | "iced-x86", 10 | "memoffset", 11 | ] 12 | 13 | [[package]] 14 | name = "autocfg" 15 | version = "1.4.0" 16 | source = "registry+https://github.com/rust-lang/crates.io-index" 17 | checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" 18 | 19 | [[package]] 20 | name = "iced-x86" 21 | version = "1.21.0" 22 | source = "registry+https://github.com/rust-lang/crates.io-index" 23 | checksum = "7c447cff8c7f384a7d4f741cfcff32f75f3ad02b406432e8d6c878d56b1edf6b" 24 | dependencies = [ 25 | "lazy_static", 26 | ] 27 | 28 | [[package]] 29 | name = "lazy_static" 30 | version = "1.5.0" 31 | source = "registry+https://github.com/rust-lang/crates.io-index" 32 | checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" 33 | 34 | [[package]] 35 | name = "memoffset" 36 | version = "0.9.1" 37 | source = "registry+https://github.com/rust-lang/crates.io-index" 38 | checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" 39 | dependencies = [ 40 | "autocfg", 41 | ] 42 | -------------------------------------------------------------------------------- /icpped_rust/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "Iced-Wrapper" 3 | version = "0.1.0" 4 | edition = "2018" 5 | 6 | [lib] 7 | crate-type = ["staticlib"] 8 | 9 | 10 | 11 | [dependencies] 12 | iced-x86 = "1.21" 13 | memoffset = "0.9.1" -------------------------------------------------------------------------------- /icpped_rust/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("cargo:rustc-link-lib=dylib=userenv"); 3 | println!("cargo:rustc-link-lib=dylib=ntdll"); 4 | } -------------------------------------------------------------------------------- /icpped_rust/src/lib.rs: 
-------------------------------------------------------------------------------- 1 | use iced_x86::{Decoder, DecoderOptions, Formatter, Instruction, MemorySize, Mnemonic, NasmFormatter, OpKind, Register, SpecializedFormatter, SpecializedFormatterTraitOptions}; 2 | use std::slice; 3 | use std::ffi::CString; 4 | use std::os::raw::c_char; 5 | use memoffset::offset_of; 6 | 7 | /// A C-compatible version of your disassembled instruction structure. 8 | 9 | pub const PREFIX_NONE: u8 = 0; 10 | pub const PREFIX_REP: u8 = 1; 11 | pub const PREFIX_REPE: u8 = 1; 12 | pub const PREFIX_REPNE: u8 = 2; 13 | pub const PREFIX_LOCK: u8 = 3; 14 | pub const PREFIX_END: u8 = 3; 15 | 16 | 17 | 18 | #[derive(Debug, Clone, Copy)] 19 | enum OperandType { 20 | Invalid, 21 | Register8, 22 | Register16, 23 | Register32, 24 | Register64, 25 | Memory8, 26 | Memory16, 27 | Memory32, 28 | Memory64, 29 | Immediate8, 30 | Immediate8_2nd, // enter/exit 31 | Immediate16, 32 | Immediate32, 33 | Immediate64 34 | } 35 | 36 | #[repr(C)] 37 | #[derive(Debug, Clone)] 38 | pub struct MergenDisassembledInstructionBase { 39 | pub mnemonic: u16, 40 | 41 | pub mem_base: u8, 42 | pub mem_index: u8, 43 | pub mem_scale: u8, 44 | 45 | 46 | pub stack_growth: u8, 47 | 48 | 49 | 50 | pub regs: [u8; 4], 51 | pub types: [u8; 4], 52 | 53 | pub attributes: u8, 54 | 55 | pub length: u8, 56 | 57 | pub operand_count_visible: u8, 58 | 59 | pub immediate: u64, 60 | 61 | pub mem_disp: u64, // aka imm2 62 | 63 | pub text: *mut i8, 64 | } 65 | 66 | // make sure its same as our c structure 67 | 68 | const _: () = assert!(offset_of!(MergenDisassembledInstructionBase, mnemonic) == 0, "invalid offset"); 69 | const _: () = assert!(offset_of!(MergenDisassembledInstructionBase, mem_base) == 2, "invalid offset"); 70 | const _: () = assert!(offset_of!(MergenDisassembledInstructionBase, mem_index) == 3, "invalid offset"); 71 | const _: () = assert!(offset_of!(MergenDisassembledInstructionBase, mem_scale) == 4, "invalid offset"); 72 | const 
_: () = assert!(offset_of!(MergenDisassembledInstructionBase, stack_growth) == 5, "invalid offset"); 73 | const _: () = assert!(offset_of!(MergenDisassembledInstructionBase, regs) == 6, "invalid offset"); 74 | const _: () = assert!(offset_of!(MergenDisassembledInstructionBase, types) == 10, "invalid offset"); 75 | const _: () = assert!(offset_of!(MergenDisassembledInstructionBase, attributes) == 14, "invalid offset"); 76 | const _: () = assert!(offset_of!(MergenDisassembledInstructionBase, length) == 15, "invalid offset"); 77 | const _: () = assert!(offset_of!(MergenDisassembledInstructionBase, operand_count_visible) == 16, "invalid offset"); 78 | const _: () = assert!(offset_of!(MergenDisassembledInstructionBase, immediate) == 24, "invalid offset"); 79 | const _: () = assert!(offset_of!(MergenDisassembledInstructionBase, mem_disp) == 32, "invalid offset"); 80 | const _: () = assert!(offset_of!(MergenDisassembledInstructionBase, text) == 40, "invalid offset"); 81 | 82 | fn has_64bit_immediate(instr: &Instruction) -> bool { 83 | for i in 0..instr.op_count() { 84 | match instr.op_kind(i) { 85 | OpKind::Immediate64 => return true, 86 | _ => continue, 87 | } 88 | } 89 | false 90 | } 91 | 92 | fn is_relative_jump(instr: &Instruction) -> bool { 93 | if instr.memory_base() == Register::RIP { 94 | return true; 95 | } 96 | // at max, its 5, use constants to unroll & less lookup 97 | for i in 0..5{ 98 | match instr.op_kind(i) { 99 | // if nearbranch, base isnt rip, kinda annoyin 100 | OpKind::NearBranch16 | OpKind::NearBranch32 | OpKind::NearBranch64 => return true, 101 | _ => {} 102 | } 103 | } 104 | false 105 | } 106 | 107 | fn convert_type_to_mergen(inst : &Instruction, index : u32) -> OperandType { 108 | 109 | match inst.op_kind(index) { 110 | OpKind::Register => { 111 | match inst.op_register(index).size() { 112 | 1 => { return OperandType::Register8; }, 113 | 2 => {return OperandType::Register16}, 114 | 4 => {return OperandType::Register32}, 115 | 8 => {return 
OperandType::Register64}, 116 | // 16 => OperandType::Register128, 117 | 0 => {return OperandType::Invalid}, 118 | _ => {panic!("Unhandled register size: {}", inst.op_register(index).size());}, 119 | } 120 | }, 121 | OpKind::Immediate8 => { return OperandType::Immediate8 }, 122 | OpKind::Immediate8_2nd => { return OperandType::Immediate8_2nd }, 123 | OpKind::Immediate16 => {return OperandType::Immediate16}, 124 | OpKind::Immediate32 => return OperandType::Immediate32, 125 | OpKind::Immediate64 => return OperandType::Immediate64, 126 | 127 | // these get Sign Extended, but we smart, we know what to sign extend XD 128 | // todo: add these types to enum 129 | OpKind::Immediate8to16 => return OperandType::Immediate8, 130 | OpKind::Immediate8to32 => return OperandType::Immediate8, 131 | OpKind::Immediate8to64 => return OperandType::Immediate8, 132 | OpKind::Immediate32to64 => return OperandType::Immediate32, 133 | 134 | OpKind::NearBranch16 => return OperandType::Immediate16, 135 | OpKind::NearBranch32 => return OperandType::Immediate32, 136 | OpKind::NearBranch64 => return OperandType::Immediate64, 137 | 138 | OpKind::FarBranch16 => return OperandType::Immediate16, 139 | OpKind::FarBranch32 => return OperandType::Immediate32, 140 | // OpKind::FarBranch64 => return OperandType::Immediate64, 141 | 142 | // OpKind::NearBranch16 => return OperandType::Immediate16, 143 | 144 | OpKind::Memory => { 145 | match inst.memory_size().size() { 146 | 0 => { 147 | /* 148 | if inst.mnemonic() == Mnemonic::Lea { 149 | return convert_type_to_mergen(&inst, 0); 150 | } 151 | */ 152 | return OperandType::Invalid; 153 | }, 154 | 1 => {return OperandType::Memory8}, 155 | 2 => {return OperandType::Memory16}, 156 | 4 => {return OperandType::Memory32}, 157 | 8 => {return OperandType::Memory64}, 158 | // 16 => OperandType::Memory128 159 | _ => {println!("{:?}", inst.memory_size());panic!("Unhandled memory size: {}", inst.memory_size().size())}, 160 | } 161 | }, 162 | 163 | _ => return 
OperandType::Invalid, 164 | } 165 | } 166 | 167 | 168 | /// Decodes a machine-code buffer and translates the first instruction into a 169 | /// C‑compatible MergenDisassembledInstructionBase structure. 170 | /// 171 | /// # Parameters 172 | /// 173 | /// - `code_ptr`: Pointer to the machine code bytes. 174 | /// - `len`: Length of the code in bytes. 175 | /// - `out`: Pointer to a MergenDisassembledInstructionBase structure that will be filled. 176 | /// 177 | /// # Returns 178 | /// 179 | /// Returns 0 on success, or -1 if the input pointers are null or length is 0. 180 | #[no_mangle] 181 | pub extern "C" fn disas( 182 | out: *mut MergenDisassembledInstructionBase, 183 | code_ptr: *const u8, 184 | len: usize 185 | ) -> i32 { 186 | // Fast null/length check 187 | if out.is_null() || code_ptr.is_null() || len == 0 { 188 | return -1; 189 | } 190 | 191 | // SAFETY: we've checked that pointers are non-null and len > 0 192 | let code = unsafe { std::slice::from_raw_parts(code_ptr, len) }; 193 | let mut decoder = Decoder::new(64, code, DecoderOptions::NONE); 194 | let instr = decoder.decode(); 195 | 196 | 197 | // Precompute commonly used values 198 | let disp64 = instr.memory_displacement64(); 199 | let instr_len = instr.len() as u64; 200 | let is_rel = is_relative_jump(&instr); 201 | let has_imm64 = has_64bit_immediate(&instr); 202 | 203 | // Compute immediate in one branch sequence 204 | let immediate = if is_rel { 205 | // relative jump: displacement minus instruction length 206 | disp64.wrapping_sub(instr_len) 207 | } else if has_imm64 { 208 | instr.immediate64() as u64 209 | } else { 210 | instr.immediate32() as u64 211 | }; 212 | 213 | // Compute prefix attributes via bitflags 214 | let mut attrs = 0u8; 215 | if instr.has_rep_prefix() { attrs = PREFIX_REP as u8; } 216 | if instr.has_repne_prefix() { attrs = PREFIX_REPNE as u8; } 217 | if instr.has_lock_prefix() { attrs = PREFIX_LOCK as u8; } 218 | 219 | // Build output struct in one go 220 | let out_value = 
MergenDisassembledInstructionBase { 221 | mnemonic: instr.mnemonic() as u16, 222 | mem_base: instr.memory_base() as u8, 223 | mem_index: instr.memory_index() as u8, 224 | mem_scale: instr.memory_index_scale() as u8, 225 | mem_disp: if is_rel { disp64.wrapping_sub(instr_len) } else { disp64 }, 226 | stack_growth: instr.stack_pointer_increment().unsigned_abs() as u8, 227 | immediate, 228 | regs: [ 229 | instr.op0_register() as u8, 230 | instr.op1_register() as u8, 231 | instr.op2_register() as u8, 232 | instr.op3_register() as u8, 233 | ], 234 | types: [ 235 | convert_type_to_mergen(&instr, 0) as u8, 236 | convert_type_to_mergen(&instr, 1) as u8, 237 | convert_type_to_mergen(&instr, 2) as u8, 238 | convert_type_to_mergen(&instr, 3) as u8, 239 | ], 240 | operand_count_visible: instr.op_count() as u8, 241 | attributes: attrs, 242 | length: instr.len() as u8, 243 | text: std::ptr::null_mut() 244 | }; 245 | 246 | // SAFETY: out is non-null and points to valid memory 247 | unsafe { std::ptr::write(out, out_value) }; 248 | 249 | 0 250 | } 251 | 252 | #[no_mangle] 253 | pub extern "C" fn disas2( 254 | out: *mut MergenDisassembledInstructionBase, 255 | code_ptr: *const u8, 256 | len: usize, 257 | ) -> i32 { 258 | if out.is_null() || code_ptr.is_null() || len == 0 { 259 | return -1; 260 | } 261 | let code = unsafe { std::slice::from_raw_parts(code_ptr, len) }; 262 | let mut decoder = Decoder::new(64, code, DecoderOptions::NONE); 263 | let instr = decoder.decode(); 264 | struct MyTraitOptions; 265 | impl SpecializedFormatterTraitOptions for MyTraitOptions { 266 | // If you never create a db/dw/dd/dq 'instruction', we don't need this feature. 
267 | const ENABLE_DB_DW_DD_DQ: bool = false; 268 | // For a few percent faster code, you can also override `verify_output_has_enough_bytes_left()` and return `false` 269 | unsafe fn verify_output_has_enough_bytes_left() -> bool { 270 | false 271 | } 272 | } 273 | type MyFormatter = SpecializedFormatter; 274 | // Format instruction text 275 | let mut formatter = MyFormatter::new(); 276 | let mut formatted = String::new(); 277 | formatter.format(&instr, &mut formatted); 278 | let c_text = match CString::new(formatted) { 279 | Ok(cstr) => cstr.into_raw(), 280 | Err(_) => return -1, 281 | }; 282 | 283 | let disp64 = instr.memory_displacement64(); 284 | let instr_len = instr.len() as u64; 285 | let is_rel = is_relative_jump(&instr); 286 | let has_imm64 = has_64bit_immediate(&instr); 287 | 288 | let immediate = if is_rel { 289 | disp64.wrapping_sub(instr_len) 290 | } else if has_imm64 { 291 | instr.immediate64() as u64 292 | } else { 293 | instr.immediate32() as u64 294 | }; 295 | 296 | let mut attrs = 0u8; 297 | if instr.has_rep_prefix() { attrs |= 0b001; } 298 | if instr.has_repne_prefix() { attrs |= 0b010; } 299 | if instr.has_lock_prefix() { attrs |= 0b100; } 300 | 301 | let out_value = MergenDisassembledInstructionBase { 302 | mnemonic: instr.mnemonic() as u16, 303 | mem_base: instr.memory_base() as u8, 304 | mem_index: instr.memory_index() as u8, 305 | mem_scale: instr.memory_index_scale() as u8, 306 | mem_disp: if is_rel { disp64.wrapping_sub(instr_len) } else { disp64 }, 307 | stack_growth: instr.stack_pointer_increment().unsigned_abs() as u8, 308 | immediate, 309 | regs: [ 310 | instr.op0_register() as u8, 311 | instr.op1_register() as u8, 312 | instr.op2_register() as u8, 313 | instr.op3_register() as u8, 314 | ], 315 | types: [ 316 | convert_type_to_mergen(&instr, 0) as u8, 317 | convert_type_to_mergen(&instr, 1) as u8, 318 | convert_type_to_mergen(&instr, 2) as u8, 319 | convert_type_to_mergen(&instr, 3) as u8, 320 | ], 321 | operand_count_visible: 
instr.op_count() as u8, 322 | attributes: attrs, 323 | length: instr.len() as u8, 324 | text: c_text, 325 | }; 326 | unsafe { std::ptr::write(out, out_value) }; 327 | 0 328 | } 329 | 330 | /// Frees a string allocated by Rust (using CString::into_raw). 331 | /// 332 | /// # Parameters 333 | /// 334 | /// - `s`: Pointer to the C string allocated in Rust. 335 | #[no_mangle] 336 | pub extern "C" fn free_rust_string(s: *mut c_char) { 337 | if s.is_null() { 338 | return; 339 | } 340 | // Safety: reconstruct the CString so that it gets dropped. 341 | unsafe { let _ = CString::from_raw(s); }; 342 | } 343 | -------------------------------------------------------------------------------- /images/branches_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/branches_0.png -------------------------------------------------------------------------------- /images/branches_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/branches_1.png -------------------------------------------------------------------------------- /images/graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/graph.png -------------------------------------------------------------------------------- /images/mergen_dec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/mergen_dec.png -------------------------------------------------------------------------------- /images/mergen_disass.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/mergen_disass.png -------------------------------------------------------------------------------- /images/org_decomp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/org_decomp.png -------------------------------------------------------------------------------- /images/org_disass.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/org_disass.png -------------------------------------------------------------------------------- /images/run_mergen.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/run_mergen.PNG -------------------------------------------------------------------------------- /images/running_on_themida.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/running_on_themida.png -------------------------------------------------------------------------------- /images/themida_disas_b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/themida_disas_b.png -------------------------------------------------------------------------------- /images/themida_disas_v.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/themida_disas_v.png 
-------------------------------------------------------------------------------- /images/themida_vm_v.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/themida_vm_v.png -------------------------------------------------------------------------------- /images/themidavm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/themidavm.png -------------------------------------------------------------------------------- /images/themidavm_settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/themidavm_settings.png -------------------------------------------------------------------------------- /images/themidavm_vms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/themidavm_vms.png -------------------------------------------------------------------------------- /images/vmp_settings1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/vmp_settings1.png -------------------------------------------------------------------------------- /images/vmp_settings2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaC-L/Mergen/0f47dd47eaa035207ea13fb52118145e928d9e0d/images/vmp_settings2.png -------------------------------------------------------------------------------- /lifter/.clang-format: -------------------------------------------------------------------------------- 1 
| BasedOnStyle: LLVM 2 | IndentWidth: 2 3 | PointerAlignment: Left 4 | NamespaceIndentation: All -------------------------------------------------------------------------------- /lifter/CommonDisassembler.hpp: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_DISASSEMBLER_H 2 | #define COMMON_DISASSEMBLER_H 3 | 4 | #include "CommonMnemonics.h" 5 | #include "CommonRegisters.h" 6 | #include "ZydisDisassembler_mnemonics.h" 7 | #include "ZydisDisassembler_registers.h" 8 | #include 9 | #include 10 | #include 11 | 12 | // #include 13 | 14 | enum class OperandType : uint8_t { 15 | Invalid, 16 | Register8, 17 | Register16, 18 | Register32, 19 | Register64, 20 | Memory8, 21 | Memory16, 22 | Memory32, 23 | Memory64, 24 | Immediate8, 25 | Immediate8_2nd, // enter/exit 26 | Immediate16, 27 | Immediate32, 28 | Immediate64, 29 | End = Immediate64 30 | }; 31 | 32 | inline uint8_t GetTypeSize(OperandType op) { 33 | switch (op) { 34 | case OperandType::Register8: 35 | case OperandType::Memory8: 36 | case OperandType::Immediate8: 37 | case OperandType::Immediate8_2nd: { 38 | return 8; 39 | } 40 | case OperandType::Register16: 41 | case OperandType::Memory16: 42 | case OperandType::Immediate16: { 43 | return 16; 44 | } 45 | case OperandType::Register32: 46 | case OperandType::Memory32: 47 | case OperandType::Immediate32: { 48 | return 32; 49 | } 50 | case OperandType::Register64: 51 | case OperandType::Memory64: 52 | case OperandType::Immediate64: { 53 | return 64; 54 | } 55 | default: { 56 | // UNREACHABLE("invalid size"); 57 | } 58 | } 59 | return 0; 60 | } 61 | 62 | template inline Register getBiggestEncoding(Register reg) { 63 | 64 | switch (reg) { 65 | 66 | case Register::AL: 67 | case Register::AH: 68 | case Register::AX: 69 | case Register::EAX: 70 | case Register::RAX: 71 | return Register::RAX; 72 | 73 | case Register::CL: 74 | case Register::CH: 75 | case Register::CX: 76 | case Register::ECX: 77 | case Register::RCX: 78 | 
return Register::RCX; 79 | 80 | case Register::DL: 81 | case Register::DH: 82 | case Register::DX: 83 | case Register::EDX: 84 | case Register::RDX: 85 | return Register::RDX; 86 | 87 | case Register::BL: 88 | case Register::BH: 89 | case Register::BX: 90 | case Register::EBX: 91 | case Register::RBX: 92 | return Register::RBX; 93 | 94 | case Register::SPL: 95 | case Register::SP: 96 | case Register::ESP: 97 | case Register::RSP: 98 | return Register::RSP; 99 | 100 | case Register::BPL: 101 | case Register::BP: 102 | case Register::EBP: 103 | case Register::RBP: 104 | return Register::RBP; 105 | 106 | case Register::SIL: 107 | case Register::SI: 108 | case Register::ESI: 109 | case Register::RSI: 110 | return Register::RSI; 111 | 112 | case Register::DIL: 113 | case Register::DI: 114 | case Register::EDI: 115 | case Register::RDI: 116 | return Register::RDI; 117 | 118 | case Register::R8B: 119 | case Register::R8W: 120 | case Register::R8D: 121 | case Register::R8: 122 | return Register::R8; 123 | 124 | case Register::R9B: 125 | case Register::R9W: 126 | case Register::R9D: 127 | case Register::R9: 128 | return Register::R9; 129 | 130 | case Register::R10B: 131 | case Register::R10W: 132 | case Register::R10D: 133 | case Register::R10: 134 | return Register::R10; 135 | 136 | case Register::R11B: 137 | case Register::R11W: 138 | case Register::R11D: 139 | case Register::R11: 140 | return Register::R11; 141 | 142 | case Register::R12B: 143 | case Register::R12W: 144 | case Register::R12D: 145 | case Register::R12: 146 | return Register::R12; 147 | 148 | case Register::R13B: 149 | case Register::R13W: 150 | case Register::R13D: 151 | case Register::R13: 152 | return Register::R13; 153 | 154 | case Register::R14B: 155 | case Register::R14W: 156 | case Register::R14D: 157 | case Register::R14: 158 | return Register::R14; 159 | 160 | case Register::R15B: 161 | case Register::R15W: 162 | case Register::R15D: 163 | case Register::R15: 164 | return Register::R15; 165 | 166 
| case Register::EFLAGS: 167 | case Register::RFLAGS: 168 | return Register::RFLAGS; 169 | 170 | case Register::EIP: 171 | case Register::RIP: 172 | return Register::RIP; 173 | 174 | default: 175 | return Register::None; 176 | } 177 | } 178 | 179 | template inline uint8_t getRegisterSize(Register reg) { 180 | 181 | switch (reg) { 182 | case Register::RAX: 183 | case Register::RCX: 184 | case Register::RDX: 185 | case Register::RBX: 186 | case Register::RSP: 187 | case Register::RBP: 188 | case Register::RSI: 189 | case Register::RDI: 190 | case Register::R8: 191 | case Register::R9: 192 | case Register::R10: 193 | case Register::R11: 194 | case Register::R12: 195 | case Register::R13: 196 | case Register::R14: 197 | case Register::R15: 198 | case Register::RIP: 199 | case Register::RFLAGS: 200 | return 64; 201 | 202 | case Register::EAX: 203 | case Register::ECX: 204 | case Register::EDX: 205 | case Register::EBX: 206 | case Register::ESP: 207 | case Register::EBP: 208 | case Register::ESI: 209 | case Register::EDI: 210 | case Register::R8D: 211 | case Register::R9D: 212 | case Register::R10D: 213 | case Register::R11D: 214 | case Register::R12D: 215 | case Register::R13D: 216 | case Register::R14D: 217 | case Register::R15D: 218 | case Register::EIP: 219 | case Register::EFLAGS: 220 | return 32; 221 | 222 | case Register::AX: 223 | case Register::CX: 224 | case Register::DX: 225 | case Register::BX: 226 | case Register::SP: 227 | case Register::BP: 228 | case Register::SI: 229 | case Register::DI: 230 | case Register::R8W: 231 | case Register::R9W: 232 | case Register::R10W: 233 | case Register::R11W: 234 | case Register::R12W: 235 | case Register::R13W: 236 | case Register::R14W: 237 | case Register::R15W: 238 | return 16; 239 | 240 | case Register::AL: 241 | case Register::AH: 242 | case Register::CL: 243 | case Register::CH: 244 | case Register::DL: 245 | case Register::DH: 246 | case Register::BL: 247 | case Register::BH: 248 | case Register::SPL: 249 | case 
Register::BPL: 250 | case Register::SIL: 251 | case Register::DIL: 252 | case Register::R8B: 253 | case Register::R9B: 254 | case Register::R10B: 255 | case Register::R11B: 256 | case Register::R12B: 257 | case Register::R13B: 258 | case Register::R14B: 259 | case Register::R15B: 260 | return 8; 261 | default: 262 | return 0; 263 | } 264 | } 265 | 266 | enum class InstructionPrefix : uint8_t { 267 | None = 0, 268 | Rep, 269 | Repe = Rep, 270 | Repne, 271 | Lock, 272 | End = Lock 273 | }; 274 | 275 | // This unified structure is meant to capture common disassembly information 276 | // In the future, we might need to extend this 277 | 278 | template 280 | struct MergenDisassembledInstruction_base { 281 | 282 | // we only care about explicit operands in this struct 283 | 284 | // we can do this because x86 allows maximum of one mem operand 285 | 286 | Register mem_base; 287 | Register mem_index; 288 | uint8_t mem_scale; 289 | 290 | uint8_t stack_growth; 291 | 292 | Register regs[4]; 293 | OperandType types[4]; 294 | 295 | // instruction prefix, attributes 296 | InstructionPrefix attributes; 297 | 298 | uint8_t length; 299 | uint8_t operand_count_visible; 300 | 301 | // instruction mnemonic 302 | Mnemonic mnemonic; 303 | 304 | // TODO : 32 bit 305 | uint64_t immediate; // 306 | 307 | union { 308 | uint64_t mem_disp; 309 | uint64_t immediate2; 310 | }; 311 | 312 | #ifndef _NODEV 313 | std::string text; 314 | #endif 315 | }; 316 | 317 | // using MergenDisassembledInstruction = MergenDisassembledInstruction_base<>; 318 | 319 | template 320 | concept Disassembler = requires(T d, void* buffer, size_t size) { 321 | { 322 | d.disassemble(buffer, size) 323 | } -> std::same_as>; 324 | }; 325 | 326 | template 328 | requires Disassembler 329 | inline MergenDisassembledInstruction_base 330 | runDisassembler(T& dis, void* buffer, size_t size = 15) { 331 | return dis.disassemble(buffer, size); 332 | } 333 | 334 | #endif // COMMON_DISASSEMBLER_H 
-------------------------------------------------------------------------------- /lifter/CommonRegisters.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_REGISTERS_H 2 | #define COMMON_REGISTERS_H 3 | 4 | #include 5 | enum class RegisterInternal : uint8_t { 6 | None = 0, 7 | AL, 8 | CL, 9 | DL, 10 | BL, 11 | AH, 12 | CH, 13 | DH, 14 | BH, 15 | SPL, 16 | BPL, 17 | SIL, 18 | DIL, 19 | R8B, 20 | R9B, 21 | R10B, 22 | R11B, 23 | R12B, 24 | R13B, 25 | R14B, 26 | R15B, 27 | AX, 28 | CX, 29 | DX, 30 | BX, 31 | SP, 32 | BP, 33 | SI, 34 | DI, 35 | R8W, 36 | R9W, 37 | R10W, 38 | R11W, 39 | R12W, 40 | R13W, 41 | R14W, 42 | R15W, 43 | EAX, 44 | ECX, 45 | EDX, 46 | EBX, 47 | ESP, 48 | EBP, 49 | ESI, 50 | EDI, 51 | R8D, 52 | R9D, 53 | R10D, 54 | R11D, 55 | R12D, 56 | R13D, 57 | R14D, 58 | R15D, 59 | RAX, 60 | RCX, 61 | RDX, 62 | RBX, 63 | RSP, 64 | RBP, 65 | RSI, 66 | RDI, 67 | R8, 68 | R9, 69 | R10, 70 | R11, 71 | R12, 72 | R13, 73 | R14, 74 | R15, 75 | EIP, 76 | RIP, 77 | ES, 78 | CS, 79 | SS, 80 | DS, 81 | FS, 82 | GS, 83 | XMM0, 84 | XMM1, 85 | XMM2, 86 | XMM3, 87 | XMM4, 88 | XMM5, 89 | XMM6, 90 | XMM7, 91 | XMM8, 92 | XMM9, 93 | XMM10, 94 | XMM11, 95 | XMM12, 96 | XMM13, 97 | XMM14, 98 | XMM15, 99 | XMM16, 100 | XMM17, 101 | XMM18, 102 | XMM19, 103 | XMM20, 104 | XMM21, 105 | XMM22, 106 | XMM23, 107 | XMM24, 108 | XMM25, 109 | XMM26, 110 | XMM27, 111 | XMM28, 112 | XMM29, 113 | XMM30, 114 | XMM31, 115 | YMM0, 116 | YMM1, 117 | YMM2, 118 | YMM3, 119 | YMM4, 120 | YMM5, 121 | YMM6, 122 | YMM7, 123 | YMM8, 124 | YMM9, 125 | YMM10, 126 | YMM11, 127 | YMM12, 128 | YMM13, 129 | YMM14, 130 | YMM15, 131 | YMM16, 132 | YMM17, 133 | YMM18, 134 | YMM19, 135 | YMM20, 136 | YMM21, 137 | YMM22, 138 | YMM23, 139 | YMM24, 140 | YMM25, 141 | YMM26, 142 | YMM27, 143 | YMM28, 144 | YMM29, 145 | YMM30, 146 | YMM31, 147 | ZMM0, 148 | ZMM1, 149 | ZMM2, 150 | ZMM3, 151 | ZMM4, 152 | ZMM5, 153 | ZMM6, 154 | ZMM7, 155 | ZMM8, 156 | ZMM9, 157 | 
ZMM10, 158 | ZMM11, 159 | ZMM12, 160 | ZMM13, 161 | ZMM14, 162 | ZMM15, 163 | ZMM16, 164 | ZMM17, 165 | ZMM18, 166 | ZMM19, 167 | ZMM20, 168 | ZMM21, 169 | ZMM22, 170 | ZMM23, 171 | ZMM24, 172 | ZMM25, 173 | ZMM26, 174 | ZMM27, 175 | ZMM28, 176 | ZMM29, 177 | ZMM30, 178 | ZMM31, 179 | K0, 180 | K1, 181 | K2, 182 | K3, 183 | K4, 184 | K5, 185 | K6, 186 | K7, 187 | BND0, 188 | BND1, 189 | BND2, 190 | BND3, 191 | CR0, 192 | CR1, 193 | CR2, 194 | CR3, 195 | CR4, 196 | CR5, 197 | CR6, 198 | CR7, 199 | CR8, 200 | CR9, 201 | CR10, 202 | CR11, 203 | CR12, 204 | CR13, 205 | CR14, 206 | CR15, 207 | DR0, 208 | DR1, 209 | DR2, 210 | DR3, 211 | DR4, 212 | DR5, 213 | DR6, 214 | DR7, 215 | DR8, 216 | DR9, 217 | DR10, 218 | DR11, 219 | DR12, 220 | DR13, 221 | DR14, 222 | DR15, 223 | ST0, 224 | ST1, 225 | ST2, 226 | ST3, 227 | ST4, 228 | ST5, 229 | ST6, 230 | ST7, 231 | MM0, 232 | MM1, 233 | MM2, 234 | MM3, 235 | MM4, 236 | MM5, 237 | MM6, 238 | MM7, 239 | TR0, 240 | TR1, 241 | TR2, 242 | TR3, 243 | TR4, 244 | TR5, 245 | TR6, 246 | TR7, 247 | TMM0, 248 | TMM1, 249 | TMM2, 250 | TMM3, 251 | TMM4, 252 | TMM5, 253 | TMM6, 254 | TMM7, 255 | 256 | EFLAGS, 257 | RFLAGS, 258 | 259 | START = None, 260 | END = TMM7 261 | }; 262 | 263 | template 264 | concept Registers = requires() { 265 | { T::None }; 266 | { T::AL }; 267 | { T::CL }; 268 | { T::DL }; 269 | { T::BL }; 270 | { T::AH }; 271 | { T::CH }; 272 | { T::DH }; 273 | { T::BH }; 274 | { T::SPL }; 275 | { T::BPL }; 276 | { T::SIL }; 277 | { T::DIL }; 278 | { T::R8B }; 279 | { T::R9B }; 280 | { T::R10B }; 281 | { T::R11B }; 282 | { T::R12B }; 283 | { T::R13B }; 284 | { T::R14B }; 285 | { T::R15B }; 286 | { T::AX }; 287 | { T::CX }; 288 | { T::DX }; 289 | { T::BX }; 290 | { T::SP }; 291 | { T::BP }; 292 | { T::SI }; 293 | { T::DI }; 294 | { T::R8W }; 295 | { T::R9W }; 296 | { T::R10W }; 297 | { T::R11W }; 298 | { T::R12W }; 299 | { T::R13W }; 300 | { T::R14W }; 301 | { T::R15W }; 302 | { T::EAX }; 303 | { T::ECX }; 304 | { T::EDX }; 305 
| { T::EBX }; 306 | { T::ESP }; 307 | { T::EBP }; 308 | { T::ESI }; 309 | { T::EDI }; 310 | { T::R8D }; 311 | { T::R9D }; 312 | { T::R10D }; 313 | { T::R11D }; 314 | { T::R12D }; 315 | { T::R13D }; 316 | { T::R14D }; 317 | { T::R15D }; 318 | { T::RAX }; 319 | { T::RCX }; 320 | { T::RDX }; 321 | { T::RBX }; 322 | { T::RSP }; 323 | { T::RBP }; 324 | { T::RSI }; 325 | { T::RDI }; 326 | { T::R8 }; 327 | { T::R9 }; 328 | { T::R10 }; 329 | { T::R11 }; 330 | { T::R12 }; 331 | { T::R13 }; 332 | { T::R14 }; 333 | { T::R15 }; 334 | { T::EIP }; 335 | { T::RIP }; 336 | { T::ES }; 337 | { T::CS }; 338 | { T::SS }; 339 | { T::DS }; 340 | { T::FS }; 341 | { T::GS }; 342 | { T::XMM0 }; 343 | { T::XMM1 }; 344 | { T::XMM2 }; 345 | { T::XMM3 }; 346 | { T::XMM4 }; 347 | { T::XMM5 }; 348 | { T::XMM6 }; 349 | { T::XMM7 }; 350 | { T::XMM8 }; 351 | { T::XMM9 }; 352 | { T::XMM10 }; 353 | { T::XMM11 }; 354 | { T::XMM12 }; 355 | { T::XMM13 }; 356 | { T::XMM14 }; 357 | { T::XMM15 }; 358 | { T::XMM16 }; 359 | { T::XMM17 }; 360 | { T::XMM18 }; 361 | { T::XMM19 }; 362 | { T::XMM20 }; 363 | { T::XMM21 }; 364 | { T::XMM22 }; 365 | { T::XMM23 }; 366 | { T::XMM24 }; 367 | { T::XMM25 }; 368 | { T::XMM26 }; 369 | { T::XMM27 }; 370 | { T::XMM28 }; 371 | { T::XMM29 }; 372 | { T::XMM30 }; 373 | { T::XMM31 }; 374 | { T::YMM0 }; 375 | { T::YMM1 }; 376 | { T::YMM2 }; 377 | { T::YMM3 }; 378 | { T::YMM4 }; 379 | { T::YMM5 }; 380 | { T::YMM6 }; 381 | { T::YMM7 }; 382 | { T::YMM8 }; 383 | { T::YMM9 }; 384 | { T::YMM10 }; 385 | { T::YMM11 }; 386 | { T::YMM12 }; 387 | { T::YMM13 }; 388 | { T::YMM14 }; 389 | { T::YMM15 }; 390 | { T::YMM16 }; 391 | { T::YMM17 }; 392 | { T::YMM18 }; 393 | { T::YMM19 }; 394 | { T::YMM20 }; 395 | { T::YMM21 }; 396 | { T::YMM22 }; 397 | { T::YMM23 }; 398 | { T::YMM24 }; 399 | { T::YMM25 }; 400 | { T::YMM26 }; 401 | { T::YMM27 }; 402 | { T::YMM28 }; 403 | { T::YMM29 }; 404 | { T::YMM30 }; 405 | { T::YMM31 }; 406 | { T::ZMM0 }; 407 | { T::ZMM1 }; 408 | { T::ZMM2 }; 409 | { T::ZMM3 }; 410 
| { T::ZMM4 }; 411 | { T::ZMM5 }; 412 | { T::ZMM6 }; 413 | { T::ZMM7 }; 414 | { T::ZMM8 }; 415 | { T::ZMM9 }; 416 | { T::ZMM10 }; 417 | { T::ZMM11 }; 418 | { T::ZMM12 }; 419 | { T::ZMM13 }; 420 | { T::ZMM14 }; 421 | { T::ZMM15 }; 422 | { T::ZMM16 }; 423 | { T::ZMM17 }; 424 | { T::ZMM18 }; 425 | { T::ZMM19 }; 426 | { T::ZMM20 }; 427 | { T::ZMM21 }; 428 | { T::ZMM22 }; 429 | { T::ZMM23 }; 430 | { T::ZMM24 }; 431 | { T::ZMM25 }; 432 | { T::ZMM26 }; 433 | { T::ZMM27 }; 434 | { T::ZMM28 }; 435 | { T::ZMM29 }; 436 | { T::ZMM30 }; 437 | { T::ZMM31 }; 438 | { T::K0 }; 439 | { T::K1 }; 440 | { T::K2 }; 441 | { T::K3 }; 442 | { T::K4 }; 443 | { T::K5 }; 444 | { T::K6 }; 445 | { T::K7 }; 446 | { T::BND0 }; 447 | { T::BND1 }; 448 | { T::BND2 }; 449 | { T::BND3 }; 450 | { T::CR0 }; 451 | { T::CR1 }; 452 | { T::CR2 }; 453 | { T::CR3 }; 454 | { T::CR4 }; 455 | { T::CR5 }; 456 | { T::CR6 }; 457 | { T::CR7 }; 458 | { T::CR8 }; 459 | { T::CR9 }; 460 | { T::CR10 }; 461 | { T::CR11 }; 462 | { T::CR12 }; 463 | { T::CR13 }; 464 | { T::CR14 }; 465 | { T::CR15 }; 466 | { T::DR0 }; 467 | { T::DR1 }; 468 | { T::DR2 }; 469 | { T::DR3 }; 470 | { T::DR4 }; 471 | { T::DR5 }; 472 | { T::DR6 }; 473 | { T::DR7 }; 474 | { T::DR8 }; 475 | { T::DR9 }; 476 | { T::DR10 }; 477 | { T::DR11 }; 478 | { T::DR12 }; 479 | { T::DR13 }; 480 | { T::DR14 }; 481 | { T::DR15 }; 482 | { T::ST0 }; 483 | { T::ST1 }; 484 | { T::ST2 }; 485 | { T::ST3 }; 486 | { T::ST4 }; 487 | { T::ST5 }; 488 | { T::ST6 }; 489 | { T::ST7 }; 490 | { T::MM0 }; 491 | { T::MM1 }; 492 | { T::MM2 }; 493 | { T::MM3 }; 494 | { T::MM4 }; 495 | { T::MM5 }; 496 | { T::MM6 }; 497 | { T::MM7 }; 498 | { T::TR0 }; 499 | { T::TR1 }; 500 | { T::TR2 }; 501 | { T::TR3 }; 502 | { T::TR4 }; 503 | { T::TR5 }; 504 | { T::TR6 }; 505 | { T::TR7 }; 506 | { T::TMM0 }; 507 | { T::TMM1 }; 508 | { T::TMM2 }; 509 | { T::TMM3 }; 510 | { T::TMM4 }; 511 | { T::TMM5 }; 512 | { T::TMM6 }; 513 | { T::TMM7 }; 514 | { T::EFLAGS }; 515 | { T::RFLAGS }; 516 | { T::START }; 517 
| { T::END }; 518 | }; 519 | 520 | // using RegisterPlaceholder = RegisterInternal; 521 | 522 | #endif // COMMON_REGISTERS_H -------------------------------------------------------------------------------- /lifter/CustomPasses.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CUSTOMPASSES_H 2 | #define CUSTOMPASSES_H 3 | 4 | #include "GEPTracker.h" 5 | #include "OperandUtils.h" 6 | #include "includes.h" 7 | #include "utils.h" 8 | #include "llvm/IR/PassManager.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | using namespace llvm; 22 | 23 | class BasicBlockDotGraphPass 24 | : public llvm::PassInfoMixin { 25 | public: 26 | PreservedAnalyses run(llvm::Module& M, llvm::ModuleAnalysisManager&) { 27 | std::string filename = M.getName().str() + ".dot"; 28 | llvm::outs() << filename << "\n"; 29 | std::error_code EC; 30 | raw_fd_ostream fileStream(filename, EC); 31 | if (EC) { 32 | llvm::errs() << "Could not open file: " << EC.message() << "\n"; 33 | return llvm::PreservedAnalyses::all(); 34 | } 35 | 36 | fileStream << "digraph \"" << M.getName().str() << "\" {\n"; 37 | 38 | std::map bbNames; 39 | int bbCount = 0; 40 | 41 | for (const auto& F : M) { 42 | for (const BasicBlock& BB : F) { 43 | std::string sanitizedName; 44 | 45 | llvm::StringRef nameRef = BB.getName(); 46 | 47 | if (nameRef.count('-') >= 2) { 48 | size_t firstHyphen = nameRef.find('-'); 49 | size_t secondHyphen = nameRef.find('-', firstHyphen + 1); 50 | 51 | if (secondHyphen != llvm::StringRef::npos) { 52 | llvm::StringRef extracted = nameRef.substr(0, secondHyphen); 53 | 54 | std::string extractedStr = extracted.str(); 55 | 56 | if (!extractedStr.empty()) { 57 | extractedStr.pop_back(); 58 | } 59 | 60 | std::replace(extractedStr.begin(), extractedStr.end(), '-', '_'); 61 | 62 | sanitizedName = "BB" + extractedStr; 63 | } else { 64 | sanitizedName = 
"BBentry"; 65 | } 66 | } else { 67 | sanitizedName = "BBentry"; 68 | } 69 | 70 | bbNames[&BB] = sanitizedName; 71 | 72 | fileStream << " \"" << sanitizedName << "\" [label=\"" 73 | << sanitizedName << "\"];\n"; 74 | } 75 | 76 | for (const BasicBlock& BB : F) { 77 | for (const auto SI : successors(&BB)) { 78 | auto Succ = SI; 79 | fileStream << " " << bbNames[&BB] << " -> " << bbNames[Succ] 80 | << ";\n"; 81 | } 82 | } 83 | } 84 | 85 | fileStream << "}\n"; 86 | 87 | fileStream.close(); 88 | errs() << "Generated DOT file for function: " << M.getName() << "\n"; 89 | 90 | return PreservedAnalyses::all(); 91 | } 92 | }; 93 | 94 | class PromotePseudoStackPass 95 | : public llvm::PassInfoMixin { 96 | public: 97 | Value* mem = nullptr; 98 | PromotePseudoStackPass(Value* val) : mem(val){}; 99 | llvm::PreservedAnalyses run(llvm::Module& M, llvm::ModuleAnalysisManager&) { 100 | 101 | bool hasChanged = false; 102 | llvm::Value* stackMemory = NULL; 103 | for (auto& F : M) { 104 | llvm::Value* memory = mem; 105 | if (!stackMemory) { 106 | llvm::IRBuilder<> Builder(&*F.getEntryBlock().getFirstInsertionPt()); 107 | stackMemory = Builder.CreateAlloca( 108 | llvm::Type::getInt128Ty(M.getContext()), 109 | llvm::ConstantInt::get(llvm::Type::getInt128Ty(M.getContext()), 110 | STACKP_VALUE), 111 | "stackmemory"); 112 | } 113 | for (auto& BB : F) { 114 | for (auto& I : BB) { 115 | if (auto* GEP = llvm::dyn_cast(&I)) { 116 | 117 | // TODO: prettify here!!! 
118 | auto* MemoryOperand = GEP->getOperand(GEP->getNumOperands() - 2); 119 | /* 120 | printvalueforce(MemoryOperand); 121 | printvalueforce(memory); 122 | */ 123 | if (memory != MemoryOperand) 124 | continue; 125 | 126 | auto* OffsetOperand = GEP->getOperand(GEP->getNumOperands() - 1); 127 | // printvalue(OffsetOperand) 128 | 129 | if (isa(OffsetOperand)) { 130 | if (auto* ConstInt = 131 | llvm::dyn_cast(OffsetOperand)) { 132 | uint64_t constintvalue = (uint64_t)ConstInt->getZExtValue(); 133 | if (constintvalue < STACKP_VALUE) { 134 | GEP->setOperand((GEP->getNumOperands() - 2), stackMemory); 135 | } 136 | } 137 | continue; 138 | } 139 | // if OffsetOperand is not a constant: 140 | auto offsetKB = computeKnownBits(OffsetOperand, M.getDataLayout()); 141 | auto StackSize = APInt(64, STACKP_VALUE); 142 | 143 | auto SSKB = KnownBits::makeConstant(StackSize); 144 | printvalue2(offsetKB); 145 | printvalue2(SSKB); 146 | if (KnownBits::ult(offsetKB, SSKB)) { 147 | // minimum of offsetKB 148 | GEP->setOperand((GEP->getNumOperands() - 2), stackMemory); 149 | } else if (auto select_inst = dyn_cast(OffsetOperand)) { 150 | if (isa(select_inst->getFalseValue()) && 151 | isa(select_inst->getTrueValue())) { 152 | if ((cast(select_inst->getTrueValue()) 153 | ->getZExtValue() < STACKP_VALUE) && 154 | (cast(select_inst->getFalseValue()) 155 | ->getZExtValue() < STACKP_VALUE)) { 156 | GEP->setOperand((GEP->getNumOperands() - 2), stackMemory); 157 | } 158 | } 159 | } 160 | // endif 161 | } 162 | } 163 | } 164 | } 165 | return hasChanged ? 
llvm::PreservedAnalyses::none() 166 | : llvm::PreservedAnalyses::all(); 167 | } 168 | }; 169 | 170 | // refactor 171 | class GEPLoadPass : public llvm::PassInfoMixin { 172 | public: 173 | uint8_t* filebase; 174 | 175 | Value* mem = nullptr; 176 | GEPLoadPass(Value* val, uint8_t* filebase) : mem(val), filebase(filebase){}; 177 | 178 | llvm::PreservedAnalyses run(llvm::Module& M, llvm::ModuleAnalysisManager&) { 179 | bool hasChanged = false; 180 | for (auto& F : M) { 181 | for (auto& BB : F) { 182 | for (auto& I : BB) { 183 | if (auto* GEP = llvm::dyn_cast(&I)) { 184 | 185 | auto* OffsetOperand = GEP->getOperand(GEP->getNumOperands() - 1); 186 | if (auto* ConstInt = 187 | llvm::dyn_cast(OffsetOperand)) { 188 | uint64_t constintvalue = (uint64_t)ConstInt->getZExtValue(); 189 | if (uint64_t offset = BinaryOperations::address_to_mapped_address( 190 | constintvalue)) { 191 | for (auto* User : GEP->users()) { 192 | if (auto* LoadInst = llvm::dyn_cast(User)) { 193 | llvm::Type* loadType = LoadInst->getType(); 194 | 195 | unsigned byteSize = loadType->getIntegerBitWidth() / 8; 196 | uint64_t tempvalue; 197 | 198 | std::memcpy( 199 | &tempvalue, 200 | reinterpret_cast(filebase + offset), 201 | byteSize); 202 | 203 | llvm::APInt readValue(byteSize * 8, tempvalue); 204 | llvm::Constant* newVal = 205 | llvm::ConstantInt::get(loadType, readValue); 206 | 207 | LoadInst->replaceAllUsesWith(newVal); 208 | hasChanged = true; 209 | } 210 | } 211 | } 212 | } 213 | } 214 | } 215 | } 216 | } 217 | return hasChanged ? 
llvm::PreservedAnalyses::none() 218 | : llvm::PreservedAnalyses::all(); 219 | } 220 | }; 221 | 222 | class ReplaceTruncWithLoadPass 223 | : public llvm::PassInfoMixin { 224 | public: 225 | llvm::PreservedAnalyses run(llvm::Module& M, llvm::ModuleAnalysisManager&) { 226 | bool hasChanged = false; 227 | std::vector toRemove; 228 | for (auto& F : M) { 229 | for (auto& BB : F) { 230 | for (auto I = BB.begin(), E = BB.end(); I != E;) { 231 | 232 | auto CurrentI = I++; 233 | 234 | if (auto* TruncInst = llvm::dyn_cast(&*CurrentI)) { 235 | 236 | if (TruncInst->getSrcTy()->isIntegerTy(64) && 237 | TruncInst->getDestTy()->isIntegerTy(32)) { 238 | 239 | if (auto* LoadInst = llvm::dyn_cast( 240 | TruncInst->getOperand(0))) { 241 | 242 | llvm::LoadInst* newLoad = new llvm::LoadInst( 243 | TruncInst->getType(), LoadInst->getPointerOperand(), 244 | "passload", false, LoadInst); 245 | 246 | TruncInst->replaceAllUsesWith(newLoad); 247 | 248 | toRemove.push_back(TruncInst); 249 | 250 | hasChanged = true; 251 | } 252 | } 253 | } 254 | } 255 | } 256 | } 257 | for (llvm::Instruction* Inst : toRemove) { 258 | Inst->eraseFromParent(); 259 | } 260 | toRemove.clear(); 261 | return hasChanged ? 
llvm::PreservedAnalyses::none() 262 | : llvm::PreservedAnalyses::all(); 263 | } 264 | }; 265 | 266 | // very simple pass 267 | /* 268 | convert 269 | %GEPLoadxd-5368713239- = getelementptr i8, ptr %memory, i64 5368725620 270 | to 271 | %GEPLoadxd-5368713239- = inttoptr i64 5368725620 to ptr 272 | */ 273 | class PromotePseudoMemory : public llvm::PassInfoMixin { 274 | public: 275 | Value* mem = nullptr; 276 | PromotePseudoMemory(Value* val) : mem(val){}; 277 | llvm::PreservedAnalyses run(llvm::Module& M, llvm::ModuleAnalysisManager&) { 278 | 279 | std::vector toPromote; 280 | 281 | bool hasChanged = false; 282 | for (auto& F : M) { 283 | Value* memory = mem; 284 | for (auto& BB : F) { 285 | for (auto& I : BB) { 286 | if (auto* GEP = llvm::dyn_cast(&I)) { 287 | if (GEP->getOperand(0) == memory) { 288 | llvm::IntToPtrInst* newPTR = new llvm::IntToPtrInst( 289 | GEP->getOperand(1), GEP->getType(), GEP->getName(), GEP); 290 | 291 | GEP->replaceAllUsesWith(newPTR); 292 | 293 | toPromote.push_back(GEP); 294 | 295 | hasChanged = true; 296 | } 297 | } 298 | } 299 | } 300 | 301 | for (llvm::Instruction* Inst : toPromote) { 302 | Inst->eraseFromParent(); 303 | } 304 | toPromote.clear(); 305 | } 306 | return hasChanged ? 
llvm::PreservedAnalyses::none() 307 | : llvm::PreservedAnalyses::all(); 308 | } 309 | }; 310 | class ResizeAllocatedStackPass 311 | : public llvm::PassInfoMixin { 312 | public: 313 | bool chainEnd(Instruction* inst) { 314 | return isa(inst) || isa(inst) || isa(inst); 315 | } 316 | void chainLook(llvm::Module& M, Instruction* GEP, 317 | uint64_t& smallest_val_of_chain) { 318 | for (auto user : GEP->users()) { 319 | auto inst = cast(user); 320 | auto offset = GEP->getOperand(1); 321 | auto offsetKB = computeKnownBits(offset, M.getDataLayout()); 322 | smallest_val_of_chain += offsetKB.getMinValue().getZExtValue(); 323 | if (chainEnd(inst)) { 324 | return; 325 | } 326 | chainLook(M, inst, smallest_val_of_chain); 327 | } 328 | } 329 | llvm::PreservedAnalyses run(llvm::Module& M, llvm::ModuleAnalysisManager&) { 330 | std::vector toResize; 331 | uint64_t smallest = std::numeric_limits::max(); 332 | bool hasChanged = false; 333 | 334 | for (auto& F : M) { 335 | if (F.isDeclaration()) 336 | continue; 337 | 338 | Instruction* Allocated = &(F.getEntryBlock().front()); 339 | if (!isa(Allocated)) 340 | continue; 341 | 342 | for (auto& BB : F) { 343 | for (auto& I : BB) { 344 | if (auto* GEP = llvm::dyn_cast(&I)) { 345 | if (GEP->getOperand(0) == Allocated) { 346 | uint64_t smallest_val_of_chain = 0; 347 | chainLook(M, GEP, smallest_val_of_chain); 348 | smallest = std::min(smallest_val_of_chain, smallest); 349 | toResize.push_back(GEP); 350 | } 351 | } 352 | } 353 | } 354 | 355 | if (smallest != std::numeric_limits::max()) { 356 | IRBuilder<> builder(M.getContext()); 357 | auto allocainst = cast(Allocated); 358 | auto allocaType = allocainst->getAllocatedType(); 359 | 360 | auto allocationSize = 361 | M.getDataLayout().getTypeAllocSize(allocaType) / 16; 362 | // / 16 because i128 is (i) 8 x 16 363 | auto newSize = allocationSize - smallest; 364 | Type* newType = 365 | ArrayType::get(Type::getInt8Ty(allocainst->getContext()), newSize); 366 | 367 | 
builder.SetInsertPoint(allocainst); 368 | AllocaInst* newAlloca = builder.CreateAlloca( 369 | newType, nullptr, allocainst->getName() + ".resized"); 370 | 371 | allocainst->replaceAllUsesWith(newAlloca); 372 | allocainst->eraseFromParent(); 373 | 374 | for (llvm::Instruction* GEPInst : toResize) { 375 | 376 | builder.SetInsertPoint(GEPInst); 377 | 378 | auto val = GEPInst->getOperand(1); 379 | 380 | Value* newval = builder.CreateSub(val, builder.getInt64(smallest)); 381 | GEPInst->setOperand(1, newval); 382 | } 383 | 384 | toResize.clear(); 385 | hasChanged = true; 386 | } 387 | } 388 | return hasChanged ? llvm::PreservedAnalyses::none() 389 | : llvm::PreservedAnalyses::all(); 390 | } 391 | }; 392 | 393 | #endif 394 | -------------------------------------------------------------------------------- /lifter/FunctionSignatures.cpp: -------------------------------------------------------------------------------- 1 | #include "FunctionSignatures.hpp" 2 | #include "GEPTracker.h" 3 | #include 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /lifter/FunctionSignatures.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FUNCSIGNATURES_H 2 | #define FUNCSIGNATURES_H 3 | #include "GEPTracker.h" 4 | #include "CommonDisassembler.hpp" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | // 8 << (arg.argtype.size - 1) 12 | enum ArgType { NONE = 0, I8 = 1, I16 = 2, I32 = 3, I64 = 4 }; 13 | 14 | template class funcsignatures { 15 | public: 16 | struct funcArgInfo { 17 | Register reg; 18 | 19 | struct argTypeInfo { 20 | uint8_t size : 4; // 4 bits for size 21 | uint8_t isPtr : 1; 22 | uint8_t pad : 3; 23 | argTypeInfo(ArgType type, bool isPtr) 24 | : size(static_cast(type)), isPtr(isPtr ? 
1 : 0), pad(0) {} 25 | } argtype; 26 | funcArgInfo(Register Reg, ArgType type, bool isPtr) 27 | : reg(Reg), argtype(type, isPtr){}; 28 | }; 29 | 30 | using funcArgInfos = std::vector; 31 | struct functioninfo { 32 | functioninfo() {} 33 | functioninfo(const std::string& Name) : name(Name) {} 34 | 35 | functioninfo(const std::string& Name, std::vector Args) 36 | : name(Name), args(Args) {} 37 | 38 | functioninfo(const std::string& Name, const std::vector Args, 39 | const std::vector Bytes) 40 | : name(Name), args(Args), bytes(Bytes) {} 41 | std::string name; 42 | // 43 | funcArgInfos args = { 44 | funcArgInfo(Register::RAX, I64, 0), funcArgInfo(Register::RCX, I64, 0), 45 | funcArgInfo(Register::RDX, I64, 0), funcArgInfo(Register::RBX, I64, 0), 46 | funcArgInfo(Register::RSP, I64, 0), funcArgInfo(Register::RBP, I64, 0), 47 | funcArgInfo(Register::RSI, I64, 0), funcArgInfo(Register::RDI, I64, 0), 48 | funcArgInfo(Register::R8, I64, 0), funcArgInfo(Register::R9, I64, 0), 49 | funcArgInfo(Register::R10, I64, 0), funcArgInfo(Register::R11, I64, 0), 50 | funcArgInfo(Register::R12, I64, 0), funcArgInfo(Register::R13, I64, 0), 51 | funcArgInfo(Register::R14, I64, 0), funcArgInfo(Register::R15, I64, 0), 52 | funcArgInfo(Register::DS, I64, 1)}; 53 | 54 | std::vector bytes; 55 | // DS represents memory 56 | // (yeah i hate it aswell) 57 | // so the default is 58 | // rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15,memory 59 | // also 60 | // should SS represent stack ? 
(rsp+0x20 + (8 * arg) ) 61 | // (SS is always ptr) 62 | static inline std::vector offsets; 63 | static void add_offset(uint64_t offset) { 64 | offsets.push_back(BinaryOperations::fileOffsetToRVA(offset)); 65 | }; 66 | void display() const { 67 | std::cout << "Function Name: " << name << ", Offsets: "; 68 | for (const auto& offset : offsets) { 69 | std::cout << offset << " "; 70 | } 71 | std::cout << "end" << std::endl; 72 | }; 73 | }; 74 | 75 | struct siginfo { 76 | siginfo(const std::vector& Bytes); 77 | siginfo(const std::vector& Bytes, 78 | const std::vector& Args); 79 | 80 | std::vector bytes; 81 | std::vector args; 82 | }; 83 | 84 | struct VectorHash { 85 | std::size_t operator()(const std::vector& v) const { 86 | std::hash hasher; 87 | std::size_t seed = 0; 88 | for (unsigned char i : v) { 89 | seed ^= hasher(i) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 90 | } 91 | return seed; 92 | } 93 | }; 94 | 95 | class AhoCorasick { 96 | public: 97 | AhoCorasick( 98 | const std::unordered_map, functioninfo, 99 | VectorHash>& patterns_map) { 100 | trie.emplace_back(); 101 | int id = 0; 102 | for (const auto& [pattern, _] : patterns_map) { 103 | int current = 0; 104 | for (unsigned char c : pattern) { 105 | if (trie[current].children.count(c) == 0) { 106 | trie[current].children[c] = trie.size(); 107 | trie.emplace_back(); 108 | } 109 | current = trie[current].children[c]; 110 | } 111 | trie[current].output.push_back(id); 112 | patterns[id++] = pattern; 113 | } 114 | build(); 115 | }; 116 | 117 | std::vector> 118 | search(const std::vector& text) { 119 | std::vector> results; 120 | int current = 0; 121 | for (uint64_t i = 0; i < text.size(); ++i) { 122 | while (current != -1 && trie[current].children.count(text[i]) == 0) { 123 | current = trie[current].fail; 124 | } 125 | if (current == -1) { 126 | current = 0; 127 | continue; 128 | } 129 | current = trie[current].children[text[i]]; 130 | for (int id : trie[current].output) { 131 | results.emplace_back(i - 
patterns[id].size() + 1, id); 132 | } 133 | } 134 | return results; 135 | }; 136 | 137 | struct Node { 138 | std::map children; 139 | int fail = -1; 140 | std::vector output; 141 | }; 142 | 143 | std::vector trie; 144 | std::unordered_map> patterns; 145 | 146 | void build() { 147 | std::queue q; 148 | for (const auto& [c, next] : trie[0].children) { 149 | trie[next].fail = 0; 150 | q.push(next); 151 | } 152 | while (!q.empty()) { 153 | int current = q.front(); 154 | q.pop(); 155 | for (const auto& [c, next] : trie[current].children) { 156 | int fail = trie[current].fail; 157 | while (fail != -1 && trie[fail].children.count(c) == 0) { 158 | fail = trie[fail].fail; 159 | } 160 | if (fail != -1) { 161 | trie[next].fail = trie[fail].children[c]; 162 | } else { 163 | trie[next].fail = 0; 164 | } 165 | trie[next].output.insert(trie[next].output.end(), 166 | trie[trie[next].fail].output.begin(), 167 | trie[trie[next].fail].output.end()); 168 | q.push(next); 169 | } 170 | } 171 | }; 172 | }; 173 | 174 | static inline std::unordered_map, functioninfo, 175 | VectorHash> 176 | siglookup{ 177 | {{0x55, 0x48, 0x81, 0xEC, 0xA0, 00, 00, 00, 0x48, 0x8D, 0xAC, 0x24, 178 | 0x80, 00, 00, 00}, 179 | functioninfo("??$?6U?$char_traits@D@std@@@std@@YAAEAV?$basic_" 180 | "ostream@DU?$char_traits@D@std@@@0@AEAV10@PEBD@Z")}, 181 | 182 | {{0x4C, 0x8B, 0xDC, 0x4D, 0x89, 0x43, 0x18, 0x4D, 0x89, 0x4B, 0x20, 183 | 0x48, 0x83, 0xEC, 0x38}, 184 | functioninfo("swprintf_s", { 185 | funcArgInfo(Register::RCX, I64, 1), 186 | funcArgInfo(Register::RDX, I64, 0), 187 | funcArgInfo(Register::R8, I64, 1), 188 | funcArgInfo(Register::R9, I64, 0), 189 | })}}; 190 | 191 | static inline std::unordered_map functions; 192 | static inline std::unordered_map functionsByName{ 193 | {"MessageBoxW", functioninfo("MessageBoxW", 194 | { 195 | funcArgInfo(Register::RCX, I64, 0), 196 | funcArgInfo(Register::RDX, I64, 1), 197 | funcArgInfo(Register::R8, I64, 1), 198 | funcArgInfo(Register::R9, I64, 0), 199 | })}, 200 | 
{"GetTickCount64", functioninfo("GetTickCount64", {})}, 201 | }; 202 | ; 203 | 204 | static inline std::unordered_map, functioninfo, 205 | VectorHash> 206 | search_signatures(const std::vector& data) { 207 | AhoCorasick ac(siglookup); 208 | std::vector> matches = ac.search(data); 209 | for (const auto& [pos, id] : matches) { 210 | auto it = siglookup.find(ac.patterns[id]); 211 | if (it != siglookup.end()) { 212 | it->second.add_offset(pos); 213 | } 214 | } 215 | return siglookup; 216 | }; 217 | 218 | std::vector convertToVector(const unsigned char* data, 219 | size_t size) { 220 | return std::vector(data, data + size); 221 | }; 222 | 223 | static void createOffsetMap() { 224 | for (auto value : siglookup) { 225 | for (auto offsets : value.second.offsets) { 226 | functions[offsets] = value.second; 227 | } 228 | functionsByName[value.second.name] = value.second; 229 | } 230 | }; 231 | static functioninfo* getFunctionInfo(uint64_t addr) { 232 | if (functions.count(addr) == 0) 233 | return nullptr; 234 | return &(functions[addr]); 235 | }; 236 | 237 | static functioninfo* getFunctionInfo(const std::string& name) { 238 | if (functionsByName.count(name) == 0) 239 | return nullptr; 240 | return &(functionsByName[name]); 241 | }; 242 | 243 | }; // funcsignatures 244 | 245 | #endif // FUNCSIGNATURES_H 246 | -------------------------------------------------------------------------------- /lifter/GEPTracker.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "GEPTracker.h" 3 | #include "nt/nt_headers.hpp" 4 | 5 | namespace BinaryOperations { 6 | 7 | // ???, do this while creating the pass 8 | uint8_t* data_g; 9 | arch_mode is64Bit; 10 | bool concretize_unsafe_reads = 0; 11 | // this is the worst way of doing this 12 | 13 | void initBases(uint8_t* data, arch_mode is64) { 14 | data_g = data; 15 | is64Bit = is64; 16 | } 17 | 18 | int getBitness() { return is64Bit == X64 ? 
64 : 32; } 19 | void getBases(uint8_t** data) { *data = data_g; } 20 | 21 | const char* getName(uint64_t offset) { 22 | auto dosHeader = (win::dos_header_t*)data_g; 23 | auto ntHeaders = (const void*)((uint8_t*)data_g + dosHeader->e_lfanew); 24 | auto rvaOffset = RvaToFileOffset(ntHeaders, offset); 25 | return (const char*)data_g + rvaOffset; 26 | } 27 | 28 | bool isImport(uint64_t addr) { 29 | auto dosHeader = reinterpret_cast(data_g); 30 | auto ntHeadersBase = 31 | reinterpret_cast(data_g) + dosHeader->e_lfanew; 32 | 33 | uint64_t imageBase; 34 | if (is64Bit == X64) { 35 | auto ntHeaders = 36 | reinterpret_cast*>(ntHeadersBase); 37 | imageBase = ntHeaders->optional_header.image_base; 38 | } else { 39 | auto ntHeaders = 40 | reinterpret_cast*>(ntHeadersBase); 41 | imageBase = ntHeaders->optional_header.image_base; 42 | } 43 | 44 | APInt tmp; 45 | return readMemory(imageBase + addr, 1, tmp); 46 | } 47 | 48 | DenseSet MemWrites; 49 | 50 | bool isWrittenTo(uint64_t addr) { 51 | return MemWrites.find(addr) != MemWrites.end(); 52 | } 53 | 54 | void WriteTo(uint64_t addr) { MemWrites.insert(addr); } 55 | 56 | // sections 57 | bool readMemory(uint64_t addr, unsigned byteSize, APInt& value) { 58 | 59 | uint64_t mappedAddr = address_to_mapped_address(addr); 60 | uint64_t tempValue = 0; 61 | if (mappedAddr > 0) { 62 | std::memcpy(&tempValue, 63 | reinterpret_cast(data_g + mappedAddr), byteSize); 64 | 65 | APInt readValue(byteSize * 8, tempValue); 66 | value = readValue; 67 | return 1; 68 | } 69 | 70 | return 0; 71 | } 72 | 73 | // TODO 74 | // 1- if writes into execute section, flag that address, if we execute that 75 | // address then do fancy stuff to figure out what we wrote so we know what 76 | // we will be executing 77 | void writeMemory(); 78 | 79 | uint64_t RvaToFileOffset(const void* ntHeadersBase, uint32_t rva) { 80 | const auto* sectionHeader = 81 | is64Bit == X64 82 | ? 
reinterpret_cast*>(ntHeadersBase) 83 | ->get_sections() 84 | : reinterpret_cast*>(ntHeadersBase) 85 | ->get_sections(); 86 | 87 | int numSections = 88 | is64Bit == X64 89 | ? reinterpret_cast*>(ntHeadersBase) 90 | ->file_header.num_sections 91 | : reinterpret_cast*>(ntHeadersBase) 92 | ->file_header.num_sections; 93 | 94 | for (int i = 0; i < numSections; i++, sectionHeader++) { 95 | if (rva >= sectionHeader->virtual_address && 96 | rva < 97 | (sectionHeader->virtual_address + sectionHeader->virtual_size)) { 98 | 99 | return rva - sectionHeader->virtual_address + 100 | sectionHeader->ptr_raw_data; 101 | } 102 | } 103 | return 0; 104 | } 105 | 106 | uint64_t address_to_mapped_address(uint64_t rva) { 107 | auto dosHeader = reinterpret_cast(data_g); 108 | auto ntHeadersBase = 109 | reinterpret_cast(data_g) + dosHeader->e_lfanew; 110 | 111 | uint64_t imageBase; 112 | if (is64Bit == X64) { 113 | auto ntHeaders = 114 | reinterpret_cast*>(ntHeadersBase); 115 | imageBase = ntHeaders->optional_header.image_base; 116 | } else { 117 | auto ntHeaders = 118 | reinterpret_cast*>(ntHeadersBase); 119 | imageBase = ntHeaders->optional_header.image_base; 120 | } 121 | 122 | uint64_t address = rva - imageBase; 123 | return RvaToFileOffset(ntHeadersBase, address); 124 | } 125 | 126 | uint64_t fileOffsetToRVA(uint64_t offset) { 127 | // this function is duplicate of RvaToFileOffset ?????????????? 128 | if (!data_g) { 129 | return 0; // Ensure data is initialized 130 | } 131 | 132 | // Get DOS header 133 | auto dosHeader = reinterpret_cast(data_g); 134 | auto ntHeadersBase = 135 | reinterpret_cast(data_g) + dosHeader->e_lfanew; 136 | 137 | // Determine NT headers based on architecture 138 | uint64_t imageBase; 139 | auto sectionHeader = 140 | is64Bit == X64 141 | ? reinterpret_cast*>(ntHeadersBase) 142 | ->get_sections() 143 | : reinterpret_cast*>(ntHeadersBase) 144 | ->get_sections(); 145 | 146 | int numSections = 147 | is64Bit == X64 148 | ? 
reinterpret_cast*>(ntHeadersBase) 149 | ->file_header.num_sections 150 | : reinterpret_cast*>(ntHeadersBase) 151 | ->file_header.num_sections; 152 | 153 | imageBase = 154 | is64Bit == X64 155 | ? reinterpret_cast*>(ntHeadersBase) 156 | ->optional_header.image_base 157 | : reinterpret_cast*>(ntHeadersBase) 158 | ->optional_header.image_base; 159 | 160 | // Iterate over section headers to find matching section 161 | for (int i = 0; i < numSections; i++, sectionHeader++) { 162 | if (offset >= sectionHeader->ptr_raw_data && 163 | offset < 164 | (sectionHeader->ptr_raw_data + sectionHeader->size_raw_data)) { 165 | 166 | if (!sectionHeader->characteristics 167 | .mem_write) // if section is writeable, then it might be not 168 | // safe to concretize this read, only do this if 169 | // we are sure we want to do this 170 | // also, this code is trash 171 | return imageBase + offset - sectionHeader->ptr_raw_data + 172 | sectionHeader->virtual_address; 173 | else 174 | return 0; 175 | } 176 | } 177 | 178 | return 0; // Offset not found in any section 179 | } 180 | 181 | }; // namespace BinaryOperations -------------------------------------------------------------------------------- /lifter/GEPTracker.h: -------------------------------------------------------------------------------- 1 | #ifndef GEPTracker_H 2 | #define GEPTracker_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace llvm; 9 | 10 | enum Assumption { Real, Assumed }; // add None 11 | 12 | enum arch_mode { X86 = 0, X64 = 1 }; 13 | 14 | enum isPaged { MEMORY_PAGED, MEMORY_MIGHT_BE_PAGED, MEMORY_NOT_PAGED }; 15 | 16 | struct APIntComparator { 17 | bool operator()(const llvm::APInt& lhs, const llvm::APInt& rhs) const { 18 | return lhs.ult(rhs); // unsigned less-than comparison 19 | } 20 | }; 21 | 22 | class ValueByteReference { 23 | public: 24 | // Instruction* storeInst; 25 | llvm::Value* value; 26 | uint8_t byteOffset; 27 | // ValueByteReference() : storeInst(nullptr), value(nullptr), 
byteOffset(0) {} 28 | ValueByteReference() : value(nullptr), byteOffset(0) {} 29 | 30 | /* 31 | ValueByteReference(Instruction* inst, Value* val, short offset) 32 | : storeInst(inst), value(val), byteOffset(offset) {} 33 | */ 34 | 35 | ValueByteReference(llvm::Value* val, short offset) 36 | : value(val), byteOffset(offset) {} 37 | }; 38 | 39 | class ValueByteReferenceRange { 40 | public: 41 | union { 42 | ValueByteReference ref; 43 | uint64_t memoryAddress; 44 | }; 45 | 46 | uint8_t start; 47 | uint8_t end; 48 | 49 | bool isRef; 50 | ValueByteReferenceRange(ValueByteReference vref, uint8_t startv, uint8_t endv) 51 | : ref(vref), start(startv), end(endv), isRef(true) {} 52 | 53 | // Constructor for ValueByteReferenceRange using memoryAddress 54 | ValueByteReferenceRange(uint64_t addr, uint8_t startv, uint8_t endv) 55 | : memoryAddress(addr), start(startv), end(endv), isRef(false) {} 56 | }; 57 | 58 | namespace BinaryOperations { 59 | extern bool concretize_unsafe_reads; 60 | const char* getName(const uint64_t offset); 61 | 62 | int getBitness(); 63 | 64 | void initBases(uint8_t* data, arch_mode is64); // ? 
65 | 66 | void getBases(uint8_t** data); 67 | 68 | bool isImport(uint64_t addr); 69 | 70 | bool readMemory(const uint64_t addr, unsigned byteSize, llvm::APInt& value); 71 | 72 | bool isWrittenTo(const uint64_t addr); 73 | 74 | void WriteTo(uint64_t addr); 75 | 76 | uint64_t RvaToFileOffset(const void* ntHeadersBase, uint32_t rva); 77 | 78 | uint64_t address_to_mapped_address(uint64_t rva); 79 | 80 | uint64_t fileOffsetToRVA(uint64_t fileAddress); 81 | 82 | }; // namespace BinaryOperations 83 | 84 | /* 85 | namespace SCCPSimplifier { 86 | void init(Function* function); 87 | SCCPSolver* get(); 88 | 89 | void cleanup(); 90 | } // namespace SCCPSimplifier 91 | */ 92 | 93 | #endif -------------------------------------------------------------------------------- /lifter/OperandUtils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "PathSolver.ipp" 9 | 10 | inline bool comesBefore(llvm::Instruction* a, llvm::Instruction* b, 11 | llvm::DominatorTree& DT) { 12 | 13 | bool sameBlock = 14 | a->getParent() == b->getParent(); // if same block, use ->comesBefore, 15 | 16 | if (sameBlock) { 17 | return a->comesBefore(b); // if a comes before b, return true 18 | } 19 | // if "a"'s block dominates "b"'s block, "a" comes first. 
20 | bool dominate = DT.properlyDominates(a->getParent(), b->getParent()); 21 | return dominate; 22 | } 23 | -------------------------------------------------------------------------------- /lifter/PathSolver.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "CustomPasses.hpp" 3 | #include "OperandUtils.h" 4 | #include "lifterClass.hpp" 5 | #include "utils.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | using namespace llvm; 17 | 18 | // simplify Users with BFS 19 | // because => 20 | // x = add a, b 21 | // if we go simplify a then simplify x, then simplify b, we might miss 22 | // simplifying x if we go simplify a, then simplify b, then simplify x we will 23 | // not miss 24 | // 25 | // also refactor this 26 | 27 | PATH_info getConstraintVal(llvm::Function* function, Value* constraint, 28 | uint64_t& dest) { 29 | PATH_info result = PATH_unsolved; 30 | printvalue(constraint); 31 | /* 32 | auto simplified_constraint = simplifyValue( 33 | constraint, 34 | function->getParent()->getDataLayout()); // this is such a hack 35 | // 36 | printvalue(simplified_constraint); 37 | 38 | if (llvm::ConstantInt* constInt = 39 | dyn_cast(simplified_constraint)) { 40 | printvalue(constInt) dest = constInt->getZExtValue(); 41 | result = PATH_solved; 42 | return result; 43 | } 44 | */ 45 | 46 | return result; 47 | } 48 |
// Runs the final optimization pipeline over the module that owns
// `clonedFuncx`.
//
// Phase 1 repeatedly runs the default O1 module pipeline followed by the
// project passes GEPLoadPass, ReplaceTruncWithLoadPass and
// PromotePseudoStackPass, and stops once a full round leaves the module's
// instruction count unchanged.
// Phase 2 runs the default O2 module pipeline followed by
// ResizeAllocatedStackPass and PromotePseudoMemory, exactly once.
//
// Parameters:
//   clonedFuncx - function whose parent module is optimized in place.
//   mem         - value forwarded to GEPLoadPass, PromotePseudoStackPass and
//                 PromotePseudoMemory.
//   filebase    - pointer forwarded to GEPLoadPass.
void final_optpass(llvm::Function* clonedFuncx, Value* mem, uint8_t* filebase) {
  llvm::PassBuilder passBuilder;

  llvm::LoopAnalysisManager loopAnalysisManager;
  llvm::FunctionAnalysisManager functionAnalysisManager;
  llvm::CGSCCAnalysisManager cGSCCAnalysisManager;
  llvm::ModuleAnalysisManager moduleAnalysisManager;

  passBuilder.registerModuleAnalyses(moduleAnalysisManager);
  passBuilder.registerCGSCCAnalyses(cGSCCAnalysisManager);
  passBuilder.registerFunctionAnalyses(functionAnalysisManager);
  passBuilder.registerLoopAnalyses(loopAnalysisManager);
  passBuilder.crossRegisterProxies(loopAnalysisManager, functionAnalysisManager,
                                   cGSCCAnalysisManager, moduleAnalysisManager);

  // Start from an empty manager. The pipeline is rebuilt before every run
  // below, so building an O0 default pipeline here would only be thrown
  // away (and older LLVM releases assert when the default per-module
  // pipeline is requested at O0).
  llvm::ModulePassManager modulePassManager;

  llvm::Module* module = clonedFuncx->getParent();
  /*
  modulePassManager.addPass(BasicBlockDotGraphPass());

  modulePassManager.run(*module, moduleAnalysisManager);
  */
  bool changed = false;
  do {
    const size_t beforeSize = module->getInstructionCount();

    modulePassManager =
        passBuilder.buildPerModuleDefaultPipeline(llvm::OptimizationLevel::O1);

    modulePassManager.addPass(GEPLoadPass(mem, filebase));
    modulePassManager.addPass(ReplaceTruncWithLoadPass());
    modulePassManager.addPass(PromotePseudoStackPass(mem));

    modulePassManager.run(*module, moduleAnalysisManager);

    const size_t afterSize = module->getInstructionCount();

    // Fixpoint test: another round is only worthwhile if this one changed
    // the number of instructions in the module.
    changed = beforeSize != afterSize;

  } while (changed);

  modulePassManager =
      passBuilder.buildPerModuleDefaultPipeline(llvm::OptimizationLevel::O2);

  modulePassManager.addPass(ResizeAllocatedStackPass());
  modulePassManager.addPass(PromotePseudoMemory(mem));

  modulePassManager.run(*module, moduleAnalysisManager);
}
102 | -------------------------------------------------------------------------------- /lifter/PathSolver.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | enum PATH_info { 6 | PATH_unsolved = 0, 7 | PATH_solved = 1, 8 | }; 9 | 10 | PATH_info getConstraintVal(llvm::Function* function, llvm::Value* constraint, 11 | uint64_t& dest); 12 | 13 | void final_optpass(llvm::Function* clonedFuncx, llvm::Value* 
mem, 14 | uint8_t* filebase); 15 | 16 | PATH_info solvePath(llvm::Function* function, uint64_t& dest, 17 | llvm::Value* simplifyValue); -------------------------------------------------------------------------------- /lifter/PathSolver.ipp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "PathSolver.h" 4 | #include "lifterClass.hpp" 5 | #include 6 | #include 7 | #include 8 | 9 | 10 |
// Decides where lifting continues, given the value `simplifyValue` that the
// next address was reduced to.
//
//  * Constant: `dest` receives the constant, a fresh block is created and
//    branched to, `blockInfo` is pointed at it, and PATH_solved is returned.
//  * getConstraintVal() reports PATH_solved: handled the same way.
//  * Otherwise the set from computePossibleValues() is inspected:
//      - 1 value : unconditional branch to a new block for that value.
//      - 2 values: a conditional branch is emitted; this lifter continues
//                  on the false edge and a copy of it is queued in `lifters`
//                  for the true edge.
//      - >2      : UNREACHABLE.
//    PATH_unsolved is returned in all of these cases.
//
// `run` is cleared on every path that reaches one of the cases above.
//
// Parameters:
//   function      - function that receives the new basic blocks' context.
//   dest          - out: resolved target when the result is PATH_solved.
//   simplifyValue - value describing the next address.
MERGEN_LIFTER_DEFINITION_TEMPLATES(PATH_info)::solvePath(
    llvm::Function* function, uint64_t& dest, Value* simplifyValue) {
  PATH_info result = PATH_unsolved;
  if (llvm::ConstantInt* constInt =
          dyn_cast<llvm::ConstantInt>(simplifyValue)) {
    dest = constInt->getZExtValue();
    result = PATH_solved;
    run = 0;
    auto bb_solved = BasicBlock::Create(
        function->getContext(), "bb_constraint-" + std::to_string(dest) + "-",
        builder.GetInsertBlock()->getParent());

    builder.CreateBr(bb_solved);
    blockInfo = BBInfo(dest, bb_solved);
    return result;
  }

  if (PATH_info solved = getConstraintVal(function, simplifyValue, dest)) {
    if (solved == PATH_solved) {
      run = 0;
      std::cout << "Solved the constraint and moving to next path\n"
                << std::flush;
      auto bb_solved = BasicBlock::Create(
          function->getContext(), "bb_constraint-" + std::to_string(dest) + "-",
          builder.GetInsertBlock()->getParent());

      builder.CreateBr(bb_solved);
      blockInfo = BBInfo(dest, bb_solved);
      return solved;
    }
  }

  // unsolved
  printvalue(simplifyValue);
  run = 0;
  auto pvset = computePossibleValues(simplifyValue);
  std::vector<APInt> pv(pvset.begin(), pvset.end());
  if (pv.size() == 1) {
    printvalue2(pv[0]);
    auto bb_solved = BasicBlock::Create(function->getContext(), "bb_false",
                                        builder.GetInsertBlock()->getParent());

    builder.CreateBr(bb_solved);
    blockInfo = BBInfo(pv[0].getZExtValue(), bb_solved);
  }
  if (pv.size() == 2) {
    auto bb_false = BasicBlock::Create(function->getContext(), "bb_false",
                                       builder.GetInsertBlock()->getParent());
    auto bb_true = BasicBlock::Create(function->getContext(), "bb_true",
                                      builder.GetInsertBlock()->getParent());

    auto firstcase = pv[0];
    auto secondcase = pv[1];

    // Must NOT be `static`: the lambda reaches `builder` through the
    // captured `this`. A static lambda is initialized only on the first
    // call, so every later call (from any other lifter instance, possibly
    // after the first one was deleted) would go through a stale object.
    auto try_simplify = [&](APInt c1,
                            Value* simplifyv) -> std::optional<Value*> {
      if (auto si = dyn_cast<SelectInst>(simplifyv)) {
        auto firstcase_v = builder.getIntN(
            simplifyv->getType()->getIntegerBitWidth(), c1.getZExtValue());
        if (si->getTrueValue() == firstcase_v)
          return si->getCondition();
      }
      return std::nullopt;
    };

    Value* condition = nullptr;

    // condition value is a kind of hack
    // 1- if its a select, we can extract the condition
    // 1a- if firstcase is the select's true value, extract the condition
    // 1b- if secondcase is the select's true value, extract the condition
    //     and swap the two cases
    // 2- otherwise create a hacky compare: simplifyValue == firstcase

    if (auto can_simplify = try_simplify(firstcase, simplifyValue))
      condition = can_simplify.value();
    else if (auto can_simplify2 = try_simplify(secondcase, simplifyValue)) {
      std::swap(firstcase, secondcase);
      condition = can_simplify2.value();
    } else
      condition = createICMPFolder(
          llvm::CmpInst::ICMP_EQ, simplifyValue,
          builder.getIntN(simplifyValue->getType()->getIntegerBitWidth(),
                          firstcase.getZExtValue()));
    printvalue(condition);
    auto BR = builder.CreateCondBr(condition, bb_true, bb_false);

    RegisterBranch(BR);

    printvalue2(firstcase);
    printvalue2(secondcase);
    // [this] continues on the false edge (bb_false -> secondcase), so for
    // this lifter the condition is assumed to be false (0 below).
    blockInfo = BBInfo(secondcase.getZExtValue(), bb_false);

    lifterClass* newlifter = new lifterClass(*this);

    // [newlifter] takes the true edge (bb_true -> firstcase), so for it the
    // condition is assumed to be true (1 below).
    newlifter->blockInfo = BBInfo(firstcase.getZExtValue(), bb_true);
    printvalue(condition);
    // NOTE(review): the template argument of these two casts is not visible
    // in this copy of the file; Instruction is assumed -- confirm against
    // the key type of `assumptions` in lifterClass.
    newlifter->assumptions[cast<Instruction>(condition)] = 1;

    assumptions[cast<Instruction>(condition)] = 0;

    lifters.push_back(newlifter);

    debugging::doIfDebug([&]() {
      std::string Filename = "output_newpath.ll";
      std::error_code EC;
      llvm::raw_fd_ostream OS(Filename, EC);
      function->getParent()->print(OS, nullptr);
    });
    std::cout << "created a new path\n" << std::flush;
  }
  if (pv.size() > 2) {
    UNREACHABLE("cant reach more than 2 paths!");
  }

  return result;
}
131 | -------------------------------------------------------------------------------- /lifter/Semantics.h: -------------------------------------------------------------------------------- 1 | #pragma once -------------------------------------------------------------------------------- /lifter/ZydisDisassembler_registers.h: -------------------------------------------------------------------------------- 1 | #ifdef ICED_NOT_FOUND 2 | #ifndef REGISTERS_ZYDIS_H 3 | #define REGISTERS_ZYDIS_H 4 | 5 | #include 6 | #include 7 | namespace Mergen { 8 | enum class ZydisRegister : uint16_t { 9 | None = ZYDIS_REGISTER_NONE, 10 | AL = ZYDIS_REGISTER_AL, 11 | CL = ZYDIS_REGISTER_CL, 12 | DL = ZYDIS_REGISTER_DL, 13 | BL = ZYDIS_REGISTER_BL, 14 | AH = ZYDIS_REGISTER_AH, 15 | CH = ZYDIS_REGISTER_CH, 16 | DH = ZYDIS_REGISTER_DH, 17 | BH = ZYDIS_REGISTER_BH, 18 | SPL = ZYDIS_REGISTER_SPL, 19 | BPL = ZYDIS_REGISTER_BPL, 20 | SIL = ZYDIS_REGISTER_SIL, 21 | DIL = ZYDIS_REGISTER_DIL, 22 | R8B = ZYDIS_REGISTER_R8B, 23 | R9B = ZYDIS_REGISTER_R9B, 24 | R10B = ZYDIS_REGISTER_R10B, 25 | R11B = ZYDIS_REGISTER_R11B, 26 | R12B = ZYDIS_REGISTER_R12B, 27 | R13B = ZYDIS_REGISTER_R13B, 28 | R14B = ZYDIS_REGISTER_R14B, 29 | R15B = ZYDIS_REGISTER_R15B, 30 | AX = ZYDIS_REGISTER_AX, 
31 | CX = ZYDIS_REGISTER_CX, 32 | DX = ZYDIS_REGISTER_DX, 33 | BX = ZYDIS_REGISTER_BX, 34 | SP = ZYDIS_REGISTER_SP, 35 | BP = ZYDIS_REGISTER_BP, 36 | SI = ZYDIS_REGISTER_SI, 37 | DI = ZYDIS_REGISTER_DI, 38 | R8W = ZYDIS_REGISTER_R8W, 39 | R9W = ZYDIS_REGISTER_R9W, 40 | R10W = ZYDIS_REGISTER_R10W, 41 | R11W = ZYDIS_REGISTER_R11W, 42 | R12W = ZYDIS_REGISTER_R12W, 43 | R13W = ZYDIS_REGISTER_R13W, 44 | R14W = ZYDIS_REGISTER_R14W, 45 | R15W = ZYDIS_REGISTER_R15W, 46 | EAX = ZYDIS_REGISTER_EAX, 47 | ECX = ZYDIS_REGISTER_ECX, 48 | EDX = ZYDIS_REGISTER_EDX, 49 | EBX = ZYDIS_REGISTER_EBX, 50 | ESP = ZYDIS_REGISTER_ESP, 51 | EBP = ZYDIS_REGISTER_EBP, 52 | ESI = ZYDIS_REGISTER_ESI, 53 | EDI = ZYDIS_REGISTER_EDI, 54 | R8D = ZYDIS_REGISTER_R8D, 55 | R9D = ZYDIS_REGISTER_R9D, 56 | R10D = ZYDIS_REGISTER_R10D, 57 | R11D = ZYDIS_REGISTER_R11D, 58 | R12D = ZYDIS_REGISTER_R12D, 59 | R13D = ZYDIS_REGISTER_R13D, 60 | R14D = ZYDIS_REGISTER_R14D, 61 | R15D = ZYDIS_REGISTER_R15D, 62 | RAX = ZYDIS_REGISTER_RAX, 63 | RCX = ZYDIS_REGISTER_RCX, 64 | RDX = ZYDIS_REGISTER_RDX, 65 | RBX = ZYDIS_REGISTER_RBX, 66 | RSP = ZYDIS_REGISTER_RSP, 67 | RBP = ZYDIS_REGISTER_RBP, 68 | RSI = ZYDIS_REGISTER_RSI, 69 | RDI = ZYDIS_REGISTER_RDI, 70 | R8 = ZYDIS_REGISTER_R8, 71 | R9 = ZYDIS_REGISTER_R9, 72 | R10 = ZYDIS_REGISTER_R10, 73 | R11 = ZYDIS_REGISTER_R11, 74 | R12 = ZYDIS_REGISTER_R12, 75 | R13 = ZYDIS_REGISTER_R13, 76 | R14 = ZYDIS_REGISTER_R14, 77 | R15 = ZYDIS_REGISTER_R15, 78 | EIP = ZYDIS_REGISTER_EIP, 79 | RIP = ZYDIS_REGISTER_RIP, 80 | ES = ZYDIS_REGISTER_ES, 81 | CS = ZYDIS_REGISTER_CS, 82 | SS = ZYDIS_REGISTER_SS, 83 | DS = ZYDIS_REGISTER_DS, 84 | FS = ZYDIS_REGISTER_FS, 85 | GS = ZYDIS_REGISTER_GS, 86 | XMM0 = ZYDIS_REGISTER_XMM0, 87 | XMM1 = ZYDIS_REGISTER_XMM1, 88 | XMM2 = ZYDIS_REGISTER_XMM2, 89 | XMM3 = ZYDIS_REGISTER_XMM3, 90 | XMM4 = ZYDIS_REGISTER_XMM4, 91 | XMM5 = ZYDIS_REGISTER_XMM5, 92 | XMM6 = ZYDIS_REGISTER_XMM6, 93 | XMM7 = ZYDIS_REGISTER_XMM7, 94 | XMM8 = ZYDIS_REGISTER_XMM8, 95 
| XMM9 = ZYDIS_REGISTER_XMM9, 96 | XMM10 = ZYDIS_REGISTER_XMM10, 97 | XMM11 = ZYDIS_REGISTER_XMM11, 98 | XMM12 = ZYDIS_REGISTER_XMM12, 99 | XMM13 = ZYDIS_REGISTER_XMM13, 100 | XMM14 = ZYDIS_REGISTER_XMM14, 101 | XMM15 = ZYDIS_REGISTER_XMM15, 102 | XMM16 = ZYDIS_REGISTER_XMM16, 103 | XMM17 = ZYDIS_REGISTER_XMM17, 104 | XMM18 = ZYDIS_REGISTER_XMM18, 105 | XMM19 = ZYDIS_REGISTER_XMM19, 106 | XMM20 = ZYDIS_REGISTER_XMM20, 107 | XMM21 = ZYDIS_REGISTER_XMM21, 108 | XMM22 = ZYDIS_REGISTER_XMM22, 109 | XMM23 = ZYDIS_REGISTER_XMM23, 110 | XMM24 = ZYDIS_REGISTER_XMM24, 111 | XMM25 = ZYDIS_REGISTER_XMM25, 112 | XMM26 = ZYDIS_REGISTER_XMM26, 113 | XMM27 = ZYDIS_REGISTER_XMM27, 114 | XMM28 = ZYDIS_REGISTER_XMM28, 115 | XMM29 = ZYDIS_REGISTER_XMM29, 116 | XMM30 = ZYDIS_REGISTER_XMM30, 117 | XMM31 = ZYDIS_REGISTER_XMM31, 118 | YMM0 = ZYDIS_REGISTER_YMM0, 119 | YMM1 = ZYDIS_REGISTER_YMM1, 120 | YMM2 = ZYDIS_REGISTER_YMM2, 121 | YMM3 = ZYDIS_REGISTER_YMM3, 122 | YMM4 = ZYDIS_REGISTER_YMM4, 123 | YMM5 = ZYDIS_REGISTER_YMM5, 124 | YMM6 = ZYDIS_REGISTER_YMM6, 125 | YMM7 = ZYDIS_REGISTER_YMM7, 126 | YMM8 = ZYDIS_REGISTER_YMM8, 127 | YMM9 = ZYDIS_REGISTER_YMM9, 128 | YMM10 = ZYDIS_REGISTER_YMM10, 129 | YMM11 = ZYDIS_REGISTER_YMM11, 130 | YMM12 = ZYDIS_REGISTER_YMM12, 131 | YMM13 = ZYDIS_REGISTER_YMM13, 132 | YMM14 = ZYDIS_REGISTER_YMM14, 133 | YMM15 = ZYDIS_REGISTER_YMM15, 134 | YMM16 = ZYDIS_REGISTER_YMM16, 135 | YMM17 = ZYDIS_REGISTER_YMM17, 136 | YMM18 = ZYDIS_REGISTER_YMM18, 137 | YMM19 = ZYDIS_REGISTER_YMM19, 138 | YMM20 = ZYDIS_REGISTER_YMM20, 139 | YMM21 = ZYDIS_REGISTER_YMM21, 140 | YMM22 = ZYDIS_REGISTER_YMM22, 141 | YMM23 = ZYDIS_REGISTER_YMM23, 142 | YMM24 = ZYDIS_REGISTER_YMM24, 143 | YMM25 = ZYDIS_REGISTER_YMM25, 144 | YMM26 = ZYDIS_REGISTER_YMM26, 145 | YMM27 = ZYDIS_REGISTER_YMM27, 146 | YMM28 = ZYDIS_REGISTER_YMM28, 147 | YMM29 = ZYDIS_REGISTER_YMM29, 148 | YMM30 = ZYDIS_REGISTER_YMM30, 149 | YMM31 = ZYDIS_REGISTER_YMM31, 150 | ZMM0 = ZYDIS_REGISTER_ZMM0, 151 | ZMM1 = 
ZYDIS_REGISTER_ZMM1, 152 | ZMM2 = ZYDIS_REGISTER_ZMM2, 153 | ZMM3 = ZYDIS_REGISTER_ZMM3, 154 | ZMM4 = ZYDIS_REGISTER_ZMM4, 155 | ZMM5 = ZYDIS_REGISTER_ZMM5, 156 | ZMM6 = ZYDIS_REGISTER_ZMM6, 157 | ZMM7 = ZYDIS_REGISTER_ZMM7, 158 | ZMM8 = ZYDIS_REGISTER_ZMM8, 159 | ZMM9 = ZYDIS_REGISTER_ZMM9, 160 | ZMM10 = ZYDIS_REGISTER_ZMM10, 161 | ZMM11 = ZYDIS_REGISTER_ZMM11, 162 | ZMM12 = ZYDIS_REGISTER_ZMM12, 163 | ZMM13 = ZYDIS_REGISTER_ZMM13, 164 | ZMM14 = ZYDIS_REGISTER_ZMM14, 165 | ZMM15 = ZYDIS_REGISTER_ZMM15, 166 | ZMM16 = ZYDIS_REGISTER_ZMM16, 167 | ZMM17 = ZYDIS_REGISTER_ZMM17, 168 | ZMM18 = ZYDIS_REGISTER_ZMM18, 169 | ZMM19 = ZYDIS_REGISTER_ZMM19, 170 | ZMM20 = ZYDIS_REGISTER_ZMM20, 171 | ZMM21 = ZYDIS_REGISTER_ZMM21, 172 | ZMM22 = ZYDIS_REGISTER_ZMM22, 173 | ZMM23 = ZYDIS_REGISTER_ZMM23, 174 | ZMM24 = ZYDIS_REGISTER_ZMM24, 175 | ZMM25 = ZYDIS_REGISTER_ZMM25, 176 | ZMM26 = ZYDIS_REGISTER_ZMM26, 177 | ZMM27 = ZYDIS_REGISTER_ZMM27, 178 | ZMM28 = ZYDIS_REGISTER_ZMM28, 179 | ZMM29 = ZYDIS_REGISTER_ZMM29, 180 | ZMM30 = ZYDIS_REGISTER_ZMM30, 181 | ZMM31 = ZYDIS_REGISTER_ZMM31, 182 | K0 = ZYDIS_REGISTER_K0, 183 | K1 = ZYDIS_REGISTER_K1, 184 | K2 = ZYDIS_REGISTER_K2, 185 | K3 = ZYDIS_REGISTER_K3, 186 | K4 = ZYDIS_REGISTER_K4, 187 | K5 = ZYDIS_REGISTER_K5, 188 | K6 = ZYDIS_REGISTER_K6, 189 | K7 = ZYDIS_REGISTER_K7, 190 | BND0 = ZYDIS_REGISTER_BND0, 191 | BND1 = ZYDIS_REGISTER_BND1, 192 | BND2 = ZYDIS_REGISTER_BND2, 193 | BND3 = ZYDIS_REGISTER_BND3, 194 | CR0 = ZYDIS_REGISTER_CR0, 195 | CR1 = ZYDIS_REGISTER_CR1, 196 | CR2 = ZYDIS_REGISTER_CR2, 197 | CR3 = ZYDIS_REGISTER_CR3, 198 | CR4 = ZYDIS_REGISTER_CR4, 199 | CR5 = ZYDIS_REGISTER_CR5, 200 | CR6 = ZYDIS_REGISTER_CR6, 201 | CR7 = ZYDIS_REGISTER_CR7, 202 | CR8 = ZYDIS_REGISTER_CR8, 203 | CR9 = ZYDIS_REGISTER_CR9, 204 | CR10 = ZYDIS_REGISTER_CR10, 205 | CR11 = ZYDIS_REGISTER_CR11, 206 | CR12 = ZYDIS_REGISTER_CR12, 207 | CR13 = ZYDIS_REGISTER_CR13, 208 | CR14 = ZYDIS_REGISTER_CR14, 209 | CR15 = ZYDIS_REGISTER_CR15, 210 | DR0 = 
ZYDIS_REGISTER_DR0, 211 | DR1 = ZYDIS_REGISTER_DR1, 212 | DR2 = ZYDIS_REGISTER_DR2, 213 | DR3 = ZYDIS_REGISTER_DR3, 214 | DR4 = ZYDIS_REGISTER_DR4, 215 | DR5 = ZYDIS_REGISTER_DR5, 216 | DR6 = ZYDIS_REGISTER_DR6, 217 | DR7 = ZYDIS_REGISTER_DR7, 218 | DR8 = ZYDIS_REGISTER_DR8, 219 | DR9 = ZYDIS_REGISTER_DR9, 220 | DR10 = ZYDIS_REGISTER_DR10, 221 | DR11 = ZYDIS_REGISTER_DR11, 222 | DR12 = ZYDIS_REGISTER_DR12, 223 | DR13 = ZYDIS_REGISTER_DR13, 224 | DR14 = ZYDIS_REGISTER_DR14, 225 | DR15 = ZYDIS_REGISTER_DR15, 226 | ST0 = ZYDIS_REGISTER_ST0, 227 | ST1 = ZYDIS_REGISTER_ST1, 228 | ST2 = ZYDIS_REGISTER_ST2, 229 | ST3 = ZYDIS_REGISTER_ST3, 230 | ST4 = ZYDIS_REGISTER_ST4, 231 | ST5 = ZYDIS_REGISTER_ST5, 232 | ST6 = ZYDIS_REGISTER_ST6, 233 | ST7 = ZYDIS_REGISTER_ST7, 234 | MM0 = ZYDIS_REGISTER_MM0, 235 | MM1 = ZYDIS_REGISTER_MM1, 236 | MM2 = ZYDIS_REGISTER_MM2, 237 | MM3 = ZYDIS_REGISTER_MM3, 238 | MM4 = ZYDIS_REGISTER_MM4, 239 | MM5 = ZYDIS_REGISTER_MM5, 240 | MM6 = ZYDIS_REGISTER_MM6, 241 | MM7 = ZYDIS_REGISTER_MM7, 242 | TR0 = ZYDIS_REGISTER_TR0, 243 | TR1 = ZYDIS_REGISTER_TR1, 244 | TR2 = ZYDIS_REGISTER_TR2, 245 | TR3 = ZYDIS_REGISTER_TR3, 246 | TR4 = ZYDIS_REGISTER_TR4, 247 | TR5 = ZYDIS_REGISTER_TR5, 248 | TR6 = ZYDIS_REGISTER_TR6, 249 | TR7 = ZYDIS_REGISTER_TR7, 250 | TMM0 = ZYDIS_REGISTER_TMM0, 251 | TMM1 = ZYDIS_REGISTER_TMM1, 252 | TMM2 = ZYDIS_REGISTER_TMM2, 253 | TMM3 = ZYDIS_REGISTER_TMM3, 254 | TMM4 = ZYDIS_REGISTER_TMM4, 255 | TMM5 = ZYDIS_REGISTER_TMM5, 256 | TMM6 = ZYDIS_REGISTER_TMM6, 257 | TMM7 = ZYDIS_REGISTER_TMM7, 258 | 259 | EFLAGS = ZYDIS_REGISTER_EFLAGS, 260 | RFLAGS = ZYDIS_REGISTER_RFLAGS, 261 | 262 | START = ZYDIS_REGISTER_NONE, 263 | END = ZYDIS_REGISTER_RFLAGS 264 | }; 265 | } 266 | #endif // REGISTERS_ZYDIS_H 267 | #endif // ICED_NOT_FOUND -------------------------------------------------------------------------------- /lifter/includes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 
#include 3 | #ifndef _CRT_SECURE_NO_WARNINGS 4 | #define _CRT_SECURE_NO_WARNINGS 5 | #endif // _CRT_SECURE_NO_WARNINGS 6 | #define _SILENCE_ALL_CXX20_DEPRECATION_WARNINGS 7 | #define _SILENCE_ALL_CXX23_DEPRECATION_WARNINGS 8 | #ifndef ZYDIS_STATIC_BUILD 9 | #define ZYDIS_STATIC_BUILD 10 | #endif // ZYDIS_STATIC_BUILD 11 | 12 | // #define _NODEV why? 13 | 14 | #pragma warning(disable : 4996) 15 | #pragma warning(disable : 4146) 16 | 17 | #ifdef _WIN32 18 | #ifndef NOMINMAX 19 | #define NOMINMAX 20 | #endif // NOMINMAX 21 | #else 22 | #endif // _WIN32 23 | 24 | #include "llvm/ADT/DenseMap.h" 25 | #include "llvm/IR/DerivedTypes.h" 26 | #include "llvm/IR/InstrTypes.h" 27 | #include "llvm/IR/Instructions.h" 28 | 29 | using Value = llvm::Value; 30 | using Instruction = llvm::Instruction; 31 | using Type = llvm::Type; 32 | using Twine = llvm::Twine; 33 | using ConstantInt = llvm::ConstantInt; 34 | using Constant = llvm::Constant; 35 | using APInt = llvm::APInt; 36 | using BasicBlock = llvm::BasicBlock; 37 | using SelectInst = llvm::SelectInst; 38 | 39 | #if LLVM_VERSION_MAJOR < 17 40 | inline llvm::raw_ostream& operator<<(llvm::raw_ostream& OS, 41 | const llvm::KnownBits& KB) { 42 | KB.print(OS); 43 | return OS; 44 | } 45 | #endif 46 | 47 | #define STACKP_VALUE 0x14FCA8 48 | // if this value changes, its only for debug purposes 49 | 50 | using ReverseRegisterMap = llvm::DenseMap; 51 | using RegisterMap = llvm::DenseMap; // we dont actually need 53 | // this to be a map 54 | 55 | enum Flag { 56 | FLAG_CF = 0, // Carry flag 57 | FLAG_RESERVED1 = 1, // Reserved, typically not 58 | // used by programs 59 | FLAG_PF = 2, // Parity flag 60 | FLAG_RESERVED3 = 3, // Reserved, typically not 61 | // used by programs 62 | FLAG_AF = 4, // Auxiliary Carry flag 63 | FLAG_RESERVED5 = 5, // Reserved, typically not 64 | // used by programs 65 | FLAG_ZF = 6, // Zero flag 66 | FLAG_SF = 7, // Sign flag 67 | FLAG_TF = 8, // Trap flag 68 | FLAG_IF = 9, // Interrupt enable flag 69 | FLAG_DF = 
10, // Direction flag 70 | FLAG_OF = 11, // Overflow flag 71 | FLAG_IOPL = 12, // I/O privilege level (286+ only) 72 | // always all-1s on 8086 and 186 73 | FLAG_IOPL2 = 13, // I/O privilege level (286+ only) 74 | // always all-1s on 8086 and 186 75 | FLAG_NT = 14, // Nested task flag (286+ only), 76 | // always 1 on 8086 and 186 77 | FLAG_MD = 15, // Mode flag (NEC V-series only), 78 | // reserved on all Intel CPUs. Always 1 79 | // on 8086 / 186, 0 on 286 and later. 80 | FLAG_RF = 16, // Resume flag (386+ only) 81 | FLAG_VM = 17, // Virtual 8086 mode flag (386+ only) 82 | FLAG_AC = 18, // Alignment Check (486+, ring 3), 83 | FLAG_VIF = 19, // Virtual interrupt flag (Pentium+) 84 | FLAG_VIP = 20, // Virtual interrupt pending (Pentium+) 85 | FLAG_ID = 21, // Able to use CPUID instruction 86 | // (Pentium+) 87 | FLAG_RES22 = 22, // Reserved, typically not 88 | // used by programs 89 | FLAG_RES23 = 23, // Reserved, typically not 90 | // used by programs 91 | FLAG_RES24 = 24, // Reserved, typically not 92 | // used by programs 93 | FLAG_RES25 = 25, // Reserved, typically not 94 | // used by programs 95 | FLAG_RES26 = 26, // Reserved, typically not 96 | // used by programs 97 | FLAG_RES27 = 27, // Reserved, typically not 98 | // used by programs 99 | FLAG_RES28 = 28, // Reserved, typically not 100 | // used by programs 101 | FLAG_RES29 = 29, // Reserved, typically not 102 | // used by programs 103 | FLAG_AES = 30, // AES key schedule loaded flag 104 | FLAG_AI = 31, // Alternate Instruction Set enabled 105 | // reserved above 32-63 106 | FLAGS_END = FLAG_IOPL, 107 | FLAGS_START = FLAG_CF 108 | }; 109 | 110 | //...... 
111 | inline llvm::raw_ostream& operator<<(llvm::raw_ostream& os, const Flag flag) { 112 | switch (flag) { 113 | case FLAG_CF: 114 | os << "FLAG_CF"; 115 | break; 116 | case FLAG_RESERVED1: 117 | os << "FLAG_RESERVED1"; 118 | break; 119 | case FLAG_PF: 120 | os << "FLAG_PF"; 121 | break; 122 | case FLAG_RESERVED3: 123 | os << "FLAG_RESERVED3"; 124 | break; 125 | case FLAG_AF: 126 | os << "FLAG_AF"; 127 | break; 128 | case FLAG_RESERVED5: 129 | os << "FLAG_RESERVED5"; 130 | break; 131 | case FLAG_ZF: 132 | os << "FLAG_ZF"; 133 | break; 134 | case FLAG_SF: 135 | os << "FLAG_SF"; 136 | break; 137 | case FLAG_TF: 138 | os << "FLAG_TF"; 139 | break; 140 | case FLAG_IF: 141 | os << "FLAG_IF"; 142 | break; 143 | case FLAG_DF: 144 | os << "FLAG_DF"; 145 | break; 146 | case FLAG_OF: 147 | os << "FLAG_OF"; 148 | break; 149 | case FLAG_IOPL: 150 | os << "FLAG_IOPL"; 151 | break; 152 | case FLAG_IOPL2: 153 | os << "FLAG_IOPL2"; 154 | break; 155 | case FLAG_NT: 156 | os << "FLAG_NT"; 157 | break; 158 | case FLAG_MD: 159 | os << "FLAG_MD"; 160 | break; 161 | case FLAG_RF: 162 | os << "FLAG_RF"; 163 | break; 164 | case FLAG_VM: 165 | os << "FLAG_VM"; 166 | break; 167 | case FLAG_AC: 168 | os << "FLAG_AC"; 169 | break; 170 | case FLAG_VIF: 171 | os << "FLAG_VIF"; 172 | break; 173 | case FLAG_VIP: 174 | os << "FLAG_VIP"; 175 | break; 176 | case FLAG_ID: 177 | os << "FLAG_ID"; 178 | break; 179 | case FLAG_RES22: 180 | os << "FLAG_RES22"; 181 | break; 182 | case FLAG_RES23: 183 | os << "FLAG_RES23"; 184 | break; 185 | case FLAG_RES24: 186 | os << "FLAG_RES24"; 187 | break; 188 | case FLAG_RES25: 189 | os << "FLAG_RES25"; 190 | break; 191 | case FLAG_RES26: 192 | os << "FLAG_RES26"; 193 | break; 194 | case FLAG_RES27: 195 | os << "FLAG_RES27"; 196 | break; 197 | case FLAG_RES28: 198 | os << "FLAG_RES28"; 199 | break; 200 | case FLAG_RES29: 201 | os << "FLAG_RES29"; 202 | break; 203 | case FLAG_AES: 204 | os << "FLAG_AES"; 205 | break; 206 | case FLAG_AI: 207 | os << "FLAG_AI"; 208 | 
break; 209 | default: 210 | os << "UNKNOWN_FLAG(" << static_cast(flag) << ")"; 211 | break; 212 | } 213 | return os; 214 | } 215 | 216 | enum opaque_info { NOT_OPAQUE = 0, OPAQUE_TRUE = 1, OPAQUE_FALSE = 2 }; 217 | 218 | enum ROP_info { 219 | ROP_return = 0, 220 | REAL_return = 1, 221 | }; 222 | 223 | enum JMP_info { 224 | JOP_jmp = 0, 225 | JOP_jmp_unsolved = 1, 226 | }; 227 | -------------------------------------------------------------------------------- /lifter/lifter.cpp: -------------------------------------------------------------------------------- 1 | 2 | #define MAGIC_ENUM_RANGE_MIN -1000 3 | #define MAGIC_ENUM_RANGE_MAX 1000 4 | 5 | #include "CommonMnemonics.h" 6 | #include "CommonRegisters.h" 7 | #include "FunctionSignatures.hpp" 8 | #include "GEPTracker.h" 9 | #include "PathSolver.h" 10 | #include "ZydisDisassembler.hpp" 11 | #include "icedDisassembler.hpp" 12 | #include "includes.h" 13 | #include "lifterClass.hpp" 14 | #include "nt/nt_headers.hpp" 15 | 16 | // #include "test_instructions.h" 17 | #include "utils.h" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "OperandUtils.ipp" 31 | #include "Semantics.ipp" 32 | 33 | // #define TEST 34 | std::vector*> lifters; 35 | uint64_t original_address = 0; 36 | unsigned int pathNo = 0; 37 | // consider having this function in a class, later we can use multi-threading to 38 | // explore different paths 39 | unsigned int breaking = 0; 40 | arch_mode is64Bit; 41 | 42 | void asm_to_zydis_to_lift(std::vector& fileData) { 43 | 44 | auto data = fileData.data(); 45 | BinaryOperations::initBases(data, is64Bit); 46 | 47 | // Initialize the context structure 48 | 49 | while (lifters.size() > 0) { 50 | auto lifter = lifters.back(); 51 | uint64_t offset = BinaryOperations::address_to_mapped_address( 52 | lifter->blockInfo.runtime_address); 53 | debugging::doIfDebug([&]() { 54 | const auto printv = 55 
| "runtime_addr: " + std::to_string(lifter->blockInfo.runtime_address) + 56 | " offset:" + std::to_string(offset) + " byte there: 0x" + 57 | std::to_string((int)*(data + offset)) + "\n" + 58 | "offset: " + std::to_string(offset) + 59 | " file_base: " + std::to_string(original_address) + 60 | " runtime: " + std::to_string(lifter->blockInfo.runtime_address) + 61 | "\n"; 62 | printvalue2(printv); 63 | }); 64 | 65 | lifter->builder.SetInsertPoint(lifter->blockInfo.block); 66 | 67 | lifter->run = 1; 68 | while ((lifter->run && !lifter->finished)) { 69 | 70 | // ZydisDecodedInstruction instruction; 71 | 72 | if (BinaryOperations::isWrittenTo(lifter->blockInfo.runtime_address)) { 73 | printvalueforce2(lifter->blockInfo.runtime_address); 74 | UNREACHABLE("Found Self Modifying Code! we dont support it"); 75 | } 76 | ++(lifter->counter); 77 | 78 | auto counter = debugging::increaseInstCounter() - 1; 79 | /* 80 | ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; 81 | ZydisDecoderDecodeFull(&decoder, data + offset, 15, &(instruction), 82 | operands); 83 | 84 | 85 | 86 | debugging::doIfDebug([&]() { 87 | ZydisFormatter formatter; 88 | 89 | ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL); 90 | char buffer[256]; 91 | ZyanU64 runtime_address = 0; 92 | ZydisFormatterFormatInstruction( 93 | &formatter, &(instruction), operands, 94 | lifter->instruction.operand_count_visible, &buffer[0], 95 | sizeof(buffer), runtime_address, ZYAN_NULL); 96 | const auto ct = (llvm::format_hex_no_prefix(lifter->counter, 0)); 97 | printvalue2(ct); 98 | const auto inst = buffer; 99 | printvalue2(inst); 100 | const auto runtime = lifter->blockInfo.runtime_address; 101 | printvalue2(runtime); 102 | }); 103 | */ 104 | lifter->runDisassembler(data + offset); 105 | /* 106 | icedDisassembler dis; 107 | auto res = dis.disassemble(data + offset); 108 | 109 | for (int i = 0; i < 4; i++) { 110 | auto typecheck = res.types[i] == lifter->instruction.types[i]; 111 | if (!typecheck) { 112 | 
printvalueforce2(res.text); 113 | printvalueforce2(i); 114 | printvalueforce2(uint32_t(res.types[i])); 115 | printvalueforce2(magic_enum::enum_name(res.types[i])); 116 | printvalueforce2(magic_enum::enum_name(lifter->instruction.types[i])); 117 | printvalueforce2(magic_enum::enum_name(lifter->instruction.regs[i])); 118 | } 119 | } 120 | */ 121 | const auto ct = (llvm::format_hex_no_prefix(lifter->counter, 0)); 122 | 123 | const auto runtime_address = 124 | (llvm::format_hex_no_prefix(lifter->blockInfo.runtime_address, 0)); 125 | 126 | printvalue2(ct); 127 | printvalue2(runtime_address); 128 | 129 | #ifndef _NODEV 130 | debugging::doIfDebug([&]() { printvalue2(lifter->instruction.text); }); 131 | #endif 132 | 133 | // printvalue2(lifter->instruction.text); 134 | 135 | // lifter->instruction = runDisassembler(disas, data + offset); 136 | lifter->blockInfo.runtime_address += lifter->instruction.length; 137 | 138 | lifter->liftInstruction(); 139 | lifter->runtime_address_prev = lifter->blockInfo.runtime_address; 140 | printvalue2(lifter->finished); 141 | if (lifter->finished) { 142 | lifter->run = 0; 143 | lifters.pop_back(); 144 | 145 | debugging::doIfDebug([&]() { 146 | std::string Filename = 147 | "output_path_" + std::to_string(++pathNo) + ".ll"; 148 | std::error_code EC; 149 | llvm::raw_fd_ostream OS(Filename, EC); 150 | lifter->fnc->getParent()->print(OS, nullptr); 151 | }); 152 | auto nextlift = "next lifter instance\n"; 153 | printvalue2(nextlift); 154 | 155 | delete lifter; 156 | break; 157 | } 158 | 159 | offset += lifter->instruction.length; 160 | } 161 | } 162 | } 163 | 164 | void InitFunction_and_LiftInstructions(const uint64_t runtime_address, 165 | std::vector fileData) { 166 | 167 | auto fileBase = fileData.data(); 168 | llvm::LLVMContext context; 169 | std::string mod_name = "my_lifting_module"; 170 | llvm::Module lifting_module = llvm::Module(mod_name.c_str(), context); 171 | 172 | std::vector argTypes; 173 | 
argTypes.push_back(llvm::Type::getInt64Ty(context)); 174 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 175 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 176 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 177 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 178 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 179 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 180 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 181 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 182 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 183 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 184 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 185 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 186 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 187 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 188 | argTypes.push_back(llvm::Type::getInt64Ty(context)); 189 | argTypes.push_back(llvm::PointerType::get(context, 0)); 190 | argTypes.push_back(llvm::PointerType::get(context, 0)); // temp fix TEB 191 | 192 | auto functionType = 193 | llvm::FunctionType::get(llvm::Type::getInt64Ty(context), argTypes, 0); 194 | 195 | const std::string function_name = "main"; 196 | auto function = 197 | llvm::Function::Create(functionType, llvm::Function::ExternalLinkage, 198 | function_name.c_str(), lifting_module); 199 | const std::string block_name = "entry"; 200 | auto bb = llvm::BasicBlock::Create(context, block_name.c_str(), function); 201 | 202 | llvm::InstSimplifyFolder Folder(lifting_module.getDataLayout()); 203 | llvm::IRBuilder builder = 204 | llvm::IRBuilder(bb, Folder); 205 | 206 | // auto RegisterList = InitRegisters(builder, function, runtime_address); 207 | 208 | auto main = new lifterClass(builder); 209 | main->InitRegisters(function, runtime_address); 210 | main->blockInfo = BBInfo(runtime_address, bb); 211 | 212 | main->fnc = function; 213 | main->initDomTree(*function); 214 | auto dosHeader = 
(win::dos_header_t*)fileBase; 215 | if (*(unsigned short*)fileBase != 0x5a4d) { 216 | UNREACHABLE("Only PE files are supported"); 217 | } 218 | 219 | auto IMAGE_NT_OPTIONAL_HDR32_MAGIC = 0x10b; 220 | auto IMAGE_NT_OPTIONAL_HDR64_MAGIC = 0x20b; 221 | 222 | auto ntHeaders = (win::nt_headers_t*)(fileBase + dosHeader->e_lfanew); 223 | auto PEmagic = ntHeaders->optional_header.magic; 224 | 225 | is64Bit = (arch_mode)(PEmagic == IMAGE_NT_OPTIONAL_HDR64_MAGIC); 226 | 227 | auto processHeaders = [fileBase, runtime_address, 228 | main](const void* ntHeadersBase) -> uint64_t { 229 | uint64_t address, imageSize, stackSize; 230 | 231 | if (is64Bit) { 232 | auto ntHeaders = 233 | reinterpret_cast*>(ntHeadersBase); 234 | address = ntHeaders->optional_header.image_base; 235 | imageSize = ntHeaders->optional_header.size_image; 236 | stackSize = ntHeaders->optional_header.size_stack_reserve; 237 | } else { 238 | auto ntHeaders = 239 | reinterpret_cast*>(ntHeadersBase); 240 | address = ntHeaders->optional_header.image_base; 241 | imageSize = ntHeaders->optional_header.size_image; 242 | stackSize = ntHeaders->optional_header.size_stack_reserve; 243 | } 244 | 245 | const uint64_t RVA = static_cast(runtime_address - address); 246 | const uint64_t fileOffset = 247 | BinaryOperations::RvaToFileOffset(ntHeadersBase, RVA); 248 | const uint8_t* dataAtAddress = 249 | reinterpret_cast(fileBase) + fileOffset; 250 | 251 | std::cout << std::hex << "0x" << static_cast(*dataAtAddress) 252 | << std::endl; 253 | 254 | std::cout << "address: " << address << " imageSize: " << imageSize 255 | << " filebase: " << reinterpret_cast(fileBase) 256 | << " fOffset: " << fileOffset << " RVA: " << RVA 257 | << " stackSize: " << stackSize << std::endl; 258 | 259 | main->markMemPaged(STACKP_VALUE - stackSize, STACKP_VALUE + stackSize); 260 | printvalue2(stackSize); 261 | main->markMemPaged(address, address + imageSize); 262 | return imageSize; 263 | }; 264 | 265 | original_address = processHeaders(fileBase + 
dosHeader->e_lfanew); 266 | 267 | main->signatures.search_signatures(fileData); 268 | main->signatures.createOffsetMap(); // ? 269 | for (const auto& [key, value] : main->signatures.siglookup) { 270 | value.display(); 271 | } 272 | auto ms = timer::getTimer(); 273 | std::cout << "\n" << std::dec << ms << " milliseconds has past" << std::endl; 274 | 275 | // blockAddresses->push_back(make_tuple(runtime_address, bb, 276 | // RegisterList)); 277 | lifters.push_back(main); 278 | 279 | asm_to_zydis_to_lift(fileData); 280 | 281 | ms = timer::getTimer(); 282 | 283 | std::cout << "\nlifting complete, " << std::dec << ms 284 | << " milliseconds has past" << std::endl; 285 | const std::string Filename_noopt = "output_no_opts.ll"; 286 | std::error_code EC_noopt; 287 | llvm::raw_fd_ostream OS_noopt(Filename_noopt, EC_noopt); 288 | 289 | lifting_module.print(OS_noopt, nullptr); 290 | 291 | std::cout << "\nwriting complete, " << std::dec << ms 292 | << " milliseconds has past" << std::endl; 293 | 294 | final_optpass(function, function->getArg(17), fileData.data()); 295 | const std::string Filename = "output.ll"; 296 | std::error_code EC; 297 | llvm::raw_fd_ostream OS(Filename, EC); 298 | 299 | lifting_module.print(OS, nullptr); 300 | 301 | return; 302 | } 303 | 304 | // #define TEST 305 | 306 | int main(int argc, char* argv[]) { 307 | 308 | std::vector args(argv, argv + argc); 309 | argparser::parseArguments(args); 310 | timer::startTimer(); 311 | 312 | #ifdef MERGEN_TEST 313 | if (1 == 1) 314 | return testInit(args[1]); 315 | #endif 316 | // use parser 317 | if (args.size() < 3) { 318 | std::cerr << "Usage: " << args[0] << " " << std::endl; 319 | return 1; 320 | } 321 | 322 | // debugging::enableDebug(); 323 | 324 | const char* filename = args[1].c_str(); 325 | uint64_t startAddr = stoull(args[2], nullptr, 0); 326 | 327 | std::ifstream ifs(filename, std::ios::binary); 328 | if (!ifs.is_open()) { 329 | std::cout << "Failed to open the file." 
<< std::endl; 330 | return 1; 331 | } 332 | 333 | ifs.seekg(0, std::ios::end); 334 | std::vector fileData(ifs.tellg()); 335 | ifs.seekg(0, std::ios::beg); 336 | 337 | if (!ifs.read((char*)fileData.data(), fileData.size())) { 338 | std::cout << "Failed to read the file." << std::endl; 339 | return 1; 340 | } 341 | ifs.close(); 342 | 343 | InitFunction_and_LiftInstructions(startAddr, fileData); 344 | auto milliseconds = timer::stopTimer(); 345 | std::cout << "\n" 346 | << std::dec << milliseconds << " milliseconds has past" 347 | << std::endl; 348 | std::cout << "Lifted and optimized " << debugging::increaseInstCounter() - 1 349 | << " total insts"; 350 | } 351 | -------------------------------------------------------------------------------- /lifter/test_instructions.cpp: -------------------------------------------------------------------------------- 1 | 2 | // #include "lifterClass.hpp" 3 | // #include "tester.hpp" 4 | // #include 5 | // #include 6 | // #include 7 | // #include 8 | // #include 9 | // #include 10 | 11 | // // & all the tests, if test fail, it should return 0 12 | 13 | // // make this so tests can be added seperately 14 | 15 | // bool test1(Tester* tester) { 16 | 17 | // std::vector bytes = {0x48, 0x01, 0xc8}; 18 | // tester->setRegister(ZYDIS_REGISTER_RAX, 5); 19 | // tester->setRegister(ZYDIS_REGISTER_RCX, 5); 20 | // tester->disassembleBytesAndLift(bytes); 21 | 22 | // auto res1 = tester->isRegisterEqualTo(ZYDIS_REGISTER_RAX, 10); 23 | // return res1; 24 | // } 25 | 26 | // bool test2(Tester* tester) { 27 | 28 | // std::vector bytes = {0x48, 0x01, 0xc8}; 29 | // tester->setRegister(ZYDIS_REGISTER_RAX, 10); 30 | // tester->setRegister(ZYDIS_REGISTER_RCX, 10); 31 | // tester->disassembleBytesAndLift(bytes); 32 | 33 | // auto res1 = tester->isRegisterEqualTo(ZYDIS_REGISTER_RAX, 20); 34 | // return res1; 35 | // } 36 | 37 | // struct ParsedSide { 38 | // std::vector> registers; 39 | // std::optional> flags; 40 | // }; 41 | 42 | // struct 
InstructionHeader { 43 | // std::vector instruction_bytes; 44 | // std::string mnemonic; 45 | // std::string operand_size; 46 | // }; 47 | 48 | // std::string trim(const std::string& s) { 49 | // size_t start = s.find_first_not_of(" \t"); 50 | // size_t end = s.find_last_not_of(" \t"); 51 | // if (start == std::string::npos) 52 | // return ""; 53 | // return s.substr(start, end - start + 1); 54 | // } 55 | 56 | // uint64_t swap_endianness(uint64_t value, int num_bytes) { 57 | // uint64_t result = 0; 58 | // for (int i = 0; i < num_bytes; ++i) { 59 | // int shift = 8 * (num_bytes - 1 - i); 60 | // uint8_t byte = (value >> shift) & 0xFF; 61 | // result |= static_cast(byte) << (8 * i); 62 | // } 63 | // return result; 64 | // } 65 | 66 | // ParsedSide parse_side(const std::string& side_str) { 67 | // ParsedSide result; 68 | 69 | // std::string s = side_str; 70 | 71 | // // Remove leading "in:" or "out:" 72 | // size_t colon_pos = s.find(':'); 73 | // if (colon_pos != std::string::npos) { 74 | // std::string prefix = s.substr(0, colon_pos); 75 | // if (prefix == "in" || prefix == "out") { 76 | // s = s.substr(colon_pos + 1); 77 | // } 78 | // } 79 | 80 | // std::vector tokens; 81 | // size_t start = 0; 82 | // while (true) { 83 | // size_t comma_pos = s.find(',', start); 84 | // if (comma_pos == std::string::npos) { 85 | // tokens.push_back(trim(s.substr(start))); 86 | // break; 87 | // } 88 | // tokens.push_back(trim(s.substr(start, comma_pos - start))); 89 | // start = comma_pos + 1; 90 | // } 91 | 92 | // for (const auto& token : tokens) { 93 | // if (token.empty()) 94 | // continue; 95 | 96 | // if (token.substr(0, 6) == "flags:") { 97 | // std::string value_str = token.substr(6); 98 | // value_str = trim(value_str); 99 | // if (value_str.empty()) 100 | // continue; 101 | // if (value_str[0] == '#') { 102 | // value_str = value_str.substr(1); 103 | // } 104 | // int num_bytes = value_str.length() / 2; 105 | // uint64_t value = 0; 106 | // try { 107 | // value = 
std::stoull(value_str, nullptr, 16); 108 | // } catch (...) { 109 | // // invalid value, treat as 0 110 | // } 111 | // result.flags = std::make_pair(value, num_bytes); 112 | // } else { 113 | // size_t colon_pos = token.find(':'); 114 | // if (colon_pos == std::string::npos) 115 | // continue; 116 | // std::string reg_part = trim(token.substr(0, colon_pos)); 117 | // std::string value_part = trim(token.substr(colon_pos + 1)); 118 | // if (value_part.empty()) 119 | // continue; 120 | // if (value_part[0] == '#') { 121 | // value_part = value_part.substr(1); 122 | // } 123 | // int num_bytes = value_part.length() / 2; 124 | // uint64_t value = 0; 125 | // try { 126 | // value = std::stoull(value_part, nullptr, 16); 127 | // } catch (...) { 128 | // // invalid value, treat as 0 129 | // } 130 | // result.registers.emplace_back(reg_part, value, num_bytes); 131 | // } 132 | // } 133 | 134 | // return result; 135 | // } 136 | 137 | // std::pair parse_test_line(const std::string& line) { 138 | // size_t pipe_pos = line.find('|'); 139 | // if (pipe_pos == std::string::npos) { 140 | // return {ParsedSide(), ParsedSide()}; 141 | // } 142 | 143 | // std::string in_str = line.substr(0, pipe_pos); 144 | // std::string out_str = line.substr(pipe_pos + 1); 145 | 146 | // ParsedSide in_side = parse_side(in_str); 147 | // ParsedSide out_side = parse_side(out_str); 148 | 149 | // return {in_side, out_side}; 150 | // } 151 | 152 | // InstructionHeader parse_instruction_header(const std::string& line) { 153 | // InstructionHeader header; 154 | // std::vector parts; 155 | // size_t start = 0; 156 | // while (true) { 157 | // size_t semicolon_pos = line.find(';', start); 158 | // if (semicolon_pos == std::string::npos) { 159 | // parts.push_back(trim(line.substr(start))); 160 | // break; 161 | // } 162 | // parts.push_back(trim(line.substr(start, semicolon_pos - start))); 163 | // start = semicolon_pos + 1; 164 | // } 165 | 166 | // if (parts.size() < 4) { 167 | // return header; 168 | 
// } 169 | 170 | // std::string bytes_str = parts[1]; 171 | // if (bytes_str.empty() || bytes_str[0] != '#') { 172 | // return header; 173 | // } 174 | // bytes_str = bytes_str.substr(1); 175 | // for (size_t i = 0; i < bytes_str.size(); i += 2) { 176 | // std::string byte_str = bytes_str.substr(i, 2); 177 | // try { 178 | // uint8_t byte = static_cast(std::stoul(byte_str, nullptr, 16)); 179 | // header.instruction_bytes.push_back(byte); 180 | // } catch (...) { 181 | // // invalid byte, skip 182 | // } 183 | // } 184 | 185 | // header.mnemonic = parts[2]; 186 | // header.operand_size = parts[3]; 187 | 188 | // return header; 189 | // } 190 | 191 | // ZydisRegister register_name_to_enum(const std::string& reg_name) { 192 | // static const std::unordered_map register_map = 193 | // { 194 | // {"rax", ZYDIS_REGISTER_RAX}, {"eax", ZYDIS_REGISTER_EAX}, 195 | // {"ax", ZYDIS_REGISTER_AX}, {"al", ZYDIS_REGISTER_AL}, 196 | // {"rcx", ZYDIS_REGISTER_RCX}, {"ecx", ZYDIS_REGISTER_ECX}, 197 | // {"cx", ZYDIS_REGISTER_CX}, {"cl", ZYDIS_REGISTER_CL}, 198 | // {"rdx", ZYDIS_REGISTER_RDX}, {"edx", ZYDIS_REGISTER_EDX}, 199 | // {"dx", ZYDIS_REGISTER_DX}, {"dl", ZYDIS_REGISTER_DL}, 200 | // {"rbx", ZYDIS_REGISTER_RBX}, {"ebx", ZYDIS_REGISTER_EBX}, 201 | // {"bx", ZYDIS_REGISTER_BX}, {"bl", ZYDIS_REGISTER_BL}, 202 | // {"rsp", ZYDIS_REGISTER_RSP}, {"esp", ZYDIS_REGISTER_ESP}, 203 | // {"sp", ZYDIS_REGISTER_SP}, {"spl", ZYDIS_REGISTER_SPL}, 204 | // {"rbp", ZYDIS_REGISTER_RBP}, {"ebp", ZYDIS_REGISTER_EBP}, 205 | // {"bp", ZYDIS_REGISTER_BP}, {"bpl", ZYDIS_REGISTER_BPL}, 206 | // {"rsi", ZYDIS_REGISTER_RSI}, {"esi", ZYDIS_REGISTER_ESI}, 207 | // {"si", ZYDIS_REGISTER_SI}, {"sil", ZYDIS_REGISTER_SIL}, 208 | // {"rdi", ZYDIS_REGISTER_RDI}, {"edi", ZYDIS_REGISTER_EDI}, 209 | // {"di", ZYDIS_REGISTER_DI}, {"dil", ZYDIS_REGISTER_DIL}, 210 | // {"r8", ZYDIS_REGISTER_R8}, {"r8d", ZYDIS_REGISTER_R8D}, 211 | // {"r8w", ZYDIS_REGISTER_R8W}, {"r8b", ZYDIS_REGISTER_R8B}, 212 | // {"r9", 
ZYDIS_REGISTER_R9}, {"r9d", ZYDIS_REGISTER_R9D}, 213 | // {"r9w", ZYDIS_REGISTER_R9W}, {"r9b", ZYDIS_REGISTER_R9B}, 214 | // {"r10", ZYDIS_REGISTER_R10}, {"r10d", ZYDIS_REGISTER_R10D}, 215 | // {"r10w", ZYDIS_REGISTER_R10W}, {"r10b", ZYDIS_REGISTER_R10B}, 216 | // {"r11", ZYDIS_REGISTER_R11}, {"r11d", ZYDIS_REGISTER_R11D}, 217 | // {"r11w", ZYDIS_REGISTER_R11W}, {"r11b", ZYDIS_REGISTER_R11B}, 218 | // {"r12", ZYDIS_REGISTER_R12}, {"r12d", ZYDIS_REGISTER_R12D}, 219 | // {"r12w", ZYDIS_REGISTER_R12W}, {"r12b", ZYDIS_REGISTER_R12B}, 220 | // {"r13", ZYDIS_REGISTER_R13}, {"r13d", ZYDIS_REGISTER_R13D}, 221 | // {"r13w", ZYDIS_REGISTER_R13W}, {"r13b", ZYDIS_REGISTER_R13B}, 222 | // {"r14", ZYDIS_REGISTER_R14}, {"r14d", ZYDIS_REGISTER_R14D}, 223 | // {"r14w", ZYDIS_REGISTER_R14W}, {"r14b", ZYDIS_REGISTER_R14B}, 224 | // {"r15", ZYDIS_REGISTER_R15}, {"r15d", ZYDIS_REGISTER_R15D}, 225 | // {"r15w", ZYDIS_REGISTER_R15W}, {"r15b", ZYDIS_REGISTER_R15B}, 226 | // }; 227 | 228 | // auto it = register_map.find(reg_name); 229 | // if (it != register_map.end()) { 230 | // return it->second; 231 | // } 232 | // return ZYDIS_REGISTER_NONE; 233 | // } 234 | 235 | // void process_block(const std::vector& block, 236 | // std::vector& test_cases) { 237 | // if (block.empty()) 238 | // return; 239 | // InstructionHeader header = parse_instruction_header(block[0]); 240 | // if (header.instruction_bytes.empty()) 241 | // return; 242 | 243 | // static int test_case_counter = 1; 244 | 245 | // for (size_t i = 1; i < block.size(); ++i) { 246 | // const std::string& line = block[i]; 247 | // auto [in_side, out_side] = parse_test_line(line); 248 | 249 | // TestCase tc; 250 | // tc.name = header.mnemonic + "_test" + 251 | // std::to_string(test_case_counter++); std::replace(tc.name.begin(), 252 | // tc.name.end(), ' ', '_'); std::replace(tc.name.begin(), tc.name.end(), 253 | // ',', '_'); tc.instruction_bytes = header.instruction_bytes; 254 | // tc.couldBeUndefined = true; 255 | 256 | // // 
Process initial registers 257 | // for (const auto& reg_tuple : in_side.registers) { 258 | // std::string reg_name; 259 | // uint64_t value; 260 | // int num_bytes; 261 | // std::tie(reg_name, value, num_bytes) = reg_tuple; 262 | // ZydisRegister reg = register_name_to_enum(reg_name); 263 | // if (reg == ZYDIS_REGISTER_NONE) 264 | // continue; 265 | // uint64_t swapped_value = swap_endianness(value, num_bytes); 266 | // tc.initial_registers.emplace_back(reg, swapped_value); 267 | // } 268 | 269 | // // Process initial flags 270 | // if (in_side.flags) { 271 | // auto [value, num_bytes] = in_side.flags.value(); 272 | // uint64_t swapped_value = swap_endianness(value, num_bytes); 273 | // tc.initial_flags = parseFlagStates(swapped_value); 274 | // } 275 | 276 | // // Process expected registers 277 | // for (const auto& reg_tuple : out_side.registers) { 278 | // std::string reg_name; 279 | // uint64_t value; 280 | // int num_bytes; 281 | // std::tie(reg_name, value, num_bytes) = reg_tuple; 282 | // ZydisRegister reg = register_name_to_enum(reg_name); 283 | // if (reg == ZYDIS_REGISTER_NONE) 284 | // continue; 285 | // uint64_t swapped_value = swap_endianness(value, num_bytes); 286 | // tc.expected_registers.emplace_back(reg, swapped_value); 287 | // } 288 | 289 | // // Process expected flags 290 | // if (out_side.flags) { 291 | // auto [value, num_bytes] = out_side.flags.value(); 292 | // uint64_t swapped_value = swap_endianness(value, num_bytes); 293 | // tc.expected_flags = parseFlagStates(swapped_value); 294 | // } 295 | 296 | // test_cases.push_back(tc); 297 | // } 298 | // } 299 | // std::vector parse_test_cases(const std::string& filename) { 300 | // std::vector test_cases; 301 | // std::ifstream file(filename.c_str(), std::ios::binary); 302 | // if (!file.is_open()) { 303 | // return test_cases; 304 | // } 305 | 306 | // std::vector current_block; 307 | // std::string line; 308 | // while (std::getline(file, line)) { 309 | // line = trim(line); 310 | // if 
(line.empty()) 311 | // continue; 312 | 313 | // if (line.find("instr:") == 0) { 314 | // if (!current_block.empty()) { 315 | // process_block(current_block, test_cases); 316 | // current_block.clear(); 317 | // } 318 | // current_block.push_back(line); 319 | // } else { 320 | // if (!current_block.empty()) { 321 | // current_block.push_back(line); 322 | // } 323 | // } 324 | // } 325 | 326 | // if (!current_block.empty()) { 327 | // process_block(current_block, test_cases); 328 | // } 329 | 330 | // return test_cases; 331 | // } 332 | 333 | // int testInit(std::string file) { 334 | // llvm::LLVMContext context; 335 | // std::string mod_name = "my_lifting_module"; 336 | // llvm::Module lifting_module = llvm::Module(mod_name.c_str(), context); 337 | 338 | // std::vector argTypes; 339 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 340 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 341 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 342 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 343 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 344 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 345 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 346 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 347 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 348 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 349 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 350 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 351 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 352 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 353 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 354 | // argTypes.push_back(llvm::Type::getInt64Ty(context)); 355 | // argTypes.push_back(llvm::PointerType::get(context, 0)); 356 | // argTypes.push_back(llvm::PointerType::get(context, 0)); // temp fix TEB 357 | 358 | // auto functionType = 359 | // 
llvm::FunctionType::get(llvm::Type::getInt64Ty(context), argTypes, 0); 360 | 361 | // const std::string function_name = "main"; 362 | // auto function = 363 | // llvm::Function::Create(functionType, llvm::Function::ExternalLinkage, 364 | // function_name.c_str(), lifting_module); 365 | // const std::string block_name = "entry"; 366 | // auto bb = llvm::BasicBlock::Create(context, block_name.c_str(), function); 367 | 368 | // llvm::InstSimplifyFolder Folder(lifting_module.getDataLayout()); 369 | // llvm::IRBuilder builder = 370 | // llvm::IRBuilder(bb, Folder); 371 | 372 | // lifterClass<>* main = new lifterClass(builder, 0x133700); 373 | 374 | // // we will need a resetter, though im not sure if we need to only reset 375 | // // registers, flags and mem or llvm context? 376 | 377 | // auto tester = Tester(main, true); 378 | // tester.addTest(test1, "test"); 379 | // tester.addTest(test2, "test2"); 380 | // TestCase tc = {.name = "testcase", 381 | // .instruction_bytes = {0x90}, 382 | // .initial_registers = {{ZYDIS_REGISTER_RAX, 1}}, 383 | // .initial_flags = {{FLAG_CF, FlagState::SET}}, 384 | 385 | // .expected_registers = {{ZYDIS_REGISTER_RAX, 1}}, 386 | // .expected_flags = {{FLAG_CF, FlagState::SET}}, 387 | // .couldBeUndefined = false}; 388 | 389 | // /* 390 | // auto expectedFlags = tester.parseFlagStates(0b101); 391 | // for (auto [a, b] : expectedFlags) { 392 | // outs() << "a: " << a << " b:" << b << "\n"; 393 | // } 394 | // */ 395 | // TestCase tc2 = {.name = "testcase2", 396 | // .instruction_bytes = {0x90}, 397 | // .initial_flags = parseFlagStates(4), 398 | 399 | // .expected_flags = parseFlagStates(4), 400 | // .couldBeUndefined = true}; 401 | 402 | // TestCase tccmov = {.name = "testcasecmov", 403 | // .instruction_bytes = {0x48, 0x0F, 0x44, 0xC1}, 404 | // .initial_registers = {{ZYDIS_REGISTER_RAX, 1}, 405 | // {ZYDIS_REGISTER_RCX, 1337}}, 406 | // .initial_flags = {{FLAG_ZF, FlagState::SET}}, 407 | 408 | // .expected_registers = 
{{ZYDIS_REGISTER_RAX, 1337}}, 409 | // .couldBeUndefined = true}; 410 | 411 | // TestCase tccmov2 = {.name = "testcasecmov23", 412 | // .instruction_bytes = {0x48, 0x0F, 0x44, 0xC1}, 413 | // .initial_registers = {{ZYDIS_REGISTER_RAX, 1337}, 414 | // {ZYDIS_REGISTER_RCX, 1}}, 415 | // .initial_flags = {{FLAG_ZF, FlagState::CLEAR}}, 416 | 417 | // .expected_registers = {{ZYDIS_REGISTER_RAX, 1337}}, 418 | // .couldBeUndefined = true}; 419 | 420 | // tester.addTest(tc); 421 | // tester.addTest(tc2); 422 | // tester.addTest(tccmov); 423 | // tester.addTest(tccmov2); 424 | // auto a = parse_test_cases(file); 425 | // for (auto x : a) { 426 | // tester.addTest(x); 427 | // } 428 | // return tester.runAllTests(); 429 | // } 430 | -------------------------------------------------------------------------------- /lifter/test_instructions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | int test1(); 5 | int test2(); 6 | int testInit(std::string file); -------------------------------------------------------------------------------- /lifter/tester.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "OperandUtils.ipp" 4 | #include "ZydisDisassembler.hpp" 5 | #include "includes.h" 6 | #include "lifterClass.hpp" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | enum FlagState { UNDEF = -1, CLEAR = 0, SET = 1, UNKNOWN }; 17 | 18 | struct TestCase { 19 | 20 | struct RegisterState { 21 | ZydisRegister reg = ZYDIS_REGISTER_NONE; 22 | uint64_t value; 23 | }; 24 | 25 | struct FlagsStatus { 26 | Flag flag = FLAGS_END; 27 | FlagState state = UNKNOWN; // to catch bugs 28 | }; 29 | 30 | std::string name; 31 | 32 | // Inputs 33 | // TODO: std::array for constexpr 34 | std::vector instruction_bytes; 35 | std::vector initial_registers; 36 | std::vector initial_flags; 37 | 38 | // Expected outputs 
39 | std::vector expected_registers; 40 | std::vector expected_flags; 41 | bool couldBeUndefined = true; 42 | }; 43 | 44 | inline std::vector parseFlagStates(uint64_t flagint) { 45 | std::vector result; 46 | result.resize(FLAGS_END); 47 | 48 | for (size_t i = 0; i < FLAGS_END; i++) { 49 | bool isSet = (flagint >> i) & 1; 50 | result[i] = TestCase::FlagsStatus{ 51 | .flag = (Flag)i, .state = isSet ? FlagState::SET : FlagState::CLEAR}; 52 | } 53 | 54 | return result; 55 | } 56 | 57 | class Tester { 58 | public: 59 | ZydisDecoder decoder; 60 | lifterClass<>* lifter; 61 | 62 | using TestFunction = std::function; 63 | 64 | std::vector> tests; 65 | std::vector testCases; 66 | 67 | void addTest(TestFunction fn, const std::string& name) { 68 | tests.emplace_back(fn, name); 69 | } 70 | void addTest(const TestCase& fn) { 71 | // 72 | testCases.emplace_back(fn); 73 | } 74 | 75 | bool execute_test_case(const TestCase& tc) { 76 | 77 | bool isSuccessfull = true; 78 | std::string str; 79 | llvm::raw_string_ostream failureDetails(str); 80 | 81 | for (const auto& reg : tc.initial_registers) { 82 | setRegister(reg.reg, reg.value); 83 | } 84 | 85 | for (const auto& reg : tc.initial_flags) { 86 | setFlag(reg.flag, reg.state); 87 | } 88 | 89 | disassembleBytesAndLift(tc.instruction_bytes); 90 | 91 | // Verify registers 92 | for (const auto& expected : tc.expected_registers) { 93 | // registers usually shouldn't be undefined 94 | if (!isRegisterEqualTo(expected.reg, expected.value, 95 | tc.couldBeUndefined)) { 96 | 97 | failureDetails << "Incorrect register:" << "\n Register: " 98 | << ZydisRegisterGetString(expected.reg) 99 | << "\n Expected: " << expected.value 100 | << "\n Actual: " /* */; 101 | 102 | // print register 103 | 104 | // print as const if possible for convenience 105 | getRegister(expected.reg)->print(failureDetails); 106 | 107 | failureDetails << "\n"; 108 | 109 | isSuccessfull = false; 110 | } 111 | } 112 | 113 | const auto flagcompare = [](FlagState original, 
FlagState compare, 114 | bool couldBeUndefined = true) { 115 | if (couldBeUndefined && original == FlagState::UNDEF) 116 | return true; 117 | return original == compare; 118 | }; 119 | 120 | for (const auto& flag : tc.expected_flags) { 121 | FlagState flagState = getFlagState(flag.flag); 122 | if (!flagcompare(flagState, flag.state, tc.couldBeUndefined)) { 123 | 124 | failureDetails << "Incorrect flag:" // 125 | << "\n Flag: " << flag.flag << "(" << (int)flag.flag 126 | << ")" << "\n Expected: " << flag.state 127 | << "\n Actual: " << flagState << "\n"; 128 | 129 | isSuccessfull = false; 130 | } 131 | } 132 | 133 | // TODO: check for unexpected changes 134 | 135 | std::cout << "[" << (isSuccessfull ? " OK " : " FAIL ") << "] " << tc.name 136 | << "\n"; 137 | if (!isSuccessfull) { 138 | std::cout << failureDetails.str() << std::endl; 139 | } 140 | 141 | return isSuccessfull; 142 | } 143 | 144 | int runAllTests() { 145 | int failures = 0; 146 | for (const auto& [testFn, name] : tests) { 147 | reset(); 148 | bool result = testFn(this); 149 | std::cout << "[" << (result ? " OK " : " FAIL ") << "] " << name 150 | << "\n"; 151 | failures += !result; 152 | if (!result) 153 | exit(0); 154 | } 155 | 156 | for (const auto& tc : testCases) { 157 | reset(); 158 | bool result = execute_test_case(tc); 159 | failures += !result; 160 | if (!result) 161 | exit(0); 162 | } 163 | 164 | return failures; 165 | } 166 | 167 | Tester(lifterClass<>* lifter, bool is64Bit = true) : lifter(lifter) { 168 | 169 | ZydisDecoderInit(&decoder, 170 | is64Bit ? ZYDIS_MACHINE_MODE_LONG_64 171 | : ZYDIS_MACHINE_MODE_LEGACY_32, 172 | is64Bit ? 
ZYDIS_STACK_WIDTH_64 : ZYDIS_STACK_WIDTH_32); 173 | reset(); 174 | } 175 | 176 | bool isRegisterEqualTo(ZydisRegister reg, uint64_t v, 177 | bool couldBeUndefined = true) { 178 | /* 179 | auto val = lifter->GetRegisterValue(zydisRegisterToMergenRegister(reg)); 180 | 181 | if (auto a_c = dyn_cast(val)) { 182 | return (a_c->equalsInt(v)); 183 | } 184 | 185 | if (couldBeUndefined && isa(val)) 186 | return 1; 187 | */ 188 | 189 | return 0; 190 | } 191 | 192 | void setRegister(ZydisRegister reg, uint64_t value) { 193 | // lifter->SetRegisterValue(zydisRegisterToMergenRegister(reg), 194 | // lifter->builder.getInt64(value)); 195 | } 196 | 197 | Value* getRegister(ZydisRegister reg) { 198 | // auto val = lifter->GetRegisterValue(zydisRegisterToMergenRegister(reg)); 199 | return nullptr; 200 | } 201 | 202 | Value* getFlag(Flag reg) { 203 | // 204 | return lifter->getFlag(reg); 205 | } 206 | 207 | FlagState getFlagState(Flag reg) { 208 | auto flag = lifter->getFlag(reg); 209 | if (isa(flag)) 210 | return FlagState::UNDEF; 211 | if (auto flagv = dyn_cast(flag)) { 212 | if (flagv->getZExtValue() == 0) 213 | return FlagState::CLEAR; 214 | if (flagv->getZExtValue() == 1) 215 | return FlagState::SET; 216 | } 217 | return FlagState::UNKNOWN; 218 | } 219 | 220 | void setFlagState(Flag flag, FlagState state) { 221 | switch (state) { 222 | case FlagState::CLEAR: 223 | case FlagState::SET: { 224 | lifter->setFlag(flag, lifter->builder.getInt1(state)); 225 | break; 226 | } 227 | case FlagState::UNDEF: { 228 | 229 | auto undef_f = UndefValue::get(lifter->builder.getInt1Ty()); 230 | lifter->setFlag(flag, undef_f); 231 | break; 232 | } 233 | case FlagState::UNKNOWN: { 234 | // ? 
235 | break; 236 | } 237 | } 238 | } 239 | 240 | void setFlag(Flag reg, uint64_t value) { 241 | lifter->setFlag(reg, lifter->builder.getInt64(value)); 242 | } 243 | 244 | void resetRegistersAndFlags() { 245 | // 246 | // set every value to undef 247 | auto undef = UndefValue::get(lifter->builder.getInt64Ty()); 248 | auto undef_f = UndefValue::get(lifter->builder.getInt1Ty()); 249 | for (int i = 0; i < RegisterManager::RegisterIndex::REGISTER_COUNT; i++) 250 | lifter->Registers.vec[i] = undef; 251 | 252 | for (int i = 0; i < FLAGS_END; i++) 253 | lifter->FlagList[i] = undef; 254 | } 255 | 256 | void reset() { 257 | // 258 | resetRegistersAndFlags(); 259 | } 260 | 261 | void disassembleBytesAndLift(const std::vector& bytes) { 262 | ZydisDecodedInstruction instruction; 263 | ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; 264 | 265 | ZydisDecoderDecodeFull(&decoder, bytes.data(), 15, &instruction, operands); 266 | 267 | lifter->instruction.attributes = instruction.attributes; 268 | 269 | lifter->instruction.mnemonic = (instruction.mnemonic); 270 | 271 | // lifter->instruction.operand_count_total = instruction.operand_count; 272 | 273 | lifter->instruction.operand_count_visible = 274 | instruction.operand_count_visible; 275 | 276 | lifter->liftInstructionSemantics(); 277 | } 278 | }; 279 | -------------------------------------------------------------------------------- /lifter/utils.cpp: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | #include "llvm/IR/Value.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | // #include 9 | 10 | /* 11 | 12 | 13 | float intBitsToFloat(int bits) { 14 | // Extract components from the int using IEEE 754 single precision format 15 | int sign = (bits >> 31) & 0x1; 16 | int exponent = (bits >> 23) & 0xFF; 17 | int mantissa = bits & 0x7FFFFF; 18 | 19 | // Build float value according to IEEE 754 formula 20 | float value = 0; 21 | if (exponent == 0) { 22 | if 
(mantissa == 0) { 23 | value = sign ? -0.0f : 0.0f; 24 | } else { 25 | // Denormalized number 26 | value = (sign ? -1.0f : 1.0f) * (mantissa / (float)(1 << 23)) * 27 | powf(2.0f, -126); 28 | } 29 | } else if (exponent == 0xFF) { 30 | if (mantissa == 0) { 31 | value = sign ? -INFINITY : INFINITY; 32 | } else { 33 | value = NAN; 34 | } 35 | } else { 36 | // Normalized number 37 | value = (sign ? -1.0f : 1.0f) * (1.0f + mantissa / (float)(1 << 23)) * 38 | powf(2.0f, exponent - 127); 39 | } 40 | 41 | return value; 42 | } 43 | int floatBitsToInt(float f) { 44 | if (f == 0.0f) { 45 | return (std::signbit(f) ? 0x80000000 : 0); 46 | } 47 | 48 | if (std::isinf(f)) { 49 | return (f < 0 ? 0xFF800000 : 0x7F800000); 50 | } 51 | 52 | if (std::isnan(f)) { 53 | return 0x7FC00000; // One common NaN pattern 54 | } 55 | 56 | int sign = std::signbit(f) ? 1 : 0; 57 | float abs_f = std::fabs(f); 58 | 59 | int exponent = std::ilogbf(abs_f) + 127; // Get biased exponent 60 | 61 | // Handle denormals 62 | if (exponent <= 0) { 63 | float mantissa_f = abs_f * powf(2.0f, 149); // 126 + 23 64 | int mantissa = (int)mantissa_f; 65 | return (sign << 31) | mantissa; 66 | } 67 | 68 | // Extract mantissa (23 bits of precision) 69 | float mantissa_f = (abs_f / powf(2.0f, std::ilogbf(abs_f)) - 1.0f) * 70 | (float)(1 << 23); int mantissa = (int)mantissa_f; 71 | 72 | return (sign << 31) | (exponent << 23) | mantissa; 73 | } 74 | 75 | */ 76 | 77 | namespace debugging { 78 | int ic = 1; 79 | int increaseInstCounter() { return ++ic; } 80 | bool shouldDebug = false; 81 | llvm::raw_ostream* debugStream = nullptr; 82 | std::unique_ptr fileStream; 83 | 84 | void enableDebug(const std::string& filename = "") { 85 | shouldDebug = true; 86 | if (!filename.empty()) { 87 | std::error_code EC; 88 | fileStream = std::make_unique(filename, EC); 89 | if (EC) { 90 | llvm::errs() << "Error opening debug file: " << EC.message() << "\n"; 91 | fileStream.reset(); 92 | debugStream = &llvm::errs(); 93 | shouldDebug = false; 
94 | return; 95 | } 96 | debugStream = fileStream.get(); 97 | } else { 98 | debugStream = &llvm::outs(); 99 | } 100 | llvm::outs() << "Debugging enabled\n"; 101 | } 102 | void printLLVMValue(llvm::Value* v, const char* name) { 103 | if (!shouldDebug || !debugStream) 104 | return; 105 | *debugStream << " " << name << " : "; 106 | v->print(*debugStream); 107 | *debugStream << "\n"; 108 | debugStream->flush(); 109 | } 110 | 111 | // Other functions remain the same, but use debugStream instead of 112 | // llvm::outs() For example: 113 | 114 | void doIfDebug(const std::function& dothis) { 115 | if (!shouldDebug) 116 | return; 117 | (dothis)(); 118 | } 119 | 120 | } // namespace debugging 121 | 122 | namespace argparser { 123 | void printHelp() { 124 | std::cerr << "Options:\n" 125 | << " -d, --enable-debug Enable debugging mode\n" 126 | << " -h Display this help message\n" 127 | << " --concretize-unsafe-reads Concretizes potentially unsafe " 128 | "reads to writable sections \n"; 129 | } 130 | 131 | std::map> options = { 132 | {"-d", []() { debugging::enableDebug("debug.txt"); }}, 133 | // 134 | {"-h", printHelp}}; 135 | 136 | void parseArguments(std::vector& args) { 137 | std::vector newArgs; 138 | 139 | for (const auto& arg : args) { 140 | // cout << arg << "\n"; 141 | if (options.find(arg) != options.end()) 142 | options[arg](); 143 | else if (*(arg.c_str()) == '-') 144 | printHelp(); 145 | else 146 | newArgs.push_back(arg); 147 | } 148 | 149 | args.swap(newArgs); 150 | } 151 | 152 | } // namespace argparser 153 | 154 | namespace timer { 155 | using clock = std::chrono::high_resolution_clock; 156 | using time_point = std::chrono::time_point; 157 | using duration = std::chrono::duration; 158 | 159 | time_point startTime; 160 | bool running = false; 161 | 162 | void startTimer() { 163 | startTime = clock::now(); 164 | running = true; 165 | } 166 | 167 | double getTimer() { 168 | if (running) { 169 | return std::chrono::duration_cast(clock::now() - startTime) 170 | 
.count(); 171 | } 172 | return 0.0; 173 | } 174 | 175 | double stopTimer() { 176 | if (running) { 177 | running = false; 178 | return std::chrono::duration_cast(clock::now() - startTime) 179 | .count(); 180 | } 181 | return 0.0; 182 | } 183 | 184 | void resetTimer() { 185 | startTime = clock::now(); 186 | running = true; 187 | } 188 | } // namespace timer -------------------------------------------------------------------------------- /lifter/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "CommonDisassembler.hpp" 3 | #include "llvm/IR/Value.h" 4 | #include 5 | #include 6 | 7 | // #define _NODEV why? 8 | 9 | #ifndef UNREACHABLE 10 | #define UNREACHABLE(msg) \ 11 | do { \ 12 | \ 13 | /*llvm::outs().flush();*/ \ 14 | /*std::cout.flush();*/ \ 15 | llvm::llvm_unreachable_internal(msg, __FILE__, __LINE__); \ 16 | } while (0) 17 | #endif 18 | 19 | #ifndef _NODEV 20 | #define printvalue(x) \ 21 | do { \ 22 | debugging::printLLVMValue(x, #x); \ 23 | } while (0); 24 | // outs() << " " #x " : "; x->print(outs()); 25 | // outs() << "\n"; outs().flush(); 26 | #define printvalue2(x) \ 27 | do { \ 28 | debugging::printValue(x, #x); \ 29 | } while (0); 30 | #else 31 | #define printvalue(x) ((void)0); 32 | #define printvalue2(x) ((void)0); 33 | #endif // _NODEV 34 | 35 | #define printvalueforce(x) \ 36 | do { \ 37 | outs() << " " #x " : "; \ 38 | x->print(outs()); \ 39 | outs() << "\n"; \ 40 | outs().flush(); \ 41 | } while (0); 42 | 43 | #define printvalueforce2(x) \ 44 | do { \ 45 | llvm::outs() << " " #x " : " << x << __FILE__ << __LINE__ << "\n"; \ 46 | llvm::outs().flush(); \ 47 | } while (0); 48 | 49 | namespace debugging { 50 | int increaseInstCounter(); 51 | void enableDebug(const std::string& filename); 52 | void printLLVMValue(llvm::Value* v, const char* name); 53 | void doIfDebug(const std::function& dothis); 54 | 55 | extern bool shouldDebug; 56 | extern llvm::raw_ostream* debugStream; 57 | 58 | 
template void printValue(const T& v, const char* name) { 59 | if (!shouldDebug || !debugStream) 60 | return; 61 | 62 | if constexpr (std::is_same_v || std::is_same_v) { 63 | *debugStream << " " << name << " : " << static_cast(v) << "\n"; 64 | debugStream->flush(); 65 | return; 66 | } /* 67 | if constexpr (std::is_same_v) { 68 | *debugStream << " " << name << " : " 69 | << static_cast(v).to_string() << "\n"; 70 | debugStream->flush(); 71 | return; 72 | }*/ 73 | else 74 | *debugStream << " " << name << " : " << v << "\n"; 75 | debugStream->flush(); 76 | } 77 | 78 | } // namespace debugging 79 | 80 | namespace argparser { 81 | void parseArguments(std::vector& args); 82 | } // namespace argparser 83 | 84 | namespace timer { 85 | void startTimer(); 86 | double stopTimer(); 87 | double getTimer(); 88 | void suspendTimer(); 89 | void resumeTimer(); 90 | } // namespace timer 91 | -------------------------------------------------------------------------------- /testcases/CommutativeOrAssociative.asm: -------------------------------------------------------------------------------- 1 | section .text 2 | 3 | 4 | ; The idea is taking 5 | ; %0 = add 10, %a 6 | ; %1 = add 5, %0 7 | ; and transforming to 8 | ; %0 = add %a, 10 9 | ; %1 = add %0, 5 10 | ; now we can check if RHS is a constant, and fold the instruction 11 | ; %0 = add %a, 15 12 | 13 | global main 14 | main: 15 | mov rax, 10 16 | add rax, rcx 17 | sub rax, rcx 18 | ret -------------------------------------------------------------------------------- /testcases/bench_add.asm: -------------------------------------------------------------------------------- 1 | section .text 2 | 3 | global main 4 | main: 5 | xor rcx, rcx 6 | do_loop: 7 | add rax, rax 8 | inc rcx 9 | cmp rcx, 1000 10 | jbe do_loop 11 | ret -------------------------------------------------------------------------------- /testcases/bench_add_concretized.asm: -------------------------------------------------------------------------------- 1 | section .text 2 
global main
; Every value in this loop is derived from the constant rcx = 0.
main:
    xor rcx, rcx            ; rcx = 0

do_loop:
    add rcx, rcx            ; rcx = 2 * rcx
    inc rcx                 ; rcx = 2 * rcx + 1  (1, 3, 7, ..., 1023)
    cmp rcx, 1000
    jbe do_loop             ; repeat while rcx <= 1000 (unsigned)

    ret
--------------------------------------------------------------------------------
/testcases/bench_add_mem.asm:
--------------------------------------------------------------------------------
section .text

global main
; Adds rax to a stack slot (initialised with rax) on every loop iteration.
main:
    xor rcx, rcx            ; rcx = 0 (loop counter)
    push rax                ; [rsp] = rax

do_loop:
    add [rsp], rax          ; accumulate rax into the stack slot
    inc rcx
    cmp rcx, 1000
    jbe do_loop             ; repeat while rcx <= 1000 (unsigned)

    pop rax                 ; rax = accumulated value
    ret
--------------------------------------------------------------------------------
/testcases/bench_add_mem_concretized.asm:
--------------------------------------------------------------------------------
section .text

global main
; Same loop as bench_add_mem.asm, but the stack slot and the addend both come
; from the loop counter, which starts at the constant 0.
main:
    xor rcx, rcx            ; rcx = 0 (loop counter)
    push rcx                ; [rsp] = 0

do_loop:
    add [rsp], rcx          ; accumulate the counter into the stack slot
    inc rcx
    cmp rcx, 1000
    jbe do_loop             ; repeat while rcx <= 1000 (unsigned)

    pop rax                 ; rax = 0 + 1 + ... + 1000
    ret
--------------------------------------------------------------------------------
/testcases/teb_test.asm:
--------------------------------------------------------------------------------
section .text

global main
; Reads a qword through the FS segment at offset 0x30.
; NOTE(review): NASM normally spells a segment override inside the brackets
; ([fs:0x30]); confirm this form assembles with the assembler in use.
main:
    mov rax, fs:[0x30]
    ret
--------------------------------------------------------------------------------
/testcases/test_branch_mem.asm:
--------------------------------------------------------------------------------
section .text

global main
; Branches on rax == 1 while a copy of rax is held on the stack.
main:
    cmp rax, 1              ; zf = (rax == 1)
    push rax                ; push does not modify the flags
    jz condition_taken_zf   ; taken when rax == 1
    pop rax                 ; not taken: discard the saved rax ...
    push rcx
    pop rax                 ; ... and return rcx in rax
    ret

condition_taken_zf:
    pop rax                 ; restore rax (known to be 1 on this path)
    inc rax                 ; rax = 2
cond_not_taken_zf:          ; only reached by falling through from above
    ret


--------------------------------------------------------------------------------
/testcases/test_branch_sf.asm:
--------------------------------------------------------------------------------
section .text

global main
main:
    cmp rax, 0 ; zf = (rax == 0) ; sf = (rax < 0, i.e. bit 63 set) ; of = 0 (subtracting 0 never overflows) ; ....
; (test_branch_sf.asm, continued -- flags were set by `cmp rax, 0` above)

    jns cond_not_taken_sf ; taken when sf == 0; if not taken, rax is negative, i.e. bit 63 (0x8000000000000000 = 9223372036854775808) is set

condition_taken_sf: ; so the basic block here can assume rax's msb is set

    shr rax, 63 ; rax will be 1

cond_not_taken_sf: ; reached from both paths, so nothing can be assumed about rax here

    ret
--------------------------------------------------------------------------------
/testcases/test_branch_zf.asm:
--------------------------------------------------------------------------------
section .text

global main
main:
    cmp rax, 1 ; zf = (rax - 1 == 0), i.e. zf is set when rax == 1

    jz condition_taken_zf ; taken when zf == 1; if taken, we can say rax is 1 for that branch
    ret ; not taken: rax != 1, returned unchanged
condition_taken_zf: ; so the basic block here can assume rax is 1

    inc rax ; rax will be 2

cond_not_taken_zf: ; only reached by falling through from condition_taken_zf

    ret


; NOTE(review): no conditional jump follows the `cmp` in main2, so execution
; falls straight through every label below. The per-line comments and the
; pseudo-IR after the routine describe the intended branching version
; (branch on rax == 0).
main2:
    lea rcx, [rcx+rax]
    cmp rax, 0
condition_taken_zf2: ; so the basic block here will assume rax is 0
    inc rax ; rax will be 1
    add rax, rcx ; rcx + 1, not rcx+rax+1
cond_not_taken_zf2: ; but this basicblock wont assume rax is 0
    ret

; %a = rcx + rax
; %zf = rax == 0
; rax_zero.bb:
; %inc = 0 + 1 ; simplified to 1
; %b = %a + %inc ; %a can be simplified to %rcx
; ret %b

; rax_nonzero.bb
; ret %rax

; We can only be sure about the value that generates the flag; in this case
; it is `cmp rax, 0`,
; so
; %zf0 = %v - 0
; %zf1 = %zf0 == 0
; we can only assume the value of %zf0 because we check %zf1
; so %zf0 should be 0
; by extension %v should be 0
;

; if it was
; cmp rax, rcx
; then
; %zf0 = %rax - %rcx
; %zf1 = %zf0 == 0
; we can only assume %zf0 is 0 (if true)
;
--------------------------------------------------------------------------------
/testcases/test_branches.asm:
--------------------------------------------------------------------------------
section .text

%define SF 0x80 ; mask of the sign flag (bit 7) in RFLAGS

global main
; Dispatches through a two-entry jump table indexed by the sign flag, using
; push/ret as an indirect jump.
main:
    sub rcx, 0 ; sets SF if rcx is negative
    pushfq
    pop rsi ; rsi = RFLAGS
    and rsi, SF ; check if SF is turned on
    shr rsi, 7 ; rsi = 0 or 1
    lea rcx, [rel jtable]
    ; NOTE(review): this is a 64-bit load from a table of 32-bit (dd) entries,
    ; so the upper half of rax receives the bytes that follow the selected
    ; entry -- confirm whether a 32-bit load was intended.
    mov rax, [rcx+rsi*4]
    lea rax, [rcx+rax] ; table base + table entry
    push rax
    ret ; jumps to the address just pushed


jtable: dd test1 - jtable ; entries are offsets relative to jtable
    dd test2 - jtable

test1:
    xor rax, rax
    or rax, rsi ; rax = rsi
    ret
test2:
    xor rax, rax
    or rax, rsi
    inc rax ; rax = rsi + 1
    ret
--------------------------------------------------------------------------------
/testcases/test_div.asm:
--------------------------------------------------------------------------------
section .text

; Unsigned division at each operand size. DIV divides the double-width value
; held in (r)dx:(r)ax -- or ax for the 8-bit form -- by the operand.

global test_div_64
test_div_64:
    mov rdx, 0xbf01
    mov rax, 0x800000007F65B9DD
    mov rcx, rax ; rcx = divisor
    mov rax, 0x11 ; dividend = rdx:rax = 0xbf01:0x11
    div rcx
    ret

global test_div_32
; NOTE(review): edx:eax / ecx = 0x1234567887654321 / 0x1000 does not fit in
; 32 bits; hardware raises #DE for a quotient that overflows. Confirm this is
; the intended scenario.
test_div_32:
    mov edx, 0x12345678
    mov ecx, 0x1000
    mov eax, 0x87654321
    div ecx
    ret

global test_div_16
; NOTE(review): dx:ax / cx = 0x12345678 / 0x100 does not fit in 16 bits
; (#DE on hardware) -- confirm.
test_div_16:
    mov dx, 0x1234
    mov ax, 0x5678
    mov cx, 0x100
    div cx
    ret

global test_div_8
; NOTE(review): ax / cl = 0x1278 / 0x10 does not fit in 8 bits
; (#DE on hardware) -- confirm.
test_div_8:
    mov ax, 0x1278
    mov cl, 0x10
    div cl
    ret
--------------------------------------------------------------------------------
/testcases/test_idiv.asm:
--------------------------------------------------------------------------------
section .text

; Signed counterparts of the routines in test_div.asm (same operand values).

global test_idiv_64
test_idiv_64:
    mov rdx, 0xbf01
    mov rax, 0x800000007F65B9DD
    mov rcx, rax ; rcx = divisor (negative when read as signed)
    mov rax, 0x11 ; dividend = rdx:rax = 0xbf01:0x11
    idiv rcx
    ret

global test_idiv_32
; NOTE(review): the quotient of edx:eax / ecx does not fit in 32 bits
; (#DE on hardware) -- confirm this is intended.
test_idiv_32:
    mov edx, 0x12345678
    mov ecx, 0x1000
    mov eax, 0x87654321
    idiv ecx
    ret

global test_idiv_16
test_idiv_16:
    mov dx, 0x1234
    mov ax, 0x5678
; (test_idiv.asm, continued -- remainder of test_idiv_16: dx:ax = 0x1234:0x5678)
    mov cx, 0x100
    ; NOTE(review): 0x12345678 / 0x100 does not fit in 16 bits (#DE on
    ; hardware) -- confirm this is intended.
    idiv cx
    ret

global test_idiv_8
; NOTE(review): ax / cl = 0x1278 / 0x10 does not fit in 8 bits
; (#DE on hardware) -- confirm.
test_idiv_8:
    mov ax, 0x1278
    mov cl, 0x10
    idiv cl
    ret
--------------------------------------------------------------------------------
/testcases/test_indirect_mem.asm:
--------------------------------------------------------------------------------
section .text

global main
; Loads one of two stack slots selected by the low bit of rcx.
main:
    push rax ; slot at [rsp+8] after the next push
    push rcx ; slot at [rsp]
    and rcx, 1 ; rcx = 0 or 1
    mov rax, [rsp+rcx*8] ; rcx = 0 -> saved rcx, rcx = 1 -> saved rax
    add rsp, 16 ; drop both slots
    ret
--------------------------------------------------------------------------------
/testcases/test_indirect_mem2.asm:
--------------------------------------------------------------------------------
section .text

global main
; Same as test_indirect_mem.asm, but both slots hold rcx, so the loaded value
; is the saved rcx whichever slot is selected.
main:
    push rcx
    push rcx
    and rcx, 1 ; rcx = 0 or 1
    mov rax, [rsp+rcx*8]
    add rsp, 16 ; drop both slots
    ret
--------------------------------------------------------------------------------
/testcases/test_invalid_mem.asm:
--------------------------------------------------------------------------------
section .text

global main
; Calls through a stack slot that holds the constant 10, i.e. an indirect
; call whose target is address 10.
main:
    mov rax, 10
    push rax ; [rsp] = 10
    call [rsp]
    ret
--------------------------------------------------------------------------------
/testcases/test_memory.asm:
--------------------------------------------------------------------------------
section .text

; Partially overlapping stores followed by a full-width load of the pushed
; qword. Byte addresses in the comments assume rsp is 24 on entry, so the
; pushed qword occupies addresses 16..23 (lowest address listed first).

global main_2
main_2: ; assume rsp is 24
    mov rdx, 0xffffffffffffffff ;
    mov ecx, 0x2222
    push rdx ; rsp = 16; [16..23] = FF FF FF FF FF FF FF FF
    mov dword [rsp-1], ecx ; writes 22 22 00 00 to [15..18] (ecx = 0x00002222)
    mov rax, [rsp] ; [16..23] = 22 00 00 FF FF FF FF FF -> rax = 0xFF_FF_FF_FF_FF_00_00_22
    ; NOTE(review): the earlier annotation here showed 0x11 bytes
    ; (0xFF_FF_FF_FF_FF_11_11_22), which matches ecx = 0x11112222 rather than
    ; 0x2222 -- confirm which value is intended.
    pop rdx ; rdx = same qword as rax
    ret

global main
main: ; assume rsp is 24
    mov rdx, 0xffffffffffffffff ;
    mov ecx, 0x2222
    push rdx ; rsp = 16; [16..23] = FF FF FF FF FF FF FF FF
    mov word [rsp+2], cx ; writes 22 22 to [18..19]
; (test_memory.asm, continued -- remainder of `main`: the pushed qword is at
; [16..23] and `mov word [rsp+2], cx` has written 22 22 to [18..19])
    mov rax, [rsp] ; [16..23] = FF FF 22 22 FF FF FF FF
    pop rdx
    ret ;0xFF_FF_FF_FF_22_22_FF_FF



global main_
main_: ; assume rsp is 24
    mov rdx, 0xffffffffffffffff ;
    mov ecx, 0x2222
    push rdx ; rsp = 16; [16..23] = FF FF FF FF FF FF FF FF
    mov word [rsp+7], cx ; writes 22 to [23] and 22 to [24] (one byte past the pushed qword)
    mov rax, [rsp] ; [16..23] = FF FF FF FF FF FF FF 22
    pop rdx ;0x22_FF_FF_FF_FF_FF_FF_FF
    ret


global main3
main3: ; assume rsp is 24
    mov rdx, 0x1122334455667788 ;
    mov rcx, 0x44444444 ;
    push rdx ; 0x1122334455667788 -> [16..23] = 88 77 66 55 44 33 22 11
    mov dword [rsp+1], ecx ; writes 44 44 44 44 to [17..20] -> [16..23] = 88 44 44 44 44 33 22 11
    mov ecx, 0x2222
    mov word [rsp-1], cx ; writes 22 to [15] and 22 to [16]
    mov rax, [rsp] ; [16..23] = 22 44 44 44 44 33 22 11 -> rax = 0x11_22_33_44_44_44_44_22
    pop rdx
    ret

section .text
global main4
; Overwrites the upper dword of the pushed qword, then pops it back.
main4:
    mov rax, 0x1122334455667788
    push rax
    mov dword [rsp+4], 0x44332211 ; replaces the upper four bytes of the pushed qword
    pop rax ; rax = 0x44332211_55667788
    ret


--------------------------------------------------------------------------------
/testcases/test_reallocate.asm:
--------------------------------------------------------------------------------
section .text

global main
; Reserves 0x200 bytes of stack and stores rcx (masked to 0 or 1) into the
; reserved area at offset rcx*8.
main:
    sub rsp, 0x200 ; reserve 0x200 bytes
    mov rax, rsp
    and rcx, 1 ; rcx = 0 or 1
    lea rax, [rax+rcx*8] ; rax = rsp or rsp + 8
    mov [rax], rcx
    add rsp, 0x200 ; release the reservation
    ret
--------------------------------------------------------------------------------