├── .gitignore ├── LICENSE ├── README.md ├── images ├── deobfuscated.jpg └── obfuscated.jpg ├── requirements.txt ├── samples ├── linux │ ├── CFF.bin │ ├── CFF_full.bin │ ├── CFF_full.c │ ├── deob_CFF.bin │ └── deob_CFF_full.bin ├── linux64 │ ├── CFF_full_linux64.bin │ └── deob_CFF_full_linux64.bin ├── win │ ├── CFF_win.cc │ ├── CFF_win.exe │ ├── CFF_win_full.cc │ ├── CFF_win_full.exe │ ├── deob_CFF_win.bin │ └── deob_CFF_win_full.bin └── win64 │ ├── CFF_win.cc │ ├── CFF_win64.exe │ ├── CFF_win64_full.cc │ ├── CFF_win64_full.exe │ ├── deob_CFF_win64.bin │ └── deob_CFF_win64_full.bin └── unflattener ├── __init__.py ├── __main__.py ├── binrewrite.py └── unflattener.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # pyenv 7 | .python-version 8 | 9 | # Virtual environments 10 | .env 11 | .venv 12 | env/ 13 | venv/ 14 | ENV/ 15 | env.bak/ 16 | venv.bak/ 17 | 18 | # Local development settings 19 | *.env 20 | .env.local 21 | .env.development.local 22 | .env.test.local 23 | .env.production.local 24 | 25 | .vscode -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ollvm-unflattener 2 | 3 | A Python tool to deobfuscate control flow flattening applied by OLLVM (Obfuscator-LLVM). This tool leverages the Miasm framework to analyze and recover the original control flow of functions obfuscated with OLLVM's control flow flattening technique. 4 | 5 | ## Project Description 6 | 7 | OLLVM (Obfuscator-LLVM) is a popular code obfuscation tool that implements various obfuscation techniques, including control flow flattening on the function level. Control flow flattening transforms the original flow of a function into a flat structure using a state variable and a dispatch mechanism, making it difficult to understand the program's logic during the reverse engineering process. 
8 | 9 | This tool: 10 | - Reconstructs the original control flow of an obfuscated function by identifying and connecting basic blocks 11 | - Generates a deobfuscated binary with the original control flow restored 12 | - Supports multi-layered function deobfuscation by following calls made by the target function using breadth-first search (BFS) 13 | - Supports deobfuscation for Windows & Linux binaries (x86 and x64 architectures) 14 | 15 | This project is inspired by [MODeflattener](https://github.com/mrT4ntr4/MODeflattener) and the awesome work from [Quarkslab](https://blog.quarkslab.com/deobfuscation-recovering-an-ollvm-protected-program.html)! Unlike **MODeflattener**, which solves CFF deobfuscation with a static approach, this project utilizes Miasm's symbolic execution engine to execute and recover the original control flow. 16 | 17 | ## Installation 18 | 19 | ### Prerequisites 20 | - Python 3.10+ 21 | - Git 22 | 23 | ### Setup 24 | 25 | 1. Clone the repository: 26 | ```bash 27 | git clone https://github.com/cdong1012/ollvm-unflattener.git 28 | cd ollvm-unflattener 29 | ``` 30 | 31 | 2. 
Install required dependencies: 32 | ```bash 33 | pip install -r requirements.txt 34 | ``` 35 | 36 | The `requirements.txt` file includes: 37 | ``` 38 | miasm 39 | graphviz 40 | keystone-engine 41 | ``` 42 | 43 | ## Usage 44 | 45 | ### Basic Usage 46 | 47 | ```bash 48 | python unflattener -i <input_path> -o <output_path> -t <target_address> [-a] 49 | ``` 50 | 51 | ### Arguments 52 | 53 | - `-i, --input`: Path to the obfuscated binary (required) 54 | - `-o, --output`: Path where the deobfuscated binary will be saved (required) 55 | - `-t, --target`: Address of the function to deobfuscate (required) 56 | - `-a, --all`: Follow all calls and deobfuscate all functions we can reach 57 | - `-h, --help`: Show help message 58 | 59 | ### Example 60 | 61 | ```bash 62 | # Deobfuscate a single function 63 | python unflattener -i ./samples/linux/CFF.bin -o ./samples/linux/deob_CFF.bin -t 0x80491A0 64 | python unflattener -i ./samples/win/CFF_win.exe -o ./samples/win/deob_CFF_win.bin -t 0x401600 65 | 66 | # Deobfuscate a function and follow all of its calls 67 | python unflattener -i ./samples/linux/CFF_full.bin -o ./samples/linux/deob_CFF_full.bin -t 0x8049E00 -a 68 | python unflattener -i ./samples/win/CFF_win_full.exe -o ./samples/win/deob_CFF_win_full.bin -t 0x401F10 -a 69 | ``` 70 | 71 | ## Results 72 | 73 | ### Visual Comparison 74 | 75 | Below you can see the control flow graph (CFG) before and after deobfuscation: 76 | 77 | ![Obfuscated CFG](./images/obfuscated.jpg) 78 | *Figure 1: Obfuscated CFG* 79 | 80 | ![Deobfuscated CFG](./images/deobfuscated.jpg) 81 | 82 | *Figure 2: Deobfuscated CFG* 83 | 84 | These images are from deobfuscating function ```target_function``` from the sample [CFF_full.bin](./samples/linux/CFF_full.bin). 
85 | 86 | ## Acknowledgements 87 | 88 | - [Miasm](https://github.com/cea-sec/miasm) 89 | - [Obfuscator-LLVM](https://github.com/obfuscator-llvm/obfuscator) 90 | - [MODeflattener](https://github.com/mrT4ntr4/MODeflattener) 91 | - [Quarkslab](https://blog.quarkslab.com/deobfuscation-recovering-an-ollvm-protected-program.html) 92 | -------------------------------------------------------------------------------- /images/deobfuscated.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/images/deobfuscated.jpg -------------------------------------------------------------------------------- /images/obfuscated.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/images/obfuscated.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | miasm 2 | graphviz 3 | keystone-engine -------------------------------------------------------------------------------- /samples/linux/CFF.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/linux/CFF.bin -------------------------------------------------------------------------------- /samples/linux/CFF_full.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/linux/CFF_full.bin -------------------------------------------------------------------------------- /samples/linux/CFF_full.c: 
-------------------------------------------------------------------------------- 1 | // Shout-out to Claude for generating this test file for me! 2 | 3 | #include 4 | #include 5 | 6 | // Helper function to calculate factorial 7 | int calculate_factorial(int n) 8 | { 9 | if (n <= 1) 10 | return 1; 11 | 12 | int result = 1; 13 | for (int i = 2; i <= n; i++) 14 | { 15 | result *= i; 16 | } 17 | return result; 18 | } 19 | 20 | // Function to calculate absolute value 21 | int absolute_value(int n) 22 | { 23 | return n < 0 ? -n : n; 24 | } 25 | 26 | // Helper function to compute powers 27 | int compute_power(int base, int exponent) 28 | { 29 | int result = 1; 30 | for (int i = 0; i < exponent; i++) 31 | { 32 | result *= base; 33 | } 34 | return result; 35 | } 36 | 37 | // Helper function to find greatest common divisor 38 | int find_greatest_common_divisor(int a, int b) 39 | { 40 | a = absolute_value(a); 41 | b = absolute_value(b); 42 | 43 | while (b != 0) 44 | { 45 | int temp = b; 46 | b = a % b; 47 | a = temp; 48 | } 49 | 50 | return a; 51 | } 52 | 53 | // Helper function to print binary representation 54 | void print_binary(int num) 55 | { 56 | if (num > 1) 57 | { 58 | print_binary(num / 2); 59 | } 60 | printf("%d", num % 2); 61 | } 62 | 63 | // The main target function 64 | int target_function(int a) 65 | { 66 | printf("Starting target_function with input: %d\n", a); 67 | 68 | // Variable declarations 69 | int result = 0; 70 | int temp = a; 71 | int floating_result = 0; 72 | 73 | // Basic arithmetic operations 74 | result = a * 5; 75 | printf("After multiplication by 5: %d\n", result); 76 | 77 | // Division with check for zero 78 | if (a != 0) 79 | { 80 | result = result / a; 81 | printf("After division by input: %d\n", result); 82 | } 83 | else 84 | { 85 | printf("Cannot divide by zero\n"); 86 | result = 100; // Default value 87 | } 88 | 89 | // Conditional statement with multiple branches 90 | if (a > 100) 91 | { 92 | printf("Input is very large\n"); 93 | result = 
result + 50; 94 | } 95 | else if (a > 50) 96 | { 97 | printf("Input is large\n"); 98 | result = result + 25; 99 | } 100 | else if (a > 10) 101 | { 102 | printf("Input is medium\n"); 103 | result = result + 10; 104 | } 105 | else 106 | { 107 | printf("Input is small\n"); 108 | result = result + 5; 109 | } 110 | 111 | // Loop to perform some calculations 112 | printf("Starting loop calculations...\n"); 113 | for (int i = 0; i < 5; i++) 114 | { 115 | temp += i * 2; 116 | printf("Loop iteration %d: temp = %d\n", i, temp); 117 | 118 | if (temp > 100) 119 | { 120 | printf("Breaking loop as temp exceeded 100\n"); 121 | break; 122 | } 123 | } 124 | 125 | // Call to factorial function 126 | int factorial_result = calculate_factorial(absolute_value(a) % 10); // Use modulo to avoid large factorials 127 | printf("Factorial of %d is: %d\n", absolute_value(a) % 10, factorial_result); 128 | result += factorial_result; 129 | 130 | // Compute some powers 131 | floating_result = compute_power(a, 2); 132 | printf("Square of %d is: %d\n", a, floating_result); 133 | 134 | // Find GCD if applicable 135 | if (a != 0) 136 | { 137 | int gcd = find_greatest_common_divisor(a, 24); 138 | printf("GCD of %d and 24 is: %d\n", a, gcd); 139 | result += gcd; 140 | } 141 | 142 | // Another loop with different structure 143 | int j = 0; 144 | printf("Starting while loop...\n"); 145 | while (j < absolute_value(a) % 5) 146 | { 147 | printf("While loop iteration %d\n", j); 148 | result += j; 149 | j++; 150 | } 151 | 152 | // Print the binary representation of our input 153 | printf("Binary representation of %d: ", a); 154 | print_binary(a); 155 | printf("\n"); 156 | 157 | // Bit manipulation operations 158 | int bit_shifted = a << 2; 159 | printf("Value after left shift by 2: %d\n", bit_shifted); 160 | 161 | int bit_and = a & 0x0F; 162 | printf("Result of bitwise AND with 0x0F: %d\n", bit_and); 163 | 164 | // Final calculations 165 | result = result % 1000; // Keep result reasonable 166 | printf("Final 
result: %d\n", result); 167 | 168 | return result; 169 | } 170 | 171 | // Main function to demonstrate usage 172 | int main() 173 | { 174 | int test_values[] = {5, 25, 75, 150, 0}; 175 | 176 | for (int i = 0; i < 5; i++) 177 | { 178 | printf("\n===== Testing target_function with input: %d =====\n", test_values[i]); 179 | int result = target_function(test_values[i]); 180 | printf("target_function returned: %d\n", result); 181 | printf("===============================================\n"); 182 | } 183 | 184 | return 0; 185 | } -------------------------------------------------------------------------------- /samples/linux/deob_CFF.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/linux/deob_CFF.bin -------------------------------------------------------------------------------- /samples/linux/deob_CFF_full.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/linux/deob_CFF_full.bin -------------------------------------------------------------------------------- /samples/linux64/CFF_full_linux64.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/linux64/CFF_full_linux64.bin -------------------------------------------------------------------------------- /samples/linux64/deob_CFF_full_linux64.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/linux64/deob_CFF_full_linux64.bin -------------------------------------------------------------------------------- /samples/win/CFF_win.cc: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // Function prototype 5 | void target_function(int value); 6 | 7 | int main() 8 | { 9 | printf("Starting program...\n"); 10 | 11 | // Call target_function with an arbitrary value 12 | int input_value = 10; 13 | printf("Calling target_function with value: %d\n", input_value); 14 | 15 | target_function(input_value); 16 | 17 | printf("Program execution completed.\n"); 18 | return 0; 19 | } 20 | 21 | // Implementation of target_function that takes one argument 22 | void target_function(int value) 23 | { 24 | printf("Entered target_function with value: %d\n", value); 25 | 26 | // If statement as requested 27 | if (value > 5) 28 | { 29 | printf("The value %d is greater than 5.\n", value); 30 | 31 | // Initialize a counter for the while loop 32 | int counter = value; 33 | 34 | // While loop as requested 35 | while (counter > 0) 36 | { 37 | printf("Loop iteration: %d, Counter value: %d\n", 38 | (value - counter + 1), counter); 39 | 40 | // Demonstrate some processing inside the loop 41 | if (counter % 2 == 0) 42 | { 43 | printf(" → %d is an even number.\n", counter); 44 | } 45 | else 46 | { 47 | printf(" → %d is an odd number.\n", counter); 48 | } 49 | 50 | counter--; 51 | } 52 | 53 | printf("While loop completed after %d iterations.\n", value); 54 | } 55 | else 56 | { 57 | printf("The value %d is less than or equal to 5.\n", value); 58 | printf("Skipping the while loop processing.\n"); 59 | } 60 | 61 | printf("Exiting target_function.\n"); 62 | } -------------------------------------------------------------------------------- /samples/win/CFF_win.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win/CFF_win.exe -------------------------------------------------------------------------------- 
/samples/win/CFF_win_full.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | // Recursive factorial function 6 | int factorial(int n) 7 | { 8 | if (n <= 1) 9 | { 10 | return 1; 11 | } 12 | return n * factorial(n - 1); 13 | } 14 | 15 | // Function to check if file exists 16 | bool check_file_exists(const char *filename) 17 | { 18 | DWORD attributes = GetFileAttributesA(filename); 19 | return (attributes != INVALID_FILE_ATTRIBUTES && 20 | !(attributes & FILE_ATTRIBUTE_DIRECTORY)); 21 | } 22 | 23 | // Function that creates a mutex 24 | HANDLE create_named_mutex(const char *mutex_name) 25 | { 26 | HANDLE hMutex = CreateMutexA(NULL, FALSE, mutex_name); 27 | if (hMutex == NULL) 28 | { 29 | printf("CreateMutex error: %lu\n", GetLastError()); 30 | return NULL; 31 | } 32 | 33 | if (GetLastError() == ERROR_ALREADY_EXISTS) 34 | { 35 | printf("Mutex already exists!\n"); 36 | } 37 | 38 | return hMutex; 39 | } 40 | 41 | // Function to write data to file 42 | bool write_to_file(const char *filename, const char *data) 43 | { 44 | HANDLE hFile = CreateFileA(filename, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, 45 | FILE_ATTRIBUTE_NORMAL, NULL); 46 | 47 | if (hFile == INVALID_HANDLE_VALUE) 48 | { 49 | printf("Could not create file: %lu\n", GetLastError()); 50 | return false; 51 | } 52 | 53 | DWORD bytesWritten; 54 | BOOL result = WriteFile(hFile, data, strlen(data), &bytesWritten, NULL); 55 | 56 | CloseHandle(hFile); 57 | return result != 0; 58 | } 59 | 60 | // Target function with various control structures and API calls 61 | void target_function(int iterations) 62 | { 63 | printf("Starting target function with %d iterations\n", iterations); 64 | 65 | // Create a mutex 66 | HANDLE hMutex = create_named_mutex("OllvmTestMutex"); 67 | 68 | // Loop with conditional branching 69 | for (int i = 0; i < iterations; i++) 70 | { 71 | printf("Iteration %d of %d\n", i + 1, iterations); 72 | 73 | if (i % 3 == 0) 74 | { 
75 | printf("Computing factorial of %d: %d\n", i, factorial(i)); 76 | } 77 | else if (i % 3 == 1) 78 | { 79 | printf("Sleeping for %d milliseconds\n", i * 100); 80 | Sleep(i * 100); 81 | } 82 | else 83 | { 84 | const char *filename = "ollvm_test.txt"; 85 | char buffer[100]; 86 | sprintf(buffer, "Data from iteration %d\n", i); 87 | 88 | if (write_to_file(filename, buffer)) 89 | { 90 | printf("Successfully wrote to file\n"); 91 | } 92 | 93 | if (check_file_exists(filename)) 94 | { 95 | printf("Verified file exists\n"); 96 | } 97 | } 98 | } 99 | 100 | // Clean up 101 | if (hMutex != NULL) 102 | { 103 | CloseHandle(hMutex); 104 | } 105 | 106 | printf("Target function completed\n"); 107 | } 108 | 109 | int main(int argc, char *argv[]) 110 | { 111 | int iterations = 5; 112 | 113 | if (argc > 1) 114 | { 115 | iterations = atoi(argv[1]); 116 | } 117 | 118 | printf("OLLVM Test Program\n"); 119 | target_function(iterations); 120 | 121 | return 0; 122 | } -------------------------------------------------------------------------------- /samples/win/CFF_win_full.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win/CFF_win_full.exe -------------------------------------------------------------------------------- /samples/win/deob_CFF_win.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win/deob_CFF_win.bin -------------------------------------------------------------------------------- /samples/win/deob_CFF_win_full.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win/deob_CFF_win_full.bin 
-------------------------------------------------------------------------------- /samples/win64/CFF_win.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // Function prototype 5 | void target_function(int value); 6 | 7 | int main() 8 | { 9 | printf("Starting program...\n"); 10 | 11 | // Call target_function with an arbitrary value 12 | int input_value = 10; 13 | printf("Calling target_function with value: %d\n", input_value); 14 | 15 | target_function(input_value); 16 | 17 | printf("Program execution completed.\n"); 18 | return 0; 19 | } 20 | 21 | // Implementation of target_function that takes one argument 22 | void target_function(int value) 23 | { 24 | printf("Entered target_function with value: %d\n", value); 25 | 26 | // If statement as requested 27 | if (value > 5) 28 | { 29 | printf("The value %d is greater than 5.\n", value); 30 | 31 | // Initialize a counter for the while loop 32 | int counter = value; 33 | 34 | // While loop as requested 35 | while (counter > 0) 36 | { 37 | printf("Loop iteration: %d, Counter value: %d\n", 38 | (value - counter + 1), counter); 39 | 40 | // Demonstrate some processing inside the loop 41 | if (counter % 2 == 0) 42 | { 43 | printf(" → %d is an even number.\n", counter); 44 | } 45 | else 46 | { 47 | printf(" → %d is an odd number.\n", counter); 48 | } 49 | 50 | counter--; 51 | } 52 | 53 | printf("While loop completed after %d iterations.\n", value); 54 | } 55 | else 56 | { 57 | printf("The value %d is less than or equal to 5.\n", value); 58 | printf("Skipping the while loop processing.\n"); 59 | } 60 | 61 | printf("Exiting target_function.\n"); 62 | } -------------------------------------------------------------------------------- /samples/win64/CFF_win64.exe: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win64/CFF_win64.exe -------------------------------------------------------------------------------- /samples/win64/CFF_win64_full.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // Forward declarations 5 | void LogMessage(const wchar_t *message); 6 | int Factorial(int n); 7 | BOOL CreateTestFile(const wchar_t *filename, const wchar_t *content); 8 | BOOL ReadTestFile(const wchar_t *filename, wchar_t *buffer, DWORD bufferSize); 9 | void SleepOperation(DWORD milliseconds); 10 | void MutexOperation(); 11 | BOOL CheckFileExists(const wchar_t *filename); 12 | void DeleteFileIfExists(const wchar_t *filename); 13 | 14 | // Recursive factorial function 15 | int Factorial(int n) { 16 | if (n <= 1) { 17 | return 1; 18 | } 19 | return n * Factorial(n - 1); 20 | } 21 | 22 | // File creation function 23 | BOOL CreateTestFile(const wchar_t *filename, const wchar_t *content) { 24 | HANDLE hFile = CreateFileW(filename, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, 25 | FILE_ATTRIBUTE_NORMAL, NULL); 26 | 27 | if (hFile == INVALID_HANDLE_VALUE) { 28 | return FALSE; 29 | } 30 | 31 | DWORD bytesWritten; 32 | BOOL result = 33 | WriteFile(hFile, content, lstrlenW(content), &bytesWritten, NULL); 34 | 35 | CloseHandle(hFile); 36 | return result; 37 | } 38 | 39 | // File reading function 40 | BOOL ReadTestFile(const wchar_t *filename, wchar_t *buffer, DWORD bufferSize) { 41 | HANDLE hFile = CreateFileW(filename, GENERIC_READ, FILE_SHARE_READ, NULL, 42 | OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); 43 | 44 | if (hFile == INVALID_HANDLE_VALUE) { 45 | return FALSE; 46 | } 47 | 48 | DWORD bytesRead; 49 | BOOL result = ReadFile(hFile, buffer, bufferSize - 1, &bytesRead, NULL); 50 | 51 | if (result) { 52 | buffer[bytesRead] = '\0'; 53 | } 54 | 55 | CloseHandle(hFile); 56 | return result; 57 | } 58 | 59 | // Sleep operation 
function 60 | void SleepOperation(DWORD milliseconds) { Sleep(milliseconds); } 61 | 62 | // Mutex operation function 63 | void MutexOperation() { 64 | HANDLE hMutex = CreateMutexW(NULL, FALSE, L"OllvmTestMutex"); 65 | if (hMutex != NULL) { 66 | DWORD waitResult = WaitForSingleObject(hMutex, 1000); 67 | if (waitResult == WAIT_OBJECT_0) { 68 | // Critical section 69 | LogMessage(L"Mutex acquired"); 70 | SleepOperation(100); 71 | ReleaseMutex(hMutex); 72 | LogMessage(L"Mutex released"); 73 | } 74 | CloseHandle(hMutex); 75 | } 76 | } 77 | 78 | // Logging function 79 | void LogMessage(const wchar_t *message) { 80 | SYSTEMTIME st; 81 | GetLocalTime(&st); 82 | 83 | wchar_t timestamp[100]; 84 | wsprintfW(timestamp, L"[%02d:%02d:%02d.%03d] %s\r\n", st.wHour, st.wMinute, 85 | st.wSecond, st.wMilliseconds, message); 86 | 87 | OutputDebugStringW(timestamp); 88 | 89 | HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE); 90 | DWORD written; 91 | WriteConsole(hConsole, timestamp, lstrlenW(timestamp), &written, NULL); 92 | } 93 | 94 | // File existence check 95 | BOOL CheckFileExists(const wchar_t *filename) { 96 | DWORD attrib = GetFileAttributesW(filename); 97 | return (attrib != INVALID_FILE_ATTRIBUTES && 98 | !(attrib & FILE_ATTRIBUTE_DIRECTORY)); 99 | } 100 | 101 | // Delete file if it exists 102 | void DeleteFileIfExists(const wchar_t *filename) { 103 | if (CheckFileExists(filename)) { 104 | DeleteFileW(filename); 105 | LogMessage(L"File deleted"); 106 | } 107 | } 108 | 109 | // Target function to be obfuscated by OLLVM 110 | void TargetFunction() { 111 | LogMessage(L"Starting target_function"); 112 | 113 | // Variables initialization 114 | const wchar_t *testFilename = L"ollvm_test.txt"; 115 | const wchar_t *backupFilename = L"ollvm_backup.txt"; 116 | int numbers[] = {1, 2, 3, 4, 5, 6, 7, 8}; 117 | int numbersSize = 8; 118 | int sum = 0; 119 | wchar_t buffer[1024]; 120 | wchar_t messageBuffer[256]; 121 | 122 | // Conditional statements 123 | if (CheckFileExists(testFilename)) 
{ 124 | LogMessage(L"Test file already exists"); 125 | DeleteFileIfExists(testFilename); 126 | } else { 127 | LogMessage(L"Test file does not exist yet"); 128 | } 129 | 130 | // For loop 131 | for (int i = 0; i < numbersSize; i++) { 132 | sum += numbers[i]; 133 | if (i % 2 == 0) { 134 | wsprintfW(messageBuffer, L"Processing even index: %d", i); 135 | LogMessage(messageBuffer); 136 | } else { 137 | wsprintfW(messageBuffer, L"Processing odd index: %d", i); 138 | LogMessage(messageBuffer); 139 | } 140 | } 141 | 142 | // Create a test file 143 | wsprintfW(buffer, L"This is a test file for OLLVM. Sum calculated: %d", sum); 144 | if (CreateTestFile(testFilename, buffer)) { 145 | LogMessage(L"Created test file successfully"); 146 | } else { 147 | LogMessage(L"Failed to create test file"); 148 | return; 149 | } 150 | 151 | // Sleep operation 152 | LogMessage(L"Sleeping for 500ms"); 153 | SleepOperation(500); 154 | 155 | // While loop with file operations 156 | int retryCount = 0; 157 | while (retryCount < 3) { 158 | if (ReadTestFile(testFilename, buffer, sizeof(buffer))) { 159 | LogMessage(L"Read file successfully"); 160 | break; 161 | } 162 | LogMessage(L"Failed to read file, retrying"); 163 | retryCount++; 164 | SleepOperation(100); 165 | } 166 | 167 | // Do-while loop with factorial calculation 168 | int factInput = 5; 169 | int factResult = 0; 170 | do { 171 | factResult = Factorial(factInput); 172 | wsprintfW(messageBuffer, L"Factorial of %d is %d", factInput, factResult); 173 | LogMessage(messageBuffer); 174 | factInput--; 175 | } while (factInput > 0); 176 | 177 | // Mutex operation 178 | MutexOperation(); 179 | 180 | // Final cleanup 181 | if (CheckFileExists(testFilename)) { 182 | // Create backup before deleting 183 | if (ReadTestFile(testFilename, buffer, sizeof(buffer))) { 184 | if (CreateTestFile(backupFilename, buffer)) { 185 | LogMessage(L"Created backup file"); 186 | } 187 | } 188 | DeleteFileIfExists(testFilename); 189 | } 190 | 191 | 
LogMessage(L"Completed target_function"); 192 | } 193 | 194 | // Main function 195 | int main(int argc, char *argv[]) { 196 | LogMessage(L"Program started"); 197 | 198 | TargetFunction(); 199 | 200 | LogMessage(L"Program completed successfully"); 201 | return 0; 202 | } -------------------------------------------------------------------------------- /samples/win64/CFF_win64_full.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win64/CFF_win64_full.exe -------------------------------------------------------------------------------- /samples/win64/deob_CFF_win64.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win64/deob_CFF_win64.bin -------------------------------------------------------------------------------- /samples/win64/deob_CFF_win64_full.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win64/deob_CFF_win64_full.bin -------------------------------------------------------------------------------- /unflattener/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | unflattener API Wrapper 3 | ~~~~~~~~~~~~~~~~~~~ 4 | 5 | A wrapper for the unflattener API. 
def main() -> None:
    """Command-line entry point: unflatten one function (or a call tree).

    Parses ``-i/--input``, ``-o/--output``, ``-t/--target`` (hex address) and
    ``-a/--all``, generates the patches and, when at least one patch was
    produced, writes the deobfuscated binary to the output path.

    Exits with argparse's usage error (status 2) when the target address is
    not valid hexadecimal.
    """
    parser = argparse.ArgumentParser(prog='unflattener', description='Python program to unflatten binaries obfuscated by ollvm')
    parser.add_argument('-i', '--input', type=str, help='Obfuscated binary path', required=True)
    parser.add_argument('-o', '--output', type=str, help='Deobfuscated output binary path', required=True)
    parser.add_argument('-t', '--target', type=str, help='Target address (hex) to deobfuscate', required=True)
    parser.add_argument('-a', '--all', action='store_true', help='Iteratively deobfuscate all functions called by the target function')

    args = parser.parse_args()

    logger.basicConfig(level=logger.INFO)

    # Validate the address before loading the binary. Previously a bad value
    # was only logged and execution continued with `target_address` unbound.
    try:
        target_address = int(args.target, 16)
    except ValueError:
        parser.error('Target address must be a valid hex value')

    unflat_engine = unflattener.Unflattener(args.input)

    patch_data_list = []
    if not args.all:
        logger.info("Unflattening function {}".format(hex(target_address)))
        try:
            patch, func_interval = unflat_engine.unflat(target_address)
            if patch is not None:
                logger.info("Generate patch for {} successfully".format(hex(target_address)))
                patch_data_list.append((patch, func_interval))
            else:
                logger.info("Function {} is not flattened".format(hex(target_address)))
        except Exception as e:
            # Keep going (nothing will be patched) but report why it failed.
            logger.info("Fail to unflat function {}: {}".format(hex(target_address), e))
    else:
        patch_data_list += unflat_engine.unflat_follow_calls(target_address, args.output)

    if patch_data_list:
        unflat_engine.apply_patches(patch_data_list, args.output)
        logger.info("Patch successfully. Deobfuscated binary is written to {}".format(args.output))


if __name__ == '__main__':
    main()
class RewriteInstruction:
    """One instruction together with its original and relocated offsets.

    The text is assembled with the shared keystone engine ``BinaryRewriter.KS``
    at construction time, so a ``BinaryRewriter`` must exist beforehand.
    """

    def __init__(self, instruction: str, old_offset: int, new_offset: int):
        """Construction for a rewrite instruction

        Args:
            instruction (str): string representation of the instruction
            old_offset (int): original offset (-1 for synthesized jumps)
            new_offset (int): relocated offset
        """
        self.instruction: str = instruction
        self.old_offset: int = old_offset
        self.new_offset: int = new_offset

        if BinaryRewriter.KS._mode == KS_MODE_64 and '[RIP ' in instruction:
            # handle x64 RIP-addressing
            self.fix_RIP_addressing()

        # assemble the new instruction bytes
        instruction_encoding = BinaryRewriter.KS.asm(self.instruction, new_offset)[0]
        self.asm: bytes = bytes(instruction_encoding)

    def fix_RIP_addressing(self):
        """Fix offset for RIP-addressing instruction

        Example: lea rcx, [rip + 0x1234]
        keystone does not automatically calculate the RIP for us
        and will assemble this wrong with our relocate address

        Raises:
            ValueError: the operand does not match ``X64_RIP_REGEX_PATTERN``
        """
        rip_pattern_match = re.search(BinaryRewriter.X64_RIP_REGEX_PATTERN, self.instruction)
        if rip_pattern_match is None:
            # explicit error instead of `assert`, which is stripped under -O
            raise ValueError('Unsupported RIP-relative operand: {}'.format(self.instruction))
        rip_field = rip_pattern_match.group()
        relative_offset = int(rip_field.replace('[RIP ', '').replace(']', '').replace(' ', ''), 16)

        # the displacement is relative to the end of the instruction
        instruction_len = len(BinaryRewriter.KS.asm(self.instruction, self.old_offset)[0])
        target_offset = self.old_offset + instruction_len + relative_offset
        new_relative_offset = target_offset - (self.new_offset + instruction_len)

        if new_relative_offset >= 0:
            rip_addressing_field = '[RIP + {}]'.format(hex(new_relative_offset))
        else:
            rip_addressing_field = '[RIP - {}]'.format(hex(-new_relative_offset))

        self.instruction = self.instruction.replace(rip_field, rip_addressing_field)

    def __str__(self) -> str:
        """Get the string representation of the rewrite instruction

        Returns:
            str: String representation of the rewrite instruction
        """
        result = hex(self.old_offset) + ': ' + self.instruction
        result += '\n\tNew offset: ' + hex(self.new_offset)
        return result


class BinaryRewriter:
    """Lays the recovered blocks out linearly and assembles the function patch."""

    # every JMP/JCC slot is reserved with this many bytes
    JMP_INSTRUCTION_DEFAULT_LEN = 6
    # keystone engine shared with RewriteInstruction; set by __init__
    KS: Ks = None
    X64_RIP_REGEX_PATTERN = r'\[RIP [\+\-] 0x(\d|[A-F])+\]'

    def __init__(self, asmcfg: AsmCFG, arch: str):
        """BinaryRewriter constructor

        Args:
            asmcfg (AsmCFG): function's AsmCFG
            arch (str): binary architecture ('x86_32' or 'x86_64')

        Raises:
            Exception: None supported architecture
        """
        self.asmcfg: AsmCFG = asmcfg
        # reloc offset -> RewriteInstruction
        # (plain dict: the old defaultdict factory could not be called without
        # arguments, so a missing key raised TypeError)
        self.rewrite_instructions: dict[int, RewriteInstruction] = {}

        # original offset -> reloc offset
        self.reloc_map: defaultdict[int, int] = defaultdict(int)

        if arch == 'x86_32':
            BinaryRewriter.KS = Ks(KS_ARCH_X86, KS_MODE_32)
        elif arch == 'x86_64':
            BinaryRewriter.KS = Ks(KS_ARCH_X86, KS_MODE_64)
        else:
            raise Exception('Not supported architecture')

    def init_CFF_data(self, state_order_map: defaultdict, state_to_lockey_map: defaultdict, symbex_engine: SymbolicExecutionEngine):
        """Initialize CFF information needed to generate patch

        Args:
            state_order_map (defaultdict): state value -> list of next state values
            state_to_lockey_map (defaultdict): state value -> list of block loc keys
            symbex_engine (SymbolicExecutionEngine): miasm symbolic execution engine
        """
        self.state_order_map = state_order_map
        self.state_to_lockey_map = state_to_lockey_map
        self.symbex_engine = symbex_engine

    def reorder_blocks(self, target_address: int):
        """Reallocate the blocks in the fixed AsmCFG

        Walks the states from state 0, following the single or "true"
        successor and stashing the "false" successor for later. Each
        instruction is recorded in ``rewrite_instructions`` and ``reloc_map``.
        The blocks' ``lines`` are modified in place (trailing JMP and the CMOV
        are removed).

        Args:
            target_address (int): Function address
        """
        jmp_len = BinaryRewriter.JMP_INSTRUCTION_DEFAULT_LEN
        curr_reloc_address = target_address

        # stack of state values still to be laid out
        process_queue = [0]
        processed_state_val_list = []

        while process_queue:
            curr_state_val = process_queue.pop()
            while True:
                if curr_state_val in processed_state_val_list:
                    # Do not process the same state twice
                    break
                processed_state_val_list.append(curr_state_val)
                next_state_vals = self.state_order_map[curr_state_val]

                cond_jump_type = None
                next_state_val = 0
                # process current state val
                for state_block_loc in self.state_to_lockey_map[curr_state_val]:
                    state_block: AsmBlock = self.asmcfg.loc_key_to_block(state_block_loc)

                    if state_block.lines[-1].name == 'JMP':
                        # delete the jump to dispatcher
                        del state_block.lines[-1]

                    if len(next_state_vals) == 2:
                        # conditional block, delete CMOV instruction
                        for cmov_index, instruction in enumerate(state_block.lines):
                            if 'CMOV' in instruction.name:
                                cond_jump_type = instruction.name.replace('CMOV', 'J')
                                del state_block.lines[cmov_index]
                                break

                    for instruction in state_block.lines:
                        instruction_str = str(instruction)
                        if instruction.name == 'CALL' and isinstance(instruction.args[0], ExprLoc):
                            # has to resolve the call destination from loc key
                            call_dst = int(self.symbex_engine.eval_exprloc(instruction.args[0]))
                            instruction_str = 'CALL {}'.format(hex(call_dst))

                        # relocate the instruction
                        rewritten = RewriteInstruction(instruction_str, instruction.offset, curr_reloc_address)
                        self.rewrite_instructions[curr_reloc_address] = rewritten
                        self.reloc_map[instruction.offset] = curr_reloc_address
                        if instruction_str[0] == 'J':
                            # force JMP/JCC instruction to always have length 6
                            curr_reloc_address += jmp_len
                        else:
                            curr_reloc_address += len(rewritten.asm)

                if len(next_state_vals) == 0:
                    # ret block
                    break

                if len(next_state_vals) == 1:
                    # only one next state (non conditional)
                    next_state_val = next_state_vals[0]

                    assert next_state_val in self.state_to_lockey_map
                    next_state_head_loc = self.state_to_lockey_map[next_state_val][0]
                    jump_dst = self.asmcfg.loc_db.get_location_offset(next_state_head_loc)

                    # NOTE(review): jump_dst is an ORIGINAL offset while the keys
                    # of rewrite_instructions are RELOCATED addresses; this looks
                    # like it was meant to test reloc_map. Kept as-is - confirm.
                    if jump_dst in self.rewrite_instructions:
                        # already written before, jump backward
                        self.rewrite_instructions[curr_reloc_address] = RewriteInstruction('JMP {}'.format(hex(jump_dst)), -1, curr_reloc_address)
                        curr_reloc_address += jmp_len

                    # else just write the next block directly after
                elif len(next_state_vals) == 2:
                    # processing conditional
                    true_state_val = next_state_vals[0]
                    false_state_val = next_state_vals[1]
                    assert true_state_val in self.state_to_lockey_map
                    assert false_state_val in self.state_to_lockey_map

                    # processing false path
                    false_head_loc = self.state_to_lockey_map[false_state_val][0]
                    false_dst = self.asmcfg.loc_db.get_location_offset(false_head_loc)
                    # old offset is -1 here because this instruction is synthesized;
                    # it does not exist in the original function
                    self.rewrite_instructions[curr_reloc_address] = RewriteInstruction('{} {}'.format(cond_jump_type, hex(false_dst)), -1, curr_reloc_address)
                    curr_reloc_address += jmp_len

                    # stash the false state for later traversal
                    process_queue.append(false_state_val)

                    # processing true path
                    true_head_loc = self.state_to_lockey_map[true_state_val][0]
                    true_dst = self.asmcfg.loc_db.get_location_offset(true_head_loc)

                    # NOTE(review): same original-vs-relocated key mismatch as above.
                    if true_dst in self.rewrite_instructions:
                        # true destination is already written, JMP backward
                        self.rewrite_instructions[curr_reloc_address] = RewriteInstruction('JMP {}'.format(hex(true_dst)), -1, curr_reloc_address)
                        curr_reloc_address += jmp_len

                    next_state_val = true_state_val

                # if we already processed this state, skip writing the next block
                if next_state_val in processed_state_val_list:
                    # before we terminate this path, add a JMP to the head
                    # block of that state
                    next_state_head_loc = self.state_to_lockey_map[next_state_val][0]
                    jump_dst = self.asmcfg.loc_db.get_location_offset(next_state_head_loc)
                    self.rewrite_instructions[curr_reloc_address] = RewriteInstruction('JMP {}'.format(hex(jump_dst)), -1, curr_reloc_address)
                    curr_reloc_address += jmp_len
                    break
                curr_state_val = next_state_val

    def generate_patch(self) -> bytes:
        """Generate the patch for the current function

        Instructions are emitted in relocated-address order. Any gap between
        the end of one encoding and the next relocated address is filled with
        NOPs, so each instruction lands on the address it was assembled for.

        Returns:
            bytes: Function patch data

        Raises:
            KeyError: a synthesized jump targets an offset never relocated
        """
        chunks = []
        cursor = None  # address right after the bytes emitted so far
        for reloc_addr in sorted(self.rewrite_instructions):
            reloc_instruction = self.rewrite_instructions[reloc_addr]

            # if is a JMP/JCC we manually add
            if reloc_instruction.old_offset == -1:
                # need to recalculate destination because the destination of
                # the JMP/JCC instruction has been relocated
                jump_type, destination = reloc_instruction.instruction.split(' ')
                destination = int(destination, 16)
                if destination not in self.reloc_map:
                    # the defaultdict would silently yield 0 and emit `JMP 0x0`
                    raise KeyError('Jump destination {} was never relocated'.format(hex(destination)))
                instruction_str = '{} {}'.format(jump_type, hex(self.reloc_map[destination]))

                instruction_encoding = BinaryRewriter.KS.asm(instruction_str, reloc_addr)[0]
                instruction_patch = bytes(instruction_encoding)

                # Adding NOPs in case the length is not 6, so JMP short and
                # JMP near need no separate handling
                instruction_patch += b'\x90' * (self.JMP_INSTRUCTION_DEFAULT_LEN - len(instruction_encoding))
            else:
                instruction_patch = reloc_instruction.asm

            if cursor is None:
                cursor = reloc_addr
            if reloc_addr > cursor:
                # e.g. an original short jump that was given a 6-byte slot
                chunks.append(b'\x90' * (reloc_addr - cursor))
                cursor = reloc_addr
            chunks.append(instruction_patch)
            cursor += len(instruction_patch)
        return b''.join(chunks)
| https://gist.github.com/mrphrazer/da32217f231e1dd842986f94aa6d9d37#file-flattening_heuristic-py 26 | 27 | Args: 28 | asm_graph (AsmCFG): Function's asm CFG 29 | 30 | Returns: 31 | float: Function's flattening score 32 | """ 33 | 34 | # init score 35 | score = 0.0 36 | # walk over all entry nodes in the graph 37 | for head in asm_graph.heads_iter(): 38 | # since miasm breaks basic block into multiple ones separated by CALL instruction 39 | # need to move this head to the final successor whose last instruction is not a CALL instruction 40 | # basically the tail of this head block 41 | skipped_head_loc_count = 0 42 | while asm_graph.loc_key_to_block(head).lines[-1].name == 'CALL': 43 | skipped_head_loc_count += 1 44 | head = asm_graph.successors(head)[0] 45 | 46 | # compute dominator tree 47 | dominator_tree = asm_graph.compute_dominator_tree(head) 48 | # walk over all basic blocks 49 | for block in asm_graph.blocks: 50 | # get location key for basic block via basic block address 51 | block_key = asm_graph.loc_db.get_offset_location(block.lines[0].offset) 52 | # get all blocks that are dominated by the current block 53 | dominated = set( 54 | [block_key] + [b for b in dominator_tree.walk_depth_first_forward(block_key)]) 55 | # check for a back edge 56 | if not any([b in dominated for b in asm_graph.predecessors(block_key)]): 57 | continue 58 | # calculate relation of dominated blocks to the blocks in the graph 59 | score = max(score, len(dominated)/(len(asm_graph.nodes()) - skipped_head_loc_count)) 60 | return score 61 | 62 | class Unflattener: 63 | """ 64 | Class for the unflattener engine 65 | """ 66 | 67 | def __init__(self, filename: str): 68 | """Constructor for the unflattener engine 69 | 70 | Args: 71 | filename (str): deobfuscated binary path 72 | """ 73 | 74 | self.loc_db: LocationDB = LocationDB() 75 | self.container: Container = Container.from_stream(open(filename, 'rb'), self.loc_db) 76 | self.machine: Machine = Machine(self.container.arch) 77 | 
def unflat_follow_calls(self, target_address: int, out_filename: str) -> list[tuple[bytes, interval]]:
    """Unflat the target function & all calls to unflat other obfuscated functions

    Starts from ``target_address`` and keeps popping addresses from
    ``self.flatten_func_queue`` until it is empty. ``self.unflat`` may push
    further callee addresses onto that queue while it runs. Every address is
    processed at most once.

    Args:
        target_address (int): Target function address
        out_filename (str): Deobfuscated output path (not used here; kept for
            interface compatibility)
    Returns:
        list[tuple[bytes, interval]]: List of function patch & function interval
    """
    self.flatten_func_queue: list[int] = [target_address]
    # The original list was never appended to, so its dedupe check was dead
    # and a function could be patched twice.
    processed_funcs: set[int] = set()

    patch_data_list: list[tuple[bytes, interval]] = []
    while self.flatten_func_queue:
        flatten_func_addr = self.flatten_func_queue.pop()

        if flatten_func_addr in processed_funcs:
            # do not try to flatten the same function again
            continue
        processed_funcs.add(flatten_func_addr)

        logger.info("Unflattening function {}".format(hex(flatten_func_addr)))
        try:
            patch, func_interval = self.unflat(flatten_func_addr)
        except Exception as exc:
            # best effort: one failing function must not stop the others
            logger.info("Fail to unflat function {}".format(hex(flatten_func_addr)))
            logger.debug("Reason: {}".format(exc))
            continue

        # report the function actually processed, not the root target
        if patch is not None:
            logger.info("Generate patch for {} successfully".format(hex(flatten_func_addr)))
            patch_data_list.append((patch, func_interval))
        else:
            logger.info("Function {} is not flattened".format(hex(flatten_func_addr)))
    return patch_data_list
def find_backbone_blocks(self, predispatcher_loc: LocKey) -> List[LocKey]:
    """Find all backbone blocks (blocks with code from the original program)

    A backbone chain ends either at a predecessor of the predispatcher or at a
    function tail (a block with no successor). Because miasm splits a basic
    block at every CALL, each chain is extended upward through first
    predecessors until a block ending in JZ/JMP/JNZ is reached.

    Args:
        predispatcher_loc (LocKey): predispatcher location key

    Returns:
        List[LocKey]: List of backbone location keys
    """
    chain_stoppers = ('JZ', 'JMP', 'JNZ')
    backbone_blocks = []

    def collect_chain(chain_end_loc):
        # Record chain_end_loc plus the CALL-split blocks directly above it.
        backbone_blocks.append(chain_end_loc)
        visited = {chain_end_loc}
        parents = self.asmcfg.predecessors(chain_end_loc)
        # An empty predecessor list used to raise IndexError; it now simply
        # ends the chain.
        while parents:
            parent_loc = parents[0]
            if parent_loc in visited:
                # cycle of non-jump blocks: stop instead of looping forever
                break
            parent_block = self.asmcfg.loc_key_to_block(parent_loc)
            if parent_block is None or parent_block.lines[-1].name in chain_stoppers:
                break
            visited.add(parent_loc)
            backbone_blocks.append(parent_loc)
            parents = self.asmcfg.predecessors(parent_loc)

    # each parent block of the predispatcher is a backbone block
    for block_loc in self.asmcfg.predecessors(predispatcher_loc):
        collect_chain(block_loc)

    # add function's tail (block with no successor) to backbone blocks
    for block in self.asmcfg.blocks:
        if len(self.asmcfg.successors(block.loc_key)) == 0:
            collect_chain(block.loc_key)

    return backbone_blocks
261 | # Here, we assume that the CALL IR does not impact the stack pointer 262 | original_rsp = symbex_engine.symbols[ExprId('RSP', 64)] 263 | original_rbp = symbex_engine.symbols[ExprId('RBP', 64)] 264 | original_esp = symbex_engine.symbols[ExprId('ESP', 32)] 265 | original_ebp = symbex_engine.symbols[ExprId('EBP', 32)] 266 | result = symbex_engine.run_block_at(self.ircfg, loc_key) 267 | if self.container.arch == 'x86_32': 268 | symbex_engine.symbols[ExprId('ESP', 32)] = original_esp 269 | symbex_engine.symbols[ExprId('EBP', 32)] = original_ebp 270 | elif self.container.arch == 'x86_64': 271 | symbex_engine.symbols[ExprId('RSP', 64)] = original_rsp 272 | symbex_engine.symbols[ExprId('RBP', 64)] = original_rbp 273 | return result 274 | 275 | # is an ollvm condition block if CMP instruction is followed by CMOVCC instruction 276 | if cmov_instruction is not None and cmp_instruction is not None\ 277 | and curr_block.lines.index(cmp_instruction) < curr_block.lines.index(cmov_instruction): 278 | curr_loc = loc_key 279 | 280 | while True: 281 | # continue to simulate to check each IR block 282 | # this is because condition-generating instructions (idiv, cmov) 283 | # split a single asm block into multiple IR blocks 284 | curr_ir_block: IRBlock = self.ircfg.get_block(curr_loc) 285 | if curr_ir_block is None: 286 | return symbex_engine.run_block_at(self.ircfg, loc_key) 287 | 288 | for assign_block in curr_ir_block: 289 | # once found the IR assign block for the CMOV instruction 290 | if 'CMOV' in assign_block.instr.name: 291 | # symbex the block as normal 292 | symbex_engine.run_block_at(self.ircfg, curr_loc) 293 | 294 | # NOTE: We don't return the condition produced by symbex_engine.run_block_at here. 
def recover_CFG(self, target_address: int):
    """Recover the function's CFG

    Symbolically executes the function starting at ``target_address``,
    recording the state-variable value observed at each backbone block and
    the state transitions observed at the predispatcher. Those maps are then
    handed to ``BinaryRewriter`` to reorder the blocks and build a patch.

    Side effects visible in this method: every block of ``self.asmcfg`` that
    is neither a recorded backbone block nor a prologue block is deleted, and
    call targets that fall inside ``self.text_section_range`` are appended to
    ``self.flatten_func_queue`` / ``self.flatten_func_encountered``.

    Args:
        target_address (int): Target function address

    Returns:
        The value returned by ``BinaryRewriter.generate_patch()``.
    """

    # predispatcher is the block with the most number of parents
    # (max() yields the first maximal block, like the stable descending sort it replaces)
    predispatcher = max(self.asmcfg.blocks,
                        key=lambda block: len(self.asmcfg.predecessors(block.loc_key)))
    predispatcher_loc = predispatcher.loc_key

    # dispatcher is the only child of the predispatcher
    dispatcher_loc = self.asmcfg.successors(predispatcher_loc)[0]

    # backbone: everything that is needed in the final asmcfg (except the head)
    # stored as a set because it is probed once per simulated block
    backbone_locs = set(self.find_backbone_blocks(predispatcher_loc))

    # state var is the second expr in the first instruction of the dispatcher
    dispatcher_block = self.asmcfg.loc_key_to_block(dispatcher_loc)
    state_var_expr = dispatcher_block.lines[0].get_args_expr()[1]
    logger.debug('State var: %s', state_var_expr)

    # symbols for symbex: every register starts at its initial symbolic value
    init_symbols = dict(zip(all_regs_ids, all_regs_ids_init))

    # parent loc -> [children loc] (only consumed by the debug output below)
    loc_successors_map = defaultdict(list)

    # exec_queue: stack of (address/loc to exec, symbex engine symbols, current state value)
    exec_queue = [(self.to_loc_key(target_address), init_symbols, None)]

    # starting state val for traversal
    first_state_val = None

    # curr state -> [next state/states]
    state_order_map = defaultdict(list)

    # state value -> [loc key/loc keys]
    state_to_lockey_map = defaultdict(list)

    # backbone blocks already simulated once
    visited_backbone_locs = set()

    while exec_queue:
        # pop a loc_key to start symbex
        curr_loc, symbols, curr_state_val = exec_queue.pop()
        symbex_engine = SymbolicExecutionEngine(self.lifter, symbols)

        while True:
            # if current loc is a backbone block
            if curr_loc in backbone_locs:
                if curr_loc in visited_backbone_locs:
                    # this backbone block was already processed, stop symbex on this path
                    break
                visited_backbone_locs.add(curr_loc)

                # get the current value for the state variable
                curr_state_val = int(symbex_engine.eval_expr(state_var_expr))

                # map state val -> [current loc]
                if curr_loc not in state_to_lockey_map[curr_state_val]:
                    state_to_lockey_map[curr_state_val].append(curr_loc)

                # get first state val for later traversal
                if first_state_val is None:
                    first_state_val = curr_state_val

            # predispatcher processing
            if curr_loc == predispatcher_loc:
                # evaluate next state var
                next_state_val = int(symbex_engine.eval_expr(state_var_expr))

                # map curr state val -> next state val
                if next_state_val not in state_order_map[curr_state_val]:
                    state_order_map[curr_state_val].append(next_state_val)

                # reset curr state val
                curr_state_val = None

            # for flatten while following calls
            # if this block ends with a CALL, extract the call destination
            # and add it to self.flatten_func_queue
            curr_block = self.asmcfg.loc_key_to_block(curr_loc)
            if curr_block is not None:
                last_instruction = curr_block.lines[-1]
                if last_instruction.name == 'CALL':
                    destination_loc = symbex_engine.eval_expr(last_instruction.args[0])

                    if isinstance(destination_loc, ExprInt):
                        destination_loc = int(destination_loc)
                        # only follow calls that land in the .text section (avoid library calls)
                        if self.text_section_range['lower'] <= destination_loc <= self.text_section_range['upper']:
                            if destination_loc not in self.flatten_func_encountered:
                                self.flatten_func_queue.append(destination_loc)
                                self.flatten_func_encountered.append(destination_loc)

            # symbex block at current loc_key
            symbex_expr_result = self.symbex_block(symbex_engine, curr_loc)

            # if reach the end (ret), stop this path traversal
            if symbex_expr_result is None:
                break

            if isinstance(symbex_expr_result, ExprCond):
                # if we reach a conditional expression

                # Evaluate the jump addresses if the branch is taken or not
                cond_true = {symbex_expr_result.cond: ExprInt(1, 32)}
                cond_false = {symbex_expr_result.cond: ExprInt(0, 32)}
                addr_true = expr_simp(
                    symbex_engine.eval_expr(symbex_expr_result.replace_expr(cond_true), {}))
                addr_false = expr_simp(
                    symbex_engine.eval_expr(symbex_expr_result.replace_expr(cond_false), {}))

                addr_true = self.to_loc_key(addr_true)
                addr_false = self.to_loc_key(addr_false)

                # stash false path away with a snapshot of the current symbols
                exec_queue.append((addr_false, symbex_engine.symbols.copy(), curr_state_val))

                # map curr loc -> [addr true]
                loc_successors_map[curr_loc].append(addr_true)

                # next loc_key we're jumping to
                next_loc = addr_true
            else:
                # find next loc_key we're jumping to
                next_loc = expr_simp(symbex_engine.eval_expr(symbex_expr_result))

                # map exec states -> [next_loc]
                next_loc = self.to_loc_key(next_loc)
                if next_loc not in loc_successors_map[curr_loc]:
                    loc_successors_map[curr_loc].append(next_loc)

            # update current loc_key to the next loc_key
            curr_loc = next_loc

    logger.debug('loc_successors_map: %s', loc_successors_map)
    logger.debug('state order map: %s', state_order_map)
    logger.debug('state to loc_key_map: %s', state_to_lockey_map)

    # NOTE: not all backbone loc_key is relevant. Only take the ones from state_to_lockey_map
    relevant_locs = {loc for sublist in state_to_lockey_map.values() for loc in sublist}

    # add prologue blocks to backbone list
    # state value 0 is associated with the prologue blocks
    state_order_map[0].append(first_state_val)
    for block_loc in self.asmcfg.predecessors(dispatcher_loc):
        # head block is the other predecessor of dispatcher beside the predispatcher
        if block_loc == predispatcher_loc:
            continue

        # add head to backbone
        prologue_tail_loc = block_loc
        relevant_locs.add(prologue_tail_loc)
        state_to_lockey_map[0].append(prologue_tail_loc)

        # add all prologue blocks above the prologue tail,
        # always following the first predecessor until a block has none
        curr_prologue_loc = prologue_tail_loc
        while True:
            prologue_preds = self.asmcfg.predecessors(curr_prologue_loc)
            if not prologue_preds:
                break
            curr_prologue_loc = prologue_preds[0]
            relevant_locs.add(curr_prologue_loc)
            state_to_lockey_map[0].append(curr_prologue_loc)

        # the prologue chain has been collected; stop scanning dispatcher predecessors
        break

    # since we add from the prologue tail up to the prologue head
    # need to flip the order before we reorder the CFG
    state_to_lockey_map[0].reverse()

    # irrelevant blocks are original blocks that are not a backbone block
    # (collected first so the graph is not mutated while it is being iterated)
    irrelevant_loc_list = [original_block.loc_key
                           for original_block in self.asmcfg.blocks
                           if original_block.loc_key not in relevant_locs]

    # delete all irrelevant blocks
    for loc_key in irrelevant_loc_list:
        self.asmcfg.del_block(self.asmcfg.loc_key_to_block(loc_key))

    # init BinaryRewriter to reorder the CFG and generate a patch for rewriting
    rewriter = BinaryRewriter(self.asmcfg, self.container.arch)
    rewriter.init_CFF_data(state_order_map, state_to_lockey_map, symbex_engine)
    rewriter.reorder_blocks(target_address)
    return rewriter.generate_patch()


def apply_patches(self, patch_data_list: list[tuple[bytes, interval]], out_filename: str):
    """Applying patches to the deobfuscated output file

    Copies ``self.original_filename`` to ``out_filename`` and then, for each
    patch, overwrites the bytes from ``hull()[0]`` up to (not including)
    ``hull()[1]`` with 0xCC before writing the patch at ``hull()[0]``.
    Addresses are turned into file offsets by subtracting
    ``self.binary_base_va``.

    Args:
        patch_data_list (list[tuple[bytes, interval]]): List of function patches & function intervals
        out_filename (str): Deobfuscated output filename

    Returns:
        None
    """
    # Read the input BEFORE opening the output: mode 'wb' truncates, so this
    # order keeps the data intact when out_filename is the original file.
    with open(self.original_filename, 'rb') as in_file:
        original_bytes = in_file.read()

    with open(out_filename, 'wb') as out_file:
        out_file.write(original_bytes)

        for patch, func_interval in patch_data_list:
            hull = func_interval.hull()
            func_start, func_end = hull[0], hull[1]
            file_offset = func_start - self.binary_base_va

            # blank the old function body with 0xCC in a single write
            out_file.seek(file_offset)
            out_file.write(b"\xCC" * (func_end - func_start))

            # lay the patch over the start of the blanked region
            out_file.seek(file_offset)
            out_file.write(patch)