├── .gitignore
├── LICENSE
├── README.md
├── images
│   ├── deobfuscated.jpg
│   └── obfuscated.jpg
├── requirements.txt
├── samples
│   ├── linux
│   │   ├── CFF.bin
│   │   ├── CFF_full.bin
│   │   ├── CFF_full.c
│   │   ├── deob_CFF.bin
│   │   └── deob_CFF_full.bin
│   ├── linux64
│   │   ├── CFF_full_linux64.bin
│   │   └── deob_CFF_full_linux64.bin
│   ├── win
│   │   ├── CFF_win.cc
│   │   ├── CFF_win.exe
│   │   ├── CFF_win_full.cc
│   │   ├── CFF_win_full.exe
│   │   ├── deob_CFF_win.bin
│   │   └── deob_CFF_win_full.bin
│   └── win64
│       ├── CFF_win.cc
│       ├── CFF_win64.exe
│       ├── CFF_win64_full.cc
│       ├── CFF_win64_full.exe
│       ├── deob_CFF_win64.bin
│       └── deob_CFF_win64_full.bin
└── unflattener
    ├── __init__.py
    ├── __main__.py
    ├── binrewrite.py
    └── unflattener.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # pyenv
7 | .python-version
8 |
9 | # Virtual environments
10 | .env
11 | .venv
12 | env/
13 | venv/
14 | ENV/
15 | env.bak/
16 | venv.bak/
17 |
18 | # Local development settings
19 | *.env
20 | .env.local
21 | .env.development.local
22 | .env.test.local
23 | .env.production.local
24 |
25 | .vscode
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ollvm-unflattener
2 |
3 | A Python tool to deobfuscate control flow flattening applied by OLLVM (Obfuscator-LLVM). This tool leverages the Miasm framework to analyze and recover the original control flow of functions obfuscated with OLLVM's control flow flattening technique.
4 |
5 | ## Project Description
6 |
7 | OLLVM (Obfuscator-LLVM) is a popular code obfuscation tool that implements various obfuscation techniques, including control flow flattening on the function level. Control flow flattening transforms the original flow of a function into a flat structure using a state variable and a dispatch mechanism, making it difficult to understand the program's logic during the reverse engineering process.
8 |
9 | This tool:
10 | - Reconstructs the original control flow of an obfuscated function by identifying and connecting basic blocks
11 | - Generates a deobfuscated binary with the original control flow restored
12 | - Supports multi-layered function deobfuscation by following calls made by the target function using breadth-first search (BFS)
13 | - Supports deobfuscation for Windows & Linux binaries (x86 and x64 architectures)
14 |
15 | This project is inspired by [MODeflattener](https://github.com/mrT4ntr4/MODeflattener) and the awesome work from [Quarkslab](https://blog.quarkslab.com/deobfuscation-recovering-an-ollvm-protected-program.html)! Unlike **MODeflattener**, which solves CFF deobfuscation with a static approach, this project utilizes Miasm's symbolic execution engine to execute and recover the original control flow.
16 |
17 | ## Installation
18 |
19 | ### Prerequisites
20 | - Python 3.10+
21 | - Git
22 |
23 | ### Setup
24 |
25 | 1. Clone the repository:
26 | ```bash
27 | git clone https://github.com/cdong1012/ollvm-unflattener.git
28 | cd ollvm-unflattener
29 | ```
30 |
31 | 2. Install required dependencies:
32 | ```bash
33 | pip install -r requirements.txt
34 | ```
35 |
36 | The `requirements.txt` file includes:
37 | ```
38 | miasm
39 | graphviz
40 | keystone-engine
41 | ```
42 |
43 | ## Usage
44 |
45 | ### Basic Usage
46 |
47 | ```bash
48 | python unflattener -i <input file> -o <output file> -t <target function address> [-a]
49 | ```
50 |
51 | ### Arguments
52 |
53 | - `-i, --input`: Path to the obfuscated binary (required)
54 | - `-o, --output`: Path where the deobfuscated binary will be saved (required)
55 | - `-t, --target`: Address of the function to deobfuscate (required)
56 | - `-a, --all`: Follow all calls and deobfuscate all functions we can reach
57 | - `-h, --help`: show help message
58 |
59 | ### Example
60 |
61 | ```bash
62 | # Deobfuscate a single function
63 | python unflattener -i ./samples/linux/CFF.bin -o ./samples/linux/deob_CFF.bin -t 0x80491A0
64 | python unflattener -i ./samples/win/CFF_win.exe -o ./samples/win/deob_CFF_win.bin -t 0x401600
65 |
66 | # Deobfuscate a function and follow all of its calls
67 | python unflattener -i ./samples/linux/CFF_full.bin -o ./samples/linux/deob_CFF_full.bin -t 0x8049E00 -a
68 | python unflattener -i ./samples/win/CFF_win_full.exe -o ./samples/win/deob_CFF_win_full.bin -t 0x401F10 -a
69 | ```
70 |
71 | ## Results
72 |
73 | ### Visual Comparison
74 |
75 | Below you can see the control flow graph (CFG) before and after deobfuscation:
76 |
77 | 
78 | *Figure 1: Obfuscated CFG*
79 |
80 | 
81 |
82 | *Figure 2: Deobfuscated CFG*
83 |
84 | These images are from deobfuscating the function `target_function` in the sample [CFF_full.bin](./samples/linux/CFF_full.bin).
85 |
86 | ## Acknowledgements
87 |
88 | - [Miasm](https://github.com/cea-sec/miasm)
89 | - [Obfuscator-LLVM](https://github.com/obfuscator-llvm/obfuscator)
90 | - [MODeflattener](https://github.com/mrT4ntr4/MODeflattener)
91 | - [Quarkslab](https://blog.quarkslab.com/deobfuscation-recovering-an-ollvm-protected-program.html)
92 |
--------------------------------------------------------------------------------
/images/deobfuscated.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/images/deobfuscated.jpg
--------------------------------------------------------------------------------
/images/obfuscated.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/images/obfuscated.jpg
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | miasm
2 | graphviz
3 | keystone-engine
--------------------------------------------------------------------------------
/samples/linux/CFF.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/linux/CFF.bin
--------------------------------------------------------------------------------
/samples/linux/CFF_full.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/linux/CFF_full.bin
--------------------------------------------------------------------------------
/samples/linux/CFF_full.c:
--------------------------------------------------------------------------------
1 | // Shout-out to Claude for generating this test file for me!
2 |
3 | #include
4 | #include
5 |
6 | // Helper function to calculate factorial: returns 1 for n <= 1, otherwise the product 2 * 3 * ... * n (no overflow check on the int result)
7 | int calculate_factorial(int n)
8 | {
9 | if (n <= 1)
10 | return 1;
11 |
12 | int result = 1;
13 | for (int i = 2; i <= n; i++)
14 | {
15 | result *= i;
16 | }
17 | return result;
18 | }
19 |
20 | // Function to calculate absolute value: returns -n for negative n and n otherwise. NOTE(review): -n overflows for n == INT_MIN; confirm callers never pass it
21 | int absolute_value(int n)
22 | {
23 | return n < 0 ? -n : n;
24 | }
25 |
26 | // Helper function to compute powers: multiplies base into the result `exponent` times, so it returns 1 when exponent <= 0 (no overflow check)
27 | int compute_power(int base, int exponent)
28 | {
29 | int result = 1;
30 | for (int i = 0; i < exponent; i++)
31 | {
32 | result *= base;
33 | }
34 | return result;
35 | }
36 |
37 | // Helper function to find greatest common divisor of |a| and |b|; returns |a| when b is 0
38 | int find_greatest_common_divisor(int a, int b)
39 | {
40 | a = absolute_value(a);
41 | b = absolute_value(b);
42 |
43 | while (b != 0) // Euclidean algorithm: replace (a, b) with (b, a % b) until b reaches 0
44 | {
45 | int temp = b;
46 | b = a % b;
47 | a = temp;
48 | }
49 |
50 | return a;
51 | }
52 |
53 | // Helper function to print binary representation of num with printf, without a trailing newline. NOTE(review): for num <= 1 only the single value num % 2 is printed, so negative inputs do not produce a full bit pattern
54 | void print_binary(int num)
55 | {
56 | if (num > 1) // recurse on num / 2 first so the higher-order digits are printed before the lowest one
57 | {
58 | print_binary(num / 2);
59 | }
60 | printf("%d", num % 2);
61 | }
62 |
63 | // The main target function: runs a fixed sequence of arithmetic, branches, loops and helper calls on `a`, printing every step, and returns the accumulated result modulo 1000
64 | int target_function(int a)
65 | {
66 | printf("Starting target_function with input: %d\n", a);
67 |
68 | // Variable declarations
69 | int result = 0;
70 | int temp = a;
71 | int floating_result = 0; // holds the int returned by compute_power(a, 2), despite the name
72 |
73 | // Basic arithmetic operations
74 | result = a * 5;
75 | printf("After multiplication by 5: %d\n", result);
76 |
77 | // Division with check for zero: divides result by a, or falls back to 100 when a is 0
78 | if (a != 0)
79 | {
80 | result = result / a;
81 | printf("After division by input: %d\n", result);
82 | }
83 | else
84 | {
85 | printf("Cannot divide by zero\n");
86 | result = 100; // Default value
87 | }
88 |
89 | // Conditional statement with multiple branches
90 | if (a > 100)
91 | {
92 | printf("Input is very large\n");
93 | result = result + 50;
94 | }
95 | else if (a > 50)
96 | {
97 | printf("Input is large\n");
98 | result = result + 25;
99 | }
100 | else if (a > 10)
101 | {
102 | printf("Input is medium\n");
103 | result = result + 10;
104 | }
105 | else
106 | {
107 | printf("Input is small\n");
108 | result = result + 5;
109 | }
110 |
111 | // Loop to perform some calculations: at most 5 iterations, stopping early once temp exceeds 100
112 | printf("Starting loop calculations...\n");
113 | for (int i = 0; i < 5; i++)
114 | {
115 | temp += i * 2;
116 | printf("Loop iteration %d: temp = %d\n", i, temp);
117 |
118 | if (temp > 100)
119 | {
120 | printf("Breaking loop as temp exceeded 100\n");
121 | break;
122 | }
123 | }
124 |
125 | // Call to factorial function
126 | int factorial_result = calculate_factorial(absolute_value(a) % 10); // Use modulo to avoid large factorials
127 | printf("Factorial of %d is: %d\n", absolute_value(a) % 10, factorial_result);
128 | result += factorial_result;
129 |
130 | // Compute some powers
131 | floating_result = compute_power(a, 2);
132 | printf("Square of %d is: %d\n", a, floating_result);
133 |
134 | // Find GCD if applicable
135 | if (a != 0)
136 | {
137 | int gcd = find_greatest_common_divisor(a, 24);
138 | printf("GCD of %d and 24 is: %d\n", a, gcd);
139 | result += gcd;
140 | }
141 |
142 | // Another loop with different structure
143 | int j = 0;
144 | printf("Starting while loop...\n");
145 | while (j < absolute_value(a) % 5)
146 | {
147 | printf("While loop iteration %d\n", j);
148 | result += j;
149 | j++;
150 | }
151 |
152 | // Print the binary representation of our input
153 | printf("Binary representation of %d: ", a);
154 | print_binary(a);
155 | printf("\n");
156 |
157 | // Bit manipulation operations (printed only; they do not feed into the returned result)
158 | int bit_shifted = a << 2;
159 | printf("Value after left shift by 2: %d\n", bit_shifted);
160 |
161 | int bit_and = a & 0x0F;
162 | printf("Result of bitwise AND with 0x0F: %d\n", bit_and);
163 |
164 | // Final calculations
165 | result = result % 1000; // Keep result reasonable
166 | printf("Final result: %d\n", result);
167 |
168 | return result;
169 | }
170 |
171 | // Main function to demonstrate usage: calls target_function once per fixed test input, prints each returned value, and always returns 0
172 | int main()
173 | {
174 | int test_values[] = {5, 25, 75, 150, 0}; // one input each for the small, medium, large, very-large and zero paths of target_function
175 |
176 | for (int i = 0; i < 5; i++)
177 | {
178 | printf("\n===== Testing target_function with input: %d =====\n", test_values[i]);
179 | int result = target_function(test_values[i]);
180 | printf("target_function returned: %d\n", result);
181 | printf("===============================================\n");
182 | }
183 |
184 | return 0;
185 | }
--------------------------------------------------------------------------------
/samples/linux/deob_CFF.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/linux/deob_CFF.bin
--------------------------------------------------------------------------------
/samples/linux/deob_CFF_full.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/linux/deob_CFF_full.bin
--------------------------------------------------------------------------------
/samples/linux64/CFF_full_linux64.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/linux64/CFF_full_linux64.bin
--------------------------------------------------------------------------------
/samples/linux64/deob_CFF_full_linux64.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/linux64/deob_CFF_full_linux64.bin
--------------------------------------------------------------------------------
/samples/win/CFF_win.cc:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | // Function prototype
5 | void target_function(int value);
6 |
7 | int main() // Entry point: prints progress messages around a single target_function call and always returns 0
8 | {
9 | printf("Starting program...\n");
10 |
11 | // Call target_function with a fixed sample value; 10 is greater than 5, so target_function takes its loop branch
12 | int input_value = 10;
13 | printf("Calling target_function with value: %d\n", input_value);
14 |
15 | target_function(input_value);
16 |
17 | printf("Program execution completed.\n");
18 | return 0;
19 | }
20 |
21 | // Implementation of target_function: prints a trace for `value`; when value > 5 it counts down from value to 1 reporting each counter's parity, otherwise it reports that the loop is skipped
22 | void target_function(int value)
23 | {
24 | printf("Entered target_function with value: %d\n", value);
25 |
26 | // Only values greater than 5 enter the countdown loop
27 | if (value > 5)
28 | {
29 | printf("The value %d is greater than 5.\n", value);
30 |
31 | // Initialize a counter for the while loop
32 | int counter = value;
33 |
34 | // Count down from value to 1, so the loop body runs exactly `value` times
35 | while (counter > 0)
36 | {
37 | printf("Loop iteration: %d, Counter value: %d\n",
38 | (value - counter + 1), counter);
39 |
40 | // Report whether the current counter is even or odd
41 | if (counter % 2 == 0)
42 | {
43 | printf(" → %d is an even number.\n", counter);
44 | }
45 | else
46 | {
47 | printf(" → %d is an odd number.\n", counter);
48 | }
49 |
50 | counter--;
51 | }
52 |
53 | printf("While loop completed after %d iterations.\n", value);
54 | }
55 | else
56 | {
57 | printf("The value %d is less than or equal to 5.\n", value);
58 | printf("Skipping the while loop processing.\n");
59 | }
60 |
61 | printf("Exiting target_function.\n");
62 | }
--------------------------------------------------------------------------------
/samples/win/CFF_win.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win/CFF_win.exe
--------------------------------------------------------------------------------
/samples/win/CFF_win_full.cc:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | // Recursive factorial function: returns 1 for n <= 1, otherwise n * factorial(n - 1) (no overflow check on the int result)
6 | int factorial(int n)
7 | {
8 | if (n <= 1)
9 | {
10 | return 1;
11 | }
12 | return n * factorial(n - 1);
13 | }
14 |
15 | // Function to check if file exists: true only when GetFileAttributesA succeeds for filename and the FILE_ATTRIBUTE_DIRECTORY bit is not set
16 | bool check_file_exists(const char *filename)
17 | {
18 | DWORD attributes = GetFileAttributesA(filename);
19 | return (attributes != INVALID_FILE_ATTRIBUTES &&
20 | !(attributes & FILE_ATTRIBUTE_DIRECTORY));
21 | }
22 |
23 | // Function that creates a mutex named mutex_name via CreateMutexA (bInitialOwner = FALSE); returns the handle, or NULL after printing the error code when creation fails
24 | HANDLE create_named_mutex(const char *mutex_name)
25 | {
26 | HANDLE hMutex = CreateMutexA(NULL, FALSE, mutex_name);
27 | if (hMutex == NULL)
28 | {
29 | printf("CreateMutex error: %lu\n", GetLastError());
30 | return NULL;
31 | }
32 |
33 | if (GetLastError() == ERROR_ALREADY_EXISTS) // informational only: the handle is still returned in this case
34 | {
35 | printf("Mutex already exists!\n");
36 | }
37 |
38 | return hMutex;
39 | }
40 |
41 | // Function to write data to file: opens filename with CREATE_ALWAYS, writes strlen(data) bytes of data, and returns true only if WriteFile reported success
42 | bool write_to_file(const char *filename, const char *data)
43 | {
44 | HANDLE hFile = CreateFileA(filename, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS,
45 | FILE_ATTRIBUTE_NORMAL, NULL);
46 |
47 | if (hFile == INVALID_HANDLE_VALUE)
48 | {
49 | printf("Could not create file: %lu\n", GetLastError());
50 | return false;
51 | }
52 |
53 | DWORD bytesWritten; // filled in by WriteFile but not inspected afterwards
54 | BOOL result = WriteFile(hFile, data, strlen(data), &bytesWritten, NULL);
55 |
56 | CloseHandle(hFile);
57 | return result != 0;
58 | }
59 |
60 | // Target function with various control structures and API calls: creates a named mutex, runs `iterations` loop passes that rotate between three actions based on i % 3, then closes the mutex handle
61 | void target_function(int iterations)
62 | {
63 | printf("Starting target function with %d iterations\n", iterations);
64 |
65 | // Create a mutex
66 | HANDLE hMutex = create_named_mutex("OllvmTestMutex");
67 |
68 | // Loop with conditional branching: i % 3 == 0 prints a factorial, == 1 sleeps, otherwise writes and verifies a file
69 | for (int i = 0; i < iterations; i++)
70 | {
71 | printf("Iteration %d of %d\n", i + 1, iterations);
72 |
73 | if (i % 3 == 0)
74 | {
75 | printf("Computing factorial of %d: %d\n", i, factorial(i));
76 | }
77 | else if (i % 3 == 1)
78 | {
79 | printf("Sleeping for %d milliseconds\n", i * 100);
80 | Sleep(i * 100);
81 | }
82 | else
83 | {
84 | const char *filename = "ollvm_test.txt";
85 | char buffer[100];
86 | sprintf(buffer, "Data from iteration %d\n", i);
87 |
88 | if (write_to_file(filename, buffer))
89 | {
90 | printf("Successfully wrote to file\n");
91 | }
92 |
93 | if (check_file_exists(filename))
94 | {
95 | printf("Verified file exists\n");
96 | }
97 | }
98 | }
99 |
100 | // Clean up: create_named_mutex returns NULL on failure, so only close a real handle
101 | if (hMutex != NULL)
102 | {
103 | CloseHandle(hMutex);
104 | }
105 |
106 | printf("Target function completed\n");
107 | }
108 |
109 | int main(int argc, char *argv[]) // Entry point: runs target_function with an iteration count taken from argv[1], defaulting to 5; always returns 0
110 | {
111 | int iterations = 5;
112 |
113 | if (argc > 1)
114 | {
115 | iterations = atoi(argv[1]); // atoi yields 0 for non-numeric text, in which case target_function's loop does not run
116 | }
117 |
118 | printf("OLLVM Test Program\n");
119 | target_function(iterations);
120 |
121 | return 0;
122 | }
--------------------------------------------------------------------------------
/samples/win/CFF_win_full.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win/CFF_win_full.exe
--------------------------------------------------------------------------------
/samples/win/deob_CFF_win.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win/deob_CFF_win.bin
--------------------------------------------------------------------------------
/samples/win/deob_CFF_win_full.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win/deob_CFF_win_full.bin
--------------------------------------------------------------------------------
/samples/win64/CFF_win.cc:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | // Function prototype
5 | void target_function(int value);
6 |
7 | int main() // Entry point: prints progress messages around a single target_function call and always returns 0
8 | {
9 | printf("Starting program...\n");
10 |
11 | // Call target_function with a fixed sample value; 10 is greater than 5, so target_function takes its loop branch
12 | int input_value = 10;
13 | printf("Calling target_function with value: %d\n", input_value);
14 |
15 | target_function(input_value);
16 |
17 | printf("Program execution completed.\n");
18 | return 0;
19 | }
20 |
21 | // Implementation of target_function: prints a trace for `value`; when value > 5 it counts down from value to 1 reporting each counter's parity, otherwise it reports that the loop is skipped
22 | void target_function(int value)
23 | {
24 | printf("Entered target_function with value: %d\n", value);
25 |
26 | // Only values greater than 5 enter the countdown loop
27 | if (value > 5)
28 | {
29 | printf("The value %d is greater than 5.\n", value);
30 |
31 | // Initialize a counter for the while loop
32 | int counter = value;
33 |
34 | // Count down from value to 1, so the loop body runs exactly `value` times
35 | while (counter > 0)
36 | {
37 | printf("Loop iteration: %d, Counter value: %d\n",
38 | (value - counter + 1), counter);
39 |
40 | // Report whether the current counter is even or odd
41 | if (counter % 2 == 0)
42 | {
43 | printf(" → %d is an even number.\n", counter);
44 | }
45 | else
46 | {
47 | printf(" → %d is an odd number.\n", counter);
48 | }
49 |
50 | counter--;
51 | }
52 |
53 | printf("While loop completed after %d iterations.\n", value);
54 | }
55 | else
56 | {
57 | printf("The value %d is less than or equal to 5.\n", value);
58 | printf("Skipping the while loop processing.\n");
59 | }
60 |
61 | printf("Exiting target_function.\n");
62 | }
--------------------------------------------------------------------------------
/samples/win64/CFF_win64.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win64/CFF_win64.exe
--------------------------------------------------------------------------------
/samples/win64/CFF_win64_full.cc:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | // Forward declarations
5 | void LogMessage(const wchar_t *message);
6 | int Factorial(int n);
7 | BOOL CreateTestFile(const wchar_t *filename, const wchar_t *content);
8 | BOOL ReadTestFile(const wchar_t *filename, wchar_t *buffer, DWORD bufferSize);
9 | void SleepOperation(DWORD milliseconds);
10 | void MutexOperation();
11 | BOOL CheckFileExists(const wchar_t *filename);
12 | void DeleteFileIfExists(const wchar_t *filename);
13 |
// Recursive factorial: yields 1 for every n <= 1, otherwise n * (n-1)!.
int Factorial(int n) {
  return (n <= 1) ? 1 : n * Factorial(n - 1);
}
21 |
22 | // File creation function
23 | BOOL CreateTestFile(const wchar_t *filename, const wchar_t *content) {
24 | HANDLE hFile = CreateFileW(filename, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS,
25 | FILE_ATTRIBUTE_NORMAL, NULL);
26 |
27 | if (hFile == INVALID_HANDLE_VALUE) {
28 | return FALSE;
29 | }
30 |
31 | DWORD bytesWritten;
32 | BOOL result =
33 | WriteFile(hFile, content, lstrlenW(content), &bytesWritten, NULL);
34 |
35 | CloseHandle(hFile);
36 | return result;
37 | }
38 |
39 | // File reading function
40 | BOOL ReadTestFile(const wchar_t *filename, wchar_t *buffer, DWORD bufferSize) {
41 | HANDLE hFile = CreateFileW(filename, GENERIC_READ, FILE_SHARE_READ, NULL,
42 | OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
43 |
44 | if (hFile == INVALID_HANDLE_VALUE) {
45 | return FALSE;
46 | }
47 |
48 | DWORD bytesRead;
49 | BOOL result = ReadFile(hFile, buffer, bufferSize - 1, &bytesRead, NULL);
50 |
51 | if (result) {
52 | buffer[bytesRead] = '\0';
53 | }
54 |
55 | CloseHandle(hFile);
56 | return result;
57 | }
58 |
59 | // Sleep operation function
60 | void SleepOperation(DWORD milliseconds) { Sleep(milliseconds); }
61 |
62 | // Mutex operation function
63 | void MutexOperation() {
64 | HANDLE hMutex = CreateMutexW(NULL, FALSE, L"OllvmTestMutex");
65 | if (hMutex != NULL) {
66 | DWORD waitResult = WaitForSingleObject(hMutex, 1000);
67 | if (waitResult == WAIT_OBJECT_0) {
68 | // Critical section
69 | LogMessage(L"Mutex acquired");
70 | SleepOperation(100);
71 | ReleaseMutex(hMutex);
72 | LogMessage(L"Mutex released");
73 | }
74 | CloseHandle(hMutex);
75 | }
76 | }
77 |
78 | // Logging function
79 | void LogMessage(const wchar_t *message) {
80 | SYSTEMTIME st;
81 | GetLocalTime(&st);
82 |
83 | wchar_t timestamp[100];
84 | wsprintfW(timestamp, L"[%02d:%02d:%02d.%03d] %s\r\n", st.wHour, st.wMinute,
85 | st.wSecond, st.wMilliseconds, message);
86 |
87 | OutputDebugStringW(timestamp);
88 |
89 | HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
90 | DWORD written;
91 | WriteConsole(hConsole, timestamp, lstrlenW(timestamp), &written, NULL);
92 | }
93 |
94 | // File existence check
95 | BOOL CheckFileExists(const wchar_t *filename) {
96 | DWORD attrib = GetFileAttributesW(filename);
97 | return (attrib != INVALID_FILE_ATTRIBUTES &&
98 | !(attrib & FILE_ATTRIBUTE_DIRECTORY));
99 | }
100 |
101 | // Delete file if it exists
102 | void DeleteFileIfExists(const wchar_t *filename) {
103 | if (CheckFileExists(filename)) {
104 | DeleteFileW(filename);
105 | LogMessage(L"File deleted");
106 | }
107 | }
108 |
// Target function to be obfuscated by OLLVM.
//
// Deliberately mixes several control-flow constructs (if/else, for, while
// with break, do-while, early return, nested ifs) around the helper functions
// above. It works on "ollvm_test.txt" and may leave "ollvm_backup.txt"
// behind; every step is reported through LogMessage.
void TargetFunction() {
  LogMessage(L"Starting target_function");

  // Variables initialization
  const wchar_t *testFilename = L"ollvm_test.txt";
  const wchar_t *backupFilename = L"ollvm_backup.txt";
  int numbers[] = {1, 2, 3, 4, 5, 6, 7, 8};
  int numbersSize = 8;  // element count of `numbers`
  int sum = 0;
  wchar_t buffer[1024];        // file content, reused for writing and reading
  wchar_t messageBuffer[256];  // scratch space for formatted log lines

  // Conditional statements: start from a clean state
  if (CheckFileExists(testFilename)) {
    LogMessage(L"Test file already exists");
    DeleteFileIfExists(testFilename);
  } else {
    LogMessage(L"Test file does not exist yet");
  }

  // For loop: accumulate the sum and log the parity of each index
  for (int i = 0; i < numbersSize; i++) {
    sum += numbers[i];
    if (i % 2 == 0) {
      wsprintfW(messageBuffer, L"Processing even index: %d", i);
      LogMessage(messageBuffer);
    } else {
      wsprintfW(messageBuffer, L"Processing odd index: %d", i);
      LogMessage(messageBuffer);
    }
  }

  // Create a test file; abort the whole function when that fails
  wsprintfW(buffer, L"This is a test file for OLLVM. Sum calculated: %d", sum);
  if (CreateTestFile(testFilename, buffer)) {
    LogMessage(L"Created test file successfully");
  } else {
    LogMessage(L"Failed to create test file");
    return;
  }

  // Sleep operation
  LogMessage(L"Sleeping for 500ms");
  SleepOperation(500);

  // While loop with file operations: up to three read attempts, stopping at
  // the first success. Note that sizeof(buffer) is a size in bytes.
  int retryCount = 0;
  while (retryCount < 3) {
    if (ReadTestFile(testFilename, buffer, sizeof(buffer))) {
      LogMessage(L"Read file successfully");
      break;
    }
    LogMessage(L"Failed to read file, retrying");
    retryCount++;
    SleepOperation(100);
  }

  // Do-while loop with factorial calculation: logs 5!, 4!, ... down to 1!
  int factInput = 5;
  int factResult = 0;
  do {
    factResult = Factorial(factInput);
    wsprintfW(messageBuffer, L"Factorial of %d is %d", factInput, factResult);
    LogMessage(messageBuffer);
    factInput--;
  } while (factInput > 0);

  // Mutex operation
  MutexOperation();

  // Final cleanup
  if (CheckFileExists(testFilename)) {
    // Create backup before deleting; a failed read or backup is ignored and
    // the test file is deleted either way
    if (ReadTestFile(testFilename, buffer, sizeof(buffer))) {
      if (CreateTestFile(backupFilename, buffer)) {
        LogMessage(L"Created backup file");
      }
    }
    DeleteFileIfExists(testFilename);
  }

  LogMessage(L"Completed target_function");
}
193 |
// Main function: logs start/end markers around a single TargetFunction run.
int main(int argc, char *argv[]) {
  // Command-line arguments are not used by this sample.
  (void)argc;
  (void)argv;

  LogMessage(L"Program started");
  TargetFunction();
  LogMessage(L"Program completed successfully");

  return 0;
}
--------------------------------------------------------------------------------
/samples/win64/CFF_win64_full.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win64/CFF_win64_full.exe
--------------------------------------------------------------------------------
/samples/win64/deob_CFF_win64.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win64/deob_CFF_win64.bin
--------------------------------------------------------------------------------
/samples/win64/deob_CFF_win64_full.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdong1012/ollvm-unflattener/d0062012b66ab2c845207c733216900d5e99f0a9/samples/win64/deob_CFF_win64_full.bin
--------------------------------------------------------------------------------
/unflattener/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | unflattener API Wrapper
3 | ~~~~~~~~~~~~~~~~~~~
4 |
5 | A wrapper for the unflattener API.
6 |
7 | """
8 |
9 | from .unflattener import *
10 | from .binrewrite import *
--------------------------------------------------------------------------------
/unflattener/__main__.py:
--------------------------------------------------------------------------------
1 | import unflattener
2 | import argparse
3 | import logging as logger
4 |
def main() -> None:
    """Command-line entry point of the unflattener

    Parses the arguments, unflattens either the single target function or
    (with --all) the target function through ``unflat_follow_calls``, and
    writes the patched binary when at least one patch was generated.

    Raises:
        SystemExit: if the target address is not a valid hex value
    """
    parser = argparse.ArgumentParser(prog='unflattener', description='Python program to unflatten binaries obfuscated by ollvm')
    parser.add_argument('-i', '--input', type=str, help='Obfuscated binary path', required=True)
    parser.add_argument('-o', '--output', type=str, help='Deobfuscated output binary path', required=True)
    parser.add_argument('-t', '--target', type=str, help='Target address (hex) to deobfuscate', required=True)
    parser.add_argument('-a', '--all', action='store_true', help='Iteratively deobfuscate all functions called by the target function')

    args = parser.parse_args()

    logger.basicConfig(level=logger.INFO)

    # validate the address before doing any work: without a valid address
    # nothing below can run (target_address would not even be bound)
    try:
        target_address = int(args.target, 16)
    except ValueError:
        logger.error('Target address must be a valid hex value')
        raise SystemExit(1)

    unflat_engine = unflattener.Unflattener(args.input)

    patch_data_list = []
    if args.all:
        patch_data_list += unflat_engine.unflat_follow_calls(target_address, args.output)
    else:
        logger.info("Unflattening function {}".format(hex(target_address)))
        try:
            patch, func_interval = unflat_engine.unflat(target_address)
        except Exception:
            # top-level boundary: report the failure with its traceback
            # instead of silently discarding the cause
            logger.exception("Fail to unflat function {}".format(hex(target_address)))
        else:
            if patch is not None:
                logger.info("Generate patch for {} successfully".format(hex(target_address)))
                patch_data_list.append((patch, func_interval))
            else:
                logger.info("Function {} is not flattened".format(hex(target_address)))

    if patch_data_list:
        unflat_engine.apply_patches(patch_data_list, args.output)
        logger.info("Patch successfully. Deobfuscated binary is written to {}".format(args.output))
40 |
41 | if __name__ == '__main__':
42 | main()
--------------------------------------------------------------------------------
/unflattener/binrewrite.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from miasm.core.asmblock import AsmCFG, AsmBlock
3 | from miasm.ir.symbexec import SymbolicExecutionEngine
4 | from keystone import Ks, KS_ARCH_X86, KS_MODE_32, KS_MODE_64
5 | from miasm.expression.expression import ExprLoc
6 | import re
7 |
class RewriteInstruction:
    """A single instruction of the rewritten function, assembled at its relocated address"""

    def __init__(self, instruction: str, old_offset: int, new_offset: int):
        """Create a rewrite instruction and assemble it at its new location

        Args:
            instruction (str): string representation of the instruction
            old_offset (int): original offset
            new_offset (int): relocated offset
        """
        self.instruction: str = instruction
        self.old_offset: int = old_offset
        self.new_offset: int = new_offset

        # x64 RIP-relative operands must be re-based before assembling
        targets_x64 = BinaryRewriter.KS._mode == KS_MODE_64
        if targets_x64 and '[RIP ' in instruction:
            self.fix_RIP_addressing()

        # assemble the (possibly adjusted) instruction at the relocated offset
        encoded = BinaryRewriter.KS.asm(self.instruction, new_offset)[0]
        self.asm: bytes = bytes(encoded)

    def fix_RIP_addressing(self):
        """Re-base the displacement of a RIP-relative operand

        Example: lea rcx, [rip + 0x1234]
        keystone does not recompute the displacement for the relocated
        address, so the operand is rewritten here to keep pointing at the
        same absolute target as before the move.
        """
        operand_match = re.search(BinaryRewriter.X64_RIP_REGEX_PATTERN, self.instruction)
        assert operand_match is not None
        operand_text = operand_match.group()

        # '[RIP + 0x1234]' -> '+0x1234', which int() parses as a signed hex value
        signed_hex = operand_text.replace('[RIP ', '').replace(']', '').replace(' ', '')
        old_displacement = int(signed_hex, 16)

        # RIP points just past the instruction, so its length is needed
        encoded_len = len(BinaryRewriter.KS.asm(self.instruction, self.old_offset)[0])
        absolute_target = (self.old_offset + encoded_len) + old_displacement
        new_displacement = absolute_target - (self.new_offset + encoded_len)

        if new_displacement < 0:
            new_operand = '[RIP - {}]'.format(hex(-new_displacement))
        else:
            new_operand = '[RIP + {}]'.format(hex(new_displacement))

        self.instruction = self.instruction.replace(operand_text, new_operand)

    def __str__(self) -> str:
        """Get the string representation of the rewrite instruction

        Returns:
            str: original offset and instruction, followed by the new offset
        """
        return ''.join([
            hex(self.old_offset), ': ', self.instruction,
            '\n\tNew offset: ', hex(self.new_offset),
        ])
62 |
class BinaryRewriter:
    """Lays out the recovered blocks of one function and assembles its patch

    ``reorder_blocks`` walks the recovered state order, assigns every kept
    instruction a relocated address and inserts the JMP/Jcc instructions that
    replace the dispatcher; ``generate_patch`` then turns the result into
    bytes.
    """

    # every JMP/Jcc gets a fixed-size slot; shorter encodings are NOP padded
    JMP_INSTRUCTION_DEFAULT_LEN = 6
    # keystone assembler, shared at class level because RewriteInstruction
    # reads it; (re)created by every BinaryRewriter constructor call
    KS: Ks = None
    X64_RIP_REGEX_PATTERN = r'\[RIP [\+\-] 0x(\d|[A-F])+\]'

    def __init__(self, asmcfg: AsmCFG, arch: str):
        """BinaryRewriter constructor

        Args:
            asmcfg (AsmCFG): function's AsmCFG
            arch (str): binary architecture ('x86_32' or 'x86_64')

        Raises:
            Exception: None supported architecture
        """

        self.asmcfg: AsmCFG = asmcfg
        # reloc offset -> RewriteInstruction
        self.rewrite_instructions: defaultdict[int, RewriteInstruction] = defaultdict(RewriteInstruction)

        # original offset -> reloc offset
        self.reloc_map: defaultdict[int, int] = defaultdict(int)

        if arch == 'x86_32':
            BinaryRewriter.KS = Ks(KS_ARCH_X86, KS_MODE_32)
        elif arch == 'x86_64':
            BinaryRewriter.KS = Ks(KS_ARCH_X86, KS_MODE_64)
        else:
            raise Exception('Not supported architecture')

    def init_CFF_data(self, state_order_map: defaultdict, state_to_lockey_map: defaultdict, symbex_engine: SymbolicExecutionEngine):
        """Initialize CFF information needed to generate patch

        Args:
            state_order_map (defaultdict): State order map (state value -> next state values)
            state_to_lockey_map (defaultdict): State lockey map (state value -> block loc keys)
            symbex_engine (SymbolicExecutionEngine): miasm symbolic execution engine
        """
        self.state_order_map = state_order_map
        self.state_to_lockey_map = state_to_lockey_map
        self.symbex_engine = symbex_engine

    def reorder_blocks(self, target_address: int):
        """Reallocate the blocks in the fixed AsmCFG

        Starting from state 0, follows the state order and assigns each kept
        instruction a relocated address beginning at ``target_address``. The
        JMP to the dispatcher and the CMOV selecting the next state are
        dropped and replaced by direct JMP/Jcc instructions.

        Args:
            target_address (int): Function address
        """
        curr_reloc_address = target_address

        # 1. start from the head, write all head blocks
        # queue: state values whose path still has to be written
        process_queue = [0]
        processed_state_vals = set()

        while len(process_queue) != 0:
            curr_state_val = process_queue.pop()
            while True:
                if curr_state_val in processed_state_vals:
                    # Do not process the same state twice
                    break
                processed_state_vals.add(curr_state_val)
                next_state_vals = self.state_order_map[curr_state_val]

                cond_jump_type = None
                next_state_val = 0
                # process current state val
                for state_block_loc in self.state_to_lockey_map[curr_state_val]:
                    state_block: AsmBlock = self.asmcfg.loc_key_to_block(state_block_loc)

                    if state_block.lines[-1].name == 'JMP':
                        # delete the jump to dispatcher
                        del state_block.lines[-1]

                    if len(next_state_vals) == 2:
                        # conditional block, delete CMOV instruction
                        for index, instruction in enumerate(state_block.lines):
                            if 'CMOV' in instruction.name:
                                # CMOVcc -> Jcc: the jump reuses the CMOV condition
                                cond_jump_type = instruction.name.replace('CMOV', 'J')
                                del state_block.lines[index]
                                break

                    for instruction in state_block.lines:
                        instruction_str = str(instruction)
                        if instruction.name == 'CALL':
                            # has to resolve the call destination from loc key
                            if isinstance(instruction.args[0], ExprLoc):
                                call_dst = int(self.symbex_engine.eval_exprloc(instruction.args[0]))
                                instruction_str = 'CALL {}'.format(hex(call_dst))

                        # relocate the instruction
                        self.rewrite_instructions[curr_reloc_address] = RewriteInstruction(instruction_str, instruction.offset, curr_reloc_address)

                        self.reloc_map[instruction.offset] = curr_reloc_address
                        if instruction_str[0] == 'J':
                            # force JMP/JCC instruction to always have length 6
                            curr_reloc_address += BinaryRewriter.JMP_INSTRUCTION_DEFAULT_LEN
                        else:
                            curr_reloc_address += len(self.rewrite_instructions[curr_reloc_address].asm)

                if len(next_state_vals) == 0:
                    # ret block
                    break

                # NOTE(review): the "already written" checks below compare an
                # ORIGINAL offset against rewrite_instructions, which is keyed
                # by RELOCATED address - confirm this is intended.
                if len(next_state_vals) == 1:
                    # only one next state (non conditional)
                    next_state_val = next_state_vals[0]

                    assert next_state_val in self.state_to_lockey_map
                    next_state_head_loc = self.state_to_lockey_map[next_state_val][0]
                    jump_dst = self.asmcfg.loc_db.get_location_offset(next_state_head_loc)

                    if jump_dst in self.rewrite_instructions:
                        # already written before, jump backward
                        self.rewrite_instructions[curr_reloc_address] = RewriteInstruction('JMP {}'.format(hex(jump_dst)), -1, curr_reloc_address)
                        curr_reloc_address += BinaryRewriter.JMP_INSTRUCTION_DEFAULT_LEN

                    # else just write the next block directly after
                elif len(next_state_vals) == 2:
                    # processing conditional
                    true_state_val = next_state_vals[0]
                    false_state_val = next_state_vals[1]
                    assert true_state_val in self.state_to_lockey_map
                    assert false_state_val in self.state_to_lockey_map

                    # processing false path
                    false_head_loc = self.state_to_lockey_map[false_state_val][0]
                    false_dst = self.asmcfg.loc_db.get_location_offset(false_head_loc)
                    # old offset is -1 here cause we create this instruction out of thin air
                    # it does not exist in the original instruction
                    self.rewrite_instructions[curr_reloc_address] = RewriteInstruction('{} {}'.format(cond_jump_type, hex(false_dst)), -1, curr_reloc_address)
                    curr_reloc_address += BinaryRewriter.JMP_INSTRUCTION_DEFAULT_LEN

                    # stash the false state for later traversal
                    process_queue.append(false_state_val)

                    # processing true path
                    true_head_loc = self.state_to_lockey_map[true_state_val][0]
                    true_dst = self.asmcfg.loc_db.get_location_offset(true_head_loc)

                    if true_dst in self.rewrite_instructions:
                        # true destination is already written, JMP backward
                        self.rewrite_instructions[curr_reloc_address] = RewriteInstruction('JMP {}'.format(hex(true_dst)), -1, curr_reloc_address)
                        curr_reloc_address += BinaryRewriter.JMP_INSTRUCTION_DEFAULT_LEN

                    next_state_val = true_state_val

                # if we already processed this state, skip writing the next block
                if next_state_val in processed_state_vals:
                    # before we terminate this path
                    # gotta add a JMP to the head block of that state
                    next_state_head_loc = self.state_to_lockey_map[next_state_val][0]
                    jump_dst = self.asmcfg.loc_db.get_location_offset(next_state_head_loc)
                    self.rewrite_instructions[curr_reloc_address] = RewriteInstruction('JMP {}'.format(hex(jump_dst)), -1, curr_reloc_address)
                    curr_reloc_address += BinaryRewriter.JMP_INSTRUCTION_DEFAULT_LEN
                    break
                curr_state_val = next_state_val

    def generate_patch(self) -> bytes:
        """Generate the patch for the current function

        Instructions are emitted in relocated-address order. Inserted
        JMP/Jcc instructions (old offset -1) are re-assembled against the
        relocated address of their destination and NOP padded to the fixed
        jump slot size.

        Returns:
            bytes: Function patch data

        Raises:
            ValueError: if an inserted jump targets an offset that was never
                relocated, or its encoding does not fit the fixed jump slot
        """
        slot_len = self.JMP_INSTRUCTION_DEFAULT_LEN
        patch = bytearray()

        # generate a patch for all of the rewrite instructions
        for reloc_addr in sorted(self.rewrite_instructions):
            reloc_instruction = self.rewrite_instructions[reloc_addr]

            if reloc_instruction.old_offset != -1:
                # original instruction: already assembled at its new address
                patch += reloc_instruction.asm
                continue

            # a JMP/JCC we manually added: need to recalculate destination
            # because the destination of the instruction has been relocated
            jump_type, destination = reloc_instruction.instruction.split(' ')
            destination = int(destination, 16)
            if destination not in self.reloc_map:
                # reloc_map is a defaultdict(int): a plain lookup would
                # silently turn an unknown destination into a jump to 0
                raise ValueError('Jump destination {} at {} was never relocated'.format(hex(destination), hex(reloc_addr)))
            instruction_str = '{} {}'.format(jump_type, hex(self.reloc_map[destination]))

            instruction_encoding = BinaryRewriter.KS.asm(instruction_str, reloc_addr)[0]
            if instruction_encoding is None or len(instruction_encoding) > slot_len:
                # the layout reserved exactly slot_len bytes for this jump
                raise ValueError('Cannot fit "{}" at {} into a {}-byte jump slot'.format(instruction_str, hex(reloc_addr), slot_len))

            patch += bytes(instruction_encoding)
            # Adding NOPs in case the length is not 6, so that JMP near and
            # JMP short do not have to be told apart when laying out blocks
            patch += b'\x90' * (slot_len - len(instruction_encoding))
        return bytes(patch)
--------------------------------------------------------------------------------
/unflattener/unflattener.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | import pprint
3 | from typing import List
4 | from miasm.core.locationdb import LocationDB
5 | from miasm.analysis.binary import Container
6 | import logging as logger
7 | from miasm.analysis.machine import Machine
8 | from miasm.core.asmblock import AsmCFG, AsmBlock
9 | from miasm.ir.ir import IRCFG
10 | from miasm.arch.ppc.regs import *
11 | from miasm.analysis.simplifier import *
12 | from miasm.expression.expression import *
13 | from miasm.ir.symbexec import SymbolicExecutionEngine
14 | import graphviz
15 | from miasm.arch.x86.arch import instruction_x86, mn_x86
16 | from miasm.arch.x86.disasm import dis_x86_32
17 | from miasm.core.interval import interval
18 | from miasm.loader.elf_init import ELF
19 | from miasm.loader.pe_init import PE
20 | from binrewrite import BinaryRewriter
21 |
def calc_flattening_score(asm_graph: AsmCFG) -> float:
    """Estimate how strongly a function looks control-flow flattened

    Heuristic taken from
    https://gist.github.com/mrphrazer/da32217f231e1dd842986f94aa6d9d37#file-flattening_heuristic-py

    For every block that has a back edge from a block it dominates, the
    share of the graph dominated by that block is computed; the largest
    share is the score.

    Args:
        asm_graph (AsmCFG): Function's asm CFG

    Returns:
        float: Function's flattening score
    """
    best_score = 0.0

    for entry_loc in asm_graph.heads_iter():
        # miasm ends a basic block at every CALL, so the real entry block may
        # be spread over several nodes; advance to the last piece of it and
        # remember how many pieces were skipped
        call_split_count = 0
        while asm_graph.loc_key_to_block(entry_loc).lines[-1].name == 'CALL':
            call_split_count += 1
            entry_loc = asm_graph.successors(entry_loc)[0]

        dom_tree = asm_graph.compute_dominator_tree(entry_loc)

        for asm_block in asm_graph.blocks:
            # location key of the block, looked up through its first address
            loc = asm_graph.loc_db.get_offset_location(asm_block.lines[0].offset)

            # the block itself plus everything it dominates
            dominated_locs = {loc}
            dominated_locs.update(dom_tree.walk_depth_first_forward(loc))

            # only blocks that are the target of a back edge are of interest
            has_back_edge = any(pred in dominated_locs for pred in asm_graph.predecessors(loc))
            if has_back_edge:
                share = len(dominated_locs) / (len(asm_graph.nodes()) - call_split_count)
                best_score = max(best_score, share)

    return best_score
61 |
62 | class Unflattener:
63 | """
64 | Class for the unflattener engine
65 | """
66 |
def __init__(self, filename: str):
    """Constructor for the unflattener engine

    Parses the binary with miasm and prepares the disassembly engine.

    Args:
        filename (str): path of the binary to analyse (the obfuscated input)
    """

    self.loc_db: LocationDB = LocationDB()
    # close the file as soon as the container is built instead of leaking
    # the handle; assumes Container.from_stream consumes the stream while
    # parsing and does not keep it - TODO confirm against the miasm version
    # in use
    with open(filename, 'rb') as binary_file:
        self.container: Container = Container.from_stream(binary_file, self.loc_db)
    self.machine: Machine = Machine(self.container.arch)
    self.mdis = self.machine.dis_engine(self.container.bin_stream, loc_db=self.loc_db)
    self.original_filename: str = filename
    self.flatten_func_queue: list = []
    self.flatten_func_encountered: list = []
81 |
def unflat(self, target_address: int) -> tuple[bytes, interval]:
    """Unflatten the CFG of a function

    Records the containing section's address range and the binary's base
    virtual address, disassembles and lifts the function, and recovers its
    CFG when the flattening score is at least 0.9.

    Args:
        target_address (int): Target function address
    Returns:
        tuple[bytes, interval]: Function patch & function interval, or
            (None, None) when the function does not look flattened
    Raises:
        Exception: if the binary is neither a PE nor an ELF
        ValueError: if the address does not belong to any section
    """

    executable = self.container.executable
    if not isinstance(executable, (PE, ELF)):
        raise Exception('Unsupported binary type')

    section = executable.getsectionbyvad(target_address)
    if section is None:
        # fail with a clear message instead of an AttributeError on None
        raise ValueError('Address {} does not belong to any section'.format(hex(target_address)))

    # get text section range & binary base virtual address
    if isinstance(executable, PE):
        text_section_Shdr = section
        image_base = executable.NThdr.ImageBase
        self.binary_base_va = image_base + (text_section_Shdr.addr - text_section_Shdr.offset)
        self.text_section_range = {'lower': image_base + text_section_Shdr.addr,
                                   'upper': image_base + text_section_Shdr.addr + text_section_Shdr.size}
    else:
        text_section_Shdr = section.sh
        self.binary_base_va = text_section_Shdr.addr - text_section_Shdr.offset

        self.text_section_range = {'lower': text_section_Shdr.addr,
                                   'upper': text_section_Shdr.addr + text_section_Shdr.size}

    self.asmcfg: AsmCFG = self.mdis.dis_multiblock(target_address)
    self.lifter = self.machine.lifter_model_call(self.mdis.loc_db)
    self.ircfg: IRCFG = self.lifter.new_ircfg_from_asmcfg(self.asmcfg)
    score = calc_flattening_score(self.asmcfg)

    if score < 0.9:
        # below the threshold the function is treated as not flattened
        return (None, None)
    patch = self.recover_CFG(target_address)
    func_interval = interval(block.get_range() for block in self.asmcfg.blocks)
    return (patch, func_interval)
116 |
def unflat_follow_calls(self, target_address: int, out_filename: str) -> list[tuple[bytes, interval]]:
    """Unflat the target function & all calls to unflat other obfuscated functions

    Works through ``self.flatten_func_queue``, which starts with the target
    function; presumably the queue is extended with callees while a function
    is being recovered (that code is outside this method - verify). Each
    address is processed at most once.

    Args:
        target_address (int): Target function address
        out_filename (str): Deobfuscated output path (not used by this method)
    Returns:
        list[tuple[bytes, interval]]: List of function patch & function interval
    """
    self.flatten_func_queue: list[int] = [target_address]
    processed_flatten_funcs: set[int] = set()

    patch_data_list: list[tuple[bytes, interval]] = []
    while len(self.flatten_func_queue) != 0:
        flatten_func_addr = self.flatten_func_queue.pop()

        if flatten_func_addr in processed_flatten_funcs:
            # do not try to flatten the same function again
            continue
        # remember the address, otherwise the check above can never trigger
        # and functions queued repeatedly are processed over and over
        processed_flatten_funcs.add(flatten_func_addr)

        logger.info("Unflattening function {}".format(hex(flatten_func_addr)))
        try:
            patch, func_interval = self.unflat(flatten_func_addr)
        except Exception:
            # keep going with the remaining functions, but keep the cause
            logger.exception("Fail to unflat function {}".format(hex(flatten_func_addr)))
            continue

        # report the function actually processed, not the initial target
        if patch is not None:
            logger.info("Generate patch for {} successfully".format(hex(flatten_func_addr)))
            patch_data_list.append((patch, func_interval))
        else:
            logger.info("Function {} is not flattened".format(hex(flatten_func_addr)))
    return patch_data_list
148 |
def render(self, dot_filename: str, image_filename: str):
    """Dump the current function's CFG as a DOT file and render it to PNG

    Args:
        dot_filename (str): DOT file path
        image_filename (str): PNG file path
    """
    with open(dot_filename, 'w') as dot_file:
        dot_source = self.asmcfg.dot()
        dot_file.write(dot_source)

    # the DOT file written above is the input of the renderer
    graphviz.render('dot', 'png', dot_filename, outfile=image_filename)
159 |
def print_block(self, loc_key: LocKey):
    """Print a location key followed by the block found at that location

    Args:
        loc_key (LocKey): Location key
    """
    block = self.asmcfg.loc_key_to_block(loc_key)
    print(f'{loc_key!s} {block!s}')
167 |
def to_loc_key(self, expr) -> LocKey:
    """Convert an expression into a location key

    Args:
        expr : a LocKey, an ExprLoc, an ExprInt or a plain int offset

    Returns:
        LocKey: Location key, or None for any other kind of value
    """
    resolved = None
    if isinstance(expr, LocKey):
        # already a location key
        resolved = expr
    elif isinstance(expr, ExprLoc):
        resolved = expr.loc_key
    elif isinstance(expr, (ExprInt, int)):
        # an offset: look the location up in the container's location db
        resolved = self.container.loc_db.get_offset_location(int(expr))
    return resolved
186 |
def find_backbone_blocks(self, predispatcher_loc: LocKey) -> List[LocKey]:
    """Find all backbone blocks (blocks with code from the original program)

    Every parent of the predispatcher and every block without successors
    (function tail) is a backbone block. Because CALL instructions break a
    basic block into several ones, the chain of first predecessors above
    each of them is collected as well, up to (and excluding) the first block
    ending with JZ, JMP or JNZ.

    Args:
        predispatcher_loc (LocKey): predispatcher location key

    Returns:
        List[LocKey]: List of backbone location keys
    """
    cfg = self.asmcfg
    stop_mnemonics = ('JZ', 'JMP', 'JNZ')
    found = []

    def collect_upward(start_loc):
        # record the block itself, then climb through its first predecessors
        found.append(start_loc)
        parent_loc = cfg.predecessors(start_loc)[0]
        while cfg.loc_key_to_block(parent_loc).lines[-1].name not in stop_mnemonics:
            found.append(parent_loc)
            parent_loc = cfg.predecessors(parent_loc)[0]

    # each parent block of the predispatcher is a backbone block
    for parent_of_predispatcher in cfg.predecessors(predispatcher_loc):
        collect_upward(parent_of_predispatcher)

    # add function's tail (block with no successor) to backbone blocks
    for block in cfg.blocks:
        if len(cfg.successors(block.loc_key)) == 0:
            collect_upward(block.loc_key)

    return found
230 |
def symbex_block(self, symbex_engine: SymbolicExecutionEngine, loc_key: LocKey) -> Expr:
    """Symbolically execute the block at a location and return the result

    Three kinds of blocks are told apart:
    - a block ending with CALL is executed normally, then the stack/frame
      pointer symbols are restored to their values from before the block
    - a block where a CMP/TEST appears before a CMOVcc (ollvm condition
      block) yields the condition expression taken from the CMOV's IR
      assign block, with both outcomes kept
    - any other block (or a loc_key without a block in the AsmCFG) yields
      whatever ``run_block_at`` returns

    Args:
        symbex_engine (SymbolicExecutionEngine): Symbolic execution engine
        loc_key (LocKey): Location key to execute

    Returns:
        Expr: Result symbolic expression
    """
    curr_block = self.asmcfg.loc_key_to_block(loc_key)

    if curr_block is None:
        # no asm block at this location: let the engine run the IR directly
        return symbex_engine.run_block_at(self.ircfg, loc_key)

    # retrieve the cmp/test instruction & cmovcc instruction
    # (the last CMP/TEST seen before the first CMOVcc, if any)
    cmp_instruction = None
    cmov_instruction = None

    for instruction in curr_block.lines:
        if instruction.name in ['CMP', 'TEST']:
            cmp_instruction = instruction
        if 'CMOV' in instruction.name:
            cmov_instruction = instruction
            break

    if curr_block.lines[-1].name == 'CALL':
        # process call regularly but we reset RSP/RBP to old RSP/RBP instead
        # of an ExprMem depending on miasm's call_func_stack
        # basically overwriting the execution result of the CALL IR instruction.
        # Here, we assume that the CALL IR does not impact the stack pointer
        # (both register widths are saved; only the pair matching the
        # container's architecture is written back)
        original_rsp = symbex_engine.symbols[ExprId('RSP', 64)]
        original_rbp = symbex_engine.symbols[ExprId('RBP', 64)]
        original_esp = symbex_engine.symbols[ExprId('ESP', 32)]
        original_ebp = symbex_engine.symbols[ExprId('EBP', 32)]
        result = symbex_engine.run_block_at(self.ircfg, loc_key)
        if self.container.arch == 'x86_32':
            symbex_engine.symbols[ExprId('ESP', 32)] = original_esp
            symbex_engine.symbols[ExprId('EBP', 32)] = original_ebp
        elif self.container.arch == 'x86_64':
            symbex_engine.symbols[ExprId('RSP', 64)] = original_rsp
            symbex_engine.symbols[ExprId('RBP', 64)] = original_rbp
        return result

    # is an ollvm condition block if CMP instruction is followed by CMOVCC instruction
    if cmov_instruction is not None and cmp_instruction is not None\
            and curr_block.lines.index(cmp_instruction) < curr_block.lines.index(cmov_instruction):
        curr_loc = loc_key

        while True:
            # continue to simulate to check each IR block
            # this is because condition-generating instructions (idiv, cmov)
            # split a single asm block into multiple IR blocks
            curr_ir_block: IRBlock = self.ircfg.get_block(curr_loc)
            if curr_ir_block is None:
                # ran out of IR blocks without meeting the CMOV: fall back
                # to executing the original location normally
                return symbex_engine.run_block_at(self.ircfg, loc_key)

            for assign_block in curr_ir_block:
                # once found the IR assign block for the CMOV instruction
                if 'CMOV' in assign_block.instr.name:
                    # symbex the block as normal
                    symbex_engine.run_block_at(self.ircfg, curr_loc)

                    # NOTE: We don't return the condition produced by symbex_engine.run_block_at here.
                    # This is because if the condition is deterministic(in a for loop for example)
                    # symbex_engine.run_block_at will evaluate the cond automatically
                    # and return ExprInt for the address
                    # We don't want this as we want to still split the IR path into two
                    # so we have to get the ExprCond directly from the assign block
                    cmov_cond_expr = assign_block.values()[-1]

                    # example: CMOVNZ -> JNZ
                    if 'CMOVN' in cmov_instruction.name:
                        return cmov_cond_expr.copy()

                    # example: CMOVZ -> JZ
                    # need to flip the condition src fields
                    return ExprCond(cmov_cond_expr._cond.copy(),
                                    cmov_cond_expr._src2.copy(),
                                    cmov_cond_expr._src1.copy())
            # no CMOV in this IR block: execute it and move on to the
            # location the engine returns
            curr_loc = symbex_engine.run_block_at(self.ircfg, curr_loc)
            continue
    else:
        # just a regular block, symbex normally
        return symbex_engine.run_block_at(self.ircfg, loc_key)
316 |
def recover_CFG(self, target_address: int):
    """Recover the function's CFG

    Symbolically executes the flattened function starting at
    ``target_address``. While walking, it records which state value selects
    which backbone block(s) and which state value follows which. Every block
    that is neither a reached backbone block nor part of the prologue is then
    deleted from ``self.asmcfg``, and a ``BinaryRewriter`` is used to reorder
    the remaining blocks and generate a patch.

    Args:
        target_address (int): Target function address

    Returns:
        The value returned by ``BinaryRewriter.generate_patch()``.
    """

    # predispatcher is the block with the most number of parents
    # (on ties, the first such block in iteration order wins)
    predispatcher = max(
        self.asmcfg.blocks,
        key=lambda block: len(self.asmcfg.predecessors(block.loc_key)),
    )
    predispatcher_loc = predispatcher.loc_key

    # dispatcher is the only child of the predispatcher
    dispatcher_loc = self.asmcfg.successors(predispatcher_loc)[0]

    # backbone: everything that is needed in the final asmcfg (except the head)
    # stored as a set because it is probed once per symbolically executed block
    backbone_locs = set(self.find_backbone_blocks(predispatcher_loc))

    # state var is the second expr in the first instruction of the dispatcher
    dispatcher_block = self.asmcfg.loc_key_to_block(dispatcher_loc)
    state_var_expr = dispatcher_block.lines[0].get_args_expr()[1]
    logger.debug('State var: %s', state_var_expr)

    # symbols for symbex: every register starts at its matching init value
    init_symbols = dict(zip(all_regs_ids, all_regs_ids_init))

    # parent loc -> [children loc] (only reported through debug logging)
    loc_successors_map = defaultdict(list)

    # pending work items: (address/loc to exec, symbex engine symbols, current state value)
    # items are taken from the end, so the most recently stashed path runs first
    pending_paths = [(self.to_loc_key(target_address), init_symbols, None)]

    # starting state val for traversal
    first_state_val = None

    # curr state -> [next state/states]
    state_order_map = defaultdict(list)

    # state value -> [loc key/loc keys]
    state_to_lockey_map = defaultdict(list)

    # backbone blocks that have already been executed once
    backbone_seen = set()

    text_range = self.text_section_range

    while pending_paths:
        # take a loc_key to start symbex from, with the symbols saved for it
        curr_loc, symbols, curr_state_val = pending_paths.pop()
        symbex_engine = SymbolicExecutionEngine(self.lifter, symbols)

        while True:
            # if current loc is a backbone block
            if curr_loc in backbone_locs:
                if curr_loc in backbone_seen:
                    # this backbone block was already processed: stop this path
                    break
                backbone_seen.add(curr_loc)

                # get the current value for the state variable
                curr_state_val = int(symbex_engine.eval_expr(state_var_expr))

                # map state val -> [current loc]
                if curr_loc not in state_to_lockey_map[curr_state_val]:
                    state_to_lockey_map[curr_state_val].append(curr_loc)

                # remember the first state val for later traversal
                if first_state_val is None:
                    first_state_val = curr_state_val

            # predispatcher processing
            if curr_loc == predispatcher_loc:
                # the state variable now holds the state that runs next
                next_state_val = int(symbex_engine.eval_expr(state_var_expr))

                # map curr state val -> next state val
                if next_state_val not in state_order_map[curr_state_val]:
                    state_order_map[curr_state_val].append(next_state_val)

                # reset curr state val
                curr_state_val = None

            # for flatten while following calls
            # if this block ends with a CALL, extract the call destination
            # and add it to self.flatten_func_queue
            curr_block = self.asmcfg.loc_key_to_block(curr_loc)
            if curr_block is not None:
                last_instruction = curr_block.lines[-1]
                if last_instruction.name == 'CALL':
                    destination_loc = symbex_engine.eval_expr(last_instruction.args[0])

                    # only concrete destinations can be followed
                    if isinstance(destination_loc, ExprInt):
                        destination_loc = int(destination_loc)
                        # only follow calls that land in the .text section (avoid library calls)
                        if (text_range['lower'] <= destination_loc <= text_range['upper']
                                and destination_loc not in self.flatten_func_encountered):
                            self.flatten_func_queue.append(int(destination_loc))
                            self.flatten_func_encountered.append(destination_loc)

            # symbex block at current loc_key
            symbex_expr_result = self.symbex_block(symbex_engine, curr_loc)

            # if reach the end (ret), stop this path traversal
            if symbex_expr_result is None:
                break

            if isinstance(symbex_expr_result, ExprCond):
                # conditional expression: resolve both jump targets by forcing
                # the condition to 1 and to 0
                cond_true = {symbex_expr_result.cond: ExprInt(1, 32)}
                cond_false = {symbex_expr_result.cond: ExprInt(0, 32)}
                addr_true = expr_simp(
                    symbex_engine.eval_expr(symbex_expr_result.replace_expr(cond_true), {}))
                addr_false = expr_simp(
                    symbex_engine.eval_expr(symbex_expr_result.replace_expr(cond_false), {}))

                addr_true = self.to_loc_key(addr_true)
                addr_false = self.to_loc_key(addr_false)

                # stash false path away with a snapshot of the current symbols
                pending_paths.append((addr_false, symbex_engine.symbols.copy(), curr_state_val))

                # map curr loc -> [addr true]
                loc_successors_map[curr_loc].append(addr_true)

                # next loc_key we're jumping to
                next_loc = addr_true
            else:
                # find next loc_key we're jumping to
                next_loc = self.to_loc_key(
                    expr_simp(symbex_engine.eval_expr(symbex_expr_result)))

                # map curr loc -> [next_loc]
                if next_loc not in loc_successors_map[curr_loc]:
                    loc_successors_map[curr_loc].append(next_loc)

            # update current loc_key to the next loc_key
            curr_loc = next_loc

    logger.debug('loc_successors_map: %s', dict(loc_successors_map))
    logger.debug('state order map: %s', dict(state_order_map))
    logger.debug('state to loc_key map: %s', dict(state_to_lockey_map))

    # NOTE: not all backbone loc_key is relevant. Only take the ones from state_to_lockey_map
    relevant_locs = {loc for locs in state_to_lockey_map.values() for loc in locs}

    # add prologue blocks to the relevant set
    # state value 0 is associated with the prologue blocks
    state_order_map[0].append(first_state_val)
    for block_loc in self.asmcfg.predecessors(dispatcher_loc):
        # head block is the other predecessor of dispatcher beside the predispatcher
        if block_loc == predispatcher_loc:
            continue

        # add the prologue tail
        prologue_tail_loc = block_loc
        relevant_locs.add(prologue_tail_loc)
        state_to_lockey_map[0].append(prologue_tail_loc)

        # add all prologue blocks above the prologue tail by following the
        # first predecessor each time; `walked` stops the walk if the
        # predecessor chain ever loops back on itself
        curr_prologue_loc = prologue_tail_loc
        walked = {curr_prologue_loc}
        while True:
            predecessors = self.asmcfg.predecessors(curr_prologue_loc)
            if not predecessors:
                break
            prev_prologue_loc = predecessors[0]
            if prev_prologue_loc in walked:
                break
            walked.add(prev_prologue_loc)
            relevant_locs.add(prev_prologue_loc)
            state_to_lockey_map[0].append(prev_prologue_loc)
            curr_prologue_loc = prev_prologue_loc

        # only the first non-predispatcher predecessor is treated as the prologue
        break

    # since we add from the prologue tail up to the prologue head
    # need to flip the order before we reorder the CFG
    state_to_lockey_map[0] = state_to_lockey_map[0][::-1]

    # irrelevant blocks are original blocks that are not a relevant block
    # (collected first so the CFG is not modified while it is being iterated)
    irrelevant_loc_list = [
        original_block.loc_key
        for original_block in self.asmcfg.blocks
        if original_block.loc_key not in relevant_locs
    ]

    # delete all irrelevant blocks
    for loc_key in irrelevant_loc_list:
        self.asmcfg.del_block(self.asmcfg.loc_key_to_block(loc_key))

    # init BinaryRewriter to reorder the CFG and generate a patch for rewriting
    # the engine passed along is the one from the last executed path
    rewriter = BinaryRewriter(self.asmcfg, self.container.arch)
    rewriter.init_CFF_data(state_order_map, state_to_lockey_map, symbex_engine)
    rewriter.reorder_blocks(target_address)
    return rewriter.generate_patch()
506 |
def apply_patches(self, patch_data_list: list[tuple[bytes, interval]], out_filename: str):
    """Applying patches to the deobfuscated output file

    The original binary (``self.original_filename``) is copied verbatim to
    ``out_filename``. Then, for every patch, the function's byte range is
    filled with ``0xCC`` and the patch is written at the start of that range.
    File offsets are computed as ``address - self.binary_base_va``.

    Args:
        patch_data_list (list[tuple[bytes, interval]]): List of function patches & function intervals
        out_filename (str): Deobfuscated output filename
    """
    # Read the whole input before the output is opened for writing, so the
    # source is not truncated when both names refer to the same file.
    with open(self.original_filename, 'rb') as in_file:
        original_data = in_file.read()

    with open(out_filename, 'wb') as out_file:
        out_file.write(original_data)

        for patch, func_interval in patch_data_list:
            hull = func_interval.hull()
            func_start, func_end = hull[0], hull[1]
            func_offset = func_start - self.binary_base_va

            # wipe the old function body; the upper hull bound is treated as exclusive
            out_file.seek(func_offset)
            out_file.write(b"\xCC" * (func_end - func_start))

            # lay the patch over the start of the wiped range
            out_file.seek(func_offset)
            out_file.write(patch)
532 |
--------------------------------------------------------------------------------