├── LICENSE
├── README.md
└── vm_str.hpp
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Monokuma
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # vm_str.hpp
2 | vm_str.hpp is a header-only C++20 compile-time string obfuscator.
3 |
4 | # About
5 | I had an idea floating around in my head: build something like xorstr, but with operations chosen at random at compile time (via constexpr) instead of just XOR.
6 |
7 | At compile time (constexpr), the library generates bytecode representing the obfuscation schema. Then, at runtime, a stack-based VM interprets this bytecode to reconstruct the original string.
8 |
9 | The string is constructed on the stack at runtime and does not appear anywhere in the executable prior to execution.
10 |
11 |
12 |
13 | *How the string construction appears in the IDA decompiler.*
14 |
15 | # Use
16 | - `VM_CSTR(...)` to get a pointer to a C-style string (`const char *`).
17 | - `VM_STR(...)` to get a `std::string`.
18 |
19 | - `VM_W_CSTR(...)` to get a pointer to a C-style wide string (`const wchar_t *`).
20 | - `VM_W_STR(...)` to get a `std::wstring`.
21 |
22 | ```cpp
23 | #include "vm_str.hpp"
24 |
25 | int main() {
26 | const char *c_like_string = VM_CSTR("Hello, ");
27 | std::string cpp_std_string = VM_STR("World!");
28 | std::cout << c_like_string << cpp_std_string << std::endl;
29 |
30 | const wchar_t *cw_like_string = VM_W_CSTR(L"Hello, ");
31 | std::wstring cpp_std_wstring = VM_W_STR(L"World!");
32 | std::wcout << cw_like_string << cpp_std_wstring << std::endl;
33 | }
34 | ```
35 |
36 | # Features
37 | - Generates a different obfuscation schema for every build, making general-purpose deobfuscators like [this one](https://github.com/yubie-re/ida-jm-xorstr-decrypt-plugin) harder to develop.
38 | - String is constructed on the stack at runtime and does not appear in the `.data` section.
39 | - The string's runtime decryption is purposefully convoluted, making static analysis harder.
40 |
41 | # Data types supported
42 | - [x] char*
43 | - [x] std::string
44 | - [x] wchar_t*
45 | - [x] std::wstring
46 |
47 | See [Limitations](#Limitations)
48 |
49 | # Supported compilers
50 | - [x] msvc
51 |
52 | See [Limitations](#Limitations)
53 |
54 | # Limitations
55 | - We currently support UTF-8 and UTF-16 strings; other encodings are not supported.
56 | - No compiler other than MSVC will be supported.
57 | - Builds with C++ standards earlier than C++20 will fail.
58 | - Build time is highly affected by `vm_str.hpp` since it makes extensive use of `constexpr` evaluations to generate the bytecode. Based on anecdotal data, even a single character can increase build time by ~1 second. Runtime performance does not seem to be significantly affected, though.
59 |
--------------------------------------------------------------------------------
/vm_str.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | namespace VmStr {
10 |
11 | #define VMSTR_INLINE __forceinline // inlining specifier placed on the runtime helpers below
12 |
13 | #define BYTECODE_MAX_SIZE 1000 // bound used by the ET1/ET2 fill loops (they run to BYTECODE_MAX_SIZE - 1)
14 | #define STACK_MAX_SIZE 200 // element count of the stack buffers declared in the exec_* functions
15 |
16 | namespace Util {
17 | constexpr int digit_to_int(char c) { return c - '0'; } // numeric value of an ASCII digit character; the input is not range-checked
18 |
19 | template // NOTE(review): template parameter list is absent in this listing; the body uses a type named T
20 | inline void _memcpy(volatile void *dst, const volatile void *src,
21 | std::size_t size) { // element-wise copy from src to dst through volatile-qualified pointers
22 | auto *d = static_cast(dst);
23 | auto *s = static_cast(src);
24 | for (std::size_t i = 0; i < size / sizeof(T); ++i) // copies size / sizeof(T) elements; a remainder smaller than sizeof(T) is not copied
25 | d[i] = s[i];
26 | }
27 |
28 | constexpr uint32_t rand_int(uint32_t seed) { // deterministic pseudo-random value derived from seed; usable in constant expressions
29 | uint32_t val = seed;
30 | val = (1103515245u * val + 12345u) & 0x7FFFFFFFu; // one multiply-add step, masked to the low 31 bits
31 | return val;
32 | }
33 | } // namespace Util
34 |
35 | namespace Global {
36 |
37 | constexpr char _time[] = __TIME__; // compile time of this translation unit, formatted "hh:mm:ss"
38 |
39 | constexpr int seed = // seconds since midnight, assembled digit by digit from _time
40 | Util::digit_to_int(_time[7]) + Util::digit_to_int(_time[6]) * 10 + // seconds
41 | Util::digit_to_int(_time[4]) * 60 + Util::digit_to_int(_time[3]) * 600 + // minutes
42 | Util::digit_to_int(_time[1]) * 3600 + Util::digit_to_int(_time[0]) * 36000; // hours
43 |
44 | consteval std::array ET1(uint8_t key) { // compile-time key table whose filled entries are XORed with key
45 | std::array ret{};
46 | for (size_t i = 0; i < BYTECODE_MAX_SIZE - 1; ++i) { // index BYTECODE_MAX_SIZE - 1 is never written by this loop
47 | uint8_t val = Util::rand_int(Global::seed + i) % 255; // values fall in 0..254
48 | ret[i] = val ^ key;
49 | }
50 | return ret;
51 | }
52 |
53 | consteval std::array ET2() { // compile-time key table with the same per-index values as ET1, without the key XOR
54 | std::array ret{};
55 | for (size_t i = 0; i < BYTECODE_MAX_SIZE - 1; ++i) { // index BYTECODE_MAX_SIZE - 1 is never written by this loop
56 | uint8_t val = Util::rand_int(Global::seed + i) % 255; // values fall in 0..254
57 | ret[i] = val;
58 | }
59 | return ret;
60 | }
61 |
62 | constexpr uint8_t GK = Util::rand_int(Global::seed + 0xdeadbeef); // mask handed to ET1; the uint32_t result is narrowed to uint8_t
63 |
64 | static std::array _ET1 = ET1(GK); // masked key table read at runtime by Vm::run; static keeps one copy per translation unit, matching that unit's own seed
65 | constexpr std::array _ET2 = ET2(); // unmasked key table consumed at compile time by Vm::gen_bytecode
66 |
67 | inline std::array _ET3{}; // scratch table in which Vm::run unmasks keys; inline so that including this header from several translation units does not produce duplicate definitions at link time
68 |
69 | } // namespace Global
70 |
71 | namespace Expression {
72 |
73 | #define DEFINE_EXPR(name, body) \
74 | template \
75 | constexpr T cexpr_##name(T n, [[maybe_unused]] T d) body \
76 | \
77 | template \
78 | VMSTR_INLINE T name(T n, [[maybe_unused]] T d) body // emits two functions sharing one body: constexpr cexpr_<name> and VMSTR_INLINE <name>
79 |
80 | DEFINE_EXPR(_xor, { return n ^ d; }) // bitwise XOR of n and d
81 | DEFINE_EXPR(_not, { return ~n; }) // bitwise complement of n; d is unused
82 |
83 | DEFINE_EXPR(rotl, { // rotates n left by d bit positions
84 | constexpr T INT_BITS = std::numeric_limits::digits;
85 | d %= INT_BITS; // reduce the rotation count modulo INT_BITS
86 | return (n << d) | (n >> (INT_BITS - d));
87 | })
88 |
89 | DEFINE_EXPR(rotr, { // rotates n right by d bit positions
90 | constexpr T INT_BITS = std::numeric_limits::digits;
91 | d %= INT_BITS; // reduce the rotation count modulo INT_BITS
92 | return (n >> d) | (n << (INT_BITS - d));
93 | })
94 |
95 | template VMSTR_INLINE T alt_xor(T a, T b) { // convoluted arithmetic/bitwise expression that reduces algebraically to a ^ b
96 | return ((((((~a | ~b) ^ -~a) + (2 * ((~a | ~b) & -~a))) & b) + // (p ^ q) + 2 * (p & q) equals p + q, and (p & q) + (p | q) equals p + q
97 | ((((~a | ~b) ^ -~a) + (2 * ((~a | ~b) & -~a))) | b)) -
98 | ((((~a & ~b) & b) + ((~a & ~b) | b)) - ~a));
99 | }
100 |
101 | template VMSTR_INLINE T alt_not(T a, T b) { // convoluted expression that reduces algebraically to ~a; every term involving b cancels
102 | return (((~~a | ((((~a | b) - ~a) + ((a & ~b) + b)) - // the same sub-expression is subtracted from itself, giving 0
103 | (((~a | b) - ~a) + ((a & ~b) + b)))) -
104 | ~~a) +
105 | (~a | ((((~a | b) - ~a) + ((a & ~b) + b)) -
106 | (((~a | b) - ~a) + ((a & ~b) + b)))));
107 | }
108 |
109 | } // namespace Expression
110 |
111 | namespace Vm {
112 |
113 | enum OP : uint8_t { // opcodes of the bytecode produced by gen_bytecode and interpreted by run
114 | PUSH, // followed by one immediate, which run pushes onto the stack
115 | XOR,
116 | NOT,
117 | ROTR,
118 | ROTL,
119 |
120 | ALT_XOR, // handled by run via Expression::alt_xor
121 | ALT_NOT, // handled by run via Expression::alt_not
122 |
123 | END, // not emitted by gen_bytecode; used there as the modulus when picking an op, so only the values above are chosen
124 |
125 | TERMINATOR, // written after the last character; run stores a zero element and returns
126 | };
127 |
128 | #define INSERT(val) \
129 | ret[vip] = static_cast(val); \
130 | vip++; // stores val at ret[vip] and advances vip; expects ret and vip in the enclosing scope
131 |
132 | #define K static_cast(table[vip]) // key-table entry at the current vip; expects table and vip in the enclosing scope
133 |
134 | template // NOTE(review): template parameter list is absent in this listing; the body uses T, ST and N
135 | constexpr auto gen_bytecode(const ST (&str)[N]) { // encodes the first N - 1 elements of str as obfuscated bytecode and returns the array
136 | std::array ret{};
137 | auto table = Global::_ET2; // per-slot keys; the K macro reads table[vip]
138 |
139 | int vip = 0; // next write position in ret
140 | for (size_t i = 0; i < N - 1; ++i) { // str[N - 1] is skipped; TERMINATOR is written after the loop
141 | OP op = static_cast(Util::rand_int(Global::seed + i) % // choose how this character is encoded
142 | (static_cast(OP::END)));
143 | if (op == OP::XOR) {
144 | T key = static_cast(Util::rand_int(Global::seed + i) % // same rand_int input as the op selection above
145 | std::numeric_limits::max());
146 | T val = Expression::cexpr__xor(str[i], key);
147 | INSERT(OP::PUSH);
148 | INSERT(val ^ K); // each pushed immediate is additionally XORed with the key of its own slot
149 | INSERT(OP::PUSH);
150 | INSERT(static_cast(key) ^ K);
151 | INSERT(OP::XOR); // XOR of the two pushed values gives back str[i]
152 | continue;
153 | } else if (op == OP::NOT) {
154 | T val = Expression::cexpr__not(str[i], 0);
155 | INSERT(OP::PUSH);
156 | INSERT(val ^ K);
157 | INSERT(OP::NOT); // complementing the pushed value gives back str[i]
158 | continue;
159 | } else if (op == OP::ROTR) {
160 | T key = static_cast(Util::rand_int(Global::seed + i) %
161 | std::numeric_limits::max());
162 | T val = Expression::cexpr_rotr(str[i], key);
163 | INSERT(OP::PUSH);
164 | INSERT(val ^ K);
165 | INSERT(OP::PUSH);
166 | INSERT(static_cast(key) ^ K);
167 | INSERT(OP::ROTL); // the opposite rotation is emitted so the interpreter undoes cexpr_rotr
168 | continue;
169 | } else if (op == OP::ROTL) {
170 | T key = static_cast(Util::rand_int(Global::seed + i) %
171 | std::numeric_limits::max());
172 | T val = Expression::cexpr_rotl(str[i], key);
173 | INSERT(OP::PUSH);
174 | INSERT(val ^ K);
175 | INSERT(OP::PUSH);
176 | INSERT(static_cast(key) ^ K);
177 | INSERT(OP::ROTR); // the opposite rotation is emitted so the interpreter undoes cexpr_rotl
178 | continue;
179 | } else if (op == OP::ALT_XOR) {
180 | T key = static_cast(Util::rand_int(Global::seed + i) %
181 | std::numeric_limits::max());
182 | T val = Expression::cexpr__xor(str[i], key); // encoded exactly like the XOR case; only the emitted opcode differs
183 | INSERT(OP::PUSH);
184 | INSERT(val ^ K);
185 | INSERT(OP::PUSH);
186 | INSERT(static_cast(key) ^ K);
187 | INSERT(OP::ALT_XOR);
188 | continue;
189 | } else if (op == OP::ALT_NOT) {
190 | auto val = Expression::cexpr__not(str[i], 0); // encoded exactly like the NOT case; only the emitted opcode differs
191 | INSERT(OP::PUSH);
192 | INSERT(val ^ K);
193 | INSERT(OP::ALT_NOT);
194 | continue;
195 | }
196 |
197 | INSERT(OP::PUSH); // remaining case (op == OP::PUSH): the character is stored with only the slot-key XOR
198 | INSERT(str[i] ^ K);
199 | }
200 |
201 | ret[vip] = OP::TERMINATOR; // marks the end of the program for run
202 |
203 | return ret;
204 | }
205 |
206 | #define CHAIN(offset) run(vsp, stack) // invokes run on the same vsp and stack; NOTE(review): offset is not referenced in this listing, the template arguments appear to be missing
207 | #define VSP(x) ((x) + opaque_var) // x plus opaque_var; parenthesized so the expansion stays a single operand inside any surrounding expression
208 |
209 | template // NOTE(review): template parameter list is absent in this listing; the body uses T, bytecode and vip
210 | VMSTR_INLINE void run(volatile size_t &vsp, volatile T *stack) { // executes the instruction at bytecode[vip] on stack/vsp, then chains to the next instruction
211 | volatile uint8_t opaque_var = Expression::alt_xor( // alt_xor of a value with itself, i.e. 0; presumably volatile so it is not folded away
212 | Global::_ET1[vip + 1], Global::_ET1[vip + 1]);
213 | constexpr OP op = static_cast(bytecode[vip]); // opcode is known at compile time, so only one branch below is instantiated
214 | if constexpr (op == OP::PUSH) {
215 | Global::_ET3[vip + 1] = Global::_ET1[vip + 1];
216 | Global::_ET3[vip + 1] =
217 | Expression::alt_xor(Global::_ET3[vip + 1], Global::GK); // strip the GK mask to recover the key of slot vip + 1
218 | volatile uint8_t _key = Global::_ET3[vip + 1];
219 | T imm = bytecode[vip + 1] ^ static_cast(_key); // undo the slot-key XOR applied by gen_bytecode
220 | Util::_memcpy(stack + vsp, &imm, sizeof(T));
221 | vsp += VSP(1);
222 | CHAIN(2);
223 | } else if constexpr (op == OP::XOR) {
224 | volatile T imm = stack[vsp - 2];
225 | volatile T key = stack[vsp - 1];
226 | vsp -= VSP(2); // pop both operands
227 | T xor_val = Expression::_xor(imm, (key + opaque_var));
228 | Util::_memcpy(stack + vsp, &xor_val, sizeof(T));
229 | vsp += VSP(1); // push the result
230 | CHAIN(1);
231 | } else if constexpr (op == OP::NOT) {
232 | volatile T imm = stack[vsp - 1];
233 | vsp -= VSP(1);
234 | volatile T not_val = Expression::_not(imm + opaque_var, 0);
235 | Util::_memcpy(stack + vsp, &not_val, sizeof(T)); // address of not_val (the listing had a mis-encoded character in place of the ampersand)
236 | vsp += VSP(1);
237 | CHAIN(1);
238 | } else if constexpr (op == OP::ROTR) {
239 | volatile T imm = stack[vsp - 2];
240 | volatile T key = stack[vsp - 1];
241 | vsp -= VSP(2);
242 | volatile T rotr_val = Expression::rotr(imm, key + opaque_var);
243 | Util::_memcpy(stack + vsp, &rotr_val, sizeof(T));
244 | vsp += VSP(1);
245 | CHAIN(1);
246 | } else if constexpr (op == OP::ROTL) {
247 | volatile T imm = stack[vsp - 2];
248 | volatile T key = stack[vsp - 1];
249 | vsp -= VSP(2);
250 | volatile T rotl_val = Expression::rotl(imm, key + opaque_var);
251 | Util::_memcpy(stack + vsp, &rotl_val, sizeof(T));
252 | vsp += VSP(1);
253 | CHAIN(1);
254 | } else if constexpr (op == OP::ALT_XOR) {
255 | volatile T imm = stack[vsp - 2];
256 | volatile T key = stack[vsp - 1];
257 | vsp -= VSP(2);
258 | volatile T mbaxor_val = Expression::alt_xor(imm, key + opaque_var);
259 | Util::_memcpy(stack + vsp, &mbaxor_val, sizeof(T));
260 | vsp += VSP(1);
261 | CHAIN(1);
262 | } else if constexpr (op == OP::ALT_NOT) {
263 | volatile T imm = stack[vsp - 1];
264 | vsp -= VSP(1);
265 | volatile T not_val = Expression::alt_not(imm + opaque_var, imm);
266 | Util::_memcpy(stack + vsp, &not_val, sizeof(T)); // address of not_val (the listing had a mis-encoded character in place of the ampersand)
267 | vsp += VSP(1);
268 | CHAIN(1);
269 | } else if constexpr (op == OP::TERMINATOR) {
270 | T terminator = 0x0;
271 | Util::_memcpy(stack + vsp, &terminator, sizeof(T)); // zero-terminate the reconstructed string; no further chaining
272 | return;
273 | } else {
274 | assert(false && "Invalid opcode");
275 | }
276 | }
277 |
278 | template VMSTR_INLINE std::string exec_str() { // runs the VM on a local buffer and returns the decoded text as std::string; NOTE(review): template parameter list is absent in this listing
279 | volatile size_t vsp = 0;
280 | volatile uint8_t stack[STACK_MAX_SIZE]; // automatic storage; its contents are copied into the returned string
281 | run(vsp, stack);
282 |
283 | uint8_t const *t = const_cast(stack); // drop volatile so the buffer can be read as a plain C string
284 | const char *t2 = reinterpret_cast(t);
285 | auto str = std::string(t2); // reads up to the zero element that run writes for TERMINATOR
286 |
287 | return str;
288 | }
289 |
290 | template VMSTR_INLINE const char *exec_cstr() { // runs the VM and returns a pointer to the decoded text; NOTE(review): template parameter list is absent in this listing
291 | volatile size_t vsp = 0;
292 | static volatile uint8_t stack[STACK_MAX_SIZE]; // static storage, so the returned pointer stays valid after return; every call rewrites this same buffer
293 | run(vsp, stack);
294 |
295 | uint8_t const *t = const_cast(stack); // drop volatile before handing the buffer out
296 | const char *t2 = reinterpret_cast(t);
297 |
298 | return t2;
299 | }
300 |
301 | template VMSTR_INLINE std::wstring exec_wstr() { // wide counterpart of exec_str: returns the decoded text as std::wstring; NOTE(review): template parameter list is absent in this listing
302 | volatile size_t vsp = 0;
303 | volatile uint16_t stack[STACK_MAX_SIZE]; // 16-bit elements; automatic storage, copied into the returned string
304 | run(vsp, stack);
305 |
306 | uint16_t const *t = const_cast(stack); // drop volatile so the buffer can be read as a wide C string
307 | const wchar_t *t2 = reinterpret_cast(t); // the uint16_t buffer is reinterpreted as wchar_t; assumes wchar_t is 16 bits wide - TODO confirm for the target
308 | auto str = std::wstring(t2); // reads up to the zero element that run writes for TERMINATOR
309 |
310 | return str;
311 | }
312 |
313 | template VMSTR_INLINE const wchar_t *exec_cwstr() { // wide counterpart of exec_cstr: returns a pointer to the decoded wide text; NOTE(review): template parameter list is absent in this listing
314 | volatile size_t vsp = 0;
315 | static volatile uint16_t stack[STACK_MAX_SIZE]; // static storage, so the returned pointer stays valid after return; every call rewrites this same buffer
316 | run(vsp, stack);
317 |
318 | uint16_t const *t = const_cast(stack); // drop volatile before handing the buffer out
319 | const wchar_t *t2 = reinterpret_cast(t); // the uint16_t buffer is reinterpreted as wchar_t; assumes wchar_t is 16 bits wide - TODO confirm for the target
320 |
321 | return t2;
322 | }
323 |
324 | } // namespace Vm
325 | } // namespace VmStr
326 |
327 | #define VM_STR(str) \
328 | VmStr::Vm::exec_str(str)>() // public entry point: expands to a call of Vm::exec_str, which returns std::string
329 | #define VM_CSTR(str) \
330 | VmStr::Vm::exec_cstr(str)>() // public entry point: expands to a call of Vm::exec_cstr, which returns const char *
331 |
332 | #define VM_W_STR(wstr) \
333 | VmStr::Vm::exec_wstr(wstr)>() // public entry point: expands to a call of Vm::exec_wstr, which returns std::wstring
334 | #define VM_W_CSTR(wstr) \
335 | VmStr::Vm::exec_cwstr(wstr)>() // public entry point: expands to a call of Vm::exec_cwstr, which returns const wchar_t *
336 |
--------------------------------------------------------------------------------