├── LICENSE ├── README.md └── vm_str.hpp /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Monokuma 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # vm_str.hpp 2 | vm_str.hpp is a header only C++20 compile time string obfuscator. 3 | 4 | # About 5 | I got this idea floating in my head about trying to do something like xorstr, but with random constexpr-chosen operations instead of just xor. 6 | 7 | At compile time (constexpr), the library generates bytecode representing the obfuscation schema. Then, at runtime, a stack-based VM interprets this bytecode to reconstruct the original string. 
8 | 9 | The string is constructed on the stack at runtime and does not appear anywhere in the executable prior to execution. 10 | 11 | Captura de tela 2025-08-06 190129 12 | 13 | *How the string construction appears in the IDA decompiler.* 14 | 15 | # Use 16 | - `VM_CSTR(...)` to get a pointer to a C-style string (the pointer refers to a static buffer that is rewritten each time the same expression runs). 17 | - `VM_STR(...)` to get a `std::string`. 18 | 19 | - `VM_W_CSTR(...)` to get a pointer to a C-style wide string (also backed by a static buffer). 20 | - `VM_W_STR(...)` to get a `std::wstring`. 21 | 22 | ```cpp 23 | #include "vm_str.hpp" 24 | 25 | int main() { 26 | const char *c_like_string = VM_CSTR("Hello, "); 27 | std::string cpp_std_string = VM_STR("World!"); 28 | std::cout << c_like_string << cpp_std_string << std::endl; 29 | 30 | const wchar_t *cw_like_string = VM_W_CSTR(L"Hello, "); 31 | std::wstring cpp_std_wstring = VM_W_STR(L"World!"); 32 | std::wcout << cw_like_string << cpp_std_wstring << std::endl; 33 | } 34 | ``` 35 | 36 | # Features 37 | - Generates a different obfuscation schema for every build, making general deobfuscators like [this one](https://github.com/yubie-re/ida-jm-xorstr-decrypt-plugin) harder to develop. 38 | - The string is constructed on the stack at runtime and does not appear in the `.data` section. 39 | - The string's runtime decryption is purposefully convoluted, making static analysis harder. 40 | 41 | # Data types supported 42 | - [x] char* 43 | - [x] std::string 44 | - [x] wchar_t* 45 | - [x] std::wstring 46 | 47 | See [Limitations](#Limitations) 48 | 49 | # Supported compilers 50 | - [x] msvc 51 | 52 | See [Limitations](#Limitations) 53 | 54 | # Limitations 55 | - We currently support UTF-8 and UTF-16 strings; other encodings are not supported. 56 | - No compiler other than MSVC will be supported. 57 | - Builds with C++ standards earlier than C++20 will fail. 58 | - Build time is highly affected by `vm_str.hpp` since it makes extensive use of `constexpr` evaluations to generate the bytecode. 
Based on anecdotal data, even a single character can increase build time by ~1 second. Runtime performance does not seem to be significantly affected, though. 59 | -------------------------------------------------------------------------------- /vm_str.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace VmStr { 10 | 11 | #define VMSTR_INLINE __forceinline 12 | 13 | #define BYTECODE_MAX_SIZE 1000 14 | #define STACK_MAX_SIZE 200 15 | 16 | namespace Util { 17 | constexpr int digit_to_int(char c) { return c - '0'; } 18 | 19 | template 20 | inline void _memcpy(volatile void *dst, const volatile void *src, 21 | std::size_t size) { 22 | auto *d = static_cast(dst); 23 | auto *s = static_cast(src); 24 | for (std::size_t i = 0; i < size / sizeof(T); ++i) 25 | d[i] = s[i]; 26 | } 27 | 28 | constexpr uint32_t rand_int(uint32_t seed) { 29 | uint32_t val = seed; 30 | val = (1103515245u * val + 12345u) & 0x7FFFFFFFu; 31 | return val; 32 | } 33 | } // namespace Util 34 | 35 | namespace Global { 36 | 37 | constexpr char _time[] = __TIME__; 38 | 39 | constexpr int seed = 40 | Util::digit_to_int(_time[7]) + Util::digit_to_int(_time[6]) * 10 + 41 | Util::digit_to_int(_time[4]) * 60 + Util::digit_to_int(_time[3]) * 600 + 42 | Util::digit_to_int(_time[1]) * 3600 + Util::digit_to_int(_time[0]) * 36000; 43 | 44 | consteval std::array ET1(uint8_t key) { 45 | std::array ret{}; 46 | for (size_t i = 0; i < BYTECODE_MAX_SIZE - 1; ++i) { 47 | uint8_t val = Util::rand_int(Global::seed + i) % 255; 48 | ret[i] = val ^ key; 49 | } 50 | return ret; 51 | } 52 | 53 | consteval std::array ET2() { 54 | std::array ret{}; 55 | for (size_t i = 0; i < BYTECODE_MAX_SIZE - 1; ++i) { 56 | uint8_t val = Util::rand_int(Global::seed + i) % 255; 57 | ret[i] = val; 58 | } 59 | return ret; 60 | } 61 | 62 | constexpr uint8_t GK = Util::rand_int(Global::seed + 0xdeadbeef); 63 | 64 | 
static std::array _ET1 = ET1(GK); 65 | constexpr std::array _ET2 = ET2(); 66 | 67 | std::array _ET3{}; 68 | 69 | } // namespace Global 70 | 71 | namespace Expression { 72 | 73 | #define DEFINE_EXPR(name, body) \ 74 | template \ 75 | constexpr T cexpr_##name(T n, [[maybe_unused]] T d) body \ 76 | \ 77 | template \ 78 | VMSTR_INLINE T name(T n, [[maybe_unused]] T d) body 79 | 80 | DEFINE_EXPR(_xor, { return n ^ d; }) 81 | DEFINE_EXPR(_not, { return ~n; }) 82 | 83 | DEFINE_EXPR(rotl, { 84 | constexpr T INT_BITS = std::numeric_limits::digits; 85 | d %= INT_BITS; 86 | return (n << d) | (n >> (INT_BITS - d)); 87 | }) 88 | 89 | DEFINE_EXPR(rotr, { 90 | constexpr T INT_BITS = std::numeric_limits::digits; 91 | d %= INT_BITS; 92 | return (n >> d) | (n << (INT_BITS - d)); 93 | }) 94 | 95 | template VMSTR_INLINE T alt_xor(T a, T b) { 96 | return ((((((~a | ~b) ^ -~a) + (2 * ((~a | ~b) & -~a))) & b) + 97 | ((((~a | ~b) ^ -~a) + (2 * ((~a | ~b) & -~a))) | b)) - 98 | ((((~a & ~b) & b) + ((~a & ~b) | b)) - ~a)); 99 | } 100 | 101 | template VMSTR_INLINE T alt_not(T a, T b) { 102 | return (((~~a | ((((~a | b) - ~a) + ((a & ~b) + b)) - 103 | (((~a | b) - ~a) + ((a & ~b) + b)))) - 104 | ~~a) + 105 | (~a | ((((~a | b) - ~a) + ((a & ~b) + b)) - 106 | (((~a | b) - ~a) + ((a & ~b) + b))))); 107 | } 108 | 109 | } // namespace Expression 110 | 111 | namespace Vm { 112 | 113 | enum OP : uint8_t { 114 | PUSH, 115 | XOR, 116 | NOT, 117 | ROTR, 118 | ROTL, 119 | 120 | ALT_XOR, 121 | ALT_NOT, 122 | 123 | END, 124 | 125 | TERMINATOR, 126 | }; 127 | 128 | #define INSERT(val) \ 129 | ret[vip] = static_cast(val); \ 130 | vip++; 131 | 132 | #define K static_cast(table[vip]) 133 | 134 | template 135 | constexpr auto gen_bytecode(const ST (&str)[N]) { 136 | std::array ret{}; 137 | auto table = Global::_ET2; 138 | 139 | int vip = 0; 140 | for (size_t i = 0; i < N - 1; ++i) { 141 | OP op = static_cast(Util::rand_int(Global::seed + i) % 142 | (static_cast(OP::END))); 143 | if (op == OP::XOR) { 144 | T key 
= static_cast(Util::rand_int(Global::seed + i) % 145 | std::numeric_limits::max()); 146 | T val = Expression::cexpr__xor(str[i], key); 147 | INSERT(OP::PUSH); 148 | INSERT(val ^ K); 149 | INSERT(OP::PUSH); 150 | INSERT(static_cast(key) ^ K); 151 | INSERT(OP::XOR); 152 | continue; 153 | } else if (op == OP::NOT) { 154 | T val = Expression::cexpr__not(str[i], 0); 155 | INSERT(OP::PUSH); 156 | INSERT(val ^ K); 157 | INSERT(OP::NOT); 158 | continue; 159 | } else if (op == OP::ROTR) { 160 | T key = static_cast(Util::rand_int(Global::seed + i) % 161 | std::numeric_limits::max()); 162 | T val = Expression::cexpr_rotr(str[i], key); 163 | INSERT(OP::PUSH); 164 | INSERT(val ^ K); 165 | INSERT(OP::PUSH); 166 | INSERT(static_cast(key) ^ K); 167 | INSERT(OP::ROTL); 168 | continue; 169 | } else if (op == OP::ROTL) { 170 | T key = static_cast(Util::rand_int(Global::seed + i) % 171 | std::numeric_limits::max()); 172 | T val = Expression::cexpr_rotl(str[i], key); 173 | INSERT(OP::PUSH); 174 | INSERT(val ^ K); 175 | INSERT(OP::PUSH); 176 | INSERT(static_cast(key) ^ K); 177 | INSERT(OP::ROTR); 178 | continue; 179 | } else if (op == OP::ALT_XOR) { 180 | T key = static_cast(Util::rand_int(Global::seed + i) % 181 | std::numeric_limits::max()); 182 | T val = Expression::cexpr__xor(str[i], key); 183 | INSERT(OP::PUSH); 184 | INSERT(val ^ K); 185 | INSERT(OP::PUSH); 186 | INSERT(static_cast(key) ^ K); 187 | INSERT(OP::ALT_XOR); 188 | continue; 189 | } else if (op == OP::ALT_NOT) { 190 | auto val = Expression::cexpr__not(str[i], 0); 191 | INSERT(OP::PUSH); 192 | INSERT(val ^ K); 193 | INSERT(OP::ALT_NOT); 194 | continue; 195 | } 196 | 197 | INSERT(OP::PUSH); 198 | INSERT(str[i] ^ K); 199 | } 200 | 201 | ret[vip] = OP::TERMINATOR; 202 | 203 | return ret; 204 | } 205 | 206 | #define CHAIN(offset) run(vsp, stack) 207 | #define VSP(x) x + opaque_var 208 | 209 | template 210 | VMSTR_INLINE void run(volatile size_t &vsp, volatile T *stack) { 211 | volatile uint8_t opaque_var = 
Expression::alt_xor( 212 | Global::_ET1[vip + 1], Global::_ET1[vip + 1]); 213 | constexpr OP op = static_cast(bytecode[vip]); 214 | if constexpr (op == OP::PUSH) { 215 | Global::_ET3[vip + 1] = Global::_ET1[vip + 1]; 216 | Global::_ET3[vip + 1] = 217 | Expression::alt_xor(Global::_ET3[vip + 1], Global::GK); 218 | volatile uint8_t _key = Global::_ET3[vip + 1]; 219 | T imm = bytecode[vip + 1] ^ static_cast(_key); 220 | Util::_memcpy(stack + vsp, &imm, sizeof(T)); 221 | vsp += VSP(1); 222 | CHAIN(2); 223 | } else if constexpr (op == OP::XOR) { 224 | volatile T imm = stack[vsp - 2]; 225 | volatile T key = stack[vsp - 1]; 226 | vsp -= VSP(2); 227 | T xor_val = Expression::_xor(imm, (key + opaque_var)); 228 | Util::_memcpy(stack + vsp, &xor_val, sizeof(T)); 229 | vsp += VSP(1); 230 | CHAIN(1); 231 | } else if constexpr (op == OP::NOT) { 232 | volatile T imm = stack[vsp - 1]; 233 | vsp -= VSP(1); 234 | volatile T not_val = Expression::_not(imm + opaque_var, 0); 235 | Util::_memcpy(stack + vsp, ¬_val, sizeof(T)); 236 | vsp += VSP(1); 237 | CHAIN(1); 238 | } else if constexpr (op == OP::ROTR) { 239 | volatile T imm = stack[vsp - 2]; 240 | volatile T key = stack[vsp - 1]; 241 | vsp -= VSP(2); 242 | volatile T rotr_val = Expression::rotr(imm, key + opaque_var); 243 | Util::_memcpy(stack + vsp, &rotr_val, sizeof(T)); 244 | vsp += VSP(1); 245 | CHAIN(1); 246 | } else if constexpr (op == OP::ROTL) { 247 | volatile T imm = stack[vsp - 2]; 248 | volatile T key = stack[vsp - 1]; 249 | vsp -= VSP(2); 250 | volatile T rotl_val = Expression::rotl(imm, key + opaque_var); 251 | Util::_memcpy(stack + vsp, &rotl_val, sizeof(T)); 252 | vsp += VSP(1); 253 | CHAIN(1); 254 | } else if constexpr (op == OP::ALT_XOR) { 255 | volatile T imm = stack[vsp - 2]; 256 | volatile T key = stack[vsp - 1]; 257 | vsp -= VSP(2); 258 | volatile T mbaxor_val = Expression::alt_xor(imm, key + opaque_var); 259 | Util::_memcpy(stack + vsp, &mbaxor_val, sizeof(T)); 260 | vsp += VSP(1); 261 | CHAIN(1); 262 | } else 
if constexpr (op == OP::ALT_NOT) { 263 | volatile T imm = stack[vsp - 1]; 264 | vsp -= VSP(1); 265 | volatile T not_val = Expression::alt_not(imm + opaque_var, imm); 266 | Util::_memcpy(stack + vsp, ¬_val, sizeof(T)); 267 | vsp += VSP(1); 268 | CHAIN(1); 269 | } else if constexpr (op == OP::TERMINATOR) { 270 | T terminator = 0x0; 271 | Util::_memcpy(stack + vsp, &terminator, sizeof(T)); 272 | return; 273 | } else { 274 | assert(false && "Invalid opcode"); 275 | } 276 | } 277 | 278 | template VMSTR_INLINE std::string exec_str() { 279 | volatile size_t vsp = 0; 280 | volatile uint8_t stack[STACK_MAX_SIZE]; 281 | run(vsp, stack); 282 | 283 | uint8_t const *t = const_cast(stack); 284 | const char *t2 = reinterpret_cast(t); 285 | auto str = std::string(t2); 286 | 287 | return str; 288 | } 289 | 290 | template VMSTR_INLINE const char *exec_cstr() { 291 | volatile size_t vsp = 0; 292 | static volatile uint8_t stack[STACK_MAX_SIZE]; 293 | run(vsp, stack); 294 | 295 | uint8_t const *t = const_cast(stack); 296 | const char *t2 = reinterpret_cast(t); 297 | 298 | return t2; 299 | } 300 | 301 | template VMSTR_INLINE std::wstring exec_wstr() { 302 | volatile size_t vsp = 0; 303 | volatile uint16_t stack[STACK_MAX_SIZE]; 304 | run(vsp, stack); 305 | 306 | uint16_t const *t = const_cast(stack); 307 | const wchar_t *t2 = reinterpret_cast(t); 308 | auto str = std::wstring(t2); 309 | 310 | return str; 311 | } 312 | 313 | template VMSTR_INLINE const wchar_t *exec_cwstr() { 314 | volatile size_t vsp = 0; 315 | static volatile uint16_t stack[STACK_MAX_SIZE]; 316 | run(vsp, stack); 317 | 318 | uint16_t const *t = const_cast(stack); 319 | const wchar_t *t2 = reinterpret_cast(t); 320 | 321 | return t2; 322 | } 323 | 324 | } // namespace Vm 325 | } // namespace VmStr 326 | 327 | #define VM_STR(str) \ 328 | VmStr::Vm::exec_str(str)>() 329 | #define VM_CSTR(str) \ 330 | VmStr::Vm::exec_cstr(str)>() 331 | 332 | #define VM_W_STR(wstr) \ 333 | VmStr::Vm::exec_wstr(wstr)>() 334 | #define 
VM_W_CSTR(wstr) \ 335 | VmStr::Vm::exec_cwstr(wstr)>() 336 | --------------------------------------------------------------------------------