├── AllocaFixer.cpp ├── AllocaFixer.hpp ├── CFFlattenInfo.cpp ├── CFFlattenInfo.hpp ├── Config.hpp ├── DefUtil.cpp ├── DefUtil.hpp ├── HexRaysDeob.sln ├── HexRaysDeob.vcxproj ├── HexRaysDeob.vcxproj.filters ├── HexRaysDeob.vcxproj.user ├── HexRaysUtil.cpp ├── HexRaysUtil.hpp ├── LICENSE ├── MicrocodeExplorer.cpp ├── MicrocodeExplorer.hpp ├── PatternDeobfuscate.cpp ├── PatternDeobfuscate.hpp ├── PatternDeobfuscateUtil.cpp ├── PatternDeobfuscateUtil.hpp ├── README.md ├── TargetUtil.cpp ├── TargetUtil.hpp ├── Unflattener.cpp ├── Unflattener.hpp ├── bin ├── IDA71_32 │ └── HexRaysDeob.dll └── IDA72_32 │ └── HexRaysDeob.dll ├── main.cpp ├── makefile └── makefile.lnx /AllocaFixer.cpp: -------------------------------------------------------------------------------- 1 | // This file tries to fix the stack pointer differentials at call sites for 2 | // alloca(). Basically, this binary uses a GCC-like argument-passing schema 3 | // like sub esp, 4 / mov [esp], eax, except that the "sub esp, 4" is 4 | // implemented as a call to __alloca_probe. IDA usually handles these calls 5 | // remarkably well for ordinary compiled binaries, but this obfuscator doesn't 6 | // produce ordinary binaries. Thus, IDA's typical analysis fails to determine 7 | // the integer values passed to __alloca_probe, and hence also does not change 8 | // the stack pointer accordingly. (Note also that the binary also creates stack 9 | // buffers with this technique, not just function arguments.) 10 | 11 | // However, the decompiler is able to determine the integer parameters to 12 | // __alloca_probe. Thus, we examine all cross-references to __alloca_probe, 13 | // decompile the referring functions, extract the arguments, and use them 14 | // to set stack pointer differentials on the addresses after the calls. 
15 | 16 | #include 17 | 18 | #include 19 | #include "HexRaysUtil.hpp" 20 | #include 21 | #include "Config.hpp" 22 | 23 | // Finds calls to alloca in a function's decompilation microcode, and 24 | // records the integer parameter from each call site. 25 | struct AllocaFixer : minsn_visitor_t 26 | { 27 | // Results are stored here 28 | std::vector > m_FixupLocations; 29 | 30 | int visit_minsn(void) 31 | { 32 | // Only process calls to alloca 33 | if (curins->opcode != m_call || curins->l.t != mop_h || qstrcmp(curins->l.helper, "alloca") != 0) 34 | return 0; 35 | 36 | // Sanity check that the microinstruction's operand is a list of arguments 37 | if (curins->d.t != mop_f) 38 | { 39 | msg("[E] %a: Call to alloca()'s d operand was unexpectedly %s\n", curins->ea, mopt_t_to_string(curins->r.t)); 40 | return 0; 41 | } 42 | 43 | // Sanity check that the microinstruction's argument list is not null 44 | #if IDA_SDK_VERSION == 710 45 | mfuncinfo_t *func = curins->d.f; 46 | #elif IDA_SDK_VERSION >= 720 47 | mcallinfo_t *func = curins->d.f; 48 | #endif 49 | if (func == NULL) 50 | { 51 | msg("[E] %a: curins->d.f was NULL?", curins->ea); 52 | return 0; 53 | } 54 | 55 | // Sanity check that the call to alloca passes one argument 56 | #if IDA_SDK_VERSION == 710 57 | mfuncargs_t &args = func->args; 58 | #elif IDA_SDK_VERSION >= 720 59 | mcallargs_t &args = func->args; 60 | #endif 61 | if (args.size() != 1) 62 | { 63 | msg("[E] Call to alloca had %d arguments instead of 1?\n", args.size()); 64 | return 0; 65 | } 66 | 67 | // We can only fix the call site if its parameter is a constant number 68 | if (args[0].t != mop_n) 69 | { 70 | msg("[E] Call to alloca did not have a constant number; type was %s\n", mopt_t_to_string(args[0].t)); 71 | return 0; 72 | } 73 | 74 | // Everything went according to plan. Save the call to alloca's address and integer parameter. 
75 | m_FixupLocations.push_back(std::pair(curins->ea, args[0].nnn->value)); 76 | return 0; 77 | } 78 | }; 79 | 80 | // Find all calls to __alloca_probe, extract the parameters, and update the 81 | // stack pointer differentials. 82 | void FixCallsToAllocaProbe() 83 | { 84 | ea_t eaAlloca = get_name_ea(BADADDR, "__alloca_probe"); 85 | if (eaAlloca == BADADDR) 86 | { 87 | msg("[E] Couldn't find __alloca_probe\n"); 88 | return; 89 | } 90 | 91 | // Collect up all functions (as func_t * objects) that call __alloca_probe. 92 | std::set funcsCallingAlloca; 93 | xrefblk_t xr; 94 | bool bFirst = true; 95 | 96 | // Examine all addresses that reference __alloca_probe, and collect their 97 | // func_t * containing function objects. 98 | while (bFirst ? xr.first_to(eaAlloca, XREF_FAR) : xr.next_to()) 99 | { 100 | bFirst = false; 101 | if (xr.type != fl_CN) 102 | continue; 103 | 104 | func_t *f = get_func(xr.from); 105 | if (f == NULL) 106 | { 107 | msg("[E] Call to alloca from %a is not within a function; will not be processed\n", xr.from); 108 | continue; 109 | } 110 | 111 | funcsCallingAlloca.insert(f); 112 | } 113 | 114 | // For each function that calls __alloca_probe(), extract the address of 115 | // each such call and its integer argument. Set a stack pointer delta at 116 | // that address with that value. 117 | for (auto f : funcsCallingAlloca) 118 | { 119 | // Decompile the function 120 | mba_ranges_t mbr(f); 121 | hexrays_failure_t hf; 122 | mbl_array_t *mba = gen_microcode(mbr, &hf); 123 | if (mba == NULL) 124 | { 125 | msg("[E] FixCallsToAllocaProbe(%a): decompilation failed (%s)\n", f->start_ea, hf.desc().c_str()); 126 | continue; 127 | } 128 | 129 | // Extract the alloca information by visiting its top-level instructions 130 | AllocaFixer af; 131 | mba->for_all_insns(af); 132 | 133 | // We own the mbl_array_t produced by gen_microcode, so we have to delete it. 134 | delete mba; 135 | 136 | // For each location that references alloca... 
137 | for (auto g : af.m_FixupLocations) 138 | { 139 | // Set the stack point on the *subsequent* EA (thanks, Hex-Rays!) 140 | ea_t eaNext = get_item_end(g.first); 141 | msg("[I] Adding auto stack point at %a: %d\n", eaNext, -g.second); 142 | // ... fix its stack pointer differential 143 | if (!add_auto_stkpnt(f, eaNext, -g.second)) 144 | { 145 | msg("[E] Couldn't change stack delta to %d at %a\n", -g.second, g.first); 146 | // YOLO 147 | add_user_stkpnt(eaNext, -g.second); 148 | } 149 | } 150 | 151 | // Force re-analysis of the function 152 | reanalyze_function(f); 153 | } 154 | } 155 | 156 | -------------------------------------------------------------------------------- /AllocaFixer.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | void FixCallsToAllocaProbe(); 4 | 5 | -------------------------------------------------------------------------------- /CFFlattenInfo.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "HexRaysUtil.hpp" 3 | #include "CFFlattenInfo.hpp" 4 | #include "Config.hpp" 5 | 6 | #define MIN_NUM_COMPARISONS 2 7 | 8 | extern std::set g_BlackList; 9 | extern std::set g_WhiteList; 10 | 11 | 12 | static int debugmsg(const char *fmt, ...) 13 | { 14 | #if UNFLATTENVERBOSE 15 | va_list va; 16 | va_start(va, fmt); 17 | return vmsg(fmt, va); 18 | #endif 19 | return 0; 20 | } 21 | 22 | // This method determines whether a given function is likely obfuscated. It 23 | // does this by ensuring that: 24 | // 1) Some minimum number of comparisons are made against the "comparison 25 | // variable" 26 | // 2) The constant values used in the comparisons are sufficiently entropic. 27 | bool JZInfo::ShouldBlacklist() 28 | { 29 | // This check is pretty weak. I thought I could set the minimum number to 30 | // 6, but the pattern deobfuscators might eliminate some of them before 31 | // this function gets called. 
32 | if (nSeen < MIN_NUM_COMPARISONS) 33 | { 34 | #if UNFLATTENVERBOSE 35 | debugmsg("[I] Blacklisting due to lack of JZ/JG comparisons (%d < minimum of %d)\n", nSeen, MIN_NUM_COMPARISONS); 36 | #endif 37 | return true; 38 | }; 39 | 40 | // Count the number of 1-bits in the constant values used for comparison 41 | int iNumBits = 0; 42 | int iNumOnes = 0; 43 | for (auto num : nums) 44 | { 45 | iNumBits += num->size * 8; 46 | uint64 v = num->nnn->value; 47 | for (int i = 0; i < num->size * 8; ++i) 48 | { 49 | if (v & (1 << i)) 50 | ++iNumOnes; 51 | } 52 | } 53 | 54 | // Compute the percentage of 1-bits. Given that these constants seem to be 55 | // created pseudorandomly, the percentage should be roughly 1/2. 56 | float fEntropy = iNumBits == 0 ? 0.0 : (float)iNumOnes / (float(iNumBits)); 57 | #if UNFLATTENVERBOSE 58 | debugmsg("[I] %d comparisons, %d numbers, %d bits, %d ones, %f entropy\n", 59 | nSeen, 60 | nums.size(), 61 | iNumBits, 62 | iNumOnes, 63 | fEntropy); 64 | #endif 65 | 66 | // We'll give 10% leeway on the 50% expectation. 67 | if (fEntropy < 0.4 || fEntropy > 0.6) 68 | { 69 | warning("[I] Entropy %f indicates this function is not obfuscated\n", fEntropy); 70 | return true; 71 | } 72 | return false; 73 | } 74 | 75 | 76 | // This class looks for jz/jg comparisons against constant values. For each 77 | // thing being compared, we use a JZInfo structure to collect the number of 78 | // times it's been used in a comparison, and a list of the values it was 79 | // compared against. 80 | struct JZCollector : public minsn_visitor_t 81 | { 82 | std::vector m_SeenComparisons; 83 | int m_nMaxJz; 84 | 85 | JZCollector() : m_nMaxJz(-1) {}; 86 | 87 | int visit_minsn(void) 88 | { 89 | // We're looking for jz/jg instructions... 90 | if (curins->opcode != m_jz && curins->opcode != m_jg) 91 | return 0; 92 | 93 | // ... which compare something against a number ... 
94 | if (curins->r.t != mop_n) 95 | return 0; 96 | 97 | int iFound = 0; 98 | mop_t *thisMop = &curins->l; 99 | 100 | int idxFound = 0; 101 | // Search for the comparison operand in the saved information 102 | for (auto &sc : m_SeenComparisons) 103 | { 104 | if (equal_mops_ignore_size(*sc.op, *thisMop)) 105 | { 106 | // If found, update the counter and save the number 107 | sc.nSeen += 1; 108 | sc.nums.push_back(&curins->r); 109 | iFound = sc.nSeen; 110 | break; 111 | } 112 | ++idxFound; 113 | } 114 | 115 | // If we didn't find it in the vector, create a new JZInfo structure 116 | if (!iFound) 117 | { 118 | m_SeenComparisons.emplace_back(); 119 | JZInfo &jz = m_SeenComparisons.back(); 120 | jz.op = thisMop; 121 | jz.nSeen = 1; 122 | jz.nums.push_back(&curins->r); 123 | iFound = 1; 124 | } 125 | 126 | // If the variable we just saw has been used more often than the previous 127 | // candidate, mark this variable as the new candidate 128 | if (m_nMaxJz < 0 || iFound > m_SeenComparisons[m_nMaxJz].nSeen) 129 | m_nMaxJz = idxFound; 130 | 131 | return 0; 132 | } 133 | }; 134 | 135 | // This function finds the "first" block immediately before the control flow 136 | // flattening dispatcher begins. The logic is simple; start at the beginning 137 | // of the function, keep moving forward until the next block has more than one 138 | // predecessor. As it happens, this is where the assignment to the switch 139 | // dispatch variable takes place, and that's mostly why we want it. 140 | // The information is recorded in the arguments iFirst and iDispatch. 141 | mblock_t *GetFirstBlock(mbl_array_t *mba, int &iFirst, int &iDispatch) 142 | { 143 | // Initialise iFirst and iDispatch to erroneous values 144 | iFirst = -1, iDispatch = -1; 145 | 146 | mblock_t *mb; 147 | int iCurr = 0; 148 | 149 | while (true) 150 | { 151 | // If we find a block with more than one successor, we failed. 
152 | mb = mba->get_mblock(iCurr); 153 | if (mb->nsucc() != 1) 154 | { 155 | #if UNFLATTENVERBOSE 156 | debugmsg("[E] Block %d had %d (!= 1) successors\n", iCurr, mb->nsucc()); 157 | #endif 158 | return NULL; 159 | } 160 | 161 | // Get the successor block 162 | int iSucc = mb->succ(0); 163 | mblock_t *mNextBlock = mba->get_mblock(iSucc); 164 | 165 | // If the successor has more than one predecessor, we're done 166 | if (mNextBlock->npred() != 1) 167 | break; 168 | 169 | // Otherwise, move onto the next block 170 | iCurr = iSucc; 171 | } 172 | // We found it; pass the information back to the caller 173 | iFirst = iCurr; 174 | iDispatch = mb->succ(0); 175 | return mb; 176 | } 177 | 178 | // This class is used to find all variables that have 32-bit numeric values 179 | // assigned to them in the first block (as well as the values that are 180 | // assigned to them). 181 | struct BlockInsnAssignNumberExtractor : public minsn_visitor_t 182 | { 183 | std::vector > m_SeenAssignments; 184 | int visit_minsn() 185 | { 186 | // We're looking for MOV(const.4,x) 187 | if (curins->opcode != m_mov || curins->l.t != mop_n || curins->l.size != 4) 188 | return 0; 189 | 190 | // Record all such information in the vector 191 | m_SeenAssignments.push_back(std::pair(&curins->d, curins->l.nnn->value)); 192 | return 0; 193 | } 194 | }; 195 | 196 | // Protected functions might use either one, or two, variables for the switch 197 | // dispatch number. If it uses two, one of them is the "update" variable, whose 198 | // contents will be copied into the "comparison" variable in the first dispatch 199 | // block. This class is used to locate the "update" variable, by simply looking 200 | // for a variable whose contents are copied into the "comparison" variable, 201 | // which must have had a number assigned to it in the first block. 
202 | struct HandoffVarFinder : public minsn_visitor_t 203 | { 204 | // We're looking for assignments to this variable 205 | mop_t *m_ComparisonVar; 206 | 207 | // These are the numeric assignments from the first block 208 | std::vector > &m_SeenAssignments; 209 | 210 | // This information is generated by this class. Namely, it's a list of 211 | // variables that are seen copied into the comparison variable, as well 212 | // as a count of the number of times it is copied. 213 | std::vector > m_SeenCopies; 214 | 215 | HandoffVarFinder(mop_t *opMax, std::vector > &assignments) : 216 | m_ComparisonVar(opMax), 217 | m_SeenAssignments(assignments) 218 | {}; 219 | 220 | int visit_minsn(void) 221 | { 222 | // We want copies into our comparison variable 223 | if (curins->opcode != m_mov || !equal_mops_ignore_size(curins->d, *m_ComparisonVar)) 224 | return 0; 225 | 226 | // Iterate through the numeric assignments from the first block. These 227 | // are our candidates. 228 | for (auto &as : m_SeenAssignments) 229 | { 230 | if (equal_mops_ignore_size(curins->l, *as.first)) 231 | { 232 | // If we found a copy into our comparison variable from a 233 | // variable that was assigned to a constant in the first block, 234 | // add it to the vector (or increment its counter if it was 235 | // already there). 236 | bool bFound = false; 237 | for (auto sc : m_SeenCopies) 238 | { 239 | if (equal_mops_ignore_size(*as.first, *sc.first)) 240 | { 241 | sc.second += 1; 242 | bFound = true; 243 | } 244 | } 245 | if (!bFound) 246 | m_SeenCopies.push_back(std::pair(as.first, 1)); 247 | } 248 | } 249 | return 0; 250 | } 251 | }; 252 | 253 | // Once we know which variable is the one used for comparisons, look for all 254 | // jz instructions that compare a number against this variable. This then tells 255 | // us which number corresponds to which basic block. 
256 | struct JZMapper : public minsn_visitor_t 257 | { 258 | std::map &m_KeyToBlock; 259 | std::map &m_BlockToKey; 260 | mop_t *m_CompareVar; 261 | mop_t *m_AssignVar; 262 | int m_DispatchBlockNo; 263 | JZMapper(mop_t *mc, mop_t *ma, int iFirst, std::map &map, std::map &map2) : 264 | m_CompareVar(mc), 265 | m_AssignVar(ma), 266 | m_DispatchBlockNo(iFirst), 267 | m_KeyToBlock(map), 268 | m_BlockToKey(map2) {}; 269 | 270 | int visit_minsn(void) 271 | { 272 | // We're looking for jz instructions that compare a number ... 273 | if (curins->opcode != m_jz || curins->r.t != mop_n) 274 | return 0; 275 | 276 | // ... against our comparison variable ... 277 | if (!equal_mops_ignore_size(*m_CompareVar, curins->l)) 278 | { 279 | // ... or, if it's the dispatch block, possibly the assignment variable ... 280 | if (blk->serial != m_DispatchBlockNo || !equal_mops_ignore_size(*m_AssignVar, curins->l)) 281 | return 0; 282 | } 283 | 284 | // ... and the destination of the jz must be a block 285 | if(curins->d.t != mop_b) 286 | return 0; 287 | 288 | #if UNFLATTENVERBOSE 289 | debugmsg("[I] Inserting %08lx->%d into map\n", (uint32)curins->r.nnn->value, curins->d.b); 290 | #endif 291 | // Record the information in two maps 292 | uint64 keyVal = curins->r.nnn->value; 293 | int blockNo = curins->d.b; 294 | 295 | m_KeyToBlock[keyVal] = blockNo; 296 | m_BlockToKey[blockNo] = keyVal; 297 | return 0; 298 | } 299 | }; 300 | 301 | // Compute dominator information for the function. 302 | array_of_bitsets *ComputeDominators(mbl_array_t *mba) 303 | { 304 | int iNumBlocks = mba->qty; 305 | assert(iNumBlocks >= 1); 306 | 307 | // Use Hex-Rays' handy array_of_bitsets to represent dominators 308 | array_of_bitsets *domInfo = new array_of_bitsets; 309 | domInfo->resize(iNumBlocks); 310 | 311 | // Per the algorithm, initialize each block to be dominated by every block 312 | for (auto &bs : *domInfo) 313 | bs.fill_with_ones(iNumBlocks - 1); 314 | 315 | // ... 
except the first block, which only dominates itself 316 | domInfo->front().clear(); 317 | domInfo->front().add(0); 318 | 319 | // Now we've got a standard, not-especially-optimized dataflow analysis 320 | // fixedpoint computation... 321 | bool bChanged; 322 | do 323 | { 324 | bChanged = false; 325 | // For every block... 326 | for (int i = 1; i < iNumBlocks; ++i) 327 | { 328 | // Grab its current dataflow value and copy it 329 | bitset_t &bsCurr = domInfo->at(i); 330 | bitset_t bsBefore(bsCurr); 331 | 332 | // Get that block from the graph 333 | mblock_t *blockI = mba->get_mblock(i); 334 | 335 | // Iterate over its predecessors, intersecting their dataflow 336 | // values against this one's values 337 | for (int j = 0; j < blockI->npred(); ++j) 338 | bsCurr.intersect(domInfo->at(blockI->pred(j))); 339 | 340 | // Then, re-indicate that the block dominates itself 341 | bsCurr.add(i); 342 | 343 | // If this process changed the dataflow information, we're going to 344 | // need another iteration 345 | bChanged |= bsBefore != bsCurr; 346 | } 347 | } 348 | // Keep going until the dataflow information stops changing 349 | while (bChanged); 350 | 351 | // The dominator information has been computed. Now we're going to derive 352 | // some information from it. Namely, the current representation tells us, 353 | // for each block, which blocks dominate it. We want to know, instead, for 354 | // each block, which blocks are dominated by it. This is a simple 355 | // transformation; for each block b and dominator d, update the information 356 | // for d to indicate that it dominates b. 
357 | 358 | // Create a new array_of_bitsets 359 | array_of_bitsets *domInfoOutput = new array_of_bitsets; 360 | domInfoOutput->resize(iNumBlocks); 361 | 362 | // Iterate over each block 363 | for (int i = 0; i < iNumBlocks; ++i) 364 | { 365 | // Get the dominator information for this block (b) 366 | bitset_t &bsCurr = domInfo->at(i); 367 | 368 | // For each block d that dominates this one, mark that d dominates b 369 | for (auto it = bsCurr.begin(); it != bsCurr.end(); bsCurr.inc(it)) 370 | domInfoOutput->at(*it).add(i); 371 | } 372 | 373 | // Don't need the original dominator information anymore; get rid of it 374 | delete domInfo; 375 | 376 | // Just return the inverted dominator information 377 | return domInfoOutput; 378 | } 379 | 380 | // Convenience function to look up a block number by its key. This way, we can 381 | // write the iterator-end check once, so clients don't have to do it. 382 | int CFFlattenInfo::FindBlockByKey(uint64 key) 383 | { 384 | if (m_KeyToBlock.find(key) == m_KeyToBlock.end()) 385 | return -1; 386 | return m_KeyToBlock[key]; 387 | } 388 | 389 | // This function computes all of the preliminary information needed for 390 | // unflattening. 391 | bool CFFlattenInfo::GetAssignedAndComparisonVariables(mblock_t *blk) 392 | { 393 | // Erase any existing information in this structure. 394 | Clear(true); 395 | 396 | // Ensure that this function hasn't been blacklisted (e.g. because entropy 397 | // calculation indicates that it isn't obfuscated). 398 | mbl_array_t *mba = blk->mba; 399 | if (g_BlackList.find(mba->entry_ea) != g_BlackList.end()) 400 | return false; 401 | 402 | // There's also a separate whitelist for functions that were previously 403 | // seen to be obfuscated. 404 | bool bWasWhitelisted = g_WhiteList.find(mba->entry_ea) != g_WhiteList.end(); 405 | 406 | // Look for the variable that was used in the largest number of jz/jg 407 | // comparisons against a constant. This is our "comparison" variable. 
408 | JZCollector jzc; 409 | mba->for_all_topinsns(jzc); 410 | if (jzc.m_nMaxJz < 0) 411 | { 412 | // If there were no comparisons and we haven't seen this function 413 | // before, blacklist it. 414 | #if UNFLATTENVERBOSE 415 | debugmsg("[I] No comparisons seen; failed\n"); 416 | #endif 417 | if (!bWasWhitelisted) 418 | g_BlackList.insert(mba->entry_ea); 419 | return false; 420 | } 421 | 422 | // Otherwise, we were able to find jz comparison information. Use that to 423 | // determine if the constants look entropic enough. If not, blacklist this 424 | // function. If so, whitelist it. 425 | if (!bWasWhitelisted) 426 | { 427 | if (jzc.m_SeenComparisons[jzc.m_nMaxJz].ShouldBlacklist()) 428 | { 429 | g_BlackList.insert(mba->entry_ea); 430 | return false; 431 | } 432 | g_WhiteList.insert(mba->entry_ea); 433 | } 434 | 435 | // opMax is our "comparison" variable used in the control flow switch. 436 | mop_t *opMax = jzc.m_SeenComparisons[jzc.m_nMaxJz].op; 437 | 438 | // Find the "first" block in the function, the one immediately before the 439 | // control flow switch. 440 | mblock_t *first = GetFirstBlock(mba, this->iFirst, this->iDispatch); 441 | if (first == NULL) 442 | { 443 | #if UNFLATTENVERBOSE 444 | debugmsg("[E] Can't find top-level block in function\n"); 445 | #endif 446 | return false; 447 | } 448 | 449 | // Get all variables assigned to numbers in the first block. If we find the 450 | // comparison variable in there, then the assignment and comparison 451 | // variables are the same. If we don't, then there are two separate 452 | // variables. 453 | BlockInsnAssignNumberExtractor fbe; 454 | first->for_all_insns(fbe); 455 | 456 | // Was the comparison variable assigned a number in the first block? 
457 | bool bFound = false; 458 | for (auto as : fbe.m_SeenAssignments) 459 | { 460 | if (equal_mops_ignore_size(*as.first, *opMax)) 461 | { 462 | bFound = true; 463 | break; 464 | } 465 | } 466 | 467 | // This is the "assignment" variable, whose value is updated by the switch 468 | // case code 469 | mop_t *localOpAssigned; 470 | 471 | // If the "comparison" variable was assigned a number in the first block, 472 | // then the function is only using one variable, not two, for dispatch. 473 | if (bFound) 474 | localOpAssigned = opMax; 475 | 476 | // Otherwise, look for assignments of one of the variables assigned a 477 | // number in the first block to the comparison variable 478 | else 479 | { 480 | // For all variables assigned a number in the first block, find all 481 | // assignments throughout the function to the comparison variable 482 | HandoffVarFinder hvf(opMax, fbe.m_SeenAssignments); 483 | mba->for_all_topinsns(hvf); 484 | 485 | // There should have only been one of them; is that true? 486 | if (hvf.m_SeenCopies.size() != 1) 487 | { 488 | #if UNFLATTENVERBOSE 489 | debugmsg("[E] Comparison var was copied from %d assigned-to-constant variables, not 1 as expected\n", hvf.m_SeenCopies.size()); 490 | for (auto sc : hvf.m_SeenCopies) 491 | debugmsg("\t%s (%d copies)\n", mopt_t_to_string(sc.first->t), sc.second); 492 | #endif 493 | return false; 494 | } 495 | 496 | // If only one variable (X) assigned a number in the first block was 497 | // ever copied into the comparison variable, then X is our "assignment" 498 | // variable. 499 | localOpAssigned = hvf.m_SeenCopies[0].first; 500 | 501 | // Find the number that was assigned to the assignment variable in the 502 | // first block. 503 | bool bFound = false; 504 | for (auto as : fbe.m_SeenAssignments) 505 | { 506 | if (equal_mops_ignore_size(*as.first, *localOpAssigned)) 507 | { 508 | uFirst = as.second; 509 | bFound = true; 510 | break; 511 | } 512 | } 513 | if (!bFound) 514 | { 515 | debugmsg("[E] ??? 
couldn't find assignment to assignment variable?\n"); 516 | return false; 517 | } 518 | } 519 | // Make copies of the comparison and assignment variables so we don't run 520 | // into liveness issues 521 | this->opCompared = new mop_t(*opMax); 522 | this->opAssigned = new mop_t(*localOpAssigned); 523 | 524 | // Extract the key-to-block mapping for each JZ against the comparison 525 | // variable 526 | JZMapper jzm(opCompared, localOpAssigned, iDispatch, m_KeyToBlock, m_BlockToKey); 527 | mba->for_all_topinsns(jzm); 528 | 529 | // Save off the current function's starting EA 530 | m_WhichFunc = mba->entry_ea; 531 | 532 | // Compute the dominator information for this function and stash it 533 | array_of_bitsets *ab = ComputeDominators(mba); 534 | m_DomInfo = ab; 535 | 536 | // Compute some more information from the dominators. Basically, once the 537 | // control flow dispatch switch has transferred control to the function's 538 | // code, there might be multiple basic blocks that can execute before 539 | // control goes back to the switch statement. For all of those blocks, we 540 | // want to know the "first" block as part of that region of the graph, 541 | // i.e., the one targeted by a jump out of the control flow dispatch 542 | // switch. 543 | 544 | // Allocate an array mapping each basic block to the block that dominates 545 | // it and was targeted by the control flow switch. 546 | int *DominatedClusters = new int[mba->qty]; 547 | memset(DominatedClusters, 0xFF, sizeof(int)*mba->qty); 548 | 549 | // For each block/key pair (the targets of the control flow switch) 550 | for (auto bk : m_BlockToKey) 551 | { 552 | int i = bk.first; 553 | // For each block dominated by this control flow switch target, mark 554 | // that this block its the beginning of its cluster. 555 | for (auto it = ab->at(i).begin(); it != ab->at(i).end(); ab->at(i).inc(it)) 556 | DominatedClusters[*it] = i; 557 | } 558 | 559 | // Save that information off. 
560 | m_DominatedClusters = DominatedClusters; 561 | 562 | // Ready to go! 563 | return true; 564 | } 565 | 566 | -------------------------------------------------------------------------------- /CFFlattenInfo.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | struct JZInfo 5 | { 6 | JZInfo() : op(NULL) {}; 7 | 8 | mop_t *op; 9 | int nSeen; 10 | std::vector nums; 11 | 12 | bool ShouldBlacklist(); 13 | }; 14 | 15 | struct CFFlattenInfo 16 | { 17 | mop_t *opAssigned, *opCompared; 18 | uint64 uFirst; 19 | int iFirst, iDispatch; 20 | std::map m_KeyToBlock; 21 | std::map m_BlockToKey; 22 | ea_t m_WhichFunc; 23 | array_of_bitsets *m_DomInfo; 24 | int *m_DominatedClusters; 25 | 26 | int FindBlockByKey(uint64 key); 27 | void Clear(bool bFree) 28 | { 29 | if (bFree && opAssigned != NULL) 30 | delete opAssigned; 31 | opAssigned = NULL; 32 | 33 | if (bFree && opCompared != NULL) 34 | delete opCompared; 35 | opCompared = NULL; 36 | 37 | iFirst = -1; 38 | iDispatch = -1; 39 | uFirst = 0LL; 40 | m_WhichFunc = BADADDR; 41 | if (bFree && m_DomInfo != NULL) 42 | delete m_DomInfo; 43 | m_DomInfo = NULL; 44 | 45 | if (bFree && m_DominatedClusters != NULL) 46 | delete m_DominatedClusters; 47 | m_DominatedClusters = NULL; 48 | 49 | m_KeyToBlock.clear(); 50 | m_BlockToKey.clear(); 51 | }; 52 | CFFlattenInfo() { Clear(false); } 53 | ~CFFlattenInfo() { Clear(true); } 54 | bool GetAssignedAndComparisonVariables(mblock_t *blk); 55 | }; 56 | -------------------------------------------------------------------------------- /Config.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define DO_OPTIMIZATION 1 4 | #define VERBOSE 0 5 | #define OPTVERBOSE 0 6 | #define UNFLATTENVERBOSE 0 7 | #define UNFLATTENDEBUG 0 8 | -------------------------------------------------------------------------------- /DefUtil.cpp: 
-------------------------------------------------------------------------------- 1 | #define USE_DANGEROUS_FUNCTIONS 2 | #include 3 | #include "HexRaysUtil.hpp" 4 | #include "DefUtil.hpp" 5 | #include "Config.hpp" 6 | 7 | static int debugmsg(const char *fmt, ...) 8 | { 9 | #if UNFLATTENVERBOSE 10 | va_list va; 11 | va_start(va, fmt); 12 | return vmsg(fmt, va); 13 | #endif 14 | return 0; 15 | } 16 | 17 | // Put an mop_t into an mlist_t. The op must be either a register or a stack 18 | // variable. 19 | bool InsertOp(mblock_t *mb, mlist_t &ml, mop_t *op) 20 | { 21 | if (op->t != mop_r && op->t != mop_S) 22 | return false; 23 | 24 | // I needed help from Hex-Rays with this line. Some of the example plugins 25 | // showed how to insert a register into an mlist_t. None of them showed 26 | // how to insert a stack variable. I figured out a way to do it by reverse 27 | // engineering Hex-Rays, but it seemed really janky. This is The Official 28 | // Method (TM). 29 | mb->append_use_list(&ml, *op, MUST_ACCESS); 30 | return true; 31 | 32 | // For posterity, here was what I came up with on my own for inserting a 33 | // stack variable into an mlist_t: 34 | /* 35 | ivl_t ivl(op->s->off | MAX_SUPPORTED_STACK_SIZE, op->size); 36 | ml.mem.add(ivl); 37 | */ 38 | } 39 | 40 | // Ilfak sent me this function in response to a similar support request. It 41 | // walks backwards through a block, instruction-by-instruction, looking at 42 | // what each instruction defines. It stops when it finds definitions for 43 | // everything in the mlist_t, or when it hits the beginning of the block. 44 | minsn_t *my_find_def_backwards(mblock_t *mb, mlist_t &ml, minsn_t *start) 45 | { 46 | minsn_t *mend = mb->head; 47 | for (minsn_t *p = start != NULL ? 
start : mb->tail; p != NULL; p = p->prev) 48 | { 49 | mlist_t def = mb->build_def_list(*p, MAY_ACCESS | FULL_XDSU); 50 | if (def.includes(ml)) 51 | return p; 52 | } 53 | return NULL; 54 | } 55 | 56 | // This is a nearly identical version of the function above, except it works 57 | // in the forward direction rather than backwards. 58 | minsn_t *my_find_def_forwards(mblock_t *mb, mlist_t &ml, minsn_t *start) 59 | { 60 | minsn_t *mend = mb->head; 61 | for (minsn_t *p = start != NULL ? start : mb->head; p != NULL; p = p->next) 62 | { 63 | mlist_t def = mb->build_def_list(*p, MAY_ACCESS | FULL_XDSU); 64 | if (def.includes(ml)) 65 | return p; 66 | } 67 | return NULL; 68 | 69 | } 70 | 71 | // This function has way too many arguments. Basically, it's a wrapper around 72 | // my_find_def_backwards from above. It is extended in the following ways: 73 | // * If my_find_def_backwards identifies a definition of the variable "op" 74 | // which is an assignment from another variable, this function then continues 75 | // looking for numeric assignments to that variable (and recursively so, if 76 | // that variable is in turn assigned from another variable). 77 | // * It keeps a list of all the assignment instructions it finds along the way, 78 | // storing them in the vector passed as the "chain" argument. 79 | // * It has support for traversing more than one basic block in a graph, if 80 | // the bRecursive argument is true. It won't traverse into blocks with more 81 | // than one successor if bAllowMultiSuccs is false. In any case, it will 82 | // never traverse past the block numbered iBlockStop, if that parameter is 83 | // non-negative. 84 | bool FindNumericDefBackwards(mblock_t *blk, mop_t *op, mop_t *&opNum, MovChain &chain, bool bRecursive, bool bAllowMultiSuccs, int iBlockStop) 85 | { 86 | mbl_array_t *mba = blk->mba; 87 | 88 | char buf[1000]; 89 | mlist_t ml; 90 | 91 | if (!InsertOp(blk, ml, op)) 92 | return false; 93 | 94 | // Start from the end of the block. 
This variable gets updated when a copy 95 | // is encountered, so that subsequent searches start from the right place. 96 | minsn_t *mStart = NULL; 97 | do 98 | { 99 | // Told you this function was just a wrapper around 100 | // my_find_def_backwards. 101 | minsn_t *mDef = my_find_def_backwards(blk, ml, mStart); 102 | 103 | // If we did find a definition... 104 | if (mDef != NULL) 105 | { 106 | // Ensure that it's a mov instruction. We don't want, for example, 107 | // an "stx" instruction, which is assumed to redefine everything 108 | // until its aliasing information is refined. 109 | if (mDef->opcode != m_mov) 110 | { 111 | mcode_t_to_string(mDef, buf, sizeof(buf)); 112 | #if UNFLATTENVERBOSE 113 | debugmsg("[E] FindNumericDef: found %s\n", buf); 114 | #endif 115 | return false; 116 | } 117 | 118 | // Now that we found a mov, add it to the chain. 119 | chain.emplace_back(); 120 | MovInfo &mi = chain.back(); 121 | mi.opCopy = &mDef->l; 122 | mi.iBlock = blk->serial; 123 | mi.insMov = mDef; 124 | 125 | // Was it a numeric assignment? 126 | if (mDef->l.t == mop_n) 127 | { 128 | // Great! We're done. 129 | opNum = &mDef->l; 130 | return true; 131 | } 132 | 133 | // Otherwise, if it was not a numeric assignment, then try to track 134 | // whatever was assigned to it. This can only succeed if the thing 135 | // that was assigned was a register or stack variable. 136 | #if UNFLATTENVERBOSE 137 | qstring qs; 138 | mDef->l.print(&qs); 139 | tag_remove(&qs); 140 | debugmsg("[III] Now tracking %s\n", qs.c_str()); 141 | #endif 142 | 143 | // Try to start tracking the other thing... 144 | ml.clear(); 145 | if (!InsertOp(blk, ml, &mDef->l)) 146 | return false; 147 | 148 | // Resume the search from the assignment instruction we just 149 | // processed. 150 | mStart = mDef; 151 | } 152 | 153 | // Otherwise, we did not find a definition of the currently-tracked 154 | // variable on this block. Try to continue if the parameters allow. 
155 | else 156 | { 157 | // If recursion was disallowed, or we reached the topmost legal 158 | // block, then quit. 159 | if (!bRecursive || blk->serial == iBlockStop) 160 | return false; 161 | 162 | // If there is more than one predecessor for this block, we don't 163 | // know which one to follow, so stop. 164 | if (blk->npred() != 1) 165 | return false; 166 | 167 | // Recurse into sole predecessor block 168 | int iPred = blk->pred(0); 169 | blk = mba->get_mblock(iPred); 170 | 171 | // If the predecessor has more than one successor, check to see 172 | // whether the arguments allow that. 173 | if (!bAllowMultiSuccs && blk->nsucc() != 1) 174 | return false; 175 | 176 | // Resume the search at the end of the new block. 177 | mStart = NULL; 178 | } 179 | } while (true); 180 | return false; 181 | } 182 | 183 | // This function finds a numeric definition by searching in the forward 184 | // direction. 185 | mop_t *FindForwardNumericDef(mblock_t *blk, mop_t *mop, minsn_t *&assign_insn) 186 | { 187 | mlist_t ml; 188 | if (!InsertOp(blk, ml, mop)) 189 | return NULL; 190 | 191 | // Find a forward definition 192 | assign_insn = my_find_def_forwards(blk, ml, NULL); 193 | if (assign_insn != NULL) 194 | { 195 | 196 | #if UNFLATTENVERBOSE 197 | qstring qs; 198 | assign_insn->print(&qs); 199 | tag_remove(&qs); 200 | debugmsg("[III] Forward search found %s\n", qs.c_str()); 201 | #endif 202 | 203 | // We only want MOV instructions with numeric left-hand sides 204 | if (assign_insn->opcode != m_mov || assign_insn->l.t != mop_n) 205 | return NULL; 206 | 207 | // Return the numeric operand if we found it 208 | return &assign_insn->l; 209 | } 210 | return NULL; 211 | } 212 | 213 | // This function is just a thin wrapper around FindForwardNumericDef, which 214 | // also inserts the mov into the "chain" argument. 
215 | mop_t *FindForwardStackVarDef(mblock_t *mbClusterHead, mop_t *opCopy, MovChain &chain) 216 | { 217 | // Must be a non-NULL stack variable 218 | if (opCopy == NULL || opCopy->t != mop_S) 219 | return NULL; 220 | 221 | minsn_t *ins; 222 | 223 | // Find the definition 224 | mop_t *num = FindForwardNumericDef(mbClusterHead, opCopy, ins); 225 | if (num == NULL) 226 | return NULL; 227 | 228 | #if UNFLATTENVERBOSE 229 | qstring qs; 230 | num->print(&qs); 231 | tag_remove(&qs); 232 | debugmsg("[III] Forward method found %s!\n", qs.c_str()); 233 | #endif 234 | 235 | // If the found definition was suitable, add the assignment to the chain 236 | chain.emplace_back(); 237 | MovInfo &mi = chain.back(); 238 | mi.opCopy = num; 239 | mi.iBlock = mbClusterHead->serial; 240 | mi.insMov = ins; 241 | 242 | // Return the number 243 | return num; 244 | } 245 | 246 | -------------------------------------------------------------------------------- /DefUtil.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | struct MovInfo 6 | { 7 | mop_t *opCopy; 8 | minsn_t *insMov; 9 | int iBlock; 10 | }; 11 | 12 | typedef std::vector MovChain; 13 | 14 | bool FindNumericDefBackwards(mblock_t *blk, mop_t *op, mop_t *&opNum, MovChain &chain, bool bRecursive, bool bAllowMultiSuccs, int iBlockStop = -1); 15 | mop_t *FindForwardStackVarDef(mblock_t *mbClusterHead, mop_t *opCopy, MovChain &chain); -------------------------------------------------------------------------------- /HexRaysDeob.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.27428.1 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "HexRaysDeob", "HexRaysDeob.vcxproj", "{5EF5C070-D860-43CF-A65C-3934E448C8F2}" 7 | EndProject 8 | Global 9 | 
GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | ida32 Debug|x64 = ida32 Debug|x64 11 | ida32 Debug|x86 = ida32 Debug|x86 12 | ida64 Debug|x64 = ida64 Debug|x64 13 | ida64 Debug|x86 = ida64 Debug|x86 14 | Release|x64 = Release|x64 15 | Release|x86 = Release|x86 16 | EndGlobalSection 17 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 18 | {5EF5C070-D860-43CF-A65C-3934E448C8F2}.ida32 Debug|x64.ActiveCfg = ida32 Debug|x64 19 | {5EF5C070-D860-43CF-A65C-3934E448C8F2}.ida32 Debug|x64.Build.0 = ida32 Debug|x64 20 | {5EF5C070-D860-43CF-A65C-3934E448C8F2}.ida32 Debug|x86.ActiveCfg = ida32 Debug|Win32 21 | {5EF5C070-D860-43CF-A65C-3934E448C8F2}.ida32 Debug|x86.Build.0 = ida32 Debug|Win32 22 | {5EF5C070-D860-43CF-A65C-3934E448C8F2}.ida64 Debug|x64.ActiveCfg = ida64 Debug|x64 23 | {5EF5C070-D860-43CF-A65C-3934E448C8F2}.ida64 Debug|x64.Build.0 = ida64 Debug|x64 24 | {5EF5C070-D860-43CF-A65C-3934E448C8F2}.ida64 Debug|x86.ActiveCfg = ida64 Debug|Win32 25 | {5EF5C070-D860-43CF-A65C-3934E448C8F2}.ida64 Debug|x86.Build.0 = ida64 Debug|Win32 26 | {5EF5C070-D860-43CF-A65C-3934E448C8F2}.Release|x64.ActiveCfg = Release|x64 27 | {5EF5C070-D860-43CF-A65C-3934E448C8F2}.Release|x64.Build.0 = Release|x64 28 | {5EF5C070-D860-43CF-A65C-3934E448C8F2}.Release|x86.ActiveCfg = Release|Win32 29 | {5EF5C070-D860-43CF-A65C-3934E448C8F2}.Release|x86.Build.0 = Release|Win32 30 | EndGlobalSection 31 | GlobalSection(SolutionProperties) = preSolution 32 | HideSolutionNode = FALSE 33 | EndGlobalSection 34 | GlobalSection(ExtensibilityGlobals) = postSolution 35 | SolutionGuid = {82CCB6F1-6963-45C3-8EFA-C8068710E082} 36 | EndGlobalSection 37 | EndGlobal 38 | -------------------------------------------------------------------------------- /HexRaysDeob.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | ida32 Debug 6 | Win32 7 | 8 | 9 | ida64 Debug 10 | Win32 11 | 12 | 13 | ida64 Debug 14 | x64 15 | 16 | 17 | Release 18 | 
Win32 19 | 20 | 21 | ida32 Debug 22 | x64 23 | 24 | 25 | Release 26 | x64 27 | 28 | 29 | 30 | 15.0 31 | {5EF5C070-D860-43CF-A65C-3934E448C8F2} 32 | Win32Proj 33 | 8.1 34 | 35 | 36 | 37 | Application 38 | true 39 | v141 40 | 41 | 42 | Application 43 | true 44 | v141 45 | 46 | 47 | Application 48 | false 49 | v141 50 | 51 | 52 | DynamicLibrary 53 | true 54 | v141 55 | 56 | 57 | DynamicLibrary 58 | true 59 | v141 60 | 61 | 62 | Application 63 | false 64 | v141 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | true 92 | $(VC_IncludePath);$(WindowsSDK_IncludePath);c:\work\src\idasdk72\include 93 | 94 | 95 | true 96 | $(VC_IncludePath);$(WindowsSDK_IncludePath);c:\work\src\idasdk72\include 97 | 98 | 99 | true 100 | 101 | 102 | $(VC_IncludePath);$(WindowsSDK_IncludePath);c:\work\src\idasdk72\include 103 | 104 | 105 | $(VC_IncludePath);$(WindowsSDK_IncludePath);c:\work\src\idasdk72\include 106 | 107 | 108 | 109 | WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions) 110 | MultiThreadedDebugDLL 111 | Level3 112 | ProgramDatabase 113 | Disabled 114 | 115 | 116 | MachineX86 117 | true 118 | Windows 119 | 120 | 121 | 122 | 123 | WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions) 124 | MultiThreadedDebugDLL 125 | Level3 126 | ProgramDatabase 127 | Disabled 128 | 129 | 130 | MachineX86 131 | true 132 | Windows 133 | 134 | 135 | 136 | 137 | WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions) 138 | MultiThreadedDLL 139 | Level3 140 | ProgramDatabase 141 | 142 | 143 | MachineX86 144 | true 145 | Windows 146 | true 147 | true 148 | 149 | 150 | 151 | 152 | MultiThreadedDLL 153 | _WINDLL;%(PreprocessorDefinitions);__NT__;__IDP__;__X64__ 154 | 155 | 156 | $(OutDir)\$(ProjectName).dll 157 | /EXPORT:PLUGIN %(AdditionalOptions) 158 | 
kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies);c:\work\src\idasdk72\lib\x64_win_vc_32\ida.lib 159 | DebugFull 160 | 161 | 162 | 163 | 164 | MultiThreadedDLL 165 | _WINDLL;%(PreprocessorDefinitions);__NT__;__IDP__;__X64__;__EA64__; 166 | 167 | 168 | $(OutDir)\$(ProjectName).dll 169 | /EXPORT:PLUGIN %(AdditionalOptions) 170 | kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies);c:\work\src\idasdk72\lib\x64_win_vc_64\ida.lib 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /HexRaysDeob.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | Source Files 26 | 27 | 28 | Source Files 29 | 30 | 31 | Source Files 32 | 33 | 34 | Source Files 35 | 36 | 37 | Source Files 38 | 39 | 40 | Source Files 41 | 42 | 43 | Source Files 44 | 45 | 46 | Source Files 47 | 48 | 49 | 50 | 51 | Header Files 52 | 53 | 54 | Source Files 55 | 56 | 57 | Source Files 58 | 59 | 60 | Source Files 61 | 62 | 63 | Source Files 64 | 65 | 66 | Source Files 67 | 68 | 69 | Source Files 70 | 71 | 72 | Source Files 73 | 74 | 75 | Header Files 76 | 77 | 78 | Source Files 79 | 80 | 81 | 
-------------------------------------------------------------------------------- /HexRaysDeob.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | C:\Program Files\IDA 7.2\ida.exe 5 | WindowsLocalDebugger 6 | 7 | 8 | c:\Program Files\IDA 7.2\ida64.exe 9 | WindowsLocalDebugger 10 | 11 | -------------------------------------------------------------------------------- /HexRaysUtil.cpp: -------------------------------------------------------------------------------- 1 | #define USE_DANGEROUS_FUNCTIONS 2 | #include 3 | 4 | // Produce a string for an operand type 5 | const char *mopt_t_to_string(mopt_t t) 6 | { 7 | switch (t) 8 | { 9 | case mop_z: return "mop_z"; 10 | case mop_r: return "mop_r"; 11 | case mop_n: return "mop_n"; 12 | case mop_str: return "mop_str"; 13 | case mop_d: return "mop_d"; 14 | case mop_S: return "mop_S"; 15 | case mop_v: return "mop_v"; 16 | case mop_b: return "mop_b"; 17 | case mop_f: return "mop_f"; 18 | case mop_l: return "mop_l"; 19 | case mop_a: return "mop_a"; 20 | case mop_h: return "mop_h"; 21 | case mop_c: return "mop_c"; 22 | case mop_fn: return "mop_fn"; 23 | case mop_p: return "mop_p"; 24 | case mop_sc: return "mop_sc"; 25 | }; 26 | return "???"; 27 | } 28 | 29 | // Produce a brief representation of a microinstruction, including the types 30 | // of its operands. 
31 | void mcode_t_to_string(minsn_t *o, char *outBuf, size_t n) 32 | { 33 | switch (o->opcode) 34 | { 35 | case m_nop: snprintf(outBuf, n, "m_nop"); break; 36 | case m_stx: snprintf(outBuf, n, "m_stx(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 37 | case m_ldx: snprintf(outBuf, n, "m_ldx(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 38 | case m_ldc: snprintf(outBuf, n, "m_ldc(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 39 | case m_mov: snprintf(outBuf, n, "m_mov(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 40 | case m_neg: snprintf(outBuf, n, "m_neg(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 41 | case m_lnot: snprintf(outBuf, n, "m_lnot(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 42 | case m_bnot: snprintf(outBuf, n, "m_bnot(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 43 | case m_xds: snprintf(outBuf, n, "m_xds(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 44 | case m_xdu: snprintf(outBuf, n, "m_xdu(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 45 | case m_low: snprintf(outBuf, n, "m_low(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 46 | case m_high: snprintf(outBuf, n, "m_high(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 47 | case m_add: snprintf(outBuf, n, "m_add(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 48 | case m_sub: snprintf(outBuf, n, "m_sub(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 49 | case m_mul: snprintf(outBuf, n, "m_mul(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 50 | case m_udiv: snprintf(outBuf, n, "m_udiv(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), 
mopt_t_to_string(o->d.t)); break; 51 | case m_sdiv: snprintf(outBuf, n, "m_sdiv(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 52 | case m_umod: snprintf(outBuf, n, "m_umod(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 53 | case m_smod: snprintf(outBuf, n, "m_smod(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 54 | case m_or: snprintf(outBuf, n, "m_or(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 55 | case m_and: snprintf(outBuf, n, "m_and(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 56 | case m_xor: snprintf(outBuf, n, "m_xor(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 57 | case m_shl: snprintf(outBuf, n, "m_shl(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 58 | case m_shr: snprintf(outBuf, n, "m_shr(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 59 | case m_sar: snprintf(outBuf, n, "m_sar(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 60 | case m_cfadd: snprintf(outBuf, n, "m_cfadd(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 61 | case m_ofadd: snprintf(outBuf, n, "m_ofadd(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 62 | case m_cfshl: snprintf(outBuf, n, "m_cfshl(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 63 | case m_cfshr: snprintf(outBuf, n, "m_cfshr(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 64 | case m_sets: snprintf(outBuf, n, "m_sets(%s,%s)", mopt_t_to_string(o->l.t), 
mopt_t_to_string(o->d.t)); break; 65 | case m_seto: snprintf(outBuf, n, "m_seto(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 66 | case m_setp: snprintf(outBuf, n, "m_setp(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 67 | case m_setnz: snprintf(outBuf, n, "m_setnz(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 68 | case m_setz: snprintf(outBuf, n, "m_setz(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 69 | case m_setae: snprintf(outBuf, n, "m_setae(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 70 | case m_setb: snprintf(outBuf, n, "m_setb(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 71 | case m_seta: snprintf(outBuf, n, "m_seta(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 72 | case m_setbe: snprintf(outBuf, n, "m_setbe(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 73 | case m_setg: snprintf(outBuf, n, "m_setg(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 74 | case m_setge: snprintf(outBuf, n, "m_setge(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 75 | case m_setl: snprintf(outBuf, n, "m_setl(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 76 | case m_setle: snprintf(outBuf, n, "m_setle(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 77 | case m_jcnd: snprintf(outBuf, n, "m_jcnd(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 78 | case m_jnz: snprintf(outBuf, n, "m_jnz(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), 
mopt_t_to_string(o->d.t)); break; 79 | case m_jz: snprintf(outBuf, n, "m_jz(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 80 | case m_jae: snprintf(outBuf, n, "m_jae(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 81 | case m_jb: snprintf(outBuf, n, "m_jb(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 82 | case m_ja: snprintf(outBuf, n, "m_ja(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 83 | case m_jbe: snprintf(outBuf, n, "m_jbe(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 84 | case m_jg: snprintf(outBuf, n, "m_jg(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 85 | case m_jge: snprintf(outBuf, n, "m_jge(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 86 | case m_jl: snprintf(outBuf, n, "m_jl(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 87 | case m_jle: snprintf(outBuf, n, "m_jle(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 88 | case m_jtbl: snprintf(outBuf, n, "m_jtbl(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t)); break; 89 | case m_ijmp: snprintf(outBuf, n, "m_ijmp(%s,%s)", mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 90 | case m_goto: snprintf(outBuf, n, "m_goto(%s)", mopt_t_to_string(o->l.t)); break; 91 | case m_call: snprintf(outBuf, n, "m_call(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 92 | case m_icall: snprintf(outBuf, n, "m_icall(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 93 | case m_ret: snprintf(outBuf, n, "m_ret"); break; 94 | case m_push: snprintf(outBuf, n, "m_push(%s)", mopt_t_to_string(o->l.t)); break; 95 | case 
m_pop: snprintf(outBuf, n, "m_pop(%s)", mopt_t_to_string(o->d.t)); break; 96 | case m_und: snprintf(outBuf, n, "m_und(%s)", mopt_t_to_string(o->d.t)); break; 97 | case m_ext: snprintf(outBuf, n, "m_ext(???)"); break; 98 | case m_f2i: snprintf(outBuf, n, "m_f2i(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 99 | case m_f2u: snprintf(outBuf, n, "m_f2u(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 100 | case m_i2f: snprintf(outBuf, n, "m_i2f(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 101 | case m_u2f: snprintf(outBuf, n, "m_u2f(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 102 | case m_f2f: snprintf(outBuf, n, "m_f2f(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 103 | case m_fneg: snprintf(outBuf, n, "m_fneg(%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->d.t)); break; 104 | case m_fadd: snprintf(outBuf, n, "m_fadd(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 105 | case m_fsub: snprintf(outBuf, n, "m_fsub(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 106 | case m_fmul: snprintf(outBuf, n, "m_fmul(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 107 | case m_fdiv: snprintf(outBuf, n, "m_fdiv(%s,%s,%s)", mopt_t_to_string(o->l.t), mopt_t_to_string(o->r.t), mopt_t_to_string(o->d.t)); break; 108 | } 109 | } 110 | 111 | // Produce a string describing the microcode maturity level. 
112 | const char *MicroMaturityToString(mba_maturity_t mmt) 113 | { 114 | switch (mmt) 115 | { 116 | case MMAT_ZERO: return "MMAT_ZERO"; 117 | case MMAT_GENERATED: return "MMAT_GENERATED"; 118 | case MMAT_PREOPTIMIZED: return "MMAT_PREOPTIMIZED"; 119 | case MMAT_LOCOPT: return "MMAT_LOCOPT"; 120 | case MMAT_CALLS: return "MMAT_CALLS"; 121 | case MMAT_GLBOPT1: return "MMAT_GLBOPT1"; 122 | case MMAT_GLBOPT2: return "MMAT_GLBOPT2"; 123 | case MMAT_GLBOPT3: return "MMAT_GLBOPT3"; 124 | case MMAT_LVARS: return "MMAT_LVARS"; 125 | default: return "???"; 126 | } 127 | } 128 | 129 | // Copied from http://www.hexblog.com/?p=1198 130 | // I did add code for the mop_d case; it used to return false 131 | 132 | //-------------------------------------------------------------------------- 133 | // compare operands but ignore the sizes 134 | bool equal_mops_ignore_size(const mop_t &lo, const mop_t &ro) 135 | { 136 | if (lo.t != ro.t) 137 | return false; 138 | 139 | switch (lo.t) 140 | { 141 | case mop_z: // none 142 | return true; 143 | case mop_fn: // floating point 144 | return *ro.fpc == *lo.fpc; 145 | case mop_n: // immediate 146 | { 147 | int minsize = qmin(lo.size, ro.size); 148 | uint64 v1 = extend_sign(ro.nnn->value, minsize, false); 149 | uint64 v2 = extend_sign(lo.nnn->value, minsize, false); 150 | return v1 == v2; 151 | } 152 | case mop_S: // stack variable 153 | return *ro.s == *lo.s; 154 | case mop_v: // global variable 155 | return ro.g == lo.g; 156 | case mop_d: // result of another instruction 157 | // I added this 158 | return ro.d->equal_insns(*lo.d, EQ_IGNSIZE | EQ_IGNCODE); 159 | case mop_b: // micro basic block (mblock_t) 160 | return ro.b == lo.b; 161 | case mop_r: // register 162 | return ro.r == lo.r; 163 | case mop_f: 164 | break; // not implemented 165 | case mop_l: 166 | return *ro.l == *lo.l; 167 | case mop_a: 168 | return lo.a->insize == ro.a->insize 169 | && lo.a->outsize == ro.a->outsize 170 | && equal_mops_ignore_size(*lo.a, *ro.a); 171 | case 
mop_h: 172 | return streq(ro.helper, lo.helper); 173 | case mop_str: 174 | return streq(ro.cstr, lo.cstr); 175 | case mop_c: 176 | return *ro.c == *lo.c; 177 | case mop_p: 178 | return equal_mops_ignore_size(lo.pair->lop, ro.pair->lop) 179 | && equal_mops_ignore_size(lo.pair->hop, ro.pair->hop); 180 | case mop_sc: // not implemented 181 | break; 182 | } 183 | return false; 184 | } 185 | -------------------------------------------------------------------------------- /HexRaysUtil.hpp: -------------------------------------------------------------------------------- 1 | // Miscellaneous utility functions 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | // Produce strings for various objects in the Hex-Rays ecosystem (for 8 | // debugging / informational purposes) 9 | const char *mopt_t_to_string(mopt_t t); 10 | void mcode_t_to_string(minsn_t *o, char *outBuf, size_t n); 11 | const char *MicroMaturityToString(mba_maturity_t mmt); 12 | 13 | // Compare two mop_t objects. Hopefully this will be an official export in the 14 | // microcode API in the future, so we won't have to implement it ourselves. 15 | bool equal_mops_ignore_size(const mop_t &lo, const mop_t &ro); 16 | 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. 
By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. 
For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 
83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 
117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 
146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 
216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 
246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. 
If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 
309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 
336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. 
If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 
428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | -------------------------------------------------------------------------------- /MicrocodeExplorer.cpp: --------------------------------------------------------------------------------
// NOTE(review): the angle-bracket header names and template arguments in this
// file were lost in extraction. <memory> is required by std::shared_ptr below;
// <hexrays.hpp> and <graph.hpp> are the SDK headers that presumably declare
// the microcode and graph-viewer types used here -- confirm against the
// original file.
#include <memory>
#define USE_DANGEROUS_FUNCTIONS
#include <hexrays.hpp>
#include "HexRaysUtil.hpp"

// Shared handle whose pointee is itself a pointer to the microcode array
// (Display() below reads it as `mbl_array_t *mba = *si->mba`).
typedef std::shared_ptr<mbl_array_t *> shared_mbl_array_t;

// Printer base class: print() formats one line into a qstring, strips the
// line-prefix color tags, and hands the result to AddLine(). Subclasses decide
// where the line is stored.
struct mblock_virtual_dumper_t : public vd_printer_t
{
	int nline;
	int serial;
	mblock_virtual_dumper_t() : nline(0), serial(0) {}

	// Receives each fully formatted line.
	virtual void AddLine(qstring &qs) = 0;

	// Formats `format` (printf-style), left-padded with `indent` spaces, and
	// forwards the line to AddLine(). Returns the length of the stored line.
	AS_PRINTF(3, 4) int print(int indent, const char *format, ...)
	{
		qstring buf;
		if (indent > 0)
			buf.fill(0, ' ', indent);
		va_list va;
		va_start(va, format);
		buf.cat_vsprnt(format, va);
		va_end(va);

		// ida 7.1 apparently has a problem with line prefixes, remove this color.
		// The tag arrays must be NUL-terminated: replace() takes C strings, and
		// without the terminator it would read past the end of the arrays.
		static const char pfx_on[] = { COLOR_ON, COLOR_PREFIX, '\0' };
		static const char pfx_off[] = { COLOR_OFF, COLOR_PREFIX, '\0' };
		buf.replace(pfx_on, "");
		buf.replace(pfx_off, "");

		AddLine(buf);
		return static_cast<int>(buf.length());
	}
};

// Dumper that concatenates every printed line into a single qstring.
struct mblock_qstring_dumper_t : public mblock_virtual_dumper_t
{
	qstring qStr;
	mblock_qstring_dumper_t() : mblock_virtual_dumper_t() {}
	virtual void AddLine(qstring &qs) override
	{
		qStr.append(qs);
	}
};

// Dumper that keeps every printed line as a separate entry of `lines`.
struct mblock_dumper_t : public mblock_virtual_dumper_t
{
	strvec_t lines;
	mblock_dumper_t() : mblock_virtual_dumper_t() {}
	virtual void AddLine(qstring &qs) override
	{
		lines.push_back(simpleline_t(qs));
	}
};

// State bundle for one microcode view: the widget, the dumped text lines, the
// microcode array handle and the maturity level it was generated at.
struct sample_info_t
{
	TWidget *cv;
	mblock_dumper_t md;
	shared_mbl_array_t mba;
	mba_maturity_t mat;
	// `mat` was previously left uninitialized; give it a defined value.
	sample_info_t() : cv(NULL), mba(NULL), mat(MMAT_ZERO) {}
};

#include <graph.hpp>

// Flattens one microinstruction (and, recursively, its operands and
// sub-instructions) into parallel vectors of node texts plus an edge list.
class MicrocodeInstructionGraph
{
public:
	qstring tmp; // temporary buffer for grcode_user_text
	qstrvec_t m_ShortText;  // per-node short label (opcode / operand type name)
	qstrvec_t m_BlockText;  // per-node full printed text
	intvec_t m_EdgeColors;  // per-edge value: the operand position passed to AddEdge()
	edgevec_t m_Edges;      // (source node, destination node) pairs
	int m_NumBlocks;        // number of nodes allocated so far

	// m_NumBlocks used to be indeterminate until the first Clear()/Build().
	MicrocodeInstructionGraph() : m_NumBlocks(0) {}

	// Drops all nodes and edges.
	void Clear()
	{
		m_ShortText.clear();
		m_BlockText.clear();
		m_EdgeColors.clear();
		m_Edges.clear();
		m_NumBlocks = 0;
	}

	// Rebuilds the graph from scratch with `top` as the root instruction.
	void Build(minsn_t *top)
	{
		Clear();
		Insert(top, -1);
	}

protected:
	// Records an edge iSrc -> iDest tagged with iPos. Edges with a negative
	// endpoint (the "no node" value returned by Insert) are ignored.
	void AddEdge(int iSrc, int iDest, int iPos)
	{
		if (iSrc < 0 || iDest < 0)
			return;

		m_Edges.push_back(edge_t(iSrc, iDest));
		m_EdgeColors.push_back(iPos);
	}

	// Returns the next free node index and advances the counter.
	int GetIncrBlockNum()
	{
		return m_NumBlocks++;
	}

	// Adds a node for instruction `ins` and recurses into its l/r/d operands.
	// Returns the new node's index, or -1 if `ins` is null. iParent is not used
	// here: the caller links the returned node itself (see the mop_d case).
	int Insert(minsn_t *ins, int iParent)
	{
		(void)iParent;
		if (ins == NULL)
			return -1;

		char l_Buf[MAXSTR];
		mcode_t_to_string(ins, l_Buf, sizeof(l_Buf));
		m_ShortText.push_back(l_Buf);

		qstring qStr;
		ins->print(&qStr);
		m_BlockText.push_back(qStr);

		int iThisBlock = GetIncrBlockNum();

		Insert(ins->l, iThisBlock, 0);
		Insert(ins->r, iThisBlock, 1);
		Insert(ins->d, iThisBlock, 2);

		return iThisBlock;
	}

	// Adds a node for operand `op` (skipping empty mop_z operands), links it to
	// iParent with position iPos, and recurses into nested operands.
	// Returns the new node's index, or -1 when no node was created.
	int Insert(mop_t &op, int iParent, int iPos)
	{
		if (op.t == mop_z)
			return -1;

		m_ShortText.push_back(mopt_t_to_string(op.t));

		qstring qStr;
		op.print(&qStr);
		m_BlockText.push_back(qStr);

		int iThisBlock = GetIncrBlockNum();
		AddEdge(iParent, iThisBlock, iPos);

		switch (op.t)
		{
		case mop_d: // result of another instruction
		{
			int iDestBlock = Insert(op.d, iThisBlock);
			AddEdge(iThisBlock, iDestBlock, 0);
			break;
		}
		case mop_f: // list of arguments
			if (op.f != NULL)
			{
				for (size_t i = 0; i < op.f->args.size(); ++i)
					Insert(op.f->args[i], iThisBlock, static_cast<int>(i));
			}
			break;
		case mop_p: // operand pair
			if (op.pair != NULL)
			{
				Insert(op.pair->lop, iThisBlock, 0);
				Insert(op.pair->hop, iThisBlock, 1);
			}
			break;
		case mop_a: // address of another operand
			if (op.a != NULL)
				Insert(*op.a, iThisBlock, 0);
			break;
		default:
			break;
		}
		return iThisBlock;
	}
};

class MicrocodeInstructionGraphContainer;

static ssize_t idaapi migr_callback(void *ud, int code, va_list va);

// Owns one MicrocodeInstructionGraph together with the widget and graph
// viewer that display it.
class MicrocodeInstructionGraphContainer
{
protected:
	TWidget * m_TW;
	graph_viewer_t *m_GV;
	qstring m_Title;
	qstring m_GVName;

public:
	MicrocodeInstructionGraph m_MG;
	MicrocodeInstructionGraphContainer() : m_TW(NULL), m_GV(NULL) {}

	// Builds the graph for `top` and opens it in a new graph-viewer widget.
	// nBlock/nSerial are only used to compose the window title and viewer name.
	// Returns false if `top`, `si` or the microcode array behind `si` is null;
	// true otherwise.
	bool Display(minsn_t *top, sample_info_t *si, int nBlock, int nSerial)
	{
		if (top == NULL || si == NULL || !si->mba)
			return false;
		mbl_array_t *mba = *si->mba;
		if (mba == NULL)
			return false;

		m_MG.Build(top);

		// sprnt (not cat_sprnt) so that a second Display() on the same container
		// replaces the title/name instead of appending to the previous one.
		m_Title.sprnt("Microinstruction Graph - %a[%s]/%d:%d", mba->entry_ea, MicroMaturityToString(si->mat), nBlock, nSerial);
		m_TW = create_empty_widget(m_Title.c_str());
		netnode id;
		id.create();

		m_GVName.sprnt("microins_%a_%s_%d_%d", mba->entry_ea, MicroMaturityToString(si->mat), nBlock, nSerial);
		m_GV = create_graph_viewer(m_GVName.c_str(), id, migr_callback, this, 0, m_TW);
		activate_widget(m_TW, true);
#if IDA_SDK_VERSION == 710
		display_widget(m_TW, WOPN_TAB | WOPN_MENU);
#elif IDA_SDK_VERSION == 720
		display_widget(m_TW, WOPN_TAB);
#elif IDA_SDK_VERSION >= 730
		display_widget(m_TW, WOPN_DP_TAB);
#endif
		viewer_fit_window(m_GV);
		return true;
	}
};

static ssize_t idaapi migr_callback(void *ud, int code, va_list va)
{
	MicrocodeInstructionGraphContainer *gcont = (MicrocodeInstructionGraphContainer *)ud;
	MicrocodeInstructionGraph *microg = &gcont->m_MG;
	bool result = false;

	switch (code)
	{
	case grcode_user_gentext:
		result = true;
		break;

		// refresh user-defined graph nodes and edges
	case grcode_user_refresh:
		// in: mutable_graph_t *g
		// out: success
	{
		mutable_graph_t *mg = va_arg(va, mutable_graph_t *);

		// we have to resize
		mg->resize(microg->m_NumBlocks);

		for (auto &it : microg->m_Edges)
			mg->add_edge(it.src, it.dst, NULL);

		result = true;
	}
	break;

	// retrieve text for user-defined graph node
	case grcode_user_text:
		//mutable_graph_t *g
		// int node
		// const char **result
		// bgcolor_t *bg_color (maybe NULL)
		// out: must return 0, result must be filled
		// NB: do not use anything calling GDI!
244 | { 245 | va_arg(va, mutable_graph_t *); 246 | int node = va_arg(va, int); 247 | const char **text = va_arg(va, const char **); 248 | 249 | microg->tmp = microg->m_ShortText[node]; 250 | microg->tmp.append('\n'); 251 | microg->tmp.append(microg->m_BlockText[node]); 252 | *text = microg->tmp.begin(); 253 | result = true; 254 | } 255 | break; 256 | } 257 | return (int)result; 258 | } 259 | 260 | static ssize_t idaapi mgr_callback(void *ud, int code, va_list va); 261 | 262 | class MicrocodeGraphContainer 263 | { 264 | public: 265 | shared_mbl_array_t m_MBA; 266 | mblock_qstring_dumper_t m_MQD; 267 | qstring m_Title; 268 | qstring m_GVName; 269 | qstring tmp; 270 | MicrocodeGraphContainer(shared_mbl_array_t mba) : m_MBA(mba) {}; 271 | bool Display(sample_info_t *si) 272 | { 273 | mbl_array_t *mba = *si->mba; 274 | m_Title.cat_sprnt("Microcode Graph - %a[%s]", mba->entry_ea, MicroMaturityToString(si->mat)); 275 | 276 | TWidget *tw = create_empty_widget(m_Title.c_str()); 277 | netnode id; 278 | id.create(); 279 | 280 | m_GVName.cat_sprnt("microblkgraph_%a_%s", mba->entry_ea, MicroMaturityToString(si->mat)); 281 | graph_viewer_t *gv = create_graph_viewer(m_GVName.c_str(), id, mgr_callback, this, 0, tw); 282 | activate_widget(tw, true); 283 | #if IDA_SDK_VERSION == 710 284 | display_widget(tw, WOPN_TAB | WOPN_MENU); 285 | #elif IDA_SDK_VERSION == 720 286 | display_widget(tw, WOPN_TAB); 287 | #elif IDA_SDK_VERSION >= 730 288 | display_widget(tw, WOPN_DP_TAB); 289 | #endif 290 | viewer_fit_window(gv); 291 | return true; 292 | } 293 | 294 | }; 295 | 296 | static ssize_t idaapi mgr_callback(void *ud, int code, va_list va) 297 | { 298 | MicrocodeGraphContainer *gcont = (MicrocodeGraphContainer *)ud; 299 | mbl_array_t *mba = *gcont->m_MBA; 300 | bool result = false; 301 | 302 | switch (code) 303 | { 304 | case grcode_user_gentext: 305 | result = true; 306 | break; 307 | 308 | // refresh user-defined graph nodes and edges 309 | case grcode_user_refresh: 310 | // in: 
mutable_graph_t *g 311 | // out: success 312 | { 313 | mutable_graph_t *mg = va_arg(va, mutable_graph_t *); 314 | 315 | // we have to resize 316 | mg->resize(mba->qty); 317 | 318 | for (int i = 0; i < mba->qty; ++i) 319 | for (auto dst : mba->get_mblock(i)->succset) 320 | mg->add_edge(i, dst, NULL); 321 | 322 | result = true; 323 | } 324 | break; 325 | 326 | // retrieve text for user-defined graph node 327 | case grcode_user_text: 328 | //mutable_graph_t *g 329 | // int node 330 | // const char **result 331 | // bgcolor_t *bg_color (maybe NULL) 332 | // out: must return 0, result must be filled 333 | // NB: do not use anything calling GDI! 334 | { 335 | va_arg(va, mutable_graph_t *); 336 | int node = va_arg(va, int); 337 | const char **text = va_arg(va, const char **); 338 | 339 | gcont->m_MQD.qStr.clear(); 340 | mba->get_mblock(node)->print(gcont->m_MQD); 341 | *text = gcont->m_MQD.qStr.begin(); 342 | result = true; 343 | } 344 | break; 345 | } 346 | return (int)result; 347 | } 348 | 349 | static bool idaapi ct_keyboard(TWidget * /*v*/, int key, int shift, void *ud) 350 | { 351 | if (shift == 0) 352 | { 353 | sample_info_t *si = (sample_info_t *)ud; 354 | switch (key) 355 | { 356 | case 'G': 357 | { 358 | MicrocodeGraphContainer *mgc = new MicrocodeGraphContainer(si->mba); 359 | return mgc->Display(si); 360 | } 361 | 362 | 363 | // User wants to show a graph of the current instruction 364 | case 'I': 365 | { 366 | qstring buf; 367 | tag_remove(&buf, get_custom_viewer_curline(si->cv, false)); 368 | const char *pLine = buf.c_str(); 369 | const char *pDot = strchr(pLine, '.'); 370 | if (pDot == NULL) 371 | { 372 | warning( 373 | "Couldn't find the block number on the current line; was the block empty?\n" 374 | "If it was not empty, and you don't see [int].[int] at the beginning of the lines\n" 375 | "please run the plugin again to generate a new microcode listing.\n" 376 | "That should fix it."); 377 | return true; // reacted to the keypress 378 | } 379 | int nBlock 
= atoi(pLine); 380 | int nSerial = atoi(pDot + 1); 381 | mbl_array_t *mba = *si->mba; 382 | 383 | if (nBlock > mba->qty) 384 | { 385 | warning("Plugin error: line prefix was %d:%d, but block only has %d basic blocks.", nBlock, nSerial, mba->qty); 386 | return true; 387 | } 388 | 389 | mblock_t *blk = mba->get_mblock(nBlock); 390 | minsn_t *minsn = blk->head; 391 | int i; 392 | for (i = 0; i < nSerial; ++i) 393 | { 394 | minsn = minsn->next; 395 | if (minsn == NULL) 396 | break; 397 | } 398 | 399 | if (minsn == NULL) 400 | { 401 | if (i == 0) 402 | warning( 403 | "Couldn't get first minsn_t from %d:%d; was the block empty?\n" 404 | "If it was not empty, and you don't see [int].[int] at the beginning of the lines\n" 405 | "please run the plugin again to generate a new microcode listing.\n" 406 | "That should fix it.", nBlock, nSerial); 407 | else 408 | warning("Couldn't get first minsn_t from %d:%d; last valid instruction was %d", nBlock, nSerial, i - 1); 409 | return true; 410 | } 411 | 412 | char repr[MAXSTR]; 413 | mcode_t_to_string(minsn, repr, sizeof(repr)); 414 | MicrocodeInstructionGraphContainer *mcg = new MicrocodeInstructionGraphContainer; 415 | return mcg->Display(minsn, si, nBlock, nSerial); 416 | } 417 | case IK_ESCAPE: 418 | close_widget(si->cv, WCLS_SAVE | WCLS_CLOSE_LATER); 419 | return true; 420 | } 421 | } 422 | return false; 423 | } 424 | 425 | static const custom_viewer_handlers_t handlers( 426 | ct_keyboard, 427 | NULL, // popup 428 | NULL, // mouse_moved 429 | NULL, // click 430 | NULL, // dblclick 431 | NULL, 432 | NULL, // close 433 | NULL, // help 434 | NULL);// adjust_place 435 | 436 | ssize_t idaapi ui_callback(void *ud, int code, va_list va) 437 | { 438 | sample_info_t *si = (sample_info_t *)ud; 439 | switch (code) 440 | { 441 | case ui_widget_invisible: 442 | { 443 | TWidget *f = va_arg(va, TWidget *); 444 | if (f == si->cv) 445 | { 446 | delete si; 447 | unhook_from_notification_point(HT_UI, ui_callback); 448 | } 449 | } 450 | break; 451 
// Asks the user which microcode maturity level to generate.
//
// The choices offered are the entries of matLevels, in order. Returns
// MMAT_GENERATED plus the selected index when ask_form() reports a positive
// result, and MMAT_ZERO otherwise (the caller treats MMAT_ZERO as "do
// nothing").
mba_maturity_t AskDesiredMaturity()
{
    // NOTE(review): this form text contains no field descriptor, yet two
    // field arguments (&opts, &sel) are passed to ask_form() below. The
    // second line looks like it lost an angle-bracketed dropdown definition;
    // confirm against the real source file before relying on this dialog.
    const char dlgText[] =
        "Select maturity level\n"
        "\n";

    // Build the list of selectable maturity names
    qstrvec_t opts;
    for (int i = 0; i < qnumber(matLevels); ++i)
        opts.push_back(matLevels[i]);

    // sel receives the index of the chosen entry; defaults to the first one
    int sel = 0;
    int ret = ask_form(dlgText, &opts, &sel);

    // matLevels starts at MMAT_GENERATED, so the index is an offset from it
    if (ret > 0)
        return (mba_maturity_t)((int)MMAT_GENERATED + sel);
    return MMAT_ZERO;
}
>= 730 531 | display_widget(si->cv, WOPN_DP_TAB | WOPN_RESTORE); 532 | #else 533 | display_widget(si->cv, WOPN_TAB | WOPN_RESTORE); 534 | #endif 535 | } 536 | 537 | -------------------------------------------------------------------------------- /MicrocodeExplorer.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | void ShowMicrocodeExplorer(); 4 | -------------------------------------------------------------------------------- /PatternDeobfuscate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "HexRaysUtil.hpp" 3 | #include "PatternDeobfuscateUtil.hpp" 4 | #include "Config.hpp" 5 | 6 | // Our pattern-based deobfuscation is implemented as an optinsn_t structure, 7 | // which allows us to hook directly into the microcode generation phase and 8 | // perform optimizations automatically, whenever code is decompiled. 9 | struct ObfCompilerOptimizer : public optinsn_t 10 | { 11 | // This function simplifies microinstruction patterns that look like 12 | // either: (x & 1) | (y & 1) ==> (x | y) & 1 13 | // or: (x & 1) ^ (y & 1) ==> (x ^ y) & 1 14 | // Though it may not seem like much of an "obfuscation" or "deobfuscation" 15 | // technique on its own, getting rid of the "&1" terms helps reveal other 16 | // patterns so they can be deobfuscated. 17 | int pat_LogicAnd1(minsn_t *ins) 18 | { 19 | // Only applies to OR / XOR microinstructions 20 | if (ins->opcode != m_or && ins->opcode != m_xor) 21 | return 0; 22 | 23 | // Only applies when the operands are results of other 24 | // microinstructions (since, after all, we are expecting them to be 25 | // ANDed by 1, which is represented in terms of a microinstruction 26 | // provider mop_d operand). 27 | if (ins->l.t != mop_d || ins->r.t != mop_d) 28 | return 0; 29 | 30 | minsn_t *insLeft, *insRight; 31 | mop_t *opLeft, *opRight; 32 | 33 | // Get rid of & 1. bLeft1 is true if there was an &1. 
34 | bool bLeft1 = TunnelThroughAnd1(ins->l.d, insLeft, true, &opLeft); 35 | if (!bLeft1) 36 | return 0; 37 | 38 | // Same for right-hand side 39 | bool bRight1 = TunnelThroughAnd1(ins->r.d, insRight, true, &opRight); 40 | if (!bRight1) 41 | return 0; 42 | 43 | // If we get here, then the pattern matched. 44 | // Move the logical operation (OR or XOR) to the left-hand side, 45 | // with the operands that have the &1 removed. 46 | ins->l.d->opcode = ins->opcode; 47 | ins->l.d->l.swap(*opLeft); 48 | ins->l.d->r.swap(*opRight); 49 | 50 | // Change the top-level instruction from OR or XOR to AND, and set the 51 | // right-hand side to the 1-bit constant value 1. 52 | ins->opcode = m_and; 53 | ins->r.make_number(1, 1); 54 | 55 | // msg("[I] pat_LogicAnd1\n"); 56 | // Return 1 to indicate that we changed the instruction. 57 | return 1; 58 | } 59 | 60 | // One of the obfuscation patterns involves a subtraction by 1. In the 61 | // assembly code, this is implemented by something like: 62 | // 63 | // add eax, 2 64 | // add eax, ecx ; or whatever 65 | // sub eax, 3 66 | // 67 | // Usually, Hex-Rays will automatically simplify this to (eax+ecx)-1. 68 | // However, I did experience situations where Hex-Rays still represented 69 | // the decompiled output as 2+(eax+ecx)-3. This function, then, determines 70 | // when Hex-Rays has represented the subtraction as just mentioned. If so, 71 | // it extracts the term that is being subtracted by 1. 72 | bool pat_IsSubBy1(minsn_t *ins, mop_t *&op) 73 | { 74 | // We're looking for x+(y-z), where x and z are numeric 75 | if (ins->opcode != m_add) 76 | return false; 77 | 78 | // Extract x and (y-z) 79 | mop_t *opAddNum = NULL, *opAddNonNum = NULL; 80 | if (!ExtractNumAndNonNum(ins, opAddNum, opAddNonNum)) 81 | return false; 82 | 83 | // Ensure that the purported (y-z) term actually is a subtraction 84 | if (opAddNonNum->t != mop_d || opAddNonNum->d->opcode != m_sub) 85 | return false; 86 | 87 | // Extract y and z. 
I guess technically I shouldn't use 88 | // ExtractNumAndNonNum here since subtraction isn't commutative... 89 | // Call that a bug, but it didn't matter in practice. 90 | mop_t *opSubNum = NULL, *opSubNonNum = NULL; 91 | if (!ExtractNumAndNonNum(opAddNonNum->d, opSubNum, opSubNonNum)) 92 | return false; 93 | 94 | // Pass y back to the caller 95 | op = opSubNonNum; 96 | 97 | // x-z must be -1, or, equivalently, z-x must be 1. 98 | return (opSubNum->nnn->value - opAddNum->nnn->value) == 1LL; 99 | } 100 | 101 | // This function performs the following pattern-substitution: 102 | // (x * (x-1)) & 1 ==> 0 103 | int pat_MulSub(minsn_t *andIns) 104 | { 105 | // Topmost term has to be &1. The 1 is not required to be 1-byte large. 106 | minsn_t *ins = andIns; 107 | if (!TunnelThroughAnd1(ins, ins, false)) 108 | return 0; 109 | 110 | // Looking for multiplication terms 111 | if (ins->opcode != m_mul) 112 | return 0; 113 | 114 | // We have two different mechanisms for determining if there is a 115 | // subtraction by 1. 116 | bool bWasSubBy1 = false; 117 | 118 | // Ultimately, we need to find thse things: 119 | minsn_t *insSub; // Subtraction instruction x-1 120 | mop_t *opMulNonSub; // Operand of multiply that isn't a subtraction 121 | mop_t *subNonNum; // x from the x-1 instruction 122 | 123 | // Try first method for locating subtraction by 1, i.e., simply 124 | // subtraction by the constant number 1. 125 | do 126 | { 127 | // Find the subtraction subterm of the multiplication 128 | if (!ExtractByOpcodeType(ins, m_sub, insSub, opMulNonSub)) 129 | break; 130 | 131 | mop_t *subNum; 132 | // Find the numeric part of the subtraction. Again, I shouldn't use 133 | // ExtractNumAndNonNum here since subtraction isn't commutative. 134 | if (!ExtractNumAndNonNum(insSub, subNum, subNonNum)) 135 | break; 136 | 137 | // Ensure that the subtraction amount is 1. 138 | if (subNum->nnn->value != 1) 139 | break; 140 | 141 | // Indicate that we successfully found the subtraction. 
142 | bWasSubBy1 = true; 143 | } while (0); 144 | 145 | // If we didn't find the subtraction, see if we have an add/sub pair 146 | // instead, which totals to subtraction minus one. 147 | if (!bWasSubBy1) 148 | { 149 | // Find the ADD subterm of the multiplication. If this fails, both 150 | // methods failed to find the pattern, so return. 151 | if (!ExtractByOpcodeType(ins, m_add, insSub, opMulNonSub)) 152 | return 0; 153 | 154 | // Call the previous function to determine whether the ADD 155 | // implements a subtraction by 1. 156 | bWasSubBy1 = pat_IsSubBy1(insSub, subNonNum); 157 | } 158 | 159 | // If both methods failed, bail. 160 | if (!bWasSubBy1) 161 | return 0; 162 | 163 | // We know we're dealing with (x-1) * y. ensure x==y. 164 | if (!equal_mops_ignore_size(*opMulNonSub, *subNonNum)) 165 | return 0; 166 | 167 | // If we get here, the pattern matched. 168 | // Replace the whole multiplication instruction by 0. 169 | ins->l.make_number(0, ins->l.size); 170 | #if IDA_SDK_VERSION == 710 171 | andIns->optimize_flat(); 172 | #elif IDA_SDK_VERSION >= 720 173 | andIns->optimize_solo(); 174 | #endif 175 | // msg("[I] pat_MulSub\n"); 176 | return 1; 177 | } 178 | 179 | // This function looks tries to replace patterns of the form 180 | // either: (x&y)|(x^y) ==> x|y 181 | // or: (x&y)|(y^x) ==> x|y 182 | int pat_OrViaXorAnd(minsn_t *ins) 183 | { 184 | #if OPTVERBOSE 185 | qstring qIns; 186 | ins->print(&qIns); 187 | msg("Trying to optimize jcc cond: %s\n", qIns.c_str()); 188 | #endif 189 | // Looking for OR instructions... 190 | if (ins->opcode != m_or) 191 | return 0; 192 | 193 | // ... where one side is a compound XOR, and the other is not ... 194 | minsn_t *xorInsn; 195 | mop_t *nonXorOp; 196 | if (!ExtractByOpcodeType(ins, m_xor, xorInsn, nonXorOp)) 197 | return 0; 198 | 199 | // .. and the other side is a compound AND ... 
200 | if (nonXorOp->t != mop_d || nonXorOp->d->opcode != m_and) 201 | return 0; 202 | 203 | // Extract the operands for the AND and XOR terms 204 | mop_t *xorOp1 = &xorInsn->l, *xorOp2 = &xorInsn->r; 205 | mop_t *andOp1 = &nonXorOp->d->l, *andOp2 = &nonXorOp->d->r; 206 | 207 | // The operands must be equal 208 | if (!(equal_mops_ignore_size(*xorOp1, *andOp1) && equal_mops_ignore_size(*xorOp2, *andOp2)) || 209 | (equal_mops_ignore_size(*xorOp1, *andOp2) && equal_mops_ignore_size(*xorOp2, *andOp1))) 210 | return 0; 211 | 212 | // Move the operands up to the top-level OR instruction 213 | ins->l.swap(*xorOp1); 214 | ins->r.swap(*xorOp2); 215 | #if IDA_SDK_VERSION == 710 216 | ins->optimize_flat(); 217 | #elif IDA_SDK_VERSION >= 720 218 | ins->optimize_solo(); 219 | #endif 220 | // msg("[I] pat_OrViaXorAnd\n"); 221 | return 1; 222 | } 223 | 224 | // This pattern replaces microcode of the form (x|!x), where x is a 225 | // conditional, and !x is its syntactically-negated version, with 1. 226 | int pat_OrNegatedSameCondition(minsn_t *ins) 227 | { 228 | #if OPTVERBOSE 229 | qstring qIns; 230 | ins->print(&qIns); 231 | msg("Trying to optimize jcc cond: %s\n", qIns.c_str()); 232 | #endif 233 | // Only applies to (x|y) 234 | if (ins->opcode != m_or) 235 | return 0; 236 | 237 | // Only applies when x and y are compound expressions, i.e., results 238 | // of other microcode instructions. 239 | if (ins->l.t != mop_d || ins->r.t != mop_d) 240 | return 0; 241 | 242 | // Ensure x and y are syntactically-opposite versions of the same 243 | // conditional. 244 | if (!AreConditionsOpposite(ins->l.d, ins->r.d)) 245 | return 0; 246 | 247 | // If we get here, the pattern matched. Replace both sides of OR with 248 | // 1, and then call optimize_flat to fold the constants. 
249 | ins->l.make_number(1, 1); 250 | ins->r.make_number(1, 1); 251 | #if IDA_SDK_VERSION == 710 252 | ins->optimize_flat(); 253 | #elif IDA_SDK_VERSION >= 720 254 | ins->optimize_solo(); 255 | #endif 256 | // msg("[I] pat_OrNegatedSameCondition\n"); 257 | return 1; 258 | } 259 | 260 | // Replace patterns of the form (x&c)|(~x&d) (when c and d are numbers such 261 | // that c == ~d) with x^d. 262 | int pat_OrAndNot(minsn_t *ins) 263 | { 264 | // Looking for OR instructions... 265 | if(ins->opcode != m_or) 266 | return 0; 267 | 268 | // ... with compound operands ... 269 | if (ins->l.t != mop_d || ins->r.t != mop_d) 270 | return 0; 271 | 272 | minsn_t *lhs1 = ins->l.d; 273 | minsn_t *rhs1 = ins->r.d; 274 | 275 | // ... where each operand is an AND ... 276 | if (lhs1->opcode != m_and || rhs1->opcode != m_and) 277 | return 0; 278 | 279 | // Extract the numeric and non-numeric operands from both AND terms 280 | mop_t *lhsNum = NULL, *rhsNum = NULL; 281 | mop_t *lhsNonNum = NULL, *rhsNonNum = NULL; 282 | bool bLhsSucc = ExtractNumAndNonNum(lhs1, lhsNum, lhsNonNum); 283 | bool bRhsSucc = ExtractNumAndNonNum(rhs1, rhsNum, rhsNonNum); 284 | 285 | // ... both AND terms must have one constant ... 286 | if (!bLhsSucc || !bRhsSucc) 287 | return 0; 288 | 289 | // .. both constants have a size, and are the same size ... 290 | if (lhsNum->size == NOSIZE || lhsNum->size != rhsNum->size) 291 | return 0; 292 | 293 | // ... and the constants are bitwise inverses of one another ... 
294 | if ((lhsNum->nnn->value & rhsNum->nnn->value) != 0) 295 | return 0; 296 | 297 | // One of the non-numeric parts must have a binary not (i.e., ~) on it 298 | minsn_t *sourceOfResult = NULL; 299 | mop_t *nonNottedInsn = NULL, *nottedNum = NULL, *nottedInsn = NULL; 300 | 301 | // Check the left-hand size for binary not 302 | if (lhsNonNum->t == mop_d && lhsNonNum->d->opcode == m_bnot) 303 | { 304 | // Extract the NOTed term 305 | nottedInsn = &lhsNonNum->d->l; 306 | // Make note of the corresponding constant value 307 | nottedNum = lhsNum; 308 | } 309 | else 310 | nonNottedInsn = lhsNonNum; 311 | 312 | // Check the left-hand size for binary not 313 | if (rhsNonNum->t == mop_d && rhsNonNum->d->opcode == m_bnot) 314 | { 315 | // Both sides NOT? Not what we want, return 0 316 | if (nottedInsn != NULL) 317 | return 0; 318 | 319 | // Extract the NOTed term 320 | nottedInsn = &rhsNonNum->d->l; 321 | // Make note of the corresponding constant value 322 | nottedNum = rhsNum; 323 | } 324 | else 325 | { 326 | // Neither side has a NOT? Bail 327 | if (nonNottedInsn != NULL) 328 | return 0; 329 | nonNottedInsn = rhsNonNum; 330 | } 331 | 332 | // The expression that was NOTed must match the non-NOTed operand 333 | if (!equal_mops_ignore_size(*nottedInsn, *nonNottedInsn)) 334 | return 0; 335 | 336 | // Okay, all of our conditions matched. Make an XOR(x,d) instruction 337 | ins->opcode = m_xor; 338 | ins->l.swap(*nonNottedInsn); 339 | ins->r.swap(*nottedNum); 340 | // msg("[I] pat_OrAndNot\n"); 341 | return 1; 342 | } 343 | 344 | // Remove XOR chains with common terms. E.g. x^5^y^6^5^x ==> y^6. 345 | // This uses the XorSimplifier class from PatternDeobfuscateUtil. 
346 | int pat_XorChain(minsn_t *ins) 347 | { 348 | if (ins->opcode != m_xor) 349 | return 0; 350 | 351 | #if OPTVERBOSE 352 | qstring qInsBefore, qInsAfter; 353 | ins->print(&qInsBefore); 354 | #endif 355 | 356 | // Automagically find duplicated expressions and erase them 357 | XorSimplifier xs; 358 | if (!xs.Simplify(ins)) 359 | return 0; 360 | 361 | #if OPTVERBOSE 362 | ins->print(&qInsAfter); 363 | msg("[I] Optimized XOR from:\n\t%s\nto:\t%s\n", qInsBefore.c_str(), qInsAfter.c_str()); 364 | #endif 365 | // msg("[I] pat_XorChain\n"); 366 | return 1; 367 | 368 | } 369 | 370 | // Compare two sets of mop_t * element-by-element. Return true if they match. 371 | bool NonConstSetsMatch(std::set *s1, std::set *s2) 372 | { 373 | // Iterate over one set 374 | for (auto eL : *s1) 375 | { 376 | bool bFound = false; 377 | // Iterate over the other set 378 | for (auto eR : *s2) 379 | { 380 | // Compare the element from the first set against the ones in 381 | // the other set. 382 | if (equal_mops_ignore_size(*eL, *eR)) 383 | { 384 | bFound = true; 385 | break; 386 | } 387 | } 388 | // If we can't find some element from the first set in the other, we're done 389 | if (!bFound) 390 | return false; 391 | } 392 | // All elements matched 393 | return true; 394 | } 395 | 396 | // Compare two sets of mop_t * (number values) element-by-element. There 397 | // should be one value in the larger set that's not in the smaller set. 398 | // Find and return it if that's the case. 399 | mop_t *FindNonCommonConstant(std::set *smaller, std::set *bigger) 400 | { 401 | mop_t *noMatch = NULL; 402 | // Iterate through the larger set 403 | for (auto eL : *bigger) 404 | { 405 | bool bFound = false; 406 | // Find each element in the smaller set 407 | for (auto eR : *smaller) 408 | { 409 | if (equal_mops_ignore_size(*eL, *eR)) 410 | { 411 | bFound = true; 412 | break; 413 | } 414 | } 415 | // We're looking for one constant in the larger set that isn't 416 | // present in the smaller set. 
417 | if (!bFound) 418 | { 419 | // If noMatch was not NULL, then there was more than one 420 | // constant in the larger set that wasn't in the smaller one, 421 | // so return NULL on failure. 422 | if (noMatch != NULL) 423 | return 0; 424 | 425 | noMatch = eL; 426 | } 427 | } 428 | // Return the constant from the larger set that wasn't in the smaller 429 | return noMatch; 430 | } 431 | 432 | // Matches patterns of the form: 433 | // (a^b^c^d) & (a^b^c^d^e) => (a^b^c^d) & ~e, where e is numeric 434 | // The terms don't necessarily have to be in the same order; we extract the 435 | // XOR subterms from both sides and find the missing value from the smaller 436 | // XOR chain. 437 | int pat_AndXor(minsn_t *ins) 438 | { 439 | // Instruction must be AND ... 440 | if (ins->opcode != m_and) 441 | return 0; 442 | 443 | // ... at least one side must be XOR ... 444 | bool bLeftIsNotXor = ins->l.t != mop_d || ins->l.d->opcode == m_xor; 445 | bool bRightIsNotXor = ins->r.t != mop_d || ins->r.d->opcode == m_xor; 446 | if (!bLeftIsNotXor && !bRightIsNotXor) 447 | return 0; 448 | 449 | // Collect the constant and non-constant parts of the XOR chains. We 450 | // use the XorSimplifier class, but we don't actually simplify the 451 | // instruction; we just make use of the existing functionality to 452 | // collect the operands that are XORed together. 453 | XorSimplifier xsL, xsR; 454 | xsL.Insert(&ins->l); 455 | xsR.Insert(&ins->r); 456 | 457 | // There must be the same number of non-constant terms on both sides 458 | if (xsL.m_NonConst.size() != xsR.m_NonConst.size()) 459 | return 0; 460 | 461 | bool bLeftIsSmaller; 462 | std::set *smaller, *bigger; 463 | 464 | // Either the left is one bigger than the right... 465 | if (xsL.m_Const.size() == xsR.m_Const.size() + 1) 466 | smaller = &xsR.m_Const, bigger = &xsL.m_Const, bLeftIsSmaller = false; 467 | 468 | // Or the right is one bigger than the left... 
469 | else 470 | if (xsR.m_Const.size() == xsL.m_Const.size() + 1) 471 | smaller = &xsL.m_Const, bigger = &xsR.m_Const, bLeftIsSmaller = true; 472 | 473 | // Or, the pattern doesn't match, so return 0. 474 | else 475 | return 0; 476 | 477 | // The sets of non-constant operands must match 478 | if (!(NonConstSetsMatch(&xsL.m_NonConst, &xsR.m_NonConst))) 479 | return 0; 480 | 481 | // Find the one constant value that wasn't common to both sides 482 | mop_t *noMatch = FindNonCommonConstant(smaller, bigger); 483 | 484 | // If there wasn't one, the pattern failed, so return 0 485 | if (noMatch == NULL) 486 | return 0; 487 | 488 | // Invert the non-common number and truncate it down to its proper size 489 | noMatch->nnn->update_value(~noMatch->nnn->value & ((1ULL << (noMatch->size * 8)) - 1)); 490 | 491 | // Replace the larger XOR construct with the now-inverted value 492 | if (bLeftIsSmaller) 493 | ins->r.swap(*noMatch); 494 | else 495 | ins->l.swap(*noMatch); 496 | 497 | // msg("[I] pat_AndXor\n"); 498 | return 1; 499 | } 500 | 501 | // Replaces conditionals of the form !(!c1 || !c2) with (c1 && c2). 502 | int pat_LnotOrLnotLnot(minsn_t *ins) 503 | { 504 | // The whole expression must be logically negated. 505 | minsn_t *inner; 506 | if (!ExtractLogicallyNegatedTerm(ins, inner) || inner == NULL) 507 | return 0; 508 | 509 | // The thing that was negated must be an OR with compound operands. 510 | if (inner->opcode != m_or || inner->l.t != mop_d || inner->r.t != mop_d) 511 | return 0; 512 | 513 | // The two compound operands must also be negated 514 | minsn_t *insLeft = inner->l.d; 515 | minsn_t *insRight = inner->r.d; 516 | mop_t *opLeft, *opRight; 517 | if (!ExtractLogicallyNegatedTerm(inner->l.d, insLeft, &opLeft) || !ExtractLogicallyNegatedTerm(inner->r.d, insRight, &opRight)) 518 | return 0; 519 | 520 | // If we're here, the pattern matched. Make the AND. 
521 | ins->opcode = m_and; 522 | ins->l.swap(*opLeft); 523 | ins->r.swap(*opRight); 524 | // msg("[I] pat_LnotOrLnotLnot\n"); 525 | return 1; 526 | } 527 | 528 | // Replaces terms of the form ~(~x | n), where n is a number, with x & ~n. 529 | int pat_BnotOrBnotConst(minsn_t *ins) 530 | { 531 | // We're looking for BNOT instructions (~y)... 532 | if (ins->opcode != m_bnot || ins->l.t != mop_d) 533 | return 0; 534 | 535 | // ... where x is an OR instruction ... 536 | minsn_t *inner = ins->l.d; 537 | if (inner->opcode != m_or) 538 | return 0; 539 | 540 | // ... and one side is constant, where the other one isn't ... 541 | mop_t *orNum, *orNonNum; 542 | if (!ExtractNumAndNonNum(inner, orNum, orNonNum)) 543 | return 0; 544 | 545 | // ... and the non-constant part is itself a BNOT instruction (~x) 546 | if (orNonNum->t != mop_d || orNonNum->d->opcode != m_bnot) 547 | return 0; 548 | 549 | // Once we found it, rewrite the top-level BNOT with an AND 550 | ins->opcode = m_and; 551 | ins->l.swap(orNonNum->d->l); 552 | 553 | // Invert the numeric part 554 | uint64 notNum = ~(orNum->nnn->value) & ((1ULL << (orNum->size * 8)) - 1); 555 | ins->r.make_number(notNum, orNum->size); 556 | 557 | return 1; 558 | } 559 | 560 | // This function just inspects the instruction and calls the 561 | // pattern-replacement functions above to perform deobfuscation. 
562 | int Optimize(minsn_t *ins) 563 | { 564 | int iLocalRetVal = 0; 565 | 566 | switch (ins->opcode) 567 | { 568 | case m_bnot: 569 | iLocalRetVal = pat_BnotOrBnotConst(ins); 570 | break; 571 | case m_or: 572 | iLocalRetVal = pat_OrAndNot(ins); 573 | if (!iLocalRetVal) 574 | iLocalRetVal = pat_OrViaXorAnd(ins); 575 | if (!iLocalRetVal) 576 | iLocalRetVal = pat_OrNegatedSameCondition(ins); 577 | if (!iLocalRetVal) 578 | iLocalRetVal = pat_LogicAnd1(ins); 579 | 580 | break; 581 | case m_and: 582 | iLocalRetVal = pat_AndXor(ins); 583 | if (!iLocalRetVal) 584 | iLocalRetVal = pat_MulSub(ins); 585 | break; 586 | case m_xor: 587 | iLocalRetVal = pat_XorChain(ins); 588 | if(!iLocalRetVal) 589 | iLocalRetVal = pat_LnotOrLnotLnot(ins); 590 | if (!iLocalRetVal) 591 | iLocalRetVal = pat_LogicAnd1(ins); 592 | break; 593 | case m_lnot: 594 | iLocalRetVal = pat_LnotOrLnotLnot(ins); 595 | break; 596 | } 597 | return iLocalRetVal; 598 | } 599 | 600 | // This is the virtual function dictated by the optinsn_t interface. This 601 | // function gets called by the Hex-Rays kernel; we optimize the microcode. 602 | int func(mblock_t *blk, minsn_t *ins); 603 | }; 604 | 605 | // Callback function. Do pattern-deobfuscation. 606 | int ObfCompilerOptimizer::func(mblock_t *blk, minsn_t *ins) 607 | { 608 | #if OPTVERBOSE 609 | char buf[1000]; 610 | mcode_t_to_string(ins, buf, sizeof(buf)); 611 | msg("ObfCompilerOptimizer: %a %s\n", ins->ea, buf); 612 | #endif 613 | 614 | int retVal = Optimize(ins); 615 | int iLocalRetVal = 0; 616 | 617 | // This callback doesn't seem to get called for subinstructions of 618 | // conditional branches. So, if we're dealing with a conditional branch, 619 | // manually optimize the condition expression 620 | if ((is_mcode_jcond(ins->opcode) || is_mcode_set(ins->opcode)) && ins->l.t == mop_d) 621 | { 622 | // In order to optimize the jcc condition, we actually need a different 623 | // structure than optinsn_t: in particular, we need a minsn_visitor_t. 
624 | // This local structure declaration just passes the calls to 625 | // minsn_visitor_t::visit_minsn onto the Optimize function in this 626 | // optinsn_t object. 627 | struct Blah : minsn_visitor_t 628 | { 629 | int visit_minsn() 630 | { 631 | return othis->Optimize(this->curins); 632 | } 633 | ObfCompilerOptimizer *othis; 634 | Blah(ObfCompilerOptimizer *o) : othis(o) { }; 635 | }; 636 | 637 | Blah b(this); 638 | 639 | // Optimize all subinstructions of the JCC conditional 640 | iLocalRetVal += ins->for_all_insns(b); 641 | // For good measure, optimize the top-level instruction again. I don't 642 | // know if this is necessary or important, but whatever. 643 | // iLocalRetVal += Optimize(ins); 644 | } 645 | retVal += iLocalRetVal; 646 | 647 | // If any optimizations were performed... 648 | if (retVal) 649 | { 650 | #if OPTVERBOSE 651 | // ... inform the user ... 652 | mcode_t_to_string(ins, buf, sizeof(buf)); 653 | msg("ObfCompilerOptimizer: replaced by %s\n", buf); 654 | #endif 655 | #if IDA_SDK_VERSION == 710 656 | ins->optimize_flat(); 657 | #elif IDA_SDK_VERSION >= 720 658 | ins->optimize_solo(); 659 | #endif 660 | // I got an INTERR if I optimized jcc conditionals without marking the lists dirty. 661 | blk->mark_lists_dirty(); 662 | blk->mba->verify(true); 663 | //blk->mba->optimize_local(0); 664 | // ... 
verify we haven't corrupted anything 665 | //blk->mba->verify(true); 666 | } 667 | return retVal; 668 | } 669 | 670 | -------------------------------------------------------------------------------- /PatternDeobfuscate.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | struct ObfCompilerOptimizer : public optinsn_t 5 | { 6 | int func(mblock_t *blk, minsn_t *ins); 7 | }; 8 | -------------------------------------------------------------------------------- /PatternDeobfuscateUtil.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "HexRaysUtil.hpp" 3 | #include "PatternDeobfuscateUtil.hpp" 4 | #include "Config.hpp" 5 | 6 | // For microinstructions with two or more operands (in l and r), check to see 7 | // if one of them is numeric and the other one isn't. If this is the case, 8 | // return pointers to the operands in the appropriately-named argument 9 | // variables and return true. Otherwise, return false. 10 | // This is a utility function that helps implement many other pattern-matching 11 | // deobfuscations. 12 | bool ExtractNumAndNonNum(minsn_t *insn, mop_t *&numOp, mop_t *&otherOp) 13 | { 14 | mop_t *num = NULL, *other = NULL; 15 | 16 | if (insn->l.t == mop_n) 17 | { 18 | num = &insn->l; 19 | other = &insn->r; 20 | } 21 | 22 | if (insn->r.t == mop_n) 23 | { 24 | if (num != NULL) 25 | { 26 | // Technically we have an option to perform constant folding 27 | // here... 
but Hex-Rays should have done / should do that for us 28 | return false; 29 | } 30 | num = &insn->r; 31 | other = &insn->l; 32 | } 33 | if (num == NULL) 34 | return false; 35 | 36 | numOp = num; 37 | otherOp = other; 38 | 39 | return true; 40 | } 41 | 42 | // For microinstructions with two or more operands (in l and r), check to see 43 | // if one of them is a mop_d (result of another microinstruction), where the 44 | // provider microinstruction is has opcode type mc. If successful, return the 45 | // provider microinstruction and the non-matching micro-operand in the 46 | // appropriately-named arguments. Otherwise, return false. 47 | // This helper function is useful for performing pattern-matching upon 48 | // commutative operations. Without it, we'd have to write each of our patterns 49 | // twice: once for when the operation we were looking for was on the left-hand 50 | // side, and once for when the operation was on the right-hand side. 51 | bool ExtractByOpcodeType(minsn_t *ins, mcode_t mc, minsn_t *&match, mop_t*& noMatch) 52 | { 53 | mop_t *possNoMatch = NULL; 54 | minsn_t *possMatch = NULL; 55 | 56 | // Does the left-hand side contain the operation we're looking for? 57 | // Update possNoMatch or possMatch, depending. 58 | if (!ins->l.is_insn() || ins->l.d->opcode != mc) 59 | possNoMatch = &ins->l; 60 | else 61 | possMatch = ins->l.d; 62 | 63 | // Perform the same check on the right-hand side. 64 | if (!ins->r.is_insn() || ins->r.d->opcode != mc) 65 | possNoMatch = &ins->r; 66 | else 67 | possMatch = ins->r.d; 68 | 69 | // If both sides matched, or neither side matched, fail. 70 | if (possNoMatch == NULL || possMatch == NULL) 71 | return false; 72 | 73 | match = possMatch; 74 | noMatch = possNoMatch; 75 | return true; 76 | } 77 | 78 | // The obfuscation techniques upon conditional operations have "&1" 79 | // miscellaneously present or not present within them. 
Writing pattern-matching 80 | // rules for all of the many possibilities would be extremely tedious. This 81 | // helper function reduces the tedium by checking to see whether the provided 82 | // microinstruction is "x & 1" (or "1 & x"), and it extracts x (as both an 83 | // operand, and, if the operand is a mop_d (result of another 84 | // microinstruction), return the provider instruction also. 85 | bool TunnelThroughAnd1(minsn_t *ins, minsn_t *&inner, bool bRequireSize1, mop_t **opInner) 86 | { 87 | // Microinstruction must be AND 88 | if (ins->opcode != m_and) 89 | return false; 90 | 91 | // One side must be numeric, the other one non-numeric 92 | mop_t *andNum, *andNonNum; 93 | if (!ExtractNumAndNonNum(ins, andNum, andNonNum)) 94 | return false; 95 | 96 | // The number must be the value 1 97 | if (andNum->nnn->value != 1) 98 | return false; 99 | 100 | if(bRequireSize1 && andNum->size != 1) 101 | return false; 102 | 103 | // If requested, pass the operand back to the caller this point 104 | if(opInner != NULL) 105 | *opInner = andNonNum; 106 | 107 | // If the non-numeric operand is an instruction, extract the 108 | // microinstruction and pass that back to the caller. 109 | if (andNonNum->is_insn()) 110 | { 111 | inner = andNonNum->d; 112 | return true; 113 | } 114 | 115 | // Otherwise, if the non-numeric part wasn't a mop_d, check to see whether 116 | // the caller specifically wanted a mop_d. If they did, fail. If the caller 117 | // was willing to accept another operand type, return true. 118 | return opInner != NULL; 119 | } 120 | 121 | // The obfuscator implements boolean inversion via "x ^ 1". Hex-Rays, or one of 122 | // our other deobfuscation rules, could also convert these to m_lnot 123 | // instructions. 
This function checks to see if the microinstruction passed as 124 | // argument matches one of those patterns, and if so, extracts the negated 125 | // term as both a micro-operand and a microinstruction (if the negated operand 126 | // was of mop_d type). 127 | bool ExtractLogicallyNegatedTerm(minsn_t *ins, minsn_t *&insNegated, mop_t **opNegated) 128 | { 129 | mop_t *nonNegated; 130 | 131 | // Check the m_lnot case. 132 | if (ins->opcode == m_lnot) 133 | { 134 | // Extract the operand, if requested by the caller. 135 | if(opNegated != NULL) 136 | *opNegated = &ins->l; 137 | 138 | // If the operand was mop_d (i.e., result of another microinstruction), 139 | // retrieve the provider microinstruction. Get rid of the pesky "&1" 140 | // terms while we're at it. 141 | if (ins->l.is_insn()) 142 | { 143 | insNegated = ins->l.d; 144 | while(TunnelThroughAnd1(insNegated, insNegated)); 145 | return true; 146 | } 147 | 148 | // Otherwise, if the operand was not of type mop_d, "success" depends 149 | // on whether the caller was willing to accept a non-mop_d operand. 150 | else 151 | { 152 | insNegated = NULL; 153 | return opNegated != NULL; 154 | } 155 | } 156 | 157 | // If the operand wasn't m_lnot, check the m_xor case. 158 | if (ins->opcode != m_xor) 159 | return false; 160 | 161 | // We're looking for XORs with one constant and one non-constant operand 162 | mop_t *xorNum, *xorNonNum; 163 | if (!ExtractNumAndNonNum(ins, xorNum, xorNonNum)) 164 | return false; 165 | 166 | // The constant must be the 1-byte value 1 167 | if (xorNum->nnn->value != 1 || xorNum->size != 1) 168 | return false; 169 | 170 | // The non-numeric part must also be 1. This check is probably unnecessary. 171 | if (xorNonNum->size != 1) 172 | return false; 173 | 174 | // If the caller wanted an operand, give it to them. 175 | if (opNegated != NULL) 176 | *opNegated = xorNonNum; 177 | 178 | // If the operand was mop_d (result of another microinstruction), extract 179 | // it and remove the &1 terms. 
180 | if (xorNonNum->is_insn()) 181 | { 182 | insNegated = xorNonNum->d; 183 | while (TunnelThroughAnd1(insNegated, insNegated)); 184 | return true; 185 | } 186 | 187 | // Otherwise, if the operand was not of type mop_d, "success" depends on 188 | // whether the caller was willing to accept a non-mop_d operand. 189 | insNegated = NULL; 190 | return opNegated != NULL; 191 | } 192 | 193 | // This function checks whether two conditional terms are logically opposite. 194 | // For example, "eax =s 1" would be considered logically 195 | // opposite. The check is purely syntactic; semantically-equivalent conditions 196 | // that were not implemented as syntactic logical opposites will not be 197 | // considered the same by this function. 198 | bool AreConditionsOpposite(minsn_t *lhsCond, minsn_t *rhsCond) 199 | { 200 | // Get rid of pesky &1 terms 201 | while (TunnelThroughAnd1(lhsCond, lhsCond)); 202 | while (TunnelThroughAnd1(rhsCond, rhsCond)); 203 | 204 | // If the conditions were negated via m_lnot or m_xor by 1, get the 205 | // un-negated part as a microinstruction. 206 | bool bLhsWasNegated = ExtractLogicallyNegatedTerm(lhsCond, lhsCond); 207 | bool bRhsWasNegated = ExtractLogicallyNegatedTerm(rhsCond, rhsCond); 208 | 209 | // lhsCond and rhsCond will be set to NULL if their original terms were 210 | // negated, but the thing that was negated wasn't the result of another 211 | // microinstruction. 212 | if (lhsCond == NULL || rhsCond == NULL) 213 | return false; 214 | 215 | // If one was negated and the other wasn't, compare them for equality. 216 | // If the non-negated part of the negated comparison was identical to 217 | // the non-negated comparison, then the conditions are clearly opposite. 218 | // I guess this could also be extended by incorporating the logic from 219 | // below, but I didn't need to do that in practice. 
220 | if (bLhsWasNegated != bRhsWasNegated) 221 | return lhsCond->equal_insns(*rhsCond, EQ_IGNSIZE | EQ_IGNCODE); 222 | 223 | // Otherwise, if both were negated or both were non-negated, compare the 224 | // conditionals term-wise. First, ensure that both microoperands are 225 | // setXX instructions. 226 | else if (is_mcode_set(lhsCond->opcode) && is_mcode_set(rhsCond->opcode)) 227 | { 228 | // Now we have two possibilities. 229 | // #1: Condition codes are opposite, LHS and RHS are both equal 230 | if (negate_mcode_relation(lhsCond->opcode) == rhsCond->opcode) 231 | return 232 | equal_mops_ignore_size(lhsCond->l, rhsCond->l) && 233 | equal_mops_ignore_size(lhsCond->r, rhsCond->r); 234 | 235 | // #2: Condition codes are the same, LHS and RHS are swapped 236 | if (lhsCond->opcode == rhsCond->opcode) 237 | return 238 | equal_mops_ignore_size(lhsCond->l, rhsCond->r) && 239 | equal_mops_ignore_size(lhsCond->r, rhsCond->l); 240 | } 241 | 242 | // No dice. 243 | return false; 244 | } 245 | 246 | // Insert a micro-operand into one of the two sets above. Remove 247 | // duplicates -- meaning, if the operand we're trying to insert is already 248 | // in the set, remove the existing one instead. This is the "cancellation" 249 | // in practice. 250 | bool XorSimplifier::Insert(std::set &whichSet, mop_t *op) 251 | { 252 | mop_t &rop = *op; 253 | 254 | // Because mop_t types currently cannot be compared or hashed in the 255 | // current microcode API, I had to use a slow linear search procedure 256 | // to compare the micro-operand we're trying to insert against all 257 | // previously-inserted values in the relevant set. 258 | for (auto otherOp : whichSet) 259 | { 260 | // If the micro-operand was already in the set, get rid of it. 261 | if (equal_mops_ignore_size(rop, *otherOp)) 262 | { 263 | whichSet.erase(otherOp); 264 | 265 | // Mark these operands as being able to be deleted. 
266 | m_ZeroOut.push_back(op); 267 | m_ZeroOut.push_back(otherOp); 268 | 269 | // Couldn't insert. 270 | return false; 271 | } 272 | } 273 | 274 | // Otherwise, if it didn't match an operand already in the set, insert 275 | // it into the set and return true on successful insertion. 276 | whichSet.insert(op); 277 | return true; 278 | } 279 | 280 | // Wrapper to insert constant and non-constant terms 281 | bool XorSimplifier::InsertNonConst(mop_t *op) 282 | { 283 | ++m_InsertedNonConst; 284 | return Insert(m_NonConst, op); 285 | } 286 | 287 | bool XorSimplifier::InsertConst(mop_t *op) 288 | { 289 | ++m_InsertedConst; 290 | return Insert(m_Const, op); 291 | } 292 | 293 | // Insert one micro-operand. If the operand is the result of another XOR 294 | // microinstruction, recursively insert the operands being XORed. 295 | // Otherwise, insert the micro-operand into the proper set (constant or 296 | // non-constant) depending upon its operand type. 297 | void XorSimplifier::Insert(mop_t *op) 298 | { 299 | // If operand is m_xor microinstruction, recursively insert children 300 | if (op->t == mop_d && op->d->opcode == m_xor) 301 | { 302 | Insert(&op->d->l); 303 | Insert(&op->d->r); 304 | return; 305 | } 306 | // Otherwise, insert it into the constant or non-constant set 307 | if (op->t == mop_n) 308 | InsertConst(op); 309 | else 310 | InsertNonConst(op); 311 | } 312 | 313 | // This function takes an XOR microinstruction and inserts its operands 314 | // by calling the function above 315 | void XorSimplifier::Insert(minsn_t *insn) 316 | { 317 | if (insn->opcode != m_xor) 318 | { 319 | #if OPTVERBOSE 320 | char buf[1000]; 321 | mcode_t_to_string(insn, buf, sizeof(buf)); 322 | msg("[I] Tried to insert from non-XOR instruction of type %s at %a\n", buf, insn->ea); 323 | #endif 324 | return; 325 | } 326 | 327 | // Insert children 328 | Insert(&insn->l); 329 | Insert(&insn->r); 330 | } 331 | 332 | // Were any cancellations performed? 
333 | bool XorSimplifier::DidSimplify() 334 | { 335 | return !m_ZeroOut.empty(); 336 | //return m_Const.size() != m_InsertedConst || m_NonConst.size() != m_InsertedNonConst; 337 | } 338 | 339 | // Top-level functionality to simplify an XOR microinstruction. Insert the 340 | // instruction, then see if any simplifications could be performed. If so, 341 | // remove the simplified terms. 342 | bool XorSimplifier::Simplify(minsn_t *insn) 343 | { 344 | // Only insert XOR instructions 345 | if (insn->opcode != m_xor) 346 | return false; 347 | 348 | Insert(insn); 349 | 350 | // Were there common terms that could be cancelled? 351 | if (!DidSimplify()) 352 | return false; 353 | 354 | // Perform the cancellations by zeroing out the common micro-operands 355 | for (auto zo : m_ZeroOut) 356 | zo->make_number(0, zo->size); 357 | 358 | // Trigger Hex-Rays' ordinary optimizations, which will remove the 359 | // XOR 0 terms. Return true. 360 | #if IDA_SDK_VERSION == 710 361 | insn->optimize_flat(); 362 | #elif IDA_SDK_VERSION >= 720 363 | insn->optimize_solo(); 364 | #endif 365 | return true; 366 | } 367 | -------------------------------------------------------------------------------- /PatternDeobfuscateUtil.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | bool ExtractNumAndNonNum(minsn_t *insn, mop_t *&numOp, mop_t *&otherOp); 6 | bool ExtractByOpcodeType(minsn_t *ins, mcode_t mc, minsn_t *&match, mop_t*& noMatch); 7 | bool TunnelThroughAnd1(minsn_t *ins, minsn_t *&inner, bool bRequireSize1 = true, mop_t **opInner = NULL); 8 | bool AreConditionCodesOpposite(mcode_t c1, mcode_t c2); 9 | bool ExtractLogicallyNegatedTerm(minsn_t *ins, minsn_t *&insNegated, mop_t **opNegated = NULL); 10 | bool AreConditionsOpposite(minsn_t *lhsCond, minsn_t *rhsCond); 11 | 12 | class XorSimplifier 13 | { 14 | public: 15 | XorSimplifier() : m_InsertedConst(0), m_InsertedNonConst(0) {}; 16 | 17 | // The set of terms in the 
// printf-style debug output helper. When UNFLATTENVERBOSE is enabled, the
// message is forwarded to vmsg() and its return value is returned; otherwise
// nothing is printed and 0 is returned.
static int debugmsg(const char *fmt, ...)
{
#if UNFLATTENVERBOSE
	va_list va;
	va_start(va, fmt);
	int iRet = vmsg(fmt, va);
	// Every va_start must be paired with a va_end before the function returns.
	va_end(va);
	return iRet;
#else
	(void)fmt;
	return 0;
#endif
}
17 | void AppendGotoOntoNonEmptyBlock(mblock_t *blk, int iBlockDest) 18 | { 19 | assert(blk->tail != NULL); 20 | 21 | // Allocate a new instruction, using the tail as a template 22 | minsn_t *newGoto = new minsn_t(*blk->tail); 23 | 24 | // Create a goto instruction to the specified block 25 | newGoto->opcode = m_goto; 26 | newGoto->l.t = mop_b; 27 | newGoto->l.b = iBlockDest; 28 | newGoto->l.size = NOSIZE; 29 | newGoto->r.erase(); 30 | newGoto->d.erase(); 31 | 32 | // Add it onto the block 33 | blk->insert_into_block(newGoto, blk->tail); 34 | } 35 | 36 | // For a block with a single successor, change its target from some old block 37 | // to a new block. This is only on the graph level, not in terms of gotos. 38 | void ChangeSingleTarget(mblock_t *blk, int iOldTarget, int iNewTarget) 39 | { 40 | assert(blk->nsucc() == 1); 41 | mbl_array_t *mba = blk->mba; 42 | 43 | // Overwrite the successor with the new target 44 | blk->succset[0] = iNewTarget; 45 | 46 | // Add this block to the predecessor set of the target 47 | mba->get_mblock(iNewTarget)->predset.add(blk->serial); 48 | 49 | // Remove this block from the predecessor set of the old target 50 | mba->get_mblock(iOldTarget)->predset.del(blk->serial); 51 | } 52 | 53 | // Reverse engineered from hexrays.dll (though it's obvious). Basically: does 54 | // this block end in a call instruction? 55 | bool is_call_block(mblock_t *blk) 56 | { 57 | if (blk->tail == NULL) 58 | return false; 59 | 60 | return blk->tail->opcode == m_call || blk->tail->opcode == m_icall; 61 | } 62 | 63 | #define GOTO_NOT_SINGLE -1 64 | 65 | // This function eliminates transfers to blocks with a single goto on them. 66 | // Either if a given block has a goto at the end of it, where the destination 67 | // is a block with a single goto on it, or if the block doesn't end in a goto, 68 | // but simply falls through to a block with a single goto on it. 
Also, this 69 | // process happens recursively; i.e., if A goes to B, and B goes to C, and C 70 | // goes to D, then after we've done our tranformations, A will go to D. 71 | int RemoveSingleGotos(mbl_array_t *mba) 72 | { 73 | // This information determines, ultimately, to which block a goto will go. 74 | // As mentioned in the function comment, this accounts for gotos-to-gotos. 75 | int *forwarderInfo = new int[mba->qty]; 76 | 77 | // For each block 78 | for (int i = 0; i < mba->qty; ++i) 79 | { 80 | // Begin by initializing its information to say that it does not 81 | // consist of a single goto. Update later if it does. 82 | forwarderInfo[i] = GOTO_NOT_SINGLE; 83 | 84 | // Get the block and skip any "assert" instructions. 85 | mblock_t *b = mba->get_mblock(i); 86 | minsn_t *m2 = getf_reginsn(b->head); 87 | 88 | // Is the first non-assert instruction a goto? 89 | if (m2 == NULL || m2->opcode != m_goto) 90 | continue; 91 | 92 | // If it was a goto, record the destination block number 93 | forwarderInfo[i] = m2->l.b; 94 | } 95 | 96 | int iRetVal = 0; 97 | // Now, actually replace transfer-to-goto blocks with their destinations. 98 | for (int i = 0; i < mba->qty; ++i) 99 | { 100 | mblock_t *blk = mba->get_mblock(i); 101 | 102 | // FYI, don't screw with blocks that have calls at the end of them. 103 | // You'll get an INTERR. Also, if this block has more than one 104 | // successor, then it couldn't possibly be a transfer to a goto. 105 | if (is_call_block(blk) || blk->nsucc() != 1) 106 | continue; 107 | 108 | // Get the last instruction on the block 109 | minsn_t *mgoto = blk->tail; 110 | if (mgoto == NULL) 111 | continue; 112 | 113 | int iOriginalGotoTarget; 114 | // Now, look up the block number of the destination. 115 | bool bWasGoto = true; 116 | 117 | // If the last instruction was a goto, get the information from there. 
118 | if (mgoto->opcode == m_goto) 119 | iOriginalGotoTarget = mgoto->l.b; 120 | 121 | // Otherwise, take the number of the only successor block. 122 | else 123 | { 124 | iOriginalGotoTarget = blk->succ(0); 125 | bWasGoto = false; 126 | } 127 | 128 | // Now, we determine if the target was a single-goto block. 129 | int iGotoTarget = iOriginalGotoTarget; 130 | bool bShouldReplace = false; 131 | intvec_t visited; 132 | 133 | // Keep looping while we still find goto-to-gotos. 134 | while (true) 135 | { 136 | // Keep track of the blocks we've seen so far, so we don't end up 137 | // in an infinite loop if the goto blocks form a cycle in the 138 | // graph. 139 | if (!visited.add_unique(iGotoTarget)) 140 | { 141 | bShouldReplace = false; 142 | break; 143 | } 144 | // Once we find the first non-single-goto block, stop. 145 | if (forwarderInfo[iGotoTarget] == GOTO_NOT_SINGLE) 146 | break; 147 | 148 | // If we find at least one single goto at the destination, then 149 | // indicate that we should replace. Keep looping, though, to find 150 | // the ultimate destination. 151 | bShouldReplace = true; 152 | 153 | // Now check: did the single-goto block also target a single-goto 154 | // block? 155 | iGotoTarget = forwarderInfo[iGotoTarget]; 156 | } 157 | 158 | // If the target wasn't a single-goto block, or there was an infinite 159 | // loop in the graph, don't touch this block. 160 | if (!bShouldReplace) 161 | continue; 162 | 163 | // Otherwise, update the destination with the final target. 164 | 165 | // If the block had a goto, overwrite its block destination. 166 | if (bWasGoto) 167 | mgoto->l.b = iGotoTarget; 168 | 169 | // Otherwise, add a goto onto the block. You might think you could skip 170 | // this step and just change the successor information, but you'll get 171 | // an INTERR if you do. 172 | else 173 | AppendGotoOntoNonEmptyBlock(blk, iGotoTarget); 174 | 175 | // Change the successor/predecessor information for this block and its 176 | // old and new target. 
177 | ChangeSingleTarget(blk, iOriginalGotoTarget, iGotoTarget); 178 | 179 | // Counter of the number of blocks changed. 180 | ++iRetVal; 181 | } 182 | 183 | // Don't need the forwarder information anymore. 184 | delete[] forwarderInfo; 185 | 186 | // Return the number of blocks whose destinations were changed 187 | return iRetVal; 188 | } 189 | 190 | // For a block that ends in a conditional jump, extract the integer block 191 | // numbers for the "taken" and "not taken" cases. 192 | bool ExtractJccParts(mblock_t *pred1, mblock_t *&endsWithJcc, int &jccDest, int &jccFallthrough) 193 | { 194 | if (is_mcode_jcond(pred1->tail->opcode)) 195 | { 196 | if (pred1->tail->d.t != mop_b) 197 | { 198 | debugmsg("[I] SplitMblocksByJccEnding: block %d was jcc, but destination was %s, not mop_b\n", mopt_t_to_string(pred1->tail->d.t)); 199 | return false; 200 | } 201 | endsWithJcc = pred1; 202 | jccDest = pred1->tail->d.b; 203 | 204 | // The fallthrough location is the block that's not directly targeted 205 | // by the jcc instruction. Determine that by looking at the successors. 206 | // I guess technically Hex-Rays enforces that it must be the 207 | // sequentially-next-numbered block, but oh well. 208 | jccFallthrough = pred1->succ(0) == jccDest ? pred1->succ(1) : pred1->succ(0); 209 | return true; 210 | } 211 | return false; 212 | } 213 | 214 | // For a block with two predecessors, figure out if one of them ends in a jcc 215 | // instruction. Return pointers to the block that ends in a jcc and the one 216 | // that doesn't. Also return the integer numbers of those blocks. 217 | bool SplitMblocksByJccEnding(mblock_t *pred1, mblock_t *pred2, mblock_t *&endsWithJcc, mblock_t *&nonJcc, int &jccDest, int &jccFallthrough) 218 | { 219 | endsWithJcc = NULL; 220 | nonJcc = NULL; 221 | if (pred1->tail == NULL || pred2->tail == NULL) 222 | return false; 223 | 224 | // Check if the first block ends with jcc. Make sure the second one 225 | // doesn't also. 
226 | if (ExtractJccParts(pred1, endsWithJcc, jccDest, jccFallthrough)) 227 | { 228 | if (is_mcode_jcond(pred2->tail->opcode)) 229 | return false; 230 | 231 | nonJcc = pred2; 232 | } 233 | // Otherwise, check if the second block ends with jcc. Make sure the first 234 | // one doesn't also. 235 | else 236 | { 237 | if (!ExtractJccParts(pred2, endsWithJcc, jccDest, jccFallthrough)) 238 | return false; 239 | nonJcc = pred1; 240 | } 241 | return true; 242 | } 243 | 244 | // Plan to remove an edge from src->dest 245 | void DeferredGraphModifier::Remove(int src, int dest) 246 | { 247 | m_RemoveEdges.push_back(std::pair(src, dest)); 248 | } 249 | 250 | // Plan to add an edge from src->dest 251 | void DeferredGraphModifier::Add(int src, int dest) 252 | { 253 | m_AddEdges.push_back(std::pair(src, dest)); 254 | } 255 | 256 | // Plan to replace an edge from src->oldDest to src->newDest 257 | void DeferredGraphModifier::Replace(int src, int oldDest, int newDest) 258 | { 259 | Remove(src, oldDest); 260 | Add(src, newDest); 261 | } 262 | 263 | // Apply the planned changes to the graph 264 | int DeferredGraphModifier::Apply(mbl_array_t *mba) 265 | { 266 | int iChanged = 0; 267 | 268 | // Iterate through the edges slated for removal 269 | for (auto re : m_RemoveEdges) 270 | { 271 | mblock_t *mSrc = mba->get_mblock(re.first); 272 | mblock_t *mDst = mba->get_mblock(re.second); 273 | 274 | // Remove the source as a predecessor for dest, and vice versa 275 | mSrc->succset.del(mDst->serial); 276 | mDst->predset.del(mSrc->serial); 277 | 278 | #if UNFLATTENVERBOSE 279 | debugmsg("[I] Removed edge %d->%d (%d->%d)\n", mSrc->serial, mDst->serial, re.first, re.second); 280 | #endif 281 | ++iChanged; 282 | } 283 | 284 | // Iterate through the edges slated for addition 285 | for (auto ae : m_AddEdges) 286 | { 287 | mblock_t *mSrc = mba->get_mblock(ae.first); 288 | mblock_t *mDst = mba->get_mblock(ae.second); 289 | 290 | // Add the source as a predecessor for dest, and vice versa 291 | 
mSrc->succset.add(mDst->serial); 292 | mDst->predset.add(mSrc->serial); 293 | 294 | #if UNFLATTENVERBOSE 295 | debugmsg("[I] Added edge %d->%d (%d->%d)\n", mSrc->serial, mDst->serial, ae.first, ae.second); 296 | #endif 297 | ++iChanged; 298 | } 299 | return iChanged; 300 | } 301 | 302 | // Either change the destination of an existing goto, or add a new goto onto 303 | // the end of the block to the destination. Also, plan to modify the graph 304 | // structure later to reflect these changes. 305 | bool DeferredGraphModifier::ChangeGoto(mblock_t *blk, int iOld, int iNew) 306 | { 307 | bool bChanged = true; 308 | int iDispPred = blk->serial; 309 | 310 | // If the last instruction isn't a goto, add a new one 311 | if (blk->tail->opcode != m_goto) 312 | AppendGotoOntoNonEmptyBlock(blk, iNew); 313 | 314 | // Otherwise, if it is a goto... 315 | else 316 | { 317 | // Be sure we're actually *changing* the destination to a different 318 | // location 319 | int prev = blk->tail->l.b; 320 | if (prev == iNew) 321 | bChanged = false; 322 | 323 | // And if so, do it 324 | else 325 | blk->tail->l.b = iNew; 326 | } 327 | 328 | // If we did change the destination, plan to update the graph later 329 | if (bChanged) 330 | Replace(blk->serial, iOld, iNew); 331 | 332 | return bChanged; 333 | } 334 | 335 | // Delete all instructions on a block, and remove its outgoing edges. Blocks 336 | // will be deleted if we have removed edges in the graph such that the block 337 | // is no longer reachable from block #0. 
338 | void DeleteBlock(mblock_t *mb) 339 | { 340 | mbl_array_t *mba = mb->mba; 341 | 342 | // Delete this block from the predecessor set of the successors 343 | for (int j = 0; j < mb->nsucc(); ++j) 344 | mba->get_mblock(mb->succ(j))->predset.del(mb->serial); 345 | 346 | // Delete all successor edges 347 | while (mb->nsucc() != 0) 348 | mb->succset.del(mb->succ(0)); 349 | 350 | // Delete the instructions on the block 351 | minsn_t *pCurr = mb->head, *pNext = NULL; 352 | while (pCurr != NULL) 353 | { 354 | pNext = pCurr->next; 355 | delete pCurr; 356 | pCurr = pNext; 357 | } 358 | 359 | // Mark that the block now has no instructions. 360 | mb->head = NULL; 361 | mb->tail = NULL; 362 | } 363 | 364 | // The goto-to-goto elimination and unflattening phases remove edges in the 365 | // control flow graph represented in the mbl_array_t *. As a result, certain 366 | // blocks might no longer be reachable anymore in the graph. Thus, they can be 367 | // deleted with no ill-effects. In theory, we could wait for Hex-Rays to remove 368 | // these blocks, which it eventually will, sometime after MMAT_GLBOPT2. 369 | // Originally, I just let Hex-Rays remove the blocks. However, it turned out 370 | // that the blocks were removed too late, which hampered other optimizations 371 | // that Hex-Rays otherwise would have been able to perform had the blocks been 372 | // eliminated earlier. Thus, I wrote this function to remove the unreachable 373 | // blocks immediately after unflattening, which allowed the aforementioned 374 | // simplifications to happen. 375 | // 376 | // At the time of writing, I'm still coordinating with Hex-Rays to see if I can 377 | // make use of internal decompiler machinery to perform elimination. If I can, 378 | // we'll use that instead of this function. For now, we prune manually. 379 | int PruneUnreachable(mbl_array_t *mba) 380 | { 381 | // This set marks the vertices we've already visited. 
This both prevents 382 | // infinite loops in the depth-first search, as well as records the 383 | // unreachable blocks after the search terminates. 384 | bitset_t visited; 385 | 386 | // This is a standard worklist-based algorithm. This list keeps track of 387 | // reachable predecessors yet-to-be-visited. 388 | qlist worklist; 389 | 390 | // Initialize the worklist to block #0, which always denotes the entry 391 | // block in an mbl_array_t. 392 | worklist.push_back(0); 393 | 394 | // Worklist iteration: process the next reachable block. 395 | while (!worklist.empty()) 396 | { 397 | // Get the reachable block number, and remove it from the worklist. 398 | int iCurr = worklist.back(); 399 | worklist.pop_back(); 400 | 401 | // Prevent infinite loops by not visiting blocks more than once. 402 | if (visited.has(iCurr)) 403 | continue; 404 | 405 | // Mark that we have visited this particular block. 406 | visited.add(iCurr); 407 | 408 | // Insert all of the successors of this block into the worklist. It's 409 | // fine if we insert a block that's already been visited, as the check 410 | // above will prevent it from being visited again. 411 | for (auto iSucc : mba->get_mblock(iCurr)->succset) 412 | worklist.push_back(iSucc); 413 | } 414 | 415 | 416 | // Count the number of unreachable blocks we remove. 417 | int nRemoved = 0; 418 | 419 | // Iterate over all blocks in the mbl_array_t... 420 | for (int i = 0; i < mba->qty; ++i) 421 | { 422 | // ... if it wasn't visited by the procedure above, then it's 423 | // unreachable. 424 | if (!visited.has(i)) 425 | { 426 | // If so, delete the instructions on the block and remove any 427 | // outgoing edges. 428 | DeleteBlock(mba->get_mblock(i)); 429 | ++nRemoved; 430 | } 431 | } 432 | 433 | // At this point we have to explicitly trigger removal of empty blocks. If 434 | // we don't, we'll get an INTERR. 435 | if(nRemoved != 0) 436 | mba->remove_empty_blocks(); 437 | 438 | // Returns the number of blocks removed. 
439 | return nRemoved; 440 | } 441 | -------------------------------------------------------------------------------- /TargetUtil.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | int RemoveSingleGotos(mbl_array_t *mba); 5 | bool SplitMblocksByJccEnding(mblock_t *pred1, mblock_t *pred2, mblock_t *&endsWithJcc, mblock_t *&nonJcc, int &jccDest, int &jccFallthrough); 6 | int PruneUnreachable(mbl_array_t *mba); 7 | 8 | // The "deferred graph modifier" records changes that the client wishes to make 9 | // to a given graph, but does not apply them immediately. Weird things could 10 | // happen if we were to modify a graph while we were iterating over it, so save 11 | // the modifications until we're done iterating over the graph. 12 | struct DeferredGraphModifier 13 | { 14 | std::vector > m_RemoveEdges; 15 | std::vector > m_AddEdges; 16 | void Remove(int src, int dest); 17 | void Add(int src, int dest); 18 | void Replace(int src, int oldDest, int newDest); 19 | int Apply(mbl_array_t *mba); 20 | bool ChangeGoto(mblock_t *blk, int iOld, int iNew); 21 | void Clear() { m_RemoveEdges.clear(); m_AddEdges.clear(); } 22 | }; -------------------------------------------------------------------------------- /Unflattener.cpp: -------------------------------------------------------------------------------- 1 | // ROLF TODO: Change control flow in func so that PruneUnreachable gets called even if CFI info fails? 2 | // Probably doesn't matter at that point. 3 | 4 | #define USE_DANGEROUS_FUNCTIONS 5 | #include 6 | #include "HexRaysUtil.hpp" 7 | #include "Unflattener.hpp" 8 | #include "CFFlattenInfo.hpp" 9 | #include "TargetUtil.hpp" 10 | #include "DefUtil.hpp" 11 | #include "Config.hpp" 12 | 13 | std::set g_BlackList; 14 | std::set g_WhiteList; 15 | 16 | static int debugmsg(const char *fmt, ...) 
17 | { 18 | #if UNFLATTENVERBOSE 19 | va_list va; 20 | va_start(va, fmt); 21 | return vmsg(fmt, va); 22 | #endif 23 | return 0; 24 | } 25 | 26 | void DumpMBAToFile(mbl_array_t *mba, const char *fpath) 27 | { 28 | FILE *fp = qfopen(fpath, "w"); 29 | file_printer_t fpt(fp); 30 | mba->print(fpt); 31 | qfclose(fp); 32 | } 33 | 34 | mba_maturity_t g_Last = MMAT_ZERO; 35 | int g_NumGotosRemoved = 0; 36 | int atThisMaturity = 0; 37 | 38 | // Find the block that dominates iDispPred, and which is one of the targets of 39 | // the control flow flattening switch. 40 | mblock_t *CFUnflattener::GetDominatedClusterHead(mbl_array_t *mba, int iDispPred, int &iClusterHead) 41 | { 42 | mblock_t *mbClusterHead = NULL; 43 | // Find the block that is targeted by the dispatcher, and that 44 | // dominates the block we're currently looking at. This logic won't 45 | // work for the first block (since it wasn't targeted by the control 46 | // flow dispatch switch, so it doesn't have an entry in the dominated 47 | // cluster information), so we special-case it. 48 | if (iDispPred == cfi.iFirst) 49 | iClusterHead = cfi.iFirst, mbClusterHead = mba->get_mblock(cfi.iFirst); 50 | 51 | else 52 | { 53 | // If it wasn't the first block, look up its cluster head block 54 | iClusterHead = cfi.m_DominatedClusters[iDispPred]; 55 | if (iClusterHead < 0) 56 | { 57 | debugmsg("[I] Block %d was not part of a dominated cluster\n", iDispPred); 58 | return NULL; 59 | } 60 | mbClusterHead = mba->get_mblock(iClusterHead); 61 | #if UNFLATTENVERBOSE 62 | debugmsg("[I] Block %d was part of dominated cluster %d\n", iDispPred, iClusterHead); 63 | #endif 64 | } 65 | return mbClusterHead; 66 | 67 | } 68 | 69 | // This function attempts to locate the numeric assignment to a given variable 70 | // "what" starting from the end of the block "mb". 
It follows definitions 71 | // backwards, even across blocks, until it either reaches the block 72 | // "mbClusterHead", or, if the boolean "bAllowMultiSuccs" is false, it will 73 | // stop the first time it reaches a block with more than one successor. 74 | // If it finds an assignment whose source is a stack variable, then it will not 75 | // be able to continue in the backwards direction, because intervening memory 76 | // writes will make the definition information useless. In that case, it 77 | // switches to a strategy of searching in the forward direction from 78 | // mbClusterHead, looking for assignments to that stack variable. 79 | // Information about the chain of assignment instructions along the way are 80 | // stored in the vector called m_DeferredErasuresLocal, a member variable of 81 | // the CFUnflattener class. 82 | int CFUnflattener::FindBlockTargetOrLastCopy(mblock_t *mb, mblock_t *mbClusterHead, mop_t *what, bool bAllowMultiSuccs) 83 | { 84 | mbl_array_t *mba = mb->mba; 85 | int iClusterHead = mbClusterHead->serial; 86 | 87 | MovChain local; 88 | 89 | mop_t *opNum = NULL, *opCopy; 90 | // Search backwards looking for a numeric assignment to "what". We may or 91 | // may not find a numeric assignment, but we might find intervening 92 | // assignments where "what" is copied from other variables. 93 | bool bFound = FindNumericDefBackwards(mb, what, opNum, local, true, bAllowMultiSuccs, iClusterHead); 94 | 95 | // If we found no intervening assignments to "what", that's bad. 96 | if (local.empty()) 97 | return -1; 98 | 99 | // opCopy now contains the last non-numeric assignment that we saw before 100 | // FindNumericDefBackwards terminated (either due to not being able to 101 | // follow definitions, or, if bAllowMultiSuccs is true, because it recursed 102 | // into a block with more than one successor. 
103 | opCopy = local.back().opCopy; 104 | 105 | // Copy the assignment chain into the erasures vector, so we can later 106 | // remove them if our analysis succeeds. 107 | m_DeferredErasuresLocal.insert(m_DeferredErasuresLocal.end(), local.begin(), local.end()); 108 | 109 | // If we didn't find a numeric definition, but we did find an assignment 110 | // from a stack variable, switch to a forward analysis from the beginning 111 | // of the cluster. If we don't find it, this is not necessarily an 112 | // indication that the analysis failed; for blocks with two successors, 113 | // we do further analysis. 114 | if (!bFound && opCopy != NULL && opCopy->t == mop_S) 115 | { 116 | mop_t *num = FindForwardStackVarDef(mbClusterHead, opCopy, local); 117 | if (num) 118 | opNum = num, bFound = true; 119 | else 120 | { 121 | #if UNFLATTENVERBOSE 122 | debugmsg("[EEE] Forward method also failed\n"); 123 | #endif 124 | } 125 | 126 | } 127 | 128 | // If we found a numeric assignment... 129 | if (bFound) 130 | { 131 | // Look up the integer number of the block corresponding to that value. 132 | int iDestNo = cfi.FindBlockByKey(opNum->nnn->value); 133 | 134 | // If we couldn't find the block, that's bad news. 135 | if (iDestNo < 0) 136 | msg("[E] Block %d assigned unknown key %llx to assigned var\n", mb->serial, opNum->nnn->value); 137 | 138 | // Otherwise, we win! Return the block number. 139 | else 140 | return iDestNo; 141 | } 142 | 143 | // Negative return code indicates failure. 144 | return -1; 145 | } 146 | 147 | // This function is used for unflattening constructs that have two successors, 148 | // such as if statements. Given a block that assigns to the assignment variable 149 | // that has two predecessors, analyze each of the predecessors looking for 150 | // numeric assignments by calling the previous function. 
151 | bool CFUnflattener::HandleTwoPreds(mblock_t *mb, mblock_t *mbClusterHead, mop_t *opCopy, mblock_t *&nonJcc, int &actualGotoTarget, int &actualJccTarget) 152 | { 153 | char buf[1000]; 154 | mbl_array_t *mba = mb->mba; 155 | int iDispPred = mb->serial; 156 | int iClusterHead = mbClusterHead->serial; 157 | 158 | // No really, don't call this function on a block that doesn't have two 159 | // predecessors. I was kind enough to warn you in the documentation; now 160 | // you get an assertion failure. 161 | assert(mb->npred() == 2); 162 | 163 | mblock_t *pred1 = mba->get_mblock(mb->pred(0)); 164 | mblock_t *pred2 = mba->get_mblock(mb->pred(1)); 165 | 166 | mblock_t *endsWithJcc = NULL; 167 | nonJcc = NULL; 168 | int jccDest = -1, jccFallthrough = -1; 169 | 170 | // Given the two predecessors, find the block with the conditional jump at 171 | // the end of it (store the block in "endsWithJcc") and the one without 172 | // (store it in nonJcc). Also find the block number of the jcc target, and 173 | // the block number of the jcc fallthrough (i.e., the block number of 174 | // nonJcc). 175 | if (!SplitMblocksByJccEnding(pred1, pred2, endsWithJcc, nonJcc, jccDest, jccFallthrough)) 176 | { 177 | debugmsg("[I] Block %d w/preds %d, %d did not have one predecessor ending in jcc, one without\n", iDispPred, pred1->serial, pred2->serial); 178 | return false; 179 | } 180 | 181 | // Sanity checking the structure of the graph. The nonJcc block should only 182 | // have one incoming edge... 183 | if (nonJcc->npred() != 1) 184 | { 185 | debugmsg("[I] Block %d w/preds %d, %d, non-jcc pred %d had %d predecessors (not 1)\n", iDispPred, pred1->serial, pred2->serial, nonJcc->serial, nonJcc->npred()); 186 | return false; 187 | } 188 | 189 | // ... namely, from the block ending with the jcc. 
190 | if (nonJcc->pred(0) != endsWithJcc->serial) 191 | { 192 | debugmsg("[I] Block %d w/preds %d, %d, non-jcc pred %d did not have the other as its predecessor\n", iDispPred, pred1->serial, pred2->serial, nonJcc->serial); 193 | return false; 194 | } 195 | 196 | // Call the previous function to locate the numeric definition of the 197 | // variable that is used to update the assignment variable if the jcc is 198 | // not taken. 199 | actualGotoTarget = FindBlockTargetOrLastCopy(endsWithJcc, mbClusterHead, opCopy, false); 200 | 201 | // If that succeeded... 202 | if (actualGotoTarget >= 0) 203 | { 204 | // ... then do the same thing when the jcc is not taken. 205 | actualJccTarget = FindBlockTargetOrLastCopy(nonJcc, mbClusterHead, opCopy, true); 206 | 207 | // If that succeeded, great! We can unflatten this two-way block. 208 | if (actualJccTarget >= 0) 209 | return true; 210 | } 211 | return false; 212 | } 213 | 214 | // Erase the now-superfluous chain of instructions that were used to copy a 215 | // numeric value into the assignment variable. 216 | void CFUnflattener::ProcessErasures(mbl_array_t *mba) 217 | { 218 | m_PerformedErasuresGlobal.insert(m_PerformedErasuresGlobal.end(), m_DeferredErasuresLocal.begin(), m_DeferredErasuresLocal.end()); 219 | for (auto erase : m_DeferredErasuresLocal) 220 | { 221 | #if UNFLATTENVERBOSE 222 | qstring qs; 223 | erase.insMov->print(&qs); 224 | tag_remove(&qs); 225 | msg("[I] Erasing %a: %s\n", erase.insMov->ea, qs.c_str()); 226 | #endif 227 | // Be gone, sucker 228 | mba->get_mblock(erase.iBlock)->make_nop(erase.insMov); 229 | } 230 | 231 | m_DeferredErasuresLocal.clear(); 232 | } 233 | 234 | /* 235 | // This method was suggested by Hex-Rays to force block recombination, as 236 | // opposed to my own function PruneUnreachable. At present, it does not do what 237 | // it's supposed to, so I'm continuing to use my own code for now. 
238 | 239 | #define MBA_CMBBLK 0x00000400 // request to combine blocks 240 | void RequestBlockCombination(mbl_array_t *mba) 241 | { 242 | uint32 *flags = reinterpret_cast(mba); 243 | *flags |= MBA_CMBBLK; 244 | } 245 | */ 246 | 247 | // This is the top-level un-flattening function for an entire graph. Hex-Rays 248 | // calls this function since we register our CFUnflattener class as a block 249 | // optimizer. 250 | int idaapi CFUnflattener::func(mblock_t *blk) 251 | { 252 | char buf[1000]; 253 | vd_printer_t vd; 254 | 255 | // Was this function blacklisted? Skip it if so 256 | mbl_array_t *mba = blk->mba; 257 | if (g_BlackList.find(mba->entry_ea) != g_BlackList.end()) 258 | return 0; 259 | 260 | #if UNFLATTENVERBOSE || UNFLATTENDEBUG 261 | const char *matStr = MicroMaturityToString(mba->maturity); 262 | #endif 263 | #if UNFLATTENVERBOSE 264 | debugmsg("[I] Block optimization called at maturity level %s\n", matStr); 265 | #endif 266 | 267 | // Only operate once per maturity level 268 | if (g_Last == mba->maturity) 269 | return 0; 270 | 271 | // Update the maturity level 272 | g_Last = mba->maturity; 273 | 274 | #if UNFLATTENDEBUG 275 | // If we're debugging, save a copy of the graph on disk 276 | snprintf(buf, sizeof(buf), "c:\\temp\\dumpBefore-%s-%d.txt", matStr, atThisMaturity); 277 | DumpMBAToFile(mba, buf); 278 | #endif 279 | 280 | // We only operate at MMAT_LOCOPT 281 | if (mba->maturity != MMAT_LOCOPT) 282 | return 0; 283 | 284 | int iChanged = 0; 285 | 286 | // If local optimization has just been completed, remove transfer-to-gotos 287 | iChanged = RemoveSingleGotos(mba); 288 | //return iChanged; 289 | 290 | #if UNFLATTENVERBOSE 291 | debugmsg("\tRemoved %d vacuous GOTOs\n", iChanged); 292 | #endif 293 | 294 | #if UNFLATTENDEBUG 295 | snprintf(buf, sizeof(buf), "c:\\temp\\dumpAfter-%s-%d.txt", matStr, atThisMaturity); 296 | DumpMBAToFile(mba, buf); 297 | #endif 298 | 299 | // Might as well verify we haven't broken anything 300 | if (iChanged) 301 | 
mba->verify(true); 302 | 303 | #if UNFLATTENVERBOSE 304 | mba->print(vd); 305 | #endif 306 | 307 | // Get the preliminary information needed for control flow flattening, such 308 | // as the assignment/comparison variables. 309 | if (!cfi.GetAssignedAndComparisonVariables(blk)) 310 | { 311 | debugmsg("[E] Couldn't get control-flow flattening information\n"); 312 | return iChanged; 313 | } 314 | 315 | // Create an object that allows us to modify the graph at a future point. 316 | DeferredGraphModifier dgm; 317 | bool bDirtyChains = false; 318 | 319 | // Iterate through the predecessors of the top-level control flow switch 320 | for (auto iDispPred : mba->get_mblock(cfi.iDispatch)->predset) 321 | { 322 | mblock_t *mb = mba->get_mblock(iDispPred); 323 | 324 | // The predecessors should only have one successor, i.e., they should 325 | // directly branch to the dispatcher, not in a conditional fashion 326 | if (mb->nsucc() != 1) 327 | { 328 | debugmsg("[I] Block %d had %d successors, not 1\n", iDispPred, mb->nsucc()); 329 | continue; 330 | } 331 | 332 | // Find the block that dominates this cluster, or skip this block if 333 | // we can't. This ensures that we only try to unflatten parts of the 334 | // control flow graph that were actually flattened. Also, we need the 335 | // cluster head so we know where to bound our searches for numeric 336 | // definitions. 337 | int iClusterHead; 338 | mblock_t *mbClusterHead = GetDominatedClusterHead(mba, iDispPred, iClusterHead); 339 | if (mbClusterHead == NULL) 340 | continue; 341 | 342 | // It's best to process erasures for every block we unflatten 343 | // immediately, so we don't end up duplicating instructions that we 344 | // want to eliminate 345 | m_DeferredErasuresLocal.clear(); 346 | 347 | // Try to find a numeric assignment to the assignment variable, but 348 | // pass false for the last parameter so that the search stops if it 349 | // reaches a block with more than one successor. 
This ought to succeed 350 | // if the flattened control flow region only has one destination, 351 | // rather than two destinations for flattening of if-statements. 352 | int iDestNo = FindBlockTargetOrLastCopy(mb, mbClusterHead, cfi.opAssigned, false); 353 | 354 | // Couldn't find any assignments at all to the assignment variable? 355 | // That's bad, don't continue. 356 | if (m_DeferredErasuresLocal.empty()) 357 | continue; 358 | 359 | // Did we find a block target? Great; just update the CFG to point the 360 | // destination directly to its target, rather than back to the 361 | // dispatcher. 362 | if (iDestNo >= 0) 363 | { 364 | // Make a note to ourselves to modify the graph structure later 365 | dgm.ChangeGoto(mb, cfi.iDispatch, iDestNo); 366 | 367 | // Erase the intermediary assignments to the assignment variable 368 | ProcessErasures(mba); 369 | 370 | #if UNFLATTENVERBOSE 371 | msg("[I] Changed goto on %d to %d\n", iDispPred, iDestNo); 372 | #endif 373 | 374 | ++iChanged; 375 | continue; 376 | } 377 | 378 | // Stash off a copy of the last variable in the chain of assignments 379 | // to the assignment variable, as well as the assignment instruction 380 | // (the latter only for debug-printing purposes). 381 | mop_t *opCopy = m_DeferredErasuresLocal.back().opCopy; 382 | minsn_t *m = m_DeferredErasuresLocal.back().insMov; 383 | 384 | #if UNFLATTENVERBOSE 385 | debugmsg("[I] Block %d did not define assign a number to assigned var; assigned %s instead\n", iDispPred, mopt_t_to_string(m->l.t)); 386 | #endif 387 | 388 | // If the block we're currently examining has more than two 389 | // predecessors, that's unexpected, so stop. 
390 | if (mb->npred() != 2) 391 | { 392 | #if UNFLATTENVERBOSE 393 | debugmsg("[I] Block %d that assigned non-numeric value had %d predecessors, not 2\n", iDispPred, mb->npred()); 394 | #endif 395 | continue; 396 | } 397 | 398 | mblock_t *nonJcc; 399 | int actualGotoTarget, actualJccTarget; 400 | 401 | // Call the function that handles the case of a conditional assignment 402 | // to the assignment variable (i.e., the flattened version of an 403 | // if-statement). 404 | if (HandleTwoPreds(mb, mbClusterHead, opCopy, nonJcc, actualGotoTarget, actualJccTarget)) 405 | { 406 | // If it succeeded... 407 | 408 | // Get rid of the superfluous assignments 409 | ProcessErasures(mba); 410 | 411 | // Make a note to ourselves to modify the graph structure later, 412 | // for the non-taken side of the conditional. Change the goto 413 | // target. 414 | dgm.Replace(mb->serial, cfi.iDispatch, actualGotoTarget); 415 | mb->tail->l.b = actualGotoTarget; 416 | 417 | // Mark that the def-use information will need re-analyzing 418 | bDirtyChains = true; 419 | 420 | // Copy the instructions from the block that targets the dispatcher 421 | // onto the end of the jcc taken block. 422 | minsn_t *mbHead = mb->head; 423 | minsn_t *mbCurr = mbHead; 424 | do 425 | { 426 | minsn_t *mCopy = new minsn_t(*mbCurr); 427 | nonJcc->insert_into_block(mCopy, nonJcc->tail); 428 | mbCurr = mbCurr->next; 429 | 430 | #if UNFLATTENVERBOSE 431 | mcode_t_to_string(nonJcc->tail, buf, sizeof(buf)); 432 | debugmsg("[I] %d: tail is %s\n", nonJcc->serial, buf); 433 | #endif 434 | 435 | } while (mbCurr != NULL); 436 | 437 | 438 | // Make a note to ourselves to modify the graph structure later, 439 | // for the taken side of the conditional. Change the goto target. 440 | dgm.Replace(nonJcc->serial, mb->serial, actualJccTarget); 441 | nonJcc->tail->l.b = actualJccTarget; 442 | 443 | // We added instructions to the nonJcc block, so its def-use lists 444 | // are now spoiled. Mark it dirty. 
445 | nonJcc->mark_lists_dirty(); 446 | } 447 | } // end for loop that unflattens all blocks 448 | 449 | // After we've processed every block, apply the deferred modifications to 450 | // the graph structure. 451 | iChanged += dgm.Apply(mba); 452 | 453 | // If we modified the graph structure, hopefully some blocks (especially 454 | // those making up the control flow dispatch switch, but also perhaps 455 | // intermediary goto-to-goto blocks) will now be unreachable. Prune them, 456 | // so that later optimization phases don't have to consider their contents 457 | // anymore and can do a better job. 458 | if (iChanged != 0) 459 | { 460 | int nRemoved = PruneUnreachable(mba); 461 | iChanged += nRemoved; 462 | #if UNFLATTENVERBOSE 463 | msg("[I] Removed %d blocks\n", nRemoved); 464 | #endif 465 | } 466 | 467 | // If there were any two-way conditionals, that means we copied 468 | // instructions onto the jcc taken blocks, which means the def-use info is 469 | // stale. Mark them dirty, and perform local optimization for the lulz too. 470 | if (bDirtyChains) 471 | { 472 | #if IDA_SDK_VERSION == 710 473 | mba->make_chains_dirty(); 474 | #elif IDA_SDK_VERSION >= 720 475 | mba->mark_chains_dirty(); 476 | #endif 477 | mba->optimize_local(0); 478 | } 479 | 480 | // If we changed the graph, verify that we did so legally. 
481 | if (iChanged != 0) 482 | mba->verify(true); 483 | 484 | return iChanged; 485 | } 486 | -------------------------------------------------------------------------------- /Unflattener.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include "CFFlattenInfo.hpp" 5 | #include "DefUtil.hpp" 6 | 7 | struct CFUnflattener : public optblock_t 8 | { 9 | CFFlattenInfo cfi; 10 | MovChain m_DeferredErasuresLocal; 11 | MovChain m_PerformedErasuresGlobal; 12 | 13 | void Clear(bool bFree) 14 | { 15 | cfi.Clear(bFree); 16 | m_DeferredErasuresLocal.clear(); 17 | m_PerformedErasuresGlobal.clear(); 18 | } 19 | 20 | CFUnflattener() { Clear(false); }; 21 | ~CFUnflattener() { Clear(true); } 22 | int idaapi func(mblock_t *blk); 23 | mblock_t *GetDominatedClusterHead(mbl_array_t *mba, int iDispPred, int &iClusterHead); 24 | int FindBlockTargetOrLastCopy(mblock_t *mb, mblock_t *mbClusterHead, mop_t *what, bool bAllowMultiSuccs); 25 | bool HandleTwoPreds(mblock_t *mb, mblock_t *mbClusterHead, mop_t *opCopy, mblock_t *&endsWithJcc, int &actualGotoTarget, int &actualJccTarget); 26 | void ProcessErasures(mbl_array_t *mba); 27 | }; -------------------------------------------------------------------------------- /bin/IDA71_32/HexRaysDeob.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RolfRolles/HexRaysDeob/f1964874da0d182f6a5ba0436ce6e7c9ac721be9/bin/IDA71_32/HexRaysDeob.dll -------------------------------------------------------------------------------- /bin/IDA72_32/HexRaysDeob.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RolfRolles/HexRaysDeob/f1964874da0d182f6a5ba0436ce6e7c9ac721be9/bin/IDA72_32/HexRaysDeob.dll -------------------------------------------------------------------------------- /main.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Hex-Rays Decompiler project 3 | * Copyright (c) 2007-2018 by Hex-Rays, support@hex-rays.com 4 | * ALL RIGHTS RESERVED. 5 | * 6 | * Sample plugin for Hex-Rays Decompiler. 7 | * It generates microcode for selection and dumps it to the output window. 8 | */ 9 | 10 | #include 11 | #define USE_DANGEROUS_FUNCTIONS 12 | #include 13 | #include "HexRaysUtil.hpp" 14 | #include "MicrocodeExplorer.hpp" 15 | #include "PatternDeobfuscate.hpp" 16 | #include "AllocaFixer.hpp" 17 | #include "Unflattener.hpp" 18 | #include "Config.hpp" 19 | 20 | extern plugin_t PLUGIN; 21 | 22 | // Hex-Rays API pointer 23 | hexdsp_t *hexdsp = NULL; 24 | 25 | ObfCompilerOptimizer hook; 26 | CFUnflattener cfu; 27 | 28 | //-------------------------------------------------------------------------- 29 | int idaapi init(void) 30 | { 31 | if (!init_hexrays_plugin()) 32 | return PLUGIN_SKIP; // no decompiler 33 | const char *hxver = get_hexrays_version(); 34 | msg("Hex-rays version %s has been detected, %s ready to use\n", hxver, PLUGIN.wanted_name); 35 | 36 | // Install our block and instruction optimization classes. 37 | #if DO_OPTIMIZATION 38 | install_optinsn_handler(&hook); 39 | install_optblock_handler(&cfu); 40 | #endif 41 | return PLUGIN_KEEP; 42 | } 43 | 44 | //-------------------------------------------------------------------------- 45 | void idaapi term(void) 46 | { 47 | if (hexdsp != NULL) 48 | { 49 | 50 | // Uninstall our block and instruction optimization classes. 51 | #if DO_OPTIMIZATION 52 | remove_optinsn_handler(&hook); 53 | remove_optblock_handler(&cfu); 54 | 55 | // I couldn't figure out why, but my plugin would segfault if it tried 56 | // to free mop_t pointers that it had allocated. Maybe hexdsp had been 57 | // set to NULL at that point, so the calls to delete crashed? Anyway, 58 | // cleaning up before we unload solved the issues. 
59 | cfu.Clear(true); 60 | #endif 61 | term_hexrays_plugin(); 62 | } 63 | } 64 | 65 | //-------------------------------------------------------------------------- 66 | bool idaapi run(size_t arg) 67 | { 68 | if (arg == 0xbeef) 69 | { 70 | PLUGIN.flags |= PLUGIN_UNL; 71 | return true; 72 | } 73 | if (arg == 2) 74 | { 75 | FixCallsToAllocaProbe(); 76 | return true; 77 | } 78 | #if IDA_SDK_VERSION >= 730 79 | if (arg == 0) 80 | #else 81 | if (arg == 3) 82 | #endif 83 | { 84 | ShowMicrocodeExplorer(); 85 | return true; 86 | } 87 | 88 | return true; 89 | } 90 | 91 | //-------------------------------------------------------------------------- 92 | static const char comment[] = "Show microcode"; 93 | 94 | 95 | //-------------------------------------------------------------------------- 96 | // 97 | // PLUGIN DESCRIPTION BLOCK 98 | // 99 | //-------------------------------------------------------------------------- 100 | plugin_t PLUGIN = 101 | { 102 | IDP_INTERFACE_VERSION, 103 | 0, // plugin flags 104 | init, // initialize 105 | term, // terminate. this pointer may be NULL. 
106 | run, // invoke plugin 107 | comment, // long comment about the plugin 108 | // it could appear in the status line 109 | // or as a hint 110 | "", // multiline help about the plugin 111 | "Microcode explorer", // the preferred short name of the plugin 112 | "" // the preferred hotkey to run the plugin 113 | }; 114 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | PROC=HexRaysDeob 2 | include ../plugin.mak 3 | 4 | __CFLAGS=-std=c++14 5 | 6 | $(F)AllocaFixer$(O): $(I)bitrange.hpp $(I)bytes.hpp $(I)config.hpp \ 7 | $(I)fpro.h $(I)funcs.hpp $(I)gdl.hpp $(I)hexrays.hpp \ 8 | $(I)ida.hpp $(I)idp.hpp $(I)ieee.h $(I)kernwin.hpp \ 9 | $(I)lines.hpp $(I)llong.hpp $(I)loader.hpp $(I)nalt.hpp \ 10 | $(I)name.hpp $(I)netnode.hpp $(I)pro.h $(I)range.hpp \ 11 | $(I)segment.hpp $(I)typeinf.hpp $(I)ua.hpp $(I)xref.hpp \ 12 | AllocaFixer.hpp AllocaFixer.cpp 13 | 14 | $(F)CFFlattenInfo$(O): $(I)bitrange.hpp $(I)bytes.hpp $(I)config.hpp \ 15 | $(I)fpro.h $(I)funcs.hpp $(I)gdl.hpp $(I)hexrays.hpp \ 16 | $(I)ida.hpp $(I)idp.hpp $(I)ieee.h $(I)kernwin.hpp \ 17 | $(I)lines.hpp $(I)llong.hpp $(I)loader.hpp $(I)nalt.hpp \ 18 | $(I)name.hpp $(I)netnode.hpp $(I)pro.h $(I)range.hpp \ 19 | $(I)segment.hpp $(I)typeinf.hpp $(I)ua.hpp $(I)xref.hpp \ 20 | CFFlattenInfo.hpp CFFlattenInfo.cpp 21 | 22 | $(F)DefUtil$(O): $(I)bitrange.hpp $(I)bytes.hpp $(I)config.hpp \ 23 | $(I)fpro.h $(I)funcs.hpp $(I)gdl.hpp $(I)hexrays.hpp \ 24 | $(I)ida.hpp $(I)idp.hpp $(I)ieee.h $(I)kernwin.hpp \ 25 | $(I)lines.hpp $(I)llong.hpp $(I)loader.hpp $(I)nalt.hpp \ 26 | $(I)name.hpp $(I)netnode.hpp $(I)pro.h $(I)range.hpp \ 27 | $(I)segment.hpp $(I)typeinf.hpp $(I)ua.hpp $(I)xref.hpp \ 28 | DefUtil.hpp DefUtil.cpp 29 | 30 | $(F)HexRaysUtil$(O): $(I)bitrange.hpp $(I)bytes.hpp $(I)config.hpp \ 31 | $(I)fpro.h $(I)funcs.hpp $(I)gdl.hpp $(I)hexrays.hpp \ 32 | $(I)ida.hpp $(I)idp.hpp $(I)ieee.h 
$(I)kernwin.hpp \ 33 | $(I)lines.hpp $(I)llong.hpp $(I)loader.hpp $(I)nalt.hpp \ 34 | $(I)name.hpp $(I)netnode.hpp $(I)pro.h $(I)range.hpp \ 35 | $(I)segment.hpp $(I)typeinf.hpp $(I)ua.hpp $(I)xref.hpp \ 36 | HexRaysUtil.hpp HexRaysUtil.cpp 37 | 38 | $(F)MicrocodeExplorer$(O): $(I)bitrange.hpp $(I)bytes.hpp $(I)config.hpp \ 39 | $(I)fpro.h $(I)funcs.hpp $(I)gdl.hpp $(I)hexrays.hpp \ 40 | $(I)ida.hpp $(I)idp.hpp $(I)ieee.h $(I)kernwin.hpp \ 41 | $(I)lines.hpp $(I)llong.hpp $(I)loader.hpp $(I)nalt.hpp \ 42 | $(I)name.hpp $(I)netnode.hpp $(I)pro.h $(I)range.hpp \ 43 | $(I)segment.hpp $(I)typeinf.hpp $(I)ua.hpp $(I)xref.hpp \ 44 | MicrocodeExplorer.hpp MicrocodeExplorer.cpp 45 | 46 | $(F)PatternDeobfuscate$(O): $(I)bitrange.hpp $(I)bytes.hpp $(I)config.hpp \ 47 | $(I)fpro.h $(I)funcs.hpp $(I)gdl.hpp $(I)hexrays.hpp \ 48 | $(I)ida.hpp $(I)idp.hpp $(I)ieee.h $(I)kernwin.hpp \ 49 | $(I)lines.hpp $(I)llong.hpp $(I)loader.hpp $(I)nalt.hpp \ 50 | $(I)name.hpp $(I)netnode.hpp $(I)pro.h $(I)range.hpp \ 51 | $(I)segment.hpp $(I)typeinf.hpp $(I)ua.hpp $(I)xref.hpp \ 52 | PatternDeobfuscate.hpp PatternDeobfuscate.cpp 53 | 54 | $(F)PatternDeobfuscateUtil$(O): $(I)bitrange.hpp $(I)bytes.hpp $(I)config.hpp \ 55 | $(I)fpro.h $(I)funcs.hpp $(I)gdl.hpp $(I)hexrays.hpp \ 56 | $(I)ida.hpp $(I)idp.hpp $(I)ieee.h $(I)kernwin.hpp \ 57 | $(I)lines.hpp $(I)llong.hpp $(I)loader.hpp $(I)nalt.hpp \ 58 | $(I)name.hpp $(I)netnode.hpp $(I)pro.h $(I)range.hpp \ 59 | $(I)segment.hpp $(I)typeinf.hpp $(I)ua.hpp $(I)xref.hpp \ 60 | PatternDeobfuscateUtil.hpp PatternDeobfuscateUtil.cpp 61 | 62 | $(F)TargetUtil$(O): $(I)bitrange.hpp $(I)bytes.hpp $(I)config.hpp \ 63 | $(I)fpro.h $(I)funcs.hpp $(I)gdl.hpp $(I)hexrays.hpp \ 64 | $(I)ida.hpp $(I)idp.hpp $(I)ieee.h $(I)kernwin.hpp \ 65 | $(I)lines.hpp $(I)llong.hpp $(I)loader.hpp $(I)nalt.hpp \ 66 | $(I)name.hpp $(I)netnode.hpp $(I)pro.h $(I)range.hpp \ 67 | $(I)segment.hpp $(I)typeinf.hpp $(I)ua.hpp $(I)xref.hpp \ 68 | TargetUtil.hpp TargetUtil.cpp 69 | 70 | 
$(F)Unflattener$(O): $(I)bitrange.hpp $(I)bytes.hpp $(I)config.hpp \
                  $(I)fpro.h $(I)funcs.hpp $(I)gdl.hpp $(I)hexrays.hpp \
                  $(I)ida.hpp $(I)idp.hpp $(I)ieee.h $(I)kernwin.hpp \
                  $(I)lines.hpp $(I)llong.hpp $(I)loader.hpp $(I)nalt.hpp \
                  $(I)name.hpp $(I)netnode.hpp $(I)pro.h $(I)range.hpp \
                  $(I)segment.hpp $(I)typeinf.hpp $(I)ua.hpp $(I)xref.hpp \
                  Unflattener.hpp Unflattener.cpp

$(F)main$(O): $(I)bitrange.hpp $(I)bytes.hpp $(I)config.hpp \
                  $(I)fpro.h $(I)funcs.hpp $(I)gdl.hpp $(I)hexrays.hpp \
                  $(I)ida.hpp $(I)idp.hpp $(I)ieee.h $(I)kernwin.hpp \
                  $(I)lines.hpp $(I)llong.hpp $(I)loader.hpp $(I)nalt.hpp \
                  $(I)name.hpp $(I)netnode.hpp $(I)pro.h $(I)range.hpp \
                  $(I)segment.hpp $(I)typeinf.hpp $(I)ua.hpp $(I)xref.hpp \
                  main.cpp

$(F)HexRaysDeob$(O): $(F)AllocaFixer$(O) $(F)CFFlattenInfo$(O) $(F)DefUtil$(O) \
                  $(F)HexRaysUtil$(O) $(F)MicrocodeExplorer$(O) $(F)PatternDeobfuscate$(O) \
                  $(F)PatternDeobfuscateUtil$(O) $(F)TargetUtil$(O) $(F)Unflattener$(O) $(F)main$(O)
	$(CCL) $(STDLIBS) $(IDALIB) -shared -o $@ $^

--------------------------------------------------------------------------------
/makefile.lnx:
--------------------------------------------------------------------------------
# use this makefile to build HexRaysDeob for Linux
# based off HexRaysCodeXplorer makefile for Linux
#
# Instructions:
# After setting up IDA SDK and Hex Rays SDK to work, in the HexRaysDeob folder do:
# IDA_DIR=<path to IDA> IDA_SDK=<path to IDA SDK> EA64=<0|1> make -f makefile.lnx
#
# All three variables are mandatory; the check-env target below rejects the
# build if any of them is undefined.

CC=g++
LD=ld
LDFLAGS=-shared -m64 -static-libgcc -static-libstdc++

LIBDIR=-L$(IDA_DIR)
SRCDIR=./
HEXRAYS_SDK=$(IDA_DIR)/plugins/hexrays_sdk
INCLUDES=-I$(IDA_SDK)/include -I$(HEXRAYS_SDK)/include
__X64__=1

SRC=$(SRCDIR)AllocaFixer.cpp \
	$(SRCDIR)CFFlattenInfo.cpp \
	$(SRCDIR)DefUtil.cpp \
	$(SRCDIR)HexRaysUtil.cpp \
	$(SRCDIR)MicrocodeExplorer.cpp \
	$(SRCDIR)PatternDeobfuscate.cpp \
	$(SRCDIR)PatternDeobfuscateUtil.cpp \
	$(SRCDIR)TargetUtil.cpp \
	$(SRCDIR)Unflattener.cpp \
	$(SRCDIR)main.cpp

OBJS=$(subst .cpp,.o,$(SRC))

CFLAGS=-m64 -fPIC -D__LINUX__ -D__PLUGIN__ -std=c++14 -D__X64__ -D_GLIBCXX_USE_CXX11_ABI=0
LIBS=-lc -lpthread -ldl

# EA64=1 builds the plugin for the 64-bit address space flavour of IDA
# (links against -lida64 and adds the "64" suffix to the output name).
ifeq ($(EA64),1)
CFLAGS+=-D__EA64__
LIBS+=-lida64
EXT=so
SUFFIX=64
else
EXT=so
LIBS+=-lida
SUFFIX=
endif

# Note that "all" depends on "clean", so every build is a full rebuild.
all: check-env clean HexRaysDeob$(SUFFIX).$(EXT)

HexRaysDeob$(SUFFIX).$(EXT): $(OBJS)
	$(CC) $(LDFLAGS) $(LIBDIR) -o HexRaysDeob$(SUFFIX).$(EXT) $(OBJS) $(LIBS)

%.o: %.cpp
	$(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@

clean:
	rm -f $(OBJS) HexRaysDeob$(SUFFIX).$(EXT)

install:
	cp -f HexRaysDeob$(SUFFIX).$(EXT) $(IDA_DIR)/plugins

# NOTE(review): leading whitespace was lost in this dump; the $(error ...)
# lines are assumed to be tab-indented recipe lines of check-env.
check-env:
ifndef IDA_SDK
	$(error IDA_SDK is undefined)
endif
ifndef IDA_DIR
	$(error IDA_DIR is undefined)
endif
ifndef EA64
	$(error specify EA64=0 for 32 bit build or EA64=1 for 64 bit build)
endif
# None of these targets produce a file of the same name, so declare them all
# phony; otherwise a stray file called "clean" or "install" would disable them.
.PHONY: all clean install check-env

--------------------------------------------------------------------------------