├── README.md ├── analyzer.py ├── decrypt_strings.py ├── extract_c2.py ├── extract_xor_key.py ├── for_training ├── answers │ ├── db.json │ ├── decrypt_string_stage1.py │ ├── decrypt_string_stage2.py │ ├── emulator.py │ ├── extract_c2_stage1.py │ ├── extract_c2_stage2.py │ ├── extract_xor_key.py │ └── make_hash_db.py ├── db.json ├── decrypt_string_stage1_not_impl.py ├── decrypt_string_stage2_not_impl.py ├── emulator.py ├── extract_c2_stage1_not_impl.py ├── extract_c2_stage2_not_impl.py ├── requirements.txt ├── search_hash_not_impl.py └── utils.py ├── make_hash_db.py ├── requirements.txt └── search_hash.py /README.md: -------------------------------------------------------------------------------- 1 | # Malware Analysis at Scale ~ Defeating EMOTET by Ghidra ~ 2 | 3 | This repository provides a set of Ghidra Script for EMOTET analysis. Included scripts are following. 4 | 5 | * `extract_xor_key.py`: extract XOR key for API hash. extracted XOR key will be used to generate hash database (`db.json`) for resolving hashes. 6 | * `make_hash_db.py`: generate hash database for resolving API hash. this script is NOT Ghidra Script. make sure to run this on your host. 7 | * `search_hash.py`: search hashes in target binary and add comment with corresponding API. database for hashes which is used for lookup APIs must be generated by `make_hash_db.py`. 8 | * `decrypt_strings.py`: decrypt strings as much as possible. 9 | * `extract_c2.py`: extract C&C URL (including protocol, hostname and port) automatically. 10 | * `analyzer.py`: all-in-one script of above scripts, especially for Headless Analyzer. 11 | 12 | Originally these scripts were shared in Japan Security Analyst Summit (JSAC) 2021. If you need the scripts that is used in that workshop, you can find them in `for_training` folder. The slide deck is available [here](https://jsac.jpcert.or.jp/archive/2021/pdf/JSAC2021_workshop_malware-analysis_jp.pdf) (Japanese). 13 | 14 | ## Before use 15 | 16 | This is Gihdra Script. You need to install Ghidra before running script. If you're not familiar with Ghidra Script, please see the documents below. 17 | * https://ghidra.re/courses/GhidraClass/Intermediate/Scripting_withNotes.html#Scripting.html 18 | * https://ghidra.re/ghidra_docs/analyzeHeadlessREADME.html 19 | 20 | Plus this should be noted, these scripts will analyzer "unpacked" EMOTET. So you need unpacked one before running this script. You can get it by yourself with [hollow_hunter](https://github.com/hasherezade/hollows_hunter) or use online sandbox for unpack (like [CAPE](https://capesandbox.com/)). 21 | 22 | ## Installation 23 | 24 | clone this repository, and add the cloned path into your `Script Directories`. 25 | 26 | ```bash 27 | > git clone https://github.com/AllsafeCyberSecurity/malware-analysis-at-scale-defeating-emotet-by-ghidra.git 28 | > cd malware-analysis-at-scale-defeating-emotet-by-ghidra.git 29 | > python -m pip install -r requirements.txt 30 | ``` 31 | 32 | ## Usage 33 | 34 | Only `analyzer.py` is Headless Script. After making sure to import the unpacked EMOTET in your Ghidra Project, You can run this script as following. 35 | 36 | ```bash 37 | > %GHIDRA_INSTALL_DIR%\support\analyzeHeadless.bat -process -scriptPath -postScript analyzer.py .json 38 | ``` 39 | 40 | Other scripts are Ghidra Script. You can run them from Script Manager. -------------------------------------------------------------------------------- /analyzer.py: -------------------------------------------------------------------------------- 1 | from __main__ import * 2 | 3 | import binascii 4 | import json 5 | from collections import defaultdict 6 | from extract_xor_key import get_api_xor_key, get_lib_xor_key 7 | from decrypt_strings import find_encrypted_data_addr, decrypt_string 8 | from extract_c2 import find_config_address, extract_c2 9 | 10 | 11 | def is_der_format_rsa_key(b): 12 | return b[0] == 0x30 and b[2] == 0x02 13 | 14 | def to_hex(l): 15 | return binascii.hexlify(bytearray(l)) 16 | 17 | def dump_to_json(dest): 18 | with open(dest, 'w') as f: 19 | json.dump(results, f, indent=4) 20 | 21 | 22 | def main(): 23 | 24 | # headless analyzer gets argument by this method 25 | args = getScriptArgs() 26 | 27 | if len(args) != 1: 28 | print('[!] Usage: analyzeHeadless ... analyzer.py [path-to-output-json]') 29 | return 30 | 31 | results = defaultdict(dict) 32 | 33 | try: 34 | # extract xor key 35 | lib_xor_key = get_lib_xor_key() 36 | if lib_xor_key: 37 | print('[*] XOR key for DLL: {}'.format(lib_xor_key)) 38 | results['xor_key']['dll'] = lib_xor_key.getValue() 39 | 40 | api_xor_key = get_api_xor_key() 41 | if api_xor_key: 42 | print('[*] XOR key for API: {}'.format(api_xor_key)) 43 | results['xor_key']['api'] =api_xor_key.getValue() 44 | 45 | # find RSA key 46 | for found in find_encrypted_data_addr(): 47 | decrypted = decrypt_string(found.data_addr, raw=True) 48 | if is_der_format_rsa_key(decrypted): 49 | hex_rsa_key = to_hex(decrypted) 50 | print('[*] RSA key: {}'.format(hex_rsa_key)) 51 | results['rsa_key'] = hex_rsa_key 52 | 53 | # extract c2 servers 54 | print('[*] c2 servers:') 55 | results['url'] = [] 56 | for c2 in extract_c2(): 57 | print(c2) 58 | results['url'].append(c2) 59 | 60 | # save the results into json 61 | with open(args[0], 'w') as f: 62 | json.dump(results, f, indent=4) 63 | print('[*] saved results at {}'.format(args[0])) 64 | 65 | except Exception as e: 66 | print('[!] {}'.format(e)) 67 | 68 | 69 | if __name__ == '__main__': 70 | main() -------------------------------------------------------------------------------- /decrypt_strings.py: -------------------------------------------------------------------------------- 1 | from __main__ import * 2 | 3 | import struct 4 | from collections import namedtuple 5 | from itertools import cycle, chain 6 | 7 | from ghidra.program.model.mem import MemoryAccessException 8 | from ghidra.program.model.listing import CodeUnit 9 | 10 | SearchResult = namedtuple('SearchResult', ['instruction', 'data_addr']) 11 | 12 | 13 | def decrypt_string(enc_data_addr, raw=False): 14 | key = getInt(enc_data_addr) 15 | xored_length = getInt(enc_data_addr.add(4)) 16 | original_length = key ^ xored_length 17 | enc = getBytes(enc_data_addr.add(8), original_length).tostring() 18 | 19 | dec = [ord(k) ^ ord(v) for k, v in zip(cycle(struct.pack('DAT_1001f000 53 | # 1000e26a 89 72 24 MOV dword ptr [EDX + 0x24],ESI=>DAT_1001f000 54 | # 1000e26d 89 4a 10 MOV dword ptr [EDX + 0x10],ECX 55 | # 1000e270 eb 04 JMP LAB_1000e276 56 | '\\x89.{1}\\x28\\x89.{2}\\x89.{2}\\xeb\\x04', 57 | 58 | # Ptn 2 (since 2021-01-28~) 59 | # 007718a4 c7 40 04 MOV dword ptr [EAX + pCVar4->field_0x4],DAT_0077d390 60 | # 007718ab c7 40 10 MOV dword ptr [EAX + pCVar4->c2_info],DAT_0077d390 61 | # 007718b2 c7 40 0c MOV dword ptr [EAX + pCVar4->field_0xc],0x0 62 | '\\xc7.{5}\\x00\\xc7.{5}\\x00\\xc7.{5}\\x00', 63 | ] 64 | 65 | # try all patterns 66 | for asm in asm_ptns: 67 | found = findBytes(None, asm, -1) 68 | if not found: 69 | continue 70 | elif len(found) != 1: 71 | raise ValueError('False detection of config') 72 | 73 | inst = getInstructionAt(found[0]) 74 | config_addr = inst.getAddress(1) 75 | return config_addr 76 | 77 | # if nothing found, it comes here. 78 | raise RuntimeError('Config not found') 79 | 80 | def extract_c2(): 81 | ''' extract all the C2 configs of emotet. you can give a config address, 82 | otherwise it will search it by regex. 83 | ''' 84 | config_addr = find_config_address() 85 | for (ip, port) in iterate_config(config_addr): 86 | yield 'http://{}:{}'.format(ip, port) 87 | 88 | def main(): 89 | for c2_server in extract_c2(): 90 | print(c2_server) 91 | 92 | if __name__ == '__main__': 93 | main() -------------------------------------------------------------------------------- /extract_xor_key.py: -------------------------------------------------------------------------------- 1 | from __main__ import * 2 | 3 | def find_calc_api_hash_func(): 4 | ''' 5 | 00402b5c 8b 55 fc MOV EDX,dword ptr [EBP + local_8] 6 | 00402b5f 0f be 03 MOVSX EAX,byte ptr [EBX] 7 | 00402b62 89 45 fc MOV dword ptr [EBP + local_8],EAX 8 | 00402b65 01 75 fc ADD dword ptr [EBP + local_8],ESI 9 | 00402b68 d3 e2 SHL EDX,param_1 10 | 00402b6a 01 55 fc ADD dword ptr [EBP + local_8],EDX 11 | 00402b6d 29 7d fc SUB dword ptr [EBP + local_8],EDI 12 | 13 | ''' 14 | asm = b'\\x8b.{2,3}\\x0f.{2,3}\\x89.{2,3}\\x01.{2,4}\\xd3\\xe2\\x01.{2,3}\\x29.{2,3}' 15 | found = findBytes(None, asm, -1) 16 | if found: 17 | calc_hash_func = getFunctionContaining(found[0]) 18 | return calc_hash_func 19 | 20 | 21 | def find_calc_lib_hash_func(): 22 | ''' 23 | 00402c29 d3 e7 SHL EDI,libname 24 | 00402c2b 83 f8 41 CMP char_hex,0x41 25 | 00402c2e 72 08 JC LAB_00402c38 26 | 00402c30 83 f8 5a CMP char_hex,0x5a 27 | 00402c33 77 03 JA LAB_00402c38 28 | 00402c35 83 c0 20 ADD char_hex,0x20 29 | ''' 30 | asm = '\\xd3\\xe7\\x83.{1}\\x41\\x72\\x08\\x83.{1}\\x5a\\x77\\x03\\x83.{1}\\x20' 31 | found = findBytes(None, asm, -1) 32 | if found: 33 | calc_hash_func = getFunctionContaining(found[0]) 34 | return calc_hash_func 35 | 36 | 37 | def get_xor_key(calc_hash_func): 38 | for xref in getReferencesTo(calc_hash_func.getEntryPoint()): 39 | # instruction should be like; 40 | # CALL calc_hash 41 | # XOR EAX,0x 42 | caller = xref.getFromAddress() 43 | next_inst = getInstructionAfter(caller) 44 | if str(next_inst).startswith('XOR EAX,0x'): 45 | return next_inst.getOpObjects(1)[0] 46 | 47 | def get_api_xor_key(): 48 | calc_api_hash_func = find_calc_api_hash_func() 49 | if calc_api_hash_func: 50 | xor_key = get_xor_key(calc_api_hash_func) 51 | return xor_key 52 | 53 | def get_lib_xor_key(): 54 | calc_lib_hash_func = find_calc_lib_hash_func() 55 | if calc_lib_hash_func: 56 | xor_key = get_xor_key(calc_lib_hash_func) 57 | return xor_key 58 | 59 | def main(): 60 | lib_xor_key = get_lib_xor_key() 61 | if lib_xor_key: 62 | print('[*] XOR key for Lib: {}'.format(lib_xor_key)) 63 | else: 64 | print('[*] XOR key for Lib was not found') 65 | 66 | api_xor_key = get_api_xor_key() 67 | if api_xor_key: 68 | print('[*] XOR key for API: {}'.format(api_xor_key)) 69 | else: 70 | print('[*] XOR key for API was not found') 71 | 72 | if __name__ == '__main__': 73 | main() 74 | -------------------------------------------------------------------------------- /for_training/answers/decrypt_string_stage1.py: -------------------------------------------------------------------------------- 1 | from __main__ import * 2 | 3 | import struct 4 | from itertools import cycle 5 | 6 | from ghidra.program.model.listing import CodeUnit 7 | 8 | def xor_with_multi_bytes(enc, key): 9 | ''' xor enc (bytes) with key (int) and return it as string 10 | ''' 11 | return ''.join([chr(ord(k) ^ ord(v)) for k, v in zip(cycle(struct.pack('DAT_1001f000 53 | # 1000e26a 89 72 24 MOV dword ptr [EDX + 0x24],ESI=>DAT_1001f000 54 | # 1000e26d 89 4a 10 MOV dword ptr [EDX + 0x10],ECX 55 | # 1000e270 eb 04 JMP LAB_1000e276 56 | '\\x89.{1}\\x28\\x89.{2}\\x89.{2}\\xeb\\x04', 57 | 58 | # Ptn 2 (since 2021-01-28~) 59 | # 007718a4 c7 40 04 MOV dword ptr [EAX + pCVar4->field_0x4],DAT_0077d390 60 | # 007718ab c7 40 10 MOV dword ptr [EAX + pCVar4->c2_info],DAT_0077d390 61 | # 007718b2 c7 40 0c MOV dword ptr [EAX + pCVar4->field_0xc],0x0 62 | '\\xc7.{5}\\x00\\xc7.{5}\\x00\\xc7.{5}\\x00', 63 | ] 64 | 65 | # try all patterns 66 | for asm in asm_ptns: 67 | found = findBytes(None, asm, -1) 68 | if not found: 69 | continue 70 | elif len(found) != 1: 71 | raise ValueError('False detection of config') 72 | 73 | inst = getInstructionAt(found[0]) 74 | config_addr = inst.getAddress(1) 75 | return config_addr 76 | 77 | # if nothing found, it comes here. 78 | raise RuntimeError('Config not found') 79 | 80 | def extract_c2(config_addr=None): 81 | ''' extract all the C2 configs of emotet. you can give a config address, 82 | otherwise it will search it by regex. 83 | ''' 84 | if config_addr is None: 85 | config_addr = find_config_address() 86 | 87 | print('[*] Config Address: {}'.format(config_addr)) 88 | for (ip, port) in iterate_config(config_addr): 89 | print('http://{}:{}'.format(ip, port)) 90 | 91 | def main(): 92 | extract_c2() 93 | 94 | if __name__ == '__main__': 95 | main() -------------------------------------------------------------------------------- /for_training/answers/extract_xor_key.py: -------------------------------------------------------------------------------- 1 | from __main__ import * 2 | 3 | def find_calc_api_hash_func(): 4 | ''' 5 | 00402b5c 8b 55 fc MOV EDX,dword ptr [EBP + local_8] 6 | 00402b5f 0f be 03 MOVSX EAX,byte ptr [EBX] 7 | 00402b62 89 45 fc MOV dword ptr [EBP + local_8],EAX 8 | 00402b65 01 75 fc ADD dword ptr [EBP + local_8],ESI 9 | 00402b68 d3 e2 SHL EDX,param_1 10 | 00402b6a 01 55 fc ADD dword ptr [EBP + local_8],EDX 11 | 00402b6d 29 7d fc SUB dword ptr [EBP + local_8],EDI 12 | 13 | ''' 14 | asm = b'\\x8b\\x55.{1}\\x0f\\xbe.{1}\\x89\\x45.{1}\\x01\\x75.{1}\\xd3\\xe2\\x01\\x55.{1}\\x29\\x7d.{1}' 15 | found = findBytes(None, asm, -1) 16 | if not found: 17 | raise RuntimeError('api hashing function is not found') 18 | 19 | calc_hash_func = getFunctionContaining(found[0]) 20 | return calc_hash_func 21 | 22 | 23 | def find_calc_lib_hash_func(): 24 | ''' 25 | 00402c29 d3 e7 SHL EDI,libname 26 | 00402c2b 83 f8 41 CMP char_hex,0x41 27 | 00402c2e 72 08 JC LAB_00402c38 28 | 00402c30 83 f8 5a CMP char_hex,0x5a 29 | 00402c33 77 03 JA LAB_00402c38 30 | 00402c35 83 c0 20 ADD char_hex,0x20 31 | ''' 32 | asm = '\\xd3\\xe7\\x83.{1}\\x41\\x72\\x08\\x83.{1}\\x5a\\x77\\x03\\x83.{1}\\x20' 33 | found = findBytes(None, asm, -1) 34 | if not found: 35 | raise RuntimeError('lib hashing function is not found') 36 | 37 | calc_hash_func = getFunctionContaining(found[0]) 38 | return calc_hash_func 39 | 40 | 41 | def get_xor_key(calc_hash_func): 42 | for xref in getReferencesTo(calc_hash_func.getEntryPoint()): 43 | # instruction should be like; 44 | # CALL calc_hash 45 | # XOR EAX,0x 46 | caller = xref.getFromAddress() 47 | next_inst = getInstructionAfter(caller) 48 | if str(next_inst).startswith('XOR EAX,0x'): 49 | return next_inst.getOpObjects(1)[0] 50 | 51 | def get_api_xor_key(): 52 | calc_api_hash_func = find_calc_api_hash_func() 53 | xor_key = get_xor_key(calc_api_hash_func) 54 | return xor_key 55 | 56 | def get_lib_xor_key(): 57 | calc_lib_hash_func = find_calc_lib_hash_func() 58 | xor_key = get_xor_key(calc_lib_hash_func) 59 | return xor_key 60 | 61 | def main(): 62 | lib_xor_key = get_lib_xor_key() 63 | print('[*] XOR key for Lib: {}'.format(lib_xor_key)) 64 | 65 | api_xor_key = get_api_xor_key() 66 | print('[*] XOR key for API: {}'.format(api_xor_key)) 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /for_training/answers/make_hash_db.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import os 4 | from collections import defaultdict 5 | from functools import partial 6 | 7 | import pefile 8 | 9 | API_XOR_KEY = 0x5a80eae 10 | LIB_XOR_KEY = 0x1fc325da 11 | 12 | INTERESTING_DLLS = [ 13 | 'kernel32.dll', 'comctl32.dll', 'advapi32.dll', 'comdlg32.dll', 14 | 'gdi32.dll', 'msvcrt.dll', 'netapi32.dll', 'ntdll.dll', 15 | 'ntoskrnl.exe', 'oleaut32.dll', 'psapi.dll', 'shell32.dll', 16 | 'shlwapi.dll', 'srsvc.dll', 'urlmon.dll', 'user32.dll', 17 | 'winhttp.dll', 'wininet.dll', 'ws2_32.dll', 'wship6.dll', 18 | 'advpack.dll', 'crypt32.dll', 'userenv.dll', 'wtsapi32.dll' 19 | ] 20 | 21 | def calc_hash(name, key): 22 | value = 0 23 | for c in name: 24 | value = (ord(c) + value * 0x1003f) & 0xffffffff 25 | return value ^ key 26 | 27 | def get_export_api(dllpath): 28 | pe = pefile.PE(dllpath) 29 | if ((not hasattr(pe, "DIRECTORY_ENTRY_EXPORT")) or (pe.DIRECTORY_ENTRY_EXPORT is None)): 30 | raise RuntimeError(f'{dllpath} doesn\'t have export table') 31 | 32 | for sym in pe.DIRECTORY_ENTRY_EXPORT.symbols: 33 | if sym.name is not None: 34 | yield sym.name.decode('utf-8') 35 | 36 | def main(): 37 | import argparse 38 | 39 | p = argparse.ArgumentParser() 40 | p.add_argument('--lib-key', dest='lib_key', default=LIB_XOR_KEY, type=lambda x: int(x, 0), help='XOR key for lib name hashing') 41 | p.add_argument('--api-key', dest='api_key', default=API_XOR_KEY, type=lambda x: int(x, 0), help='XOR key for API hashing') 42 | 43 | args = p.parse_args() 44 | 45 | # partially apply calc_hash function with XOR key 46 | emotet_xor_for_lib = partial(calc_hash, key=args.lib_key) 47 | emotet_xor_for_api = partial(calc_hash, key=args.api_key) 48 | 49 | results = defaultdict(dict) 50 | 51 | # enumerate all .dll files in %windir%\system32 52 | target_path = os.path.join(os.environ.get('windir'), 'system32') 53 | for dll_filepath in glob.glob(os.path.join(target_path, '*.dll')): 54 | dllname = os.path.basename(dll_filepath) 55 | 56 | # filter uninteresting dlls for easy use 57 | if dllname.lower() not in INTERESTING_DLLS: 58 | continue 59 | 60 | # calc hash of dll name 61 | hashed_dllname = emotet_xor_for_lib(dllname.lower()) 62 | results[hex(hashed_dllname)] = dllname 63 | 64 | # calc hash of each API name in DLL 65 | for api in get_export_api(dll_filepath): 66 | hashed_api = emotet_xor_for_api(api) 67 | results[hex(hashed_api)] = api 68 | 69 | # dump tp json file 70 | with open('db.json', 'w') as f: 71 | json.dump(results, f, indent=4) 72 | 73 | 74 | if __name__ == '__main__': 75 | main() 76 | -------------------------------------------------------------------------------- /for_training/decrypt_string_stage1_not_impl.py: -------------------------------------------------------------------------------- 1 | from __main__ import * 2 | 3 | import struct 4 | from itertools import cycle 5 | 6 | from ghidra.program.model.listing import CodeUnit 7 | 8 | def xor_with_multi_bytes(enc, key): 9 | ''' xor enc (bytes) with key (int) and return it as string 10 | ''' 11 | return ''.join([chr(ord(k) ^ ord(v)) for k, v in zip(cycle(struct.pack('>> addr = toAddr(0x401000) 21 | >>> new_addr = addr.add(4) 22 | 4. use `xor_with_multi_bytes` to xor bytes with DWORD 23 | 5. in jython, you can convert array to string by `tostring` method 24 | >>> getBytes(0x408000) 25 | array('b', [116, 101, 115, 116]) 26 | >>> getBytes(0x408000).tostring() 27 | 'test' 28 | ''' 29 | raise NotImplementedError('not implemented') 30 | 31 | def add_bookmark_comment(addr, comment): 32 | cu = currentProgram.getListing().getCodeUnitAt(addr) 33 | createBookmark(addr, "decrypted_str", comment) 34 | cu.setComment(CodeUnit.EOL_COMMENT, comment) 35 | 36 | def get_instructions_before(addr, n=1): 37 | r = [] 38 | for _ in range(n): 39 | inst = getInstructionBefore(addr) 40 | r.append(inst) 41 | addr = inst.getAddress() 42 | return r 43 | 44 | def is_mov_ecx(inst): 45 | return str(inst).startswith('MOV EDX,0x') 46 | 47 | def main(): 48 | 49 | decrypt_string_addr = toAddr(0x1000732d) 50 | 51 | for xref in getReferencesTo(decrypt_string_addr): 52 | # get instructions before callee address 53 | insts = get_instructions_before(xref.getFromAddress(), 50) 54 | 55 | # find instruction that passes 56 | # address of encrypted data via EDX 57 | for inst in insts: 58 | if is_mov_ecx(inst): 59 | # get encrypted data address and decrypt it 60 | data_addr = inst.getAddress(1) 61 | if data_addr: 62 | decrypted_str = decrypt_string(data_addr) 63 | # add comment 64 | print('[*] found at {} : {!r}'.format(inst.getAddress(), decrypted_str)) 65 | add_bookmark_comment(inst.getAddress(), decrypted_str) 66 | 67 | if __name__ == '__main__': 68 | main() 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /for_training/decrypt_string_stage2_not_impl.py: -------------------------------------------------------------------------------- 1 | from __main__ import * 2 | 3 | import struct 4 | from itertools import cycle 5 | 6 | from ghidra.program.model.listing import CodeUnit 7 | 8 | def decrypt_string(enc_data_addr): 9 | key = getInt(enc_data_addr) 10 | xored_length = getInt(enc_data_addr.add(4)) 11 | original_length = key ^ xored_length 12 | enc = getBytes(enc_data_addr.add(8), original_length).tostring() 13 | return ''.join([chr(ord(k) ^ ord(v)) for k, v in zip(cycle(struct.pack('' or 'MOV ,', 32 | retrun scalar value. if it's not, return None. 33 | 34 | Hint: 35 | - check API document of Instruction interface 36 | ''' 37 | raise NotImplementedError('not implemented') 38 | 39 | def main(): 40 | db_path = askFile("DB for hashes", "import").getPath() 41 | 42 | # for Headless usage, you can get arugment via command line 43 | # db_path = getScriptArgs()[0] 44 | 45 | db = HashDB(db_path) 46 | 47 | # get all instructions in program 48 | instructions = currentProgram.getListing().getInstructions(True) 49 | 50 | # process each instruction 51 | for inst in instructions: 52 | value = get_scalar_argument(inst) 53 | if value is not None: 54 | try: 55 | # then lookup DB 56 | orig_name = db.lookup(str(value)) 57 | 58 | # if it exists, add comment and add bookmark 59 | if orig_name: 60 | print('[*] {} at {}: {}'.format(str(value), inst.getAddress(), orig_name)) 61 | add_bookmark_comment(inst.getAddress(), orig_name) 62 | except Exception as e: 63 | print(e) 64 | 65 | if __name__ == '__main__': 66 | main() 67 | -------------------------------------------------------------------------------- /for_training/utils.py: -------------------------------------------------------------------------------- 1 | from __main__ import * 2 | 3 | from ghidra.program.model.listing import CodeUnit 4 | from ghidra.program.model.scalar import Scalar 5 | from ghidra.app.decompiler import DecompInterface 6 | from ghidra.program.model.listing import CodeUnit 7 | from ghidra.program.model.pcode import PcodeOp 8 | from ghidra.program.model.scalar import Scalar 9 | 10 | 11 | def add_bookmark_comment(addr, comment, name): 12 | cu = currentProgram.getListing().getCodeUnitAt(addr) 13 | createBookmark(addr, name, comment) 14 | cu.setComment(CodeUnit.EOL_COMMENT, comment) 15 | 16 | def get_instructions_before(addr, n=1): 17 | '''get N instructions before specified address.''' 18 | r = [] 19 | for _ in range(n): 20 | inst = getInstructionBefore(addr) 21 | r.append(inst) 22 | addr = inst.getAddress() 23 | return r 24 | 25 | def decompile_func(func): 26 | decompiler = DecompInterface() 27 | decompiler.openProgram(currentProgram) 28 | 29 | results = decompiler.decompileFunction(func, 0, monitor) 30 | if not results.decompileCompleted(): 31 | raise RuntimeError('failed to decompile function: {}'.format(func)) 32 | 33 | high_func = results.getHighFunction() 34 | return high_func 35 | 36 | def get_pcode_at(addr): 37 | caller = getFunctionContaining(addr) 38 | high_func = decompile_func(caller) 39 | pcodes = high_func.getPcodeOps(addr) 40 | return pcodes 41 | 42 | def get_all_functions(): 43 | '''get all function in program.''' 44 | funcs = [] 45 | func = getFirstFunction() 46 | while func is not None: 47 | funcs.append(func) 48 | func = getFunctionAfter(func) 49 | return funcs 50 | 51 | def get_func_xref_count(n=10): 52 | '''get function reference count in program.''' 53 | 54 | # get all functions as list 55 | funcs = get_all_functions() 56 | 57 | # get xref counts for each function 58 | xref_counts = [ 59 | {'name': func.getName(), 'count': len(getReferencesTo(func.getEntryPoint()))} 60 | for func in funcs 61 | ] 62 | 63 | # sort with ascending by 'count' value and get top N 64 | topn_called_funcs = sorted(xref_counts, key=lambda x: x['count'])[::-1][:n] 65 | return topn_called_funcs 66 | 67 | def xor(key, data): 68 | '''xor data with multi-bytes key.''' 69 | return ''.join([chr(ord(k) ^ ord(d)) for k, d in zip(key, data)]) 70 | 71 | def calc_hash_case_insensitive(name, key): 72 | value = 0 73 | for c in name.lower(): 74 | value = (ord(c) + value * 0x1003f) & 0xffffffff 75 | return value ^ key -------------------------------------------------------------------------------- /make_hash_db.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import os 4 | from collections import defaultdict 5 | from functools import partial 6 | 7 | import pefile 8 | 9 | API_XOR_KEY = 0x5a80eae 10 | LIB_XOR_KEY = 0x1fc325da 11 | 12 | INTERESTING_DLLS = [ 13 | 'kernel32.dll', 'comctl32.dll', 'advapi32.dll', 'comdlg32.dll', 14 | 'gdi32.dll', 'msvcrt.dll', 'netapi32.dll', 'ntdll.dll', 15 | 'ntoskrnl.exe', 'oleaut32.dll', 'psapi.dll', 'shell32.dll', 16 | 'shlwapi.dll', 'srsvc.dll', 'urlmon.dll', 'user32.dll', 17 | 'winhttp.dll', 'wininet.dll', 'ws2_32.dll', 'wship6.dll', 18 | 'advpack.dll', 'crypt32.dll', 'userenv.dll', 'wtsapi32.dll' 19 | ] 20 | 21 | def calc_hash(name, key): 22 | value = 0 23 | for c in name: 24 | value = (ord(c) + value * 0x1003f) & 0xffffffff 25 | return value ^ key 26 | 27 | def get_export_api(dllpath): 28 | pe = pefile.PE(dllpath) 29 | if ((not hasattr(pe, "DIRECTORY_ENTRY_EXPORT")) or (pe.DIRECTORY_ENTRY_EXPORT is None)): 30 | raise RuntimeError(f'{dllpath} doesn\'t have export table') 31 | 32 | for sym in pe.DIRECTORY_ENTRY_EXPORT.symbols: 33 | if sym.name is not None: 34 | yield sym.name.decode('utf-8') 35 | 36 | def main(): 37 | import argparse 38 | 39 | p = argparse.ArgumentParser() 40 | p.add_argument('--lib-key', dest='lib_key', default=LIB_XOR_KEY, type=lambda x: int(x, 0), help='XOR key for lib name hashing') 41 | p.add_argument('--api-key', dest='api_key', default=API_XOR_KEY, type=lambda x: int(x, 0), help='XOR key for API hashing') 42 | 43 | args = p.parse_args() 44 | 45 | # partially apply calc_hash function with XOR key 46 | emotet_xor_for_lib = partial(calc_hash, key=args.lib_key) 47 | emotet_xor_for_api = partial(calc_hash, key=args.api_key) 48 | 49 | results = defaultdict(dict) 50 | 51 | # enumerate all .dll files in %windir%\system32 52 | target_path = os.path.join(os.environ.get('windir'), 'system32') 53 | for dll_filepath in glob.glob(os.path.join(target_path, '*.dll')): 54 | dllname = os.path.basename(dll_filepath) 55 | 56 | # filter uninteresting dlls for easy use 57 | if dllname.lower() not in INTERESTING_DLLS: 58 | continue 59 | 60 | # calc hash of dll name 61 | hashed_dllname = emotet_xor_for_lib(dllname.lower()) 62 | results[hex(hashed_dllname)] = dllname 63 | 64 | # calc hash of each API name in DLL 65 | for api in get_export_api(dll_filepath): 66 | hashed_api = emotet_xor_for_api(api) 67 | results[hex(hashed_api)] = api 68 | 69 | # dump tp json file 70 | with open('db.json', 'w') as f: 71 | json.dump(results, f, indent=4) 72 | 73 | 74 | if __name__ == '__main__': 75 | main() 76 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pefile -------------------------------------------------------------------------------- /search_hash.py: -------------------------------------------------------------------------------- 1 | from __main__ import * 2 | 3 | import json 4 | 5 | from ghidra.program.model.listing import (CodeUnit, Instruction) 6 | from ghidra.program.model.scalar import Scalar 7 | 8 | 9 | def add_bookmark_comment(addr, comment): 10 | cu = currentProgram.getListing().getCodeUnitAt(addr) 11 | createBookmark(addr, "hashed_name", comment) 12 | cu.setComment(CodeUnit.EOL_COMMENT, comment) 13 | 14 | class HashDB: 15 | def __init__(self, db_path): 16 | self.db = self._load(db_path) 17 | 18 | def _load(self, db_path): 19 | with open(db_path) as f: 20 | return json.loads(f.read()) 21 | 22 | def lookup(self, value): 23 | found = self.db.get(value) 24 | if not found: 25 | raise ValueError('{} is not found in DB'.format(value)) 26 | return found 27 | 28 | def get_scalar_argument(inst): 29 | # get mnemonic in one instruction 30 | mnemonic = inst.getMnemonicString() 31 | 32 | if mnemonic == 'PUSH': 33 | # get operand in index 1 34 | value = inst.getOpObjects(0)[0] 35 | if isinstance(value, Scalar) and value.bitLength() == 32: 36 | return value 37 | elif mnemonic == 'MOV': 38 | value = inst.getOpObjects(1)[0] 39 | if isinstance(value, Scalar) and value.bitLength() == 32: 40 | return value 41 | 42 | 43 | 44 | def main(): 45 | db_path = askFile("DB for hashes", "import").getPath() 46 | 47 | # for Headless usage, you can get arugment via command line 48 | # db_path = getScriptArgs()[0] 49 | 50 | db = HashDB(db_path) 51 | 52 | # get all instructions in program 53 | instructions = currentProgram.getListing().getInstructions(True) 54 | 55 | # process each instruction 56 | for inst in instructions: 57 | value = get_scalar_argument(inst) 58 | if value is not None: 59 | try: 60 | # then lookup DB 61 | orig_name = db.lookup(str(value)) 62 | 63 | # if it exists, add comment and add bookmark 64 | if orig_name: 65 | print('[*] {} at {} : {}'.format(str(value), inst.getAddress(), orig_name)) 66 | add_bookmark_comment(inst.getAddress(), orig_name) 67 | except Exception as e: 68 | pass 69 | 70 | if __name__ == '__main__': 71 | main() 72 | --------------------------------------------------------------------------------