├── requirements.txt ├── yara ├── rule_daa0.yara ├── rule_cooper.yara └── rule_ffd8.yara ├── .gitignore ├── firsttry_hextoascii.py ├── README.md └── parse.py /requirements.txt: -------------------------------------------------------------------------------- 1 | lief==0.9.0 2 | -------------------------------------------------------------------------------- /yara/rule_daa0.yara: -------------------------------------------------------------------------------- 1 | import "pe" 2 | 3 | rule TheDao { 4 | strings: 5 | $b = { DA A0 } 6 | 7 | condition: 8 | uint16(0) == 0x5a4d and $b at pe.overlay.offset and pe.overlay.size > 100 9 | } 10 | -------------------------------------------------------------------------------- /yara/rule_cooper.yara: -------------------------------------------------------------------------------- 1 | 2 | rule TwinPeaks 3 | { 4 | strings: 5 | $cooper = "Cooper" 6 | $pattern = { e9 ea eb ec ed ee ef f0} 7 | 8 | condition: 9 | uint16(0) == 0x5a4d and $cooper and ($pattern in (@cooper[1]..@cooper[1]+100)) 10 | } 11 | -------------------------------------------------------------------------------- /yara/rule_ffd8.yara: -------------------------------------------------------------------------------- 1 | 2 | rule MockingJay 3 | { 4 | strings: 5 | $load_magic = { C7 44 ?? ?? FF D8 FF E0 } 6 | $iter = { E9 EA EB EC ED EE EF F0 } 7 | $jpeg = { FF D8 FF E0 00 00 00 00 00 00 } 8 | 9 | condition: 10 | uint16(0) == 0x5a4d and 11 | $jpeg and 12 | ($load_magic or $iter in (@jpeg[1]..@jpeg[1]+200)) and 13 | for any i in (1..#jpeg): ( uint8(@jpeg[i] + 11) != 0 ) 14 | } 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Environments 2 | .env 3 | .venv 4 | env/ 5 | venv/ 6 | ENV/ 7 | env.bak/ 8 | venv.bak/ 9 | 10 | # Windows 11 | Thumbs.db 12 | ehthumbs.db 13 | Desktop.ini 14 | $RECYCLE.BIN/ 15 | 16 | # OSX 17 | .DS_Store 18 | .AppleDouble 19 | .LSOverride 20 | Icon 21 | ._* 22 | .Spotlight-V100 23 | .Trashes 24 | 25 | # Project Files 26 | .idea/ 27 | .idea_modules/ 28 | atlassian-ide-plugin.xml 29 | com_crashlytics_export_strings.xml 30 | *.sublime-workspace 31 | -------------------------------------------------------------------------------- /firsttry_hextoascii.py: -------------------------------------------------------------------------------- 1 | path = input('Locate the file: \n') 2 | 3 | def xoring(pattern, key): 4 | xor_this = "0x" + pattern 5 | xor_this = int(xor_this, 16) 6 | with_that = "0x" + key 7 | with_that = int(with_that, 16) 8 | return hex(xor_this ^ with_that) 9 | 10 | with open(path, "rb") as f: 11 | hex_file = bytearray(f.read()).hex().replace("\n", "") 12 | file_pattern = "daa0c7cbf4f0" + hex_file.split("daa0c7cbf4f0")[1] 13 | 14 | 15 | # iterating over the bytes 16 | # via https://stackoverflow.com/questions/434287/what-is-the-most-pythonic-way-to-iterate-over-a-list-in-chunks 17 | def chunker(seq, size): 18 | return [seq[pos:pos + size] for pos in range(0, len(seq), size)] 19 | 20 | hex_list = [ "0" + str(hex(number).replace('0x','')).upper() if len(hex(number).replace('0x','')) < 2 21 | else "0" + str(number).upper() if len(hex(number).replace('0x','')) < 2 22 | else hex(number).replace('0x','').upper() 23 | for number in range(256)] 24 | 25 | pattern_list = [group for group in chunker(file_pattern, 2)] 26 | 27 | xored_list=[] 28 | starting_point = 153 29 | 30 | for pattern in pattern_list: 31 | if starting_point == len(hex_list): 32 | starting_point = 0 33 | xored_list.append(xoring(pattern, hex_list[starting_point])) 34 | starting_point += 1 35 | else: 36 | xored_list.append(xoring(pattern, hex_list[starting_point])) 37 | starting_point += 1 38 | 39 | all_items = [] 40 | 41 | for item in xored_list: 42 | try: 43 | if int(item,16) < 0x20: 44 | all_items.append(" ") 45 | elif int(item,16) >=0x20 and int(item,16) <= 0x7E: 46 | all_items.append(chr(int(item,16))) 47 | else: 48 | all_items.append(" ") 49 | except ValueError as e: 50 | print(item, e) 51 | 52 | joined_string = ''.join(all_items) 53 | print(joined_string) 54 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Winnti analysis 2 | For a number of years now, a group of professional hackers has been busy spying on businesses all over the world: Winnti. It is believed to be a digital mercenary group controlled by China. For the first time, in a joint investigation, German public broadcasters BR and NDR are shedding light on how the hackers operate and how widespread they are. 3 | 4 | Read the full article on hackers for hire, conducting industrial espionage, here: 5 | - **BR24**: [Attacking the Heart of the German Industry](https://br24.de/winnti/english). 6 | 7 | ## Background 8 | The search for affected company networks is mostly build around so-called **campaign identifiers**. In some instances, Winnti operators wrote the names of their targets directly into the malware, obfuscated with a [rolling XOR cipher](https://my.safaribooksonline.com/book/networking/security/9780470613030/de-obfuscation/decoding_common_algorithms). In a first step, we tried to verify the information we were provided with, using a (not very good) [python script](https://github.com/br-data/2019-winnti-analyse/blob/master/firsttry_hextoascii.py). We then used [yara](https://yara.readthedocs.io) rules to hunt for Winnti samples. The yara rules we used are included in this repo, hopefully they prove useful to other researchers. 9 | 10 | Another way of finding networks with Winnti infections is [this Nmap script](https://github.com/TKCERT/winnti-nmap-script) by the Thyssenkrupp CERT. 11 | 12 | ## Analysis 13 | An execellent script for extracting the configuration details from a Winnti sample was written by [Moritz Contag](https://www.syssec.ruhr-uni-bochum.de/chair/staff/mcontag/). He thankfully allowed us to share it. Here is how to use it: 14 | 15 | ### Requirements 16 | The script requires `lief` in version 0.9 to be installed and thus is currently tied to Python 2.7. The dependency can be installed running `pip` on the command line: 17 | 18 | ``` 19 | pip2 install -r requirements.txt 20 | ``` 21 | 22 | ### Usage 23 | To extract the configuration of multiple Winnti samples, simply pass the directory to the script. The script will also recurse into subdirectory and blindly try to parse each file it encounters. 24 | 25 | The script does not try to identify Winnti samples and might produce incoherent output if the sample looks too different. Currently, it tries to parse configuration information stored in the executable's _overlay_ as well as _inline_ configurations indicated by a special marker. Further, it also tries to repair broken or "encrypted" files before processing them. 26 | 27 | It is recommended to name the samples according to their, e.g., SHA-256 hash for better identification. 28 | 29 | To scan a directory called `samples`, simply invoke the script as follows: 30 | ``` 31 | $ python2 parse.py ./samples 32 | 33 | ---------------------------------------------------------------------------------------------------- 34 | 35 | ./9c3415507b38694d65262e28f73c3fade5038e455b83d41060f024403c26c9ee: Parsed configuration (overlay). 36 | 37 | - Size: 0x50E 38 | - Type: exe 39 | - Configuration: 40 | 41 | +0x000: "" 42 | +0x304: "1" 43 | +0x324: "shinetsu" 44 | +0x356: 4B A0 D6 05 45 | +0x3C2: "HpInsightEx.dll" 46 | +0x3E2: "kb25489.dat" 47 | +0x402: "HPSupportService" 48 | +0x442: "HP Insight Extension Support" 49 | +0x50A: A9 A1 A5 A6 50 | 51 | ---------------------------------------------------------------------------------------------------- 52 | 53 | ./585fa6bbc8bc9dbd8821a0855432c911cf828e834ec86e27546b46652afbfa5e: Parsed configuration (overlay). 54 | 55 | - Size: 0x048 56 | - Type: dll exe 57 | - Exports: #3 58 | GetFilterVersion 59 | HttpFilterProc 60 | TerminateFilter 61 | 62 | - Configuration: 63 | 64 | +0x000: "DEHENSV533-IIS" 65 | +0x020: "de.henkelgroup.net" 66 | +0x044: 99 DE DF E0 67 | 68 | ``` 69 | 70 | ## Acknowledgments 71 | - [Moritz Contag](https://www.syssec.ruhr-uni-bochum.de/chair/staff/mcontag/) for writing the great script and allowing us to share it 72 | - [Silas Cutler](https://twitter.com/silascutler) who helped us a great deal to corroborate our findings 73 | 74 | ## Contact 75 | BR Data is a data-driven investigative unit at the German public broadcaster Bayerischer Rundfunk. We are a team of journalists, developers and data scientist. We specialize in data- and document-driven research and interactive storytelling. 76 | 77 | Please send us your questions and feedback: 78 | - Twitter: [@br_data](https://twitter.com/br_data) 79 | - E-Mail: [data@br.de](mailto:data@br.de) -------------------------------------------------------------------------------- /parse.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python2 2 | 3 | # Tested with lief==0.9.0. 4 | 5 | from __future__ import print_function 6 | 7 | import os 8 | import sys 9 | import lief 10 | import string 11 | import struct 12 | import tempfile 13 | 14 | from collections import Counter 15 | 16 | 17 | SIZES = Counter() 18 | 19 | # Arbitrary upper bound on configuration size. 20 | MAX_CONFIG_SIZE = 0x600 21 | 22 | TYPE = { 23 | lief.PE.HEADER_CHARACTERISTICS.EXECUTABLE_IMAGE: 'exe', 24 | lief.PE.HEADER_CHARACTERISTICS.DLL: 'dll', 25 | lief.PE.HEADER_CHARACTERISTICS.SYSTEM: 'sys', 26 | } 27 | 28 | 29 | def pretty_print(config): 30 | i = 0 31 | n = len(config) 32 | 33 | # Print all data and its respective offset while skipping zero bytes. 34 | while i < n: 35 | sys.stdout.write('\n\t+0x{:03X}: '.format(i)) 36 | 37 | data = [] 38 | while i < n and config[i]: 39 | data.append(chr(config[i])) 40 | i += 1 41 | 42 | if all(x in string.printable for x in data): 43 | data = '"{}"'.format(''.join(data)) 44 | else: 45 | data = ''.join('%02X ' % ord(x) for x in data) 46 | 47 | sys.stdout.write(data) 48 | while i < n and not config[i]: 49 | i += 1 50 | 51 | print('\n') 52 | 53 | 54 | def handle_file(exe, path, data, kind): 55 | global SIZES 56 | SIZES[len(data)] += 1 57 | 58 | print('-' * 100) 59 | print('') 60 | 61 | print('{}: Parsed configuration ({}).\n'.format(path, kind)) 62 | print('- Size: 0x{:03X}'.format(len(data))) 63 | 64 | if exe is not None: 65 | sys.stdout.write('- Type: ') 66 | for k, v in TYPE.iteritems(): 67 | if exe.header.has_characteristic(k): 68 | sys.stdout.write('{} '.format(v)) 69 | 70 | if len(exe.exported_functions): 71 | # Print the first three exported functions for quick clustering. 72 | print('\n- Exports: #{}'.format(len(exe.exported_functions))) 73 | 74 | for i, exp in enumerate(exe.exported_functions[:3]): 75 | print(' {}'.format(exp)) 76 | 77 | if len(exe.exported_functions) > 3: 78 | print(' ...') 79 | 80 | if len(exe.signature.certificates): 81 | print('\n- Certificates:\n') 82 | for cert in exe.signature.certificates: 83 | print(cert) 84 | 85 | # print(exe.rich_header) 86 | print('\n- Configuration:') 87 | pretty_print(data) 88 | 89 | 90 | def decrypt_overlay(overlay): 91 | # Most likely, the first entry is a path somewhere into C:\, so guess 'C' 92 | # as the first character and try the resulting key first. Only then test 93 | # all other potential keys. 94 | k = overlay[0] ^ ord('C') 95 | keys = [k, 0x99, 0x9d] + list(range(256)) 96 | 97 | plain = [] 98 | for k in keys: 99 | plain = [o ^ ((k + i) & 0xff) for i, o in enumerate(overlay)] 100 | candidate = Counter(plain).most_common(1) 101 | if not candidate: 102 | continue 103 | 104 | # If the zero byte is most common, the decryption most likely 105 | # succeeded. Configurations are often populated sparsely. 106 | byte, _count = candidate[0] 107 | if byte == 0: 108 | break 109 | 110 | return plain 111 | 112 | 113 | def fix_header(data, offset): 114 | # Fix up headers, assuming PE64 for simplicity (we do not want to run this 115 | # anyway.) 116 | data[0:2] = '\x4d\x5a' 117 | data[offset:offset + 4] = '\x50\x45\x00\x00' 118 | 119 | data[offset + 4:offset + 6] = '\x4c\x01' 120 | data[offset + 0x16:offset + 0x18] = '\x02\x00' 121 | data[offset + 0x18:offset + 0x1a] = '\x0b\x02' 122 | data[offset + 0x5c:offset + 0x5e] = '\x02\x00' 123 | 124 | return data 125 | 126 | 127 | def swap(b): 128 | return (b >> 4) | ((b & 0xf) << 4) 129 | 130 | 131 | def decrypt(data, offset): 132 | data = [swap(d ^ 0x36) for d in data] 133 | data = bytearray(chr(d) for d in data) 134 | 135 | return data 136 | 137 | 138 | def check_file(path): 139 | with open(path, 'rb') as f: 140 | data = bytearray(f.read()) 141 | 142 | magic = struct.unpack(' 100: 191 | return None 192 | 193 | # Find the end of the configuration -- ideally, we would get this from the 194 | # binary itself, but let's not hack some assembly fingerprint together. 195 | # Same reasoning as above, unlikely to have repetitive bytes in rolling 196 | # xor. 197 | y = data.find(b'\x00\x00', x) 198 | if y == -1: 199 | return None 200 | 201 | # These configs are rather short so let's try not to guess a key based on 202 | # the number of zeroes. We did not encounter any other key anyways. 203 | config = data[x:y] 204 | config = [ord(x) ^ ((0x99 + i) & 0xff) for i, x in enumerate(config)] 205 | return ''.join(map(chr, config)) 206 | 207 | 208 | def main(): 209 | if len(sys.argv) < 2: 210 | print('Usage: parse.py ') 211 | return 212 | 213 | # lief.Logger.enable() 214 | 215 | for root, _dirs, files in os.walk(sys.argv[1]): 216 | for path in files: 217 | path = os.path.join(root, path) 218 | 219 | # Fix up the file, if we have to. There are three scenarios: 220 | # - Its MZ header has been mangled with. 221 | # - Most of its header has been stripped for manually mapping. 222 | # - It is "encrypted". 223 | path = check_file(path) 224 | 225 | exe = lief.parse(path) 226 | with open(path, 'rb') as f: 227 | data = f.read() 228 | 229 | # The configuration may be stored inline and hinted at by a marker. 230 | for magic in MAGIC: 231 | config = detect_inline_config(data, magic) 232 | if config is None: 233 | continue 234 | 235 | if len(config) > MAX_CONFIG_SIZE: 236 | continue 237 | 238 | handle_file(exe, path, bytearray(config), 'inline') 239 | 240 | if not exe: 241 | continue 242 | 243 | if exe.overlay is None: 244 | continue 245 | 246 | # Otherwise, look for the configuration in its overlay. 247 | try: 248 | n = 0 249 | overlay = exe.overlay 250 | 251 | # We could simply just parse the last dword to read the 252 | # configuration size, but some samples are broken in that they 253 | # append additional zero bytes to the overlay. This code tries 254 | # to detect and skip these. 255 | while not n: 256 | n = ''.join(chr(o) for o in overlay[-4:]) 257 | n = struct.unpack(' MAX_CONFIG_SIZE: 262 | continue 263 | 264 | overlay = overlay[-n - 4:] 265 | overlay = decrypt_overlay(overlay) 266 | 267 | handle_file(exe, path, overlay, 'overlay') 268 | except Exception as _: 269 | pass 270 | 271 | print('\n\n\nConfiguration sizes:\n') 272 | for k, v in SIZES.most_common(): 273 | print(' - 0x{:04X}: #{}'.format(k, v)) 274 | 275 | 276 | if __name__ == '__main__': 277 | main() 278 | --------------------------------------------------------------------------------