├── requirements.txt
├── yara
    ├── rule_daa0.yara
    ├── rule_cooper.yara
    └── rule_ffd8.yara
├── .gitignore
├── firsttry_hextoascii.py
├── README.md
└── parse.py


/requirements.txt:
--------------------------------------------------------------------------------
1 | lief==0.9.0
2 | 


--------------------------------------------------------------------------------
/yara/rule_daa0.yara:
--------------------------------------------------------------------------------
import "pe"

// Matches PE files whose overlay begins with the bytes DA A0 and is larger
// than 100 bytes.
rule TheDao {
  strings:
    // DA A0 equals "C:" XORed byte-by-byte with 0x99, 0x9A, i.e. the start of
    // a path encrypted with a rolling XOR that starts at key 0x99.
    $b = { DA A0 }

  condition:
    // uint16(0) == 0x5a4d checks for the "MZ" magic at the start of the file.
    uint16(0) == 0x5a4d and $b at pe.overlay.offset and pe.overlay.size > 100
}
10 | 


--------------------------------------------------------------------------------
/yara/rule_cooper.yara:
--------------------------------------------------------------------------------
 1 | 
// Matches files starting with "MZ" that contain the string "Cooper" and, within
// 100 bytes after its first occurrence, the byte run E9 EA ... F0.
rule TwinPeaks
{
  strings:
    $cooper = "Cooper"
    // Eight consecutive increasing bytes: what a run of zero bytes looks like
    // under a rolling XOR whose key is at 0xE9..0xF0 (presumably the sparse
    // part of an encrypted configuration -- confirm against samples).
    $pattern = { e9 ea eb ec ed ee ef f0}

  condition:
    // @cooper[1] is the offset of the first "Cooper" match.
    uint16(0) == 0x5a4d and $cooper and ($pattern in (@cooper[1]..@cooper[1]+100))
}
11 | 


--------------------------------------------------------------------------------
/yara/rule_ffd8.yara:
--------------------------------------------------------------------------------
 1 | 
// Matches files starting with "MZ" that contain the 10-byte marker
// FF D8 FF E0 00 00 00 00 00 00 together with either $load_magic anywhere or
// $iter within 200 bytes after the first marker, and where at least one marker
// occurrence has a non-zero byte at marker offset + 11.
rule MockingJay
{
  strings:
    // Presumably an x86 "mov dword ptr [reg+disp8], imm32" storing the marker
    // bytes FF D8 FF E0; the two wildcards would be the SIB and disp8 bytes.
    $load_magic = { C7 44 ?? ?? FF D8 FF E0 }
    // Eight consecutive increasing bytes, as produced by zero bytes under a
    // rolling XOR whose key is at 0xE9..0xF0.
    $iter = { E9 EA EB EC ED EE EF F0 }
    // Same byte sequence as the first entry of MAGIC in parse.py.
    $jpeg = { FF D8 FF E0 00 00 00 00 00 00 }

  condition:
    uint16(0) == 0x5a4d and
      $jpeg and
      // NOTE(review): this should group as $load_magic or ($iter in (...)),
      // so $load_magic is accepted anywhere in the file -- confirm intended.
      ($load_magic or $iter in (@jpeg[1]..@jpeg[1]+200)) and
      // NOTE(review): $jpeg is 10 bytes long, so +11 is the second byte after
      // the marker, not the first -- confirm this offset is intended.
      for any i in (1..#jpeg): ( uint8(@jpeg[i] + 11) != 0 )
}
15 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Environments
 2 | .env
 3 | .venv
 4 | env/
 5 | venv/
 6 | ENV/
 7 | env.bak/
 8 | venv.bak/
 9 | 
10 | # Windows
11 | Thumbs.db
12 | ehthumbs.db
13 | Desktop.ini
14 | $RECYCLE.BIN/
15 | 
16 | # OSX
17 | .DS_Store
18 | .AppleDouble
19 | .LSOverride
20 | Icon
21 | ._*
22 | .Spotlight-V100
23 | .Trashes
24 | 
25 | # Project Files
26 | .idea/
27 | .idea_modules/
28 | atlassian-ide-plugin.xml
29 | com_crashlytics_export_strings.xml
30 | *.sublime-workspace
31 | 


--------------------------------------------------------------------------------
/firsttry_hextoascii.py:
--------------------------------------------------------------------------------
# Ask the user for the location of the sample to decode. The script reads this
# file further down, so the prompt runs as soon as the script starts.
path = input('Locate the file: \n')
 2 | 
 3 | def xoring(pattern, key):
 4 |     xor_this = "0x" + pattern
 5 |     xor_this = int(xor_this, 16)
 6 |     with_that = "0x" + key
 7 |     with_that = int(with_that, 16)
 8 |     return hex(xor_this ^ with_that)
 9 | 
# Read the sample and keep everything from the first occurrence of the marker
# DA A0 C7 CB F4 F0 onwards. The marker is "C:\Win" XORed with the rolling key
# 0x99, 0x9A, ... and therefore marks the start of the encrypted data.
#
# The search runs on the raw bytes rather than on the hex text, so a match can
# never start in the middle of a byte. Everything up to the end of the file is
# kept, even if the marker occurs a second time.
with open(path, "rb") as f:
    raw_bytes = f.read()

marker = bytes.fromhex("daa0c7cbf4f0")
marker_offset = raw_bytes.find(marker)
if marker_offset == -1:
    # Fail with a readable message instead of an IndexError further down.
    raise SystemExit("Marker daa0c7cbf4f0 not found in " + path)

# Hex dump of the whole file, kept under its original name.
hex_file = raw_bytes.hex()
# Hex dump of the data to decode, beginning with the marker itself.
file_pattern = raw_bytes[marker_offset:].hex()
13 | 
14 | 
# Split a sequence into consecutive pieces of a fixed size.
# via https://stackoverflow.com/questions/434287/what-is-the-most-pythonic-way-to-iterate-over-a-list-in-chunks
def chunker(seq, size):
    """Return *seq* cut into consecutive slices of length *size*.

    The last slice is shorter when ``len(seq)`` is not a multiple of *size*.
    """
    pieces = []
    for start in range(0, len(seq), size):
        stop = start + size
        pieces.append(seq[start:stop])
    return pieces
19 | 
# Key stream source: every byte value as a two-digit, upper-case hex string
# ("00", "01", ..., "FF").
hex_list = ["{:02X}".format(number) for number in range(256)]

# The data to decode as a list of two-character hex strings, one per byte.
pattern_list = chunker(file_pattern, 2)

# Rolling XOR: the first byte is XORed with 153 (0x99) and the key grows by
# one for every following byte, wrapping around to 0 after 255.
starting_point = 153
xored_list = [
    xoring(pattern, hex_list[(starting_point + position) % len(hex_list)])
    for position, pattern in enumerate(pattern_list)
]

# Keep printable ASCII (0x20..0x7E) and show every other byte as a space.
all_items = []
for item in xored_list:
    value = int(item, 16)
    all_items.append(chr(value) if 0x20 <= value <= 0x7E else " ")

joined_string = ''.join(all_items)
print(joined_string)
54 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Winnti analysis
 2 | For a number of years now, a group of professional hackers has been busy spying on businesses all over the world: Winnti. It is believed to be a digital mercenary group controlled by China. For the first time, in a joint investigation, German public broadcasters BR and NDR are shedding light on how the hackers operate and how widespread they are.
 3 | 
 4 | Read the full article on hackers for hire, conducting industrial espionage, here:
 5 | - **BR24**: [Attacking the Heart of the German Industry](https://br24.de/winnti/english).
 6 | 
 7 | ## Background
 8 | The search for affected company networks is mostly built around so-called **campaign identifiers**. In some instances, Winnti operators wrote the names of their targets directly into the malware, obfuscated with a [rolling XOR cipher](https://my.safaribooksonline.com/book/networking/security/9780470613030/de-obfuscation/decoding_common_algorithms). In a first step, we tried to verify the information we were provided with, using a (not very good) [python script](https://github.com/br-data/2019-winnti-analyse/blob/master/firsttry_hextoascii.py). We then used [yara](https://yara.readthedocs.io) rules to hunt for Winnti samples. The yara rules we used are included in this repo, hopefully they prove useful to other researchers. 
 9 | 
10 | Another way of finding networks with Winnti infections is [this Nmap script](https://github.com/TKCERT/winnti-nmap-script) by the Thyssenkrupp CERT.
11 | 
12 | ## Analysis
13 | An excellent script for extracting the configuration details from a Winnti sample was written by [Moritz Contag](https://www.syssec.ruhr-uni-bochum.de/chair/staff/mcontag/). He thankfully allowed us to share it. Here is how to use it:
14 | 
15 | ### Requirements
16 | The script requires `lief` in version 0.9 to be installed and thus is currently tied to Python 2.7. The dependency can be installed running `pip` on the command line:
17 | 
18 | ```
19 | pip2 install -r requirements.txt
20 | ```
21 | 
22 | ### Usage
23 | To extract the configuration of multiple Winnti samples, simply pass the directory to the script. The script will also recurse into subdirectories and blindly try to parse each file it encounters.
24 | 
25 | The script does not try to identify Winnti samples and might produce incoherent output if the sample looks too different. Currently, it tries to parse configuration information stored in the executable's _overlay_ as well as _inline_ configurations indicated by a special marker. Further, it also tries to repair broken or "encrypted" files before processing them.
26 | 
27 | It is recommended to name the samples according to their, e.g., SHA-256 hash for better identification.
28 | 
29 | To scan a directory called `samples`, simply invoke the script as follows:
30 | ```
31 | $ python2 parse.py ./samples
32 | 
33 | ----------------------------------------------------------------------------------------------------
34 | 
35 | ./9c3415507b38694d65262e28f73c3fade5038e455b83d41060f024403c26c9ee: Parsed configuration (overlay).
36 | 
37 | - Size:    0x50E
38 | - Type:    exe 
39 | - Configuration:
40 | 
41 | 	+0x000:  ""
42 | 	+0x304:  "1"
43 | 	+0x324:  "shinetsu"
44 | 	+0x356:  4B A0 D6 05 
45 | 	+0x3C2:  "HpInsightEx.dll"
46 | 	+0x3E2:  "kb25489.dat"
47 | 	+0x402:  "HPSupportService"
48 | 	+0x442:  "HP Insight Extension Support"
49 | 	+0x50A:  A9 A1 A5 A6 
50 | 
51 | ----------------------------------------------------------------------------------------------------
52 | 
53 | ./585fa6bbc8bc9dbd8821a0855432c911cf828e834ec86e27546b46652afbfa5e: Parsed configuration (overlay).
54 | 
55 | - Size:    0x048
56 | - Type:    dll exe 
57 | - Exports: #3
58 |            GetFilterVersion
59 |            HttpFilterProc
60 |            TerminateFilter
61 | 
62 | - Configuration:
63 | 
64 | 	+0x000:  "DEHENSV533-IIS"
65 | 	+0x020:  "de.henkelgroup.net"
66 | 	+0x044:  99 DE DF E0 
67 | 
68 | ```
69 | 
70 | ## Acknowledgments
71 | - [Moritz Contag](https://www.syssec.ruhr-uni-bochum.de/chair/staff/mcontag/) for writing the great script and allowing us to share it
72 | - [Silas Cutler](https://twitter.com/silascutler) who helped us a great deal to corroborate our findings
73 | 
74 | ## Contact
75 | BR Data is a data-driven investigative unit at the German public broadcaster Bayerischer Rundfunk. We are a team of journalists, developers and data scientists. We specialize in data- and document-driven research and interactive storytelling.
76 | 
77 | Please send us your questions and feedback:
78 | - Twitter: [@br_data](https://twitter.com/br_data)
79 | - E-Mail: [data@br.de](mailto:data@br.de)


--------------------------------------------------------------------------------
/parse.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python2
  2 | 
  3 | # Tested with lief==0.9.0.
  4 | 
  5 | from __future__ import print_function
  6 | 
  7 | import os
  8 | import sys
  9 | import lief
 10 | import string
 11 | import struct
 12 | import tempfile
 13 | 
 14 | from collections import Counter
 15 | 
 16 | 
# Histogram of configuration sizes: maps the length of an extracted
# configuration to how many configurations of that length were seen.
# handle_file() increments it and main() prints it after the scan.
SIZES = Counter()

# Arbitrary upper bound on configuration size.
MAX_CONFIG_SIZE = 0x600

# Short labels for the PE header characteristics that handle_file() prints on
# the "- Type:" line of its report.
TYPE = {
    lief.PE.HEADER_CHARACTERISTICS.EXECUTABLE_IMAGE: 'exe',
    lief.PE.HEADER_CHARACTERISTICS.DLL: 'dll',
    lief.PE.HEADER_CHARACTERISTICS.SYSTEM: 'sys',
}
 27 | 
 28 | 
 29 | def pretty_print(config):
 30 |     i = 0
 31 |     n = len(config)
 32 | 
 33 |     # Print all data and its respective offset while skipping zero bytes.
 34 |     while i < n:
 35 |         sys.stdout.write('\n\t+0x{:03X}:  '.format(i))
 36 | 
 37 |         data = []
 38 |         while i < n and config[i]:
 39 |             data.append(chr(config[i]))
 40 |             i += 1
 41 | 
 42 |         if all(x in string.printable for x in data):
 43 |             data = '"{}"'.format(''.join(data))
 44 |         else:
 45 |             data = ''.join('%02X ' % ord(x) for x in data)
 46 | 
 47 |         sys.stdout.write(data)
 48 |         while i < n and not config[i]:
 49 |             i += 1
 50 | 
 51 |     print('\n')
 52 | 
 53 | 
 54 | def handle_file(exe, path, data, kind):
 55 |     global SIZES
 56 |     SIZES[len(data)] += 1
 57 | 
 58 |     print('-' * 100)
 59 |     print('')
 60 | 
 61 |     print('{}: Parsed configuration ({}).\n'.format(path, kind))
 62 |     print('- Size:    0x{:03X}'.format(len(data)))
 63 | 
 64 |     if exe is not None:
 65 |         sys.stdout.write('- Type:    ')
 66 |         for k, v in TYPE.iteritems():
 67 |             if exe.header.has_characteristic(k):
 68 |                 sys.stdout.write('{} '.format(v))
 69 | 
 70 |         if len(exe.exported_functions):
 71 |             # Print the first three exported functions for quick clustering.
 72 |             print('\n- Exports: #{}'.format(len(exe.exported_functions)))
 73 | 
 74 |             for i, exp in enumerate(exe.exported_functions[:3]):
 75 |                 print('           {}'.format(exp))
 76 | 
 77 |             if len(exe.exported_functions) > 3:
 78 |                 print('           ...')
 79 | 
 80 |         if len(exe.signature.certificates):
 81 |             print('\n- Certificates:\n')
 82 |             for cert in exe.signature.certificates:
 83 |                 print(cert)
 84 | 
 85 |     # print(exe.rich_header)
 86 |     print('\n- Configuration:')
 87 |     pretty_print(data)
 88 | 
 89 | 
 90 | def decrypt_overlay(overlay):
 91 |     # Most likely, the first entry is a path somewhere into C:\, so guess 'C'
 92 |     # as the first character and try the resulting key first. Only then test
 93 |     # all other potential keys.
 94 |     k = overlay[0] ^ ord('C')
 95 |     keys = [k, 0x99, 0x9d] + list(range(256))
 96 | 
 97 |     plain = []
 98 |     for k in keys:
 99 |         plain = [o ^ ((k + i) & 0xff) for i, o in enumerate(overlay)]
100 |         candidate = Counter(plain).most_common(1)
101 |         if not candidate:
102 |             continue
103 | 
104 |         # If the zero byte is most common, the decryption most likely
105 |         # succeeded. Configurations are often populated sparsely.
106 |         byte, _count = candidate[0]
107 |         if byte == 0:
108 |             break
109 | 
110 |     return plain
111 | 
112 | 
def fix_header(data, offset):
    """Write fixed MZ/PE header values into *data* so that it can be parsed.

    Args:
        data: ``bytearray`` holding the whole file; it is modified in place.
        offset: Offset of the PE signature inside *data* (the value stored at
            file offset 0x3c).

    Returns:
        The same ``bytearray`` object that was passed in.
    """
    # Fix up headers, assuming PE64 for simplicity (we do not want to run this
    # anyway.)
    #
    # Bytes literals are used so that the slice assignments work on Python 3
    # as well; on Python 2 they are identical to plain string literals.
    data[0:2] = b'\x4d\x5a'                            # DOS magic "MZ"
    data[offset:offset + 4] = b'\x50\x45\x00\x00'      # signature "PE\0\0"

    data[offset + 4:offset + 6] = b'\x4c\x01'          # Machine = 0x014c
    data[offset + 0x16:offset + 0x18] = b'\x02\x00'    # Characteristics = 0x0002
    data[offset + 0x18:offset + 0x1a] = b'\x0b\x02'    # optional header Magic = 0x020b
    data[offset + 0x5c:offset + 0x5e] = b'\x02\x00'    # Subsystem = 2

    return data
125 | 
126 | 
def swap(b):
    """Exchange the high and the low nibble of the byte value *b*."""
    low_nibble = b & 0xf
    rest = b >> 4
    return (low_nibble << 4) | rest
129 | 
130 | 
def decrypt(data, offset):
    """Decrypt a whole file that was "encrypted" with the key 0x36.

    Every byte is XORed with 0x36 and then has its two nibbles exchanged.

    Args:
        data: Iterable of byte values, e.g. the ``bytearray`` of a file.
        offset: Unused; kept so that existing callers keep working.

    Returns:
        A new ``bytearray`` with the decrypted bytes.
    """
    # Build the bytearray directly from the integer values. Going through
    # chr() is unnecessary and fails on Python 3, where bytearray() does not
    # accept one-character strings.
    return bytearray(swap(d ^ 0x36) for d in data)
136 | 
137 | 
def check_file(path):
    """Repair a mangled or "encrypted" PE file if necessary.

    Args:
        path: Path of the file to inspect.

    Returns:
        *path* itself when the file already starts with "MZ", is too short to
        contain a DOS header, or matches none of the known scenarios.
        Otherwise the path of a newly written temporary file that holds the
        repaired copy. The temporary file is not deleted by this function.
    """
    with open(path, 'rb') as f:
        data = bytearray(f.read())

    # The PE header offset lives at 0x3c..0x3f. Anything shorter cannot be
    # repaired, and unpacking it would raise struct.error and abort the scan.
    if len(data) < 0x40:
        return path

    magic = struct.unpack('<H', data[:2])[0]
    if magic == 0x5a4d:
        return path

    offset = struct.unpack('<I', data[0x3c:0x40])[0]

    # Assume encryption with key 0x36 (we did not encounter anything else yet.)
    if magic == 0x3636:
        data = decrypt(data, offset)

        # The header offset can only be read after decryption.
        offset = struct.unpack('<I', data[0x3c:0x40])[0]
        data = fix_header(data, offset)

    # Bytes literals compare equal to a bytearray on Python 2 and Python 3; a
    # plain str literal never does on Python 3.
    elif data[offset:offset + 2] == b'\x50\x45':
        # Only the "MZ" magic is missing.
        data[0:2] = b'\x4d\x5a'

    elif magic == 0:
        data = fix_header(data, offset)

    else:
        return path

    with tempfile.NamedTemporaryFile(delete=False) as f:
        f.write(data)
        new_path = f.name

    return new_path
169 | 
170 | 
# Byte markers that precede an inline configuration; main() passes each of
# them to detect_inline_config(). They are the same byte sequences as the
# $jpeg string in yara/rule_ffd8.yara and the $cooper string in
# yara/rule_cooper.yara.
MAGIC = [
    b'\xff\xd8\xff\xe0\x00\x00\x00\x00\x00\x00',
    b'Cooper',
]
175 | 
176 | 
def detect_inline_config(data, magic):
    """Extract and decrypt the inline configuration that follows *magic*.

    Only the first occurrence of *magic* in *data* is examined.

    Args:
        data: Raw file contents as a byte string.
        magic: Byte string that marks the configuration.

    Returns:
        The decrypted configuration as a byte string (``str`` on Python 2,
        ``bytes`` on Python 3), or ``None`` when the marker is missing, is
        followed by too many null bytes, or no end of the configuration can
        be found.
    """
    m = data.find(magic)
    if m == -1:
        return None

    x = m + len(magic)

    # Skip any null bytes following our magic number. This works as the rolling
    # xor is unlikely to contain repetitive bytes right at the beginning.
    # Comparing one-byte slices works for Python 2 and Python 3 byte strings
    # and simply stops at the end of the data instead of raising IndexError.
    while data[x:x + 1] == b'\x00':
        x += 1

    # Skipping too many bytes indicates a different scenario.
    if (x - m) > 100:
        return None

    # Find the end of the configuration -- ideally, we would get this from the
    # binary itself, but let's not hack some assembly fingerprint together.
    # Same reasoning as above, unlikely to have repetitive bytes in rolling
    # xor.
    y = data.find(b'\x00\x00', x)
    if y == -1:
        return None

    # These configs are rather short so let's try not to guess a key based on
    # the number of zeroes. We did not encounter any other key anyways.
    encrypted = bytearray(data[x:y])
    plain = bytearray(
        value ^ ((0x99 + i) & 0xff) for i, value in enumerate(encrypted)
    )
    return bytes(plain)
206 | 
207 | 
def main():
    """Scan a directory tree and print every configuration that is found.

    The directory is taken from ``sys.argv[1]``. Every file below it is
    repaired if necessary, searched for inline configurations and, when lief
    could parse it, for a configuration in its overlay. A summary of all
    configuration sizes is printed at the end.
    """
    if len(sys.argv) < 2:
        print('Usage: parse.py <directory_with_samples>')
        return

    # lief.Logger.enable()

    for root, _dirs, files in os.walk(sys.argv[1]):
        for path in files:
            path = os.path.join(root, path)

            # Fix up the file, if we have to. There are three scenarios:
            # - Its MZ header has been tampered with.
            # - Most of its header has been stripped for manually mapping.
            # - It is "encrypted".
            # From here on, path may refer to a repaired temporary copy.
            path = check_file(path)

            exe = lief.parse(path)
            with open(path, 'rb') as f:
                data = f.read()

            # The configuration may be stored inline and hinted at by a marker.
            for magic in MAGIC:
                config = detect_inline_config(data, magic)
                if config is None:
                    continue

                if len(config) > MAX_CONFIG_SIZE:
                    continue

                handle_file(exe, path, bytearray(config), 'inline')

            if not exe:
                continue

            if exe.overlay is None:
                continue

            # Otherwise, look for the configuration in its overlay.
            try:
                n = 0
                overlay = exe.overlay

                # We could simply just parse the last dword to read the
                # configuration size, but some samples are broken in that they
                # append additional zero bytes to the overlay. This code tries
                # to detect and skip these.
                while not n and len(overlay) >= 4:
                    # bytes(bytearray(...)) yields a byte string on Python 2
                    # and Python 3 alike.
                    n = struct.unpack('<I', bytes(bytearray(overlay[-4:])))[0]
                    if not n:
                        overlay = overlay[:-4]

                # n is still zero when fewer than four bytes are left, i.e.
                # there is no overlay or it holds no size field.
                if not n or n > MAX_CONFIG_SIZE:
                    continue

                overlay = overlay[-n - 4:]
                overlay = decrypt_overlay(overlay)

                handle_file(exe, path, overlay, 'overlay')
            except Exception as e:
                # Keep scanning the remaining files, but do not hide the
                # failure completely.
                sys.stderr.write(
                    '{}: Skipping overlay ({}).\n'.format(path, e))

    print('\n\n\nConfiguration sizes:\n')
    for k, v in SIZES.most_common():
        print('  - 0x{:04X}: #{}'.format(k, v))


if __name__ == '__main__':
    main()
278 | 


--------------------------------------------------------------------------------