├── .gitignore
├── ANNOUNCE
├── AUTHORS
├── CHANGES
├── LICENSE
├── MANIFEST.in
├── README.rst
├── bits
    ├── __init__.py
    ├── bits.py
    ├── carver.py
    ├── const.py
    ├── helpers
    │   ├── __init__.py
    │   ├── fields.py
    │   └── tools.py
    ├── sampler.py
    ├── structs.py
    └── writer.py
├── requirements.txt
├── scripts
    └── bits_parser
└── setup.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | 
 3 | # C extensions
 4 | *.so
 5 | 
 6 | # Packages
 7 | *.egg
 8 | *.egg-info
 9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 | 
21 | # Installer logs
22 | pip-log.txt
23 | 
24 | # Vim
25 | *.swp
26 | *.un~
27 | 
28 | # Atom
29 | .cache
30 | 
31 | # Generated documentation
32 | docs/_build/*
33 | 
34 | # tests
35 | .coverage
36 | .hypothesis
37 | 


--------------------------------------------------------------------------------
/ANNOUNCE:
--------------------------------------------------------------------------------
 1 | =================
 2 | bits_parser 1.0.0
 3 | =================
 4 | 
 5 | What is bits_parser ?
 6 | =====================
 7 | 
 8 | bits_parser is a tool to analyze Background Intelligent Transfer (BITS)
 9 | logfiles for forensic purposes. It also supports disk carving and more.
10 | 
11 | Run bits_parser --help for more info.
12 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | ANSSI - Bureau Investigation Numérique
2 | 


--------------------------------------------------------------------------------
/CHANGES:
--------------------------------------------------------------------------------
 1 | # CHANGELOG
 2 | 
 3 | All notable changes to this project will be documented in this file.
 4 | 
 5 | The format is based on [Keep a Changelog](http://keepchangelog.com/) and this
 6 | project adheres to [Semantic Versioning](http://semver.org/).
 7 | 
 8 | ## 1.0.0 - 2018-01-22
 9 | Public release.
10 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2018 ANSSI
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include ANNOUNCE AUTHORS CHANGES LICENSE README.rst requirements.txt
2 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | ===========
 2 | bits_parser
 3 | ===========
 4 | 
 5 | 
 6 | Extract BITS jobs from QMGR queue and store them as CSV records.
 7 | 
 8 | This topic has been presented during a talk at the French conference `CORI&IN 2018 <https://www.cecyf.fr/activites/recherche-et-developpement/coriin-2018/>`_
 9 | 
10 | 
11 | Installation
12 | ============
13 | 
14 | If you want to run the latest version of ``bits_parser`` you can install it
15 | from PyPI by running the following command:
16 | 
17 |   .. code:: bash
18 | 
19 |     pip install bits_parser
20 | 
21 | 
22 | To install it from the sources:
23 | 
24 |   .. code:: bash
25 | 
26 |     python setup.py install
27 | 
28 | 
29 | Usage
30 | =====
31 | 
32 | QMGR queues are usually *.dat* files located in the folder
33 | ``%%ALLUSERSPROFILE%%\Microsoft\Network\Downloader`` on a Windows system.
34 | 
35 | Once those files have been located (*e.g.* ``qmgr0.dat`` and ``qmgr1.dat``) you
36 | can run `bits_parser` by issuing the following command:
37 | 
38 |   .. code:: bash
39 | 
40 |     bits_parser qmgr0.dat
41 | 
42 | `bits_parser` also supports full-disk analysis but the process is longer and
43 | the results are dirtier (some data from adjacent data clusters can leak in the
44 | result). This mode is enable with the switch `-i`:
45 | 
46 |   .. code:: bash
47 | 
48 |     bits_parser -i image.bin
49 | 
50 | The disk mode works by looking for expected bit sequences (markers) and
51 | collecting surrounding data. The amount of surrounding data (the radiance) is
52 | settable and defaulted to 2048 kB:
53 | 
54 |   .. code:: bash
55 | 
56 |     bits_parser -i --radiance=4096 image.bin
57 | 
58 | Increasing the radiance could help to retrieve more data but the default value
59 | is normally enough.
60 | 
61 | When the processing is finished, the result is csv-formatted and then displayed
62 | on the standard output. The output can be written to a file with `-o`:
63 | 
64 |   .. code:: bash
65 | 
66 |     bits_parser -o jobs.csv qmgr0.dat
67 | 
68 | Use `--help` to display all options options of ``bits_parser``.
69 | 
70 | 
71 | Related works
72 | =============
73 | 
74 | `Finding your naughty BITS <https://www.dfrws.org/sites/default/files/session-files/pres-finding_your_naughty_bits.pdf>`_ [DFRWS USA 2015, Matthew Geiger]
75 | 
76 | `BITSInject <https://github.com/SafeBreach-Labs/BITSInject>`_ [DEFCON 2017, Dor Azouri]
77 | 


--------------------------------------------------------------------------------
/bits/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 ANSSI. All Rights Reserved.
 2 | #
 3 | # Licensed under the MIT License (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | """bits_parser"""
 6 | 
 7 | import logging
 8 | from bits.bits import Bits
 9 | from bits.writer import write_csv
10 | from bits.sampler import sample_disk
11 | 
12 | 
13 | logger = logging.getLogger(__name__)
14 | 
15 | 
16 | __version__ = '1.0.0'
17 | __all__ = Bits,
18 | 


--------------------------------------------------------------------------------
/bits/bits.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 ANSSI. All Rights Reserved.
  2 | #
  3 | # Licensed under the MIT License (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | """Bits object."""
  6 | import logging
  7 | import construct.core
  8 | 
  9 | from pathlib import Path
 10 | 
 11 | from bits.structs import QUEUE, JOB, FILE
 12 | from bits.const import JOB_DELIMITERS, XFER_DELIMITER
 13 | from bits.carver import carve_queues, carve_jobs, carve_sections
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | class Bits:
 19 |     """
 20 |     An interface to store data and apply different strategies to extract job
 21 |     details from legitimate or (partially) corrupted data.
 22 | 
 23 |     Args:
 24 |         delimiter: force the job delimiter.
 25 |     """
 26 | 
 27 |     def __init__(self, delimiter=None):
 28 | 
 29 |         self._raw_data = bytes()
 30 |         self._bits_data = bytes()
 31 |         self.delimiter = delimiter
 32 | 
 33 |     @classmethod
 34 |     def load_file(cls, fp):
 35 |         """Create a Bits instance and load data from a QMGR file.
 36 | 
 37 |         This method is a simple helper to append the content of a file and
 38 |         automatically call `guess_info()`.
 39 | 
 40 |         Args:
 41 |             fp: file path to a QMGR file.
 42 |         """
 43 |         logger.info('Processing BITS queue %s' % fp)
 44 | 
 45 |         rv = cls()
 46 | 
 47 |         path = Path(fp).resolve()
 48 |         with path.open('rb') as f:
 49 |             data = f.read()
 50 |         try:
 51 |             content = QUEUE.parse(data)
 52 |             rv.append_data(content.jobs, raw=False)
 53 |             rv.append_data(content.remains, raw=True)
 54 |             if content.job_count:
 55 |                 logger.info('%s legitimate job(s) detected' % content.job_count)
 56 | 
 57 |         except construct.core.ConstructError as e:
 58 |             logger.warning('incoherent data, carving mode only.')
 59 |             rv.append_data(data, raw=True)
 60 | 
 61 |         rv.guess_info()
 62 |         return rv
 63 | 
 64 |     def append_data(self, data, raw=True):
 65 |         """Append data to analyze.
 66 | 
 67 |         Args:
 68 |             data: bytes to append.
 69 |             raw: true when appending unparsed raw data.
 70 |         """
 71 |         data = data.strip(b'\x00')  # strip unwanted zeroes
 72 |         logger.debug('%d bytes loaded (raw=%s)' % (len(data), raw))
 73 |         if raw:
 74 |             self._raw_data += data
 75 |         else:
 76 |             self._bits_data += data
 77 | 
 78 |     def guess_info(self):
 79 |         """Try to guess information from available data."""
 80 |         # select as candidate the known delimiter with the most occurences
 81 |         data = self._bits_data + self._raw_data
 82 | 
 83 |         if not self.delimiter:
 84 |             count, candidate = max(
 85 |                 (data.count(bytes.fromhex(d)), bytes.fromhex(d))
 86 |                 for d in JOB_DELIMITERS.values()
 87 |             )
 88 | 
 89 |             self.delimiter = candidate if count else None
 90 | 
 91 |         # log
 92 |         if self.delimiter is not None:
 93 |             logger.info('Job delimiter is %s' % self.delimiter.hex().upper())
 94 |         else:
 95 |             logger.warning('Job delimiter is undefined')
 96 | 
 97 |     def parse(self):
 98 |         """Parse and yield job data in BITS data structures.
 99 | 
100 |         This method is based on expected data structures in a BITS queue and
101 |         works on well-formatted data.
102 | 
103 |         Yields: jobs.
104 |         """
105 |         xfer_delimiter = bytes.fromhex(XFER_DELIMITER)
106 | 
107 |         if self._bits_data and self.delimiter:
108 |             logger.debug('Analysis of %d bytes' % len(self._bits_data))
109 |             chunks = (j for j in self._bits_data.split(self.delimiter) if j)
110 |             for data in chunks:
111 | 
112 |                 try:
113 |                     job = dict(JOB.parse(data))
114 |                 except construct.core.ConstructError as e:
115 |                     logger.debug('%d bytes of unknown data' % len(data))
116 |                     continue
117 | 
118 |                 xfers = (x for x in job.pop('files').split(xfer_delimiter))
119 |                 job['files'] = []
120 | 
121 |                 for f in xfers:
122 |                     try:
123 |                         job['files'].append(FILE.parse(f))
124 |                     except construct.core.ConstructError as e:
125 |                         logger.debug('%d bytes of unknown data' % len(f))
126 | 
127 |                 if job['file_count'] != len(job['files']):
128 |                     err_msg = 'Invalid transfer count: %d found, %d expected.'
129 |                     logger.warning(err_msg % (len(job['files']),
130 |                                               job['file_count']))
131 | 
132 |                 yield job
133 |         else:
134 |             logger.info('No legitimate data found.')
135 | 
136 |     def carve(self, raw=True):
137 |         """Search and yield job data in raw bytes by carving it.
138 | 
139 |         This method uses multiple functions to retrieve fragments of queues,
140 |         jobs or internal sections and consolidate this all together.
141 | 
142 |         Data with no relevant informations (empty or completely erroneous) are
143 |         dropped.
144 | 
145 |         Args:
146 |             raw: carve raw bytes (default: True)
147 | 
148 |         Yields: jobs or partial jobs.
149 |         """
150 |         data = self._raw_data if raw else self._bits_data
151 |         logger.debug('Analysis of %d bytes' % len(data))
152 | 
153 |         for b_queue in carve_queues(data):
154 |             for b_job in carve_jobs(b_queue, self.delimiter):
155 |                     job, lost_bytes = carve_sections(b_job)
156 | 
157 |                     # no job data
158 |                     if not job:
159 |                         continue
160 | 
161 |                     # no value found
162 |                     if not any(job.values()):
163 |                         continue
164 | 
165 |                     # no file information
166 |                     if job.get('file_count', 0) == 1 and \
167 |                        not any(job['files'][0].values()):
168 |                         continue
169 | 
170 |                     job['carved'] = True    # indicate the job was carved
171 |                     yield job
172 | 
173 |     def __iter__(self):
174 | 
175 |         yield from self.parse()
176 |         yield from self.carve()
177 | 


--------------------------------------------------------------------------------
/bits/carver.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 ANSSI. All Rights Reserved.
  2 | #
  3 | # Licensed under the MIT License (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | """Implements a features to carve ill-formatted data."""
  6 | 
  7 | import logging
  8 | import construct.core
  9 | 
 10 | from bits.const import FILE_HEADER, QUEUE_HEADER, XFER_HEADER
 11 | from bits.helpers.fields import PascalUtf16
 12 | from bits.structs import METADATA, \
 13 |                          FILE, FILE_PART_0, \
 14 |                          CONTROL_PART_0, CONTROL_PART_1
 15 | 
 16 | 
 17 | logger = logging.getLogger(__name__)
 18 | 
 19 | 
 20 | def carve_queues(data):
 21 |     """Carve binary queue fragments."""
 22 |     delimiter = bytes.fromhex(QUEUE_HEADER)
 23 |     queues = [q for q in data.split(delimiter) if q.strip(b'\x00')]
 24 |     logger.debug('queues: %d non-empty candidates' % len(queues))
 25 |     return queues
 26 | 
 27 | 
 28 | def carve_jobs(data, delimiter):
 29 |     """Carve binary job fragments."""
 30 |     if delimiter is None:
 31 |         jobs = [data]
 32 |     else:
 33 |         jobs = [j for j in data.split(delimiter) if j.strip(b'\x00')]
 34 | 
 35 |     logger.debug('jobs: %d non-empty candidates' % len(jobs))
 36 |     return jobs
 37 | 
 38 | 
 39 | def rcarve_pascal_utf16(data, *fields):
 40 |     """Search for utf16 fields in bytes."""
 41 |     rv = {}
 42 |     remaining_data = None
 43 | 
 44 |     for field in fields:
 45 |         valid_string = None
 46 | 
 47 |         for i in range(len(data) - 4, -1, -2):
 48 |             try:
 49 |                 valid_string = PascalUtf16().parse(data[i:])
 50 |             except construct.core.ConstructError:
 51 |                 pass    # invalid data
 52 |             else:
 53 |                 rv[field] = valid_string
 54 |                 data = data[:i]
 55 |                 remaining_data = data
 56 |                 break
 57 | 
 58 |         if valid_string is None:
 59 |             remaining_data = None
 60 |             # UGLY: extraction tentative of the remaining bytes
 61 |             for j in range(2, len(data), 2):
 62 |                 try:
 63 |                     res = data[-j:].replace(b'\x00', b'').decode()
 64 |                 except UnicodeDecodeError:
 65 |                     break
 66 |                 else:
 67 |                     if res:
 68 |                         rv[field] = res
 69 |             break       # no more data available
 70 | 
 71 |     return rv, remaining_data
 72 | 
 73 | 
 74 | def files_deep_carving(data, pivot_offset):
 75 |     """Carve partial file information from bytes."""
 76 |     carved_files = []
 77 | 
 78 |     # the data is split in two parts on the pivot offset to separate stable
 79 |     # data from truncated data.
 80 |     partial = data[:pivot_offset]
 81 |     remains = data[pivot_offset:]
 82 | 
 83 |     # process the first bytes for relevant data
 84 |     rv, _ = rcarve_pascal_utf16(partial, 'tmp_fn', 'src_fn', 'dest_fn')
 85 |     if rv:
 86 |         carved_files.append(rv)
 87 |     else:
 88 |         return carved_files
 89 | 
 90 |     # update file #0 informations
 91 |     try:
 92 |         rv = FILE_PART_0.parse(remains)
 93 |     except construct.core.ConstructError:
 94 |         return carved_files
 95 |     else:
 96 |         carved_files[0].update(rv)
 97 |         remains = remains[rv.offset:]
 98 | 
 99 |     # insert files #1 and others if any
100 |     while remains:
101 |         try:
102 |             new_file = FILE.parse(remains)
103 |         except construct.core.ConstructError:
104 |             break
105 |         else:
106 |             carved_files.append(dict(new_file))
107 |             remains = remains[new_file.offset:]
108 | 
109 |     return carved_files
110 | 
111 | 
112 | def control_deep_carving(data, pivot_offset):
113 |     """Carve partial file information from bytes."""
114 |     # the data is split in two parts on the pivot offset to separate stable
115 |     # data from truncated data.
116 |     partial = data[:pivot_offset]
117 |     remains = data[pivot_offset:]
118 | 
119 |     rv, sub_data = rcarve_pascal_utf16(partial, 'args', 'cmd', 'desc', 'name')
120 |     if sub_data and len(sub_data) == 32:
121 |         try:
122 |             rv.update(CONTROL_PART_0.parse(sub_data))
123 |         except construct.core.ConstructError:
124 |             pass
125 | 
126 |     try:
127 |         rv.update(CONTROL_PART_1.parse(remains))
128 |     except construct.core.ConstructError as e:
129 |         pass
130 | 
131 |     return rv
132 | 
133 | 
134 | def deep_carving(data):
135 |     """Try to carve bytes for recognizable data."""
136 | 
137 |     rv = {}
138 | 
139 |     if data.startswith(bytes.fromhex(FILE_HEADER)):
140 |         data = data[16:]
141 | 
142 |     # Search for an SID (always starts with S-1- in utf16)
143 |     pattern = b'S\x00-\x001\x00-\x00'
144 |     sid_index = data.find(pattern)
145 | 
146 |     pattern = b'.\x00t\x00m\x00p\x00'
147 |     bittmp_index = data.find(pattern)
148 | 
149 |     if sid_index > -1:
150 |         rv.update(control_deep_carving(data, sid_index - 4))
151 | 
152 |     elif bittmp_index > -1:
153 |         files = files_deep_carving(data, bittmp_index + 10)
154 |         if files:
155 |             rv['file_count'] = len(files)
156 |             rv['files'] = files
157 | 
158 |     return rv
159 | 
160 | 
161 | def carve_sections(data):
162 |     """Carve data has potential section in a job."""
163 |     # A valid job is comprised of 2 to 3 sections:
164 |     #
165 |     # - description and controls
166 |     # - file transfers (optional)
167 |     # - metadata
168 |     #
169 |     # When carving data, most of the time, the first available section is
170 |     # partially overwritten making it difficult to retrieve relevant data.
171 |     # The last available one is always the metadata section.
172 |     delimiter = bytes.fromhex(XFER_HEADER)
173 |     sections = [s for s in data.split(delimiter) if s.strip(b'\x00')]
174 | 
175 |     lost_bytes = 0
176 | 
177 |     rv = {}
178 | 
179 |     for section in reversed(sections):
180 | 
181 |         logger.debug('searching for file transfers ...')
182 |         files = []
183 | 
184 |         file_count = int.from_bytes(section[:4], byteorder='little')
185 | 
186 |         if file_count * 37 < len(section):
187 |             logger.debug('trying to carve %d transfers' % file_count)
188 |             offset = 4
189 |             while file_count > len(files) and section[offset:]:
190 |                 try:
191 |                     recfile = FILE.parse(section[offset:])
192 |                     if any(v for k, v in recfile.items() if k != 'offset'):
193 |                         files.append(recfile)
194 | 
195 |                     # remove invalid transfer_size
196 |                     if recfile['transfer_size'] == 0xFFFFFFFFFFFFFFFF:
197 |                         recfile['transfer_size'] = ''
198 | 
199 |                 except (UnicodeDecodeError, construct.core.ConstructError):
200 |                     offset += 1
201 |                     if offset == 16:   # don't waste time on irrelevant data.
202 |                         break          # 16 is an arbitrary high value
203 |                 else:
204 |                     if files:
205 |                         logger.debug('new transfer found!')
206 |                         offset += recfile.offset  # the offset is now after the
207 |                                                   # newly carved file transfer
208 | 
209 |         if files:
210 |             rv['file_count'] = file_count
211 |             rv['files'] = files
212 |             continue
213 |         else:
214 |             logger.debug('unrecognized transfer section')
215 | 
216 |         try:
217 |             rv.update(METADATA.parse(section))
218 |         except (OverflowError, construct.core.ConstructError):
219 |             logger.debug('unrecognized metadata section')
220 |         else:
221 |             continue
222 | 
223 |         logger.debug('trying to deep carve %d bytes' % (len(section)))
224 |         remains = deep_carving(section)
225 |         if remains:
226 |             rv.update(remains)
227 | 
228 |         else:
229 |             lost_bytes += len(section)
230 | 
231 |     if lost_bytes:
232 |         logger.debug('%d bytes of unknown data' % lost_bytes)
233 | 
234 |     return rv, lost_bytes
235 | 


--------------------------------------------------------------------------------
/bits/const.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 ANSSI. All Rights Reserved.
 2 | #
 3 | # Licensed under the MIT License (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | """Known constants."""
 6 | 
 7 | FILE_HEADER =    '13F72BC84099124A9F1A3AAEBD894EEA'
 8 | QUEUE_HEADER =   '47445F00A9BDBA449851C47BB6C07ACE'
 9 | XFER_HEADER =    '36DA56776F515A43ACAC44A248FFF34D'
10 | XFER_DELIMITER = '03000000'
11 | 
12 | WINVER = {
13 |     0: 'NT 5.1',    # Windows 2003 / Windows XP
14 |     1: 'NT 5.2',    # Windows 2003 R2 / Windows XP 64
15 |     2: 'NT 6.0',    # Windows Vista / Windows 2008
16 |     3: 'NT 6.1',    # Windows 7 / Windows 2008 R2
17 |     4: 'NT 6.2',    # Windows 8 / Windows 2012
18 |     5: 'NT 6.3',    # Windows 8.1 / Windows 2012 R2
19 | }
20 | 
21 | 
22 | # each version of BITS has its own job delimiter.
23 | JOB_DELIMITERS = {
24 |     1: '93362035A00C104A84F3B17E7B499CD7',
25 |     2: '101370C83653B34183E581557F361B87',
26 |     3: '8C93EA64030F6840B46FF97FE51D4DCD',
27 |     4: 'B346ED3D3B10F944BC2FE8378BD31986',
28 |     5: '74E70C81D2BBCC489E47862E8D58F3C6',
29 | }
30 | 


--------------------------------------------------------------------------------
/bits/helpers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ANSSI-FR/bits_parser/bd3c79b0ccc9191ecc8209e9f0b836a2b16e0357/bits/helpers/__init__.py


--------------------------------------------------------------------------------
/bits/helpers/fields.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 ANSSI. All Rights Reserved.
 2 | #
 3 | # Licensed under the MIT License (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | """Some helpers."""
 6 | 
 7 | from uuid import UUID as _UUID
 8 | from datetime import datetime, timedelta
 9 | from construct import Adapter, Sequence, RepeatUntil, Byte, Bytes, Computed, \
10 |                       Int32ul, Seek, this, Container
11 | 
12 | 
13 | class _StripDelimiter(Adapter):
14 | 
15 |     def _decode(self, obj, context, path):
16 |         return bytes(obj[1])
17 | 
18 | 
19 | class _Utf16(Adapter):
20 | 
21 |     def _decode(self, obj, context, path):
22 |         try:
23 |             return obj[1].decode('utf16').strip('\x00')
24 |         except UnicodeDecodeError:
25 |             # TODO: improve that
26 |             return 'unreadable data'
27 | 
28 | class DateTime(Adapter):
29 | 
30 |     def _decode(self, obj, context, path):
31 |         return datetime.fromtimestamp(obj)
32 | 
33 | 
34 | class UUID(Adapter):
35 | 
36 |     def _decode(self, obj, context, path):
37 |         return str(_UUID(bytes_le=obj))
38 | 
39 | 
40 | class FileTime(Adapter):
41 | 
42 |     def _decode(self, obj, context, path):
43 |         return datetime(1601, 1, 1) + timedelta(microseconds=(obj / 10))
44 | 
45 | def DelimitedField(stop):
46 | 
47 |     return _StripDelimiter(Sequence(
48 |         'with_delimiter' / RepeatUntil(
49 |             lambda x, lst, ctx: lst[-len(stop):] == [int(c) for c in stop],
50 |             Byte
51 |         ),
52 |         'stripped' / Computed(this['with_delimiter'][:-len(stop)]),
53 |         Seek(-len(stop), whence=1)
54 |     ))
55 | 
56 | 
57 | def PascalUtf16(size_type=Int32ul):
58 |     """Parse a length-defined string in UTF-16."""
59 | 
60 |     return _Utf16(Sequence(
61 |         'size_type' / size_type,
62 |         Bytes(this['size_type'] * 2),
63 |     ))
64 | 
65 | 
66 | class FlattenStruct(Adapter):
67 |     
68 |     def _decode(self, obj, context, path):
69 |         result = Container()
70 |         for key, value in obj.items():
71 |             if type(value) is Container:
72 |                 result.update(value)
73 |             else:
74 |                 result[key] = value
75 |         
76 |         return result
77 | 


--------------------------------------------------------------------------------
/bits/helpers/tools.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 ANSSI. All Rights Reserved.
 2 | #
 3 | # Licensed under the MIT License (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | """Some helpers."""
 6 | 
 7 | 
 8 | def tcid(obj, key, default=None):
 9 |     """Search a dict by its value."""
10 |     d = {v: k for k, v in obj.items()}
11 |     return d.get(key, default)
12 | 
13 | 
14 | def btcid(obj, key, default=None):
15 |     """Search a binary value in a dict."""
16 |     if hasattr(key, 'hex'):
17 |         key = key.hex().upper()
18 |     return tcid(obj, key, default)
19 | 


--------------------------------------------------------------------------------
/bits/sampler.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 ANSSI. All Rights Reserved.
 2 | #
 3 | # Licensed under the MIT License (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | """Disk analysis features."""
 6 | import logging
 7 | 
 8 | 
 9 | from pathlib import Path
10 | 
11 | logger = logging.getLogger(__name__)
12 | 
13 | 
14 | def _radiance_read(f, start_offset, pattern, radiance):
15 | 
16 |     # Radiance algorithm :
17 |     #
18 |     #      @0             @1             @2
19 |     #  <--------[pattern]----[pattern]-------->
20 |     #
21 |     # @0: predecessing bytes not containing the pattern.
22 |     # @1: intermediate data not containing the pattern with a size
23 |     #    inferior at the size of the radiance.
24 |     # @2: following bytes not containing the pattern.
25 |     #
26 |     # size(@0) == size(@2) == size(radiance)
27 |     # size(@1) < size(radiance)
28 | 
29 |     # get predecessing bytes
30 |     f.seek(start_offset)
31 |     rv = f.read((radiance * 1024) + len(pattern))   # read @0 + 1st pattern
32 | 
33 |     while True:
34 |         rv_tmp = f.read(radiance * 1024)
35 | 
36 |         if len(rv_tmp) < radiance * 1024:               # end of the file
37 |             return rv + rv_tmp
38 | 
39 |         local_offset = rv_tmp.rfind(pattern)
40 |         if local_offset >= 0:                           # intermediate pattern
41 |             rv += rv_tmp[:local_offset + len(pattern)]
42 |             f.seek(f.tell() - (radiance * 1024) + local_offset + len(pattern))
43 |         else:
44 |             return rv + rv_tmp                          # pattern not found
45 | 
46 | 
47 | def sample_disk(img_fp, pattern, radiance=4096):
48 |     """Extract interesting disk image samples containing a specific pattern.
49 | 
50 |     img_fp: disk image file path.
51 |     pattern: bytes or hex-string of the specific pattern.
52 |     radiance: size in kB of collected data not containing the pattern
53 |         surrounding the matched pattern.
54 | 
55 |     Yields: disk samples (bytes)
56 |     """
57 | 
58 |     img_fp = Path(img_fp).resolve()
59 | 
60 |     logger.info('disk analysis of %s', img_fp)
61 |     logger.info('search for pattern 0x%s R:%d', pattern, radiance)
62 | 
63 |     # ensure pattern is bytes
64 |     if isinstance(pattern, str):
65 |         pattern = bytes.fromhex(pattern)
66 | 
67 |     buf = [bytearray(512), bytearray(512)]  # dual buffer
68 | 
69 |     with img_fp.open('rb') as f:
70 |         while f.readinto(buf[1]):
71 |             data = b''.join(buf)
72 | 
73 |             local_offset = data.find(pattern, 511-len(pattern))
74 |             if local_offset >= 0:
75 | 
76 |                 # absolute offset of the pattern in the file.
77 |                 abs_offset = f.tell() - 1024 + local_offset
78 | 
79 |                 # radiance start offset
80 |                 start_offset = max(0, abs_offset - (radiance * 1024))
81 |                 yield _radiance_read(f, start_offset, pattern, radiance)
82 | 
83 |             buf.reverse()   # permute the list
84 | 
85 |     logger.info('disk analysis complete')
86 | 


--------------------------------------------------------------------------------
/bits/structs.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 ANSSI. All Rights Reserved.
  2 | #
  3 | # Licensed under the MIT License (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | """Data structures.
  6 | 
  7 | Multiple data structures are available. Those structures are defined to
  8 | facilitate parsing and carving but returns an object or list of objects
  9 | containing all the following fields:
 10 | 
 11 | header
 12 | job_count
 13 | jobs ->
 14 |     type                    Job type (enumeration).
 15 |     priority                Job priority (enumeration).
 16 |     state                   State of the job (enumeration).
 17 |     job_id                  UUID of the job.
 18 |     name                    Name of the job.
 19 |     desc                    Description string of the job.
 20 |     cmd                     Command executed when the job is done.
 21 |     args                    Arguments of the command.
 22 |     sid                     Owner of the job.
 23 |     flags
 24 |     access_token
 25 |     file_count              Count of transferred files of the job.
 26 |     files ->
 27 |         dest_fn             Destination file path of a file.
 28 |         src_fn              Source URL.
 29 |         tmp_fn              Temporary file path of a file.
 30 |         download_size       The count of donwloaded bytes.
 31 |         transfer_size
 32 |         drive               Destination drive.
 33 |         vol_guid            Volume GUID of the drive.
 34 |     error_count
 35 |     errors ->
 36 |         code
 37 |         stat1
 38 |         stat2
 39 |         stat3
 40 |         stat4
 41 |     transient_error_count
 42 |     retry_delay
 43 |     timeout
 44 |     ctime
 45 |     mtime
 46 |     other_time0
 47 |     other_time1
 48 |     other_time2
 49 | 
 50 | 
 51 | """
 52 | 
 53 | # available fields
 54 | 
 55 | 
 56 | 
 57 | from bits.const import FILE_HEADER, QUEUE_HEADER, XFER_HEADER
 58 | 
 59 | from bits.helpers.fields import DelimitedField, PascalUtf16, FileTime, UUID, FlattenStruct
 60 | from construct import Struct, Array, Enum, Const, GreedyBytes, Int64ul, \
 61 |     Int32ul, Bytes, Byte, Pass, Padding, Tell, Seek, this
 62 | 
 63 | 
 64 | QUEUE = Struct(
 65 |     'header'        / DelimitedField(bytes.fromhex(FILE_HEADER)),
 66 |     Const(bytes.fromhex(FILE_HEADER)),
 67 |     Const(bytes.fromhex(QUEUE_HEADER)),
 68 |     'job_count'     / Int32ul,
 69 |     'jobs'          / DelimitedField(bytes.fromhex(QUEUE_HEADER)),
 70 |     Const(bytes.fromhex(QUEUE_HEADER)),
 71 |     'unknown'       / DelimitedField(bytes.fromhex(FILE_HEADER)),
 72 |     Const(bytes.fromhex(FILE_HEADER)),
 73 |     'remains'       / GreedyBytes,
 74 | )
 75 | 
 76 | 
 77 | # CONTROL : job control informations
 78 | CONTROL_PART_0 = Struct(
 79 |     'type'          / Enum(Int32ul, default=Pass,
 80 |         download=0,
 81 |         upload=1,
 82 |         upload_reply=2),
 83 |     'priority'      / Enum(Int32ul, default=Pass,
 84 |         foreground=0,
 85 |         high=1,
 86 |         normal=2,
 87 |         low=3),
 88 |     'state'         / Enum(Int32ul, default=Pass,
 89 |         queued=0,
 90 |         connecting=1,
 91 |         transferring=2,
 92 |         suspended=3,
 93 |         error=4,
 94 |         transient_error=5,
 95 |         transferred=6,
 96 |         acknowleged=7,
 97 |         cancelled=8),
 98 |     Int32ul,
 99 |     'job_id'        / UUID(Bytes(16)),
100 | )
101 | 
102 | 
103 | CONTROL_PART_1 = Struct(
104 |     'sid'           / PascalUtf16(Int32ul),
105 |     'flags'         / Enum(Int32ul, default=Pass,
106 |         BG_NOTIFY_JOB_TRANSFERRED=1,
107 |         BG_NOTIFY_JOB_ERROR=2,
108 |         BG_NOTIFY_JOB_TRANSFERRED_BG_NOTIFY_JOB_ERROR=3,
109 |         BG_NOTIFY_DISABLE=4,
110 |         BG_NOTIFY_JOB_TRANSFERRED_BG_NOTIFY_DISABLE=5,
111 |         BG_NOTIFY_JOB_ERROR_BG_NOTIFY_DISABLE=6,
112 |         BG_NOTIFY_JOB_TRANSFERRED_BG_NOTIFY_JOB_ERROR_BG_NOTIFY_DISABLE=7,
113 |         BG_NOTIFY_JOB_MODIFICATION=8,
114 |         BG_NOTIFY_FILE_TRANSFERRED=16),
115 | )
116 | 
117 | 
118 | CONTROL = FlattenStruct(Struct(
119 |     'control_part_0' / CONTROL_PART_0,
120 |     'name'          / PascalUtf16(Int32ul),
121 |     'desc'          / PascalUtf16(Int32ul),
122 |     'cmd'           / PascalUtf16(Int32ul),
123 |     'args'          / PascalUtf16(Int32ul),
124 |     'control_part_1' / CONTROL_PART_1,
125 |     'access_token'  / DelimitedField(bytes.fromhex(XFER_HEADER)),
126 | ))
127 | 
128 | 
129 | # XFER : file transfer informations
130 | 
131 | FILE_PART_0 = Struct(
132 |     'download_size' / Int64ul,
133 |     'transfer_size' / Int64ul,
134 |     Byte,
135 |     'drive'         / PascalUtf16(Int32ul),
136 |     'vol_guid'      / PascalUtf16(Int32ul),
137 |     'offset'        / Tell,                     # required by carving
138 | )
139 | 
140 | 
141 | FILE = FlattenStruct(Struct(
142 |     DelimitedField(b':'),
143 |     Seek(-6, whence=1),
144 |     'dest_fn'       / PascalUtf16(Int32ul),
145 |     'src_fn'        / PascalUtf16(Int32ul),
146 |     'tmp_fn'        / PascalUtf16(Int32ul),     # always ends with .tmp
147 |     'file_part_0' / FILE_PART_0,
148 | ))
149 | 
150 | 
151 | ERROR = Struct(
152 |      'code'         / Int64ul,
153 |      'stat1'        / Int32ul,
154 |      'stat2'        / Int32ul,
155 |      'stat3'        / Int32ul,
156 |      'stat4'        / Int32ul,
157 |      Byte
158 | )
159 | 
160 | 
161 | METADATA = Struct(
162 |     'error_count'     / Int32ul,
163 |     'errors'        / Array(this.error_count, ERROR),
164 |     'transient_error_count' / Int32ul,
165 |     'retry_delay'   / Int32ul,
166 |     'timeout'       / Int32ul,
167 |     'ctime'         / FileTime(Int64ul),
168 |     'mtime'         / FileTime(Int64ul),
169 |     'other_time0'   / FileTime(Int64ul),
170 |     Padding(14),
171 |     'other_time1'   / FileTime(Int64ul),
172 |     'other_time2'   / FileTime(Int64ul),
173 | )
174 | 
175 | 
176 | JOB = FlattenStruct(Struct(
177 |     'control' / CONTROL,
178 |     Const(bytes.fromhex(XFER_HEADER)),
179 |     'file_count'    / Int32ul,
180 |     'files'         / DelimitedField(bytes.fromhex(XFER_HEADER)),
181 |     Const(bytes.fromhex(XFER_HEADER)),
182 |     'metadata' / METADATA,
183 | ))
184 | 


--------------------------------------------------------------------------------
/bits/writer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 ANSSI. All Rights Reserved.
 2 | #
 3 | # Licensed under the MIT License (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | """CSV writer."""
 6 | import csv
 7 | 
 8 | 
 9 | DEFAULT_VALUES = (
10 |     ('job_id', None),
11 |     ('name', None),
12 |     ('desc', None),
13 |     ('type', None),
14 |     ('priority', None),
15 |     ('sid', None),
16 |     ('state', None),
17 |     ('cmd', None),
18 |     ('args', None),
19 |     ('file_count', 0),
20 |     ('file_id', 0),
21 |     ('dest_fn', None),
22 |     ('src_fn', None),
23 |     ('tmp_fn', None),
24 |     ('download_size', -1),
25 |     ('transfer_size', -1),
26 |     ('drive', None),
27 |     ('vol_guid', None),
28 |     ('ctime', None),
29 |     ('mtime', None),
30 |     ('other_time0', None),
31 |     ('other_time1', None),
32 |     ('other_time2', None),
33 |     ('carved', False)
34 | )
35 | 
36 | 
37 | def flattener(job):
38 | 
39 |     def _f(index, file):
40 |         rv = {k: file.get(k, job.get(k, v))  for k, v in DEFAULT_VALUES}
41 |         rv['file_id'] = index
42 |         return rv
43 | 
44 |     files = job.get('files', [])
45 | 
46 |     if files:
47 |         return [_f(index, f) for index, f in enumerate(files)]
48 | 
49 |     return [_f(0, {})]
50 | 
51 | 
52 | def write_csv(filename, records):
53 |     """Write records to a CSV file."""
54 | 
55 |     with filename.open('w') as csvfile:
56 |         writer = csv.DictWriter(csvfile,
57 |                                 fieldnames=[k for k, _ in DEFAULT_VALUES])
58 |         writer.writeheader()
59 |         for r in records:
60 |             for sub_r in flattener(r):
61 |                 writer.writerow(sub_r)
62 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | construct==2.10.70
2 | docopt==0.6.2
3 | 


--------------------------------------------------------------------------------
/scripts/bits_parser:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Extract BITS jobs from QMGR queue or disk image to CSV file.
  4 | 
  5 | Usage:
  6 |   bits_parser [options] [-o OUTPUT] FILE
  7 | 
  8 | Options:
  9 |   --no-carving                        Disable carving.
 10 | 
 11 |   --disk-image, -i                    Data input is a disk image.
 12 |   --radiance=VALUE                    Radiance in kB. [default: 2048]
 13 |   --skip-sampling                     Skip sampling and load file in memory.
 14 |   --checkpoint=PATH                   Store disk checkpoint file.
 15 | 
 16 |   --out=OUTPUT, -o OUTPUT             Write result to OUTPUT [default: stdout]
 17 |   --verbose, -v                       More verbosity.
 18 |   --debug                             Display debug messages.
 19 | 
 20 |   --help, -h                          Show this screen.
 21 |   --version                           Show version.
 22 | """
 23 | 
 24 | from docopt import docopt
 25 | from pathlib import Path
 26 | 
 27 | import bits
 28 | import logging
 29 | import logging.config
 30 | 
 31 | from bits.const import XFER_HEADER
 32 | 
 33 | # default logger configuration
 34 | logging.config.dictConfig({
 35 |     'version': 1,
 36 |     'disable_existing_loggers': False,
 37 |     'formatters': {
 38 |         'default': {
 39 |             'format': ('%(asctime)s.%(msecs)03d '
 40 |                        '[%(levelname)s] %(name)s: %(message)s'),
 41 |             'datefmt': '%Y-%m-%dT%H:%M:%S'
 42 |         },
 43 |     },
 44 |     'handlers': {
 45 |         'default': {
 46 |             'class': 'logging.StreamHandler',
 47 |             'formatter': 'default',
 48 |         },
 49 |     },
 50 |     'loggers': {
 51 |         '': {
 52 |             'handlers': ['default'],
 53 |             'level': 'WARNING',
 54 |             'propagate': True,
 55 |         },
 56 |     },
 57 | })
 58 | 
 59 | 
 60 | if __name__ == '__main__':
 61 | 
 62 |     args = docopt(__doc__, version=bits.__version__)
 63 | 
 64 |     if args['--verbose']:
 65 |         logging.getLogger().setLevel(logging.INFO)
 66 | 
 67 |     if args['--debug']:
 68 |         logging.getLogger().setLevel(logging.DEBUG)
 69 | 
 70 |     file_in = Path(args['FILE'])
 71 |     file_out = Path(
 72 |         '/dev/stdout' if args['--out'] == 'stdout' else args['--out']
 73 |     )
 74 | 
 75 |     if args['--disk-image'] and not args['--skip-sampling']:
 76 |         # load interesting fragments as raw data
 77 |         analyzer = bits.Bits()
 78 |         radiance = int(args['--radiance'])
 79 | 
 80 |         checkpoint = None
 81 |         checkpoint_fp = args['--checkpoint']
 82 |         if checkpoint_fp is not None:
 83 |             checkpoint_fp = Path(checkpoint_fp)
 84 |             checkpoint = checkpoint_fp.open('wb')
 85 | 
 86 |         for sample in bits.sample_disk(file_in, XFER_HEADER, radiance):
 87 |             analyzer.append_data(sample)
 88 |             if checkpoint:
 89 |                 checkpoint.write(sample)
 90 | 
 91 |         if checkpoint:
 92 |             checkpoint.close()
 93 | 
 94 |         analyzer.guess_info()
 95 |     elif args['--disk-image']:
 96 |         analyzer = bits.Bits()
 97 |         with file_in.open('rb') as f:
 98 |             analyzer.append_data(f.read())
 99 |         analyzer.guess_info()
100 | 
101 |     else:
102 |         analyzer = bits.Bits.load_file(file_in)
103 | 
104 |     jobs = analyzer.parse() if args['--no-carving'] else analyzer
105 |     bits.write_csv(file_out, jobs)
106 | 
107 |     exit()
108 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """bits_parser"""
 3 | import sys
 4 | assert sys.version_info.major == 3, 'Python 3 required'
 5 | 
 6 | import re
 7 | from pathlib import Path
 8 | from setuptools import setup, find_packages
 9 | 
10 | 
11 | # read the version number from package
12 | with (Path(__file__).resolve().parent / 'bits' / '__init__.py').open() as f:
13 |     v, = re.search(".*__version__ = '(.*)'.*", f.read(), re.MULTILINE).groups()
14 | 
15 | 
16 | setup(
17 | 
18 |     name='bits_parser',
19 |     version=v,
20 | 
21 |     author='ANSSI-INM',
22 |     author_email='',
23 | 
24 |     url='',
25 |     description=__doc__,
26 |     long_description=open('README.rst').read(),
27 | 
28 |     install_requires=open('requirements.txt').read().splitlines(),
29 | 
30 |     packages=find_packages(),
31 |     include_package_data=True,
32 |     zip_safe=False,
33 |     scripts=[
34 |         'scripts/bits_parser',
35 |     ],
36 |     license='MIT',
37 |     platforms='any',
38 |     classifiers=[
39 |         'Development Status :: 4 - Beta',
40 |         'Programming Language :: Python',
41 |         'Programming Language :: Python :: 3',
42 |         'Programming Language :: Python :: 3.5',
43 |         'Programming Language :: Python :: 3.6',
44 |     ]
45 | 
46 | )
47 | 


--------------------------------------------------------------------------------