├── .gitignore
├── ANNOUNCE
├── AUTHORS
├── CHANGES
├── LICENSE
├── MANIFEST.in
├── README.rst
├── bits
│   ├── __init__.py
│   ├── bits.py
│   ├── carver.py
│   ├── const.py
│   ├── helpers
│   │   ├── __init__.py
│   │   ├── fields.py
│   │   └── tools.py
│   ├── sampler.py
│   ├── structs.py
│   └── writer.py
├── requirements.txt
├── scripts
│   └── bits_parser
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cod]
2 |
3 | # C extensions
4 | *.so
5 |
6 | # Packages
7 | *.egg
8 | *.egg-info
9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 |
21 | # Installer logs
22 | pip-log.txt
23 |
24 | # Vim
25 | *.swp
26 | *.un~
27 |
28 | # Atom
29 | .cache
30 |
31 | # Generated documentation
32 | docs/_build/*
33 |
34 | # tests
35 | .coverage
36 | .hypothesis
37 |
--------------------------------------------------------------------------------
/ANNOUNCE:
--------------------------------------------------------------------------------
1 | =================
2 | bits_parser 1.0.0
3 | =================
4 |
5 | What is bits_parser?
6 | ====================
7 |
8 | bits_parser is a tool to analyze Background Intelligent Transfer Service
9 | (BITS) queue files for forensic purposes. It also supports disk carving and more.
10 |
11 | Run bits_parser --help for more info.
12 |
--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | ANSSI - Bureau Investigation Numérique
2 |
--------------------------------------------------------------------------------
/CHANGES:
--------------------------------------------------------------------------------
1 | # CHANGELOG
2 |
3 | All notable changes to this project will be documented in this file.
4 |
5 | The format is based on [Keep a Changelog](http://keepachangelog.com/) and this
6 | project adheres to [Semantic Versioning](http://semver.org/).
7 |
8 | ## 1.0.0 - 2018-01-22
9 | Public release.
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2018 ANSSI
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include ANNOUNCE AUTHORS CHANGES LICENSE README.rst requirements.txt
2 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | ===========
2 | bits_parser
3 | ===========
4 |
5 |
6 | Extract BITS jobs from QMGR queue and store them as CSV records.
7 |
8 | This tool was presented during a talk at the French conference `CORI&IN 2018 `_.
9 |
10 |
11 | Installation
12 | ============
13 |
14 | If you want to run the latest version of ``bits_parser`` you can install it
15 | from PyPI by running the following command:
16 |
17 | .. code:: bash
18 |
19 | pip install bits_parser
20 |
21 |
22 | To install it from the sources:
23 |
24 | .. code:: bash
25 |
26 | python setup.py install
27 |
28 |
29 | Usage
30 | =====
31 |
32 | QMGR queues are usually *.dat* files located in the folder
33 | ``%ALLUSERSPROFILE%\Microsoft\Network\Downloader`` on a Windows system.
34 |
35 | Once those files have been located (*e.g.* ``qmgr0.dat`` and ``qmgr1.dat``) you
36 | can run `bits_parser` by issuing the following command:
37 |
38 | .. code:: bash
39 |
40 | bits_parser qmgr0.dat
41 |
42 | `bits_parser` also supports full-disk analysis but the process is longer and
43 | the results are dirtier (some data from adjacent disk clusters can leak into
44 | the results). This mode is enabled with the switch `-i`:
45 |
46 | .. code:: bash
47 |
48 | bits_parser -i image.bin
49 |
50 | The disk mode works by looking for expected byte sequences (markers) and
51 | collecting the surrounding data. The amount of surrounding data (the radiance)
52 | is configurable and defaults to 2048 kB:
53 |
54 | .. code:: bash
55 |
56 | bits_parser -i --radiance=4096 image.bin
57 |
58 | Increasing the radiance may help retrieve more data, but the default value is
59 | usually sufficient.
60 |
61 | When the processing is finished, the result is CSV-formatted and displayed
62 | on the standard output. The output can be written to a file with `-o`:
63 |
64 | .. code:: bash
65 |
66 | bits_parser -o jobs.csv qmgr0.dat
67 |
68 | Use `--help` to display all options of ``bits_parser``.
69 |
70 |
71 | Related works
72 | =============
73 |
74 | `Finding your naughty BITS `_ [DFRWS USA 2015, Matthew Geiger]
75 |
76 | `BITSInject `_ [DEFCON 2017, Dor Azouri]
77 |
--------------------------------------------------------------------------------
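The same pipeline can also be driven from Python. A minimal sketch using the
public API exported by bits/__init__.py, mirroring what scripts/bits_parser
does (``qmgr0.dat`` and ``jobs.csv`` are placeholder paths):

    from pathlib import Path
    import bits

    # parse the queue file and guess the job delimiter
    analyzer = bits.Bits.load_file('qmgr0.dat')
    # iterating a Bits object yields parsed jobs, then carved jobs
    bits.write_csv(Path('jobs.csv'), analyzer)
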
/bits/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 ANSSI. All Rights Reserved.
2 | #
3 | # Licensed under the MIT License (the "License");
4 | # you may not use this file except in compliance with the License.
5 | """bits_parser"""
6 |
7 | import logging
8 | from bits.bits import Bits
9 | from bits.writer import write_csv
10 | from bits.sampler import sample_disk
11 |
12 |
13 | logger = logging.getLogger(__name__)
14 |
15 |
16 | __version__ = '1.0.0'
17 | __all__ = ('Bits',)
18 |
--------------------------------------------------------------------------------
/bits/bits.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 ANSSI. All Rights Reserved.
2 | #
3 | # Licensed under the MIT License (the "License");
4 | # you may not use this file except in compliance with the License.
5 | """Bits object."""
6 | import logging
7 | import construct.core
8 |
9 | from pathlib import Path
10 |
11 | from bits.structs import QUEUE, JOB, FILE
12 | from bits.const import JOB_DELIMITERS, XFER_DELIMITER
13 | from bits.carver import carve_queues, carve_jobs, carve_sections
14 |
15 | logger = logging.getLogger(__name__)
16 |
17 |
18 | class Bits:
19 | """
20 | An interface to store data and apply different strategies to extract job
21 | details from legitimate or (partially) corrupted data.
22 |
23 | Args:
24 | delimiter: force the job delimiter.
25 | """
26 |
27 | def __init__(self, delimiter=None):
28 |
29 | self._raw_data = bytes()
30 | self._bits_data = bytes()
31 | self.delimiter = delimiter
32 |
33 | @classmethod
34 | def load_file(cls, fp):
35 | """Create a Bits instance and load data from a QMGR file.
36 |
37 | This method is a simple helper to append the content of a file and
38 | automatically call `guess_info()`.
39 |
40 | Args:
41 | fp: file path to a QMGR file.
42 | """
43 | logger.info('Processing BITS queue %s' % fp)
44 |
45 | rv = cls()
46 |
47 | path = Path(fp).resolve()
48 | with path.open('rb') as f:
49 | data = f.read()
50 | try:
51 | content = QUEUE.parse(data)
52 | rv.append_data(content.jobs, raw=False)
53 | rv.append_data(content.remains, raw=True)
54 | if content.job_count:
55 | logger.info('%s legitimate job(s) detected' % content.job_count)
56 |
57 | except construct.core.ConstructError:
58 | logger.warning('incoherent data, carving mode only.')
59 | rv.append_data(data, raw=True)
60 |
61 | rv.guess_info()
62 | return rv
63 |
64 | def append_data(self, data, raw=True):
65 | """Append data to analyze.
66 |
67 | Args:
68 | data: bytes to append.
69 | raw: true when appending unparsed raw data.
70 | """
71 | data = data.strip(b'\x00') # strip unwanted zeroes
72 | logger.debug('%d bytes loaded (raw=%s)' % (len(data), raw))
73 | if raw:
74 | self._raw_data += data
75 | else:
76 | self._bits_data += data
77 |
78 | def guess_info(self):
79 | """Try to guess information from available data."""
80 | # select as candidate the known delimiter with the most occurrences
81 | data = self._bits_data + self._raw_data
82 |
83 | if not self.delimiter:
84 | count, candidate = max(
85 | (data.count(bytes.fromhex(d)), bytes.fromhex(d))
86 | for d in JOB_DELIMITERS.values()
87 | )
88 |
89 | self.delimiter = candidate if count else None
90 |
91 | # log
92 | if self.delimiter is not None:
93 | logger.info('Job delimiter is %s' % self.delimiter.hex().upper())
94 | else:
95 | logger.warning('Job delimiter is undefined')
96 |
97 | def parse(self):
98 | """Parse and yield job data in BITS data structures.
99 |
100 | This method is based on expected data structures in a BITS queue and
101 | works on well-formatted data.
102 |
103 | Yields: jobs.
104 | """
105 | xfer_delimiter = bytes.fromhex(XFER_DELIMITER)
106 |
107 | if self._bits_data and self.delimiter:
108 | logger.debug('Analysis of %d bytes' % len(self._bits_data))
109 | chunks = (j for j in self._bits_data.split(self.delimiter) if j)
110 | for data in chunks:
111 |
112 | try:
113 | job = dict(JOB.parse(data))
114 | except construct.core.ConstructError:
115 | logger.debug('%d bytes of unknown data' % len(data))
116 | continue
117 |
118 | xfers = (x for x in job.pop('files').split(xfer_delimiter))
119 | job['files'] = []
120 |
121 | for f in xfers:
122 | try:
123 | job['files'].append(FILE.parse(f))
124 | except construct.core.ConstructError:
125 | logger.debug('%d bytes of unknown data' % len(f))
126 |
127 | if job['file_count'] != len(job['files']):
128 | err_msg = 'Invalid transfer count: %d found, %d expected.'
129 | logger.warning(err_msg % (len(job['files']),
130 | job['file_count']))
131 |
132 | yield job
133 | else:
134 | logger.info('No legitimate data found.')
135 |
136 | def carve(self, raw=True):
137 | """Search and yield job data in raw bytes by carving it.
138 |
139 | This method uses multiple functions to retrieve fragments of queues,
140 | jobs or internal sections and consolidates them all together.
141 |
142 | Data with no relevant information (empty or completely erroneous) is
143 | dropped.
144 |
145 | Args:
146 | raw: carve raw bytes (default: True)
147 |
148 | Yields: jobs or partial jobs.
149 | """
150 | data = self._raw_data if raw else self._bits_data
151 | logger.debug('Analysis of %d bytes' % len(data))
152 |
153 | for b_queue in carve_queues(data):
154 | for b_job in carve_jobs(b_queue, self.delimiter):
155 | job, lost_bytes = carve_sections(b_job)
156 |
157 | # no job data
158 | if not job:
159 | continue
160 |
161 | # no value found
162 | if not any(job.values()):
163 | continue
164 |
165 | # no file information
166 | if job.get('file_count', 0) == 1 and \
167 | not any(job['files'][0].values()):
168 | continue
169 |
170 | job['carved'] = True # indicate the job was carved
171 | yield job
172 |
173 | def __iter__(self):
174 |
175 | yield from self.parse()
176 | yield from self.carve()
177 |
--------------------------------------------------------------------------------
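How guess_info() picks the job delimiter: each known per-version delimiter
from bits/const.py is counted in the loaded data and the most frequent
candidate wins. A synthetic check (the trailing job payload is a placeholder):

    from bits import Bits
    from bits.const import JOB_DELIMITERS

    b = Bits()
    # synthetic fragment holding two occurrences of the BITS v5 delimiter
    b.append_data(bytes.fromhex(JOB_DELIMITERS[5]) * 2 + b'job-bytes')
    b.guess_info()  # logs 'Job delimiter is 74E7...' at INFO level
    assert b.delimiter == bytes.fromhex(JOB_DELIMITERS[5])
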
/bits/carver.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 ANSSI. All Rights Reserved.
2 | #
3 | # Licensed under the MIT License (the "License");
4 | # you may not use this file except in compliance with the License.
5 | """Implements a features to carve ill-formatted data."""
6 |
7 | import logging
8 | import construct.core
9 |
10 | from bits.const import FILE_HEADER, QUEUE_HEADER, XFER_HEADER
11 | from bits.helpers.fields import PascalUtf16
12 | from bits.structs import METADATA, \
13 | FILE, FILE_PART_0, \
14 | CONTROL_PART_0, CONTROL_PART_1
15 |
16 |
17 | logger = logging.getLogger(__name__)
18 |
19 |
20 | def carve_queues(data):
21 | """Carve binary queue fragments."""
22 | delimiter = bytes.fromhex(QUEUE_HEADER)
23 | queues = [q for q in data.split(delimiter) if q.strip(b'\x00')]
24 | logger.debug('queues: %d non-empty candidates' % len(queues))
25 | return queues
26 |
27 |
28 | def carve_jobs(data, delimiter):
29 | """Carve binary job fragments."""
30 | if delimiter is None:
31 | jobs = [data]
32 | else:
33 | jobs = [j for j in data.split(delimiter) if j.strip(b'\x00')]
34 |
35 | logger.debug('jobs: %d non-empty candidates' % len(jobs))
36 | return jobs
37 |
38 |
39 | def rcarve_pascal_utf16(data, *fields):
40 | """Search for utf16 fields in bytes."""
41 | rv = {}
42 | remaining_data = None
43 |
44 | for field in fields:
45 | valid_string = None
46 |
47 | for i in range(len(data) - 4, -1, -2):
48 | try:
49 | valid_string = PascalUtf16().parse(data[i:])
50 | except construct.core.ConstructError:
51 | pass # invalid data
52 | else:
53 | rv[field] = valid_string
54 | data = data[:i]
55 | remaining_data = data
56 | break
57 |
58 | if valid_string is None:
59 | remaining_data = None
60 | # UGLY: attempt to extract the remaining bytes
61 | for j in range(2, len(data), 2):
62 | try:
63 | res = data[-j:].replace(b'\x00', b'').decode()
64 | except UnicodeDecodeError:
65 | break
66 | else:
67 | if res:
68 | rv[field] = res
69 | break # no more data available
70 |
71 | return rv, remaining_data
72 |
73 |
74 | def files_deep_carving(data, pivot_offset):
75 | """Carve partial file information from bytes."""
76 | carved_files = []
77 |
78 | # the data is split into two parts at the pivot offset to separate stable
79 | # data from truncated data.
80 | partial = data[:pivot_offset]
81 | remains = data[pivot_offset:]
82 |
83 | # process the first bytes for relevant data
84 | rv, _ = rcarve_pascal_utf16(partial, 'tmp_fn', 'src_fn', 'dest_fn')
85 | if rv:
86 | carved_files.append(rv)
87 | else:
88 | return carved_files
89 |
90 | # update file #0 information
91 | try:
92 | rv = FILE_PART_0.parse(remains)
93 | except construct.core.ConstructError:
94 | return carved_files
95 | else:
96 | carved_files[0].update(rv)
97 | remains = remains[rv.offset:]
98 |
99 | # insert files #1 and others if any
100 | while remains:
101 | try:
102 | new_file = FILE.parse(remains)
103 | except construct.core.ConstructError:
104 | break
105 | else:
106 | carved_files.append(dict(new_file))
107 | remains = remains[new_file.offset:]
108 |
109 | return carved_files
110 |
111 |
112 | def control_deep_carving(data, pivot_offset):
113 | """Carve partial file information from bytes."""
114 | # the data is split in two parts on the pivot offset to separate stable
115 | # data from truncated data.
116 | partial = data[:pivot_offset]
117 | remains = data[pivot_offset:]
118 |
119 | rv, sub_data = rcarve_pascal_utf16(partial, 'args', 'cmd', 'desc', 'name')
120 | if sub_data and len(sub_data) == 32:
121 | try:
122 | rv.update(CONTROL_PART_0.parse(sub_data))
123 | except construct.core.ConstructError:
124 | pass
125 |
126 | try:
127 | rv.update(CONTROL_PART_1.parse(remains))
128 | except construct.core.ConstructError:
129 | pass
130 |
131 | return rv
132 |
133 |
134 | def deep_carving(data):
135 | """Try to carve bytes for recognizable data."""
136 |
137 | rv = {}
138 |
139 | if data.startswith(bytes.fromhex(FILE_HEADER)):
140 | data = data[16:]
141 |
142 | # Search for an SID (always starts with S-1- in utf16)
143 | pattern = b'S\x00-\x001\x00-\x00'
144 | sid_index = data.find(pattern)
145 |
146 | pattern = b'.\x00t\x00m\x00p\x00'
147 | bittmp_index = data.find(pattern)
148 |
149 | if sid_index > -1:
150 | rv.update(control_deep_carving(data, sid_index - 4))
151 |
152 | elif bittmp_index > -1:
153 | files = files_deep_carving(data, bittmp_index + 10)
154 | if files:
155 | rv['file_count'] = len(files)
156 | rv['files'] = files
157 |
158 | return rv
159 |
160 |
161 | def carve_sections(data):
162 | """Carve data has potential section in a job."""
163 | # A valid job is comprised of 2 to 3 sections:
164 | #
165 | # - description and controls
166 | # - file transfers (optional)
167 | # - metadata
168 | #
169 | # When carving data, most of the time, the first available section is
170 | # partially overwritten making it difficult to retrieve relevant data.
171 | # The last available one is always the metadata section.
172 | delimiter = bytes.fromhex(XFER_HEADER)
173 | sections = [s for s in data.split(delimiter) if s.strip(b'\x00')]
174 |
175 | lost_bytes = 0
176 |
177 | rv = {}
178 |
179 | for section in reversed(sections):
180 |
181 | logger.debug('searching for file transfers ...')
182 | files = []
183 |
184 | file_count = int.from_bytes(section[:4], byteorder='little')
185 |
186 | if file_count * 37 < len(section):
187 | logger.debug('trying to carve %d transfers' % file_count)
188 | offset = 4
189 | while file_count > len(files) and section[offset:]:
190 | try:
191 | recfile = FILE.parse(section[offset:])
192 | if any(v for k, v in recfile.items() if k != 'offset'):
193 | files.append(recfile)
194 |
195 | # remove invalid transfer_size
196 | if recfile['transfer_size'] == 0xFFFFFFFFFFFFFFFF:
197 | recfile['transfer_size'] = ''
198 |
199 | except (UnicodeDecodeError, construct.core.ConstructError):
200 | offset += 1
201 | if offset == 16: # don't waste time on irrelevant data.
202 | break # 16 is an arbitrary high value
203 | else:
204 | if files:
205 | logger.debug('new transfer found!')
206 | offset += recfile.offset # the offset is now after the
207 | # newly carved file transfer
208 |
209 | if files:
210 | rv['file_count'] = file_count
211 | rv['files'] = files
212 | continue
213 | else:
214 | logger.debug('unrecognized transfer section')
215 |
216 | try:
217 | rv.update(METADATA.parse(section))
218 | except (OverflowError, construct.core.ConstructError):
219 | logger.debug('unrecognized metadata section')
220 | else:
221 | continue
222 |
223 | logger.debug('trying to deep carve %d bytes' % (len(section)))
224 | remains = deep_carving(section)
225 | if remains:
226 | rv.update(remains)
227 |
228 | else:
229 | lost_bytes += len(section)
230 |
231 | if lost_bytes:
232 | logger.debug('%d bytes of unknown data' % lost_bytes)
233 |
234 | return rv, lost_bytes
235 |
--------------------------------------------------------------------------------
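carve_queues() and carve_jobs() are plain delimiter splits over raw bytes,
keeping only non-empty fragments. A synthetic illustration (the payload bytes
are placeholders):

    from bits.carver import carve_queues, carve_jobs
    from bits.const import QUEUE_HEADER, JOB_DELIMITERS

    header = bytes.fromhex(QUEUE_HEADER)
    delim = bytes.fromhex(JOB_DELIMITERS[5])

    data = b'slack' + header + b'job1' + delim + b'job2'
    for b_queue in carve_queues(data):      # [b'slack', b'job1<delim>job2']
        print(carve_jobs(b_queue, delim))   # [b'slack'] then [b'job1', b'job2']
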
/bits/const.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 ANSSI. All Rights Reserved.
2 | #
3 | # Licensed under the MIT License (the "License");
4 | # you may not use this file except in compliance with the License.
5 | """Known constants."""
6 |
7 | FILE_HEADER = '13F72BC84099124A9F1A3AAEBD894EEA'
8 | QUEUE_HEADER = '47445F00A9BDBA449851C47BB6C07ACE'
9 | XFER_HEADER = '36DA56776F515A43ACAC44A248FFF34D'
10 | XFER_DELIMITER = '03000000'
11 |
12 | WINVER = {
13 | 0: 'NT 5.1', # Windows XP
14 | 1: 'NT 5.2', # Windows Server 2003 / Windows XP 64
15 | 2: 'NT 6.0', # Windows Vista / Windows 2008
16 | 3: 'NT 6.1', # Windows 7 / Windows 2008 R2
17 | 4: 'NT 6.2', # Windows 8 / Windows 2012
18 | 5: 'NT 6.3', # Windows 8.1 / Windows 2012 R2
19 | }
20 |
21 |
22 | # each version of BITS has its own job delimiter.
23 | JOB_DELIMITERS = {
24 | 1: '93362035A00C104A84F3B17E7B499CD7',
25 | 2: '101370C83653B34183E581557F361B87',
26 | 3: '8C93EA64030F6840B46FF97FE51D4DCD',
27 | 4: 'B346ED3D3B10F944BC2FE8378BD31986',
28 | 5: '74E70C81D2BBCC489E47862E8D58F3C6',
29 | }
30 |
--------------------------------------------------------------------------------
/bits/helpers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ANSSI-FR/bits_parser/bd3c79b0ccc9191ecc8209e9f0b836a2b16e0357/bits/helpers/__init__.py
--------------------------------------------------------------------------------
/bits/helpers/fields.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 ANSSI. All Rights Reserved.
2 | #
3 | # Licensed under the MIT License (the "License");
4 | # you may not use this file except in compliance with the License.
5 | """Some helpers."""
6 |
7 | from uuid import UUID as _UUID
8 | from datetime import datetime, timedelta
9 | from construct import Adapter, Sequence, RepeatUntil, Byte, Bytes, Computed, \
10 | Int32ul, Seek, this, Container
11 |
12 |
13 | class _StripDelimiter(Adapter):
14 |
15 | def _decode(self, obj, context, path):
16 | return bytes(obj[1])
17 |
18 |
19 | class _Utf16(Adapter):
20 |
21 | def _decode(self, obj, context, path):
22 | try:
23 | return obj[1].decode('utf16').strip('\x00')
24 | except UnicodeDecodeError:
25 | # TODO: improve that
26 | return 'unreadable data'
27 |
28 | class DateTime(Adapter):
29 |
30 | def _decode(self, obj, context, path):
31 | return datetime.fromtimestamp(obj)
32 |
33 |
34 | class UUID(Adapter):
35 |
36 | def _decode(self, obj, context, path):
37 | return str(_UUID(bytes_le=obj))
38 |
39 |
40 | class FileTime(Adapter):
41 |
42 | def _decode(self, obj, context, path):
43 | return datetime(1601, 1, 1) + timedelta(microseconds=(obj / 10))
44 |
45 | def DelimitedField(stop):
46 |
47 | return _StripDelimiter(Sequence(
48 | 'with_delimiter' / RepeatUntil(
49 | lambda x, lst, ctx: lst[-len(stop):] == [int(c) for c in stop],
50 | Byte
51 | ),
52 | 'stripped' / Computed(this['with_delimiter'][:-len(stop)]),
53 | Seek(-len(stop), whence=1)
54 | ))
55 |
56 |
57 | def PascalUtf16(size_type=Int32ul):
58 | """Parse a length-defined string in UTF-16."""
59 |
60 | return _Utf16(Sequence(
61 | 'size_type' / size_type,
62 | Bytes(this['size_type'] * 2),
63 | ))
64 |
65 |
66 | class FlattenStruct(Adapter):
67 |
68 | def _decode(self, obj, context, path):
69 | result = Container()
70 | for key, value in obj.items():
71 | if type(value) is Container:
72 | result.update(value)
73 | else:
74 | result[key] = value
75 |
76 | return result
77 |
--------------------------------------------------------------------------------
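PascalUtf16 parses a little-endian 32-bit length (counted in UTF-16 code
units, hence the `* 2`) followed by the string bytes. A quick check:

    from bits.helpers.fields import PascalUtf16

    # 4-byte length prefix (3 code units) followed by 'abc' in UTF-16LE
    raw = b'\x03\x00\x00\x00' + 'abc'.encode('utf-16-le')
    assert PascalUtf16().parse(raw) == 'abc'
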
/bits/helpers/tools.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 ANSSI. All Rights Reserved.
2 | #
3 | # Licensed under the MIT License (the "License");
4 | # you may not use this file except in compliance with the License.
5 | """Some helpers."""
6 |
7 |
8 | def tcid(obj, key, default=None):
9 | """Search a dict by its value."""
10 | d = {v: k for k, v in obj.items()}
11 | return d.get(key, default)
12 |
13 |
14 | def btcid(obj, key, default=None):
15 | """Search a binary value in a dict."""
16 | if hasattr(key, 'hex'):
17 | key = key.hex().upper()
18 | return tcid(obj, key, default)
19 |
--------------------------------------------------------------------------------
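btcid() can reverse a lookup table such as JOB_DELIMITERS, e.g. to recover the
BITS version behind a delimiter found in carved data:

    from bits.const import JOB_DELIMITERS
    from bits.helpers.tools import btcid

    delim = bytes.fromhex(JOB_DELIMITERS[5])
    assert btcid(JOB_DELIMITERS, delim) == 5  # bytes -> upper hex -> version key
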
/bits/sampler.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 ANSSI. All Rights Reserved.
2 | #
3 | # Licensed under the MIT License (the "License");
4 | # you may not use this file except in compliance with the License.
5 | """Disk analysis features."""
6 | import logging
7 |
8 |
9 | from pathlib import Path
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
14 | def _radiance_read(f, start_offset, pattern, radiance):
15 |
16 | # Radiance algorithm:
17 | #
18 | # @0 @1 @2
19 | # <--------[pattern]----[pattern]-------->
20 | #
21 | # @0: preceding bytes not containing the pattern.
22 | # @1: intermediate data not containing the pattern, smaller in
23 | # size than the radiance.
24 | # @2: following bytes not containing the pattern.
25 | #
26 | # size(@0) == size(@2) == size(radiance)
27 | # size(@1) < size(radiance)
28 |
29 | # get preceding bytes
30 | f.seek(start_offset)
31 | rv = f.read((radiance * 1024) + len(pattern)) # read @0 + 1st pattern
32 |
33 | while True:
34 | rv_tmp = f.read(radiance * 1024)
35 |
36 | if len(rv_tmp) < radiance * 1024: # end of the file
37 | return rv + rv_tmp
38 |
39 | local_offset = rv_tmp.rfind(pattern)
40 | if local_offset >= 0: # intermediate pattern
41 | rv += rv_tmp[:local_offset + len(pattern)]
42 | f.seek(f.tell() - (radiance * 1024) + local_offset + len(pattern))
43 | else:
44 | return rv + rv_tmp # pattern not found
45 |
46 |
47 | def sample_disk(img_fp, pattern, radiance=4096):
48 | """Extract interesting disk image samples containing a specific pattern.
49 |
50 | img_fp: disk image file path.
51 | pattern: bytes or hex-string of the specific pattern.
52 | radiance: size in kB of the pattern-free data collected around
53 | each matched pattern.
54 |
55 | Yields: disk samples (bytes)
56 | """
57 |
58 | img_fp = Path(img_fp).resolve()
59 |
60 | logger.info('disk analysis of %s', img_fp)
61 | logger.info('search for pattern 0x%s R:%d', pattern, radiance)
62 |
63 | # ensure pattern is bytes
64 | if isinstance(pattern, str):
65 | pattern = bytes.fromhex(pattern)
66 |
67 | buf = [bytearray(512), bytearray(512)] # dual buffer
68 |
69 | with img_fp.open('rb') as f:
70 | while f.readinto(buf[1]):
71 | data = b''.join(buf)
72 |
73 | local_offset = data.find(pattern, 511-len(pattern))
74 | if local_offset >= 0:
75 |
76 | # absolute offset of the pattern in the file.
77 | abs_offset = f.tell() - 1024 + local_offset
78 |
79 | # radiance start offset
80 | start_offset = max(0, abs_offset - (radiance * 1024))
81 | yield _radiance_read(f, start_offset, pattern, radiance)
82 |
83 | buf.reverse() # permute the list
84 |
85 | logger.info('disk analysis complete')
86 |
--------------------------------------------------------------------------------
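A sketch of disk-image sampling as the CLI wires it up in scripts/bits_parser
(``image.bin`` is a placeholder path):

    import bits
    from bits.const import XFER_HEADER

    analyzer = bits.Bits()
    for sample in bits.sample_disk('image.bin', XFER_HEADER, radiance=2048):
        analyzer.append_data(sample)  # collect raw fragments around each marker
    analyzer.guess_info()             # then guess the job delimiter
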
/bits/structs.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 ANSSI. All Rights Reserved.
2 | #
3 | # Licensed under the MIT License (the "License");
4 | # you may not use this file except in compliance with the License.
5 | """Data structures.
6 |
7 | Multiple data structures are available. Those structures are defined to
8 | facilitate parsing and carving but return an object or list of objects
9 | containing all the following fields:
10 |
11 | header
12 | job_count
13 | jobs ->
14 | type Job type (enumeration).
15 | priority Job priority (enumeration).
16 | state State of the job (enumeration).
17 | job_id UUID of the job.
18 | name Name of the job.
19 | desc Description string of the job.
20 | cmd Command executed when the job is done.
21 | args Arguments of the command.
22 | sid Owner of the job.
23 | flags
24 | access_token
25 | file_count Count of transferred files of the job.
26 | files ->
27 | dest_fn Destination file path of a file.
28 | src_fn Source URL.
29 | tmp_fn Temporary file path of a file.
30 | download_size The count of downloaded bytes.
31 | transfer_size
32 | drive Destination drive.
33 | vol_guid Volume GUID of the drive.
34 | error_count
35 | errors ->
36 | code
37 | stat1
38 | stat2
39 | stat3
40 | stat4
41 | transient_error_count
42 | retry_delay
43 | timeout
44 | ctime
45 | mtime
46 | other_time0
47 | other_time1
48 | other_time2
49 |
50 |
51 | """
52 |
53 | # available fields
54 |
55 |
56 |
57 | from bits.const import FILE_HEADER, QUEUE_HEADER, XFER_HEADER
58 |
59 | from bits.helpers.fields import DelimitedField, PascalUtf16, FileTime, UUID, FlattenStruct
60 | from construct import Struct, Array, Enum, Const, GreedyBytes, Int64ul, \
61 | Int32ul, Bytes, Byte, Pass, Padding, Tell, Seek, this
62 |
63 |
64 | QUEUE = Struct(
65 | 'header' / DelimitedField(bytes.fromhex(FILE_HEADER)),
66 | Const(bytes.fromhex(FILE_HEADER)),
67 | Const(bytes.fromhex(QUEUE_HEADER)),
68 | 'job_count' / Int32ul,
69 | 'jobs' / DelimitedField(bytes.fromhex(QUEUE_HEADER)),
70 | Const(bytes.fromhex(QUEUE_HEADER)),
71 | 'unknown' / DelimitedField(bytes.fromhex(FILE_HEADER)),
72 | Const(bytes.fromhex(FILE_HEADER)),
73 | 'remains' / GreedyBytes,
74 | )
75 |
76 |
77 | # CONTROL: job control information
78 | CONTROL_PART_0 = Struct(
79 | 'type' / Enum(Int32ul, default=Pass,
80 | download=0,
81 | upload=1,
82 | upload_reply=2),
83 | 'priority' / Enum(Int32ul, default=Pass,
84 | foreground=0,
85 | high=1,
86 | normal=2,
87 | low=3),
88 | 'state' / Enum(Int32ul, default=Pass,
89 | queued=0,
90 | connecting=1,
91 | transferring=2,
92 | suspended=3,
93 | error=4,
94 | transient_error=5,
95 | transferred=6,
96 | acknowledged=7,
97 | cancelled=8),
98 | Int32ul,
99 | 'job_id' / UUID(Bytes(16)),
100 | )
101 |
102 |
103 | CONTROL_PART_1 = Struct(
104 | 'sid' / PascalUtf16(Int32ul),
105 | 'flags' / Enum(Int32ul, default=Pass,
106 | BG_NOTIFY_JOB_TRANSFERRED=1,
107 | BG_NOTIFY_JOB_ERROR=2,
108 | BG_NOTIFY_JOB_TRANSFERRED_BG_NOTIFY_JOB_ERROR=3,
109 | BG_NOTIFY_DISABLE=4,
110 | BG_NOTIFY_JOB_TRANSFERRED_BG_NOTIFY_DISABLE=5,
111 | BG_NOTIFY_JOB_ERROR_BG_NOTIFY_DISABLE=6,
112 | BG_NOTIFY_JOB_TRANSFERRED_BG_NOTIFY_JOB_ERROR_BG_NOTIFY_DISABLE=7,
113 | BG_NOTIFY_JOB_MODIFICATION=8,
114 | BG_NOTIFY_FILE_TRANSFERRED=16),
115 | )
116 |
117 |
118 | CONTROL = FlattenStruct(Struct(
119 | 'control_part_0' / CONTROL_PART_0,
120 | 'name' / PascalUtf16(Int32ul),
121 | 'desc' / PascalUtf16(Int32ul),
122 | 'cmd' / PascalUtf16(Int32ul),
123 | 'args' / PascalUtf16(Int32ul),
124 | 'control_part_1' / CONTROL_PART_1,
125 | 'access_token' / DelimitedField(bytes.fromhex(XFER_HEADER)),
126 | ))
127 |
128 |
129 | # XFER: file transfer information
130 |
131 | FILE_PART_0 = Struct(
132 | 'download_size' / Int64ul,
133 | 'transfer_size' / Int64ul,
134 | Byte,
135 | 'drive' / PascalUtf16(Int32ul),
136 | 'vol_guid' / PascalUtf16(Int32ul),
137 | 'offset' / Tell, # required by carving
138 | )
139 |
140 |
141 | FILE = FlattenStruct(Struct(
142 | DelimitedField(b':'),
143 | Seek(-6, whence=1),
144 | 'dest_fn' / PascalUtf16(Int32ul),
145 | 'src_fn' / PascalUtf16(Int32ul),
146 | 'tmp_fn' / PascalUtf16(Int32ul), # always ends with .tmp
147 | 'file_part_0' / FILE_PART_0,
148 | ))
149 |
150 |
151 | ERROR = Struct(
152 | 'code' / Int64ul,
153 | 'stat1' / Int32ul,
154 | 'stat2' / Int32ul,
155 | 'stat3' / Int32ul,
156 | 'stat4' / Int32ul,
157 | Byte
158 | )
159 |
160 |
161 | METADATA = Struct(
162 | 'error_count' / Int32ul,
163 | 'errors' / Array(this.error_count, ERROR),
164 | 'transient_error_count' / Int32ul,
165 | 'retry_delay' / Int32ul,
166 | 'timeout' / Int32ul,
167 | 'ctime' / FileTime(Int64ul),
168 | 'mtime' / FileTime(Int64ul),
169 | 'other_time0' / FileTime(Int64ul),
170 | Padding(14),
171 | 'other_time1' / FileTime(Int64ul),
172 | 'other_time2' / FileTime(Int64ul),
173 | )
174 |
175 |
176 | JOB = FlattenStruct(Struct(
177 | 'control' / CONTROL,
178 | Const(bytes.fromhex(XFER_HEADER)),
179 | 'file_count' / Int32ul,
180 | 'files' / DelimitedField(bytes.fromhex(XFER_HEADER)),
181 | Const(bytes.fromhex(XFER_HEADER)),
182 | 'metadata' / METADATA,
183 | ))
184 |
--------------------------------------------------------------------------------
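FlattenStruct hoists the fields of nested Container results to the top level,
which is why JOB.parse() exposes name, sid, file_count, etc. directly. A toy
check with a synthetic struct (INNER and FLAT are illustrative names):

    from construct import Struct, Int32ul
    from bits.helpers.fields import FlattenStruct

    INNER = Struct('a' / Int32ul, 'b' / Int32ul)
    FLAT = FlattenStruct(Struct('inner' / INNER, 'c' / Int32ul))

    parsed = FLAT.parse(bytes(12))
    assert parsed.a == 0 and parsed.c == 0  # 'inner' wrapper gone, fields hoisted
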
/bits/writer.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 ANSSI. All Rights Reserved.
2 | #
3 | # Licensed under the MIT License (the "License");
4 | # you may not use this file except in compliance with the License.
5 | """CSV writer."""
6 | import csv
7 |
8 |
9 | DEFAULT_VALUES = (
10 | ('job_id', None),
11 | ('name', None),
12 | ('desc', None),
13 | ('type', None),
14 | ('priority', None),
15 | ('sid', None),
16 | ('state', None),
17 | ('cmd', None),
18 | ('args', None),
19 | ('file_count', 0),
20 | ('file_id', 0),
21 | ('dest_fn', None),
22 | ('src_fn', None),
23 | ('tmp_fn', None),
24 | ('download_size', -1),
25 | ('transfer_size', -1),
26 | ('drive', None),
27 | ('vol_guid', None),
28 | ('ctime', None),
29 | ('mtime', None),
30 | ('other_time0', None),
31 | ('other_time1', None),
32 | ('other_time2', None),
33 | ('carved', False)
34 | )
35 |
36 |
37 | def flattener(job):
38 |
39 | def _f(index, file):
40 | rv = {k: file.get(k, job.get(k, v)) for k, v in DEFAULT_VALUES}
41 | rv['file_id'] = index
42 | return rv
43 |
44 | files = job.get('files', [])
45 |
46 | if files:
47 | return [_f(index, f) for index, f in enumerate(files)]
48 |
49 | return [_f(0, {})]
50 |
51 |
52 | def write_csv(filename, records):
53 | """Write records to a CSV file."""
54 |
55 | with filename.open('w') as csvfile:
56 | writer = csv.DictWriter(csvfile,
57 | fieldnames=[k for k, _ in DEFAULT_VALUES])
58 | writer.writeheader()
59 | for r in records:
60 | for sub_r in flattener(r):
61 | writer.writerow(sub_r)
62 |
--------------------------------------------------------------------------------
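flattener() turns one job with N transferred files into N CSV rows, repeating
the job-level columns on every row. A small check (the field values are made
up):

    from bits.writer import flattener

    job = {'name': 'update', 'files': [{'src_fn': 'http://a/x'},
                                       {'src_fn': 'http://a/y'}]}
    rows = flattener(job)
    assert len(rows) == 2
    assert rows[1]['file_id'] == 1 and rows[1]['name'] == 'update'
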
/requirements.txt:
--------------------------------------------------------------------------------
1 | construct==2.10.70
2 | docopt==0.6.2
3 |
--------------------------------------------------------------------------------
/scripts/bits_parser:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Extract BITS jobs from QMGR queue or disk image to CSV file.
4 |
5 | Usage:
6 | bits_parser [options] [-o OUTPUT] FILE
7 |
8 | Options:
9 | --no-carving Disable carving.
10 |
11 | --disk-image, -i Data input is a disk image.
12 | --radiance=VALUE Radiance in kB. [default: 2048]
13 | --skip-sampling Skip sampling and load file in memory.
14 | --checkpoint=PATH Store disk checkpoint file.
15 |
16 | --out=OUTPUT, -o OUTPUT Write result to OUTPUT [default: stdout]
17 | --verbose, -v More verbosity.
18 | --debug Display debug messages.
19 |
20 | --help, -h Show this screen.
21 | --version Show version.
22 | """
23 |
24 | from docopt import docopt
25 | from pathlib import Path
26 |
27 | import bits
28 | import logging
29 | import logging.config
30 |
31 | from bits.const import XFER_HEADER
32 |
33 | # default logger configuration
34 | logging.config.dictConfig({
35 | 'version': 1,
36 | 'disable_existing_loggers': False,
37 | 'formatters': {
38 | 'default': {
39 | 'format': ('%(asctime)s.%(msecs)03d '
40 | '[%(levelname)s] %(name)s: %(message)s'),
41 | 'datefmt': '%Y-%m-%dT%H:%M:%S'
42 | },
43 | },
44 | 'handlers': {
45 | 'default': {
46 | 'class': 'logging.StreamHandler',
47 | 'formatter': 'default',
48 | },
49 | },
50 | 'loggers': {
51 | '': {
52 | 'handlers': ['default'],
53 | 'level': 'WARNING',
54 | 'propagate': True,
55 | },
56 | },
57 | })
58 |
59 |
60 | if __name__ == '__main__':
61 |
62 | args = docopt(__doc__, version=bits.__version__)
63 |
64 | if args['--verbose']:
65 | logging.getLogger().setLevel(logging.INFO)
66 |
67 | if args['--debug']:
68 | logging.getLogger().setLevel(logging.DEBUG)
69 |
70 | file_in = Path(args['FILE'])
71 | file_out = Path(
72 | '/dev/stdout' if args['--out'] == 'stdout' else args['--out']
73 | )
74 |
75 | if args['--disk-image'] and not args['--skip-sampling']:
76 | # load interesting fragments as raw data
77 | analyzer = bits.Bits()
78 | radiance = int(args['--radiance'])
79 |
80 | checkpoint = None
81 | checkpoint_fp = args['--checkpoint']
82 | if checkpoint_fp is not None:
83 | checkpoint_fp = Path(checkpoint_fp)
84 | checkpoint = checkpoint_fp.open('wb')
85 |
86 | for sample in bits.sample_disk(file_in, XFER_HEADER, radiance):
87 | analyzer.append_data(sample)
88 | if checkpoint:
89 | checkpoint.write(sample)
90 |
91 | if checkpoint:
92 | checkpoint.close()
93 |
94 | analyzer.guess_info()
95 | elif args['--disk-image']:
96 | analyzer = bits.Bits()
97 | with file_in.open('rb') as f:
98 | analyzer.append_data(f.read())
99 | analyzer.guess_info()
100 |
101 | else:
102 | analyzer = bits.Bits.load_file(file_in)
103 |
104 | jobs = analyzer.parse() if args['--no-carving'] else analyzer
105 | bits.write_csv(file_out, jobs)
106 |
107 | exit()
108 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """bits_parser"""
3 | import sys
4 | assert sys.version_info.major == 3, 'Python 3 required'
5 |
6 | import re
7 | from pathlib import Path
8 | from setuptools import setup, find_packages
9 |
10 |
11 | # read the version number from package
12 | with (Path(__file__).resolve().parent / 'bits' / '__init__.py').open() as f:
13 | v, = re.search(".*__version__ = '(.*)'.*", f.read(), re.MULTILINE).groups()
14 |
15 |
16 | setup(
17 |
18 | name='bits_parser',
19 | version=v,
20 |
21 | author='ANSSI-INM',
22 | author_email='',
23 |
24 | url='',
25 | description=__doc__,
26 | long_description=open('README.rst').read(),
27 |
28 | install_requires=open('requirements.txt').read().splitlines(),
29 |
30 | packages=find_packages(),
31 | include_package_data=True,
32 | zip_safe=False,
33 | scripts=[
34 | 'scripts/bits_parser',
35 | ],
36 | license='MIT',
37 | platforms='any',
38 | classifiers=[
39 | 'Development Status :: 4 - Beta',
40 | 'Programming Language :: Python',
41 | 'Programming Language :: Python :: 3',
42 | 'Programming Language :: Python :: 3.5',
43 | 'Programming Language :: Python :: 3.6',
44 | ]
45 |
46 | )
47 |
--------------------------------------------------------------------------------