├── .github
    └── workflows
    │   ├── publish.yml
    │   └── test.yml
├── .gitignore
├── .justfile
├── Evtx
    ├── BinaryParser.py
    ├── Evtx.py
    ├── Nodes.py
    ├── Views.py
    └── __init__.py
├── LICENSE.TXT
├── README.md
├── pyproject.toml
├── scripts
    ├── evtx_dates.py
    ├── evtx_dump.py
    ├── evtx_dump_chunk_slack.py
    ├── evtx_dump_json.py
    ├── evtx_eid_record_numbers.py
    ├── evtx_extract_record.py
    ├── evtx_filter_records.py
    ├── evtx_info.py
    ├── evtx_record_structure.py
    ├── evtx_record_template.py
    ├── evtx_structure.py
    └── evtx_templates.py
└── tests
    ├── conftest.py
    ├── data
        ├── dns_log_malformed.evtx
        ├── issue_38.evtx
        ├── issue_39.evtx
        ├── issue_43.evtx
        ├── readme.md
        ├── security.evtx
        └── system.evtx
    ├── fixtures.py
    ├── test_chunks.py
    ├── test_header.py
    ├── test_issue_37.py
    ├── test_issue_38.py
    ├── test_issue_39.py
    ├── test_issue_43.py
    └── test_records.py


/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | # use PyPI trusted publishing, as described here:
 2 | # https://blog.trailofbits.com/2023/05/23/trusted-publishing-a-new-benchmark-for-packaging-security/
 3 | name: publish to pypi
 4 | 
 5 | on:
 6 |   release:
 7 |     types: [published]
 8 | 
 9 | permissions:
10 |   contents: write
11 | 
12 | jobs:
13 |   pypi-publish:
14 |     runs-on: ubuntu-latest
15 |     environment:
16 |       name: release
17 |     permissions:
18 |       id-token: write
19 |     steps:
20 |       - uses: actions/checkout@v2
21 |       - uses: astral-sh/setup-uv@v5
22 |       - name: install
23 |         run: uv sync --group build
24 |       - name: build package
25 |         run: uv run python -m build
26 |       - name: upload package artifacts
27 |         uses: actions/upload-artifact@v4
28 |         with:
29 |           path: dist/*
30 |       - name: publish package
31 |         uses: pypa/gh-action-pypi-publish@release/v1
32 |         with:
33 |           skip-existing: true
34 |           verbose: true
35 |           print-hash: true
36 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | jobs:
10 |   tests:
11 |     name: Tests in ${{ matrix.python }}
12 |     runs-on: ubuntu-latest
13 |     strategy:
14 |       fail-fast: false
15 |       matrix:
16 |         include:
17 |           - python: 3.9
18 |           - python: 3.13
19 |     steps:
20 |     - name: Checkout python-evtx with submodules
21 |       uses: actions/checkout@v2
22 |       with:
23 |         submodules: true
24 | 
25 |     - uses: astral-sh/setup-uv@v5
26 |       with:
27 |         version: "0.7.2"
28 |         python-version: ${{ matrix.python-version }}
29 | 
30 |     - name: install
31 |       run: uv sync --all-extras
32 | 
33 |     - uses: extractions/setup-just@v2
34 |       with:
35 |         just-version: 1.5.0
36 | 
37 |     - name: lint
38 |       run: just lint
39 | 
40 | 
41 |     - name: test
42 |       run: just test
43 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | 
 3 | # C extensions
 4 | *.so
 5 | 
 6 | # Packages
 7 | *.egg
 8 | *.egg-info
 9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 | 
21 | # Installer logs
22 | pip-log.txt
23 | 
24 | # Unit test / coverage reports
25 | .coverage
26 | .tox
27 | nosetests.xml
28 | 
29 | # Translations
30 | *.mo
31 | 
32 | # Mr Developer
33 | .mr.developer.cfg
34 | .project
35 | .pydevproject
36 | 
37 | .idea/*
38 | need-to-fix/*
39 | testing-evtxs/*
40 | 
41 | .direnv/
42 | .env/
43 | .envrc
44 | .venv
45 | 


--------------------------------------------------------------------------------
/.justfile:
--------------------------------------------------------------------------------
# sort imports with isort (black-compatible profile, 120-column lines)
isort:
    uvx isort --length-sort --profile black --line-length 120 Evtx/ tests/ scripts/

# format the sources with black (120-column lines)
black:
    uvx black --line-length 120 Evtx/ tests/ scripts/

# report lint violations with ruff
ruff:
    uvx ruff check --line-length 120 Evtx/ tests/ scripts/

# type-check with mypy (currently not invoked by `lint`, see below)
mypy:
    uvx mypy --check-untyped-defs --ignore-missing-imports Evtx/ tests/ scripts/

# run all linters; the leading `-` makes just ignore a failing command and
# continue, so this recipe does not fail when a tool reports problems
lint:
    -just isort
    -just black
    -just ruff
    # this doesn't pass cleanly today
    #-just mypy

# run the test suite with pytest
test:
    uv run pytest tests/
22 | 


--------------------------------------------------------------------------------
/Evtx/BinaryParser.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | #    This file is part of python-evtx.
  3 | #
  4 | #   Copyright 2012, 2013 Willi Ballenthin <william.ballenthin@mandiant.com>
  5 | #                    while at Mandiant <http://www.mandiant.com>
  6 | #
  7 | #   Licensed under the Apache License, Version 2.0 (the "License");
  8 | #   you may not use this file except in compliance with the License.
  9 | #   You may obtain a copy of the License at
 10 | #
 11 | #       http://www.apache.org/licenses/LICENSE-2.0
 12 | #
 13 | #   Unless required by applicable law or agreed to in writing, software
 14 | #   distributed under the License is distributed on an "AS IS" BASIS,
 15 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 16 | #   See the License for the specific language governing permissions and
 17 | #   limitations under the License.
 18 | #
 19 | #   Version v.0.3.0
 20 | from __future__ import absolute_import
 21 | 
 22 | import struct
 23 | import datetime
 24 | from functools import partial
 25 | 
 26 | 
 27 | class memoize(object):
 28 |     """cache the return value of a method
 29 | 
 30 |     From http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/
 31 | 
 32 |     This class is meant to be used as a decorator of methods. The return value
 33 |     from a given method invocation will be cached on the instance whose method
 34 |     was invoked. All arguments passed to a method decorated with memoize must
 35 |     be hashable.
 36 | 
 37 |     If a memoized method is invoked directly on its class the result will not
 38 |     be cached. Instead the method will be invoked like a static method:
 39 |     class Obj(object):
 40 |         @memoize
 41 |         def add_to(self, arg):
 42 |             return self + arg
 43 |     Obj.add_to(1) # not enough arguments
 44 |     Obj.add_to(1, 2) # returns 3, result is not cached
 45 |     """
 46 | 
 47 |     def __init__(self, func):
 48 |         self.func = func
 49 | 
 50 |     def __get__(self, obj, objtype=None):
 51 |         if obj is None:
 52 |             return self.func
 53 |         return partial(self, obj)
 54 | 
 55 |     def __call__(self, *args, **kw):
 56 |         obj = args[0]
 57 |         try:
 58 |             cache = obj.__cache
 59 |         except AttributeError:
 60 |             cache = obj.__cache = {}
 61 |         key = (self.func, args[1:], frozenset(list(kw.items())))
 62 |         if key not in cache:
 63 |             cache[key] = self.func(*args, **kw)
 64 |         return cache[key]
 65 | 
 66 | 
 67 | def align(offset, alignment):
 68 |     """
 69 |     Return the offset aligned to the nearest greater given alignment
 70 |     Arguments:
 71 |     - `offset`: An integer
 72 |     - `alignment`: An integer
 73 |     """
 74 |     if offset % alignment == 0:
 75 |         return offset
 76 |     return offset + (alignment - (offset % alignment))
 77 | 
 78 | 
 79 | def dosdate(dosdate, dostime):
 80 |     """
 81 |     `dosdate`: 2 bytes, little endian.
 82 |     `dostime`: 2 bytes, little endian.
 83 |     returns: datetime.datetime or datetime.datetime.min on error
 84 |     """
 85 |     try:
 86 |         t = ord(dosdate[1]) << 8
 87 |         t |= ord(dosdate[0])
 88 |         day = t & 0b0000000000011111
 89 |         month = (t & 0b0000000111100000) >> 5
 90 |         year = (t & 0b1111111000000000) >> 9
 91 |         year += 1980
 92 | 
 93 |         t = ord(dostime[1]) << 8
 94 |         t |= ord(dostime[0])
 95 |         sec = t & 0b0000000000011111
 96 |         sec *= 2
 97 |         minute = (t & 0b0000011111100000) >> 5
 98 |         hour = (t & 0b1111100000000000) >> 11
 99 | 
100 |         return datetime.datetime(year, month, day, hour, minute, sec)
101 |     except ValueError:
102 |         return datetime.datetime.min
103 | 
104 | 
def parse_filetime(qword):
    """
    Convert a Windows FILETIME value to a datetime.

    Arguments:
    - `qword`: An integer count of 100-nanosecond intervals since
        1601-01-01 UTC.
    returns: a timezone-aware (UTC) datetime.datetime, or the naive
      datetime.datetime.min when `qword` is zero or cannot be represented.
    """
    # see http://integriography.wordpress.com/2010/01/16/using-phython-to-parse-and-present-windows-64-bit-timestamps/
    if qword == 0:
        return datetime.datetime.min

    try:
        # 1e-7 scales 100ns ticks to seconds; 11644473600 is the number of
        # seconds between 1601-01-01 and the Unix epoch.
        return datetime.datetime.fromtimestamp(float(qword) * 1e-7 - 11644473600, datetime.timezone.utc)
    except (ValueError, OverflowError, OSError):
        # fromtimestamp raises OverflowError when the value is outside the
        # range supported by the platform's time functions
        return datetime.datetime.min
114 | 
115 | 
class BinaryParserException(Exception):
    """
    Base Exception class for binary parsing.
    """

    def __init__(self, value):
        """
        Constructor.
        Arguments:
        - `value`: A string description.
        """
        # Forward the description so that `args` is populated; with empty
        # `args` the exception cannot be copied or pickled, because it is
        # rebuilt by calling the class with `*args`.
        super(BinaryParserException, self).__init__(value)
        self._value = value

    def __repr__(self):
        return "BinaryParserException({!r})".format(self._value)

    def __str__(self):
        return "Binary Parser Exception: {}".format(self._value)
135 | 
136 | 
class ParseException(BinaryParserException):
    """
    Raised while parsing binary data when the input is not valid,
    for example when an invalid header is encountered.
    """

    def __init__(self, value):
        """
        Constructor.
        Arguments:
        - `value`: A string description.
        """
        super().__init__(value)

    def __str__(self):
        return "Parse Exception({})".format(self._value)

    def __repr__(self):
        return "ParseException({!r})".format(self._value)
156 | 
157 | 
class OverrunBufferException(ParseException):
    """
    Raised when a read would extend beyond the end of the buffer.
    """

    def __init__(self, readOffs, bufLen):
        """
        Constructor.
        Arguments:
        - `readOffs`: The absolute offset of the attempted read.
        - `bufLen`: The length of the buffer that was read from.
        """
        tvalue = "read: {}, buffer length: {}".format(hex(readOffs), hex(bufLen))
        # Start the super() lookup at this class; starting it at
        # ParseException skipped ParseException.__init__ entirely.
        super(OverrunBufferException, self).__init__(tvalue)

    def __repr__(self):
        return "OverrunBufferException({!r})".format(self._value)

    def __str__(self):
        return "Tried to parse beyond the end of the file ({})".format(self._value)
168 | 
169 | 
class Block(object):
    """
    Base class for structure blocks in binary parsing.
    A block is associated with a offset into a byte-string.

    Fields are added with `declare_field`, which attaches to the instance an
    accessor method named after the field and an `_off_<name>` attribute
    holding the field's offset relative to the start of the block.
    """

    # Size in bytes of every fixed-width field type, used by `declare_field`
    # to advance the implicit offset. `systemtime` is 16 bytes because
    # `unpack_systemtime` reads eight WORDs.
    _FIELD_SIZES = {
        "byte": 1,
        "int8": 1,
        "word": 2,
        "word_be": 2,
        "int16": 2,
        "dword": 4,
        "dword_be": 4,
        "int32": 4,
        "qword": 8,
        "int64": 8,
        "float": 4,
        "double": 8,
        "dosdate": 4,
        "filetime": 8,
        "systemtime": 16,
        "guid": 16,
    }

    def __init__(self, buf, offset):
        """
        Constructor.
        Arguments:
        - `buf`: Byte string containing stuff to parse.
        - `offset`: The offset into the buffer at which the block starts.
        """
        self._buf = buf
        self._offset = offset
        # relative offset at which the next implicitly placed field starts
        self._implicit_offset = 0

    def __repr__(self):
        return "Block(buf={!r}, offset={!r})".format(self._buf, self._offset)

    def __str__(self):
        # This used to return str(self), which recursed until RecursionError.
        return "Block(offset={})".format(hex(self._offset))

    def declare_field(self, type, name, offset=None, length=None):
        """
        Declaratively add fields to this block.
        This method will dynamically add corresponding
          offset and unpacker methods to this block.
        Arguments:
        - `type`: A string. Should be one of the unpack_* types.
        - `name`: A string.
        - `offset`: (Optional) A number. Defaults to the end of the
            previously declared field.
        - `length`: (Optional) A number. For (w)strings, length in chars.
        Throws:
        - `ParseException` if the size of the field cannot be determined,
            so the next implicit offset is unknown. The accessor and the
            `_off_<name>` attribute have already been set at that point.
        """
        if offset is None:
            offset = self._implicit_offset

        # The unpacker is looked up when the accessor is called, not here.
        if length is None:

            def no_length_handler():
                f = getattr(self, "unpack_" + type)
                return f(offset)

            setattr(self, name, no_length_handler)
        else:

            def explicit_length_handler():
                f = getattr(self, "unpack_" + type)
                return f(offset, length)

            setattr(self, name, explicit_length_handler)

        setattr(self, "_off_" + name, offset)

        if type in self._FIELD_SIZES:
            self._implicit_offset = offset + self._FIELD_SIZES[type]
        elif type == "binary":
            self._implicit_offset = offset + length
        elif type == "string" and length is not None:
            self._implicit_offset = offset + length
        elif type == "wstring" and length is not None:
            # wide characters occupy two bytes each
            self._implicit_offset = offset + (2 * length)
        elif "string" in type and length is None:
            raise ParseException("Implicit offset not supported " "for dynamic length strings")
        else:
            raise ParseException("Implicit offset not supported " "for type: {}".format(type))

    def current_field_offset(self):
        """
        Returns the relative offset at which the next implicitly placed
          field would start.
        """
        return self._implicit_offset

    def unpack_byte(self, offset):
        """
        Returns a little-endian unsigned byte from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        try:
            return struct.unpack_from("<B", self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_int8(self, offset):
        """
        Returns a little-endian signed byte from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        try:
            return struct.unpack_from("<b", self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_word(self, offset):
        """
        Returns a little-endian unsigned WORD (2 bytes) from the
          relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        try:
            return struct.unpack_from("<H", self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_word_be(self, offset):
        """
        Returns a big-endian unsigned WORD (2 bytes) from the
          relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        try:
            return struct.unpack_from(">H", self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_int16(self, offset):
        """
        Returns a little-endian signed WORD (2 bytes) from the
          relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        try:
            return struct.unpack_from("<h", self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def pack_word(self, offset, word):
        """
        Applies the little-endian WORD (2 bytes) to the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        - `word`: The data to apply.
        """
        o = self._offset + offset
        return struct.pack_into("<H", self._buf, o, word)

    def unpack_dword(self, offset):
        """
        Returns a little-endian DWORD (4 bytes) from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        try:
            return struct.unpack_from("<I", self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_dword_be(self, offset):
        """
        Returns a big-endian DWORD (4 bytes) from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        try:
            return struct.unpack_from(">I", self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_int32(self, offset):
        """
        Returns a little-endian signed integer (4 bytes) from the
          relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        try:
            return struct.unpack_from("<i", self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_qword(self, offset):
        """
        Returns a little-endian QWORD (8 bytes) from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        try:
            return struct.unpack_from("<Q", self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_int64(self, offset):
        """
        Returns a little-endian signed 64-bit integer (8 bytes) from
          the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        try:
            return struct.unpack_from("<q", self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_float(self, offset):
        """
        Returns a single-precision float (4 bytes) from
          the relative offset.  IEEE 754 format.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        try:
            return struct.unpack_from("<f", self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_double(self, offset):
        """
        Returns a double-precision float (8 bytes) from
          the relative offset.  IEEE 754 format.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        try:
            return struct.unpack_from("<d", self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_binary(self, offset, length=False):
        """
        Returns raw binary data from the relative offset with the given length.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        - `length`: The length of the binary blob. If zero (or omitted),
            the empty byte string is returned.
        Throws:
        - `OverrunBufferException`
        """
        if not length:
            return bytes("".encode("ascii"))
        o = self._offset + offset
        try:
            return bytes(struct.unpack_from("<{}s".format(length), self._buf, o)[0])
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_string(self, offset, length):
        """
        Returns an ASCII-decoded string from the relative offset with the
          given length.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        - `length`: The length of the string.
        Throws:
        - `OverrunBufferException`
        - `UnicodeDecodeError`
        """
        return self.unpack_binary(offset, length).decode("ascii")

    def unpack_wstring(self, offset, length):
        """
        Returns a string from the relative offset with the given length,
        where each character is a wchar (2 bytes)
        Arguments:
        - `offset`: The relative offset from the start of the block.
        - `length`: The length of the string.
        Throws:
        - `UnicodeDecodeError`
        """
        start = self._offset + offset
        end = self._offset + offset + 2 * length
        # The original wrapped this in try/except AttributeError with an
        # identical fallback, so the handler could never change the outcome.
        return bytes(self._buf[start:end]).decode("utf16")

    def unpack_dosdate(self, offset):
        """
        Returns a datetime from the DOSDATE and DOSTIME starting at
        the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        # Slicing never raises on a short buffer, so check the bounds here.
        if o + 4 > len(self._buf):
            raise OverrunBufferException(o, len(self._buf))
        return dosdate(self._buf[o : o + 2], self._buf[o + 2 : o + 4])

    def unpack_filetime(self, offset):
        """
        Returns a datetime from the QWORD Windows timestamp starting at
        the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return parse_filetime(self.unpack_qword(offset))

    def unpack_systemtime(self, offset):
        """
        Returns a datetime from the Windows SYSTEMTIME structure (eight
          little-endian WORDs, 16 bytes) starting at the relative offset.
          See http://msdn.microsoft.com/en-us/library/ms724950%28VS.85%29.aspx
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        - `ValueError` if the fields do not form a valid date/time.
        """
        o = self._offset + offset
        try:
            parts = struct.unpack_from("<HHHHHHHH", self._buf, o)
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))
        return datetime.datetime(
            parts[0],
            parts[1],
            parts[3],  # skip part 2 (day of week)
            parts[4],
            parts[5],
            parts[6],
            parts[7] * 1000,  # SYSTEMTIME stores milliseconds; datetime wants microseconds
        )

    def unpack_guid(self, offset):
        """
        Returns a string containing a GUID starting at the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset

        # Slicing silently truncates instead of raising IndexError, so the
        # overrun has to be detected from the length of the result.
        h = bytes(self._buf[o : o + 16])
        if len(h) != 16:
            raise OverrunBufferException(o, len(self._buf))

        # The first three groups are stored little-endian, the rest in order.
        return """{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}""".format(
            h[3], h[2], h[1], h[0], h[5], h[4], h[7], h[6], h[8], h[9], h[10], h[11], h[12], h[13], h[14], h[15]
        )

    def absolute_offset(self, offset):
        """
        Get the absolute offset from an offset relative to this block
        Arguments:
        - `offset`: The relative offset into this block.
        """
        return self._offset + offset

    def offset(self):
        """
        Equivalent to self.absolute_offset(0x0), which is the starting
          offset of this block.
        """
        return self._offset
580 | 


--------------------------------------------------------------------------------
/Evtx/Evtx.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | #    This file is part of python-evtx.
  3 | #
  4 | #   Copyright 2012, 2013 Willi Ballenthin <william.ballenthin@mandiant.com>
  5 | #                    while at Mandiant <http://www.mandiant.com>
  6 | #
  7 | #   Licensed under the Apache License, Version 2.0 (the "License");
  8 | #   you may not use this file except in compliance with the License.
  9 | #   You may obtain a copy of the License at
 10 | #
 11 | #       http://www.apache.org/licenses/LICENSE-2.0
 12 | #
 13 | #   Unless required by applicable law or agreed to in writing, software
 14 | #   distributed under the License is distributed on an "AS IS" BASIS,
 15 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 16 | #   See the License for the specific language governing permissions and
 17 | #   limitations under the License.
 18 | #
 19 | #   Version v.0.3.0
 20 | from __future__ import absolute_import
 21 | 
 22 | import re
 23 | import sys
 24 | import mmap
 25 | import logging
 26 | import binascii
 27 | from functools import wraps
 28 | 
 29 | import Evtx.Views as e_views
 30 | 
 31 | from .Nodes import RootNode, TemplateNode, NameStringNode
 32 | from .BinaryParser import Block, ParseException
 33 | 
 34 | logger = logging.getLogger(__name__)
 35 | 
 36 | 
 37 | class InvalidRecordException(ParseException):
 38 |     def __init__(self):
 39 |         super(InvalidRecordException, self).__init__("Invalid record structure")
 40 | 
 41 | 
 42 | class Evtx(object):
 43 |     """
 44 |     A convenience class that makes it easy to open an
 45 |       EVTX file and start iterating the important structures.
 46 |     Note, this class must be used in a context statement
 47 |        (see the `with` keyword).
 48 |     Note, this class will mmap the target file, so ensure
 49 |       your platform supports this operation.
 50 |     """
 51 | 
 52 |     def __init__(self, filename):
 53 |         """
 54 |         @type filename:  str
 55 |         @param filename: A string that contains the path
 56 |           to the EVTX file to open.
 57 |         """
 58 |         self._filename = filename
 59 |         self._buf = None
 60 |         self._f = None
 61 |         self._fh = None
 62 | 
 63 |     def __enter__(self):
 64 |         self._f = open(self._filename, "rb")
 65 |         self._buf = mmap.mmap(self._f.fileno(), 0, access=mmap.ACCESS_READ)
 66 |         self._fh = FileHeader(self._buf, 0x0)
 67 |         return self
 68 | 
 69 |     def __exit__(self, type, value, traceback):
 70 |         self._buf.close()
 71 |         self._f.close()
 72 |         self._fh = None
 73 | 
 74 |     def ensure_contexted(func):
 75 |         """
 76 |         This decorator ensure that an instance of the
 77 |           Evtx class is used within a context statement.  That is,
 78 |           that the `with` statement is used, or `__enter__()`
 79 |           and `__exit__()` are called explicitly.
 80 |         """
 81 | 
 82 |         @wraps(func)
 83 |         def wrapped(self, *args, **kwargs):
 84 |             if self._buf is None:
 85 |                 raise TypeError("An Evtx object must be used with" " a context (see the `with` statement).")
 86 |             else:
 87 |                 return func(self, *args, **kwargs)
 88 | 
 89 |         return wrapped
 90 | 
 91 |     @ensure_contexted
 92 |     def chunks(self):
 93 |         """
 94 |         Get each of the ChunkHeaders from within this EVTX file.
 95 | 
 96 |         @rtype generator of ChunkHeader
 97 |         @return A generator of ChunkHeaders from this EVTX file.
 98 |         """
 99 |         for chunk in self._fh.chunks():
100 |             yield chunk
101 | 
102 |     @ensure_contexted
103 |     def records(self):
104 |         """
105 |         Get each of the Records from within this EVTX file.
106 | 
107 |         @rtype generator of Record
108 |         @return A generator of Records from this EVTX file.
109 |         """
110 |         for chunk in self.chunks():
111 |             for record in chunk.records():
112 |                 yield record
113 | 
114 |     @ensure_contexted
115 |     def get_record(self, record_num):
116 |         """
117 |         Get a Record by record number.
118 | 
119 |         @type record_num:  int
120 |         @param record_num: The record number of the the record to fetch.
121 |         @rtype Record or None
122 |         @return The record request by record number, or None if
123 |           the record is not found.
124 |         """
125 |         return self._fh.get_record(record_num)
126 | 
127 |     @ensure_contexted
128 |     def get_file_header(self):
129 |         return self._fh
130 | 
131 | 
class FileHeader(Block):
    """
    Parser for the header block of an EVTX file.

    The constructor declares the header fields; the remaining methods
      validate the header and locate the chunks that follow it.
    """

    def __init__(self, buf, offset):
        logger.debug("FILE HEADER at {}.".format(hex(offset)))
        super().__init__(buf, offset)

        # Only the magic carries an explicit offset; every later field is
        # declared without one, so the declaration order must be preserved.
        self.declare_field("string", "magic", 0x0, length=8)
        for kind, name in (
            ("qword", "oldest_chunk"),
            ("qword", "current_chunk_number"),
            ("qword", "next_record_number"),
            ("dword", "header_size"),
            ("word", "minor_version"),
            ("word", "major_version"),
            ("word", "header_chunk_size"),
            ("word", "chunk_count"),
        ):
            self.declare_field(kind, name)
        self.declare_field("binary", "unused1", length=0x4C)
        for kind, name in (("dword", "flags"), ("dword", "checksum")):
            self.declare_field(kind, name)

    def __repr__(self):
        template = "FileHeader(buf={!r}, offset={!r})"
        return template.format(self._buf, self._offset)

    def __str__(self):
        where = hex(self._offset)
        return "FileHeader(offset={})".format(where)

    def check_magic(self):
        """
        @return True when the eight magic bytes decode to the expected
          value; False when they differ or cannot be decoded.
        """
        try:
            magic = self.magic()
        except UnicodeDecodeError:
            return False
        return magic == "ElfFile\x00"

    def calculate_checksum(self):
        """
        @return An integer in the range of an unsigned int that is the
          CRC32 of the first 0x78 bytes of the header, for comparison
          with the stored `checksum` field.
        """
        crc = binascii.crc32(self.unpack_binary(0, 0x78))
        return crc & 0xFFFFFFFF

    def verify(self):
        """
        @return A boolean that is True only when every heuristic check
          passes: magic, version 3.1, a header chunk size of 0x1000,
          and a stored checksum equal to the calculated one.
        """
        if not self.check_magic():
            return False
        if self.major_version() != 0x3:
            return False
        if self.minor_version() != 0x1:
            return False
        if self.header_chunk_size() != 0x1000:
            return False
        return self.checksum() == self.calculate_checksum()

    def is_dirty(self):
        """
        @return A boolean that indicates that the log has been
          opened and was changed, though not all changes might be
          reflected in the file header (bit 0x1 of `flags`).
        """
        return bool(self.flags() & 0x1)

    def is_full(self):
        """
        @return A boolean that indicates that the log has reached its
          maximum configured size, the retention policy did not allow
          enough space to be reclaimed from the oldest records, and an
          event message could not be written (bit 0x2 of `flags`).
        """
        return bool(self.flags() & 0x2)

    def first_chunk(self):
        """
        @return A ChunkHeader instance for the first chunk in the log
          file, located `header_chunk_size()` bytes after this header's
          offset.
        """
        return ChunkHeader(self._buf, self._offset + self.header_chunk_size())

    def current_chunk(self):
        """
        @return A ChunkHeader instance for the chunk selected by the
          header's `current_chunk_number`, counting 0x10000 bytes per
          chunk from the first chunk.
        """
        position = self._offset + self.header_chunk_size() + self.current_chunk_number() * 0x10000
        return ChunkHeader(self._buf, position)

    def chunks(self, include_inactive=False):
        """
        @return A generator that yields the chunks of the log file,
          beginning with the one directly after the FileHeader and
          advancing 0x10000 bytes at a time while a whole chunk still
          fits inside the buffer.

        By default at most `chunk_count()` chunks are produced.  When
        `include_inactive` is true that cap is lifted, so chunks beyond
        those declared in the file header (which may be corrupt) are
        enumerated as well.
        """
        limit = sys.maxsize if include_inactive else self.chunk_count()

        position = self._offset + self.header_chunk_size()
        produced = 0
        while position + 0x10000 <= len(self._buf) and produced < limit:
            yield ChunkHeader(self._buf, position)
            position += 0x10000
            produced += 1

    def get_record(self, record_num):
        """
        Find a Record by record number.

        @type record_num:  int
        @param record_num: The record number of the record to fetch.
        @rtype Record or None
        @return The record requested by record number, or None if the
          record is not found.
        """
        for chunk in self.chunks():
            lowest = chunk.log_first_record_number()
            highest = chunk.log_last_record_number()
            # Only scan chunks whose declared range covers the target.
            if lowest <= record_num <= highest:
                for candidate in chunk.records():
                    if candidate.record_num() == record_num:
                        return candidate
        return None
263 | 
264 | 
class Template(object):
    """
    Wraps a template node and renders it with substitution values by
      way of a cached `str.format` string.
    """

    def __init__(self, template_node):
        self._template_node = template_node
        self._xml = None

    def _load_xml(self):
        """
        Build the format string on first use and cache it in `_xml`.

        Literal braces in the node's template format are doubled so
          `str.format` leaves them alone, then each substitution marker
          is replaced by a positional field carrying its index.

        TODO(wb): One day, nodes should generate format strings
          instead of the XML format made-up abomination.
        """
        if self._xml is None:
            matcher = r"\[(?:Normal|Conditional) Substitution\(index=(\d+), type=\d+\)\]"
            escaped = self._template_node.template_format().replace("{", "{{").replace("}", "}}")
            self._xml = re.sub(matcher, "{\\1:}", escaped)

    def make_substitutions(self, substitutions):
        """
        Render the template using the XML of each substitution.

        @type substitutions: list of VariantTypeNode
        @return The template text with every positional field filled
          by the `xml()` of the substitution at that index.
        """
        self._load_xml()
        rendered = [node.xml() for node in substitutions]
        return self._xml.format(*rendered)

    def node(self):
        """
        @return The template node this instance was created with.
        """
        return self._template_node
292 | 
293 | 
class ChunkHeader(Block):
    """
    Parser for one chunk of an EVTX file: its header fields, its
      string and template tables, and the records stored in it.
    """

    def __init__(self, buf, offset):
        logger.debug("CHUNK HEADER at {}.".format(hex(offset)))
        super(ChunkHeader, self).__init__(buf, offset)
        # Both tables are populated lazily; None means "not loaded yet".
        self._strings = None
        self._templates = None

        self.declare_field("string", "magic", 0x0, length=8)
        self.declare_field("qword", "file_first_record_number")
        self.declare_field("qword", "file_last_record_number")
        self.declare_field("qword", "log_first_record_number")
        self.declare_field("qword", "log_last_record_number")
        self.declare_field("dword", "header_size")
        self.declare_field("dword", "last_record_offset")
        self.declare_field("dword", "next_record_offset")
        self.declare_field("dword", "data_checksum")
        self.declare_field("binary", "unused", length=0x44)
        self.declare_field("dword", "header_checksum")

    def __repr__(self):
        return "ChunkHeader(buf={!r}, offset={!r})".format(self._buf, self._offset)

    def __str__(self):
        return "ChunkHeader(offset={})".format(hex(self._offset))

    def check_magic(self):
        """
        @return A boolean that indicates if the first eight bytes of
          the ChunkHeader match the expected magic value.  Bytes that
          cannot be decoded count as a mismatch.
        """
        try:
            return self.magic() == "ElfChnk\x00"
        except UnicodeDecodeError:
            return False

    def calculate_header_checksum(self):
        """
        @return An integer in the range of an unsigned int that
          is the calculated CRC32 checksum of the ChunkHeader fields.
          The input is the data read with unpack_binary(0x0, 0x78)
          followed by the data read with unpack_binary(0x80, 0x180).
        """
        data = self.unpack_binary(0x0, 0x78)
        data += self.unpack_binary(0x80, 0x180)
        return binascii.crc32(data) & 0xFFFFFFFF

    def calculate_data_checksum(self):
        """
        @return An integer in the range of an unsigned int that
          is the calculated CRC32 checksum of the Chunk data, read
          from chunk offset 0x200 up to `next_record_offset()`.
        """
        data = self.unpack_binary(0x200, self.next_record_offset() - 0x200)
        return binascii.crc32(data) & 0xFFFFFFFF

    def verify(self):
        """
        @return A boolean that indicates that the ChunkHeader
          successfully passes a set of heuristic checks that
          all EVTX ChunkHeaders should pass: the magic value and
          both the header and data checksums.
        """
        return (
            self.check_magic()
            and self.calculate_header_checksum() == self.header_checksum()
            and self.calculate_data_checksum() == self.data_checksum()
        )

    def _load_strings(self):
        """
        Walk the 64 dword slots that start at chunk offset 0x80 and
          register every NameStringNode reachable from them by
          following each node's `next_offset()` until it is 0.

        @return None
        """
        if self._strings is None:
            self._strings = {}
        for i in range(64):
            ofs = self.unpack_dword(0x80 + (i * 4))
            # Offsets already visited in this chain; a repeat means the
            # chain loops back on itself and would never terminate.
            seen = set()
            while ofs > 0:
                if ofs in seen:
                    logger.warning("Cycle detected in string chain")
                    break
                seen.add(ofs)
                string_node = self.add_string(ofs)
                ofs = string_node.next_offset()

    def strings(self):
        """
        @return A dict(offset --> NameStringNode)
        """
        # Test against None rather than truthiness so that a loaded but
        # empty table is not re-walked on every call.
        if self._strings is None:
            self._load_strings()
        return self._strings

    def add_string(self, offset, parent=None):
        """
        @param offset An integer offset that is relative to the start of
          this chunk.
        @param parent (Optional) The parent of the newly created
           NameStringNode instance. (Default: this chunk).
        @return Newly added NameStringNode instance.
        """
        if self._strings is None:
            self._load_strings()
        string_node = NameStringNode(self._buf, self._offset + offset, self, parent or self)
        self._strings[offset] = string_node
        return string_node

    def _load_templates(self):
        """
        Walk the 32 dword slots that start at chunk offset 0x180 and
          register every TemplateNode reachable from them by following
          each node's `next_offset()` until it is 0.  A chain is
          abandoned when the bytes preceding an offset do not hold the
          expected token (0x0C) and back-pointer.

        @return None
        """
        if self._templates is None:
            self._templates = {}
        for i in range(32):
            ofs = self.unpack_dword(0x180 + (i * 4))
            # Offsets already visited in this chain; a repeat means the
            # chain loops back on itself and would never terminate.
            seen = set()
            while ofs > 0:
                if ofs in seen:
                    logger.warning("Cycle detected in template chain")
                    break
                seen.add(ofs)
                # unclear why these are found before the offset
                # this is a direct port from A.S.'s code
                token = self.unpack_byte(ofs - 10)
                pointer = self.unpack_dword(ofs - 4)
                if token != 0x0C or pointer != ofs:
                    logger.warning("Unexpected token encountered")
                    ofs = 0
                    continue
                template = self.add_template(ofs)
                ofs = template.next_offset()

    def add_template(self, offset, parent=None):
        """
        @param offset An integer which contains the chunk-relative offset
           to a template to load into this Chunk.
        @param parent (Optional) The parent of the newly created
           TemplateNode instance. (Default: this chunk).
        @return Newly added TemplateNode instance.
        """
        if self._templates is None:
            self._load_templates()

        node = TemplateNode(self._buf, self._offset + offset, self, parent or self)
        self._templates[offset] = node
        return node

    def templates(self):
        """
        @return A dict(offset --> TemplateNode) of all encountered
          templates in this Chunk.
        """
        # Test against None rather than truthiness so that a loaded but
        # empty table is not re-walked on every call.
        if self._templates is None:
            self._load_templates()
        return self._templates

    def first_record(self):
        """
        @return The Record located 0x200 bytes into this chunk.
        """
        return Record(self._buf, self._offset + 0x200, self)

    def records(self):
        """
        @return A generator of the Records in this chunk.  Iteration
          starts at the first record and advances by each record's
          length; it stops at `next_record_offset()`, at a record whose
          length is not positive, or as soon as constructing a Record
          raises InvalidRecordException.
        """
        try:
            record = self.first_record()
        except InvalidRecordException:
            return
        while record._offset < self._offset + self.next_record_offset() and record.length() > 0:
            yield record
            try:
                record = Record(self._buf, record._offset + record.length(), self)
            except InvalidRecordException:
                return
447 | 
448 | 
class Record(Block):
    """
    Parser for a single event record stored inside a chunk.
    """

    def __init__(self, buf, offset, chunk):
        logger.debug("Record at {}.".format(hex(offset)))
        super().__init__(buf, offset)
        self._chunk = chunk

        self.declare_field("dword", "magic", 0x0)  # 0x00002a2a
        self.declare_field("dword", "size")
        self.declare_field("qword", "record_num")
        self.declare_field("filetime", "timestamp")

        declared_size = self.size()
        if declared_size > 0x10000:
            raise InvalidRecordException()

        # The trailing copy of the size sits in the last four bytes.
        self.declare_field("dword", "size2", declared_size - 4)

    def __repr__(self):
        template = "Record(buf={!r}, offset={!r})"
        return template.format(self._buf, self._offset)

    def __str__(self):
        where = hex(self._offset)
        return "Record(offset={})".format(where)

    def root(self):
        """
        @return The RootNode that begins 0x18 bytes into this record.
        """
        root_offset = self._offset + 0x18
        return RootNode(self._buf, root_offset, self._chunk, self)

    def length(self):
        """
        @return The value of the record's `size` field.
        """
        return self.size()

    def verify(self):
        """
        @return True when the leading and trailing size fields agree.
        """
        leading, trailing = self.size(), self.size2()
        return leading == trailing

    def data(self):
        """
        Return the raw data block which makes up this record.

        @return A copy of the slice of the buffer that makes up this
          record, from its offset through `size()` bytes.
        """
        start = self.offset()
        return self._buf[start : start + self.size()]

    def xml(self):
        """
        render the record into XML.
        does not include the xml declaration header.

        Returns:
          str: the rendered xml document.
        """
        rendered = e_views.evtx_record_xml_view(self)
        return rendered

    def lxml(self):
        """
        render the record into a lxml document.
        this is useful for querying data from the record using xpath, etc.

        note: lxml must be installed.

        Returns:
          the value of lxml.etree.fromstring() for the rendered document.

        Raises:
          ImportError: if lxml is not installed.
        """
        import lxml.etree

        document = e_views.XML_HEADER + self.xml()
        return lxml.etree.fromstring(document.encode("utf-8"))
516 | 


--------------------------------------------------------------------------------
/Evtx/Nodes.py:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/python
   2 | #    This file is part of python-evtx.
   3 | #
   4 | #   Copyright 2012, 2013 Willi Ballenthin william.ballenthin@mandiant.com>
   5 | #                    while at Mandiant <http://www.mandiant.com>
   6 | #
   7 | #   Licensed under the Apache License, Version 2.0 (the "License");
   8 | #   you may not use this file except in compliance with the License.
   9 | #   You may obtain a copy of the License at
  10 | #
  11 | #       http://www.apache.org/licenses/LICENSE-2.0
  12 | #
  13 | #   Unless required by applicable law or agreed to in writing, software
  14 | #   distributed under the License is distributed on an "AS IS" BASIS,
  15 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16 | #   See the License for the specific language governing permissions and
  17 | #   limitations under the License.
  18 | from __future__ import absolute_import
  19 | 
  20 | import re
  21 | import base64
  22 | import itertools
  23 | 
  24 | import hexdump
  25 | 
  26 | from .BinaryParser import Block, ParseException, memoize
  27 | 
  28 | 
class SYSTEM_TOKENS:
    """
    Numeric values of the binary XML system tokens.

    BXmlNode._children masks a token byte down to its low nibble and
      uses the result as an index into `node_dispatch_table`, so these
      values all fit in four bits.  No name is defined here for 0x09.
    """

    EndOfStreamToken = 0x00
    OpenStartElementToken = 0x01
    CloseStartElementToken = 0x02
    CloseEmptyElementToken = 0x03
    CloseElementToken = 0x04
    ValueToken = 0x05
    AttributeToken = 0x06
    CDataSectionToken = 0x07
    EntityReferenceToken = 0x08
    ProcessingInstructionTargetToken = 0x0A
    ProcessingInstructionDataToken = 0x0B
    TemplateInstanceToken = 0x0C
    NormalSubstitutionToken = 0x0D
    ConditionalSubstitutionToken = 0x0E
    StartOfStreamToken = 0x0F
  45 | 
  46 | 
class NODE_TYPES:
    """
    Numeric type codes for variant values.

    get_variant_value() uses these codes as the keys of the dict that
      selects which *TypeNode class to use for a value.
    """

    NULL = 0x00
    WSTRING = 0x01
    STRING = 0x02
    SIGNED_BYTE = 0x03
    UNSIGNED_BYTE = 0x04
    SIGNED_WORD = 0x05
    UNSIGNED_WORD = 0x06
    SIGNED_DWORD = 0x07
    UNSIGNED_DWORD = 0x08
    SIGNED_QWORD = 0x09
    UNSIGNED_QWORD = 0x0A
    FLOAT = 0x0B
    DOUBLE = 0x0C
    BOOLEAN = 0x0D
    BINARY = 0x0E
    GUID = 0x0F
    SIZE = 0x10
    FILETIME = 0x11
    SYSTEMTIME = 0x12
    SID = 0x13
    HEX32 = 0x14
    HEX64 = 0x15
    BXML = 0x21
    WSTRINGARRAY = 0x81
  72 | 
  73 | 
# Indexed by the low nibble of a token byte to find the node class that
# handles it (see BXmlNode._children).  Declared empty here so the classes
# below can refer to it.
node_dispatch_table = []  # updated at end of file
# NOTE(review): not referenced in the visible part of this file; presumably
# holds display names for the tokens - confirm against the end of the file.
node_readable_tokens = []  # updated at end of file
  76 | 
  77 | 
  78 | class SuppressConditionalSubstitution(Exception):
  79 |     """
  80 |     This exception is to be thrown to indicate that a conditional
  81 |       substitution evaluated to NULL, and the parent element should
  82 |       be suppressed. This exception should be caught at the first
  83 |       opportunity, and must not propagate far up the call chain.
  84 | 
  85 |     Strategy:
  86 |       AttributeNode catches this, .xml() --> ""
  87 |       StartOpenElementNode catches this for each child, ensures
  88 |         there's at least one useful value.  Or, .xml() --> ""
  89 |     """
  90 | 
  91 |     def __init__(self, msg):
  92 |         super(SuppressConditionalSubstitution, self).__init__(msg)
  93 | 
  94 | 
  95 | class UnexpectedStateException(ParseException):
  96 |     """
  97 |     UnexpectedStateException is an exception to be thrown when the parser
  98 |       encounters an unexpected value or state. This probably means there
  99 |       is a bug in the parser, but could stem from a corrupted input file.
 100 |     """
 101 | 
 102 |     def __init__(self, msg):
 103 |         super(UnexpectedStateException, self).__init__(msg)
 104 | 
 105 | 
 106 | class BXmlNode(Block):
 107 | 
 108 |     def __init__(self, buf, offset, chunk, parent):
 109 |         super(BXmlNode, self).__init__(buf, offset)
 110 |         self._chunk = chunk
 111 |         self._parent = parent
 112 | 
 113 |     def __repr__(self):
 114 |         return "BXmlNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 115 |             self._buf, self.offset(), self._chunk, self._parent
 116 |         )
 117 | 
 118 |     def __str__(self):
 119 |         return "BXmlNode(offset={})".format(hex(self.offset()))
 120 | 
 121 |     def dump(self):
 122 |         b = self._buf[self.offset() : self.offset() + self.length()]
 123 |         return hexdump.hexdump(b, result="return")
 124 | 
 125 |     def tag_length(self):
 126 |         """
 127 |         This method must be implemented and overridden for all BXmlNodes.
 128 |         @return An integer specifying the length of this tag, not including
 129 |           its children.
 130 |         """
 131 |         raise NotImplementedError("tag_length not implemented for {!r}").format(self)
 132 | 
 133 |     def _children(self, max_children=None, end_tokens=[SYSTEM_TOKENS.EndOfStreamToken]):
 134 |         """
 135 |         @return A list containing all of the children BXmlNodes.
 136 |         """
 137 |         ret = []
 138 |         ofs = self.tag_length()
 139 | 
 140 |         if max_children:
 141 |             gen = list(range(max_children))
 142 |         else:
 143 |             gen = itertools.count()
 144 | 
 145 |         for _ in gen:
 146 |             # we lose error checking by masking off the higher nibble,
 147 |             #   but, some tokens like 0x01, make use of the flags nibble.
 148 |             token = self.unpack_byte(ofs) & 0x0F
 149 |             try:
 150 |                 HandlerNodeClass = node_dispatch_table[token]
 151 |                 child = HandlerNodeClass(self._buf, self.offset() + ofs, self._chunk, self)
 152 |             except IndexError:
 153 |                 raise ParseException("Unexpected token {:02X} at {}".format(token, self.absolute_offset(0x0) + ofs))
 154 |             ret.append(child)
 155 |             ofs += child.length()
 156 |             if token in end_tokens:
 157 |                 break
 158 |             if child.find_end_of_stream():
 159 |                 break
 160 |         return ret
 161 | 
 162 |     @memoize
 163 |     def children(self):
 164 |         return self._children()
 165 | 
 166 |     @memoize
 167 |     def length(self):
 168 |         """
 169 |         @return An integer specifying the length of this tag and all
 170 |           its children.
 171 |         """
 172 |         ret = self.tag_length()
 173 |         for child in self.children():
 174 |             ret += child.length()
 175 |         return ret
 176 | 
 177 |     @memoize
 178 |     def find_end_of_stream(self):
 179 |         for child in self.children():
 180 |             if isinstance(child, EndOfStreamNode):
 181 |                 return child
 182 |             ret = child.find_end_of_stream()
 183 |             if ret:
 184 |                 return ret
 185 |         return None
 186 | 
 187 | 
 188 | class NameStringNode(BXmlNode):
 189 |     def __init__(self, buf, offset, chunk, parent):
 190 |         super(NameStringNode, self).__init__(buf, offset, chunk, parent)
 191 |         self.declare_field("dword", "next_offset", 0x0)
 192 |         self.declare_field("word", "hash")
 193 |         self.declare_field("word", "string_length")
 194 |         self.declare_field("wstring", "string", length=self.string_length())
 195 | 
 196 |     def __repr__(self):
 197 |         return "NameStringNode(buf={!r}, offset={!r}, chunk={!r})".format(self._buf, self.offset(), self._chunk)
 198 | 
 199 |     def __str__(self):
 200 |         return "NameStringNode(offset={}, length={}, end={})".format(
 201 |             hex(self.offset()), hex(self.length()), hex(self.offset() + self.length())
 202 |         )
 203 | 
 204 |     def string(self):
 205 |         return str(self._string())
 206 | 
 207 |     def tag_length(self):
 208 |         return (self.string_length() * 2) + 8
 209 | 
 210 |     def length(self):
 211 |         # two bytes unaccounted for...
 212 |         return self.tag_length() + 2
 213 | 
 214 | 
 215 | class TemplateNode(BXmlNode):
 216 |     def __init__(self, buf, offset, chunk, parent):
 217 |         super(TemplateNode, self).__init__(buf, offset, chunk, parent)
 218 |         self.declare_field("dword", "next_offset", 0x0)
 219 |         self.declare_field("dword", "template_id")
 220 |         self.declare_field("guid", "guid", 0x04)  # unsure why this overlaps
 221 |         self.declare_field("dword", "data_length")
 222 | 
 223 |     def __repr__(self):
 224 |         return "TemplateNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 225 |             self._buf, self.offset(), self._chunk, self._parent
 226 |         )
 227 | 
 228 |     def __str__(self):
 229 |         return "TemplateNode(offset={}, guid={}, length={})".format(hex(self.offset()), self.guid(), hex(self.length()))
 230 | 
 231 |     def tag_length(self):
 232 |         return 0x18
 233 | 
 234 |     def length(self):
 235 |         return self.tag_length() + self.data_length()
 236 | 
 237 | 
 238 | class EndOfStreamNode(BXmlNode):
 239 |     """
 240 |     The binary XML node for the system token 0x00.
 241 | 
 242 |     This is the "end of stream" token. It may never actually
 243 |       be instantiated here.
 244 |     """
 245 | 
 246 |     def __init__(self, buf, offset, chunk, parent):
 247 |         super(EndOfStreamNode, self).__init__(buf, offset, chunk, parent)
 248 | 
 249 |     def __repr__(self):
 250 |         return "EndOfStreamNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 251 |             self._buf, self.offset(), self._chunk, self._parent
 252 |         )
 253 | 
 254 |     def __str__(self):
 255 |         return "EndOfStreamNode(offset={}, length={}, token={})".format(hex(self.offset()), hex(self.length()), 0x00)
 256 | 
 257 |     def flags(self):
 258 |         return self.token() >> 4
 259 | 
 260 |     def tag_length(self):
 261 |         return 1
 262 | 
 263 |     def length(self):
 264 |         return 1
 265 | 
 266 |     def children(self):
 267 |         return []
 268 | 
 269 | 
 270 | class OpenStartElementNode(BXmlNode):
 271 |     """
 272 |     The binary XML node for the system token 0x01.
 273 | 
 274 |     This is the "open start element" token.
 275 |     """
 276 | 
 277 |     def __init__(self, buf, offset, chunk, parent):
 278 |         super(OpenStartElementNode, self).__init__(buf, offset, chunk, parent)
 279 |         self.declare_field("byte", "token", 0x0)
 280 |         self.declare_field("word", "unknown0")
 281 |         # TODO(wb): use this size() field.
 282 |         self.declare_field("dword", "size")
 283 |         self.declare_field("dword", "string_offset")
 284 |         self._tag_length = 11
 285 |         self._element_type = 0
 286 | 
 287 |         if self.flags() & 0x04:
 288 |             self._tag_length += 4
 289 | 
 290 |         if self.string_offset() > self.offset() - self._chunk._offset:
 291 |             new_string = self._chunk.add_string(self.string_offset(), parent=self)
 292 |             self._tag_length += new_string.length()
 293 | 
 294 |     def __repr__(self):
 295 |         return "OpenStartElementNode(buf={!r}, offset={!r}, chunk={!r})".format(self._buf, self.offset(), self._chunk)
 296 | 
 297 |     def __str__(self):
 298 |         return "OpenStartElementNode(offset={}, name={}, length={}, token={}, end={}, taglength={}, endtag={})".format(
 299 |             hex(self.offset()),
 300 |             self.tag_name(),
 301 |             hex(self.length()),
 302 |             hex(self.token()),
 303 |             hex(self.offset() + self.length()),
 304 |             hex(self.tag_length()),
 305 |             hex(self.offset() + self.tag_length()),
 306 |         )
 307 | 
 308 |     @memoize
 309 |     def is_empty_node(self):
 310 |         for child in self.children():
 311 |             if type(child) is CloseEmptyElementNode:
 312 |                 return True
 313 |         return False
 314 | 
 315 |     def flags(self):
 316 |         return self.token() >> 4
 317 | 
 318 |     @memoize
 319 |     def tag_name(self):
 320 |         return self._chunk.strings()[self.string_offset()].string()
 321 | 
 322 |     def tag_length(self):
 323 |         return self._tag_length
 324 | 
 325 |     def verify(self):
 326 |         return self.flags() & 0x0B == 0 and self.opcode() & 0x0F == 0x01
 327 | 
 328 |     @memoize
 329 |     def children(self):
 330 |         return self._children(end_tokens=[SYSTEM_TOKENS.CloseElementToken, SYSTEM_TOKENS.CloseEmptyElementToken])
 331 | 
 332 | 
 333 | class CloseStartElementNode(BXmlNode):
 334 |     """
 335 |     The binary XML node for the system token 0x02.
 336 | 
 337 |     This is the "close start element" token.
 338 |     """
 339 | 
 340 |     def __init__(self, buf, offset, chunk, parent):
 341 |         super(CloseStartElementNode, self).__init__(buf, offset, chunk, parent)
 342 |         self.declare_field("byte", "token", 0x0)
 343 | 
 344 |     def __repr__(self):
 345 |         return "CloseStartElementNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 346 |             self._buf, self.offset(), self._chunk, self._parent
 347 |         )
 348 | 
 349 |     def __str__(self):
 350 |         return "CloseStartElementNode(offset={}, length={}, token={})".format(
 351 |             hex(self.offset()), hex(self.length()), hex(self.token())
 352 |         )
 353 | 
 354 |     def flags(self):
 355 |         return self.token() >> 4
 356 | 
 357 |     def tag_length(self):
 358 |         return 1
 359 | 
 360 |     def length(self):
 361 |         return 1
 362 | 
 363 |     def children(self):
 364 |         return []
 365 | 
 366 |     def verify(self):
 367 |         return self.flags() & 0x0F == 0 and self.opcode() & 0x0F == 0x02
 368 | 
 369 | 
 370 | class CloseEmptyElementNode(BXmlNode):
 371 |     """
 372 |     The binary XML node for the system token 0x03.
 373 |     """
 374 | 
 375 |     def __init__(self, buf, offset, chunk, parent):
 376 |         super(CloseEmptyElementNode, self).__init__(buf, offset, chunk, parent)
 377 |         self.declare_field("byte", "token", 0x0)
 378 | 
 379 |     def __repr__(self):
 380 |         return "CloseEmptyElementNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 381 |             self._buf, self.offset(), self._chunk, self._parent
 382 |         )
 383 | 
 384 |     def __str__(self):
 385 |         return "CloseEmptyElementNode(offset={}, length={}, token={})".format(
 386 |             hex(self.offset()), hex(self.length()), hex(0x03)
 387 |         )
 388 | 
 389 |     def flags(self):
 390 |         return self.token() >> 4
 391 | 
 392 |     def tag_length(self):
 393 |         return 1
 394 | 
 395 |     def length(self):
 396 |         return 1
 397 | 
 398 |     def children(self):
 399 |         return []
 400 | 
 401 | 
 402 | class CloseElementNode(BXmlNode):
 403 |     """
 404 |     The binary XML node for the system token 0x04.
 405 | 
 406 |     This is the "close element" token.
 407 |     """
 408 | 
 409 |     def __init__(self, buf, offset, chunk, parent):
 410 |         super(CloseElementNode, self).__init__(buf, offset, chunk, parent)
 411 |         self.declare_field("byte", "token", 0x0)
 412 | 
 413 |     def __repr__(self):
 414 |         return "CloseElementNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 415 |             self._buf, self.offset(), self._chunk, self._parent
 416 |         )
 417 | 
 418 |     def __str__(self):
 419 |         return "CloseElementNode(offset={}, length={}, token={})".format(
 420 |             hex(self.offset()), hex(self.length()), hex(self.token())
 421 |         )
 422 | 
 423 |     def flags(self):
 424 |         return self.token() >> 4
 425 | 
 426 |     def tag_length(self):
 427 |         return 1
 428 | 
 429 |     def length(self):
 430 |         return 1
 431 | 
 432 |     def children(self):
 433 |         return []
 434 | 
 435 |     def verify(self):
 436 |         return self.flags() & 0x0F == 0 and self.opcode() & 0x0F == 0x04
 437 | 
 438 | 
 439 | def get_variant_value(buf, offset, chunk, parent, type_, length=None):
 440 |     """
 441 |     @return A VariantType subclass instance found in the given
 442 |       buffer and offset.
 443 |     """
 444 |     types = {
 445 |         NODE_TYPES.NULL: NullTypeNode,
 446 |         NODE_TYPES.WSTRING: WstringTypeNode,
 447 |         NODE_TYPES.STRING: StringTypeNode,
 448 |         NODE_TYPES.SIGNED_BYTE: SignedByteTypeNode,
 449 |         NODE_TYPES.UNSIGNED_BYTE: UnsignedByteTypeNode,
 450 |         NODE_TYPES.SIGNED_WORD: SignedWordTypeNode,
 451 |         NODE_TYPES.UNSIGNED_WORD: UnsignedWordTypeNode,
 452 |         NODE_TYPES.SIGNED_DWORD: SignedDwordTypeNode,
 453 |         NODE_TYPES.UNSIGNED_DWORD: UnsignedDwordTypeNode,
 454 |         NODE_TYPES.SIGNED_QWORD: SignedQwordTypeNode,
 455 |         NODE_TYPES.UNSIGNED_QWORD: UnsignedQwordTypeNode,
 456 |         NODE_TYPES.FLOAT: FloatTypeNode,
 457 |         NODE_TYPES.DOUBLE: DoubleTypeNode,
 458 |         NODE_TYPES.BOOLEAN: BooleanTypeNode,
 459 |         NODE_TYPES.BINARY: BinaryTypeNode,
 460 |         NODE_TYPES.GUID: GuidTypeNode,
 461 |         NODE_TYPES.SIZE: SizeTypeNode,
 462 |         NODE_TYPES.FILETIME: FiletimeTypeNode,
 463 |         NODE_TYPES.SYSTEMTIME: SystemtimeTypeNode,
 464 |         NODE_TYPES.SID: SIDTypeNode,
 465 |         NODE_TYPES.HEX32: Hex32TypeNode,
 466 |         NODE_TYPES.HEX64: Hex64TypeNode,
 467 |         NODE_TYPES.BXML: BXmlTypeNode,
 468 |         NODE_TYPES.WSTRINGARRAY: WstringArrayTypeNode,
 469 |     }
 470 |     try:
 471 |         TypeClass = types[type_]
 472 |     except IndexError:
 473 |         raise NotImplementedError("Type {} not implemented".format(type_))
 474 |     return TypeClass(buf, offset, chunk, parent, length=length)
 475 | 
 476 | 
 477 | class ValueNode(BXmlNode):
 478 |     """
 479 |     The binary XML node for the system token 0x05.
 480 | 
 481 |     This is the "value" token.
 482 |     """
 483 | 
 484 |     def __init__(self, buf, offset, chunk, parent):
 485 |         super(ValueNode, self).__init__(buf, offset, chunk, parent)
 486 |         self.declare_field("byte", "token", 0x0)
 487 |         self.declare_field("byte", "type")
 488 | 
 489 |     def __repr__(self):
 490 |         return "ValueNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 491 |             self._buf, self.offset(), self._chunk, self._parent
 492 |         )
 493 | 
 494 |     def __str__(self):
 495 |         return "ValueNode(offset={}, length={}, token={}, value={})".format(
 496 |             hex(self.offset()), hex(self.length()), hex(self.token()), self.value().string()
 497 |         )
 498 | 
 499 |     def flags(self):
 500 |         return self.token() >> 4
 501 | 
 502 |     def value(self):
 503 |         return self.children()[0]
 504 | 
 505 |     def tag_length(self):
 506 |         return 2
 507 | 
 508 |     def children(self):
 509 |         child = get_variant_value(self._buf, self.offset() + self.tag_length(), self._chunk, self, self.type())
 510 |         return [child]
 511 | 
 512 |     def verify(self):
 513 |         return self.flags() & 0x0B == 0 and self.token() & 0x0F == SYSTEM_TOKENS.ValueToken
 514 | 
 515 | 
 516 | class AttributeNode(BXmlNode):
 517 |     """
 518 |     The binary XML node for the system token 0x06.
 519 | 
 520 |     This is the "attribute" token.
 521 |     """
 522 | 
 523 |     def __init__(self, buf, offset, chunk, parent):
 524 |         super(AttributeNode, self).__init__(buf, offset, chunk, parent)
 525 |         self.declare_field("byte", "token", 0x0)
 526 |         self.declare_field("dword", "string_offset")
 527 | 
 528 |         self._name_string_length = 0
 529 |         if self.string_offset() > self.offset() - self._chunk._offset:
 530 |             new_string = self._chunk.add_string(self.string_offset(), parent=self)
 531 |             self._name_string_length += new_string.length()
 532 | 
 533 |     def __repr__(self):
 534 |         return "AttributeNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 535 |             self._buf, self.offset(), self._chunk, self._parent
 536 |         )
 537 | 
 538 |     def __str__(self):
 539 |         return "AttributeNode(offset={}, length={}, token={}, name={}, value={})".format(
 540 |             hex(self.offset()), hex(self.length()), hex(self.token()), self.attribute_name(), self.attribute_value()
 541 |         )
 542 | 
 543 |     def flags(self):
 544 |         return self.token() >> 4
 545 | 
 546 |     def attribute_name(self):
 547 |         """
 548 |         @return A NameNode instance that contains the attribute name.
 549 |         """
 550 |         return self._chunk.strings()[self.string_offset()]
 551 | 
 552 |     def attribute_value(self):
 553 |         """
 554 |         @return A BXmlNode instance that is one of (ValueNode,
 555 |           ConditionalSubstitutionNode, NormalSubstitutionNode).
 556 |         """
 557 |         return self.children()[0]
 558 | 
 559 |     def tag_length(self):
 560 |         return 5 + self._name_string_length
 561 | 
 562 |     def verify(self):
 563 |         return self.flags() & 0x0B == 0 and self.opcode() & 0x0F == 0x06
 564 | 
 565 |     @memoize
 566 |     def children(self):
 567 |         return self._children(max_children=1)
 568 | 
 569 | 
 570 | class CDataSectionNode(BXmlNode):
 571 |     """
 572 |     The binary XML node for the system token 0x07.
 573 | 
 574 |     This is the "CDATA section" system token.
 575 |     """
 576 | 
 577 |     def __init__(self, buf, offset, chunk, parent):
 578 |         super(CDataSectionNode, self).__init__(buf, offset, chunk, parent)
 579 |         self.declare_field("byte", "token", 0x0)
 580 |         self.declare_field("word", "string_length")
 581 |         self.declare_field("wstring", "cdata", length=self.string_length() - 2)
 582 | 
 583 |     def __repr__(self):
 584 |         return "CDataSectionNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 585 |             self._buf, self.offset(), self._chunk, self._parent
 586 |         )
 587 | 
 588 |     def __str__(self):
 589 |         return "CDataSectionNode(offset={}, length={}, token={})".format(hex(self.offset()), hex(self.length()), 0x07)
 590 | 
 591 |     def flags(self):
 592 |         return self.token() >> 4
 593 | 
 594 |     def tag_length(self):
 595 |         return 0x3 + self.string_length()
 596 | 
 597 |     def length(self):
 598 |         return self.tag_length()
 599 | 
 600 |     def children(self):
 601 |         return []
 602 | 
 603 |     def verify(self):
 604 |         return self.flags() == 0x0 and self.token() & 0x0F == SYSTEM_TOKENS.CDataSectionToken
 605 | 
 606 | 
 607 | class CharacterReferenceNode(BXmlNode):
 608 |     """
 609 |     The binary XML node for the system token 0x08.
 610 | 
 611 |     This is an character reference node.  That is, something that represents
 612 |       a non-XML character, eg. & --> &#x0038;.
 613 |     """
 614 | 
 615 |     def __init__(self, buf, offset, chunk, parent):
 616 |         super(CharacterReferenceNode, self).__init__(buf, offset, chunk, parent)
 617 |         self.declare_field("byte", "token", 0x0)
 618 |         self.declare_field("word", "entity")
 619 |         self._tag_length = 3
 620 | 
 621 |     def __repr__(self):
 622 |         return "CharacterReferenceNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 623 |             self._buf, self.offset(), self._chunk, self._parent
 624 |         )
 625 | 
 626 |     def __str__(self):
 627 |         return "CharacterReferenceNode(offset={}, length={}, token={})".format(
 628 |             hex(self.offset()), hex(self.length()), hex(0x08)
 629 |         )
 630 | 
 631 |     def entity_reference(self):
 632 |         return "&#x%04x;" % (self.entity())
 633 | 
 634 |     def flags(self):
 635 |         return self.token() >> 4
 636 | 
 637 |     def tag_length(self):
 638 |         return self._tag_length
 639 | 
 640 |     def children(self):
 641 |         return []
 642 | 
 643 | 
 644 | class EntityReferenceNode(BXmlNode):
 645 |     """
 646 |     The binary XML node for the system token 0x09.
 647 | 
 648 |     This is an entity reference node.  That is, something that represents
 649 |       a non-XML character, eg. & --> &amp;.
 650 | 
 651 |     TODO(wb): this is untested.
 652 |     """
 653 | 
 654 |     def __init__(self, buf, offset, chunk, parent):
 655 |         super(EntityReferenceNode, self).__init__(buf, offset, chunk, parent)
 656 |         self.declare_field("byte", "token", 0x0)
 657 |         self.declare_field("dword", "string_offset")
 658 |         self._tag_length = 5
 659 | 
 660 |         if self.string_offset() > self.offset() - self._chunk.offset():
 661 |             new_string = self._chunk.add_string(self.string_offset(), parent=self)
 662 |             self._tag_length += new_string.length()
 663 | 
 664 |     def __repr__(self):
 665 |         return "EntityReferenceNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 666 |             self._buf, self.offset(), self._chunk, self._parent
 667 |         )
 668 | 
 669 |     def __str__(self):
 670 |         return "EntityReferenceNode(offset={}, length={}, token={})".format(
 671 |             hex(self.offset()), hex(self.length()), hex(0x09)
 672 |         )
 673 | 
 674 |     def entity_reference(self):
 675 |         return "&{};".format(self._chunk.strings()[self.string_offset()].string())
 676 | 
 677 |     def flags(self):
 678 |         return self.token() >> 4
 679 | 
 680 |     def tag_length(self):
 681 |         return self._tag_length
 682 | 
 683 |     def children(self):
 684 |         # TODO(wb): it may be possible for this element to have children.
 685 |         return []
 686 | 
 687 | 
 688 | class ProcessingInstructionTargetNode(BXmlNode):
 689 |     """
 690 |     The binary XML node for the system token 0x0A.
 691 | 
 692 |     TODO(wb): untested.
 693 |     """
 694 | 
 695 |     def __init__(self, buf, offset, chunk, parent):
 696 |         super(ProcessingInstructionTargetNode, self).__init__(buf, offset, chunk, parent)
 697 |         self.declare_field("byte", "token", 0x0)
 698 |         self.declare_field("dword", "string_offset")
 699 |         self._tag_length = 5
 700 | 
 701 |         if self.string_offset() > self.offset() - self._chunk.offset():
 702 |             new_string = self._chunk.add_string(self.string_offset(), parent=self)
 703 |             self._tag_length += new_string.length()
 704 | 
 705 |     def __repr__(self):
 706 |         return "ProcessingInstructionTargetNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 707 |             self._buf, self.offset(), self._chunk, self._parent
 708 |         )
 709 | 
 710 |     def __str__(self):
 711 |         return "ProcessingInstructionTargetNode(offset={}, length={}, token={})".format(
 712 |             hex(self.offset()), hex(self.length()), hex(0x0A)
 713 |         )
 714 | 
 715 |     def processing_instruction_target(self):
 716 |         return "<?{}".format(self._chunk.strings()[self.string_offset()].string())
 717 | 
 718 |     def flags(self):
 719 |         return self.token() >> 4
 720 | 
 721 |     def tag_length(self):
 722 |         return self._tag_length
 723 | 
 724 |     def children(self):
 725 |         # TODO(wb): it may be possible for this element to have children.
 726 |         return []
 727 | 
 728 | 
 729 | class ProcessingInstructionDataNode(BXmlNode):
 730 |     """
 731 |     The binary XML node for the system token 0x0B.
 732 | 
 733 |     TODO(wb): untested.
 734 |     """
 735 | 
 736 |     def __init__(self, buf, offset, chunk, parent):
 737 |         super(ProcessingInstructionDataNode, self).__init__(buf, offset, chunk, parent)
 738 |         self.declare_field("byte", "token", 0x0)
 739 |         self.declare_field("word", "string_length")
 740 |         self._tag_length = 3 + (2 * self.string_length())
 741 | 
 742 |         if self.string_length() > 0:
 743 |             self._string = self.unpack_wstring(0x3, self.string_length())
 744 |         else:
 745 |             self._string = ""
 746 | 
 747 |     def __repr__(self):
 748 |         return "ProcessingInstructionDataNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 749 |             self._buf, self.offset(), self._chunk, self._parent
 750 |         )
 751 | 
 752 |     def __str__(self):
 753 |         return "ProcessingInstructionDataNode(offset={}, length={}, token={})".format(
 754 |             hex(self.offset()), hex(self.length()), hex(0x0B)
 755 |         )
 756 | 
 757 |     def flags(self):
 758 |         return self.token() >> 4
 759 | 
 760 |     def string(self):
 761 |         if self.string_length() > 0:
 762 |             return " {}?>".format(self._string)
 763 |         else:
 764 |             return "?>"
 765 | 
 766 |     def tag_length(self):
 767 |         return self._tag_length
 768 | 
 769 |     def children(self):
 770 |         # TODO(wb): it may be possible for this element to have children.
 771 |         return []
 772 | 
 773 | 
 774 | class TemplateInstanceNode(BXmlNode):
 775 |     """
 776 |     The binary XML node for the system token 0x0C.
 777 |     """
 778 | 
 779 |     def __init__(self, buf, offset, chunk, parent):
 780 |         super(TemplateInstanceNode, self).__init__(buf, offset, chunk, parent)
 781 |         self.declare_field("byte", "token", 0x0)
 782 |         self.declare_field("byte", "unknown0")
 783 |         self.declare_field("dword", "template_id")
 784 |         self.declare_field("dword", "template_offset")
 785 | 
 786 |         self._data_length = 0
 787 | 
 788 |         if self.is_resident_template():
 789 |             new_template = self._chunk.add_template(self.template_offset(), parent=self)
 790 |             self._data_length += new_template.length()
 791 | 
 792 |     def __repr__(self):
 793 |         return "TemplateInstanceNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 794 |             self._buf, self.offset(), self._chunk, self._parent
 795 |         )
 796 | 
 797 |     def __str__(self):
 798 |         return "TemplateInstanceNode(offset={}, length={}, token={})".format(
 799 |             hex(self.offset()), hex(self.length()), hex(0x0C)
 800 |         )
 801 | 
 802 |     def flags(self):
 803 |         return self.token() >> 4
 804 | 
 805 |     def is_resident_template(self):
 806 |         return self.template_offset() > self.offset() - self._chunk._offset
 807 | 
 808 |     def tag_length(self):
 809 |         return 10
 810 | 
 811 |     def length(self):
 812 |         return self.tag_length() + self._data_length
 813 | 
 814 |     def template(self):
 815 |         return self._chunk.templates()[self.template_offset()]
 816 | 
 817 |     def children(self):
 818 |         return []
 819 | 
 820 |     @memoize
 821 |     def find_end_of_stream(self):
 822 |         return self.template().find_end_of_stream()
 823 | 
 824 | 
 825 | class NormalSubstitutionNode(BXmlNode):
 826 |     """
 827 |     The binary XML node for the system token 0x0D.
 828 | 
 829 |     This is a "normal substitution" token.
 830 |     """
 831 | 
 832 |     def __init__(self, buf, offset, chunk, parent):
 833 |         super(NormalSubstitutionNode, self).__init__(buf, offset, chunk, parent)
 834 |         self.declare_field("byte", "token", 0x0)
 835 |         self.declare_field("word", "index")
 836 |         self.declare_field("byte", "type")
 837 | 
 838 |     def __repr__(self):
 839 |         return "NormalSubstitutionNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 840 |             self._buf, self.offset(), self._chunk, self._parent
 841 |         )
 842 | 
 843 |     def __str__(self):
 844 |         return "NormalSubstitutionNode(offset={}, length={}, token={}, index={}, type={})".format(
 845 |             hex(self.offset()), hex(self.length()), hex(self.token()), self.index(), self.type()
 846 |         )
 847 | 
 848 |     def flags(self):
 849 |         return self.token() >> 4
 850 | 
 851 |     def tag_length(self):
 852 |         return 0x4
 853 | 
 854 |     def length(self):
 855 |         return self.tag_length()
 856 | 
 857 |     def children(self):
 858 |         return []
 859 | 
 860 |     def verify(self):
 861 |         return self.flags() == 0 and self.token() & 0x0F == SYSTEM_TOKENS.NormalSubstitutionToken
 862 | 
 863 | 
 864 | class ConditionalSubstitutionNode(BXmlNode):
 865 |     """
 866 |     The binary XML node for the system token 0x0E.
 867 |     """
 868 | 
 869 |     def __init__(self, buf, offset, chunk, parent):
 870 |         super(ConditionalSubstitutionNode, self).__init__(buf, offset, chunk, parent)
 871 |         self.declare_field("byte", "token", 0x0)
 872 |         self.declare_field("word", "index")
 873 |         self.declare_field("byte", "type")
 874 | 
 875 |     def __repr__(self):
 876 |         return "ConditionalSubstitutionNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 877 |             self._buf, self.offset(), self._chunk, self._parent
 878 |         )
 879 | 
 880 |     def __str__(self):
 881 |         return "ConditionalSubstitutionNode(offset={}, length={}, token={})".format(
 882 |             hex(self.offset()), hex(self.length()), hex(0x0E)
 883 |         )
 884 | 
 885 |     def should_suppress(self, substitutions):
 886 |         sub = substitutions[self.index()]
 887 |         return type(sub) is NullTypeNode
 888 | 
 889 |     def flags(self):
 890 |         return self.token() >> 4
 891 | 
 892 |     def tag_length(self):
 893 |         return 0x4
 894 | 
 895 |     def length(self):
 896 |         return self.tag_length()
 897 | 
 898 |     def children(self):
 899 |         return []
 900 | 
 901 |     def verify(self):
 902 |         return self.flags() == 0 and self.token() & 0x0F == SYSTEM_TOKENS.ConditionalSubstitutionToken
 903 | 
 904 | 
 905 | class StreamStartNode(BXmlNode):
 906 |     """
 907 |     The binary XML node for the system token 0x0F.
 908 | 
 909 |     This is the "start of stream" token.
 910 |     """
 911 | 
 912 |     def __init__(self, buf, offset, chunk, parent):
 913 |         super(StreamStartNode, self).__init__(buf, offset, chunk, parent)
 914 |         self.declare_field("byte", "token", 0x0)
 915 |         self.declare_field("byte", "unknown0")
 916 |         self.declare_field("word", "unknown1")
 917 | 
 918 |     def __repr__(self):
 919 |         return "StreamStartNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
 920 |             self._buf, self.offset(), self._chunk, self._parent
 921 |         )
 922 | 
 923 |     def __str__(self):
 924 |         return "StreamStartNode(offset={}, length={}, token={})".format(
 925 |             hex(self.offset()), hex(self.length()), hex(self.token())
 926 |         )
 927 | 
 928 |     def verify(self):
 929 |         return (
 930 |             self.flags() == 0x0
 931 |             and self.token() & 0x0F == SYSTEM_TOKENS.StartOfStreamToken
 932 |             and self.unknown0() == 0x1
 933 |             and self.unknown1() == 0x1
 934 |         )
 935 | 
 936 |     def flags(self):
 937 |         return self.token() >> 4
 938 | 
 939 |     def tag_length(self):
 940 |         return 4
 941 | 
 942 |     def length(self):
 943 |         return self.tag_length() + 0
 944 | 
 945 |     def children(self):
 946 |         return []
 947 | 
 948 | 
class RootNode(BXmlNode):
    """
    The binary XML node for the Root node.

    The root has no tag of its own (tag_length() is 0).  Its children are
    parsed up to the end-of-stream token, and they are followed by a
    substitution array: a dword count, one 4-byte declaration per
    substitution, and then the substitution values themselves.
    """

    def __init__(self, buf, offset, chunk, parent):
        super(RootNode, self).__init__(buf, offset, chunk, parent)

    def __repr__(self):
        return "RootNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format(
            self._buf, self.offset(), self._chunk, self._parent
        )

    def __str__(self):
        return "RootNode(offset={}, length={})".format(hex(self.offset()), hex(self.length()))

    def tag_length(self):
        """
        @return 0; the root node has no tag.
        """
        return 0

    @memoize
    def children(self):
        """
        @return The template instances which make up this node.
        """
        return self._children(end_tokens=[SYSTEM_TOKENS.EndOfStreamToken])

    def tag_and_children_length(self):
        """
        @return The length of the tag of this element, and the children.
          This does not take into account the substitutions that may be
          at the end of this element.
        """
        children_length = 0

        for child in self.children():
            children_length += child.length()

        return self.tag_length() + children_length

    def template_instance(self):
        """
        parse the template instance node.
        this is used to compute the location of the template definition structure.

        Returns:
          TemplateInstanceNode: the template instance.
        """
        ofs = self.offset()
        # If the first byte's low nibble is 0xF (the start-of-stream token),
        # skip 4 bytes, which is the tag length of StreamStartNode.
        if self.unpack_byte(0x0) & 0x0F == 0xF:
            ofs += 4
        return TemplateInstanceNode(self._buf, ofs, self._chunk, self)

    def template(self):
        """
        parse the template referenced by this root node.
        note, this template structure is not guaranteed to be located within the root node's boundaries.

        Returns:
          TemplateNode: the template.
        """
        instance = self.template_instance()
        # template_offset() is added to the chunk's offset to get the
        # offset passed to TemplateNode.
        offset = self._chunk.offset() + instance.template_offset()
        node = TemplateNode(self._buf, offset, self._chunk, instance)
        return node

    @memoize
    def substitutions(self):
        """
        @return A list of VariantTypeNode subclass instances that
          contain the substitutions for this root node.
        @raise ParseException if a parsed value's length differs from its
          declared size by more than 4.
        """
        sub_decl = []
        sub_def = []
        # The substitution array starts right after the children.
        ofs = self.tag_and_children_length()
        sub_count = self.unpack_dword(ofs)
        ofs += 4
        # First pass: read the declarations, 4 bytes each (a word size at
        # +0 and a byte type at +2).
        for _ in range(sub_count):
            size = self.unpack_word(ofs)
            type_ = self.unpack_byte(ofs + 0x2)
            sub_decl.append((size, type_))
            ofs += 4
        # Second pass: the values follow the declarations back to back, each
        # occupying its declared size.
        for size, type_ in sub_decl:
            val = get_variant_value(self._buf, self.offset() + ofs, self._chunk, self, type_, length=size)
            if abs(size - val.length()) > 4:
                # TODO(wb): This is a hack, so I'm sorry.
                #   But, we are not passing around a 'length' field,
                #   so we have to depend on the structure of each
                #   variant type.  It seems some BXmlTypeNode sizes
                #   are not exact.  Hopefully, this is just alignment.
                #   So, a difference of up to 4 is tolerated; anything
                #   larger is rejected here.
                raise ParseException("Invalid substitution value size")
            sub_def.append(val)
            ofs += size
        return sub_def

    @memoize
    def length(self):
        """
        @return The total length of this node: the tag and children, the
          dword substitution count, and for every substitution its
          4-byte declaration plus its declared size.
        """
        ofs = self.tag_and_children_length()
        sub_count = self.unpack_dword(ofs)
        ofs += 4
        ret = ofs
        for _ in range(sub_count):
            size = self.unpack_word(ofs)
            ret += size + 4
            ofs += 4
        return ret
1055 | 
1056 | 
class VariantTypeNode(BXmlNode):
    """
    Base class for the variant type nodes.

    Subclasses are expected to provide tag_length() and string(); the
    implementations here raise NotImplementedError.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(VariantTypeNode, self).__init__(buf, offset, chunk, parent)
        # Optional externally supplied length; None when not given.
        self._length = length

    def __repr__(self):
        template = "{}(buf={!r}, offset={}, chunk={!r})"
        return template.format(self.__class__.__name__, self._buf, hex(self.offset()), self._chunk)

    def __str__(self):
        template = "{}(offset={}, length={}, string={})"
        return template.format(self.__class__.__name__, hex(self.offset()), hex(self.length()), self.string())

    def tag_length(self):
        """
        @raise NotImplementedError always; subclasses override this.
        """
        message = "tag_length not implemented for {!r}".format(self)
        raise NotImplementedError(message)

    def length(self):
        """
        @return The same value as tag_length().
        """
        return self.tag_length()

    def children(self):
        """
        @return An empty list; variant nodes have no children here.
        """
        return list()

    def string(self):
        """
        @raise NotImplementedError always; subclasses override this.
        """
        message = "string not implemented for {!r}".format(self)
        raise NotImplementedError(message)
1085 | 
1086 | 
# NullTypeNode satisfies the contract of VariantTypeNode and BXmlNode, but it is not a Block.
class NullTypeNode(object):
    """
    Variant type 0x00.

    A plain object that only remembers the offset and the optional length
    it was constructed with; the buffer, chunk and parent arguments are
    accepted but not stored.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(NullTypeNode, self).__init__()
        self._length = length
        self._offset = offset

    def __str__(self):
        return "NullTypeNode"

    def string(self):
        """
        @return The empty string.
        """
        return ""

    def length(self):
        """
        @return The length given at construction, or 0 if it was None or zero.
        """
        return self._length if self._length else 0

    def tag_length(self):
        """
        @return The length given at construction, or 0 if it was None or zero.
        """
        return self._length if self._length else 0

    def children(self):
        """
        @return An empty list.
        """
        return list()

    def offset(self):
        """
        @return The offset given at construction.
        """
        return self._offset
1115 | 
1116 | 
class WstringTypeNode(VariantTypeNode):
    """
    Variant type 0x01.

    Without an explicit length the value starts with a word
    "string_length" followed by the wide string; with an explicit length
    the wide string starts at offset 0 and spans length // 2 characters.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(WstringTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        if self._length is not None:
            self.declare_field("wstring", "_string", 0x0, length=self._length // 2)
            return
        self.declare_field("word", "string_length", 0x0)
        self.declare_field("wstring", "_string", length=self.string_length())

    def tag_length(self):
        """
        @return The explicit length if one was given, otherwise 2 plus
          twice the string_length field.
        """
        if self._length is not None:
            return self._length
        return 2 + (self.string_length() * 2)

    def string(self):
        """
        @return The string with trailing NUL characters removed.
        """
        raw = self._string()
        return raw.rstrip("\x00")
1137 | 
1138 | 
class StringTypeNode(VariantTypeNode):
    """
    Variant type 0x02.

    Without an explicit length the value starts with a word
    "string_length" followed by the string; with an explicit length the
    string starts at offset 0 and spans that length.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(StringTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        if self._length is not None:
            self.declare_field("string", "_string", 0x0, length=self._length)
            return
        self.declare_field("word", "string_length", 0x0)
        self.declare_field("string", "_string", length=self.string_length())

    def tag_length(self):
        """
        @return The explicit length if one was given, otherwise 2 plus
          the string_length field.
        """
        if self._length is not None:
            return self._length
        return 2 + (self.string_length())

    def string(self):
        """
        @return The string with trailing NUL characters removed.
        """
        raw = self._string()
        return raw.rstrip("\x00")
1159 | 
1160 | 
class SignedByteTypeNode(VariantTypeNode):
    """
    Variant type 0x03: a value declared as an "int8" field at offset 0.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(SignedByteTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        self.declare_field("int8", "byte", 0x0)

    def tag_length(self):
        """
        @return The fixed length, 1.
        """
        return 0x1

    def string(self):
        """
        @return The value converted with str().
        """
        value = self.byte()
        return str(value)
1175 | 
1176 | 
class UnsignedByteTypeNode(VariantTypeNode):
    """
    Variant type 0x04: a value declared as a "byte" field at offset 0.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(UnsignedByteTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        self.declare_field("byte", "byte", 0x0)

    def tag_length(self):
        """
        @return The fixed length, 1.
        """
        return 0x1

    def string(self):
        """
        @return The value converted with str().
        """
        value = self.byte()
        return str(value)
1191 | 
1192 | 
class SignedWordTypeNode(VariantTypeNode):
    """
    Variant type 0x05: a value declared as an "int16" field at offset 0.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(SignedWordTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        self.declare_field("int16", "word", 0x0)

    def tag_length(self):
        """
        @return The fixed length, 2.
        """
        return 0x2

    def string(self):
        """
        @return The value converted with str().
        """
        value = self.word()
        return str(value)
1207 | 
1208 | 
class UnsignedWordTypeNode(VariantTypeNode):
    """
    Variant type 0x06: a value declared as a "word" field at offset 0.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(UnsignedWordTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        self.declare_field("word", "word", 0x0)

    def tag_length(self):
        """
        @return The fixed length, 2.
        """
        return 0x2

    def string(self):
        """
        @return The value converted with str().
        """
        value = self.word()
        return str(value)
1223 | 
1224 | 
class SignedDwordTypeNode(VariantTypeNode):
    """
    Variant type 0x07: a value declared as an "int32" field at offset 0.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(SignedDwordTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        self.declare_field("int32", "dword", 0x0)

    def tag_length(self):
        """
        @return The fixed length, 4.
        """
        return 0x4

    def string(self):
        """
        @return The value converted with str().
        """
        value = self.dword()
        return str(value)
1239 | 
1240 | 
class UnsignedDwordTypeNode(VariantTypeNode):
    """
    Variant type 0x08: a value declared as a "dword" field at offset 0.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(UnsignedDwordTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        self.declare_field("dword", "dword", 0x0)

    def tag_length(self):
        """
        @return The fixed length, 4.
        """
        return 0x4

    def string(self):
        """
        @return The value converted with str().
        """
        value = self.dword()
        return str(value)
1255 | 
1256 | 
class SignedQwordTypeNode(VariantTypeNode):
    """
    Variant type 0x09: a value declared as an "int64" field at offset 0.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(SignedQwordTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        self.declare_field("int64", "qword", 0x0)

    def tag_length(self):
        """
        @return The fixed length, 8.
        """
        return 0x8

    def string(self):
        """
        @return The value converted with str().
        """
        value = self.qword()
        return str(value)
1271 | 
1272 | 
class UnsignedQwordTypeNode(VariantTypeNode):
    """
    Variant type 0x0A: a value declared as a "qword" field at offset 0.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(UnsignedQwordTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        self.declare_field("qword", "qword", 0x0)

    def tag_length(self):
        """
        @return The fixed length, 8.
        """
        return 0x8

    def string(self):
        """
        @return The value converted with str().
        """
        value = self.qword()
        return str(value)
1287 | 
1288 | 
class FloatTypeNode(VariantTypeNode):
    """
    Variant type 0x0B.

    The payload is declared with field type `float` under the accessor name
    `float`.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(FloatTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        # field type "float", accessor name "float", declared at 0x0
        self.declare_field("float", "float", 0)

    def tag_length(self):
        """
        Return the fixed payload size reported for this node: 4 bytes.
        """
        return 4

    def string(self):
        """
        Return the `str()` rendering of the parsed value.
        """
        value = self.float()
        return str(value)
1303 | 
1304 | 
class DoubleTypeNode(VariantTypeNode):
    """
    Variant type 0x0C.

    The payload is declared with field type `double` under the accessor name
    `double`.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(DoubleTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        # field type "double", accessor name "double", declared at 0x0
        self.declare_field("double", "double", 0)

    def tag_length(self):
        """
        Return the fixed payload size reported for this node: 8 bytes.
        """
        return 8

    def string(self):
        """
        Return the `str()` rendering of the parsed value.
        """
        value = self.double()
        return str(value)
1319 | 
1320 | 
class BooleanTypeNode(VariantTypeNode):
    """
    Variant type 0x0D.

    The payload is declared with field type `int32`. Zero renders as
    "False"; every other value renders as "True".
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(BooleanTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        self.declare_field("int32", "int32", 0x0)

    def tag_length(self):
        """
        Return the fixed payload size reported for this node: 4 bytes.
        """
        return 4

    def string(self):
        """
        Return "True" for any non-zero payload and "False" for zero.
        """
        # The field is signed, so a `> 0` test would report a non-zero value
        # with the high bit set (e.g. -1 / 0xFFFFFFFF) as "False". An
        # integer-backed boolean is false only when it is exactly zero.
        if self.int32() != 0:
            return "True"
        return "False"
1337 | 
1338 | 
class BinaryTypeNode(VariantTypeNode):
    """
    Variant type 0x0E.

    String/XML representation is Base64 encoded.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(BinaryTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        if self._length is not None:
            # an explicit length is known: the blob starts at 0x0 and spans it
            self.declare_field("binary", "binary", 0x0, length=self._length)
        else:
            # no explicit length: a dword `size` field precedes the blob
            self.declare_field("dword", "size", 0x0)
            self.declare_field("binary", "binary", length=self.size())

    def tag_length(self):
        """
        Return the explicit length when one is set, otherwise the 4-byte
        size field plus the size it announces.
        """
        if self._length is not None:
            return self._length
        return 4 + self.size()

    def string(self):
        """
        Return the blob as Base64 text.
        """
        encoded = base64.b64encode(self.binary())
        return encoded.decode("ascii")
1361 | 
1362 | 
class GuidTypeNode(VariantTypeNode):
    """
    Variant type 0x0F.

    The payload is declared with field type `guid` under the accessor name
    `guid`.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(GuidTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        # field type "guid", accessor name "guid", declared at 0x0
        self.declare_field("guid", "guid", 0)

    def tag_length(self):
        """
        Return the fixed payload size reported for this node: 16 bytes.
        """
        return 16

    def string(self):
        """
        Return the parsed GUID wrapped in curly braces.
        """
        return "".join(("{", self.guid(), "}"))
1377 | 
1378 | 
class SizeTypeNode(VariantTypeNode):
    """
    Variant type 0x10.

    Note: Assuming sizeof(size_t) == 0x8.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(SizeTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        # only an explicit length of 0x4 selects the dword reader; a length of
        # 0x8, no length at all, or any other length reads a qword.
        field_type = "dword" if self._length == 0x4 else "qword"
        self.declare_field(field_type, "num", 0x0)

    def tag_length(self):
        """
        Return the explicit length when one is set, otherwise 8.
        """
        if self._length is not None:
            return self._length
        return 8

    def string(self):
        """
        Return the text form of the parsed number.
        """
        value = self.num()
        return str(value)
1402 | 
1403 | 
class FiletimeTypeNode(VariantTypeNode):
    """
    Variant type 0x11.

    The payload is declared with field type `filetime` under the accessor
    name `filetime`.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(FiletimeTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        # field type "filetime", accessor name "filetime", declared at 0x0
        self.declare_field("filetime", "filetime", 0)

    def string(self):
        """
        Return the parsed timestamp via `isoformat`, with a space as the
        separator argument.
        """
        timestamp = self.filetime()
        return timestamp.isoformat(" ")

    def tag_length(self):
        """
        Return the fixed payload size reported for this node: 8 bytes.
        """
        return 8
1418 | 
1419 | 
class SystemtimeTypeNode(VariantTypeNode):
    """
    Variant type 0x12.

    The payload is declared with field type `systemtime` under the accessor
    name `systemtime`.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(SystemtimeTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        # field type "systemtime", accessor name "systemtime", declared at 0x0
        self.declare_field("systemtime", "systemtime", 0)

    def tag_length(self):
        """
        Return the fixed payload size reported for this node: 16 bytes.
        """
        return 16

    def string(self):
        """
        Return the parsed timestamp via `isoformat`, with a space as the
        separator argument.
        """
        timestamp = self.systemtime()
        return timestamp.isoformat(" ")
1434 | 
1435 | 
class SIDTypeNode(VariantTypeNode):
    """
    Variant type 0x13.

    Declared layout: a `version` byte, a `num_elements` byte, then `id_high`
    (field type `dword_be`) and `id_low` (field type `word_be`). These are
    followed by `num_elements` dwords read in `elements()`.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(SIDTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        self.declare_field("byte", "version", 0x0)
        self.declare_field("byte", "num_elements")
        self.declare_field("dword_be", "id_high")
        self.declare_field("word_be", "id_low")

    @memoize
    def elements(self):
        """
        Return the list of dwords that follow the declared fields, one per
        element announced by `num_elements`.
        """
        return [self.unpack_dword(self.current_field_offset() + 4 * idx) for idx in range(self.num_elements())]

    @memoize
    def id(self):
        """
        Return the identifier in `S-<version>-<authority>-<elem>-...` form.
        """
        version = self.version()
        # id_high supplies the upper bits and id_low the lower 16
        authority = (self.id_high() << 16) ^ self.id_low()
        pieces = ["S-{}-{}".format(version, authority)]
        pieces.extend("-{}".format(elem) for elem in self.elements())
        return "".join(pieces)

    def tag_length(self):
        """
        Return 8 bytes for the declared fields plus 4 bytes per element.
        """
        return 8 + 4 * self.num_elements()

    def string(self):
        """
        Return the same text as `id()`.
        """
        return self.id()
1467 | 
1468 | 
class Hex32TypeNode(VariantTypeNode):
    """
    Variant type 0x14.

    The payload is a 4-byte `binary` field exposed as `hex`.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(Hex32TypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        self.declare_field("binary", "hex", 0x0, length=4)

    def tag_length(self):
        """
        Return the fixed payload size reported for this node: 4 bytes.
        """
        return 4

    def string(self):
        """
        Return "0x" followed by two lowercase hex digits per byte, with the
        bytes emitted in reverse of their stored order.
        """
        reversed_bytes = self.hex()[::-1]
        digits = "".join("{:02x}".format(octet) for octet in reversed_bytes)
        return "0x" + digits
1487 | 
1488 | 
class Hex64TypeNode(VariantTypeNode):
    """
    Variant type 0x15.

    The payload is an 8-byte `binary` field exposed as `hex`.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(Hex64TypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        self.declare_field("binary", "hex", 0x0, length=8)

    def tag_length(self):
        """
        Return the fixed payload size reported for this node: 8 bytes.
        """
        return 8

    def string(self):
        """
        Return "0x" followed by two lowercase hex digits per byte, with the
        bytes emitted in reverse of their stored order.
        """
        reversed_bytes = self.hex()[::-1]
        digits = "".join("{:02x}".format(octet) for octet in reversed_bytes)
        return "0x" + digits
1507 | 
1508 | 
class BXmlTypeNode(VariantTypeNode):
    """
    Variant type 0x21.

    Wraps a `RootNode` built over the same buffer, offset and chunk, with
    this node as its parent.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(BXmlTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        self._root = RootNode(buf, offset, chunk, self)

    def tag_length(self):
        """
        Return the explicit length when it is set and non-zero, otherwise
        the length reported by the wrapped root node.
        """
        if self._length:
            return self._length
        return self._root.length()

    def string(self):
        """
        Always return the empty string; use `root()` to reach the content.
        """
        return ""

    def root(self):
        """
        Return the wrapped `RootNode`.
        """
        return self._root
1526 | 
1527 | 
class WstringArrayTypeNode(VariantTypeNode):
    """
    Variant type 0x81.

    The payload is a blob of UTF-16LE strings separated by two-byte NUL
    terminators. Without an explicit length, the blob is preceded by a
    `word` field holding its size in bytes.
    """

    def __init__(self, buf, offset, chunk, parent, length=None):
        super(WstringArrayTypeNode, self).__init__(buf, offset, chunk, parent, length=length)
        if self._length is None:
            self.declare_field("word", "binary_length", 0x0)
            self.declare_field("binary", "binary", length=(self.binary_length()))
        else:
            self.declare_field("binary", "binary", 0x0, length=(self._length))

    def tag_length(self):
        """
        Return the explicit length when one is set, otherwise the 2-byte
        size field plus the size it announces.
        """
        if self._length is None:
            return 2 + self.binary_length()
        return self._length

    def string(self):
        """
        Render the array as a run of `<string>...</string>` lines.

        Returns:
          str: one `<string>` element per array entry, each followed by a newline.

        Raises:
          ParseException: if a run of NUL bytes between strings has odd length.
        """
        binary = self.binary()
        acc = []
        while len(binary) > 0:
            # Grab a run of two-byte units whose first byte is non-zero.
            # re.DOTALL is required: without it `.` refuses the byte 0x0A, so
            # a code unit such as U+0A15 (bytes 15 0A) would end the run early
            # and the following slice would drop it as if it were a terminator.
            # NOTE(review): units whose first byte is 0x00 (e.g. U+0100) still
            # end the run; handling them needs a code-unit-wise scan.
            match = re.search(b"((?:[^\x00].)+)", binary, re.DOTALL)
            if match:
                frag = match.group()
                acc.append("<string>")
                # The data is little-endian and carries no BOM. The generic
                # "utf16" codec would sniff a BOM and otherwise fall back to
                # the host byte order, so name the byte order explicitly.
                acc.append(frag.decode("utf-16-le"))
                acc.append("</string>\n")
                # skip the fragment and its two-byte terminator
                binary = binary[len(frag) + 2 :]
                if len(binary) == 0:
                    break
            # every further pair of NUL bytes is one empty string
            frag = re.search(b"(\x00*)", binary).group()
            if len(frag) % 2 == 0:
                for _ in range(len(frag) // 2):
                    acc.append("<string></string>\n")
            else:
                raise ParseException("Error parsing uneven substring of NULLs")
            binary = binary[len(frag) :]
        return "".join(acc)
1567 | 
1568 | 
# Node classes, listed in the same order as `node_readable_tokens`.
# presumably indexed by the token value read from the stream -- TODO confirm
# against the lookup site. The hex comments give each entry's list position.
node_dispatch_table = [
    EndOfStreamNode,  # 0x00
    OpenStartElementNode,  # 0x01
    CloseStartElementNode,  # 0x02
    CloseEmptyElementNode,  # 0x03
    CloseElementNode,  # 0x04
    ValueNode,  # 0x05
    AttributeNode,  # 0x06
    CDataSectionNode,  # 0x07
    CharacterReferenceNode,  # 0x08
    EntityReferenceNode,  # 0x09
    ProcessingInstructionTargetNode,  # 0x0A
    ProcessingInstructionDataNode,  # 0x0B
    TemplateInstanceNode,  # 0x0C
    NormalSubstitutionNode,  # 0x0D
    ConditionalSubstitutionNode,  # 0x0E
    StreamStartNode,  # 0x0F
]
1587 | 
# Human-readable labels, one per entry of `node_dispatch_table` and in the
# same order. Positions 0x07-0x0B used to read "unknown" although the dispatch
# table names the node class for each of them; they are now labelled.
node_readable_tokens = [
    "End of Stream",
    "Open Start Element",
    "Close Start Element",
    "Close Empty Element",
    "Close Element",
    "Value",
    "Attribute",
    "CDATA Section",
    "Character Reference",
    "Entity Reference",
    "Processing Instruction Target",
    "Processing Instruction Data",
    "TemplateInstanceNode",
    "Normal Substitution",
    "Conditional Substitution",
    "Start of Stream",
]
1606 | 


--------------------------------------------------------------------------------
/Evtx/Views.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | #    This file is part of python-evtx.
  3 | #
  4 | #   Copyright 2012, 2013 Willi Ballenthin <william.ballenthin@mandiant.com>
  5 | #                    while at Mandiant <http://www.mandiant.com>
  6 | #
  7 | #   Licensed under the Apache License, Version 2.0 (the "License");
  8 | #   you may not use this file except in compliance with the License.
  9 | #   You may obtain a copy of the License at
 10 | #
 11 | #       http://www.apache.org/licenses/LICENSE-2.0
 12 | #
 13 | #   Unless required by applicable law or agreed to in writing, software
 14 | #   distributed under the License is distributed on an "AS IS" BASIS,
 15 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 16 | #   See the License for the specific language governing permissions and
 17 | #   limitations under the License.
 18 | from __future__ import absolute_import
 19 | 
 20 | import re
 21 | import xml.sax.saxutils
 22 | 
 23 | import Evtx.Nodes as e_nodes
 24 | 
# XML 1.1 declaration line. The *_xml_view generators in this module do not
# emit it themselves (see their docstrings); presumably callers prepend it
# when they want a complete document.
XML_HEADER = '<?xml version="1.1" encoding="utf-8" standalone="yes" ?>\n'
 26 | 
 27 | 
 28 | class UnexpectedElementException(Exception):
 29 |     def __init__(self, msg):
 30 |         super(UnexpectedElementException, self).__init__(msg)
 31 | 
 32 | 
# characters that `escape_attr` and `escape_value` delete from their output.
# the ranges are those of the RestrictedChar production in XML 1.1.
# ref: https://www.w3.org/TR/xml11/#charsets
RESTRICTED_CHARS = re.compile("[\x01-\x08\x0b\x0c\x0e-\x1f\x7f-\x84\x86-\x9f]")
 35 | 
 36 | 
 37 | def escape_attr(s):
 38 |     """
 39 |     escape the given string such that it can be placed in an XML attribute, like:
 40 | 
 41 |         <foo bar='$value'>
 42 | 
 43 |     Args:
 44 |       s (str): the string to escape.
 45 | 
 46 |     Returns:
 47 |       str: the escaped string.
 48 |     """
 49 |     esc = xml.sax.saxutils.quoteattr(s)
 50 |     esc = esc.encode("ascii", "xmlcharrefreplace").decode("ascii")
 51 |     esc = RESTRICTED_CHARS.sub("", esc)
 52 |     return esc
 53 | 
 54 | 
 55 | def escape_value(s):
 56 |     """
 57 |     escape the given string such that it can be placed in an XML value location, like:
 58 | 
 59 |         <foo>
 60 |           $value
 61 |         </foo>
 62 | 
 63 |     Args:
 64 |       s (str): the string to escape.
 65 | 
 66 |     Returns:
 67 |       str: the escaped string.
 68 |     """
 69 |     esc = xml.sax.saxutils.escape(s)
 70 |     esc = esc.encode("ascii", "xmlcharrefreplace").decode("ascii")
 71 |     esc = RESTRICTED_CHARS.sub("", esc)
 72 |     return esc
 73 | 
 74 | 
 75 | # ref: https://www.w3.org/TR/xml/#NT-NameStartChar
 76 | # but we are going to require a even stricter subset.
 77 | NAME_PATTERN = re.compile(r"[a-zA-Z_][a-zA-Z_\-]*")
 78 | 
 79 | 
 80 | def validate_name(s):
 81 |     """
 82 |     ensure the given name can be used as an XML entity name, such as tag or attribute name.
 83 | 
 84 |     Args:
 85 |       s (str): the string to validate.
 86 | 
 87 |     Raises:
 88 |       RuntimeError: if the string is not suitable to be an XML name.
 89 |     """
 90 |     if not NAME_PATTERN.match(s):
 91 |         raise RuntimeError("invalid xml name: %s" % (s))
 92 |     return s
 93 | 
 94 | 
 95 | def render_root_node_with_subs(root_node, subs):
 96 |     """
 97 |     render the given root node using the given substitutions into XML.
 98 | 
 99 |     Args:
100 |       root_node (e_nodes.RootNode): the node to render.
101 |       subs (list[str]): the substitutions that maybe included in the XML.
102 | 
103 |     Returns:
104 |       str: the rendered XML document.
105 |     """
106 | 
107 |     def rec(node, acc):
108 |         if isinstance(node, e_nodes.EndOfStreamNode):
109 |             pass  # intended
110 |         elif isinstance(node, e_nodes.OpenStartElementNode):
111 |             acc.append("<")
112 |             acc.append(node.tag_name())
113 |             for child in node.children():
114 |                 if isinstance(child, e_nodes.AttributeNode):
115 |                     acc.append(" ")
116 |                     acc.append(validate_name(child.attribute_name().string()))
117 |                     acc.append('="')
118 |                     # TODO: should use xml.sax.saxutils.quoteattr here
119 |                     # but to do so, we'd need to ensure we're not double-quoting this value.
120 |                     rec(child.attribute_value(), acc)
121 |                     acc.append('"')
122 |             acc.append(">")
123 |             for child in node.children():
124 |                 rec(child, acc)
125 |             acc.append("</")
126 |             acc.append(validate_name(node.tag_name()))
127 |             acc.append(">\n")
128 |         elif isinstance(node, e_nodes.CloseStartElementNode):
129 |             pass  # intended
130 |         elif isinstance(node, e_nodes.CloseEmptyElementNode):
131 |             pass  # intended
132 |         elif isinstance(node, e_nodes.CloseElementNode):
133 |             pass  # intended
134 |         elif isinstance(node, e_nodes.ValueNode):
135 |             acc.append(escape_value(node.children()[0].string()))
136 |         elif isinstance(node, e_nodes.AttributeNode):
137 |             pass  # intended
138 |         elif isinstance(node, e_nodes.CDataSectionNode):
139 |             acc.append("<![CDATA[")
140 |             # TODO: is this correct escaping???
141 |             acc.append(escape_value(node.cdata()))
142 |             acc.append("]]>")
143 |         elif isinstance(node, e_nodes.EntityReferenceNode):
144 |             acc.append(escape_value(node.entity_reference()))
145 |         elif isinstance(node, e_nodes.ProcessingInstructionTargetNode):
146 |             acc.append(escape_value(node.processing_instruction_target()))
147 |         elif isinstance(node, e_nodes.ProcessingInstructionDataNode):
148 |             acc.append(escape_value(node.string()))
149 |         elif isinstance(node, e_nodes.TemplateInstanceNode):
150 |             raise UnexpectedElementException("TemplateInstanceNode")
151 |         elif isinstance(node, e_nodes.NormalSubstitutionNode):
152 |             sub = subs[node.index()]
153 | 
154 |             if isinstance(sub, e_nodes.BXmlTypeNode):
155 |                 sub = render_root_node(sub.root())
156 |             else:
157 |                 sub = escape_value(sub.string())
158 | 
159 |             acc.append(sub)
160 |         elif isinstance(node, e_nodes.ConditionalSubstitutionNode):
161 |             sub = subs[node.index()]
162 | 
163 |             if isinstance(sub, e_nodes.BXmlTypeNode):
164 |                 sub = render_root_node(sub.root())
165 |             else:
166 |                 sub = escape_value(sub.string())
167 | 
168 |             acc.append(sub)
169 |         elif isinstance(node, e_nodes.StreamStartNode):
170 |             pass  # intended
171 | 
172 |     acc = []
173 |     for c in root_node.template().children():
174 |         rec(c, acc)
175 |     return "".join(acc)
176 | 
177 | 
def render_root_node(root_node):
    """
    render the given root node into XML using its own substitutions.

    Args:
      root_node: the node to render; it must provide `substitutions()`.

    Returns:
      str: the rendered XML document.

    Raises:
      RuntimeError: if a substitution is a `str` or is None.
    """

    def checked(sub):
        # a substitution must be neither a plain string nor missing.
        if isinstance(sub, str):
            raise RuntimeError("string sub?")
        if sub is None:
            raise RuntimeError("null sub?")
        return sub

    subs = [checked(sub) for sub in root_node.substitutions()]
    return render_root_node_with_subs(root_node, subs)
190 | 
191 | 
def evtx_record_xml_view(record, cache=None):
    """
    render the given record into an XML document.

    Args:
      record (Evtx.Record): the record to render.
      cache: accepted but not used by this function.

    Returns:
      str: the rendered XML document.
    """
    root = record.root()
    return render_root_node(root)
203 | 
204 | 
def evtx_chunk_xml_view(chunk):
    """
    Generate XML representations of the records in an EVTX chunk.

    The XML <?xml... header is not part of the output.
    Records come out in the order given by chunk.records().

    Args:
      chunk (Evtx.Chunk): the chunk to render.

    Yields:
      tuple[str, Evtx.Record]: the rendered XML document and the raw record.
    """
    for record in chunk.records():
        yield evtx_record_xml_view(record), record
221 | 
222 | 
def evtx_file_xml_view(file_header):
    """
    Generate XML representations of the records in an EVTX file.

    The XML <?xml... header is not part of the output.
    Records come out in the order given by file_header.chunks(), and within
    each chunk in the order given by chunk.records().

    Args:
      file_header (Evtx.FileHeader): the file header to render.

    Yields:
      tuple[str, Evtx.Record]: the rendered XML document and the raw record.
    """
    for chunk in file_header.chunks():
        for record in chunk.records():
            yield evtx_record_xml_view(record), record
240 | 
241 | 
def evtx_template_readable_view(root_node, cache=None):
    """
    render the template of the given root node as readable text, with each
    substitution shown as a bracketed placeholder instead of a value.

    nothing is escaped and no names are validated.

    Args:
      root_node: the node whose `template()` is rendered.
      cache: accepted but not used by this function.

    Returns:
      str: the rendered template.

    Raises:
      UnexpectedElementException: if the template contains a TemplateInstanceNode.
    """

    def walk(node, out):
        # the type checks run in the same order as the node kinds are listed
        # in `Evtx.Nodes`; kinds that render nothing simply return.
        if isinstance(node, e_nodes.EndOfStreamNode):
            return
        if isinstance(node, e_nodes.OpenStartElementNode):
            out.append("<")
            out.append(node.tag_name())
            for child in node.children():
                if not isinstance(child, e_nodes.AttributeNode):
                    continue
                out.append(" ")
                out.append(child.attribute_name().string())
                out.append('="')
                walk(child.attribute_value(), out)
                out.append('"')
            out.append(">")
            for child in node.children():
                walk(child, out)
            out.append("</")
            out.append(node.tag_name())
            out.append(">\n")
            return
        if isinstance(
            node,
            (
                e_nodes.CloseStartElementNode,
                e_nodes.CloseEmptyElementNode,
                e_nodes.CloseElementNode,
            ),
        ):
            return
        if isinstance(node, e_nodes.ValueNode):
            out.append(node.children()[0].string())
            return
        if isinstance(node, e_nodes.AttributeNode):
            # attributes are emitted by their parent element above
            return
        if isinstance(node, e_nodes.CDataSectionNode):
            out.append("<![CDATA[")
            out.append(node.cdata())
            out.append("]]>")
            return
        if isinstance(node, e_nodes.EntityReferenceNode):
            out.append(node.entity_reference())
            return
        if isinstance(node, e_nodes.ProcessingInstructionTargetNode):
            out.append(node.processing_instruction_target())
            return
        if isinstance(node, e_nodes.ProcessingInstructionDataNode):
            out.append(node.string())
            return
        if isinstance(node, e_nodes.TemplateInstanceNode):
            raise UnexpectedElementException("TemplateInstanceNode")
        if isinstance(node, e_nodes.NormalSubstitutionNode):
            out.append("[Normal Substitution(index={}, type={})]".format(node.index(), node.type()))
            return
        if isinstance(node, e_nodes.ConditionalSubstitutionNode):
            out.append("[Conditional Substitution(index={}, type={})]".format(node.index(), node.type()))
            return
        # StreamStartNode and any other node kind render nothing

    pieces = []
    for top in root_node.template().children():
        walk(top, pieces)
    return "".join(pieces)
295 | 


--------------------------------------------------------------------------------
/Evtx/__init__.py:
--------------------------------------------------------------------------------
 1 | #    This file is part of python-evtx.
 2 | #
 3 | #   Copyright 2012 Willi Ballenthin <william.ballenthin@mandiant.com>
 4 | #                    while at Mandiant <http://www.mandiant.com>
 5 | #
 6 | #   Licensed under the Apache License, Version 2.0 (the "License");
 7 | #   you may not use this file except in compliance with the License.
 8 | #   You may obtain a copy of the License at
 9 | #
10 | #       http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | #   Unless required by applicable law or agreed to in writing, software
13 | #   distributed under the License is distributed on an "AS IS" BASIS,
14 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | #   See the License for the specific language governing permissions and
16 | #   limitations under the License.
# submodule names exported by `from Evtx import *`
__all__ = [
    "Evtx",
    "BinaryParser",
    "Nodes",
    "Views",
]
23 | 


--------------------------------------------------------------------------------
/LICENSE.TXT:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | python-evtx
 2 | ===========
 3 | 
 4 | Introduction
 5 | ------------
 6 | 
 7 | python-evtx is a pure Python parser for recent Windows Event Log files (those with the file extension ".evtx").  The module provides programmatic access to the File and Chunk headers, record templates, and event entries.  For example, you can use python-evtx to review the event logs of Windows 7 systems from a Mac or Linux workstation. The structure definitions and parsing strategies were heavily inspired by the work of Andreas Schuster and his Perl implementation "Parse-Evtx".
 8 | 
 9 | Background
10 | ----------
11 | With the release of Windows Vista, Microsoft introduced an updated event log file format.  The format used in Windows XP was a circular buffer of record structures that each contained a list of strings.  A viewer resolved templates hosted in system library files and inserted the strings into appropriate positions.  The newer event log format is proprietary binary XML.  Unpacking chunks from an event log file from Windows 7 results in a complete XML document with a variable schema.  The changes helped Microsoft tune the file format to real-world uses of event logs, such as long running logs with hundreds of megabytes of data, and system independent template resolution.
12 | 
13 | Related Work
14 | ------------
15 | Andreas Schuster released the first public description of the .evtx file format in 2007.  He is the author of the thorough document "Introducing the Microsoft Vista event log file format" that describes the motivation and details of the format.  Mr. Schuster also maintains the Perl implementation of a parser called "Parse-Evtx".  I referred to the source code of this library extensively during the development of python-evtx.
16 | 
17 | Joachim Metz also released a cross-platform, LGPL licensed C++ based parser in 2011.  His document "Windows XML Event Log (EVTX): Analysis of EVTX" provides a detailed description of the structures and context of newer event log files.
18 | 
19 | Dependencies
20 | ------------
21 | python-evtx is a pure Python 3 module, so it works equally well across platforms like Windows, macOS, and Linux. 
22 | 
23 | python-evtx operates on event log files from Windows Vista and newer Windows operating systems.  These files typically have the file extension .evtx.  Version 5.09 of the `file` utility identifies such a file as "MS Vista Windows Event Log".  To manually confirm the file type, look for the ASCII string "ElfFile" in the first seven bytes:
24 | 
25 |     willi/evtx  » xxd -l 32 Security.evtx 
26 |     0000000: 456c 6646 696c 6500 0000 0000 0000 0000  ElfFile.........
27 |     0000010: d300 0000 0000 0000 375e 0000 0000 0000  ........7^......
28 | 
29 | 
30 | Examples
31 | --------
32 | Provided with the parsing module `Evtx` are four scripts that mimic the tools distributed with Parse-Evtx.  `evtx_info.py` prints metadata about the event log and verifies the checksums of each chunk.  `evtx_templates.py` builds and prints the templates used throughout the event log. `evtx_dump.py` parses the event log and transforms the binary XML into a human readable ASCII XML format. Finally, `evtx_dump_json.py` parses event logs, similar to `evtx_dump.py` and transforms the binary XML into JSON with the added capability to output the JSON array to a file. 
33 | 
34 | Note the length of the `evtx_dump.py` script: it's only 20 lines.  Now, review the contents and notice the complete implementation of the logic:
35 | 
36 |     print(e_views.XML_HEADER)
37 |     print('<Events>')
38 |     for record in log.records:
39 |         print(record.xml())
40 |     print('</Events>')  
41 | 
42 | Working with python-evtx is really easy!
43 | 
44 | 
45 | Installation
46 | ------------
47 | Updates to python-evtx are pushed to PyPI, so you can install the module using `pip`.  For example:
48 | 
49 |     pip install python-evtx
50 | 
51 | The source code for python-evtx is hosted on GitHub, and you may download, fork, and review it from this repository (http://www.github.com/williballenthin/python-evtx).  Please report issues or feature requests through GitHub's bug tracker associated with the project.
52 | 
53 | Development
54 | -----------
55 | For formatting, use isort:
56 |     
57 |     isort --length-sort --profile black --line-length=120 Evtx/ scripts/ tests/
58 | 
59 | and black:
60 | 
61 |     black --line-length=120 Evtx/ scripts/ tests/
62 | 
63 | For linting, use ruff:
64 | 
65 |     ruff check Evtx/ scripts/ tests/
66 | 
67 | Or use [just](https://github.com/casey/just) to run the linters:
68 | 
69 |     just lint
70 | 
71 | License
72 | -------
73 | python-evtx is licensed under the Apache License, Version 2.0.  This means it is freely available for use and modification in a personal and professional capacity.  
74 | 
75 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=61.0"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "python-evtx"
 7 | version = "0.8.1"
 8 | description = "Pure Python parser for Windows event log files (.evtx)."
 9 | readme = "README.md"
10 | license = "Apache-2.0"
11 | authors = [
12 |     { name = "Willi Ballenthin", email = "willi.ballenthin@gmail.com" },
13 | ]
14 | requires-python = ">=3.9"
15 | dependencies = [
16 |     "hexdump>=3.3",
17 | ]
18 | classifiers = [
19 |     "Development Status :: 5 - Production/Stable",  # NOTE(review): assumed status, please confirm (current version is 0.8.1)
20 |     "Intended Audience :: Developers",
21 |     "Intended Audience :: Information Technology",
22 |     "Operating System :: OS Independent",
23 |     "Programming Language :: Python :: 3",
24 |     "Programming Language :: Python :: 3.9",
25 |     "Programming Language :: Python :: 3.10",
26 |     "Programming Language :: Python :: 3.11",
27 |     "Programming Language :: Python :: 3.12",
28 |     "Programming Language :: Python :: 3.13",
29 |     "Topic :: System :: Logging",
30 |     "Topic :: Software Development :: Libraries :: Python Modules",
31 |     "Topic :: Security",
32 | ]
33 | 
34 | [project.urls]
35 | Homepage = "https://github.com/williballenthin/python-evtx"
36 | Repository = "https://github.com/williballenthin/python-evtx"
37 | Bug-Tracker = "https://github.com/williballenthin/python-evtx/issues"
38 | 
39 | [project.optional-dependencies]
40 | test = [
41 |     "pytest-cov>=5.0.0",
42 |     "pytest>=8.2.2",
43 |     "lxml>=5.2.2",
44 |     "black>=24.4.2",
45 |     "isort>=5.13.2",
46 |     "ruff>=0.4.10",
47 | ]
48 | 
49 | [project.scripts]
50 | evtx_dump = "scripts.evtx_dump:main"
51 | evtx_dump_json = "scripts.evtx_dump_json:main"
52 | evtx_dump_chunk_slack = "scripts.evtx_dump_chunk_slack:main"
53 | evtx_eid_record_numbers = "scripts.evtx_eid_record_numbers:main"
54 | evtx_extract_record = "scripts.evtx_extract_record:main"
55 | evtx_filter_records = "scripts.evtx_filter_records:main"
56 | evtx_info = "scripts.evtx_info:main"
57 | evtx_record_structure = "scripts.evtx_record_structure:main"
58 | evtx_structure = "scripts.evtx_structure:main"
59 | evtx_templates = "scripts.evtx_templates:main"
60 | 
61 | [tool.setuptools]
62 | packages = ["Evtx"]
63 | 
64 | [tool.black]
65 | line-length = 120
66 | 
67 | [tool.isort]
68 | profile = "black"
69 | line_length = 120
70 | length_sort = true
71 | 
72 | [tool.ruff]
73 | line-length = 120
74 | 
75 | [dependency-groups]
76 | build = [
77 |     "build>=1.2.2.post1",
78 | ]
79 | 


--------------------------------------------------------------------------------
/scripts/evtx_dates.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from datetime import datetime
 4 | 
 5 | from lxml import etree
 6 | 
 7 | from Evtx.Evtx import Evtx
 8 | from Evtx.Views import evtx_file_xml_view
 9 | 
10 | 
11 | def get_child(node, tag, ns="{http://schemas.microsoft.com/win/2004/08/events/event}"):
12 |     return node.find("%s%s" % (ns, tag))
13 | 
14 | 
15 | def to_lxml(record_xml):
16 |     return etree.fromstring('<?xml version="1.0" encoding="utf-8" standalone="yes" ?>%s' % record_xml.encode("utf-8"))
17 | 
18 | 
19 | def xml_records(filename):
20 |     with Evtx(filename) as evtx:
21 |         for xml, record in evtx_file_xml_view(evtx.get_file_header()):
22 |             try:
23 |                 yield to_lxml(xml), None
24 |             except etree.XMLSyntaxError as e:
25 |                 yield xml, e
26 | 
27 | 
28 | def parsed_date(dstr):
29 |     ts = None
30 |     try:
31 |         ts = datetime.strptime(dstr, "%Y-%m-%d %H:%M:%S")
32 |     except ValueError:
33 |         ts = datetime.strptime(dstr, "%Y-%m-%d %H:%M:%S.%f")
34 |     return ts
35 | 
36 | 
37 | def event_in_daterange(d, start, end):
38 |     is_in_range = True
39 |     if d < start:
40 |         is_in_range = False
41 |     if d > end:
42 |         is_in_range = False
43 |     return is_in_range
44 | 
45 | 
46 | def matching_records(evtfile, sdatetime, edatetime):
47 |     for node, err in xml_records(evtfile):
48 |         if err is not None:
49 |             continue
50 |         else:
51 |             sys = get_child(node, "System")
52 |             t = parsed_date(get_child(sys, "TimeCreated").get("SystemTime"))
53 |             if event_in_daterange(t, sdatetime, edatetime):
54 |                 yield node
55 | 
56 | 
57 | def main():
58 |     import argparse
59 | 
60 |     parser = argparse.ArgumentParser()
61 |     parser.add_argument("evtfile", type=str)
62 |     parser.add_argument("start", type=parsed_date, help="Start date/time YYYY-mm-dd HH:MM:SS(.f)")
63 |     parser.add_argument(
64 |         "-e", dest="end", type=parsed_date, help="End date/time YYYY-mm-dd HH:MM:SS(.f)", default=datetime.now()
65 |     )
66 |     args = parser.parse_args()
67 | 
68 |     for record in matching_records(args.evtfile, args.start, args.end):
69 |         print(etree.tostring(record, pretty_print=True))
70 | 
71 | 
72 | if __name__ == "__main__":
73 |     main()
74 | 


--------------------------------------------------------------------------------
/scripts/evtx_dump.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #    This file is part of python-evtx.
 3 | #
 4 | #   Copyright 2012, 2013 Willi Ballenthin <william.ballenthin@mandiant.com>
 5 | #                    while at Mandiant <http://www.mandiant.com>
 6 | #
 7 | #   Licensed under the Apache License, Version 2.0 (the "License");
 8 | #   you may not use this file except in compliance with the License.
 9 | #   You may obtain a copy of the License at
10 | #
11 | #       http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | #   Unless required by applicable law or agreed to in writing, software
14 | #   distributed under the License is distributed on an "AS IS" BASIS,
15 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | #   See the License for the specific language governing permissions and
17 | #   limitations under the License.
18 | #
19 | #   Version v0.1.1
20 | import Evtx.Evtx as evtx
21 | import Evtx.Views as e_views
22 | 
23 | 
24 | def main():
25 |     import argparse
26 | 
27 |     parser = argparse.ArgumentParser(description="Dump a binary EVTX file into XML.")
28 |     parser.add_argument("evtx", type=str, help="Path to the Windows EVTX event log file")
29 |     args = parser.parse_args()
30 | 
31 |     with evtx.Evtx(args.evtx) as log:
32 |         print(e_views.XML_HEADER)
33 |         print("<Events>")
34 |         for record in log.records():
35 |             print(record.xml())
36 |         print("</Events>")
37 | 
38 | 
39 | if __name__ == "__main__":
40 |     main()
41 | 


--------------------------------------------------------------------------------
/scripts/evtx_dump_chunk_slack.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #    This file is part of python-evtx.
 3 | #
 4 | #   Copyright 2015 Willi Ballenthin <william.ballenthin@mandiant.com>
 5 | #                    while at Mandiant <http://www.mandiant.com>
 6 | #
 7 | #   Licensed under the Apache License, Version 2.0 (the "License");
 8 | #   you may not use this file except in compliance with the License.
 9 | #   You may obtain a copy of the License at
10 | #
11 | #       http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | #   Unless required by applicable law or agreed to in writing, software
14 | #   distributed under the License is distributed on an "AS IS" BASIS,
15 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | #   See the License for the specific language governing permissions and
17 | #   limitations under the License.
18 | import sys
19 | import mmap
20 | import argparse
21 | import contextlib
22 | 
23 | from Evtx.Evtx import FileHeader
24 | 
25 | 
26 | def main():
27 |     parser = argparse.ArgumentParser(description="Dump the slack space of an EVTX file.")
28 |     parser.add_argument("evtx", type=str, help="Path to the Windows EVTX event log file")
29 |     args = parser.parse_args()
30 | 
31 |     with open(args.evtx, "r") as f:
32 |         with contextlib.closing(mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)) as buf:
33 |             fh = FileHeader(buf, 0x0)
34 |             for chunk in fh.chunks():
35 |                 chunk_start = chunk.offset()
36 |                 last_allocated_offset = chunk_start
37 |                 for record in chunk.records():
38 |                     last_allocated_offset = record.offset() + record.size()
39 | 
40 |                 sys.stdout.buffer.write(buf[last_allocated_offset : chunk_start + 0x10000])
41 | 
42 | 
43 | if __name__ == "__main__":
44 |     main()
45 | 


--------------------------------------------------------------------------------
/scripts/evtx_dump_json.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | #   This file is part of python-evtx.
 3 | #   Written by AJ Read (ajread4) with help/inspiration from the evtx_dump.py file written by Willi Ballenthin.
 4 | #
 5 | #   Purpose: User can dump evtx data into JSON format to either the command line or a JSON file in new line delimited format/JSON array.
 6 | #   Details: The JSON object is created with only the EventRecordID from the System section of the evtx XML and all of the information within the EventData section.
 7 | #
 8 | #   Requires:
 9 | #     - xmltodict >= 0.12.0
10 | import os
11 | import json
12 | 
13 | import xmltodict
14 | 
15 | import Evtx.Evtx as evtx
16 | 
17 | 
18 | def main():
19 |     import argparse
20 | 
21 |     parser = argparse.ArgumentParser(description="Dump a binary EVTX file into XML.")
22 |     parser.add_argument("evtx", type=str, action="store", help="Path to the Windows EVTX event log file")
23 |     parser.add_argument("-o", "--output", type=str, action="store", help="Path of output JSON file")
24 |     args = parser.parse_args()
25 | 
26 |     with evtx.Evtx(args.evtx) as log:
27 | 
28 |         # Instantiate the final json object
29 |         final_json = []
30 | 
31 |         # Loop through each record in the evtx log
32 |         for record in log.records():
33 | 
34 |             # Convert the record to a dictionary for ease of parsing
35 |             data_dict = xmltodict.parse(record.xml())
36 | 
37 |             # Loop through each key,value pair of the System section of the evtx logs and extract the EventRecordID
38 |             for event_system_key, event_system_value in data_dict["Event"]["System"].items():
39 |                 if event_system_key == "EventRecordID":
40 |                     json_subline = {}
41 |                     firstline = {event_system_key: event_system_value}
42 | 
43 |                     # Add information to the JSON object for this specific log
44 |                     json_subline.update(firstline)  # add the event ID to JSON subline
45 | 
46 |             # Loop through each key, value pair of the EventData section of the evtx logs
47 |             for event_data_key, event_data_value in data_dict["Event"]["EventData"].items():
48 |                 for values in event_data_value:
49 | 
50 |                     # Loop through each subvalue within the EvenData section to extract necessary information
51 |                     for event_data_subkey, event_data_subvalue in values.items():
52 |                         if event_data_subkey == "@Name":
53 |                             data_name = event_data_subvalue
54 |                         else:
55 |                             data_value = event_data_subvalue
56 | 
57 |                             # Add information to the JSON object for this specific log
58 |                             json_subline.update({data_name: data_value})
59 | 
60 |             # Print the JSON object for the specific log if not requested to output to file
61 |             if not args.output:
62 |                 print(json_subline)
63 | 
64 |             # Add specific log JSON object to the final JSON object
65 |             if not final_json:
66 |                 final_json = [json_subline]
67 |             else:
68 |                 final_json.append(json_subline)
69 | 
70 |         # If output is desired
71 |         if args.output:
72 | 
73 |             # Output the JSON data
74 |             if os.path.splitext(args.output)[1] == ".json":
75 |                 json_file = args.output
76 |             else:
77 |                 json_file = args.output + ".json"
78 | 
79 |             # Write to JSON file
80 |             with open(json_file, "w") as outfile:
81 |                 json.dump(final_json, outfile)
82 | 
83 | 
84 | if __name__ == "__main__":
85 |     main()
86 | 


--------------------------------------------------------------------------------
/scripts/evtx_eid_record_numbers.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import lxml.etree
 4 | from filter_records import get_child
 5 | 
 6 | import Evtx.Evtx as evtx
 7 | 
 8 | 
 9 | def main():
10 |     import argparse
11 | 
12 |     parser = argparse.ArgumentParser(
13 |         description="Print the record numbers of EVTX log entries " "that match the given EID."
14 |     )
15 |     parser.add_argument("evtx", type=str, help="Path to the Windows EVTX file")
16 |     parser.add_argument("eid", type=int, help="The EID of records to extract")
17 |     args = parser.parse_args()
18 | 
19 |     with evtx.Evtx(args.evtx) as log:
20 |         for record in log.records():
21 |             try:
22 |                 node = record.lxml()
23 |             except lxml.etree.XMLSyntaxError:
24 |                 continue
25 |             if args.eid != int(get_child(get_child(node, "System"), "EventID").text):
26 |                 continue
27 |             print(record.record_num())
28 | 
29 | 
30 | if __name__ == "__main__":
31 |     main()
32 | 


--------------------------------------------------------------------------------
/scripts/evtx_extract_record.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #    This file is part of python-evtx.
 3 | #
 4 | #   Copyright 2012, 2013 Willi Ballenthin <william.ballenthin@mandiant.com>
 5 | #                    while at Mandiant <http://www.mandiant.com>
 6 | #
 7 | #   Licensed under the Apache License, Version 2.0 (the "License");
 8 | #   you may not use this file except in compliance with the License.
 9 | #   You may obtain a copy of the License at
10 | #
11 | #       http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | #   Unless required by applicable law or agreed to in writing, software
14 | #   distributed under the License is distributed on an "AS IS" BASIS,
15 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | #   See the License for the specific language governing permissions and
17 | #   limitations under the License.
18 | #
19 | #   Version v.0.1
20 | import Evtx.Evtx as evtx
21 | 
22 | 
23 | def main():
24 |     import argparse
25 | 
26 |     parser = argparse.ArgumentParser(description="Write the raw data for a EVTX record to STDOUT")
27 |     parser.add_argument("evtx", type=str, help="Path to the Windows EVTX file")
28 |     parser.add_argument("record", type=int, help="The record number of the record to extract")
29 |     args = parser.parse_args()
30 | 
31 |     with evtx.Evtx(args.evtx) as log:
32 |         record = log.get_record(args.record)
33 |         if record is None:
34 |             raise RuntimeError("Cannot find the record specified.")
35 |         print(record.data())
36 | 
37 | 
38 | if __name__ == "__main__":
39 |     main()
40 | 


--------------------------------------------------------------------------------
/scripts/evtx_filter_records.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from lxml import etree
 4 | 
 5 | from Evtx.Evtx import Evtx
 6 | from Evtx.Views import evtx_file_xml_view
 7 | 
 8 | # import xml.etree.cElementTree as etree
 9 | 
10 | 
11 | def to_lxml(record_xml):
12 |     """
13 |     @type record: Record
14 |     """
15 |     return etree.fromstring('<?xml version="1.0" encoding="utf-8" standalone="yes" ?>%s' % record_xml)
16 | 
17 | 
18 | def xml_records(filename):
19 |     """
20 |     If the second return value is not None, then it is an
21 |       Exception encountered during parsing.  The first return value
22 |       will be the XML string.
23 | 
24 |     @type filename str
25 |     @rtype: generator of (etree.Element or str), (None or Exception)
26 |     """
27 |     with Evtx(filename) as evtx:
28 |         for xml, record in evtx_file_xml_view(evtx.get_file_header()):
29 |             try:
30 |                 yield to_lxml(xml), None
31 |             except etree.XMLSyntaxError as e:
32 |                 yield xml, e
33 | 
34 | 
35 | def get_child(node, tag, ns="{http://schemas.microsoft.com/win/2004/08/events/event}"):
36 |     """
37 |     @type node: etree.Element
38 |     @type tag: str
39 |     @type ns: str
40 |     """
41 |     return node.find("%s%s" % (ns, tag))
42 | 
43 | 
44 | def main():
45 |     import argparse
46 | 
47 |     parser = argparse.ArgumentParser(description="Print only entries from an EVTX file with a given EID.")
48 |     parser.add_argument("evtx", type=str, help="Path to the Windows EVTX file")
49 |     parser.add_argument("eid", type=int, help="The EID of records to print")
50 | 
51 |     args = parser.parse_args()
52 | 
53 |     for node, err in xml_records(args.evtx):
54 |         if err is not None:
55 |             continue
56 |         sys = get_child(node, "System")
57 |         if args.eid == int(get_child(sys, "EventID").text):
58 |             print(etree.tostring(node, pretty_print=True))
59 | 
60 | 
61 | if __name__ == "__main__":
62 |     main()
63 | 


--------------------------------------------------------------------------------
/scripts/evtx_info.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #    This file is part of python-evtx.
  3 | #
  4 | #   Copyright 2012, 2013 Willi Ballenthin <william.ballenthin@mandiant.com>
  5 | #                    while at Mandiant <http://www.mandiant.com>
  6 | #
  7 | #   Licensed under the Apache License, Version 2.0 (the "License");
  8 | #   you may not use this file except in compliance with the License.
  9 | #   You may obtain a copy of the License at
 10 | #
 11 | #       http://www.apache.org/licenses/LICENSE-2.0
 12 | #
 13 | #   Unless required by applicable law or agreed to in writing, software
 14 | #   distributed under the License is distributed on an "AS IS" BASIS,
 15 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 16 | #   See the License for the specific language governing permissions and
 17 | #   limitations under the License.
 18 | #
 19 | #   Version v0.1
 20 | import Evtx.Evtx as evtx
 21 | 
 22 | 
 23 | def main():
 24 |     import argparse
 25 | 
 26 |     parser = argparse.ArgumentParser(description="Dump information about an EVTX file.")
 27 |     parser.add_argument("evtx", type=str, help="Path to the Windows EVTX event log file")
 28 |     args = parser.parse_args()
 29 | 
 30 |     with evtx.Evtx(args.evtx) as log:
 31 |         fh = log.get_file_header()
 32 | 
 33 |         print("Information from file header:")
 34 |         print(("Format version  : %d.%d" % (fh.major_version(), fh.minor_version())))
 35 |         print(("Flags           : 0x%08x" % (fh.flags())))
 36 |         dirty_string = "clean"
 37 |         if fh.is_dirty():
 38 |             dirty_string = "dirty"
 39 |         print(("File is         : %s" % (dirty_string)))
 40 |         full_string = "no"
 41 |         if fh.is_full():
 42 |             full_string = "yes"
 43 |         print(("Log is full     : %s" % (full_string)))
 44 |         print(("Current chunk   : %d of %d" % (fh.current_chunk_number(), fh.chunk_count())))
 45 |         print(("Oldest chunk    : %d" % (fh.oldest_chunk() + 1)))
 46 |         print(("Next record#    : %d" % (fh.next_record_number())))
 47 |         checksum_string = "fail"
 48 |         if fh.calculate_checksum() == fh.checksum():
 49 |             checksum_string = "pass"
 50 |         print(("Check sum       : %s" % (checksum_string)))
 51 |         print("")
 52 | 
 53 |         if fh.is_dirty():
 54 |             chunk_count = sum([1 for c in fh.chunks() if c.verify()])
 55 | 
 56 |             last_chunk = None
 57 |             for chunk in fh.chunks():
 58 |                 if not chunk.verify():
 59 |                     continue
 60 |                 last_chunk = chunk
 61 |             next_record_num = last_chunk.log_last_record_number() + 1
 62 | 
 63 |             print("Suspected updated header values (header is dirty):")
 64 |             print(("Current chunk   : %d of %d" % (chunk_count, chunk_count)))
 65 |             print(("Next record#    : %d" % (next_record_num)))
 66 |             print("")
 67 | 
 68 |         print("Information from chunks:")
 69 |         print("  Chunk file (first/last)     log (first/last)      Header Data")
 70 |         print("- ----- --------------------- --------------------- ------ ------")
 71 |         for i, chunk in enumerate(fh.chunks(include_inactive=True), 1):
 72 |             note_string = " "
 73 |             if i == fh.current_chunk_number() + 1:
 74 |                 note_string = "*"
 75 |             elif i == fh.oldest_chunk() + 1:
 76 |                 note_string = ">"
 77 | 
 78 |             if not chunk.check_magic():
 79 |                 try:
 80 |                     magic = chunk.magic()
 81 |                 except UnicodeDecodeError:
 82 |                     magic = ""
 83 | 
 84 |                 if magic == "\x00\x00\x00\x00\x00\x00\x00\x00":
 85 |                     print("%s  %4d     [EMPTY]" % (note_string, i))
 86 |                 else:
 87 |                     print("%s  %4d   [INVALID]" % (note_string, i))
 88 |                 continue
 89 | 
 90 |             header_checksum_string = "fail"
 91 |             if chunk.calculate_header_checksum() == chunk.header_checksum():
 92 |                 header_checksum_string = "pass"
 93 | 
 94 |             data_checksum_string = "fail"
 95 |             if chunk.calculate_data_checksum() == chunk.data_checksum():
 96 |                 data_checksum_string = "pass"
 97 | 
 98 |             print(
 99 |                 "%s  %4d   %8d  %8d    %8d  %8d   %s   %s"
100 |                 % (
101 |                     note_string,
102 |                     i,
103 |                     chunk.file_first_record_number(),
104 |                     chunk.file_last_record_number(),
105 |                     chunk.log_first_record_number(),
106 |                     chunk.log_last_record_number(),
107 |                     header_checksum_string,
108 |                     data_checksum_string,
109 |                 )
110 |             )
111 | 
112 | 
# Run the command-line entry point only when this file is executed as a
# script; importing the module does not call main().
if __name__ == "__main__":
    main()
115 | 


--------------------------------------------------------------------------------
/scripts/evtx_record_structure.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | import hexdump
  3 | 
  4 | import Evtx.Evtx as evtx
  5 | from Evtx.Nodes import RootNode, BXmlTypeNode, VariantTypeNode, TemplateInstanceNode
  6 | 
  7 | 
  8 | def describe_root(record, root, indent=0, suppress_values=False):
  9 |     """
 10 |     Args:
 11 |       record (Evtx.Record):
 12 |       indent (int):
 13 |     """
 14 | 
 15 |     def format_node(n, extra=None, indent=0):
 16 |         """
 17 |         Depends on closure over `record` and `suppress_values`.
 18 | 
 19 |         Args:
 20 |           n (Evtx.Nodes.BXmlNode):
 21 |           extra (str):
 22 | 
 23 |         Returns:
 24 |           str:
 25 |         """
 26 |         ret = ""
 27 |         indent_s = "  " * indent
 28 |         name = n.__class__.__name__
 29 |         offset = n.offset() - record.offset()
 30 |         if extra is not None:
 31 |             ret = "%s%s(offset=%s, %s)" % (indent_s, name, hex(offset), extra)
 32 |         else:
 33 |             ret = "%s%s(offset=%s)" % (indent_s, name, hex(offset))
 34 | 
 35 |         if not suppress_values and isinstance(n, VariantTypeNode):
 36 |             ret += " --> %s" % (n.string())
 37 |             if isinstance(n, BXmlTypeNode):
 38 |                 ret += "\n"
 39 |                 ret += describe_root(record, n._root, indent=indent + 1)
 40 | 
 41 |         return ret
 42 | 
 43 |     def rec(node, indent=0):
 44 |         """
 45 |         Args:
 46 |           node (Evtx.Nodes.BXmlNode):
 47 |           indent (int):
 48 | 
 49 |         Returns:
 50 |           str:
 51 |         """
 52 |         ret = ""
 53 |         if isinstance(node, TemplateInstanceNode):
 54 |             if node.is_resident_template():
 55 |                 extra = "resident=True, length=%s" % (hex(node.template().data_length()))
 56 |                 ret += "%s\n" % (format_node(node, extra=extra, indent=indent))
 57 |                 ret += rec(node.template(), indent=indent + 1)
 58 |             else:
 59 |                 ret += "%s\n" % (format_node(node, extra="resident=False", indent=indent))
 60 |         else:
 61 |             ret += "%s\n" % (format_node(node, indent=indent))
 62 | 
 63 |         for child in node.children():
 64 |             ret += rec(child, indent=indent + 1)
 65 | 
 66 |         if isinstance(node, RootNode):
 67 |             ofs = node.tag_and_children_length()
 68 |             indent_s = "  " * (indent + 1)
 69 |             offset = node.offset() - record.offset() + ofs
 70 |             ret += "%sSubstitutions(offset=%s)\n" % (indent_s, hex(offset))
 71 |             for sub in node.substitutions():
 72 |                 ret += "%s\n" % (format_node(sub, indent=indent + 2))
 73 | 
 74 |         return ret
 75 | 
 76 |     ret = ""
 77 |     ret += rec(root, indent=indent)
 78 |     return ret
 79 | 
 80 | 
 81 | def main():
 82 |     import argparse
 83 | 
 84 |     parser = argparse.ArgumentParser(description="Pretty print the binary structure of an EVTX record.")
 85 |     parser.add_argument("evtx", type=str, help="Path to the Windows EVTX file")
 86 |     parser.add_argument("record", type=int, help="Record number")
 87 |     parser.add_argument("--suppress_values", action="store_true", help="Do not print the values of substitutions.")
 88 |     args = parser.parse_args()
 89 | 
 90 |     with evtx.Evtx(args.evtx) as log:
 91 |         hexdump.hexdump(log.get_record(args.record).data())
 92 | 
 93 |         record = log.get_record(args.record)
 94 |         print("record(absolute_offset=%s)" % record.offset())
 95 |         print(describe_root(record, record.root(), suppress_values=args.suppress_values))
 96 |         print(record.xml())
 97 | 
 98 | 
 99 | if __name__ == "__main__":
100 |     main()
101 | 


--------------------------------------------------------------------------------
/scripts/evtx_record_template.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | import Evtx.Evtx as evtx
 5 | import Evtx.Views as e_views
 6 | 
 7 | 
 8 | def main():
 9 |     import argparse
10 | 
11 |     parser = argparse.ArgumentParser(description="Print the structure of an EVTX record's template.")
12 |     parser.add_argument("evtx", type=str, help="Path to the Windows EVTX file")
13 |     parser.add_argument("record", type=int, help="Record number")
14 |     args = parser.parse_args()
15 | 
16 |     with evtx.Evtx(args.evtx) as log:
17 |         r = log.get_record(args.record)
18 |         if r is None:
19 |             print("error: record not found")
20 |             return -1
21 |         else:
22 |             print(e_views.evtx_template_readable_view(r.root()))
23 | 
24 | 
25 | if __name__ == "__main__":
26 |     main()
27 | 


--------------------------------------------------------------------------------
/scripts/evtx_structure.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #    This file is part of python-evtx.
  3 | #
  4 | #   Copyright 2012, 2013 Willi Ballenthin <william.ballenthin@mandiant.com>
  5 | #                    while at Mandiant <http://www.mandiant.com>
  6 | #
  7 | #   Licensed under the Apache License, Version 2.0 (the "License");
  8 | #   you may not use this file except in compliance with the License.
  9 | #   You may obtain a copy of the License at
 10 | #
 11 | #       http://www.apache.org/licenses/LICENSE-2.0
 12 | #
 13 | #   Unless required by applicable law or agreed to in writing, software
 14 | #   distributed under the License is distributed on an "AS IS" BASIS,
 15 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 16 | #   See the License for the specific language governing permissions and
 17 | #   limitations under the License.
 18 | import Evtx.Evtx as evtx
 19 | import Evtx.Nodes as e_nodes
 20 | 
 21 | 
 22 | class EvtxFormatter(object):
 23 |     def __init__(self):
 24 |         super(EvtxFormatter, self).__init__()
 25 |         self._indent_stack = []
 26 |         self._indent_unit = "  "
 27 | 
 28 |     def _indent(self):
 29 |         self._indent_stack.append(self._indent_unit)
 30 | 
 31 |     def _dedent(self):
 32 |         if len(self._indent_stack) > 0:
 33 |             self._indent_stack = self._indent_stack[:-1]
 34 | 
 35 |     def save_indent(self):
 36 |         return self._indent_stack[:]
 37 | 
 38 |     def restore_indent(self, indent):
 39 |         self._indent_stack = indent
 40 | 
 41 |     def _l(self, s):
 42 |         return "".join(self._indent_stack) + s
 43 | 
 44 |     def format_header(self, fh):
 45 |         yield self._l("File header")
 46 |         self._indent()
 47 |         yield self._l("magic: %s" % (fh.magic()))
 48 |         for num_field in [
 49 |             "oldest_chunk",
 50 |             "current_chunk_number",
 51 |             "next_record_number",
 52 |             "header_size",
 53 |             "minor_version",
 54 |             "major_version",
 55 |             "header_chunk_size",
 56 |             "chunk_count",
 57 |             "flags",
 58 |             "checksum",
 59 |         ]:
 60 |             yield self._l("%s: %s" % (num_field, hex(getattr(fh, num_field)())))
 61 | 
 62 |         yield self._l("verify: %s" % (fh.verify()))
 63 |         yield self._l("dirty: %s" % (fh.is_dirty()))
 64 |         yield self._l("full: %s" % (fh.is_full()))
 65 | 
 66 |         for chunk in fh.chunks():
 67 |             for line in self.format_chunk(chunk):
 68 |                 yield line
 69 |         self._dedent()
 70 | 
 71 |     def format_chunk(self, chunk):
 72 |         yield self._l("Chunk")
 73 |         self._indent()
 74 |         yield self._l("offset: %s" % (hex(chunk.offset())))
 75 |         yield self._l("magic: %s" % (chunk.magic()))
 76 | 
 77 |         for num_field in [
 78 |             "file_first_record_number",
 79 |             "file_last_record_number",
 80 |             "log_first_record_number",
 81 |             "log_last_record_number",
 82 |             "header_size",
 83 |             "last_record_offset",
 84 |             "next_record_offset",
 85 |             "data_checksum",
 86 |             "header_checksum",
 87 |         ]:
 88 |             yield self._l("%s: %s" % (num_field, hex(getattr(chunk, num_field)())))
 89 | 
 90 |         yield self._l("verify: %s" % (chunk.verify()))
 91 |         yield self._l("templates: %d" % (len(chunk.templates())))
 92 | 
 93 |         for record in chunk.records():
 94 |             for line in self.format_record(record):
 95 |                 yield line
 96 |         self._dedent()
 97 | 
 98 |     def format_record(self, record):
 99 |         yield self._l("Record")
100 |         self._indent()
101 |         yield self._l("offset: %s" % (hex(record.offset())))
102 |         yield self._l("magic: %s" % (hex(record.magic())))
103 |         yield self._l("size: %s" % (hex(record.size())))
104 |         yield self._l("number: %s" % (hex(record.record_num())))
105 |         yield self._l("timestamp: %s" % (record.timestamp()))
106 |         yield self._l("verify: %s" % (record.verify()))
107 | 
108 |         try:
109 |             s = self.save_indent()
110 |             for line in self.format_node(record, record.root()):
111 |                 yield line
112 |         except Exception as e:
113 |             self.restore_indent(s)
114 |             yield "ERROR: " + str(e)
115 |         self._dedent()
116 | 
117 |     def _format_node_name(self, record, node, extra=None):
118 |         """
119 |         note: this doesn't yield, it returns
120 |         """
121 |         line = ""
122 |         if extra is not None:
123 |             line = "%s(offset=%s, %s)" % (node.__class__.__name__, hex(node.offset() - record.offset()), extra)
124 |         else:
125 |             line = "%s(offset=%s)" % (node.__class__.__name__, hex(node.offset() - record.offset()))
126 | 
127 |         if isinstance(node, e_nodes.VariantTypeNode):
128 |             line += " --> %s" % (node.string())
129 |         if isinstance(node, e_nodes.OpenStartElementNode):
130 |             line += " --> %s" % (node.tag_name())
131 |         if isinstance(node, e_nodes.AttributeNode):
132 |             line += " --> %s" % (node.attribute_name().string())
133 |         return line
134 | 
135 |     def format_node(self, record, node):
136 |         extra = None
137 |         if isinstance(node, e_nodes.TemplateInstanceNode) and node.is_resident_template():
138 |             extra = "resident=True, length=%s" % (hex(node.template().data_length()))
139 |         elif isinstance(node, e_nodes.TemplateInstanceNode):
140 |             extra = "resident=False"
141 |         yield self._l(self._format_node_name(record, node, extra=extra))
142 | 
143 |         if isinstance(node, e_nodes.BXmlTypeNode):
144 |             self._indent()
145 |             for line in self.format_node(record, node._root):
146 |                 yield line
147 |             self._dedent()
148 |         elif isinstance(node, e_nodes.TemplateInstanceNode) and node.is_resident_template():
149 |             self._indent()
150 |             for line in self.format_node(record, node.template()):
151 |                 yield line
152 |             self._dedent()
153 | 
154 |         self._indent()
155 |         for child in node.children():
156 |             for line in self.format_node(record, child):
157 |                 yield line
158 |         self._dedent()
159 | 
160 |         if isinstance(node, e_nodes.RootNode):
161 |             ofs = node.tag_and_children_length()
162 |             yield self._l("Substitutions(offset=%s)" % (hex(node.offset() - record.offset() + ofs)))
163 |             self._indent()
164 | 
165 |             for sub in node.substitutions():
166 |                 for line in self.format_node(record, sub):
167 |                     yield line
168 |             self._dedent()
169 | 
170 | 
def main():
    """
    Command-line entry point: print the structure of every header, chunk,
    record and node of an EVTX file, one line at a time.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Dump the structure of an EVTX file.")
    cli.add_argument("evtx", type=str, help="Path to the Windows EVTX event log file")
    options = cli.parse_args()

    with evtx.Evtx(options.evtx) as log:
        # format_header is a generator, so lines are printed as they are produced.
        lines = EvtxFormatter().format_header(log.get_file_header())
        for line in lines:
            print(line)


if __name__ == "__main__":
    main()
186 | 


--------------------------------------------------------------------------------
/scripts/evtx_templates.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #    This file is part of python-evtx.
 3 | #
 4 | #   Copyright 2012, 2013 Willi Ballenthin <william.ballenthin@mandiant.com>
 5 | #                    while at Mandiant <http://www.mandiant.com>
 6 | #
 7 | #   Licensed under the Apache License, Version 2.0 (the "License");
 8 | #   you may not use this file except in compliance with the License.
 9 | #   You may obtain a copy of the License at
10 | #
11 | #       http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | #   Unless required by applicable law or agreed to in writing, software
14 | #   distributed under the License is distributed on an "AS IS" BASIS,
15 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | #   See the License for the specific language governing permissions and
17 | #   limitations under the License.
18 | #
19 | #   Version v0.1
20 | import Evtx.Evtx as evtx
21 | import Evtx.Views as e_views
22 | 
23 | 
def main():
    """
    Command-line entry point: for every template of every chunk in an EVTX
    file, print a header line (GUID, chunk index, offset) followed by a
    readable view of the template.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Dump templates from a binary EVTX file.")
    cli.add_argument("evtx", type=str, help="Path to the Windows EVTX event log file")
    options = cli.parse_args()

    with evtx.Evtx(options.evtx) as log:
        for chunk_index, chunk in enumerate(log.chunks()):
            # iterate over a snapshot list of the chunk's templates.
            templates = list(chunk.templates().values())
            for template in templates:
                guid = template.guid()
                location = hex(template.absolute_offset(0x0))
                print("Template {%s} at chunk %d, offset %s" % (guid, chunk_index, location))
                print(e_views.evtx_template_readable_view(template))


if __name__ == "__main__":
    main()
40 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
 2 | # Licensed under the Apache License, Version 2.0 (the "License");
 3 | #  you may not use this file except in compliance with the License.
 4 | # You may obtain a copy of the License at: [package root]/LICENSE.txt
 5 | # Unless required by applicable law or agreed to in writing, software distributed under the License
 6 | #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 7 | # See the License for the specific language governing permissions and limitations under the License.
 8 | 
 9 | # import all the symbols from our fixtures
10 | # and make available to test cases, implicitly.
11 | # this is thanks to pytest magic.
12 | #
13 | # see the following for a discussion:
14 | # https://www.revsys.com/tidbits/pytest-fixtures-are-magic/
15 | # https://lobste.rs/s/j8xgym/pytest_fixtures_are_magic
16 | from fixtures import *  # noqa: F403 [unable to detect undefined names]
17 | 


--------------------------------------------------------------------------------
/tests/data/dns_log_malformed.evtx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/williballenthin/python-evtx/3d9ab5207c12e0ace4147df0b36aaed59e5b58ba/tests/data/dns_log_malformed.evtx


--------------------------------------------------------------------------------
/tests/data/issue_38.evtx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/williballenthin/python-evtx/3d9ab5207c12e0ace4147df0b36aaed59e5b58ba/tests/data/issue_38.evtx


--------------------------------------------------------------------------------
/tests/data/issue_39.evtx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/williballenthin/python-evtx/3d9ab5207c12e0ace4147df0b36aaed59e5b58ba/tests/data/issue_39.evtx


--------------------------------------------------------------------------------
/tests/data/issue_43.evtx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/williballenthin/python-evtx/3d9ab5207c12e0ace4147df0b36aaed59e5b58ba/tests/data/issue_43.evtx


--------------------------------------------------------------------------------
/tests/data/readme.md:
--------------------------------------------------------------------------------
 1 | The source for system.evtx with md5 182de19fe6a25b928a34ad59af0bbf1e
 2 |  was https://github.com/log2timeline/plaso/tree/1e2fa282efa2f839e1f179a3e98dbf922b5dbbc7/test_data
 3 | 
 4 | The source for security.evtx with md5 8fa20a376cb6745453bc51f906e0fcd0
 5 |  was Carlos Dias, via email, on May 4, 2017.
 6 | 
The source for dns_log_malformed.evtx with md5 ae831beda7dfda43f4de0e18a1035f64
 was @stephensheridan, via GitHub issue #37 (https://github.com/williballenthin/python-evtx/issues/37).

The source for issue_38.evtx with md5 d75c90e629f38c7b9e612905e02e2255
 was @nbareil, via GitHub issue #38 (https://github.com/williballenthin/python-evtx/issues/38).
12 | 
13 | 


--------------------------------------------------------------------------------
/tests/data/security.evtx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/williballenthin/python-evtx/3d9ab5207c12e0ace4147df0b36aaed59e5b58ba/tests/data/security.evtx


--------------------------------------------------------------------------------
/tests/data/system.evtx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/williballenthin/python-evtx/3d9ab5207c12e0ace4147df0b36aaed59e5b58ba/tests/data/system.evtx


--------------------------------------------------------------------------------
/tests/fixtures.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import mmap
 3 | import os.path
 4 | import contextlib
 5 | 
 6 | import pytest
 7 | 
 8 | 
 9 | def system_path():
10 |     """
11 |     fetch the file system path of the system.evtx test file.
12 | 
13 |     Returns:
14 |       str: the file system path of the test file.
15 |     """
16 |     cd = os.path.dirname(__file__)
17 |     datadir = os.path.join(cd, "data")
18 |     systempath = os.path.join(datadir, "system.evtx")
19 |     return systempath
20 | 
21 | 
@pytest.fixture
def system():
    """
    provide the contents of the system.evtx test file.
    the value handed to the test is a read-only memory map of the file,
     so it acts pretty much like a byte string.
    both the map and the file are closed once the test is done.

    Yields:
      mmap.mmap: the contents of the test file.
    """
    with open(system_path(), "rb") as handle:
        mapped = mmap.mmap(handle.fileno(), 0, access=mmap.ACCESS_READ)
        with contextlib.closing(mapped) as contents:
            yield contents
36 | 
37 | 
def security_path():
    """
    locate the security.evtx test file.

    the path is built relative to the directory containing this module.

    Returns:
      str: the file system path of the test file.
    """
    tests_dir = os.path.dirname(__file__)
    return os.path.join(tests_dir, "data", "security.evtx")
49 | 
50 | 
@pytest.fixture
def security():
    """
    provide the contents of the security.evtx test file.
    the value handed to the test is a read-only memory map of the file,
     so it acts pretty much like a byte string.
    both the map and the file are closed once the test is done.

    Yields:
      mmap.mmap: the contents of the test file.
    """
    with open(security_path(), "rb") as handle:
        mapped = mmap.mmap(handle.fileno(), 0, access=mmap.ACCESS_READ)
        with contextlib.closing(mapped) as contents:
            yield contents
65 | 
66 | 
@pytest.fixture
def data_path():
    """
    locate the directory that holds the test files.

    the path is built relative to the directory containing this module.

    Returns:
      str: the file system path of the test directory.
    """
    return os.path.join(os.path.dirname(__file__), "data")
78 | 


--------------------------------------------------------------------------------
/tests/test_chunks.py:
--------------------------------------------------------------------------------
  1 | import Evtx.Evtx as evtx
  2 | 
  3 | EMPTY_MAGIC = "\x00" * 0x8
  4 | 
  5 | 
  6 | def test_chunks(system):
  7 |     """
  8 |     regression test parsing some known fields in the file chunks.
  9 | 
 10 |     Args:
 11 |       system (bytes): the system.evtx test file contents. pytest fixture.
 12 |     """
 13 |     fh = evtx.FileHeader(system, 0x0)
 14 | 
 15 |     # collected empirically
 16 |     expecteds = [
 17 |         {"start_file": 1, "end_file": 153, "start_log": 12049, "end_log": 12201},
 18 |         {"start_file": 154, "end_file": 336, "start_log": 12202, "end_log": 12384},
 19 |         {"start_file": 337, "end_file": 526, "start_log": 12385, "end_log": 12574},
 20 |         {"start_file": 527, "end_file": 708, "start_log": 12575, "end_log": 12756},
 21 |         {"start_file": 709, "end_file": 882, "start_log": 12757, "end_log": 12930},
 22 |         {"start_file": 883, "end_file": 1059, "start_log": 12931, "end_log": 13107},
 23 |         {"start_file": 1060, "end_file": 1241, "start_log": 13108, "end_log": 13289},
 24 |         {"start_file": 1242, "end_file": 1424, "start_log": 13290, "end_log": 13472},
 25 |         {"start_file": 1425, "end_file": 1601, "start_log": 13473, "end_log": 13649},
 26 |     ]
 27 | 
 28 |     for i, chunk in enumerate(fh.chunks()):
 29 |         # collected empirically
 30 |         if i < 9:
 31 |             assert chunk.check_magic() is True
 32 |             assert chunk.magic() == "ElfChnk\x00"
 33 |             assert chunk.calculate_header_checksum() == chunk.header_checksum()
 34 |             assert chunk.calculate_data_checksum() == chunk.data_checksum()
 35 | 
 36 |             expected = expecteds[i]
 37 |             assert chunk.file_first_record_number() == expected["start_file"]
 38 |             assert chunk.file_last_record_number() == expected["end_file"]
 39 |             assert chunk.log_first_record_number() == expected["start_log"]
 40 |             assert chunk.log_last_record_number() == expected["end_log"]
 41 | 
 42 |         else:
 43 |             assert chunk.check_magic() is False
 44 |             assert chunk.magic() == EMPTY_MAGIC
 45 | 
 46 | 
 47 | def test_chunks2(security):
 48 |     """
 49 |     regression test parsing some known fields in the file chunks.
 50 | 
 51 |     Args:
 52 |       security (bytes): the security.evtx test file contents. pytest fixture.
 53 |     """
 54 |     fh = evtx.FileHeader(security, 0x0)
 55 | 
 56 |     # collected empirically
 57 |     expecteds = [
 58 |         {"start_file": 1, "end_file": 91, "start_log": 1, "end_log": 91},
 59 |         {"start_file": 92, "end_file": 177, "start_log": 92, "end_log": 177},
 60 |         {"start_file": 178, "end_file": 260, "start_log": 178, "end_log": 260},
 61 |         {"start_file": 261, "end_file": 349, "start_log": 261, "end_log": 349},
 62 |         {"start_file": 350, "end_file": 441, "start_log": 350, "end_log": 441},
 63 |         {"start_file": 442, "end_file": 530, "start_log": 442, "end_log": 530},
 64 |         {"start_file": 531, "end_file": 622, "start_log": 531, "end_log": 622},
 65 |         {"start_file": 623, "end_file": 711, "start_log": 623, "end_log": 711},
 66 |         {"start_file": 712, "end_file": 802, "start_log": 712, "end_log": 802},
 67 |         {"start_file": 803, "end_file": 888, "start_log": 803, "end_log": 888},
 68 |         {"start_file": 889, "end_file": 976, "start_log": 889, "end_log": 976},
 69 |         {"start_file": 977, "end_file": 1063, "start_log": 977, "end_log": 1063},
 70 |         {"start_file": 1064, "end_file": 1148, "start_log": 1064, "end_log": 1148},
 71 |         {"start_file": 1149, "end_file": 1239, "start_log": 1149, "end_log": 1239},
 72 |         {"start_file": 1240, "end_file": 1327, "start_log": 1240, "end_log": 1327},
 73 |         {"start_file": 1328, "end_file": 1414, "start_log": 1328, "end_log": 1414},
 74 |         {"start_file": 1415, "end_file": 1501, "start_log": 1415, "end_log": 1501},
 75 |         {"start_file": 1502, "end_file": 1587, "start_log": 1502, "end_log": 1587},
 76 |         {"start_file": 1588, "end_file": 1682, "start_log": 1588, "end_log": 1682},
 77 |         {"start_file": 1683, "end_file": 1766, "start_log": 1683, "end_log": 1766},
 78 |         {"start_file": 1767, "end_file": 1847, "start_log": 1767, "end_log": 1847},
 79 |         {"start_file": 1848, "end_file": 1942, "start_log": 1848, "end_log": 1942},
 80 |         {"start_file": 1943, "end_file": 2027, "start_log": 1943, "end_log": 2027},
 81 |         {"start_file": 2028, "end_file": 2109, "start_log": 2028, "end_log": 2109},
 82 |         {"start_file": 2110, "end_file": 2201, "start_log": 2110, "end_log": 2201},
 83 |         {"start_file": 2202, "end_file": 2261, "start_log": 2202, "end_log": 2261},
 84 |     ]
 85 | 
 86 |     for i, chunk in enumerate(fh.chunks()):
 87 |         # collected empirically
 88 |         if i < 26:
 89 |             assert chunk.check_magic() is True
 90 |             assert chunk.magic() == "ElfChnk\x00"
 91 |             assert chunk.calculate_header_checksum() == chunk.header_checksum()
 92 |             assert chunk.calculate_data_checksum() == chunk.data_checksum()
 93 | 
 94 |             expected = expecteds[i]
 95 |             assert chunk.file_first_record_number() == expected["start_file"]
 96 |             assert chunk.file_last_record_number() == expected["end_file"]
 97 |             assert chunk.log_first_record_number() == expected["start_log"]
 98 |             assert chunk.log_last_record_number() == expected["end_log"]
 99 | 
100 |         else:
101 |             assert chunk.check_magic() is False
102 |             assert chunk.magic() == EMPTY_MAGIC
103 | 


--------------------------------------------------------------------------------
/tests/test_header.py:
--------------------------------------------------------------------------------
 1 | import Evtx.Evtx as evtx
 2 | 
 3 | 
 4 | def test_file_header(system):
 5 |     """
 6 |     regression test parsing some known fields in the file header.
 7 | 
 8 |     Args:
 9 |       system (bytes): the system.evtx test file contents. pytest fixture.
10 |     """
11 |     fh = evtx.FileHeader(system, 0x0)
12 | 
13 |     # collected empirically
14 |     assert fh.magic() == "ElfFile\x00"
15 |     assert fh.major_version() == 0x3
16 |     assert fh.minor_version() == 0x1
17 |     assert fh.flags() == 0x1
18 |     assert fh.is_dirty() is True
19 |     assert fh.is_full() is False
20 |     assert fh.current_chunk_number() == 0x8
21 |     assert fh.chunk_count() == 0x9
22 |     assert fh.oldest_chunk() == 0x0
23 |     assert fh.next_record_number() == 0x34D8
24 |     assert fh.checksum() == 0x41B4B1EC
25 |     assert fh.calculate_checksum() == fh.checksum()
26 | 
27 | 
def test_file_header2(security):
    """
    regression test: check known field values of the security.evtx file header.

    Args:
      security (bytes): the security.evtx test file contents. pytest fixture.
    """
    header = evtx.FileHeader(security, 0x0)

    # collected empirically: (accessor name, expected value)
    expected_values = [
        ("magic", "ElfFile\x00"),
        ("major_version", 0x3),
        ("minor_version", 0x1),
        ("flags", 0x1),
        ("current_chunk_number", 0x19),
        ("chunk_count", 0x1A),
        ("oldest_chunk", 0x0),
        ("next_record_number", 0x8B2),
        ("checksum", 0x3F6E33D5),
    ]
    for accessor, expected in expected_values:
        assert getattr(header, accessor)() == expected, accessor

    # the flag helpers must return real booleans, not just truthy values.
    assert header.is_dirty() is True
    assert header.is_full() is False

    assert header.calculate_checksum() == header.checksum()
50 | 


--------------------------------------------------------------------------------
/tests/test_issue_37.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import pytest
 4 | 
 5 | import Evtx.Evtx as evtx
 6 | 
 7 | 
 8 | def test_corrupt_ascii_example(data_path):
 9 |     """
10 |     regression test demonstrating issue 37.
11 | 
12 |     Args:
13 |       data_path (str): the file system path of the test directory.
14 |     """
15 |     # record number two contains a QNAME xml element
16 |     # with an ASCII text value that is invalid ASCII:
17 |     #
18 |     #     000002E0:                                31 39 33 2E 31 2E            193.1.
19 |     #     000002F0: 33 36 2E 31 32 31 30 2E  39 2E 31 35 2E 32 30 32  36.1210.9.15.202
20 |     #     00000300: 01 62 2E 5F 64 6E 73 2D  73 64 2E 5F 75 64 70 2E  .b._dns-sd._udp.
21 |     #     00000310: 40 A6 35 01 2E                                    @.5..
22 |     #                  ^^ ^^ ^^
23 |     #
24 |     with pytest.raises(UnicodeDecodeError):
25 |         with evtx.Evtx(os.path.join(data_path, "dns_log_malformed.evtx")) as log:
26 |             for chunk in log.chunks():
27 |                 for record in chunk.records():
28 |                     assert record.xml() is not None
29 | 
30 | 
def test_continue_parsing_after_corrupt_ascii(data_path):
    """
    regression test demonstrating issue 37: a record that fails to decode
    does not prevent the following records from being attempted.

    Args:
      data_path (str): the file system path of the test directory.
    """
    tally = {"attempted": 0, "completed": 0, "failed": 0}

    log_path = os.path.join(data_path, "dns_log_malformed.evtx")
    with evtx.Evtx(log_path) as log:
        for chunk in log.chunks():
            for record in chunk.records():
                tally["attempted"] += 1
                try:
                    assert record.xml() is not None
                except UnicodeDecodeError:
                    tally["failed"] += 1
                else:
                    tally["completed"] += 1

    # this small log file has exactly five records.
    assert tally["attempted"] == 5
    # the first record is valid.
    assert tally["completed"] == 1
    # however the remaining four have corrupted ASCII strings,
    # which we are unable to decode.
    assert tally["failed"] == 4
58 | 


--------------------------------------------------------------------------------
/tests/test_issue_38.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import Evtx.Evtx as evtx
 4 | 
 5 | 
 6 | def one(iterable):
 7 |     """
 8 |     fetch a single element from the given iterable.
 9 | 
10 |     Args:
11 |       iterable (iterable): a sequence of things.
12 | 
13 |     Returns:
14 |       object: the first thing in the sequence.
15 |     """
16 |     for i in iterable:
17 |         return i
18 | 
19 | 
20 | def get_child(node, tag, ns="{http://schemas.microsoft.com/win/2004/08/events/event}"):
21 |     return node.find("%s%s" % (ns, tag))
22 | 
23 | 
24 | def test_hex64_value(data_path):
25 |     """
26 |     regression test demonstrating issue 38.
27 | 
28 |     Args:
29 |       data_path (str): the file system path of the test directory.
30 |     """
31 |     with evtx.Evtx(os.path.join(data_path, "issue_38.evtx")) as log:
32 |         for chunk in log.chunks():
33 |             record = one(chunk.records())
34 |             event_data = get_child(record.lxml(), "EventData")
35 |             for data in event_data:
36 |                 if data.get("Name") != "SubjectLogonId":
37 |                     continue
38 | 
39 |                 assert data.text == "0x000000000019d3af"
40 | 


--------------------------------------------------------------------------------
/tests/test_issue_39.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import Evtx.Evtx as evtx
 4 | 
 5 | 
 6 | def one(iterable):
 7 |     """
 8 |     fetch a single element from the given iterable.
 9 | 
10 |     Args:
11 |       iterable (iterable): a sequence of things.
12 | 
13 |     Returns:
14 |       object: the first thing in the sequence.
15 |     """
16 |     for i in iterable:
17 |         return i
18 | 
19 | 
20 | def get_child(node, tag, ns="{http://schemas.microsoft.com/win/2004/08/events/event}"):
21 |     return node.find("%s%s" % (ns, tag))
22 | 
23 | 
24 | def get_children(node, tags, ns="{http://schemas.microsoft.com/win/2004/08/events/event}"):
25 |     for tag in tags:  # follow the tags in order, one get_child() lookup per tag
26 |         node = get_child(node, tag, ns=ns)
27 |     return node  # whatever the last lookup returned (the input node if tags is empty)
28 | 
29 | 
30 | def test_systemtime(data_path):
31 |     """
32 |     regression test demonstrating issue 39.
33 | 
34 |     Args:
35 |       data_path (str): the file system path of the test directory.
36 |     """
37 |     with evtx.Evtx(os.path.join(data_path, "issue_39.evtx")) as log:
38 |         for record in log.records():
39 |             if record.record_num() != 129:
40 |                 continue
41 | 
42 |             time_created = get_children(record.lxml(), ["System", "TimeCreated"])
43 |             assert time_created.get("SystemTime") == "2017-04-21 07:41:17.003393+00:00"
44 | 


--------------------------------------------------------------------------------
/tests/test_issue_43.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import pytest
 4 | 
 5 | import Evtx.Evtx as evtx
 6 | 
 7 | 
 8 | def get_record_by_num(log, record_num):
 9 |     for record in log.records():
10 |         if record.record_num() == record_num:
11 |             return record
12 |     raise KeyError(record_num)
13 | 
14 | 
15 | def test_issue_43(data_path):
16 |     """
17 |     regression test demonstrating issue 43.
18 | 
19 |     Args:
20 |       data_path (str): the file system path of the test directory.
21 |     """
22 |     with evtx.Evtx(os.path.join(data_path, "issue_43.evtx")) as log:
23 |         bad_rec = get_record_by_num(log, 508)
24 |         with pytest.raises(UnicodeDecodeError):
25 |             _ = bad_rec.xml()
26 | 


--------------------------------------------------------------------------------
/tests/test_records.py:
--------------------------------------------------------------------------------
  1 | import textwrap
  2 | import importlib.util
  3 | 
  4 | import pytest
  5 | 
  6 | import Evtx.Evtx as evtx
  7 | import Evtx.Nodes as e_nodes
  8 | 
  9 | if importlib.util.find_spec("lxml"):
 10 |     no_lxml = False
 11 | else:
 12 |     no_lxml = True
 13 | 
 14 | 
 15 | def test_parse_records(system):
 16 |     """
 17 |     regression test demonstrating that all record metadata can be parsed.
 18 | 
 19 |     Args:
 20 |       system (bytes): the system.evtx test file contents. pytest fixture.
 21 |     """
 22 |     fh = evtx.FileHeader(system, 0x0)
 23 |     for i, chunk in enumerate(fh.chunks()):
 24 |         for j, record in enumerate(chunk.records()):
 25 |             assert record.magic() == 0x2A2A
 26 | 
 27 | 
 28 | def test_parse_records2(security):
 29 |     """
 30 |     regression test demonstrating that all record metadata can be parsed.
 31 | 
 32 |     Args:
 33 |       security (bytes): the security.evtx test file contents. pytest fixture.
 34 |     """
 35 |     fh = evtx.FileHeader(security, 0x0)
 36 |     for i, chunk in enumerate(fh.chunks()):
 37 |         for j, record in enumerate(chunk.records()):
 38 |             assert record.magic() == 0x2A2A
 39 | 
 40 | 
 41 | def one(iterable):
 42 |     """
 43 |     fetch a single element from the given iterable.
 44 | 
 45 |     Args:
 46 |       iterable (iterable): a sequence of things.
 47 | 
 48 |     Returns:
 49 |       object: the first thing in the sequence.
 50 |     """
 51 |     for i in iterable:
 52 |         return i
 53 | 
 54 | 
 55 | def extract_structure(node):
 56 |     """
 57 |     given an evtx bxml node, generate a tree of all the nodes.
 58 |     each node has:
 59 |       - str: node type
 60 |       - str: (optional) value
 61 |       - list: (optional) children
 62 | 
 63 |     Args:
 64 |       node (evtx.Node): the root node.
 65 | 
 66 |     Returns:
 67 |       list: the tree representing the bxml structure.
 68 |     """
 69 |     name = node.__class__.__name__
 70 | 
 71 |     if isinstance(node, e_nodes.BXmlTypeNode):
 72 |         # must go before is VariantTypeNode
 73 |         value = None
 74 |     elif isinstance(node, e_nodes.VariantTypeNode):
 75 |         value = node.string()
 76 |     elif isinstance(node, e_nodes.OpenStartElementNode):
 77 |         value = node.tag_name()
 78 |     elif isinstance(node, e_nodes.AttributeNode):
 79 |         value = node.attribute_name().string()
 80 |     else:
 81 |         value = None
 82 | 
 83 |     children = []
 84 |     if isinstance(node, e_nodes.BXmlTypeNode):
 85 |         children.append(extract_structure(node._root))
 86 |     elif isinstance(node, e_nodes.TemplateInstanceNode) and node.is_resident_template():
 87 |         children.append(extract_structure(node.template()))
 88 | 
 89 |     children.extend(list(map(extract_structure, node.children())))
 90 | 
 91 |     if isinstance(node, e_nodes.RootNode):
 92 |         substitutions = list(map(extract_structure, node.substitutions()))
 93 |         children.append(["Substitutions", None, substitutions])
 94 | 
 95 |     if children:
 96 |         return [name, value, children]
 97 |     elif value:
 98 |         return [name, value]
 99 |     else:
100 |         return [name]
101 | 
102 | 
103 | def test_parse_record(system):
104 |     """
105 |     regression test: the first record of system.evtx parses into the expected tree of binary xml nodes.
106 | 
107 |     Args:
108 |       system (bytes): the system.evtx test file contents. pytest fixture.
109 |     """
110 |     fh = evtx.FileHeader(system, 0x0)
111 |     chunk = one(fh.chunks())
112 |     record = one(chunk.records())
113 | 
114 |     # written by hand, in the nested-list shape that extract_structure() returns.
115 |     expected = [
116 |         "RootNode",
117 |         None,
118 |         [
119 |             ["StreamStartNode"],
120 |             [
121 |                 "TemplateInstanceNode",
122 |                 None,
123 |                 [
124 |                     [
125 |                         "TemplateNode",
126 |                         None,
127 |                         [
128 |                             ["StreamStartNode"],
129 |                             [
130 |                                 "OpenStartElementNode",
131 |                                 "Event",
132 |                                 [
133 |                                     [
134 |                                         "AttributeNode",
135 |                                         "xmlns",
136 |                                         [
137 |                                             [
138 |                                                 "ValueNode",
139 |                                                 None,
140 |                                                 [
141 |                                                     [
142 |                                                         "WstringTypeNode",
143 |                                                         "http://schemas.microsoft.com/win/2004/08/events/event",
144 |                                                     ]
145 |                                                 ],
146 |                                             ]
147 |                                         ],
148 |                                     ],
149 |                                     ["CloseStartElementNode"],
150 |                                     [
151 |                                         "OpenStartElementNode",
152 |                                         "System",
153 |                                         [
154 |                                             ["CloseStartElementNode"],
155 |                                             [
156 |                                                 "OpenStartElementNode",
157 |                                                 "Provider",
158 |                                                 [
159 |                                                     [
160 |                                                         "AttributeNode",
161 |                                                         "Name",
162 |                                                         [
163 |                                                             [
164 |                                                                 "ValueNode",
165 |                                                                 None,
166 |                                                                 [["WstringTypeNode", "Microsoft-Windows-Eventlog"]],
167 |                                                             ]
168 |                                                         ],
169 |                                                     ],
170 |                                                     [
171 |                                                         "AttributeNode",
172 |                                                         "Guid",
173 |                                                         [
174 |                                                             [
175 |                                                                 "ValueNode",
176 |                                                                 None,
177 |                                                                 [
178 |                                                                     [
179 |                                                                         "WstringTypeNode",
180 |                                                                         "{fc65ddd8-d6ef-4962-83d5-6e5cfe9ce148}",
181 |                                                                     ]
182 |                                                                 ],
183 |                                                             ]
184 |                                                         ],
185 |                                                     ],
186 |                                                     ["CloseEmptyElementNode"],
187 |                                                 ],
188 |                                             ],
189 |                                             [
190 |                                                 "OpenStartElementNode",
191 |                                                 "EventID",
192 |                                                 [
193 |                                                     ["AttributeNode", "Qualifiers", [["ConditionalSubstitutionNode"]]],
194 |                                                     ["CloseStartElementNode"],
195 |                                                     ["ConditionalSubstitutionNode"],
196 |                                                     ["CloseElementNode"],
197 |                                                 ],
198 |                                             ],
199 |                                             [
200 |                                                 "OpenStartElementNode",
201 |                                                 "Version",
202 |                                                 [
203 |                                                     ["CloseStartElementNode"],
204 |                                                     ["ConditionalSubstitutionNode"],
205 |                                                     ["CloseElementNode"],
206 |                                                 ],
207 |                                             ],
208 |                                             [
209 |                                                 "OpenStartElementNode",
210 |                                                 "Level",
211 |                                                 [
212 |                                                     ["CloseStartElementNode"],
213 |                                                     ["ConditionalSubstitutionNode"],
214 |                                                     ["CloseElementNode"],
215 |                                                 ],
216 |                                             ],
217 |                                             [
218 |                                                 "OpenStartElementNode",
219 |                                                 "Task",
220 |                                                 [
221 |                                                     ["CloseStartElementNode"],
222 |                                                     ["ConditionalSubstitutionNode"],
223 |                                                     ["CloseElementNode"],
224 |                                                 ],
225 |                                             ],
226 |                                             [
227 |                                                 "OpenStartElementNode",
228 |                                                 "Opcode",
229 |                                                 [
230 |                                                     ["CloseStartElementNode"],
231 |                                                     ["ConditionalSubstitutionNode"],
232 |                                                     ["CloseElementNode"],
233 |                                                 ],
234 |                                             ],
235 |                                             [
236 |                                                 "OpenStartElementNode",
237 |                                                 "Keywords",
238 |                                                 [
239 |                                                     ["CloseStartElementNode"],
240 |                                                     ["ConditionalSubstitutionNode"],
241 |                                                     ["CloseElementNode"],
242 |                                                 ],
243 |                                             ],
244 |                                             [
245 |                                                 "OpenStartElementNode",
246 |                                                 "TimeCreated",
247 |                                                 [
248 |                                                     ["AttributeNode", "SystemTime", [["ConditionalSubstitutionNode"]]],
249 |                                                     ["CloseEmptyElementNode"],
250 |                                                 ],
251 |                                             ],
252 |                                             [
253 |                                                 "OpenStartElementNode",
254 |                                                 "EventRecordID",
255 |                                                 [
256 |                                                     ["CloseStartElementNode"],
257 |                                                     ["ConditionalSubstitutionNode"],
258 |                                                     ["CloseElementNode"],
259 |                                                 ],
260 |                                             ],
261 |                                             [
262 |                                                 "OpenStartElementNode",
263 |                                                 "Correlation",
264 |                                                 [
265 |                                                     ["AttributeNode", "ActivityID", [["ConditionalSubstitutionNode"]]],
266 |                                                     [
267 |                                                         "AttributeNode",
268 |                                                         "RelatedActivityID",
269 |                                                         [["ConditionalSubstitutionNode"]],
270 |                                                     ],
271 |                                                     ["CloseEmptyElementNode"],
272 |                                                 ],
273 |                                             ],
274 |                                             [
275 |                                                 "OpenStartElementNode",
276 |                                                 "Execution",
277 |                                                 [
278 |                                                     ["AttributeNode", "ProcessID", [["ConditionalSubstitutionNode"]]],
279 |                                                     ["AttributeNode", "ThreadID", [["ConditionalSubstitutionNode"]]],
280 |                                                     ["CloseEmptyElementNode"],
281 |                                                 ],
282 |                                             ],
283 |                                             [
284 |                                                 "OpenStartElementNode",
285 |                                                 "Channel",
286 |                                                 [
287 |                                                     ["CloseStartElementNode"],
288 |                                                     ["ValueNode", None, [["WstringTypeNode", "System"]]],
289 |                                                     ["CloseElementNode"],
290 |                                                 ],
291 |                                             ],
292 |                                             [
293 |                                                 "OpenStartElementNode",
294 |                                                 "Computer",
295 |                                                 [
296 |                                                     ["CloseStartElementNode"],
297 |                                                     [
298 |                                                         "ValueNode",
299 |                                                         None,
300 |                                                         [["WstringTypeNode", "WKS-WIN764BITB.shieldbase.local"]],
301 |                                                     ],
302 |                                                     ["CloseElementNode"],
303 |                                                 ],
304 |                                             ],
305 |                                             [
306 |                                                 "OpenStartElementNode",
307 |                                                 "Security",
308 |                                                 [
309 |                                                     ["AttributeNode", "UserID", [["ConditionalSubstitutionNode"]]],
310 |                                                     ["CloseEmptyElementNode"],
311 |                                                 ],
312 |                                             ],
313 |                                             ["CloseElementNode"],
314 |                                         ],
315 |                                     ],
316 |                                     [
317 |                                         "OpenStartElementNode",
318 |                                         "UserData",
319 |                                         [
320 |                                             ["CloseStartElementNode"],
321 |                                             ["ConditionalSubstitutionNode"],
322 |                                             ["CloseElementNode"],
323 |                                         ],
324 |                                     ],
325 |                                     ["CloseElementNode"],
326 |                                 ],
327 |                             ],
328 |                             ["EndOfStreamNode"],
329 |                         ],
330 |                     ]
331 |                 ],
332 |             ],
333 |             [
334 |                 "Substitutions",
335 |                 None,
336 |                 [
337 |                     ["UnsignedByteTypeNode", "4"],
338 |                     ["UnsignedByteTypeNode", "0"],
339 |                     ["UnsignedWordTypeNode", "105"],
340 |                     ["UnsignedWordTypeNode", "105"],
341 |                     ["NullTypeNode"],
342 |                     ["Hex64TypeNode", "0x8000000000000000"],
343 |                     ["FiletimeTypeNode", "2012-03-14 04:17:43.354563+00:00"],
344 |                     ["NullTypeNode"],
345 |                     ["UnsignedDwordTypeNode", "820"],
346 |                     ["UnsignedDwordTypeNode", "2868"],
347 |                     ["UnsignedQwordTypeNode", "12049"],
348 |                     ["UnsignedByteTypeNode", "0"],
349 |                     ["NullTypeNode"],
350 |                     ["NullTypeNode"],
351 |                     ["NullTypeNode"],
352 |                     ["NullTypeNode"],
353 |                     ["NullTypeNode"],
354 |                     ["NullTypeNode"],
355 |                     ["NullTypeNode"],
356 |                     [
357 |                         "BXmlTypeNode",
358 |                         None,
359 |                         [
360 |                             [
361 |                                 "RootNode",
362 |                                 None,
363 |                                 [
364 |                                     ["StreamStartNode"],
365 |                                     [
366 |                                         "TemplateInstanceNode",
367 |                                         None,
368 |                                         [
369 |                                             [
370 |                                                 "TemplateNode",
371 |                                                 None,
372 |                                                 [
373 |                                                     ["StreamStartNode"],
374 |                                                     [
375 |                                                         "OpenStartElementNode",
376 |                                                         "AutoBackup",
377 |                                                         [
378 |                                                             [
379 |                                                                 "AttributeNode",
380 |                                                                 "xmlns:auto-ns3",
381 |                                                                 [
382 |                                                                     [
383 |                                                                         "ValueNode",
384 |                                                                         None,
385 |                                                                         [
386 |                                                                             [
387 |                                                                                 "WstringTypeNode",
388 |                                                                                 "http://schemas.microsoft.com/win/2004/08/events",
389 |                                                                             ]
390 |                                                                         ],
391 |                                                                     ]
392 |                                                                 ],
393 |                                                             ],
394 |                                                             [
395 |                                                                 "AttributeNode",
396 |                                                                 "xmlns",
397 |                                                                 [
398 |                                                                     [
399 |                                                                         "ValueNode",
400 |                                                                         None,
401 |                                                                         [
402 |                                                                             [
403 |                                                                                 "WstringTypeNode",
404 |                                                                                 "http://manifests.microsoft.com/win/2004/08/windows/eventlog",
405 |                                                                             ]
406 |                                                                         ],
407 |                                                                     ]
408 |                                                                 ],
409 |                                                             ],
410 |                                                             ["CloseStartElementNode"],
411 |                                                             [
412 |                                                                 "OpenStartElementNode",
413 |                                                                 "Channel",
414 |                                                                 [
415 |                                                                     ["CloseStartElementNode"],
416 |                                                                     ["NormalSubstitutionNode"],
417 |                                                                     ["CloseElementNode"],
418 |                                                                 ],
419 |                                                             ],
420 |                                                             [
421 |                                                                 "OpenStartElementNode",
422 |                                                                 "BackupPath",
423 |                                                                 [
424 |                                                                     ["CloseStartElementNode"],
425 |                                                                     ["NormalSubstitutionNode"],
426 |                                                                     ["CloseElementNode"],
427 |                                                                 ],
428 |                                                             ],
429 |                                                             ["CloseElementNode"],
430 |                                                         ],
431 |                                                     ],
432 |                                                     ["EndOfStreamNode"],
433 |                                                 ],
434 |                                             ]
435 |                                         ],
436 |                                     ],
437 |                                     [
438 |                                         "Substitutions",
439 |                                         None,
440 |                                         [
441 |                                             ["WstringTypeNode", "System"],
442 |                                             [
443 |                                                 "WstringTypeNode",
444 |                                                 r"C:\Windows\System32\Winevt\Logs\Archive-System-2012-03-14-04-17-39-932.evtx",
445 |                                             ],
446 |                                         ],
447 |                                     ],
448 |                                 ],
449 |                             ]
450 |                         ],
451 |                     ],
452 |                 ],
453 |             ],
454 |         ],
455 |     ]
456 | 
457 |     assert extract_structure(record.root()) == expected
458 | 
459 | 
460 | def test_render_record(system):
461 |     """
462 |     regression test demonstrating formatting a record to xml.
463 | 
464 |     Args:
465 |       system (bytes): the system.evtx test file contents. pytest fixture.
466 |     """
467 |     fh = evtx.FileHeader(system, 0x0)
468 |     chunk = one(fh.chunks())
469 |     record = one(chunk.records())
470 | 
471 |     xml = record.xml()
472 |     assert xml == textwrap.dedent(
473 |         """\
474 |                                      <Event xmlns="http://schemas.microsoft.com/win/2004/08/events/event"><System><Provider Name="Microsoft-Windows-Eventlog" Guid="{fc65ddd8-d6ef-4962-83d5-6e5cfe9ce148}"></Provider>
475 |                                      <EventID Qualifiers="">105</EventID>
476 |                                      <Version>0</Version>
477 |                                      <Level>4</Level>
478 |                                      <Task>105</Task>
479 |                                      <Opcode>0</Opcode>
480 |                                      <Keywords>0x8000000000000000</Keywords>
481 |                                      <TimeCreated SystemTime="2012-03-14 04:17:43.354563+00:00"></TimeCreated>
482 |                                      <EventRecordID>12049</EventRecordID>
483 |                                      <Correlation ActivityID="" RelatedActivityID=""></Correlation>
484 |                                      <Execution ProcessID="820" ThreadID="2868"></Execution>
485 |                                      <Channel>System</Channel>
486 |                                      <Computer>WKS-WIN764BITB.shieldbase.local</Computer>
487 |                                      <Security UserID=""></Security>
488 |                                      </System>
489 |                                      <UserData><AutoBackup xmlns:auto-ns3="http://schemas.microsoft.com/win/2004/08/events" xmlns="http://manifests.microsoft.com/win/2004/08/windows/eventlog"><Channel>System</Channel>
490 |                                      <BackupPath>C:\\Windows\\System32\\Winevt\\Logs\\Archive-System-2012-03-14-04-17-39-932.evtx</BackupPath>
491 |                                      </AutoBackup>
492 |                                      </UserData>
493 |                                      </Event>
494 |                                      """
495 |     )
496 | 
497 | 
498 | def test_render_records(system):
499 |     """
500 |     regression test demonstrating formatting records to xml.
501 | 
502 |     Args:
503 |       system (bytes): the system.evtx test file contents. pytest fixture.
504 |     """
505 |     fh = evtx.FileHeader(system, 0x0)
506 |     for chunk in fh.chunks():
507 |         for record in chunk.records():
508 |             assert record.xml() is not None
509 | 
510 | 
511 | def test_render_records2(security):
512 |     """
513 |     regression test demonstrating formatting records to xml.
514 | 
515 |     Args:
516 |       security (bytes): the security.evtx test file contents. pytest fixture.
517 |     """
518 |     fh = evtx.FileHeader(security, 0x0)
519 |     for chunk in fh.chunks():
520 |         for record in chunk.records():
521 |             assert record.xml() is not None
522 | 
523 | 
524 | @pytest.mark.skipif(no_lxml, reason="lxml not installed")
525 | def test_render_records_lxml(system):
526 |     """
527 |     regression test demonstrating formatting records to xml.
528 | 
529 |     Args:
530 |       system (bytes): the system.evtx test file contents. pytest fixture.
531 |     """
532 |     fh = evtx.FileHeader(system, 0x0)
533 |     for i, chunk in enumerate(fh.chunks()):
534 |         for j, record in enumerate(chunk.records()):
535 |             assert record.lxml() is not None
536 | 
537 | 
538 | @pytest.mark.skipif(no_lxml, reason="lxml not installed")
539 | def test_render_records_lxml2(security):
540 |     """
541 |     regression test demonstrating formatting records to xml.
542 | 
543 |     Args:
544 |       security (bytes): the security.evtx test file contents. pytest fixture.
545 |     """
546 |     fh = evtx.FileHeader(security, 0x0)
547 |     for i, chunk in enumerate(fh.chunks()):
548 |         for j, record in enumerate(chunk.records()):
549 |             assert record.lxml() is not None
550 | 


--------------------------------------------------------------------------------