├── .github └── workflows │ ├── publish.yml │ └── test.yml ├── .gitignore ├── .justfile ├── Evtx ├── BinaryParser.py ├── Evtx.py ├── Nodes.py ├── Views.py └── __init__.py ├── LICENSE.TXT ├── README.md ├── pyproject.toml ├── scripts ├── evtx_dates.py ├── evtx_dump.py ├── evtx_dump_chunk_slack.py ├── evtx_dump_json.py ├── evtx_eid_record_numbers.py ├── evtx_extract_record.py ├── evtx_filter_records.py ├── evtx_info.py ├── evtx_record_structure.py ├── evtx_record_template.py ├── evtx_structure.py └── evtx_templates.py └── tests ├── conftest.py ├── data ├── dns_log_malformed.evtx ├── issue_38.evtx ├── issue_39.evtx ├── issue_43.evtx ├── readme.md ├── security.evtx └── system.evtx ├── fixtures.py ├── test_chunks.py ├── test_header.py ├── test_issue_37.py ├── test_issue_38.py ├── test_issue_39.py ├── test_issue_43.py └── test_records.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | # use PyPI trusted publishing, as described here: 2 | # https://blog.trailofbits.com/2023/05/23/trusted-publishing-a-new-benchmark-for-packaging-security/ 3 | name: publish to pypi 4 | 5 | on: 6 | release: 7 | types: [published] 8 | 9 | permissions: 10 | contents: write 11 | 12 | jobs: 13 | pypi-publish: 14 | runs-on: ubuntu-latest 15 | environment: 16 | name: release 17 | permissions: 18 | id-token: write 19 | steps: 20 | - uses: actions/checkout@v2 21 | - uses: astral-sh/setup-uv@v5 22 | - name: install 23 | run: uv sync --group build 24 | - name: build package 25 | run: uv run python -m build 26 | - name: upload package artifacts 27 | uses: actions/upload-artifact@v4 28 | with: 29 | path: dist/* 30 | - name: publish package 31 | uses: pypa/gh-action-pypi-publish@release/v1 32 | with: 33 | skip-existing: true 34 | verbose: true 35 | print-hash: true 36 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: 
# CI workflow: lint and test python-evtx on the oldest and newest supported
# Python versions.
name: test

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  tests:
    name: Tests in ${{ matrix.python }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          # quoted so YAML keeps them as strings (an unquoted 3.10 would
          # be parsed as the float 3.1)
          - python: "3.9"
          - python: "3.13"
    steps:
    - name: Checkout python-evtx with submodules
      uses: actions/checkout@v2
      with:
        submodules: true

    - uses: astral-sh/setup-uv@v5
      with:
        version: "0.7.2"
        # the matrix key is `python`; `matrix.python-version` is undefined
        # and expands to an empty string, so every leg would silently run
        # on the default interpreter
        python-version: ${{ matrix.python }}

    - name: install
      run: uv sync --all-extras

    - uses: extractions/setup-just@v2
      with:
        just-version: 1.5.0

    - name: lint
      run: just lint

    - name: test
      run: just test
--line-length 120 Evtx/ tests/ scripts/ 9 | 10 | mypy: 11 | uvx mypy --check-untyped-defs --ignore-missing-imports Evtx/ tests/ scripts/ 12 | 13 | lint: 14 | -just isort 15 | -just black 16 | -just ruff 17 | # this doesn't pass cleanly today 18 | #-just mypy 19 | 20 | test: 21 | uv run pytest tests/ 22 | -------------------------------------------------------------------------------- /Evtx/BinaryParser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # This file is part of python-evtx. 3 | # 4 | # Copyright 2012, 2013 Willi Ballenthin 5 | # while at Mandiant 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | # Version v.0.3.0 20 | from __future__ import absolute_import 21 | 22 | import struct 23 | import datetime 24 | from functools import partial 25 | 26 | 27 | class memoize(object): 28 | """cache the return value of a method 29 | 30 | From http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/ 31 | 32 | This class is meant to be used as a decorator of methods. The return value 33 | from a given method invocation will be cached on the instance whose method 34 | was invoked. All arguments passed to a method decorated with memoize must 35 | be hashable. 36 | 37 | If a memoized method is invoked directly on its class the result will not 38 | be cached. 
def dosdate(dosdate, dostime):
    """
    Decode a packed MS-DOS date/time pair.

    `dosdate`: 2 bytes, little endian.
               bits 0-4: day, bits 5-8: month, bits 9-15: years since 1980.
    `dostime`: 2 bytes, little endian.
               bits 0-4: seconds / 2, bits 5-10: minute, bits 11-15: hour.
    Both arguments may be `bytes`-like (indexing yields ints) or
    a text string (indexing yields 1-character strings).
    returns: datetime.datetime or datetime.datetime.min on error
      (i.e. when the decoded fields do not form a valid date/time).
    raises: IndexError if either argument is shorter than two bytes.
    """

    def word(raw):
        # Indexing `bytes` already yields ints on Python 3; calling ord() on
        # them raises TypeError, so only apply ord() to 1-char strings.
        lo, hi = raw[0], raw[1]
        if not isinstance(lo, int):
            lo, hi = ord(lo), ord(hi)
        return (hi << 8) | lo

    try:
        t = word(dosdate)
        day = t & 0b0000000000011111
        month = (t & 0b0000000111100000) >> 5
        year = ((t & 0b1111111000000000) >> 9) + 1980

        t = word(dostime)
        # seconds are stored with two-second resolution
        sec = (t & 0b0000000000011111) * 2
        minute = (t & 0b0000011111100000) >> 5
        hour = (t & 0b1111100000000000) >> 11

        return datetime.datetime(year, month, day, hour, minute, sec)
    except ValueError:
        # out-of-range field (e.g. month 0, second 62)
        return datetime.datetime.min
def declare_field(self, type, name, offset=None, length=None):
    """
    Declaratively add fields to this block.
    This method will dynamically add corresponding
      offset and unpacker methods to this block:
      `self.<name>()` calls `self.unpack_<type>(offset[, length])` and
      `self._off_<name>` holds the field's relative offset.
    After a successful call, the implicit offset points just past the
      declared field, so the next field may omit `offset`.
    Arguments:
    - `type`: A string. Should be one of the unpack_* types.
    - `name`: A string.
    - `offset`: A number. Defaults to the current implicit offset.
    - `length`: (Optional) A number. For (w)strings, length in chars.
    Throws:
    - `ParseException` if the size of the field cannot be determined
      (unknown type, or a string/binary field without a length). The
      accessor and `_off_` attributes have already been set at that point.
    """
    # Size in bytes of every fixed-width field type.
    # NOTE(review): "systemtime" advances by 8 as in the original code; a
    #   Win32 SYSTEMTIME is 16 bytes, so this may be wrong - confirm.
    fixed_sizes = {
        "byte": 1,
        "int8": 1,
        "word": 2,
        "word_be": 2,
        "int16": 2,
        "dword": 4,
        "dword_be": 4,
        "int32": 4,
        "qword": 8,
        "int64": 8,
        "float": 4,
        "double": 8,
        "dosdate": 4,
        "filetime": 8,
        "systemtime": 8,
        "guid": 16,
    }

    if offset is None:
        offset = self._implicit_offset

    # The unpacker is looked up lazily, when the accessor is called.
    if length is None:

        def no_length_handler():
            f = getattr(self, "unpack_" + type)
            return f(offset)

        setattr(self, name, no_length_handler)
    else:

        def explicit_length_handler():
            f = getattr(self, "unpack_" + type)
            return f(offset, length)

        setattr(self, name, explicit_length_handler)

    setattr(self, "_off_" + name, offset)

    if type in fixed_sizes:
        self._implicit_offset = offset + fixed_sizes[type]
    elif type == "binary":
        if length is None:
            # previously fell through to `offset + None` and surfaced as
            # an unrelated TypeError
            raise ParseException("Implicit offset not supported " "for binary fields without a length")
        self._implicit_offset = offset + length
    elif type == "string" and length is not None:
        self._implicit_offset = offset + length
    elif type == "wstring" and length is not None:
        # wide strings use two bytes per character
        self._implicit_offset = offset + (2 * length)
    elif "string" in type and length is None:
        raise ParseException("Implicit offset not supported " "for dynamic length strings")
    else:
        raise ParseException("Implicit offset not supported " "for type: {}".format(type))
class Evtx(object):
    """
    A convenience class that makes it easy to open an
      EVTX file and start iterating the important structures.
    Note, this class must be used in a context statement
       (see the `with` keyword).
    Note, this class will mmap the target file, so ensure
      your platform supports this operation.
    """

    def __init__(self, filename):
        """
        @type filename:  str
        @param filename: A string that contains the path
          to the EVTX file to open.
        """
        self._filename = filename
        self._buf = None
        self._f = None
        self._fh = None

    def __enter__(self):
        """
        Open and mmap the file and parse its FileHeader.

        If mapping or header construction fails (for example, an empty
          file cannot be mmapped), the file handle is closed again before
          the exception propagates, because `__exit__` is not invoked
          when `__enter__` raises.
        """
        self._f = open(self._filename, "rb")
        try:
            self._buf = mmap.mmap(self._f.fileno(), 0, access=mmap.ACCESS_READ)
            self._fh = FileHeader(self._buf, 0x0)
        except BaseException:
            self._release()
            raise
        return self

    def __exit__(self, type, value, traceback):
        """
        Unmap and close the file. Exceptions from the `with` body
          are not suppressed.
        """
        self._release()

    def _release(self):
        """
        Close the mmap and the file handle (if open) and reset the
          instance to its un-contexted state, so that `ensure_contexted`
          rejects any use after the context has ended.
        """
        buf, f = self._buf, self._f
        self._buf = None
        self._f = None
        self._fh = None
        try:
            if buf is not None:
                buf.close()
        finally:
            # close the file even if closing the mapping failed
            if f is not None:
                f.close()

    def ensure_contexted(func):
        """
        This decorator ensure that an instance of the
          Evtx class is used within a context statement.  That is,
          that the `with` statement is used, or `__enter__()`
          and `__exit__()` are called explicitly.
        """

        @wraps(func)
        def wrapped(self, *args, **kwargs):
            if self._buf is None:
                raise TypeError("An Evtx object must be used with" " a context (see the `with` statement).")
            else:
                return func(self, *args, **kwargs)

        return wrapped

    @ensure_contexted
    def chunks(self):
        """
        Get each of the ChunkHeaders from within this EVTX file.

        @rtype generator of ChunkHeader
        @return A generator of ChunkHeaders from this EVTX file.
        """
        for chunk in self._fh.chunks():
            yield chunk

    @ensure_contexted
    def records(self):
        """
        Get each of the Records from within this EVTX file.

        @rtype generator of Record
        @return A generator of Records from this EVTX file.
        """
        for chunk in self.chunks():
            for record in chunk.records():
                yield record

    @ensure_contexted
    def get_record(self, record_num):
        """
        Get a Record by record number.

        @type record_num:  int
        @param record_num: The record number of the record to fetch.
        @rtype Record or None
        @return The record requested by record number, or None if
          the record is not found.
        """
        return self._fh.get_record(record_num)

    @ensure_contexted
    def get_file_header(self):
        """
        @return The FileHeader parsed when the context was entered.
        """
        return self._fh
178 | """ 179 | return ( 180 | self.check_magic() 181 | and self.major_version() == 0x3 182 | and self.minor_version() == 0x1 183 | and self.header_chunk_size() == 0x1000 184 | and self.checksum() == self.calculate_checksum() 185 | ) 186 | 187 | def is_dirty(self): 188 | """ 189 | @return A boolean that indicates that the log has been 190 | opened and was changed, though not all changes might be 191 | reflected in the file header. 192 | """ 193 | return self.flags() & 0x1 == 0x1 194 | 195 | def is_full(self): 196 | """ 197 | @return A boolean that indicates that the log 198 | has reached its maximum configured size and the retention 199 | policy in effect does not allow to reclaim a suitable amount 200 | of space from the oldest records and an event message could 201 | not be written to the log file. 202 | """ 203 | return self.flags() & 0x2 == 0x2 204 | 205 | def first_chunk(self): 206 | """ 207 | @return A ChunkHeader instance that is the first chunk 208 | in the log file, which is always found directly after 209 | the FileHeader. 210 | """ 211 | ofs = self._offset + self.header_chunk_size() 212 | return ChunkHeader(self._buf, ofs) 213 | 214 | def current_chunk(self): 215 | """ 216 | @return A ChunkHeader instance that is the current chunk 217 | indicated by the FileHeader. 218 | """ 219 | ofs = self._offset + self.header_chunk_size() 220 | ofs += self.current_chunk_number() * 0x10000 221 | return ChunkHeader(self._buf, ofs) 222 | 223 | def chunks(self, include_inactive=False): 224 | """ 225 | @return A generator that yields the chunks of the log file 226 | starting with the first chunk, which is always found directly 227 | after the FileHeader. 228 | 229 | If `include_inactive` is set to true, enumerate chunks beyond those 230 | declared in the file header (and may therefore be corrupt). 
class Template(object):
    """
    Wraps a template node and renders it by filling the node's
      substitution placeholders with concrete values.
    """

    def __init__(self, template_node):
        # the format string is built on first use, see _load_xml()
        self._xml = None
        self._template_node = template_node

    def _load_xml(self):
        """
        Build (once) a `str.format` string from the node's template
          format: literal braces are doubled so they survive formatting,
          and each "[Normal|Conditional Substitution(index=N, type=T)]"
          marker becomes the positional field "{N:}".

        TODO(wb): One day, nodes should generate format strings
          instead of the XML format made-up abomination.
        """
        if self._xml is None:
            escaped = self._template_node.template_format()
            escaped = escaped.replace("{", "{{")
            escaped = escaped.replace("}", "}}")
            placeholder = r"\[(?:Normal|Conditional) Substitution\(index=(\d+), type=\d+\)\]"
            self._xml = re.sub(placeholder, "{\\1:}", escaped)

    def make_substitutions(self, substitutions):
        """
        Render the template, replacing placeholder N with the XML
          of the N-th substitution.

        @type substitutions: list of VariantTypeNode
        @return The rendered string.
        """
        self._load_xml()
        rendered = [value.xml() for value in substitutions]
        return self._xml.format(*rendered)

    def node(self):
        """
        @return The template node this Template was created from.
        """
        return self._template_node
def calculate_header_checksum(self):
    """
    @return A integer in the range of an unsigned int that
      is the calculated CRC32 checksum of the ChunkHeader fields:
      the first 0x78 bytes of the chunk followed by the 0x180 bytes
      starting at offset 0x80 (the eight bytes in between are skipped).
    """
    leading_fields = self.unpack_binary(0x0, 0x78)
    offset_tables = self.unpack_binary(0x80, 0x180)
    # running CRC: feeding the second region with the first region's CRC as
    # the start value equals the CRC of the two regions concatenated
    running = binascii.crc32(leading_fields)
    return binascii.crc32(offset_tables, running) & 0xFFFFFFFF
381 | @return None 382 | """ 383 | if self._strings is None: 384 | self._load_strings() 385 | string_node = NameStringNode(self._buf, self._offset + offset, self, parent or self) 386 | self._strings[offset] = string_node 387 | return string_node 388 | 389 | def _load_templates(self): 390 | """ 391 | @return None 392 | """ 393 | if self._templates is None: 394 | self._templates = {} 395 | for i in range(32): 396 | ofs = self.unpack_dword(0x180 + (i * 4)) 397 | while ofs > 0: 398 | # unclear why these are found before the offset 399 | # this is a direct port from A.S.'s code 400 | token = self.unpack_byte(ofs - 10) 401 | pointer = self.unpack_dword(ofs - 4) 402 | if token != 0x0C or pointer != ofs: 403 | logger.warning("Unexpected token encountered") 404 | ofs = 0 405 | continue 406 | template = self.add_template(ofs) 407 | ofs = template.next_offset() 408 | 409 | def add_template(self, offset, parent=None): 410 | """ 411 | @param offset An integer which contains the chunk-relative offset 412 | to a template to load into this Chunk. 413 | @param parent (Optional) The parent of the newly created 414 | TemplateNode instance. (Default: this chunk). 415 | @return Newly added TemplateNode instance. 416 | """ 417 | if self._templates is None: 418 | self._load_templates() 419 | 420 | node = TemplateNode(self._buf, self._offset + offset, self, parent or self) 421 | self._templates[offset] = node 422 | return node 423 | 424 | def templates(self): 425 | """ 426 | @return A dict(offset --> Template) of all encountered 427 | templates in this Chunk. 
def records(self):
    """
    @return A generator that yields the Records of this chunk in
      order, starting with the first record. Iteration ends at the
      chunk's next-record offset, at a record whose length is not
      positive, or as soon as a record fails to construct with
      InvalidRecordException.
    """
    try:
        current = self.first_record()
    except InvalidRecordException:
        return

    while True:
        # stop at the end of the used area of this chunk
        if current._offset >= self._offset + self.next_record_offset():
            return
        # a non-positive length would never advance
        if current.length() <= 0:
            return
        yield current
        following = current._offset + current.length()
        try:
            current = Record(self._buf, following, self)
        except InvalidRecordException:
            return
494 | 495 | Returns: 496 | str: the rendered xml document. 497 | """ 498 | return e_views.evtx_record_xml_view(self) 499 | 500 | def lxml(self): 501 | """ 502 | render the record into a lxml document. 503 | this is useful for querying data from the record using xpath, etc. 504 | 505 | note: lxml must be installed. 506 | 507 | Returns: 508 | lxml.etree.ElementTree: the rendered and parsed xml document. 509 | 510 | Raises: 511 | ImportError: if lxml is not installed. 512 | """ 513 | import lxml.etree 514 | 515 | return lxml.etree.fromstring((e_views.XML_HEADER + self.xml()).encode("utf-8")) 516 | -------------------------------------------------------------------------------- /Evtx/Nodes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # This file is part of python-evtx. 3 | # 4 | # Copyright 2012, 2013 Willi Ballenthin william.ballenthin@mandiant.com> 5 | # while at Mandiant 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | from __future__ import absolute_import 19 | 20 | import re 21 | import base64 22 | import itertools 23 | 24 | import hexdump 25 | 26 | from .BinaryParser import Block, ParseException, memoize 27 | 28 | 29 | class SYSTEM_TOKENS: 30 | EndOfStreamToken = 0x00 31 | OpenStartElementToken = 0x01 32 | CloseStartElementToken = 0x02 33 | CloseEmptyElementToken = 0x03 34 | CloseElementToken = 0x04 35 | ValueToken = 0x05 36 | AttributeToken = 0x06 37 | CDataSectionToken = 0x07 38 | EntityReferenceToken = 0x08 39 | ProcessingInstructionTargetToken = 0x0A 40 | ProcessingInstructionDataToken = 0x0B 41 | TemplateInstanceToken = 0x0C 42 | NormalSubstitutionToken = 0x0D 43 | ConditionalSubstitutionToken = 0x0E 44 | StartOfStreamToken = 0x0F 45 | 46 | 47 | class NODE_TYPES: 48 | NULL = 0x00 49 | WSTRING = 0x01 50 | STRING = 0x02 51 | SIGNED_BYTE = 0x03 52 | UNSIGNED_BYTE = 0x04 53 | SIGNED_WORD = 0x05 54 | UNSIGNED_WORD = 0x06 55 | SIGNED_DWORD = 0x07 56 | UNSIGNED_DWORD = 0x08 57 | SIGNED_QWORD = 0x09 58 | UNSIGNED_QWORD = 0x0A 59 | FLOAT = 0x0B 60 | DOUBLE = 0x0C 61 | BOOLEAN = 0x0D 62 | BINARY = 0x0E 63 | GUID = 0x0F 64 | SIZE = 0x10 65 | FILETIME = 0x11 66 | SYSTEMTIME = 0x12 67 | SID = 0x13 68 | HEX32 = 0x14 69 | HEX64 = 0x15 70 | BXML = 0x21 71 | WSTRINGARRAY = 0x81 72 | 73 | 74 | node_dispatch_table = [] # updated at end of file 75 | node_readable_tokens = [] # updated at end of file 76 | 77 | 78 | class SuppressConditionalSubstitution(Exception): 79 | """ 80 | This exception is to be thrown to indicate that a conditional 81 | substitution evaluated to NULL, and the parent element should 82 | be suppressed. This exception should be caught at the first 83 | opportunity, and must not propagate far up the call chain. 84 | 85 | Strategy: 86 | AttributeNode catches this, .xml() --> "" 87 | StartOpenElementNode catches this for each child, ensures 88 | there's at least one useful value. 
Or, .xml() --> "" 89 | """ 90 | 91 | def __init__(self, msg): 92 | super(SuppressConditionalSubstitution, self).__init__(msg) 93 | 94 | 95 | class UnexpectedStateException(ParseException): 96 | """ 97 | UnexpectedStateException is an exception to be thrown when the parser 98 | encounters an unexpected value or state. This probably means there 99 | is a bug in the parser, but could stem from a corrupted input file. 100 | """ 101 | 102 | def __init__(self, msg): 103 | super(UnexpectedStateException, self).__init__(msg) 104 | 105 | 106 | class BXmlNode(Block): 107 | 108 | def __init__(self, buf, offset, chunk, parent): 109 | super(BXmlNode, self).__init__(buf, offset) 110 | self._chunk = chunk 111 | self._parent = parent 112 | 113 | def __repr__(self): 114 | return "BXmlNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 115 | self._buf, self.offset(), self._chunk, self._parent 116 | ) 117 | 118 | def __str__(self): 119 | return "BXmlNode(offset={})".format(hex(self.offset())) 120 | 121 | def dump(self): 122 | b = self._buf[self.offset() : self.offset() + self.length()] 123 | return hexdump.hexdump(b, result="return") 124 | 125 | def tag_length(self): 126 | """ 127 | This method must be implemented and overridden for all BXmlNodes. 128 | @return An integer specifying the length of this tag, not including 129 | its children. 130 | """ 131 | raise NotImplementedError("tag_length not implemented for {!r}").format(self) 132 | 133 | def _children(self, max_children=None, end_tokens=[SYSTEM_TOKENS.EndOfStreamToken]): 134 | """ 135 | @return A list containing all of the children BXmlNodes. 136 | """ 137 | ret = [] 138 | ofs = self.tag_length() 139 | 140 | if max_children: 141 | gen = list(range(max_children)) 142 | else: 143 | gen = itertools.count() 144 | 145 | for _ in gen: 146 | # we lose error checking by masking off the higher nibble, 147 | # but, some tokens like 0x01, make use of the flags nibble. 
148 | token = self.unpack_byte(ofs) & 0x0F 149 | try: 150 | HandlerNodeClass = node_dispatch_table[token] 151 | child = HandlerNodeClass(self._buf, self.offset() + ofs, self._chunk, self) 152 | except IndexError: 153 | raise ParseException("Unexpected token {:02X} at {}".format(token, self.absolute_offset(0x0) + ofs)) 154 | ret.append(child) 155 | ofs += child.length() 156 | if token in end_tokens: 157 | break 158 | if child.find_end_of_stream(): 159 | break 160 | return ret 161 | 162 | @memoize 163 | def children(self): 164 | return self._children() 165 | 166 | @memoize 167 | def length(self): 168 | """ 169 | @return An integer specifying the length of this tag and all 170 | its children. 171 | """ 172 | ret = self.tag_length() 173 | for child in self.children(): 174 | ret += child.length() 175 | return ret 176 | 177 | @memoize 178 | def find_end_of_stream(self): 179 | for child in self.children(): 180 | if isinstance(child, EndOfStreamNode): 181 | return child 182 | ret = child.find_end_of_stream() 183 | if ret: 184 | return ret 185 | return None 186 | 187 | 188 | class NameStringNode(BXmlNode): 189 | def __init__(self, buf, offset, chunk, parent): 190 | super(NameStringNode, self).__init__(buf, offset, chunk, parent) 191 | self.declare_field("dword", "next_offset", 0x0) 192 | self.declare_field("word", "hash") 193 | self.declare_field("word", "string_length") 194 | self.declare_field("wstring", "string", length=self.string_length()) 195 | 196 | def __repr__(self): 197 | return "NameStringNode(buf={!r}, offset={!r}, chunk={!r})".format(self._buf, self.offset(), self._chunk) 198 | 199 | def __str__(self): 200 | return "NameStringNode(offset={}, length={}, end={})".format( 201 | hex(self.offset()), hex(self.length()), hex(self.offset() + self.length()) 202 | ) 203 | 204 | def string(self): 205 | return str(self._string()) 206 | 207 | def tag_length(self): 208 | return (self.string_length() * 2) + 8 209 | 210 | def length(self): 211 | # two bytes unaccounted for... 
212 | return self.tag_length() + 2 213 | 214 | 215 | class TemplateNode(BXmlNode): 216 | def __init__(self, buf, offset, chunk, parent): 217 | super(TemplateNode, self).__init__(buf, offset, chunk, parent) 218 | self.declare_field("dword", "next_offset", 0x0) 219 | self.declare_field("dword", "template_id") 220 | self.declare_field("guid", "guid", 0x04) # unsure why this overlaps 221 | self.declare_field("dword", "data_length") 222 | 223 | def __repr__(self): 224 | return "TemplateNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 225 | self._buf, self.offset(), self._chunk, self._parent 226 | ) 227 | 228 | def __str__(self): 229 | return "TemplateNode(offset={}, guid={}, length={})".format(hex(self.offset()), self.guid(), hex(self.length())) 230 | 231 | def tag_length(self): 232 | return 0x18 233 | 234 | def length(self): 235 | return self.tag_length() + self.data_length() 236 | 237 | 238 | class EndOfStreamNode(BXmlNode): 239 | """ 240 | The binary XML node for the system token 0x00. 241 | 242 | This is the "end of stream" token. It may never actually 243 | be instantiated here. 244 | """ 245 | 246 | def __init__(self, buf, offset, chunk, parent): 247 | super(EndOfStreamNode, self).__init__(buf, offset, chunk, parent) 248 | 249 | def __repr__(self): 250 | return "EndOfStreamNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 251 | self._buf, self.offset(), self._chunk, self._parent 252 | ) 253 | 254 | def __str__(self): 255 | return "EndOfStreamNode(offset={}, length={}, token={})".format(hex(self.offset()), hex(self.length()), 0x00) 256 | 257 | def flags(self): 258 | return self.token() >> 4 259 | 260 | def tag_length(self): 261 | return 1 262 | 263 | def length(self): 264 | return 1 265 | 266 | def children(self): 267 | return [] 268 | 269 | 270 | class OpenStartElementNode(BXmlNode): 271 | """ 272 | The binary XML node for the system token 0x01. 273 | 274 | This is the "open start element" token. 
275 | """ 276 | 277 | def __init__(self, buf, offset, chunk, parent): 278 | super(OpenStartElementNode, self).__init__(buf, offset, chunk, parent) 279 | self.declare_field("byte", "token", 0x0) 280 | self.declare_field("word", "unknown0") 281 | # TODO(wb): use this size() field. 282 | self.declare_field("dword", "size") 283 | self.declare_field("dword", "string_offset") 284 | self._tag_length = 11 285 | self._element_type = 0 286 | 287 | if self.flags() & 0x04: 288 | self._tag_length += 4 289 | 290 | if self.string_offset() > self.offset() - self._chunk._offset: 291 | new_string = self._chunk.add_string(self.string_offset(), parent=self) 292 | self._tag_length += new_string.length() 293 | 294 | def __repr__(self): 295 | return "OpenStartElementNode(buf={!r}, offset={!r}, chunk={!r})".format(self._buf, self.offset(), self._chunk) 296 | 297 | def __str__(self): 298 | return "OpenStartElementNode(offset={}, name={}, length={}, token={}, end={}, taglength={}, endtag={})".format( 299 | hex(self.offset()), 300 | self.tag_name(), 301 | hex(self.length()), 302 | hex(self.token()), 303 | hex(self.offset() + self.length()), 304 | hex(self.tag_length()), 305 | hex(self.offset() + self.tag_length()), 306 | ) 307 | 308 | @memoize 309 | def is_empty_node(self): 310 | for child in self.children(): 311 | if type(child) is CloseEmptyElementNode: 312 | return True 313 | return False 314 | 315 | def flags(self): 316 | return self.token() >> 4 317 | 318 | @memoize 319 | def tag_name(self): 320 | return self._chunk.strings()[self.string_offset()].string() 321 | 322 | def tag_length(self): 323 | return self._tag_length 324 | 325 | def verify(self): 326 | return self.flags() & 0x0B == 0 and self.opcode() & 0x0F == 0x01 327 | 328 | @memoize 329 | def children(self): 330 | return self._children(end_tokens=[SYSTEM_TOKENS.CloseElementToken, SYSTEM_TOKENS.CloseEmptyElementToken]) 331 | 332 | 333 | class CloseStartElementNode(BXmlNode): 334 | """ 335 | The binary XML node for the system token 
0x02. 336 | 337 | This is the "close start element" token. 338 | """ 339 | 340 | def __init__(self, buf, offset, chunk, parent): 341 | super(CloseStartElementNode, self).__init__(buf, offset, chunk, parent) 342 | self.declare_field("byte", "token", 0x0) 343 | 344 | def __repr__(self): 345 | return "CloseStartElementNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 346 | self._buf, self.offset(), self._chunk, self._parent 347 | ) 348 | 349 | def __str__(self): 350 | return "CloseStartElementNode(offset={}, length={}, token={})".format( 351 | hex(self.offset()), hex(self.length()), hex(self.token()) 352 | ) 353 | 354 | def flags(self): 355 | return self.token() >> 4 356 | 357 | def tag_length(self): 358 | return 1 359 | 360 | def length(self): 361 | return 1 362 | 363 | def children(self): 364 | return [] 365 | 366 | def verify(self): 367 | return self.flags() & 0x0F == 0 and self.opcode() & 0x0F == 0x02 368 | 369 | 370 | class CloseEmptyElementNode(BXmlNode): 371 | """ 372 | The binary XML node for the system token 0x03. 373 | """ 374 | 375 | def __init__(self, buf, offset, chunk, parent): 376 | super(CloseEmptyElementNode, self).__init__(buf, offset, chunk, parent) 377 | self.declare_field("byte", "token", 0x0) 378 | 379 | def __repr__(self): 380 | return "CloseEmptyElementNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 381 | self._buf, self.offset(), self._chunk, self._parent 382 | ) 383 | 384 | def __str__(self): 385 | return "CloseEmptyElementNode(offset={}, length={}, token={})".format( 386 | hex(self.offset()), hex(self.length()), hex(0x03) 387 | ) 388 | 389 | def flags(self): 390 | return self.token() >> 4 391 | 392 | def tag_length(self): 393 | return 1 394 | 395 | def length(self): 396 | return 1 397 | 398 | def children(self): 399 | return [] 400 | 401 | 402 | class CloseElementNode(BXmlNode): 403 | """ 404 | The binary XML node for the system token 0x04. 405 | 406 | This is the "close element" token. 
407 | """ 408 | 409 | def __init__(self, buf, offset, chunk, parent): 410 | super(CloseElementNode, self).__init__(buf, offset, chunk, parent) 411 | self.declare_field("byte", "token", 0x0) 412 | 413 | def __repr__(self): 414 | return "CloseElementNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 415 | self._buf, self.offset(), self._chunk, self._parent 416 | ) 417 | 418 | def __str__(self): 419 | return "CloseElementNode(offset={}, length={}, token={})".format( 420 | hex(self.offset()), hex(self.length()), hex(self.token()) 421 | ) 422 | 423 | def flags(self): 424 | return self.token() >> 4 425 | 426 | def tag_length(self): 427 | return 1 428 | 429 | def length(self): 430 | return 1 431 | 432 | def children(self): 433 | return [] 434 | 435 | def verify(self): 436 | return self.flags() & 0x0F == 0 and self.opcode() & 0x0F == 0x04 437 | 438 | 439 | def get_variant_value(buf, offset, chunk, parent, type_, length=None): 440 | """ 441 | @return A VariantType subclass instance found in the given 442 | buffer and offset. 
443 | """ 444 | types = { 445 | NODE_TYPES.NULL: NullTypeNode, 446 | NODE_TYPES.WSTRING: WstringTypeNode, 447 | NODE_TYPES.STRING: StringTypeNode, 448 | NODE_TYPES.SIGNED_BYTE: SignedByteTypeNode, 449 | NODE_TYPES.UNSIGNED_BYTE: UnsignedByteTypeNode, 450 | NODE_TYPES.SIGNED_WORD: SignedWordTypeNode, 451 | NODE_TYPES.UNSIGNED_WORD: UnsignedWordTypeNode, 452 | NODE_TYPES.SIGNED_DWORD: SignedDwordTypeNode, 453 | NODE_TYPES.UNSIGNED_DWORD: UnsignedDwordTypeNode, 454 | NODE_TYPES.SIGNED_QWORD: SignedQwordTypeNode, 455 | NODE_TYPES.UNSIGNED_QWORD: UnsignedQwordTypeNode, 456 | NODE_TYPES.FLOAT: FloatTypeNode, 457 | NODE_TYPES.DOUBLE: DoubleTypeNode, 458 | NODE_TYPES.BOOLEAN: BooleanTypeNode, 459 | NODE_TYPES.BINARY: BinaryTypeNode, 460 | NODE_TYPES.GUID: GuidTypeNode, 461 | NODE_TYPES.SIZE: SizeTypeNode, 462 | NODE_TYPES.FILETIME: FiletimeTypeNode, 463 | NODE_TYPES.SYSTEMTIME: SystemtimeTypeNode, 464 | NODE_TYPES.SID: SIDTypeNode, 465 | NODE_TYPES.HEX32: Hex32TypeNode, 466 | NODE_TYPES.HEX64: Hex64TypeNode, 467 | NODE_TYPES.BXML: BXmlTypeNode, 468 | NODE_TYPES.WSTRINGARRAY: WstringArrayTypeNode, 469 | } 470 | try: 471 | TypeClass = types[type_] 472 | except IndexError: 473 | raise NotImplementedError("Type {} not implemented".format(type_)) 474 | return TypeClass(buf, offset, chunk, parent, length=length) 475 | 476 | 477 | class ValueNode(BXmlNode): 478 | """ 479 | The binary XML node for the system token 0x05. 480 | 481 | This is the "value" token. 
482 | """ 483 | 484 | def __init__(self, buf, offset, chunk, parent): 485 | super(ValueNode, self).__init__(buf, offset, chunk, parent) 486 | self.declare_field("byte", "token", 0x0) 487 | self.declare_field("byte", "type") 488 | 489 | def __repr__(self): 490 | return "ValueNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 491 | self._buf, self.offset(), self._chunk, self._parent 492 | ) 493 | 494 | def __str__(self): 495 | return "ValueNode(offset={}, length={}, token={}, value={})".format( 496 | hex(self.offset()), hex(self.length()), hex(self.token()), self.value().string() 497 | ) 498 | 499 | def flags(self): 500 | return self.token() >> 4 501 | 502 | def value(self): 503 | return self.children()[0] 504 | 505 | def tag_length(self): 506 | return 2 507 | 508 | def children(self): 509 | child = get_variant_value(self._buf, self.offset() + self.tag_length(), self._chunk, self, self.type()) 510 | return [child] 511 | 512 | def verify(self): 513 | return self.flags() & 0x0B == 0 and self.token() & 0x0F == SYSTEM_TOKENS.ValueToken 514 | 515 | 516 | class AttributeNode(BXmlNode): 517 | """ 518 | The binary XML node for the system token 0x06. 519 | 520 | This is the "attribute" token. 
521 | """ 522 | 523 | def __init__(self, buf, offset, chunk, parent): 524 | super(AttributeNode, self).__init__(buf, offset, chunk, parent) 525 | self.declare_field("byte", "token", 0x0) 526 | self.declare_field("dword", "string_offset") 527 | 528 | self._name_string_length = 0 529 | if self.string_offset() > self.offset() - self._chunk._offset: 530 | new_string = self._chunk.add_string(self.string_offset(), parent=self) 531 | self._name_string_length += new_string.length() 532 | 533 | def __repr__(self): 534 | return "AttributeNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 535 | self._buf, self.offset(), self._chunk, self._parent 536 | ) 537 | 538 | def __str__(self): 539 | return "AttributeNode(offset={}, length={}, token={}, name={}, value={})".format( 540 | hex(self.offset()), hex(self.length()), hex(self.token()), self.attribute_name(), self.attribute_value() 541 | ) 542 | 543 | def flags(self): 544 | return self.token() >> 4 545 | 546 | def attribute_name(self): 547 | """ 548 | @return A NameNode instance that contains the attribute name. 549 | """ 550 | return self._chunk.strings()[self.string_offset()] 551 | 552 | def attribute_value(self): 553 | """ 554 | @return A BXmlNode instance that is one of (ValueNode, 555 | ConditionalSubstitutionNode, NormalSubstitutionNode). 556 | """ 557 | return self.children()[0] 558 | 559 | def tag_length(self): 560 | return 5 + self._name_string_length 561 | 562 | def verify(self): 563 | return self.flags() & 0x0B == 0 and self.opcode() & 0x0F == 0x06 564 | 565 | @memoize 566 | def children(self): 567 | return self._children(max_children=1) 568 | 569 | 570 | class CDataSectionNode(BXmlNode): 571 | """ 572 | The binary XML node for the system token 0x07. 573 | 574 | This is the "CDATA section" system token. 
575 | """ 576 | 577 | def __init__(self, buf, offset, chunk, parent): 578 | super(CDataSectionNode, self).__init__(buf, offset, chunk, parent) 579 | self.declare_field("byte", "token", 0x0) 580 | self.declare_field("word", "string_length") 581 | self.declare_field("wstring", "cdata", length=self.string_length() - 2) 582 | 583 | def __repr__(self): 584 | return "CDataSectionNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 585 | self._buf, self.offset(), self._chunk, self._parent 586 | ) 587 | 588 | def __str__(self): 589 | return "CDataSectionNode(offset={}, length={}, token={})".format(hex(self.offset()), hex(self.length()), 0x07) 590 | 591 | def flags(self): 592 | return self.token() >> 4 593 | 594 | def tag_length(self): 595 | return 0x3 + self.string_length() 596 | 597 | def length(self): 598 | return self.tag_length() 599 | 600 | def children(self): 601 | return [] 602 | 603 | def verify(self): 604 | return self.flags() == 0x0 and self.token() & 0x0F == SYSTEM_TOKENS.CDataSectionToken 605 | 606 | 607 | class CharacterReferenceNode(BXmlNode): 608 | """ 609 | The binary XML node for the system token 0x08. 610 | 611 | This is an character reference node. That is, something that represents 612 | a non-XML character, eg. & --> 8. 
613 | """ 614 | 615 | def __init__(self, buf, offset, chunk, parent): 616 | super(CharacterReferenceNode, self).__init__(buf, offset, chunk, parent) 617 | self.declare_field("byte", "token", 0x0) 618 | self.declare_field("word", "entity") 619 | self._tag_length = 3 620 | 621 | def __repr__(self): 622 | return "CharacterReferenceNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 623 | self._buf, self.offset(), self._chunk, self._parent 624 | ) 625 | 626 | def __str__(self): 627 | return "CharacterReferenceNode(offset={}, length={}, token={})".format( 628 | hex(self.offset()), hex(self.length()), hex(0x08) 629 | ) 630 | 631 | def entity_reference(self): 632 | return "&#x%04x;" % (self.entity()) 633 | 634 | def flags(self): 635 | return self.token() >> 4 636 | 637 | def tag_length(self): 638 | return self._tag_length 639 | 640 | def children(self): 641 | return [] 642 | 643 | 644 | class EntityReferenceNode(BXmlNode): 645 | """ 646 | The binary XML node for the system token 0x09. 647 | 648 | This is an entity reference node. That is, something that represents 649 | a non-XML character, eg. & --> &. 650 | 651 | TODO(wb): this is untested. 
652 | """ 653 | 654 | def __init__(self, buf, offset, chunk, parent): 655 | super(EntityReferenceNode, self).__init__(buf, offset, chunk, parent) 656 | self.declare_field("byte", "token", 0x0) 657 | self.declare_field("dword", "string_offset") 658 | self._tag_length = 5 659 | 660 | if self.string_offset() > self.offset() - self._chunk.offset(): 661 | new_string = self._chunk.add_string(self.string_offset(), parent=self) 662 | self._tag_length += new_string.length() 663 | 664 | def __repr__(self): 665 | return "EntityReferenceNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 666 | self._buf, self.offset(), self._chunk, self._parent 667 | ) 668 | 669 | def __str__(self): 670 | return "EntityReferenceNode(offset={}, length={}, token={})".format( 671 | hex(self.offset()), hex(self.length()), hex(0x09) 672 | ) 673 | 674 | def entity_reference(self): 675 | return "&{};".format(self._chunk.strings()[self.string_offset()].string()) 676 | 677 | def flags(self): 678 | return self.token() >> 4 679 | 680 | def tag_length(self): 681 | return self._tag_length 682 | 683 | def children(self): 684 | # TODO(wb): it may be possible for this element to have children. 685 | return [] 686 | 687 | 688 | class ProcessingInstructionTargetNode(BXmlNode): 689 | """ 690 | The binary XML node for the system token 0x0A. 691 | 692 | TODO(wb): untested. 
693 | """ 694 | 695 | def __init__(self, buf, offset, chunk, parent): 696 | super(ProcessingInstructionTargetNode, self).__init__(buf, offset, chunk, parent) 697 | self.declare_field("byte", "token", 0x0) 698 | self.declare_field("dword", "string_offset") 699 | self._tag_length = 5 700 | 701 | if self.string_offset() > self.offset() - self._chunk.offset(): 702 | new_string = self._chunk.add_string(self.string_offset(), parent=self) 703 | self._tag_length += new_string.length() 704 | 705 | def __repr__(self): 706 | return "ProcessingInstructionTargetNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 707 | self._buf, self.offset(), self._chunk, self._parent 708 | ) 709 | 710 | def __str__(self): 711 | return "ProcessingInstructionTargetNode(offset={}, length={}, token={})".format( 712 | hex(self.offset()), hex(self.length()), hex(0x0A) 713 | ) 714 | 715 | def processing_instruction_target(self): 716 | return "> 4 720 | 721 | def tag_length(self): 722 | return self._tag_length 723 | 724 | def children(self): 725 | # TODO(wb): it may be possible for this element to have children. 726 | return [] 727 | 728 | 729 | class ProcessingInstructionDataNode(BXmlNode): 730 | """ 731 | The binary XML node for the system token 0x0B. 732 | 733 | TODO(wb): untested. 
734 | """ 735 | 736 | def __init__(self, buf, offset, chunk, parent): 737 | super(ProcessingInstructionDataNode, self).__init__(buf, offset, chunk, parent) 738 | self.declare_field("byte", "token", 0x0) 739 | self.declare_field("word", "string_length") 740 | self._tag_length = 3 + (2 * self.string_length()) 741 | 742 | if self.string_length() > 0: 743 | self._string = self.unpack_wstring(0x3, self.string_length()) 744 | else: 745 | self._string = "" 746 | 747 | def __repr__(self): 748 | return "ProcessingInstructionDataNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 749 | self._buf, self.offset(), self._chunk, self._parent 750 | ) 751 | 752 | def __str__(self): 753 | return "ProcessingInstructionDataNode(offset={}, length={}, token={})".format( 754 | hex(self.offset()), hex(self.length()), hex(0x0B) 755 | ) 756 | 757 | def flags(self): 758 | return self.token() >> 4 759 | 760 | def string(self): 761 | if self.string_length() > 0: 762 | return " {}?>".format(self._string) 763 | else: 764 | return "?>" 765 | 766 | def tag_length(self): 767 | return self._tag_length 768 | 769 | def children(self): 770 | # TODO(wb): it may be possible for this element to have children. 771 | return [] 772 | 773 | 774 | class TemplateInstanceNode(BXmlNode): 775 | """ 776 | The binary XML node for the system token 0x0C. 
777 | """ 778 | 779 | def __init__(self, buf, offset, chunk, parent): 780 | super(TemplateInstanceNode, self).__init__(buf, offset, chunk, parent) 781 | self.declare_field("byte", "token", 0x0) 782 | self.declare_field("byte", "unknown0") 783 | self.declare_field("dword", "template_id") 784 | self.declare_field("dword", "template_offset") 785 | 786 | self._data_length = 0 787 | 788 | if self.is_resident_template(): 789 | new_template = self._chunk.add_template(self.template_offset(), parent=self) 790 | self._data_length += new_template.length() 791 | 792 | def __repr__(self): 793 | return "TemplateInstanceNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 794 | self._buf, self.offset(), self._chunk, self._parent 795 | ) 796 | 797 | def __str__(self): 798 | return "TemplateInstanceNode(offset={}, length={}, token={})".format( 799 | hex(self.offset()), hex(self.length()), hex(0x0C) 800 | ) 801 | 802 | def flags(self): 803 | return self.token() >> 4 804 | 805 | def is_resident_template(self): 806 | return self.template_offset() > self.offset() - self._chunk._offset 807 | 808 | def tag_length(self): 809 | return 10 810 | 811 | def length(self): 812 | return self.tag_length() + self._data_length 813 | 814 | def template(self): 815 | return self._chunk.templates()[self.template_offset()] 816 | 817 | def children(self): 818 | return [] 819 | 820 | @memoize 821 | def find_end_of_stream(self): 822 | return self.template().find_end_of_stream() 823 | 824 | 825 | class NormalSubstitutionNode(BXmlNode): 826 | """ 827 | The binary XML node for the system token 0x0D. 828 | 829 | This is a "normal substitution" token. 
830 | """ 831 | 832 | def __init__(self, buf, offset, chunk, parent): 833 | super(NormalSubstitutionNode, self).__init__(buf, offset, chunk, parent) 834 | self.declare_field("byte", "token", 0x0) 835 | self.declare_field("word", "index") 836 | self.declare_field("byte", "type") 837 | 838 | def __repr__(self): 839 | return "NormalSubstitutionNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 840 | self._buf, self.offset(), self._chunk, self._parent 841 | ) 842 | 843 | def __str__(self): 844 | return "NormalSubstitutionNode(offset={}, length={}, token={}, index={}, type={})".format( 845 | hex(self.offset()), hex(self.length()), hex(self.token()), self.index(), self.type() 846 | ) 847 | 848 | def flags(self): 849 | return self.token() >> 4 850 | 851 | def tag_length(self): 852 | return 0x4 853 | 854 | def length(self): 855 | return self.tag_length() 856 | 857 | def children(self): 858 | return [] 859 | 860 | def verify(self): 861 | return self.flags() == 0 and self.token() & 0x0F == SYSTEM_TOKENS.NormalSubstitutionToken 862 | 863 | 864 | class ConditionalSubstitutionNode(BXmlNode): 865 | """ 866 | The binary XML node for the system token 0x0E. 
867 | """ 868 | 869 | def __init__(self, buf, offset, chunk, parent): 870 | super(ConditionalSubstitutionNode, self).__init__(buf, offset, chunk, parent) 871 | self.declare_field("byte", "token", 0x0) 872 | self.declare_field("word", "index") 873 | self.declare_field("byte", "type") 874 | 875 | def __repr__(self): 876 | return "ConditionalSubstitutionNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 877 | self._buf, self.offset(), self._chunk, self._parent 878 | ) 879 | 880 | def __str__(self): 881 | return "ConditionalSubstitutionNode(offset={}, length={}, token={})".format( 882 | hex(self.offset()), hex(self.length()), hex(0x0E) 883 | ) 884 | 885 | def should_suppress(self, substitutions): 886 | sub = substitutions[self.index()] 887 | return type(sub) is NullTypeNode 888 | 889 | def flags(self): 890 | return self.token() >> 4 891 | 892 | def tag_length(self): 893 | return 0x4 894 | 895 | def length(self): 896 | return self.tag_length() 897 | 898 | def children(self): 899 | return [] 900 | 901 | def verify(self): 902 | return self.flags() == 0 and self.token() & 0x0F == SYSTEM_TOKENS.ConditionalSubstitutionToken 903 | 904 | 905 | class StreamStartNode(BXmlNode): 906 | """ 907 | The binary XML node for the system token 0x0F. 908 | 909 | This is the "start of stream" token. 
910 | """ 911 | 912 | def __init__(self, buf, offset, chunk, parent): 913 | super(StreamStartNode, self).__init__(buf, offset, chunk, parent) 914 | self.declare_field("byte", "token", 0x0) 915 | self.declare_field("byte", "unknown0") 916 | self.declare_field("word", "unknown1") 917 | 918 | def __repr__(self): 919 | return "StreamStartNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 920 | self._buf, self.offset(), self._chunk, self._parent 921 | ) 922 | 923 | def __str__(self): 924 | return "StreamStartNode(offset={}, length={}, token={})".format( 925 | hex(self.offset()), hex(self.length()), hex(self.token()) 926 | ) 927 | 928 | def verify(self): 929 | return ( 930 | self.flags() == 0x0 931 | and self.token() & 0x0F == SYSTEM_TOKENS.StartOfStreamToken 932 | and self.unknown0() == 0x1 933 | and self.unknown1() == 0x1 934 | ) 935 | 936 | def flags(self): 937 | return self.token() >> 4 938 | 939 | def tag_length(self): 940 | return 4 941 | 942 | def length(self): 943 | return self.tag_length() + 0 944 | 945 | def children(self): 946 | return [] 947 | 948 | 949 | class RootNode(BXmlNode): 950 | """ 951 | The binary XML node for the Root node. 952 | """ 953 | 954 | def __init__(self, buf, offset, chunk, parent): 955 | super(RootNode, self).__init__(buf, offset, chunk, parent) 956 | 957 | def __repr__(self): 958 | return "RootNode(buf={!r}, offset={!r}, chunk={!r}, parent={!r})".format( 959 | self._buf, self.offset(), self._chunk, self._parent 960 | ) 961 | 962 | def __str__(self): 963 | return "RootNode(offset={}, length={})".format(hex(self.offset()), hex(self.length())) 964 | 965 | def tag_length(self): 966 | return 0 967 | 968 | @memoize 969 | def children(self): 970 | """ 971 | @return The template instances which make up this node. 972 | """ 973 | return self._children(end_tokens=[SYSTEM_TOKENS.EndOfStreamToken]) 974 | 975 | def tag_and_children_length(self): 976 | """ 977 | @return The length of the tag of this element, and the children. 
978 | This does not take into account the substitutions that may be 979 | at the end of this element. 980 | """ 981 | children_length = 0 982 | 983 | for child in self.children(): 984 | children_length += child.length() 985 | 986 | return self.tag_length() + children_length 987 | 988 | def template_instance(self): 989 | """ 990 | parse the template instance node. 991 | this is used to compute the location of the template definition structure. 992 | 993 | Returns: 994 | TemplateInstanceNode: the template instance. 995 | """ 996 | ofs = self.offset() 997 | if self.unpack_byte(0x0) & 0x0F == 0xF: 998 | ofs += 4 999 | return TemplateInstanceNode(self._buf, ofs, self._chunk, self) 1000 | 1001 | def template(self): 1002 | """ 1003 | parse the template referenced by this root node. 1004 | note, this template structure is not guaranteed to be located within the root node's boundaries. 1005 | 1006 | Returns: 1007 | TemplateNode: the template. 1008 | """ 1009 | instance = self.template_instance() 1010 | offset = self._chunk.offset() + instance.template_offset() 1011 | node = TemplateNode(self._buf, offset, self._chunk, instance) 1012 | return node 1013 | 1014 | @memoize 1015 | def substitutions(self): 1016 | """ 1017 | @return A list of VariantTypeNode subclass instances that 1018 | contain the substitutions for this root node. 1019 | """ 1020 | sub_decl = [] 1021 | sub_def = [] 1022 | ofs = self.tag_and_children_length() 1023 | sub_count = self.unpack_dword(ofs) 1024 | ofs += 4 1025 | for _ in range(sub_count): 1026 | size = self.unpack_word(ofs) 1027 | type_ = self.unpack_byte(ofs + 0x2) 1028 | sub_decl.append((size, type_)) 1029 | ofs += 4 1030 | for size, type_ in sub_decl: 1031 | val = get_variant_value(self._buf, self.offset() + ofs, self._chunk, self, type_, length=size) 1032 | if abs(size - val.length()) > 4: 1033 | # TODO(wb): This is a hack, so I'm sorry. 
1034 | # But, we are not passing around a 'length' field, 1035 | # so we have to depend on the structure of each 1036 | # variant type. It seems some BXmlTypeNode sizes 1037 | # are not exact. Hopefully, this is just alignment. 1038 | # So, that's what we compensate for here. 1039 | raise ParseException("Invalid substitution value size") 1040 | sub_def.append(val) 1041 | ofs += size 1042 | return sub_def 1043 | 1044 | @memoize 1045 | def length(self): 1046 | ofs = self.tag_and_children_length() 1047 | sub_count = self.unpack_dword(ofs) 1048 | ofs += 4 1049 | ret = ofs 1050 | for _ in range(sub_count): 1051 | size = self.unpack_word(ofs) 1052 | ret += size + 4 1053 | ofs += 4 1054 | return ret 1055 | 1056 | 1057 | class VariantTypeNode(BXmlNode): 1058 | """ """ 1059 | 1060 | def __init__(self, buf, offset, chunk, parent, length=None): 1061 | super(VariantTypeNode, self).__init__(buf, offset, chunk, parent) 1062 | self._length = length 1063 | 1064 | def __repr__(self): 1065 | return "{}(buf={!r}, offset={}, chunk={!r})".format( 1066 | self.__class__.__name__, self._buf, hex(self.offset()), self._chunk 1067 | ) 1068 | 1069 | def __str__(self): 1070 | return "{}(offset={}, length={}, string={})".format( 1071 | self.__class__.__name__, hex(self.offset()), hex(self.length()), self.string() 1072 | ) 1073 | 1074 | def tag_length(self): 1075 | raise NotImplementedError("tag_length not implemented for {!r}".format(self)) 1076 | 1077 | def length(self): 1078 | return self.tag_length() 1079 | 1080 | def children(self): 1081 | return [] 1082 | 1083 | def string(self): 1084 | raise NotImplementedError("string not implemented for {!r}".format(self)) 1085 | 1086 | 1087 | # but satisfies the contract of VariantTypeNode, BXmlNode, but not Block 1088 | class NullTypeNode(object): 1089 | """ 1090 | Variant type 0x00. 
1091 | """ 1092 | 1093 | def __init__(self, buf, offset, chunk, parent, length=None): 1094 | super(NullTypeNode, self).__init__() 1095 | self._offset = offset 1096 | self._length = length 1097 | 1098 | def __str__(self): 1099 | return "NullTypeNode" 1100 | 1101 | def string(self): 1102 | return "" 1103 | 1104 | def length(self): 1105 | return self._length or 0 1106 | 1107 | def tag_length(self): 1108 | return self._length or 0 1109 | 1110 | def children(self): 1111 | return [] 1112 | 1113 | def offset(self): 1114 | return self._offset 1115 | 1116 | 1117 | class WstringTypeNode(VariantTypeNode): 1118 | """ 1119 | Variant ttype 0x01. 1120 | """ 1121 | 1122 | def __init__(self, buf, offset, chunk, parent, length=None): 1123 | super(WstringTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1124 | if self._length is None: 1125 | self.declare_field("word", "string_length", 0x0) 1126 | self.declare_field("wstring", "_string", length=(self.string_length())) 1127 | else: 1128 | self.declare_field("wstring", "_string", 0x0, length=(self._length // 2)) 1129 | 1130 | def tag_length(self): 1131 | if self._length is None: 1132 | return 2 + (self.string_length() * 2) 1133 | return self._length 1134 | 1135 | def string(self): 1136 | return self._string().rstrip("\x00") 1137 | 1138 | 1139 | class StringTypeNode(VariantTypeNode): 1140 | """ 1141 | Variant type 0x02. 
1142 | """ 1143 | 1144 | def __init__(self, buf, offset, chunk, parent, length=None): 1145 | super(StringTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1146 | if self._length is None: 1147 | self.declare_field("word", "string_length", 0x0) 1148 | self.declare_field("string", "_string", length=(self.string_length())) 1149 | else: 1150 | self.declare_field("string", "_string", 0x0, length=self._length) 1151 | 1152 | def tag_length(self): 1153 | if self._length is None: 1154 | return 2 + (self.string_length()) 1155 | return self._length 1156 | 1157 | def string(self): 1158 | return self._string().rstrip("\x00") 1159 | 1160 | 1161 | class SignedByteTypeNode(VariantTypeNode): 1162 | """ 1163 | Variant type 0x03. 1164 | """ 1165 | 1166 | def __init__(self, buf, offset, chunk, parent, length=None): 1167 | super(SignedByteTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1168 | self.declare_field("int8", "byte", 0x0) 1169 | 1170 | def tag_length(self): 1171 | return 1 1172 | 1173 | def string(self): 1174 | return str(self.byte()) 1175 | 1176 | 1177 | class UnsignedByteTypeNode(VariantTypeNode): 1178 | """ 1179 | Variant type 0x04. 1180 | """ 1181 | 1182 | def __init__(self, buf, offset, chunk, parent, length=None): 1183 | super(UnsignedByteTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1184 | self.declare_field("byte", "byte", 0x0) 1185 | 1186 | def tag_length(self): 1187 | return 1 1188 | 1189 | def string(self): 1190 | return str(self.byte()) 1191 | 1192 | 1193 | class SignedWordTypeNode(VariantTypeNode): 1194 | """ 1195 | Variant type 0x05. 
1196 | """ 1197 | 1198 | def __init__(self, buf, offset, chunk, parent, length=None): 1199 | super(SignedWordTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1200 | self.declare_field("int16", "word", 0x0) 1201 | 1202 | def tag_length(self): 1203 | return 2 1204 | 1205 | def string(self): 1206 | return str(self.word()) 1207 | 1208 | 1209 | class UnsignedWordTypeNode(VariantTypeNode): 1210 | """ 1211 | Variant type 0x06. 1212 | """ 1213 | 1214 | def __init__(self, buf, offset, chunk, parent, length=None): 1215 | super(UnsignedWordTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1216 | self.declare_field("word", "word", 0x0) 1217 | 1218 | def tag_length(self): 1219 | return 2 1220 | 1221 | def string(self): 1222 | return str(self.word()) 1223 | 1224 | 1225 | class SignedDwordTypeNode(VariantTypeNode): 1226 | """ 1227 | Variant type 0x07. 1228 | """ 1229 | 1230 | def __init__(self, buf, offset, chunk, parent, length=None): 1231 | super(SignedDwordTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1232 | self.declare_field("int32", "dword", 0x0) 1233 | 1234 | def tag_length(self): 1235 | return 4 1236 | 1237 | def string(self): 1238 | return str(self.dword()) 1239 | 1240 | 1241 | class UnsignedDwordTypeNode(VariantTypeNode): 1242 | """ 1243 | Variant type 0x08. 1244 | """ 1245 | 1246 | def __init__(self, buf, offset, chunk, parent, length=None): 1247 | super(UnsignedDwordTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1248 | self.declare_field("dword", "dword", 0x0) 1249 | 1250 | def tag_length(self): 1251 | return 4 1252 | 1253 | def string(self): 1254 | return str(self.dword()) 1255 | 1256 | 1257 | class SignedQwordTypeNode(VariantTypeNode): 1258 | """ 1259 | Variant type 0x09. 
1260 | """ 1261 | 1262 | def __init__(self, buf, offset, chunk, parent, length=None): 1263 | super(SignedQwordTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1264 | self.declare_field("int64", "qword", 0x0) 1265 | 1266 | def tag_length(self): 1267 | return 8 1268 | 1269 | def string(self): 1270 | return str(self.qword()) 1271 | 1272 | 1273 | class UnsignedQwordTypeNode(VariantTypeNode): 1274 | """ 1275 | Variant type 0x0A. 1276 | """ 1277 | 1278 | def __init__(self, buf, offset, chunk, parent, length=None): 1279 | super(UnsignedQwordTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1280 | self.declare_field("qword", "qword", 0x0) 1281 | 1282 | def tag_length(self): 1283 | return 8 1284 | 1285 | def string(self): 1286 | return str(self.qword()) 1287 | 1288 | 1289 | class FloatTypeNode(VariantTypeNode): 1290 | """ 1291 | Variant type 0x0B. 1292 | """ 1293 | 1294 | def __init__(self, buf, offset, chunk, parent, length=None): 1295 | super(FloatTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1296 | self.declare_field("float", "float", 0x0) 1297 | 1298 | def tag_length(self): 1299 | return 4 1300 | 1301 | def string(self): 1302 | return str(self.float()) 1303 | 1304 | 1305 | class DoubleTypeNode(VariantTypeNode): 1306 | """ 1307 | Variant type 0x0C. 1308 | """ 1309 | 1310 | def __init__(self, buf, offset, chunk, parent, length=None): 1311 | super(DoubleTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1312 | self.declare_field("double", "double", 0x0) 1313 | 1314 | def tag_length(self): 1315 | return 8 1316 | 1317 | def string(self): 1318 | return str(self.double()) 1319 | 1320 | 1321 | class BooleanTypeNode(VariantTypeNode): 1322 | """ 1323 | Variant type 0x0D. 
1324 | """ 1325 | 1326 | def __init__(self, buf, offset, chunk, parent, length=None): 1327 | super(BooleanTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1328 | self.declare_field("int32", "int32", 0x0) 1329 | 1330 | def tag_length(self): 1331 | return 4 1332 | 1333 | def string(self): 1334 | if self.int32() > 0: 1335 | return "True" 1336 | return "False" 1337 | 1338 | 1339 | class BinaryTypeNode(VariantTypeNode): 1340 | """ 1341 | Variant type 0x0E. 1342 | 1343 | String/XML representation is Base64 encoded. 1344 | """ 1345 | 1346 | def __init__(self, buf, offset, chunk, parent, length=None): 1347 | super(BinaryTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1348 | if self._length is None: 1349 | self.declare_field("dword", "size", 0x0) 1350 | self.declare_field("binary", "binary", length=self.size()) 1351 | else: 1352 | self.declare_field("binary", "binary", 0x0, length=self._length) 1353 | 1354 | def tag_length(self): 1355 | if self._length is None: 1356 | return 4 + self.size() 1357 | return self._length 1358 | 1359 | def string(self): 1360 | return base64.b64encode(self.binary()).decode("ascii") 1361 | 1362 | 1363 | class GuidTypeNode(VariantTypeNode): 1364 | """ 1365 | Variant type 0x0F. 1366 | """ 1367 | 1368 | def __init__(self, buf, offset, chunk, parent, length=None): 1369 | super(GuidTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1370 | self.declare_field("guid", "guid", 0x0) 1371 | 1372 | def tag_length(self): 1373 | return 16 1374 | 1375 | def string(self): 1376 | return "{" + self.guid() + "}" 1377 | 1378 | 1379 | class SizeTypeNode(VariantTypeNode): 1380 | """ 1381 | Variant type 0x10. 1382 | 1383 | Note: Assuming sizeof(size_t) == 0x8. 
1384 | """ 1385 | 1386 | def __init__(self, buf, offset, chunk, parent, length=None): 1387 | super(SizeTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1388 | if self._length == 0x4: 1389 | self.declare_field("dword", "num", 0x0) 1390 | elif self._length == 0x8: 1391 | self.declare_field("qword", "num", 0x0) 1392 | else: 1393 | self.declare_field("qword", "num", 0x0) 1394 | 1395 | def tag_length(self): 1396 | if self._length is None: 1397 | return 8 1398 | return self._length 1399 | 1400 | def string(self): 1401 | return str(self.num()) 1402 | 1403 | 1404 | class FiletimeTypeNode(VariantTypeNode): 1405 | """ 1406 | Variant type 0x11. 1407 | """ 1408 | 1409 | def __init__(self, buf, offset, chunk, parent, length=None): 1410 | super(FiletimeTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1411 | self.declare_field("filetime", "filetime", 0x0) 1412 | 1413 | def string(self): 1414 | return self.filetime().isoformat(" ") 1415 | 1416 | def tag_length(self): 1417 | return 8 1418 | 1419 | 1420 | class SystemtimeTypeNode(VariantTypeNode): 1421 | """ 1422 | Variant type 0x12. 1423 | """ 1424 | 1425 | def __init__(self, buf, offset, chunk, parent, length=None): 1426 | super(SystemtimeTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1427 | self.declare_field("systemtime", "systemtime", 0x0) 1428 | 1429 | def tag_length(self): 1430 | return 16 1431 | 1432 | def string(self): 1433 | return self.systemtime().isoformat(" ") 1434 | 1435 | 1436 | class SIDTypeNode(VariantTypeNode): 1437 | """ 1438 | Variant type 0x13. 
1439 | """ 1440 | 1441 | def __init__(self, buf, offset, chunk, parent, length=None): 1442 | super(SIDTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1443 | self.declare_field("byte", "version", 0x0) 1444 | self.declare_field("byte", "num_elements") 1445 | self.declare_field("dword_be", "id_high") 1446 | self.declare_field("word_be", "id_low") 1447 | 1448 | @memoize 1449 | def elements(self): 1450 | ret = [] 1451 | for i in range(self.num_elements()): 1452 | ret.append(self.unpack_dword(self.current_field_offset() + 4 * i)) 1453 | return ret 1454 | 1455 | @memoize 1456 | def id(self): 1457 | ret = "S-{}-{}".format(self.version(), (self.id_high() << 16) ^ self.id_low()) 1458 | for elem in self.elements(): 1459 | ret += "-{}".format(elem) 1460 | return ret 1461 | 1462 | def tag_length(self): 1463 | return 8 + 4 * self.num_elements() 1464 | 1465 | def string(self): 1466 | return self.id() 1467 | 1468 | 1469 | class Hex32TypeNode(VariantTypeNode): 1470 | """ 1471 | Variant type 0x14. 1472 | """ 1473 | 1474 | def __init__(self, buf, offset, chunk, parent, length=None): 1475 | super(Hex32TypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1476 | self.declare_field("binary", "hex", 0x0, length=0x4) 1477 | 1478 | def tag_length(self): 1479 | return 4 1480 | 1481 | def string(self): 1482 | ret = "0x" 1483 | b = self.hex()[::-1] 1484 | for i in range(len(b)): 1485 | ret += "{:02x}".format(b[i]) 1486 | return ret 1487 | 1488 | 1489 | class Hex64TypeNode(VariantTypeNode): 1490 | """ 1491 | Variant type 0x15. 
1492 | """ 1493 | 1494 | def __init__(self, buf, offset, chunk, parent, length=None): 1495 | super(Hex64TypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1496 | self.declare_field("binary", "hex", 0x0, length=0x8) 1497 | 1498 | def tag_length(self): 1499 | return 8 1500 | 1501 | def string(self): 1502 | ret = "0x" 1503 | b = self.hex()[::-1] 1504 | for i in range(len(b)): 1505 | ret += "{:02x}".format(b[i]) 1506 | return ret 1507 | 1508 | 1509 | class BXmlTypeNode(VariantTypeNode): 1510 | """ 1511 | Variant type 0x21. 1512 | """ 1513 | 1514 | def __init__(self, buf, offset, chunk, parent, length=None): 1515 | super(BXmlTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1516 | self._root = RootNode(buf, offset, chunk, self) 1517 | 1518 | def tag_length(self): 1519 | return self._length or self._root.length() 1520 | 1521 | def string(self): 1522 | return "" 1523 | 1524 | def root(self): 1525 | return self._root 1526 | 1527 | 1528 | class WstringArrayTypeNode(VariantTypeNode): 1529 | """ 1530 | Variant ttype 0x81. 
1531 | """ 1532 | 1533 | def __init__(self, buf, offset, chunk, parent, length=None): 1534 | super(WstringArrayTypeNode, self).__init__(buf, offset, chunk, parent, length=length) 1535 | if self._length is None: 1536 | self.declare_field("word", "binary_length", 0x0) 1537 | self.declare_field("binary", "binary", length=(self.binary_length())) 1538 | else: 1539 | self.declare_field("binary", "binary", 0x0, length=(self._length)) 1540 | 1541 | def tag_length(self): 1542 | if self._length is None: 1543 | return 2 + self.binary_length() 1544 | return self._length 1545 | 1546 | def string(self): 1547 | binary = self.binary() 1548 | acc = [] 1549 | while len(binary) > 0: 1550 | match = re.search(b"((?:[^\x00].)+)", binary) 1551 | if match: 1552 | frag = match.group() 1553 | acc.append("") 1554 | acc.append(frag.decode("utf16")) 1555 | acc.append("\n") 1556 | binary = binary[len(frag) + 2 :] 1557 | if len(binary) == 0: 1558 | break 1559 | frag = re.search(b"(\x00*)", binary).group() 1560 | if len(frag) % 2 == 0: 1561 | for _ in range(len(frag) // 2): 1562 | acc.append("\n") 1563 | else: 1564 | raise ParseException("Error parsing uneven substring of NULLs") 1565 | binary = binary[len(frag) :] 1566 | return "".join(acc) 1567 | 1568 | 1569 | node_dispatch_table = [ 1570 | EndOfStreamNode, 1571 | OpenStartElementNode, 1572 | CloseStartElementNode, 1573 | CloseEmptyElementNode, 1574 | CloseElementNode, 1575 | ValueNode, 1576 | AttributeNode, 1577 | CDataSectionNode, 1578 | CharacterReferenceNode, 1579 | EntityReferenceNode, 1580 | ProcessingInstructionTargetNode, 1581 | ProcessingInstructionDataNode, 1582 | TemplateInstanceNode, 1583 | NormalSubstitutionNode, 1584 | ConditionalSubstitutionNode, 1585 | StreamStartNode, 1586 | ] 1587 | 1588 | node_readable_tokens = [ 1589 | "End of Stream", 1590 | "Open Start Element", 1591 | "Close Start Element", 1592 | "Close Empty Element", 1593 | "Close Element", 1594 | "Value", 1595 | "Attribute", 1596 | "unknown", 1597 | "unknown", 1598 | 
"unknown", 1599 | "unknown", 1600 | "unknown", 1601 | "TemplateInstanceNode", 1602 | "Normal Substitution", 1603 | "Conditional Substitution", 1604 | "Start of Stream", 1605 | ] 1606 | -------------------------------------------------------------------------------- /Evtx/Views.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # This file is part of python-evtx. 3 | # 4 | # Copyright 2012, 2013 Willi Ballenthin 5 | # while at Mandiant 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from __future__ import absolute_import 19 | 20 | import re 21 | import xml.sax.saxutils 22 | 23 | import Evtx.Nodes as e_nodes 24 | 25 | XML_HEADER = '\n' 26 | 27 | 28 | class UnexpectedElementException(Exception): 29 | def __init__(self, msg): 30 | super(UnexpectedElementException, self).__init__(msg) 31 | 32 | 33 | # ref: https://www.w3.org/TR/xml11/#charsets 34 | RESTRICTED_CHARS = re.compile("[\x01-\x08\x0b\x0c\x0e-\x1f\x7f-\x84\x86-\x9f]") 35 | 36 | 37 | def escape_attr(s): 38 | """ 39 | escape the given string such that it can be placed in an XML attribute, like: 40 | 41 | 42 | 43 | Args: 44 | s (str): the string to escape. 45 | 46 | Returns: 47 | str: the escaped string. 
48 | """ 49 | esc = xml.sax.saxutils.quoteattr(s) 50 | esc = esc.encode("ascii", "xmlcharrefreplace").decode("ascii") 51 | esc = RESTRICTED_CHARS.sub("", esc) 52 | return esc 53 | 54 | 55 | def escape_value(s): 56 | """ 57 | escape the given string such that it can be placed in an XML value location, like: 58 | 59 | 60 | $value 61 | 62 | 63 | Args: 64 | s (str): the string to escape. 65 | 66 | Returns: 67 | str: the escaped string. 68 | """ 69 | esc = xml.sax.saxutils.escape(s) 70 | esc = esc.encode("ascii", "xmlcharrefreplace").decode("ascii") 71 | esc = RESTRICTED_CHARS.sub("", esc) 72 | return esc 73 | 74 | 75 | # ref: https://www.w3.org/TR/xml/#NT-NameStartChar 76 | # but we are going to require a even stricter subset. 77 | NAME_PATTERN = re.compile(r"[a-zA-Z_][a-zA-Z_\-]*") 78 | 79 | 80 | def validate_name(s): 81 | """ 82 | ensure the given name can be used as an XML entity name, such as tag or attribute name. 83 | 84 | Args: 85 | s (str): the string to validate. 86 | 87 | Raises: 88 | RuntimeError: if the string is not suitable to be an XML name. 89 | """ 90 | if not NAME_PATTERN.match(s): 91 | raise RuntimeError("invalid xml name: %s" % (s)) 92 | return s 93 | 94 | 95 | def render_root_node_with_subs(root_node, subs): 96 | """ 97 | render the given root node using the given substitutions into XML. 98 | 99 | Args: 100 | root_node (e_nodes.RootNode): the node to render. 101 | subs (list[str]): the substitutions that maybe included in the XML. 102 | 103 | Returns: 104 | str: the rendered XML document. 
105 | """ 106 | 107 | def rec(node, acc): 108 | if isinstance(node, e_nodes.EndOfStreamNode): 109 | pass # intended 110 | elif isinstance(node, e_nodes.OpenStartElementNode): 111 | acc.append("<") 112 | acc.append(node.tag_name()) 113 | for child in node.children(): 114 | if isinstance(child, e_nodes.AttributeNode): 115 | acc.append(" ") 116 | acc.append(validate_name(child.attribute_name().string())) 117 | acc.append('="') 118 | # TODO: should use xml.sax.saxutils.quoteattr here 119 | # but to do so, we'd need to ensure we're not double-quoting this value. 120 | rec(child.attribute_value(), acc) 121 | acc.append('"') 122 | acc.append(">") 123 | for child in node.children(): 124 | rec(child, acc) 125 | acc.append("\n") 128 | elif isinstance(node, e_nodes.CloseStartElementNode): 129 | pass # intended 130 | elif isinstance(node, e_nodes.CloseEmptyElementNode): 131 | pass # intended 132 | elif isinstance(node, e_nodes.CloseElementNode): 133 | pass # intended 134 | elif isinstance(node, e_nodes.ValueNode): 135 | acc.append(escape_value(node.children()[0].string())) 136 | elif isinstance(node, e_nodes.AttributeNode): 137 | pass # intended 138 | elif isinstance(node, e_nodes.CDataSectionNode): 139 | acc.append("") 143 | elif isinstance(node, e_nodes.EntityReferenceNode): 144 | acc.append(escape_value(node.entity_reference())) 145 | elif isinstance(node, e_nodes.ProcessingInstructionTargetNode): 146 | acc.append(escape_value(node.processing_instruction_target())) 147 | elif isinstance(node, e_nodes.ProcessingInstructionDataNode): 148 | acc.append(escape_value(node.string())) 149 | elif isinstance(node, e_nodes.TemplateInstanceNode): 150 | raise UnexpectedElementException("TemplateInstanceNode") 151 | elif isinstance(node, e_nodes.NormalSubstitutionNode): 152 | sub = subs[node.index()] 153 | 154 | if isinstance(sub, e_nodes.BXmlTypeNode): 155 | sub = render_root_node(sub.root()) 156 | else: 157 | sub = escape_value(sub.string()) 158 | 159 | acc.append(sub) 160 | elif 
isinstance(node, e_nodes.ConditionalSubstitutionNode): 161 | sub = subs[node.index()] 162 | 163 | if isinstance(sub, e_nodes.BXmlTypeNode): 164 | sub = render_root_node(sub.root()) 165 | else: 166 | sub = escape_value(sub.string()) 167 | 168 | acc.append(sub) 169 | elif isinstance(node, e_nodes.StreamStartNode): 170 | pass # intended 171 | 172 | acc = [] 173 | for c in root_node.template().children(): 174 | rec(c, acc) 175 | return "".join(acc) 176 | 177 | 178 | def render_root_node(root_node): 179 | subs = [] 180 | for sub in root_node.substitutions(): 181 | if isinstance(sub, str): 182 | raise RuntimeError("string sub?") 183 | 184 | if sub is None: 185 | raise RuntimeError("null sub?") 186 | 187 | subs.append(sub) 188 | 189 | return render_root_node_with_subs(root_node, subs) 190 | 191 | 192 | def evtx_record_xml_view(record, cache=None): 193 | """ 194 | render the given record into an XML document. 195 | 196 | Args: 197 | record (Evtx.Record): the record to render. 198 | 199 | Returns: 200 | str: the rendered XML document. 201 | """ 202 | return render_root_node(record.root()) 203 | 204 | 205 | def evtx_chunk_xml_view(chunk): 206 | """ 207 | Generate XML representations of the records in an EVTX chunk. 
208 | 209 | Does not include the XML ") 257 | for child in node.children(): 258 | rec(child, acc) 259 | acc.append("\n") 262 | elif isinstance(node, e_nodes.CloseStartElementNode): 263 | pass # intended 264 | elif isinstance(node, e_nodes.CloseEmptyElementNode): 265 | pass # intended 266 | elif isinstance(node, e_nodes.CloseElementNode): 267 | pass # intended 268 | elif isinstance(node, e_nodes.ValueNode): 269 | acc.append(node.children()[0].string()) 270 | elif isinstance(node, e_nodes.AttributeNode): 271 | pass # intended 272 | elif isinstance(node, e_nodes.CDataSectionNode): 273 | acc.append("") 276 | elif isinstance(node, e_nodes.EntityReferenceNode): 277 | acc.append(node.entity_reference()) 278 | elif isinstance(node, e_nodes.ProcessingInstructionTargetNode): 279 | acc.append(node.processing_instruction_target()) 280 | elif isinstance(node, e_nodes.ProcessingInstructionDataNode): 281 | acc.append(node.string()) 282 | elif isinstance(node, e_nodes.TemplateInstanceNode): 283 | raise UnexpectedElementException("TemplateInstanceNode") 284 | elif isinstance(node, e_nodes.NormalSubstitutionNode): 285 | acc.append("[Normal Substitution(index={}, type={})]".format(node.index(), node.type())) 286 | elif isinstance(node, e_nodes.ConditionalSubstitutionNode): 287 | acc.append("[Conditional Substitution(index={}, type={})]".format(node.index(), node.type())) 288 | elif isinstance(node, e_nodes.StreamStartNode): 289 | pass # intended 290 | 291 | acc = [] 292 | for c in root_node.template().children(): 293 | rec(c, acc) 294 | return "".join(acc) 295 | -------------------------------------------------------------------------------- /Evtx/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is part of python-evtx. 
2 | # 3 | # Copyright 2012 Willi Ballenthin 4 | # while at Mandiant 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | __all__ = [ 18 | "Evtx", 19 | "BinaryParser", 20 | "Nodes", 21 | "Views", 22 | ] 23 | -------------------------------------------------------------------------------- /LICENSE.TXT: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | python-evtx 2 | =========== 3 | 4 | Introduction 5 | ------------ 6 | 7 | python-evtx is a pure Python parser for recent Windows Event Log files (those with the file extension ".evtx"). The module provides programmatic access to the File and Chunk headers, record templates, and event entries. For example, you can use python-evtx to review the event logs of Windows 7 systems from a Mac or Linux workstation. The structure definitions and parsing strategies were heavily inspired by the work of Andreas Schuster and his Perl implementation "Parse-Evtx". 8 | 9 | Background 10 | ---------- 11 | With the release of Windows Vista, Microsoft introduced an updated event log file format. The format used in Windows XP was a circular buffer of record structures that each contained a list of strings. 
A viewer resolved templates hosted in system library files and inserted the strings into appropriate positions. The newer event log format is proprietary binary XML. Unpacking chunks from an event log file from Windows 7 results in a complete XML document with a variable schema. The changes helped Microsoft tune the file format to real-world uses of event logs, such as long running logs with hundreds of megabytes of data, and system independent template resolution. 12 | 13 | Related Work 14 | ------------ 15 | Andreas Schuster released the first public description of the .evtx file format in 2007. He is the author of the thorough document "Introducing the Microsoft Vista event log file format" that describes the motivation and details of the format. Mr. Schuster also maintains the Perl implementation of a parser called "Parse-Evtx". I referred to the source code of this library extensively during the development of python-evtx. 16 | 17 | Joachim Metz also released a cross-platform, LGPL licensed C++ based parser in 2011. His document "Windows XML Event Log (EVTX): Analysis of EVTX" provides a detailed description of the structures and context of newer event log files. 18 | 19 | Dependencies 20 | ------------ 21 | python-evtx is a pure Python 3 module, so it works equally well across platforms like Windows, macOS, and Linux. 22 | 23 | python-evtx operates on event log files from Windows Vista and newer Windows operating systems. These files typically have the file extension .evtx. Version 5.09 of the `file` utility identifies such a file as "MS Vista Windows Event Log". To manually confirm the file type, look for the ASCII string "ElfFile" in the first seven bytes: 24 | 25 | willi/evtx » xxd -l 32 Security.evtx 26 | 0000000: 456c 6646 696c 6500 0000 0000 0000 0000 ElfFile......... 27 | 0000010: d300 0000 0000 0000 375e 0000 0000 0000 ........7^......
28 | 29 | 30 | Examples 31 | -------- 32 | Provided with the parsing module `Evtx` are four scripts that mimic the tools distributed with Parse-Evtx. `evtx_info.py` prints metadata about the event log and verifies the checksums of each chunk. `evtx_templates.py` builds and prints the templates used throughout the event log. `evtx_dump.py` parses the event log and transforms the binary XML into a human readable ASCII XML format. Finally, `evtx_dump_json.py` parses event logs, similar to `evtx_dump.py` and transforms the binary XML into JSON with the added capability to output the JSON array to a file. 33 | 34 | Note the length of the `evtx_dump.py` script: it's only 20 lines. Now, review the contents and notice the complete implementation of the logic: 35 | 36 | print(e_views.XML_HEADER) 37 | print('') 38 | for record in log.records(): 39 | print(record.xml()) 40 | print('') 41 | 42 | Working with python-evtx is really easy! 43 | 44 | 45 | Installation 46 | ------------ 47 | Updates to python-evtx are pushed to PyPI, so you can install the module using `pip`. For example: 48 | 49 | pip install python-evtx 50 | 51 | The source code for python-evtx is hosted at GitHub, and you may download, fork, and review it from this repository (http://www.github.com/williballenthin/python-evtx). Please report issues or feature requests through GitHub's bug tracker associated with the project. 52 | 53 | Development 54 | ----------- 55 | For formatting, use isort: 56 | 57 | isort --length-sort --profile black --line-length=120 Evtx/ scripts/ tests/ 58 | 59 | and black: 60 | 61 | black --line-length=120 Evtx/ scripts/ tests/ 62 | 63 | For linting, use ruff: 64 | 65 | ruff check Evtx/ scripts/ tests/ 66 | 67 | Or use [just](https://github.com/casey/just) to run the linters: 68 | 69 | just lint 70 | 71 | License 72 | ------- 73 | python-evtx is licensed under the Apache License, Version 2.0.
This means it is freely available for use and modification in a personal and professional capacity. 74 | 75 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "python-evtx" 7 | version = "0.8.1" 8 | description = "Pure Python parser for Windows event log files (.evtx)." 9 | readme = "README.md" 10 | license = "Apache-2.0" 11 | authors = [ 12 | { name = "Willi Ballenthin", email = "willi.ballenthin@gmail.com" }, 13 | ] 14 | requires-python = ">=3.9" 15 | dependencies = [ 16 | "hexdump>=3.3", 17 | ] 18 | classifiers = [ 19 | "Development Status :: 5 - Production/Stable", # Assuming based on version 0.8.0 20 | "Intended Audience :: Developers", 21 | "Intended Audience :: Information Technology", 22 | "Operating System :: OS Independent", 23 | "Programming Language :: Python :: 3", 24 | "Programming Language :: Python :: 3.9", 25 | "Programming Language :: Python :: 3.10", 26 | "Programming Language :: Python :: 3.11", 27 | "Programming Language :: Python :: 3.12", 28 | "Programming Language :: Python :: 3.13", 29 | "Topic :: System :: Logging", 30 | "Topic :: Software Development :: Libraries :: Python Modules", 31 | "Topic :: Security", 32 | ] 33 | 34 | [project.urls] 35 | Homepage = "https://github.com/williballenthin/python-evtx" 36 | Repository = "https://github.com/williballenthin/python-evtx" 37 | Bug-Tracker = "https://github.com/williballenthin/python-evtx/issues" 38 | 39 | [project.optional-dependencies] 40 | test = [ 41 | "pytest-cov>=5.0.0", 42 | "pytest>=8.2.2", 43 | "lxml>=5.2.2", 44 | "black>=24.4.2", 45 | "isort>=5.13.2", 46 | "ruff>=0.4.10", 47 | ] 48 | 49 | [project.scripts] 50 | evtx_dump = "scripts.evtx_dump:main" 51 | evtx_dump_json = "scripts.evtx_dump_json:main" 52 | evtx_dump_chunk_slack = 
def to_lxml(record_xml):
    """
    Parse the XML text of one record into an lxml element.

    The text is formatted first and encoded to UTF-8 afterwards. Encoding
    before the ``%`` interpolation (as the previous code did) makes Python 3
    insert the ``b'...'`` repr of the bytes into the string, which is never
    well-formed XML, so every record was reported as a syntax error.

    Args:
      record_xml (str): XML text of a single record.

    Returns:
      etree.Element: the parsed root element.

    Raises:
      etree.XMLSyntaxError: if the text is not well-formed XML.
    """
    return etree.fromstring(("%s" % record_xml).encode("utf-8"))
def main():
    """
    Print every record of an EVTX file whose creation time is in a date range.

    Command line: ``evtfile start [-e end]``. Both dates are parsed with
    `parsed_date`; the end date defaults to the time the parser is built.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("evtfile", type=str)
    parser.add_argument("start", type=parsed_date, help="Start date/time YYYY-mm-dd HH:MM:SS(.f)")
    parser.add_argument(
        "-e", dest="end", type=parsed_date, help="End date/time YYYY-mm-dd HH:MM:SS(.f)", default=datetime.now()
    )
    args = parser.parse_args()

    for record in matching_records(args.evtfile, args.start, args.end):
        # encoding="unicode" makes lxml return str. Without it tostring() returns
        # bytes and print() would emit the b'...' repr instead of the XML text.
        print(etree.tostring(record, pretty_print=True, encoding="unicode"))
def main():
    """
    Dump all records of an EVTX file to standard output as XML.

    Takes one positional command line argument: the path of the EVTX file.
    """
    from argparse import ArgumentParser

    cli = ArgumentParser(description="Dump a binary EVTX file into XML.")
    cli.add_argument("evtx", type=str, help="Path to the Windows EVTX event log file")
    evtx_path = cli.parse_args().evtx

    with evtx.Evtx(evtx_path) as event_log:
        print(e_views.XML_HEADER)
        print("")
        # The generator is lazy, so each record is rendered right before it is printed.
        for xml_text in (entry.xml() for entry in event_log.records()):
            print(xml_text)
        print("")
def main():
    """
    Write the slack space of every chunk of an EVTX file to standard output.

    For each chunk, the bytes between the end of the last record yielded by
    ``chunk.records()`` and ``chunk.offset() + 0x10000`` are written raw to
    ``sys.stdout.buffer``. A chunk that yields no records is written from its
    own start offset.
    """
    parser = argparse.ArgumentParser(description="Dump the slack space of an EVTX file.")
    parser.add_argument("evtx", type=str, help="Path to the Windows EVTX event log file")
    args = parser.parse_args()

    # NOTE(review): 0x10000 is used as the byte span of one chunk -- confirm against the format.
    chunk_span = 0x10000

    # EVTX is binary data, so open it in binary mode; only the file descriptor is used for the mmap.
    with open(args.evtx, "rb") as f:
        with contextlib.closing(mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)) as buf:
            fh = FileHeader(buf, 0x0)
            for chunk in fh.chunks():
                chunk_start = chunk.offset()
                last_allocated_offset = chunk_start
                for record in chunk.records():
                    last_allocated_offset = record.offset() + record.size()

                sys.stdout.buffer.write(buf[last_allocated_offset : chunk_start + chunk_span])
def _event_to_json(event):
    """
    Flatten the parsed ``Event`` mapping of one record into a single-level dict.

    The result holds ``EventRecordID`` from the ``System`` section (when
    present), then one entry per child of ``EventData``. A child is keyed by
    its ``Name`` attribute (``@Name``); a child without one is keyed by its
    element tag. The value is the child's last non-``@Name`` item, or None.

    Args:
      event (dict): value of the ``Event`` key as produced by xmltodict.

    Returns:
      dict: a fresh dict for this record only.
    """
    flat = {}

    system = event.get("System")
    if isinstance(system, dict) and "EventRecordID" in system:
        flat["EventRecordID"] = system["EventRecordID"]

    event_data = event.get("EventData")
    if not isinstance(event_data, dict):
        # No EventData section, or one without child elements: nothing more to add.
        return flat

    for tag, children in event_data.items():
        # xmltodict yields a list for repeated elements but a bare value for a single one.
        if not isinstance(children, list):
            children = [children]

        for child in children:
            if child is None:
                continue
            if not isinstance(child, dict):
                # Text-only element without attributes.
                flat[tag] = child
                continue

            # Reset per child so a value never leaks over from the previous element.
            value = None
            for subkey, subvalue in child.items():
                if subkey != "@Name":
                    value = subvalue
            flat[child.get("@Name", tag)] = value

    return flat


def main():
    """
    Dump the records of an EVTX file as JSON.

    Without ``-o/--output`` each record is printed to standard output as one
    JSON object per line. With it, all records are written as a JSON array to
    the given path (``.json`` is appended when the path lacks that extension).
    """
    import argparse

    parser = argparse.ArgumentParser(description="Dump a binary EVTX file into JSON.")
    parser.add_argument("evtx", type=str, action="store", help="Path to the Windows EVTX event log file")
    parser.add_argument("-o", "--output", type=str, action="store", help="Path of output JSON file")
    args = parser.parse_args()

    final_json = []

    with evtx.Evtx(args.evtx) as log:
        for record in log.records():
            data_dict = xmltodict.parse(record.xml())
            json_subline = _event_to_json(data_dict["Event"])

            if not args.output:
                # json.dumps, not print(dict): the dict repr uses single quotes and is not valid JSON.
                print(json.dumps(json_subline))

            final_json.append(json_subline)

    if args.output:
        if os.path.splitext(args.output)[1] == ".json":
            json_file = args.output
        else:
            json_file = args.output + ".json"

        with open(json_file, "w") as outfile:
            json.dump(final_json, outfile)
def main():
    """
    Write the raw data of one EVTX record to standard output.

    Command line: ``evtx record``, where ``record`` is the record number.

    Raises:
      RuntimeError: if ``log.get_record`` returns None for that number.
    """
    import sys
    import argparse

    parser = argparse.ArgumentParser(description="Write the raw data for a EVTX record to STDOUT")
    parser.add_argument("evtx", type=str, help="Path to the Windows EVTX file")
    parser.add_argument("record", type=int, help="The record number of the record to extract")
    args = parser.parse_args()

    with evtx.Evtx(args.evtx) as log:
        record = log.get_record(args.record)
        if record is None:
            raise RuntimeError("Cannot find the record specified.")
        # Write the bytes unmodified. print() would emit their b'...' repr plus a newline.
        # NOTE(review): assumes record.data() returns bytes -- confirm.
        sys.stdout.buffer.write(record.data())
def main():
    """
    Print the records of an EVTX file that carry a given event ID.

    Command line: ``evtx eid``. Records that failed to parse, or that have no
    ``System/EventID`` text, are skipped.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Print only entries from an EVTX file with a given EID.")
    parser.add_argument("evtx", type=str, help="Path to the Windows EVTX file")
    parser.add_argument("eid", type=int, help="The EID of records to print")

    args = parser.parse_args()

    for node, err in xml_records(args.evtx):
        if err is not None:
            continue

        # get_child() returns None when the element is missing; skip such records
        # rather than failing on the attribute access.
        system = get_child(node, "System")
        if system is None:
            continue
        event_id = get_child(system, "EventID")
        if event_id is None or event_id.text is None:
            continue

        if args.eid == int(event_id.text):
            # encoding="unicode" makes lxml return str. Without it tostring() returns
            # bytes and print() would emit the b'...' repr instead of the XML text.
            print(etree.tostring(node, pretty_print=True, encoding="unicode"))
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | # Version v0.1 20 | import Evtx.Evtx as evtx 21 | 22 | 23 | def main(): 24 | import argparse 25 | 26 | parser = argparse.ArgumentParser(description="Dump information about an EVTX file.") 27 | parser.add_argument("evtx", type=str, help="Path to the Windows EVTX event log file") 28 | args = parser.parse_args() 29 | 30 | with evtx.Evtx(args.evtx) as log: 31 | fh = log.get_file_header() 32 | 33 | print("Information from file header:") 34 | print(("Format version : %d.%d" % (fh.major_version(), fh.minor_version()))) 35 | print(("Flags : 0x%08x" % (fh.flags()))) 36 | dirty_string = "clean" 37 | if fh.is_dirty(): 38 | dirty_string = "dirty" 39 | print(("File is : %s" % (dirty_string))) 40 | full_string = "no" 41 | if fh.is_full(): 42 | full_string = "yes" 43 | print(("Log is full : %s" % (full_string))) 44 | print(("Current chunk : %d of %d" % (fh.current_chunk_number(), fh.chunk_count()))) 45 | print(("Oldest chunk : %d" % (fh.oldest_chunk() + 1))) 46 | print(("Next record# : %d" % (fh.next_record_number()))) 47 | checksum_string = "fail" 48 | if fh.calculate_checksum() == fh.checksum(): 49 | checksum_string = "pass" 50 | print(("Check sum : %s" % (checksum_string))) 51 | print("") 52 | 53 | if fh.is_dirty(): 54 | chunk_count = sum([1 for c in fh.chunks() if c.verify()]) 55 | 56 | last_chunk = None 57 | for chunk in fh.chunks(): 58 | if not chunk.verify(): 59 | continue 60 | last_chunk = chunk 61 | next_record_num = last_chunk.log_last_record_number() + 1 62 | 63 | print("Suspected updated header values (header is 
dirty):") 64 | print(("Current chunk : %d of %d" % (chunk_count, chunk_count))) 65 | print(("Next record# : %d" % (next_record_num))) 66 | print("") 67 | 68 | print("Information from chunks:") 69 | print(" Chunk file (first/last) log (first/last) Header Data") 70 | print("- ----- --------------------- --------------------- ------ ------") 71 | for i, chunk in enumerate(fh.chunks(include_inactive=True), 1): 72 | note_string = " " 73 | if i == fh.current_chunk_number() + 1: 74 | note_string = "*" 75 | elif i == fh.oldest_chunk() + 1: 76 | note_string = ">" 77 | 78 | if not chunk.check_magic(): 79 | try: 80 | magic = chunk.magic() 81 | except UnicodeDecodeError: 82 | magic = "" 83 | 84 | if magic == "\x00\x00\x00\x00\x00\x00\x00\x00": 85 | print("%s %4d [EMPTY]" % (note_string, i)) 86 | else: 87 | print("%s %4d [INVALID]" % (note_string, i)) 88 | continue 89 | 90 | header_checksum_string = "fail" 91 | if chunk.calculate_header_checksum() == chunk.header_checksum(): 92 | header_checksum_string = "pass" 93 | 94 | data_checksum_string = "fail" 95 | if chunk.calculate_data_checksum() == chunk.data_checksum(): 96 | data_checksum_string = "pass" 97 | 98 | print( 99 | "%s %4d %8d %8d %8d %8d %s %s" 100 | % ( 101 | note_string, 102 | i, 103 | chunk.file_first_record_number(), 104 | chunk.file_last_record_number(), 105 | chunk.log_first_record_number(), 106 | chunk.log_last_record_number(), 107 | header_checksum_string, 108 | data_checksum_string, 109 | ) 110 | ) 111 | 112 | 113 | if __name__ == "__main__": 114 | main() 115 | -------------------------------------------------------------------------------- /scripts/evtx_record_structure.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import hexdump 3 | 4 | import Evtx.Evtx as evtx 5 | from Evtx.Nodes import RootNode, BXmlTypeNode, VariantTypeNode, TemplateInstanceNode 6 | 7 | 8 | def describe_root(record, root, indent=0, suppress_values=False): 9 | """ 10 | 
def main():
    """
    Pretty print the binary structure of one EVTX record.

    Prints a hex dump of the record data, the record's absolute offset, the
    node tree produced by `describe_root`, and finally the record's XML.

    Raises:
      RuntimeError: if ``log.get_record`` returns None for that number.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Pretty print the binary structure of an EVTX record.")
    parser.add_argument("evtx", type=str, help="Path to the Windows EVTX file")
    parser.add_argument("record", type=int, help="Record number")
    parser.add_argument("--suppress_values", action="store_true", help="Do not print the values of substitutions.")
    args = parser.parse_args()

    with evtx.Evtx(args.evtx) as log:
        # Look the record up once and reuse it for the hex dump and the description.
        record = log.get_record(args.record)
        if record is None:
            # Fail with a clear message rather than an AttributeError on None.
            raise RuntimeError("Cannot find the record specified.")

        hexdump.hexdump(record.data())

        print("record(absolute_offset=%s)" % record.offset())
        print(describe_root(record, record.root(), suppress_values=args.suppress_values))
        print(record.xml())
3 | # 4 | # Copyright 2012, 2013 Willi Ballenthin 5 | # while at Mandiant 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | import Evtx.Evtx as evtx 19 | import Evtx.Nodes as e_nodes 20 | 21 | 22 | class EvtxFormatter(object): 23 | def __init__(self): 24 | super(EvtxFormatter, self).__init__() 25 | self._indent_stack = [] 26 | self._indent_unit = " " 27 | 28 | def _indent(self): 29 | self._indent_stack.append(self._indent_unit) 30 | 31 | def _dedent(self): 32 | if len(self._indent_stack) > 0: 33 | self._indent_stack = self._indent_stack[:-1] 34 | 35 | def save_indent(self): 36 | return self._indent_stack[:] 37 | 38 | def restore_indent(self, indent): 39 | self._indent_stack = indent 40 | 41 | def _l(self, s): 42 | return "".join(self._indent_stack) + s 43 | 44 | def format_header(self, fh): 45 | yield self._l("File header") 46 | self._indent() 47 | yield self._l("magic: %s" % (fh.magic())) 48 | for num_field in [ 49 | "oldest_chunk", 50 | "current_chunk_number", 51 | "next_record_number", 52 | "header_size", 53 | "minor_version", 54 | "major_version", 55 | "header_chunk_size", 56 | "chunk_count", 57 | "flags", 58 | "checksum", 59 | ]: 60 | yield self._l("%s: %s" % (num_field, hex(getattr(fh, num_field)()))) 61 | 62 | yield self._l("verify: %s" % (fh.verify())) 63 | yield self._l("dirty: %s" % (fh.is_dirty())) 64 | yield self._l("full: %s" % (fh.is_full())) 65 | 66 | for chunk in fh.chunks(): 67 | for line in 
self.format_chunk(chunk): 68 | yield line 69 | self._dedent() 70 | 71 | def format_chunk(self, chunk): 72 | yield self._l("Chunk") 73 | self._indent() 74 | yield self._l("offset: %s" % (hex(chunk.offset()))) 75 | yield self._l("magic: %s" % (chunk.magic())) 76 | 77 | for num_field in [ 78 | "file_first_record_number", 79 | "file_last_record_number", 80 | "log_first_record_number", 81 | "log_last_record_number", 82 | "header_size", 83 | "last_record_offset", 84 | "next_record_offset", 85 | "data_checksum", 86 | "header_checksum", 87 | ]: 88 | yield self._l("%s: %s" % (num_field, hex(getattr(chunk, num_field)()))) 89 | 90 | yield self._l("verify: %s" % (chunk.verify())) 91 | yield self._l("templates: %d" % (len(chunk.templates()))) 92 | 93 | for record in chunk.records(): 94 | for line in self.format_record(record): 95 | yield line 96 | self._dedent() 97 | 98 | def format_record(self, record): 99 | yield self._l("Record") 100 | self._indent() 101 | yield self._l("offset: %s" % (hex(record.offset()))) 102 | yield self._l("magic: %s" % (hex(record.magic()))) 103 | yield self._l("size: %s" % (hex(record.size()))) 104 | yield self._l("number: %s" % (hex(record.record_num()))) 105 | yield self._l("timestamp: %s" % (record.timestamp())) 106 | yield self._l("verify: %s" % (record.verify())) 107 | 108 | try: 109 | s = self.save_indent() 110 | for line in self.format_node(record, record.root()): 111 | yield line 112 | except Exception as e: 113 | self.restore_indent(s) 114 | yield "ERROR: " + str(e) 115 | self._dedent() 116 | 117 | def _format_node_name(self, record, node, extra=None): 118 | """ 119 | note: this doesn't yield, it returns 120 | """ 121 | line = "" 122 | if extra is not None: 123 | line = "%s(offset=%s, %s)" % (node.__class__.__name__, hex(node.offset() - record.offset()), extra) 124 | else: 125 | line = "%s(offset=%s)" % (node.__class__.__name__, hex(node.offset() - record.offset())) 126 | 127 | if isinstance(node, e_nodes.VariantTypeNode): 128 | line += " --> 
%s" % (node.string()) 129 | if isinstance(node, e_nodes.OpenStartElementNode): 130 | line += " --> %s" % (node.tag_name()) 131 | if isinstance(node, e_nodes.AttributeNode): 132 | line += " --> %s" % (node.attribute_name().string()) 133 | return line 134 | 135 | def format_node(self, record, node): 136 | extra = None 137 | if isinstance(node, e_nodes.TemplateInstanceNode) and node.is_resident_template(): 138 | extra = "resident=True, length=%s" % (hex(node.template().data_length())) 139 | elif isinstance(node, e_nodes.TemplateInstanceNode): 140 | extra = "resident=False" 141 | yield self._l(self._format_node_name(record, node, extra=extra)) 142 | 143 | if isinstance(node, e_nodes.BXmlTypeNode): 144 | self._indent() 145 | for line in self.format_node(record, node._root): 146 | yield line 147 | self._dedent() 148 | elif isinstance(node, e_nodes.TemplateInstanceNode) and node.is_resident_template(): 149 | self._indent() 150 | for line in self.format_node(record, node.template()): 151 | yield line 152 | self._dedent() 153 | 154 | self._indent() 155 | for child in node.children(): 156 | for line in self.format_node(record, child): 157 | yield line 158 | self._dedent() 159 | 160 | if isinstance(node, e_nodes.RootNode): 161 | ofs = node.tag_and_children_length() 162 | yield self._l("Substitutions(offset=%s)" % (hex(node.offset() - record.offset() + ofs))) 163 | self._indent() 164 | 165 | for sub in node.substitutions(): 166 | for line in self.format_node(record, sub): 167 | yield line 168 | self._dedent() 169 | 170 | 171 | def main(): 172 | import argparse 173 | 174 | parser = argparse.ArgumentParser(description="Dump the structure of an EVTX file.") 175 | parser.add_argument("evtx", type=str, help="Path to the Windows EVTX event log file") 176 | args = parser.parse_args() 177 | 178 | with evtx.Evtx(args.evtx) as log: 179 | formatter = EvtxFormatter() 180 | for line in formatter.format_header(log.get_file_header()): 181 | print(line) 182 | 183 | 184 | if __name__ == 
"__main__": 185 | main() 186 | -------------------------------------------------------------------------------- /scripts/evtx_templates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # This file is part of python-evtx. 3 | # 4 | # Copyright 2012, 2013 Willi Ballenthin 5 | # while at Mandiant 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | # Version v0.1 20 | import Evtx.Evtx as evtx 21 | import Evtx.Views as e_views 22 | 23 | 24 | def main(): 25 | import argparse 26 | 27 | parser = argparse.ArgumentParser(description="Dump templates from a binary EVTX file.") 28 | parser.add_argument("evtx", type=str, help="Path to the Windows EVTX event log file") 29 | args = parser.parse_args() 30 | 31 | with evtx.Evtx(args.evtx) as log: 32 | for i, chunk in enumerate(log.chunks()): 33 | for template in list(chunk.templates().values()): 34 | print("Template {%s} at chunk %d, offset %s" % (template.guid(), i, hex(template.absolute_offset(0x0)))) 35 | print(e_views.evtx_template_readable_view(template)) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. 
2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at: [package root]/LICENSE.txt 5 | # Unless required by applicable law or agreed to in writing, software distributed under the License 6 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 7 | # See the License for the specific language governing permissions and limitations under the License. 8 | 9 | # import all the symbols from our fixtures 10 | # and make available to test cases, implicitly. 11 | # this is thanks to pytest magic. 12 | # 13 | # see the following for a discussion: 14 | # https://www.revsys.com/tidbits/pytest-fixtures-are-magic/ 15 | # https://lobste.rs/s/j8xgym/pytest_fixtures_are_magic 16 | from fixtures import * # noqa: F403 [unable to detect undefined names] 17 | -------------------------------------------------------------------------------- /tests/data/dns_log_malformed.evtx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/python-evtx/3d9ab5207c12e0ace4147df0b36aaed59e5b58ba/tests/data/dns_log_malformed.evtx -------------------------------------------------------------------------------- /tests/data/issue_38.evtx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/python-evtx/3d9ab5207c12e0ace4147df0b36aaed59e5b58ba/tests/data/issue_38.evtx -------------------------------------------------------------------------------- /tests/data/issue_39.evtx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/python-evtx/3d9ab5207c12e0ace4147df0b36aaed59e5b58ba/tests/data/issue_39.evtx -------------------------------------------------------------------------------- 
/tests/data/issue_43.evtx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/python-evtx/3d9ab5207c12e0ace4147df0b36aaed59e5b58ba/tests/data/issue_43.evtx -------------------------------------------------------------------------------- /tests/data/readme.md: -------------------------------------------------------------------------------- 1 | The source for system.evtx with md5 182de19fe6a25b928a34ad59af0bbf1e 2 | was https://github.com/log2timeline/plaso/tree/1e2fa282efa2f839e1f179a3e98dbf922b5dbbc7/test_data 3 | 4 | The source for security.evtx with md5 8fa20a376cb6745453bc51f906e0fcd0 5 | was Carlos Dias, via email, on May 4, 2017. 6 | 7 | The source for dns_log_malformed.evtx with md5 ae831beda7dfda43f4de0e18a1035f64 8 | was @stephensheridan, via GitHub issue #37 (https://github.com/williballenthin/python-evtx/issues/37). 9 | 10 | The source for issue_38.evtx with md5 d75c90e629f38c7b9e612905e02e2255 11 | was @nbareil, via GitHub issue #38 (https://github.com/williballenthin/python-evtx/issues/38). 
12 | 13 | -------------------------------------------------------------------------------- /tests/data/security.evtx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/python-evtx/3d9ab5207c12e0ace4147df0b36aaed59e5b58ba/tests/data/security.evtx -------------------------------------------------------------------------------- /tests/data/system.evtx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/python-evtx/3d9ab5207c12e0ace4147df0b36aaed59e5b58ba/tests/data/system.evtx -------------------------------------------------------------------------------- /tests/fixtures.py: -------------------------------------------------------------------------------- 1 | import os 2 | import mmap 3 | import os.path 4 | import contextlib 5 | 6 | import pytest 7 | 8 | 9 | def system_path(): 10 | """ 11 | fetch the file system path of the system.evtx test file. 12 | 13 | Returns: 14 | str: the file system path of the test file. 15 | """ 16 | cd = os.path.dirname(__file__) 17 | datadir = os.path.join(cd, "data") 18 | systempath = os.path.join(datadir, "system.evtx") 19 | return systempath 20 | 21 | 22 | @pytest.fixture 23 | def system(): 24 | """ 25 | yields the contents of the system.evtx test file. 26 | the returned value is a memory map of the contents, 27 | so it acts pretty much like a byte string. 28 | 29 | Returns: 30 | mmap.mmap: the contents of the test file. 31 | """ 32 | p = system_path() 33 | with open(p, "rb") as f: 34 | with contextlib.closing(mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)) as buf: 35 | yield buf 36 | 37 | 38 | def security_path(): 39 | """ 40 | fetch the file system path of the security.evtx test file. 41 | 42 | Returns: 43 | str: the file system path of the test file. 
44 | """ 45 | cd = os.path.dirname(__file__) 46 | datadir = os.path.join(cd, "data") 47 | secpath = os.path.join(datadir, "security.evtx") 48 | return secpath 49 | 50 | 51 | @pytest.fixture 52 | def security(): 53 | """ 54 | yields the contents of the security.evtx test file. 55 | the returned value is a memory map of the contents, 56 | so it acts pretty much like a byte string. 57 | 58 | Returns: 59 | mmap.mmap: the contents of the test file. 60 | """ 61 | p = security_path() 62 | with open(p, "rb") as f: 63 | with contextlib.closing(mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)) as buf: 64 | yield buf 65 | 66 | 67 | @pytest.fixture 68 | def data_path(): 69 | """ 70 | fetch the file system path of the directory containing test files. 71 | 72 | Returns: 73 | str: the file system path of the test directory. 74 | """ 75 | cd = os.path.dirname(__file__) 76 | datadir = os.path.join(cd, "data") 77 | return datadir 78 | -------------------------------------------------------------------------------- /tests/test_chunks.py: -------------------------------------------------------------------------------- 1 | import Evtx.Evtx as evtx 2 | 3 | EMPTY_MAGIC = "\x00" * 0x8 4 | 5 | 6 | def test_chunks(system): 7 | """ 8 | regression test parsing some known fields in the file chunks. 9 | 10 | Args: 11 | system (bytes): the system.evtx test file contents. pytest fixture. 
12 | """ 13 | fh = evtx.FileHeader(system, 0x0) 14 | 15 | # collected empirically 16 | expecteds = [ 17 | {"start_file": 1, "end_file": 153, "start_log": 12049, "end_log": 12201}, 18 | {"start_file": 154, "end_file": 336, "start_log": 12202, "end_log": 12384}, 19 | {"start_file": 337, "end_file": 526, "start_log": 12385, "end_log": 12574}, 20 | {"start_file": 527, "end_file": 708, "start_log": 12575, "end_log": 12756}, 21 | {"start_file": 709, "end_file": 882, "start_log": 12757, "end_log": 12930}, 22 | {"start_file": 883, "end_file": 1059, "start_log": 12931, "end_log": 13107}, 23 | {"start_file": 1060, "end_file": 1241, "start_log": 13108, "end_log": 13289}, 24 | {"start_file": 1242, "end_file": 1424, "start_log": 13290, "end_log": 13472}, 25 | {"start_file": 1425, "end_file": 1601, "start_log": 13473, "end_log": 13649}, 26 | ] 27 | 28 | for i, chunk in enumerate(fh.chunks()): 29 | # collected empirically 30 | if i < 9: 31 | assert chunk.check_magic() is True 32 | assert chunk.magic() == "ElfChnk\x00" 33 | assert chunk.calculate_header_checksum() == chunk.header_checksum() 34 | assert chunk.calculate_data_checksum() == chunk.data_checksum() 35 | 36 | expected = expecteds[i] 37 | assert chunk.file_first_record_number() == expected["start_file"] 38 | assert chunk.file_last_record_number() == expected["end_file"] 39 | assert chunk.log_first_record_number() == expected["start_log"] 40 | assert chunk.log_last_record_number() == expected["end_log"] 41 | 42 | else: 43 | assert chunk.check_magic() is False 44 | assert chunk.magic() == EMPTY_MAGIC 45 | 46 | 47 | def test_chunks2(security): 48 | """ 49 | regression test parsing some known fields in the file chunks. 50 | 51 | Args: 52 | security (bytes): the security.evtx test file contents. pytest fixture. 
53 | """ 54 | fh = evtx.FileHeader(security, 0x0) 55 | 56 | # collected empirically 57 | expecteds = [ 58 | {"start_file": 1, "end_file": 91, "start_log": 1, "end_log": 91}, 59 | {"start_file": 92, "end_file": 177, "start_log": 92, "end_log": 177}, 60 | {"start_file": 178, "end_file": 260, "start_log": 178, "end_log": 260}, 61 | {"start_file": 261, "end_file": 349, "start_log": 261, "end_log": 349}, 62 | {"start_file": 350, "end_file": 441, "start_log": 350, "end_log": 441}, 63 | {"start_file": 442, "end_file": 530, "start_log": 442, "end_log": 530}, 64 | {"start_file": 531, "end_file": 622, "start_log": 531, "end_log": 622}, 65 | {"start_file": 623, "end_file": 711, "start_log": 623, "end_log": 711}, 66 | {"start_file": 712, "end_file": 802, "start_log": 712, "end_log": 802}, 67 | {"start_file": 803, "end_file": 888, "start_log": 803, "end_log": 888}, 68 | {"start_file": 889, "end_file": 976, "start_log": 889, "end_log": 976}, 69 | {"start_file": 977, "end_file": 1063, "start_log": 977, "end_log": 1063}, 70 | {"start_file": 1064, "end_file": 1148, "start_log": 1064, "end_log": 1148}, 71 | {"start_file": 1149, "end_file": 1239, "start_log": 1149, "end_log": 1239}, 72 | {"start_file": 1240, "end_file": 1327, "start_log": 1240, "end_log": 1327}, 73 | {"start_file": 1328, "end_file": 1414, "start_log": 1328, "end_log": 1414}, 74 | {"start_file": 1415, "end_file": 1501, "start_log": 1415, "end_log": 1501}, 75 | {"start_file": 1502, "end_file": 1587, "start_log": 1502, "end_log": 1587}, 76 | {"start_file": 1588, "end_file": 1682, "start_log": 1588, "end_log": 1682}, 77 | {"start_file": 1683, "end_file": 1766, "start_log": 1683, "end_log": 1766}, 78 | {"start_file": 1767, "end_file": 1847, "start_log": 1767, "end_log": 1847}, 79 | {"start_file": 1848, "end_file": 1942, "start_log": 1848, "end_log": 1942}, 80 | {"start_file": 1943, "end_file": 2027, "start_log": 1943, "end_log": 2027}, 81 | {"start_file": 2028, "end_file": 2109, "start_log": 2028, "end_log": 2109}, 82 | 
{"start_file": 2110, "end_file": 2201, "start_log": 2110, "end_log": 2201}, 83 | {"start_file": 2202, "end_file": 2261, "start_log": 2202, "end_log": 2261}, 84 | ] 85 | 86 | for i, chunk in enumerate(fh.chunks()): 87 | # collected empirically 88 | if i < 26: 89 | assert chunk.check_magic() is True 90 | assert chunk.magic() == "ElfChnk\x00" 91 | assert chunk.calculate_header_checksum() == chunk.header_checksum() 92 | assert chunk.calculate_data_checksum() == chunk.data_checksum() 93 | 94 | expected = expecteds[i] 95 | assert chunk.file_first_record_number() == expected["start_file"] 96 | assert chunk.file_last_record_number() == expected["end_file"] 97 | assert chunk.log_first_record_number() == expected["start_log"] 98 | assert chunk.log_last_record_number() == expected["end_log"] 99 | 100 | else: 101 | assert chunk.check_magic() is False 102 | assert chunk.magic() == EMPTY_MAGIC 103 | -------------------------------------------------------------------------------- /tests/test_header.py: -------------------------------------------------------------------------------- 1 | import Evtx.Evtx as evtx 2 | 3 | 4 | def test_file_header(system): 5 | """ 6 | regression test parsing some known fields in the file header. 7 | 8 | Args: 9 | system (bytes): the system.evtx test file contents. pytest fixture. 10 | """ 11 | fh = evtx.FileHeader(system, 0x0) 12 | 13 | # collected empirically 14 | assert fh.magic() == "ElfFile\x00" 15 | assert fh.major_version() == 0x3 16 | assert fh.minor_version() == 0x1 17 | assert fh.flags() == 0x1 18 | assert fh.is_dirty() is True 19 | assert fh.is_full() is False 20 | assert fh.current_chunk_number() == 0x8 21 | assert fh.chunk_count() == 0x9 22 | assert fh.oldest_chunk() == 0x0 23 | assert fh.next_record_number() == 0x34D8 24 | assert fh.checksum() == 0x41B4B1EC 25 | assert fh.calculate_checksum() == fh.checksum() 26 | 27 | 28 | def test_file_header2(security): 29 | """ 30 | regression test parsing some known fields in the file header. 
31 | 32 | Args: 33 | security (bytes): the security.evtx test file contents. pytest fixture. 34 | """ 35 | fh = evtx.FileHeader(security, 0x0) 36 | 37 | # collected empirically 38 | assert fh.magic() == "ElfFile\x00" 39 | assert fh.major_version() == 0x3 40 | assert fh.minor_version() == 0x1 41 | assert fh.flags() == 0x1 42 | assert fh.is_dirty() is True 43 | assert fh.is_full() is False 44 | assert fh.current_chunk_number() == 0x19 45 | assert fh.chunk_count() == 0x1A 46 | assert fh.oldest_chunk() == 0x0 47 | assert fh.next_record_number() == 0x8B2 48 | assert fh.checksum() == 0x3F6E33D5 49 | assert fh.calculate_checksum() == fh.checksum() 50 | -------------------------------------------------------------------------------- /tests/test_issue_37.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | import Evtx.Evtx as evtx 6 | 7 | 8 | def test_corrupt_ascii_example(data_path): 9 | """ 10 | regression test demonstrating issue 37. 11 | 12 | Args: 13 | data_path (str): the file system path of the test directory. 14 | """ 15 | # record number two contains a QNAME xml element 16 | # with an ASCII text value that is invalid ASCII: 17 | # 18 | # 000002E0: 31 39 33 2E 31 2E 193.1. 19 | # 000002F0: 33 36 2E 31 32 31 30 2E 39 2E 31 35 2E 32 30 32 36.1210.9.15.202 20 | # 00000300: 01 62 2E 5F 64 6E 73 2D 73 64 2E 5F 75 64 70 2E .b._dns-sd._udp. 21 | # 00000310: 40 A6 35 01 2E @.5.. 22 | # ^^ ^^ ^^ 23 | # 24 | with pytest.raises(UnicodeDecodeError): 25 | with evtx.Evtx(os.path.join(data_path, "dns_log_malformed.evtx")) as log: 26 | for chunk in log.chunks(): 27 | for record in chunk.records(): 28 | assert record.xml() is not None 29 | 30 | 31 | def test_continue_parsing_after_corrupt_ascii(data_path): 32 | """ 33 | regression test demonstrating issue 37. 34 | 35 | Args: 36 | data_path (str): the file system path of the test directory. 
37 | """ 38 | attempted = 0 39 | completed = 0 40 | failed = 0 41 | with evtx.Evtx(os.path.join(data_path, "dns_log_malformed.evtx")) as log: 42 | for chunk in log.chunks(): 43 | for record in chunk.records(): 44 | try: 45 | attempted += 1 46 | assert record.xml() is not None 47 | completed += 1 48 | except UnicodeDecodeError: 49 | failed += 1 50 | 51 | # this small log file has exactly five records. 52 | assert attempted == 5 53 | # the first record is valid. 54 | assert completed == 1 55 | # however the remaining four have corrupted ASCII strings, 56 | # which we are unable to decode. 57 | assert failed == 4 58 | -------------------------------------------------------------------------------- /tests/test_issue_38.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import Evtx.Evtx as evtx 4 | 5 | 6 | def one(iterable): 7 | """ 8 | fetch a single element from the given iterable. 9 | 10 | Args: 11 | iterable (iterable): a sequence of things. 12 | 13 | Returns: 14 | object: the first thing in the sequence. 15 | """ 16 | for i in iterable: 17 | return i 18 | 19 | 20 | def get_child(node, tag, ns="{http://schemas.microsoft.com/win/2004/08/events/event}"): 21 | return node.find("%s%s" % (ns, tag)) 22 | 23 | 24 | def test_hex64_value(data_path): 25 | """ 26 | regression test demonstrating issue 38. 27 | 28 | Args: 29 | data_path (str): the file system path of the test directory. 
30 | """ 31 | with evtx.Evtx(os.path.join(data_path, "issue_38.evtx")) as log: 32 | for chunk in log.chunks(): 33 | record = one(chunk.records()) 34 | event_data = get_child(record.lxml(), "EventData") 35 | for data in event_data: 36 | if data.get("Name") != "SubjectLogonId": 37 | continue 38 | 39 | assert data.text == "0x000000000019d3af" 40 | -------------------------------------------------------------------------------- /tests/test_issue_39.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import Evtx.Evtx as evtx 4 | 5 | 6 | def one(iterable): 7 | """ 8 | fetch a single element from the given iterable. 9 | 10 | Args: 11 | iterable (iterable): a sequence of things. 12 | 13 | Returns: 14 | object: the first thing in the sequence. 15 | """ 16 | for i in iterable: 17 | return i 18 | 19 | 20 | def get_child(node, tag, ns="{http://schemas.microsoft.com/win/2004/08/events/event}"): 21 | return node.find("%s%s" % (ns, tag)) 22 | 23 | 24 | def get_children(node, tags, ns="{http://schemas.microsoft.com/win/2004/08/events/event}"): 25 | for tag in tags: 26 | node = get_child(node, tag, ns=ns) 27 | return node 28 | 29 | 30 | def test_systemtime(data_path): 31 | """ 32 | regression test demonstrating issue 39. 33 | 34 | Args: 35 | data_path (str): the file system path of the test directory. 
36 | """ 37 | with evtx.Evtx(os.path.join(data_path, "issue_39.evtx")) as log: 38 | for record in log.records(): 39 | if record.record_num() != 129: 40 | continue 41 | 42 | time_created = get_children(record.lxml(), ["System", "TimeCreated"]) 43 | assert time_created.get("SystemTime") == "2017-04-21 07:41:17.003393+00:00" 44 | -------------------------------------------------------------------------------- /tests/test_issue_43.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | import Evtx.Evtx as evtx 6 | 7 | 8 | def get_record_by_num(log, record_num): 9 | for record in log.records(): 10 | if record.record_num() == record_num: 11 | return record 12 | raise KeyError(record_num) 13 | 14 | 15 | def test_issue_43(data_path): 16 | """ 17 | regression test demonstrating issue 43. 18 | 19 | Args: 20 | data_path (str): the file system path of the test directory. 21 | """ 22 | with evtx.Evtx(os.path.join(data_path, "issue_43.evtx")) as log: 23 | bad_rec = get_record_by_num(log, 508) 24 | with pytest.raises(UnicodeDecodeError): 25 | _ = bad_rec.xml() 26 | -------------------------------------------------------------------------------- /tests/test_records.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | import importlib.util 3 | 4 | import pytest 5 | 6 | import Evtx.Evtx as evtx 7 | import Evtx.Nodes as e_nodes 8 | 9 | if importlib.util.find_spec("lxml"): 10 | no_lxml = False 11 | else: 12 | no_lxml = True 13 | 14 | 15 | def test_parse_records(system): 16 | """ 17 | regression test demonstrating that all record metadata can be parsed. 18 | 19 | Args: 20 | system (bytes): the system.evtx test file contents. pytest fixture. 
21 | """ 22 | fh = evtx.FileHeader(system, 0x0) 23 | for i, chunk in enumerate(fh.chunks()): 24 | for j, record in enumerate(chunk.records()): 25 | assert record.magic() == 0x2A2A 26 | 27 | 28 | def test_parse_records2(security): 29 | """ 30 | regression test demonstrating that all record metadata can be parsed. 31 | 32 | Args: 33 | security (bytes): the security.evtx test file contents. pytest fixture. 34 | """ 35 | fh = evtx.FileHeader(security, 0x0) 36 | for i, chunk in enumerate(fh.chunks()): 37 | for j, record in enumerate(chunk.records()): 38 | assert record.magic() == 0x2A2A 39 | 40 | 41 | def one(iterable): 42 | """ 43 | fetch a single element from the given iterable. 44 | 45 | Args: 46 | iterable (iterable): a sequence of things. 47 | 48 | Returns: 49 | object: the first thing in the sequence. 50 | """ 51 | for i in iterable: 52 | return i 53 | 54 | 55 | def extract_structure(node): 56 | """ 57 | given an evtx bxml node, generate a tree of all the nodes. 58 | each node has: 59 | - str: node type 60 | - str: (optional) value 61 | - list: (optional) children 62 | 63 | Args: 64 | node (evtx.Node): the root node. 65 | 66 | Returns: 67 | list: the tree representing the bxml structure. 
68 | """ 69 | name = node.__class__.__name__ 70 | 71 | if isinstance(node, e_nodes.BXmlTypeNode): 72 | # must go before is VariantTypeNode 73 | value = None 74 | elif isinstance(node, e_nodes.VariantTypeNode): 75 | value = node.string() 76 | elif isinstance(node, e_nodes.OpenStartElementNode): 77 | value = node.tag_name() 78 | elif isinstance(node, e_nodes.AttributeNode): 79 | value = node.attribute_name().string() 80 | else: 81 | value = None 82 | 83 | children = [] 84 | if isinstance(node, e_nodes.BXmlTypeNode): 85 | children.append(extract_structure(node._root)) 86 | elif isinstance(node, e_nodes.TemplateInstanceNode) and node.is_resident_template(): 87 | children.append(extract_structure(node.template())) 88 | 89 | children.extend(list(map(extract_structure, node.children()))) 90 | 91 | if isinstance(node, e_nodes.RootNode): 92 | substitutions = list(map(extract_structure, node.substitutions())) 93 | children.append(["Substitutions", None, substitutions]) 94 | 95 | if children: 96 | return [name, value, children] 97 | elif value: 98 | return [name, value] 99 | else: 100 | return [name] 101 | 102 | 103 | def test_parse_record(system): 104 | """ 105 | regression test demonstrating binary xml nodes getting parsed. 106 | 107 | Args: 108 | system (bytes): the system.evtx test file contents. pytest fixture. 109 | """ 110 | fh = evtx.FileHeader(system, 0x0) 111 | chunk = one(fh.chunks()) 112 | record = one(chunk.records()) 113 | 114 | # generated by hand, but matches the output of extract_structure. 
115 | expected = [ 116 | "RootNode", 117 | None, 118 | [ 119 | ["StreamStartNode"], 120 | [ 121 | "TemplateInstanceNode", 122 | None, 123 | [ 124 | [ 125 | "TemplateNode", 126 | None, 127 | [ 128 | ["StreamStartNode"], 129 | [ 130 | "OpenStartElementNode", 131 | "Event", 132 | [ 133 | [ 134 | "AttributeNode", 135 | "xmlns", 136 | [ 137 | [ 138 | "ValueNode", 139 | None, 140 | [ 141 | [ 142 | "WstringTypeNode", 143 | "http://schemas.microsoft.com/win/2004/08/events/event", 144 | ] 145 | ], 146 | ] 147 | ], 148 | ], 149 | ["CloseStartElementNode"], 150 | [ 151 | "OpenStartElementNode", 152 | "System", 153 | [ 154 | ["CloseStartElementNode"], 155 | [ 156 | "OpenStartElementNode", 157 | "Provider", 158 | [ 159 | [ 160 | "AttributeNode", 161 | "Name", 162 | [ 163 | [ 164 | "ValueNode", 165 | None, 166 | [["WstringTypeNode", "Microsoft-Windows-Eventlog"]], 167 | ] 168 | ], 169 | ], 170 | [ 171 | "AttributeNode", 172 | "Guid", 173 | [ 174 | [ 175 | "ValueNode", 176 | None, 177 | [ 178 | [ 179 | "WstringTypeNode", 180 | "{fc65ddd8-d6ef-4962-83d5-6e5cfe9ce148}", 181 | ] 182 | ], 183 | ] 184 | ], 185 | ], 186 | ["CloseEmptyElementNode"], 187 | ], 188 | ], 189 | [ 190 | "OpenStartElementNode", 191 | "EventID", 192 | [ 193 | ["AttributeNode", "Qualifiers", [["ConditionalSubstitutionNode"]]], 194 | ["CloseStartElementNode"], 195 | ["ConditionalSubstitutionNode"], 196 | ["CloseElementNode"], 197 | ], 198 | ], 199 | [ 200 | "OpenStartElementNode", 201 | "Version", 202 | [ 203 | ["CloseStartElementNode"], 204 | ["ConditionalSubstitutionNode"], 205 | ["CloseElementNode"], 206 | ], 207 | ], 208 | [ 209 | "OpenStartElementNode", 210 | "Level", 211 | [ 212 | ["CloseStartElementNode"], 213 | ["ConditionalSubstitutionNode"], 214 | ["CloseElementNode"], 215 | ], 216 | ], 217 | [ 218 | "OpenStartElementNode", 219 | "Task", 220 | [ 221 | ["CloseStartElementNode"], 222 | ["ConditionalSubstitutionNode"], 223 | ["CloseElementNode"], 224 | ], 225 | ], 226 | [ 227 | "OpenStartElementNode", 228 
| "Opcode", 229 | [ 230 | ["CloseStartElementNode"], 231 | ["ConditionalSubstitutionNode"], 232 | ["CloseElementNode"], 233 | ], 234 | ], 235 | [ 236 | "OpenStartElementNode", 237 | "Keywords", 238 | [ 239 | ["CloseStartElementNode"], 240 | ["ConditionalSubstitutionNode"], 241 | ["CloseElementNode"], 242 | ], 243 | ], 244 | [ 245 | "OpenStartElementNode", 246 | "TimeCreated", 247 | [ 248 | ["AttributeNode", "SystemTime", [["ConditionalSubstitutionNode"]]], 249 | ["CloseEmptyElementNode"], 250 | ], 251 | ], 252 | [ 253 | "OpenStartElementNode", 254 | "EventRecordID", 255 | [ 256 | ["CloseStartElementNode"], 257 | ["ConditionalSubstitutionNode"], 258 | ["CloseElementNode"], 259 | ], 260 | ], 261 | [ 262 | "OpenStartElementNode", 263 | "Correlation", 264 | [ 265 | ["AttributeNode", "ActivityID", [["ConditionalSubstitutionNode"]]], 266 | [ 267 | "AttributeNode", 268 | "RelatedActivityID", 269 | [["ConditionalSubstitutionNode"]], 270 | ], 271 | ["CloseEmptyElementNode"], 272 | ], 273 | ], 274 | [ 275 | "OpenStartElementNode", 276 | "Execution", 277 | [ 278 | ["AttributeNode", "ProcessID", [["ConditionalSubstitutionNode"]]], 279 | ["AttributeNode", "ThreadID", [["ConditionalSubstitutionNode"]]], 280 | ["CloseEmptyElementNode"], 281 | ], 282 | ], 283 | [ 284 | "OpenStartElementNode", 285 | "Channel", 286 | [ 287 | ["CloseStartElementNode"], 288 | ["ValueNode", None, [["WstringTypeNode", "System"]]], 289 | ["CloseElementNode"], 290 | ], 291 | ], 292 | [ 293 | "OpenStartElementNode", 294 | "Computer", 295 | [ 296 | ["CloseStartElementNode"], 297 | [ 298 | "ValueNode", 299 | None, 300 | [["WstringTypeNode", "WKS-WIN764BITB.shieldbase.local"]], 301 | ], 302 | ["CloseElementNode"], 303 | ], 304 | ], 305 | [ 306 | "OpenStartElementNode", 307 | "Security", 308 | [ 309 | ["AttributeNode", "UserID", [["ConditionalSubstitutionNode"]]], 310 | ["CloseEmptyElementNode"], 311 | ], 312 | ], 313 | ["CloseElementNode"], 314 | ], 315 | ], 316 | [ 317 | "OpenStartElementNode", 318 | 
"UserData", 319 | [ 320 | ["CloseStartElementNode"], 321 | ["ConditionalSubstitutionNode"], 322 | ["CloseElementNode"], 323 | ], 324 | ], 325 | ["CloseElementNode"], 326 | ], 327 | ], 328 | ["EndOfStreamNode"], 329 | ], 330 | ] 331 | ], 332 | ], 333 | [ 334 | "Substitutions", 335 | None, 336 | [ 337 | ["UnsignedByteTypeNode", "4"], 338 | ["UnsignedByteTypeNode", "0"], 339 | ["UnsignedWordTypeNode", "105"], 340 | ["UnsignedWordTypeNode", "105"], 341 | ["NullTypeNode"], 342 | ["Hex64TypeNode", "0x8000000000000000"], 343 | ["FiletimeTypeNode", "2012-03-14 04:17:43.354563+00:00"], 344 | ["NullTypeNode"], 345 | ["UnsignedDwordTypeNode", "820"], 346 | ["UnsignedDwordTypeNode", "2868"], 347 | ["UnsignedQwordTypeNode", "12049"], 348 | ["UnsignedByteTypeNode", "0"], 349 | ["NullTypeNode"], 350 | ["NullTypeNode"], 351 | ["NullTypeNode"], 352 | ["NullTypeNode"], 353 | ["NullTypeNode"], 354 | ["NullTypeNode"], 355 | ["NullTypeNode"], 356 | [ 357 | "BXmlTypeNode", 358 | None, 359 | [ 360 | [ 361 | "RootNode", 362 | None, 363 | [ 364 | ["StreamStartNode"], 365 | [ 366 | "TemplateInstanceNode", 367 | None, 368 | [ 369 | [ 370 | "TemplateNode", 371 | None, 372 | [ 373 | ["StreamStartNode"], 374 | [ 375 | "OpenStartElementNode", 376 | "AutoBackup", 377 | [ 378 | [ 379 | "AttributeNode", 380 | "xmlns:auto-ns3", 381 | [ 382 | [ 383 | "ValueNode", 384 | None, 385 | [ 386 | [ 387 | "WstringTypeNode", 388 | "http://schemas.microsoft.com/win/2004/08/events", 389 | ] 390 | ], 391 | ] 392 | ], 393 | ], 394 | [ 395 | "AttributeNode", 396 | "xmlns", 397 | [ 398 | [ 399 | "ValueNode", 400 | None, 401 | [ 402 | [ 403 | "WstringTypeNode", 404 | "http://manifests.microsoft.com/win/2004/08/windows/eventlog", 405 | ] 406 | ], 407 | ] 408 | ], 409 | ], 410 | ["CloseStartElementNode"], 411 | [ 412 | "OpenStartElementNode", 413 | "Channel", 414 | [ 415 | ["CloseStartElementNode"], 416 | ["NormalSubstitutionNode"], 417 | ["CloseElementNode"], 418 | ], 419 | ], 420 | [ 421 | "OpenStartElementNode", 422 
| "BackupPath", 423 | [ 424 | ["CloseStartElementNode"], 425 | ["NormalSubstitutionNode"], 426 | ["CloseElementNode"], 427 | ], 428 | ], 429 | ["CloseElementNode"], 430 | ], 431 | ], 432 | ["EndOfStreamNode"], 433 | ], 434 | ] 435 | ], 436 | ], 437 | [ 438 | "Substitutions", 439 | None, 440 | [ 441 | ["WstringTypeNode", "System"], 442 | [ 443 | "WstringTypeNode", 444 | r"C:\Windows\System32\Winevt\Logs\Archive-System-2012-03-14-04-17-39-932.evtx", 445 | ], 446 | ], 447 | ], 448 | ], 449 | ] 450 | ], 451 | ], 452 | ], 453 | ], 454 | ], 455 | ] 456 | 457 | assert extract_structure(record.root()) == expected 458 | 459 | 460 | def test_render_record(system): 461 | """ 462 | regression test demonstrating formatting a record to xml. 463 | 464 | Args: 465 | system (bytes): the system.evtx test file contents. pytest fixture. 466 | """ 467 | fh = evtx.FileHeader(system, 0x0) 468 | chunk = one(fh.chunks()) 469 | record = one(chunk.records()) 470 | 471 | xml = record.xml() 472 | assert xml == textwrap.dedent( 473 | """\ 474 | 475 | 105 476 | 0 477 | 4 478 | 105 479 | 0 480 | 0x8000000000000000 481 | 482 | 12049 483 | 484 | 485 | System 486 | WKS-WIN764BITB.shieldbase.local 487 | 488 | 489 | System 490 | C:\\Windows\\System32\\Winevt\\Logs\\Archive-System-2012-03-14-04-17-39-932.evtx 491 | 492 | 493 | 494 | """ 495 | ) 496 | 497 | 498 | def test_render_records(system): 499 | """ 500 | regression test demonstrating formatting records to xml. 501 | 502 | Args: 503 | system (bytes): the system.evtx test file contents. pytest fixture. 504 | """ 505 | fh = evtx.FileHeader(system, 0x0) 506 | for chunk in fh.chunks(): 507 | for record in chunk.records(): 508 | assert record.xml() is not None 509 | 510 | 511 | def test_render_records2(security): 512 | """ 513 | regression test demonstrating formatting records to xml. 514 | 515 | Args: 516 | security (bytes): the security.evtx test file contents. pytest fixture. 
517 | """ 518 | fh = evtx.FileHeader(security, 0x0) 519 | for chunk in fh.chunks(): 520 | for record in chunk.records(): 521 | assert record.xml() is not None 522 | 523 | 524 | @pytest.mark.skipif(no_lxml, reason="lxml not installed") 525 | def test_render_records_lxml(system): 526 | """ 527 | regression test demonstrating formatting records to xml. 528 | 529 | Args: 530 | system (bytes): the system.evtx test file contents. pytest fixture. 531 | """ 532 | fh = evtx.FileHeader(system, 0x0) 533 | for i, chunk in enumerate(fh.chunks()): 534 | for j, record in enumerate(chunk.records()): 535 | assert record.lxml() is not None 536 | 537 | 538 | @pytest.mark.skipif(no_lxml, reason="lxml not installed") 539 | def test_render_records_lxml2(security): 540 | """ 541 | regression test demonstrating formatting records to xml. 542 | 543 | Args: 544 | security (bytes): the security.evtx test file contents. pytest fixture. 545 | """ 546 | fh = evtx.FileHeader(security, 0x0) 547 | for i, chunk in enumerate(fh.chunks()): 548 | for j, record in enumerate(chunk.records()): 549 | assert record.lxml() is not None 550 | --------------------------------------------------------------------------------