├── .editorconfig ├── .github └── workflows │ └── build.yml ├── .gitignore ├── LICENSE ├── README.md ├── kaitaistruct.py ├── pyproject.toml ├── setup.cfg └── setup.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | indent_style = space 6 | indent_size = 4 7 | insert_final_newline = true 8 | 9 | [*.ksy] 10 | charset = utf-8 11 | indent_style = space 12 | indent_size = 2 13 | insert_final_newline = true 14 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Check coding style 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v1 10 | - name: Python Style Checker 11 | uses: andymckay/pycodestyle-action@0.1.3 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask instance folder 57 | instance/ 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # PyBuilder 63 | target/ 64 | 65 | # IPython Notebook 66 | .ipynb_checkpoints 67 | 68 | # pyenv 69 | .python-version 70 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2015-2025 Kaitai Project 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kaitai Struct: runtime library for Python 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/kaitaistruct)](https://pypi.org/project/kaitaistruct/) 4 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/kaitaistruct)](https://pypi.org/project/kaitaistruct/#:~:text=Programming%20Language) 5 | 6 | This library implements the Kaitai Struct API for Python. 7 | 8 | [Kaitai Struct](https://kaitai.io/) is a declarative language used to 9 | describe various binary data structures, laid out in files or in memory: 10 | i.e. binary file formats, network stream packet formats, etc. 11 | 12 | It is similar to [Python's Construct 2.10](https://construct.readthedocs.io/en/latest/) 13 | but it is language-agnostic. The format description is done in YAML-based .ksy 14 | format, which then can be compiled into a wide range of target languages. 15 | 16 | Further reading: 17 | 18 | * [About Kaitai Struct](https://kaitai.io/) 19 | * [About API implemented in this library](https://doc.kaitai.io/stream_api.html) 20 | * [Python-specific notes](https://doc.kaitai.io/lang_python.html) in KS 21 | documentation discuss installation and usage of this runtime 22 | -------------------------------------------------------------------------------- /kaitaistruct.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import sys 3 | import struct 4 | from io import open, BytesIO, SEEK_CUR, SEEK_END # noqa 5 | import warnings 6 | 7 | PY2 = sys.version_info[0] == 2 8 | 9 | # Kaitai Struct runtime version, in the format defined by PEP 440. 10 | # Used by our setup.cfg to set the version number in 11 | # packaging/distribution metadata. 
12 | # Also used in Python code generated by older ksc versions (0.7 through 0.9) 13 | # to check that the imported runtime is compatible with the generated code. 14 | # Since ksc 0.10, the compatibility check instead uses the API_VERSION constant, 15 | # so that the version string does not need to be parsed at runtime 16 | # (see https://github.com/kaitai-io/kaitai_struct/issues/804). 17 | __version__ = '0.11.dev1' 18 | 19 | # Kaitai Struct runtime API version, as a tuple of ints. 20 | # Used in generated Python code (since ksc 0.10) to check that the imported 21 | # runtime is compatible with the generated code. 22 | API_VERSION = (0, 11) 23 | 24 | # pylint: disable=invalid-name,missing-docstring,too-many-public-methods 25 | # pylint: disable=useless-object-inheritance,super-with-arguments,consider-using-f-string 26 | 27 | 28 | class KaitaiStruct(object): 29 | def __init__(self, stream): 30 | self._io = stream 31 | 32 | def __enter__(self): 33 | return self 34 | 35 | def __exit__(self, *args, **kwargs): 36 | self.close() 37 | 38 | def close(self): 39 | self._io.close() 40 | 41 | @classmethod 42 | def from_file(cls, filename): 43 | f = open(filename, 'rb') 44 | try: 45 | return cls(KaitaiStream(f)) 46 | except Exception: 47 | # close file descriptor, then reraise the exception 48 | f.close() 49 | raise 50 | 51 | @classmethod 52 | def from_bytes(cls, buf): 53 | return cls(KaitaiStream(BytesIO(buf))) 54 | 55 | @classmethod 56 | def from_io(cls, io): 57 | return cls(KaitaiStream(io)) 58 | 59 | 60 | class ReadWriteKaitaiStruct(KaitaiStruct): 61 | def _fetch_instances(self): 62 | raise NotImplementedError() 63 | 64 | def _write(self, io=None): 65 | self._write__seq(io) 66 | self._fetch_instances() 67 | self._io.write_back_child_streams() 68 | 69 | def _write__seq(self, io): 70 | if io is not None: 71 | self._io = io 72 | 73 | 74 | class KaitaiStream(object): 75 | def __init__(self, io): 76 | self._io = io 77 | self.align_to_byte() 78 | self.bits_le = False 79 | 
self.bits_write_mode = False 80 | 81 | self.write_back_handler = None 82 | self.child_streams = [] 83 | 84 | try: 85 | self._size = self.size() 86 | # IOError is for Python 2 (IOError also exists in Python 3, but it has 87 | # become just an alias for OSError). 88 | # 89 | # Although I haven't actually seen a bare ValueError raised in this case 90 | # in practice, chances are some implementation may be doing it (see 91 | # the Python `io.IOBase` documentation for reference: 92 | # "Also, implementations may raise a ValueError (or 93 | # UnsupportedOperation) when operations they do not support are 94 | # called."). And I've seen ValueError raised at least in Python 2 when 95 | # calling read() on an unreadable stream. 96 | except (OSError, IOError, ValueError): 97 | # tell() or seek() failed - we have a non-seekable stream (which is 98 | # fine for reading, but writing will fail, see 99 | # _write_bytes_not_aligned()) 100 | pass 101 | 102 | def __enter__(self): 103 | return self 104 | 105 | def __exit__(self, *args, **kwargs): 106 | self.close() 107 | 108 | def close(self): 109 | try: 110 | if self.bits_write_mode: 111 | self.write_align_to_byte() 112 | else: 113 | self.align_to_byte() 114 | finally: 115 | self._io.close() 116 | 117 | # region Stream positioning 118 | 119 | def is_eof(self): 120 | if not self.bits_write_mode and self.bits_left > 0: 121 | return False 122 | 123 | # NB: previously, we first tried if self._io.read(1) did in fact read 1 124 | # byte from the stream (and then seeked 1 byte back if so), but given 125 | # that is_eof() may be called from both read and write contexts, it's 126 | # more universal not to use read() at all. 127 | # 
128 | return self._io.tell() >= self.size() 129 | 130 | def seek(self, n): 131 | if n < 0: 132 | raise InvalidArgumentError("cannot seek to invalid position %d" % (n,)) 133 | 134 | if self.bits_write_mode: 135 | self.write_align_to_byte() 136 | else: 137 | self.align_to_byte() 138 | 139 | self._io.seek(n) 140 | 141 | def pos(self): 142 | return self._io.tell() + (1 if self.bits_write_mode and self.bits_left > 0 else 0) 143 | 144 | def size(self): 145 | # Python has no internal File object API function to get 146 | # current file / StringIO size, thus we use the following 147 | # trick. 148 | io = self._io 149 | # Remember our current position 150 | cur_pos = io.tell() 151 | # Seek to the end of the stream and remember the full length 152 | full_size = io.seek(0, SEEK_END) 153 | 154 | if full_size is None: 155 | # In Python 2, the seek() method of 'file' objects (created by the 156 | # built-in open() function) has no return value, so we have to call 157 | # tell() ourselves to get the new absolute position - see 158 | # the Python 2 documentation of `file.seek()`. 159 | # 160 | # In Python 3, seek() methods of all 161 | # `io` streams return the new 162 | # position already, so this won't be needed once we drop support for 163 | # Python 2. 
164 | full_size = io.tell() 165 | 166 | # Seek back to the current position 167 | io.seek(cur_pos) 168 | return full_size 169 | 170 | # endregion 171 | 172 | # region Structs for numeric types 173 | 174 | packer_s1 = struct.Struct('b') 175 | packer_s2be = struct.Struct('>h') 176 | packer_s4be = struct.Struct('>i') 177 | packer_s8be = struct.Struct('>q') 178 | packer_s2le = struct.Struct('H') 184 | packer_u4be = struct.Struct('>I') 185 | packer_u8be = struct.Struct('>Q') 186 | packer_u2le = struct.Struct('f') 191 | packer_f8be = struct.Struct('>d') 192 | packer_f4le = struct.Struct(' 0: 308 | # 1 bit => 1 byte 309 | # 8 bits => 1 byte 310 | # 9 bits => 2 bytes 311 | bytes_needed = ((bits_needed - 1) // 8) + 1 # `ceil(bits_needed / 8)` 312 | buf = self._read_bytes_not_aligned(bytes_needed) 313 | if PY2: 314 | buf = bytearray(buf) 315 | for byte in buf: 316 | res = res << 8 | byte 317 | 318 | new_bits = res 319 | res = res >> self.bits_left | self.bits << bits_needed 320 | self.bits = new_bits # will be masked at the end of the function 321 | else: 322 | res = self.bits >> -bits_needed # shift unneeded bits out 323 | 324 | mask = (1 << self.bits_left) - 1 # `bits_left` is in range 0..7 325 | self.bits &= mask 326 | 327 | return res 328 | 329 | def read_bits_int(self, n): 330 | """Deprecated and no longer used as of KSC 0.9. It is only available 331 | for backwards compatibility and will be removed in the future. 332 | 333 | KSC 0.9 and later uses `read_bits_int_be()` instead. 
334 | """ 335 | warnings.warn( 336 | "read_bits_int() is deprecated since 0.9, use read_bits_int_be() instead", 337 | DeprecationWarning, 338 | stacklevel=2, 339 | ) 340 | return self.read_bits_int_be(n) 341 | 342 | def read_bits_int_le(self, n): 343 | self.bits_write_mode = False 344 | 345 | res = 0 346 | bits_needed = n - self.bits_left 347 | 348 | if bits_needed > 0: 349 | # 1 bit => 1 byte 350 | # 8 bits => 1 byte 351 | # 9 bits => 2 bytes 352 | bytes_needed = ((bits_needed - 1) // 8) + 1 # `ceil(bits_needed / 8)` 353 | buf = self._read_bytes_not_aligned(bytes_needed) 354 | if PY2: 355 | buf = bytearray(buf) 356 | for i, byte in enumerate(buf): 357 | res |= byte << (i * 8) 358 | 359 | new_bits = res >> bits_needed 360 | res = res << self.bits_left | self.bits 361 | self.bits = new_bits 362 | else: 363 | res = self.bits 364 | self.bits >>= n 365 | 366 | self.bits_left = -bits_needed % 8 367 | 368 | mask = (1 << n) - 1 # no problem with this in Python (arbitrary precision integers) 369 | res &= mask 370 | return res 371 | 372 | # endregion 373 | 374 | # region Byte arrays 375 | 376 | def read_bytes(self, n): 377 | self.align_to_byte() 378 | return self._read_bytes_not_aligned(n) 379 | 380 | def _read_bytes_not_aligned(self, n): 381 | if n < 0: 382 | raise InvalidArgumentError( 383 | "requested invalid %d amount of bytes" % 384 | (n,) 385 | ) 386 | 387 | is_satisfiable = True 388 | # When a large number of bytes is requested, try to check first 389 | # that there is indeed enough data left in the stream. 390 | # This avoids reading large amounts of data only to notice afterwards 391 | # that it's not long enough. For smaller amounts of data, it's faster to 392 | # first read the data unconditionally and check the length afterwards. 
393 | if ( 394 | n >= 8*1024*1024 # = 8 MiB 395 | # in Python 2, there is a common error ['file' object has no 396 | # attribute 'seekable'], so we need to make sure that seekable() exists 397 | and callable(getattr(self._io, 'seekable', None)) 398 | and self._io.seekable() 399 | ): 400 | num_bytes_available = self.size() - self.pos() 401 | is_satisfiable = (n <= num_bytes_available) 402 | 403 | if is_satisfiable: 404 | r = self._io.read(n) 405 | num_bytes_available = len(r) 406 | is_satisfiable = (n <= num_bytes_available) 407 | 408 | if not is_satisfiable: 409 | # noinspection PyUnboundLocalVariable 410 | raise EndOfStreamError( 411 | "requested %d bytes, but only %d bytes available" % 412 | (n, num_bytes_available), 413 | n, num_bytes_available 414 | ) 415 | 416 | # noinspection PyUnboundLocalVariable 417 | return r 418 | 419 | def read_bytes_full(self): 420 | self.align_to_byte() 421 | return self._io.read() 422 | 423 | def read_bytes_term(self, term, include_term, consume_term, eos_error): 424 | self.align_to_byte() 425 | term_byte = KaitaiStream.byte_from_int(term) 426 | r = bytearray() 427 | while True: 428 | c = self._io.read(1) 429 | if not c: 430 | if eos_error: 431 | raise NoTerminatorFoundError(term_byte, 0) 432 | 433 | return bytes(r) 434 | 435 | if c == term_byte: 436 | if include_term: 437 | r += c 438 | if not consume_term: 439 | self._io.seek(-1, SEEK_CUR) 440 | return bytes(r) 441 | 442 | r += c 443 | 444 | def read_bytes_term_multi(self, term, include_term, consume_term, eos_error): 445 | self.align_to_byte() 446 | unit_size = len(term) 447 | r = bytearray() 448 | while True: 449 | c = self._io.read(unit_size) 450 | if len(c) < unit_size: 451 | if eos_error: 452 | raise NoTerminatorFoundError(term, len(c)) 453 | 454 | r += c 455 | return bytes(r) 456 | 457 | if c == term: 458 | if include_term: 459 | r += c 460 | if not consume_term: 461 | self._io.seek(-unit_size, SEEK_CUR) 462 | return bytes(r) 463 | 464 | r += c 465 | 466 | def 
ensure_fixed_contents(self, expected): 467 | """Deprecated and no longer used as of KSC 0.9. It is only available 468 | for backwards compatibility and will be removed in the future. 469 | 470 | KSC 0.9 and later explicitly raises `ValidationNotEqualError` from an 471 | `if` statement instead. 472 | """ 473 | warnings.warn( 474 | "ensure_fixed_contents() is deprecated since 0.9, explicitly raise " 475 | "ValidationNotEqualError from an `if` statement instead", 476 | DeprecationWarning, 477 | stacklevel=2, 478 | ) 479 | actual = self._io.read(len(expected)) 480 | if actual != expected: 481 | raise Exception( 482 | "unexpected fixed contents: got %r, was waiting for %r" % 483 | (actual, expected) 484 | ) 485 | return actual 486 | 487 | @staticmethod 488 | def bytes_strip_right(data, pad_byte): 489 | return data.rstrip(KaitaiStream.byte_from_int(pad_byte)) 490 | 491 | @staticmethod 492 | def bytes_terminate(data, term, include_term): 493 | term_index = KaitaiStream.byte_array_index_of(data, term) 494 | if term_index == -1: 495 | return data[:] 496 | return data[:term_index + (1 if include_term else 0)] 497 | 498 | @staticmethod 499 | def bytes_terminate_multi(data, term, include_term): 500 | unit_size = len(term) 501 | search_index = data.find(term) 502 | while True: 503 | if search_index == -1: 504 | return data[:] 505 | mod = search_index % unit_size 506 | if mod == 0: 507 | return data[:search_index + (unit_size if include_term else 0)] 508 | search_index = data.find(term, search_index + (unit_size - mod)) 509 | 510 | # endregion 511 | 512 | # endregion 513 | 514 | # region Writing 515 | 516 | def _ensure_bytes_left_to_write(self, n, pos): 517 | try: 518 | full_size = self._size 519 | except AttributeError: 520 | raise ValueError("writing to non-seekable streams is not supported") 521 | 522 | num_bytes_left = full_size - pos 523 | if n > num_bytes_left: 524 | raise EndOfStreamError( 525 | "requested to write %d bytes, but only %d bytes left in the stream" % 526 | 
(n, num_bytes_left), 527 | n, num_bytes_left 528 | ) 529 | 530 | # region Integer numbers 531 | 532 | # region Signed 533 | 534 | def write_s1(self, v): 535 | self.write_bytes(KaitaiStream.packer_s1.pack(v)) 536 | 537 | # region Big-endian 538 | 539 | def write_s2be(self, v): 540 | self.write_bytes(KaitaiStream.packer_s2be.pack(v)) 541 | 542 | def write_s4be(self, v): 543 | self.write_bytes(KaitaiStream.packer_s4be.pack(v)) 544 | 545 | def write_s8be(self, v): 546 | self.write_bytes(KaitaiStream.packer_s8be.pack(v)) 547 | 548 | # endregion 549 | 550 | # region Little-endian 551 | 552 | def write_s2le(self, v): 553 | self.write_bytes(KaitaiStream.packer_s2le.pack(v)) 554 | 555 | def write_s4le(self, v): 556 | self.write_bytes(KaitaiStream.packer_s4le.pack(v)) 557 | 558 | def write_s8le(self, v): 559 | self.write_bytes(KaitaiStream.packer_s8le.pack(v)) 560 | 561 | # endregion 562 | 563 | # endregion 564 | 565 | # region Unsigned 566 | 567 | def write_u1(self, v): 568 | self.write_bytes(KaitaiStream.packer_u1.pack(v)) 569 | 570 | # region Big-endian 571 | 572 | def write_u2be(self, v): 573 | self.write_bytes(KaitaiStream.packer_u2be.pack(v)) 574 | 575 | def write_u4be(self, v): 576 | self.write_bytes(KaitaiStream.packer_u4be.pack(v)) 577 | 578 | def write_u8be(self, v): 579 | self.write_bytes(KaitaiStream.packer_u8be.pack(v)) 580 | 581 | # endregion 582 | 583 | # region Little-endian 584 | 585 | def write_u2le(self, v): 586 | self.write_bytes(KaitaiStream.packer_u2le.pack(v)) 587 | 588 | def write_u4le(self, v): 589 | self.write_bytes(KaitaiStream.packer_u4le.pack(v)) 590 | 591 | def write_u8le(self, v): 592 | self.write_bytes(KaitaiStream.packer_u8le.pack(v)) 593 | 594 | # endregion 595 | 596 | # endregion 597 | 598 | # endregion 599 | 600 | # region Floating point numbers 601 | 602 | # region Big-endian 603 | 604 | def write_f4be(self, v): 605 | self.write_bytes(KaitaiStream.packer_f4be.pack(v)) 606 | 607 | def write_f8be(self, v): 608 | 
self.write_bytes(KaitaiStream.packer_f8be.pack(v)) 609 | 610 | # endregion 611 | 612 | # region Little-endian 613 | 614 | def write_f4le(self, v): 615 | self.write_bytes(KaitaiStream.packer_f4le.pack(v)) 616 | 617 | def write_f8le(self, v): 618 | self.write_bytes(KaitaiStream.packer_f8le.pack(v)) 619 | 620 | # endregion 621 | 622 | # endregion 623 | 624 | # region Unaligned bit values 625 | 626 | def write_align_to_byte(self): 627 | if self.bits_left > 0: 628 | b = self.bits 629 | if not self.bits_le: 630 | b <<= 8 - self.bits_left 631 | 632 | # We clear the `bits_left` and `bits` fields using align_to_byte() 633 | # before writing the byte in the stream so that it happens even in 634 | # case the write fails. The reason is that if the write fails, it 635 | # would likely be a permanent issue that's not going to resolve 636 | # itself when retrying the operation with the same stream state, and 637 | # since seek() calls write_align_to_byte() at the beginning too, you 638 | # wouldn't be even able to seek anywhere without getting the same 639 | # exception again. So the stream could be in a broken state, 640 | # throwing the same exception over and over again even though you've 641 | # already processed it and you'd like to move on. And the only way 642 | # to get rid of it would be to call align_to_byte() externally 643 | # (given how it's currently implemented), but that's really just a 644 | # coincidence - that's a method intended for reading (not writing) 645 | # and it should never be necessary to call it from the outside (it's 646 | # more like an internal method now). 647 | # 648 | # So it seems more reasonable to deliver the exception once and let 649 | # the user application process it, but otherwise clear the bit 650 | # buffer to make the stream ready for further operations and to 651 | # avoid repeatedly delivering an exception for one past failed 652 | # operation. 
The rationale behind this is that it's not really a 653 | # failure of the "align to byte" operation, but the writing of some 654 | # bits to the stream that was requested earlier. 655 | self.align_to_byte() 656 | self._write_bytes_not_aligned(KaitaiStream.byte_from_int(b)) 657 | 658 | def write_bits_int_be(self, n, val): 659 | self.bits_le = False 660 | self.bits_write_mode = True 661 | 662 | mask = (1 << n) - 1 # no problem with this in Python (arbitrary precision integers) 663 | val &= mask 664 | 665 | bits_to_write = self.bits_left + n 666 | bytes_needed = ((bits_to_write - 1) // 8) + 1 # `ceil(bits_to_write / 8)` 667 | 668 | # Unlike self._io.tell(), pos() respects the `bits_left` field (it 669 | # returns the stream position as if it were already aligned on a byte 670 | # boundary), which ensures that we report the same numbers of bytes here 671 | # as read_bits_int_*() methods would. 672 | self._ensure_bytes_left_to_write(bytes_needed - (1 if self.bits_left > 0 else 0), self.pos()) 673 | 674 | bytes_to_write = bits_to_write // 8 675 | self.bits_left = bits_to_write % 8 676 | 677 | if bytes_to_write > 0: 678 | buf = bytearray(bytes_to_write) 679 | 680 | mask = (1 << self.bits_left) - 1 # `bits_left` is in range 0..7 681 | new_bits = val & mask 682 | val = val >> self.bits_left | self.bits << (n - self.bits_left) 683 | self.bits = new_bits 684 | 685 | for i in range(bytes_to_write - 1, -1, -1): 686 | buf[i] = val & 0xff 687 | val >>= 8 688 | self._write_bytes_not_aligned(buf) 689 | else: 690 | self.bits = self.bits << n | val 691 | 692 | def write_bits_int_le(self, n, val): 693 | self.bits_le = True 694 | self.bits_write_mode = True 695 | 696 | bits_to_write = self.bits_left + n 697 | bytes_needed = ((bits_to_write - 1) // 8) + 1 # `ceil(bits_to_write / 8)` 698 | 699 | # Unlike self._io.tell(), pos() respects the `bits_left` field (it 700 | # returns the stream position as if it were already aligned on a byte 701 | # boundary), which ensures that we report the 
same numbers of bytes here 702 | # as read_bits_int_*() methods would. 703 | self._ensure_bytes_left_to_write(bytes_needed - (1 if self.bits_left > 0 else 0), self.pos()) 704 | 705 | bytes_to_write = bits_to_write // 8 706 | old_bits_left = self.bits_left 707 | self.bits_left = bits_to_write % 8 708 | 709 | if bytes_to_write > 0: 710 | buf = bytearray(bytes_to_write) 711 | 712 | new_bits = val >> (n - self.bits_left) # no problem with this in Python (arbitrary precision integers) 713 | val = val << old_bits_left | self.bits 714 | self.bits = new_bits 715 | 716 | for i in range(bytes_to_write): 717 | buf[i] = val & 0xff 718 | val >>= 8 719 | self._write_bytes_not_aligned(buf) 720 | else: 721 | self.bits |= val << old_bits_left 722 | 723 | mask = (1 << self.bits_left) - 1 # `bits_left` is in range 0..7 724 | self.bits &= mask 725 | 726 | # endregion 727 | 728 | # region Byte arrays 729 | 730 | def write_bytes(self, buf): 731 | self.write_align_to_byte() 732 | self._write_bytes_not_aligned(buf) 733 | 734 | def _write_bytes_not_aligned(self, buf): 735 | n = len(buf) 736 | self._ensure_bytes_left_to_write(n, self._io.tell()) 737 | self._io.write(buf) 738 | 739 | def write_bytes_limit(self, buf, size, term, pad_byte): 740 | n = len(buf) 741 | # Strictly speaking, this assertion is redundant because it is already 742 | # done in the corresponding _check() method in the generated code, but 743 | # it seems to make sense to include it here anyway so that this method 744 | # itself does something reasonable for every set of arguments. 745 | # 746 | # However, it should never be `false` when operated correctly (and in 747 | # this case, assigning inconsistent values to fields of a KS-generated 748 | # object is considered correct operation if the user application calls 749 | # the corresponding _check(), which we know would raise an error and 750 | # thus the code should not reach _write() and this method at all). 
So 751 | # it's by design that this throws AssertionError, not any specific 752 | # error, because it's not intended to be caught in user applications, 753 | # but avoided by calling all _check() methods correctly. 754 | assert n <= size, "writing %d bytes, but %d bytes were given" % (size, n) 755 | 756 | self.write_bytes(buf) 757 | if n < size: 758 | self.write_u1(term) 759 | self.write_bytes(KaitaiStream.byte_from_int(pad_byte) * (size - n - 1)) 760 | 761 | # endregion 762 | 763 | # endregion 764 | 765 | # region Byte array processing 766 | 767 | @staticmethod 768 | def process_xor_one(data, key): 769 | if PY2: 770 | return bytes(bytearray(v ^ key for v in bytearray(data))) 771 | 772 | return bytes(v ^ key for v in data) 773 | 774 | @staticmethod 775 | def process_xor_many(data, key): 776 | if PY2: 777 | return bytes(bytearray(a ^ b for a, b in zip(bytearray(data), itertools.cycle(bytearray(key))))) 778 | 779 | return bytes(a ^ b for a, b in zip(data, itertools.cycle(key))) 780 | 781 | @staticmethod 782 | def process_rotate_left(data, amount, group_size): 783 | if group_size != 1: 784 | raise NotImplementedError( 785 | "unable to rotate group of %d bytes yet" % 786 | (group_size,) 787 | ) 788 | 789 | anti_amount = -amount % (group_size * 8) 790 | 791 | r = bytearray(data) 792 | for i, byte in enumerate(r): 793 | r[i] = (byte << amount) & 0xff | (byte >> anti_amount) 794 | return bytes(r) 795 | 796 | # endregion 797 | 798 | # region Misc runtime operations 799 | 800 | @staticmethod 801 | def int_from_byte(v): 802 | return ord(v) if PY2 else v 803 | 804 | @staticmethod 805 | def byte_from_int(i): 806 | return chr(i) if PY2 else bytes((i,)) 807 | 808 | @staticmethod 809 | def byte_array_index(data, i): 810 | return KaitaiStream.int_from_byte(data[i]) 811 | 812 | @staticmethod 813 | def byte_array_min(b): 814 | return KaitaiStream.int_from_byte(min(b)) 815 | 816 | @staticmethod 817 | def byte_array_max(b): 818 | return KaitaiStream.int_from_byte(max(b)) 819 | 820 | 
@staticmethod 821 | def byte_array_index_of(data, b): 822 | return data.find(KaitaiStream.byte_from_int(b)) 823 | 824 | @staticmethod 825 | def resolve_enum(enum_obj, value): 826 | """Resolves value using enum: if the value is not found in the map, 827 | we'll just use literal value per se. Works around problem with Python 828 | enums throwing an exception when encountering unknown value. 829 | """ 830 | try: 831 | return enum_obj(value) 832 | except ValueError: 833 | return value 834 | 835 | # endregion 836 | 837 | def to_byte_array(self): 838 | pos = self.pos() 839 | self.seek(0) 840 | r = self.read_bytes_full() 841 | self.seek(pos) 842 | return r 843 | 844 | class WriteBackHandler(object): 845 | def __init__(self, pos, handler): 846 | self.pos = pos 847 | self.handler = handler 848 | 849 | def write_back(self, parent): 850 | parent.seek(self.pos) 851 | self.handler(parent) 852 | 853 | def add_child_stream(self, child): 854 | self.child_streams.append(child) 855 | 856 | def write_back_child_streams(self, parent=None): 857 | _pos = self.pos() 858 | for child in self.child_streams: 859 | child.write_back_child_streams(self) 860 | 861 | # NOTE: Python 2 doesn't have list.clear() so it can't be used, see 862 | # https://docs.python.org/3.11/library/stdtypes.html#mutable-sequence-types 863 | # ("New in version 3.3: clear() and copy() methods.") 864 | del self.child_streams[:] 865 | self.seek(_pos) 866 | if parent is not None: 867 | self._write_back(parent) 868 | 869 | def _write_back(self, parent): 870 | self.write_back_handler.write_back(parent) 871 | 872 | 873 | class KaitaiStructError(Exception): 874 | """Common ancestor for all errors originating from correct Kaitai Struct 875 | usage (i.e. errors that indicate a problem with user input, not errors 876 | indicating incorrect usage that are not meant to be caught but fixed in the 877 | application code). 
class EndOfStreamError(KaitaiStructError, EOFError):
    """Signals an attempt to read or write past the end of the stream.

    Two attributes describe the failed request: `bytes_needed` is the
    number of bytes that were requested to be read or written, and
    `bytes_available` is the number of bytes remaining in the stream.
    """
    def __init__(self, msg, bytes_needed, bytes_available):
        # This error is always reported without a KSY source path.
        super(EndOfStreamError, self).__init__(msg, src_path=None)
        self.bytes_available = bytes_available
        self.bytes_needed = bytes_needed
class ValidationFailedError(KaitaiStructError):
    """Base class shared by all validation failures.

    The KaitaiStream IO object involved in the error is kept in the `io`
    attribute; it may be `None`, in which case the message carries no
    stream position.
    """
    def __init__(self, msg, io, src_path):
        # Prefix the message with the current stream position when a stream
        # was supplied.
        if io is None:
            location = ""
        else:
            location = "at pos %d: " % (io.pos(),)

        full_msg = location + "validation failed: " + msg
        super(ValidationFailedError, self).__init__(full_msg, src_path)
        self.io = io
class ValidationNotAnyOfError(ValidationFailedError):
    """Validation failure raised when the "actual" value was required to be
    one of the values from a list, but it is not.

    The offending value is available in the `actual` attribute.
    """
    def __init__(self, actual, io, src_path):
        shown = repr(actual)
        reason = "not any of the list, got %s" % (shown,)
        super(ValidationNotAnyOfError, self).__init__(reason, io, src_path)
        self.actual = actual
class ConsistencyError(Exception):
    """Error describing a failed check of an attribute value.

    The identifier passed as `attr_id` is stored in the `id` attribute,
    together with the `actual` and `expected` values that were compared.
    The message lists the expected value before the actual one.
    """
    def __init__(self, attr_id, actual, expected):
        # Note the order: the message shows `expected` first, then `actual`.
        details = (attr_id, repr(expected), repr(actual))
        super(ConsistencyError, self).__init__("Check failed: %s, expected: %s, actual: %s" % details)
        self.id = attr_id
        self.actual = actual
        self.expected = expected
License 22 | Programming Language :: Python :: 2 23 | Programming Language :: Python :: 2.7 24 | Programming Language :: Python :: 3 25 | Programming Language :: Python :: 3.4 26 | Programming Language :: Python :: 3.5 27 | Programming Language :: Python :: 3.6 28 | Programming Language :: Python :: 3.7 29 | Programming Language :: Python :: 3.8 30 | Programming Language :: Python :: 3.9 31 | Programming Language :: Python :: 3.10 32 | Programming Language :: Python :: 3.11 33 | Programming Language :: Python :: 3.12 34 | Programming Language :: Python :: 3.13 35 | Programming Language :: Python :: Implementation :: CPython 36 | Programming Language :: Python :: Implementation :: PyPy 37 | 38 | [options] 39 | zip_safe = True 40 | include_package_data = True 41 | py_modules = kaitaistruct 42 | python_requires = >=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.* 43 | install_requires = 44 | enum34; python_version < "3.4" 45 | setup_requires = 46 | setuptools >= 38.6.0 47 | 48 | [bdist_wheel] 49 | # This flag says that the code is written to work on both Python 2 and Python 50 | # 3. If at all possible, it is good practice to do this. If you cannot, you 51 | # will need to generate wheels for each Python version that you support. 52 | universal=1 53 | 54 | [pycodestyle] 55 | max-line-length = 140 56 | statistics = True 57 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup() 4 | --------------------------------------------------------------------------------