├── .github └── workflows │ ├── format-and-lint.yml │ └── pytest.yml ├── .gitignore ├── LICENSE ├── README.md ├── images ├── console.png └── gtk.png ├── parse.sh ├── pyproject.toml ├── src └── mp4viewer │ ├── __init__.py │ ├── __main__.py │ ├── console.py │ ├── datasource.py │ ├── gui.py │ ├── isobmff │ ├── __init__.py │ ├── box.py │ ├── cenc.py │ ├── descriptors.py │ ├── flv.py │ ├── fragment.py │ ├── movie.py │ ├── parser.py │ └── utils.py │ ├── json_renderer.py │ └── tree.py └── tests ├── 1.dat ├── __init__.py ├── ftyp.atom ├── moov.atom ├── test_box_parsing.py └── test_datasource.py /.github/workflows/format-and-lint.yml: -------------------------------------------------------------------------------- 1 | name: linter 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.9", "3.10", "3.11"] 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Set up Python ${{ matrix.python-version }} 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: ${{ matrix.python-version }} 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install pylint black flake8 22 | - name: Code formatting using black 23 | run: | 24 | black -l 100 --check . 25 | - name: Analysing the code with pylint 26 | run: | 27 | pylint $(git ls-files '*.py') 28 | - name: Lint with flake8 29 | run: | 30 | # stop the build if there are Python syntax errors or undefined names 31 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 32 | # E203 and W503 are not compatible with black (and pep8) 33 | flake8 . 
--count --max-complexity=10 --max-line-length=100 --statistics --ignore=E203,W503 34 | -------------------------------------------------------------------------------- /.github/workflows/pytest.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: tests 5 | 6 | on: 7 | push: 8 | branches: [ "develop" ] 9 | pull_request: 10 | branches: [ "develop" ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.9", "3.10", "3.11"] 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v5 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install pytest coverage 31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 32 | - name: Test with pytest 33 | run: | 34 | PYTHONPATH=src coverage run --source=src -m pytest 35 | coverage report -m 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.mp4 2 | *.3gp 3 | *.pyc 4 | *.zip 5 | *.7z 6 | *.tar 7 | *.tgz 8 | *.tar.gz 9 | *.rar 10 | *.vscode 11 | *__pycache__ 12 | dist 13 | .coverage 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Amarghosh Vadakkoot 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and 
associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![lint](https://github.com/amarghosh/mp4viewer/actions/workflows/format-and-lint.yml/badge.svg) 2 | ![Code style](https://img.shields.io/badge/code%20style-black-black "https://github.com/psf/black") 3 | ![pytest](https://github.com/amarghosh/mp4viewer/actions/workflows/pytest.yml/badge.svg) 4 | ![PyPI - Version](https://img.shields.io/pypi/v/mp4viewer "https://pypi.org/project/mp4viewer/") 5 | 6 | # MP4Viewer 7 | 8 | MP4Viewer is an mp4 metadata analyser (ISO base media file format) written in python. 9 | You can use it to analyse the metadata of mp4 files on the shell, a new gtk based window, or save it as a json file. 
10 | The definitions of structures used in the code can be found in the ~publicly available standard~ (it is not free to download anymore) [ISO/IEC 14496-12 - ISO base media file format](http://standards.iso.org/ittf/PubliclyAvailableStandards/index.html). 11 | 12 | ## Install using pip 13 | ```bash 14 | pip install --upgrade mp4viewer 15 | 16 | python3 -m mp4viewer [-h] [-o {stdout,gui,json}] [-e] [-c {on,off}] file.mp4 17 | ``` 18 | 19 | ## Run directly from code 20 | ```bash 21 | ./parse.sh [-h] [-o {stdout,gui,json}] [-e] [-c {on,off}] file.mp4 22 | ``` 23 | 24 | ## Arguments 25 | 26 | ``` 27 | positional arguments: 28 | input_file Location of the ISO bmff file (mp4) 29 | 30 | options: 31 | -h, --help show this help message and exit 32 | -o {stdout,gui,json}, --output {stdout,gui,json} 33 | Specify the output format. Please note that pygtk is required for `gui`. 34 | -c {on,off}, --color {on,off} 35 | Toggle colors in console based output; on by default. 36 | -j JSON_PATH, --json JSON_PATH 37 | Path to the json file where the output should be saved. If this is specified, the json output will be generated and written to this file even if the requested output format is not 38 | json. If the output format is json and this argument is not specified, the json object will be written to the current directory using "$PWD/$(basename input_file).mp4viewer.json" 39 | -e, --expand-arrays Do not truncate long arrays 40 | --debug Used for internal debugging 41 | --latex Generate latex-in-markdown for github README 42 | ``` 43 | 44 | ## Sample outputs: 45 | ### The default output on the console 46 | ![shell output](https://github.com/amarghosh/mp4viewer/blob/develop/images/console.png?raw=true) 47 | 48 | ### GTK 49 | ![window with gtk treeview](https://github.com/amarghosh/mp4viewer/blob/develop/images/gtk.png?raw=true) 50 | 51 | MP4Viewer uses gtk in graphics mode and requires pygtk3 for viewing results in a window. 
52 | This usually comes preinstalled with recent versions of ubuntu. 53 | 54 | You can find more similar [MP4 tools in this page](https://github.com/video-dev/community-knowledge-base/blob/master/list-of-iso-bmff-mp4-tools.md) 55 | -------------------------------------------------------------------------------- /images/console.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amarghosh/mp4viewer/291ba86d93e9bb62f362df01004fb0e28422a2f0/images/console.png -------------------------------------------------------------------------------- /images/gtk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amarghosh/mp4viewer/291ba86d93e9bb62f362df01004fb0e28422a2f0/images/gtk.png -------------------------------------------------------------------------------- /parse.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PYTHONPATH=src python3 -m mp4viewer $@ 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "mp4viewer" 3 | version = "0.1.2" 4 | authors = [ 5 | { name="Amarghosh Vadakkoot", email="myfirstname@gmail.com" }, 6 | ] 7 | description = "Parse mp4 files and inspect their metadata" 8 | readme = "README.md" 9 | requires-python = ">=3.8" 10 | classifiers = [ 11 | "Programming Language :: Python :: 3", 12 | "License :: OSI Approved :: MIT License", 13 | "Operating System :: OS Independent", 14 | ] 15 | 16 | [project.urls] 17 | Homepage = "https://github.com/amarghosh/mp4viewer" 18 | Issues = "https://github.com/amarghosh/mp4viewer/issues" 19 | 20 | [build-system] 21 | requires = ["hatchling"] 22 | build-backend = "hatchling.build" 23 | -------------------------------------------------------------------------------- 
/src/mp4viewer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amarghosh/mp4viewer/291ba86d93e9bb62f362df01004fb0e28422a2f0/src/mp4viewer/__init__.py -------------------------------------------------------------------------------- /src/mp4viewer/__main__.py: -------------------------------------------------------------------------------- 1 | """ The main entry point """ 2 | 3 | import os 4 | import sys 5 | import argparse 6 | 7 | from mp4viewer.tree import Tree, TreeType 8 | from mp4viewer.datasource import FileSource, DataBuffer 9 | from mp4viewer.console import ConsoleRenderer 10 | from mp4viewer.json_renderer import JsonRenderer 11 | 12 | from mp4viewer.isobmff.parser import IsobmffParser, getboxdesc 13 | from mp4viewer.isobmff.box import Box 14 | 15 | 16 | def get_box_node(box, args): 17 | """Get a tree node representing the box""" 18 | node = Tree(TreeType.ATOM, box.boxtype, getboxdesc(box.boxtype)) 19 | for field in box.generate_fields(): 20 | if isinstance(field, Box): 21 | add_box(node, field, args) 22 | continue 23 | if not isinstance(field, tuple): 24 | raise TypeError(f"Expected a tuple, got a {type(field)}") 25 | # generate fields yields a tuple of order (name, value, [formatted_value]) 26 | key = field[0] 27 | value = field[1] 28 | # Take care of lists of dicts 29 | if isinstance(value, list) and len(value) > 0 and isinstance(value[0], dict): 30 | node.add_list_of_sub_objects(key, value) 31 | continue 32 | 33 | if isinstance(value, dict) and len(value) > 0: 34 | kv_node = node.add_attr(Tree(TreeType.DICT, key)) 35 | kv_node.add_sub_object(value) 36 | continue 37 | 38 | if args.truncate and isinstance(value, list) and len(value) > 16: 39 | first3 = ",".join([str(i) for i in value[:3]]) 40 | last3 = ",".join([str(i) for i in value[-3:]]) 41 | value = f"[{first3} ... 
{last3}] {len(value)} items" 42 | node.add_attr(field[0], value, field[2] if len(field) == 3 else None) 43 | return node 44 | 45 | 46 | def add_box(parent, box, args): 47 | """Add the box and its children to the tree""" 48 | box_node = parent.add_child(get_box_node(box, args)) 49 | for child in box.children: 50 | add_box(box_node, child, args) 51 | return box_node 52 | 53 | 54 | def get_tree_from_file(path, args): 55 | """Parse the mp4 file and return a tree of boxes""" 56 | with open(path, "rb") as fd: 57 | # isobmff file parser 58 | parser = IsobmffParser(DataBuffer(FileSource(fd)), args.debug) 59 | boxes = parser.getboxlist() 60 | root = Tree(TreeType.ATOM, os.path.basename(path), "File") 61 | for box in boxes: 62 | add_box(root, box, args) 63 | return root 64 | 65 | 66 | def main(): 67 | """the main""" 68 | parser = argparse.ArgumentParser( 69 | description="Parse mp4 files (ISO bmff) and view the boxes and their contents. " 70 | "The output can be viewed on the console, a window, or saved in to a json file." 71 | ) 72 | parser.add_argument( 73 | "-o", 74 | "--output", 75 | choices=["stdout", "gui", "json"], 76 | default="stdout", 77 | help="Specify the output format. Please note that pygtk is required for `gui`. ", 78 | dest="output_format", 79 | ) 80 | parser.add_argument( 81 | "-c", 82 | "--color", 83 | choices=["on", "off"], 84 | default="on", 85 | dest="color", 86 | help="Toggle colors in console based output; on by default.", 87 | ) 88 | parser.add_argument( 89 | "-j", 90 | "--json", 91 | dest="json_path", 92 | help="Path to the json file where the output should be saved. If this is specified, " 93 | "the json output will be generated and written to this file even if the requested " 94 | "output format is not json. 
If the output format is json and this argument is not " 95 | "specified, the json object will be written to the current directory using " 96 | '"$PWD/$(basename input_file).mp4viewer.json"', 97 | ) 98 | parser.add_argument( 99 | "-e", 100 | "--expand-arrays", 101 | action="store_false", 102 | help="Do not truncate long arrays", 103 | dest="truncate", 104 | ) 105 | parser.add_argument("--debug", action="store_true", help="Used for internal debugging") 106 | parser.add_argument( 107 | "--latex", 108 | action="store_true", 109 | help="Generate latex-in-markdown for github README", 110 | ) 111 | parser.add_argument("input_file", help="Location of the ISO bmff file (mp4)") 112 | args = parser.parse_args() 113 | 114 | root = get_tree_from_file(args.input_file, args) 115 | 116 | renderer = None 117 | if args.output_format == "stdout": 118 | renderer = ConsoleRenderer(args) 119 | if args.color == "off": 120 | renderer.disable_colors() 121 | else: 122 | renderer.update_colors() 123 | 124 | if args.output_format == "gui": 125 | # pylint: disable=import-outside-toplevel 126 | from .gui import GtkRenderer 127 | 128 | renderer = GtkRenderer() 129 | 130 | if args.output_format == "json": 131 | renderer = JsonRenderer(mp4_path=args.input_file, output_path=args.json_path) 132 | 133 | renderer.render(root) 134 | 135 | # Handle the case where json output is required in addition to the requested format 136 | if args.json_path is not None and args.output_format != "json": 137 | JsonRenderer(mp4_path=args.input_file, output_path=args.json_path).render(root) 138 | 139 | return 0 140 | 141 | 142 | if __name__ == "__main__": 143 | sys.exit(main()) 144 | -------------------------------------------------------------------------------- /src/mp4viewer/console.py: -------------------------------------------------------------------------------- 1 | """ Console renderer """ 2 | 3 | import sys 4 | 5 | from mp4viewer.tree import Tree 6 | 7 | 8 | def _write(s): 9 | sys.stdout.write(s) 10 | 11 | 12 
| class ConsoleRenderer: 13 | """Print the box layout as a tree on to the console""" 14 | 15 | VERT = "!" 16 | HORI = "-" 17 | COLOR_HEADER = "\033[31m" # red 18 | COLOR_ATTR = "\033[36m" # cyan 19 | COLOR_SUB_TEXT = "\033[38;5;243m" # gray 20 | ENDCOL = "\033[0m" 21 | 22 | def __init__(self, args, offset=None, indent_unit=" "): 23 | self.offset = "" if offset is None else offset 24 | self.indent_unit = indent_unit 25 | self.header_prefix = "`" + indent_unit.replace(" ", ConsoleRenderer.HORI)[1:] 26 | self.use_colors = True 27 | self.eol = "\n" 28 | self.indent_with_vert = self.indent_unit[:-1] + ConsoleRenderer.VERT 29 | self.args = args 30 | if args.latex: 31 | self._enable_latex_md_for_github() 32 | 33 | def _enable_latex_md_for_github(self): 34 | self.offset = self.offset.replace(" ", " ") 35 | self.indent_unit = self.indent_unit.replace(" ", " ") 36 | self.header_prefix = "\\`" + self.header_prefix[1:] 37 | self.eol = " \n" 38 | self.indent_with_vert = self.indent_unit[: -len(" ")] + ConsoleRenderer.VERT 39 | ConsoleRenderer.COLOR_HEADER = " ${\\textsf{\\color{red}" 40 | ConsoleRenderer.COLOR_ATTR = " ${\\textsf{\\color{blue}" 41 | ConsoleRenderer.COLOR_SUB_TEXT = " ${\\textsf{\\color{grey}" 42 | ConsoleRenderer.ENDCOL = "}}$" 43 | 44 | def _wrap_color(self, text, color): 45 | suffix = ConsoleRenderer.ENDCOL if self.use_colors else "" 46 | return f"{color}{text}{suffix}" 47 | 48 | def _sub_text(self, text): 49 | if self.use_colors: 50 | wrapped_text = self._wrap_color(text, ConsoleRenderer.COLOR_SUB_TEXT) 51 | else: 52 | wrapped_text = f"<{text}>" 53 | return wrapped_text 54 | 55 | def _get_attr_color(self): 56 | return ConsoleRenderer.COLOR_ATTR if self.use_colors else "" 57 | 58 | def _get_data_prefix(self, atom, prefix): 59 | if atom.number_of_child_boxes(): 60 | data_prefix = prefix + self.indent_with_vert + self.indent_unit 61 | else: 62 | data_prefix = prefix + self.indent_unit + self.indent_unit 63 | return data_prefix 64 | 65 | def 
_show_attr_list(self, atom, attr, prefix): 66 | items = attr.children 67 | truncated = self.args.truncate and len(items) > 10 68 | if truncated: 69 | items = items[:10] 70 | 71 | for child in items: 72 | self.show_node(child, prefix + self.indent_unit) 73 | 74 | if truncated: 75 | data_prefix = self._get_data_prefix(atom, prefix) 76 | msg = f"<>" 77 | text = self._wrap_color(msg, self._get_attr_color()) 78 | _write(f"{data_prefix}{text}{self.eol}") 79 | 80 | def _show_attr(self, atom, attr, prefix): 81 | attr_color = self._get_attr_color() 82 | data_prefix = self._get_data_prefix(atom, prefix) 83 | _write(f"{data_prefix}{self._wrap_color(attr.name, attr_color)}: {attr.value}") 84 | if attr.display_value is not None: 85 | _write(f" {self._sub_text(attr.display_value)}{self.eol}") 86 | else: 87 | _write(self.eol) 88 | 89 | def show_node(self, node, prefix): 90 | """recursively display the node""" 91 | if node.is_atom(): 92 | header_color = ConsoleRenderer.COLOR_HEADER if self.use_colors else "" 93 | header_prefix = prefix + self.header_prefix if len(prefix) else "" 94 | else: 95 | header_color = "" 96 | header_prefix = prefix + self.indent_unit 97 | _write( 98 | f"{header_prefix}{self._wrap_color(node.name, header_color)}" 99 | f" {self._sub_text(node.value) if node.value else ''}{self.eol}" 100 | ) 101 | for i, child in enumerate(node.children): 102 | if child.is_attr(): 103 | self._show_attr(node, child, prefix) 104 | elif child.is_list(): 105 | self._show_attr_list(node, child, prefix) 106 | elif child.is_atom(): 107 | child_indent = prefix + self.indent_with_vert 108 | if i + 1 == len(node.children): 109 | child_indent = prefix + self.indent_unit 110 | self.show_node(child, child_indent) 111 | else: 112 | child_indent = prefix + self.indent_unit 113 | self.show_node(child, child_indent) 114 | 115 | def render(self, tree: Tree): 116 | """Render the tree""" 117 | print("=" * 80) 118 | self.show_node(tree, self.offset) 119 | 120 | def update_colors(self): 121 | 
"""disable colours if they are not supported""" 122 | if not sys.stdout.isatty(): 123 | self.disable_colors() 124 | 125 | def disable_colors(self): 126 | """Do not use ascii color prefixes and sufixes in the output""" 127 | self.use_colors = False 128 | -------------------------------------------------------------------------------- /src/mp4viewer/datasource.py: -------------------------------------------------------------------------------- 1 | """ Defines data buffer related classes """ 2 | 3 | import os 4 | from typing import BinaryIO 5 | 6 | 7 | class FileSource: 8 | """Read isobmff data from a file""" 9 | 10 | def __init__(self, f: BinaryIO): 11 | self.file = f 12 | self.size = os.fstat(f.fileno()).st_size 13 | 14 | def read(self, req_bytes): 15 | """read up to req_bytes""" 16 | return self.file.read(req_bytes) 17 | 18 | def seek(self, count, pos): 19 | """wrapper around file.seek""" 20 | return self.file.seek(count, pos) 21 | 22 | def __len__(self): 23 | return self.size 24 | 25 | 26 | class DataBuffer: 27 | """ 28 | Class represending a data buffer. 29 | Provides helper functions to read uint32, UTF8 strings etc from the buffer. 30 | """ 31 | 32 | CHUNK_SIZE = 16384 33 | 34 | def __init__(self, source): 35 | self.source = source 36 | 37 | # Chunk of bytes loaded from the source stream for convenience. 38 | # This is a sub-sequence of the byte stream managed by self.source. 
39 | self.data = b"" 40 | 41 | # length of current `data`; this can vary across calls to readmore 42 | self.buf_size = 0 43 | 44 | # Offset within `source` that matches the first byte of self.data 45 | self.stream_offset = 0 46 | 47 | # Number of bytes that has been consumed so far from `self.data` 48 | self.read_ptr = 0 49 | 50 | # Number of bits consumed from the current byte (data[read_ptr]) 51 | self.bit_position = 0 52 | 53 | self._reset() 54 | self.readmore() 55 | 56 | def reset(self): 57 | """reset everything""" 58 | self.source.seek(0, os.SEEK_SET) 59 | self._reset() 60 | 61 | def _reset(self): 62 | """reset internal offsets, doesn't touch the source""" 63 | self.bit_position = 0 64 | self.stream_offset = 0 65 | self.buf_size = 0 66 | self.read_ptr = 0 67 | self.data = b"" 68 | 69 | def __str__(self): 70 | # pylint: disable=consider-using-f-string 71 | return "" % ( 72 | len(self.source), 73 | self.buf_size, 74 | self.read_ptr, 75 | self.stream_offset, 76 | ) 77 | 78 | def current_position(self): 79 | """return the current offset of the buffer from the beginning of the `source`""" 80 | return self.stream_offset + self.read_ptr 81 | 82 | def remaining_bytes(self): 83 | """Return the number of bytes remaining to read""" 84 | return len(self.source) - (self.stream_offset + self.read_ptr) 85 | 86 | def readmore(self, minimum=0): 87 | # pylint: disable=consider-using-f-string 88 | """ 89 | Read some bytes from the source in to local data array. 
90 | If minimum is set, this will try to read at least that many bytes 91 | """ 92 | req_bytes = max(minimum, DataBuffer.CHUNK_SIZE) 93 | data = self.source.read(req_bytes) 94 | remaining_bytes = self.buf_size - self.read_ptr 95 | if len(data): 96 | # print(f"Read {len(data)}") 97 | self.data = b"".join([self.data[self.read_ptr :], data]) 98 | self.buf_size = remaining_bytes + len(data) 99 | self.stream_offset += self.read_ptr 100 | self.read_ptr = 0 101 | if self.buf_size < minimum: 102 | raise AssertionError( 103 | "Not enough data for %d bytes; read %d, remaining %d" 104 | % (minimum, len(data), self.buf_size) 105 | ) 106 | else: 107 | raise AssertionError( 108 | "Read nothing: req %d, offset %d, read_ptr %d" 109 | % (minimum, self.stream_offset, self.read_ptr) 110 | ) 111 | 112 | def hasmore(self) -> bool: 113 | """return true if we have bytes remaining to be read from the source""" 114 | if self.read_ptr == self.buf_size: 115 | try: 116 | self.readmore() 117 | except AssertionError: 118 | pass 119 | return self.read_ptr < self.buf_size 120 | 121 | def checkbuffer(self, length): 122 | """ 123 | Ensure that the buffer has at least length bytes available to read. 124 | Throws ValueError if there aren't enough bytes left. 
125 | """ 126 | if length < 0: 127 | raise ValueError(f"Negative bytes to check {length}") 128 | remaining_bytes = self.buf_size - self.read_ptr 129 | if remaining_bytes < length: 130 | self.readmore(length - remaining_bytes) 131 | remaining_bytes = self.buf_size - self.read_ptr 132 | 133 | if remaining_bytes < length: 134 | # pylint: disable=consider-using-f-string 135 | raise ValueError( 136 | "Attempt to read beyond buffer %d %d %d" % (self.read_ptr, self.buf_size, length) 137 | ) 138 | 139 | def peekstr(self, length, offset=0): 140 | """read a string of `length` bytes without updating the buffer pointer""" 141 | self.checkbuffer(length + offset) 142 | if self.bit_position: 143 | raise AssertionError(f"Not aligned: {self.bit_position}") 144 | return str(self.data[self.read_ptr + offset : self.read_ptr + offset + length], "utf-8") 145 | 146 | def readstr(self, length): 147 | """read a string of `length` bytes and update the buffer pointer""" 148 | s = self.peekstr(length) 149 | self.read_ptr += length 150 | return s 151 | 152 | def read_cstring(self, max_length=-1): 153 | """ 154 | Read a null ternimated string of max_length bytes and return a tuple with two elements: 155 | the string, and the number of bytes consumed. 
156 | """ 157 | if self.bit_position: 158 | raise AssertionError(f"Not aligned: {self.bit_position}") 159 | str_bytes = bytearray() 160 | s = "" 161 | bytes_read = 0 162 | while self.hasmore(): 163 | if bytes_read == max_length: 164 | break 165 | c = self.readbyte() 166 | bytes_read += 1 167 | if not c: 168 | break 169 | str_bytes.append(c) 170 | s = str_bytes.decode("utf-8") 171 | return s, bytes_read 172 | 173 | def peekint(self, bytecount): 174 | """ 175 | Read a number of specified bytes from the stream without updating the current position 176 | """ 177 | self.checkbuffer(bytecount) 178 | if self.bit_position: 179 | raise AssertionError(f"Not aligned: {self.bit_position}") 180 | v = 0 181 | for i in range(0, bytecount): 182 | data_byte = self.data[self.read_ptr + i] 183 | v = v << 8 | data_byte 184 | return v 185 | 186 | def peekbits(self, bitcount): 187 | """read `bitcount` bits without moving the pointer""" 188 | bytes_req = (bitcount + self.bit_position) // 8 189 | bytes_req += 1 if (bitcount + self.bit_position) % 8 else 0 190 | self.checkbuffer(bytes_req) 191 | if bitcount > 32: 192 | raise AssertionError(f"{bitcount} bits? 
Use readint64() and DIY") 193 | if not 0 <= self.bit_position < 8: 194 | raise AssertionError(f"bit_position {self.bit_position}") 195 | byte_offset = 0 196 | bits_read = 0 197 | result = 0 198 | while bits_read != bitcount: 199 | result <<= 8 200 | data_byte = self.data[self.read_ptr + byte_offset] 201 | result |= data_byte 202 | byte_offset += 1 203 | if bits_read == 0 and self.bit_position != 0: 204 | result &= (1 << (8 - self.bit_position)) - 1 205 | bits_read += 8 - self.bit_position 206 | else: 207 | bits_read += 8 208 | if bits_read > bitcount: 209 | result >>= bits_read - bitcount 210 | bits_read = bitcount 211 | return result 212 | 213 | def readbits(self, bitcount): 214 | """read the next `bitcount` bits and return it as an integer""" 215 | res = self.peekbits(bitcount) 216 | self.read_ptr += (bitcount + self.bit_position) // 8 217 | self.bit_position = (self.bit_position + bitcount) % 8 218 | return res 219 | 220 | def readbytes(self, count): 221 | """read `count` bytes from the stream and return it as a list of ints""" 222 | return [self.readbyte() for i in range(count)] 223 | 224 | def readint(self, bytecount): 225 | """read an integer of `bytecount` bytes from the stream""" 226 | v = self.peekint(bytecount) 227 | self.read_ptr += bytecount 228 | return v 229 | 230 | def readbyte(self): 231 | """read one byte from the current position and return it as an int""" 232 | return self.readint(1) 233 | 234 | def readint16(self): 235 | """read a 16 bit integer from the current position""" 236 | return self.readint(2) 237 | 238 | def readint32(self): 239 | """read a 32 bit integer from the current position""" 240 | return self.readint(4) 241 | 242 | def readint64(self): 243 | """read a 64 bit integer from the current position""" 244 | return self.readint(8) 245 | 246 | def skipbytes(self, count): 247 | """ 248 | Skip `count` bytes. 249 | The read position should be aligned to the nearest byte before you call this. 
250 | You can use readbits to discard any remaining bits from the current byte to accomplish this. 251 | """ 252 | if self.bit_position: 253 | raise AssertionError(f"Not aligned: {self.bit_position}") 254 | if count < 0: 255 | raise ValueError(f"Negative bytes to skip {count}") 256 | unread_loaded_bytes = self.buf_size - self.read_ptr 257 | if count < unread_loaded_bytes: 258 | self.read_ptr += count 259 | return 260 | 261 | if self.current_position() + count > len(self.source): 262 | overflow = (self.current_position() + count) - len(self) 263 | available_to_skip = len(self.source) 264 | raise BufferError( 265 | f"{self} consumed={self.current_position()} skipping {count} " 266 | f"bytes would cause overflow {overflow} available={available_to_skip}" 267 | ) 268 | 269 | self.source.seek(count - unread_loaded_bytes, os.SEEK_CUR) 270 | new_stream_offset = self.stream_offset + self.read_ptr + count 271 | self._reset() 272 | self.stream_offset = new_stream_offset 273 | 274 | def seekto(self, pos): 275 | """Move the read pointer to to `pos`, relative to the start of stream""" 276 | self.source.seek(pos, os.SEEK_SET) 277 | self._reset() 278 | self.stream_offset = pos 279 | self.readmore() 280 | 281 | def __len__(self): 282 | return len(self.source) 283 | -------------------------------------------------------------------------------- /src/mp4viewer/gui.py: -------------------------------------------------------------------------------- 1 | """ GTK based renderer """ 2 | 3 | # pylint: disable=import-error,wrong-import-position 4 | import xml.etree.ElementTree as ET 5 | import gi 6 | 7 | gi.require_version("Gtk", "3.0") 8 | from gi.repository import Gtk # noqa: E402 9 | 10 | 11 | class GtkRenderer: 12 | """GTK based renderer""" 13 | 14 | def __init__(self): 15 | w = Gtk.Window(title="MP4 Viewer") 16 | w.resize(1024, 768) 17 | w.connect("delete_event", self.on_delete) 18 | w.connect("destroy", self.on_destroy) 19 | self.window = w 20 | self.treestore = None 21 | 
self.treeview = None 22 | 23 | def on_delete(self, widget, event, data=None): 24 | # pylint: disable=unused-argument,missing-function-docstring 25 | return False 26 | 27 | def on_destroy(self, widget, data=None): 28 | # pylint: disable=unused-argument,missing-function-docstring 29 | Gtk.main_quit() 30 | 31 | def format_node(self, name, raw_value, display_value=None, istitle=False): 32 | """Returns an xml string that describes a single row""" 33 | # Ref: https://web.mit.edu/ghudson/dev/nokrb/third/pango/docs/html/PangoMarkupFormat.html 34 | root = ET.Element("markup") 35 | # key 36 | child = ET.SubElement(root, "span", {"size": "large"}) 37 | if istitle: 38 | child.attrib["weight"] = "bold" 39 | child.attrib["foreground"] = "red" 40 | else: 41 | child.attrib["foreground"] = "blue" 42 | child.text = name 43 | # value 44 | child = ET.SubElement(root, "span", {"foreground": "black", "size": "large"}) 45 | if display_value is None: 46 | child.text = f": {raw_value}" 47 | else: 48 | child.text = f": {display_value}" 49 | child = ET.SubElement(root, "span", {"foreground": "#121212", "style": "italic"}) 50 | child.text = f" ({raw_value})" 51 | 52 | return ET.tostring(root).decode() 53 | 54 | def populate(self, datanode, parent=None): 55 | """Add entries for each attribute of the current node and its children (recursive)""" 56 | treenode = self.treestore.append( 57 | parent, 58 | [ 59 | self.format_node( 60 | datanode.name, 61 | str(datanode.value) if datanode.value is not None else "", 62 | istitle=datanode.is_atom(), 63 | ) 64 | ], 65 | ) 66 | for child in datanode.children: 67 | self.populate(child, treenode) 68 | 69 | def render(self, data): 70 | """render the tree""" 71 | self.treestore = Gtk.TreeStore(str) 72 | self.treeview = Gtk.TreeView(model=self.treestore) 73 | 74 | col = Gtk.TreeViewColumn(data.name) 75 | cell = Gtk.CellRendererText() 76 | col.pack_start(cell, True) 77 | col.add_attribute(cell, "markup", 0) 78 | self.treeview.append_column(col) 79 | 80 | for 
child in data.children: 81 | self.populate(child) 82 | 83 | sw = Gtk.ScrolledWindow() 84 | sw.set_vexpand(True) 85 | sw.add(self.treeview) 86 | self.window.add(sw) 87 | self.treeview.expand_all() 88 | self.window.show_all() 89 | Gtk.main() 90 | -------------------------------------------------------------------------------- /src/mp4viewer/isobmff/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amarghosh/mp4viewer/291ba86d93e9bb62f362df01004fb0e28422a2f0/src/mp4viewer/isobmff/__init__.py -------------------------------------------------------------------------------- /src/mp4viewer/isobmff/box.py: -------------------------------------------------------------------------------- 1 | """ defines basic box classes Box, FullBox and FileType """ 2 | 3 | # pylint: disable=too-many-instance-attributes 4 | 5 | import traceback 6 | 7 | from collections import deque 8 | from .utils import error_print 9 | 10 | 11 | class Box: 12 | """ 13 | Base class for all boxes. 14 | Subclasses representing pure containers should set the is_container flag. 15 | Boxes with data and children should handle their children from their own parse() overrides. 
class Box:
    """
    Base class for all boxes.
    Subclasses representing pure containers should set the is_container flag.
    Boxes with data and children should handle their children from their own parse() overrides.
    """

    # Avoid printing parsing errors for known data boxes
    data_boxes = ["mdat", "udta"]

    def __init__(self, parser, parent=None, is_container=False):
        self.parent = parent
        buf = parser.buf
        pos = buf.current_position()
        self.buffer_offset = pos
        self.has_children = is_container
        # has_children can be updated by parse() of the derived class
        self.parse(parser)
        self.consumed_bytes = buf.current_position() - pos
        if self.has_children:
            self.parse_children(parser)
        # Anything left unconsumed indicates an incomplete parse; skip to the box end
        # so that the siblings that follow are still parsed at the right offsets.
        if self.remaining_bytes() > 0:
            if self.boxtype not in Box.data_boxes:
                error_print(
                    f"Skipping tailing bytes: Possible parse error (or unhandled box)"
                    f" in {self}: consumed {self.consumed_bytes}, skip {self.remaining_bytes()} "
                    f"{buf.peekint(4):08x}"
                )
            try:
                self._skip_remaining_bytes(buf)
                assert self.consumed_bytes == self.size, f"{self} size error"
            except BufferError:
                # The declared box size runs past the end of the file.
                error_print(f"\nInvalid data in box {self.boxtype} at {self.buffer_offset}")
                remaining_bytes = self._remaining_bytes_to_skip(buf)
                overflow = buf.current_position() + remaining_bytes - len(buf)
                error_print(
                    f"Attempt to skip {remaining_bytes} bytes from "
                    f"{buf.current_position()}, but the file is only {len(buf)} bytes; "
                    f"overflow by {overflow} bytes."
                )
                error_print(
                    "It is possible that the file was truncated by an incomplete download,"
                    " or it was generated using a slightly buggy encoder."
                )
                error_print(
                    "You can use ffmpeg to get more details:"
                    "`ffmpeg -v error -i file.mp4 -f null - `\n"
                )
                parser.dump_remaining_fourccs()
                error_print(f"skipping the remaining {buf.remaining_bytes()} bytes.\n")
                buf.skipbytes(buf.remaining_bytes())

    def _remaining_bytes_to_skip(self, buf):
        # A size of 0 means "extends to the end of the file" (ISO 14496-12).
        if self.size == 0:
            bytes_to_skip = buf.remaining_bytes()
        else:
            bytes_to_skip = self.size - self.consumed_bytes
        return bytes_to_skip

    def _skip_remaining_bytes(self, buf):
        bytes_to_skip = self._remaining_bytes_to_skip(buf)
        buf.skipbytes(bytes_to_skip)
        self.consumed_bytes += bytes_to_skip

    def remaining_bytes(self):
        """
        Returns the number of bytes remaining to be consumed.
        The subclasses should keep updating the `consumed_bytes` along the parse() method
        to keep this value accurate.
        """
        if self.size == 0:
            raise AssertionError(f"Box {self}: remaining_bytes not supported for size0")
        return self.size - self.consumed_bytes

    def parse(self, parse_ctx):
        """
        Parse the generic box header (size, fourcc, optional largesize / usertype)
        from parse_ctx.
        Subclasses should override this and invoke super().parse() before proceeding with their
        own parsing logic.
        """
        buf = parse_ctx.buf
        islarge = False
        size = buf.readint32()
        boxtype = buf.readstr(4)
        self.consumed_bytes = 8
        # 64 bit box: actual size follows in the next 8 bytes
        if size == 1:
            size = buf.readint64()
            self.consumed_bytes += 8
            islarge = True

        # Basic sanity check: a child cannot extend beyond its parent
        if self.parent is not None:
            if self.parent.consumed_bytes + size > self.parent.size:
                # pylint: disable=consider-using-f-string
                raise AssertionError(
                    "Size error: parent %s, consumed %d, child %s says %d"
                    % (self.parent, self.parent.consumed_bytes, boxtype, size)
                )

        self.size = size
        self.boxtype = boxtype
        self.islarge = islarge
        self.children = []
        # usertype: 'uuid' boxes carry a 16-byte extended type we don't interpret
        if boxtype == "uuid":
            buf.skipbytes(16)
            self.consumed_bytes += 16

        # free or skip shall be skipped
        if boxtype in ("free", "skip"):
            buf.skipbytes(self.remaining_bytes())
            self.consumed_bytes = self.size

    def parse_children(self, parser):
        """
        Parse all child boxes of this container.
        Called from Box.__init__ when has_children is set (not from parse()).
        """
        buf = parser.buf
        # 8 bytes is the minimum size of a box header
        while self.consumed_bytes + 8 <= self.size:
            try:
                box = parser.getnextbox(self)
                self.children.append(box)
                self.consumed_bytes += box.size
            except AssertionError as e:
                # On a bad child, give up on this container and jump to its end.
                print(traceback.format_exc())
                error_print(f"Error parsing children of {self}: {e}")
                buf.seekto(self.buffer_offset + self.size)
                self.consumed_bytes = self.size

    def find_ancestor(self, boxtype):
        """
        Get the first direct ancestor with a matching `boxtype`, or None
        """
        p = self.parent
        while p is not None:
            if p.boxtype == boxtype:
                return p
            p = p.parent
        return None

    def find_child(self, boxtype):
        """Get the first child with the matching boxtype"""
        for child in self.children:
            if child.boxtype == boxtype:
                return child
        return None

    def find_descendant(self, boxtype):
        """Find the first descendant with the matching boxtype; performs a breadth first search"""
        q = deque(self.children)
        while len(q) > 0:
            box = q.popleft()
            q.extend(box.children)
            if box.boxtype == boxtype:
                return box

        return None

    def find_descendant_of_ancestor(self, ancestor_boxtype, target_boxtype):
        """Find the first matching BFS descendant of a matching direct ancestor"""
        ancestor = self.find_ancestor(ancestor_boxtype)
        if ancestor is None:
            error_print(f"{self} has no ancestor of type {ancestor_boxtype}")
            return None

        descendant = ancestor.find_descendant(target_boxtype)
        if descendant is None:
            error_print(f"{self}: ancestor {ancestor} has no descendant of type {target_boxtype}")
        return descendant

    def generate_fields(self):
        """
        Generator that yields either boxes or tuples.
        Each tuple shall be of the format (name-of-field, actual-value, ...).
        Subclasses shall call `yield from super().generate_fields()` from the overriden functions.
        """
        yield ("size", self.size)

    def __str__(self):
        # NOTE(review): this returns an empty string; the original markup (presumably
        # something like f"<{self.boxtype} {self.size}>") appears to have been lost --
        # confirm against upstream before relying on log output that embeds boxes.
        return f""
class FullBox(Box):
    """Base class for boxes that carry a one-byte version and a 24-bit flags field."""

    def parse(self, parse_ctx):
        """Read version and flags immediately after the generic box header."""
        super().parse(parse_ctx)
        stream = parse_ctx.buf
        self.version = stream.readbyte()
        self.flags = stream.readint(3)
        self.consumed_bytes += 4

    def generate_fields(self):
        """Yield the common full-box fields followed by nothing else."""
        yield from super().generate_fields()
        yield ("version", self.version)
        yield ("flags", f"0x{self.flags:06X}")


class FileType(Box):
    """ftyp"""

    def parse(self, parse_ctx):
        """Read major/minor brand, then the compatible brands filling the rest of the box."""
        super().parse(parse_ctx)
        stream = parse_ctx.buf
        self.major_brand = stream.readstr(4)
        self.minor_version = stream.readint32()
        self.consumed_bytes += 8
        self.brands = []
        # Compatible brands occupy the remainder of the box, four characters apiece.
        while self.consumed_bytes < self.size:
            self.brands.append(stream.readstr(4))
            self.consumed_bytes += 4

    def generate_fields(self):
        """Yield display fields for the renderers."""
        yield from super().generate_fields()
        yield ("major brand", self.major_brand)
        yield ("minor version", self.minor_version)
        yield ("brands", ",".join(self.brands))

    def __str__(self):
        joined = ",".join(self.brands)
        return (
            f"{super().__str__()} major={self.major_brand} minor={self.minor_version:x}"
            f" with {len(self.brands)} brands <{joined}>"
        )
import box 4 | 5 | 6 | class TrackEncryptionBox(box.FullBox): 7 | """tenc""" 8 | 9 | def parse(self, parse_ctx): 10 | super().parse(parse_ctx) 11 | buf = parse_ctx.buf 12 | buf.skipbytes(1) 13 | if self.version == 0: 14 | buf.skipbytes(1) 15 | else: 16 | val = buf.readbyte() 17 | self.default_crypt_byte_block = (val & 0xF0) >> 4 18 | self.default_skip_byte_block = val & 0x0F 19 | self.default_is_protected = buf.readbyte() == 1 20 | self.default_per_sample_iv_size = buf.readbyte() 21 | self.default_kid = [] 22 | for _ in range(16): 23 | self.default_kid.append(buf.readbyte()) 24 | if self.default_is_protected == 1 and self.default_per_sample_iv_size == 0: 25 | self.default_constant_iv_size = buf.readbyte() 26 | self.default_constant_iv = [] 27 | for _ in range(self.default_constant_iv_size): 28 | self.default_constant_iv.append(buf.readbyte()) 29 | 30 | def generate_fields(self): 31 | yield from super().generate_fields() 32 | if self.version != 0: 33 | yield ("Default crypt byte block", self.default_crypt_byte_block) 34 | yield ("Default skip byte block", self.default_skip_byte_block) 35 | yield ("Default is protected", self.default_is_protected) 36 | yield ("Default per sample IV size", self.default_constant_iv_size) 37 | yield ("Default KID", [f"{i:02x}" for i in range(self.default_kid)]) 38 | if self.default_is_protected == 1 and self.default_per_sample_iv_size == 0: 39 | yield ("Default constant IV size", self.default_constant_iv_size) 40 | yield ( 41 | "Default constant IV", 42 | [f"{i:02x}" for i in range(self.default_constant_iv)], 43 | ) 44 | 45 | 46 | class ProtectionSystemSpecificHeader(box.FullBox): 47 | """pssh""" 48 | 49 | def parse(self, parse_ctx): 50 | super().parse(parse_ctx) 51 | buf = parse_ctx.buf 52 | self.system_id = [] 53 | for _ in range(16): 54 | self.system_id.append(buf.readbyte()) 55 | if self.version > 0: 56 | self.kid_count = buf.readint32() 57 | self.kids = [] 58 | for _ in range(self.kid_count): 59 | kid = [buf.readbyte() for _ in 
range(16)] 60 | self.kids.append(kid) 61 | self.data_size = buf.readint32() 62 | buf.skipbytes(self.data_size) 63 | 64 | def generate_fields(self): 65 | yield from super().generate_fields() 66 | yield ("System ID", "0x" + "%x" * 16 % tuple(self.system_id)) 67 | if self.version > 0: 68 | yield ("KID count", self.kid_count) 69 | for kid in self.kids: 70 | yield ("KID", ["f{i:02x}" for i in kid]) 71 | yield ("Data Size", self.data_size) 72 | 73 | 74 | class SchemeTypeBox(box.FullBox): 75 | """schm""" 76 | 77 | def parse(self, parse_ctx): 78 | super().parse(parse_ctx) 79 | buf = parse_ctx.buf 80 | self.scheme_type = buf.readstr(4) 81 | self.scheme_version = buf.readint32() 82 | if self.flags & 0x000001: 83 | self.consumed_bytes += 8 84 | self.scheme_uri = buf.read_cstring(self.size - self.consumed_bytes)[0] 85 | 86 | def generate_fields(self): 87 | yield from super().generate_fields() 88 | yield ("Scheme type", self.scheme_type) 89 | yield ("Scheme version", f"0x{self.scheme_version:x}") 90 | if self.flags & 0x000001: 91 | yield ("Scheme URI", self.scheme_uri) 92 | 93 | 94 | class OriginalFormatBox(box.Box): 95 | """frma""" 96 | 97 | def parse(self, parse_ctx): 98 | super().parse(parse_ctx) 99 | buf = parse_ctx.buf 100 | self.data_format = buf.readstr(4) 101 | 102 | def generate_fields(self): 103 | yield from super().generate_fields() 104 | yield ("Original format", self.data_format) 105 | 106 | 107 | boxmap = { 108 | "tenc": TrackEncryptionBox, 109 | # senc can't be used without guesswork/heroics/the creation of a 'root' 110 | # box because it needs the 'tenc' box which is in the moov header, 111 | # while this is in the moof header. They do not share a parent. 
class BaseDescriptor:
    """
    Base class for all descriptors (ISO/IEC 14496-1 expandable classes).
    The size does not include the bytes used to encode the tag and the size.
    """

    def __init__(self, buf):
        self.size = 0
        self.tag = 0
        self.header_size = 1
        self.start_position = buf.current_position()
        self.descriptors = []
        self.parse(buf)
        consumed_bytes = self.consumed_bytes(buf)
        self.unhandled_bytes = None
        # only tag and size were read (no subclass-specific parsing):
        # capture the whole body as raw bytes
        if consumed_bytes == self.header_size:
            self.unhandled_bytes = buf.readbytes(self.size)
            return

        self.parse_unhandled_descriptors(buf)

    def parse(self, buf):
        """
        parse the descriptor from the buffer.
        Subclasses shall override this function and call this super variant before starting
        the descriptor specific parsing.
        """
        self.tag = buf.readbyte()
        self.parse_size(buf)

    def parse_size(self, buf):
        """
        Parse the size of an expandable descriptor.
        See sizeOfInstance in 14496-1 section 8.3.3.
        """
        size = 0
        # 7 bits of size per byte; the high bit marks a continuation byte
        while True:
            b = buf.readbyte()
            self.header_size += 1
            size = (size << 7) | (b & 0x7F)
            if (b & 0x80) == 0:
                self.size = size
                return

    def parse_unhandled_descriptors(self, buf):
        """
        Take care of any unhandled/optional descriptors at the end of a descriptor body.
        Called from BaseDescriptor.init, so the subclasses don't have to worry about this.
        """
        while self.remaining_bytes(buf) > 0:
            klass = BaseDescriptor.get_descriptor_class(buf.peekint(1))
            self.descriptors.append(klass(buf))

    @staticmethod
    def get_descriptor_class(tag):
        """maintains a map of descriptor tags and corresponding classes"""
        class_map = {
            0x03: EsDescriptor,
            0x04: DecoderConfigDescriptor,
        }
        if tag in class_map:
            return class_map[tag]
        return BaseDescriptor

    def get_descriptor_name(self):
        """Get the name of the descriptor. This is an incomplete implementation"""
        name_map = {
            0x03: "ES_Descriptor",
            0x04: "DecoderConfigDescriptor",
            0x05: "DecoderSpecificInfo",
            0x06: "SLConfigDescriptor",
        }
        if self.tag in name_map:
            return name_map[self.tag]

        # BUG FIX: "Decriptor" -> "Descriptor" in the fallback display label
        return f"Descriptor {self.tag:02x}"

    def consumed_bytes(self, buf):
        """return the number of bytes consumed so far"""
        return buf.current_position() - self.start_position

    def remaining_bytes(self, buf):
        """get the number of unparsed bytes in this descriptor"""
        x = self.size + self.header_size - self.consumed_bytes(buf)
        if x < 0:
            raise AssertionError(f"{self} consumed={self.consumed_bytes(buf)}")
        return x

    def serialise(self):
        """Serialise the descriptor data into a dict object"""
        data = {"tag": self.tag, "size": self.size}
        if self.unhandled_bytes:
            data["data bytes"] = " ".join([f"{b:02x}" for b in self.unhandled_bytes])
        return data

    def add_optional_descriptors(self, data):
        """
        Add optional descriptors to the data.
        Subclasses overriding serialise can call this at the end of their own implementation.
        """
        for d in self.descriptors:
            data[d.get_descriptor_name()] = d.serialise()

    def __str__(self):
        return f"<{self.__class__.__name__}:{self.tag:02x} {self.size} bytes>"


class DecoderConfigDescriptor(BaseDescriptor):
    """Descriptor tag=0x04, signalled within ES_Descriptor"""

    def parse(self, buf):
        super().parse(buf)
        self.object_type = buf.readbyte()
        self.stream_type = buf.readbits(6)
        self.upstream = buf.readbits(1)
        buf.readbits(1)  # reserved
        self.buffer_size_db = buf.readint(3)
        self.max_bit_rate = buf.readint32()
        self.avg_bit_rate = buf.readint32()

    def serialise(self):
        data = {
            "tag": self.tag,
            "size": self.size,
            "object_type": Tree(TreeType.ATTR, "object_type", self.object_type, self.oti_str()),
            "stream_type": self.stream_type,
            "upstream": self.upstream,
            "buffer_size": self.buffer_size_db,
            "max bit rate": self.max_bit_rate,
            # BUG FIX: display label typo "avg bit rage" -> "avg bit rate"
            "avg bit rate": self.avg_bit_rate,
        }

        self.add_optional_descriptors(data)
        return data

    def oti_str(self):
        """Get the description for object type identifier"""
        values = {
            0x00: "Forbidden",
            0x01: "Systems ISO/IEC 14496-1",
            0x02: "Systems ISO/IEC 14496-1",
            0x03: "Interaction Stream",
            0x04: "Systems ISO/IEC 14496-1 Extended BIFS Configuration",
            0x05: "Systems ISO/IEC 14496-1 AFX",
            0x06: "Font Data Stream",
            0x07: "Synthesized Texture Stream",
            0x08: "Streaming Text Stream",
            0x20: "Visual ISO/IEC 14496-2",
            0x21: "Visual ITU-T Recommendation H.264 | ISO/IEC 14496-10",
            0x22: "Parameter Sets for ITU-T Recommendation H.264 | ISO/IEC 14496-10",
            0x40: "Audio ISO/IEC 14496-3",
            0x60: "Visual ISO/IEC 13818-2 Simple Profile",
            0x61: "Visual ISO/IEC 13818-2 Main Profile",
            0x62: "Visual ISO/IEC 13818-2 SNR Profile",
            0x63: "Visual ISO/IEC 13818-2 Spatial Profile",
            0x64: "Visual ISO/IEC 13818-2 High Profile",
            0x65: "Visual ISO/IEC 13818-2 422 Profile",
            0x66: "Audio ISO/IEC 13818-7 Main Profile",
            0x67: "Audio ISO/IEC 13818-7 LowComplexity Profile",
            0x68: "Audio ISO/IEC 13818-7 Scaleable Sampling Rate Profile",
            0x69: "Audio ISO/IEC 13818-3",
            0x6A: "Visual ISO/IEC 11172-2",
            0x6B: "Audio ISO/IEC 11172-3",
            0x6C: "Visual ISO/IEC 10918-1",
            0x6D: "reserved for registration authority",
            0x6E: "Visual ISO/IEC 15444-1",
        }
        oti = self.object_type
        s = values.get(oti, "reserved/user private")
        return f"0x{oti:02x}: {s}"


class EsDescriptor(BaseDescriptor):
    """ES_Descriptor, tag=0x03"""

    def parse(self, buf):
        super().parse(buf)
        self.esid = buf.readint16()
        self.stream_dependence_flag = buf.readbits(1)
        self.url_flag = buf.readbits(1)
        self.ocr_stream_flag = buf.readbits(1)
        self.stream_priority = buf.readbits(5)
        if self.stream_dependence_flag:
            self.depends_on_esid = buf.readint16()
        if self.url_flag:
            self.url_length = buf.readbyte()
            self.url = buf.readstr(self.url_length)
        if self.ocr_stream_flag:
            self.ocr_esid = buf.readint16()

    def serialise(self):
        data = {
            "tag": self.tag,
            "size": self.size,
            "esid": self.esid,
            "dependence_flag": self.stream_dependence_flag,
            "url_flag": self.url_flag,
            "ocr_stream_flag": self.ocr_stream_flag,
            "stream_priority": self.stream_priority,
        }

        if self.stream_dependence_flag:
            data["depends_on_esid"] = self.depends_on_esid
        # CONSISTENCY FIX: url and ocr_esid were parsed but never serialised,
        # unlike depends_on_esid above.
        if self.url_flag:
            data["url"] = self.url
        if self.ocr_stream_flag:
            data["ocr_esid"] = self.ocr_esid

        self.add_optional_descriptors(data)
        return data
class AdobeFragmentRandomAccess(box.FullBox):
    """afra: Adobe fragment random access table (time -> byte-offset lookup)."""

    def parse(self, parse_ctx):
        """Parse the local entry table and the optional global entry table."""
        super().parse(parse_ctx)
        buf = parse_ctx.buf
        # flag byte: bit 7 = long ids, bit 6 = long offsets, bit 5 = global entries present
        val = buf.readbyte()
        self.long_ids = val & 0x80 != 0
        self.long_offsets = val & 0x40 != 0
        self.global_entries_present = val & 0x20 != 0
        self.timescale = buf.readint32()
        self.entry_count = buf.readint32()
        self.entries = []
        for _ in range(self.entry_count):
            time = buf.readint64()
            # offset width is governed by the long_offsets flag
            if self.long_offsets:
                offset = buf.readint64()
            else:
                offset = buf.readint32()
            self.entries.append((time, offset))
        self.global_entry_count = 0
        self.global_entries = []
        if self.global_entries_present:
            self.global_entry_count = buf.readint32()
            for _ in range(self.global_entry_count):
                time = buf.readint64()
                # id width is governed by the long_ids flag
                if self.long_ids:
                    eid = buf.readint32()
                else:
                    eid = buf.readint16()
                self.global_entries.append((time, eid))

    def generate_fields(self):
        """Yield display fields for the console/GUI renderers."""
        yield from super().generate_fields()
        yield ("Long IDs", self.long_ids)
        yield ("Long offsets", self.long_offsets)
        yield ("Global entries present", self.global_entries_present)
        yield ("Timescale", self.timescale)
        yield ("Entry count", self.entry_count)
        for i, e in enumerate(self.entries):
            yield (f" Entry {i+1}", f"time={e[0]}, offset={e[1]}")
        if self.global_entries_present:
            yield ("Global entry count", self.global_entry_count)
            for i, e in enumerate(self.global_entries):
                yield (f" Global entry {i+1}", f"time={e[0]}, id={e[1]}")
class AdobeBootstrap(box.FullBox):
    """abst: Adobe bootstrap info box (HTTP Dynamic Streaming)."""

    def parse(self, parse_ctx):
        super().parse(parse_ctx)
        buf = parse_ctx.buf
        self.bootstrap_info_version = buf.readint32()
        val = buf.readbyte()
        # Byte layout per the Adobe Flash Video File Format spec (abst box):
        # Profile (2 bits), Live (1 bit), Update (1 bit), Reserved (4 bits).
        self.profile = (val & 0xC0) >> 6
        # BUG FIX: the old masks 0x40/0x20 tested bit 6 (part of Profile) for Live
        # and bit 5 for Update; Live is bit 5 (0x20) and Update is bit 4 (0x10).
        self.live = val & 0x20 != 0
        self.update = val & 0x10 != 0
        self.timescale = buf.readint32()
        self.current_media_time = buf.readint64()
        self.smpte_time_code_offset = buf.readint64()
        self.movie_id = buf.read_cstring()[0]
        self.server_entry_count = buf.readbyte()
        self.server_entries = []
        for _ in range(self.server_entry_count):
            self.server_entries.append(buf.read_cstring()[0])
        self.quality_entry_count = buf.readbyte()
        self.quality_entries = []
        for _ in range(self.quality_entry_count):
            self.quality_entries.append(buf.read_cstring()[0])
        self.drmdata = buf.read_cstring()[0]
        self.metadata = buf.read_cstring()[0]
        self.segment_run_table_entry_count = buf.readbyte()
        self.segment_run_table_entries = []
        # NOTE(review): Box.__init__ reads parser.buf, yet the raw buffer is passed
        # here -- confirm how the embedded run tables are constructed upstream.
        for _ in range(self.segment_run_table_entry_count):
            self.segment_run_table_entries.append(AdobeSegmentRunTable(buf))
        self.fragment_run_table_entry_count = buf.readbyte()
        self.fragment_run_table_entries = []
        for _ in range(self.fragment_run_table_entry_count):
            self.fragment_run_table_entries.append(AdobeFragmentRunTable(buf))

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("Profile", self.profile)
        yield ("Live", self.live)
        yield ("Update", self.update)
        yield ("Timescale", self.timescale)
        yield ("Current media time", self.current_media_time)
        yield ("SMPTE time code", self.smpte_time_code_offset)
        yield ("Movie ID", self.movie_id if len(self.movie_id) else "")
        yield ("Server entry count", self.server_entry_count)
        for s in self.server_entries:
            yield ("Server", s if len(s) else "")
        yield ("Quality entry count", self.quality_entry_count)
        for q in self.quality_entries:
            yield ("Quality", q if len(q) else "")
        yield ("DRM data", self.drmdata if len(self.drmdata) else "")
        yield ("Metadata", self.metadata if len(self.metadata) else "")
        yield ("Segment run table entry count", self.segment_run_table_entry_count)
        yield from self.segment_run_table_entries
        yield ("Fragment run table entry count", self.fragment_run_table_entry_count)
        yield from self.fragment_run_table_entries


class AdobeSegmentRunTable(box.FullBox):
    """asrt: maps segment numbers to fragments-per-segment counts."""

    def parse(self, parse_ctx):
        super().parse(parse_ctx)
        buf = parse_ctx.buf
        self.quality_entry_count = buf.readbyte()
        self.quality_url_modifiers = []
        for _ in range(self.quality_entry_count):
            self.quality_url_modifiers.append(buf.read_cstring()[0])
        self.segment_entry_count = buf.readint32()
        self.segment_entries = []
        for _ in range(self.segment_entry_count):
            first_segment = buf.readint32()
            fragments_per_segment = buf.readint32()
            self.segment_entries.append((first_segment, fragments_per_segment))

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("Quality entry count", self.quality_entry_count)
        for q in self.quality_url_modifiers:
            yield ("Quality url modifier", q if len(q) else "")
        yield ("Segment entry count", self.segment_entry_count)
        for idx, e in enumerate(self.segment_entries):
            yield (
                f"Entry {idx+1}",
                f"First segment={e[0]}, Fragments per segment={e[1]}",
            )
class AdobeFragmentRunTable(box.FullBox):
    """afrt: maps fragment numbers to timestamps and durations."""

    def parse(self, parse_ctx):
        """Read the timescale, the quality modifiers, and the fragment run entries."""
        super().parse(parse_ctx)
        stream = parse_ctx.buf
        self.timescale = stream.readint32()
        self.quality_entry_count = stream.readbyte()
        self.quality_url_modifiers = []
        for _ in range(self.quality_entry_count):
            self.quality_url_modifiers.append(stream.read_cstring()[0])
        self.fragment_entry_count = stream.readint32()
        self.fragment_entries = []
        for _ in range(self.fragment_entry_count):
            first_fragment = stream.readint32()
            first_fragment_timestamp = stream.readint64()
            fragment_duration = stream.readint32()
            # A zero duration is followed by an explicit discontinuity indicator byte.
            discontinuity = stream.readbyte() if fragment_duration == 0 else 0
            self.fragment_entries.append(
                (
                    first_fragment,
                    first_fragment_timestamp,
                    fragment_duration,
                    discontinuity,
                )
            )

    def generate_fields(self):
        """Yield display fields for the console/GUI renderers."""
        yield from super().generate_fields()
        yield ("Timescale", self.timescale)
        yield ("Quality entry count", self.quality_entry_count)
        for modifier in self.quality_url_modifiers:
            yield ("Quality url modifier", modifier if len(modifier) else "")
        yield ("Fragment entry count", self.fragment_entry_count)
        for n, entry in enumerate(self.fragment_entries, start=1):
            yield (
                f"Entry {n}",
                f"first fragment={entry[0]}, first fragment timestamp={entry[1]}, "
                f"fragment duration={entry[2]}, discontinuity={entry[3]}",
            )


# fourcc -> class map consumed by the parser
boxmap = {
    "afra": AdobeFragmentRandomAccess,
    "abst": AdobeBootstrap,
    "asrt": AdobeSegmentRunTable,
    "afrt": AdobeFragmentRunTable,
}
""" movie fragment related boxes """

# pylint: disable=too-many-instance-attributes

from . import box


class MovieFragmentHeader(box.FullBox):
    """mfhd: carries the sequence number of this fragment."""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.sequence_number = buf.readint32()

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("Sequence number", self.sequence_number)


class TrackFragmentHeader(box.FullBox):
    """tfhd: per-track defaults for the samples of this fragment.

    Each optional field is present only when the matching bit of the
    FullBox flags (populated by super().parse) is set, so the attributes
    below may or may not exist on a given instance.
    """

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.track_id = buf.readint32()
        if self.flags & 0x000001:  # base-data-offset present
            self.base_data_offset = buf.readint64()
        if self.flags & 0x000002:  # sample-description-index present
            self.sample_description_index = buf.readint32()
        if self.flags & 0x000008:  # default-sample-duration present
            self.default_sample_duration = buf.readint32()
        if self.flags & 0x000010:  # default-sample-size present
            self.default_sample_size = buf.readint32()
        if self.flags & 0x000020:  # default-sample-flags present
            self.default_sample_flags = buf.readint32()
        # these two are pure flag bits; no payload bytes follow
        self.duration_is_empty = self.flags & 0x010000 != 0
        self.default_base_is_moof = self.flags & 0x020000 != 0

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("Track id", self.track_id)
        if self.flags & 0x000001:
            yield ("Base data offset", self.base_data_offset)
        if self.flags & 0x000002:
            yield ("Sample description index", self.sample_description_index)
        if self.flags & 0x000008:
            yield ("Default sample duration", self.default_sample_duration)
        if self.flags & 0x000010:
            yield ("Default sample size", self.default_sample_size)
        if self.flags & 0x000020:
            yield ("Default sample flags", f"{self.default_sample_flags:08x}")
        yield ("Duration is empty", self.duration_is_empty)
        yield ("Default base is moof", self.default_base_is_moof)


class TrackFragmentRun(box.FullBox):
    """trun: the per-sample table of one run within a track fragment.

    Which per-sample fields exist in the bitstream is controlled by the
    FullBox flag bits; absent fields are recorded as 0.
    """

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.sample_count = buf.readint32()
        if self.flags & 0x000001:  # data-offset present
            self.data_offset = buf.readint32()
        if self.flags & 0x000004:  # first-sample-flags present
            self.first_sample_flags = buf.readint32()
        self.samples = []
        for _ in range(self.sample_count):
            # defaults used when the corresponding flag bit is not set
            dur = 0
            size = 0
            flags = 0
            off = 0
            if self.flags & 0x000100:
                dur = buf.readint32()
            if self.flags & 0x000200:
                size = buf.readint32()
            if self.flags & 0x000400:
                flags = buf.readint32()
            if self.flags & 0x000800:
                if self.version == 0:
                    off = buf.readint32()
                else:
                    # signed, so do the two's complement
                    off = buf.readint32()
                    if off & 0x80000000:
                        off = -1 * ((off ^ 0xFFFFFFFF) + 1)
            self.samples.append((dur, size, flags, off))

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("Sample count", self.sample_count)
        if self.flags & 0x000001:
            yield ("Data offset", self.data_offset)
        if self.flags & 0x000004:
            yield ("First sample flags", f"{self.first_sample_flags:08x}")
        i = 0
        for s in self.samples:
            i += 1
            # only print the fields that were actually present in the stream
            vals = []
            if self.flags & 0x000100:
                vals.append(f"duration={s[0]}")
            if self.flags & 0x000200:
                vals.append(f"size={s[1]}")
            if self.flags & 0x000400:
                vals.append(f"flags=0x{s[2]:08x}")
            if self.flags & 0x000800:
                vals.append(f"compositional time offset={s[3]}")
            yield (f" Sample {i}", ", ".join(vals))


class SampleAuxInfoSizes(box.FullBox):
    """saiz: sizes of per-sample auxiliary information."""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        if self.flags & 1:
            self.aux_info_type = buf.readint32()
            self.aux_info_type_parameter = buf.readint32()
        self.default_sample_info_size = buf.readbyte()
        self.sample_count = buf.readint32()
        self.samples = []
        # a default size of 0 means each sample carries its own size byte
        if self.default_sample_info_size == 0:
            for _ in range(self.sample_count):
                self.samples.append(buf.readbyte())

    def generate_fields(self):
        yield from super().generate_fields()
        if self.flags & 1:
            yield ("Aux info type", self.aux_info_type)
            yield ("Aux info type parameter", self.aux_info_type_parameter)
        if self.default_sample_info_size:
            yield ("Default sample info size", self.default_sample_info_size)
        else:
            for sample in self.samples:
                yield (" Sample info size", sample)


class SampleAuxInfoOffsets(box.FullBox):
    """saio: offsets of per-sample auxiliary information."""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        if self.flags & 1:
            self.aux_info_type = buf.readint32()
            self.aux_info_type_parameter = buf.readint32()
        self.entry_count = buf.readint32()
        self.offsets = []
        # offsets are 32-bit in version 0, 64-bit otherwise
        if self.version == 0:
            for _ in range(self.entry_count):
                self.offsets.append(buf.readint32())
        else:
            for _ in range(self.entry_count):
                self.offsets.append(buf.readint64())

    def generate_fields(self):
        yield from super().generate_fields()
        if self.flags & 1:
            yield ("Aux info type", self.aux_info_type)
            yield ("Aux info type parameter", self.aux_info_type_parameter)
        yield ("Entry Count", self.entry_count)
        for offset in self.offsets:
            yield (" Offset", offset)


class TrackFragmentDecodeTime(box.FullBox):
    """tfdt: base media decode time of the first sample in this fragment."""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        # 64-bit in version 1, 32-bit in version 0
        if self.version == 1:
            self.decode_time = buf.readint64()
        else:
            self.decode_time = buf.readint32()

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("Base media decode time", self.decode_time)


class SegmentType(box.FileType):
    """
    Box type: styp
    The definition of the segment type box is same as the file type box
    """


class SegmentIndexBox(box.FullBox):
    """sidx: index of media stream subsegments and their SAP information."""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.reference_id = buf.readint32()
        self.timescale = buf.readint32()
        # time/offset fields widen from 32 to 64 bits in version 1+
        if self.version == 0:
            self.earliest_presentation_time = buf.readint32()
            self.first_offset = buf.readint32()
        else:
            self.earliest_presentation_time = buf.readint64()
            self.first_offset = buf.readint64()
        buf.skipbytes(2)  # reserved
        self.references = []
        self.reference_count = buf.readint16()
        for _ in range(self.reference_count):
            # first word: 1-bit reference type + 31-bit referenced size
            val = buf.readint32()
            ref_type = (val & 0x80000000) >> 31
            ref_size = val & 0x7FFFFFFF
            ref_duration = buf.readint32()
            # third word: SAP flag (1) + SAP type (3) + SAP delta time (28)
            val = buf.readint32()
            starts_with_sap = (val & 0x80000000) != 0
            sap_type = (val & 0x70000000) >> 28
            sap_delta_time = val & 0x0FFFFFFF
            self.references.append(
                (
                    ref_type,
                    ref_size,
                    ref_duration,
                    starts_with_sap,
                    sap_type,
                    sap_delta_time,
                )
            )

    def generate_fields(self):
        # pylint: disable=consider-using-f-string
        yield from super().generate_fields()
        yield ("Reference ID", self.reference_id)
        yield ("Timescale", self.timescale)
        yield ("Earliest presentation time", self.earliest_presentation_time)
        yield ("First offset", self.first_offset)
        yield ("Reference count", self.reference_count)
        i = 0
        for ref in self.references:
            i += 1
            yield (
                f" Reference {i}",
                f"type={ref[0]}, size={ref[1]}, duration={ref[2]}, "
                f"starts with SAP={ref[3]}, SAP type={ref[4]}, SAP delta time={ref[5]}",
            )


# fourcc -> parser class for movie fragment boxes
boxmap = {
    # 'mfra' : MovieFragmentRandomAccessBox
    "mfhd": MovieFragmentHeader,
    "tfhd": TrackFragmentHeader,
    "trun": TrackFragmentRun,
    "saiz": SampleAuxInfoSizes,
    "saio": SampleAuxInfoOffsets,
    "tfdt": TrackFragmentDecodeTime,
    "styp": SegmentType,
    "sidx": SegmentIndexBox,
    # 'ssix' : SubsegmentIndexBox,
}
""" Movie and track related boxes """

# pylint: disable=too-many-instance-attributes
from mp4viewer.tree import Tree, TreeType
from . import box
from . import descriptors
from .utils import get_utc_from_seconds_since_1904
from .utils import parse_iso639_2_15bit
from .utils import stringify_duration
from .utils import error_print


class MovieHeader(box.FullBox):
    """mvhd: movie-level timing and presentation parameters."""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        # time fields widen from 32 to 64 bits in version 1
        if self.version == 1:
            self.creation_time = buf.readint64()
            self.modification_time = buf.readint64()
            self.timescale = buf.readint32()
            self.duration = buf.readint64()
        else:
            self.creation_time = buf.readint32()
            self.modification_time = buf.readint32()
            self.timescale = buf.readint32()
            self.duration = buf.readint32()
        self.rate = buf.readint32()
        self.volume = buf.readint16()
        buf.skipbytes(2 + 8)  # reserved
        self.matrix = [[buf.readint32() for j in range(3)] for i in range(3)]
        buf.skipbytes(24)  # pre_defined
        self.next_track_id = buf.readint32()

    def generate_fields(self):
        yield from super().generate_fields()
        yield (
            "creation time",
            self.creation_time,
            get_utc_from_seconds_since_1904(self.creation_time).ctime(),
        )
        # fix: this previously yielded self.creation_time as the value
        yield (
            "modification time",
            self.modification_time,
            get_utc_from_seconds_since_1904(self.modification_time).ctime(),
        )
        if self.timescale > 1000 and self.timescale % 1000 == 0:
            yield ("timescale", self.timescale, f"{self.timescale//1000}ms")
        else:
            yield ("timescale", self.timescale)
        yield (
            "duration",
            self.duration,
            stringify_duration(self.duration / self.timescale),
        )
        yield ("rate", f"0x{self.rate:08X}")
        yield ("volume", f"0x{self.volume:04X}")
        yield ("matrix", self.matrix)
        yield ("next track id", self.next_track_id)


class TrackHeader(box.FullBox):
    """tkhd: per-track timing, layering and geometry."""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        if self.version == 1:
            self.creation_time = buf.readint64()
            self.modification_time = buf.readint64()
            self.track_id = buf.readint32()
            buf.skipbytes(4)  # reserved
            self.duration = buf.readint64()
        else:
            self.creation_time = buf.readint32()
            self.modification_time = buf.readint32()
            self.track_id = buf.readint32()
            buf.skipbytes(4)  # reserved
            self.duration = buf.readint32()
        buf.skipbytes(8)  # reserved
        self.layer = buf.readint16()
        self.altgroup = buf.readint16()
        self.volume = buf.readint16()
        buf.skipbytes(2)  # reserved
        self.matrix = [[buf.readint32() for j in range(3)] for i in range(3)]
        self.width = buf.readint32()
        self.height = buf.readint32()

    def generate_fields(self):
        yield from super().generate_fields()
        flags = {
            "Track_enabled": self.flags & 1,
            "Track_in_movie": (self.flags & 2) >> 1,
            "Track_in_preview": (self.flags & 4) >> 2,
            "Track_size_is_aspect_ratio": (self.flags & 8) >> 3,
        }
        yield ("flag values", flags)
        yield (
            "creation time",
            self.creation_time,
            get_utc_from_seconds_since_1904(self.creation_time).ctime(),
        )
        yield (
            "modification time",
            self.modification_time,
            get_utc_from_seconds_since_1904(self.modification_time).ctime(),
        )
        yield ("track id", self.track_id)
        # the track duration is expressed in the movie (mvhd) timescale
        mvhd = self.find_descendant_of_ancestor("moov", "mvhd")
        if mvhd is None:
            error_print("Failed to find movie header to decode track duration")
            yield ("duration", self.duration)
        else:
            yield (
                "duration",
                self.duration,
                stringify_duration(self.duration / mvhd.timescale),
            )
        yield ("layer", f"0x{self.layer:04X}")
        yield ("alternate group", f"0x{self.altgroup:04X}")
        yield ("volume", f"0x{self.volume:04X}")
        yield ("matrix", self.matrix)
        yield ("width", self.width)
        yield ("height", self.height)


class EditList(box.FullBox):
    """elst: maps media time to presentation time via edit segments."""

    def parse(self, parse_ctx):
        super().parse(parse_ctx)
        buf = parse_ctx.buf
        entry_count = buf.readint32()
        self.entries = []
        for _ in range(entry_count):
            if self.version == 0:
                segment_duration = buf.readint32()
                media_time = buf.readint32()
            else:
                segment_duration = buf.readint64()
                media_time = buf.readint64()
            media_rate_integer = buf.readint16()
            media_rate_fraction = buf.readint16()
            self.entries.append(
                {
                    "segment_duration": segment_duration,
                    "media_time": media_time,
                    "media_rate_integer": media_rate_integer,
                    "media_rate_fraction": media_rate_fraction,
                }
            )

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("entry_count", len(self.entries))
        # segment durations are in the movie timescale; annotate when available
        mvhd = self.find_descendant_of_ancestor("moov", "mvhd")
        entries = []
        for entry in self.entries:
            dup = entry.copy()
            duration = entry["segment_duration"]
            if mvhd is None:
                str_duration = None
            else:
                str_duration = stringify_duration(duration / mvhd.timescale)
            dup["segment_duration"] = Tree(
                TreeType.ATTR, "segment_duration", duration, str_duration
            )
            entries.append(dup)
        yield ("entries", entries)


class ColourInformation(box.Box):
    """colr: colour information; only 'nclx' payloads are fully decoded."""

    def parse(self, parse_ctx):
        super().parse(parse_ctx)
        buf = parse_ctx.buf
        # peek the fourcc as text, then consume it as the integer colour_type
        self.c_type_str = buf.peekstr(4)
        self.colour_type = buf.readint32()
        if self.c_type_str == "nclx":
            self.colour_primaries = buf.readint16()
            self.transfer_characteristics = buf.readint16()
            self.matrix_coefficients = buf.readint16()
            self.fullrange_flag = buf.peekbits(1)
            buf.skipbytes(1)
        else:
            # ICC profile payloads are not decoded
            self._skip_remaining_bytes(buf)

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("colour type", self.colour_type, self.c_type_str)
        if self.c_type_str == "nclx":
            yield ("colour primaries", self.colour_primaries)
            yield ("transfer characteristics", self.transfer_characteristics)
            yield ("matrix coefficients", self.matrix_coefficients)
            yield ("full range flag", self.fullrange_flag)
        else:
            yield (self.c_type_str, "ICC_profile (see ISO 15076-1)")


class MediaHeader(box.FullBox):
    """mdhd: media-level timing and language."""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        if self.version == 1:
            self.creation_time = buf.readint64()
            self.modification_time = buf.readint64()
            self.timescale = buf.readint32()
            self.duration = buf.readint64()
        else:
            self.creation_time = buf.readint32()
            self.modification_time = buf.readint32()
            self.timescale = buf.readint32()
            self.duration = buf.readint32()
        # 1 pad bit + 3x5-bit ISO-639-2 language code
        self.language = buf.readint16() & 0x7FFF
        buf.skipbytes(2)  # pre_defined
        self.language = self.language  # packed 15-bit value, decoded on output

    def generate_fields(self):
        yield from super().generate_fields()
        yield (
            "creation time",
            self.creation_time,
            get_utc_from_seconds_since_1904(self.creation_time).ctime(),
        )
        yield (
            "modification time",
            self.modification_time,
            get_utc_from_seconds_since_1904(self.modification_time).ctime(),
        )
        yield ("timescale", self.timescale)
        yield (
            "duration",
            self.duration,
            stringify_duration(self.duration / self.timescale),
        )
        yield ("language", self.language, parse_iso639_2_15bit(self.language))


class VideoMediaHeader(box.FullBox):
    """vmhd"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.graphicsmode = buf.readint16()
        self.opcolor = []
        for _ in range(0, 3):
            self.opcolor.append(buf.readint16())

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("graphics mode", self.graphicsmode)
        yield ("opcolor", self.opcolor)


class SoundMediaHeader(box.FullBox):
    """smhd"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.balance = buf.readint16()
        buf.skipbytes(2)  # reserved

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("balance", self.balance)


class HintMediaHeader(box.FullBox):
    """hmhd"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.max_pdu_size = buf.readint16()
        self.avg_pdu_size = buf.readint16()
        # fix: maxbitrate/avgbitrate are 32-bit per ISO/IEC 14496-12
        # (previously read as 16-bit, which misreported both values)
        self.max_bitrate = buf.readint32()
        self.avg_bitrate = buf.readint32()
        buf.skipbytes(4)  # reserved

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("Max PDU size", self.max_pdu_size)
        yield ("Average PDU size", self.avg_pdu_size)
        yield ("Max bitrate", self.max_bitrate)
        yield ("Average bitrate", self.avg_bitrate)


class HandlerBox(box.FullBox):
    """hdlr: declares the media handler type ('vide', 'soun', 'hint', ...)."""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        buf.skipbytes(4)  # pre_defined
        self.handler = buf.readstr(4)
        buf.skipbytes(12)  # reserved
        self.consumed_bytes += 20
        # the remainder of the box is a null-terminated human readable name
        self.name = buf.read_cstring(self.size - self.consumed_bytes)[0]

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("handler", self.handler)
        yield ("name", self.name if len(self.name) else "")


class SampleEntry(box.Box):
    """base type for various sample entry classes"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        buf.skipbytes(6)  # reserved
        self.data_ref_index = buf.readint16()
        self.consumed_bytes += 8

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("data reference index", self.data_ref_index)


class HintSampleEntry(SampleEntry):
    """???? (inside sample description when handler=hint)"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        # NOTE(review): does not call super().parse(), so the common
        # SampleEntry fields are skipped along with the payload — confirm
        # this is intentional before changing it.
        buf.skipbytes(self.size - self.consumed_bytes)


class VisualSampleEntry(SampleEntry):
    """possibly avc1 (inside sample description when handler=vide)"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        buf.skipbytes(2 + 2 + 3 * 4)  # pre_defined/reserved
        self.width = buf.readint16()
        self.height = buf.readint16()
        self.hori_resolution = buf.readint32()
        self.vert_resolution = buf.readint32()
        buf.skipbytes(4)  # reserved
        self.frame_count = buf.readint16()
        # compressor name is a pascal string inside a fixed 32-byte field
        compressor_name_length = buf.readbyte()
        self.compressor_name = buf.readstr(compressor_name_length) if compressor_name_length else ""
        buf.skipbytes(32 - compressor_name_length - 1)
        self.depth = buf.readint16()
        buf.skipbytes(2)  # pre_defined
        self.has_children = True

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("width", self.width)
        yield ("height", self.height)
        yield ("horizontal resolution", f"0x{self.hori_resolution:08X}")
        yield ("vertical resolution", f"0x{self.vert_resolution:08X}")
        yield ("frame count", self.frame_count)
        yield ("compressor name", self.compressor_name)
        yield ("depth", self.depth)


class AudioSampleEntry(SampleEntry):
    """
    boxtype depends on the audio coding. Usually mp4a for mp4 audio.
    This box would be signalled within the sample description box when handler=soun.
    """

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        # 14496-12 says first eight bits are reserved.
        # Apple QuickTime format (MOV) uses those bytes for version, revision and vendor
        # The size of this box in QT varies according to the version, so we need the version
        self.quicktime_version = buf.readint16()
        buf.skipbytes(6)
        self.channel_count = buf.readint16()
        self.sample_size = buf.readint16()
        buf.skipbytes(4)
        self.sample_rate = buf.readint32()
        if self.quicktime_version == 1:
            self.samples_per_pkt = buf.readint32()
            self.bytes_per_pkt = buf.readint32()
            self.bytes_per_frame = buf.readint32()
            self.bytes_per_sample = buf.readint32()
        elif self.quicktime_version == 2:
            buf.skipbytes(36)
        self.has_children = True

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("channel count", self.channel_count)
        yield ("sample size", self.sample_size)
        # sample rate is a 16.16 fixed point value
        yield (
            "sample rate",
            self.sample_rate,
            f"{self.sample_rate >> 16}, {self.sample_rate & 0xFFFF}",
        )


class EsdsBox(box.FullBox):
    """esds box that encapsulates ES_descriptor defined in 14496-1"""

    def parse(self, parse_ctx):
        super().parse(parse_ctx)
        self.esd = descriptors.EsDescriptor(parse_ctx.buf)

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("ES descriptor", self.esd.serialise())


class MP4AudioSampleEntry(AudioSampleEntry):
    """mp4a"""

    def parse(self, parse_ctx):
        super().parse(parse_ctx)
        self.children.append(EsdsBox(parse_ctx))


class SampleDescription(box.FullBox):
    """stsd: dispatches to the sample entry class matching the handler."""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        # the handler type of the enclosing media decides the entry format
        media = self.find_ancestor("mdia")
        hdlr = media.find_child("hdlr") if media else None
        handler = hdlr.handler if hdlr else None
        self.entry_count = buf.readint32()
        for _ in range(self.entry_count):
            if handler == "soun":
                if buf.peekstr(4, 4) == "mp4a":
                    self.children.append(MP4AudioSampleEntry(parse_ctx))
                else:
                    self.children.append(AudioSampleEntry(parse_ctx))
            elif handler == "vide":
                self.children.append(VisualSampleEntry(parse_ctx))
            elif handler == "hint":
                self.children.append(HintSampleEntry(parse_ctx))
            else:
                # unknown handler: treat the entry as an opaque box
                entry = box.Box(parse_ctx)
                self.children.append(entry)
                buf.skipbytes(entry.size - entry.consumed_bytes)
        if len(self.children) != 0:
            self.has_children = True

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("entry count", self.entry_count)


class DataEntryUrnBox(box.FullBox):
    """'urn '"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.name = buf.read_cstring()[0]
        self.location = buf.read_cstring()[0]

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("name", self.name)
        yield ("location", self.location)


class DataEntryUrlBox(box.FullBox):
    """'url '"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.location = buf.read_cstring(self.size - self.consumed_bytes)[0]

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("location", self.location)


class DataReferenceBox(box.FullBox):
    """dref: container of url/urn data entries."""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.entry_count = buf.readint32()
        self.has_children = True
        for _ in range(self.entry_count):
            self.children.append(parse_ctx.getnextbox(self))

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("entry count", self.entry_count)


class TimeToSampleBox(box.FullBox):
    """stts"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.entry_count = buf.readint32()
        self.entries = []
        for _ in range(self.entry_count):
            count = buf.readint32()
            delta = buf.readint32()
            self.entries.append((count, delta))

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("entry count", self.entry_count)
        for entry in self.entries:
            yield ("sample count", entry[0])
            yield ("sample delta", entry[1])


class SampleToChunkBox(box.FullBox):
    """stsc"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.entry_count = buf.readint32()
        self.entries = []
        for _ in range(self.entry_count):
            first = buf.readint32()
            samples_per_chunk = buf.readint32()
            sdix = buf.readint32()
            self.entries.append(
                {"first": first, "samples_per_chunk": samples_per_chunk, "sdix": sdix}
            )

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("entry count", self.entry_count)
        yield ("entries", self.entries)


class ChunkOffsetBox(box.FullBox):
    """stco"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.entry_count = buf.readint32()
        self.entries = [buf.readint32() for i in range(self.entry_count)]

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("entry count", self.entry_count)
        yield ("chunk offsets", self.entries)


class SyncSampleBox(box.FullBox):
    """stss"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.entry_count = buf.readint32()
        self.entries = [buf.readint32() for i in range(self.entry_count)]

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("entry count", self.entry_count)
        yield ("sample numbers", self.entries)


class SampleSizeBox(box.FullBox):
    """stsz"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.sample_size = buf.readint32()
        self.sample_count = buf.readint32()
        # a zero default size means every sample lists its own size
        if self.sample_size == 0:
            self.entries = [buf.readint32() for i in range(self.sample_count)]
        else:
            self.entries = []

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("sample size", self.sample_size)
        yield ("sample count", self.sample_count)
        if self.sample_size == 0:
            yield ("sample sizes", self.entries)


class CompactSampleSizeBox(box.FullBox):
    """stz2"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        buf.skipbytes(3)  # reserved
        self.field_size = buf.readbyte()
        self.sample_count = buf.readint32()
        self.entries = [buf.readbits(self.field_size) for i in range(self.sample_count)]
        # skip padding bits
        if self.field_size == 4 and self.sample_count % 2 != 0:
            buf.readbits(4)

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("field size", self.field_size)
        yield ("sample count", self.sample_count)
        yield ("entries", self.entries)


class MovieExtendsHeader(box.FullBox):
    """mehd"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        if self.version == 1:
            self.fragment_duration = buf.readint64()
        else:
            self.fragment_duration = buf.readint32()

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("Fragment duration", self.fragment_duration)


class TrackExtendsBox(box.FullBox):
    """trex"""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.track_id = buf.readint32()
        self.default_sample_description_index = buf.readint32()
        self.default_sample_duration = buf.readint32()
        self.default_sample_size = buf.readint32()
        self.default_sample_flags = buf.readint32()

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("Track ID", self.track_id)
        yield (
            "Default sample description index",
            self.default_sample_description_index,
        )
        yield ("Default sample duration", self.default_sample_duration)
        yield ("Default sample size", self.default_sample_size)
        yield ("Default sample flags", self.default_sample_flags)


class AvcCBox(box.Box):
    """avcC: AVCDecoderConfigurationRecord (profile, level, SPS/PPS)."""

    def parse(self, parse_ctx):
        buf = parse_ctx.buf
        super().parse(parse_ctx)
        self.configuration_level = buf.readbyte()
        self.profile = buf.readbyte()
        self.profile_compatibility = buf.readbyte()
        self.level = buf.readbyte()
        buf.readbits(6)  # reserved
        self.len_minus_1 = buf.readbits(2)
        buf.readbits(3)  # reserved

        self.sps = []
        num_of_sps = buf.readbits(5)
        for _ in range(num_of_sps):
            sps_len = buf.readint16()
            self.sps.append(buf.readbytes(sps_len))

        self.pps = []
        num_of_pps = buf.readbyte()
        for _ in range(num_of_pps):
            pps_len = buf.readint16()
            self.pps.append(buf.readbytes(pps_len))

        # optional chroma/bit-depth extension when enough bytes remain
        if self.remaining_bytes() >= 4:
            buf.readbits(6)
            self.chroma_format = buf.readbits(2)
            buf.readbits(5)
            self.bit_depth_luma_minus_8 = buf.readbits(3)
            buf.readbits(5)
            self.bit_depth_chroma_minus_8 = buf.readbits(3)
            self.sps_ext_len = buf.readbyte()
            buf.skipbytes(self.sps_ext_len)
        else:
            self.chroma_format = -1

        self.has_children = False

    def generate_fields(self):
        yield from super().generate_fields()
        # fix: label was misspelled "Confiuration level"
        yield ("Configuration level", self.configuration_level)
        yield ("Profile", self.profile)
        yield ("Profile compatibility", self.profile_compatibility)
        yield ("Level", self.level)
        yield ("Length size minus 1", self.len_minus_1)
        yield ("number of sps", len(self.sps))
        for sps in self.sps:
            yield ("SPS", sps)
        yield ("number of pps", len(self.pps))
        for pps in self.pps:
            yield ("PPS", pps)
        if self.chroma_format != -1:
            yield ("chroma format", self.chroma_format)
            yield ("bit depth luma minus 8", self.bit_depth_luma_minus_8)
            yield ("bit depth chroma minus 8", self.bit_depth_chroma_minus_8)
            yield ("sps ext byte count", self.sps_ext_len)


class CompositionOffsetBox(box.FullBox):
    """ctts"""

    def parse(self, parse_ctx):
        super().parse(parse_ctx)
        buf = parse_ctx.buf
        self.entry_count = buf.readint32()
        self.entries = []
        for _ in range(self.entry_count):
            entry = {}
            entry["sample_count"] = buf.readint32()
            # pylint: disable=fixme
            # TODO: handle signed based on self.version
            entry["sample_offset"] = buf.readint32()
            self.entries.append(entry)

    def generate_fields(self):
        yield from super().generate_fields()
        yield ("entry count", self.entry_count)
        yield ("entries", self.entries)
entry["sample_depends_on"] = buf.readbits(2) 747 | entry["sample_is_depended_on"] = buf.readbits(2) 748 | entry["sample_has_redundancy"] = buf.readbits(2) 749 | self.samples.append(entry) 750 | 751 | def generate_fields(self): 752 | yield from super().generate_fields() 753 | yield ("samples", self.samples) 754 | 755 | def _get_sample_count(self): 756 | """ 757 | sample_count is signalled in SampleSizeBox or CompactSampleSizeBox. 758 | Try to get it from one of those boxes, starting with the sample size box. 759 | If neither are present, we can estimate it from the remaining bytes; 760 | length of a single entry is one byte. 761 | """ 762 | stsz = self.find_descendant_of_ancestor("stbl", "stsz") 763 | if stsz is not None: 764 | return stsz.sample_count 765 | # try stz2 766 | stz2 = self.find_descendant_of_ancestor("stbl", "stz2") 767 | if stz2 is not None: 768 | return stz2.sample_count 769 | error_print( 770 | f"Box {self} without a matching stsz or stz2. " 771 | f"sample_count estimated as {self.remaining_bytes()}" 772 | ) 773 | return self.remaining_bytes() 774 | 775 | 776 | boxmap = { 777 | "mvhd": MovieHeader, 778 | "tkhd": TrackHeader, 779 | "elst": EditList, 780 | "colr": ColourInformation, 781 | "mdhd": MediaHeader, 782 | "ctts": CompositionOffsetBox, 783 | "sdtp": SampleDependencyTypeBox, 784 | "vmhd": VideoMediaHeader, 785 | "smhd": SoundMediaHeader, 786 | "hmhd": HintMediaHeader, 787 | "hdlr": HandlerBox, 788 | "stsd": SampleDescription, 789 | "dref": DataReferenceBox, 790 | "stts": TimeToSampleBox, 791 | "stsc": SampleToChunkBox, 792 | "stco": ChunkOffsetBox, 793 | "stss": SyncSampleBox, 794 | "stsz": SampleSizeBox, 795 | "stz2": CompactSampleSizeBox, 796 | "url ": DataEntryUrlBox, 797 | "urn ": DataEntryUrnBox, 798 | "mehd": MovieExtendsHeader, 799 | "trex": TrackExtendsBox, 800 | "avcC": AvcCBox, 801 | } 802 | -------------------------------------------------------------------------------- /src/mp4viewer/isobmff/parser.py: 
# --------------------------------------------------------------------------------
""" isobmff parser public interface """

import traceback

from mp4viewer.datasource import DataBuffer
from . import box, movie, fragment, flv, cenc
from .utils import error_print


class IsobmffParser:
    """Parser class"""

    # fourccs that contain child boxes rather than a flat payload
    container_boxes = [
        "moov",
        "trak",
        "edts",
        "mdia",
        "minf",
        "dinf",
        "stbl",
        "mvex",
        "moof",
        "traf",
        "mfra",
        "skip",
        "meta",
        "ipro",
        "sinf",
        "schi",
    ]

    def __init__(self, buf: DataBuffer, debug=False):
        # merge the per-module fourcc maps into a single dispatch table
        boxmap = {
            "ftyp": box.FileType,
        }
        boxmap.update(movie.boxmap)
        boxmap.update(fragment.boxmap)
        boxmap.update(flv.boxmap)
        boxmap.update(cenc.boxmap)
        self.boxmap = boxmap
        self.buf = buf
        self.debug = debug

    def getboxlist(self):
        """returns a list of all boxes in the input stream"""
        boxes = []
        try:
            while self.buf.hasmore():
                next_box = self.getnextbox(None)
                boxes.append(next_box)
        except (AssertionError, TypeError):
            # report the parse failure but still return whatever was parsed so far
            error_print(traceback.format_exc())
        return boxes

    def getnextbox(self, parent: box.Box):
        """returns the next box in the stream"""
        fourcc = self.buf.peekstr(4, 4)
        if fourcc in self.boxmap:
            next_box = self.boxmap[fourcc](self, parent)
        else:
            # unknown fourcc: fall back to a generic box, recursing only if it is
            # a known container type
            is_container = fourcc in self.container_boxes
            next_box = box.Box(self, parent, is_container)
        return next_box

    def dump_remaining_fourccs(self):
        """
        Scan through the bytestream and print potential box types and their sizes.
        Hopefully, this can be used for debugging our parser errors.
        This is a work in progress.
        """
        if not self.debug:
            error_print("Detected potential parse error; run with --debug to see more info")
            return
        # BUGFIX: the two literals were previously concatenated without a separator,
        # printing "...boxes.This will take time..."
        print(
            "\nBuffer error detected; scanning through the file looking for boxes. "
            "This will take time as we need to go through every byte.\n"
        )
        buf = self.buf
        known_boxtypes = set(self.boxmap) | set(box_names)
        while buf.remaining_bytes() >= 4:
            fourcc = buf.peekint(4)
            # a fourcc is four printable ASCII chars; any byte with the high bit set
            # cannot be part of one, so advance a single byte and retry
            if (fourcc & 0x80) | (fourcc & 0x8000) | (fourcc & 0x800000) | (fourcc & 0x80000000):
                buf.skipbytes(1)
                continue
            fourcc = buf.peekstr(4)
            if fourcc in known_boxtypes:
                # the 32-bit box size immediately precedes the fourcc; rewind to read it
                buf.seekto(buf.current_position() - 4)
                sz = buf.readint32()
                remaining_bytes = buf.remaining_bytes() - 4
                if sz <= remaining_bytes:
                    print(f"Possible box {fourcc} at {buf.current_position()} of size {sz}")
                else:
                    delta = buf.current_position() + sz - len(buf.source)
                    print(
                        f"boxtype {fourcc} at {buf.current_position()} of size {sz} but "
                        f"overflows by {delta}"
                    )
                buf.skipbytes(4)
            else:
                buf.skipbytes(1)


# fourcc -> human readable description map
box_names = {
    # iso bmff box types
    "ftyp": "File type",
    "moov": "Movie container",
    "moof": "Movie fragment",
    "mfra": "Movie fragment random access",
    "mfhd": "Movie fragment header",
    "traf": "Track fragment",
    "tfhd": "Track fragment header",
    "trun": "Track fragment run",
    "saiz": "Sample auxiliary information sizes",
    "saio": "Sample auxiliary information offsets",
    "tfdt": "Track fragment decode time",
    "trak": "Track container",
    "mdia": "Media container",
    "minf": "Media information box",
    "dinf": "Data information box",
    "vmhd": "Video media header",
    "smhd": "Sound media header",
    "hmhd": "hint media header",
    "mvhd": "Movie header",
    "tkhd": "Track header",
    "mdhd": "Media header",
    "stbl": "Sample table",
    "hdlr": "Handler box",
    "stsd": "Sample description",
    "mp4a": "MP4 audio sample entry",
    "esds": "Elementary stream descriptor",
    "dref": "Data reference box",
    "url ": "Data entry URL box",
    "stts": "Time-to-sample box",
    "stsc": "Sample-to-chunk box",
    "stco": "Chunk offset box",
    "stss": "Sync sample box",
    "stsz": "Sample size box",
    "stz2": "Compact sample size box",
    "mvex": "Movie extends box",
    "mehd": "Movie extends header box",
    "trex": "Track extends defaults",
    "udta": "User data",
    "skip": "Skip",
    "free": "Free",
    "mdat": "Media data container",
    "styp": "Segment type",
    "sidx": "Segment index",
    "ssix": "Subsegment index",
    "sbgp": "Sample to group box",
    "sgpd": "Sample group description box",
    "elst": "Edit list",
    "colr": "Colour information",
    "ctts": "Composition offset",
    # common encryption boxes
    "tenc": "Track encryption box",
    "senc": "Sample encryption box",
    "pssh": "Protection system specific header box",
    "schm": "Scheme type box",
    "schi": "Scheme information box",
    "sinf": "Protection scheme information box",
    "frma": "Original format box",
    # flv specific boxes
    "afra": "Adobe fragment random access box",
    "abst": "Adobe bootstrap info box",
    "asrt": "Adobe segment run table box",
    "afrt": "Adobe fragment run table box",
}


def getboxdesc(name):
    """get box description for the given fourcc; fall back to the fourcc uppercased"""
    return box_names.get(name, name.upper())
# --------------------------------------------------------------------------------
# /src/mp4viewer/isobmff/utils.py:
# --------------------------------------------------------------------------------
""" helper functions """

import sys
from datetime import datetime, timedelta


def error_print(s: str):
    """
    Print to stderr.
    Use this for all errors so that we can easily redirect json output to a file from command line.
    """
    color_red = "\033[31m"
    endcol = "\033[0m"
    print(f"{color_red}{s}{endcol}", file=sys.stderr)


def parse_iso639_2_15bit(value):
    """
    The iso-639-2 three letter language code is encoded as three 5 bit values
    in the range 1 to 26 for 'a' to 'z'.
    """
    s = chr((value >> 10 & 0x1F) + ord("a") - 1)
    s += chr((value >> 5 & 0x1F) + ord("a") - 1)
    s += chr((value & 0x1F) + ord("a") - 1)
    return s


def get_utc_from_seconds_since_1904(seconds):
    """Time in various boxes are represented as seconds since 1904"""
    return datetime(1904, 1, 1) + timedelta(days=seconds / 86400, seconds=seconds % 86400)


def stringify_duration(total_seconds):
    """seconds to xxh xxm xxs"""
    value = int(total_seconds)
    hours = value // 3600
    value %= 3600
    minutes = value // 60
    value %= 60
    parts = []
    if hours > 0:
        parts.append(f"{hours}h")
    parts.append(f"{minutes:02d}m")
    parts.append(f"{value:02d}s")
    return " ".join(parts)
# --------------------------------------------------------------------------------
# /src/mp4viewer/json_renderer.py:
# --------------------------------------------------------------------------------
""" json renderer """

import os
import json


class JsonRenderer:
    """json renderer"""

    def __init__(self, mp4_path, output_path):
        self.mp4_path = mp4_path
        if output_path is not None:
            self.output_path = output_path
        else:
            # default: <input basename>.mp4viewer.json in the current directory
            mp4_base_name = os.path.basename(mp4_path)
            self.output_path = f"./{mp4_base_name}.mp4viewer.json"

    def _write(self, output_object):
        print(self.output_path)
        # "w" is sufficient here; the file is only written, never read back
        with open(self.output_path, "w", encoding="utf-8") as fd:
            json.dump(output_object, fd, indent=2)

    def render(self, data):
        """generate a json object from the mp4 metadata"""
        root = {"file": self.mp4_path}
        for child in data.children:
            self.add_node(child, root)
        self._write(root)

    def _add_dict_node(self, node, parent):
        """Serialise a DICT tree node under `parent`, promoting repeats to a list."""
        dict_wrapper = {}
        for item in node.children:
            if item.is_dict():
                self._add_dict_node(item, dict_wrapper)
            else:
                dict_wrapper[item.name] = self._get_attr(item)

        if node.name not in parent:
            # first entry; may be the only one, so no need for a list
            parent[node.name] = dict_wrapper
        else:
            # has multiple entries by this name; change it to a list
            if isinstance(parent[node.name], dict):
                # second entry
                temp = parent[node.name]
                list_in_parent = [temp]
                parent[node.name] = list_in_parent
            else:
                # third and subsequent entries
                list_in_parent = parent[node.name]
            # NOTE(review): append runs for the second and later entries; the source
            # dump's indentation was ambiguous here — confirm against upstream.
            list_in_parent.append(dict_wrapper)

    def add_node(self, node, parent):
        """recursively serialise box data"""
        j_node = {}
        if node.is_atom():
            j_node["boxtype"] = {"fourcc": node.name, "description": node.value}
            key_within_parent = "children"
        else:
            key_within_parent = node.name
        if key_within_parent not in parent:
            parent[key_within_parent] = []
        parent[key_within_parent].append(j_node)
        for child in node.children:
            if child.is_atom():
                self.add_node(child, j_node)
            elif child.is_dict():
                self._add_dict_node(child, j_node)
            elif child.is_list():
                for item in child.children:
                    self.add_node(item, j_node)
            else:
                # attr
                j_node[child.name] = self._get_attr(child)
        return j_node

    def _get_attr(self, attr):
        # attributes with a decoded/display form are emitted as a two-field object
        if attr.display_value is not None:
            return {"raw value": attr.value, "decoded": attr.display_value}
        return attr.value
# --------------------------------------------------------------------------------
# /src/mp4viewer/tree.py:
# --------------------------------------------------------------------------------
""" Defines the tree model used to represent the boxes """

from enum import Enum


class TreeType(Enum):
    """
    Types of trees.
    ATOM represents an iso 14996 atom.
    ATTR stands for a single attribute within an atom
    DICT represents a subobject within ATOM
    LIST is a list of DICT subobjects
    """

    ATOM = 1
    ATTR = 2
    DICT = 3
    LIST = 4


class Tree:
    """Class representing a Tree"""

    def __init__(self, tree_type, name: str, value=None, display_value=None):
        self.type = tree_type
        self.name = name
        self.value = value
        # optional human-readable form of `value`
        self.display_value = display_value
        self.children = []

    def is_atom(self):
        """Return true if this node represents an ISO box"""
        return self.type == TreeType.ATOM

    def is_dict(self):
        """Return true if this node represents a sub object within an iso box"""
        return self.type == TreeType.DICT

    def is_attr(self):
        """Return true if this node represents a single value within an iso box"""
        return self.type == TreeType.ATTR

    def is_list(self):
        """Return true if this node represents a list of sub objects within an iso box"""
        return self.type == TreeType.LIST

    def add_attr(self, *args):
        """
        Add an attribute to the root node of this tree
        Possible arguments are:
        - Either a single tree object of type ATTR, or
        - A key: str, value, and an optional display value, which will be used to construct
          a Tree(ATTR)
        """
        # First arg can be Attr or the name. If it is name, give value and optional converted value
        if len(args) == 0:
            raise TypeError("Add what?")
        if len(args) == 1 and not isinstance(args[0], Tree):
            raise TypeError(f"Sole argument should be a Tree, received {type(args[0])}")
        if len(args) > 1 and not isinstance(args[0], str):
            # BUGFIX: the message used to interpolate the value twice
            # ("received 7(type7)") instead of showing the actual type
            raise TypeError(
                f"First parameter shall be a string, "
                f"received {args[0]} (type {type(args[0]).__name__})"
            )

        if len(args) == 1:
            child = args[0]
        else:
            child = Tree(TreeType.ATTR, args[0], args[1], args[2] if len(args) > 2 else None)
        self.children.append(child)
        return child

    def add_sub_object(self, kv_object):
        """Add key value pairs from a dictionary object to the tree"""
        for key, value in kv_object.items():
            if isinstance(value, Tree):
                self.children.append(value)
            elif isinstance(value, dict):
                # nested dict becomes a DICT subtree
                kv_node = self.add_attr(Tree(TreeType.DICT, key))
                kv_node.add_sub_object(value)
            else:
                self.add_attr(key, value)

    def add_list_of_sub_objects(self, key: str, object_list: list):
        """Add a list of dict objects as a subtree"""
        wrapper = Tree(TreeType.LIST, key)
        self.children.append(wrapper)
        for index, item in enumerate(object_list):
            # value holds the 1-based position of the entry within the list
            kv_node = wrapper.add_child(Tree(TreeType.DICT, key, str(index + 1)))
            kv_node.add_sub_object(item)

    def add_child(self, child):
        """Add a child node to this tree"""
        if not isinstance(child, Tree):
            raise TypeError(f"add_child received {type(child)}")
        self.children.append(child)
        return child

    def number_of_child_boxes(self):
        """Number of trees with type=ATOM"""
        return len([x for x in self.children if isinstance(x, Tree) and x.is_atom()])

    def __str__(self):
        return f"Tree<{self.name}>"
# --------------------------------------------------------------------------------
# /tests/1.dat:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/amarghosh/mp4viewer/291ba86d93e9bb62f362df01004fb0e28422a2f0/tests/1.dat
# --------------------------------------------------------------------------------
# /tests/__init__.py:
"""Test input files were generated using xxd and xxd -r"""
# --------------------------------------------------------------------------------
# /tests/ftyp.atom: (binary fixture; printable payload: ftypmp42mp42avc1)
# --------------------------------------------------------------------------------
# /tests/moov.atom: (binary fixture)
# https://raw.githubusercontent.com/amarghosh/mp4viewer/291ba86d93e9bb62f362df01004fb0e28422a2f0/tests/moov.atom
# --------------------------------------------------------------------------------
# /tests/test_box_parsing.py:
#!/usr/bin/env python3
"""Test box parsing"""

from functools import reduce

from mp4viewer.datasource import DataBuffer, FileSource
from mp4viewer.isobmff.parser import IsobmffParser


def _string_to_fourcc_int(s):
    # fold the four characters into one big-endian 32-bit integer
    return reduce(lambda a, b: (a << 8) + b, [ord(x) for x in s], 0)


def test_ftyp():
    """ftyp box"""
    with open("tests/ftyp.atom", "rb") as fd:
        parser = IsobmffParser(DataBuffer(FileSource(fd)))
        boxes = parser.getboxlist()
        # exactly one top-level box, fully consumed
        assert len(boxes) == 1
        ftyp_box = boxes[0]
        assert ftyp_box.boxtype == "ftyp"
        assert ftyp_box.size == 24
        assert ftyp_box.major_brand == "mp42"
        assert ftyp_box.minor_version == 1
        assert len(ftyp_box.brands) == 2
        assert ftyp_box.brands[0] == "mp42"
        assert ftyp_box.brands[1] == "avc1"
        assert parser.buf.remaining_bytes() == 0
        assert len(list(ftyp_box.generate_fields())) > 0


def _validate_matrix_values(matrix):
    # All 3x3 matrixes are set to the following
    # 0x10000 0 0
    # 0 0x10000 0
    # 0 0 0x40000000
    for i in range(3):
        for j in range(3):
            if i == j and i < 2:
                assert matrix[i][j] == 0x10000
            elif i == 2 and j == 2:
                assert matrix[i][j] == 0x40000000
            else:
                assert matrix[i][j] == 0, f"{i},{j}={matrix[i][j]}"


def _validate_movie_header_box(mvhd):
    assert mvhd.boxtype == "mvhd"
    assert mvhd.size == 108
    assert mvhd.creation_time == 3531256179
    assert mvhd.modification_time == 3531256179
    assert mvhd.timescale == 1000
    assert mvhd.duration == 5096
    assert mvhd.rate == 0x10000
    assert mvhd.volume == 0x100
    assert mvhd.next_track_id == 0
    _validate_matrix_values(mvhd.matrix)
    assert len(list(mvhd.generate_fields())) > 0


def _validate_trak_1(trak):
    assert trak.boxtype == "trak"
    assert trak.size == 0x88
    assert len(trak.children) == 2
    _validate_tkhd_1(trak.children[0])
    _validate_edts(trak.children[1])


def _validate_edts(edts):
    assert edts.boxtype == "edts"
    assert edts.size == 0x24
    assert len(edts.children) == 1
    elst = edts.children[0]
    assert len(elst.entries) == 1
    entry = elst.entries[0]
    assert entry["segment_duration"] == 37026990
    assert entry["media_time"] == 3003
    assert entry["media_rate_integer"] == 1
    assert entry["media_rate_fraction"] == 0


def _validate_tkhd_1(tkhd):
    assert tkhd.boxtype == "tkhd"
    assert tkhd.size == 0x5C
    assert tkhd.flags == 7
    assert tkhd.creation_time == 3531256179
    assert tkhd.modification_time == 3531256179
    assert tkhd.track_id == 1
    assert tkhd.duration == 5096
    assert tkhd.layer == 0xBB
    assert tkhd.altgroup == 0xAA00
    assert tkhd.volume == 0x0010
    _validate_matrix_values(tkhd.matrix)
    assert tkhd.width == 0x05A00000
    assert tkhd.height == 0x05A00000


def test_moov():
    """moov and its children"""
    with open("tests/moov.atom", "rb") as fd:
        parser = IsobmffParser(DataBuffer(FileSource(fd)))
        boxes = parser.getboxlist()
        assert len(boxes) == 1
        moov = boxes[0]
        assert moov.boxtype == "moov"
        assert moov.size == 0xFC
        assert len(moov.children) == 2, moov.children
        assert len(list(moov.generate_fields())) > 0
        mvhd = moov.children[0]
        _validate_movie_header_box(mvhd)
        trak = moov.children[1]
        _validate_trak_1(trak)


if __name__ == "__main__":
    test_ftyp()
    test_moov()
# --------------------------------------------------------------------------------
# /tests/test_datasource.py:
#!/usr/bin/env python3
"""
Use xxd to edit binary files. It lets you convert between binary and hexdump formats

# 1.dat
00000000: a5a5 5a5a a5a5 a5a5 a5a5 a5a5 a5a5 a5a5  ..ZZ............
00000010: a5a5 a5a5 a5a5 a5a5 a5a5 a5a5 a5a5 a5a5  ................
00000020: a5a5 a5a5 ff6d 7034 7669 6577 6572 00a5  .....mp4viewer..
00000030: a5a5 a5a5 a5a5 a5a5 a5a5 a5a5 a5a5 a5a5  ................
"""
# pylint: disable=too-many-statements


from mp4viewer.datasource import DataBuffer, FileSource


class DataBufferTest:
    """Test DataBuffer and FileSource classes"""

    def __init__(self, file):
        self.buf = DataBuffer(FileSource(file))

    def run(self):
        """check various read functions"""
        buf = self.buf
        assert len(buf) == 64, f"Length is {len(buf)}"

        # First word is a5a5, second word is 5a5a, everything else is a5 until 36
        actual = buf.readint32()
        assert actual == 0xA5A55A5A, actual
        assert buf.current_position() == 4

        buf.reset()
        assert buf.current_position() == 0
        assert buf.readint16() == 0xA5A5
        # 5A is Z
        assert buf.readstr(2) == "ZZ"

        actual = buf.readint64()
        assert buf.current_position() == 12
        assert actual == 0xA5A5A5A5A5A5A5A5, actual

        # Test read bits
        # multiples of 8
        self.checkreadbits(32, 0xA5A5A5A5)
        self.checkreadbits(16, 0xA5A5)
        self.checkreadbits(8, 0xA5)
        assert buf.current_position() == 19

        # byte as two nibbles
        self.checkreadbits(4, 0xA)
        self.checkreadbits(4, 0x5)
        assert buf.current_position() == 20

        # read bits across bytes
        self.checkreadbits(4, 0xA)
        self.checkreadbits(8, 0x5A)
        self.checkreadbits(24, 0x5A5A5A)
        self.checkreadbits(28, 0x5A5A5A5)

        # smaller number of bits
        self.checkreadbits(3, 5)
        self.checkreadbits(3, 1)
        self.checkreadbits(2, 1)

        # 3 + 12 + 1
        self.checkreadbits(3, 5)
        self.checkreadbits(12, 0x2D2)
        self.checkreadbits(1, 1)

        # validate bit_position
        self.checkreadbits(5, 0x14)
        assert buf.bit_position == 5
        self.checkreadbits(30, 0x2D2D2D2D)
        assert buf.bit_position == 3
        buf.readbits(5)

        assert buf.bit_position == 0
        assert buf.current_position() == 36

        # next byte shall be 0xff
        assert buf.readbyte() == 0xFF
        assert buf.remaining_bytes() == 27, str(buf)

        # strings
        assert buf.readstr(3) == "mp4"
        buf.seekto(buf.current_position() - 3)
        # null terminated string
        cstr = buf.read_cstring()
        assert cstr[0] == "mp4viewer"
        assert cstr[1] == 10
        assert buf.current_position() == 47, buf.current_position()

        # cstring with max-length
        buf.seekto(buf.current_position() - 10)
        assert buf.read_cstring(3) == ("mp4", 3)

        # skip viewer\0 and the last remaining 0xa5
        buf.skipbytes(8)
        assert buf.remaining_bytes() == 16
        assert buf.current_position() == 48

        # readbytes
        a5bytes = buf.readbytes(8)
        for x in a5bytes:
            assert x == 0xA5

        buf.skipbytes(8)
        assert buf.remaining_bytes() == 0
        assert buf.current_position() == 64

    def checkreadbits(self, count, value):
        """read `count` bits and check the value"""
        actual = self.buf.readbits(count)
        assert actual == value, f"Expected 0x{value:X}, got 0x{actual:X}"


def test_datasource():
    """Test datasource"""
    with open("tests/1.dat", "rb") as f:
        dbt = DataBufferTest(f)
        dbt.run()
    print("Success")


if __name__ == "__main__":
    test_datasource()
# --------------------------------------------------------------------------------