├── test ├── __init__.py └── test_dm4.py ├── dm4 ├── __init__.py ├── __main__.py ├── headers.py ├── helpers.py └── dm4file.py ├── pyproject.toml ├── .gitignore ├── LICENSE └── README.rst /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dm4/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | A Digital Micrograph 4 (DM4) file reader. 3 | 4 | 1.0.1 Initial release 5 | 1.0.2 Switched to using Optional typing hint instead of Union for Python <= 3.8 compatibility 6 | 1.0.3 DM4TagDir is now imported with dm4 module to simplify typing. 7 | Invoking the dm4 module as a script now prints the tag directory tree of a passed DM4 file. 8 | Removed dependency on the six module 9 | """ 10 | 11 | __version__ = "1.0.3" 12 | 13 | from dm4.headers import DM4DataType, DM4DirHeader, DM4Header, DM4TagHeader, DM4Config, DM4TagDir, format_config 14 | from dm4.dm4file import DM4File 15 | from dm4.helpers import print_tag_directory_tree, print_tag_data 16 | -------------------------------------------------------------------------------- /dm4/__main__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Sep 12, 2013 3 | 4 | @author: u0490822 5 | """ 6 | import sys 7 | from dm4.dm4file import DM4File 8 | from dm4.helpers import print_tag_directory_tree, print_tag_data 9 | 10 | 11 | def main(): 12 | if len(sys.argv) < 2 or len(sys.argv) > 2: 13 | print("Usage: python -m dm4 ") 14 | print() 15 | print("Invoking dm4 as a module prints the tag directory tree of a Digital Micrograph 4 (DM4) file.") 16 | sys.exit(1) 17 | 18 | dm4_input_fullpath = sys.argv[1] 19 | 20 | with DM4File.open(dm4_input_fullpath) as dm4file: 21 | tags = dm4file.read_directory() 22 | print_tag_directory_tree(dm4file, tags) 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["flit_core >=3.4,<4"] 3 | build-backend = "flit_core.buildapi" 4 | 5 | [project] 6 | name = "dm4" 7 | authors = [{ name = "James Anderson", email = "james.r.anderson@utah.edu" }] 8 | readme = "README.rst" 9 | license = { file = "LICENSE" } 10 | classifiers = [ 11 | "Programming Language :: Python :: 3", 12 | "License :: OSI Approved :: MIT License", 13 | "Operating System :: OS Independent", 14 | "Development Status :: 5 - Production/Stable", 15 | "Intended Audience :: Developers", 16 | "Intended Audience :: Science/Research", 17 | "Topic :: Software Development :: Libraries :: Python Modules", 18 | "Topic :: Scientific/Engineering :: Bio-Informatics" 19 | ] 20 | dynamic = ["version", "description"] 21 | 22 | [project.optional-dependencies] 23 | test = [ 24 | "numpy", 25 | ] 26 | 27 | [project.urls] 28 | Home = "https://github.com/nornir/dm4" 29 | Issues = "https://github.com/nornir/dm4/issues" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | /.project 59 | /.pydevproject 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 James Anderson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /dm4/headers.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from typing import NamedTuple, Optional 3 | 4 | 5 | class DM4Header(NamedTuple): 6 | version: int 7 | root_length: int 8 | little_endian: bool 9 | 10 | 11 | class DM4TagHeader(NamedTuple): 12 | type: int 13 | name: str 14 | byte_length: int 15 | array_length: int 16 | data_type_code: int 17 | header_offset: int 18 | data_offset: int 19 | 20 | 21 | class DM4DirHeader(NamedTuple): 22 | type: int 23 | name: Optional[str] 24 | byte_length: int 25 | sorted: bool 26 | closed: bool 27 | num_tags: int 28 | data_offset: int 29 | 30 | 31 | class DM4Tag(NamedTuple): 32 | name: str 33 | data_type_code: int 34 | data: object 35 | 36 | 37 | class DM4DataType(NamedTuple): 38 | num_bytes: int 39 | signed: bool 40 | type_format: str 41 | 42 | 43 | class DM4Config(NamedTuple): 44 | """ 45 | Configuration for reading a DM4 file, these are unlikely to change 46 | """ 47 | 48 | data_type_dict: dict[int, DM4DataType] 49 | header_size: int 50 | root_tag_dir_header_size: int 51 | 52 | 53 | class DM4TagDir(NamedTuple): 54 | """Description of a directory in a DM4 file""" 55 | name: str 56 | dm4_tag: DM4DirHeader 57 | named_subdirs: dict[str, DM4TagDir] 58 | unnamed_subdirs: list[DM4TagDir] 59 | named_tags: dict[str, DM4TagHeader] 60 | unnamed_tags: list[DM4TagHeader] 61 | 62 | 63 | format_config = DM4Config( 64 | { 65 | 2: DM4DataType(2, True, "h"), # 2byte signed integer 66 | 3: DM4DataType(4, True, "i"), # 4byte signed integer 67 | 4: DM4DataType(2, False, "H"), # 2byte unsigned integer 68 | 5: DM4DataType(4, False, "I"), # 4byte unsigned integer 69 | 6: DM4DataType(4, False, "f"), # 4byte float 70 | 7: DM4DataType(8, False, "d"), # 8byte float 71 | 8: DM4DataType(1, False, "?"), 72 | 9: DM4DataType(1, False, "c"), 73 | 10: DM4DataType(1, True, "b"), 74 | 11: DM4DataType(8, True, "q"), 75 | 12: DM4DataType(8, True, "Q"), 76 | }, 77 | header_size=4 + 8 + 4, 78 | root_tag_dir_header_size=1 + 1 + 8, 79 | ) 80 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ### 2 | dm4 3 | ### 4 | 5 | A pure python file reader for Digital Micrograph's DM4 file format 6 | 7 | This package would not have been possible without the documentation provided by Dr Chris Boothroyd at http://www.er-c.org/cbb/info/dmformat/ Thank you. 8 | 9 | ############ 10 | Installation 11 | ############ 12 | 13 | Install using pip from the command line:: 14 | 15 | pip install dm4 16 | 17 | ####### 18 | Example 19 | ####### 20 | 21 | Below is a short example of reading the image data from a dm4 file. A more complete example can be found in the tests.:: 22 | 23 | import dm4 24 | import PIL 25 | 26 | input_path = "your_filename_here.dm4" 27 | 28 | with dm4.DM4File.open(input_path) as dm4file: 29 | tags = dm4file.read_directory() 30 | 31 | image_data_tag = tags.named_subdirs['ImageList'].unnamed_subdirs[1].named_subdirs['ImageData'] 32 | image_tag = image_data_tag.named_tags['Data'] 33 | 34 | XDim = dm4file.read_tag_data(image_data_tag.named_subdirs['Dimensions'].unnamed_tags[0]) 35 | YDim = dm4file.read_tag_data(image_data_tag.named_subdirs['Dimensions'].unnamed_tags[1]) 36 | 37 | image_array = np.array(dm4file.read_tag_data(image_tag), dtype=np.uint16) 38 | image_array = np.reshape(image_array, (YDim, XDim)) 39 | 40 | output_fullpath = "sample.tif" 41 | image = PIL.Image.fromarray(image_array, 'I;16') 42 | image.save(output_fullpath) 43 | 44 | ############ 45 | Script usage 46 | ############ 47 | 48 | The dm4 module can be invoked as a script to print a dm4 file's full directory to the command line. This is helpful when determining the exact structure of a specific DM4 file: :: 49 | 50 | python -m dm4 your_dm4_file.dm4 51 | 52 | ################ 53 | Helper Functions 54 | ################ 55 | 56 | Print all of the tags and directories contianed in a dm4 file to the console:: 57 | 58 | dm4.print_tag_directory_tree(dmfile: DM4File, dir_obj: DM4TagDir, indent_level: int = 0) 59 | 60 | Print data associated with a specific tag to the console, if it is printable:: 61 | 62 | dm4.print_tag_data(dmfile: DM4File, tag: Union[DM4TagHeader, DM4DirHeader], indent_level: int): 63 | 64 | 65 | #### 66 | Todo 67 | #### 68 | 69 | Reading arrays of groups has not been implemented. 70 | -------------------------------------------------------------------------------- /dm4/helpers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for working with dm4 files 3 | """ 4 | 5 | import sys 6 | from typing import Union 7 | import array 8 | from dm4.dm4file import DM4File 9 | from dm4.headers import DM4TagDir, DM4TagHeader, DM4DirHeader 10 | 11 | 12 | def _is_python3(): 13 | return sys.version_info[0] == 3 14 | 15 | 16 | def try_convert_unsigned_short_to_unicode(data: array.array, count_limit: int = 2048): 17 | """Attempt to convert arrays of 16-bit integers of less than specified length to a unicode string.""" 18 | 19 | if not isinstance(data, array.array): 20 | return data 21 | 22 | if data.typecode == 'H' and len(data) < count_limit: 23 | try: 24 | if _is_python3(): 25 | data = data.tobytes().decode('utf-16') 26 | else: 27 | data = data.tostring().decode('utf-16') 28 | except UnicodeDecodeError as e: 29 | pass 30 | except UnicodeEncodeError as e: 31 | pass 32 | 33 | return data 34 | 35 | 36 | def print_tag_data(dmfile: DM4File, tag: Union[DM4TagHeader, DM4DirHeader], indent_level: int): 37 | """Print data associated with a dm4 tag""" 38 | 39 | if tag.byte_length > 2048: 40 | print(indent_level * '\t' + '%s\t' % (tag.name) + "Array length %d too long to read" % (tag.array_length)) 41 | return 42 | 43 | try: 44 | data = dmfile.read_tag_data(tag) 45 | except NotImplementedError as e: 46 | print(indent_level * '\t' + '***' + str(e) + '***') 47 | return 48 | 49 | data = try_convert_unsigned_short_to_unicode(data) 50 | 51 | if _is_python3(): 52 | print(indent_level * '\t' + '%s\t%s' % (tag.name, str(data))) 53 | else: 54 | if isinstance(data, array.array) and data.typecode == 'H': # Unconverted unicode or image data 55 | print(indent_level * '\t' + '%s\t%s' % (tag.name, "Unconverted array of unsigned 16-bit integers")) 56 | elif isinstance(data, unicode): 57 | print(indent_level * '\t' + '%s\t%s' % (tag.name, data)) 58 | else: 59 | if tag.name is None: 60 | print(indent_level * '\t' + 'Unnamed tag\t%s' % (str(data))) 61 | else: 62 | print(indent_level * '\t' + tag.name.encode('ascii', 'ignore') + '\t%s' % (str(data))) 63 | 64 | 65 | def print_tag_directory_tree(dmfile: DM4File, 66 | dir_obj: DM4TagDir, 67 | indent_level: int = 0): 68 | """Print all of the tags and directories contained in a dm4 file""" 69 | 70 | for tag in dir_obj.unnamed_tags: 71 | print_tag_data(dmfile, tag, indent_level) 72 | 73 | for k in sorted(dir_obj.named_tags.keys()): 74 | tag = dir_obj.named_tags[k] 75 | print_tag_data(dmfile, tag, indent_level) 76 | 77 | for subdir in dir_obj.unnamed_subdirs: 78 | print(indent_level * '\t' + "Unnamed directory") 79 | print_tag_directory_tree(dmfile, subdir, indent_level + 1) 80 | 81 | for k in sorted(dir_obj.named_subdirs.keys()): 82 | subdir = dir_obj.named_subdirs[k] 83 | print(indent_level * '\t' + k) 84 | print_tag_directory_tree(dmfile, subdir, indent_level + 1) 85 | 86 | indent_level -= 1 87 | -------------------------------------------------------------------------------- /test/test_dm4.py: -------------------------------------------------------------------------------- 1 | """ 2 | The DM4 files I have are huge and not a good fit for github. Any dm4 file should work for testing this code. Set 3 | the path using the dm4_input_filename property. 4 | 5 | Note: This test is structured for a dm4 file produced by a specific microscope. Other platforms may change the directory 6 | structure and tags. Use the print_tag_directory_tree function to explore the structure of your dm4 file as needed. 7 | """ 8 | 9 | import unittest 10 | import os 11 | import dm4 12 | import numpy as np 13 | 14 | import PIL # For example code 15 | from PIL import Image 16 | 17 | from dm4 import DM4File, DM4TagDir, print_tag_directory_tree, print_tag_data 18 | import dm4.dm4file 19 | 20 | # Eliminating the MAX_IMAGE_PIXELS check in PIL is often necessary when dealing with multi-GB images often produced by microscopy platforms. 21 | Image.MAX_IMAGE_PIXELS = None 22 | 23 | 24 | class TestDM4(unittest.TestCase): 25 | 26 | @property 27 | def dm4_input_filename(self) -> str: 28 | """The name of a dm4 file to read during the test. Change this to suit your test input file""" 29 | return 'Glumi1_3VBSED_stack_00_slice_0476.dm4' 30 | 31 | @property 32 | def dm4_input_dirname(self) -> str: 33 | """The directory containing a dm4 file""" 34 | if 'TESTINPUTPATH' in os.environ: 35 | return os.environ['TESTINPUTPATH'] 36 | 37 | raise ValueError('TESTINPUTPATH environment variable not set') 38 | 39 | @property 40 | def FirstImageDimensionsTag(self) -> DM4TagDir: 41 | """Returns the dimension tag for the first image in the dm4 file.""" 42 | return self.tags.named_subdirs['ImageList'].unnamed_subdirs[1].named_subdirs['ImageData'].named_subdirs[ 43 | 'Dimensions'] 44 | 45 | def ReadImageShape(self, image_dimensions_tag: DM4TagDir) -> tuple[int, int]: 46 | """Returns the shape of an image stored in the dm4 file""" 47 | XDim = self.dm4file.read_tag_data(image_dimensions_tag.unnamed_tags[0]) 48 | YDim = self.dm4file.read_tag_data(image_dimensions_tag.unnamed_tags[1]) 49 | 50 | return YDim, XDim 51 | 52 | @property 53 | def dm4_input_fullpath(self) -> str: 54 | return os.path.join(self.dm4_input_dirname, self.dm4_input_filename) 55 | 56 | def test(self): 57 | with dm4.dm4file.DM4File.open(self.dm4_input_fullpath) as self.dm4file: 58 | self.tags = self.dm4file.read_directory() 59 | print_tag_directory_tree(self.dm4file, self.tags) 60 | 61 | # self.Extract_Image(self.dm4file , self.tags, self.dm4_input_filename) 62 | 63 | def Extract_Image(self, 64 | dmfile: DM4File, 65 | tags: DM4TagDir, 66 | image_filename: str): 67 | data_tag = tags.named_subdirs['ImageList'].unnamed_subdirs[1].named_subdirs['ImageData'].named_tags['Data'] 68 | 69 | file_basename = os.path.basename(image_filename) 70 | output_dirname = 'C:\\Temp' 71 | output_filename = os.path.basename(file_basename + '.tif') 72 | 73 | output_fullpath = os.path.join(output_dirname, output_filename) 74 | 75 | np_array = np.array(dmfile.read_tag_data(data_tag), dtype=np.uint16) 76 | np_array = np.reshape(np_array, self.ReadImageShape(self.FirstImageDimensionsTag)) 77 | 78 | image = Image.fromarray(np_array, 'I;16') 79 | image.save(output_fullpath) 80 | 81 | dmfile.close() 82 | 83 | def test_readme_example(self): 84 | """The code in the try block should match the readme example to ensure the documentation code is correct""" 85 | 86 | output_fullpath = "sample.tif" 87 | 88 | try: 89 | 90 | # Example code goes below 91 | 92 | input_path = self.dm4_input_fullpath 93 | 94 | with dm4.DM4File.open(input_path) as dm4file: 95 | tags = dm4file.read_directory() 96 | 97 | image_data_tag = tags.named_subdirs['ImageList'].unnamed_subdirs[1].named_subdirs['ImageData'] 98 | image_tag = image_data_tag.named_tags['Data'] 99 | 100 | XDim = dm4file.read_tag_data(image_data_tag.named_subdirs['Dimensions'].unnamed_tags[0]) 101 | YDim = dm4file.read_tag_data(image_data_tag.named_subdirs['Dimensions'].unnamed_tags[1]) 102 | 103 | image_array = np.array(dm4file.read_tag_data(image_tag), dtype=np.uint16) 104 | image_array = np.reshape(image_array, (YDim, XDim)) 105 | 106 | output_fullpath = "sample.tif" 107 | image = PIL.Image.fromarray(image_array, 'I;16') 108 | image.save(output_fullpath) 109 | 110 | finally: 111 | if os.path.exists(output_fullpath): 112 | os.remove(output_fullpath) 113 | else: 114 | raise ValueError(f"Output file {output_fullpath} was not created") 115 | -------------------------------------------------------------------------------- /dm4/dm4file.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import contextlib 3 | from typing import NamedTuple, BinaryIO, Generator, Any, Optional 4 | import struct 5 | import array 6 | import sys 7 | 8 | import dm4 9 | from dm4.headers import DM4TagHeader, DM4Header, DM4DirHeader, DM4TagDir 10 | 11 | from dm4 import format_config 12 | 13 | 14 | class DM4File: 15 | """ 16 | Provides functions for reading data from a DM4 file. 17 | Maintains an open file handle to the DM4 file. 18 | DM4File.open supports the context manager protocol and may be used in a with statement to automatically close the underlying file handle. 19 | """ 20 | _hfile: Optional[BinaryIO] # Set to None only when the file is closed 21 | header: DM4Header 22 | _endian_str: str # '>' == Little Endian, '<' == Big Endian 23 | root_tag_dir_header: DM4DirHeader 24 | 25 | @property 26 | def endian_str(self) -> str: 27 | """ 28 | '>' == Little Endian 29 | '<' == Big Endian 30 | 31 | Compatible for use with struct.unpack 32 | """ 33 | return self._endian_str 34 | 35 | @property 36 | def hfile(self) -> BinaryIO | None: 37 | """Handle to the DM4 file. Set to None only when the file has been closed. Should not be needed by library users in typical use cases.""" 38 | return self._hfile 39 | 40 | def __init__(self, filedata: BinaryIO): 41 | """ 42 | :param file filedata: file handle to dm4 file 43 | """ 44 | self._hfile = filedata 45 | self.header = read_header_dm4(self.hfile) 46 | self._endian_str = _get_struct_endian_str(self.header.little_endian) 47 | 48 | self.root_tag_dir_header = read_root_tag_dir_header_dm4(self.hfile, endian=self.endian_str) 49 | 50 | def close(self): 51 | """Manually close the file handle if one is not using a context manager""" 52 | self._hfile.close() 53 | self._hfile = None 54 | 55 | @staticmethod 56 | @contextlib.contextmanager 57 | def open(filename: str) -> Generator[BinaryIO, None, None]: 58 | """ 59 | Use this method to open a DM4 file. The file will be closed when the context is exited. 60 | 61 | with DM4File.open(filename) as dm4file: 62 | do stuff 63 | 64 | :param str filename: Name of DM4 file to open 65 | :rtype: DM4File 66 | :return: DM4File object 67 | """ 68 | hfile = open(filename, "rb") 69 | try: 70 | yield DM4File(hfile) 71 | finally: 72 | hfile.close() 73 | 74 | def read_tag_data(self, tag: DM4TagHeader) -> Any: 75 | """Read the data associated with the passed tag""" 76 | return _read_tag_data(self.hfile, tag, self.endian_str) 77 | 78 | def read_directory(self, directory_tag: DM4DirHeader | None = None) -> DM4TagDir: 79 | """ 80 | Read the directories and tags from a dm4 file. The first step in working with a dm4 file. 81 | :return: A named collection containing information about the directory 82 | """ 83 | 84 | if directory_tag is None: 85 | directory_tag = self.root_tag_dir_header 86 | 87 | dir_obj = DM4TagDir(directory_tag.name, directory_tag, {}, [], {}, []) 88 | 89 | for iTag in range(0, directory_tag.num_tags): 90 | tag = read_tag_header_dm4(self.hfile, self.endian_str) 91 | if tag is None: 92 | break 93 | 94 | if tag_is_directory(tag): 95 | if tag.name is None: 96 | dir_obj.unnamed_subdirs.append(self.read_directory(tag)) 97 | else: 98 | dir_obj.named_subdirs[tag.name] = self.read_directory(tag) 99 | else: 100 | if tag.name is None: 101 | dir_obj.unnamed_tags.append(tag) 102 | else: 103 | dir_obj.named_tags[tag.name] = tag 104 | 105 | return dir_obj 106 | 107 | 108 | def tag_is_directory(tag: DM4TagHeader) -> bool: 109 | return tag.type == 20 110 | 111 | 112 | def read_header_dm4(dmfile: BinaryIO) -> DM4Header: 113 | dmfile.seek(0) 114 | version = struct.unpack_from('>I', dmfile.read(4))[0] # int.from_bytes(dmfile.read(4), byteorder='big') 115 | rootlength = struct.unpack_from('>Q', dmfile.read(8))[0] 116 | byteorder = struct.unpack_from('>I', dmfile.read(4))[0] 117 | 118 | little_endian = byteorder == 1 119 | 120 | return DM4Header(version, rootlength, little_endian) 121 | 122 | 123 | def _get_endian_str(endian: str | int) -> str: 124 | """ 125 | DM4 header encodes little endian as byte value 1 in the header 126 | :return: 'big' or 'little' for use with python's int.frombytes function 127 | """ 128 | if isinstance(endian, str): 129 | return endian 130 | 131 | assert (isinstance(endian, int)) 132 | if endian == 1: 133 | return 'little' 134 | 135 | return 'big' 136 | 137 | 138 | def _get_struct_endian_str(endian: str | int | bool) -> str: 139 | """ 140 | DM4 header encodes little endian as byte value 1 in the header. However, when that convention is followed the wrong 141 | values are read. So this implementation is reversed. 142 | :return: '>' or '<' for use with python's struct.unpack function 143 | """ 144 | if isinstance(endian, str): 145 | if endian == 'little': 146 | return '>' # Little Endian 147 | else: 148 | return '<' # Big Endian 149 | else: 150 | if endian == 1: 151 | return '>' # Little Endian 152 | else: 153 | return '<' # Big Endian 154 | 155 | 156 | def read_root_tag_dir_header_dm4(dmfile: BinaryIO, endian: str | int): 157 | """Read the root directory information from a dm4 file. 158 | File seek position is left at end of root_tag_dir_header""" 159 | if not isinstance(endian, str): 160 | endian = _get_struct_endian_str(endian) 161 | 162 | dmfile.seek(dm4.format_config.header_size) 163 | 164 | issorted = struct.unpack_from(endian + 'b', dmfile.read(1))[0] # type: bool 165 | isclosed = struct.unpack_from(endian + 'b', dmfile.read(1))[0] # type: bool 166 | num_tags = struct.unpack_from('>Q', dmfile.read(8))[0] # DM4 specifies this property as always big endian 167 | 168 | return DM4DirHeader(20, None, 0, issorted, isclosed, num_tags, dm4.format_config.header_size) 169 | 170 | 171 | def read_tag_header_dm4(dmfile: BinaryIO, endian: str) -> DM4TagHeader | DM4DirHeader | None: 172 | """Read the tag from the file. Leaves file at the end of the tag data, ready to read the next tag from the file""" 173 | tag_header_offset = dmfile.tell() 174 | tag_type = struct.unpack_from(endian + 'B', dmfile.read(1))[0] 175 | if tag_type == 20: 176 | return _read_tag_dir_header_dm4(dmfile, endian) 177 | if tag_type == 0: 178 | return None 179 | 180 | tag_name = _read_tag_name(dmfile) 181 | tag_byte_length = struct.unpack_from('>Q', dmfile.read(8))[0] # DM4 specifies this property as always big endian 182 | 183 | tag_data_offset = dmfile.tell() 184 | 185 | _check_tag_verification_str(dmfile) 186 | 187 | (tag_array_length, tag_array_types) = _read_tag_data_info(dmfile) 188 | 189 | dmfile.seek(tag_data_offset + tag_byte_length) 190 | return DM4TagHeader(tag_type, tag_name, tag_byte_length, tag_array_length, tag_array_types[0], tag_header_offset, 191 | tag_data_offset) 192 | 193 | 194 | def _read_tag_name(dmfile: BinaryIO) -> str | None: 195 | # DM4 specifies this property as always big endian 196 | tag_name_len = struct.unpack_from('>H', dmfile.read(2))[0] 197 | tag_name = None 198 | if tag_name_len > 0: 199 | data = dmfile.read(tag_name_len) 200 | try: 201 | tag_name = data.decode('utf-8', errors='ignore') 202 | except UnicodeDecodeError: 203 | tag_name = None 204 | pass 205 | 206 | return tag_name 207 | 208 | 209 | def _read_tag_dir_header_dm4(dmfile: BinaryIO, endian: str) -> DM4DirHeader: 210 | tag_name = _read_tag_name(dmfile) 211 | tag_byte_length = struct.unpack_from('>Q', dmfile.read(8))[0] # DM4 specifies this property as always big endian 212 | issorted = struct.unpack_from(endian + 'b', dmfile.read(1))[0] 213 | isclosed = struct.unpack_from(endian + 'b', dmfile.read(1))[0] 214 | num_tags = struct.unpack_from('>Q', dmfile.read(8))[0] # DM4 specifies this property as always big endian 215 | 216 | data_offset = dmfile.tell() 217 | 218 | return DM4DirHeader(20, tag_name, tag_byte_length, issorted, isclosed, num_tags, data_offset) 219 | 220 | 221 | def _check_tag_verification_str(dmfile: BinaryIO) -> None: 222 | """ 223 | DM4 has four bytes of % symbols in the tag. Ensure it is there. Raises ValueError if the verification string is not present 224 | """ 225 | garbage_str = dmfile.read(4).decode('utf-8') 226 | if garbage_str != '%%%%': 227 | raise ValueError( 228 | "Invalid tag data garbage string. This suggests the file is not in DM4 format or is corrupted") 229 | 230 | 231 | def _read_tag_data_info(dmfile: BinaryIO) -> tuple[int, tuple[int, ...]]: 232 | # DM4 specifies this property as always big endian 233 | tag_array_length = struct.unpack_from('>Q', dmfile.read(8))[0] 234 | format_str = '>' + tag_array_length * 'q' # Big endian signed long 235 | 236 | tag_array_types = struct.unpack_from(format_str, dmfile.read(8 * tag_array_length)) # type: tuple[int, ...] 237 | 238 | return tag_array_length, tag_array_types 239 | 240 | 241 | def _read_tag_data(dmfile: BinaryIO, tag: DM4TagHeader, endian: str) -> Any: 242 | assert (tag.type == 21) 243 | try: 244 | endian = _get_struct_endian_str(endian) 245 | dmfile.seek(tag.data_offset) 246 | 247 | _check_tag_verification_str(dmfile) 248 | (tag_array_length, tag_array_types) = _read_tag_data_info(dmfile) 249 | 250 | tag_data_type_code = tag_array_types[0] 251 | 252 | if tag_data_type_code == 15: 253 | return read_tag_data_group(dmfile, tag, endian) 254 | elif tag_data_type_code == 20: 255 | return read_tag_data_array(dmfile, tag, endian) 256 | 257 | if tag_data_type_code not in dm4.format_config.data_type_dict: 258 | # You can replace the exception with "return None" if you want to get the data you can 259 | # from the file and ignore reading the unknown data types 260 | raise ValueError("Unknown data type code " + str(tag_data_type_code)) 261 | # print("Unknown data type " + str(tag_data_type_code)) 262 | # return None 263 | 264 | return _read_tag_data_value(dmfile, endian, tag_data_type_code) 265 | 266 | finally: 267 | # Ensure we are in the correct position to read the next tag regardless of how reading this tag goes 268 | dmfile.seek(tag.data_offset + tag.byte_length) 269 | 270 | 271 | def _read_tag_data_value(dmfile: BinaryIO, endian: str, type_code: int) -> tuple: 272 | if type_code not in dm4.format_config.data_type_dict: 273 | raise ValueError("Unknown data type code " + str(type_code)) 274 | 275 | data_type = dm4.format_config.data_type_dict[type_code] 276 | format_str = _get_struct_endian_str(endian) + data_type.type_format 277 | byte_data = dmfile.read(data_type.num_bytes) 278 | 279 | return struct.unpack_from(format_str, byte_data)[0] 280 | 281 | 282 | def read_tag_data_group(dmfile: BinaryIO, tag: DM4TagHeader, endian: str) -> list[Any]: 283 | endian = _get_struct_endian_str(endian) 284 | dmfile.seek(tag.data_offset) 285 | 286 | _check_tag_verification_str(dmfile) 287 | (tag_array_length, tag_array_types) = _read_tag_data_info(dmfile) 288 | 289 | tag_data_type = tag_array_types[0] 290 | assert (tag_data_type == 15) 291 | 292 | length_groupname = tag_array_types[1] 293 | number_of_entries_in_group = tag_array_types[2] 294 | field_data = tag_array_types[3:] 295 | 296 | field_types_list = [] # type: list[int] 297 | 298 | for iField in range(0, number_of_entries_in_group): 299 | fieldname_length = field_data[iField * 2] 300 | fieldname_type = field_data[(iField * 2) + 1] # type: int 301 | field_types_list.append(fieldname_type) 302 | 303 | fields_data = [] 304 | for field_type in field_types_list: 305 | field_data = _read_tag_data_value(dmfile, endian, field_type) 306 | fields_data.append(field_data) 307 | 308 | return fields_data 309 | 310 | 311 | def system_byte_order() -> str: 312 | """Fetches the system byte order with the < or > character convention used by struct unpack""" 313 | return '<' if sys.byteorder == 'little' else '>' 314 | 315 | 316 | def read_tag_data_array(dmfile: BinaryIO, tag: DM4TagHeader, endian: str) -> array.array: 317 | dmfile.seek(tag.data_offset) 318 | 319 | _check_tag_verification_str(dmfile) 320 | 321 | (tag_array_length, tag_array_types) = _read_tag_data_info(dmfile) 322 | 323 | assert (tag_array_types[0] == 20) 324 | array_data_type_code = tag_array_types[1] 325 | array_length = tag_array_types[2] 326 | 327 | if array_data_type_code == 15: 328 | return "Array of groups length %d and type %d" % (array_length, array_data_type_code) 329 | 330 | assert (len(tag_array_types) == 3) 331 | 332 | data_type = format_config.data_type_dict[array_data_type_code] 333 | 334 | data = array.array(data_type.type_format) 335 | data.fromfile(dmfile, array_length) 336 | 337 | # Correct the byte order if the machine order doesn't match the file order 338 | if endian != system_byte_order(): 339 | data.byteswap() 340 | 341 | return data 342 | --------------------------------------------------------------------------------