├── test
    ├── __init__.py
    └── test_dm4.py
├── dm4
    ├── __init__.py
    ├── __main__.py
    ├── headers.py
    ├── helpers.py
    └── dm4file.py
├── pyproject.toml
├── .gitignore
├── LICENSE
└── README.rst


/test/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/dm4/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | A Digital Micrograph 4 (DM4) file reader.
 3 | 
 4 | 1.0.1 Initial release
 5 | 1.0.2 Switched to using Optional typing hint instead of Union for Python <= 3.8 compatibility
 6 | 1.0.3 DM4TagDir is now imported with dm4 module to simplify typing.
 7 |       Invoking the dm4 module as a script now prints the tag directory tree of a passed DM4 file.
 8 |       Removed dependency on the six module
 9 | """
10 | 
11 | __version__ = "1.0.3"
12 | 
13 | from dm4.headers import DM4DataType, DM4DirHeader, DM4Header, DM4TagHeader, DM4Config, DM4TagDir, format_config
14 | from dm4.dm4file import DM4File
15 | from dm4.helpers import print_tag_directory_tree, print_tag_data
16 | 


--------------------------------------------------------------------------------
/dm4/__main__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on Sep 12, 2013
 3 | 
 4 | @author: u0490822
 5 | """
 6 | import sys
 7 | from dm4.dm4file import DM4File
 8 | from dm4.helpers import print_tag_directory_tree, print_tag_data
 9 | 
10 | 
def main():
    """Command-line entry point: print the tag directory tree of one DM4 file.

    Exactly one command-line argument (the path of the DM4 file) is expected.
    With any other argument count a usage message is printed and the process
    exits with status 1.
    """
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        usage_lines = (
            "Usage: python -m dm4 <dm4_input_fullpath>",
            "",
            "Invoking dm4 as a module prints the tag directory tree of a Digital Micrograph 4 (DM4) file.",
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    (dm4_input_fullpath,) = arguments

    # The context manager closes the file handle once the tree has been printed.
    with DM4File.open(dm4_input_fullpath) as dm4file:
        print_tag_directory_tree(dm4file, dm4file.read_directory())
23 | 
24 | 
25 | if __name__ == '__main__':
26 |     main()
27 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["flit_core >=3.4,<4"]
 3 | build-backend = "flit_core.buildapi"
 4 | 
 5 | [project]
 6 | name = "dm4"
 7 | authors = [{ name = "James Anderson", email = "james.r.anderson@utah.edu" }]
 8 | readme = "README.rst"
 9 | license = { file = "LICENSE" }
10 | classifiers = [
11 |     "Programming Language :: Python :: 3",
12 |     "License :: OSI Approved :: MIT License",
13 |     "Operating System :: OS Independent",
14 |     "Development Status :: 5 - Production/Stable",
15 |     "Intended Audience :: Developers",
16 |     "Intended Audience :: Science/Research",
17 |     "Topic :: Software Development :: Libraries :: Python Modules",
18 |     "Topic :: Scientific/Engineering :: Bio-Informatics"
19 | ]
20 | dynamic = ["version", "description"]
21 | 
22 | [project.optional-dependencies]
23 | test = [
24 |     "numpy",
25 | ]
26 | 
27 | [project.urls]
28 | Home = "https://github.com/nornir/dm4"
29 | Issues = "https://github.com/nornir/dm4/issues"


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | 
26 | # PyInstaller
27 | #  Usually these files are written by a python script from a template
28 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 | 
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 | 
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *,cover
45 | 
46 | # Translations
47 | *.mo
48 | *.pot
49 | 
50 | # Django stuff:
51 | *.log
52 | 
53 | # Sphinx documentation
54 | docs/_build/
55 | 
56 | # PyBuilder
57 | target/
58 | /.project
59 | /.pydevproject
60 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 James Anderson
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/dm4/headers.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | from typing import NamedTuple, Optional
 3 | 
 4 | 
class DM4Header(NamedTuple):
    """File-level header of a DM4 file, as produced by read_header_dm4."""
    version: int  # first header field, read as a 4-byte big-endian unsigned integer
    root_length: int  # second header field, read as an 8-byte big-endian unsigned integer
    little_endian: bool  # True when the 4-byte byte-order field in the header equals 1
 9 | 
10 | 
class DM4TagHeader(NamedTuple):
    """Location and type information for one data tag, as produced by read_tag_header_dm4."""
    type: int  # tag type byte read from the file (_read_tag_data asserts 21 for data tags)
    name: Optional[str]  # decoded tag name, or None when the stored name length is zero
    byte_length: int  # length field stored after the name; the tag ends at data_offset + byte_length
    array_length: int  # number of 8-byte entries in the tag's type-information array (not the payload size)
    data_type_code: int  # first entry of the type-information array
    header_offset: int  # file position of the tag type byte
    data_offset: int  # file position immediately after the byte_length field (start of the '%%%%' marker)
19 | 
20 | 
class DM4DirHeader(NamedTuple):
    """Header of a tag directory, as produced for the root and for nested directories."""
    type: int  # always 20 where this tuple is constructed in dm4file.py
    name: Optional[str]  # directory name; None for the root and for unnamed directories
    byte_length: int  # length field stored after the name (0 is used for the root directory)
    sorted: bool  # "sorted" flag; read from the file as a single signed byte
    closed: bool  # "closed" flag; read from the file as a single signed byte
    num_tags: int  # number of tags/sub-directories directly inside this directory
    # For nested directories: file position right after the directory header.
    # For the root directory: the position where the root directory header starts.
    data_offset: int
29 | 
30 | 
class DM4Tag(NamedTuple):
    """A tag name bundled with its data type code and data value."""
    # NOTE(review): nothing in the visible package code constructs this tuple — confirm it is still needed.
    name: str
    data_type_code: int
    data: object
35 | 
36 | 
class DM4DataType(NamedTuple):
    """Describes how a DM4 data type code is decoded."""
    num_bytes: int  # number of bytes read from the file for one value
    signed: bool  # signedness flag recorded for the type
    type_format: str  # format character passed to struct (and used as array.array typecode for arrays)
41 | 
42 | 
class DM4Config(NamedTuple):
    """
    Configuration for reading a DM4 file, these are unlikely to change
    """

    data_type_dict: dict[int, DM4DataType]  # maps a DM4 data type code to its decoding description
    header_size: int  # size in bytes of the file header; the root directory header starts at this offset
    root_tag_dir_header_size: int  # size in bytes of the root directory header
51 | 
52 | 
class DM4TagDir(NamedTuple):
    """Description of a directory in a DM4 file"""
    name: Optional[str]  # directory name copied from its header; None for the root and unnamed directories
    dm4_tag: DM4DirHeader  # the directory's own header
    named_subdirs: dict[str, DM4TagDir]  # sub-directories that have a name, keyed by that name
    unnamed_subdirs: list[DM4TagDir]  # sub-directories without a name, in file order
    named_tags: dict[str, DM4TagHeader]  # data tags that have a name, keyed by that name
    unnamed_tags: list[DM4TagHeader]  # data tags without a name, in file order
61 | 
62 | 
# Shared reader configuration: the table of scalar data type codes plus the
# fixed sizes of the file header and of the root directory header.
format_config = DM4Config(
    data_type_dict={
        2: DM4DataType(2, True, "h"),  # 2-byte signed integer
        3: DM4DataType(4, True, "i"),  # 4-byte signed integer
        4: DM4DataType(2, False, "H"),  # 2-byte unsigned integer
        5: DM4DataType(4, False, "I"),  # 4-byte unsigned integer
        # NOTE(review): the float entries carry signed=False as in the original table — confirm intent.
        6: DM4DataType(4, False, "f"),  # 4-byte float
        7: DM4DataType(8, False, "d"),  # 8-byte float
        8: DM4DataType(1, False, "?"),  # 1-byte boolean
        9: DM4DataType(1, False, "c"),  # 1-byte character
        10: DM4DataType(1, True, "b"),  # 1-byte signed integer
        11: DM4DataType(8, True, "q"),  # 8-byte signed integer
        12: DM4DataType(8, False, "Q"),  # 8-byte unsigned integer ("Q" is unsigned, so signed is False)
    },
    header_size=4 + 8 + 4,  # version + root length + byte-order field
    root_tag_dir_header_size=1 + 1 + 8,  # sorted flag + closed flag + tag count
)
80 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | ###
 2 | dm4
 3 | ###
 4 | 
 5 | A pure python file reader for Digital Micrograph's DM4 file format
 6 | 
 7 | This package would not have been possible without the documentation provided by Dr Chris Boothroyd at http://www.er-c.org/cbb/info/dmformat/ Thank you.
 8 | 
 9 | ############
10 | Installation
11 | ############
12 | 
13 | Install using pip from the command line::
14 | 
15 |    pip install dm4
16 | 
17 | #######
18 | Example
19 | #######
20 |    
21 | Below is a short example of reading the image data from a dm4 file.  A more complete example can be found in the tests.::
22 | 
 23 |    import dm4
 24 |    import numpy as np
 25 |    import PIL.Image
26 |    input_path = "your_filename_here.dm4"
27 | 
28 |    with dm4.DM4File.open(input_path) as dm4file:
29 |        tags = dm4file.read_directory()
30 | 
31 |        image_data_tag = tags.named_subdirs['ImageList'].unnamed_subdirs[1].named_subdirs['ImageData']
32 |        image_tag = image_data_tag.named_tags['Data']
33 | 
34 |        XDim = dm4file.read_tag_data(image_data_tag.named_subdirs['Dimensions'].unnamed_tags[0])
35 |        YDim = dm4file.read_tag_data(image_data_tag.named_subdirs['Dimensions'].unnamed_tags[1])
36 | 
37 |        image_array = np.array(dm4file.read_tag_data(image_tag), dtype=np.uint16)
38 |        image_array = np.reshape(image_array, (YDim, XDim))
39 | 
40 |        output_fullpath = "sample.tif"
41 |        image = PIL.Image.fromarray(image_array, 'I;16')
42 |        image.save(output_fullpath)
43 | 
44 | ############
45 | Script usage
46 | ############
47 | 
48 | The dm4 module can be invoked as a script to print a dm4 file's full directory to the command line.  This is helpful when determining the exact structure of a specific DM4 file: ::
49 | 
50 |     python -m dm4 your_dm4_file.dm4
51 | 
52 | ################
53 | Helper Functions
54 | ################
55 | 
 56 | Print all of the tags and directories contained in a dm4 file to the console::
57 | 
58 |   dm4.print_tag_directory_tree(dmfile: DM4File, dir_obj: DM4TagDir, indent_level: int = 0)
59 | 
60 | Print data associated with a specific tag to the console, if it is printable::
61 | 
 62 |   dm4.print_tag_data(dmfile: DM4File, tag: Union[DM4TagHeader, DM4DirHeader], indent_level: int)
63 | 
64 | 
65 | ####
66 | Todo
67 | ####
68 | 
69 | Reading arrays of groups has not been implemented.
70 | 


--------------------------------------------------------------------------------
/dm4/helpers.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Helper functions for working with dm4 files
 3 | """
 4 | 
 5 | import sys
 6 | from typing import Union
 7 | import array
 8 | from dm4.dm4file import DM4File
 9 | from dm4.headers import DM4TagDir, DM4TagHeader, DM4DirHeader
10 | 
11 | 
def _is_python3():
    """Report whether the running interpreter's major version is 3."""
    major_version = sys.version_info.major
    return major_version == 3
14 | 
15 | 
def try_convert_unsigned_short_to_unicode(data: array.array, count_limit: int = 2048):
    """Try to turn a short array of unsigned 16-bit integers into a string.

    :param data: Value read from a tag.  Anything that is not an ``array.array``
        is returned unchanged.
    :param int count_limit: Arrays with this many elements or more are left alone.
    :return: The decoded string when ``data`` is an array with typecode ``'H'``
        shorter than ``count_limit`` whose bytes decode as UTF-16; otherwise
        ``data`` itself, unchanged.
    """
    if not isinstance(data, array.array):
        return data

    if data.typecode != 'H' or len(data) >= count_limit:
        return data

    try:
        # 'utf-16' without a byte-order mark decodes using the machine's native order.
        return data.tobytes().decode('utf-16')
    except UnicodeDecodeError:
        # Not text after all (for example small image data); hand the array back as-is.
        return data
34 | 
35 | 
def print_tag_data(dmfile: DM4File, tag: Union[DM4TagHeader, DM4DirHeader], indent_level: int):
    """Print one line describing a dm4 tag and, when it is small enough, its data.

    :param dmfile: Open DM4 file the tag belongs to.
    :param tag: Tag to print.  The tag's ``byte_length`` and ``name`` are used, and
        ``array_length`` when the tag is too long to read.
    :param int indent_level: Number of tab characters to put in front of the line.
    """
    indent = indent_level * '\t'

    # Large tags (typically image data) are reported but not read.
    if tag.byte_length > 2048:
        print(indent + '%s\t' % (tag.name) + "Array length %d too long to read" % (tag.array_length))
        return

    try:
        data = dmfile.read_tag_data(tag)
    except NotImplementedError as e:
        print(indent + '***' + str(e) + '***')
        return

    # Short arrays of 16-bit values are shown as text when they decode as UTF-16.
    data = try_convert_unsigned_short_to_unicode(data)

    print(indent + '%s\t%s' % (tag.name, str(data)))
63 | 
64 | 
def print_tag_directory_tree(dmfile: DM4File,
                             dir_obj: DM4TagDir,
                             indent_level: int = 0):
    """Recursively print every tag and sub-directory below ``dir_obj``.

    Output order within a directory: unnamed tags, named tags sorted by name,
    unnamed sub-directories, then named sub-directories sorted by name.  Each
    nesting level adds one tab of indentation.
    """
    indent = indent_level * '\t'
    child_level = indent_level + 1

    for unnamed_tag in dir_obj.unnamed_tags:
        print_tag_data(dmfile, unnamed_tag, indent_level)

    for tag_name in sorted(dir_obj.named_tags):
        print_tag_data(dmfile, dir_obj.named_tags[tag_name], indent_level)

    for unnamed_dir in dir_obj.unnamed_subdirs:
        print(indent + "Unnamed directory")
        print_tag_directory_tree(dmfile, unnamed_dir, child_level)

    for dir_name in sorted(dir_obj.named_subdirs):
        print(indent + dir_name)
        print_tag_directory_tree(dmfile, dir_obj.named_subdirs[dir_name], child_level)
87 | 


--------------------------------------------------------------------------------
/test/test_dm4.py:
--------------------------------------------------------------------------------
  1 | """
  2 | The DM4 files I have are huge and not a good fit for github.  Any dm4 file should work for testing this code.  Set
  3 | the path using the dm4_input_filename property.
  4 | 
  5 | Note: This test is structured for a dm4 file produced by a specific microscope.  Other platforms may change the directory
  6 | structure and tags.  Use the print_tag_directory_tree function to explore the structure of your dm4 file as needed.
  7 | """
  8 | 
  9 | import unittest
 10 | import os
 11 | import dm4
 12 | import numpy as np
 13 | 
 14 | import PIL  # For example code
 15 | from PIL import Image
 16 | 
 17 | from dm4 import DM4File, DM4TagDir, print_tag_directory_tree, print_tag_data
 18 | import dm4.dm4file
 19 | 
 20 | # Eliminating the MAX_IMAGE_PIXELS check in PIL is often necessary when dealing with multi-GB images often produced by microscopy platforms.
 21 | Image.MAX_IMAGE_PIXELS = None
 22 | 
 23 | 
class TestDM4(unittest.TestCase):
    """Tests that read a real DM4 file found through the TESTINPUTPATH environment variable.

    The directory and tag names used below ('ImageList', 'ImageData', 'Dimensions', 'Data')
    follow the layout of the sample file named in dm4_input_filename.
    """

    @property
    def dm4_input_filename(self) -> str:
        """The name of a dm4 file to read during the test.  Change this to suit your test input file"""
        return 'Glumi1_3VBSED_stack_00_slice_0476.dm4'

    @property
    def dm4_input_dirname(self) -> str:
        """The directory containing a dm4 file, taken from the TESTINPUTPATH environment variable.

        :raises ValueError: if TESTINPUTPATH is not set
        """
        if 'TESTINPUTPATH' in os.environ:
            return os.environ['TESTINPUTPATH']

        raise ValueError('TESTINPUTPATH environment variable not set')

    @property
    def FirstImageDimensionsTag(self) -> DM4TagDir:
        """Returns the 'Dimensions' directory of the image stored at index 1 of 'ImageList'.

        Requires self.tags to have been set by a previous read_directory call.
        """
        return self.tags.named_subdirs['ImageList'].unnamed_subdirs[1].named_subdirs['ImageData'].named_subdirs[
            'Dimensions']

    def ReadImageShape(self, image_dimensions_tag: DM4TagDir) -> tuple[int, int]:
        """Returns the shape of an image stored in the dm4 file as (YDim, XDim).

        Reads through self.dm4file, so that attribute must refer to an open DM4File.
        """
        # The first unnamed tag holds the X dimension, the second the Y dimension.
        XDim = self.dm4file.read_tag_data(image_dimensions_tag.unnamed_tags[0])
        YDim = self.dm4file.read_tag_data(image_dimensions_tag.unnamed_tags[1])

        return YDim, XDim

    @property
    def dm4_input_fullpath(self) -> str:
        """Full path of the input file: dm4_input_dirname joined with dm4_input_filename."""
        return os.path.join(self.dm4_input_dirname, self.dm4_input_filename)

    def test(self):
        """Open the input file, read its directory and print the whole tag tree."""
        # The open file and its directory are stored on self so the helper methods above can use them.
        with dm4.dm4file.DM4File.open(self.dm4_input_fullpath) as self.dm4file:
            self.tags = self.dm4file.read_directory()
            print_tag_directory_tree(self.dm4file, self.tags)

        # self.Extract_Image(self.dm4file , self.tags, self.dm4_input_filename)

    def Extract_Image(self,
                      dmfile: DM4File,
                      tags: DM4TagDir,
                      image_filename: str):
        """Write the image at index 1 of 'ImageList' to a 16-bit TIFF file and close dmfile.

        Not run by default: the only call, in test(), is commented out.
        """
        data_tag = tags.named_subdirs['ImageList'].unnamed_subdirs[1].named_subdirs['ImageData'].named_tags['Data']

        file_basename = os.path.basename(image_filename)
        # NOTE(review): the output directory is a hard-coded Windows path.
        output_dirname = 'C:\\Temp'
        output_filename = os.path.basename(file_basename + '.tif')

        output_fullpath = os.path.join(output_dirname, output_filename)

        np_array = np.array(dmfile.read_tag_data(data_tag), dtype=np.uint16)
        # The image shape is read through self.dm4file / self.tags rather than the arguments.
        np_array = np.reshape(np_array, self.ReadImageShape(self.FirstImageDimensionsTag))

        image = Image.fromarray(np_array, 'I;16')
        image.save(output_fullpath)

        dmfile.close()

    def test_readme_example(self):
        """The code in the try block should match the readme example to ensure the documentation code is correct"""

        output_fullpath = "sample.tif"

        try:

            # Example code goes below

            input_path = self.dm4_input_fullpath

            with dm4.DM4File.open(input_path) as dm4file:
                tags = dm4file.read_directory()

                image_data_tag = tags.named_subdirs['ImageList'].unnamed_subdirs[1].named_subdirs['ImageData']
                image_tag = image_data_tag.named_tags['Data']

                XDim = dm4file.read_tag_data(image_data_tag.named_subdirs['Dimensions'].unnamed_tags[0])
                YDim = dm4file.read_tag_data(image_data_tag.named_subdirs['Dimensions'].unnamed_tags[1])

                image_array = np.array(dm4file.read_tag_data(image_tag), dtype=np.uint16)
                image_array = np.reshape(image_array, (YDim, XDim))

                output_fullpath = "sample.tif"
                image = PIL.Image.fromarray(image_array, 'I;16')
                image.save(output_fullpath)

        finally:
            # Remove the generated file; a missing file means the example did not produce its output.
            if os.path.exists(output_fullpath):
                os.remove(output_fullpath)
            else:
                raise ValueError(f"Output file {output_fullpath} was not created")
115 | 


--------------------------------------------------------------------------------
/dm4/dm4file.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | import contextlib
  3 | from typing import NamedTuple, BinaryIO, Generator, Any, Optional
  4 | import struct
  5 | import array
  6 | import sys
  7 | 
  8 | import dm4
  9 | from dm4.headers import DM4TagHeader, DM4Header, DM4DirHeader, DM4TagDir
 10 | 
 11 | from dm4 import format_config
 12 | 
 13 | 
 14 | class DM4File:
 15 |     """ 
 16 |     Provides functions for reading data from a DM4 file.
 17 |     Maintains an open file handle to the DM4 file.
 18 |     DM4File.open supports the context manager protocol and may be used in a with statement to automatically close the underlying file handle.
 19 |     """
 20 |     _hfile: Optional[BinaryIO]  # Set to None only when the file is closed
 21 |     header: DM4Header
 22 |     _endian_str: str  # '>' == Little Endian, '<' == Big Endian
 23 |     root_tag_dir_header: DM4DirHeader
 24 | 
 25 |     @property
 26 |     def endian_str(self) -> str:
 27 |         """
 28 |         '>' == Little Endian
 29 |         '<' == Big Endian
 30 | 
 31 |         Compatible for use with struct.unpack
 32 |         """
 33 |         return self._endian_str
 34 | 
 35 |     @property
 36 |     def hfile(self) -> BinaryIO | None:
 37 |         """Handle to the DM4 file.  Set to None only when the file has been closed.  Should not be needed by library users in typical use cases."""
 38 |         return self._hfile
 39 | 
 40 |     def __init__(self, filedata: BinaryIO):
 41 |         """
 42 |         :param file filedata: file handle to dm4 file
 43 |         """
 44 |         self._hfile = filedata
 45 |         self.header = read_header_dm4(self.hfile)
 46 |         self._endian_str = _get_struct_endian_str(self.header.little_endian)
 47 | 
 48 |         self.root_tag_dir_header = read_root_tag_dir_header_dm4(self.hfile, endian=self.endian_str)
 49 | 
 50 |     def close(self):
 51 |         """Manually close the file handle if one is not using a context manager"""
 52 |         self._hfile.close()
 53 |         self._hfile = None
 54 | 
 55 |     @staticmethod
 56 |     @contextlib.contextmanager
 57 |     def open(filename: str) -> Generator[BinaryIO, None, None]:
 58 |         """
 59 |         Use this method to open a DM4 file.  The file will be closed when the context is exited.
 60 | 
 61 |         with DM4File.open(filename) as dm4file:
 62 |             do stuff
 63 | 
 64 |         :param str filename: Name of DM4 file to open
 65 |         :rtype: DM4File
 66 |         :return: DM4File object
 67 |         """
 68 |         hfile = open(filename, "rb")
 69 |         try:
 70 |             yield DM4File(hfile)
 71 |         finally:
 72 |             hfile.close()
 73 | 
 74 |     def read_tag_data(self, tag: DM4TagHeader) -> Any:
 75 |         """Read the data associated with the passed tag"""
 76 |         return _read_tag_data(self.hfile, tag, self.endian_str)
 77 | 
 78 |     def read_directory(self, directory_tag: DM4DirHeader | None = None) -> DM4TagDir:
 79 |         """
 80 |         Read the directories and tags from a dm4 file.  The first step in working with a dm4 file.
 81 |         :return: A named collection containing information about the directory
 82 |         """
 83 | 
 84 |         if directory_tag is None:
 85 |             directory_tag = self.root_tag_dir_header
 86 | 
 87 |         dir_obj = DM4TagDir(directory_tag.name, directory_tag, {}, [], {}, [])
 88 | 
 89 |         for iTag in range(0, directory_tag.num_tags):
 90 |             tag = read_tag_header_dm4(self.hfile, self.endian_str)
 91 |             if tag is None:
 92 |                 break
 93 | 
 94 |             if tag_is_directory(tag):
 95 |                 if tag.name is None:
 96 |                     dir_obj.unnamed_subdirs.append(self.read_directory(tag))
 97 |                 else:
 98 |                     dir_obj.named_subdirs[tag.name] = self.read_directory(tag)
 99 |             else:
100 |                 if tag.name is None:
101 |                     dir_obj.unnamed_tags.append(tag)
102 |                 else:
103 |                     dir_obj.named_tags[tag.name] = tag
104 | 
105 |         return dir_obj
106 | 
107 | 
def tag_is_directory(tag: DM4TagHeader) -> bool:
    """Return True when the tag's type value is 20, the value used for directories."""
    directory_tag_type = 20
    return tag.type == directory_tag_type
110 | 
111 | 
def read_header_dm4(dmfile: BinaryIO) -> DM4Header:
    """Read the file header from the start of a DM4 file.

    Seeks to offset 0 and decodes three big-endian fields: a 4-byte version,
    an 8-byte root length and a 4-byte byte-order field.  The byte-order field
    is reported as ``little_endian`` when it equals 1.
    """
    dmfile.seek(0)
    version, root_length, byte_order_field = struct.unpack('>IQI', dmfile.read(4 + 8 + 4))
    return DM4Header(version, root_length, byte_order_field == 1)
121 | 
122 | 
def _get_endian_str(endian: str | int) -> str:
    """
    Translate the DM4 header byte-order value into a byte-order name.

    A string argument is returned unchanged.  For an integer, 1 gives 'little'
    and any other value gives 'big'.
    :return: 'big' or 'little' for use with python's int.from_bytes function
    """
    if isinstance(endian, str):
        return endian

    assert (isinstance(endian, int))
    return 'little' if endian == 1 else 'big'
136 | 
137 | 
def _get_struct_endian_str(endian: str | int | bool) -> str:
    """
    Map a byte-order description to the prefix character used by this module.

    The string 'little', or a non-string value equal to 1, gives '>'.  Every other
    input gives '<'; this includes any other string, so '<' and '>' both give '<'.

    NOTE(review): Python's struct module defines '>' as big-endian and '<' as
    little-endian, so this mapping is reversed on purpose: the original author
    found that following the header convention read wrong values.  Because
    callers pass an already converted prefix through this function again, the
    value readers end up using '<'.  Confirm against a big-endian file before
    changing it.
    :return: '>' or '<' for use with python's struct.unpack function
    """
    if isinstance(endian, str):
        flagged_little = endian == 'little'
    else:
        flagged_little = endian == 1

    return '>' if flagged_little else '<'
154 | 
155 | 
def read_root_tag_dir_header_dm4(dmfile: BinaryIO, endian: str | int):
    """Read the root directory information from a dm4 file.

    Seeks to the end of the file header, then reads the sorted flag, the closed
    flag (one signed byte each) and the 8-byte big-endian tag count.
    File seek position is left at end of root_tag_dir_header.

    :param endian: Prefix character for the flag reads, or a non-string
        byte-order value that is converted with _get_struct_endian_str.
    :return: DM4DirHeader with type 20, no name, byte_length 0 and data_offset
        set to the position where the root directory header starts.
    """
    prefix = endian if isinstance(endian, str) else _get_struct_endian_str(endian)

    root_header_offset = dm4.format_config.header_size
    dmfile.seek(root_header_offset)

    flag_format = prefix + 'b'
    (issorted,) = struct.unpack_from(flag_format, dmfile.read(1))
    (isclosed,) = struct.unpack_from(flag_format, dmfile.read(1))
    (num_tags,) = struct.unpack_from('>Q', dmfile.read(8))  # DM4 specifies this property as always big endian

    return DM4DirHeader(20, None, 0, issorted, isclosed, num_tags, root_header_offset)
169 | 
170 | 
def read_tag_header_dm4(dmfile: BinaryIO, endian: str) -> DM4TagHeader | DM4DirHeader | None:
    """Read the next tag or directory header at the current file position.

    :return: None when the tag type byte is 0; a DM4DirHeader when it is 20, with
        the file left right after the directory header; otherwise a DM4TagHeader,
        with the file left at the end of the tag data, ready to read the next tag.
    :raises ValueError: if a data tag does not carry the '%%%%' marker
    """
    header_start = dmfile.tell()
    (kind,) = struct.unpack_from(endian + 'B', dmfile.read(1))

    if kind == 0:
        return None
    if kind == 20:
        return _read_tag_dir_header_dm4(dmfile, endian)

    name = _read_tag_name(dmfile)
    (byte_length,) = struct.unpack_from('>Q', dmfile.read(8))  # DM4 specifies this property as always big endian

    data_start = dmfile.tell()

    _check_tag_verification_str(dmfile)
    info_length, info_types = _read_tag_data_info(dmfile)

    # Skip the payload; it is read later on demand from data_offset.
    dmfile.seek(data_start + byte_length)
    return DM4TagHeader(
        type=kind,
        name=name,
        byte_length=byte_length,
        array_length=info_length,
        data_type_code=info_types[0],
        header_offset=header_start,
        data_offset=data_start,
    )
192 | 
193 | 
def _read_tag_name(dmfile: BinaryIO) -> str | None:
    """Read a length-prefixed tag name at the current file position.

    The name length is a 2-byte big-endian unsigned integer.  A length of zero
    means the tag has no name and None is returned.  Otherwise that many bytes
    are decoded as UTF-8, silently dropping bytes that cannot be decoded.
    """
    # DM4 specifies this property as always big endian
    (name_length,) = struct.unpack_from('>H', dmfile.read(2))
    if name_length == 0:
        return None

    raw_name = dmfile.read(name_length)
    return raw_name.decode('utf-8', errors='ignore')
207 | 
208 | 
def _read_tag_dir_header_dm4(dmfile: BinaryIO, endian: str) -> DM4DirHeader:
    """Read a directory header whose tag type byte has already been consumed.

    Reads, in order: the name, the 8-byte length, the sorted and closed flags
    (one signed byte each) and the 8-byte tag count.  The returned header's
    data_offset is the file position right after these fields.
    """
    name = _read_tag_name(dmfile)
    (byte_length,) = struct.unpack_from('>Q', dmfile.read(8))  # DM4 specifies this property as always big endian

    flag_format = endian + 'b'
    (issorted,) = struct.unpack_from(flag_format, dmfile.read(1))
    (isclosed,) = struct.unpack_from(flag_format, dmfile.read(1))

    (num_tags,) = struct.unpack_from('>Q', dmfile.read(8))  # DM4 specifies this property as always big endian

    return DM4DirHeader(20, name, byte_length, issorted, isclosed, num_tags, dmfile.tell())
219 | 
220 | 
def _check_tag_verification_str(dmfile: BinaryIO) -> None:
    """
    DM4 has four bytes of % symbols in the tag.  Ensure it is there.

    Reads four bytes from the current file position and compares them as raw
    bytes, so corrupted data that is not valid text is reported with the same
    error as any other mismatch.

    :raises ValueError: if the verification string is not present
    """
    marker = dmfile.read(4)
    if marker != b'%%%%':
        raise ValueError(
            "Invalid tag data garbage string.  This suggests the file is not in DM4 format or is corrupted")
229 | 
230 | 
def _read_tag_data_info(dmfile: BinaryIO) -> tuple[int, tuple[int, ...]]:
    """Read a tag's type-information array at the current file position.

    The array is stored as an 8-byte big-endian count followed by that many
    8-byte big-endian signed integers.

    :return: (number of entries, tuple holding the entries)
    """
    # DM4 specifies this property as always big endian
    (entry_count,) = struct.unpack('>Q', dmfile.read(8))

    entries = struct.unpack(f'>{entry_count}q', dmfile.read(8 * entry_count))  # type: tuple[int, ...]

    return entry_count, entries
239 | 
240 | 
def _read_tag_data(dmfile: BinaryIO, tag: DM4TagHeader, endian: str) -> Any:
    """Read the data of a data tag (tag type 21).

    Dispatches on the first entry of the tag's type-information array: 15 is read
    as a group, 20 as an array, and any code present in the data type table as a
    single value.

    :raises ValueError: for a data type code that is none of the above, or when
        the '%%%%' marker is missing
    """
    assert (tag.type == 21)
    try:
        # NOTE(review): endian normally arrives already converted; converting it again
        # yields '<' for any prefix character (see _get_struct_endian_str).
        endian = _get_struct_endian_str(endian)
        dmfile.seek(tag.data_offset)

        _check_tag_verification_str(dmfile)
        _, type_info = _read_tag_data_info(dmfile)

        type_code = type_info[0]

        if type_code == 15:
            return read_tag_data_group(dmfile, tag, endian)
        if type_code == 20:
            return read_tag_data_array(dmfile, tag, endian)
        if type_code in dm4.format_config.data_type_dict:
            return _read_tag_data_value(dmfile, endian, type_code)

        # You can replace the exception with "return None" if you want to get the data you can
        # from the file and ignore reading the unknown data types
        raise ValueError("Unknown data type code " + str(type_code))

    finally:
        # Ensure we are in the correct position to read the next tag regardless of how reading this tag goes
        dmfile.seek(tag.data_offset + tag.byte_length)
269 | 
270 | 
def _read_tag_data_value(dmfile: BinaryIO, endian: str, type_code: int) -> Any:
    """Read one scalar value of the given data type at the current file position.

    :param endian: Byte-order description; it is passed through
        _get_struct_endian_str before being used as the struct prefix.
    :param int type_code: DM4 data type code; must be a key of the data type table.
    :return: The decoded value (a single value, not a tuple).
    :raises ValueError: if the data type code is not in the data type table
    """
    data_type = dm4.format_config.data_type_dict.get(type_code)
    if data_type is None:
        raise ValueError("Unknown data type code " + str(type_code))

    format_str = _get_struct_endian_str(endian) + data_type.type_format
    byte_data = dmfile.read(data_type.num_bytes)

    return struct.unpack_from(format_str, byte_data)[0]
280 | 
281 | 
def read_tag_data_group(dmfile: BinaryIO, tag: DM4TagHeader, endian: str) -> list[Any]:
    """Read the values of a group tag (data type code 15).

    The tag's type-information array is laid out as: 15, group name length,
    number of fields, then one (field name length, field type code) pair per
    field.  One value is read for every field, in order.

    :return: List with one decoded value per field of the group
    """
    endian = _get_struct_endian_str(endian)
    dmfile.seek(tag.data_offset)

    _check_tag_verification_str(dmfile)
    _, type_info = _read_tag_data_info(dmfile)

    assert (type_info[0] == 15)

    field_count = type_info[2]
    field_pairs = type_info[3:]

    # The type code is the second element of each (name length, type code) pair.
    field_type_codes = [field_pairs[(index * 2) + 1] for index in range(field_count)]  # type: list[int]

    return [_read_tag_data_value(dmfile, endian, type_code) for type_code in field_type_codes]
309 | 
310 | 
def system_byte_order() -> str:
    """Return the machine's byte order as a struct prefix: '<' for little endian, '>' otherwise."""
    if sys.byteorder == 'little':
        return '<'
    return '>'
314 | 
315 | 
def read_tag_data_array(dmfile: BinaryIO, tag: DM4TagHeader, endian: str) -> array.array | str:
    """Read the contents of an array tag (data type code 20).

    The tag's type-information array is laid out as: 20, element type code,
    element count.

    :param endian: struct-style prefix ('<' or '>') describing the byte order of the
        stored data; it is compared with system_byte_order() to decide on a byte swap.
    :return: An array.array holding the elements.  For arrays of groups (element
        type code 15), which are not implemented, a descriptive string is returned
        instead.
    :raises KeyError: if the element type code is not in the data type table
    """
    dmfile.seek(tag.data_offset)

    _check_tag_verification_str(dmfile)

    _, type_info = _read_tag_data_info(dmfile)

    assert (type_info[0] == 20)
    element_type_code = type_info[1]
    element_count = type_info[2]

    if element_type_code == 15:
        return "Array of groups length %d and type %d" % (element_count, element_type_code)

    assert (len(type_info) == 3)

    data_type = format_config.data_type_dict[element_type_code]

    # NOTE(review): the format characters '?' and 'c' in the data type table are not
    # array.array typecodes in Python 3 — confirm how such arrays should be read.
    data = array.array(data_type.type_format)
    data.fromfile(dmfile, element_count)

    # Correct the byte order if the machine order doesn't match the file order
    if endian != system_byte_order():
        data.byteswap()

    return data
342 | 


--------------------------------------------------------------------------------