├── tests
    ├── __init__.py
    ├── data
    │   ├── Example_Processed.ibd
    │   ├── Example_Continuous.ibd
    │   ├── Example_Continuous.imzML
    │   └── Example_Processed.imzML
    ├── context.py
    └── test_basic.py
├── pyimzml
    ├── ontology
    │   ├── __init__.py
    │   ├── dump_obo_files.py
    │   ├── ontology.py
    │   ├── ims.py
    │   └── uo.py
    ├── .gitignore
    ├── __init__.py
    ├── compression.py
    ├── metadata.py
    ├── ImzMLWriter.py
    └── ImzMLParser.py
├── Report.pdf
├── docs
    ├── requirements.txt
    ├── source
    │   ├── pyimzml
    │   │   ├── ImzMLWriter.rst
    │   │   └── ImzMLParser.rst
    │   ├── index.rst
    │   └── conf.py
    └── Makefile
├── .github
    └── workflows
    │   └── pythonpublish.yml
├── .gitignore
├── setup.py
├── CHANGELOG.md
├── README.rst
└── LICENSE


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pyimzml/ontology/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pyimzml/.gitignore:
--------------------------------------------------------------------------------
1 | *.DS_Store
2 | 


--------------------------------------------------------------------------------
/pyimzml/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = '1.5.5'
2 | 


--------------------------------------------------------------------------------
/Report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexandrovteam/pyimzML/HEAD/Report.pdf


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.10
2 | wheezy.template
3 | sphinx-rtd-theme==0.5.0
4 | sphinx_autodoc_typehints
5 | 


--------------------------------------------------------------------------------
/tests/data/Example_Processed.ibd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexandrovteam/pyimzML/HEAD/tests/data/Example_Processed.ibd


--------------------------------------------------------------------------------
/tests/data/Example_Continuous.ibd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexandrovteam/pyimzML/HEAD/tests/data/Example_Continuous.ibd


--------------------------------------------------------------------------------
/tests/data/Example_Continuous.imzML:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexandrovteam/pyimzML/HEAD/tests/data/Example_Continuous.imzML


--------------------------------------------------------------------------------
/tests/data/Example_Processed.imzML:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexandrovteam/pyimzML/HEAD/tests/data/Example_Processed.imzML


--------------------------------------------------------------------------------
/tests/context.py:
--------------------------------------------------------------------------------
1 | import numpy as np
def getspectrum(min_mz, max_mz, n_peaks):
    """
    Generate a random synthetic spectrum for use in tests.

    :param min_mz: lower bound (inclusive) of the generated m/z values.
    :param max_mz: upper bound (exclusive) of the generated m/z values.
    :param n_peaks: number of peaks to generate.
    :return: tuple ``(mzs, intensities)`` of two NumPy arrays of length ``n_peaks``;
        m/z values are uniform in ``[min_mz, max_mz)`` and intensities are non-negative.
    """
    # Scale by the width of the range rather than by max_mz, otherwise the values
    # would spread over [min_mz, min_mz + max_mz) and overshoot the requested maximum.
    mzs = min_mz + (max_mz - min_mz) * np.random.rand(n_peaks)
    intensities = np.abs(np.random.randn(n_peaks))
    return mzs, intensities


--------------------------------------------------------------------------------
/docs/source/pyimzml/ImzMLWriter.rst:
--------------------------------------------------------------------------------
 1 | ImzMLWriter
 2 | ========================
 3 | 
 4 | .. toctree::
 5 |     :maxdepth: 3
 6 | 
 7 | pyimzml.ImzMLWriter module
 8 | --------------------------
 9 | 
10 | .. automodule:: pyimzml.ImzMLWriter
11 |     :members:
12 |     :undoc-members:
13 | 
14 | 
15 | pyimzml.compression module
16 | --------------------------
17 | 
18 | This module holds adapters for compressing an ImzML file's binary data, currently only usable with ImzMLWriter.
19 | 
20 | .. automodule:: pyimzml.compression
21 |    :members:
22 |    :undoc-members:
23 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/.github/workflows/pythonpublish.yml:
--------------------------------------------------------------------------------
 1 | name: Publish pyimzML package
 2 | 
 3 | on: workflow_dispatch  # Manual trigger through Actions page
 4 | 
 5 | jobs:
 6 |   build_and_deploy:
 7 |     name: Create and publish package to PyPI
 8 |     runs-on: ubuntu-latest
 9 | 
10 |     steps:
11 | 
12 |     - name: Checkout repository
13 |       uses: actions/checkout@v4
14 | 
15 |     - name: Set up Python
16 |       uses: actions/setup-python@v4
17 |       with:
18 |         python-version: '3.8'
19 | 
20 |     - name: Install dependencies
21 |       run: |
22 |         python -m pip install --upgrade pip
23 |         pip install setuptools wheel twine
24 | 
25 |     - name: Build and publish
26 |       env:
27 |         TWINE_USERNAME: __token__
28 |         TWINE_PASSWORD: ${{ secrets.PYIMZML_PYPI_API_TOKEN }}
29 |       run: |
30 |         python setup.py sdist bdist_wheel
31 |         twine upload dist/*
32 | 


--------------------------------------------------------------------------------
/docs/source/pyimzml/ImzMLParser.rst:
--------------------------------------------------------------------------------
 1 | ImzMLParser
 2 | ========================
 3 | 
 4 | .. toctree::
 5 | 
 6 | pyimzml.ImzMLParser module
 7 | --------------------------
 8 | 
 9 | .. automodule:: pyimzml.ImzMLParser
10 |     :members:
11 |     :undoc-members:
12 | 
13 | pyimzml.metadata module
14 | --------------------------
15 | 
16 | This module contains the data structures used for the
17 | :py:attr:`pyimzml.ImzMLParser.ImzMLParser.metadata`
18 | and :py:attr:`pyimzml.ImzMLParser.ImzMLParser.spectrum_full_metadata` fields.
19 | 
20 | .. automodule:: pyimzml.metadata
21 |    :members:
22 |    :undoc-members:
23 | 
24 | pyimzml.ontology module
25 | --------------------------
26 | 
27 | This module contains exports of the controlled vocabulary ontologies used by the ImzML format,
28 | used for ensuring that ImzML metadata items can always be accessed by their canonical names
29 | or accessions.
30 | 
31 | .. automodule:: pyimzml.ontology.ontology
32 |    :members:
33 |    :undoc-members:
34 | 


--------------------------------------------------------------------------------
/pyimzml/compression.py:
--------------------------------------------------------------------------------
 1 | import zlib
 2 | 
 3 | class NoCompression(object):
 4 |     """
 5 |     No compression.
 6 |     """
 7 |     def __init__(self):
 8 |         pass
 9 | 
10 |     def rounding(self, data):
11 |         return data
12 | 
13 |     def compress(self, bytes):
14 |         return bytes
15 | 
16 |     def decompress(self, bytes):
17 |         return bytes
18 | 
19 |     name = "no compression"
20 | 
class ZlibCompression(object):
    """
    Adapter that compresses with zlib and can round values beforehand.
    Rounding makes the data compress better, at the cost of precision (it is lossy).

    :param round_amt:
        Number of digits to keep after the decimal point. None disables rounding.
    """
    name = "zlib compression"

    def __init__(self, round_amt=None):
        self.round_amt = round_amt

    def rounding(self, data):
        digits = self.round_amt
        if digits is None:
            # Rounding disabled: hand the input back untouched.
            return data
        return [round(value, digits) for value in data]

    def compress(self, bytes):
        packed = zlib.compress(bytes)
        return packed

    def decompress(self, bytes):
        unpacked = zlib.decompress(bytes)
        return unpacked
44 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | ### Python template
 2 | # Byte-compiled / optimized / DLL files
 3 | __pycache__/
 4 | *.py[cod]
 5 | *$py.class
 6 | 
 7 | # C extensions
 8 | *.so
 9 | 
10 | # Distribution / packaging
11 | .Python
12 | env/
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | 
28 | # PyInstaller
29 | #  Usually these files are written by a python script from a template
30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 | 
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *,cover
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | 
55 | # Sphinx documentation
56 | docs/build/
57 | 
58 | # PyBuilder
59 | target/
60 | 
61 | # IntelliJ project files
62 | .idea
63 | *.iml
64 | out
65 | gen
66 | # Created by .ignore support plugin (hsz.mobi)
67 | *.DS_Store
68 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | Welcome to pyimzML documentation!
 2 | ===================================
 3 | 
 4 | This package provides a parser of imzML format as well as a simple imzML writer.
 5 | 
 6 | Typical usage pattern is as follows:
 7 | 
 8 | .. code-block:: python
 9 | 
10 |     from pyimzml.ImzMLParser import ImzMLParser
11 | 
12 |     p = ImzMLParser('Example.imzML')
13 |     my_spectra = []
14 |     for idx, (x,y,z) in enumerate(p.coordinates):
15 |         mzs, intensities = p.getspectrum(idx)
16 |         my_spectra.append([mzs, intensities, (x, y, z)])
17 |         # ...
18 | 
19 |     from pyimzml.ImzMLWriter import ImzMLWriter
20 | 
21 |     with ImzMLWriter('output.imzML', polarity='positive') as w:
22 |         for mzs, intensities, coords in my_spectra:
23 |             # writes data to the .ibd file
24 |             w.addSpectrum(mzs, intensities, coords)
25 |     # at this point imzML file is written and files are closed
26 | 
27 | 
28 | .. _api:
29 | 
30 | API Reference
31 | =============
32 | 
33 | .. toctree::
34 |     :caption: API Reference
35 |     :glob:
36 | 
37 |     pyimzml/*
38 | 
39 | :ref:`genindex`
40 | 
41 | :ref:`modindex`
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from pyimzml import __version__
 2 | from setuptools import setup, find_packages
 3 | 
# Package metadata and build configuration for the pyimzML distribution.
setup(
    name='pyimzML',
    # Taken from pyimzml/__init__.py (imported at the top of this file), so the
    # version number is maintained in a single place.
    version=__version__,
    description="Parser for conversion of imzML 1.1.0 files",
    long_description="""
Parser for conversion of imzML 1.1.0 files. 
See specification here: https://ms-imaging.org/wp-content/uploads/2009/08/specifications_imzML1.1.0_RC1.pdf. 
Outputs data as python lists, dicts or numpy array.
""",
    # The project's main homepage.
    url='https://github.com/alexandrovteam/pyimzML',
    author='Alexandrov Team, EMBL',
    author_email='theodore.alexandrov@embl.de',

    license='Apache 2.0',
    # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Developers',
        'Intended Audience :: Science/Research',
        'Topic :: Scientific/Engineering :: Bio-Informatics',
        'License :: OSI Approved :: Apache Software License',

        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.8',
    ],
    keywords='bioinformatics imaging mass spectrometry parser imzML',

    # Package discovery skips the 'tests' and 'docs' packages.
    packages=find_packages(exclude=('tests', 'docs')),

    # Runtime dependencies installed together with the package.
    install_requires=['numpy', 'wheezy.template'],
)
36 | 


--------------------------------------------------------------------------------
/pyimzml/ontology/dump_obo_files.py:
--------------------------------------------------------------------------------
 1 | # This file is not intended for general use. Its purpose is to dump the .obo files that define
 2 | # the cvParam accession fields into a dependency-free format that can be bundled with pyimzml.
 3 | #
 4 | # It requires the additional pip dependency obonet==0.2.6
 5 | import re
 6 | from collections import defaultdict
 7 | from datetime import datetime
 8 | from pprint import pformat
 9 | 
10 | 
# Each entry is (output module name, URL of the .obo file, accession prefixes to export).
ontology_sources = [
    ('ms', 'https://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo', ['MS']),
    ('uo', 'https://raw.githubusercontent.com/bio-ontology-research-group/unit-ontology/master/unit.obo', ['UO']),
    ('ims', 'https://raw.githubusercontent.com/imzML/imzML/f2c8b6ce2affa8d8eef74d4bfe5922c815ff4dff/imagingMS.obo', ['IMS']),
]

if __name__ == '__main__':
    # obonet is an extra dependency that is only needed when this script is run,
    # so it is imported here instead of at module level.
    import obonet

    # Matches the xref that declares the value type of a term; compiled once for all nodes.
    value_type_pattern = re.compile(r'^value-type:xsd\\:(\w+) ')
    now = datetime.utcnow().isoformat()

    for ontology_name, src, namespaces in ontology_sources:
        print(f'Parsing {ontology_name}')
        graph = obonet.read_obo(src, ignore_obsolete=False)
        # str.startswith accepts a tuple, which replaces the per-prefix any() loop.
        prefixes = tuple(namespaces)
        terms = {}
        for node_id in graph.nodes:
            node = graph.nodes[node_id]
            # Export only named terms whose id starts with one of the wanted prefixes.
            if not node_id.startswith(prefixes) or 'name' not in node:
                continue

            # The first matching xref decides the dtype; terms without one get None.
            dtype = None
            for xref in node.get('xref', []):
                m = value_type_pattern.match(xref)
                if m:
                    dtype = 'xsd:' + m[1]
                    break

            terms[node_id] = (node['name'], dtype)

        # Python reads source files as UTF-8 by default, so write the generated module
        # as UTF-8 explicitly instead of relying on the platform's locale encoding
        # (pformat does not escape non-ASCII characters in term names).
        with open(f'./{ontology_name}.py', 'wt', encoding='utf-8') as f:
            f.write('# DO NOT EDIT BY HAND\n')
            f.write(f'# This file was autogenerated by dump_obo_files.py at {now}\n')
            terms_repr = pformat(terms, indent=4, width=100)
            f.write(f'terms = {terms_repr}\n')
45 | 
46 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | ## 1.5.5 (2024-11-04)
 2 | * Add dataset descriptive statistics
 3 | 
 4 | ## 1.5.4 (2024-03-13)
 5 | * Add `ImzMLParser.spectrum_mode` field.
 6 | * Fix a bug in parsing `userParam` value.
 7 | * Replaced broken links in README file
 8 | * Updated GitHub Actions workflow
 9 | 
10 | ## 1.5.3 (2022-11-09)
11 | * Fixing a bug in the documentation.
12 | 
13 | ## 1.5.2 (2022-07-21)
14 | * Change url for imzML 1.1.0 specification.
15 | 
16 | ## 1.5.1 (2021-08-16)
17 | * Fix code that causes `SyntaxWarning` in Python 3.8+
18 | * Change `ImzmlWriter` to output "linescan left right" instead of "line scan left right", to match the ontology
19 | 
20 | ## 1.5.0 (2021-07-19)
21 | * Handle mismatched accession for "positive scan"
22 | * Default `ImzMLParser` to ElementTree if no `parse_lib` is specified
23 | * Add `ImzMLParser.polarity` field
24 | 
25 | ## 1.4.1 (2020-10-26)
26 | * Fixed new modules missing from package
27 | 
28 | ## 1.4.0 (2020-10-23)
29 | * Add support for parsing all ImzML metadata
30 |     * Global metadata is always included through `ImzMLParser.metadata`
31 |     * Per-spectrum metadata requires `include_spectra_metadata='full'` 
32 |       or `include_spectra_metadata=[... list of accessions]` to be passed to ImzMLParser.
33 | * Handle mismatched accessions for specifying data types of binary arrays
34 | 
35 | ## 1.3.0 (2019-05-24)
36 | * Add `PortableSpectrumReader`, which holds the minimal subset of `ImzMLParser` needed to read m/z and intensity
37 |   data from the .ibd file, and is able to be pickled. 
38 |   
39 | ## 1.2.6 (2019-04-23)
40 | * Changed `ImzMLParser.getspectrum` to return NumPy arrays instead of Python lists
41 | 
42 | ## 1.2.5 (2019-04-10)
43 | * Added `parse_lib` parameter to `ImzMLParser`, allowing ElementTree to be used instead of lxml
44 | 
45 | ## 1.2.4 (2019-01-23)
46 | * Support `MS:1000519` and `MS:1000522` accessions for specifying integer binary data types 
47 | 
48 | ## 1.2.3 (2018-07-02)
49 | * Support `ImzMLParser` detecting .ibd files with a case-insensitive search
50 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | pyimzML
 2 | =======
 3 | 
 4 | .. image:: https://readthedocs.org/projects/pyimzml/badge/?version=latest
 5 |     :target: http://pyimzml.readthedocs.org/en/latest/?badge=latest
 6 |     :alt: Documentation Status
 7 | 
 8 | Description
 9 | -----------
10 | A parser for the imzML format used in imaging mass spectrometry. See specification
11 | `here  <https://ms-imaging.org/wp-content/uploads/2009/08/specifications_imzML1.1.0_RC1.pdf>`_.
12 | Designed for use with imzML version 1.1.0. Outputs data as Python lists, dicts and NumPy arrays.
13 | 
14 | The parser is developed by `Alexandrov Team <https://www.embl.org/groups/alexandrov/>`_ at EMBL Heidelberg.
15 | 
16 | Installation
17 | ------------
18 | pyimzML is available on `PyPI <https://pypi.python.org/pypi/pyimzML>`_. pyimzML
19 | should be installed with pip using one of these three options:
20 | 
21 | * ``$ pip install pyimzml`` will install pyimzML from PyPI (easiest).
22 | * ``$ pip install git+https://github.com/alexandrovteam/pyimzML.git`` will install pyimzML from GitHub.
23 | * Download the source tarball from `PyPI <https://pypi.python.org/pypi/pyimzML>`_ and ``$ pip install pyimzml-x-x-x.tar.gz``
24 | 
25 | **Dependency Notes**
26 | 
27 | * pyimzML has an optional dependency on `lxml <http://lxml.de/index.html>`_. If lxml is not installed, pyimzML will instead use the built-in cElementTree or ElementTree package.
28 | 
29 | **Testing**
30 | 
31 | To test your installation of pyimzML, you can download sample data from `imzml.org <https://www.ms-imaging.org/imzml/example-files-test/>`_ and run the tests.
32 | 
33 | Attribution
34 | -----------
35 | 
36 | The pyimzml/ontology directory includes data derived from the following ontologies:
37 | 
38 | * `Units of measurement ontology <http://www.obofoundry.org/ontology/uo.html>`_ by George Gkoutos `CC-BY license <https://creativecommons.org/licenses/by/3.0/>`_
39 | * `Mass spectrometry ontology <http://www.obofoundry.org/ontology/ms.html>`_ by `Gerhard Mayer et al. <https://pubmed.ncbi.nlm.nih.gov/23482073/>`_ `CC-BY license <https://creativecommons.org/licenses/by/3.0/>`_
40 | * `Imaging MS controlled vocabulary <https://www.ms-imaging.org/imzml/controlled-vocabulary/>`_
41 | 
42 | Documentation
43 | -------------
44 | 
45 | Documentation is available on `ReadTheDocs <http://pyimzml.readthedocs.org/en/latest/?badge=latest>`_
46 | 


--------------------------------------------------------------------------------
/pyimzml/ontology/ontology.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | from warnings import warn
  3 | 
  4 | from .uo import terms as uo_terms
  5 | from .ms import terms as ms_terms
  6 | from .ims import terms as ims_terms
  7 | 
  8 | all_terms = {}
  9 | all_terms.update(uo_terms)
 10 | all_terms.update(ms_terms)
 11 | all_terms.update(ims_terms)
 12 | 
 13 | DTYPE_MAPPING = {
 14 |     'xsd:string': str,
 15 |     'xsd:anyURI': str,
 16 |     'xsd:float': float,
 17 |     'xsd:double': float,
 18 |     'xsd:decimal': float,
 19 |     'xsd:nonNegativeFloat': float,
 20 |     'xsd:int': int,
 21 |     'xsd:integer': int,
 22 |     'xsd:positiveInteger': int,
 23 |     'xsd:nonNegativeInteger': int,
 24 |     'xsd:boolean': bool,
 25 |     'xsd:dateTime': datetime,
 26 | }
 27 | 
 28 | ACCESSION_FIX_MAPPING = {
 29 |     # Normally cvParam names will be updated to match the accession, but there are some
 30 |     # known cases where exporters use the correct name and incorrect accession. This is a mapping
 31 |     # of the known cases where the accession should be fixed, instead of the name.
 32 |     # (erroneous accession, name) -> fixed accession
 33 |     # Spectrum data types: https://github.com/alexandrovteam/pyimzML/pull/21#issuecomment-713818463
 34 |     ('MS:1000523', '32-bit float'): 'MS:1000521',
 35 |     ('MS:1000521', '64-bit float'): 'MS:1000523',
 36 |     # Polarity
 37 |     ('MS:1000128', 'positive scan'): 'MS:1000130'
 38 | }
 39 | 
 40 | 
 41 | def convert_xml_value(dtype, value):
 42 |     try:
 43 |         if dtype is not None:
 44 |             return DTYPE_MAPPING[dtype](value)
 45 |         elif value is None or value == '':
 46 |             # Many cv_params are flags and have either a None or empty-string value.
 47 |             # Replace their value with True in these cases, so their existence isn't so ambiguous.
 48 |             return True
 49 |         else:
 50 |             return value
 51 |     except KeyError:
 52 |         return value
 53 |     except ValueError:
 54 |         return None
 55 | 
 56 | 
 57 | def convert_term_name(accession):
 58 |     return all_terms.get(accession, (accession, None))[0]
 59 | 
 60 | 
 61 | def convert_cv_param(accession, value):
 62 |     """
 63 |     Looks up a term by accession number, and convert the provided value to the expected type.
 64 |     """
 65 |     name, dtype = all_terms.get(accession, (accession, None))
 66 |     converted_value = convert_xml_value(dtype, value)
 67 |     return converted_value
 68 | 
 69 | 
 70 | def lookup_and_convert_cv_param(accession, raw_name, value, unit_accession=None):
 71 |     """
 72 |     Looks up a term by accession number, and returns the term name, its value converted into
 73 |     the expected datatype, and the unit name (if a unit accession number is also given).
 74 |     """
 75 |     name, dtype = all_terms.get(accession, (raw_name or accession, None))
 76 |     converted_value = convert_xml_value(dtype, value)
 77 |     unit_name = all_terms.get(unit_accession, (unit_accession, None))[0]
 78 | 
 79 |     if accession not in all_terms:
 80 |         warn('Unrecognized accession in <cvParam>: %s (name: "%s").' % (accession, raw_name))
 81 |     elif name != raw_name:
 82 |         fixed_accession = ACCESSION_FIX_MAPPING.get((accession, raw_name))
 83 |         if fixed_accession is not None:
 84 |             warn(
 85 |                 'Accession %s ("%s") found with mismatched name "%s". '
 86 |                 'This is a known bug with some imzML conversion software - using accession '
 87 |                 '%s ("%s") instead.' % (accession, name, raw_name, fixed_accession, raw_name)
 88 |             )
 89 |             accession = fixed_accession
 90 |             name = raw_name
 91 |         else:
 92 |             warn(
 93 |                 'Accession %s found with incorrect name "%s". Updating name to "%s".'
 94 |                 % (accession, raw_name, name)
 95 |             )
 96 | 
 97 |     return accession, name, converted_value, unit_name
 98 | 
 99 | 
100 | 
101 | 


--------------------------------------------------------------------------------
/tests/test_basic.py:
--------------------------------------------------------------------------------
  1 | import pickle
  2 | import unittest
  3 | 
  4 | import numpy as np
  5 | from pathlib import Path
  6 | from .context import getspectrum
  7 | import pyimzml.ImzMLParser as imzmlp
  8 | import pyimzml.ImzMLWriter as imzmlw
  9 | 
 10 | # Example files from https://www.ms-imaging.org/imzml/example-files-test/
 11 | CONTINUOUS_IMZML_PATH = str(Path(__file__).parent / 'data/Example_Continuous.imzML')
 12 | CONTINUOUS_IBD_PATH = str(Path(__file__).parent / 'data/Example_Continuous.ibd')
 13 | PROCESSED_IMZML_PATH = str(Path(__file__).parent / 'data/Example_Processed.imzML')
 14 | PROCESSED_IBD_PATH = str(Path(__file__).parent / 'data/Example_Processed.ibd')
 15 | PARSE_LIB_TEST_CASES = ['lxml', 'ElementTree']
 16 | DATA_TEST_CASES = [
 17 |     ('Continuous', CONTINUOUS_IMZML_PATH, CONTINUOUS_IBD_PATH),
 18 |     ('Processed', PROCESSED_IMZML_PATH, PROCESSED_IBD_PATH),
 19 | ]
 20 | ALL_TEST_CASES = [(parse_lib, data_name, imzml_path, ibd_path)
 21 |                   for parse_lib in PARSE_LIB_TEST_CASES
 22 |                   for data_name, imzml_path, ibd_path in DATA_TEST_CASES]
 23 | 
 24 | 
class ImzMLParser(unittest.TestCase):
    # Tests for pyimzml.ImzMLParser. Apart from test_bisect, every test runs once per entry
    # of ALL_TEST_CASES (each parse_lib against each example dataset) inside a subTest, so
    # a failure is reported together with the combination that produced it.

    def test_bisect(self):
        # With mz=202.0 and tol=0.1 the expected indices are 2 and 4: 201.89 and
        # 202.10000001 lie just outside the window, 201.99 and 202.01 are its outermost hits.
        mzs = [100., 201.89, 201.99, 202.0, 202.01, 202.10000001, 400.]
        test_mz = 202.0
        test_tol = 0.1
        ix_l, ix_u = imzmlp._bisect_spectrum(mzs, test_mz, test_tol)
        assert ix_l == 2
        assert ix_u == 4
        assert ix_l <= ix_u
        assert mzs[ix_l] >= test_mz - test_tol
        assert mzs[ix_u] <= test_mz + test_tol

    def test_getspectrum(self):
        # Reads spectrum 4 from each example file and checks the parser-level fields
        # (polarity, spectrum mode, pixel count) plus dtype, length and value range.
        for parse_lib, data_name, imzml_path, ibd_path in ALL_TEST_CASES:
            with self.subTest(parse_lib=parse_lib, data=data_name),\
                 imzmlp.ImzMLParser(imzml_path, parse_lib=parse_lib) as parser:

                mzs, ints = parser.getspectrum(4)

                assert parser.polarity == 'negative'
                assert parser.spectrum_mode == 'profile'
                assert len(parser.coordinates) == 9
                assert mzs.dtype == np.float32
                assert ints.dtype == np.float32
                assert len(mzs) == 8399
                assert len(ints) == 8399
                assert np.all(mzs > 100.0)
                assert np.all(mzs < 800.0)
                assert np.all(ints >= 0.0)
                assert np.all(ints < 3.0)

    def test_files_instead_of_paths(self):
        # Same read as above, but the parser is given already-open binary file objects
        # for both the .imzML and the .ibd instead of a path.
        for parse_lib, data_name, imzml_path, ibd_path in ALL_TEST_CASES:
            with self.subTest(parse_lib=parse_lib, data=data_name),\
                 open(imzml_path, 'rb') as imzml_file,\
                 open(ibd_path, 'rb') as ibd_file,\
                 imzmlp.ImzMLParser(imzml_file, parse_lib=parse_lib, ibd_file=ibd_file) as parser:

                mzs, ints = parser.getspectrum(4)

                assert len(parser.coordinates) == 9
                assert len(mzs) > 0
                assert len(ints) > 0

    def test_parse_metadata(self):
        # Spot-checks one or two known values in each section of the global metadata
        # exposed through parser.metadata.
        for parse_lib, data_name, imzml_path, ibd_path in ALL_TEST_CASES:
            with self.subTest(parse_lib=parse_lib, data=data_name),\
                 imzmlp.ImzMLParser(imzml_path, parse_lib=parse_lib) as parser:
                md = parser.metadata
                # fileDescription section
                assert md.file_description['MS:1000579'] == True
                assert 'ibd SHA-1' in md.file_description
                assert len(md.file_description.source_files) == 1
                assert md.file_description.source_files['sf1']['Thermo RAW format'] == True
                assert md.file_description.source_files['sf1'].attrs['name'] == 'Example.raw'
                assert len(md.file_description.contacts) == 1

                # referenceableParamGroupList section
                assert len(md.referenceable_param_groups) == 4
                assert md.referenceable_param_groups['scan1']['increasing m/z scan']

                # sampleList section
                assert len(md.samples) == 1
                assert md.samples['sample1']['sample number'] == '1'

                # softwareList section
                assert len(md.softwares) == 2
                assert md.softwares['Xcalibur']['Xcalibur']

                # scanSettingsList section
                assert len(md.scan_settings) == 1
                assert md.scan_settings['scansettings1']['pixel size (x)'] == 100.0

                # instrumentConfigurationList section
                assert len(md.instrument_configurations) == 1
                ic = md.instrument_configurations['LTQFTUltra0']
                assert ic.param_by_name['instrument serial number'] == 'none'
                assert len(ic.components) == 3
                assert ic.components[0].type == 'source'
                assert ic.components[1].type == 'analyzer'
                assert ic.components[2].type == 'detector'
                assert ic.software_ref == 'Xcalibur'

                # dataProcessingList section
                assert len(md.data_processings) == 2
                assert md.data_processings['XcaliburProcessing'].methods[0].attrs['softwareRef'] == 'Xcalibur'
                assert md.data_processings['XcaliburProcessing'].methods[0]['low intensity data point removal']

    def test_parse_full_spectrum_metadata(self):
        # include_spectra_metadata='full' should yield one metadata object per pixel;
        # the first one is inspected in detail.
        for parse_lib, data_name, imzml_path, ibd_path in ALL_TEST_CASES:
            with self.subTest(parse_lib=parse_lib, data=data_name),\
                 imzmlp.ImzMLParser(imzml_path, parse_lib=parse_lib, include_spectra_metadata='full') as parser:
                assert len(parser.spectrum_full_metadata) == len(parser.coordinates)
                spectrum = parser.spectrum_full_metadata[0]
                assert spectrum['ms level'] == 0  # comes from referenceable param group
                assert spectrum['total ion current'] > 100
                assert spectrum.scan_list_params['no combination']
                assert spectrum.scans[0].attrs['instrumentConfigurationRef'] == 'LTQFTUltra0'
                assert spectrum.scans[0]['position x'] == 1
                assert 'm/z array' in spectrum.binary_data_arrays[0]
                assert 'intensity array' in spectrum.binary_data_arrays[1]

    def test_parse_partial_spectrum_metadata(self):
        # Passing a list of accessions should collect one value per pixel for each of
        # them; the unrecognized 'INVALID' accession is expected to yield None everywhere.
        TIC, POS_X, EXT_LEN, INVALID = 'MS:1000285', 'IMS:1000050', 'IMS:1000104', 'INVALID'
        ACCESSIONS = [TIC, POS_X, EXT_LEN, INVALID]
        for parse_lib, data_name, imzml_path, ibd_path in ALL_TEST_CASES:
            with self.subTest(parse_lib=parse_lib, data=data_name),\
                 imzmlp.ImzMLParser(imzml_path, parse_lib=parse_lib, include_spectra_metadata=ACCESSIONS) as parser:

                assert len(parser.spectrum_metadata_fields[TIC]) == len(parser.coordinates)
                assert len(parser.spectrum_metadata_fields[POS_X]) == len(parser.coordinates)
                assert len(parser.spectrum_metadata_fields[EXT_LEN]) == len(parser.coordinates)
                assert len(parser.spectrum_metadata_fields[INVALID]) == len(parser.coordinates)

                assert all(tic > 100 for tic in parser.spectrum_metadata_fields[TIC])
                assert all(isinstance(pos_x, int) for pos_x in parser.spectrum_metadata_fields[POS_X])
                assert all(isinstance(ext_len, int) for ext_len in parser.spectrum_metadata_fields[EXT_LEN])
                assert all(invalid is None for invalid in parser.spectrum_metadata_fields[INVALID])
143 | 
144 | 
class PortableSpectrumReader(unittest.TestCase):
    """Tests for the reader returned by ``ImzMLParser.portable_spectrum_reader()``."""

    def test_read_file(self):
        """A pickled-and-restored portable reader must read the same spectrum as the parser."""
        spectrum_idx = 4
        for parse_lib, data_name, imzml_path, ibd_path in ALL_TEST_CASES:
            with self.subTest(parse_lib=parse_lib, data=data_name):
                with imzmlp.ImzMLParser(imzml_path, parse_lib=parse_lib) as normal_parser:
                    with open(ibd_path, 'rb') as ibd_file:
                        # Reference result, read through the regular parser
                        expected_mzs, expected_ints = normal_parser.getspectrum(spectrum_idx)

                        # Second parser created with ibd_file=None; the reader it
                        # hands out is given the open .ibd file explicitly below
                        detached_parser = imzmlp.ImzMLParser(imzml_path, parse_lib=parse_lib, ibd_file=None)
                        reader = detached_parser.portable_spectrum_reader()
                        # Pickle and unpickle to ensure it survives for its intended use case
                        restored_reader = pickle.loads(pickle.dumps(reader))
                        actual_mzs, actual_ints = restored_reader.read_spectrum_from_file(ibd_file, spectrum_idx)

                        assert np.all(expected_mzs == actual_mzs)
                        assert np.all(expected_ints == actual_ints)
163 | 
164 | 
class ImzMLWriter(unittest.TestCase):
    """Smoke tests for the ``imzmlw.ImzMLWriter`` context manager."""

    def test_simple_write(self):
        """Write a single spectrum in "processed" mode and check output is produced.

        The writer is pointed at a temporary directory so the test leaves no
        files behind in the current working directory, and the intensities come
        from a fixed-seed local RNG so every run writes the same data.
        """
        # Local imports keep this block self-contained.
        import os
        import tempfile

        mzs = np.linspace(100, 1000, 20)
        # Local RandomState: reproducible without touching numpy's global RNG
        ints = np.random.RandomState(0).rand(mzs.shape[0])
        coords = [1, 1, 1]
        with tempfile.TemporaryDirectory() as tmp_dir:
            output_path = os.path.join(tmp_dir, "test.mzML")
            with imzmlw.ImzMLWriter(output_path, mode="processed") as imzml:
                imzml.addSpectrum(mzs, ints, coords=coords)
            # The writer has been closed at this point; it must have written
            # at least one file into the target directory.
            assert os.listdir(tmp_dir)
172 | 
173 | 
# Run the whole test module through unittest when executed as a script.
if __name__ == '__main__':
    unittest.main()
176 | 


--------------------------------------------------------------------------------
/pyimzml/ontology/ims.py:
--------------------------------------------------------------------------------
  1 | # DO NOT EDIT BY HAND
  2 | # This file was autogenerated by dump_obo_files.py at 2020-10-21T18:55:01.621812
  3 | terms = {   'IMS:0000000': ('Imaging Mass Spectrometry Ontology', None),
  4 |     'IMS:1000001': ('ibd offset handle', None),
  5 |     'IMS:1000002': ('sample stage', None),
  6 |     'IMS:1000003': ('ibd binary type', None),
  7 |     'IMS:1000004': ('image parameter', None),
  8 |     'IMS:1000005': ('spectrum position', None),
  9 |     'IMS:1000007': ('ibd file', None),
 10 |     'IMS:1000008': ('ibd identification', None),
 11 |     'IMS:1000009': ('ibd checksum', None),
 12 |     'IMS:1000010': ('scan', None),
 13 |     'IMS:1000011': ('probe scan mode', None),
 14 |     'IMS:1000012': ('imaging ion source', None),
 15 |     'IMS:1000013': ('unit', None),
 16 |     'IMS:1000014': ('ibd data type', None),
 17 |     'IMS:1000015': ('charge density', None),
 18 |     'IMS:1000030': ('continuous', None),
 19 |     'IMS:1000031': ('processed', None),
 20 |     'IMS:1000040': ('linescan sequence', None),
 21 |     'IMS:1000041': ('scan pattern', None),
 22 |     'IMS:1000042': ('max count of pixels x', 'xsd:nonNegativeInteger'),
 23 |     'IMS:1000043': ('max count of pixels y', 'xsd:nonNegativeInteger'),
 24 |     'IMS:1000044': ('max dimension x', 'xsd:nonNegativeInteger'),
 25 |     'IMS:1000045': ('max dimension y', 'xsd:nonNegativeInteger'),
 26 |     'IMS:1000046': ('pixel size (x)', 'xsd:float'),
 27 |     'IMS:1000047': ('pixel size y', 'xsd:float'),
 28 |     'IMS:1000048': ('scan type', None),
 29 |     'IMS:1000049': ('line scan direction', None),
 30 |     'IMS:1000050': ('position x', 'xsd:nonNegativeInteger'),
 31 |     'IMS:1000051': ('position y', 'xsd:nonNegativeInteger'),
 32 |     'IMS:1000052': ('position z', 'xsd:nonNegativeInteger'),
 33 |     'IMS:1000053': ('absolute position offset x', 'xsd:nonNegativeFloat'),
 34 |     'IMS:1000054': ('absolute position offset y', 'xsd:nonNegativeFloat'),
 35 |     'IMS:1000055': ('subimage position x', 'xsd:nonNegativeInteger'),
 36 |     'IMS:1000056': ('subimage position y', 'xsd:nonNegativeInteger'),
 37 |     'IMS:1000057': ('subimage position z', 'xsd:nonNegativeInteger'),
 38 |     'IMS:1000070': ('external binary uri', 'xsd:string'),
 39 |     'IMS:1000080': ('universally unique identifier', 'xsd:string'),
 40 |     'IMS:1000090': ('ibd MD5', 'xsd:string'),
 41 |     'IMS:1000091': ('ibd SHA-1', 'xsd:string'),
 42 |     'IMS:1000101': ('external data', 'xsd:boolean'),
 43 |     'IMS:1000102': ('external offset', 'xsd:nonNegativeInteger'),
 44 |     'IMS:1000103': ('external array length', 'xsd:nonNegativeInteger'),
 45 |     'IMS:1000104': ('external encoded length', 'xsd:nonNegativeInteger'),
 46 |     'IMS:1000110': ('pixel mode', None),
 47 |     'IMS:1000111': ('raster mode', None),
 48 |     'IMS:1000112': ('stigmatic mode', None),
 49 |     'IMS:1000120': ('SIMS parameter', None),
 50 |     'IMS:1000121': ('DESI parameter', None),
 51 |     'IMS:1000130': ('ions per square centimeter', None),
 52 |     'IMS:1000131': ('milliliters per minute', None),
 53 |     'IMS:1000141': ('32-bit integer', None),
 54 |     'IMS:1000142': ('64-bit integer', None),
 55 |     'IMS:1000199': ('sample stage attribute', None),
 56 |     'IMS:1000200': ('position accuracy', 'xsd:float'),
 57 |     'IMS:1000201': ('step size', 'xsd:float'),
 58 |     'IMS:1000202': ('target material', 'xsd:string'),
 59 |     'IMS:1000203': ('stage scan speed', 'xsd:float'),
 60 |     'IMS:1000400': ('bottom up', None),
 61 |     'IMS:1000401': ('top down', None),
 62 |     'IMS:1000402': ('left right', None),
 63 |     'IMS:1000403': ('right left', None),
 64 |     'IMS:1000404': ('no direction', None),
 65 |     'IMS:1000410': ('meandering', None),
 66 |     'IMS:1000411': ('one way', None),
 67 |     'IMS:1000412': ('random access', None),
 68 |     'IMS:1000413': ('flyback', None),
 69 |     'IMS:1000480': ('horizontal line scan', None),
 70 |     'IMS:1000481': ('vertical line scan', None),
 71 |     'IMS:1000490': ('linescan right left', None),
 72 |     'IMS:1000491': ('linescan left right', None),
 73 |     'IMS:1000492': ('linescan bottom up', None),
 74 |     'IMS:1000493': ('linescan top down', None),
 75 |     'IMS:1000500': ('conversion to imzML', None),
 76 |     'IMS:1000501': ('imzMLParser', None),
 77 |     'IMS:1000502': ('imzMLConverter', None),
 78 |     'IMS:1000503': ('imzMLValidator', None),
 79 |     'IMS:1000504': ('SpectralAnalysis', None),
 80 |     'IMS:1001201': ('primary ion gun species', None),
 81 |     'IMS:1001202': ('beam energy', 'xsd:float'),
 82 |     'IMS:1001203': ('beam current', 'xsd:float'),
 83 |     'IMS:1001204': ('cycle time', 'xsd:float'),
 84 |     'IMS:1001205': ('time resolution', 'xsd:float'),
 85 |     'IMS:1001206': ('polarity', None),
 86 |     'IMS:1001207': ('primary ion dose density', 'xsd:float'),
 87 |     'IMS:1001211': ('solvent', None),
 88 |     'IMS:1001212': ('spray voltage', 'xsd:float'),
 89 |     'IMS:1001213': ('solvent flowrate', 'xsd:float'),
 90 |     'IMS:1001500': ('sample type', None),
 91 |     'IMS:1001510': ('inorganic sample', None),
 92 |     'IMS:1001520': ('organic sample', None),
 93 |     'IMS:1001521': ('biological sample', None),
 94 |     'IMS:1001522': ('clinical sample', None),
 95 |     'IMS:1001523': ('pathological sample', None),
 96 |     'IMS:1001524': ('food sample', 'xsd:string'),
 97 |     'IMS:1001525': ('bacteria sample', 'xsd:string'),
 98 |     'IMS:1001600': ('sample origin attribute', None),
 99 |     'IMS:1001601': ('sample ethical approval', 'xsd:string'),
100 |     'IMS:1001602': ('sample origin institution', 'xsd:string'),
101 |     'IMS:1002000': ('analysed sample portion', None),
102 |     'IMS:1002001': ('sectioned sample', None),
103 |     'IMS:1002002': ('whole sample', None),
104 |     'IMS:1002003': ('blockface sample', None),
105 |     'IMS:1002005': ('sampling method', 'xsd:string'),
106 |     'IMS:1002010': ('sample storage condition', None),
107 |     'IMS:1002011': ('fixed', 'xsd:string'),
108 |     'IMS:1002012': ('fresh frozen', 'xsd:string'),
109 |     'IMS:1002013': ('embedded', 'xsd:string'),
110 |     'IMS:1002014': ('sample storage attribute', None),
111 |     'IMS:1002015': ('sample storage time before sectioning', 'xsd:float'),
112 |     'IMS:1002016': ('section storage time after sectioning and before analysis', 'xsd:float'),
113 |     'IMS:1002017': ('sample storage temperature', 'xsd:float'),
114 |     'IMS:1002018': ('freezing method', 'xsd:string'),
115 |     'IMS:1002019': ('flash frozen', None),
116 |     'IMS:1002020': ('sample storage time before analysis', 'xsd:float'),
117 |     'IMS:1002021': ('sample storage method', 'xsd:string'),
118 |     'IMS:1002050': ('stabilisation', None),
119 |     'IMS:1002051': ('no stabilisation performed', None),
120 |     'IMS:1002052': ('stabilisation method', 'xsd:string'),
121 |     'IMS:1002053': ('rapid heating stabilisation', None),
122 |     'IMS:1002054': ('focused microwave irradiation stabilisation', None),
123 |     'IMS:1002100': ('sectioning method', None),
124 |     'IMS:1002101': ('microtome sectioning', 'xsd:string'),
125 |     'IMS:1002102': ('microtome model', 'xsd:string'),
126 |     'IMS:1002103': ('sectioning attribute', None),
127 |     'IMS:1002104': ('cutting temperature', 'xsd:float'),
128 |     'IMS:1002105': ('cutting thickness', 'xsd:float'),
129 |     'IMS:1002106': ('blade sectioning', 'xsd:string'),
130 |     'IMS:1002107': ('section thickness', 'xsd:float'),
131 |     'IMS:1002200': ('mounting method', 'xsd:string'),
132 |     'IMS:1002201': ('thaw mounting', None),
133 |     'IMS:1002202': ('tape mounting', None),
134 |     'IMS:1002300': ('sample drying', 'xsd:string'),
135 |     'IMS:1002301': ('drying method attribute', None),
136 |     'IMS:1002302': ('drying time', 'xsd:float'),
137 |     'IMS:1002303': ('no drying performed', None),
138 |     'IMS:1002304': ('sample drying method', 'xsd:string'),
139 |     'IMS:1002400': ('sample washing', None),
140 |     'IMS:1002401': ('no washing performed', None),
141 |     'IMS:1002402': ('sample washing method', 'xsd:string'),
142 |     'IMS:1002500': ('on-sample chemistry', None),
143 |     'IMS:1002501': ('no on-sample chemistry performed', None),
144 |     'IMS:1002502': ('on-sample chemistry method', 'xsd:string'),
145 |     'IMS:1002503': ('on-sample chemistry attribute', None),
146 |     'IMS:1002504': ('on-sample chemistry reagent', 'xsd:string'),
147 |     'IMS:1002600': ('in-experiment quantification', None),
148 |     'IMS:1002601': ('no in-experiment quantification performed', None),
149 |     'IMS:1002602': ('in-experiment quantification method', 'xsd:string'),
150 |     'IMS:1002603': ('internal standard quantification', None),
151 |     'IMS:1002604': ('adjacent dilution series quantification', None),
152 |     'IMS:1002605': ('sprayed-on standard quantification', None),
153 |     'IMS:1003000': ('spraying method', None),
154 |     'IMS:1003001': ('automated spraying of matrix', None),
155 |     'IMS:1003002': ('manual spraying of matrix', None),
156 |     'IMS:1003003': ('automated spraying device', 'xsd:string'),
157 |     'IMS:1003004': ('automated sprayer attribute', None),
158 |     'IMS:1003005': ('automated sprayer nozzle movement speed', 'xsd:float'),
159 |     'IMS:1003006': ('automated sprayer flow-rate', 'xsd:float'),
160 |     'IMS:1003007': ('automated sprayer nozzle temperature', 'xsd:float'),
161 |     'IMS:1005001': ('xz compression', None),
162 |     'IMS:1005002': ('lz4 compression', None),
163 |     'IMS:1005003': ('zstd compression', None),
164 |     'IMS:1006000': ('repetition rate', 'xsd:float'),
165 |     'IMS:1006001': ('laser shots per spectrum', 'xsd:float'),
166 |     'IMS:1006002': ('m/z at which resolution was measured', 'xsd:float'),
167 |     'IMS:1006003': ('postmortem time', 'xsd:float'),
168 |     'IMS:1006004': ('age', 'xsd:float'),
169 |     'IMS:1006005': ('sample species', 'xsd:string'),
170 |     'IMS:1006006': ('sample organ', 'xsd:string'),
171 |     'IMS:1006007': ('sample condition', 'xsd:string'),
172 |     'IMS:1006008': ('optical image location', 'xsd:string'),
173 |     'IMS:1006009': ('optical image attribute', None),
174 |     'IMS:1006010': ('optical image subject', None),
175 |     'IMS:1006011': ('optical image of analysed sample', 'xsd:string'),
176 |     'IMS:1006012': ('optical image of adjacent section of analysed sample', 'xsd:string'),
177 |     'IMS:1006013': ('sample morphological classification', 'xsd:string'),
178 |     'IMS:1006014': ('sampling location', 'xsd:string'),
179 |     'IMS:1006015': ('staining method used for optical image', 'xsd:string'),
180 |     'IMS:1006016': ('ion source model', 'xsd:string'),
181 |     'IMS:1006017': ('method used to align optical image', 'xsd:string'),
182 |     'IMS:1100000': ('8-bit integer', None),
183 |     'IMS:1100001': ('16-bit integer', None)}
184 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # pyimzML documentation build configuration file, created by
  4 | # sphinx-quickstart on Tue Feb  9 15:19:18 2016.
  5 | #
  6 | # This file is execfile()d with the current directory set to its
  7 | # containing dir.
  8 | #
  9 | # Note that not all possible configuration values are present in this
 10 | # autogenerated file.
 11 | #
 12 | # All configuration values have a default; values that are commented out
 13 | # serve to show the default.
 14 | 
 15 | import os
 16 | import sys
 17 | 
 18 | import sphinx_rtd_theme
 19 | 
 20 | sys.path.append('..')
 21 | sys.path.append('../..')
 22 | 
 23 | from pyimzml import __version__  # noqa
 24 | 
 25 | # If extensions (or modules to document with autodoc) are in another directory,
 26 | # add these directories to sys.path here. If the directory is relative to the
 27 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 28 | sys.path.insert(0, os.path.abspath('../..'))
 29 | 
 30 | # -- General configuration ------------------------------------------------
 31 | 
 32 | # If your documentation needs a minimal Sphinx version, state it here.
 33 | # needs_sphinx = '1.0'
 34 | 
 35 | # Add any Sphinx extension module names here, as strings. They can be
 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 37 | # ones.
 38 | extensions = [
 39 |     'sphinx.ext.autodoc',
 40 |     'sphinx.ext.napoleon',
 41 |     'sphinx.ext.viewcode',
 42 |     'sphinx_rtd_theme',
 43 |     'sphinx_autodoc_typehints',
 44 |     'sphinx.ext.autosummary',
 45 | ]
 46 | 
 47 | # source_parsers = {
 48 | #     '.md': CommonMarkParser,
 49 | # }
 50 | 
 51 | # Add any paths that contain templates here, relative to this directory.
 52 | templates_path = ['_templates']
 53 | 
 54 | # The suffix(es) of source filenames.
 55 | # You can specify multiple suffix as a list of string:
 56 | # source_suffix = ['.rst', '.md']
 57 | source_suffix = '.rst'
 58 | 
 59 | # The encoding of source files.
 60 | # source_encoding = 'utf-8-sig'
 61 | 
 62 | # The master toctree document.
 63 | master_doc = 'index'
 64 | 
 65 | # General information about the project.
 66 | project = u'pyimzML'
 67 | copyright = u'2016, Alexandrov Team'
 68 | author = u'Alexandrov Team'
 69 | 
 70 | # The version info for the project you're documenting, acts as replacement for
 71 | # |version| and |release|, also used in various other places throughout the
 72 | # built documents.
 73 | #
 74 | # The short X.Y version.
 75 | version = __version__
 76 | # The full version, including alpha/beta/rc tags.
 77 | release = __version__
 78 | 
 79 | # The language for content autogenerated by Sphinx. Refer to documentation
 80 | # for a list of supported languages.
 81 | #
 82 | # This is also used if you do content translation via gettext catalogs.
 83 | # Usually you set "language" from the command line for these cases.
 84 | language = None
 85 | 
 86 | # There are two options for replacing |today|: either, you set today to some
 87 | # non-false value, then it is used:
 88 | # today = ''
 89 | # Else, today_fmt is used as the format for a strftime call.
 90 | # today_fmt = '%B %d, %Y'
 91 | 
 92 | # List of patterns, relative to source directory, that match files and
 93 | # directories to ignore when looking for source files.
 94 | exclude_patterns = ['docs', 'tests', 'setup']
 95 | 
 96 | # The reST default role (used for this markup: `text`) to use for all
 97 | # documents.
 98 | # default_role = None
 99 | 
100 | # If true, '()' will be appended to :func: etc. cross-reference text.
101 | # add_function_parentheses = True
102 | 
103 | # If true, the current module name will be prepended to all description
104 | # unit titles (such as .. function::).
105 | # add_module_names = True
106 | 
107 | # If true, sectionauthor and moduleauthor directives will be shown in the
108 | # output. They are ignored by default.
109 | # show_authors = False
110 | 
111 | # The name of the Pygments (syntax highlighting) style to use.
112 | pygments_style = 'sphinx'
113 | 
114 | # A list of ignored prefixes for module index sorting.
115 | # modindex_common_prefix = []
116 | 
117 | # If true, keep warnings as "system message" paragraphs in the built documents.
118 | # keep_warnings = False
119 | 
120 | # If true, `todo` and `todoList` produce output, else they produce nothing.
121 | todo_include_todos = False
122 | 
123 | # -- Options for HTML output ----------------------------------------------
124 | 
125 | # The theme to use for HTML and HTML Help pages.  See the documentation for
126 | # a list of builtin themes.
127 | html_theme = 'sphinx_rtd_theme'
128 | 
129 | # Theme options are theme-specific and customize the look and feel of a theme
130 | # further.  For a list of options available for each theme, see the
131 | # documentation.
132 | # html_theme_options = {}
133 | 
# Add any paths that contain custom themes here, relative to this directory.
# No entry is needed for sphinx_rtd_theme: it is listed in `extensions` and
# selected through `html_theme`, and its get_html_theme_path() helper is
# deprecated by the theme itself.
html_theme_path = []
136 | 
137 | # The name for this set of Sphinx documents.  If None, it defaults to
138 | # "<project> v<release> documentation".
139 | # html_title = None
140 | 
141 | # A shorter title for the navigation bar.  Default is the same as html_title.
142 | # html_short_title = None
143 | 
144 | # The name of an image file (relative to this directory) to place at the top
145 | # of the sidebar.
146 | # html_logo = None
147 | 
148 | # The name of an image file (within the static path) to use as favicon of the
149 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
150 | # pixels large.
151 | # html_favicon = None
152 | 
153 | # Add any paths that contain custom static files (such as style sheets) here,
154 | # relative to this directory. They are copied after the builtin static files,
155 | # so a file named "default.css" will overwrite the builtin "default.css".
156 | # html_static_path = ['_static']
157 | 
158 | # Add any extra paths that contain custom files (such as robots.txt or
159 | # .htaccess) here, relative to this directory. These files are copied
160 | # directly to the root of the documentation.
161 | # html_extra_path = []
162 | 
163 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
164 | # using the given strftime format.
165 | # html_last_updated_fmt = '%b %d, %Y'
166 | 
167 | # If true, SmartyPants will be used to convert quotes and dashes to
168 | # typographically correct entities.
169 | # html_use_smartypants = True
170 | 
171 | # Custom sidebar templates, maps document names to template names.
172 | # html_sidebars = {}
173 | 
174 | # Additional templates that should be rendered to pages, maps page names to
175 | # template names.
176 | # html_additional_pages = {}
177 | 
178 | # If false, no module index is generated.
179 | html_domain_indices = True
180 | 
181 | # If false, no index is generated.
182 | # html_use_index = True
183 | 
184 | # If true, the index is split into individual pages for each letter.
185 | # html_split_index = False
186 | 
187 | # If true, links to the reST sources are added to the pages.
188 | # html_show_sourcelink = True
189 | 
190 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
191 | # html_show_sphinx = True
192 | 
193 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
194 | # html_show_copyright = True
195 | 
196 | # If true, an OpenSearch description file will be output, and all pages will
197 | # contain a <link> tag referring to it.  The value of this option must be the
198 | # base URL from which the finished HTML is served.
199 | # html_use_opensearch = ''
200 | 
201 | # This is the file name suffix for HTML files (e.g. ".xhtml").
202 | # html_file_suffix = None
203 | 
204 | # Language to be used for generating the HTML full-text search index.
205 | # Sphinx supports the following languages:
206 | #   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
207 | #   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
208 | # html_search_language = 'en'
209 | 
210 | # A dictionary with options for the search language support, empty by default.
211 | # Now only 'ja' uses this config value
212 | # html_search_options = {'type': 'default'}
213 | 
214 | # The name of a javascript file (relative to the configuration directory) that
215 | # implements a search results scorer. If empty, the default will be used.
216 | # html_search_scorer = 'scorer.js'
217 | 
218 | # Output file base name for HTML help builder.
219 | htmlhelp_basename = 'pyimzMLdoc'
220 | 
221 | # -- Options for LaTeX output ---------------------------------------------
222 | 
223 | latex_elements = {
224 |     # The paper size ('letterpaper' or 'a4paper').
225 |     # 'papersize': 'letterpaper',
226 | 
227 |     # The font size ('10pt', '11pt' or '12pt').
228 |     # 'pointsize': '10pt',
229 | 
230 |     # Additional stuff for the LaTeX preamble.
231 |     # 'preamble': '',
232 | 
233 |     # Latex figure (float) alignment
234 |     # 'figure_align': 'htbp',
235 | }
236 | 
237 | # Grouping the document tree into LaTeX files. List of tuples
238 | # (source start file, target name, title,
239 | #  author, documentclass [howto, manual, or own class]).
240 | latex_documents = [
241 |     (master_doc, 'pyimzML.tex', u'pyimzML Documentation',
242 |      u'Alexandrov Team', 'manual'),
243 | ]
244 | 
245 | # The name of an image file (relative to this directory) to place at the top of
246 | # the title page.
247 | # latex_logo = None
248 | 
249 | # For "manual" documents, if this is true, then toplevel headings are parts,
250 | # not chapters.
251 | # latex_use_parts = False
252 | 
253 | # If true, show page references after internal links.
254 | # latex_show_pagerefs = False
255 | 
256 | # If true, show URL addresses after external links.
257 | # latex_show_urls = False
258 | 
259 | # Documents to append as an appendix to all manuals.
260 | # latex_appendices = []
261 | 
262 | # If false, no module index is generated.
263 | latex_domain_indices = True
264 | 
265 | # -- Options for manual page output ---------------------------------------
266 | 
267 | # One entry per manual page. List of tuples
268 | # (source start file, name, description, authors, manual section).
269 | man_pages = [
270 |     (master_doc, 'pyimzml', u'pyimzML Documentation',
271 |      [author], 1)
272 | ]
273 | 
274 | # If true, show URL addresses after external links.
275 | # man_show_urls = False
276 | 
277 | 
278 | # -- Options for Texinfo output -------------------------------------------
279 | 
280 | # Grouping the document tree into Texinfo files. List of tuples
281 | # (source start file, target name, title, author,
282 | #  dir menu entry, description, category)
283 | texinfo_documents = [
284 |     (master_doc, 'pyimzML', u'pyimzML Documentation',
285 |      author, 'pyimzML', 'One line description of project.',
286 |      'Miscellaneous'),
287 | ]
288 | 
289 | # Documents to append as an appendix to all manuals.
290 | # texinfo_appendices = []
291 | 
292 | # If false, no module index is generated.
293 | texinfo_domain_indices = True
294 | 
295 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
296 | # texinfo_show_urls = 'footnote'
297 | 
298 | # If true, do not generate a @detailmenu in the "Top" node's menu.
299 | # texinfo_no_detailmenu = False
300 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 
203 | 


--------------------------------------------------------------------------------
/pyimzml/metadata.py:
--------------------------------------------------------------------------------
  1 | from warnings import warn
  2 | 
  3 | from pyimzml.ontology.ontology import lookup_and_convert_cv_param, convert_xml_value, convert_term_name
  4 | 
  5 | XMLNS_PREFIX = "{http://psi.hupo.org/ms/mzml}"
  6 | 
  7 | 
  8 | def _deep_pretty(obj):
  9 |     if isinstance(obj, list):
 10 |         return [_deep_pretty(item) for item in obj]
 11 |     if isinstance(obj, dict):
 12 |         return {k: _deep_pretty(v) for k, v in obj.items()}
 13 |     if hasattr(obj, 'pretty'):
 14 |         return obj.pretty()
 15 |     return obj
 16 | 
 17 | 
 18 | class _ParseUtils:
 19 |     """
 20 |     Utility class for common parsing patterns and tracking created param groups so that
 21 |     their refs to other param groups can later be linked up.
 22 |     """
 23 |     def __init__(self):
 24 |         self.created_param_groups = []
 25 | 
 26 |     def param_group(self, node, **extra_fields):
 27 |         pg = ParamGroup(node, **extra_fields)
 28 |         self.created_param_groups.append(pg)
 29 |         return pg
 30 | 
 31 |     def optional_param_group(self, parent_node, xpath, **extra_fields):
 32 |         node = parent_node.find(xpath.format(XMLNS_PREFIX))
 33 |         return self.param_group(node, **extra_fields) if node is not None else None
 34 | 
 35 |     def param_groups_by_id(self, parent_node, xpath):
 36 |         return dict(
 37 |             (n.get('id', idx), self.param_group(n))
 38 |             for idx, n in enumerate(parent_node.findall(xpath.format(XMLNS_PREFIX)))
 39 |         )
 40 | 
 41 |     def param_groups_list(self, parent_node, xpath):
 42 |         return [self.param_group(n) for n in parent_node.findall(xpath.format(XMLNS_PREFIX))]
 43 | 
 44 |     def refs_list(self, parent_node, xpath):
 45 |         return [n.attrib.get('ref') for n in parent_node.findall(xpath.format(XMLNS_PREFIX))]
 46 | 
 47 | 
 48 | class Metadata:
 49 |     def __init__(self, root):
 50 |         """
 51 |         Parse metadata headers from an imzML file into a structured format for easy access in Python code.
 52 |         This class deliberately excludes spectra, as they account for significantly more memory use
 53 |         and parsing time, and typically should be treated separately.
 54 |         """
 55 |         pu = _ParseUtils()
 56 | 
 57 |         fd_node = root.find('{0}fileDescription'.format(XMLNS_PREFIX))
 58 |         self.file_description = pu.param_group(
 59 |             fd_node.find('{0}fileContent'.format(XMLNS_PREFIX)),
 60 |             source_files=pu.param_groups_by_id(fd_node, '{0}sourceFileList/{0}sourceFile'),
 61 |             contacts=pu.param_groups_list(fd_node, '{0}contact'),
 62 |         )
 63 | 
 64 |         self.referenceable_param_groups = pu.param_groups_by_id(
 65 |             root,
 66 |             '{0}referenceableParamGroupList/{0}referenceableParamGroup'
 67 |         )
 68 |         self.samples = pu.param_groups_by_id(root, '{0}sampleList/{0}sample')
 69 |         self.softwares = pu.param_groups_by_id(root, '{0}softwareList/{0}software')
 70 | 
 71 |         self.scan_settings = {}
 72 |         for node in root.findall('{0}scanSettingsList/{0}scanSettings'.format(XMLNS_PREFIX)):
 73 |             self.scan_settings[node.get('id')] = pu.param_group(
 74 |                 node,
 75 |                 source_file_refs=pu.refs_list(node, '{0}sourceFileRefList/{0}sourceFileRef'),
 76 |                 targets=pu.param_groups_by_id(node, '{0}targetList/{0}target'),
 77 |             )
 78 | 
 79 |         self.instrument_configurations = {}
 80 |         for node in root.findall('{0}instrumentConfigurationList/{0}instrumentConfiguration'.format(XMLNS_PREFIX)):
 81 |             self.instrument_configurations[node.get('id')] = pu.param_group(
 82 |                 node,
 83 |                 components=pu.param_groups_list(node, '{0}componentList/*'),
 84 |                 software_ref=next(iter(pu.refs_list(node, '{0}softwareRef')), None),
 85 |             )
 86 | 
 87 |         self.data_processings = {}
 88 |         for node in root.findall('{0}dataProcessingList/{0}dataProcessing'.format(XMLNS_PREFIX)):
 89 |             self.data_processings[node.get('id')] = pu.param_group(
 90 |                 node,
 91 |                 methods=pu.param_groups_list(node, '{0}processingMethod')
 92 |             )
 93 | 
 94 |         # Apply referenceable_param_groups
 95 |         for pg in pu.created_param_groups:
 96 |             pg.apply_referenceable_param_groups(self.referenceable_param_groups)
 97 | 
 98 |     def pretty(self):
 99 |         """
100 |         Returns a nested dict summarizing all contained sections, intended to help human inspection.
101 |         """
102 |         return {
103 |             'file_description': self.file_description.pretty(),
104 |             'referenceable_param_groups': _deep_pretty(self.referenceable_param_groups),
105 |             'samples': _deep_pretty(self.samples),
106 |             'softwares': _deep_pretty(self.softwares),
107 |             'scan_settings': _deep_pretty(self.scan_settings),
108 |             'instrument_configurations': _deep_pretty(self.instrument_configurations),
109 |             'data_processings': _deep_pretty(self.data_processings),
110 |         }
111 | 
112 | 
class ParamGroup:
    """
        This class exposes a group of imzML parameters at two layers of abstraction:

        High-level examples:
        `param_group['MS:0000000']`
            Access a controlled vocabulary parameter by accession ID or name, or a user-defined
            parameter by name. Controlled vocabulary parameters will take priority.
            This also inherits values from referenced referenceable param groups.
        `'particle beam' in param_group`
            Check if a parameter exists by name / accession ID.
        `param_group.targets`
            Access a subelement directly by name.

        Low-level examples:
        `param_group.cv_params` - A list of all cvParams defined in this group. Includes raw values,
                                  units, and multiple items if one accession is used multiple times.
                                  Does not include values inherited from referenceable param groups.
        `param_group.user_params` - A list of all userParams.
        `param_group.attrs` - A dict of all XML attributes.
        `param_group.subelements` - A dict of all subelements.

        When the same accession or name occurs more than once in a group, the high-level
        lookups return the value of the last occurrence.
    """
    def __init__(self, elem, **extra_data):
        """
        Parses an XML element representing a group of controlled vocabulary parameters.

        :param elem:             an XML element containing cvParam children
        :param extra_data:       extra attributes to assign to the class instance
        """
        # Ids of the referenceable param groups this element points to, in document order.
        # They are only resolved later, by apply_referenceable_param_groups.
        self.param_group_refs = [
            ref.get('ref')
            for ref in elem.findall('{0}referenceableParamGroupRef'.format(XMLNS_PREFIX))
        ]
        # Tag name with the XML namespace prefix removed
        self.type = elem.tag.replace(XMLNS_PREFIX, '')

        # Tuples of
        # (name, accession, parsed_value, raw_name, raw_value, unit_name, unit_accession)
        # These are kept in an array as the imzML spec allows multiple uses of accession numbers
        # in the same block
        self.cv_params = []
        for node in elem.findall('{0}cvParam'.format(XMLNS_PREFIX)):
            accession = node.get('accession')
            raw_name = node.get('name')
            raw_value = node.get('value')
            unit_accession = node.get('unitAccession')
            # The lookup may replace the accession; the name and value as written in the XML
            # are kept alongside as raw_name / raw_value
            accession, name, parsed_value, unit_name = lookup_and_convert_cv_param(
                accession, raw_name, raw_value, unit_accession
            )
            self.cv_params.append(
                (name, accession, parsed_value, raw_name, raw_value, unit_name, unit_accession)
            )

        # Tuples of (name, dtype, parsed_value, raw_value, unit_name, unit_accession)
        self.user_params = []
        for node in elem.findall('{0}userParam'.format(XMLNS_PREFIX)):
            name = node.get('name')
            dtype = node.get('dtype')
            raw_value = node.get('value')
            parsed_value = convert_xml_value(dtype, raw_value)
            unit_accession = node.get('unitAccession')
            unit_name = convert_term_name(unit_accession)
            self.user_params.append(
                (name, dtype, parsed_value, raw_value, unit_name, unit_accession)
            )

        # Mapping of param name to parsed value. userParams are inserted first so that a
        # cvParam with the same name overrides them.
        self.param_by_name = {}
        self.param_by_name.update((param[0], param[2]) for param in self.user_params)
        self.param_by_name.update((param[0], param[2]) for param in self.cv_params)
        # Mapping of CV param accession to parsed value
        self.param_by_accession = {
            param[1]: param[2] for param in self.cv_params
        }

        self.attrs = elem.attrib

        # Subelements are available both through the `subelements` dict and as instance attributes
        self.subelements = extra_data
        for k, v in extra_data.items():
            setattr(self, k, v)

    def __getitem__(self, key):
        """
        Look up a parsed value by accession first, then by name.

        :raises KeyError:   if `key` is neither a known accession nor a known name
        """
        try:
            return self.param_by_accession[key]
        except KeyError:
            return self.param_by_name[key]

    def __contains__(self, key):
        """Check whether `key` is a known accession or name."""
        return key in self.param_by_accession or key in self.param_by_name

    def apply_referenceable_param_groups(self, rpgs):
        """
        Inherit values from the referenceable param groups referenced by this group.

        Values already present in this group are never overwritten. When several referenced
        groups define the same key, the group referenced last takes priority. Within one
        referenced group the last occurrence of a repeated accession / name is used, matching
        what a direct lookup on that group returns. Only the referenced group's own `cv_params`
        and `user_params` are read, not values it may itself have inherited.

        A warning is issued for every reference that is missing from `rpgs`.

        :param rpgs:    dict of referenceable ParamGroups keyed by id
        """
        # setdefault keeps the first value it sees for a key, so walk everything from
        # highest to lowest priority: last reference first, last duplicate first.
        for ref in reversed(self.param_group_refs):
            rpg = rpgs.get(ref)
            if rpg is None:
                warn('ReferenceableParamGroup "%s" not found' % ref)
                continue

            # cvParams are applied before userParams so that they win name clashes
            for name, accession, parsed_value, *_rest in reversed(rpg.cv_params):
                if name is not None and name != accession:
                    self.param_by_name.setdefault(name, parsed_value)
                self.param_by_accession.setdefault(accession, parsed_value)
            for name, _dtype, parsed_value, *_rest in reversed(rpg.user_params):
                self.param_by_name.setdefault(name, parsed_value)

    def pretty(self):
        """
        Flattens attributes, params and extra fields into a single dict keyed by name.
        This function is intended to help human inspection. For programmatic access to specific fields,
        always use the `attrs`, `param_by_name`, `param_by_accession`, etc. instance attributes instead.
        """
        result = {
            'type': self.type,
        }
        # Later updates override earlier ones on key clashes:
        # XML attributes < params < subelements
        result.update(self.attrs)
        result.update(self.param_by_name)
        result.update(_deep_pretty(self.subelements))

        return result
230 | 
231 | 
class SpectrumData(ParamGroup):
    """
    ParamGroup for a single spectrum element, with its scans, precursors, products and
    binary data arrays parsed into nested ParamGroups and exposed as subelements.
    """
    def __init__(self, root, referenceable_param_groups):
        """
        :param root:                        the XML element of the spectrum
        :param referenceable_param_groups:  dict of referenceable ParamGroups keyed by id, used to
                                            resolve references in this group and all nested groups
        """
        utils = _ParseUtils()
        ns = XMLNS_PREFIX

        # Params defined directly on <scanList>, or None if the element is absent
        scan_list_params = utils.optional_param_group(root, '{0}scanList')
        scans = [
            utils.param_group(
                scan_node,
                scan_windows=utils.param_groups_list(scan_node, '{0}scanWindowList/{0}scanWindow')
            )
            for scan_node in root.findall('{0}scanList/{0}scan'.format(ns))
        ]

        precursors = [
            utils.param_group(
                precursor_node,
                isolation_window=utils.optional_param_group(precursor_node, '{0}isolationWindow'),
                selected_ions=utils.param_groups_list(
                    precursor_node, '{0}selectedIonList/{0}selectedIon'
                ),
                activation=utils.optional_param_group(precursor_node, '{0}activation'),
            )
            for precursor_node in root.findall('{0}precursorList/{0}precursor'.format(ns))
        ]

        products = [
            utils.param_group(
                product_node,
                isolation_window=utils.optional_param_group(product_node, '{0}isolationWindow'),
            )
            for product_node in root.findall('{0}productList/{0}product'.format(ns))
        ]

        binary_data_arrays = utils.param_groups_list(
            root, '{0}binaryDataArrayList/{0}binaryDataArray'
        )

        super().__init__(
            root,
            scan_list_params=scan_list_params,
            scans=scans,
            precursors=precursors,
            products=products,
            binary_data_arrays=binary_data_arrays,
        )

        # Resolve references for every nested group first, then for the spectrum itself
        for group in utils.created_param_groups:
            group.apply_referenceable_param_groups(referenceable_param_groups)

        self.apply_referenceable_param_groups(referenceable_param_groups)
281 | 


--------------------------------------------------------------------------------
/pyimzml/ontology/uo.py:
--------------------------------------------------------------------------------
  1 | # DO NOT EDIT BY HAND
  2 | # This file was autogenerated by dump_obo_files.py at 2020-10-21T18:55:01.621812
  3 | terms = {   'UO:0000000': ('unit', None),
  4 |     'UO:0000001': ('length unit', None),
  5 |     'UO:0000002': ('mass unit', None),
  6 |     'UO:0000003': ('time unit', None),
  7 |     'UO:0000004': ('electric current unit', None),
  8 |     'UO:0000005': ('temperature unit', None),
  9 |     'UO:0000006': ('substance unit', None),
 10 |     'UO:0000007': ('luminous intensity unit', None),
 11 |     'UO:0000008': ('meter', None),
 12 |     'UO:0000009': ('kilogram', None),
 13 |     'UO:0000010': ('second', None),
 14 |     'UO:0000011': ('ampere', None),
 15 |     'UO:0000012': ('kelvin', None),
 16 |     'UO:0000013': ('mole', None),
 17 |     'UO:0000014': ('candela', None),
 18 |     'UO:0000015': ('centimeter', None),
 19 |     'UO:0000016': ('millimeter', None),
 20 |     'UO:0000017': ('micrometer', None),
 21 |     'UO:0000018': ('nanometer', None),
 22 |     'UO:0000019': ('angstrom', None),
 23 |     'UO:0000020': ('picometer', None),
 24 |     'UO:0000021': ('gram', None),
 25 |     'UO:0000022': ('milligram', None),
 26 |     'UO:0000023': ('microgram', None),
 27 |     'UO:0000024': ('nanogram', None),
 28 |     'UO:0000025': ('picogram', None),
 29 |     'UO:0000026': ('femtogram', None),
 30 |     'UO:0000027': ('degree Celsius', None),
 31 |     'UO:0000028': ('millisecond', None),
 32 |     'UO:0000029': ('microsecond', None),
 33 |     'UO:0000030': ('picosecond', None),
 34 |     'UO:0000031': ('minute', None),
 35 |     'UO:0000032': ('hour', None),
 36 |     'UO:0000033': ('day', None),
 37 |     'UO:0000034': ('week', None),
 38 |     'UO:0000035': ('month', None),
 39 |     'UO:0000036': ('year', None),
 40 |     'UO:0000037': ('milliampere', None),
 41 |     'UO:0000038': ('microampere', None),
 42 |     'UO:0000039': ('micromole', None),
 43 |     'UO:0000040': ('millimole', None),
 44 |     'UO:0000041': ('nanomole', None),
 45 |     'UO:0000042': ('picomole', None),
 46 |     'UO:0000043': ('femtomole', None),
 47 |     'UO:0000044': ('attomole', None),
 48 |     'UO:0000045': ('base unit', None),
 49 |     'UO:0000046': ('prefix', None),
 50 |     'UO:0000047': ('area unit', None),
 51 |     'UO:0000048': ('acceleration unit', None),
 52 |     'UO:0000049': ('angular velocity unit', None),
 53 |     'UO:0000050': ('angular acceleration unit', None),
 54 |     'UO:0000051': ('concentration unit', None),
 55 |     'UO:0000052': ('mass density unit', None),
 56 |     'UO:0000053': ('luminance unit', None),
 57 |     'UO:0000054': ('area density unit', None),
 58 |     'UO:0000055': ('molar mass unit', None),
 59 |     'UO:0000056': ('molar volume unit', None),
 60 |     'UO:0000057': ('momentum unit', None),
 61 |     'UO:0000058': ('rotational frequency unit', None),
 62 |     'UO:0000059': ('specific volume unit', None),
 63 |     'UO:0000060': ('speed/velocity unit', None),
 64 |     'UO:0000061': ('unit of molarity', None),
 65 |     'UO:0000062': ('molar', None),
 66 |     'UO:0000063': ('millimolar', None),
 67 |     'UO:0000064': ('micromolar', None),
 68 |     'UO:0000065': ('nanomolar', None),
 69 |     'UO:0000066': ('picomolar', None),
 70 |     'UO:0000067': ('unit of molality', None),
 71 |     'UO:0000068': ('molal', None),
 72 |     'UO:0000069': ('millimolal', None),
 73 |     'UO:0000070': ('micromolal', None),
 74 |     'UO:0000071': ('nanomolal', None),
 75 |     'UO:0000072': ('picomolal', None),
 76 |     'UO:0000073': ('femtomolar', None),
 77 |     'UO:0000074': ('unit of normality', None),
 78 |     'UO:0000075': ('normal', None),
 79 |     'UO:0000076': ('mole fraction', None),
 80 |     'UO:0000077': ('meter per second per second', None),
 81 |     'UO:0000078': ('radian per second per second', None),
 82 |     'UO:0000079': ('radian per second', None),
 83 |     'UO:0000080': ('square meter', None),
 84 |     'UO:0000081': ('square centimeter', None),
 85 |     'UO:0000082': ('square millimeter', None),
 86 |     'UO:0000083': ('kilogram per cubic meter', None),
 87 |     'UO:0000084': ('gram per cubic centimeter', None),
 88 |     'UO:0000085': ('candela per square meter', None),
 89 |     'UO:0000086': ('kilogram per square meter', None),
 90 |     'UO:0000087': ('kilogram per mole', None),
 91 |     'UO:0000088': ('gram per mole', None),
 92 |     'UO:0000089': ('cubic meter per mole', None),
 93 |     'UO:0000090': ('cubic centimeter per mole', None),
 94 |     'UO:0000091': ('kilogram meter per second', None),
 95 |     'UO:0000092': ('turns per second', None),
 96 |     'UO:0000093': ('cubic meter per kilogram', None),
 97 |     'UO:0000094': ('meter per second', None),
 98 |     'UO:0000095': ('volume unit', None),
 99 |     'UO:0000096': ('cubic meter', None),
100 |     'UO:0000097': ('cubic centimeter', None),
101 |     'UO:0000098': ('milliliter', None),
102 |     'UO:0000099': ('liter', None),
103 |     'UO:0000100': ('cubic decimeter', None),
104 |     'UO:0000101': ('microliter', None),
105 |     'UO:0000102': ('nanoliter', None),
106 |     'UO:0000103': ('picoliter', None),
107 |     'UO:0000104': ('femtoliter', None),
108 |     'UO:0000105': ('frequency unit', None),
109 |     'UO:0000106': ('hertz', None),
110 |     'UO:0000107': ('force unit', None),
111 |     'UO:0000108': ('newton', None),
112 |     'UO:0000109': ('pressure unit', None),
113 |     'UO:0000110': ('pascal', None),
114 |     'UO:0000111': ('energy unit', None),
115 |     'UO:0000112': ('joule', None),
116 |     'UO:0000113': ('power unit', None),
117 |     'UO:0000114': ('watt', None),
118 |     'UO:0000115': ('illuminance unit', None),
119 |     'UO:0000116': ('lux', None),
120 |     'UO:0000117': ('luminous flux unit', None),
121 |     'UO:0000118': ('lumen', None),
122 |     'UO:0000119': ('catalytic activity unit', None),
123 |     'UO:0000120': ('katal', None),
124 |     'UO:0000121': ('angle unit', None),
125 |     'UO:0000122': ('plane angle unit', None),
126 |     'UO:0000123': ('radian', None),
127 |     'UO:0000124': ('solid angle unit', None),
128 |     'UO:0000125': ('steradian', None),
129 |     'UO:0000127': ('radiation unit', None),
130 |     'UO:0000128': ('activity (of a radionuclide) unit', None),
131 |     'UO:0000129': ('absorbed dose unit', None),
132 |     'UO:0000130': ('dose equivalent unit', None),
133 |     'UO:0000131': ('exposure unit', None),
134 |     'UO:0000132': ('becquerel', None),
135 |     'UO:0000133': ('curie', None),
136 |     'UO:0000134': ('gray', None),
137 |     'UO:0000135': ('rad', None),
138 |     'UO:0000136': ('roentgen', None),
139 |     'UO:0000137': ('sievert', None),
140 |     'UO:0000138': ('millisievert', None),
141 |     'UO:0000139': ('microsievert', None),
142 |     'UO:0000140': ('Roentgen equivalent man', None),
143 |     'UO:0000141': ('microgray', None),
144 |     'UO:0000142': ('milligray', None),
145 |     'UO:0000143': ('nanogray', None),
146 |     'UO:0000144': ('nanosievert', None),
147 |     'UO:0000145': ('millicurie', None),
148 |     'UO:0000146': ('microcurie', None),
149 |     'UO:0000147': ('disintegrations per minute', None),
150 |     'UO:0000148': ('counts per minute', None),
151 |     'UO:0000150': ('nanosecond', None),
152 |     'UO:0000151': ('century', None),
153 |     'UO:0000152': ('half life', None),
154 |     'UO:0000153': ('foot candle', None),
155 |     'UO:0000154': ('irradiance unit', None),
156 |     'UO:0000155': ('watt per square meter', None),
157 |     'UO:0000156': ('einstein per square meter per second', None),
158 |     'UO:0000157': ('light unit', None),
159 |     'UO:0000158': ('watt per steradian per square meter', None),
160 |     'UO:0000159': ('radiant intensity unit', None),
161 |     'UO:0000160': ('microeinstein per square meter per second', None),
162 |     'UO:0000161': ('radiance unit', None),
163 |     'UO:0000162': ('watt per steradian', None),
164 |     'UO:0000163': ('mass percentage', None),
165 |     'UO:0000164': ('mass volume percentage', None),
166 |     'UO:0000165': ('volume percentage', None),
167 |     'UO:0000166': ('parts per notation unit', None),
168 |     'UO:0000167': ('parts per hundred', None),
169 |     'UO:0000168': ('parts per thousand', None),
170 |     'UO:0000169': ('parts per million', None),
171 |     'UO:0000170': ('parts per billion', None),
172 |     'UO:0000171': ('parts per trillion', None),
173 |     'UO:0000172': ('parts per quadrillion', None),
174 |     'UO:0000173': ('gram per milliliter', None),
175 |     'UO:0000174': ('kilogram per liter', None),
176 |     'UO:0000175': ('gram per liter', None),
177 |     'UO:0000176': ('milligram per milliliter', None),
178 |     'UO:0000177': ('unit per volume unit', None),
179 |     'UO:0000178': ('unit per milliliter', None),
180 |     'UO:0000179': ('unit per liter', None),
181 |     'UO:0000180': ('mass per unit volume', None),
182 |     'UO:0000181': ('enzyme unit', None),
183 |     'UO:0000182': ('density unit', None),
184 |     'UO:0000183': ('linear density unit', None),
185 |     'UO:0000184': ('kilogram per meter', None),
186 |     'UO:0000185': ('degree', None),
187 |     'UO:0000186': ('dimensionless unit', None),
188 |     'UO:0000187': ('percent', None),
189 |     'UO:0000188': ('pi', None),
190 |     'UO:0000189': ('count unit', None),
191 |     'UO:0000190': ('ratio', None),
192 |     'UO:0000191': ('fraction', None),
193 |     'UO:0000192': ('molecule count', None),
194 |     'UO:0000193': ('purity percentage', None),
195 |     'UO:0000194': ('confluence percentage', None),
196 |     'UO:0000195': ('degree Fahrenheit', None),
197 |     'UO:0000196': ('pH', None),
198 |     'UO:0000197': ('liter per kilogram', None),
199 |     'UO:0000198': ('milliliter per kilogram', None),
200 |     'UO:0000199': ('microliter per kilogram', None),
201 |     'UO:0000200': ('cell concentration unit', None),
202 |     'UO:0000201': ('cells per milliliter', None),
203 |     'UO:0000202': ('catalytic (activity) concentration unit', None),
204 |     'UO:0000203': ('katal per cubic meter', None),
205 |     'UO:0000204': ('katal per liter', None),
206 |     'UO:0000205': ('volume per unit volume', None),
207 |     'UO:0000206': ('milliliter per cubic meter', None),
208 |     'UO:0000207': ('milliliter per liter', None),
209 |     'UO:0000208': ('gram per deciliter', None),
210 |     'UO:0000209': ('deciliter', None),
211 |     'UO:0000210': ('colony forming unit', None),
212 |     'UO:0000211': ('plaque forming unit', None),
213 |     'UO:0000212': ('colony forming unit per volume', None),
214 |     'UO:0000213': ('colony forming unit per milliliter', None),
215 |     'UO:0000214': ('plaque forming unit per volume', None),
216 |     'UO:0000215': ('plaque forming unit per milliliter', None),
217 |     'UO:0000216': ('disintegrations per second', None),
218 |     'UO:0000217': ('electric potential difference unit', None),
219 |     'UO:0000218': ('volt', None),
220 |     'UO:0000219': ('electric charge', None),
221 |     'UO:0000220': ('coulomb', None),
222 |     'UO:0000221': ('dalton', None),
223 |     'UO:0000222': ('kilodalton', None),
224 |     'UO:0000223': ('watt-hour', None),
225 |     'UO:0000224': ('kilowatt-hour', None),
226 |     'UO:0000225': ('magnetic flux unit', None),
227 |     'UO:0000226': ('weber', None),
228 |     'UO:0000227': ('magnetic flux density unit', None),
229 |     'UO:0000228': ('tesla', None),
230 |     'UO:0000229': ('volt-hour', None),
231 |     'UO:0000230': ('kilovolt-hour', None),
232 |     'UO:0000231': ('information unit', None),
233 |     'UO:0000232': ('bit', None),
234 |     'UO:0000233': ('byte', None),
235 |     'UO:0000234': ('kilobyte', None),
236 |     'UO:0000235': ('megabyte', None),
237 |     'UO:0000236': ('image resolution unit', None),
238 |     'UO:0000237': ('chroma sampling unit', None),
239 |     'UO:0000238': ('dynamic range unit', None),
240 |     'UO:0000239': ('spatial resolution unit', None),
241 |     'UO:0000240': ('dots per inch', None),
242 |     'UO:0000241': ('micron pixel', None),
243 |     'UO:0000242': ('pixels per inch', None),
244 |     'UO:0000243': ('pixels per millimeter', None),
245 |     'UO:0000244': ('base pair', None),
246 |     'UO:0000245': ('kibibyte', None),
247 |     'UO:0000246': ('mebibyte', None),
248 |     'UO:0000247': ('millivolt', None),
249 |     'UO:0000248': ('kilovolt', None),
250 |     'UO:0000249': ('microvolt', None),
251 |     'UO:0000250': ('nanovolt', None),
252 |     'UO:0000251': ('picovolt', None),
253 |     'UO:0000252': ('megavolt', None),
254 |     'UO:0000253': ('surface tension unit', None),
255 |     'UO:0000254': ('newton per meter', None),
256 |     'UO:0000255': ('dyne per cm', None),
257 |     'UO:0000256': ('viscosity unit', None),
258 |     'UO:0000257': ('pascal second', None),
259 |     'UO:0000258': ('poise', None),
260 |     'UO:0000259': ('decibel', None),
261 |     'UO:0000260': ('effective dose unit', None),
262 |     'UO:0000261': ('conduction unit', None),
263 |     'UO:0000262': ('electrical conduction unit', None),
264 |     'UO:0000263': ('heat conduction unit', None),
265 |     'UO:0000264': ('siemens', None),
266 |     'UO:0000265': ('watt per meter kelvin', None),
267 |     'UO:0000266': ('electronvolt', None),
268 |     'UO:0000267': ('electric field strength unit', None),
269 |     'UO:0000268': ('volt per meter', None),
270 |     'UO:0000269': ('absorbance unit', None),
271 |     'UO:0000270': ('volumetric flow rate unit', None),
272 |     'UO:0000271': ('microliters per minute', None),
273 |     'UO:0000272': ('millimetres of mercury', None),
274 |     'UO:0000273': ('milligram per liter', None),
275 |     'UO:0000274': ('microgram per milliliter', None),
276 |     'UO:0000275': ('nanogram per milliliter', None),
277 |     'UO:0000276': ('amount per container', None),
278 |     'UO:0000277': ('ug/disk', None),
279 |     'UO:0000278': ('nmole/disk', None),
280 |     'UO:0000279': ('milliunits per milliliter', None),
281 |     'UO:0000280': ('rate unit', None),
282 |     'UO:0000281': ('count per nanomolar second', None),
283 |     'UO:0000282': ('count per molar second', None),
284 |     'UO:0000283': ('kilogram per hectare', None),
285 |     'UO:0000284': ('count per nanomolar', None),
286 |     'UO:0000285': ('count per molar', None),
287 |     'UO:0000286': ('yotta', None),
288 |     'UO:0000287': ('hecto', None),
289 |     'UO:0000288': ('zetta', None),
290 |     'UO:0000289': ('exa', None),
291 |     'UO:0000290': ('peta', None),
292 |     'UO:0000291': ('tera', None),
293 |     'UO:0000292': ('giga', None),
294 |     'UO:0000293': ('mega', None),
295 |     'UO:0000294': ('kilo', None),
296 |     'UO:0000295': ('deca', None),
297 |     'UO:0000296': ('deci', None),
298 |     'UO:0000297': ('milli', None),
299 |     'UO:0000298': ('centi', None),
300 |     'UO:0000299': ('micro', None),
301 |     'UO:0000300': ('nano', None),
302 |     'UO:0000301': ('microgram per liter', None),
303 |     'UO:0000302': ('pico', None),
304 |     'UO:0000303': ('femto', None),
305 |     'UO:0000304': ('atto', None),
306 |     'UO:0000305': ('zepto', None),
307 |     'UO:0000306': ('yocto', None),
308 |     'UO:0000307': ('dose unit', None),
309 |     'UO:0000308': ('milligram per kilogram', None),
310 |     'UO:0000309': ('milligram per square meter', None),
311 |     'UO:0000310': ('dosage unit', None),
312 |     'UO:0000311': ('milligram per kilogram per day', None),
313 |     'UO:0000312': ('relative light unit', None),
314 |     'UO:0000313': ('relative luminescence unit', None),
315 |     'UO:0000314': ('relative fluorescence unit', None),
316 |     'UO:0000315': ('turbidity unit', None),
317 |     'UO:0000316': ('cells per microliter', None),
318 |     'UO:0000317': ('cells per well', None),
319 |     'UO:0000318': ('formazin nephelometric unit', None),
320 |     'UO:0000319': ('radioactivity concentration', None),
321 |     'UO:0000320': ('curie per liter', None),
322 |     'UO:0000321': ('microcurie per milliliter', None),
323 |     'UO:0000322': ('fold dilution', None),
324 |     'UO:0000323': ('ton per hectare', None),
325 |     'UO:0000324': ('square angstrom', None),
326 |     'UO:0000325': ('megaHertz', None),
327 |     'UO:0000326': ('centiMorgan', None),
328 |     'UO:0000327': ('centiRay', None),
329 |     'UO:0000328': ('kilobasepair', None),
330 |     'UO:0000329': ('megabasepair', None),
331 |     'UO:0000330': ('gigabasepair', None),
332 |     'UO:0000331': ('gigabyte', None),
333 |     'UO:0000332': ('terabyte', None),
334 |     'UO:0010001': ('square micrometer', None),
335 |     'UO:0010002': ('millisiemens', None),
336 |     'UO:0010003': ('micromole per litre', None),
337 |     'UO:0010004': ('micromole per kilogram', None),
338 |     'UO:0010005': ('millimeters per day', None),
339 |     'UO:0010006': ('ratio', None),
340 |     'UO:0010007': ('volt-second per square centimeter', None),
341 |     'UO:0010008': ('kilometer per hour', None),
342 |     'UO:0010009': ('milli', None),
343 |     'UO:0010010': ('hectare', None),
344 |     'UO:0010011': ('inch', None),
345 |     'UO:0010012': ('thou', None),
346 |     'UO:0010013': ('foot', None),
347 |     'UO:0010014': ('yard', None),
348 |     'UO:0010015': ('chain', None),
349 |     'UO:0010016': ('furlong', None),
350 |     'UO:0010017': ('mile', None),
351 |     'UO:0010018': ('league', None),
352 |     'UO:0010019': ('maritime length unit', None),
353 |     'UO:0010020': ('fathom', None),
354 |     'UO:0010021': ('cable', None),
355 |     'UO:0010022': ('nautical mile', None),
356 |     'UO:0010023': ('perch', None),
357 |     'UO:0010024': ('rood', None),
358 |     'UO:0010025': ('acre', None),
359 |     'UO:0010026': ('fluid ounce', None),
360 |     'UO:0010027': ('gill', None),
361 |     'UO:0010028': ('pint', None),
362 |     'UO:0010029': ('quart', None),
363 |     'UO:0010030': ('gallon', None),
364 |     'UO:0010031': ('grain', None),
365 |     'UO:0010032': ('drachm', None),
366 |     'UO:0010033': ('ounce', None),
367 |     'UO:0010034': ('pound', None),
368 |     'UO:0010035': ('stone', None),
369 |     'UO:0010036': ('quarter', None),
370 |     'UO:0010037': ('hundredweight', None),
371 |     'UO:0010038': ('ton', None),
372 |     'UO:0010039': ('slug', None),
373 |     'UO:0010040': ('teaspoon', None),
374 |     'UO:0010041': ('united states customary teaspoon', None),
375 |     'UO:0010042': ('tablespoon', None),
376 |     'UO:0010043': ('australian metric tablespoon', None),
377 |     'UO:0010044': ('united states customary tablespoon', None),
378 |     'UO:0010045': ('metric cup', None),
379 |     'UO:0010046': ('united states customary cup', None),
380 |     'UO:0010047': ('united states fda cup', None),
381 |     'UO:0010048': ('micromole', None),
382 |     'UO:0010049': ('gram per square meter', None)}
383 | 


--------------------------------------------------------------------------------
/pyimzml/ImzMLWriter.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import os
  4 | import numpy as np
  5 | import uuid
  6 | import hashlib
  7 | import sys
  8 | import getopt
  9 | from collections import namedtuple, OrderedDict, defaultdict
 10 | 
 11 | from wheezy.template import Engine, CoreExtension, DictLoader
 12 | 
 13 | from pyimzml.compression import NoCompression, ZlibCompression
 14 | 
 15 | IMZML_TEMPLATE = """\
 16 | @require(uuid, sha1sum, mz_data_type, int_data_type, run_id, spectra, mode, obo_codes, obo_names, mz_compression, int_compression, polarity, spec_type, scan_direction, scan_pattern, scan_type, line_scan_direction)
 17 | <?xml version="1.0" encoding="ISO-8859-1"?>
 18 | <mzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0_idx.xsd" version="1.1">
 19 |   <cvList count="2">
 20 |     <cv uri="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" id="MS" version="3.65.0"/>
 21 |     <cv uri="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo" fullName="Unit Ontology" id="UO" version="12:10:2011"/>
 22 |   </cvList>
 23 | 
 24 |   <fileDescription>
 25 |     <fileContent>
 26 |       <cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" value=""/>
 27 |       @if spec_type=='centroid':
 28 |       <cvParam cvRef="MS" accession="MS:1000127" name="centroid spectrum" value=""/>
 29 |       @elif spec_type=='profile':
 30 |       <cvParam cvRef="MS" accession="MS:1000128" name="profile spectrum" value=""/>
 31 |       @end
 32 |       <cvParam cvRef="IMS" accession="IMS:@obo_codes[mode]" name="@mode" value=""/>
 33 |       <cvParam cvRef="IMS" accession="IMS:1000080" name="universally unique identifier" value="@uuid"/>
 34 |       <cvParam cvRef="IMS" accession="IMS:1000091" name="ibd SHA-1" value="@sha1sum"/>
 35 |     </fileContent>
 36 |   </fileDescription>
 37 | 
 38 |   <referenceableParamGroupList count="4">
 39 |     <referenceableParamGroup id="mzArray">
 40 |       <cvParam cvRef="MS" accession="MS:@obo_codes[mz_compression]" name="@mz_compression" value=""/>
 41 |       <cvParam cvRef="MS" accession="MS:1000514" name="m/z array" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/>
 42 |       <cvParam cvRef="MS" accession="MS:@obo_codes[mz_data_type]" name="@mz_data_type" value=""/>
 43 |       <cvParam cvRef="IMS" accession="IMS:1000101" name="external data" value="true"/>
 44 |     </referenceableParamGroup>
 45 |     <referenceableParamGroup id="intensityArray">
 46 |       <cvParam cvRef="MS" accession="MS:@obo_codes[int_data_type]" name="@int_data_type" value=""/>
 47 |       <cvParam cvRef="MS" accession="MS:1000515" name="intensity array" unitCvRef="MS" unitAccession="MS:1000131" unitName="number of detector counts"/>
 48 |       <cvParam cvRef="MS" accession="MS:@obo_codes[int_compression]" name="@int_compression" value=""/>
 49 |       <cvParam cvRef="IMS" accession="IMS:1000101" name="external data" value="true"/>
 50 |     </referenceableParamGroup>
 51 |     <referenceableParamGroup id="scan1">
 52 |       <cvParam cvRef="MS" accession="MS:1000093" name="increasing m/z scan"/>
 53 |       <cvParam cvRef="MS" accession="MS:1000512" name="filter string" value=""/>
 54 |     </referenceableParamGroup>
 55 |     <referenceableParamGroup id="spectrum1">
 56 |       <cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" value=""/>
 57 |       <cvParam cvRef="MS" accession="MS:1000511" name="ms level" value="0"/>
 58 |       @if spec_type=='centroid':
 59 |       <cvParam cvRef="MS" accession="MS:1000127" name="centroid spectrum" value=""/>
 60 |       @elif spec_type=='profile':
 61 |       <cvParam cvRef="MS" accession="MS:1000128" name="profile spectrum" value=""/>
 62 |       @end
 63 |       @if polarity=='positive':
 64 |       <cvParam cvRef="MS" accession="MS:1000130" name="positive scan" value=""/>
 65 |       @elif polarity=='negative':
 66 |       <cvParam cvRef="MS" accession="MS:1000129" name="negative scan" value=""/>
 67 |       @end
 68 |     </referenceableParamGroup>
 69 |   </referenceableParamGroupList>
 70 | 
 71 |   <softwareList count="1">
 72 |     <software id="pyimzml" version="1.5.4">
 73 |       <cvParam cvRef="MS" accession="MS:1000799" name="custom unreleased software tool" value="pyimzml exporter"/>
 74 |     </software>
 75 |   </softwareList>
 76 | 
 77 |   <scanSettingsList count="1">
 78 |     <scanSettings id="scanSettings1">
 79 |       <cvParam cvRef="IMS" accession="IMS:@obo_codes[scan_direction]" name="@obo_names[scan_direction]"/>
 80 |       <cvParam cvRef="IMS" accession="IMS:@obo_codes[scan_pattern]" name="@obo_names[scan_pattern]"/>
 81 |       <cvParam cvRef="IMS" accession="IMS:@obo_codes[scan_type]" name="@obo_names[scan_type]"/>
 82 |       <cvParam cvRef="IMS" accession="IMS:@obo_codes[line_scan_direction]" name="@obo_names[line_scan_direction]"/>
 83 |       <cvParam cvRef="IMS" accession="IMS:1000042" name="max count of pixels x" value="@{(max(s.coords[0] for s in spectra))!!s}"/>
 84 |       <cvParam cvRef="IMS" accession="IMS:1000043" name="max count of pixels y" value="@{(max(s.coords[1] for s in spectra))!!s}"/>
 85 |     </scanSettings>
 86 |   </scanSettingsList>
 87 | 
 88 |   <instrumentConfigurationList count="1">
 89 |     <instrumentConfiguration id="IC1">
 90 |     </instrumentConfiguration>
 91 |   </instrumentConfigurationList>
 92 | 
 93 |   <dataProcessingList count="1">
 94 |     <dataProcessing id="export_from_pyimzml">
 95 |       <processingMethod order="0" softwareRef="pyimzml">
 96 |         <cvParam cvRef="MS" accession="MS:1000530" name="file format conversion" value="Output to imzML"/>
 97 |       </processingMethod>
 98 |     </dataProcessing>
 99 |   </dataProcessingList>
100 | 
101 |   <run defaultInstrumentConfigurationRef="IC1" id="@run_id">
102 |     <spectrumList count="@{len(spectra)!!s}" defaultDataProcessingRef="export_from_pyimzml">
103 |       @for index, s in enumerate(spectra):
104 |       <spectrum defaultArrayLength="0" id="spectrum=@{(index+1)!!s}" index="@{(index+1)!!s}">
105 |         <referenceableParamGroupRef ref="spectrum1"/>
106 |         <cvParam cvRef="MS" accession="MS:1000528" name="lowest observed m/z" value="@{s.mz_min!!s}" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/>
107 |         <cvParam cvRef="MS" accession="MS:1000527" name="highest observed m/z" value="@{s.mz_max!!s}" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/>
108 |         <cvParam cvRef="MS" accession="MS:1000504" name="base peak m/z" value="@{s.mz_base!!s}" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/>
109 |         <cvParam cvRef="MS" accession="MS:1000505" name="base peak intensity" value="@{s.int_base!!s}" unitCvRef="MS" unitAccession="MS:1000131" unitName="number of counts"/>
110 |         <cvParam cvRef="MS" accession="MS:1000285" name="total ion current" value="@{s.int_tic!!s}"/>
111 |         <scanList count="1">
112 |           <cvParam accession="MS:1000795" cvRef="MS" name="no combination"/>
113 |           <scan instrumentConfigurationRef="instrumentConfiguration0">
114 |             <referenceableParamGroupRef ref="scan1"/>
115 |             <cvParam accession="IMS:1000050" cvRef="IMS" name="position x" value="@{s.coords[0]!!s}"/>
116 |             <cvParam accession="IMS:1000051" cvRef="IMS" name="position y" value="@{s.coords[1]!!s}"/>
117 |             @if len(s.coords) == 3:
118 |             <cvParam accession="IMS:1000052" cvRef="IMS" name="position z" value="@{s.coords[2]!!s}"/>
119 |             @end
120 |             @if s.userParams:
121 |                 @for up in s.userParams:
122 |                 <userParam name="@up['name']" value="@up['value']"/> 
123 |                 @end
124 |             @end
125 |           </scan>
126 |         </scanList>
127 |         <binaryDataArrayList count="2">
128 |           <binaryDataArray encodedLength="0">
129 |             <referenceableParamGroupRef ref="mzArray"/>
130 |             <cvParam accession="IMS:1000103" cvRef="IMS" name="external array length" value="@{s.mz_len!!s}"/>
131 |             <cvParam accession="IMS:1000104" cvRef="IMS" name="external encoded length" value="@{s.mz_enc_len!!s}"/>
132 |             <cvParam accession="IMS:1000102" cvRef="IMS" name="external offset" value="@{s.mz_offset!!s}"/>
133 |             <binary/>
134 |           </binaryDataArray>
135 |           <binaryDataArray encodedLength="0">
136 |             <referenceableParamGroupRef ref="intensityArray"/>
137 |             <cvParam accession="IMS:1000103" cvRef="IMS" name="external array length" value="@{s.int_len!!s}"/>
138 |             <cvParam accession="IMS:1000104" cvRef="IMS" name="external encoded length" value="@{s.int_enc_len!!s}"/>
139 |             <cvParam accession="IMS:1000102" cvRef="IMS" name="external offset" value="@{s.int_offset!!s}"/>
140 |             <binary/>
141 |           </binaryDataArray>
142 |         </binaryDataArrayList>
143 |       </spectrum>
144 |       @end
145 |     </spectrumList>
146 |   </run>
147 | </mzML>
148 | """
149 | 
150 | class _MaxlenDict(OrderedDict):
151 |     def __init__(self, *args, **kwargs):
152 |         self.maxlen = kwargs.pop('maxlen', None)
153 |         OrderedDict.__init__(self, *args, **kwargs)
154 | 
155 |     def __setitem__(self, key, value):
156 |         if self.maxlen is not None and len(self) >= self.maxlen:
157 |             self.popitem(0) #pop oldest
158 |         OrderedDict.__setitem__(self, key, value)
159 | 
160 | _Spectrum = namedtuple('_Spectrum', 'coords mz_len mz_offset mz_enc_len int_len int_offset int_enc_len mz_min mz_max mz_base int_base int_tic userParams') #todo: change named tuple to dict and parse xml template properly (i.e. remove hardcoding so parameters can be optional)
161 | 
class ImzMLWriter(object):
    """
        Create an imzML+ibd file.

        :param output_filename:
            is used to make the base name by removing the extension (if any).
            two files will be made by adding ".ibd" and ".imzML" to the base name
        :param mz_dtype:
            The numpy data type to use for saving mz array values.
            Supported: 16/32/64-bit float and 32/64-bit signed integer
        :param intensity_dtype:
            The numpy data type to use for saving intensity values
            (same supported types as ``mz_dtype``)
        :param mode:

            * "continuous" mode will save the first mz array only
            * "processed" mode save every mz array separately
            * "auto" mode writes only mz arrays that have not already been written
        :param spec_type:
            "centroid" or "profile"; any other value leaves the spectrum type
            out of the XML
        :param scan_direction:
            one of "bottom_up", "left_right", "right_left", "top_down"
        :param line_scan_direction:
            one of "line_bottom_up", "line_left_right", "line_right_left", "line_top_down"
        :param scan_pattern:
            one of "meandering", "one_way", "random_access"
        :param scan_type:
            one of "horizontal_line", "vertical_line"
        :param intensity_compression:
            How to compress the intensity data before saving
            must be an instance of :class:`~pyimzml.compression.NoCompression` or :class:`~pyimzml.compression.ZlibCompression`
        :param mz_compression:
            How to compress the mz array data before saving
        :param polarity:
            "positive", "negative" (case-insensitive) or ``None`` to leave it unspecified
        :raises ValueError:
            if ``polarity``, a dtype or one of the scan settings is not supported.
            Raised before any output file is created.
    """

    # Accession numbers (without the "MS:"/"IMS:" prefix) that the XML template
    # looks up for data types, storage mode, compression and scan settings.
    _OBO_CODES = {"32-bit integer": "1000519",
                  "16-bit float": "1000520",
                  "32-bit float": "1000521",
                  "64-bit integer": "1000522",
                  "64-bit float": "1000523",
                  "continuous": "1000030",
                  "processed": "1000031",
                  "zlib compression": "1000574",
                  "no compression": "1000576",
                  "line_bottom_up": "1000492",
                  "line_left_right": "1000491",
                  "line_right_left": "1000490",
                  "line_top_down": "1000493",
                  "bottom_up": "1000400",
                  "left_right": "1000402",
                  "right_left": "1000403",
                  "top_down": "1000401",
                  "meandering": "1000410",
                  "one_way": "1000411",
                  "random_access": "1000412",
                  "horizontal_line": "1000480",
                  "vertical_line": "1000481"}

    # Display names written next to the scan-setting accessions.
    _OBO_NAMES = {"line_bottom_up": "linescan bottom up",
                  "line_left_right": "linescan left right",
                  "line_right_left": "linescan right left",
                  "line_top_down": "linescan top down",
                  "bottom_up": "bottom up",
                  "left_right": "left right",
                  "right_left": "right left",
                  "top_down": "top down",
                  "meandering": "meandering",
                  "one_way": "one way",
                  "random_access": "random access",
                  "horizontal_line": "horizontal line scan",
                  "vertical_line": "vertical line scan"}

    def __init__(self, output_filename,
                 mz_dtype=np.float64, intensity_dtype=np.float32, mode="auto", spec_type="centroid",
                 scan_direction="top_down", line_scan_direction="line_left_right", scan_pattern="one_way", scan_type="horizontal_line",
                 mz_compression=NoCompression(), intensity_compression=NoCompression(),
                 polarity=None):

        # Reject unusable arguments up front.  Otherwise the problem would only
        # surface while rendering the XML in finish(), after the whole .ibd
        # file has already been written.
        self._setPolarity(polarity)
        for option, dtype in (("mz_dtype", mz_dtype), ("intensity_dtype", intensity_dtype)):
            type_name = self._np_type_to_name(dtype)
            if type_name not in self._OBO_CODES:
                raise ValueError('%s %r (%s) cannot be stored in imzML' % (option, dtype, type_name))
        for option, value in (("scan_direction", scan_direction),
                              ("line_scan_direction", line_scan_direction),
                              ("scan_pattern", scan_pattern),
                              ("scan_type", scan_type)):
            if value not in self._OBO_NAMES:
                raise ValueError('unknown value for %s: %r (known values: %s)'
                                 % (option, value, ", ".join(sorted(self._OBO_NAMES))))

        self.mz_dtype = mz_dtype
        self.intensity_dtype = intensity_dtype
        self.mode = mode
        self.spec_type = spec_type
        self.mz_compression = mz_compression
        self.intensity_compression = intensity_compression

        self.scan_direction = scan_direction
        self.scan_pattern = scan_pattern
        self.scan_type = scan_type
        self.line_scan_direction = line_scan_direction

        # Compile the template before touching the file system so that a
        # template problem does not leave empty output files behind.
        self.wheezy_engine = Engine(loader=DictLoader({'imzml': IMZML_TEMPLATE}), extensions=[CoreExtension()])
        self.imzml_template = self.wheezy_engine.get_template('imzml')
        self.spectra = []
        self.first_mz = None
        self.hashes = defaultdict(list)  # mz_hash -> list of mz_data (disk location)
        self.lru_cache = _MaxlenDict(maxlen=10)  # mz_array (as tuple) -> mz_data (disk location)
        self._finished = False

        self.run_id = os.path.splitext(output_filename)[0]
        self.filename = self.run_id + ".imzML"
        self.ibd_filename = self.run_id + ".ibd"
        # The template declares ISO-8859-1, so write exactly that encoding;
        # characters outside it become numeric character references instead of
        # raising or producing bytes that contradict the XML declaration.
        self.xml = open(self.filename, 'w', encoding='ISO-8859-1', errors='xmlcharrefreplace')
        try:
            self.ibd = open(self.ibd_filename, 'wb+')
        except Exception:
            self.xml.close()
            raise
        self.sha1 = hashlib.sha1()
        self.uuid = uuid.uuid4()

        # the .ibd file starts with the raw UUID bytes
        self._write_ibd(self.uuid.bytes)

    @staticmethod
    def _np_type_to_name(dtype):
        """Return the imzML name ("<bits>-bit float" / "<bits>-bit integer") of a dtype.

        Accepts anything ``numpy.dtype`` understands (scalar types such as
        ``np.float32``, dtype instances, type strings).

        :raises ValueError: if ``dtype`` is None or neither a float nor a signed integer type
        """
        if dtype is None:
            # np.dtype(None) would silently mean float64
            raise ValueError('a data type is required, got None')
        resolved = np.dtype(dtype)
        bits = resolved.itemsize * 8
        if resolved.kind == 'f':
            return "%d-bit float" % bits
        if resolved.kind == 'i':
            return "%d-bit integer" % bits
        raise ValueError('unsupported data type %r: only float and signed integer types can be written' % (dtype,))

    def _setPolarity(self, polarity):
        """Store the lower-cased polarity, or "" when none was given."""
        if polarity:
            if polarity.lower() in ['positive', 'negative']:
                self.polarity = polarity.lower()
            else:
                raise ValueError('value for polarity must be one of "positive", "negative". Received: {}'.format(polarity))
        else:
            self.polarity = ""

    def _write_xml(self):
        """Render the imzML template with the collected spectra and write it to the XML file."""
        if self.mode == 'auto':
            # more than one distinct mz array was seen -> spectra do not share one axis
            mode = "processed" if len(self.lru_cache) > 1 else "continuous"
        else:
            mode = self.mode

        # Pass exactly the names listed in the template's @require line.
        context = {
            'uuid': ("{%s}" % self.uuid).upper(),
            'sha1sum': self.sha1.hexdigest().upper(),
            'mz_data_type': self._np_type_to_name(self.mz_dtype),
            'int_data_type': self._np_type_to_name(self.intensity_dtype),
            'run_id': self.run_id,
            'spectra': self.spectra,
            'mode': mode,
            'obo_codes': self._OBO_CODES,
            'obo_names': self._OBO_NAMES,
            'mz_compression': self.mz_compression.name,
            'int_compression': self.intensity_compression.name,
            'polarity': self.polarity,
            'spec_type': self.spec_type,
            'scan_direction': self.scan_direction,
            'scan_pattern': self.scan_pattern,
            'scan_type': self.scan_type,
            'line_scan_direction': self.line_scan_direction,
        }
        self.xml.write(self.imzml_template.render(context))

    def _write_ibd(self, bytes):
        """Append raw bytes to the .ibd file, feed them to the SHA-1, return their count."""
        self.ibd.write(bytes)
        self.sha1.update(bytes)
        return len(bytes)

    def _encode_and_write(self, data, dtype=np.float32, compression=NoCompression()):
        """Convert ``data`` to ``dtype``, compress it and append it to the .ibd file.

        :return: ``(offset, number of values, number of bytes written)``
        """
        data = np.asarray(data, dtype=dtype)
        offset = self.ibd.tell()
        encoded = compression.compress(data.tobytes())
        return offset, data.shape[0], self._write_ibd(encoded)

    def _read_mz(self, mz_offset, mz_len, mz_enc_len):
        '''reads a mz array from the currently open ibd file'''
        self.ibd.seek(mz_offset)
        data = self.ibd.read(mz_enc_len)
        self.ibd.seek(0, 2)  # back to the end of the file so later writes append
        data = self.mz_compression.decompress(data)
        return tuple(np.frombuffer(data, dtype=self.mz_dtype))

    def _get_previous_mz(self, mzs):
        '''given an mz array, return the mz_data (disk location)
        if the mz array was not previously written, write to disk first'''
        mzs = tuple(mzs)  # must be hashable
        if mzs in self.lru_cache:
            return self.lru_cache[mzs]

        # mz not recognized ... check hash
        mz_hash = "%s-%s-%s" % (hash(mzs), sum(mzs), len(mzs))
        if mz_hash in self.hashes:
            # same fingerprint: compare against what is actually on disk
            for mz_data in self.hashes[mz_hash]:
                test_mz = self._read_mz(*mz_data)
                if mzs == test_mz:
                    self.lru_cache[test_mz] = mz_data
                    return mz_data
        # hash not recognized
        # must be a new mz array ... write it, add it to lru_cache and hashes
        mz_data = self._encode_and_write(mzs, self.mz_dtype, self.mz_compression)
        self.hashes[mz_hash].append(mz_data)
        self.lru_cache[mzs] = mz_data
        return mz_data

    def addSpectrum(self, mzs, intensities, coords, userParams=None):
        """
        Add a mass spectrum to the file.

        :param mzs:
            mz array
        :param intensities:
            intensity array
        :param coords:

            * 2-tuple of x and y position OR
            * 3-tuple of x, y, and z position

            note some applications want coords to be 1-indexed
        :param userParams:
            optional list of dicts with "name" and "value" keys; each one is
            written as a ``<userParam>`` of the spectrum's scan
        :raises TypeError:
            if the writer was created with an unknown ``mode``
        """
        if userParams is None:
            # fresh list per call instead of one shared default object
            userParams = []

        # must be rounded now to allow comparisons to later data
        # but don't waste CPU time in continuous mode since the data will not be used anyway
        if self.mode != "continuous" or self.first_mz is None:
            mzs = self.mz_compression.rounding(mzs)
        intensities = self.intensity_compression.rounding(intensities)

        if self.mode == "continuous":
            if self.first_mz is None:
                self.first_mz = self._encode_and_write(mzs, self.mz_dtype, self.mz_compression)
            mz_data = self.first_mz
        elif self.mode == "processed":
            mz_data = self._encode_and_write(mzs, self.mz_dtype, self.mz_compression)
        elif self.mode == "auto":
            mz_data = self._get_previous_mz(mzs)
        else:
            raise TypeError("Unknown mode: %s" % self.mode)
        mz_offset, mz_len, mz_enc_len = mz_data

        int_offset, int_len, int_enc_len = self._encode_and_write(intensities, self.intensity_dtype, self.intensity_compression)
        mz_min = np.min(mzs)
        mz_max = np.max(mzs)
        ix_max = np.argmax(intensities)  # position of the base peak
        mz_base = mzs[ix_max]
        int_base = intensities[ix_max]
        int_tic = np.sum(intensities)
        s = _Spectrum(coords, mz_len, mz_offset, mz_enc_len, int_len, int_offset, int_enc_len, mz_min, mz_max, mz_base, int_base, int_tic, userParams)
        self.spectra.append(s)

    def close(self):  # 'close' is a more common use for this
        """
        Writes the XML file and closes all files.
        Will be called automatically if ``with``-pattern is used.
        """
        self.finish()

    def finish(self):
        '''alias of close(); calling it again after the files are closed does nothing'''
        if getattr(self, '_finished', False):
            return
        self._finished = True
        self.ibd.close()
        try:
            self._write_xml()
        finally:
            # never leak the XML handle, even if rendering fails
            self.xml.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_t, exc_v, trace):
        if exc_t is None:
            self.finish()
        else:
            # an error occurred inside the with-block: close the files
            # without writing the XML
            self._finished = True
            self.ibd.close()
            self.xml.close()
400 | 
def _main(argv):
    """Command-line round-trip check.

    Reads the imzML file given with ``-i``/``--ifile``, rewrites it with
    32-bit float arrays to ``-o``/``--ofile`` (default: the input name plus
    ".imzML"), parses the copy again and prints whether the first spectrum
    survived the round trip.

    :param argv: command-line arguments without the program name
    :raises IOError: if no input file was given
    """
    from pyimzml.ImzMLParser import ImzMLParser
    usage = '%s -i <inputfile> -o <outputfile>' % os.path.basename(sys.argv[0])
    inputfile = ''
    outputfile = ''
    try:
        opts, _ = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
    if inputfile == '':
        print(usage)
        raise IOError('input file not specified')
    if outputfile == '':
        outputfile = inputfile + '.imzML'

    source = ImzMLParser(inputfile)
    first_written = None  # only the first spectrum is compared, so only it is kept
    with ImzMLWriter(outputfile, mz_dtype=np.float32, intensity_dtype=np.float32) as writer:
        for i, coords in enumerate(source.coordinates):
            mzs, intensities = source.getspectrum(i)
            writer.addSpectrum(mzs, intensities, coords)
            if first_written is None:
                first_written = (mzs, intensities, coords)

    # read every spectrum of the copy back so that an unreadable one is noticed
    copy = ImzMLParser(outputfile)
    first_read = None
    for i, coords in enumerate(copy.coordinates):
        mzs, intensities = copy.getspectrum(i)
        if first_read is None:
            first_read = (mzs, intensities, coords)

    if first_written is None or first_read is None:
        print(False)
        return

    # The arrays were stored as float32, so compare after the same cast.
    # A plain ``==`` on tuples holding NumPy arrays raises ValueError.
    same = (
        tuple(first_written[2]) == tuple(first_read[2])
        and np.array_equal(np.asarray(first_written[0], dtype=np.float32), first_read[0])
        and np.array_equal(np.asarray(first_written[1], dtype=np.float32), first_read[1])
    )
    print(same)


if __name__ == '__main__':
    _main(sys.argv[1:])
441 | 


--------------------------------------------------------------------------------
/pyimzml/ImzMLParser.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright 2015 Dominik Fay
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | # http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | from bisect import bisect_left, bisect_right
 18 | import sys
 19 | import random
 20 | import re
 21 | from collections import Counter
 22 | from pathlib import Path
 23 | from typing import Dict, Tuple, Any
 24 | 
 25 | from warnings import warn
 26 | import numpy as np
 27 | 
 28 | from pyimzml.metadata import Metadata, SpectrumData
 29 | from pyimzml.ontology.ontology import convert_cv_param
 30 | 
 31 | PRECISION_DICT = {"32-bit float": 'f', "64-bit float": 'd', "32-bit integer": 'i', "64-bit integer": 'l'}
 32 | SIZE_DICT = {'f': 4, 'd': 8, 'i': 4, 'l': 8}
 33 | INFER_IBD_FROM_IMZML = object()
 34 | XMLNS_PREFIX = "{http://psi.hupo.org/ms/mzml}"
 35 | 
 36 | param_group_elname = "referenceableParamGroup"
 37 | data_processing_elname = "dataProcessing"
 38 | instrument_confid_elname = "instrumentConfiguration"
 39 | 
 40 | 
 41 | def choose_iterparse(parse_lib=None):
 42 |     if parse_lib == 'ElementTree':
 43 |         from xml.etree.ElementTree import iterparse
 44 |     elif parse_lib == 'lxml':
 45 |         from lxml.etree import iterparse
 46 |     else:
 47 |         from xml.etree.ElementTree import iterparse
 48 |     return iterparse
 49 | 
 50 | 
 51 | def _get_cv_param(elem, accession, deep=False, convert=False):
 52 |     base = './/' if deep else ''
 53 |     node = elem.find('%s%scvParam[@accession="%s"]' % (base, XMLNS_PREFIX, accession))
 54 |     if node is not None:
 55 |         if convert:
 56 |             return convert_cv_param(accession, node.get('value'))
 57 |         return node.get('value')
 58 | 
 59 | 
 60 | def calc_mzs_digitize(mzs: np.ndarray) -> Counter:
 61 |     """Calculate the number of peaks in the interval [-0.5 Da + int(mz), +0.5 Da + int(mz)]"""
 62 |     mzs_min = int((mzs.min() // 1 - 0.5) * 10)
 63 |     mzs_max = int((mzs.max() // 1 + 2.5) * 10)
 64 | 
 65 |     bins = np.array([i / 10.0 for i in range(mzs_min, mzs_max, 10)])
 66 |     mzs_units = [int(i + 0.51) for i in bins]  # integer value of mz
 67 | 
 68 |     mzs_counts = [mzs_units[i] for i in np.digitize(mzs, bins, right=False)]
 69 |     mzs_digitized = Counter(mzs_counts)
 70 |     return mzs_digitized
 71 | 
 72 | 
 73 | def calc_peaks_overlap(mzs: np.array, ints: np.array, ppm: float) -> Tuple[int, int]:
 74 |     """Calculate the number of peaks that, when shifted by ppm, are interrupted by others"""
 75 | 
 76 |     # select peaks that have non-zero intensity
 77 |     non_zero_ints = np.where(ints > 0.0)[0]
 78 |     non_zero_mz = mzs[non_zero_ints]
 79 | 
 80 |     # calculation of the absolute value of the shift of each peak when shifting it by ppm
 81 |     shifted = non_zero_mz + (non_zero_mz * ppm * 1e-6)
 82 |     diff_shifted = shifted - non_zero_mz
 83 | 
 84 |     diff_mz = np.diff(non_zero_mz)  # difference between original adjacent peaks
 85 |     n_overlap = sum(diff_shifted[:-1] > diff_mz)
 86 | 
 87 |     return n_overlap, len(non_zero_mz)
 88 | 
 89 | 
 90 | class ImzMLParser:
 91 |     """
 92 |     Parser for imzML 1.1.0 files (see specification here:
 93 |     https://ms-imaging.org/wp-content/uploads/2009/08/specifications_imzML1.1.0_RC1.pdf ).
 94 | 
 95 |     Iteratively reads the .imzML file into memory while pruning the per-spectrum metadata (everything in
 96 |     <spectrumList> elements) during initialization. Returns a spectrum upon calling getspectrum(i). The binary file
 97 |     is read in every call of getspectrum(i). Use enumerate(parser.coordinates) to get all coordinates with their
    respective index. Coordinates are always 3-dimensional. If the third spatial dimension is not present in
    the data, it will be set to one.
100 | 
101 |     The global metadata fields in the imzML file are stored in parser.metadata.
    Spectrum-specific metadata fields are not stored by default to avoid memory issues,
103 |     use the `include_spectra_metadata` parameter if spectrum-specific metadata is needed.
104 |     """
105 | 
106 |     def __init__(
107 |             self,
108 |             filename,
109 |             parse_lib=None,
110 |             ibd_file=INFER_IBD_FROM_IMZML,
111 |             include_spectra_metadata=None,
112 |     ):
113 |         """
114 |         Opens the two files corresponding to the file name, reads the entire .imzML
115 |         file and extracts required attributes. Does not read any binary data, yet.
116 | 
117 |         :param filename:
118 |             name of the XML file. Must end with .imzML. Binary data file must be named equally but ending with .ibd
119 |             Alternatively an open file or Buffer Protocol object can be supplied, if ibd_file is also supplied
120 |         :param parse_lib:
121 |             XML-parsing library to use: 'ElementTree' or 'lxml', the later will be used if argument not provided
122 |         :param ibd_file:
123 |             File or Buffer Protocol object for the .ibd file. Leave blank to infer it from the imzml filename.
124 |             Set to None if no data from the .ibd file is needed (getspectrum calls will not work)
125 |         :param include_spectra_metadata:
126 |             None, 'full', or a list/set of accession IDs.
127 |             If 'full' is given, parser.spectrum_full_metadata will be populated with a list of
128 |                 complex objects containing the full metadata for each spectrum.
129 |             If a list or set is given, parser.spectrum_metadata_fields will be populated with a dict mapping
130 |                 accession IDs to lists. Each list will contain the values for that accession ID for
131 |                 each spectrum. Note that for performance reasons, this mode only searches the
132 |                 spectrum itself for the value. It won't check any referenced referenceable param
133 |                 groups if the accession ID isn't present in the spectrum metadata.
134 |         """
135 |         # ElementTree requires the schema location for finding tags (why?) but
136 |         # fails to read it from the root element. As this should be identical
137 |         # for all imzML files, it is hard-coded here and prepended before every tag
138 |         self.sl = "{http://psi.hupo.org/ms/mzml}"
139 |         # maps each imzML number format to its struct equivalent
140 |         self.precisionDict = dict(PRECISION_DICT)
141 |         # maps each number format character to its amount of bytes used
142 |         self.sizeDict = dict(SIZE_DICT)
143 |         self.filename = filename
144 |         self.mzOffsets = []
145 |         self.intensityOffsets = []
146 |         self.mzLengths = []
147 |         self.intensityLengths = []
148 |         # list of all (x,y,z) coordinates as tuples.
149 |         self.coordinates = []
150 |         self.root = None
151 |         self.metadata = None
152 |         self.polarity = None
153 |         self.spectrum_mode = None
154 |         if include_spectra_metadata == 'full':
155 |             self.spectrum_full_metadata = []
156 |         elif include_spectra_metadata is not None:
157 |             include_spectra_metadata = set(include_spectra_metadata)
158 |             self.spectrum_metadata_fields = {
159 |                 k: [] for k in include_spectra_metadata
160 |             }
161 | 
162 |         self.mzGroupId = self.intGroupId = self.mzPrecision = self.intensityPrecision = None
163 |         self.iterparse = choose_iterparse(parse_lib)
164 |         self.__iter_read_spectrum_meta(include_spectra_metadata)
165 |         if ibd_file is INFER_IBD_FROM_IMZML:
166 |             # name of the binary file
167 |             ibd_filename = self._infer_bin_filename(self.filename)
168 |             self.m = open(ibd_filename, "rb")
169 |         else:
170 |             self.m = ibd_file
171 | 
172 |         # Dict for basic imzML metadata other than those required for reading
173 |         # spectra. See method __readimzmlmeta()
174 |         self.imzmldict = self.__readimzmlmeta()
175 |         self.imzmldict['max count of pixels z'] = np.asarray(self.coordinates)[:,2].max()
176 | 
177 |     @staticmethod
178 |     def _infer_bin_filename(imzml_path):
179 |         imzml_path = Path(imzml_path)
180 |         ibd_path = [f for f in imzml_path.parent.glob('*')
181 |                     if re.match(r'.+\.ibd', str(f), re.IGNORECASE) and f.stem == imzml_path.stem][0]
182 |         return str(ibd_path)
183 | 
184 |     # system method for use of 'with ... as'
    def __enter__(self):
        """Return the parser itself so that it can be used in a ``with ... as`` statement."""
        return self
187 | 
188 |     # system method for use of 'with ... as'
189 |     def __exit__(self, exc_t, exc_v, trace):
190 |         if self.m is not None:
191 |             self.m.close()
192 | 
193 |     def __iter_read_spectrum_meta(self, include_spectra_metadata):
194 |         """
195 |         This method should only be called by __init__. Reads the data formats, coordinates and offsets from
196 |         the .imzML file and initializes the respective attributes. While traversing the XML tree, the per-spectrum
197 |         metadata is pruned, i.e. the <spectrumList> element(s) are left behind empty.
198 | 
199 |         Supported accession values for the number formats: "MS:1000521", "MS:1000523", "IMS:1000141" or
200 |         "IMS:1000142". The string values are "32-bit float", "64-bit float", "32-bit integer", "64-bit integer".
201 |         """
202 |         mz_group = int_group = None
203 |         slist = None
204 |         elem_iterator = self.iterparse(self.filename, events=("start", "end"))
205 | 
206 |         if sys.version_info > (3,):
207 |             _, self.root = next(elem_iterator)
208 |         else:
209 |             _, self.root = elem_iterator.next()
210 | 
211 |         is_first_spectrum = True
212 | 
213 |         for event, elem in elem_iterator:
214 |             if elem.tag == self.sl + "spectrumList" and event == "start":
215 |                 self.__process_metadata()
216 |                 slist = elem
217 |             elif elem.tag == self.sl + "spectrum" and event == "end":
218 |                 self.__process_spectrum(elem, include_spectra_metadata)
219 |                 if is_first_spectrum:
220 |                     self.__read_polarity(elem)
221 |                     self.__read_spectrum_mode(elem)
222 |                     is_first_spectrum = False
223 |                 slist.remove(elem)
224 |         self.__fix_offsets()
225 | 
226 |     def __fix_offsets(self):
227 |         # clean up the mess after morons who use signed 32-bit where unsigned 64-bit is appropriate
228 |         def fix(array):
229 |             fixed = []
230 |             delta = 0
231 |             prev_value = float('nan')
232 |             for value in array:
233 |                 if value < 0 and prev_value >= 0:
234 |                     delta += 2**32
235 |                 fixed.append(value + delta)
236 |                 prev_value = value
237 |             return fixed
238 | 
239 |         self.mzOffsets = fix(self.mzOffsets)
240 |         self.intensityOffsets = fix(self.intensityOffsets)
241 | 
242 |     def __process_metadata(self):
243 |         if self.metadata is None:
244 |             self.metadata = Metadata(self.root)
245 |             for param_id, param_group in self.metadata.referenceable_param_groups.items():
246 |                 if 'm/z array' in param_group.param_by_name:
247 |                     self.mzGroupId = param_id
248 |                     for name, dtype in self.precisionDict.items():
249 |                         if name in param_group.param_by_name:
250 |                             self.mzPrecision = dtype
251 |                 if 'intensity array' in param_group.param_by_name:
252 |                     self.intGroupId = param_id
253 |                     for name, dtype in self.precisionDict.items():
254 |                         if name in param_group.param_by_name:
255 |                             self.intensityPrecision = dtype
256 |             if not hasattr(self, 'mzPrecision'):
257 |                 raise RuntimeError("Could not determine m/z precision")
258 |             if not hasattr(self, 'intensityPrecision'):
259 |                 raise RuntimeError("Could not determine intensity precision")
260 | 
261 |     def __process_spectrum(self, elem, include_spectra_metadata):
262 |         arrlistelem = elem.find('%sbinaryDataArrayList' % self.sl)
263 |         mz_group = None
264 |         int_group = None
265 |         for e in arrlistelem:
266 |             ref = e.find('%sreferenceableParamGroupRef' % self.sl).attrib["ref"]
267 |             if ref == self.mzGroupId:
268 |                 mz_group = e
269 |             elif ref == self.intGroupId:
270 |                 int_group = e
271 |         self.mzOffsets.append(int(_get_cv_param(mz_group, 'IMS:1000102')))
272 |         self.mzLengths.append(int(_get_cv_param(mz_group, 'IMS:1000103')))
273 |         self.intensityOffsets.append(int(_get_cv_param(int_group, 'IMS:1000102')))
274 |         self.intensityLengths.append(int(_get_cv_param(int_group, 'IMS:1000103')))
275 |         scan_elem = elem.find('%sscanList/%sscan' % (self.sl, self.sl))
276 |         x = _get_cv_param(scan_elem, 'IMS:1000050')
277 |         y = _get_cv_param(scan_elem, 'IMS:1000051')
278 |         z = _get_cv_param(scan_elem, 'IMS:1000052')
279 |         if z is not None:
280 |             self.coordinates.append((int(x), int(y), int(z)))
281 |         else:
282 |             self.coordinates.append((int(x), int(y), 1))
283 | 
284 |         if include_spectra_metadata == 'full':
285 |             self.spectrum_full_metadata.append(
286 |                 SpectrumData(elem, self.metadata.referenceable_param_groups)
287 |             )
288 |         elif include_spectra_metadata:
289 |             for param in include_spectra_metadata:
290 |                 value = _get_cv_param(elem, param, deep=True, convert=True)
291 |                 self.spectrum_metadata_fields[param].append(value)
292 | 
293 |     def __read_polarity(self, elem):
294 |         # It's too slow to always check all spectra, so first check the referenceable_param_groups
295 |         # in the header to see if they indicate the polarity. If not, try to detect it from
296 |         # the first spectrum's full metadata.
297 |         # LIMITATION: This won't detect "mixed" polarity if polarity is only specified outside the
298 |         # referenceable_param_groups.
299 |         param_groups = self.metadata.referenceable_param_groups.values()
300 |         spectrum_metadata = SpectrumData(elem, self.metadata.referenceable_param_groups)
301 |         has_positive = (
302 |             any('positive scan' in group for group in param_groups)
303 |             or 'positive scan' in spectrum_metadata
304 |         )
305 |         has_negative = (
306 |             any('negative scan' in group for group in param_groups)
307 |             or 'negative scan' in spectrum_metadata
308 |         )
309 |         if has_positive and has_negative:
310 |             self.polarity = 'mixed'
311 |         elif has_positive:
312 |             self.polarity = 'positive'
313 |         elif has_negative:
314 |             self.polarity = 'negative'
315 | 
316 |     def __read_spectrum_mode(self, elem):
317 |         """
318 |         This method checks for centroid (MS:1000127) / profile (MS:1000128) mode information.
319 | 
320 |         It's too slow to always check all spectra, so first check the referenceable_param_groups
321 |         in the header to see if they indicate the spectrum mode.
322 |         If not, try to detect it from the first spectrum's full metadata.
323 |         """
324 |         param_groups = self.metadata.referenceable_param_groups.values()
325 |         spectrum_metadata = SpectrumData(elem, self.metadata.referenceable_param_groups)
326 | 
327 |         profile_mode = (
328 |             any('profile spectrum' in group for group in param_groups)
329 |             or 'profile spectrum' in spectrum_metadata
330 |         )
331 |         centroid_mode = (
332 |             any('centroid spectrum' in group for group in param_groups)
333 |             or 'centroid spectrum' in spectrum_metadata
334 |         )
335 | 
336 |         if profile_mode:
337 |             self.spectrum_mode = 'profile'
338 |         elif centroid_mode:
339 |             self.spectrum_mode = 'centroid'
340 | 
341 |     def __readimzmlmeta(self):
342 |         """
343 |         DEPRECATED - use self.metadata instead, as it has much greater detail and allows for
344 |         multiple scan settings / instruments.
345 | 
346 |         This method should only be called by __init__. Initializes the imzmldict with frequently used metadata from
347 |         the .imzML file.
348 | 
349 |         :return d:
350 |             dict containing above mentioned meta data
351 |         :rtype:
352 |             dict
353 |         :raises Warning:
354 |             if an xml attribute has a number format different from the imzML specification
355 |         """
356 |         d = {}
357 |         scan_settings_list_elem = self.root.find('%sscanSettingsList' % self.sl)
358 |         instrument_config_list_elem = self.root.find('%sinstrumentConfigurationList' % self.sl)
359 |         scan_settings_params = [
360 |             ("max count of pixels x", "IMS:1000042"),
361 |             ("max count of pixels y", "IMS:1000043"),
362 |             ("max dimension x", "IMS:1000044"),
363 |             ("max dimension y", "IMS:1000045"),
364 |             ("pixel size x", "IMS:1000046"),
365 |             ("pixel size y", "IMS:1000047"),
366 |             ("matrix solution concentration", "MS:1000835"),
367 |         ]
368 |         instrument_config_params = [
369 |             ("wavelength", "MS:1000843"),
370 |             ("focus diameter x", "MS:1000844"),
371 |             ("focus diameter y", "MS:1000845"),
372 |             ("pulse energy", "MS:1000846"),
373 |             ("pulse duration", "MS:1000847"),
374 |             ("attenuation", "MS:1000848"),
375 |         ]
376 | 
377 |         for name, accession in scan_settings_params:
378 |             try:
379 |                 val = _get_cv_param(scan_settings_list_elem, accession, deep=True, convert=True)
380 |                 if val is not None:
381 |                     d[name] = val
382 |             except ValueError:
383 |                 warn(Warning('Wrong data type in XML file. Skipped attribute "%s"' % name))
384 | 
385 |         for name, accession in instrument_config_params:
386 |             try:
387 |                 val = _get_cv_param(instrument_config_list_elem, accession, deep=True, convert=True)
388 |                 if val is not None:
389 |                     d[name] = val
390 |             except ValueError:
391 |                 warn(Warning('Wrong data type in XML file. Skipped attribute "%s"' % name))
392 |         return d
393 | 
394 |     def get_physical_coordinates(self, i):
395 |         """
396 |         For a pixel index i, return the real-world coordinates in nanometers.
397 | 
398 |         This is equivalent to multiplying the image coordinates of the given pixel with the pixel size.
399 | 
400 |         :param i: the pixel index
401 |         :return: a tuple of x and y coordinates.
402 |         :rtype: Tuple[float]
403 |         :raises KeyError: if the .imzML file does not specify the attributes "pixel size x" and "pixel size y"
404 |         """
405 |         try:
406 |             pixel_size_x = self.imzmldict["pixel size x"]
407 |             pixel_size_y = self.imzmldict["pixel size y"]
408 |         except KeyError:
409 |             raise KeyError("Could not find all pixel size attributes in imzML file")
410 |         image_x, image_y = self.coordinates[i][:2]
411 |         return image_x * pixel_size_x, image_y * pixel_size_y
412 | 
413 |     def getspectrum(self, index):
414 |         """
415 |         Reads the spectrum at specified index from the .ibd file.
416 | 
417 |         :param index:
418 |             Index of the desired spectrum in the .imzML file
419 | 
420 |         Output:
421 | 
422 |         mz_array: numpy.ndarray
423 |             Sequence of m/z values representing the horizontal axis of the desired mass
424 |             spectrum
425 |         intensity_array: numpy.ndarray
426 |             Sequence of intensity values corresponding to mz_array
427 |         """
428 |         mz_bytes, intensity_bytes = self.get_spectrum_as_string(index)
429 |         mz_array = np.frombuffer(mz_bytes, dtype=self.mzPrecision)
430 |         intensity_array = np.frombuffer(intensity_bytes, dtype=self.intensityPrecision)
431 |         return mz_array, intensity_array
432 | 
433 |     def get_spectrum_as_string(self, index):
434 |         """
435 |         Reads m/z array and intensity array of the spectrum at specified location
436 |         from the binary file as a byte string. The string can be unpacked by the struct
437 |         module. To get the arrays as numbers, use getspectrum
438 | 
439 |         :param index:
440 |             Index of the desired spectrum in the .imzML file
441 |         :rtype: Tuple[str, str]
442 | 
443 |         Output:
444 | 
445 |         mz_string:
446 |             string where each character represents a byte of the mz array of the
447 |             spectrum
448 |         intensity_string:
449 |             string where each character represents a byte of the intensity array of
450 |             the spectrum
451 |         """
452 |         offsets = [self.mzOffsets[index], self.intensityOffsets[index]]
453 |         lengths = [self.mzLengths[index], self.intensityLengths[index]]
454 |         lengths[0] *= self.sizeDict[self.mzPrecision]
455 |         lengths[1] *= self.sizeDict[self.intensityPrecision]
456 |         self.m.seek(offsets[0])
457 |         mz_string = self.m.read(lengths[0])
458 |         self.m.seek(offsets[1])
459 |         intensity_string = self.m.read(lengths[1])
460 |         return mz_string, intensity_string
461 | 
462 |     def portable_spectrum_reader(self):
463 |         """
464 |         Builds a PortableSpectrumReader that holds the coordinates list and spectrum offsets in the .ibd file
465 |         so that the .ibd file can be read without opening the .imzML file again.
466 | 
467 |         The PortableSpectrumReader can be safely pickled and unpickled, making it useful for reading the spectra
468 |         in a distributed environment such as PySpark or PyWren.
469 |         """
470 |         return PortableSpectrumReader(self.coordinates,
471 |                                       self.mzPrecision, self.mzOffsets, self.mzLengths,
472 |                                       self.intensityPrecision, self.intensityOffsets, self.intensityLengths)
473 | 
474 |     def check_peaks_overlap(self, n_spectrum: int = 100, ppm: float = 3.0) -> float:
475 |         """
476 |         This function represents an approach for finding non-centroided datasets based on
477 |         comparing the distance to the neighboring peak and shifting the existing peak by N ppm.
478 | 
479 |         The algorithm is described in the "Exclusion of non-centroided datasets" section of the article
480 |         METASPACE-ML: Metabolite annotation for imaging mass spectrometry using machine learning
481 |         https://www.biorxiv.org/content/10.1101/2023.05.29.542736v2
482 |         """
483 |         random.seed(42)
484 |         indexes = set([
485 |             random.randrange(0, len(self.coordinates))
486 |             for _ in range(min(len(self.coordinates), n_spectrum))
487 |         ])
488 | 
489 |         n_overlap_peaks = []
490 |         non_zero_peaks = []
491 |         for idx in indexes:
492 |             mzs, ints = self.getspectrum(idx)
493 |             n_overlap, non_zero = calc_peaks_overlap(mzs, ints, ppm)
494 |             n_overlap_peaks.append(n_overlap)
495 |             non_zero_peaks.append(non_zero)
496 | 
497 |         overlap_percentage = sum(n_overlap_peaks) / sum(non_zero_peaks) * 100.0
498 |         return round(overlap_percentage, 2)
499 | 
500 |     def get_spectrum_statistics(self, idx: int) -> Dict[str, Any]:
501 |         """Calculate all the necessary metrics about m/z and intensity for the one spectrum"""
502 |         mzs, ints = self.getspectrum(idx)
503 |         nonzero_ints_indx = np.where(ints > 0.0)[0]
504 |         nonzero_ints = ints[nonzero_ints_indx]
505 | 
506 |         if len(mzs) == 0:
507 |             return {}
508 |         # some datasets have anomalous values of m/z, like 1.0e+35
509 |         elif mzs.max() > 1_000_000:
510 |             return {}
511 |         elif np.all(np.isnan(mzs)):
512 |             return {}
513 |         else:
514 |             return {
515 |                 'mzs_min': mzs.min(),
516 |                 'mzs_max': mzs.max(),
517 |                 'mzs_digitized': calc_mzs_digitize(mzs),
518 |                 'ints_min': nonzero_ints.min() if len(nonzero_ints) > 0 else 0,  # non zero
519 |                 'ints_50p': np.percentile(nonzero_ints, 50) if len(nonzero_ints) > 0 else 0,
520 |                 'ints_95p': np.percentile(nonzero_ints, 95) if len(nonzero_ints) > 0 else 0,
521 |                 'ints_99p': np.percentile(nonzero_ints, 99) if len(nonzero_ints) > 0 else 0,
522 |                 'ints_max': ints.max(),
523 |                 'ints_total': sum(ints),
524 |                 'nonzero_intensity_peaks_count': len(nonzero_ints),
525 |                 'total_peaks_count': len(ints),
526 |             }
527 | 
528 |     def calc_statistics(self, n_spectrum: int = 0, full: bool = False) -> Dict[str, Any]:
529 |         """
530 |         Calculate the statistics of the number of peaks for the entire dataset,
531 |         as well as full/n_spectrum is setting up - calculate extended statistics for each spectrum
532 | 
533 |         :param n_spectrum: the number of spectrum to analyze
534 |         :param full: analysis of all spectrum
535 |         """
536 |         peaks_statistics = {
537 |             'ds_peaks_stats': {
538 |                 'min': min(self.intensityLengths),
539 |                 'median': int(np.median(self.intensityLengths)),
540 |                 '95p': int(np.percentile(self.intensityLengths, q=95)),
541 |                 'max': max(self.intensityLengths),
542 |             }
543 |         }
544 | 
545 |         # select all coordinates or a subset depending on the value of the full/n_spectrum variables
546 |         if full:
547 |             indexes = list(range(len(self.coordinates)))
548 |         elif n_spectrum:
549 |             random.seed(42)
550 |             indexes = set([
551 |                 random.randrange(0, len(self.coordinates))
552 |                 for _ in range(min(len(self.coordinates), n_spectrum))
553 |             ])
554 |         else:
555 |             indexes = []
556 | 
557 |         if indexes:
558 |             mzs_min, mzs_max = [], []
559 |             ints_min, ints_max, ints_total = [], [], []
560 |             ints_50p, ints_95p, ints_99p = [], [], []
561 |             nonzero_intensity_peaks_count, total_peaks_count = [], []
562 |             mzs_digitized = Counter()
563 |             for idx in indexes:
564 |                 spectrum_stats = self.get_spectrum_statistics(idx)
565 |                 if not spectrum_stats:
566 |                     continue
567 |                 mzs_min.append(spectrum_stats['mzs_min'])
568 |                 mzs_max.append(spectrum_stats['mzs_max'])
569 |                 mzs_digitized += spectrum_stats['mzs_digitized']
570 |                 ints_min.append(spectrum_stats['ints_min'])
571 |                 ints_50p.append(spectrum_stats['ints_50p'])
572 |                 ints_95p.append(spectrum_stats['ints_95p'])
573 |                 ints_99p.append(spectrum_stats['ints_99p'])
574 |                 ints_max.append(spectrum_stats['ints_max'])
575 |                 ints_total.append(spectrum_stats['ints_total'])
576 |                 nonzero_intensity_peaks_count.append(spectrum_stats['nonzero_intensity_peaks_count'])
577 |                 total_peaks_count.append(spectrum_stats['total_peaks_count'])
578 | 
579 |             peaks_statistics.update({
580 |                 'mz_min': min(mzs_min),
581 |                 'mz_max': max(mzs_max),
582 |                 'mzs_min': np.array(mzs_min, dtype=np.float32),
583 |                 'mzs_max': np.array(mzs_max, dtype=np.float32),
584 |                 'mzs_digitized': mzs_digitized,
585 |                 'ints_min': np.array(ints_min, dtype=np.float32),
586 |                 'ints_50p': np.array(ints_50p, dtype=np.float32),
587 |                 'ints_95p': np.array(ints_95p, dtype=np.float32),
588 |                 'ints_99p': np.array(ints_99p, dtype=np.float32),
589 |                 'ints_max': np.array(ints_max, dtype=np.float32),
590 |                 'ints_total': np.array(ints_total, dtype=np.float32),
591 |                 'nonzero_intensity_lengths': np.array(nonzero_intensity_peaks_count, dtype=np.int32),
592 |                 'nonzero_peaks_percentage':
593 |                     round(sum(nonzero_intensity_peaks_count)/sum(total_peaks_count) * 100.0, 2),
594 |             })
595 | 
596 |         return peaks_statistics
597 | 
598 | 
def getionimage(p, mz_value, tol=0.1, z=1, reduce_func=sum):
    """
    Get an image representation of the intensity distribution
    of the ion with specified m/z value.

    By default, the intensity values within the tolerance region are summed.

    :param p:
        the ImzMLParser (or anything else with similar attributes) for the desired dataset
    :param mz_value:
        m/z value for which the ion image shall be returned
    :param tol:
        Absolute tolerance for the m/z value, such that all ions with values
        mz_value-|tol| <= x <= mz_value+|tol| are included. Defaults to 0.1
    :param z:
        z Value if spectrogram is 3-dimensional.
    :param reduce_func:
        the behaviour for reducing the intensities between mz_value-|tol| and mz_value+|tol| to a single value. Must
        be a function that takes a sequence as input and outputs a number. By default, the values are summed.

    :return:
        numpy matrix with each element representing the ion intensity in this
        pixel. Can be easily plotted with matplotlib
    :raises KeyError:
        if p.imzmldict lacks "max count of pixels x" or "max count of pixels y"

    A UserWarning is issued (once per call) if pixels with z coordinate 0 exist while a
    different z plane was requested.
    """
    tol = abs(tol)
    im = np.zeros((p.imzmldict["max count of pixels y"], p.imzmldict["max count of pixels x"]))
    warned_zero_z = False
    for i, (x, y, z_) in enumerate(p.coordinates):
        if z_ == 0 and z != 0 and not warned_zero_z:
            # actually emit the hint; merely constructing the warning object has no effect
            warn(
                UserWarning("z coordinate = 0 present, if you're getting blank images set getionimage(.., .., z=0)"),
                stacklevel=2,
            )
            warned_zero_z = True
        if z_ == z:
            mzs, ints = (np.asarray(values) for values in p.getspectrum(i))
            min_i, max_i = _bisect_spectrum(mzs, mz_value, tol)
            # image coordinates are 1-based, array indices 0-based
            im[y - 1, x - 1] = reduce_func(ints[min_i:max_i+1])
    return im
633 | 
634 | 
def browse(p):
    """
    Build a browser for the per-spectrum metadata of a parser.
    Usage::

        # get a list of the instrument configurations used in the first pixel
        instrument_configurations = browse(p).for_spectrum(0).get_ids("instrumentConfiguration")

    Currently, ``instrumentConfiguration``, ``dataProcessing`` and ``referenceableParamGroup`` are supported.

    When browsing all spectra iteratively, always use **ascending** indices; the browser re-reads the file from
    the beginning whenever an index is not larger than the previous one, which can result in quadratic runtime.
    The following example shows how to retrieve all unique instrumentConfigurations used::

        browser = browse(p)
        all_config_ids = set()
        for i, _ in enumerate(p.coordinates):
            all_config_ids.update(browser.for_spectrum(i).get_ids("instrumentConfiguration"))

    The ids can be used to find the corresponding ``<instrumentConfiguration>`` tag in the xml tree.

    :param p: the parser
    :return: the browser
    """
    return _ImzMLMetaDataBrowser(root=p.root, fn=p.filename, sl=p.sl)
659 | 
660 | 
661 | def _bisect_spectrum(mzs, mz_value, tol):
662 |     ix_l, ix_u = bisect_left(mzs, mz_value - tol), bisect_right(mzs, mz_value + tol) - 1
663 |     if ix_l == len(mzs):
664 |         return len(mzs), len(mzs)
665 |     if ix_u < 1:
666 |         return 0, 0
667 |     if ix_u == len(mzs):
668 |         ix_u -= 1
669 |     if mzs[ix_l] < (mz_value - tol):
670 |         ix_l += 1
671 |     if mzs[ix_u] > (mz_value + tol):
672 |         ix_u -= 1
673 |     return ix_l, ix_u
674 | 
675 | 
class _ImzMLMetaDataBrowser(object):
    """
    Re-reads the .imzML file on demand to hand out per-spectrum metadata browsers.

    The file is parsed incrementally; the parse position is kept between calls so that
    spectra requested with ascending indices are found in a single pass over the file.
    """

    def __init__(self, root, fn, sl):
        """
        :param root: root element of the already parsed .imzML file
        :param fn: the .imzML file to re-read
        :param sl: namespace prefix prepended to tag names
        """
        self._root = root
        self._sl = sl
        self._fn = fn
        self._iter = None  # running iterparse iterator over the file
        self._previous = None  # index of the spectrum found last
        self._list_elem = None  # the <spectrumList> element of the current pass
        self.iterparse = choose_iterparse()

    def for_spectrum(self, i):
        """
        Return a metadata browser for the spectrum with index ``i``.

        :param i: value of the ``index`` attribute of the wanted <spectrum>
        :return:
            a _SpectrumMetaDataBrowser, or None if the remaining part of the file contains
            no spectrum with that index
        """
        must_restart = self._previous is None or i <= self._previous
        if must_restart:
            # the wanted spectrum may lie behind the current parse position
            self._iter = self.iterparse(self._fn, events=("start", "end"))

        list_tag = self._sl + "spectrumList"
        spectrum_tag = self._sl + "spectrum"
        wanted = str(i)
        for event, node in self._iter:
            if event == "start" and node.tag == list_tag:
                self._list_elem = node
            elif event == "end" and node.tag == spectrum_tag:
                # detach every visited spectrum so the tree does not grow with the file
                self._list_elem.remove(node)
                if node.attrib["index"] == wanted:
                    self._previous = i
                    return _SpectrumMetaDataBrowser(self._root, self._sl, node)
        return None
695 | 
696 | 
class _SpectrumMetaDataBrowser(object):
    """
    Looks up the ids of metadata elements that apply to a single ``<spectrum>`` element.

    :param root: root element of the xml tree; consulted for defaults declared on
        ``<run>`` and ``<spectrumList>``
    :param sl: prefix that is prepended to every tag name in element lookups
    :param spectrum: the ``<spectrum>`` element to inspect
    """

    def __init__(self, root, sl, spectrum):
        self._root = root
        self._sl = sl
        self._spectrum = spectrum

    def get_ids(self, element):
        """
        Return the ids of all elements of the given kind that apply to this spectrum.

        :param element: name of the element kind; must equal one of the module constants
            ``param_group_elname``, ``data_processing_elname`` or ``instrument_confid_elname``
        :return: list of id strings (possibly empty)
        :raises ValueError: if ``element`` is none of the supported names
        """
        param_methods = {
            param_group_elname: self._find_referenceable_param_groups,
            data_processing_elname: self._find_data_processing,
            instrument_confid_elname: self._find_instrument_configurations,
        }
        # Resolve the finder first and call it outside of any exception handling, so that
        # an error raised inside a finder is never misreported as an unsupported element.
        finder = param_methods.get(element)
        if finder is None:
            raise ValueError("Unsupported element: " + str(element))
        return finder()

    def _find_referenceable_param_groups(self):
        """Return the ``ref`` of every ``<referenceableParamGroupRef>`` child as a list."""
        param_group_refs = self._spectrum.findall("%sreferenceableParamGroupRef" % self._sl)
        return [group.attrib["ref"] for group in param_group_refs]

    def _find_instrument_configurations(self):
        """
        Return the instrument configuration ids referenced by the scans of this spectrum.

        If no scan carries an ``instrumentConfigurationRef`` attribute, the
        ``defaultInstrumentConfigurationRef`` of ``<run>`` is returned as a one-element
        list; if that is absent as well, the result is an empty list.
        """
        scan_list = self._spectrum.find("%sscanList" % self._sl)
        # Compare against None: the truth value of an element depends on its child count.
        if scan_list is not None:
            scans = scan_list.findall("%sscan[@instrumentConfigurationRef]" % self._sl)
            ids = [scan.attrib["instrumentConfigurationRef"] for scan in scans]
            if ids:
                return ids

        run = self._root.find("%srun" % self._sl)
        if run is None:
            return []
        default_ref = run.attrib.get("defaultInstrumentConfigurationRef")
        return [] if default_ref is None else [default_ref]

    def _find_data_processing(self):
        """
        Return the data processing id that applies to this spectrum as a list.

        The spectrum's own ``dataProcessingRef`` attribute takes precedence; otherwise the
        ``defaultDataProcessingRef`` of ``<spectrumList>`` is used. The result is empty if
        neither is present.
        """
        own_ref = self._spectrum.attrib.get("dataProcessingRef")
        if own_ref is not None:
            # Wrapped in a list so that every branch yields the same return type.
            return [own_ref]

        spectrum_list = self._root.find("%srun/%sspectrumList" % (self._sl, self._sl))
        if spectrum_list is None:
            return []
        default_ref = spectrum_list.attrib.get("defaultDataProcessingRef")
        return [] if default_ref is None else [default_ref]
743 | 
744 | 
class PortableSpectrumReader(object):
    """
    Minimal, pickle-able description of where each spectrum lives inside an .ibd file.

    Only plain data (coordinates, precisions, offsets and lengths) is stored; no open
    file handle is kept, so instances survive pickling. The .ibd file itself is supplied
    by the caller on every read.
    """

    def __init__(self, coordinates, mzPrecision, mzOffsets, mzLengths,
                 intensityPrecision, intensityOffsets, intensityLengths):
        self.coordinates = coordinates

        # Layout of the m/z arrays.
        self.mzPrecision = mzPrecision
        self.mzOffsets = mzOffsets
        self.mzLengths = mzLengths

        # Layout of the intensity arrays.
        self.intensityPrecision = intensityPrecision
        self.intensityOffsets = intensityOffsets
        self.intensityLengths = intensityLengths

    def read_spectrum_from_file(self, file, index):
        """
        Read the m/z and intensity arrays of one spectrum from the .ibd file.

        :param file:
            File or file-like object for the .ibd file; it must support ``seek`` and ``read``
        :param index:
            Index of the desired spectrum in the .imzML file
        :return:
            Tuple ``(mz_array, intensity_array)`` of ``numpy.ndarray``: the m/z values of
            the spectrum and the intensity values corresponding to them
        """
        # Byte count = number of values * size of one value for the given precision.
        file.seek(self.mzOffsets[index])
        mz_byte_count = self.mzLengths[index] * SIZE_DICT[self.mzPrecision]
        raw_mzs = file.read(mz_byte_count)

        file.seek(self.intensityOffsets[index])
        intensity_byte_count = self.intensityLengths[index] * SIZE_DICT[self.intensityPrecision]
        raw_intensities = file.read(intensity_byte_count)

        return (
            np.frombuffer(raw_mzs, dtype=self.mzPrecision),
            np.frombuffer(raw_intensities, dtype=self.intensityPrecision),
        )
787 | 


--------------------------------------------------------------------------------