├── tests ├── __init__.py ├── data │ ├── Example_Processed.ibd │ ├── Example_Continuous.ibd │ ├── Example_Continuous.imzML │ └── Example_Processed.imzML ├── context.py └── test_basic.py ├── pyimzml ├── ontology │ ├── __init__.py │ ├── dump_obo_files.py │ ├── ontology.py │ ├── ims.py │ └── uo.py ├── .gitignore ├── __init__.py ├── compression.py ├── metadata.py ├── ImzMLWriter.py └── ImzMLParser.py ├── Report.pdf ├── docs ├── requirements.txt ├── source │ ├── pyimzml │ │ ├── ImzMLWriter.rst │ │ └── ImzMLParser.rst │ ├── index.rst │ └── conf.py └── Makefile ├── .github └── workflows │ └── pythonpublish.yml ├── .gitignore ├── setup.py ├── CHANGELOG.md ├── README.rst └── LICENSE /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyimzml/ontology/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyimzml/.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | -------------------------------------------------------------------------------- /pyimzml/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.5.5' 2 | -------------------------------------------------------------------------------- /Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexandrovteam/pyimzML/HEAD/Report.pdf -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.10 2 | wheezy.template 3 | sphinx-rtd-theme==0.5.0 4 | sphinx_autodoc_typehints 5 | -------------------------------------------------------------------------------- /tests/data/Example_Processed.ibd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexandrovteam/pyimzML/HEAD/tests/data/Example_Processed.ibd -------------------------------------------------------------------------------- /tests/data/Example_Continuous.ibd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexandrovteam/pyimzML/HEAD/tests/data/Example_Continuous.ibd -------------------------------------------------------------------------------- /tests/data/Example_Continuous.imzML: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexandrovteam/pyimzML/HEAD/tests/data/Example_Continuous.imzML -------------------------------------------------------------------------------- /tests/data/Example_Processed.imzML: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexandrovteam/pyimzML/HEAD/tests/data/Example_Processed.imzML -------------------------------------------------------------------------------- /tests/context.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | def getspectrum(min_mz, max_mz, n_peaks): 3 | return min_mz + max_mz*np.random.rand(n_peaks), np.abs(np.random.randn(n_peaks)) -------------------------------------------------------------------------------- /docs/source/pyimzml/ImzMLWriter.rst: -------------------------------------------------------------------------------- 1 | ImzMLWriter 2 | ======================== 3 | 4 | .. toctree:: 5 | :maxdepth: 3 6 | 7 | pyimzml.ImzMLWriter module 8 | -------------------------- 9 | 10 | .. automodule:: pyimzml.ImzMLWriter 11 | :members: 12 | :undoc-members: 13 | 14 | 15 | pyimzml.compression module 16 | -------------------------- 17 | 18 | This module holds adapters for compressing an ImzML file's binary data, currently only usable with ImzMLWriter. 19 | 20 | .. automodule:: pyimzml.compression 21 | :members: 22 | :undoc-members: 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /.github/workflows/pythonpublish.yml: -------------------------------------------------------------------------------- 1 | name: Publish pyimzML package 2 | 3 | on: workflow_dispatch # Manual trigger through Actions page 4 | 5 | jobs: 6 | build_and_deploy: 7 | name: Create and publish package to PyPI 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | 12 | - name: Checkout repository 13 | uses: actions/checkout@v4 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: '3.8' 19 | 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | pip install setuptools wheel twine 24 | 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: __token__ 28 | TWINE_PASSWORD: ${{ secrets.PYIMZML_PYPI_API_TOKEN }} 29 | run: | 30 | python setup.py sdist bdist_wheel 31 | twine upload dist/* 32 | -------------------------------------------------------------------------------- /docs/source/pyimzml/ImzMLParser.rst: -------------------------------------------------------------------------------- 1 | ImzMLParser 2 | ======================== 3 | 4 | .. toctree:: 5 | 6 | pyimzml.ImzMLParser module 7 | -------------------------- 8 | 9 | .. automodule:: pyimzml.ImzMLParser 10 | :members: 11 | :undoc-members: 12 | 13 | pyimzml.metadata module 14 | -------------------------- 15 | 16 | This module contains the data structures used for the 17 | :py:attr:`pyimzml.ImzMLParser.ImzMLParser.metadata` 18 | and :py:attr:`pyimzml.ImzMLParser.ImzMLParser.full_spectrum_metadata` fields. 19 | 20 | .. automodule:: pyimzml.metadata 21 | :members: 22 | :undoc-members: 23 | 24 | pyimzml.ontology module 25 | -------------------------- 26 | 27 | This module contains exports of the controlled vocabulary ontologies used by the ImzML format, 28 | used for ensuring that ImzML metadata items can always be accessed by their canonical names 29 | or accessions. 30 | 31 | .. automodule:: pyimzml.ontology.ontology 32 | :members: 33 | :undoc-members: 34 | -------------------------------------------------------------------------------- /pyimzml/compression.py: -------------------------------------------------------------------------------- 1 | import zlib 2 | 3 | class NoCompression(object): 4 | """ 5 | No compression. 6 | """ 7 | def __init__(self): 8 | pass 9 | 10 | def rounding(self, data): 11 | return data 12 | 13 | def compress(self, bytes): 14 | return bytes 15 | 16 | def decompress(self, bytes): 17 | return bytes 18 | 19 | name = "no compression" 20 | 21 | class ZlibCompression(object): 22 | """ 23 | Zlib compression with optional rounding of values. 24 | Rounding helps the compression, but is lossy. 25 | 26 | :param round_amt: 27 | Number of digits after comma. None means no rounding. 28 | """ 29 | def __init__(self, round_amt=None): 30 | self.round_amt = round_amt 31 | 32 | def rounding(self, data): 33 | if self.round_amt is not None: 34 | return [round(x, self.round_amt) for x in data] 35 | return data 36 | 37 | def compress(self, bytes): 38 | return zlib.compress(bytes) 39 | 40 | def decompress(self, bytes): 41 | return zlib.decompress(bytes) 42 | 43 | name = "zlib compression" 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Python template 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | # IntelliJ project files 62 | .idea 63 | *.iml 64 | out 65 | gen 66 | # Created by .ignore support plugin (hsz.mobi) 67 | *.DS_Store 68 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to pyimzML documentation! 2 | =================================== 3 | 4 | This package provides a parser of imzML format as well as a simple imzML writer. 5 | 6 | Typical usage pattern is as follows: 7 | 8 | .. code-block:: python 9 | 10 | from pyimzml.ImzMLParser import ImzMLParser 11 | 12 | p = ImzMLParser('Example.imzML') 13 | my_spectra = [] 14 | for idx, (x,y,z) in enumerate(p.coordinates): 15 | mzs, intensities = p.getspectrum(idx) 16 | my_spectra.append([mzs, intensities, (x, y, z)]) 17 | # ... 18 | 19 | from pyimzml.ImzMLWriter import ImzMLWriter 20 | 21 | with ImzMLWriter('output.imzML', polarity='positive') as w: 22 | for mzs, intensities, coords in my_spectra: 23 | # writes data to the .ibd file 24 | w.addSpectrum(mzs, intensities, coords) 25 | # at this point imzML file is written and files are closed 26 | 27 | 28 | .. _api: 29 | 30 | API Reference 31 | ============= 32 | 33 | .. toctree:: 34 | :caption: API Reference 35 | :glob: 36 | 37 | pyimzml/* 38 | 39 | :ref:`genindex` 40 | 41 | :ref:`modindex` 42 | 43 | 44 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from pyimzml import __version__ 2 | from setuptools import setup, find_packages 3 | 4 | setup( 5 | name='pyimzML', 6 | version=__version__, 7 | description="Parser for conversion of imzML 1.1.0 files", 8 | long_description=""" 9 | Parser for conversion of imzML 1.1.0 files. 10 | See specification here: https://ms-imaging.org/wp-content/uploads/2009/08/specifications_imzML1.1.0_RC1.pdf. 11 | Outputs data as python lists, dicts or numpy array. 12 | """, 13 | # The project's main homepage. 14 | url='https://github.com/alexandrovteam/pyimzML', 15 | author='Alexandrov Team, EMBL', 16 | author_email='theodore.alexandrov@embl.de', 17 | 18 | license='Apache 2.0', 19 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers 20 | classifiers=[ 21 | 'Development Status :: 5 - Production/Stable', 22 | 'Intended Audience :: Developers', 23 | 'Intended Audience :: Science/Research', 24 | 'Topic :: Scientific/Engineering :: Bio-Informatics', 25 | 'License :: OSI Approved :: Apache Software License', 26 | 27 | 'Programming Language :: Python :: 3', 28 | 'Programming Language :: Python :: 3.8', 29 | ], 30 | keywords='bioinformatics imaging mass spectrometry parser imzML', 31 | 32 | packages=find_packages(exclude=('tests', 'docs')), 33 | 34 | install_requires=['numpy', 'wheezy.template'], 35 | ) 36 | -------------------------------------------------------------------------------- /pyimzml/ontology/dump_obo_files.py: -------------------------------------------------------------------------------- 1 | # This file is not intended for general use. Its purpose is to dump the .obo files that define 2 | # the cvParam accession fields into a dependency-free format that can be bundled with pyimzml. 3 | # 4 | # It requires the additonal pip dependency obonet==0.2.6 5 | import re 6 | from collections import defaultdict 7 | from datetime import datetime 8 | from pprint import pformat 9 | 10 | 11 | ontology_sources = [ 12 | ('ms', 'https://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo', ['MS']), 13 | ('uo', 'https://raw.githubusercontent.com/bio-ontology-research-group/unit-ontology/master/unit.obo', ['UO']), 14 | ('ims', 'https://raw.githubusercontent.com/imzML/imzML/f2c8b6ce2affa8d8eef74d4bfe5922c815ff4dff/imagingMS.obo', ['IMS']), 15 | ] 16 | 17 | if __name__ == '__main__': 18 | import obonet 19 | 20 | now = datetime.utcnow().isoformat() 21 | 22 | for ontology_name, src, namespaces in ontology_sources: 23 | print(f'Parsing {ontology_name}') 24 | graph = obonet.read_obo(src, ignore_obsolete=False) 25 | terms = {} 26 | enums = defaultdict(list) 27 | for node_id in graph.nodes: 28 | node = graph.nodes[node_id] 29 | if any(node_id.startswith(ns) for ns in namespaces) and 'name' in node: 30 | dtype = None 31 | for xref in node.get('xref', []): 32 | m = re.match(r'^value-type:xsd\\:(\w+) ', xref) 33 | if m: 34 | dtype = 'xsd:' + m[1] 35 | break 36 | 37 | terms[node_id] = (node['name'], dtype) 38 | 39 | with open(f'./{ontology_name}.py', 'wt') as f: 40 | f.write('# DO NOT EDIT BY HAND\n') 41 | f.write(f'# This file was autogenerated by dump_obo_files.py at {now}\n') 42 | terms_repr = pformat(terms, indent=4, width=100) 43 | f.write(f'terms = {terms_repr}\n') 44 | 45 | 46 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 1.5.5 (2024-11-04) 2 | * Add dataset descriptive statistics 3 | 4 | ## 1.5.4 (2024-03-13) 5 | * Add `ImzMLParser.spectrum_mode` field. 6 | * Fix a bug in parsing `userParam` value. 7 | * Replaced broken links in README file 8 | * Updated GitHub Actions workflow 9 | 10 | ## 1.5.3 (2022-11-09) 11 | * Fixing a bug in the documentation. 12 | 13 | ## 1.5.2 (2022-07-21) 14 | * Change url for imzML 1.1.0 specification. 15 | 16 | ## 1.5.1 (2021-08-16) 17 | * Fix code that causes `SyntaxWarning` in Python 3.8+ 18 | * Change `ImzmlWriter` to output "linescan left right" instead of "line scan left right", to match the ontology 19 | 20 | ## 1.5.0 (2021-07-19) 21 | * Handle mismatched accession for "positive scan" 22 | * Default `ImzMLParser` to ElementTree if no `parse_lib` is specified 23 | * Add `ImzMLParser.polarity` field 24 | 25 | ## 1.4.1 (2020-10-26) 26 | * Fixed new modules missing from package 27 | 28 | ## 1.4.0 (2020-10-23) 29 | * Add support for parsing all ImzML metadata 30 | * Global metadata is always included through `ImzMLParser.metadata` 31 | * Per-spectrum metadata requires `include_spectra_metadata='full'` 32 | or `include_spectra_metadata=[... list of accessions]` to be passed to ImzMLParser. 33 | * Handle mismatched accessions for specifying data types of binary arrays 34 | 35 | ## 1.3.0 (2019-05-24) 36 | * Add `PortableSpectrumReader`, which holds the minimal subset of `ImzMLParser` needed to read m/z and intensity 37 | data from the .ibd file, and is able to be pickled. 38 | 39 | ## 1.2.6 (2019-04-23) 40 | * Changed `ImzMLParser.getspectrum` to return NumPy arrays instead of Python lists 41 | 42 | ## 1.2.5 (2019-04-10) 43 | * Added `parse_lib` parameter to `ImzMLParser`, allowing ElementTree to be used instead of lxml 44 | 45 | ## 1.2.4 (2019-01-23) 46 | * Support `MS:1000519` and `MS:1000522` accessions for specifying integer binary data types 47 | 48 | ## 1.2.3 (2018-07-02) 49 | * Support `ImzMLParser` detecting .ibd files with a case-insensitive search 50 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | pyimzML 2 | ======= 3 | 4 | .. image:: https://readthedocs.org/projects/pyimzml/badge/?version=latest 5 | :target: http://pyimzml.readthedocs.org/en/latest/?badge=latest 6 | :alt: Documentation Status 7 | 8 | Description 9 | ----------- 10 | A parser for the imzML format used in imaging mass spectrometry. See specification 11 | `here `_. 12 | Designed for use with imzML version 1.1.0. Outputs data as python lists and dicts. 13 | 14 | The parser is developed by `Alexandrov Team `_ at EMBL Heidelberg. 15 | 16 | Installation 17 | ------------ 18 | pyimzML is available on `PyPI `_. pyimzML 19 | should be installed with pip using one of these three options: 20 | 21 | * ``$ pip install pyimzml`` will install pyimzML from PyPI (easiest). 22 | * ``$ pip install git+git://github.com/alexandrovteam/pyimzML.git`` will install pyimzML from github. 23 | * Download the source tarball from `PyPI `_ and ``$ pip install pyimzml-x-x-x.tar.gz`` 24 | 25 | **Dependency Notes** 26 | 27 | * pyimzML has an optional dependency to `lxml `_. If lxml is not installed, pyimzML will instead use the built-in cElementTree or ElementTree package. 28 | 29 | **Testing** 30 | 31 | To test your installation of pyimzML, you can download sample data from `imzml.org `_ and run the tests. 32 | 33 | Attribution 34 | ----------- 35 | 36 | The pyimzml/ontology directory includes data derived from the following ontologies: 37 | 38 | * `Units of measurement ontology `_ by George Gkoutos `CC-BY license `_ 39 | * `Mass spectrometry ontology `_ by `Gerhard Mayer et al. `_ `CC-BY license `_ 40 | * `Imaging MS controlled vocabulary `_ 41 | 42 | Documentation 43 | ------------- 44 | 45 | Documentation is available on `ReadTheDocs `_ 46 | -------------------------------------------------------------------------------- /pyimzml/ontology/ontology.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from warnings import warn 3 | 4 | from .uo import terms as uo_terms 5 | from .ms import terms as ms_terms 6 | from .ims import terms as ims_terms 7 | 8 | all_terms = {} 9 | all_terms.update(uo_terms) 10 | all_terms.update(ms_terms) 11 | all_terms.update(ims_terms) 12 | 13 | DTYPE_MAPPING = { 14 | 'xsd:string': str, 15 | 'xsd:anyURI': str, 16 | 'xsd:float': float, 17 | 'xsd:double': float, 18 | 'xsd:decimal': float, 19 | 'xsd:nonNegativeFloat': float, 20 | 'xsd:int': int, 21 | 'xsd:integer': int, 22 | 'xsd:positiveInteger': int, 23 | 'xsd:nonNegativeInteger': int, 24 | 'xsd:boolean': bool, 25 | 'xsd:dateTime': datetime, 26 | } 27 | 28 | ACCESSION_FIX_MAPPING = { 29 | # Normally cvParam names will be updated to match the accession, but there are some 30 | # known cases where exporters use the correct name and incorrect accession. This is a mapping 31 | # of the known cases where the accession should be fixed, instead of the name. 32 | # (erroneous accession, name) -> fixed accession 33 | # Spectrum data types: https://github.com/alexandrovteam/pyimzML/pull/21#issuecomment-713818463 34 | ('MS:1000523', '32-bit float'): 'MS:1000521', 35 | ('MS:1000521', '64-bit float'): 'MS:1000523', 36 | # Polarity 37 | ('MS:1000128', 'positive scan'): 'MS:1000130' 38 | } 39 | 40 | 41 | def convert_xml_value(dtype, value): 42 | try: 43 | if dtype is not None: 44 | return DTYPE_MAPPING[dtype](value) 45 | elif value is None or value == '': 46 | # Many cv_params are flags and have either a None or empty-string value. 47 | # Replace their value with True in these cases, so their existence isn't so ambiguous. 48 | return True 49 | else: 50 | return value 51 | except KeyError: 52 | return value 53 | except ValueError: 54 | return None 55 | 56 | 57 | def convert_term_name(accession): 58 | return all_terms.get(accession, (accession, None))[0] 59 | 60 | 61 | def convert_cv_param(accession, value): 62 | """ 63 | Looks up a term by accession number, and convert the provided value to the expected type. 64 | """ 65 | name, dtype = all_terms.get(accession, (accession, None)) 66 | converted_value = convert_xml_value(dtype, value) 67 | return converted_value 68 | 69 | 70 | def lookup_and_convert_cv_param(accession, raw_name, value, unit_accession=None): 71 | """ 72 | Looks up a term by accession number, and returns the term name, its value converted into 73 | the expected datatype, and the unit name (if a unit accession number is also given). 74 | """ 75 | name, dtype = all_terms.get(accession, (raw_name or accession, None)) 76 | converted_value = convert_xml_value(dtype, value) 77 | unit_name = all_terms.get(unit_accession, (unit_accession, None))[0] 78 | 79 | if accession not in all_terms: 80 | warn('Unrecognized accession in : %s (name: "%s").' % (accession, raw_name)) 81 | elif name != raw_name: 82 | fixed_accession = ACCESSION_FIX_MAPPING.get((accession, raw_name)) 83 | if fixed_accession is not None: 84 | warn( 85 | 'Accession %s ("%s") found with mismatched name "%s". ' 86 | 'This is a known bug with some imzML conversion software - using accession ' 87 | '%s ("%s") instead.' % (accession, name, raw_name, fixed_accession, raw_name) 88 | ) 89 | accession = fixed_accession 90 | name = raw_name 91 | else: 92 | warn( 93 | 'Accession %s found with incorrect name "%s". Updating name to "%s".' 94 | % (accession, raw_name, name) 95 | ) 96 | 97 | return accession, name, converted_value, unit_name 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /tests/test_basic.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import unittest 3 | 4 | import numpy as np 5 | from pathlib import Path 6 | from .context import getspectrum 7 | import pyimzml.ImzMLParser as imzmlp 8 | import pyimzml.ImzMLWriter as imzmlw 9 | 10 | # Example files from https://www.ms-imaging.org/imzml/example-files-test/ 11 | CONTINUOUS_IMZML_PATH = str(Path(__file__).parent / 'data/Example_Continuous.imzML') 12 | CONTINUOUS_IBD_PATH = str(Path(__file__).parent / 'data/Example_Continuous.ibd') 13 | PROCESSED_IMZML_PATH = str(Path(__file__).parent / 'data/Example_Processed.imzML') 14 | PROCESSED_IBD_PATH = str(Path(__file__).parent / 'data/Example_Processed.ibd') 15 | PARSE_LIB_TEST_CASES = ['lxml', 'ElementTree'] 16 | DATA_TEST_CASES = [ 17 | ('Continuous', CONTINUOUS_IMZML_PATH, CONTINUOUS_IBD_PATH), 18 | ('Processed', PROCESSED_IMZML_PATH, PROCESSED_IBD_PATH), 19 | ] 20 | ALL_TEST_CASES = [(parse_lib, data_name, imzml_path, ibd_path) 21 | for parse_lib in PARSE_LIB_TEST_CASES 22 | for data_name, imzml_path, ibd_path in DATA_TEST_CASES] 23 | 24 | 25 | class ImzMLParser(unittest.TestCase): 26 | def test_bisect(self): 27 | mzs = [100., 201.89, 201.99, 202.0, 202.01, 202.10000001, 400.] 28 | test_mz = 202.0 29 | test_tol = 0.1 30 | ix_l, ix_u = imzmlp._bisect_spectrum(mzs, test_mz, test_tol) 31 | assert ix_l == 2 32 | assert ix_u == 4 33 | assert ix_l <= ix_u 34 | assert mzs[ix_l] >= test_mz - test_tol 35 | assert mzs[ix_u] <= test_mz + test_tol 36 | 37 | def test_getspectrum(self): 38 | for parse_lib, data_name, imzml_path, ibd_path in ALL_TEST_CASES: 39 | with self.subTest(parse_lib=parse_lib, data=data_name),\ 40 | imzmlp.ImzMLParser(imzml_path, parse_lib=parse_lib) as parser: 41 | 42 | mzs, ints = parser.getspectrum(4) 43 | 44 | assert parser.polarity == 'negative' 45 | assert parser.spectrum_mode == 'profile' 46 | assert len(parser.coordinates) == 9 47 | assert mzs.dtype == np.float32 48 | assert ints.dtype == np.float32 49 | assert len(mzs) == 8399 50 | assert len(ints) == 8399 51 | assert np.all(mzs > 100.0) 52 | assert np.all(mzs < 800.0) 53 | assert np.all(ints >= 0.0) 54 | assert np.all(ints < 3.0) 55 | 56 | def test_files_instead_of_paths(self): 57 | for parse_lib, data_name, imzml_path, ibd_path in ALL_TEST_CASES: 58 | with self.subTest(parse_lib=parse_lib, data=data_name),\ 59 | open(imzml_path, 'rb') as imzml_file,\ 60 | open(ibd_path, 'rb') as ibd_file,\ 61 | imzmlp.ImzMLParser(imzml_file, parse_lib=parse_lib, ibd_file=ibd_file) as parser: 62 | 63 | mzs, ints = parser.getspectrum(4) 64 | 65 | assert len(parser.coordinates) == 9 66 | assert len(mzs) > 0 67 | assert len(ints) > 0 68 | 69 | def test_parse_metadata(self): 70 | for parse_lib, data_name, imzml_path, ibd_path in ALL_TEST_CASES: 71 | with self.subTest(parse_lib=parse_lib, data=data_name),\ 72 | imzmlp.ImzMLParser(imzml_path, parse_lib=parse_lib) as parser: 73 | md = parser.metadata 74 | # fileDescription section 75 | assert md.file_description['MS:1000579'] == True 76 | assert 'ibd SHA-1' in md.file_description 77 | assert len(md.file_description.source_files) == 1 78 | assert md.file_description.source_files['sf1']['Thermo RAW format'] == True 79 | assert md.file_description.source_files['sf1'].attrs['name'] == 'Example.raw' 80 | assert len(md.file_description.contacts) == 1 81 | 82 | # referenceableParamGroupList section 83 | assert len(md.referenceable_param_groups) == 4 84 | assert md.referenceable_param_groups['scan1']['increasing m/z scan'] 85 | 86 | # sampleList section 87 | assert len(md.samples) == 1 88 | assert md.samples['sample1']['sample number'] == '1' 89 | 90 | # softwareList section 91 | assert len(md.softwares) == 2 92 | assert md.softwares['Xcalibur']['Xcalibur'] 93 | 94 | # scanSettingsList section 95 | assert len(md.scan_settings) == 1 96 | assert md.scan_settings['scansettings1']['pixel size (x)'] == 100.0 97 | 98 | # instrumentConfigurationList section 99 | assert len(md.instrument_configurations) == 1 100 | ic = md.instrument_configurations['LTQFTUltra0'] 101 | assert ic.param_by_name['instrument serial number'] == 'none' 102 | assert len(ic.components) == 3 103 | assert ic.components[0].type == 'source' 104 | assert ic.components[1].type == 'analyzer' 105 | assert ic.components[2].type == 'detector' 106 | assert ic.software_ref == 'Xcalibur' 107 | 108 | # dataProcessingList section 109 | assert len(md.data_processings) == 2 110 | assert md.data_processings['XcaliburProcessing'].methods[0].attrs['softwareRef'] == 'Xcalibur' 111 | assert md.data_processings['XcaliburProcessing'].methods[0]['low intensity data point removal'] 112 | 113 | def test_parse_full_spectrum_metadata(self): 114 | for parse_lib, data_name, imzml_path, ibd_path in ALL_TEST_CASES: 115 | with self.subTest(parse_lib=parse_lib, data=data_name),\ 116 | imzmlp.ImzMLParser(imzml_path, parse_lib=parse_lib, include_spectra_metadata='full') as parser: 117 | assert len(parser.spectrum_full_metadata) == len(parser.coordinates) 118 | spectrum = parser.spectrum_full_metadata[0] 119 | assert spectrum['ms level'] == 0 # comes from referenceable param group 120 | assert spectrum['total ion current'] > 100 121 | assert spectrum.scan_list_params['no combination'] 122 | assert spectrum.scans[0].attrs['instrumentConfigurationRef'] == 'LTQFTUltra0' 123 | assert spectrum.scans[0]['position x'] == 1 124 | assert 'm/z array' in spectrum.binary_data_arrays[0] 125 | assert 'intensity array' in spectrum.binary_data_arrays[1] 126 | 127 | def test_parse_partial_spectrum_metadata(self): 128 | TIC, POS_X, EXT_LEN, INVALID = 'MS:1000285', 'IMS:1000050', 'IMS:1000104', 'INVALID' 129 | ACCESSIONS = [TIC, POS_X, EXT_LEN, INVALID] 130 | for parse_lib, data_name, imzml_path, ibd_path in ALL_TEST_CASES: 131 | with self.subTest(parse_lib=parse_lib, data=data_name),\ 132 | imzmlp.ImzMLParser(imzml_path, parse_lib=parse_lib, include_spectra_metadata=ACCESSIONS) as parser: 133 | 134 | assert len(parser.spectrum_metadata_fields[TIC]) == len(parser.coordinates) 135 | assert len(parser.spectrum_metadata_fields[POS_X]) == len(parser.coordinates) 136 | assert len(parser.spectrum_metadata_fields[EXT_LEN]) == len(parser.coordinates) 137 | assert len(parser.spectrum_metadata_fields[INVALID]) == len(parser.coordinates) 138 | 139 | assert all(tic > 100 for tic in parser.spectrum_metadata_fields[TIC]) 140 | assert all(isinstance(pos_x, int) for pos_x in parser.spectrum_metadata_fields[POS_X]) 141 | assert all(isinstance(ext_len, int) for ext_len in parser.spectrum_metadata_fields[EXT_LEN]) 142 | assert all(invalid is None for invalid in parser.spectrum_metadata_fields[INVALID]) 143 | 144 | 145 | class PortableSpectrumReader(unittest.TestCase): 146 | def test_read_file(self): 147 | spectrum_idx = 4 148 | for parse_lib, data_name, imzml_path, ibd_path in ALL_TEST_CASES: 149 | with self.subTest(parse_lib=parse_lib, data=data_name),\ 150 | imzmlp.ImzMLParser(imzml_path, parse_lib=parse_lib) as normal_parser,\ 151 | open(ibd_path, 'rb') as ibd_file: 152 | 153 | normal_mzs, normal_ints = normal_parser.getspectrum(spectrum_idx) 154 | 155 | detached_parser = imzmlp.ImzMLParser(imzml_path, parse_lib=parse_lib, ibd_file=None) 156 | portable_reader = detached_parser.portable_spectrum_reader() 157 | # Pickle and unpickle to ensure it survives for its intended use case 158 | portable_reader = pickle.loads(pickle.dumps(portable_reader)) 159 | portable_mzs, portable_ints = portable_reader.read_spectrum_from_file(ibd_file, spectrum_idx) 160 | 161 | assert np.all(normal_mzs == portable_mzs) 162 | assert np.all(normal_ints == portable_ints) 163 | 164 | 165 | class ImzMLWriter(unittest.TestCase): 166 | def test_simple_write(self): 167 | mzs = np.linspace(100,1000,20) 168 | ints = np.random.rand(mzs.shape[0]) 169 | coords = [1,1,1] 170 | with imzmlw.ImzMLWriter("test.mzML", mode="processed") as imzml: 171 | imzml.addSpectrum(mzs, ints, coords=coords) 172 | 173 | 174 | if __name__ == '__main__': 175 | unittest.main() 176 | -------------------------------------------------------------------------------- /pyimzml/ontology/ims.py: -------------------------------------------------------------------------------- 1 | # DO NOT EDIT BY HAND 2 | # This file was autogenerated by dump_obo_files.py at 2020-10-21T18:55:01.621812 3 | terms = { 'IMS:0000000': ('Imaging Mass Spectrometry Ontology', None), 4 | 'IMS:1000001': ('ibd offset handle', None), 5 | 'IMS:1000002': ('sample stage', None), 6 | 'IMS:1000003': ('ibd binary type', None), 7 | 'IMS:1000004': ('image parameter', None), 8 | 'IMS:1000005': ('spectrum position', None), 9 | 'IMS:1000007': ('ibd file', None), 10 | 'IMS:1000008': ('ibd identification', None), 11 | 'IMS:1000009': ('ibd checksum', None), 12 | 'IMS:1000010': ('scan', None), 13 | 'IMS:1000011': ('probe scan mode', None), 14 | 'IMS:1000012': ('imaging ion source', None), 15 | 'IMS:1000013': ('unit', None), 16 | 'IMS:1000014': ('ibd data type', None), 17 | 'IMS:1000015': ('charge density', None), 18 | 'IMS:1000030': ('continuous', None), 19 | 'IMS:1000031': ('processed', None), 20 | 'IMS:1000040': ('linescan sequence', None), 21 | 'IMS:1000041': ('scan pattern', None), 22 | 'IMS:1000042': ('max count of pixels x', 'xsd:nonNegativeInteger'), 23 | 'IMS:1000043': ('max count of pixels y', 'xsd:nonNegativeInteger'), 24 | 'IMS:1000044': ('max dimension x', 'xsd:nonNegativeInteger'), 25 | 'IMS:1000045': ('max dimension y', 'xsd:nonNegativeInteger'), 26 | 'IMS:1000046': ('pixel size (x)', 'xsd:float'), 27 | 'IMS:1000047': ('pixel size y', 'xsd:float'), 28 | 'IMS:1000048': ('scan type', None), 29 | 'IMS:1000049': ('line scan direction', None), 30 | 'IMS:1000050': ('position x', 'xsd:nonNegativeInteger'), 31 | 'IMS:1000051': ('position y', 'xsd:nonNegativeInteger'), 32 | 'IMS:1000052': ('position z', 'xsd:nonNegativeInteger'), 33 | 'IMS:1000053': ('absolute position offset x', 'xsd:nonNegativeFloat'), 34 | 'IMS:1000054': ('absolute position offset y', 'xsd:nonNegativeFloat'), 35 | 'IMS:1000055': ('subimage position x', 'xsd:nonNegativeInteger'), 36 | 'IMS:1000056': ('subimage position y', 'xsd:nonNegativeInteger'), 37 | 'IMS:1000057': ('subimage position z', 'xsd:nonNegativeInteger'), 38 | 'IMS:1000070': ('external binary uri', 'xsd:string'), 39 | 'IMS:1000080': ('universally unique identifier', 'xsd:string'), 40 | 'IMS:1000090': ('ibd MD5', 'xsd:string'), 41 | 'IMS:1000091': ('ibd SHA-1', 'xsd:string'), 42 | 'IMS:1000101': ('external data', 'xsd:boolean'), 43 | 'IMS:1000102': ('external offset', 'xsd:nonNegativeInteger'), 44 | 'IMS:1000103': ('external array length', 'xsd:nonNegativeInteger'), 45 | 'IMS:1000104': ('external encoded length', 'xsd:nonNegativeInteger'), 46 | 'IMS:1000110': ('pixel mode', None), 47 | 'IMS:1000111': ('raster mode', None), 48 | 'IMS:1000112': ('stigmatic mode', None), 49 | 'IMS:1000120': ('SIMS parameter', None), 50 | 'IMS:1000121': ('DESI parameter', None), 51 | 'IMS:1000130': ('ions per square centimeter', None), 52 | 'IMS:1000131': ('milliliters per minute', None), 53 | 'IMS:1000141': ('32-bit integer', None), 54 | 'IMS:1000142': ('64-bit integer', None), 55 | 'IMS:1000199': ('sample stage attribute', None), 56 | 'IMS:1000200': ('position accuracy', 'xsd:float'), 57 | 'IMS:1000201': ('step size', 'xsd:float'), 58 | 'IMS:1000202': ('target material', 'xsd:string'), 59 | 'IMS:1000203': ('stage scan speed', 'xsd:float'), 60 | 'IMS:1000400': ('bottom up', None), 61 | 'IMS:1000401': ('top down', None), 62 | 'IMS:1000402': ('left right', None), 63 | 'IMS:1000403': ('right left', None), 64 | 'IMS:1000404': ('no direction', None), 65 | 'IMS:1000410': ('meandering', None), 66 | 'IMS:1000411': ('one way', None), 67 | 'IMS:1000412': ('random access', None), 68 | 'IMS:1000413': ('flyback', None), 69 | 'IMS:1000480': ('horizontal line scan', None), 70 | 'IMS:1000481': ('vertical line scan', None), 71 | 'IMS:1000490': ('linescan right left', None), 72 | 'IMS:1000491': ('linescan left right', None), 73 | 'IMS:1000492': ('linescan bottom up', None), 74 | 'IMS:1000493': ('linescan top down', None), 75 | 'IMS:1000500': ('conversion to imzML', None), 76 | 'IMS:1000501': ('imzMLParser', None), 77 | 'IMS:1000502': ('imzMLConverter', None), 78 | 'IMS:1000503': ('imzMLValidator', None), 79 | 'IMS:1000504': ('SpectralAnalysis', None), 80 | 'IMS:1001201': ('primary ion gun species', None), 81 | 'IMS:1001202': ('beam energy', 'xsd:float'), 82 | 'IMS:1001203': ('beam current', 'xsd:float'), 83 | 'IMS:1001204': ('cycle time', 'xsd:float'), 84 | 'IMS:1001205': ('time resolution', 'xsd:float'), 85 | 'IMS:1001206': ('polarity', None), 86 | 'IMS:1001207': ('primary ion dose density', 'xsd:float'), 87 | 'IMS:1001211': ('solvent', None), 88 | 'IMS:1001212': ('spray voltage', 'xsd:float'), 89 | 'IMS:1001213': ('solvent flowrate', 'xsd:float'), 90 | 'IMS:1001500': ('sample type', None), 91 | 'IMS:1001510': ('inorganic sample', None), 92 | 'IMS:1001520': ('organic sample', None), 93 | 'IMS:1001521': ('biological sample', None), 94 | 'IMS:1001522': ('clinical sample', None), 95 | 'IMS:1001523': ('pathological sample', None), 96 | 'IMS:1001524': ('food sample', 'xsd:string'), 97 | 'IMS:1001525': ('bacteria sample', 'xsd:string'), 98 | 'IMS:1001600': ('sample origin attribute', None), 99 | 'IMS:1001601': ('sample ethical approval', 'xsd:string'), 100 | 'IMS:1001602': ('sample origin institution', 'xsd:string'), 101 | 'IMS:1002000': ('analysed sample portion', None), 102 | 'IMS:1002001': ('sectioned sample', None), 103 | 'IMS:1002002': ('whole sample', None), 104 | 'IMS:1002003': ('blockface sample', None), 105 | 'IMS:1002005': ('sampling method', 'xsd:string'), 106 | 'IMS:1002010': ('sample storage condition', None), 107 | 'IMS:1002011': ('fixed', 'xsd:string'), 108 | 'IMS:1002012': ('fresh frozen', 'xsd:string'), 109 | 'IMS:1002013': ('embedded', 'xsd:string'), 110 | 'IMS:1002014': ('sample storage attribute', None), 111 | 'IMS:1002015': ('sample storage time before sectioning', 'xsd:float'), 112 | 'IMS:1002016': ('section storage time after sectioning and before analysis', 'xsd:float'), 113 | 'IMS:1002017': ('sample storage temperature', 'xsd:float'), 114 | 'IMS:1002018': ('freezing method', 'xsd:string'), 115 | 'IMS:1002019': ('flash frozen', None), 116 | 'IMS:1002020': ('sample storage time before analysis', 'xsd:float'), 117 | 'IMS:1002021': ('sample storage method', 'xsd:string'), 118 | 'IMS:1002050': ('stabilisation', None), 119 | 'IMS:1002051': ('no stabilisation performed', None), 120 | 'IMS:1002052': ('stabilisation method', 'xsd:string'), 121 | 'IMS:1002053': ('rapid heating stabilisation', None), 122 | 'IMS:1002054': ('focused microwave irradiation stabilisation', None), 123 | 'IMS:1002100': ('sectioning method', None), 124 | 'IMS:1002101': ('microtome sectioning', 'xsd:string'), 125 | 'IMS:1002102': ('microtome model', 'xsd:string'), 126 | 'IMS:1002103': ('sectioning attribute', None), 127 | 'IMS:1002104': ('cutting temperature', 'xsd:float'), 128 | 'IMS:1002105': ('cutting thickness', 'xsd:float'), 129 | 'IMS:1002106': ('blade sectioning', 'xsd:string'), 130 | 'IMS:1002107': ('section thickness', 'xsd:float'), 131 | 'IMS:1002200': ('mounting method', 'xsd:string'), 132 | 'IMS:1002201': ('thaw mounting', None), 133 | 'IMS:1002202': ('tape mounting', None), 134 | 'IMS:1002300': ('sample drying', 'xsd:string'), 135 | 'IMS:1002301': ('drying method attribute', None), 136 | 'IMS:1002302': ('drying time', 'xsd:float'), 137 | 'IMS:1002303': ('no drying performed', None), 138 | 'IMS:1002304': ('sample drying method', 'xsd:string'), 139 | 'IMS:1002400': ('sample washing', None), 140 | 'IMS:1002401': ('no washing performed', None), 141 | 'IMS:1002402': ('sample washing method', 'xsd:string'), 142 | 'IMS:1002500': ('on-sample chemistry', None), 143 | 'IMS:1002501': ('no on-sample chemistry performed', None), 144 | 'IMS:1002502': ('on-sample chemistry method', 'xsd:string'), 145 | 'IMS:1002503': ('on-sample chemistry attribute', None), 146 | 'IMS:1002504': ('on-sample chemistry reagent', 'xsd:string'), 147 | 'IMS:1002600': ('in-experiment quantification', None), 148 | 'IMS:1002601': ('no in-experiment quantification performed', None), 149 | 'IMS:1002602': ('in-experiment quantification method', 'xsd:string'), 150 | 'IMS:1002603': ('internal standard quantification', None), 151 | 'IMS:1002604': ('adjacent dilution series quantification', None), 152 | 'IMS:1002605': ('sprayed-on standard quantification', None), 153 | 'IMS:1003000': ('spraying method', None), 154 | 'IMS:1003001': ('automated spraying of matrix', None), 155 | 'IMS:1003002': ('manual spraying of matrix', None), 156 | 'IMS:1003003': ('automated spraying device', 'xsd:string'), 157 | 'IMS:1003004': ('automated sprayer attribute', None), 158 | 'IMS:1003005': ('automated sprayer nozzle movement speed', 'xsd:float'), 159 | 'IMS:1003006': ('automated sprayer flow-rate', 'xsd:float'), 160 | 'IMS:1003007': ('automated sprayer nozzle temperature', 'xsd:float'), 161 | 'IMS:1005001': ('xz compression', None), 162 | 'IMS:1005002': ('lz4 compression', None), 163 | 'IMS:1005003': ('zstd compression', None), 164 | 'IMS:1006000': ('repetition rate', 'xsd:float'), 165 | 'IMS:1006001': ('laser shots per spectrum', 'xsd:float'), 166 | 'IMS:1006002': ('m/z at which resolution was measured', 'xsd:float'), 167 | 'IMS:1006003': ('postmortem time', 'xsd:float'), 168 | 'IMS:1006004': ('age', 'xsd:float'), 169 | 'IMS:1006005': ('sample species', 'xsd:string'), 170 | 'IMS:1006006': ('sample organ', 'xsd:string'), 171 | 'IMS:1006007': ('sample condition', 'xsd:string'), 172 | 'IMS:1006008': ('optical image location', 'xsd:string'), 173 | 'IMS:1006009': ('optical image attribute', None), 174 | 'IMS:1006010': ('optical image subject', None), 175 | 'IMS:1006011': ('optical image of analysed sample', 'xsd:string'), 176 | 'IMS:1006012': ('optical image of adjacent section of analysed sample', 'xsd:string'), 177 | 'IMS:1006013': ('sample morphological classification', 'xsd:string'), 178 | 'IMS:1006014': ('sampling location', 'xsd:string'), 179 | 'IMS:1006015': ('staining method used for optical image', 'xsd:string'), 180 | 'IMS:1006016': ('ion source model', 'xsd:string'), 181 | 'IMS:1006017': ('method used to align optical image', 'xsd:string'), 182 | 'IMS:1100000': ('8-bit integer', None), 183 | 'IMS:1100001': ('16-bit integer', None)} 184 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # SM_distributed documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Feb 9 15:19:18 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import os 16 | import sys 17 | 18 | import sphinx_rtd_theme 19 | 20 | sys.path.append('..') 21 | sys.path.append('../..') 22 | 23 | from pyimzml import __version__ # noqa 24 | 25 | # If extensions (or modules to document with autodoc) are in another directory, 26 | # add these directories to sys.path here. If the directory is relative to the 27 | # documentation root, use os.path.abspath to make it absolute, like shown here. 28 | sys.path.insert(0, os.path.abspath('../..')) 29 | 30 | # -- General configuration ------------------------------------------------ 31 | 32 | # If your documentation needs a minimal Sphinx version, state it here. 33 | # needs_sphinx = '1.0' 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 38 | extensions = [ 39 | 'sphinx.ext.autodoc', 40 | 'sphinx.ext.napoleon', 41 | 'sphinx.ext.viewcode', 42 | 'sphinx_rtd_theme', 43 | 'sphinx_autodoc_typehints', 44 | 'sphinx.ext.autosummary', 45 | ] 46 | 47 | # source_parsers = { 48 | # '.md': CommonMarkParser, 49 | # } 50 | 51 | # Add any paths that contain templates here, relative to this directory. 52 | templates_path = ['_templates'] 53 | 54 | # The suffix(es) of source filenames. 55 | # You can specify multiple suffix as a list of string: 56 | # source_suffix = ['.rst', '.md'] 57 | source_suffix = '.rst' 58 | 59 | # The encoding of source files. 60 | # source_encoding = 'utf-8-sig' 61 | 62 | # The master toctree document. 63 | master_doc = 'index' 64 | 65 | # General information about the project. 66 | project = u'pyimzML' 67 | copyright = u'2016, Alexandrov Team' 68 | author = u'Alexandrov Team' 69 | 70 | # The version info for the project you're documenting, acts as replacement for 71 | # |version| and |release|, also used in various other places throughout the 72 | # built documents. 73 | # 74 | # The short X.Y version. 75 | version = __version__ 76 | # The full version, including alpha/beta/rc tags. 77 | release = __version__ 78 | 79 | # The language for content autogenerated by Sphinx. Refer to documentation 80 | # for a list of supported languages. 81 | # 82 | # This is also used if you do content translation via gettext catalogs. 83 | # Usually you set "language" from the command line for these cases. 84 | language = None 85 | 86 | # There are two options for replacing |today|: either, you set today to some 87 | # non-false value, then it is used: 88 | # today = '' 89 | # Else, today_fmt is used as the format for a strftime call. 90 | # today_fmt = '%B %d, %Y' 91 | 92 | # List of patterns, relative to source directory, that match files and 93 | # directories to ignore when looking for source files. 94 | exclude_patterns = ['docs', 'tests', 'setup'] 95 | 96 | # The reST default role (used for this markup: `text`) to use for all 97 | # documents. 98 | # default_role = None 99 | 100 | # If true, '()' will be appended to :func: etc. cross-reference text. 101 | # add_function_parentheses = True 102 | 103 | # If true, the current module name will be prepended to all description 104 | # unit titles (such as .. function::). 105 | # add_module_names = True 106 | 107 | # If true, sectionauthor and moduleauthor directives will be shown in the 108 | # output. They are ignored by default. 109 | # show_authors = False 110 | 111 | # The name of the Pygments (syntax highlighting) style to use. 112 | pygments_style = 'sphinx' 113 | 114 | # A list of ignored prefixes for module index sorting. 115 | # modindex_common_prefix = [] 116 | 117 | # If true, keep warnings as "system message" paragraphs in the built documents. 118 | # keep_warnings = False 119 | 120 | # If true, `todo` and `todoList` produce output, else they produce nothing. 121 | todo_include_todos = False 122 | 123 | # -- Options for HTML output ---------------------------------------------- 124 | 125 | # The theme to use for HTML and HTML Help pages. See the documentation for 126 | # a list of builtin themes. 127 | html_theme = 'sphinx_rtd_theme' 128 | 129 | # Theme options are theme-specific and customize the look and feel of a theme 130 | # further. For a list of options available for each theme, see the 131 | # documentation. 132 | # html_theme_options = {} 133 | 134 | # Add any paths that contain custom themes here, relative to this directory. 135 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 136 | 137 | # The name for this set of Sphinx documents. If None, it defaults to 138 | # " v documentation". 139 | # html_title = None 140 | 141 | # A shorter title for the navigation bar. Default is the same as html_title. 142 | # html_short_title = None 143 | 144 | # The name of an image file (relative to this directory) to place at the top 145 | # of the sidebar. 146 | # html_logo = None 147 | 148 | # The name of an image file (within the static path) to use as favicon of the 149 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 150 | # pixels large. 151 | # html_favicon = None 152 | 153 | # Add any paths that contain custom static files (such as style sheets) here, 154 | # relative to this directory. They are copied after the builtin static files, 155 | # so a file named "default.css" will overwrite the builtin "default.css". 156 | # html_static_path = ['_static'] 157 | 158 | # Add any extra paths that contain custom files (such as robots.txt or 159 | # .htaccess) here, relative to this directory. These files are copied 160 | # directly to the root of the documentation. 161 | # html_extra_path = [] 162 | 163 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 164 | # using the given strftime format. 165 | # html_last_updated_fmt = '%b %d, %Y' 166 | 167 | # If true, SmartyPants will be used to convert quotes and dashes to 168 | # typographically correct entities. 169 | # html_use_smartypants = True 170 | 171 | # Custom sidebar templates, maps document names to template names. 172 | # html_sidebars = {} 173 | 174 | # Additional templates that should be rendered to pages, maps page names to 175 | # template names. 176 | # html_additional_pages = {} 177 | 178 | # If false, no module index is generated. 179 | html_domain_indices = True 180 | 181 | # If false, no index is generated. 182 | # html_use_index = True 183 | 184 | # If true, the index is split into individual pages for each letter. 185 | # html_split_index = False 186 | 187 | # If true, links to the reST sources are added to the pages. 188 | # html_show_sourcelink = True 189 | 190 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 191 | # html_show_sphinx = True 192 | 193 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 194 | # html_show_copyright = True 195 | 196 | # If true, an OpenSearch description file will be output, and all pages will 197 | # contain a tag referring to it. The value of this option must be the 198 | # base URL from which the finished HTML is served. 199 | # html_use_opensearch = '' 200 | 201 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 202 | # html_file_suffix = None 203 | 204 | # Language to be used for generating the HTML full-text search index. 205 | # Sphinx supports the following languages: 206 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 207 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' 208 | # html_search_language = 'en' 209 | 210 | # A dictionary with options for the search language support, empty by default. 211 | # Now only 'ja' uses this config value 212 | # html_search_options = {'type': 'default'} 213 | 214 | # The name of a javascript file (relative to the configuration directory) that 215 | # implements a search results scorer. If empty, the default will be used. 216 | # html_search_scorer = 'scorer.js' 217 | 218 | # Output file base name for HTML help builder. 219 | htmlhelp_basename = 'pyimzMLdoc' 220 | 221 | # -- Options for LaTeX output --------------------------------------------- 222 | 223 | latex_elements = { 224 | # The paper size ('letterpaper' or 'a4paper'). 225 | # 'papersize': 'letterpaper', 226 | 227 | # The font size ('10pt', '11pt' or '12pt'). 228 | # 'pointsize': '10pt', 229 | 230 | # Additional stuff for the LaTeX preamble. 231 | # 'preamble': '', 232 | 233 | # Latex figure (float) alignment 234 | # 'figure_align': 'htbp', 235 | } 236 | 237 | # Grouping the document tree into LaTeX files. List of tuples 238 | # (source start file, target name, title, 239 | # author, documentclass [howto, manual, or own class]). 240 | latex_documents = [ 241 | (master_doc, 'pyimzML.tex', u'pyimzML Documentation', 242 | u'Alexandrov Team', 'manual'), 243 | ] 244 | 245 | # The name of an image file (relative to this directory) to place at the top of 246 | # the title page. 247 | # latex_logo = None 248 | 249 | # For "manual" documents, if this is true, then toplevel headings are parts, 250 | # not chapters. 251 | # latex_use_parts = False 252 | 253 | # If true, show page references after internal links. 254 | # latex_show_pagerefs = False 255 | 256 | # If true, show URL addresses after external links. 257 | # latex_show_urls = False 258 | 259 | # Documents to append as an appendix to all manuals. 260 | # latex_appendices = [] 261 | 262 | # If false, no module index is generated. 263 | latex_domain_indices = True 264 | 265 | # -- Options for manual page output --------------------------------------- 266 | 267 | # One entry per manual page. List of tuples 268 | # (source start file, name, description, authors, manual section). 269 | man_pages = [ 270 | (master_doc, 'pyimzml', u'pyimzML Documentation', 271 | [author], 1) 272 | ] 273 | 274 | # If true, show URL addresses after external links. 275 | # man_show_urls = False 276 | 277 | 278 | # -- Options for Texinfo output ------------------------------------------- 279 | 280 | # Grouping the document tree into Texinfo files. List of tuples 281 | # (source start file, target name, title, author, 282 | # dir menu entry, description, category) 283 | texinfo_documents = [ 284 | (master_doc, 'pyimzML', u'pyimzML Documentation', 285 | author, 'pyimzML', 'One line description of project.', 286 | 'Miscellaneous'), 287 | ] 288 | 289 | # Documents to append as an appendix to all manuals. 290 | # texinfo_appendices = [] 291 | 292 | # If false, no module index is generated. 293 | texinfo_domain_indices = True 294 | 295 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 296 | # texinfo_show_urls = 'footnote' 297 | 298 | # If true, do not generate a @detailmenu in the "Top" node's menu. 299 | # texinfo_no_detailmenu = False 300 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /pyimzml/metadata.py: -------------------------------------------------------------------------------- 1 | from warnings import warn 2 | 3 | from pyimzml.ontology.ontology import lookup_and_convert_cv_param, convert_xml_value, convert_term_name 4 | 5 | XMLNS_PREFIX = "{http://psi.hupo.org/ms/mzml}" 6 | 7 | 8 | def _deep_pretty(obj): 9 | if isinstance(obj, list): 10 | return [_deep_pretty(item) for item in obj] 11 | if isinstance(obj, dict): 12 | return {k: _deep_pretty(v) for k, v in obj.items()} 13 | if hasattr(obj, 'pretty'): 14 | return obj.pretty() 15 | return obj 16 | 17 | 18 | class _ParseUtils: 19 | """ 20 | Utility class for common parsing patterns and tracking created param groups so that 21 | their refs to other param groups can later be linked up. 22 | """ 23 | def __init__(self): 24 | self.created_param_groups = [] 25 | 26 | def param_group(self, node, **extra_fields): 27 | pg = ParamGroup(node, **extra_fields) 28 | self.created_param_groups.append(pg) 29 | return pg 30 | 31 | def optional_param_group(self, parent_node, xpath, **extra_fields): 32 | node = parent_node.find(xpath.format(XMLNS_PREFIX)) 33 | return self.param_group(node, **extra_fields) if node is not None else None 34 | 35 | def param_groups_by_id(self, parent_node, xpath): 36 | return dict( 37 | (n.get('id', idx), self.param_group(n)) 38 | for idx, n in enumerate(parent_node.findall(xpath.format(XMLNS_PREFIX))) 39 | ) 40 | 41 | def param_groups_list(self, parent_node, xpath): 42 | return [self.param_group(n) for n in parent_node.findall(xpath.format(XMLNS_PREFIX))] 43 | 44 | def refs_list(self, parent_node, xpath): 45 | return [n.attrib.get('ref') for n in parent_node.findall(xpath.format(XMLNS_PREFIX))] 46 | 47 | 48 | class Metadata: 49 | def __init__(self, root): 50 | """ 51 | Parse metadata headers from an imzML file into a structured format for easy access in Python code. 52 | This class deliberately excludes spectra, as they account for significantly more memory use 53 | and parsing time, and typically should be treated separately. 54 | """ 55 | pu = _ParseUtils() 56 | 57 | fd_node = root.find('{0}fileDescription'.format(XMLNS_PREFIX)) 58 | self.file_description = pu.param_group( 59 | fd_node.find('{0}fileContent'.format(XMLNS_PREFIX)), 60 | source_files=pu.param_groups_by_id(fd_node, '{0}sourceFileList/{0}sourceFile'), 61 | contacts=pu.param_groups_list(fd_node, '{0}contact'), 62 | ) 63 | 64 | self.referenceable_param_groups = pu.param_groups_by_id( 65 | root, 66 | '{0}referenceableParamGroupList/{0}referenceableParamGroup' 67 | ) 68 | self.samples = pu.param_groups_by_id(root, '{0}sampleList/{0}sample') 69 | self.softwares = pu.param_groups_by_id(root, '{0}softwareList/{0}software') 70 | 71 | self.scan_settings = {} 72 | for node in root.findall('{0}scanSettingsList/{0}scanSettings'.format(XMLNS_PREFIX)): 73 | self.scan_settings[node.get('id')] = pu.param_group( 74 | node, 75 | source_file_refs=pu.refs_list(node, '{0}sourceFileRefList/{0}sourceFileRef'), 76 | targets=pu.param_groups_by_id(node, '{0}targetList/{0}target'), 77 | ) 78 | 79 | self.instrument_configurations = {} 80 | for node in root.findall('{0}instrumentConfigurationList/{0}instrumentConfiguration'.format(XMLNS_PREFIX)): 81 | self.instrument_configurations[node.get('id')] = pu.param_group( 82 | node, 83 | components=pu.param_groups_list(node, '{0}componentList/*'), 84 | software_ref=next(iter(pu.refs_list(node, '{0}softwareRef')), None), 85 | ) 86 | 87 | self.data_processings = {} 88 | for node in root.findall('{0}dataProcessingList/{0}dataProcessing'.format(XMLNS_PREFIX)): 89 | self.data_processings[node.get('id')] = pu.param_group( 90 | node, 91 | methods=pu.param_groups_list(node, '{0}processingMethod') 92 | ) 93 | 94 | # Apply referenceable_param_groups 95 | for pg in pu.created_param_groups: 96 | pg.apply_referenceable_param_groups(self.referenceable_param_groups) 97 | 98 | def pretty(self): 99 | """ 100 | Returns a nested dict summarizing all contained sections, intended to help human inspection. 101 | """ 102 | return { 103 | 'file_description': self.file_description.pretty(), 104 | 'referenceable_param_groups': _deep_pretty(self.referenceable_param_groups), 105 | 'samples': _deep_pretty(self.samples), 106 | 'softwares': _deep_pretty(self.softwares), 107 | 'scan_settings': _deep_pretty(self.scan_settings), 108 | 'instrument_configurations': _deep_pretty(self.instrument_configurations), 109 | 'data_processings': _deep_pretty(self.data_processings), 110 | } 111 | 112 | 113 | class ParamGroup: 114 | """ 115 | This class exposes a group of imzML parameters at two layers of abstraction: 116 | 117 | High-level examples: 118 | `param_group['MS:0000000']` 119 | Access a controlled vocabulary parameter by accession ID or name, or a user-defined 120 | parameter by name. Controlled vocabulary parameters will take priority. 121 | This also inherits values from referenced referenceable param groups. 122 | `'particle beam' in param_group` 123 | Check if a parameter exists by name / accession ID. 124 | `param_group.targets` 125 | Access a subelement directly by name. 126 | 127 | Low-level examples: 128 | `param_group.cv_params` - A list of all cvParams defined in this group. Includes raw values, 129 | units, and multiple items if one accession is used multiple times. 130 | Does not include values inherited from referenceable param groups. 131 | `param_group.user_params` - A list of all userParams. 132 | `param_group.attrs` - A dict of all XML attributes. 133 | `param_group.subelements` - A dict of all subelements. 134 | 135 | 136 | """ 137 | def __init__(self, elem, **extra_data): 138 | """ 139 | Parses an XML element representing a group of controlled vocabulary parameters. 140 | 141 | :param elem: an XML element containing cvParam children 142 | :param extra_data: extra attributes to assign to the class instance 143 | """ 144 | self.param_group_refs = [ 145 | ref.get('ref') 146 | for ref in elem.findall('{0}referenceableParamGroupRef'.format(XMLNS_PREFIX)) 147 | ] 148 | self.type = elem.tag.replace(XMLNS_PREFIX, '') 149 | 150 | # Tuples of (name, accession, parsed_value, raw_value, unit_name, unit_accession) 151 | # These are kept in an array as the imzML spec allows multiple uses of accession numbers 152 | # in the same block 153 | self.cv_params = [] 154 | for node in elem.findall('{0}cvParam'.format(XMLNS_PREFIX)): 155 | accession = node.get('accession') 156 | raw_name = node.get('name') 157 | raw_value = node.get('value') 158 | unit_accession = node.get('unitAccession') 159 | accession, name, parsed_value, unit_name = lookup_and_convert_cv_param( 160 | accession, raw_name, raw_value, unit_accession 161 | ) 162 | self.cv_params.append( 163 | (name, accession, parsed_value, raw_name, raw_value, unit_name, unit_accession) 164 | ) 165 | 166 | # Tuples of (name, type, parsed_value, raw_value, unit_name, unit_accession) 167 | self.user_params = [] 168 | for node in elem.findall('{0}userParam'.format(XMLNS_PREFIX)): 169 | name = node.get('name') 170 | dtype = node.get('dtype') 171 | raw_value = node.get('value') 172 | parsed_value = convert_xml_value(dtype, raw_value) 173 | unit_accession = node.get('unitAccession') 174 | unit_name = convert_term_name(unit_accession) 175 | self.user_params.append( 176 | (name, dtype, parsed_value, raw_value, unit_name, unit_accession) 177 | ) 178 | 179 | # Mapping of CV param name to parsed value 180 | self.param_by_name = {} 181 | self.param_by_name.update((param[0], param[2]) for param in self.user_params) 182 | self.param_by_name.update((param[0], param[2]) for param in self.cv_params) 183 | # Mapping of CV param accession to parsed value 184 | self.param_by_accession = { 185 | param[1]: param[2] for param in self.cv_params 186 | } 187 | 188 | self.attrs = elem.attrib 189 | 190 | self.subelements = extra_data 191 | for k, v in extra_data.items(): 192 | setattr(self, k, v) 193 | 194 | def __getitem__(self, key): 195 | try: 196 | return self.param_by_accession[key] 197 | except KeyError: 198 | return self.param_by_name[key] 199 | 200 | def __contains__(self, key): 201 | return key in self.param_by_accession or key in self.param_by_name 202 | 203 | def apply_referenceable_param_groups(self, rpgs): 204 | for ref in self.param_group_refs[::-1]: 205 | rpg = rpgs.get(ref) 206 | if rpg: 207 | for name, accession, parsed_value, *_ in rpg.cv_params: 208 | if name is not None and name != accession: 209 | self.param_by_name.setdefault(name, parsed_value) 210 | self.param_by_accession.setdefault(accession, parsed_value) 211 | for name, _, parsed_value, *_ in rpg.user_params: 212 | self.param_by_name.setdefault(name, parsed_value) 213 | else: 214 | warn('ReferenceableParamGroup "%s" not found' % ref) 215 | 216 | def pretty(self): 217 | """ 218 | Flattens attributes, params and extra fields into a single dict keyed by name. 219 | This function is intended to help human inspection. For programmatic access to specific fields, 220 | always use the `attrs`, `param_by_name`, `param_by_accession`, etc. instance attributes instead. 221 | """ 222 | result = { 223 | 'type': self.type, 224 | } 225 | result.update(self.attrs) 226 | result.update(self.param_by_name) 227 | result.update(_deep_pretty(self.subelements)) 228 | 229 | return result 230 | 231 | 232 | class SpectrumData(ParamGroup): 233 | def __init__(self, root, referenceable_param_groups): 234 | pu = _ParseUtils() 235 | 236 | scan_list_params = pu.optional_param_group(root, '{0}scanList') 237 | scans = [] 238 | for node in root.findall('{0}scanList/{0}scan'.format(XMLNS_PREFIX)): 239 | scans.append( 240 | pu.param_group( 241 | node, 242 | scan_windows=pu.param_groups_list(node, '{0}scanWindowList/{0}scanWindow') 243 | ) 244 | ) 245 | 246 | precursors = [] 247 | for node in root.findall('{0}precursorList/{0}precursor'.format(XMLNS_PREFIX)): 248 | precursors.append( 249 | pu.param_group( 250 | node, 251 | isolation_window=pu.optional_param_group(node, '{0}isolationWindow'), 252 | selected_ions=pu.param_groups_list(node, '{0}selectedIonList/{0}selectedIon'), 253 | activation=pu.optional_param_group(node, '{0}activation'), 254 | ) 255 | ) 256 | 257 | products = [] 258 | for node in root.findall('{0}productList/{0}product'.format(XMLNS_PREFIX)): 259 | products.append( 260 | pu.param_group( 261 | node, 262 | isolation_window=pu.optional_param_group(node, '{0}isolationWindow'), 263 | ) 264 | ) 265 | 266 | binary_data_arrays = pu.param_groups_list(root, '{0}binaryDataArrayList/{0}binaryDataArray') 267 | 268 | super().__init__( 269 | root, 270 | scan_list_params=scan_list_params, 271 | scans=scans, 272 | precursors=precursors, 273 | products=products, 274 | binary_data_arrays=binary_data_arrays, 275 | ) 276 | 277 | for pg in pu.created_param_groups: 278 | pg.apply_referenceable_param_groups(referenceable_param_groups) 279 | 280 | self.apply_referenceable_param_groups(referenceable_param_groups) 281 | -------------------------------------------------------------------------------- /pyimzml/ontology/uo.py: -------------------------------------------------------------------------------- 1 | # DO NOT EDIT BY HAND 2 | # This file was autogenerated by dump_obo_files.py at 2020-10-21T18:55:01.621812 3 | terms = { 'UO:0000000': ('unit', None), 4 | 'UO:0000001': ('length unit', None), 5 | 'UO:0000002': ('mass unit', None), 6 | 'UO:0000003': ('time unit', None), 7 | 'UO:0000004': ('electric current unit', None), 8 | 'UO:0000005': ('temperature unit', None), 9 | 'UO:0000006': ('substance unit', None), 10 | 'UO:0000007': ('luminous intensity unit', None), 11 | 'UO:0000008': ('meter', None), 12 | 'UO:0000009': ('kilogram', None), 13 | 'UO:0000010': ('second', None), 14 | 'UO:0000011': ('ampere', None), 15 | 'UO:0000012': ('kelvin', None), 16 | 'UO:0000013': ('mole', None), 17 | 'UO:0000014': ('candela', None), 18 | 'UO:0000015': ('centimeter', None), 19 | 'UO:0000016': ('millimeter', None), 20 | 'UO:0000017': ('micrometer', None), 21 | 'UO:0000018': ('nanometer', None), 22 | 'UO:0000019': ('angstrom', None), 23 | 'UO:0000020': ('picometer', None), 24 | 'UO:0000021': ('gram', None), 25 | 'UO:0000022': ('milligram', None), 26 | 'UO:0000023': ('microgram', None), 27 | 'UO:0000024': ('nanogram', None), 28 | 'UO:0000025': ('picogram', None), 29 | 'UO:0000026': ('femtogram', None), 30 | 'UO:0000027': ('degree Celsius', None), 31 | 'UO:0000028': ('millisecond', None), 32 | 'UO:0000029': ('microsecond', None), 33 | 'UO:0000030': ('picosecond', None), 34 | 'UO:0000031': ('minute', None), 35 | 'UO:0000032': ('hour', None), 36 | 'UO:0000033': ('day', None), 37 | 'UO:0000034': ('week', None), 38 | 'UO:0000035': ('month', None), 39 | 'UO:0000036': ('year', None), 40 | 'UO:0000037': ('milliampere', None), 41 | 'UO:0000038': ('microampere', None), 42 | 'UO:0000039': ('micromole', None), 43 | 'UO:0000040': ('millimole', None), 44 | 'UO:0000041': ('nanomole', None), 45 | 'UO:0000042': ('picomole', None), 46 | 'UO:0000043': ('femtomole', None), 47 | 'UO:0000044': ('attomole', None), 48 | 'UO:0000045': ('base unit', None), 49 | 'UO:0000046': ('prefix', None), 50 | 'UO:0000047': ('area unit', None), 51 | 'UO:0000048': ('acceleration unit', None), 52 | 'UO:0000049': ('angular velocity unit', None), 53 | 'UO:0000050': ('angular acceleration unit', None), 54 | 'UO:0000051': ('concentration unit', None), 55 | 'UO:0000052': ('mass density unit', None), 56 | 'UO:0000053': ('luminance unit', None), 57 | 'UO:0000054': ('area density unit', None), 58 | 'UO:0000055': ('molar mass unit', None), 59 | 'UO:0000056': ('molar volume unit', None), 60 | 'UO:0000057': ('momentum unit', None), 61 | 'UO:0000058': ('rotational frequency unit', None), 62 | 'UO:0000059': ('specific volume unit', None), 63 | 'UO:0000060': ('speed/velocity unit', None), 64 | 'UO:0000061': ('unit of molarity', None), 65 | 'UO:0000062': ('molar', None), 66 | 'UO:0000063': ('millimolar', None), 67 | 'UO:0000064': ('micromolar', None), 68 | 'UO:0000065': ('nanomolar', None), 69 | 'UO:0000066': ('picomolar', None), 70 | 'UO:0000067': ('unit of molality', None), 71 | 'UO:0000068': ('molal', None), 72 | 'UO:0000069': ('millimolal', None), 73 | 'UO:0000070': ('micromolal', None), 74 | 'UO:0000071': ('nanomolal', None), 75 | 'UO:0000072': ('picomolal', None), 76 | 'UO:0000073': ('femtomolar', None), 77 | 'UO:0000074': ('unit of normality', None), 78 | 'UO:0000075': ('normal', None), 79 | 'UO:0000076': ('mole fraction', None), 80 | 'UO:0000077': ('meter per second per second', None), 81 | 'UO:0000078': ('radian per second per second', None), 82 | 'UO:0000079': ('radian per second', None), 83 | 'UO:0000080': ('square meter', None), 84 | 'UO:0000081': ('square centimeter', None), 85 | 'UO:0000082': ('square millimeter', None), 86 | 'UO:0000083': ('kilogram per cubic meter', None), 87 | 'UO:0000084': ('gram per cubic centimeter', None), 88 | 'UO:0000085': ('candela per square meter', None), 89 | 'UO:0000086': ('kilogram per square meter', None), 90 | 'UO:0000087': ('kilogram per mole', None), 91 | 'UO:0000088': ('gram per mole', None), 92 | 'UO:0000089': ('cubic meter per mole', None), 93 | 'UO:0000090': ('cubic centimeter per mole', None), 94 | 'UO:0000091': ('kilogram meter per second', None), 95 | 'UO:0000092': ('turns per second', None), 96 | 'UO:0000093': ('cubic meter per kilogram', None), 97 | 'UO:0000094': ('meter per second', None), 98 | 'UO:0000095': ('volume unit', None), 99 | 'UO:0000096': ('cubic meter', None), 100 | 'UO:0000097': ('cubic centimeter', None), 101 | 'UO:0000098': ('milliliter', None), 102 | 'UO:0000099': ('liter', None), 103 | 'UO:0000100': ('cubic decimeter', None), 104 | 'UO:0000101': ('microliter', None), 105 | 'UO:0000102': ('nanoliter', None), 106 | 'UO:0000103': ('picoliter', None), 107 | 'UO:0000104': ('femtoliter', None), 108 | 'UO:0000105': ('frequency unit', None), 109 | 'UO:0000106': ('hertz', None), 110 | 'UO:0000107': ('force unit', None), 111 | 'UO:0000108': ('newton', None), 112 | 'UO:0000109': ('pressure unit', None), 113 | 'UO:0000110': ('pascal', None), 114 | 'UO:0000111': ('energy unit', None), 115 | 'UO:0000112': ('joule', None), 116 | 'UO:0000113': ('power unit', None), 117 | 'UO:0000114': ('watt', None), 118 | 'UO:0000115': ('illuminance unit', None), 119 | 'UO:0000116': ('lux', None), 120 | 'UO:0000117': ('luminous flux unit', None), 121 | 'UO:0000118': ('lumen', None), 122 | 'UO:0000119': ('catalytic activity unit', None), 123 | 'UO:0000120': ('katal', None), 124 | 'UO:0000121': ('angle unit', None), 125 | 'UO:0000122': ('plane angle unit', None), 126 | 'UO:0000123': ('radian', None), 127 | 'UO:0000124': ('solid angle unit', None), 128 | 'UO:0000125': ('steradian', None), 129 | 'UO:0000127': ('radiation unit', None), 130 | 'UO:0000128': ('activity (of a radionuclide) unit', None), 131 | 'UO:0000129': ('absorbed dose unit', None), 132 | 'UO:0000130': ('dose equivalent unit', None), 133 | 'UO:0000131': ('exposure unit', None), 134 | 'UO:0000132': ('becquerel', None), 135 | 'UO:0000133': ('curie', None), 136 | 'UO:0000134': ('gray', None), 137 | 'UO:0000135': ('rad', None), 138 | 'UO:0000136': ('roentgen', None), 139 | 'UO:0000137': ('sievert', None), 140 | 'UO:0000138': ('millisievert', None), 141 | 'UO:0000139': ('microsievert', None), 142 | 'UO:0000140': ('Roentgen equivalent man', None), 143 | 'UO:0000141': ('microgray', None), 144 | 'UO:0000142': ('milligray', None), 145 | 'UO:0000143': ('nanogray', None), 146 | 'UO:0000144': ('nanosievert', None), 147 | 'UO:0000145': ('millicurie', None), 148 | 'UO:0000146': ('microcurie', None), 149 | 'UO:0000147': ('disintegrations per minute', None), 150 | 'UO:0000148': ('counts per minute', None), 151 | 'UO:0000150': ('nanosecond', None), 152 | 'UO:0000151': ('century', None), 153 | 'UO:0000152': ('half life', None), 154 | 'UO:0000153': ('foot candle', None), 155 | 'UO:0000154': ('irradiance unit', None), 156 | 'UO:0000155': ('watt per square meter', None), 157 | 'UO:0000156': ('einstein per square meter per second', None), 158 | 'UO:0000157': ('light unit', None), 159 | 'UO:0000158': ('watt per steradian per square meter', None), 160 | 'UO:0000159': ('radiant intensity unit', None), 161 | 'UO:0000160': ('microeinstein per square meter per second', None), 162 | 'UO:0000161': ('radiance unit', None), 163 | 'UO:0000162': ('watt per steradian', None), 164 | 'UO:0000163': ('mass percentage', None), 165 | 'UO:0000164': ('mass volume percentage', None), 166 | 'UO:0000165': ('volume percentage', None), 167 | 'UO:0000166': ('parts per notation unit', None), 168 | 'UO:0000167': ('parts per hundred', None), 169 | 'UO:0000168': ('parts per thousand', None), 170 | 'UO:0000169': ('parts per million', None), 171 | 'UO:0000170': ('parts per billion', None), 172 | 'UO:0000171': ('parts per trillion', None), 173 | 'UO:0000172': ('parts per quadrillion', None), 174 | 'UO:0000173': ('gram per milliliter', None), 175 | 'UO:0000174': ('kilogram per liter', None), 176 | 'UO:0000175': ('gram per liter', None), 177 | 'UO:0000176': ('milligram per milliliter', None), 178 | 'UO:0000177': ('unit per volume unit', None), 179 | 'UO:0000178': ('unit per milliliter', None), 180 | 'UO:0000179': ('unit per liter', None), 181 | 'UO:0000180': ('mass per unit volume', None), 182 | 'UO:0000181': ('enzyme unit', None), 183 | 'UO:0000182': ('density unit', None), 184 | 'UO:0000183': ('linear density unit', None), 185 | 'UO:0000184': ('kilogram per meter', None), 186 | 'UO:0000185': ('degree', None), 187 | 'UO:0000186': ('dimensionless unit', None), 188 | 'UO:0000187': ('percent', None), 189 | 'UO:0000188': ('pi', None), 190 | 'UO:0000189': ('count unit', None), 191 | 'UO:0000190': ('ratio', None), 192 | 'UO:0000191': ('fraction', None), 193 | 'UO:0000192': ('molecule count', None), 194 | 'UO:0000193': ('purity percentage', None), 195 | 'UO:0000194': ('confluence percentage', None), 196 | 'UO:0000195': ('degree Fahrenheit', None), 197 | 'UO:0000196': ('pH', None), 198 | 'UO:0000197': ('liter per kilogram', None), 199 | 'UO:0000198': ('milliliter per kilogram', None), 200 | 'UO:0000199': ('microliter per kilogram', None), 201 | 'UO:0000200': ('cell concentration unit', None), 202 | 'UO:0000201': ('cells per milliliter', None), 203 | 'UO:0000202': ('catalytic (activity) concentration unit', None), 204 | 'UO:0000203': ('katal per cubic meter', None), 205 | 'UO:0000204': ('katal per liter', None), 206 | 'UO:0000205': ('volume per unit volume', None), 207 | 'UO:0000206': ('milliliter per cubic meter', None), 208 | 'UO:0000207': ('milliliter per liter', None), 209 | 'UO:0000208': ('gram per deciliter', None), 210 | 'UO:0000209': ('deciliter', None), 211 | 'UO:0000210': ('colony forming unit', None), 212 | 'UO:0000211': ('plaque forming unit', None), 213 | 'UO:0000212': ('colony forming unit per volume', None), 214 | 'UO:0000213': ('colony forming unit per milliliter', None), 215 | 'UO:0000214': ('plaque forming unit per volume', None), 216 | 'UO:0000215': ('plaque forming unit per milliliter', None), 217 | 'UO:0000216': ('disintegrations per second', None), 218 | 'UO:0000217': ('electric potential difference unit', None), 219 | 'UO:0000218': ('volt', None), 220 | 'UO:0000219': ('electric charge', None), 221 | 'UO:0000220': ('coulomb', None), 222 | 'UO:0000221': ('dalton', None), 223 | 'UO:0000222': ('kilodalton', None), 224 | 'UO:0000223': ('watt-hour', None), 225 | 'UO:0000224': ('kilowatt-hour', None), 226 | 'UO:0000225': ('magnetic flux unit', None), 227 | 'UO:0000226': ('weber', None), 228 | 'UO:0000227': ('magnetic flux density unit', None), 229 | 'UO:0000228': ('tesla', None), 230 | 'UO:0000229': ('volt-hour', None), 231 | 'UO:0000230': ('kilovolt-hour', None), 232 | 'UO:0000231': ('information unit', None), 233 | 'UO:0000232': ('bit', None), 234 | 'UO:0000233': ('byte', None), 235 | 'UO:0000234': ('kilobyte', None), 236 | 'UO:0000235': ('megabyte', None), 237 | 'UO:0000236': ('image resolution unit', None), 238 | 'UO:0000237': ('chroma sampling unit', None), 239 | 'UO:0000238': ('dynamic range unit', None), 240 | 'UO:0000239': ('spatial resolution unit', None), 241 | 'UO:0000240': ('dots per inch', None), 242 | 'UO:0000241': ('micron pixel', None), 243 | 'UO:0000242': ('pixels per inch', None), 244 | 'UO:0000243': ('pixels per millimeter', None), 245 | 'UO:0000244': ('base pair', None), 246 | 'UO:0000245': ('kibibyte', None), 247 | 'UO:0000246': ('mebibyte', None), 248 | 'UO:0000247': ('millivolt', None), 249 | 'UO:0000248': ('kilovolt', None), 250 | 'UO:0000249': ('microvolt', None), 251 | 'UO:0000250': ('nanovolt', None), 252 | 'UO:0000251': ('picovolt', None), 253 | 'UO:0000252': ('megavolt', None), 254 | 'UO:0000253': ('surface tension unit', None), 255 | 'UO:0000254': ('newton per meter', None), 256 | 'UO:0000255': ('dyne per cm', None), 257 | 'UO:0000256': ('viscosity unit', None), 258 | 'UO:0000257': ('pascal second', None), 259 | 'UO:0000258': ('poise', None), 260 | 'UO:0000259': ('decibel', None), 261 | 'UO:0000260': ('effective dose unit', None), 262 | 'UO:0000261': ('conduction unit', None), 263 | 'UO:0000262': ('electrical conduction unit', None), 264 | 'UO:0000263': ('heat conduction unit', None), 265 | 'UO:0000264': ('siemens', None), 266 | 'UO:0000265': ('watt per meter kelvin', None), 267 | 'UO:0000266': ('electronvolt', None), 268 | 'UO:0000267': ('electric field strength unit', None), 269 | 'UO:0000268': ('volt per meter', None), 270 | 'UO:0000269': ('absorbance unit', None), 271 | 'UO:0000270': ('volumetric flow rate unit', None), 272 | 'UO:0000271': ('microliters per minute', None), 273 | 'UO:0000272': ('millimetres of mercury', None), 274 | 'UO:0000273': ('milligram per liter', None), 275 | 'UO:0000274': ('microgram per milliliter', None), 276 | 'UO:0000275': ('nanogram per milliliter', None), 277 | 'UO:0000276': ('amount per container', None), 278 | 'UO:0000277': ('ug/disk', None), 279 | 'UO:0000278': ('nmole/disk', None), 280 | 'UO:0000279': ('milliunits per milliliter', None), 281 | 'UO:0000280': ('rate unit', None), 282 | 'UO:0000281': ('count per nanomolar second', None), 283 | 'UO:0000282': ('count per molar second', None), 284 | 'UO:0000283': ('kilogram per hectare', None), 285 | 'UO:0000284': ('count per nanomolar', None), 286 | 'UO:0000285': ('count per molar', None), 287 | 'UO:0000286': ('yotta', None), 288 | 'UO:0000287': ('hecto', None), 289 | 'UO:0000288': ('zetta', None), 290 | 'UO:0000289': ('exa', None), 291 | 'UO:0000290': ('peta', None), 292 | 'UO:0000291': ('tera', None), 293 | 'UO:0000292': ('giga', None), 294 | 'UO:0000293': ('mega', None), 295 | 'UO:0000294': ('kilo', None), 296 | 'UO:0000295': ('deca', None), 297 | 'UO:0000296': ('deci', None), 298 | 'UO:0000297': ('milli', None), 299 | 'UO:0000298': ('centi', None), 300 | 'UO:0000299': ('micro', None), 301 | 'UO:0000300': ('nano', None), 302 | 'UO:0000301': ('microgram per liter', None), 303 | 'UO:0000302': ('pico', None), 304 | 'UO:0000303': ('femto', None), 305 | 'UO:0000304': ('atto', None), 306 | 'UO:0000305': ('zepto', None), 307 | 'UO:0000306': ('yocto', None), 308 | 'UO:0000307': ('dose unit', None), 309 | 'UO:0000308': ('milligram per kilogram', None), 310 | 'UO:0000309': ('milligram per square meter', None), 311 | 'UO:0000310': ('dosage unit', None), 312 | 'UO:0000311': ('milligram per kilogram per day', None), 313 | 'UO:0000312': ('relative light unit', None), 314 | 'UO:0000313': ('relative luminescence unit', None), 315 | 'UO:0000314': ('relative fluorescence unit', None), 316 | 'UO:0000315': ('turbidity unit', None), 317 | 'UO:0000316': ('cells per microliter', None), 318 | 'UO:0000317': ('cells per well', None), 319 | 'UO:0000318': ('formazin nephelometric unit', None), 320 | 'UO:0000319': ('radioactivity concentration', None), 321 | 'UO:0000320': ('curie per liter', None), 322 | 'UO:0000321': ('microcurie per milliliter', None), 323 | 'UO:0000322': ('fold dilution', None), 324 | 'UO:0000323': ('ton per hectare', None), 325 | 'UO:0000324': ('square angstrom', None), 326 | 'UO:0000325': ('megaHertz', None), 327 | 'UO:0000326': ('centiMorgan', None), 328 | 'UO:0000327': ('centiRay', None), 329 | 'UO:0000328': ('kilobasepair', None), 330 | 'UO:0000329': ('megabasepair', None), 331 | 'UO:0000330': ('gigabasepair', None), 332 | 'UO:0000331': ('gigabyte', None), 333 | 'UO:0000332': ('terabyte', None), 334 | 'UO:0010001': ('square micrometer', None), 335 | 'UO:0010002': ('millisiemens', None), 336 | 'UO:0010003': ('micromole per litre', None), 337 | 'UO:0010004': ('micromole per kilogram', None), 338 | 'UO:0010005': ('millimeters per day', None), 339 | 'UO:0010006': ('ratio', None), 340 | 'UO:0010007': ('volt-second per square centimeter', None), 341 | 'UO:0010008': ('kilometer per hour', None), 342 | 'UO:0010009': ('milli', None), 343 | 'UO:0010010': ('hectare', None), 344 | 'UO:0010011': ('inch', None), 345 | 'UO:0010012': ('thou', None), 346 | 'UO:0010013': ('foot', None), 347 | 'UO:0010014': ('yard', None), 348 | 'UO:0010015': ('chain', None), 349 | 'UO:0010016': ('furlong', None), 350 | 'UO:0010017': ('mile', None), 351 | 'UO:0010018': ('league', None), 352 | 'UO:0010019': ('maritime length unit', None), 353 | 'UO:0010020': ('fathom', None), 354 | 'UO:0010021': ('cable', None), 355 | 'UO:0010022': ('nautical mile', None), 356 | 'UO:0010023': ('perch', None), 357 | 'UO:0010024': ('rood', None), 358 | 'UO:0010025': ('acre', None), 359 | 'UO:0010026': ('fluid ounce', None), 360 | 'UO:0010027': ('gill', None), 361 | 'UO:0010028': ('pint', None), 362 | 'UO:0010029': ('quart', None), 363 | 'UO:0010030': ('gallon', None), 364 | 'UO:0010031': ('grain', None), 365 | 'UO:0010032': ('drachm', None), 366 | 'UO:0010033': ('ounce', None), 367 | 'UO:0010034': ('pound', None), 368 | 'UO:0010035': ('stone', None), 369 | 'UO:0010036': ('quarter', None), 370 | 'UO:0010037': ('hundredweight', None), 371 | 'UO:0010038': ('ton', None), 372 | 'UO:0010039': ('slug', None), 373 | 'UO:0010040': ('teaspoon', None), 374 | 'UO:0010041': ('united states customary teaspoon', None), 375 | 'UO:0010042': ('tablespoon', None), 376 | 'UO:0010043': ('australian metric tablespoon', None), 377 | 'UO:0010044': ('united states customary tablespoon', None), 378 | 'UO:0010045': ('metric cup', None), 379 | 'UO:0010046': ('united states customary cup', None), 380 | 'UO:0010047': ('united states fda cup', None), 381 | 'UO:0010048': ('micromole', None), 382 | 'UO:0010049': ('gram per square meter', None)} 383 | -------------------------------------------------------------------------------- /pyimzml/ImzMLWriter.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import numpy as np 5 | import uuid 6 | import hashlib 7 | import sys 8 | import getopt 9 | from collections import namedtuple, OrderedDict, defaultdict 10 | 11 | from wheezy.template import Engine, CoreExtension, DictLoader 12 | 13 | from pyimzml.compression import NoCompression, ZlibCompression 14 | 15 | IMZML_TEMPLATE = """\ 16 | @require(uuid, sha1sum, mz_data_type, int_data_type, run_id, spectra, mode, obo_codes, obo_names, mz_compression, int_compression, polarity, spec_type, scan_direction, scan_pattern, scan_type, line_scan_direction) 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | @if spec_type=='centroid': 28 | 29 | @elif spec_type=='profile': 30 | 31 | @end 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | @if spec_type=='centroid': 59 | 60 | @elif spec_type=='profile': 61 | 62 | @end 63 | @if polarity=='positive': 64 | 65 | @elif polarity=='negative': 66 | 67 | @end 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | @for index, s in enumerate(spectra): 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | @if len(s.coords) == 3: 118 | 119 | @end 120 | @if s.userParams: 121 | @for up in s.userParams: 122 | 123 | @end 124 | @end 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | @end 145 | 146 | 147 | 148 | """ 149 | 150 | class _MaxlenDict(OrderedDict): 151 | def __init__(self, *args, **kwargs): 152 | self.maxlen = kwargs.pop('maxlen', None) 153 | OrderedDict.__init__(self, *args, **kwargs) 154 | 155 | def __setitem__(self, key, value): 156 | if self.maxlen is not None and len(self) >= self.maxlen: 157 | self.popitem(0) #pop oldest 158 | OrderedDict.__setitem__(self, key, value) 159 | 160 | _Spectrum = namedtuple('_Spectrum', 'coords mz_len mz_offset mz_enc_len int_len int_offset int_enc_len mz_min mz_max mz_base int_base int_tic userParams') #todo: change named tuple to dict and parse xml template properly (i.e. remove hardcoding so parameters can be optional) 161 | 162 | class ImzMLWriter(object): 163 | """ 164 | Create an imzML+ibd file. 165 | 166 | :param output_filename: 167 | is used to make the base name by removing the extension (if any). 168 | two files will be made by adding ".ibd" and ".imzML" to the base name 169 | :param intensity_dtype: 170 | The numpy data type to use for saving intensity values 171 | :param mz_dtype: 172 | The numpy data type to use for saving mz array values 173 | :param mode: 174 | 175 | * "continuous" mode will save the first mz array only 176 | * "processed" mode save every mz array separately 177 | * "auto" mode writes only mz arrays that have not already been written 178 | :param intensity_compression: 179 | How to compress the intensity data before saving 180 | must be an instance of :class:`~pyimzml.compression.NoCompression` or :class:`~pyimzml.compression.ZlibCompression` 181 | :param mz_compression: 182 | How to compress the mz array data before saving 183 | """ 184 | def __init__(self, output_filename, 185 | mz_dtype=np.float64, intensity_dtype=np.float32, mode="auto", spec_type="centroid", 186 | scan_direction="top_down", line_scan_direction="line_left_right", scan_pattern="one_way", scan_type="horizontal_line", 187 | mz_compression=NoCompression(), intensity_compression=NoCompression(), 188 | polarity=None): 189 | 190 | self.mz_dtype = mz_dtype 191 | self.intensity_dtype = intensity_dtype 192 | self.mode = mode 193 | self.spec_type = spec_type 194 | self.mz_compression = mz_compression 195 | self.intensity_compression = intensity_compression 196 | self.run_id = os.path.splitext(output_filename)[0] 197 | self.filename = self.run_id + ".imzML" 198 | self.ibd_filename = self.run_id + ".ibd" 199 | self.xml = open(self.filename, 'w') 200 | self.ibd = open(self.ibd_filename, 'wb+') 201 | self.sha1 = hashlib.sha1() 202 | self.uuid = uuid.uuid4() 203 | 204 | self.scan_direction = scan_direction 205 | self.scan_pattern = scan_pattern 206 | self.scan_type = scan_type 207 | self.line_scan_direction = line_scan_direction 208 | 209 | self._write_ibd(self.uuid.bytes) 210 | 211 | self.wheezy_engine = Engine(loader=DictLoader({'imzml': IMZML_TEMPLATE}), extensions=[CoreExtension()]) 212 | self.imzml_template = self.wheezy_engine.get_template('imzml') 213 | self.spectra = [] 214 | self.first_mz = None 215 | self.hashes = defaultdict(list) # mz_hash -> list of mz_data (disk location) 216 | self.lru_cache = _MaxlenDict(maxlen=10) # mz_array (as tuple) -> mz_data (disk location) 217 | self._setPolarity(polarity) 218 | 219 | @staticmethod 220 | def _np_type_to_name(dtype): 221 | if dtype.__name__.startswith('float'): 222 | return "%s-bit float" % dtype.__name__[5:] 223 | elif dtype.__name__.startswith('int'): 224 | return "%s-bit integer" % dtype.__name__[3:] 225 | 226 | def _setPolarity(self, polarity): 227 | if polarity: 228 | if polarity.lower() in ['positive', 'negative']: 229 | self.polarity = polarity.lower() 230 | else: 231 | raise ValueError('value for polarity must be one of "positive", "negative". Received: {}'.format(polarity)) 232 | else: 233 | self.polarity = "" 234 | 235 | def _write_xml(self): 236 | spectra = self.spectra 237 | mz_data_type = self._np_type_to_name(self.mz_dtype) 238 | int_data_type = self._np_type_to_name(self.intensity_dtype) 239 | obo_codes = {"32-bit integer": "1000519", 240 | "16-bit float": "1000520", 241 | "32-bit float": "1000521", 242 | "64-bit integer": "1000522", 243 | "64-bit float": "1000523", 244 | "continuous": "1000030", 245 | "processed": "1000031", 246 | "zlib compression": "1000574", 247 | "no compression": "1000576", 248 | "line_bottom_up": "1000492", 249 | "line_left_right": "1000491", 250 | "line_right_left": "1000490", 251 | "line_top_down": "1000493", 252 | "bottom_up": "1000400", 253 | "left_right": "1000402", 254 | "right_left": "1000403", 255 | "top_down": "1000401", 256 | "meandering": "1000410", 257 | "one_way": "1000411", 258 | "random_access": "1000412", 259 | "horizontal_line": "1000480", 260 | "vertical_line": "1000481"} 261 | obo_names = {"line_bottom_up": "linescan bottom up", 262 | "line_left_right": "linescan left right", 263 | "line_right_left": "linescan right left", 264 | "line_top_down": "linescan top down", 265 | "bottom_up": "bottom up", 266 | "left_right": "left right", 267 | "right_left": "right left", 268 | "top_down": "top down", 269 | "meandering": "meandering", 270 | "one_way": "one way", 271 | "random_access": "random access", 272 | "horizontal_line": "horizontal line scan", 273 | "vertical_line": "vertical line scan"} 274 | 275 | uuid = ("{%s}" % self.uuid).upper() 276 | sha1sum = self.sha1.hexdigest().upper() 277 | run_id = self.run_id 278 | if self.mode == 'auto': 279 | mode = "processed" if len(self.lru_cache) > 1 else "continuous" 280 | else: 281 | mode = self.mode 282 | spec_type = self.spec_type 283 | mz_compression = self.mz_compression.name 284 | int_compression = self.intensity_compression.name 285 | polarity = self.polarity 286 | scan_direction = self.scan_direction 287 | scan_pattern = self.scan_pattern 288 | scan_type = self.scan_type 289 | line_scan_direction = self.line_scan_direction 290 | 291 | self.xml.write(self.imzml_template.render(locals())) 292 | 293 | def _write_ibd(self, bytes): 294 | self.ibd.write(bytes) 295 | self.sha1.update(bytes) 296 | return len(bytes) 297 | 298 | def _encode_and_write(self, data, dtype=np.float32, compression=NoCompression()): 299 | data = np.asarray(data, dtype=dtype) 300 | offset = self.ibd.tell() 301 | bytes = data.tobytes() 302 | bytes = compression.compress(bytes) 303 | return offset, data.shape[0], self._write_ibd(bytes) 304 | 305 | def _read_mz(self, mz_offset, mz_len, mz_enc_len): 306 | '''reads a mz array from the currently open ibd file''' 307 | self.ibd.seek(mz_offset) 308 | data = self.ibd.read(mz_enc_len) 309 | self.ibd.seek(0, 2) 310 | data = self.mz_compression.decompress(data) 311 | return tuple(np.fromstring(data, dtype=self.mz_dtype)) 312 | 313 | def _get_previous_mz(self, mzs): 314 | '''given an mz array, return the mz_data (disk location) 315 | if the mz array was not previously written, write to disk first''' 316 | mzs = tuple(mzs) # must be hashable 317 | if mzs in self.lru_cache: 318 | return self.lru_cache[mzs] 319 | 320 | # mz not recognized ... check hash 321 | mz_hash = "%s-%s-%s" % (hash(mzs), sum(mzs), len(mzs)) 322 | if mz_hash in self.hashes: 323 | for mz_data in self.hashes[mz_hash]: 324 | test_mz = self._read_mz(*mz_data) 325 | if mzs == test_mz: 326 | self.lru_cache[test_mz] = mz_data 327 | return mz_data 328 | # hash not recognized 329 | # must be a new mz array ... write it, add it to lru_cache and hashes 330 | mz_data = self._encode_and_write(mzs, self.mz_dtype, self.mz_compression) 331 | self.hashes[mz_hash].append(mz_data) 332 | self.lru_cache[mzs] = mz_data 333 | return mz_data 334 | 335 | def addSpectrum(self, mzs, intensities, coords, userParams=[]): 336 | """ 337 | Add a mass spectrum to the file. 338 | 339 | :param mz: 340 | mz array 341 | :param intensities: 342 | intensity array 343 | :param coords: 344 | 345 | * 2-tuple of x and y position OR 346 | * 3-tuple of x, y, and z position 347 | 348 | note some applications want coords to be 1-indexed 349 | """ 350 | # must be rounded now to allow comparisons to later data 351 | # but don't waste CPU time in continuous mode since the data will not be used anyway 352 | if self.mode != "continuous" or self.first_mz is None: 353 | mzs = self.mz_compression.rounding(mzs) 354 | intensities = self.intensity_compression.rounding(intensities) 355 | 356 | if self.mode == "continuous": 357 | if self.first_mz is None: 358 | self.first_mz = self._encode_and_write(mzs, self.mz_dtype, self.mz_compression) 359 | mz_data = self.first_mz 360 | elif self.mode == "processed": 361 | mz_data = self._encode_and_write(mzs, self.mz_dtype, self.mz_compression) 362 | elif self.mode == "auto": 363 | mz_data = self._get_previous_mz(mzs) 364 | else: 365 | raise TypeError("Unknown mode: %s" % self.mode) 366 | mz_offset, mz_len, mz_enc_len = mz_data 367 | 368 | int_offset, int_len, int_enc_len = self._encode_and_write(intensities, self.intensity_dtype, self.intensity_compression) 369 | mz_min = np.min(mzs) 370 | mz_max = np.max(mzs) 371 | ix_max = np.argmax(intensities) 372 | mz_base = mzs[ix_max] 373 | int_base = intensities[ix_max] 374 | int_tic = np.sum(intensities) 375 | s = _Spectrum(coords, mz_len, mz_offset, mz_enc_len, int_len, int_offset, int_enc_len, mz_min, mz_max, mz_base, int_base, int_tic, userParams) 376 | self.spectra.append(s) 377 | 378 | def close(self): # 'close' is a more common use for this 379 | """ 380 | Writes the XML file and closes all files. 381 | Will be called automatically if ``with``-pattern is used. 382 | """ 383 | self.finish() 384 | 385 | def finish(self): 386 | '''alias of close()''' 387 | self.ibd.close() 388 | self._write_xml() 389 | self.xml.close() 390 | 391 | def __enter__(self): 392 | return self 393 | 394 | def __exit__(self, exc_t, exc_v, trace): 395 | if exc_t is None: 396 | self.finish() 397 | else: 398 | self.ibd.close() 399 | self.xml.close() 400 | 401 | def _main(argv): 402 | from pyimzml.ImzMLParser import ImzMLParser 403 | inputfile = '' 404 | outputfile = '' 405 | try: 406 | opts, args = getopt.getopt(argv,"hi:o:",["ifile=","ofile="]) 407 | except getopt.GetoptError: 408 | print('test.py -i -o ') 409 | sys.exit(2) 410 | for opt, arg in opts: 411 | if opt == '-h': 412 | print('test.py -i -o ') 413 | sys.exit() 414 | elif opt in ("-i", "--ifile"): 415 | inputfile = arg 416 | elif opt in ("-o", "--ofile"): 417 | outputfile = arg 418 | if inputfile == '': 419 | print('test.py -i -o ') 420 | raise IOError('input file not specified') 421 | if outputfile=='': 422 | outputfile=inputfile+'.imzML' 423 | imzml = ImzMLParser(inputfile) 424 | spectra = [] 425 | with ImzMLWriter(outputfile, mz_dtype=np.float32, intensity_dtype=np.float32) as writer: 426 | for i, coords in enumerate(imzml.coordinates): 427 | mzs, intensities = imzml.getspectrum(i) 428 | writer.addSpectrum(mzs, intensities, coords) 429 | spectra.append((mzs, intensities, coords)) 430 | 431 | imzml = ImzMLParser(outputfile) 432 | spectra2 = [] 433 | for i, coords in enumerate(imzml.coordinates): 434 | mzs, intensities = imzml.getspectrum(i) 435 | spectra2.append((mzs, intensities, coords)) 436 | 437 | print(spectra[0] == spectra2[0]) 438 | 439 | if __name__ == '__main__': 440 | _main(sys.argv[1:]) 441 | -------------------------------------------------------------------------------- /pyimzml/ImzMLParser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright 2015 Dominik Fay 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from bisect import bisect_left, bisect_right 18 | import sys 19 | import random 20 | import re 21 | from collections import Counter 22 | from pathlib import Path 23 | from typing import Dict, Tuple, Any 24 | 25 | from warnings import warn 26 | import numpy as np 27 | 28 | from pyimzml.metadata import Metadata, SpectrumData 29 | from pyimzml.ontology.ontology import convert_cv_param 30 | 31 | PRECISION_DICT = {"32-bit float": 'f', "64-bit float": 'd', "32-bit integer": 'i', "64-bit integer": 'l'} 32 | SIZE_DICT = {'f': 4, 'd': 8, 'i': 4, 'l': 8} 33 | INFER_IBD_FROM_IMZML = object() 34 | XMLNS_PREFIX = "{http://psi.hupo.org/ms/mzml}" 35 | 36 | param_group_elname = "referenceableParamGroup" 37 | data_processing_elname = "dataProcessing" 38 | instrument_confid_elname = "instrumentConfiguration" 39 | 40 | 41 | def choose_iterparse(parse_lib=None): 42 | if parse_lib == 'ElementTree': 43 | from xml.etree.ElementTree import iterparse 44 | elif parse_lib == 'lxml': 45 | from lxml.etree import iterparse 46 | else: 47 | from xml.etree.ElementTree import iterparse 48 | return iterparse 49 | 50 | 51 | def _get_cv_param(elem, accession, deep=False, convert=False): 52 | base = './/' if deep else '' 53 | node = elem.find('%s%scvParam[@accession="%s"]' % (base, XMLNS_PREFIX, accession)) 54 | if node is not None: 55 | if convert: 56 | return convert_cv_param(accession, node.get('value')) 57 | return node.get('value') 58 | 59 | 60 | def calc_mzs_digitize(mzs: np.ndarray) -> Counter: 61 | """Calculate the number of peaks in the interval [-0.5 Da + int(mz), +0.5 Da + int(mz)]""" 62 | mzs_min = int((mzs.min() // 1 - 0.5) * 10) 63 | mzs_max = int((mzs.max() // 1 + 2.5) * 10) 64 | 65 | bins = np.array([i / 10.0 for i in range(mzs_min, mzs_max, 10)]) 66 | mzs_units = [int(i + 0.51) for i in bins] # integer value of mz 67 | 68 | mzs_counts = [mzs_units[i] for i in np.digitize(mzs, bins, right=False)] 69 | mzs_digitized = Counter(mzs_counts) 70 | return mzs_digitized 71 | 72 | 73 | def calc_peaks_overlap(mzs: np.array, ints: np.array, ppm: float) -> Tuple[int, int]: 74 | """Calculate the number of peaks that, when shifted by ppm, are interrupted by others""" 75 | 76 | # select peaks that have non-zero intensity 77 | non_zero_ints = np.where(ints > 0.0)[0] 78 | non_zero_mz = mzs[non_zero_ints] 79 | 80 | # calculation of the absolute value of the shift of each peak when shifting it by ppm 81 | shifted = non_zero_mz + (non_zero_mz * ppm * 1e-6) 82 | diff_shifted = shifted - non_zero_mz 83 | 84 | diff_mz = np.diff(non_zero_mz) # difference between original adjacent peaks 85 | n_overlap = sum(diff_shifted[:-1] > diff_mz) 86 | 87 | return n_overlap, len(non_zero_mz) 88 | 89 | 90 | class ImzMLParser: 91 | """ 92 | Parser for imzML 1.1.0 files (see specification here: 93 | https://ms-imaging.org/wp-content/uploads/2009/08/specifications_imzML1.1.0_RC1.pdf ). 94 | 95 | Iteratively reads the .imzML file into memory while pruning the per-spectrum metadata (everything in 96 | elements) during initialization. Returns a spectrum upon calling getspectrum(i). The binary file 97 | is read in every call of getspectrum(i). Use enumerate(parser.coordinates) to get all coordinates with their 98 | respective index. Coordinates are always 3-dimensional. If the third spatial dimension is not present in 99 | the data, it will be set to zero. 100 | 101 | The global metadata fields in the imzML file are stored in parser.metadata. 102 | Spectrum-specific metadata fields are not stored by default due to avoid memory issues, 103 | use the `include_spectra_metadata` parameter if spectrum-specific metadata is needed. 104 | """ 105 | 106 | def __init__( 107 | self, 108 | filename, 109 | parse_lib=None, 110 | ibd_file=INFER_IBD_FROM_IMZML, 111 | include_spectra_metadata=None, 112 | ): 113 | """ 114 | Opens the two files corresponding to the file name, reads the entire .imzML 115 | file and extracts required attributes. Does not read any binary data, yet. 116 | 117 | :param filename: 118 | name of the XML file. Must end with .imzML. Binary data file must be named equally but ending with .ibd 119 | Alternatively an open file or Buffer Protocol object can be supplied, if ibd_file is also supplied 120 | :param parse_lib: 121 | XML-parsing library to use: 'ElementTree' or 'lxml', the later will be used if argument not provided 122 | :param ibd_file: 123 | File or Buffer Protocol object for the .ibd file. Leave blank to infer it from the imzml filename. 124 | Set to None if no data from the .ibd file is needed (getspectrum calls will not work) 125 | :param include_spectra_metadata: 126 | None, 'full', or a list/set of accession IDs. 127 | If 'full' is given, parser.spectrum_full_metadata will be populated with a list of 128 | complex objects containing the full metadata for each spectrum. 129 | If a list or set is given, parser.spectrum_metadata_fields will be populated with a dict mapping 130 | accession IDs to lists. Each list will contain the values for that accession ID for 131 | each spectrum. Note that for performance reasons, this mode only searches the 132 | spectrum itself for the value. It won't check any referenced referenceable param 133 | groups if the accession ID isn't present in the spectrum metadata. 134 | """ 135 | # ElementTree requires the schema location for finding tags (why?) but 136 | # fails to read it from the root element. As this should be identical 137 | # for all imzML files, it is hard-coded here and prepended before every tag 138 | self.sl = "{http://psi.hupo.org/ms/mzml}" 139 | # maps each imzML number format to its struct equivalent 140 | self.precisionDict = dict(PRECISION_DICT) 141 | # maps each number format character to its amount of bytes used 142 | self.sizeDict = dict(SIZE_DICT) 143 | self.filename = filename 144 | self.mzOffsets = [] 145 | self.intensityOffsets = [] 146 | self.mzLengths = [] 147 | self.intensityLengths = [] 148 | # list of all (x,y,z) coordinates as tuples. 149 | self.coordinates = [] 150 | self.root = None 151 | self.metadata = None 152 | self.polarity = None 153 | self.spectrum_mode = None 154 | if include_spectra_metadata == 'full': 155 | self.spectrum_full_metadata = [] 156 | elif include_spectra_metadata is not None: 157 | include_spectra_metadata = set(include_spectra_metadata) 158 | self.spectrum_metadata_fields = { 159 | k: [] for k in include_spectra_metadata 160 | } 161 | 162 | self.mzGroupId = self.intGroupId = self.mzPrecision = self.intensityPrecision = None 163 | self.iterparse = choose_iterparse(parse_lib) 164 | self.__iter_read_spectrum_meta(include_spectra_metadata) 165 | if ibd_file is INFER_IBD_FROM_IMZML: 166 | # name of the binary file 167 | ibd_filename = self._infer_bin_filename(self.filename) 168 | self.m = open(ibd_filename, "rb") 169 | else: 170 | self.m = ibd_file 171 | 172 | # Dict for basic imzML metadata other than those required for reading 173 | # spectra. See method __readimzmlmeta() 174 | self.imzmldict = self.__readimzmlmeta() 175 | self.imzmldict['max count of pixels z'] = np.asarray(self.coordinates)[:,2].max() 176 | 177 | @staticmethod 178 | def _infer_bin_filename(imzml_path): 179 | imzml_path = Path(imzml_path) 180 | ibd_path = [f for f in imzml_path.parent.glob('*') 181 | if re.match(r'.+\.ibd', str(f), re.IGNORECASE) and f.stem == imzml_path.stem][0] 182 | return str(ibd_path) 183 | 184 | # system method for use of 'with ... as' 185 | def __enter__(self): 186 | return self 187 | 188 | # system method for use of 'with ... as' 189 | def __exit__(self, exc_t, exc_v, trace): 190 | if self.m is not None: 191 | self.m.close() 192 | 193 | def __iter_read_spectrum_meta(self, include_spectra_metadata): 194 | """ 195 | This method should only be called by __init__. Reads the data formats, coordinates and offsets from 196 | the .imzML file and initializes the respective attributes. While traversing the XML tree, the per-spectrum 197 | metadata is pruned, i.e. the element(s) are left behind empty. 198 | 199 | Supported accession values for the number formats: "MS:1000521", "MS:1000523", "IMS:1000141" or 200 | "IMS:1000142". The string values are "32-bit float", "64-bit float", "32-bit integer", "64-bit integer". 201 | """ 202 | mz_group = int_group = None 203 | slist = None 204 | elem_iterator = self.iterparse(self.filename, events=("start", "end")) 205 | 206 | if sys.version_info > (3,): 207 | _, self.root = next(elem_iterator) 208 | else: 209 | _, self.root = elem_iterator.next() 210 | 211 | is_first_spectrum = True 212 | 213 | for event, elem in elem_iterator: 214 | if elem.tag == self.sl + "spectrumList" and event == "start": 215 | self.__process_metadata() 216 | slist = elem 217 | elif elem.tag == self.sl + "spectrum" and event == "end": 218 | self.__process_spectrum(elem, include_spectra_metadata) 219 | if is_first_spectrum: 220 | self.__read_polarity(elem) 221 | self.__read_spectrum_mode(elem) 222 | is_first_spectrum = False 223 | slist.remove(elem) 224 | self.__fix_offsets() 225 | 226 | def __fix_offsets(self): 227 | # clean up the mess after morons who use signed 32-bit where unsigned 64-bit is appropriate 228 | def fix(array): 229 | fixed = [] 230 | delta = 0 231 | prev_value = float('nan') 232 | for value in array: 233 | if value < 0 and prev_value >= 0: 234 | delta += 2**32 235 | fixed.append(value + delta) 236 | prev_value = value 237 | return fixed 238 | 239 | self.mzOffsets = fix(self.mzOffsets) 240 | self.intensityOffsets = fix(self.intensityOffsets) 241 | 242 | def __process_metadata(self): 243 | if self.metadata is None: 244 | self.metadata = Metadata(self.root) 245 | for param_id, param_group in self.metadata.referenceable_param_groups.items(): 246 | if 'm/z array' in param_group.param_by_name: 247 | self.mzGroupId = param_id 248 | for name, dtype in self.precisionDict.items(): 249 | if name in param_group.param_by_name: 250 | self.mzPrecision = dtype 251 | if 'intensity array' in param_group.param_by_name: 252 | self.intGroupId = param_id 253 | for name, dtype in self.precisionDict.items(): 254 | if name in param_group.param_by_name: 255 | self.intensityPrecision = dtype 256 | if not hasattr(self, 'mzPrecision'): 257 | raise RuntimeError("Could not determine m/z precision") 258 | if not hasattr(self, 'intensityPrecision'): 259 | raise RuntimeError("Could not determine intensity precision") 260 | 261 | def __process_spectrum(self, elem, include_spectra_metadata): 262 | arrlistelem = elem.find('%sbinaryDataArrayList' % self.sl) 263 | mz_group = None 264 | int_group = None 265 | for e in arrlistelem: 266 | ref = e.find('%sreferenceableParamGroupRef' % self.sl).attrib["ref"] 267 | if ref == self.mzGroupId: 268 | mz_group = e 269 | elif ref == self.intGroupId: 270 | int_group = e 271 | self.mzOffsets.append(int(_get_cv_param(mz_group, 'IMS:1000102'))) 272 | self.mzLengths.append(int(_get_cv_param(mz_group, 'IMS:1000103'))) 273 | self.intensityOffsets.append(int(_get_cv_param(int_group, 'IMS:1000102'))) 274 | self.intensityLengths.append(int(_get_cv_param(int_group, 'IMS:1000103'))) 275 | scan_elem = elem.find('%sscanList/%sscan' % (self.sl, self.sl)) 276 | x = _get_cv_param(scan_elem, 'IMS:1000050') 277 | y = _get_cv_param(scan_elem, 'IMS:1000051') 278 | z = _get_cv_param(scan_elem, 'IMS:1000052') 279 | if z is not None: 280 | self.coordinates.append((int(x), int(y), int(z))) 281 | else: 282 | self.coordinates.append((int(x), int(y), 1)) 283 | 284 | if include_spectra_metadata == 'full': 285 | self.spectrum_full_metadata.append( 286 | SpectrumData(elem, self.metadata.referenceable_param_groups) 287 | ) 288 | elif include_spectra_metadata: 289 | for param in include_spectra_metadata: 290 | value = _get_cv_param(elem, param, deep=True, convert=True) 291 | self.spectrum_metadata_fields[param].append(value) 292 | 293 | def __read_polarity(self, elem): 294 | # It's too slow to always check all spectra, so first check the referenceable_param_groups 295 | # in the header to see if they indicate the polarity. If not, try to detect it from 296 | # the first spectrum's full metadata. 297 | # LIMITATION: This won't detect "mixed" polarity if polarity is only specified outside the 298 | # referenceable_param_groups. 299 | param_groups = self.metadata.referenceable_param_groups.values() 300 | spectrum_metadata = SpectrumData(elem, self.metadata.referenceable_param_groups) 301 | has_positive = ( 302 | any('positive scan' in group for group in param_groups) 303 | or 'positive scan' in spectrum_metadata 304 | ) 305 | has_negative = ( 306 | any('negative scan' in group for group in param_groups) 307 | or 'negative scan' in spectrum_metadata 308 | ) 309 | if has_positive and has_negative: 310 | self.polarity = 'mixed' 311 | elif has_positive: 312 | self.polarity = 'positive' 313 | elif has_negative: 314 | self.polarity = 'negative' 315 | 316 | def __read_spectrum_mode(self, elem): 317 | """ 318 | This method checks for centroid (MS:1000127) / profile (MS:1000128) mode information. 319 | 320 | It's too slow to always check all spectra, so first check the referenceable_param_groups 321 | in the header to see if they indicate the spectrum mode. 322 | If not, try to detect it from the first spectrum's full metadata. 323 | """ 324 | param_groups = self.metadata.referenceable_param_groups.values() 325 | spectrum_metadata = SpectrumData(elem, self.metadata.referenceable_param_groups) 326 | 327 | profile_mode = ( 328 | any('profile spectrum' in group for group in param_groups) 329 | or 'profile spectrum' in spectrum_metadata 330 | ) 331 | centroid_mode = ( 332 | any('centroid spectrum' in group for group in param_groups) 333 | or 'centroid spectrum' in spectrum_metadata 334 | ) 335 | 336 | if profile_mode: 337 | self.spectrum_mode = 'profile' 338 | elif centroid_mode: 339 | self.spectrum_mode = 'centroid' 340 | 341 | def __readimzmlmeta(self): 342 | """ 343 | DEPRECATED - use self.metadata instead, as it has much greater detail and allows for 344 | multiple scan settings / instruments. 345 | 346 | This method should only be called by __init__. Initializes the imzmldict with frequently used metadata from 347 | the .imzML file. 348 | 349 | :return d: 350 | dict containing above mentioned meta data 351 | :rtype: 352 | dict 353 | :raises Warning: 354 | if an xml attribute has a number format different from the imzML specification 355 | """ 356 | d = {} 357 | scan_settings_list_elem = self.root.find('%sscanSettingsList' % self.sl) 358 | instrument_config_list_elem = self.root.find('%sinstrumentConfigurationList' % self.sl) 359 | scan_settings_params = [ 360 | ("max count of pixels x", "IMS:1000042"), 361 | ("max count of pixels y", "IMS:1000043"), 362 | ("max dimension x", "IMS:1000044"), 363 | ("max dimension y", "IMS:1000045"), 364 | ("pixel size x", "IMS:1000046"), 365 | ("pixel size y", "IMS:1000047"), 366 | ("matrix solution concentration", "MS:1000835"), 367 | ] 368 | instrument_config_params = [ 369 | ("wavelength", "MS:1000843"), 370 | ("focus diameter x", "MS:1000844"), 371 | ("focus diameter y", "MS:1000845"), 372 | ("pulse energy", "MS:1000846"), 373 | ("pulse duration", "MS:1000847"), 374 | ("attenuation", "MS:1000848"), 375 | ] 376 | 377 | for name, accession in scan_settings_params: 378 | try: 379 | val = _get_cv_param(scan_settings_list_elem, accession, deep=True, convert=True) 380 | if val is not None: 381 | d[name] = val 382 | except ValueError: 383 | warn(Warning('Wrong data type in XML file. Skipped attribute "%s"' % name)) 384 | 385 | for name, accession in instrument_config_params: 386 | try: 387 | val = _get_cv_param(instrument_config_list_elem, accession, deep=True, convert=True) 388 | if val is not None: 389 | d[name] = val 390 | except ValueError: 391 | warn(Warning('Wrong data type in XML file. Skipped attribute "%s"' % name)) 392 | return d 393 | 394 | def get_physical_coordinates(self, i): 395 | """ 396 | For a pixel index i, return the real-world coordinates in nanometers. 397 | 398 | This is equivalent to multiplying the image coordinates of the given pixel with the pixel size. 399 | 400 | :param i: the pixel index 401 | :return: a tuple of x and y coordinates. 402 | :rtype: Tuple[float] 403 | :raises KeyError: if the .imzML file does not specify the attributes "pixel size x" and "pixel size y" 404 | """ 405 | try: 406 | pixel_size_x = self.imzmldict["pixel size x"] 407 | pixel_size_y = self.imzmldict["pixel size y"] 408 | except KeyError: 409 | raise KeyError("Could not find all pixel size attributes in imzML file") 410 | image_x, image_y = self.coordinates[i][:2] 411 | return image_x * pixel_size_x, image_y * pixel_size_y 412 | 413 | def getspectrum(self, index): 414 | """ 415 | Reads the spectrum at specified index from the .ibd file. 416 | 417 | :param index: 418 | Index of the desired spectrum in the .imzML file 419 | 420 | Output: 421 | 422 | mz_array: numpy.ndarray 423 | Sequence of m/z values representing the horizontal axis of the desired mass 424 | spectrum 425 | intensity_array: numpy.ndarray 426 | Sequence of intensity values corresponding to mz_array 427 | """ 428 | mz_bytes, intensity_bytes = self.get_spectrum_as_string(index) 429 | mz_array = np.frombuffer(mz_bytes, dtype=self.mzPrecision) 430 | intensity_array = np.frombuffer(intensity_bytes, dtype=self.intensityPrecision) 431 | return mz_array, intensity_array 432 | 433 | def get_spectrum_as_string(self, index): 434 | """ 435 | Reads m/z array and intensity array of the spectrum at specified location 436 | from the binary file as a byte string. The string can be unpacked by the struct 437 | module. To get the arrays as numbers, use getspectrum 438 | 439 | :param index: 440 | Index of the desired spectrum in the .imzML file 441 | :rtype: Tuple[str, str] 442 | 443 | Output: 444 | 445 | mz_string: 446 | string where each character represents a byte of the mz array of the 447 | spectrum 448 | intensity_string: 449 | string where each character represents a byte of the intensity array of 450 | the spectrum 451 | """ 452 | offsets = [self.mzOffsets[index], self.intensityOffsets[index]] 453 | lengths = [self.mzLengths[index], self.intensityLengths[index]] 454 | lengths[0] *= self.sizeDict[self.mzPrecision] 455 | lengths[1] *= self.sizeDict[self.intensityPrecision] 456 | self.m.seek(offsets[0]) 457 | mz_string = self.m.read(lengths[0]) 458 | self.m.seek(offsets[1]) 459 | intensity_string = self.m.read(lengths[1]) 460 | return mz_string, intensity_string 461 | 462 | def portable_spectrum_reader(self): 463 | """ 464 | Builds a PortableSpectrumReader that holds the coordinates list and spectrum offsets in the .ibd file 465 | so that the .ibd file can be read without opening the .imzML file again. 466 | 467 | The PortableSpectrumReader can be safely pickled and unpickled, making it useful for reading the spectra 468 | in a distributed environment such as PySpark or PyWren. 469 | """ 470 | return PortableSpectrumReader(self.coordinates, 471 | self.mzPrecision, self.mzOffsets, self.mzLengths, 472 | self.intensityPrecision, self.intensityOffsets, self.intensityLengths) 473 | 474 | def check_peaks_overlap(self, n_spectrum: int = 100, ppm: float = 3.0) -> float: 475 | """ 476 | This function represents an approach for finding non-centroided datasets based on 477 | comparing the distance to the neighboring peak and shifting the existing peak by N ppm. 478 | 479 | The algorithm is described in the "Exclusion of non-centroided datasets" section of the article 480 | METASPACE-ML: Metabolite annotation for imaging mass spectrometry using machine learning 481 | https://www.biorxiv.org/content/10.1101/2023.05.29.542736v2 482 | """ 483 | random.seed(42) 484 | indexes = set([ 485 | random.randrange(0, len(self.coordinates)) 486 | for _ in range(min(len(self.coordinates), n_spectrum)) 487 | ]) 488 | 489 | n_overlap_peaks = [] 490 | non_zero_peaks = [] 491 | for idx in indexes: 492 | mzs, ints = self.getspectrum(idx) 493 | n_overlap, non_zero = calc_peaks_overlap(mzs, ints, ppm) 494 | n_overlap_peaks.append(n_overlap) 495 | non_zero_peaks.append(non_zero) 496 | 497 | overlap_percentage = sum(n_overlap_peaks) / sum(non_zero_peaks) * 100.0 498 | return round(overlap_percentage, 2) 499 | 500 | def get_spectrum_statistics(self, idx: int) -> Dict[str, Any]: 501 | """Calculate all the necessary metrics about m/z and intensity for the one spectrum""" 502 | mzs, ints = self.getspectrum(idx) 503 | nonzero_ints_indx = np.where(ints > 0.0)[0] 504 | nonzero_ints = ints[nonzero_ints_indx] 505 | 506 | if len(mzs) == 0: 507 | return {} 508 | # some datasets have anomalous values of m/z, like 1.0e+35 509 | elif mzs.max() > 1_000_000: 510 | return {} 511 | elif np.all(np.isnan(mzs)): 512 | return {} 513 | else: 514 | return { 515 | 'mzs_min': mzs.min(), 516 | 'mzs_max': mzs.max(), 517 | 'mzs_digitized': calc_mzs_digitize(mzs), 518 | 'ints_min': nonzero_ints.min() if len(nonzero_ints) > 0 else 0, # non zero 519 | 'ints_50p': np.percentile(nonzero_ints, 50) if len(nonzero_ints) > 0 else 0, 520 | 'ints_95p': np.percentile(nonzero_ints, 95) if len(nonzero_ints) > 0 else 0, 521 | 'ints_99p': np.percentile(nonzero_ints, 99) if len(nonzero_ints) > 0 else 0, 522 | 'ints_max': ints.max(), 523 | 'ints_total': sum(ints), 524 | 'nonzero_intensity_peaks_count': len(nonzero_ints), 525 | 'total_peaks_count': len(ints), 526 | } 527 | 528 | def calc_statistics(self, n_spectrum: int = 0, full: bool = False) -> Dict[str, Any]: 529 | """ 530 | Calculate the statistics of the number of peaks for the entire dataset, 531 | as well as full/n_spectrum is setting up - calculate extended statistics for each spectrum 532 | 533 | :param n_spectrum: the number of spectrum to analyze 534 | :param full: analysis of all spectrum 535 | """ 536 | peaks_statistics = { 537 | 'ds_peaks_stats': { 538 | 'min': min(self.intensityLengths), 539 | 'median': int(np.median(self.intensityLengths)), 540 | '95p': int(np.percentile(self.intensityLengths, q=95)), 541 | 'max': max(self.intensityLengths), 542 | } 543 | } 544 | 545 | # select all coordinates or a subset depending on the value of the full/n_spectrum variables 546 | if full: 547 | indexes = list(range(len(self.coordinates))) 548 | elif n_spectrum: 549 | random.seed(42) 550 | indexes = set([ 551 | random.randrange(0, len(self.coordinates)) 552 | for _ in range(min(len(self.coordinates), n_spectrum)) 553 | ]) 554 | else: 555 | indexes = [] 556 | 557 | if indexes: 558 | mzs_min, mzs_max = [], [] 559 | ints_min, ints_max, ints_total = [], [], [] 560 | ints_50p, ints_95p, ints_99p = [], [], [] 561 | nonzero_intensity_peaks_count, total_peaks_count = [], [] 562 | mzs_digitized = Counter() 563 | for idx in indexes: 564 | spectrum_stats = self.get_spectrum_statistics(idx) 565 | if not spectrum_stats: 566 | continue 567 | mzs_min.append(spectrum_stats['mzs_min']) 568 | mzs_max.append(spectrum_stats['mzs_max']) 569 | mzs_digitized += spectrum_stats['mzs_digitized'] 570 | ints_min.append(spectrum_stats['ints_min']) 571 | ints_50p.append(spectrum_stats['ints_50p']) 572 | ints_95p.append(spectrum_stats['ints_95p']) 573 | ints_99p.append(spectrum_stats['ints_99p']) 574 | ints_max.append(spectrum_stats['ints_max']) 575 | ints_total.append(spectrum_stats['ints_total']) 576 | nonzero_intensity_peaks_count.append(spectrum_stats['nonzero_intensity_peaks_count']) 577 | total_peaks_count.append(spectrum_stats['total_peaks_count']) 578 | 579 | peaks_statistics.update({ 580 | 'mz_min': min(mzs_min), 581 | 'mz_max': max(mzs_max), 582 | 'mzs_min': np.array(mzs_min, dtype=np.float32), 583 | 'mzs_max': np.array(mzs_max, dtype=np.float32), 584 | 'mzs_digitized': mzs_digitized, 585 | 'ints_min': np.array(ints_min, dtype=np.float32), 586 | 'ints_50p': np.array(ints_50p, dtype=np.float32), 587 | 'ints_95p': np.array(ints_95p, dtype=np.float32), 588 | 'ints_99p': np.array(ints_99p, dtype=np.float32), 589 | 'ints_max': np.array(ints_max, dtype=np.float32), 590 | 'ints_total': np.array(ints_total, dtype=np.float32), 591 | 'nonzero_intensity_lengths': np.array(nonzero_intensity_peaks_count, dtype=np.int32), 592 | 'nonzero_peaks_percentage': 593 | round(sum(nonzero_intensity_peaks_count)/sum(total_peaks_count) * 100.0, 2), 594 | }) 595 | 596 | return peaks_statistics 597 | 598 | 599 | def getionimage(p, mz_value, tol=0.1, z=1, reduce_func=sum): 600 | """ 601 | Get an image representation of the intensity distribution 602 | of the ion with specified m/z value. 603 | 604 | By default, the intensity values within the tolerance region are summed. 605 | 606 | :param p: 607 | the ImzMLParser (or anything else with similar attributes) for the desired dataset 608 | :param mz_value: 609 | m/z value for which the ion image shall be returned 610 | :param tol: 611 | Absolute tolerance for the m/z value, such that all ions with values 612 | mz_value-|tol| <= x <= mz_value+|tol| are included. Defaults to 0.1 613 | :param z: 614 | z Value if spectrogram is 3-dimensional. 615 | :param reduce_func: 616 | the bahaviour for reducing the intensities between mz_value-|tol| and mz_value+|tol| to a single value. Must 617 | be a function that takes a sequence as input and outputs a number. By default, the values are summed. 618 | 619 | :return: 620 | numpy matrix with each element representing the ion intensity in this 621 | pixel. Can be easily plotted with matplotlib 622 | """ 623 | tol = abs(tol) 624 | im = np.zeros((p.imzmldict["max count of pixels y"], p.imzmldict["max count of pixels x"])) 625 | for i, (x, y, z_) in enumerate(p.coordinates): 626 | if z_ == 0: 627 | UserWarning("z coordinate = 0 present, if you're getting blank images set getionimage(.., .., z=0)") 628 | if z_ == z: 629 | mzs, ints = map(lambda x: np.asarray(x), p.getspectrum(i)) 630 | min_i, max_i = _bisect_spectrum(mzs, mz_value, tol) 631 | im[y - 1, x - 1] = reduce_func(ints[min_i:max_i+1]) 632 | return im 633 | 634 | 635 | def browse(p): 636 | """ 637 | Create a per-spectrum metadata browser for the parser. 638 | Usage:: 639 | 640 | # get a list of the instrument configurations used in the first pixel 641 | instrument_configurations = browse(p).for_spectrum(0).get_ids("instrumentConfiguration") 642 | 643 | Currently, ``instrumentConfiguration``, ``dataProcessing`` and ``referenceableParamGroup`` are supported. 644 | 645 | For browsing all spectra iteratively, you should by all means use **ascending** indices. Doing otherwise can result 646 | in quadratic runtime. The following example shows how to retrieve all unique instrumentConfigurations used:: 647 | 648 | browser = browse(p) 649 | all_config_ids = set() 650 | for i, _ in enumerate(p.coordinates): 651 | all_config_ids.update(browser.for_spectrum(i).get_ids("instrumentConfiguration")) 652 | 653 | This is a list of ids with which you can find the corresponding ```` tag in the xml tree. 654 | 655 | :param p: the parser 656 | :return: the browser 657 | """ 658 | return _ImzMLMetaDataBrowser(p.root, p.filename, p.sl) 659 | 660 | 661 | def _bisect_spectrum(mzs, mz_value, tol): 662 | ix_l, ix_u = bisect_left(mzs, mz_value - tol), bisect_right(mzs, mz_value + tol) - 1 663 | if ix_l == len(mzs): 664 | return len(mzs), len(mzs) 665 | if ix_u < 1: 666 | return 0, 0 667 | if ix_u == len(mzs): 668 | ix_u -= 1 669 | if mzs[ix_l] < (mz_value - tol): 670 | ix_l += 1 671 | if mzs[ix_u] > (mz_value + tol): 672 | ix_u -= 1 673 | return ix_l, ix_u 674 | 675 | 676 | class _ImzMLMetaDataBrowser(object): 677 | def __init__(self, root, fn, sl): 678 | self._root = root 679 | self._sl = sl 680 | self._fn = fn 681 | self._iter, self._previous, self._list_elem = None, None, None 682 | self.iterparse = choose_iterparse() 683 | 684 | def for_spectrum(self, i): 685 | if self._previous is None or i <= self._previous: 686 | self._iter = self.iterparse(self._fn, events=("start", "end")) 687 | for event, s in self._iter: 688 | if s.tag == self._sl + "spectrumList" and event == "start": 689 | self._list_elem = s 690 | elif s.tag == self._sl + "spectrum" and event == "end": 691 | self._list_elem.remove(s) 692 | if s.attrib["index"] == str(i): 693 | self._previous = i 694 | return _SpectrumMetaDataBrowser(self._root, self._sl, s) 695 | 696 | 697 | class _SpectrumMetaDataBrowser(object): 698 | def __init__(self, root, sl, spectrum): 699 | self._root = root 700 | self._sl = sl 701 | self._spectrum = spectrum 702 | 703 | def get_ids(self, element): 704 | param_methods = { 705 | param_group_elname: self._find_referenceable_param_groups, 706 | data_processing_elname: self._find_data_processing, 707 | instrument_confid_elname: self._find_instrument_configurations, 708 | } 709 | try: 710 | return param_methods[element]() 711 | except KeyError as e: 712 | raise ValueError("Unsupported element: " + str(element)) 713 | 714 | def _find_referenceable_param_groups(self): 715 | param_group_refs = self._spectrum.findall("%sreferenceableParamGroupRef" % self._sl) 716 | ids = map(lambda g: g.attrib["ref"], param_group_refs) 717 | return ids 718 | 719 | def _find_instrument_configurations(self): 720 | ids = None 721 | scan_list = self._spectrum.find("%sscanList" % self._sl) 722 | if scan_list: 723 | scans = scan_list.findall("%sscan[@instrumentConfigurationRef]" % self._sl) 724 | ids = map(lambda s: s.attrib["instrumentConfigurationRef"], scans) 725 | if not ids: 726 | run = self._root.find("%srun") 727 | try: 728 | return [run.attrib["defaultInstrumentConfigurationRef"]] 729 | except KeyError as _: 730 | return list() 731 | else: 732 | return ids 733 | 734 | def _find_data_processing(self): 735 | try: 736 | return self._spectrum.attrib["dataProcessingRef"] 737 | except KeyError as _: 738 | spectrum_list = self._root.find("%srun/%sspectrumList" % tuple(2 * [self._sl])) 739 | try: 740 | return [spectrum_list.attrib["defaultDataProcessingRef"]] 741 | except KeyError as _: 742 | return [] 743 | 744 | 745 | class PortableSpectrumReader(object): 746 | """ 747 | A pickle-able class for holding the minimal set of data required for reading, 748 | without holding any references to open files that wouldn't survive pickling. 749 | """ 750 | 751 | def __init__(self, coordinates, mzPrecision, mzOffsets, mzLengths, 752 | intensityPrecision, intensityOffsets, intensityLengths): 753 | self.coordinates = coordinates 754 | self.mzPrecision = mzPrecision 755 | self.mzOffsets = mzOffsets 756 | self.mzLengths = mzLengths 757 | self.intensityPrecision = intensityPrecision 758 | self.intensityOffsets = intensityOffsets 759 | self.intensityLengths = intensityLengths 760 | 761 | def read_spectrum_from_file(self, file, index): 762 | """ 763 | Reads the spectrum at specified index from the .ibd file. 764 | 765 | :param file: 766 | File or file-like object for the .ibd file 767 | :param index: 768 | Index of the desired spectrum in the .imzML file 769 | 770 | Output: 771 | 772 | mz_array: numpy.ndarray 773 | Sequence of m/z values representing the horizontal axis of the desired mass 774 | spectrum 775 | intensity_array: numpy.ndarray 776 | Sequence of intensity values corresponding to mz_array 777 | """ 778 | file.seek(self.mzOffsets[index]) 779 | mz_bytes = file.read(self.mzLengths[index] * SIZE_DICT[self.mzPrecision]) 780 | file.seek(self.intensityOffsets[index]) 781 | intensity_bytes = file.read(self.intensityLengths[index] * SIZE_DICT[self.intensityPrecision]) 782 | 783 | mz_array = np.frombuffer(mz_bytes, dtype=self.mzPrecision) 784 | intensity_array = np.frombuffer(intensity_bytes, dtype=self.intensityPrecision) 785 | 786 | return mz_array, intensity_array 787 | --------------------------------------------------------------------------------