├── .github
    └── workflows
    │   ├── build.yml
    │   └── docs.yml
├── .gitignore
├── .pre-commit-config.yaml
├── AUTHORS.md
├── CHANGELOG.md
├── CONTRIBUTING.md
├── CONTRIBUTORS.md
├── LICENSE
├── README.md
├── conftest.py
├── docs
    ├── Makefile
    ├── make.bat
    └── source
    │   ├── _static
    │       └── .gitkeep
    │   ├── authors.md
    │   ├── changelog.md
    │   ├── conf.py
    │   ├── contributing.md
    │   ├── contributors.md
    │   ├── index.md
    │   ├── install.md
    │   ├── license.md
    │   └── usage.md
├── pyproject.toml
├── readimc
    ├── __init__.py
    ├── data
    │   ├── __init__.py
    │   ├── acquisition.py
    │   ├── panorama.py
    │   └── slide.py
    ├── imc_file.py
    ├── mcd_file.py
    ├── mcd_parser.py
    └── txt_file.py
├── requirements_devel.txt
├── requirements_docs.txt
├── requirements_test.txt
├── setup.cfg
└── tests
    ├── test_mcd_file.py
    ├── test_mcd_parser.py
    └── test_txt_file.py


/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: build
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - main
 6 |     tags:
 7 |       - "v*"
 8 |   pull_request:
 9 |     branches:
10 |       - main
11 |       - develop
12 |   workflow_dispatch:
13 | jobs:
14 |   test:
15 |     name: ${{ matrix.platform }} py${{ matrix.python-version }}
16 |     runs-on: ${{ matrix.platform }}
17 |     strategy:
18 |       matrix:
19 |         platform:
20 |           - ubuntu-latest
21 |           - windows-latest
22 |           - macos-latest
23 |         python-version:
24 |           - '3.8'
25 |           - '3.9'
26 |           - '3.10'
27 |           - '3.11'
28 |     steps:
29 |       - uses: actions/checkout@v3
30 |         with:
31 |           fetch-depth: 0
32 |       - uses: actions/setup-python@v4
33 |         with:
34 |           python-version: ${{ matrix.python-version }}
35 |       - name: Run pytest with coverage
36 |         run: |
37 |           python -m pip install --upgrade pip
38 |           python -m pip install .
39 |           python -m pip install --upgrade -r requirements_test.txt
40 |           pytest
41 |       - uses: codecov/codecov-action@v3
42 |         with:
43 |           token: ${{ secrets.CODECOV_TOKEN }}
44 |           files: coverage.xml
45 |   deploy:
46 |     needs:
47 |       - test
48 |     if: startsWith(github.ref, 'refs/tags')
49 |     runs-on: ubuntu-latest
50 |     steps:
51 |       - uses: actions/checkout@v3
52 |         with:
53 |           fetch-depth: 0
54 |       - uses: actions/setup-python@v4
55 |         with:
56 |           python-version: '3.x'
57 |       - name: Build package
58 |         run: |
59 |           python -m pip install --upgrade pip build
60 |           python -m build
61 |       - uses: pypa/gh-action-pypi-publish@release/v1
62 |         with:
63 |           user: __token__
64 |           password: ${{ secrets.PYPI_API_TOKEN }}
65 | 


--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
 1 | name: docs
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - main
 6 |   workflow_dispatch:
 7 | jobs:
 8 |   docs:
 9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - uses: actions/checkout@v3
12 |         with:
13 |           fetch-depth: 0
14 |       - uses: actions/setup-python@v4
15 |         with:
16 |           python-version: '3.x'
17 |       - name: Build documentation
18 |         run: |
19 |           python -m pip install --upgrade pip
20 |           python -m pip install .
21 |           python -m pip install --upgrade -r requirements_docs.txt
22 |           cd docs && rm -rf build && make html && cd ..
23 |       - uses: peaceiris/actions-gh-pages@v3
24 |         with:
25 |           github_token: ${{ secrets.GITHUB_TOKEN }}
26 |           publish_dir: docs/build/html
27 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | /.vscode/
  2 | /data/
  3 | /readimc/_version.py
  4 | 
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | share/python-wheels/
 28 | *.egg-info/
 29 | .installed.cfg
 30 | *.egg
 31 | MANIFEST
 32 | 
 33 | # PyInstaller
 34 | #  Usually these files are written by a python script from a template
 35 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 36 | *.manifest
 37 | *.spec
 38 | 
 39 | # Installer logs
 40 | pip-log.txt
 41 | pip-delete-this-directory.txt
 42 | 
 43 | # Unit test / coverage reports
 44 | htmlcov/
 45 | .tox/
 46 | .nox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *.cover
 53 | *.py,cover
 54 | .hypothesis/
 55 | .pytest_cache/
 56 | cover/
 57 | 
 58 | # Translations
 59 | *.mo
 60 | *.pot
 61 | 
 62 | # Django stuff:
 63 | *.log
 64 | local_settings.py
 65 | db.sqlite3
 66 | db.sqlite3-journal
 67 | 
 68 | # Flask stuff:
 69 | instance/
 70 | .webassets-cache
 71 | 
 72 | # Scrapy stuff:
 73 | .scrapy
 74 | 
 75 | # Sphinx documentation
 76 | docs/_build/
 77 | 
 78 | # PyBuilder
 79 | .pybuilder/
 80 | target/
 81 | 
 82 | # Jupyter Notebook
 83 | .ipynb_checkpoints
 84 | 
 85 | # IPython
 86 | profile_default/
 87 | ipython_config.py
 88 | 
 89 | # pyenv
 90 | #   For a library or package, you might want to ignore these files since the code is
 91 | #   intended to run in multiple environments; otherwise, check them in:
 92 | # .python-version
 93 | 
 94 | # pipenv
 95 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 96 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 97 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 98 | #   install all needed dependencies.
 99 | #Pipfile.lock
100 | 
101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102 | __pypackages__/
103 | 
104 | # Celery stuff
105 | celerybeat-schedule
106 | celerybeat.pid
107 | 
108 | # SageMath parsed files
109 | *.sage.py
110 | 
111 | # Environments
112 | .env
113 | .venv
114 | env/
115 | venv/
116 | ENV/
117 | env.bak/
118 | venv.bak/
119 | 
120 | # Spyder project settings
121 | .spyderproject
122 | .spyproject
123 | 
124 | # Rope project settings
125 | .ropeproject
126 | 
127 | # mkdocs documentation
128 | /site
129 | 
130 | # mypy
131 | .mypy_cache/
132 | .dmypy.json
133 | dmypy.json
134 | 
135 | # Pyre type checker
136 | .pyre/
137 | 
138 | # pytype static type analyzer
139 | .pytype/
140 | 
141 | # Cython debug symbols
142 | cython_debug/
143 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | exclude: ^(\.vscode/.*|docs/source/conf.py)$
 2 | repos:
 3 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 4 |     rev: v4.4.0
 5 |     hooks:
 6 |       - id: check-added-large-files
 7 |       - id: check-case-conflict
 8 |       - id: check-docstring-first
 9 |       - id: check-executables-have-shebangs
10 |       - id: check-merge-conflict
11 |       - id: check-shebang-scripts-are-executable
12 |       - id: check-toml
13 |       - id: check-yaml
14 |       - id: debug-statements
15 |       - id: end-of-file-fixer
16 |       - id: requirements-txt-fixer
17 |       - id: trailing-whitespace
18 |   - repo: https://github.com/astral-sh/ruff-pre-commit
19 |     rev: v0.0.282
20 |     hooks:
21 |       - id: ruff
22 |         args: [--fix, --exit-non-zero-on-fix]
23 |   - repo: https://github.com/psf/black
24 |     rev: '23.7.0'
25 |     hooks:
26 |       - id: black
27 |   - repo: https://github.com/pre-commit/mirrors-mypy
28 |     rev: v1.4.1
29 |     hooks:
30 |       - id: mypy
31 |         additional_dependencies: [types-requests, types-PyYAML]
32 | ci:
33 |   autoupdate_branch: develop
34 | 


--------------------------------------------------------------------------------
/AUTHORS.md:
--------------------------------------------------------------------------------
1 | # Authors
2 | 
3 | The `readimc` package was developed by [Jonas Windhager](mailto:jonas@windhager.io) based on existing functionality implemented in [imctools](https://github.com/BodenmillerGroup/imctools), which was originally created by Vito Zanotelli and Anton Rau.
4 | 
5 | It was maintained by Jonas Windhager until February 2023 and is currently maintained by [Milad Adibi](mailto:milad.adibi@uzh.ch).
6 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Changelog
  2 | 
  3 | All notable changes to this project will be documented in this file.
  4 | 
  5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
  6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
  7 | 
  8 | ## [0.8.0] - 2024-09-06
  9 | 
 10 | Added an option to return raw data from the `read_slide`, `read_panorama`, `read_before_ablation_image` and `read_after_ablation_image` functions.
 11 | 
 12 | Fixed a bug where the functions for reading ablation images returned an extra byte at the end of the DotNet Binary Serialization Record.
 13 | 
 14 | ## [0.7.0] - 2023-08-11
 15 | 
 16 | Implemented checks for overlapping raw data blocks in MCD file metadata [#6](https://github.com/BodenmillerGroup/readimc/issues/6)
 17 | 
 18 | Implemented lenient extraction of corrupted imaging data [#19](https://github.com/BodenmillerGroup/readimc/pull/19)
 19 | 
 20 | ## [0.6.2] - 2023-01-31
 21 | 
 22 | Maintenance release
 23 | 
 24 | Made modules public
 25 | 
 26 | Renamed `MCDFile.metadata` to `MCDFile.schema_xml`
 27 | 
 28 | Toolchain (black, flake8, isort, mypy, pre-commit)
 29 | 
 30 | Updated GitHub Actions workflows
 31 | 
 32 | Small bugfixes and improvements
 33 | 
 34 | Python 3.11 compatibility
 35 | 
 36 | ## [0.6.1] - 2022-01-28
 37 | 
 38 | Rename `MCDXMLParser` to `MCDParser`
 39 | 
 40 | Refactor and simplify `MCDParser` usage
 41 | 
 42 | BREAKING CHANGES in `MCDFile`:
 43 | - Renamed `MCDFile.metadata_xml_str` to `MCDFile.metadata`
 44 | - Removed `MCDFile.metadata_xml` and `MCDFile.metadata_xmlns` (use `MCDParser` instead)
 45 | 
 46 | ## [0.6.0] - 2022-01-28
 47 | 
 48 | Expose `MCDXMLParser`
 49 | 
 50 | ## [0.5.0] - 2021-12-02
 51 | 
 52 | Refactored metadata accessors
 53 | 
 54 | Refactored accessors for ROI points/coordinates
 55 | 
 56 | Added link between acquisitions and associated panoramas
 57 | 
 58 | Renamed `IMCMcdFile` and `IMCTxtFile` to `MCDFile` and `TXTFile`
 59 | 
 60 | ## [0.4.2] - 2021-11-01
 61 | 
 62 | Use pandas for reading TXT files (better performance)
 63 | 
 64 | ## [0.4.1] - 2021-11-01
 65 | 
 66 | Added support for Python 3.10
 67 | 
 68 | ## [0.4.0] - 2021-10-19
 69 | 
 70 | Added support for older versions of the Fluidigm software
 71 | 
 72 | Use heuristics for determining acquisition start position
 73 | 
 74 | Add offline unit tests for data from (Damond et al., 2019)
 75 | 
 76 | Fix a numerical bug in determining panorama image dimensions
 77 | 
 78 | ## [0.3.1] - 2021-10-11
 79 | 
 80 | Renamed `IMCMCDFile` and `IMCTXTFile` to `IMCMcdFile` and `IMCTxtFile`, respectively
 81 | 
 82 | ## [0.3.0] - 2021-10-11
 83 | 
 84 | Retain meta-information after closing a file
 85 | 
 86 | Pre-compile regular expressions for faster parsing
 87 | 
 88 | Separately store and expose channel metals & masses; change channel name format from
 89 | `f"{metal}({mass})"` to `f"{metal}{mass}"` for backwards compatibility with imctools
 90 | 
 91 | ## [0.2.0] - 2021-10-09
 92 | 
 93 | Use dataclasses instead of NamedTuples
 94 | 
 95 | Renamed TXTFile and MCDFile to IMCTXTFile and IMCMCDFile, respectively
 96 | 
 97 | IMCTXTFile and IMCMCDFile now implement a shared IMCFileBase interface
 98 | 
 99 | IMCTXTFile and Acquisition now implement a shared AcquisitionBase interface
100 | 
101 | ## [0.1.2] - 2021-10-09
102 | 
103 | Explicit acquisition image reconstruction based on pixel indices
104 | 
105 | ## [0.1.1] - 2021-10-09
106 | 
107 | Minor documentation changes
108 | 
109 | ## [0.1.0] - 2021-10-09
110 | 
111 | Initial release
112 | 
113 | [0.8.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.7.0...v0.8.0
114 | [0.7.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.2...v0.7.0
115 | [0.6.2]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.1...v0.6.2
116 | [0.6.1]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.0...v0.6.1
117 | [0.6.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.5.0...v0.6.0
118 | [0.5.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.4.2...v0.5.0
119 | [0.4.2]: https://github.com/BodenmillerGroup/readimc/compare/v0.4.1...v0.4.2
120 | [0.4.1]: https://github.com/BodenmillerGroup/readimc/compare/v0.4.0...v0.4.1
121 | [0.4.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.3.1...v0.4.0
122 | [0.3.1]: https://github.com/BodenmillerGroup/readimc/compare/v0.3.0...v0.3.1
123 | [0.3.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.2.0...v0.3.0
124 | [0.2.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.1.2...v0.2.0
125 | [0.1.2]: https://github.com/BodenmillerGroup/readimc/compare/v0.1.1...v0.1.2
126 | [0.1.1]: https://github.com/BodenmillerGroup/readimc/compare/v0.1.0...v0.1.1
127 | [0.1.0]: https://github.com/BodenmillerGroup/readimc/releases/tag/v0.1.0
128 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 | 
3 | Pull requests are welcome. Please make sure to update tests and documentation as
4 | appropriate.
5 | 
6 | For major changes, please open an issue first to discuss what you would like to change.
7 | 


--------------------------------------------------------------------------------
/CONTRIBUTORS.md:
--------------------------------------------------------------------------------
1 | # Contributors
2 | 
3 | Gonzalo Peña-Castellanos [@goanpeca](https://github.com/goanpeca)
4 | - maintenance of the `readimc` conda-forge recipe
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 University of Zurich
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # readimc
 2 | 
 3 | <a href="#"><img alt="Python" src="https://img.shields.io/pypi/pyversions/readimc" /></a>
 4 | <a href="https://github.com/BodenmillerGroup/readimc/actions/workflows/build.yml" alt="Build"><img alt="Build" src="https://img.shields.io/github/actions/workflow/status/BodenmillerGroup/readimc/build.yml?label=build" /></a>
 5 | <a href="https://pypi.org/project/readimc" alt="PyPI"><img alt="PyPI" src="https://img.shields.io/pypi/v/readimc" /></a>
 6 | <a href="#"><img alt="Coverage" src="https://img.shields.io/codecov/c/github/BodenmillerGroup/readimc" /></a>
 7 | <a href="https://bodenmillergroup.github.io/readimc" alt="Documentation"><img alt="Documentation" src="https://img.shields.io/github/actions/workflow/status/BodenmillerGroup/readimc/docs.yml?label=docs" /></a>
 8 | <a href="https://github.com/BodenmillerGroup/readimc/issues" alt="Issues"><img alt="Issues" src="https://img.shields.io/github/issues/BodenmillerGroup/readimc" /></a>
 9 | <a href="https://github.com/BodenmillerGroup/readimc/pulls" alt="Pull requests"><img alt="Pull requests" src="https://img.shields.io/github/issues-pr/BodenmillerGroup/readimc" /></a>
10 | <a href="https://github.com/BodenmillerGroup/readimc/blob/main/LICENSE" alt="License"><img alt="License" src="https://img.shields.io/github/license/BodenmillerGroup/readimc" /></a>
11 | 
12 | Python package for reading imaging mass cytometry (IMC) files
13 | 
14 | Documentation is available at https://bodenmillergroup.github.io/readimc
15 | 
16 | ## Citation
17 | 
18 | Please cite the following paper when using `readimc` in your work:
19 | 
20 | >  Windhager, J., Zanotelli, V.R.T., Schulz, D. et al. An end-to-end workflow for multiplexed image processing and analysis. Nat Protoc (2023). https://doi.org/10.1038/s41596-023-00881-0
21 | 
22 |     @article{Windhager2023,
23 |         author = {Windhager, Jonas and Zanotelli, Vito R.T. and Schulz, Daniel and Meyer, Lasse and Daniel, Michelle and Bodenmiller, Bernd and Eling, Nils},
24 |         title = {An end-to-end workflow for multiplexed image processing and analysis},
25 |         year = {2023},
26 |         doi = {10.1038/s41596-023-00881-0},
27 |         URL = {https://www.nature.com/articles/s41596-023-00881-0},
28 |         journal = {Nature Protocols}
29 |     }
30 | 


--------------------------------------------------------------------------------
/conftest.py:
--------------------------------------------------------------------------------
 1 | import shutil
 2 | from pathlib import Path
 3 | from typing import Generator
 4 | 
 5 | import pytest
 6 | import requests
 7 | 
 8 | from readimc import MCDFile, TXTFile
 9 | 
# URL of the gzip-compressed tarball with the raw IMC test data, published as a
# release asset (v1.0.7) of the BodenmillerGroup/TestData repository.
_imc_test_data_asset_url = (
    "https://github.com/BodenmillerGroup/TestData"
    "/releases/download/v1.0.7/210308_ImcTestData_raw.tar.gz"
)
# Directory holding the raw data, relative to the directory the archive is
# unpacked into.
_imc_test_data_raw_dir = "datasets/210308_ImcTestData/raw"
# Test files, relative to the raw data directory above.
_imc_test_data_mcd_file = "20210305_NE_mockData1/20210305_NE_mockData1.mcd"
_imc_test_data_txt_file = "20210305_NE_mockData1/20210305_NE_mockData1_ROI_001_1.txt"
17 | 
18 | 
def _download_and_extract_asset(tmp_dir_path: Path, asset_url: str):
    """Download the archive at ``asset_url`` and unpack it into ``tmp_dir_path``.

    The archive is streamed to ``tmp_dir_path / "asset.tar.gz"`` in chunks and
    then extracted into ``tmp_dir_path`` itself.

    :param tmp_dir_path: existing directory receiving the archive and its contents
    :param asset_url: URL of the ``.tar.gz`` archive to download
    :raises requests.HTTPError: if the server answers with an error status
    :raises requests.Timeout: if the server stops responding
    """
    asset_file_path = tmp_dir_path / "asset.tar.gz"
    # The timeout keeps a stalled connection from hanging the whole test
    # session; the context manager releases the connection when done.
    with requests.get(asset_url, stream=True, timeout=60) as response:
        # Fail loudly on a bad download instead of skipping the write and
        # letting unpack_archive trip over a missing file.
        response.raise_for_status()
        with asset_file_path.open(mode="wb") as f:
            # Stream in chunks so the archive never has to fit in memory.
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    shutil.unpack_archive(asset_file_path, tmp_dir_path)
26 | 
27 | 
@pytest.fixture(scope="session")
def imc_test_data_raw_path(tmp_path_factory) -> Generator[Path, None, None]:
    """Provide the directory containing the raw IMC test data.

    The test data archive is downloaded and unpacked once per test session
    into a fresh temporary directory, which is deleted again on teardown.
    """
    download_root = tmp_path_factory.mktemp("raw")
    _download_and_extract_asset(download_root, _imc_test_data_asset_url)
    raw_data_path = download_root / Path(_imc_test_data_raw_dir)
    yield raw_data_path
    # Teardown: drop the archive and everything extracted from it.
    shutil.rmtree(download_root)
34 | 
35 | 
@pytest.fixture
def imc_test_data_mcd_file(
    imc_test_data_raw_path: Path,
) -> Generator[MCDFile, None, None]:
    """Provide the test .mcd file, opened for the duration of one test."""
    mcd_path = imc_test_data_raw_path.joinpath(_imc_test_data_mcd_file)
    with MCDFile(mcd_path) as mcd_file:
        yield mcd_file
43 | 
44 | 
@pytest.fixture
def imc_test_data_txt_file(
    imc_test_data_raw_path: Path,
) -> Generator[TXTFile, None, None]:
    """Provide the test .txt file, opened for the duration of one test."""
    txt_path = imc_test_data_raw_path.joinpath(_imc_test_data_txt_file)
    with TXTFile(txt_path) as txt_file:
        yield txt_file
52 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.https://www.sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/source/_static/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/readimc/5eb3712cbe51ad8e2c7f1da7f309956b7bca007a/docs/source/_static/.gitkeep


--------------------------------------------------------------------------------
/docs/source/authors.md:
--------------------------------------------------------------------------------
1 | ```{include} ../../AUTHORS.md
2 | ```
3 | 


--------------------------------------------------------------------------------
/docs/source/changelog.md:
--------------------------------------------------------------------------------
1 | ```{include} ../../CHANGELOG.md
2 | ```
3 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('../..'))
16 | 
17 | 
18 | # -- Project information -----------------------------------------------------
19 | 
20 | project = "readimc"
21 | copyright = "2021, University of Zurich"
22 | author = "Jonas Windhager, Milad Adibi"
23 | 
24 | 
25 | # -- General configuration ---------------------------------------------------
26 | 
27 | # Add any Sphinx extension module names here, as strings. They can be
28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
29 | # ones.
30 | extensions = [
31 |     "autoapi.extension",
32 |     "myst_parser",
33 |     "sphinx.ext.autodoc",
34 | ]
35 | 
36 | # Add any paths that contain templates here, relative to this directory.
37 | templates_path = ["_templates"]
38 | 
39 | # List of patterns, relative to source directory, that match files and
40 | # directories to ignore when looking for source files.
41 | # This pattern also affects html_static_path and html_extra_path.
42 | exclude_patterns = []
43 | 
44 | 
45 | # -- Options for HTML output -------------------------------------------------
46 | 
47 | # The theme to use for HTML and HTML Help pages.  See the documentation for
48 | # a list of builtin themes.
49 | #
50 | html_theme = "sphinx_rtd_theme"
51 | 
52 | # Add any paths that contain custom static files (such as style sheets) here,
53 | # relative to this directory. They are copied after the builtin static files,
54 | # so a file named "default.css" will overwrite the builtin "default.css".
55 | html_static_path = ["_static"]
56 | 
# -- Options for API documentation (sphinx-autoapi, autodoc) -----------------

# Language of the sources that sphinx-autoapi should document.
autoapi_type = "python"
# Location of the package sources; the relative path points from docs/source
# to the readimc package at the repository root.
autoapi_dirs = ["../../readimc"]
# Do not add the generated API pages to the toctree automatically; index.md
# lists autoapi/readimc/index in its own "API documentation" toctree.
autoapi_add_toctree_entry = False
# NOTE(review): per the sphinx-autoapi documentation, "init" takes the class
# description from the __init__ docstring - confirm this is still intended.
autoapi_python_class_content = "init"
# Directives applied to the generated API pages.
autoapi_options = [
    "show-module-summary",
    "imported-members",
    "members",
    "undoc-members",
    "inherited-members",
    "show-inheritance",
]
# NOTE(review): per the Sphinx autodoc documentation, "description" renders
# type hints in the parameter descriptions rather than in the signatures.
autodoc_typehints = "description"
70 | 


--------------------------------------------------------------------------------
/docs/source/contributing.md:
--------------------------------------------------------------------------------
1 | ```{include} ../../CONTRIBUTING.md
2 | ```
3 | 


--------------------------------------------------------------------------------
/docs/source/contributors.md:
--------------------------------------------------------------------------------
1 | ```{include} ../../CONTRIBUTORS.md
2 | ```
3 | 


--------------------------------------------------------------------------------
/docs/source/index.md:
--------------------------------------------------------------------------------
 1 | # Welcome
 2 | 
 3 | `readimc` is a Python package for reading imaging mass cytometry (IMC) files
 4 | 
 5 | ## Supported file types
 6 | 
 7 |   - IMC .mcd files
 8 |   - IMC .txt files
 9 | 
10 | ## Extractable image data
11 | 
12 |   - Slide images (user-uploaded)
13 |   - Panorama images (single-channel, color)
14 |   - IMC acquisitions (multi-channel, grayscale)
15 |   - Before/after-ablation images (single-channel, color)
16 | 
17 | ```{toctree}
18 | :hidden:
19 | :caption: Contents
20 | 
21 | install
22 | usage
23 | license
24 | changelog
25 | ```
26 | 
27 | ```{toctree}
28 | :hidden:
29 | :caption: Development
30 | 
31 | authors
32 | contributors
33 | contributing
34 | ```
35 | 
36 | ```{toctree}
37 | :hidden:
38 | :caption: API documentation
39 | 
40 | autoapi/readimc/index
41 | ```
42 | 


--------------------------------------------------------------------------------
/docs/source/install.md:
--------------------------------------------------------------------------------
 1 | # Installation
 2 | 
 3 | You can install `readimc` from [PyPI](https://pypi.org) using
 4 | [pip](https://pypi.org/project/pip):
 5 | 
 6 |     pip install readimc
 7 | 
 8 | Alternatively, you can install `readimc` from [conda-forge](https://conda-forge.org) using [conda](https://conda.io):
 9 | 
10 |     conda install -c conda-forge readimc
11 | 


--------------------------------------------------------------------------------
/docs/source/license.md:
--------------------------------------------------------------------------------
1 | # License
2 | 
3 | ```{include} ../../LICENSE
4 | ```
5 | 


--------------------------------------------------------------------------------
/docs/source/usage.md:
--------------------------------------------------------------------------------
  1 | # Usage
  2 | 
  3 | The `readimc` package exports two classes for reading IMC .mcd and IMC .txt files:
  4 | 
  5 | ```python
  6 | from readimc import MCDFile, TXTFile
  7 | ```
  8 | 
  9 | ## Loading IMC .txt files
 10 | 
 11 | IMC .txt files can be loaded as follows:
 12 | 
 13 | ```python
 14 | with TXTFile("/path/to/file.txt") as f:
 15 |     print(f.channel_names)  # metals
 16 |     print(f.channel_labels)  # targets
 17 | ```
 18 | 
 19 | ### Reading IMC acquisitions
 20 | 
 21 | The acquisition contained in an IMC .txt file can be read as follows:
 22 | 
 23 | ```python
 24 | with TXTFile("/path/to/file.txt") as f:
 25 |     img = f.read_acquisition()  # numpy array, shape: (c, y, x), dtype: float32
 26 | ```
 27 | 
 28 | ```{note}
 29 | IMC .txt files only contain a single IMC acquisition.
 30 | ```
 31 | 
 32 | ## Loading IMC .mcd files
 33 | 
 34 | IMC .mcd files can be loaded as follows:
 35 | 
 36 | ```python
 37 | with MCDFile("/path/to/file.mcd") as f:
 38 |     num_slides = len(f.slides)
 39 | ```
 40 | 
 41 | ```{note}
 42 | Although uncommon, a single IMC .mcd file can contain multiple slides. Each slide can
 43 | have zero or more panorama images and zero or more IMC acquisitions.
 44 | ```
 45 | 
 46 | ### Extracting metadata
 47 | 
 48 | Basic metadata on slides, panoramas and acquisitions can be accessed through properties:
 49 | 
 50 | ```python
 51 | with MCDFile("/path/to/file.mcd") as f:
 52 |     # first slide
 53 |     slide = f.slides[0]
 54 |     print(
 55 |         slide.id,
 56 |         slide.description,
 57 |         slide.width_um,
 58 |         slide.height_um,
 59 |     )
 60 |     # first panorama of first slide
 61 |     panorama = slide.panoramas[0]
 62 |     print(
 63 |         panorama.id,
 64 |         panorama.description,
 65 |         panorama.width_um,
 66 |         panorama.height_um,
 67 |     )
 68 |     # first acquisition of first slide
 69 |     acquisition = slide.acquisitions[0]
 70 |     print(
 71 |         acquisition.id,
 72 |         acquisition.description,
 73 |         acquisition.width_um,
 74 |         acquisition.height_um,
 75 |         acquisition.channel_names,  # metals
 76 |         acquisition.channel_labels,  # targets
 77 |     )
 78 | ```
 79 | 
 80 | For a full list of available properties, please consult the API documentation of the
 81 | `Slide`, `Panorama` and `Acquisition` classes (additional metadata is available through
 82 | their `metadata` properties). The complete metadata embedded in IMC .mcd files is
 83 | accessible through `MCDFile.schema_xml` (in proprietary XML format).
 84 | 
 85 | ### Reading slide images
 86 | 
 87 | IMC .mcd files can store slide images uploaded by the user (e.g., photographs) or
 88 | acquired by the instrument. For
 89 | [supported image file formats](https://imageio.readthedocs.io/en/stable/formats.html),
 90 | these images can be read as follows:
 91 | 
 92 | ```python
 93 | with MCDFile("/path/to/file.mcd") as f:
 94 |     slide = f.slides[0]  # first slide
 95 |     img = f.read_slide(slide)  # numpy array or None
 96 | ```
 97 | 
 98 | ### Reading panorama images
 99 | 
100 | IMC .mcd files can contain zero or more panorama images acquired by the instrument,
101 | which can be read as follows:
102 | 
103 | ```python
104 | with MCDFile("/path/to/file.mcd") as f:
105 |     panorama = f.slides[0].panoramas[0]  # first panorama of first slide
106 |     img = f.read_panorama(panorama)  # numpy array
107 | ```
108 | 
109 | ```{note}
110 | `Slide.panoramas` only exposes panoramas for which panorama images are available. The
111 | raw metadata accessible through `MCDFile.schema_xml` may contain additional panorama
112 | entries of type `"Default"` that represent "virtual" panoramas and do not correspond
113 | to actual images.
114 | ```
115 | 
116 | ### Reading IMC acquisitions
117 | 
118 | IMC .mcd files can contain zero or more IMC acquisitions, which can be read as follows:
119 | 
120 | ```python
121 | with MCDFile("/path/to/file.mcd") as f:
122 |     acquisition = f.slides[0].acquisitions[0]  # first acquisition of first slide
123 |     img = f.read_acquisition(acquisition)  # array, shape: (c, y, x), dtype: float32
124 | ```
125 | 
126 | ### Reading before/after-ablation images
127 | 
128 | The IMC instrument may be configured to acquire an optical image before/after each IMC
129 | acquisition. If available, these before/after-ablation images can be read as follows:
130 | 
131 | ```python
132 | with MCDFile("/path/to/file.mcd") as f:
133 |     acquisition = f.slides[0].acquisitions[0]  # first acquisition of first slide
134 |     before_ablation_img = f.read_before_ablation_image(acquisition)  # array or None
135 |     after_ablation_img = f.read_after_ablation_image(acquisition)  # array or None
136 | ```
137 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=64", "wheel", "setuptools_scm[toml]>=6.2"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [tool.pytest.ini_options]
 6 | addopts = ["--cov=readimc", "--cov-report=xml:coverage.xml"]
 7 | testpaths = ["tests"]
 8 | 
 9 | [tool.ruff]
10 | select = ["E", "F", "I"]
11 | 
12 | [tool.setuptools_scm]
13 | write_to = "readimc/_version.py"
14 | 


--------------------------------------------------------------------------------
/readimc/__init__.py:
--------------------------------------------------------------------------------
 1 | """Python package for reading imaging mass cytometry (IMC) files"""
 2 | 
 3 | from .imc_file import IMCFile
 4 | from .mcd_file import MCDFile
 5 | from .mcd_parser import MCDParser, MCDParserError
 6 | from .txt_file import TXTFile
 7 | 
 8 | __all__ = [
 9 |     "IMCFile",
10 |     "TXTFile",
11 |     "MCDFile",
12 |     "MCDParser",
13 |     "MCDParserError",
14 | ]
15 | 


--------------------------------------------------------------------------------
/readimc/data/__init__.py:
--------------------------------------------------------------------------------
1 | """Imaging mass cytometry (IMC) metadata classes"""
2 | 
3 | from readimc.data.acquisition import Acquisition, AcquisitionBase
4 | from readimc.data.panorama import Panorama
5 | from readimc.data.slide import Slide
6 | 
7 | __all__ = ["Slide", "Panorama", "Acquisition", "AcquisitionBase"]
8 | 


--------------------------------------------------------------------------------
/readimc/data/acquisition.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | from abc import ABC, abstractmethod
  3 | from dataclasses import dataclass, field
  4 | from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple
  5 | 
  6 | import numpy as np
  7 | 
  8 | if TYPE_CHECKING:
  9 |     from readimc.data.panorama import Panorama
 10 |     from readimc.data.slide import Slide
 11 | 
 12 | 
 13 | class AcquisitionBase(ABC):
 14 |     """Shared IMC acquisition metadata interface"""
 15 | 
 16 |     @property
 17 |     @abstractmethod
 18 |     def num_channels(self) -> int:
 19 |         """Number of channels"""
 20 |         raise NotImplementedError()
 21 | 
 22 |     @property
 23 |     @abstractmethod
 24 |     def channel_metals(self) -> Sequence[str]:
 25 |         """Symbols of metal isotopes (e.g. ``["Ag", "Ir"]``)"""
 26 |         raise NotImplementedError()
 27 | 
 28 |     @property
 29 |     @abstractmethod
 30 |     def channel_masses(self) -> Sequence[int]:
 31 |         """Atomic masses of metal isotopes (e.g. ``[107, 191]``)"""
 32 |         raise NotImplementedError()
 33 | 
 34 |     @property
 35 |     @abstractmethod
 36 |     def channel_labels(self) -> Sequence[str]:
 37 |         """Channel labels (user-provided)"""
 38 |         raise NotImplementedError()
 39 | 
 40 |     @property
 41 |     def channel_names(self) -> Sequence[str]:
 42 |         """Unique channel names in the format ``f"{metal}{mass}"`` (e.g.
 43 |         ``["Ag107", "Ir191"]``)"""
 44 |         return [
 45 |             f"{channel_metal}{channel_mass}"
 46 |             for channel_metal, channel_mass in zip(
 47 |                 self.channel_metals, self.channel_masses
 48 |             )
 49 |         ]
 50 | 
 51 | 
 52 | @dataclass
 53 | class Acquisition(AcquisitionBase):
 54 |     """IMC acquisition metadata"""
 55 | 
 56 |     slide: "Slide"
 57 |     """Parent slide"""
 58 | 
 59 |     panorama: Optional["Panorama"]
 60 |     """Associated panorama"""
 61 | 
 62 |     id: int
 63 |     """Acquisition ID"""
 64 | 
 65 |     roi_points_um: Optional[
 66 |         Tuple[
 67 |             Tuple[float, float],
 68 |             Tuple[float, float],
 69 |             Tuple[float, float],
 70 |             Tuple[float, float],
 71 |         ]
 72 |     ]
 73 |     """User-provided ROI points, in micrometers
 74 | 
 75 |     Order: (top left, top right, bottom right, bottom left)"""
 76 | 
 77 |     metadata: Dict[str, str]
 78 |     """Full acquisition metadata"""
 79 | 
 80 |     _num_channels: int
 81 |     _channel_metals: List[str] = field(default_factory=list)
 82 |     _channel_masses: List[int] = field(default_factory=list)
 83 |     _channel_labels: List[str] = field(default_factory=list)
 84 | 
 85 |     @property
 86 |     def description(self) -> Optional[str]:
 87 |         """User-provided acquisition description"""
 88 |         return self.metadata.get("Description")
 89 | 
 90 |     @property
 91 |     def width_px(self) -> Optional[int]:
 92 |         """Acquisition width, in pixels"""
 93 |         value = self.metadata.get("MaxX")
 94 |         if value is not None:
 95 |             return int(value)
 96 |         return None
 97 | 
 98 |     @property
 99 |     def height_px(self) -> Optional[int]:
100 |         """Acquisition height, in pixels"""
101 |         value = self.metadata.get("MaxY")
102 |         if value is not None:
103 |             return int(value)
104 |         return None
105 | 
106 |     @property
107 |     def pixel_size_x_um(self) -> Optional[float]:
108 |         """Width of a single pixel, in micrometers"""
109 |         value = self.metadata.get("AblationDistanceBetweenShotsX")
110 |         if value is not None:
111 |             return float(value)
112 |         return None
113 | 
114 |     @property
115 |     def pixel_size_y_um(self) -> Optional[float]:
116 |         """Height of a single pixel, in micrometers"""
117 |         value = self.metadata.get("AblationDistanceBetweenShotsY")
118 |         if value is not None:
119 |             return float(value)
120 |         return None
121 | 
122 |     @property
123 |     def width_um(self) -> Optional[float]:
124 |         """Acquisition width, in micrometers"""
125 |         if self.width_px is not None and self.pixel_size_x_um is not None:
126 |             return self.width_px * self.pixel_size_x_um
127 |         return None
128 | 
129 |     @property
130 |     def height_um(self) -> Optional[float]:
131 |         """Acquisition height, in micrometers"""
132 |         if self.height_px is not None and self.pixel_size_y_um is not None:
133 |             return self.height_px * self.pixel_size_y_um
134 |         return None
135 | 
136 |     @property
137 |     def num_channels(self) -> int:
138 |         return self._num_channels
139 | 
140 |     @property
141 |     def channel_metals(self) -> Sequence[str]:
142 |         return self._channel_metals
143 | 
144 |     @property
145 |     def channel_masses(self) -> Sequence[int]:
146 |         return self._channel_masses
147 | 
148 |     @property
149 |     def channel_labels(self) -> Sequence[str]:
150 |         return self._channel_labels
151 | 
152 |     @property
153 |     def roi_coords_um(
154 |         self,
155 |     ) -> Optional[
156 |         Tuple[
157 |             Tuple[float, float],
158 |             Tuple[float, float],
159 |             Tuple[float, float],
160 |             Tuple[float, float],
161 |         ]
162 |     ]:
163 |         """ROI stage coordinates, in micrometers
164 | 
165 |         Order: (top left, top right, bottom right, bottom left)"""
166 |         x1_str = self.metadata.get("ROIStartXPosUm")
167 |         y1_str = self.metadata.get("ROIStartYPosUm")
168 |         x3_str = self.metadata.get("ROIEndXPosUm")
169 |         y3_str = self.metadata.get("ROIEndYPosUm")
170 |         if (
171 |             x1_str != x3_str
172 |             and y1_str != y3_str
173 |             and x1_str is not None
174 |             and y1_str is not None
175 |             and x3_str is not None
176 |             and y3_str is not None
177 |             and self.width_um is not None
178 |             and self.height_um is not None
179 |         ):
180 |             x1, y1 = float(x1_str), float(y1_str)
181 |             x3, y3 = float(x3_str), float(y3_str)
182 |             # fix Fluidigm bug, where start positions are multiplied by 1000
183 |             if abs(x1 / 1000.0 - x3) < abs(x1 - x3):
184 |                 x1 /= 1000.0
185 |             if abs(y1 / 1000.0 - y3) < abs(y1 - y3):
186 |                 y1 /= 1000.0
187 |             # calculate counter-clockwise rotation angle, in radians
188 |             rotated_main_diag_angle = np.arctan2(y1 - y3, x1 - x3)
189 |             main_diag_angle = np.arctan2(self.height_um, -self.width_um)
190 |             angle = rotated_main_diag_angle - main_diag_angle
191 |             # calculate missing points (generative approach)
192 |             x2, y2 = self.width_um / 2.0, self.height_um / 2.0
193 |             x4, y4 = -self.width_um / 2.0, -self.height_um / 2.0
194 |             x2, y2 = (
195 |                 math.cos(angle) * x2 - math.sin(angle) * y2 + (x1 + x3) / 2.0,
196 |                 math.sin(angle) * x2 + math.cos(angle) * y2 + (y1 + y3) / 2.0,
197 |             )
198 |             x4, y4 = (
199 |                 math.cos(angle) * x4 - math.sin(angle) * y4 + (x1 + x3) / 2.0,
200 |                 math.sin(angle) * x4 + math.cos(angle) * y4 + (y1 + y3) / 2.0,
201 |             )
202 |             return ((x1, y1), (x2, y2), (x3, y3), (x4, y4))
203 |         return None
204 | 


--------------------------------------------------------------------------------
/readimc/data/panorama.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass, field
 2 | from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
 3 | 
 4 | from readimc.data.acquisition import Acquisition
 5 | 
 6 | if TYPE_CHECKING:
 7 |     from readimc.data.slide import Slide
 8 | 
 9 | 
@dataclass
class Panorama:
    """Metadata of a panorama (only panoramas that have panorama image data)"""

    slide: "Slide"
    """Slide this panorama belongs to"""

    id: int
    """Panorama ID"""

    metadata: Dict[str, str]
    """Complete panorama metadata, as key/value strings"""

    acquisitions: List[Acquisition] = field(default_factory=list)
    """Acquisitions associated with this panorama"""

    @property
    def description(self) -> Optional[str]:
        """Panorama description as entered by the user"""
        return self.metadata.get("Description")

    @property
    def width_um(self) -> Optional[float]:
        """Panorama width, in micrometers

        Computed as the mean length of the top and bottom edges; a
        ``ValueError`` is raised if the two differ by more than 0.001."""
        corners = self.points_um
        if corners is None:
            return None
        (x1, y1), (x2, y2), (x3, y3), (x4, y4) = corners
        top_edge = ((x1 - x2) ** 2.0 + (y1 - y2) ** 2.0) ** 0.5
        bottom_edge = ((x3 - x4) ** 2.0 + (y3 - y4) ** 2.0) ** 0.5
        if abs(top_edge - bottom_edge) > 0.001:
            raise ValueError(f"Panorama {self.id}: inconsistent image widths")
        return (top_edge + bottom_edge) / 2.0

    @property
    def height_um(self) -> Optional[float]:
        """Panorama height, in micrometers

        Computed as the mean length of the left and right edges; a
        ``ValueError`` is raised if the two differ by more than 0.001."""
        corners = self.points_um
        if corners is None:
            return None
        (x1, y1), (x2, y2), (x3, y3), (x4, y4) = corners
        left_edge = ((x1 - x4) ** 2.0 + (y1 - y4) ** 2.0) ** 0.5
        right_edge = ((x2 - x3) ** 2.0 + (y2 - y3) ** 2.0) ** 0.5
        if abs(left_edge - right_edge) > 0.001:
            raise ValueError(f"Panorama {self.id}: inconsistent image heights")
        return (left_edge + right_edge) / 2.0

    @property
    def points_um(
        self,
    ) -> Optional[
        Tuple[
            Tuple[float, float],
            Tuple[float, float],
            Tuple[float, float],
            Tuple[float, float],
        ]
    ]:
        """User-provided ROI points, in micrometers, or ``None`` if any of the
        eight coordinates is missing from the metadata

        Order: (top left, top right, bottom right, bottom left)"""
        corner_keys = (
            ("SlideX1PosUm", "SlideY1PosUm"),
            ("SlideX2PosUm", "SlideY2PosUm"),
            ("SlideX3PosUm", "SlideY3PosUm"),
            ("SlideX4PosUm", "SlideY4PosUm"),
        )
        # look up everything first: no value is converted unless all are present
        raw_corners = [
            (self.metadata.get(x_key), self.metadata.get(y_key))
            for x_key, y_key in corner_keys
        ]
        if any(x_str is None or y_str is None for x_str, y_str in raw_corners):
            return None
        p1, p2, p3, p4 = [
            (float(x_str), float(y_str)) for x_str, y_str in raw_corners
        ]
        return (p1, p2, p3, p4)
94 | 


--------------------------------------------------------------------------------
/readimc/data/slide.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass, field
 2 | from typing import Dict, List, Optional
 3 | 
 4 | from readimc.data.acquisition import Acquisition
 5 | from readimc.data.panorama import Panorama
 6 | 
 7 | 
 8 | @dataclass
 9 | class Slide:
10 |     """Slide metadata"""
11 | 
12 |     id: int
13 |     """Slide ID"""
14 | 
15 |     metadata: Dict[str, str]
16 |     """Full slide metadata"""
17 | 
18 |     panoramas: List[Panorama] = field(default_factory=list)
19 |     """List of panoramas associated with this slide"""
20 | 
21 |     acquisitions: List[Acquisition] = field(default_factory=list)
22 |     """List of acquisitions associated with this slide"""
23 | 
24 |     @property
25 |     def description(self) -> Optional[str]:
26 |         """User-provided slide description"""
27 |         return self.metadata.get("Description")
28 | 
29 |     @property
30 |     def width_um(self) -> Optional[float]:
31 |         """Slide width, in micrometers"""
32 |         value = self.metadata.get("WidthUm")
33 |         if value is not None:
34 |             return float(value)
35 |         return None
36 | 
37 |     @property
38 |     def height_um(self) -> Optional[float]:
39 |         """Slide height, in micrometers"""
40 |         value = self.metadata.get("HeightUm")
41 |         if value is not None:
42 |             return float(value)
43 |         return None
44 | 


--------------------------------------------------------------------------------
/readimc/imc_file.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from os import PathLike
 3 | from pathlib import Path
 4 | from typing import Optional, Union
 5 | 
 6 | import numpy as np
 7 | 
 8 | from .data import Acquisition
 9 | 
10 | 
class IMCFile(ABC):
    """Interface shared by all IMC file readers"""

    def __init__(self, path: Union[str, PathLike]) -> None:
        super().__init__()
        # normalize to a Path once so that `path` always returns a Path object
        self._path: Path = Path(path)

    @property
    def path(self) -> Path:
        """Location of the IMC file"""
        return self._path

    @abstractmethod
    def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndarray:
        """Reads the data of an IMC acquisition into a numpy array.

        :param acquisition: the acquisition to read
        :return: the acquisition data as 32-bit floating point array,
            shape: (c, y, x)
        """
        raise NotImplementedError
32 | 


--------------------------------------------------------------------------------
/readimc/mcd_file.py:
--------------------------------------------------------------------------------
  1 | import mmap
  2 | from os import PathLike
  3 | from typing import BinaryIO, List, Optional, Sequence, Union
  4 | from warnings import warn
  5 | 
  6 | import numpy as np
  7 | from imageio.v2 import imread
  8 | 
  9 | from .data import Acquisition, Panorama, Slide
 10 | from .imc_file import IMCFile
 11 | from .mcd_parser import MCDParser, MCDParserError
 12 | 
 13 | 
 14 | class MCDFile(IMCFile):
 15 |     def __init__(self, path: Union[str, PathLike]) -> None:
 16 |         """A class for reading IMC .mcd files
 17 | 
 18 |         :param path: path to the IMC .mcd file
 19 |         """
 20 |         super(MCDFile, self).__init__(path)
 21 |         self._fh: Optional[BinaryIO] = None
 22 |         self._schema_xml: Optional[str] = None
 23 |         self._slides: Optional[List[Slide]] = None
 24 | 
 25 |     @property
 26 |     def schema_xml(self) -> str:
 27 |         """Full metadata in proprietary XML format"""
 28 |         if self._schema_xml is None:
 29 |             raise IOError(f"MCD file '{self.path.name}' has not been opened")
 30 |         return self._schema_xml
 31 | 
 32 |     @property
 33 |     def metadata(self) -> str:
 34 |         """Legacy accessor for `schema_xml`"""
 35 |         warn(
 36 |             "`MCDFile.metadata` will be removed in future readimc releases; "
 37 |             "use `MCDFile.schema_xml` instead"
 38 |         )
 39 |         return self.schema_xml
 40 | 
 41 |     @property
 42 |     def slides(self) -> Sequence[Slide]:
 43 |         """Metadata on slides contained in this IMC .mcd file"""
 44 |         if self._slides is None:
 45 |             raise IOError(f"MCD file '{self.path.name}' has not been opened")
 46 |         return self._slides
 47 | 
    def __enter__(self) -> "MCDFile":
        """Opens the file when entering a ``with`` block.

        :return: this instance, so that it can be bound with ``as``
        """
        self.open()
        return self
 51 | 
    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Closes the file when leaving a ``with`` block.

        The exception details are ignored and ``None`` is returned, so an
        exception raised inside the block is not suppressed.
        """
        self.close()
 54 | 
 55 |     def open(self) -> None:
 56 |         """Opens the IMC .mcd file for reading.
 57 | 
 58 |         It is good practice to use context managers whenever possible:
 59 | 
 60 |         .. code-block:: python
 61 | 
 62 |             with MCDFile("/path/to/file.mcd") as f:
 63 |                 pass
 64 | 
 65 |         """
 66 |         if self._fh is not None:
 67 |             self._fh.close()
 68 |         self._fh = open(self._path, mode="rb")
 69 |         self._schema_xml = self._read_schema_xml()
 70 |         try:
 71 |             self._slides = MCDParser(self._schema_xml).parse_slides()
 72 |         except MCDParserError as e:
 73 |             raise IOError(
 74 |                 f"MCD file '{self.path.name}' corrupted: "
 75 |                 "error parsing slide information from MCD-XML"
 76 |             ) from e
 77 | 
 78 |     def close(self) -> None:
 79 |         """Closes the IMC .mcd file.
 80 | 
 81 |         It is good practice to use context managers whenever possible:
 82 | 
 83 |         .. code-block:: python
 84 | 
 85 |             with MCDFile("/path/to/file.mcd") as f:
 86 |                 pass
 87 | 
 88 |         """
 89 |         if self._fh is not None:
 90 |             self._fh.close()
 91 |             self._fh = None
 92 | 
 93 |     def read_acquisition(
 94 |         self, acquisition: Optional[Acquisition] = None, strict: bool = True
 95 |     ) -> np.ndarray:
 96 |         """Reads IMC acquisition data as numpy array.
 97 | 
 98 |         :param acquisition: the acquisition to read
 99 |         :param strict: set this parameter to False to try to recover corrupted data
100 |         :return: the acquisition data as 32-bit floating point array,
101 |             shape: (c, y, x)
102 |         """
103 |         if acquisition is None:
104 |             raise ValueError("acquisition")
105 |         if self._fh is None:
106 |             raise IOError(f"MCD file '{self.path.name}' has not been opened")
107 |         try:
108 |             data_start_offset = int(acquisition.metadata["DataStartOffset"])
109 |             data_end_offset = int(acquisition.metadata["DataEndOffset"])
110 |             value_bytes = int(acquisition.metadata["ValueBytes"])
111 |         except (KeyError, ValueError) as e:
112 |             raise IOError(
113 |                 f"MCD file '{self.path.name}' corrupted: "
114 |                 "cannot locate acquisition image data"
115 |             ) from e
116 |         if data_start_offset >= data_end_offset:
117 |             raise IOError(
118 |                 f"MCD file '{self.path.name}' corrupted: "
119 |                 "invalid acquisition image data offsets"
120 |             )
121 |         if value_bytes <= 0:
122 |             raise IOError("MCD file corrupted: invalid byte size")
123 |         num_channels = acquisition.num_channels
124 |         data_size = data_end_offset - data_start_offset
125 |         bytes_per_pixel = (num_channels + 3) * value_bytes
126 |         if data_size % bytes_per_pixel != 0:
127 |             data_size += 1
128 |         if data_size % bytes_per_pixel != 0:
129 |             if strict:
130 |                 raise IOError(
131 |                     f"MCD file '{self.path.name}' corrupted: "
132 |                     "invalid acquisition image data size"
133 |                 )
134 |             warn(
135 |                 f"MCD file '{self.path.name}' corrupted: "
136 |                 "invalid acquisition image data size"
137 |             )
138 |         num_pixels = data_size // bytes_per_pixel
139 |         self._fh.seek(0)
140 |         data = np.memmap(
141 |             self._fh,
142 |             dtype=np.float32,
143 |             mode="r",
144 |             offset=data_start_offset,
145 |             shape=(num_pixels, num_channels + 3),
146 |         )
147 |         xs = data[:, 0].astype(int)
148 |         ys = data[:, 1].astype(int)
149 |         try:
150 |             width = int(acquisition.metadata["MaxX"])
151 |             height = int(acquisition.metadata["MaxY"])
152 |             if width <= np.amax(xs) or height <= np.amax(ys):
153 |                 raise ValueError(
154 |                     "data shape is incompatible with acquisition image dimensions"
155 |                 )
156 |         except (KeyError, ValueError):
157 |             warn(
158 |                 f"MCD file '{self.path.name}' corrupted: "
159 |                 "cannot read acquisition image dimensions; recovering from data shape"
160 |             )
161 |             width = np.amax(xs) + 1
162 |             height = np.amax(ys) + 1
163 |         if width * height != data.shape[0]:
164 |             if strict:
165 |                 raise IOError(
166 |                     f"MCD file '{self.path.name}' corrupted: "
167 |                     "inconsistent acquisition image data size"
168 |                 )
169 |             warn(
170 |                 f"MCD file '{self.path.name}' corrupted: "
171 |                 "inconsistent acquisition image data size"
172 |             )
173 |         img = np.zeros((num_channels, height, width), dtype=np.float32)
174 |         img[:, ys, xs] = np.transpose(data[:, 3:])
175 |         return img
176 | 
177 |     def read_slide(
178 |         self, slide: Slide, raw: bool = False
179 |     ) -> Union[np.ndarray, bytes, None]:
180 |         """Reads and decodes a slide image as numpy array using the ``imageio``
181 |         package.
182 | 
183 |         .. note::
184 |             Slide images are stored as binary data within the IMC .mcd file in
185 |             an arbitrary encoding. The ``imageio`` package can decode most
186 |             commonly used image file formats, but may fail for more obscure,
187 |             in which case an ``IOException`` is raised.
188 | 
189 |         :param slide: the slide to read
190 |         :return: the slide image, or ``None`` if no image is available for the
191 |             specified slide
192 |         """
193 |         try:
194 |             data_start_offset = int(slide.metadata["ImageStartOffset"])
195 |             data_end_offset = int(slide.metadata["ImageEndOffset"])
196 |         except (KeyError, ValueError) as e:
197 |             raise IOError(
198 |                 f"MCD file '{self.path.name}' corrupted: "
199 |                 f"cannot locate image data for slide {slide.id}"
200 |             ) from e
201 |         if data_start_offset == data_end_offset == 0:
202 |             return None
203 |         data_start_offset += 161
204 |         data_end_offset -= 1
205 |         if data_start_offset >= data_end_offset:
206 |             raise IOError(
207 |                 f"MCD file '{self.path.name}' corrupted: "
208 |                 f"invalid image data offsets for slide {slide.id}"
209 |             )
210 |         try:
211 |             return self._read_image(
212 |                 data_start_offset, data_end_offset - data_start_offset, raw
213 |             )
214 |         except Exception as e:
215 |             raise IOError(
216 |                 f"MCD file '{self.path.name}' corrupted: "
217 |                 f"cannot read image for slide {slide.id}"
218 |             ) from e
219 | 
220 |     def read_panorama(
221 |         self, panorama: Panorama, raw: bool = False
222 |     ) -> Union[np.ndarray, bytes, None]:
223 |         """Reads and decodes a panorama image as numpy array using the
224 |         ``imageio`` package.
225 | 
226 |         :param panorama: the panorama to read
227 |         :return: the panorama image as numpy array
228 |         """
229 |         try:
230 |             data_start_offset = int(panorama.metadata["ImageStartOffset"])
231 |             data_end_offset = int(panorama.metadata["ImageEndOffset"])
232 |         except (KeyError, ValueError) as e:
233 |             raise IOError(
234 |                 f"MCD file '{self.path.name}' corrupted: "
235 |                 f"cannot locate image data for panorama {panorama.id}"
236 |             ) from e
237 |         if data_start_offset == data_end_offset == 0:
238 |             return None
239 |         data_start_offset += 161
240 |         data_end_offset -= 1
241 |         if data_start_offset >= data_end_offset:
242 |             raise IOError(
243 |                 f"MCD file '{self.path.name}' corrupted: "
244 |                 f"invalid image data offsets for panorama {panorama.id}"
245 |             )
246 |         try:
247 |             return self._read_image(
248 |                 data_start_offset, data_end_offset - data_start_offset, raw
249 |             )
250 |         except Exception as e:
251 |             raise IOError(
252 |                 f"MCD file '{self.path.name}' corrupted: "
253 |                 f"cannot read image for panorama {panorama.id}"
254 |             ) from e
255 | 
256 |     def read_before_ablation_image(
257 |         self, acquisition: Acquisition, raw: bool = False
258 |     ) -> Union[np.ndarray, bytes, None]:
259 |         """Reads and decodes a before-ablation image as numpy array using the
260 |         ``imageio`` package.
261 | 
262 |         :param acquisition: the acquisition for which to read the
263 |             before-ablation image
264 |         :return: the before-ablation image as numpy array, or ``None`` if no
265 |             before-ablation image is available for the specified acquisition
266 |         """
267 |         try:
268 |             data_start_offset = int(
269 |                 acquisition.metadata["BeforeAblationImageStartOffset"]
270 |             )
271 |             data_end_offset = int(acquisition.metadata["BeforeAblationImageEndOffset"])
272 |         except (KeyError, ValueError) as e:
273 |             raise IOError(
274 |                 f"MCD file '{self.path.name}' corrupted: "
275 |                 f"cannot locate before-ablation image data "
276 |                 f"for acquisition {acquisition.id}"
277 |             ) from e
278 |         if data_start_offset == data_end_offset == 0:
279 |             return None
280 |         data_start_offset += 161
281 |         data_end_offset -= 1
282 |         if data_start_offset >= data_end_offset:
283 |             raise IOError(
284 |                 f"MCD file '{self.path.name}' corrupted: "
285 |                 f"invalid before-ablation image data offsets "
286 |                 f"for acquisition {acquisition.id}"
287 |             )
288 |         try:
289 |             return self._read_image(
290 |                 data_start_offset, data_end_offset - data_start_offset, raw
291 |             )
292 |         except Exception as e:
293 |             raise IOError(
294 |                 f"MCD file '{self.path.name}' corrupted: "
295 |                 f"cannot read before-ablation image "
296 |                 f"for acquisition {acquisition.id}"
297 |             ) from e
298 | 
299 |     def read_after_ablation_image(
300 |         self, acquisition: Acquisition, raw: bool = False
301 |     ) -> Union[np.ndarray, bytes, None]:
302 |         """Reads and decodes a after-ablation image as numpy array using the
303 |         ``imageio`` package.
304 | 
305 |         :param acquisition: the acquisition for which to read the
306 |             after-ablation image
307 |         :return: the after-ablation image as numpy array, or ``None`` if no
308 |             after-ablation image is available for the specified acquisition
309 |         """
310 |         try:
311 |             data_start_offset = int(
312 |                 acquisition.metadata["AfterAblationImageStartOffset"]
313 |             )
314 |             data_end_offset = int(acquisition.metadata["AfterAblationImageEndOffset"])
315 |         except (KeyError, ValueError) as e:
316 |             raise IOError(
317 |                 f"MCD file '{self.path.name}' corrupted: "
318 |                 f"cannot locate after-ablation image data "
319 |                 f"for acquisition {acquisition.id}"
320 |             ) from e
321 |         if data_start_offset == data_end_offset == 0:
322 |             return None
323 |         data_start_offset += 161
324 |         data_end_offset -= 1
325 |         if data_start_offset >= data_end_offset:
326 |             raise IOError(
327 |                 f"MCD file '{self.path.name}' corrupted: "
328 |                 f"invalid after-ablation image data offsets "
329 |                 f"for acquisition {acquisition.id}"
330 |             )
331 |         try:
332 |             return self._read_image(
333 |                 data_start_offset, data_end_offset - data_start_offset, raw
334 |             )
335 |         except Exception as e:
336 |             raise IOError(
337 |                 f"MCD file '{self.path.name}' corrupted: "
338 |                 f"cannot read after-ablation image "
339 |                 f"for acquisition {acquisition.id}"
340 |             ) from e
341 | 
342 |     def _read_schema_xml(
343 |         self,
344 |         encoding: str = "utf-16-le",
345 |         start_sub: str = "<MCDSchema",
346 |         end_sub: str = "</MCDSchema>",
347 |     ) -> str:
348 |         if self._fh is None:
349 |             raise IOError(f"MCD file '{self.path.name}' has not been opened")
350 |         with mmap.mmap(self._fh.fileno(), 0, access=mmap.ACCESS_READ) as mm:
351 |             # V1 contains multiple MCDSchema entries
352 |             # As per imctools, the latest entry should be taken
353 |             start_sub_encoded = start_sub.encode(encoding=encoding)
354 |             start_index = mm.rfind(start_sub_encoded)
355 |             if start_index == -1:
356 |                 raise IOError(
357 |                     f"MCD file '{self.path.name}' corrupted: "
358 |                     f"start of XML document '{start_sub}' not found"
359 |                 )
360 |             end_sub_encoded = end_sub.encode(encoding=encoding)
361 |             end_index = mm.rfind(end_sub_encoded, start_index)
362 |             if end_index == -1:
363 |                 raise IOError(
364 |                     f"MCD file '{self.path.name}' corrupted: "
365 |                     f"end of XML document '{end_sub}' not found"
366 |                 )
367 |             mm.seek(start_index)
368 |             data = mm.read(end_index + len(end_sub_encoded) - start_index)
369 |         return data.decode(encoding=encoding)
370 | 
371 |     def _read_image(
372 |         self, data_offset: int, data_size: int, raw: bool = False
373 |     ) -> Union[np.ndarray, bytes]:
374 |         if self._fh is None:
375 |             raise IOError(f"MCD file '{self.path.name}' has not been opened")
376 |         self._fh.seek(data_offset)
377 |         data = self._fh.read(data_size)
378 |         if raw:
379 |             return data
380 |         else:
381 |             return imread(data)
382 | 
383 |     def __repr__(self) -> str:
384 |         return str(self._path)
385 | 


--------------------------------------------------------------------------------
/readimc/mcd_parser.py:
--------------------------------------------------------------------------------
  1 | import itertools
  2 | import re
  3 | from typing import Dict, List, Optional, Tuple
  4 | from warnings import warn
  5 | from xml.etree import ElementTree as ET
  6 | 
  7 | from .data import Acquisition, Panorama, Slide
  8 | 
  9 | 
 10 | class MCDParserError(Exception):
 11 |     def __init__(self, *args) -> None:
 12 |         """Error occurring when parsing invalid IMC .mcd file metadata"""
 13 |         super(MCDParserError, self).__init__(*args)
 14 | 
 15 | 
 16 | class MCDParser:
 17 |     _XMLNS_REGEX = re.compile(r"{(?P<xmlns>.*)}")
 18 |     _CHANNEL_REGEX = re.compile(r"^(?P<metal>[a-zA-Z]+)\((?P<mass>[0-9]+)\)$")
 19 | 
 20 |     def __init__(self, schema_xml: str) -> None:
 21 |         """A class for parsing IMC .mcd file metadata
 22 | 
 23 |         :param schema_xml: IMC .mcd file metadata in proprietary XML format
 24 |         """
 25 |         self._schema_xml = schema_xml
 26 |         self._schema_xml_elem = ET.fromstring(self._schema_xml)
 27 |         m = self._XMLNS_REGEX.match(self._schema_xml_elem.tag)
 28 |         self._schema_xml_xmlns = m.group("xmlns") if m is not None else None
 29 | 
 30 |     @property
 31 |     def schema_xml(self) -> str:
 32 |         """Full IMC .mcd file metadata in proprietary XML format"""
 33 |         return self._schema_xml
 34 | 
 35 |     @property
 36 |     def schema_xml_elem(self) -> ET.Element:
 37 |         """Full IMC .mcd file metadata as Python ElementTree element"""
 38 |         return self._schema_xml_elem
 39 | 
 40 |     @property
 41 |     def schema_xml_xmlns(self) -> Optional[str]:
 42 |         """Value of the metadata `xmlns` XML namespace attribute"""
 43 |         return self._schema_xml_xmlns
 44 | 
 45 |     @property
 46 |     def metadata(self) -> str:
 47 |         """Legacy accessor for `schema_xml`"""
 48 |         warn(
 49 |             "`MCDParser.metadata` will be removed in future readimc releases; "
 50 |             "use `MCDFile.schema_xml` instead"
 51 |         )
 52 |         return self.schema_xml
 53 | 
 54 |     @property
 55 |     def metadata_elem(self) -> ET.Element:
 56 |         """Legacy accessor for `schema_xml_elem`"""
 57 |         warn(
 58 |             "`MCDParser.metadata_elem` will be removed in future readimc releases; "
 59 |             "use `MCDFile.schema_xml_elem` instead"
 60 |         )
 61 |         return self.schema_xml_elem
 62 | 
 63 |     @property
 64 |     def metadata_xmlns(self) -> Optional[str]:
 65 |         """Legacy accessor for `schema_xml_xmlns`"""
 66 |         warn(
 67 |             "`MCDParser.metadata_xmlns` will be removed in future readimc releases; "
 68 |             "use `MCDFile.schema_xml_xmlns` instead"
 69 |         )
 70 |         return self.schema_xml_xmlns
 71 | 
 72 |     def parse_slides(self) -> List[Slide]:
 73 |         """Extract slide metadata"""
 74 |         slides = [
 75 |             self._parse_slide(slide_elem) for slide_elem in self._find_elements("Slide")
 76 |         ]
 77 |         slides.sort(key=lambda slide: slide.id)
 78 |         return slides
 79 | 
 80 |     def _parse_slide(self, slide_elem: ET.Element) -> Slide:
 81 |         slide = Slide(
 82 |             self._get_text_as_int(slide_elem, "ID"),
 83 |             self._get_metadata_dict(slide_elem),
 84 |         )
 85 |         panorama_elems = self._find_elements(f"Panorama[SlideID='{slide.id}']")
 86 |         for panorama_elem in panorama_elems:
 87 |             panorama = None
 88 |             panorama_id = self._get_text_as_int(panorama_elem, "ID")
 89 |             panorama_type = self._get_text_or_none(panorama_elem, "Type")
 90 |             if panorama_type != "Default":  # ignore "virtual" Panoramas
 91 |                 panorama = self._parse_panorama(panorama_elem, slide)
 92 |                 slide.panoramas.append(panorama)
 93 |             acquisition_roi_elems = self._find_elements(
 94 |                 f"AcquisitionROI[PanoramaID='{panorama_id}']"
 95 |             )
 96 |             for acquisition_roi_elem in acquisition_roi_elems:
 97 |                 acquisition_roi_id = self._get_text_as_int(acquisition_roi_elem, "ID")
 98 |                 roi_point_elems = self._find_elements(
 99 |                     f"ROIPoint[AcquisitionROIID='{acquisition_roi_id}']"
100 |                 )
101 |                 roi_points_um = None
102 |                 if len(roi_point_elems) == 4:
103 |                     roi_points_um = tuple(
104 |                         (
105 |                             self._get_text_as_float(roi_point_elem, "SlideXPosUm"),
106 |                             self._get_text_as_float(roi_point_elem, "SlideYPosUm"),
107 |                         )
108 |                         for roi_point_elem in sorted(
109 |                             roi_point_elems,
110 |                             key=lambda roi_point_elem: self._get_text_as_int(
111 |                                 roi_point_elem, "OrderNumber"
112 |                             ),
113 |                         )
114 |                     )
115 |                 acquisition_elems = self._find_elements(
116 |                     f"Acquisition[AcquisitionROIID='{acquisition_roi_id}']"
117 |                 )
118 |                 for acquisition_elem in acquisition_elems:
119 |                     acquisition = self._parse_acquisition(
120 |                         acquisition_elem, slide, panorama, roi_points_um  # type: ignore
121 |                     )
122 |                     slide.acquisitions.append(acquisition)
123 |                     if panorama is not None:
124 |                         panorama.acquisitions.append(acquisition)
125 |         for a, b in itertools.combinations(slide.acquisitions, 2):
126 |             a_start = int(a.metadata["DataStartOffset"])
127 |             a_end = int(a.metadata["DataEndOffset"])
128 |             b_start = int(b.metadata["DataStartOffset"])
129 |             b_end = int(b.metadata["DataEndOffset"])
130 |             if b_start <= a_start < b_end or b_start < a_end <= b_end:
131 |                 warn(
132 |                     f"Slide {slide.id} corrupted: "
133 |                     f"overlapping memory blocks for acquisitions {a.id} and {b.id}"
134 |                 )
135 |         slide.panoramas.sort(key=lambda panorama: panorama.id)
136 |         slide.acquisitions.sort(key=lambda acquisition: acquisition.id)
137 |         return slide
138 | 
139 |     def _parse_panorama(self, panorama_elem: ET.Element, slide: Slide) -> Panorama:
140 |         return Panorama(
141 |             slide,
142 |             self._get_text_as_int(panorama_elem, "ID"),
143 |             self._get_metadata_dict(panorama_elem),
144 |         )
145 | 
146 |     def _parse_acquisition(
147 |         self,
148 |         acquisition_elem: ET.Element,
149 |         slide: Slide,
150 |         panorama: Optional[Panorama],
151 |         roi_points_um: Optional[
152 |             Tuple[
153 |                 Tuple[float, float],
154 |                 Tuple[float, float],
155 |                 Tuple[float, float],
156 |                 Tuple[float, float],
157 |             ]
158 |         ],
159 |     ) -> Acquisition:
160 |         acquisition_id = self._get_text_as_int(acquisition_elem, "ID")
161 |         acquisition_channel_elems = self._find_elements(
162 |             f"AcquisitionChannel[AcquisitionID='{acquisition_id}']"
163 |         )
164 |         acquisition_channel_elems.sort(
165 |             key=lambda acquisition_channel_elem: self._get_text_as_int(
166 |                 acquisition_channel_elem, "OrderNumber"
167 |             )
168 |         )
169 |         acquisition = Acquisition(
170 |             slide,
171 |             panorama,
172 |             acquisition_id,
173 |             roi_points_um,
174 |             self._get_metadata_dict(acquisition_elem),
175 |             len(acquisition_channel_elems) - 3,
176 |         )
177 |         for i, acquisition_channel_elem in enumerate(acquisition_channel_elems):
178 |             channel_name = self._get_text(acquisition_channel_elem, "ChannelName")
179 |             if i == 0 and channel_name != "X":
180 |                 raise MCDParserError(
181 |                     f"First channel '{channel_name}' should be named 'X'"
182 |                 )
183 |             if i == 1 and channel_name != "Y":
184 |                 raise MCDParserError(
185 |                     f"Second channel '{channel_name}' should be named 'Y'"
186 |                 )
187 |             if i == 2 and channel_name != "Z":
188 |                 raise MCDParserError(
189 |                     f"Third channel '{channel_name}' should be named 'Z'"
190 |                 )
191 |             if channel_name in ("X", "Y", "Z"):
192 |                 continue
193 |             m = self._CHANNEL_REGEX.match(channel_name)
194 |             if m is None:
195 |                 raise MCDParserError(
196 |                     "Cannot extract channel information "
197 |                     f"from channel name '{channel_name}' "
198 |                     f"for acquisition {acquisition.id}"
199 |                 )
200 |             channel_label = self._get_text(acquisition_channel_elem, "ChannelLabel")
201 |             acquisition._channel_metals.append(m.group("metal"))
202 |             acquisition._channel_masses.append(int(m.group("mass")))
203 |             acquisition._channel_labels.append(channel_label)
204 |         return acquisition
205 | 
206 |     def _find_elements(self, path: str) -> List[ET.Element]:
207 |         namespaces = None
208 |         if self._schema_xml_xmlns is not None:
209 |             namespaces = {"": self._schema_xml_xmlns}
210 |         return self._schema_xml_elem.findall(path, namespaces=namespaces)
211 | 
212 |     def _get_text_or_none(self, parent_elem: ET.Element, tag: str) -> Optional[str]:
213 |         namespaces = None
214 |         if self._schema_xml_xmlns is not None:
215 |             namespaces = {"": self._schema_xml_xmlns}
216 |         elem = parent_elem.find(tag, namespaces=namespaces)
217 |         return (elem.text or "") if elem is not None else None
218 | 
219 |     def _get_text(self, parent_elem: ET.Element, tag: str) -> str:
220 |         text = self._get_text_or_none(parent_elem, tag)
221 |         if text is None:
222 |             raise MCDParserError(
223 |                 f"XML tag '{tag}' not found for parent XML tag '{parent_elem.tag}'"
224 |             )
225 |         return text
226 | 
227 |     def _get_text_as_int(self, parent_elem: ET.Element, tag: str) -> int:
228 |         text = self._get_text(parent_elem, tag)
229 |         try:
230 |             return int(text)
231 |         except ValueError as e:
232 |             raise MCDParserError(
233 |                 f"Text '{text}' of XML tag '{tag}' cannot be converted to int "
234 |                 f"for parent XML tag '{parent_elem.tag}'"
235 |             ) from e
236 | 
237 |     def _get_text_as_float(self, parent_elem: ET.Element, tag: str) -> float:
238 |         text = self._get_text(parent_elem, tag)
239 |         try:
240 |             return float(text)
241 |         except ValueError as e:
242 |             raise MCDParserError(
243 |                 f"Text '{text}' of XML tag '{tag}' cannot be converted to "
244 |                 f"float for parent XML tag '{parent_elem.tag}'"
245 |             ) from e
246 | 
247 |     def _get_metadata_dict(self, parent_elem: ET.Element) -> Dict[str, str]:
248 |         metadata = {}
249 |         for elem in parent_elem:
250 |             tag = elem.tag
251 |             if self._schema_xml_xmlns is not None:
252 |                 tag = tag.replace(f"{{{self._schema_xml_xmlns}}}", "")
253 |             metadata[tag] = elem.text or ""
254 |         return metadata
255 | 


--------------------------------------------------------------------------------
/readimc/txt_file.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from os import PathLike
  3 | from typing import List, Optional, Sequence, TextIO, Tuple, Union
  4 | from warnings import warn
  5 | 
  6 | import numpy as np
  7 | import pandas as pd
  8 | 
  9 | from .data import Acquisition, AcquisitionBase
 10 | from .imc_file import IMCFile
 11 | 
 12 | 
 13 | class TXTFile(IMCFile, AcquisitionBase):
 14 |     _CHANNEL_REGEX = re.compile(
 15 |         r"^(?P<label>.*)\((?P<metal>[a-zA-Z]+)(?P<mass>[0-9]+)[^0-9]*\)$"
 16 |     )
 17 | 
 18 |     def __init__(self, path: Union[str, PathLike]) -> None:
 19 |         """A class for reading IMC .txt files
 20 | 
 21 |         :param path: path to the IMC .txt file
 22 |         """
 23 |         super(TXTFile, self).__init__(path)
 24 |         self._fh: Optional[TextIO] = None
 25 |         self._num_channels: Optional[int] = None
 26 |         self._channel_metals: Optional[List[str]] = None
 27 |         self._channel_masses: Optional[List[int]] = None
 28 |         self._channel_labels: Optional[List[str]] = None
 29 | 
 30 |     @property
 31 |     def num_channels(self) -> int:
 32 |         if self._num_channels is None:
 33 |             raise IOError(f"TXT file '{self.path.name}' has not been opened")
 34 |         return self._num_channels
 35 | 
 36 |     @property
 37 |     def channel_metals(self) -> Sequence[str]:
 38 |         if self._channel_metals is None:
 39 |             raise IOError(f"TXT file '{self.path.name}' has not been opened")
 40 |         return self._channel_metals
 41 | 
 42 |     @property
 43 |     def channel_masses(self) -> Sequence[int]:
 44 |         if self._channel_masses is None:
 45 |             raise IOError(f"TXT file '{self.path.name}' has not been opened")
 46 |         return self._channel_masses
 47 | 
 48 |     @property
 49 |     def channel_labels(self) -> Sequence[str]:
 50 |         if self._channel_labels is None:
 51 |             raise IOError(f"TXT file '{self.path.name}' has not been opened")
 52 |         return self._channel_labels
 53 | 
 54 |     def __enter__(self) -> "TXTFile":
 55 |         self.open()
 56 |         return self
 57 | 
 58 |     def __exit__(self, exc_type, exc_value, traceback) -> None:
 59 |         self.close()
 60 | 
 61 |     def open(self) -> None:
 62 |         """Opens the IMC .txt file for reading.
 63 | 
 64 |         It is good practice to use context managers whenever possible:
 65 | 
 66 |         .. code-block:: python
 67 | 
 68 |             with TXTFile("/path/to/file.txt") as f:
 69 |                 pass
 70 | 
 71 |         """
 72 |         if self._fh is not None:
 73 |             self._fh.close()
 74 |         self._fh = open(self._path, mode="r")
 75 |         (
 76 |             self._num_channels,
 77 |             self._channel_metals,
 78 |             self._channel_masses,
 79 |             self._channel_labels,
 80 |         ) = self._read_channels()
 81 | 
 82 |     def close(self) -> None:
 83 |         """Closes the IMC .txt file.
 84 | 
 85 |         It is good practice to use context managers whenever possible:
 86 | 
 87 |         .. code-block:: python
 88 | 
 89 |             with TXTFile("/path/to/file.txt") as f:
 90 |                 pass
 91 | 
 92 |         """
 93 |         if self._fh is not None:
 94 |             self._fh.close()
 95 |             self._fh = None
 96 | 
 97 |     def read_acquisition(
 98 |         self, acquisition: Optional[Acquisition] = None, strict: bool = True
 99 |     ) -> np.ndarray:
100 |         """Reads IMC acquisition data as numpy array.
101 | 
102 |         :param acquisition: the acquisition to read (for compatibility with ``IMCFile``
103 |             and ``MCDFile``; unused)
104 |         :param strict: set this parameter to False to try to recover corrupted data
105 |         :return: the acquisition data as 32-bit floating point array,
106 |             shape: (c, y, x)
107 |         """
108 |         if self._fh is None:
109 |             raise IOError(f"TXT file '{self.path.name}' has not been opened")
110 |         self._fh.seek(0)
111 |         df = pd.read_table(self._fh, dtype=np.float32)
112 |         if tuple(df.columns[:3]) != (
113 |             "Start_push",
114 |             "End_push",
115 |             "Pushes_duration",
116 |         ):
117 |             raise IOError(
118 |                 f"TXT file '{self.path.name}' corrupted: "
119 |                 "push columns not found in tabular data"
120 |             )
121 |         if tuple(df.columns[3:6]) != ("X", "Y", "Z"):
122 |             raise IOError(
123 |                 f"TXT file '{self.path.name}' corrupted: "
124 |                 "XYZ channels not found in tabular data"
125 |             )
126 |         width, height = df[["X", "Y"]].add(1).max(axis=0).astype(int)
127 |         if width * height != len(df.index):
128 |             if strict:
129 |                 raise IOError(
130 |                     f"TXT file '{self.path.name}' corrupted: "
131 |                     "inconsistent acquisition image data size"
132 |                 )
133 |             warn(
134 |                 f"TXT file '{self.path.name}' corrupted: "
135 |                 "inconsistent acquisition image data size"
136 |             )
137 |         img = np.zeros((height, width, self.num_channels), dtype=np.float32)
138 |         img[df["Y"].astype(int), df["X"].astype(int), :] = df.values[:, 6:]
139 |         return np.moveaxis(img, -1, 0)
140 | 
141 |     def _read_channels(self) -> Tuple[int, List[str], List[int], List[str]]:
142 |         if self._fh is None:
143 |             raise IOError(f"TXT file '{self.path.name}' has not been opened")
144 |         self._fh.seek(0)
145 |         columns = self._fh.readline().split("\t")
146 |         if tuple(columns[:3]) != ("Start_push", "End_push", "Pushes_duration"):
147 |             raise IOError(
148 |                 f"TXT file '{self.path.name}' corrupted: "
149 |                 "push columns not found in tabular data"
150 |             )
151 |         if tuple(columns[3:6]) != ("X", "Y", "Z"):
152 |             raise IOError(
153 |                 f"TXT file '{self.path.name}' corrupted: "
154 |                 "XYZ channels not found in tabular data"
155 |             )
156 |         channel_metals: List[str] = []
157 |         channel_masses: List[int] = []
158 |         channel_labels: List[str] = []
159 |         for column in columns[6:]:
160 |             m = re.match(self._CHANNEL_REGEX, column)
161 |             if m is None:
162 |                 raise IOError(
163 |                     f"TXT file '{self.path.name}' corrupted: "
164 |                     f"cannot extract channel information from text '{column}'"
165 |                 )
166 |             channel_metals.append(m.group("metal"))
167 |             channel_masses.append(int(m.group("mass")))
168 |             channel_labels.append(m.group("label"))
169 |         return len(columns[6:]), channel_metals, channel_masses, channel_labels
170 | 
171 |     def __repr__(self) -> str:
172 |         return str(self._path)
173 | 


--------------------------------------------------------------------------------
/requirements_devel.txt:
--------------------------------------------------------------------------------
1 | black
2 | mypy
3 | pre-commit
4 | ruff
5 | 


--------------------------------------------------------------------------------
/requirements_docs.txt:
--------------------------------------------------------------------------------
1 | myst-parser
2 | sphinx
3 | sphinx-autoapi
4 | sphinx_rtd_theme
5 | 


--------------------------------------------------------------------------------
/requirements_test.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-cov
3 | requests
4 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name = readimc
 3 | url = https://github.com/BodenmillerGroup/readimc
 4 | author = Jonas Windhager
 5 | author_email = jonas@windhager.io
 6 | maintainer = Milad Adibi
 7 | maintainer_email = milad.adibi@uzh.ch
 8 | classifiers =
 9 |     Operating System :: OS Independent
10 |     Programming Language :: Python :: 3
11 |     Programming Language :: Python :: 3.8
12 |     Programming Language :: Python :: 3.9
13 |     Programming Language :: Python :: 3.10
14 |     Programming Language :: Python :: 3.11
15 |     License :: OSI Approved :: MIT License
16 | license = MIT
17 | license_files = LICENSE
18 | description = Python package for reading imaging mass cytometry (IMC) files
19 | long_description = file: README.md
20 | long_description_content_type = text/markdown
21 | keywords = imaging mass cytometry, IMC
22 | 
23 | [options]
24 | zip_safe = True
25 | install_requires =
26 |     imageio[tifffile]>=v2.24.0
27 |     numpy
28 |     pandas
29 | python_requires = >=3.8
30 | packages = find:
31 | 
32 | [flake8]
33 | max-line-length = 88
34 | extend-ignore = E203
35 | 


--------------------------------------------------------------------------------
/tests/test_mcd_file.py:
--------------------------------------------------------------------------------
  1 | from hashlib import md5
  2 | from pathlib import Path
  3 | 
  4 | import numpy as np
  5 | import pytest
  6 | 
  7 | from readimc import MCDFile
  8 | 
  9 | 
 10 | class TestMCDFile:
 11 |     damond_mcd_file_path = Path("data/Damond2019/20170814_G_SE.mcd")
 12 | 
 13 |     @classmethod
 14 |     def setup_class(cls):
 15 |         if cls.damond_mcd_file_path.exists():
 16 |             cls.damond_mcd_file = MCDFile(cls.damond_mcd_file_path)
 17 |             cls.damond_mcd_file.open()
 18 |         else:
 19 |             cls.damond_mcd_file = None
 20 | 
 21 |     @classmethod
 22 |     def teardown_class(cls):
 23 |         if cls.damond_mcd_file is not None:
 24 |             cls.damond_mcd_file.close()
 25 |             cls.damond_mcd_file = None
 26 | 
 27 |     def test_schema_xml(self, imc_test_data_mcd_file: MCDFile):
 28 |         digest = md5(imc_test_data_mcd_file.schema_xml.encode("utf-8")).digest()
 29 |         assert digest == b"\xac\xd8@\x0f\x0b\xf4p\x89\xdd!\xe7o\x19\xa6\x8d\x97"
 30 | 
 31 |     def test_slides(self, imc_test_data_mcd_file: MCDFile):
 32 |         assert len(imc_test_data_mcd_file.slides) == 1
 33 | 
 34 |         slide = imc_test_data_mcd_file.slides[0]
 35 |         assert slide.id == 0
 36 |         assert slide.description == "Slide"
 37 |         assert slide.width_um == 75000.0
 38 |         assert slide.height_um == 25000.0
 39 |         assert len(slide.panoramas) == 1
 40 |         assert len(slide.acquisitions) == 3
 41 | 
 42 |         panorama = next(p for p in slide.panoramas if p.id == 1)
 43 |         assert panorama.slide == slide
 44 |         assert panorama.id == 1
 45 |         assert panorama.description == "Panorama_001"
 46 |         assert panorama.width_um == 193.0
 47 |         assert panorama.height_um == 162.0
 48 |         assert panorama.points_um == (
 49 |             (31020.0, 13486.0),
 50 |             (31213.0, 13486.0),
 51 |             (31213.0, 13324.0),
 52 |             (31020.0, 13324.0),
 53 |         )
 54 |         assert len(panorama.acquisitions) == 0
 55 | 
 56 |         acquisition = next(a for a in slide.acquisitions if a.id == 1)
 57 |         assert acquisition.slide == slide
 58 |         assert acquisition.panorama is None
 59 |         assert acquisition.id == 1
 60 |         assert acquisition.description == "ROI_001"
 61 |         assert acquisition.width_px == 60
 62 |         assert acquisition.height_px == 60
 63 |         assert acquisition.pixel_size_x_um == 1.0
 64 |         assert acquisition.pixel_size_y_um == 1.0
 65 |         assert acquisition.width_um == 60.0
 66 |         assert acquisition.height_um == 60.0
 67 |         assert acquisition.num_channels == 5
 68 |         assert tuple(acquisition.channel_metals) == (
 69 |             "Ag",
 70 |             "Pr",
 71 |             "Sm",
 72 |             "Eu",
 73 |             "Yb",
 74 |         )
 75 |         assert tuple(acquisition.channel_masses) == (107, 141, 147, 153, 172)
 76 |         assert tuple(acquisition.channel_labels) == (
 77 |             "107Ag",
 78 |             "Cytoker_651((3356))Pr141",
 79 |             "Laminin_681((851))Sm147",
 80 |             "YBX1_2987((3532))Eu153",
 81 |             "H3K27Ac_1977((2242))Yb172",
 82 |         )
 83 |         assert tuple(acquisition.channel_names) == (
 84 |             "Ag107",
 85 |             "Pr141",
 86 |             "Sm147",
 87 |             "Eu153",
 88 |             "Yb172",
 89 |         )
 90 |         assert acquisition.roi_points_um == (
 91 |             (31080.0, 13449.0),
 92 |             (31140.0, 13449.0),
 93 |             (31140.0, 13389.0),
 94 |             (31080.0, 13389.0),
 95 |         )
 96 |         assert acquisition.roi_coords_um == (
 97 |             (31080.0, 13449.0),
 98 |             (31139.799043811327, 13450.084762417044),
 99 |             (31140.501, 13390.28),
100 |             (31080.701956188677, 13389.195237582955),
101 |         )
102 | 
103 |     def test_read_acquisition(self, imc_test_data_mcd_file: MCDFile):
104 |         slide = imc_test_data_mcd_file.slides[0]
105 |         acquisition = next(a for a in slide.acquisitions if a.id == 1)
106 |         img = imc_test_data_mcd_file.read_acquisition(acquisition=acquisition)
107 |         assert img.dtype == np.float32
108 |         assert img.shape == (5, 60, 60)
109 | 
110 |     def test_read_slide(self, imc_test_data_mcd_file: MCDFile):
111 |         slide = imc_test_data_mcd_file.slides[0]
112 |         img = imc_test_data_mcd_file.read_slide(slide)
113 |         assert img is not None
114 |         assert img.dtype == np.uint8
115 |         assert img.shape == (669, 2002, 4)
116 | 
117 |     def test_read_panorama(self, imc_test_data_mcd_file: MCDFile):
118 |         slide = imc_test_data_mcd_file.slides[0]
119 |         panorama = next(p for p in slide.panoramas if p.id == 1)
120 |         img = imc_test_data_mcd_file.read_panorama(panorama)
121 |         assert img.dtype == np.uint8
122 |         assert img.shape == (162, 193, 4)
123 | 
124 |     def test_read_before_ablation_image(self, imc_test_data_mcd_file: MCDFile):
125 |         slide = imc_test_data_mcd_file.slides[0]
126 |         acquisition = next(a for a in slide.acquisitions if a.id == 1)
127 |         img = imc_test_data_mcd_file.read_before_ablation_image(acquisition)
128 |         assert img is None
129 | 
130 |     def test_read_after_ablation_image(self, imc_test_data_mcd_file: MCDFile):
131 |         slide = imc_test_data_mcd_file.slides[0]
132 |         acquisition = next(a for a in slide.acquisitions if a.id == 1)
133 |         img = imc_test_data_mcd_file.read_after_ablation_image(acquisition)
134 |         assert img is None
135 | 
136 |     @pytest.mark.skipif(not damond_mcd_file_path.exists(), reason="data not available")
137 |     def test_schema_xml_damond(self, imc_test_data_mcd_file: MCDFile):
138 |         digest = md5(imc_test_data_mcd_file.schema_xml.encode("utf-8")).digest()
139 |         assert digest == b"\xac\xd8@\x0f\x0b\xf4p\x89\xdd!\xe7o\x19\xa6\x8d\x97"
140 | 
141 |     @pytest.mark.skipif(not damond_mcd_file_path.exists(), reason="data not available")
142 |     def test_slides_damond(self):
143 |         assert len(self.damond_mcd_file.slides) == 1
144 | 
145 |         slide = self.damond_mcd_file.slides[0]
146 |         assert slide.id == 1
147 |         assert slide.description == "compensationslide1000"
148 |         assert slide.width_um == 75000.0
149 |         assert slide.height_um == 25000.0
150 |         assert len(slide.panoramas) == 8
151 |         assert len(slide.acquisitions) == 41
152 | 
153 |         panorama = next(p for p in slide.panoramas if p.id == 1)
154 |         assert panorama.slide == slide
155 |         assert panorama.id == 1
156 |         assert panorama.description == "TuningTape"
157 |         assert panorama.width_um == 1472.9184890671672
158 |         assert panorama.height_um == 1526.6011674842225
159 |         assert panorama.points_um == (
160 |             (28961.0, 6460.0),
161 |             (30433.682653484964, 6486.356736527695),
162 |             (30461.0, 4960.0),
163 |             (28988.317346515036, 4933.643263472244),
164 |         )
165 |         assert len(panorama.acquisitions) == 5
166 | 
167 |         acquisition = next(a for a in panorama.acquisitions if a.id == 1)
168 |         assert acquisition.slide == slide
169 |         assert acquisition.panorama == panorama
170 |         assert acquisition.id == 1
171 |         assert acquisition.description == "TT_G01"
172 |         assert acquisition.width_px == 51
173 |         assert acquisition.height_px == 50
174 |         assert acquisition.pixel_size_x_um == 1.0
175 |         assert acquisition.pixel_size_y_um == 1.0
176 |         assert acquisition.width_um == 51.0
177 |         assert acquisition.height_um == 50.0
178 |         assert acquisition.num_channels == 3
179 |         assert tuple(acquisition.channel_metals) == ("Eu", "Eu", "Lu")
180 |         assert tuple(acquisition.channel_masses) == (151, 153, 175)
181 |         assert tuple(acquisition.channel_labels) == ("151Eu", "153Eu", "175Lu")
182 |         assert tuple(acquisition.channel_names) == ("Eu151", "Eu153", "Lu175")
183 |         assert acquisition.roi_points_um == (
184 |             (29195.563447789347, 6091.267354770278),
185 |             (29245.675268795687, 6091.269356041085),
186 |             (29245.67326752488, 6041.381177047424),
187 |             (29195.781085989598, 6041.606820330908),
188 |         )
189 |         assert acquisition.roi_coords_um is None
190 | 
191 |     @pytest.mark.skipif(not damond_mcd_file_path.exists(), reason="data not available")
192 |     def test_read_acquisition_damond(self):
193 |         slide = self.damond_mcd_file.slides[0]
194 |         acquisition = next(a for a in slide.acquisitions if a.id == 1)
195 |         img = self.damond_mcd_file.read_acquisition(acquisition=acquisition)
196 |         assert img.dtype == np.float32
197 |         assert img.shape == (3, 50, 51)
198 | 
199 |     @pytest.mark.skipif(not damond_mcd_file_path.exists(), reason="data not available")
200 |     def test_read_slide_damond(self):
201 |         slide = self.damond_mcd_file.slides[0]
202 |         img = self.damond_mcd_file.read_slide(slide)
203 |         assert img.dtype == np.uint8
204 |         assert img.shape == (930, 2734, 3)
205 | 
206 |     @pytest.mark.skipif(not damond_mcd_file_path.exists(), reason="data not available")
207 |     def test_read_panorama_damond(self):
208 |         slide = self.damond_mcd_file.slides[0]
209 |         panorama = next(p for p in slide.panoramas if p.id == 1)
210 |         img = self.damond_mcd_file.read_panorama(panorama)
211 |         assert img.dtype == np.uint8
212 |         assert img.shape == (4096, 3951, 4)
213 | 
214 |     @pytest.mark.skipif(not damond_mcd_file_path.exists(), reason="data not available")
215 |     def test_read_before_ablation_image_damond(self):
216 |         slide = self.damond_mcd_file.slides[0]
217 |         acquisition = next(a for a in slide.acquisitions if a.id == 1)
218 |         img = self.damond_mcd_file.read_before_ablation_image(acquisition)
219 |         assert img is None
220 | 
221 |     @pytest.mark.skipif(not damond_mcd_file_path.exists(), reason="data not available")
222 |     def test_read_after_ablation_image_damond(self):
223 |         slide = self.damond_mcd_file.slides[0]
224 |         acquisition = next(a for a in slide.acquisitions if a.id == 1)
225 |         img = self.damond_mcd_file.read_after_ablation_image(acquisition)
226 |         assert img is None
227 | 


--------------------------------------------------------------------------------
/tests/test_mcd_parser.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | 
  3 | import pytest
  4 | 
  5 | from readimc import MCDFile, MCDParser
  6 | 
  7 | 
  8 | class TestMCDParser:
  9 |     damond_mcd_file_path = Path("data/Damond2019/20170814_G_SE.mcd")
 10 | 
 11 |     @classmethod
 12 |     def setup_class(cls):
 13 |         if cls.damond_mcd_file_path.exists():
 14 |             cls.damond_mcd_file = MCDFile(cls.damond_mcd_file_path)
 15 |             cls.damond_mcd_file.open()
 16 |         else:
 17 |             cls.damond_mcd_file = None
 18 | 
 19 |     @classmethod
 20 |     def teardown_class(cls):
 21 |         if cls.damond_mcd_file is not None:
 22 |             cls.damond_mcd_file.close()
 23 |             cls.damond_mcd_file = None
 24 | 
 25 |     def test_schema_xml(self):
 26 |         pass  # TODO
 27 | 
 28 |     def test_schema_xml_elem(self):
 29 |         pass  # TODO
 30 | 
 31 |     def test_schema_xml_xmlns(self):
 32 |         pass  # TODO
 33 | 
 34 |     def test_parse_slides(self, imc_test_data_mcd_file: MCDFile):
 35 |         parser = MCDParser(imc_test_data_mcd_file.schema_xml)
 36 |         slides = parser.parse_slides()
 37 |         assert len(slides) == 1
 38 | 
 39 |         slide = slides[0]
 40 |         assert slide.id == 0
 41 |         assert slide.description == "Slide"
 42 |         assert slide.width_um == 75000.0
 43 |         assert slide.height_um == 25000.0
 44 |         assert len(slide.panoramas) == 1
 45 |         assert len(slide.acquisitions) == 3
 46 | 
 47 |         panorama = next(p for p in slide.panoramas if p.id == 1)
 48 |         assert panorama.slide == slide
 49 |         assert panorama.id == 1
 50 |         assert panorama.description == "Panorama_001"
 51 |         assert panorama.width_um == 193.0
 52 |         assert panorama.height_um == 162.0
 53 |         assert panorama.points_um == (
 54 |             (31020.0, 13486.0),
 55 |             (31213.0, 13486.0),
 56 |             (31213.0, 13324.0),
 57 |             (31020.0, 13324.0),
 58 |         )
 59 |         assert len(panorama.acquisitions) == 0
 60 | 
 61 |         acquisition = next(a for a in slide.acquisitions if a.id == 1)
 62 |         assert acquisition.slide == slide
 63 |         assert acquisition.panorama is None
 64 |         assert acquisition.id == 1
 65 |         assert acquisition.description == "ROI_001"
 66 |         assert acquisition.width_px == 60
 67 |         assert acquisition.height_px == 60
 68 |         assert acquisition.pixel_size_x_um == 1.0
 69 |         assert acquisition.pixel_size_y_um == 1.0
 70 |         assert acquisition.width_um == 60.0
 71 |         assert acquisition.height_um == 60.0
 72 |         assert acquisition.num_channels == 5
 73 |         assert tuple(acquisition.channel_metals) == (
 74 |             "Ag",
 75 |             "Pr",
 76 |             "Sm",
 77 |             "Eu",
 78 |             "Yb",
 79 |         )
 80 |         assert tuple(acquisition.channel_masses) == (107, 141, 147, 153, 172)
 81 |         assert tuple(acquisition.channel_labels) == (
 82 |             "107Ag",
 83 |             "Cytoker_651((3356))Pr141",
 84 |             "Laminin_681((851))Sm147",
 85 |             "YBX1_2987((3532))Eu153",
 86 |             "H3K27Ac_1977((2242))Yb172",
 87 |         )
 88 |         assert tuple(acquisition.channel_names) == (
 89 |             "Ag107",
 90 |             "Pr141",
 91 |             "Sm147",
 92 |             "Eu153",
 93 |             "Yb172",
 94 |         )
 95 |         assert acquisition.roi_points_um == (
 96 |             (31080.0, 13449.0),
 97 |             (31140.0, 13449.0),
 98 |             (31140.0, 13389.0),
 99 |             (31080.0, 13389.0),
100 |         )
101 |         assert acquisition.roi_coords_um == (
102 |             (31080.0, 13449.0),
103 |             (31139.799043811327, 13450.084762417044),
104 |             (31140.501, 13390.28),
105 |             (31080.701956188677, 13389.195237582955),
106 |         )
107 | 
108 |     @pytest.mark.skipif(not damond_mcd_file_path.exists(), reason="data not available")
109 |     def test_schema_xml_damond(self):
110 |         pass  # TODO
111 | 
112 |     @pytest.mark.skipif(not damond_mcd_file_path.exists(), reason="data not available")
113 |     def test_schema_xml_elem_damond(self):
114 |         pass  # TODO
115 | 
116 |     @pytest.mark.skipif(not damond_mcd_file_path.exists(), reason="data not available")
117 |     def test_schema_xml_xmlns_damond(self):
118 |         pass  # TODO
119 | 
120 |     @pytest.mark.skipif(not damond_mcd_file_path.exists(), reason="data not available")
121 |     def test_parse_slides_damond(self):
122 |         parser = MCDParser(self.damond_mcd_file.schema_xml)
123 |         slides = parser.parse_slides()
124 |         assert len(slides) == 1
125 | 
126 |         slide = slides[0]
127 |         assert slide.id == 1
128 |         assert slide.description == "compensationslide1000"
129 |         assert slide.width_um == 75000.0
130 |         assert slide.height_um == 25000.0
131 |         assert len(slide.panoramas) == 8
132 |         assert len(slide.acquisitions) == 41
133 | 
134 |         panorama = next(p for p in slide.panoramas if p.id == 1)
135 |         assert panorama.slide == slide
136 |         assert panorama.id == 1
137 |         assert panorama.description == "TuningTape"
138 |         assert panorama.width_um == 1472.9184890671672
139 |         assert panorama.height_um == 1526.6011674842225
140 |         assert panorama.points_um == (
141 |             (28961.0, 6460.0),
142 |             (30433.682653484964, 6486.356736527695),
143 |             (30461.0, 4960.0),
144 |             (28988.317346515036, 4933.643263472244),
145 |         )
146 |         assert len(panorama.acquisitions) == 5
147 | 
148 |         acquisition = next(a for a in panorama.acquisitions if a.id == 1)
149 |         assert acquisition.slide == slide
150 |         assert acquisition.panorama == panorama
151 |         assert acquisition.id == 1
152 |         assert acquisition.description == "TT_G01"
153 |         assert acquisition.width_px == 51
154 |         assert acquisition.height_px == 50
155 |         assert acquisition.pixel_size_x_um == 1.0
156 |         assert acquisition.pixel_size_y_um == 1.0
157 |         assert acquisition.width_um == 51.0
158 |         assert acquisition.height_um == 50.0
159 |         assert acquisition.num_channels == 3
160 |         assert tuple(acquisition.channel_metals) == ("Eu", "Eu", "Lu")
161 |         assert tuple(acquisition.channel_masses) == (151, 153, 175)
162 |         assert tuple(acquisition.channel_labels) == ("151Eu", "153Eu", "175Lu")
163 |         assert tuple(acquisition.channel_names) == ("Eu151", "Eu153", "Lu175")
164 |         assert acquisition.roi_points_um == (
165 |             (29195.563447789347, 6091.267354770278),
166 |             (29245.675268795687, 6091.269356041085),
167 |             (29245.67326752488, 6041.381177047424),
168 |             (29195.781085989598, 6041.606820330908),
169 |         )
170 |         assert acquisition.roi_coords_um is None
171 | 


--------------------------------------------------------------------------------
/tests/test_txt_file.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from readimc import TXTFile
 4 | 
 5 | 
 6 | class TestTXTFile:
 7 |     def test_num_channels(self, imc_test_data_txt_file: TXTFile):
 8 |         assert imc_test_data_txt_file.num_channels == 5
 9 | 
10 |     def test_channel_metals(self, imc_test_data_txt_file: TXTFile):
11 |         assert tuple(imc_test_data_txt_file.channel_metals) == (
12 |             "Ag",
13 |             "Pr",
14 |             "Sm",
15 |             "Eu",
16 |             "Yb",
17 |         )
18 | 
19 |     def test_channel_masses(self, imc_test_data_txt_file: TXTFile):
20 |         assert tuple(imc_test_data_txt_file.channel_masses) == (
21 |             107,
22 |             141,
23 |             147,
24 |             153,
25 |             172,
26 |         )
27 | 
28 |     def test_channel_labels(self, imc_test_data_txt_file: TXTFile):
29 |         assert tuple(imc_test_data_txt_file.channel_labels) == (
30 |             "107Ag",
31 |             "Cytoker_651((3356))Pr141",
32 |             "Laminin_681((851))Sm147",
33 |             "YBX1_2987((3532))Eu153",
34 |             "H3K27Ac_1977((2242))Yb172",
35 |         )
36 | 
37 |     def test_channel_names(self, imc_test_data_txt_file: TXTFile):
38 |         assert tuple(imc_test_data_txt_file.channel_names) == (
39 |             "Ag107",
40 |             "Pr141",
41 |             "Sm147",
42 |             "Eu153",
43 |             "Yb172",
44 |         )
45 | 
46 |     def test_read_acquisition(self, imc_test_data_txt_file: TXTFile):
47 |         img = imc_test_data_txt_file.read_acquisition()
48 |         assert img.dtype == np.float32
49 |         assert img.shape == (5, 60, 60)
50 | 


--------------------------------------------------------------------------------