├── .github
└── workflows
│ ├── build.yml
│ └── docs.yml
├── .gitignore
├── .pre-commit-config.yaml
├── AUTHORS.md
├── CHANGELOG.md
├── CONTRIBUTING.md
├── CONTRIBUTORS.md
├── LICENSE
├── README.md
├── conftest.py
├── docs
├── Makefile
├── make.bat
└── source
│ ├── _static
│ └── .gitkeep
│ ├── authors.md
│ ├── changelog.md
│ ├── conf.py
│ ├── contributing.md
│ ├── contributors.md
│ ├── index.md
│ ├── install.md
│ ├── license.md
│ └── usage.md
├── pyproject.toml
├── readimc
├── __init__.py
├── data
│ ├── __init__.py
│ ├── acquisition.py
│ ├── panorama.py
│ └── slide.py
├── imc_file.py
├── mcd_file.py
├── mcd_parser.py
└── txt_file.py
├── requirements_devel.txt
├── requirements_docs.txt
├── requirements_test.txt
├── setup.cfg
└── tests
├── test_mcd_file.py
├── test_mcd_parser.py
└── test_txt_file.py
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: build
2 | on:
3 | push:
4 | branches:
5 | - main
6 | tags:
7 | - "v*"
8 | pull_request:
9 | branches:
10 | - main
11 | - develop
12 | workflow_dispatch:
13 | jobs:
14 | test:
15 | name: ${{ matrix.platform }} py${{ matrix.python-version }}
16 | runs-on: ${{ matrix.platform }}
17 | strategy:
18 | matrix:
19 | platform:
20 | - ubuntu-latest
21 | - windows-latest
22 | - macos-latest
23 | python-version:
24 | - '3.8'
25 | - '3.9'
26 | - '3.10'
27 | - '3.11'
28 | steps:
29 | - uses: actions/checkout@v3
30 | with:
31 | fetch-depth: 0
32 | - uses: actions/setup-python@v4
33 | with:
34 | python-version: ${{ matrix.python-version }}
35 | - name: Run pytest with coverage
36 | run: |
37 | python -m pip install --upgrade pip
38 | python -m pip install .
39 | python -m pip install --upgrade -r requirements_test.txt
40 | pytest
41 | - uses: codecov/codecov-action@v3
42 | with:
43 | token: ${{ secrets.CODECOV_TOKEN }}
44 | files: coverage.xml
45 | deploy:
46 | needs:
47 | - test
48 | if: startsWith(github.ref, 'refs/tags')
49 | runs-on: ubuntu-latest
50 | steps:
51 | - uses: actions/checkout@v3
52 | with:
53 | fetch-depth: 0
54 | - uses: actions/setup-python@v4
55 | with:
56 | python-version: '3.x'
57 | - name: Build package
58 | run: |
59 | python -m pip install --upgrade pip build
60 | python -m build
61 | - uses: pypa/gh-action-pypi-publish@release/v1
62 | with:
63 | user: __token__
64 | password: ${{ secrets.PYPI_API_TOKEN }}
65 |
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
1 | name: docs
2 | on:
3 | push:
4 | branches:
5 | - main
6 | workflow_dispatch:
7 | jobs:
8 | docs:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@v3
12 | with:
13 | fetch-depth: 0
14 | - uses: actions/setup-python@v4
15 | with:
16 | python-version: '3.x'
17 | - name: Build documentation
18 | run: |
19 | python -m pip install --upgrade pip
20 | python -m pip install .
21 | python -m pip install --upgrade -r requirements_docs.txt
22 | cd docs && rm -rf build && make html && cd ..
23 | - uses: peaceiris/actions-gh-pages@v3
24 | with:
25 | github_token: ${{ secrets.GITHUB_TOKEN }}
26 | publish_dir: docs/build/html
27 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.vscode/
2 | /data/
3 | /readimc/_version.py
4 |
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 | cover/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | .pybuilder/
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # IPython
86 | profile_default/
87 | ipython_config.py
88 |
89 | # pyenv
90 | # For a library or package, you might want to ignore these files since the code is
91 | # intended to run in multiple environments; otherwise, check them in:
92 | # .python-version
93 |
94 | # pipenv
95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
98 | # install all needed dependencies.
99 | #Pipfile.lock
100 |
101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102 | __pypackages__/
103 |
104 | # Celery stuff
105 | celerybeat-schedule
106 | celerybeat.pid
107 |
108 | # SageMath parsed files
109 | *.sage.py
110 |
111 | # Environments
112 | .env
113 | .venv
114 | env/
115 | venv/
116 | ENV/
117 | env.bak/
118 | venv.bak/
119 |
120 | # Spyder project settings
121 | .spyderproject
122 | .spyproject
123 |
124 | # Rope project settings
125 | .ropeproject
126 |
127 | # mkdocs documentation
128 | /site
129 |
130 | # mypy
131 | .mypy_cache/
132 | .dmypy.json
133 | dmypy.json
134 |
135 | # Pyre type checker
136 | .pyre/
137 |
138 | # pytype static type analyzer
139 | .pytype/
140 |
141 | # Cython debug symbols
142 | cython_debug/
143 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | exclude: ^(\.vscode/.*|docs/source/conf.py)$
2 | repos:
3 | - repo: https://github.com/pre-commit/pre-commit-hooks
4 | rev: v4.4.0
5 | hooks:
6 | - id: check-added-large-files
7 | - id: check-case-conflict
8 | - id: check-docstring-first
9 | - id: check-executables-have-shebangs
10 | - id: check-merge-conflict
11 | - id: check-shebang-scripts-are-executable
12 | - id: check-toml
13 | - id: check-yaml
14 | - id: debug-statements
15 | - id: end-of-file-fixer
16 | - id: requirements-txt-fixer
17 | - id: trailing-whitespace
18 | - repo: https://github.com/astral-sh/ruff-pre-commit
19 | rev: v0.0.282
20 | hooks:
21 | - id: ruff
22 | args: [--fix, --exit-non-zero-on-fix]
23 | - repo: https://github.com/psf/black
24 | rev: '23.7.0'
25 | hooks:
26 | - id: black
27 | - repo: https://github.com/pre-commit/mirrors-mypy
28 | rev: v1.4.1
29 | hooks:
30 | - id: mypy
31 | additional_dependencies: [types-requests, types-PyYAML]
32 | ci:
33 | autoupdate_branch: develop
34 |
--------------------------------------------------------------------------------
/AUTHORS.md:
--------------------------------------------------------------------------------
1 | # Authors
2 |
3 | The `readimc` package was developed by [Jonas Windhager](mailto:jonas@windhager.io) based on existing functionality implemented in [imctools](https://github.com/BodenmillerGroup/imctools), which was originally created by Vito Zanotelli and Anton Rau.
4 |
5 | It was maintained by Jonas Windhager until February 2023 and is currently maintained by [Milad Adibi](mailto:milad.adibi@uzh.ch).
6 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to this project will be documented in this file.
4 |
5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7 |
8 | ## [0.8.0] - 2024-09-06
9 |
10 | Added an option to return raw data from the `read_slide`, `read_panorama`, `read_before_ablation_image` and `read_after_ablation_image` functions.
11 |
12 | Fixed a bug where the functions for reading ablation images returned an extra byte at the end of the DotNet Binary Serialization Record.
13 |
14 | ## [0.7.0] - 2023-08-11
15 |
16 | Implemented checks for overlapping raw data blocks in MCD file metadata [#6](https://github.com/BodenmillerGroup/readimc/issues/6)
17 |
18 | Implemented lenient extraction of corrupted imaging data [#19](https://github.com/BodenmillerGroup/readimc/pull/19)
19 |
20 | ## [0.6.2] - 2023-01-31
21 |
22 | Maintenance release
23 |
24 | Made modules public
25 |
26 | Renamed `MCDFile.metadata` to `MCDFile.schema_xml`
27 |
28 | Toolchain (black, flake8, isort, mypy, pre-commit)
29 |
30 | Updated GitHub Actions workflows
31 |
32 | Small bugfixes and improvements
33 |
34 | Python 3.11 compatibility
35 |
36 | ## [0.6.1] - 2022-01-28
37 |
38 | Rename `MCDXMLParser` to `MCDParser`
39 |
40 | Refactor and simplify `MCDParser` usage
41 |
42 | BREAKING CHANGES in `MCDFile`:
43 | - Renamed `MCDFile.metadata_xml_str` to `MCDFile.metadata`
44 | - Removed `MCDFile.metadata_xml` and `MCDFile.metadata_xmlns` (use `MCDParser` instead)
45 |
46 | ## [0.6.0] - 2022-01-28
47 |
48 | Expose `MCDXMLParser`
49 |
50 | ## [0.5.0] - 2021-12-02
51 |
52 | Refactored metadata accessors
53 |
54 | Refactored accessors for ROI points/coordinates
55 |
56 | Added link between acquisitions and associated panoramas
57 |
58 | Renamed `IMCMcdFile` and `IMCTxtFile` to `MCDFile` and `TXTFile`
59 |
60 | ## [0.4.2] - 2021-11-01
61 |
62 | Use pandas for reading TXT files (better performance)
63 |
64 | ## [0.4.1] - 2021-11-01
65 |
66 | Added support for Python 3.10
67 |
68 | ## [0.4.0] - 2021-10-19
69 |
70 | Added support for older versions of the Fluidigm software
71 |
72 | Use heuristics for determining acquisition start position
73 |
74 | Add offline unit tests for data from (Damond et al., 2019)
75 |
76 | Fix a numerical bug in determining panorama image dimensions
77 |
78 | ## [0.3.1] - 2021-10-11
79 |
80 | Renamed `IMCMCDFile` and `IMCTXTFile` to `IMCMcdFile` and `IMCTxtFile`, respectively
81 |
82 | ## [0.3.0] - 2021-10-11
83 |
84 | Retain meta-information after closing a file
85 |
86 | Pre-compile regular expressions for faster parsing
87 |
88 | Separately store and expose channel metals & masses; change channel name format from
89 | `f"{metal}({mass})"` to `f"{metal}{mass}"` for backwards compatibility with imctools
90 |
91 | ## [0.2.0] - 2021-10-09
92 |
93 | Use dataclasses instead of NamedTuples
94 |
95 | Renamed TXTFile and MCDFile to IMCTXTFile and IMCMCDFile, respectively
96 |
97 | IMCTXTFile and IMCMCDFile now implement a shared IMCFileBase interface
98 |
99 | IMCTXTFile and Acquisition now implement a shared AcquisitionBase interface
100 |
101 | ## [0.1.2] - 2021-10-09
102 |
103 | Explicit acquisition image reconstruction based on pixel indices
104 |
105 | ## [0.1.1] - 2021-10-09
106 |
107 | Minor documentation changes
108 |
109 | ## [0.1.0] - 2021-10-09
110 |
111 | Initial release
112 | 
113 | [0.8.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.7.0...v0.8.0
114 | [0.7.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.2...v0.7.0
115 | [0.6.2]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.1...v0.6.2
116 | [0.6.1]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.0...v0.6.1
117 | [0.6.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.5.0...v0.6.0
118 | [0.5.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.4.2...v0.5.0
119 | [0.4.2]: https://github.com/BodenmillerGroup/readimc/compare/v0.4.1...v0.4.2
120 | [0.4.1]: https://github.com/BodenmillerGroup/readimc/compare/v0.4.0...v0.4.1
121 | [0.4.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.3.1...v0.4.0
122 | [0.3.1]: https://github.com/BodenmillerGroup/readimc/compare/v0.3.0...v0.3.1
123 | [0.3.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.2.0...v0.3.0
124 | [0.2.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.1.2...v0.2.0
125 | [0.1.2]: https://github.com/BodenmillerGroup/readimc/compare/v0.1.1...v0.1.2
126 | [0.1.1]: https://github.com/BodenmillerGroup/readimc/compare/v0.1.0...v0.1.1
127 | [0.1.0]: https://github.com/BodenmillerGroup/readimc/releases/tag/v0.1.0
128 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | Pull requests are welcome. Please make sure to update tests and documentation as
4 | appropriate.
5 |
6 | For major changes, please open an issue first to discuss what you would like to change.
7 |
--------------------------------------------------------------------------------
/CONTRIBUTORS.md:
--------------------------------------------------------------------------------
1 | # Contributors
2 |
3 | Gonzalo Peña-Castellanos [@goanpeca](https://github.com/goanpeca)
4 | - maintenance of the `readimc` conda-forge recipe
5 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 University of Zurich
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # readimc
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | Python package for reading imaging mass cytometry (IMC) files
13 |
14 | Documentation is available at https://bodenmillergroup.github.io/readimc
15 |
16 | ## Citation
17 |
18 | Please cite the following paper when using `readimc` in your work:
19 |
20 | > Windhager, J., Zanotelli, V.R.T., Schulz, D. et al. An end-to-end workflow for multiplexed image processing and analysis. Nat Protoc (2023). https://doi.org/10.1038/s41596-023-00881-0
21 |
22 | @article{Windhager2023,
23 | author = {Windhager, Jonas and Zanotelli, Vito R.T. and Schulz, Daniel and Meyer, Lasse and Daniel, Michelle and Bodenmiller, Bernd and Eling, Nils},
24 | title = {An end-to-end workflow for multiplexed image processing and analysis},
25 | year = {2023},
26 | doi = {10.1038/s41596-023-00881-0},
27 | URL = {https://www.nature.com/articles/s41596-023-00881-0},
28 | journal = {Nature Protocols}
29 | }
30 |
--------------------------------------------------------------------------------
/conftest.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | from pathlib import Path
3 | from typing import Generator
4 |
5 | import pytest
6 | import requests
7 |
8 | from readimc import MCDFile, TXTFile
9 |
10 | _imc_test_data_asset_url = (
11 | "https://github.com/BodenmillerGroup/TestData"
12 | "/releases/download/v1.0.7/210308_ImcTestData_raw.tar.gz"
13 | )
14 | _imc_test_data_raw_dir = "datasets/210308_ImcTestData/raw"
15 | _imc_test_data_mcd_file = "20210305_NE_mockData1/20210305_NE_mockData1.mcd"
16 | _imc_test_data_txt_file = "20210305_NE_mockData1/20210305_NE_mockData1_ROI_001_1.txt"
17 |
18 |
def _download_and_extract_asset(tmp_dir_path: Path, asset_url: str):
    """Download the archive at ``asset_url`` and unpack it into ``tmp_dir_path``.

    The archive is stored as ``asset.tar.gz`` inside ``tmp_dir_path`` and then
    extracted into that same directory.

    :param tmp_dir_path: existing directory receiving the archive and its
        extracted contents
    :param asset_url: URL of the ``.tar.gz`` asset to download
    :raises requests.HTTPError: if the server answers with an error status
    """
    asset_file_path = tmp_dir_path / "asset.tar.gz"
    # The timeout keeps a stalled connection from hanging the whole test
    # session; the context manager releases the connection when done.
    with requests.get(asset_url, stream=True, timeout=60) as response:
        # Fail right here with the HTTP status instead of failing later with
        # an unrelated error while unpacking a missing or incomplete archive.
        response.raise_for_status()
        with asset_file_path.open(mode="wb") as f:
            # Copy in chunks rather than reading the full archive into memory.
            shutil.copyfileobj(response.raw, f)
    shutil.unpack_archive(asset_file_path, tmp_dir_path)
26 |
27 |
@pytest.fixture(scope="session")
def imc_test_data_raw_path(tmp_path_factory) -> Generator[Path, None, None]:
    """Session-scoped fixture yielding the directory of the raw IMC test data.

    The test data asset is downloaded and unpacked into a fresh temporary
    directory, which is removed again once the fixture is torn down.
    """
    download_dir = tmp_path_factory.mktemp("raw")
    _download_and_extract_asset(download_dir, _imc_test_data_asset_url)
    raw_data_dir = download_dir / _imc_test_data_raw_dir
    yield raw_data_dir
    shutil.rmtree(download_dir)
34 |
35 |
@pytest.fixture
def imc_test_data_mcd_file(
    imc_test_data_raw_path: Path,
) -> Generator[MCDFile, None, None]:
    """Yield the test data .mcd file, kept open while the test is running"""
    mcd_path = imc_test_data_raw_path / _imc_test_data_mcd_file
    with MCDFile(mcd_path) as mcd_file:
        yield mcd_file
43 |
44 |
@pytest.fixture
def imc_test_data_txt_file(
    imc_test_data_raw_path: Path,
) -> Generator[TXTFile, None, None]:
    """Yield the test data .txt file, kept open while the test is running"""
    txt_path = imc_test_data_raw_path / _imc_test_data_txt_file
    with TXTFile(txt_path) as txt_file:
        yield txt_file
52 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.https://www.sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/source/_static/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/readimc/5eb3712cbe51ad8e2c7f1da7f309956b7bca007a/docs/source/_static/.gitkeep
--------------------------------------------------------------------------------
/docs/source/authors.md:
--------------------------------------------------------------------------------
1 | ```{include} ../../AUTHORS.md
2 | ```
3 |
--------------------------------------------------------------------------------
/docs/source/changelog.md:
--------------------------------------------------------------------------------
1 | ```{include} ../../CHANGELOG.md
2 | ```
3 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('../..'))
16 |
17 |
18 | # -- Project information -----------------------------------------------------
19 |
20 | project = "readimc"
21 | copyright = "2021, University of Zurich"
22 | author = "Jonas Windhager, Milad Adibi"
23 |
24 |
25 | # -- General configuration ---------------------------------------------------
26 |
27 | # Add any Sphinx extension module names here, as strings. They can be
28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
29 | # ones.
30 | extensions = [
31 | "autoapi.extension",
32 | "myst_parser",
33 | "sphinx.ext.autodoc",
34 | ]
35 |
36 | # Add any paths that contain templates here, relative to this directory.
37 | templates_path = ["_templates"]
38 |
39 | # List of patterns, relative to source directory, that match files and
40 | # directories to ignore when looking for source files.
41 | # This pattern also affects html_static_path and html_extra_path.
42 | exclude_patterns = []
43 |
44 |
45 | # -- Options for HTML output -------------------------------------------------
46 |
47 | # The theme to use for HTML and HTML Help pages. See the documentation for
48 | # a list of builtin themes.
49 | #
50 | html_theme = "sphinx_rtd_theme"
51 |
52 | # Add any paths that contain custom static files (such as style sheets) here,
53 | # relative to this directory. They are copied after the builtin static files,
54 | # so a file named "default.css" will overwrite the builtin "default.css".
55 | html_static_path = ["_static"]
56 |
# -- Options for API documentation (sphinx-autoapi) ---------------------------

# Generate the API reference from the Python sources of the readimc package.
autoapi_type = "python"
# NOTE(review): the path points two levels up from this file's directory to
# the package in the repository root; presumably resolved relative to this
# directory -- confirm against the sphinx-autoapi documentation.
autoapi_dirs = ["../../readimc"]
# The generated API index is not added to a toctree automatically; index.md
# references "autoapi/readimc/index" in its own "API documentation" toctree.
autoapi_add_toctree_entry = False
# NOTE(review): presumably selects which docstring (class vs. __init__) is
# rendered as the class description -- confirm against the sphinx-autoapi docs.
autoapi_python_class_content = "init"
autoapi_options = [
    "show-module-summary",
    "imported-members",
    "members",
    "undoc-members",
    "inherited-members",
    "show-inheritance",
]
# NOTE(review): presumably renders type hints in the description text rather
# than in the signatures -- confirm against the Sphinx autodoc documentation.
autodoc_typehints = "description"
70 |
--------------------------------------------------------------------------------
/docs/source/contributing.md:
--------------------------------------------------------------------------------
1 | ```{include} ../../CONTRIBUTING.md
2 | ```
3 |
--------------------------------------------------------------------------------
/docs/source/contributors.md:
--------------------------------------------------------------------------------
1 | ```{include} ../../CONTRIBUTORS.md
2 | ```
3 |
--------------------------------------------------------------------------------
/docs/source/index.md:
--------------------------------------------------------------------------------
1 | # Welcome
2 |
3 | `readimc` is a Python package for reading imaging mass cytometry (IMC) files
4 |
5 | ## Supported file types
6 |
7 | - IMC .mcd files
8 | - IMC .txt files
9 |
10 | ## Extractable image data
11 |
12 | - Slide images (user-uploaded)
13 | - Panorama images (single-channel, color)
14 | - IMC acquisitions (multi-channel, grayscale)
15 | - Before/after-ablation images (single-channel, color)
16 |
17 | ```{toctree}
18 | :hidden:
19 | :caption: Contents
20 |
21 | install
22 | usage
23 | license
24 | changelog
25 | ```
26 |
27 | ```{toctree}
28 | :hidden:
29 | :caption: Development
30 |
31 | authors
32 | contributors
33 | contributing
34 | ```
35 |
36 | ```{toctree}
37 | :hidden:
38 | :caption: API documentation
39 |
40 | autoapi/readimc/index
41 | ```
42 |
--------------------------------------------------------------------------------
/docs/source/install.md:
--------------------------------------------------------------------------------
1 | # Installation
2 |
3 | You can install `readimc` from [PyPI](https://pypi.org) using
4 | [pip](https://pypi.org/project/pip):
5 |
6 | pip install readimc
7 |
8 | Alternatively, you can install `readimc` from [conda-forge](https://conda-forge.org) using [conda](https://conda.io):
9 |
10 | conda install -c conda-forge readimc
11 |
--------------------------------------------------------------------------------
/docs/source/license.md:
--------------------------------------------------------------------------------
1 | # License
2 |
3 | ```{include} ../../LICENSE
4 | ```
5 |
--------------------------------------------------------------------------------
/docs/source/usage.md:
--------------------------------------------------------------------------------
1 | # Usage
2 |
3 | The `readimc` package exports two classes for reading IMC .mcd and IMC .txt files:
4 |
5 | ```python
6 | from readimc import MCDFile, TXTFile
7 | ```
8 |
9 | ## Loading IMC .txt files
10 |
11 | IMC .txt files can be loaded as follows:
12 |
13 | ```python
14 | with TXTFile("/path/to/file.txt") as f:
15 | print(f.channel_names) # metals
16 | print(f.channel_labels) # targets
17 | ```
18 |
19 | ### Reading IMC acquisitions
20 |
21 | The acquisition contained in an IMC .txt file can be read as follows:
22 |
23 | ```python
24 | with TXTFile("/path/to/file.txt") as f:
25 | img = f.read_acquisition() # numpy array, shape: (c, y, x), dtype: float32
26 | ```
27 |
28 | ```{note}
29 | IMC .txt files only contain a single IMC acquisition.
30 | ```
31 |
32 | ## Loading IMC .mcd files
33 |
34 | IMC .mcd files can be loaded as follows:
35 |
36 | ```python
37 | with MCDFile("/path/to/file.mcd") as f:
38 | num_slides = len(f.slides)
39 | ```
40 |
41 | ```{note}
42 | Although uncommon, a single IMC .mcd file can contain multiple slides. Each slide can
43 | have zero or more panorama images and zero or more IMC acquisitions.
44 | ```
45 |
46 | ### Extracting metadata
47 |
48 | Basic metadata on slides, panoramas and acquisitions can be accessed through properties:
49 |
50 | ```python
51 | with MCDFile("/path/to/file.mcd") as f:
52 | # first slide
53 | slide = f.slides[0]
54 | print(
55 | slide.id,
56 | slide.description,
57 | slide.width_um,
58 | slide.height_um,
59 | )
60 | # first panorama of first slide
61 | panorama = slide.panoramas[0]
62 | print(
63 | panorama.id,
64 | panorama.description,
65 | panorama.width_um,
66 | panorama.height_um,
67 | )
68 | # first acquisition of first slide
69 | acquisition = slide.acquisitions[0]
70 | print(
71 | acquisition.id,
72 | acquisition.description,
73 | acquisition.width_um,
74 | acquisition.height_um,
75 | acquisition.channel_names, # metals
76 | acquisition.channel_labels, # targets
77 | )
78 | ```
79 |
80 | For a full list of available properties, please consult the API documentation of the
81 | `Slide`, `Panorama` and `Acquisition` classes (additional metadata is available through
82 | their `metadata` properties). The complete metadata embedded in IMC .mcd files is
83 | accessible through `MCDFile.schema_xml` (in proprietary XML format).
84 |
85 | ### Reading slide images
86 |
87 | IMC .mcd files can store slide images uploaded by the user (e.g., photographs) or
88 | acquired by the instrument. For
89 | [supported image file formats](https://imageio.readthedocs.io/en/stable/formats.html),
90 | these images can be read as follows:
91 |
92 | ```python
93 | with MCDFile("/path/to/file.mcd") as f:
94 | slide = f.slides[0] # first slide
95 | img = f.read_slide(slide) # numpy array or None
96 | ```
97 |
98 | ### Reading panorama images
99 |
100 | IMC .mcd files can contain zero or more panorama images acquired by the instrument,
101 | which can be read as follows:
102 |
103 | ```python
104 | with MCDFile("/path/to/file.mcd") as f:
105 | panorama = f.slides[0].panoramas[0] # first panorama of first slide
106 | img = f.read_panorama(panorama) # numpy array
107 | ```
108 |
109 | ```{note}
110 | `Slide.panoramas` only exposes panoramas for which panorama images are available. The
111 | raw metadata accessible through `MCDFile.schema_xml` may contain additional panorama
112 | entries of type `"Default"` that represent "virtual" panoramas and do not correspond
113 | to actual images.
114 | ```
115 |
116 | ### Reading IMC acquisitions
117 |
118 | IMC .mcd files can contain zero or more IMC acquisitions, which can be read as follows:
119 |
120 | ```python
121 | with MCDFile("/path/to/file.mcd") as f:
122 | acquisition = f.slides[0].acquisitions[0] # first acquisition of first slide
123 | img = f.read_acquisition(acquisition) # array, shape: (c, y, x), dtype: float32
124 | ```
125 |
126 | ### Reading before/after-ablation images
127 |
128 | The IMC instrument may be configured to acquire an optical image before/after each IMC
129 | acquisition. If available, these before/after-ablation images can be read as follows:
130 |
131 | ```python
132 | with MCDFile("/path/to/file.mcd") as f:
133 | acquisition = f.slides[0].acquisitions[0] # first acquisition of first slide
134 | before_ablation_img = f.read_before_ablation_image(acquisition) # array or None
135 | after_ablation_img = f.read_after_ablation_image(acquisition) # array or None
136 | ```
137 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "wheel", "setuptools_scm[toml]>=6.2"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [tool.pytest.ini_options]
6 | addopts = ["--cov=readimc", "--cov-report=xml:coverage.xml"]
7 | testpaths = ["tests"]
8 |
9 | [tool.ruff]
10 | select = ["E", "F", "I"]
11 |
12 | [tool.setuptools_scm]
13 | write_to = "readimc/_version.py"
14 |
--------------------------------------------------------------------------------
/readimc/__init__.py:
--------------------------------------------------------------------------------
1 | """Python package for reading imaging mass cytometry (IMC) files"""
2 |
3 | from .imc_file import IMCFile
4 | from .mcd_file import MCDFile
5 | from .mcd_parser import MCDParser, MCDParserError
6 | from .txt_file import TXTFile
7 |
8 | __all__ = [
9 | "IMCFile",
10 | "TXTFile",
11 | "MCDFile",
12 | "MCDParser",
13 | "MCDParserError",
14 | ]
15 |
--------------------------------------------------------------------------------
/readimc/data/__init__.py:
--------------------------------------------------------------------------------
1 | """Imaging mass cytometry (IMC) metadata classes"""
2 |
3 | from readimc.data.acquisition import Acquisition, AcquisitionBase
4 | from readimc.data.panorama import Panorama
5 | from readimc.data.slide import Slide
6 |
7 | __all__ = ["Slide", "Panorama", "Acquisition", "AcquisitionBase"]
8 |
--------------------------------------------------------------------------------
/readimc/data/acquisition.py:
--------------------------------------------------------------------------------
1 | import math
2 | from abc import ABC, abstractmethod
3 | from dataclasses import dataclass, field
4 | from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple
5 |
6 | import numpy as np
7 |
8 | if TYPE_CHECKING:
9 | from readimc.data.panorama import Panorama
10 | from readimc.data.slide import Slide
11 |
12 |
class AcquisitionBase(ABC):
    """Abstract interface for the channel metadata of an IMC acquisition"""

    @property
    @abstractmethod
    def num_channels(self) -> int:
        """Channel count"""
        raise NotImplementedError()

    @property
    @abstractmethod
    def channel_metals(self) -> Sequence[str]:
        """Metal isotope symbols, one per channel (e.g. ``["Ag", "Ir"]``)"""
        raise NotImplementedError()

    @property
    @abstractmethod
    def channel_masses(self) -> Sequence[int]:
        """Metal isotope atomic masses, one per channel (e.g. ``[107, 191]``)"""
        raise NotImplementedError()

    @property
    @abstractmethod
    def channel_labels(self) -> Sequence[str]:
        """User-provided channel labels"""
        raise NotImplementedError()

    @property
    def channel_names(self) -> Sequence[str]:
        """Channel names built as ``f"{metal}{mass}"`` (e.g.
        ``["Ag107", "Ir191"]``)

        Metals and masses are paired positionally with ``zip``."""
        names = []
        for metal, mass in zip(self.channel_metals, self.channel_masses):
            names.append(f"{metal}{mass}")
        return names
50 |
51 |
@dataclass
class Acquisition(AcquisitionBase):
    """Metadata record for a single IMC acquisition"""

    slide: "Slide"
    """Slide that contains this acquisition"""

    panorama: Optional["Panorama"]
    """Panorama linked to this acquisition, if any"""

    id: int
    """Numeric acquisition identifier"""

    roi_points_um: Optional[
        Tuple[
            Tuple[float, float],
            Tuple[float, float],
            Tuple[float, float],
            Tuple[float, float],
        ]
    ]
    """ROI corner points as provided by the user, in micrometers

    Order: (top left, top right, bottom right, bottom left)"""

    metadata: Dict[str, str]
    """Complete acquisition metadata as key/value strings"""

    _num_channels: int
    _channel_metals: List[str] = field(default_factory=list)
    _channel_masses: List[int] = field(default_factory=list)
    _channel_labels: List[str] = field(default_factory=list)

    def _parsed_metadata(self, key, convert):
        # Look up a metadata entry and convert it; a missing key yields None.
        raw = self.metadata.get(key)
        return None if raw is None else convert(raw)

    @property
    def description(self) -> Optional[str]:
        """Acquisition description entered by the user"""
        return self.metadata.get("Description")

    @property
    def width_px(self) -> Optional[int]:
        """Width of the acquisition, in pixels"""
        return self._parsed_metadata("MaxX", int)

    @property
    def height_px(self) -> Optional[int]:
        """Height of the acquisition, in pixels"""
        return self._parsed_metadata("MaxY", int)

    @property
    def pixel_size_x_um(self) -> Optional[float]:
        """Horizontal extent of one pixel, in micrometers"""
        return self._parsed_metadata("AblationDistanceBetweenShotsX", float)

    @property
    def pixel_size_y_um(self) -> Optional[float]:
        """Vertical extent of one pixel, in micrometers"""
        return self._parsed_metadata("AblationDistanceBetweenShotsY", float)

    @property
    def width_um(self) -> Optional[float]:
        """Width of the acquisition, in micrometers"""
        pixels = self.width_px
        if pixels is None:
            return None
        pixel_size = self.pixel_size_x_um
        if pixel_size is None:
            return None
        return pixels * pixel_size

    @property
    def height_um(self) -> Optional[float]:
        """Height of the acquisition, in micrometers"""
        pixels = self.height_px
        if pixels is None:
            return None
        pixel_size = self.pixel_size_y_um
        if pixel_size is None:
            return None
        return pixels * pixel_size

    @property
    def num_channels(self) -> int:
        return self._num_channels

    @property
    def channel_metals(self) -> Sequence[str]:
        return self._channel_metals

    @property
    def channel_masses(self) -> Sequence[int]:
        return self._channel_masses

    @property
    def channel_labels(self) -> Sequence[str]:
        return self._channel_labels

    @property
    def roi_coords_um(
        self,
    ) -> Optional[
        Tuple[
            Tuple[float, float],
            Tuple[float, float],
            Tuple[float, float],
            Tuple[float, float],
        ]
    ]:
        """Stage coordinates of the ROI corners, in micrometers

        Derived from the ROI start/end positions and the acquisition size;
        ``None`` if any of them is unavailable or the start and end positions
        coincide in x or y.

        Order: (top left, top right, bottom right, bottom left)"""
        meta = self.metadata
        start_x_str = meta.get("ROIStartXPosUm")
        start_y_str = meta.get("ROIStartYPosUm")
        end_x_str = meta.get("ROIEndXPosUm")
        end_y_str = meta.get("ROIEndYPosUm")
        if start_x_str == end_x_str or start_y_str == end_y_str:
            return None
        if (
            start_x_str is None
            or start_y_str is None
            or end_x_str is None
            or end_y_str is None
        ):
            return None
        width_um = self.width_um
        if width_um is None:
            return None
        height_um = self.height_um
        if height_um is None:
            return None

        x1, y1 = float(start_x_str), float(start_y_str)
        x3, y3 = float(end_x_str), float(end_y_str)
        # fix Fluidigm bug, where start positions are multiplied by 1000
        if abs(x1 / 1000.0 - x3) < abs(x1 - x3):
            x1 /= 1000.0
        if abs(y1 / 1000.0 - y3) < abs(y1 - y3):
            y1 /= 1000.0

        # counter-clockwise rotation angle, in radians: measured diagonal
        # direction minus the diagonal direction of the unrotated rectangle
        measured_diag = np.arctan2(y1 - y3, x1 - x3)
        reference_diag = np.arctan2(height_um, -width_um)
        angle = measured_diag - reference_diag
        cos_a, sin_a = math.cos(angle), math.sin(angle)
        center_x, center_y = (x1 + x3) / 2.0, (y1 + y3) / 2.0

        def rotate_about_center(dx, dy):
            # rotate an offset from the center and translate it back
            return (
                cos_a * dx - sin_a * dy + center_x,
                sin_a * dx + cos_a * dy + center_y,
            )

        # generate the two corners that are not given in the metadata
        half_w, half_h = width_um / 2.0, height_um / 2.0
        x2, y2 = rotate_about_center(half_w, half_h)
        x4, y4 = rotate_about_center(-half_w, -half_h)
        return ((x1, y1), (x2, y2), (x3, y3), (x4, y4))
204 |
--------------------------------------------------------------------------------
/readimc/data/panorama.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass, field
2 | from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
3 |
4 | from readimc.data.acquisition import Acquisition
5 |
6 | if TYPE_CHECKING:
7 | from readimc.data.slide import Slide
8 |
9 |
@dataclass
class Panorama:
    """Metadata of a panorama (only for panoramas with panorama image data)"""

    slide: "Slide"
    """Slide that contains this panorama"""

    id: int
    """Numeric panorama identifier"""

    metadata: Dict[str, str]
    """Complete panorama metadata as key/value strings"""

    acquisitions: List[Acquisition] = field(default_factory=list)
    """Acquisitions linked to this panorama"""

    @staticmethod
    def _distance(p, q):
        # Euclidean distance between two (x, y) points
        return ((p[0] - q[0]) ** 2.0 + (p[1] - q[1]) ** 2.0) ** 0.5

    @property
    def description(self) -> Optional[str]:
        """Panorama description entered by the user"""
        return self.metadata.get("Description")

    @property
    def width_um(self) -> Optional[float]:
        """Width of the panorama, in micrometers

        Mean of the top and bottom edge lengths; a ``ValueError`` is raised if
        they differ by more than 0.001."""
        corners = self.points_um
        if corners is None:
            return None
        top_left, top_right, bottom_right, bottom_left = corners
        top = self._distance(top_left, top_right)
        bottom = self._distance(bottom_right, bottom_left)
        if abs(top - bottom) > 0.001:
            raise ValueError(f"Panorama {self.id}: inconsistent image widths")
        return (top + bottom) / 2.0

    @property
    def height_um(self) -> Optional[float]:
        """Height of the panorama, in micrometers

        Mean of the left and right edge lengths; a ``ValueError`` is raised if
        they differ by more than 0.001."""
        corners = self.points_um
        if corners is None:
            return None
        top_left, top_right, bottom_right, bottom_left = corners
        left = self._distance(top_left, bottom_left)
        right = self._distance(top_right, bottom_right)
        if abs(left - right) > 0.001:
            raise ValueError(f"Panorama {self.id}: inconsistent image heights")
        return (left + right) / 2.0

    @property
    def points_um(
        self,
    ) -> Optional[
        Tuple[
            Tuple[float, float],
            Tuple[float, float],
            Tuple[float, float],
            Tuple[float, float],
        ]
    ]:
        """User-provided ROI points, in micrometers

        ``None`` unless all eight coordinates are present in the metadata.

        Order: (top left, top right, bottom right, bottom left)"""
        key_pairs = (
            ("SlideX1PosUm", "SlideY1PosUm"),
            ("SlideX2PosUm", "SlideY2PosUm"),
            ("SlideX3PosUm", "SlideY3PosUm"),
            ("SlideX4PosUm", "SlideY4PosUm"),
        )
        raw_points = [
            (self.metadata.get(x_key), self.metadata.get(y_key))
            for x_key, y_key in key_pairs
        ]
        for x_str, y_str in raw_points:
            if x_str is None or y_str is None:
                return None
        return tuple(  # type: ignore
            (float(x_str), float(y_str)) for x_str, y_str in raw_points
        )
94 |
--------------------------------------------------------------------------------
/readimc/data/slide.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass, field
2 | from typing import Dict, List, Optional
3 |
4 | from readimc.data.acquisition import Acquisition
5 | from readimc.data.panorama import Panorama
6 |
7 |
@dataclass
class Slide:
    """Metadata of a slide"""

    id: int
    """Numeric slide identifier"""

    metadata: Dict[str, str]
    """Complete slide metadata as key/value strings"""

    panoramas: List[Panorama] = field(default_factory=list)
    """Panoramas that belong to this slide"""

    acquisitions: List[Acquisition] = field(default_factory=list)
    """Acquisitions that belong to this slide"""

    def _float_metadata(self, key):
        # Metadata entry converted to float; None if the key is absent.
        raw = self.metadata.get(key)
        return None if raw is None else float(raw)

    @property
    def description(self) -> Optional[str]:
        """Slide description entered by the user"""
        return self.metadata.get("Description")

    @property
    def width_um(self) -> Optional[float]:
        """Width of the slide, in micrometers"""
        return self._float_metadata("WidthUm")

    @property
    def height_um(self) -> Optional[float]:
        """Height of the slide, in micrometers"""
        return self._float_metadata("HeightUm")
44 |
--------------------------------------------------------------------------------
/readimc/imc_file.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from os import PathLike
3 | from pathlib import Path
4 | from typing import Optional, Union
5 |
6 | import numpy as np
7 |
8 | from .data import Acquisition
9 |
10 |
class IMCFile(ABC):
    """Abstract base shared by the IMC file readers"""

    def __init__(self, path: Union[str, PathLike]) -> None:
        # the given location is stored as a ``pathlib.Path``
        super().__init__()
        self._path = Path(path)

    @property
    def path(self) -> Path:
        """Location of the IMC file"""
        return self._path

    @abstractmethod
    def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndarray:
        """Read the data of an IMC acquisition into a numpy array.

        :param acquisition: the acquisition to read
        :return: the acquisition data as 32-bit floating point array,
            shape: (c, y, x)
        """
        raise NotImplementedError()
32 |
--------------------------------------------------------------------------------
/readimc/mcd_file.py:
--------------------------------------------------------------------------------
1 | import mmap
2 | from os import PathLike
3 | from typing import BinaryIO, List, Optional, Sequence, Union
4 | from warnings import warn
5 |
6 | import numpy as np
7 | from imageio.v2 import imread
8 |
9 | from .data import Acquisition, Panorama, Slide
10 | from .imc_file import IMCFile
11 | from .mcd_parser import MCDParser, MCDParserError
12 |
13 |
class MCDFile(IMCFile):
    def __init__(self, path: Union[str, PathLike]) -> None:
        """A class for reading IMC .mcd files

        :param path: path to the IMC .mcd file
        """
        super(MCDFile, self).__init__(path)
        self._fh: Optional[BinaryIO] = None
        self._schema_xml: Optional[str] = None
        self._slides: Optional[List[Slide]] = None

    @property
    def schema_xml(self) -> str:
        """Full metadata in proprietary XML format"""
        if self._schema_xml is None:
            raise IOError(f"MCD file '{self.path.name}' has not been opened")
        return self._schema_xml

    @property
    def metadata(self) -> str:
        """Legacy accessor for `schema_xml`"""
        warn(
            "`MCDFile.metadata` will be removed in future readimc releases; "
            "use `MCDFile.schema_xml` instead"
        )
        return self.schema_xml

    @property
    def slides(self) -> Sequence[Slide]:
        """Metadata on slides contained in this IMC .mcd file"""
        if self._slides is None:
            raise IOError(f"MCD file '{self.path.name}' has not been opened")
        return self._slides

    def __enter__(self) -> "MCDFile":
        self.open()
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def open(self) -> None:
        """Opens the IMC .mcd file for reading.

        If reading or parsing the embedded metadata fails, the file handle is
        closed again before the error is propagated.

        It is good practice to use context managers whenever possible:

        .. code-block:: python

            with MCDFile("/path/to/file.mcd") as f:
                pass

        :raises IOError: if the metadata cannot be located or parsed
        """
        if self._fh is not None:
            self._fh.close()
        self._fh = open(self._path, mode="rb")
        try:
            self._schema_xml = self._read_schema_xml()
            try:
                self._slides = MCDParser(self._schema_xml).parse_slides()
            except MCDParserError as e:
                raise IOError(
                    f"MCD file '{self.path.name}' corrupted: "
                    "error parsing slide information from MCD-XML"
                ) from e
        except BaseException:
            # __exit__ is not called when __enter__ raises, so the handle
            # would otherwise stay open until garbage collection
            self.close()
            raise

    def close(self) -> None:
        """Closes the IMC .mcd file.

        It is good practice to use context managers whenever possible:

        .. code-block:: python

            with MCDFile("/path/to/file.mcd") as f:
                pass

        """
        if self._fh is not None:
            self._fh.close()
            self._fh = None

    def read_acquisition(
        self, acquisition: Optional[Acquisition] = None, strict: bool = True
    ) -> np.ndarray:
        """Reads IMC acquisition data as numpy array.

        :param acquisition: the acquisition to read
        :param strict: set this parameter to False to try to recover corrupted data
        :return: the acquisition data as 32-bit floating point array,
            shape: (c, y, x)
        :raises ValueError: if no acquisition is given
        :raises IOError: if the file has not been opened or the acquisition
            data cannot be located or is inconsistent (in strict mode)
        """
        if acquisition is None:
            raise ValueError("acquisition")
        if self._fh is None:
            raise IOError(f"MCD file '{self.path.name}' has not been opened")
        try:
            data_start_offset = int(acquisition.metadata["DataStartOffset"])
            data_end_offset = int(acquisition.metadata["DataEndOffset"])
            value_bytes = int(acquisition.metadata["ValueBytes"])
        except (KeyError, ValueError) as e:
            raise IOError(
                f"MCD file '{self.path.name}' corrupted: "
                "cannot locate acquisition image data"
            ) from e
        if data_start_offset >= data_end_offset:
            raise IOError(
                f"MCD file '{self.path.name}' corrupted: "
                "invalid acquisition image data offsets"
            )
        if value_bytes <= 0:
            raise IOError("MCD file corrupted: invalid byte size")
        num_channels = acquisition.num_channels
        data_size = data_end_offset - data_start_offset
        # each pixel record holds three extra values (X, Y, Z) before the
        # channel values
        bytes_per_pixel = (num_channels + 3) * value_bytes
        if data_size % bytes_per_pixel != 0:
            # tolerate an end offset that is short by a single byte
            data_size += 1
        if data_size % bytes_per_pixel != 0:
            if strict:
                raise IOError(
                    f"MCD file '{self.path.name}' corrupted: "
                    "invalid acquisition image data size"
                )
            warn(
                f"MCD file '{self.path.name}' corrupted: "
                "invalid acquisition image data size"
            )
        num_pixels = data_size // bytes_per_pixel
        self._fh.seek(0)
        data = np.memmap(
            self._fh,
            dtype=np.float32,
            mode="r",
            offset=data_start_offset,
            shape=(num_pixels, num_channels + 3),
        )
        # the first two columns hold the pixel coordinates
        xs = data[:, 0].astype(int)
        ys = data[:, 1].astype(int)
        try:
            width = int(acquisition.metadata["MaxX"])
            height = int(acquisition.metadata["MaxY"])
            if width <= np.amax(xs) or height <= np.amax(ys):
                raise ValueError(
                    "data shape is incompatible with acquisition image dimensions"
                )
        except (KeyError, ValueError):
            warn(
                f"MCD file '{self.path.name}' corrupted: "
                "cannot read acquisition image dimensions; recovering from data shape"
            )
            width = np.amax(xs) + 1
            height = np.amax(ys) + 1
        if width * height != data.shape[0]:
            if strict:
                raise IOError(
                    f"MCD file '{self.path.name}' corrupted: "
                    "inconsistent acquisition image data size"
                )
            warn(
                f"MCD file '{self.path.name}' corrupted: "
                "inconsistent acquisition image data size"
            )
        # pixels without a record stay zero
        img = np.zeros((num_channels, height, width), dtype=np.float32)
        img[:, ys, xs] = np.transpose(data[:, 3:])
        return img

    def read_slide(
        self, slide: Slide, raw: bool = False
    ) -> Union[np.ndarray, bytes, None]:
        """Reads and decodes a slide image as numpy array using the ``imageio``
        package.

        .. note::
            Slide images are stored as binary data within the IMC .mcd file in
            an arbitrary encoding. The ``imageio`` package can decode most
            commonly used image file formats, but may fail for more obscure,
            in which case an ``IOError`` is raised.

        :param slide: the slide to read
        :param raw: if True, return the stored image bytes without decoding
        :return: the slide image, or ``None`` if no image is available for the
            specified slide
        :raises IOError: if the image data cannot be located or read
        """
        return self._read_embedded_image(
            slide.metadata,
            "ImageStartOffset",
            "ImageEndOffset",
            "image",
            f"slide {slide.id}",
            raw,
        )

    def read_panorama(
        self, panorama: Panorama, raw: bool = False
    ) -> Union[np.ndarray, bytes, None]:
        """Reads and decodes a panorama image as numpy array using the
        ``imageio`` package.

        :param panorama: the panorama to read
        :param raw: if True, return the stored image bytes without decoding
        :return: the panorama image as numpy array, or ``None`` if both image
            offsets of the panorama are zero
        :raises IOError: if the image data cannot be located or read
        """
        return self._read_embedded_image(
            panorama.metadata,
            "ImageStartOffset",
            "ImageEndOffset",
            "image",
            f"panorama {panorama.id}",
            raw,
        )

    def read_before_ablation_image(
        self, acquisition: Acquisition, raw: bool = False
    ) -> Union[np.ndarray, bytes, None]:
        """Reads and decodes a before-ablation image as numpy array using the
        ``imageio`` package.

        :param acquisition: the acquisition for which to read the
            before-ablation image
        :param raw: if True, return the stored image bytes without decoding
        :return: the before-ablation image as numpy array, or ``None`` if no
            before-ablation image is available for the specified acquisition
        :raises IOError: if the image data cannot be located or read
        """
        return self._read_embedded_image(
            acquisition.metadata,
            "BeforeAblationImageStartOffset",
            "BeforeAblationImageEndOffset",
            "before-ablation image",
            f"acquisition {acquisition.id}",
            raw,
        )

    def read_after_ablation_image(
        self, acquisition: Acquisition, raw: bool = False
    ) -> Union[np.ndarray, bytes, None]:
        """Reads and decodes a after-ablation image as numpy array using the
        ``imageio`` package.

        :param acquisition: the acquisition for which to read the
            after-ablation image
        :param raw: if True, return the stored image bytes without decoding
        :return: the after-ablation image as numpy array, or ``None`` if no
            after-ablation image is available for the specified acquisition
        :raises IOError: if the image data cannot be located or read
        """
        return self._read_embedded_image(
            acquisition.metadata,
            "AfterAblationImageStartOffset",
            "AfterAblationImageEndOffset",
            "after-ablation image",
            f"acquisition {acquisition.id}",
            raw,
        )

    def _read_embedded_image(
        self,
        metadata: dict,
        start_key: str,
        end_key: str,
        image_desc: str,
        owner_desc: str,
        raw: bool = False,
    ) -> Union[np.ndarray, bytes, None]:
        # Shared implementation of the slide/panorama/ablation image readers:
        # looks up the two offsets in ``metadata``, validates them and reads
        # the bytes in between. ``image_desc`` and ``owner_desc`` are only
        # used to build the error messages.
        try:
            data_start_offset = int(metadata[start_key])
            data_end_offset = int(metadata[end_key])
        except (KeyError, ValueError) as e:
            raise IOError(
                f"MCD file '{self.path.name}' corrupted: "
                f"cannot locate {image_desc} data for {owner_desc}"
            ) from e
        if data_start_offset == data_end_offset == 0:
            return None  # both offsets zero: no image stored
        # NOTE(review): the stored range is trimmed by 161 leading bytes and
        # one trailing byte; presumably a fixed-size header/terminator around
        # the encoded image -- confirm against the file format
        data_start_offset += 161
        data_end_offset -= 1
        if data_start_offset >= data_end_offset:
            raise IOError(
                f"MCD file '{self.path.name}' corrupted: "
                f"invalid {image_desc} data offsets for {owner_desc}"
            )
        try:
            return self._read_image(
                data_start_offset, data_end_offset - data_start_offset, raw
            )
        except Exception as e:
            raise IOError(
                f"MCD file '{self.path.name}' corrupted: "
                f"cannot read {image_desc} for {owner_desc}"
            ) from e

    def _read_schema_xml(
        self,
        encoding: str = "utf-16-le",
        start_sub: str = "<MCDSchema",
        end_sub: str = "</MCDSchema>",
    ) -> str:
        # Extracts the XML metadata document embedded in the file: the text
        # from the last occurrence of ``start_sub`` up to and including the
        # last occurrence of ``end_sub`` after it.
        if self._fh is None:
            raise IOError(f"MCD file '{self.path.name}' has not been opened")
        with mmap.mmap(self._fh.fileno(), 0, access=mmap.ACCESS_READ) as mm:
            # V1 contains multiple MCDSchema entries
            # As per imctools, the latest entry should be taken
            start_sub_encoded = start_sub.encode(encoding=encoding)
            start_index = mm.rfind(start_sub_encoded)
            if start_index == -1:
                raise IOError(
                    f"MCD file '{self.path.name}' corrupted: "
                    f"start of XML document '{start_sub}' not found"
                )
            end_sub_encoded = end_sub.encode(encoding=encoding)
            end_index = mm.rfind(end_sub_encoded, start_index)
            if end_index == -1:
                raise IOError(
                    f"MCD file '{self.path.name}' corrupted: "
                    f"end of XML document '{end_sub}' not found"
                )
            mm.seek(start_index)
            data = mm.read(end_index + len(end_sub_encoded) - start_index)
        return data.decode(encoding=encoding)

    def _read_image(
        self, data_offset: int, data_size: int, raw: bool = False
    ) -> Union[np.ndarray, bytes]:
        # Reads ``data_size`` bytes at ``data_offset``; returns them as-is if
        # ``raw`` is set, otherwise decoded with ``imageio``.
        if self._fh is None:
            raise IOError(f"MCD file '{self.path.name}' has not been opened")
        self._fh.seek(data_offset)
        data = self._fh.read(data_size)
        if raw:
            return data
        else:
            return imread(data)

    def __repr__(self) -> str:
        return str(self._path)
385 |
--------------------------------------------------------------------------------
/readimc/mcd_parser.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import re
3 | from typing import Dict, List, Optional, Tuple
4 | from warnings import warn
5 | from xml.etree import ElementTree as ET
6 |
7 | from .data import Acquisition, Panorama, Slide
8 |
9 |
class MCDParserError(Exception):
    def __init__(self, *args) -> None:
        """Raised when IMC .mcd file metadata is invalid and cannot be parsed"""
        super().__init__(*args)
14 |
15 |
16 | class MCDParser:
# Captures the namespace URI from a "{uri}tag" ElementTree tag name
# (read back via ``m.group("xmlns")``).
_XMLNS_REGEX = re.compile(r"{(?P<xmlns>.*)}")
# Splits a channel name such as "Ir(191)" into its ``metal`` and ``mass``
# parts.
_CHANNEL_REGEX = re.compile(r"^(?P<metal>[a-zA-Z]+)\((?P<mass>[0-9]+)\)$")
19 |
def __init__(self, schema_xml: str) -> None:
    """A class for parsing IMC .mcd file metadata

    The XML is parsed immediately; the namespace of the root element, if it
    has one, is remembered for later element lookups.

    :param schema_xml: IMC .mcd file metadata in proprietary XML format
    """
    self._schema_xml = schema_xml
    self._schema_xml_elem = ET.fromstring(self._schema_xml)
    namespace_match = self._XMLNS_REGEX.match(self._schema_xml_elem.tag)
    if namespace_match is None:
        self._schema_xml_xmlns = None
    else:
        self._schema_xml_xmlns = namespace_match.group("xmlns")
29 |
@property
def schema_xml(self) -> str:
    """The IMC .mcd file metadata XML string this parser was created with"""
    xml_text = self._schema_xml
    return xml_text
34 |
@property
def schema_xml_elem(self) -> ET.Element:
    """Root ElementTree element of the parsed IMC .mcd file metadata"""
    root_elem = self._schema_xml_elem
    return root_elem
39 |
@property
def schema_xml_xmlns(self) -> Optional[str]:
    """XML namespace of the metadata root element, or ``None`` if it has
    none"""
    namespace = self._schema_xml_xmlns
    return namespace
44 |
@property
def metadata(self) -> str:
    """Legacy accessor for `schema_xml`

    Emits a warning that names the replacement property on this class."""
    warn(
        "`MCDParser.metadata` will be removed in future readimc releases; "
        "use `MCDParser.schema_xml` instead"
    )
    return self.schema_xml
53 |
@property
def metadata_elem(self) -> ET.Element:
    """Legacy accessor for `schema_xml_elem`

    Emits a warning that names the replacement property on this class."""
    warn(
        "`MCDParser.metadata_elem` will be removed in future readimc releases; "
        "use `MCDParser.schema_xml_elem` instead"
    )
    return self.schema_xml_elem
62 |
@property
def metadata_xmlns(self) -> Optional[str]:
    """Legacy accessor for `schema_xml_xmlns`

    Emits a warning that names the replacement property on this class."""
    warn(
        "`MCDParser.metadata_xmlns` will be removed in future readimc releases; "
        "use `MCDParser.schema_xml_xmlns` instead"
    )
    return self.schema_xml_xmlns
71 |
def parse_slides(self) -> List[Slide]:
    """Parse all ``Slide`` elements of the schema, ordered by ascending slide ID"""
    parsed = []
    for slide_elem in self._find_elements("Slide"):
        parsed.append(self._parse_slide(slide_elem))
    return sorted(parsed, key=lambda parsed_slide: parsed_slide.id)
79 |
def _parse_slide(self, slide_elem: ET.Element) -> Slide:
    """Build a slide with its panoramas and acquisitions.

    Panoramas of type "Default" are not added to the slide, but the
    acquisitions found through them are (with ``panorama=None``). A warning
    is issued for every pair of acquisitions whose data blocks overlap.
    Panoramas and acquisitions are returned sorted by ID.

    :param slide_elem: the ``Slide`` XML element
    :return: the populated slide
    """
    slide = Slide(
        self._get_text_as_int(slide_elem, "ID"),
        self._get_metadata_dict(slide_elem),
    )
    panorama_elems = self._find_elements(f"Panorama[SlideID='{slide.id}']")
    for panorama_elem in panorama_elems:
        panorama = None
        panorama_id = self._get_text_as_int(panorama_elem, "ID")
        panorama_type = self._get_text_or_none(panorama_elem, "Type")
        if panorama_type != "Default":  # ignore "virtual" Panoramas
            panorama = self._parse_panorama(panorama_elem, slide)
            slide.panoramas.append(panorama)
        acquisition_roi_elems = self._find_elements(
            f"AcquisitionROI[PanoramaID='{panorama_id}']"
        )
        for acquisition_roi_elem in acquisition_roi_elems:
            acquisition_roi_id = self._get_text_as_int(acquisition_roi_elem, "ID")
            roi_point_elems = self._find_elements(
                f"ROIPoint[AcquisitionROIID='{acquisition_roi_id}']"
            )
            # ROI points are only kept when exactly four are present
            roi_points_um = None
            if len(roi_point_elems) == 4:
                roi_points_um = tuple(
                    (
                        self._get_text_as_float(roi_point_elem, "SlideXPosUm"),
                        self._get_text_as_float(roi_point_elem, "SlideYPosUm"),
                    )
                    for roi_point_elem in sorted(
                        roi_point_elems,
                        key=lambda roi_point_elem: self._get_text_as_int(
                            roi_point_elem, "OrderNumber"
                        ),
                    )
                )
            acquisition_elems = self._find_elements(
                f"Acquisition[AcquisitionROIID='{acquisition_roi_id}']"
            )
            for acquisition_elem in acquisition_elems:
                acquisition = self._parse_acquisition(
                    acquisition_elem, slide, panorama, roi_points_um  # type: ignore
                )
                slide.acquisitions.append(acquisition)
                if panorama is not None:
                    panorama.acquisitions.append(acquisition)
    for a, b in itertools.combinations(slide.acquisitions, 2):
        a_start = int(a.metadata["DataStartOffset"])
        a_end = int(a.metadata["DataEndOffset"])
        b_start = int(b.metadata["DataStartOffset"])
        b_end = int(b.metadata["DataEndOffset"])
        # two half-open ranges overlap iff each one starts before the other
        # ends; this also covers one block fully containing the other, which
        # a check of a's endpoints against b alone would miss
        if a_start < b_end and b_start < a_end:
            warn(
                f"Slide {slide.id} corrupted: "
                f"overlapping memory blocks for acquisitions {a.id} and {b.id}"
            )
    slide.panoramas.sort(key=lambda panorama: panorama.id)
    slide.acquisitions.sort(key=lambda acquisition: acquisition.id)
    return slide
138 |
def _parse_panorama(self, panorama_elem: ET.Element, slide: Slide) -> Panorama:
    # Build a Panorama for ``slide`` from the ID and metadata of the element.
    panorama_id = self._get_text_as_int(panorama_elem, "ID")
    panorama_metadata = self._get_metadata_dict(panorama_elem)
    return Panorama(slide, panorama_id, panorama_metadata)
145 |
def _parse_acquisition(
    self,
    acquisition_elem: ET.Element,
    slide: Slide,
    panorama: Optional[Panorama],
    roi_points_um: Optional[
        Tuple[
            Tuple[float, float],
            Tuple[float, float],
            Tuple[float, float],
            Tuple[float, float],
        ]
    ],
) -> Acquisition:
    # Build an Acquisition from its XML element and its AcquisitionChannel
    # elements (ordered by "OrderNumber"). The first three channels must be
    # named X, Y and Z; they are excluded from the channel count and from
    # the metal/mass/label lists.
    acq_id = self._get_text_as_int(acquisition_elem, "ID")
    channel_elems = sorted(
        self._find_elements(f"AcquisitionChannel[AcquisitionID='{acq_id}']"),
        key=lambda channel_elem: self._get_text_as_int(channel_elem, "OrderNumber"),
    )
    acquisition = Acquisition(
        slide,
        panorama,
        acq_id,
        roi_points_um,
        self._get_metadata_dict(acquisition_elem),
        len(channel_elems) - 3,
    )
    for position, channel_elem in enumerate(channel_elems):
        channel_name = self._get_text(channel_elem, "ChannelName")
        if position == 0:
            if channel_name != "X":
                raise MCDParserError(
                    f"First channel '{channel_name}' should be named 'X'"
                )
        elif position == 1:
            if channel_name != "Y":
                raise MCDParserError(
                    f"Second channel '{channel_name}' should be named 'Y'"
                )
        elif position == 2:
            if channel_name != "Z":
                raise MCDParserError(
                    f"Third channel '{channel_name}' should be named 'Z'"
                )
        if channel_name not in ("X", "Y", "Z"):
            parsed_name = self._CHANNEL_REGEX.match(channel_name)
            if parsed_name is None:
                raise MCDParserError(
                    "Cannot extract channel information "
                    f"from channel name '{channel_name}' "
                    f"for acquisition {acquisition.id}"
                )
            channel_label = self._get_text(channel_elem, "ChannelLabel")
            acquisition._channel_metals.append(parsed_name.group("metal"))
            acquisition._channel_masses.append(int(parsed_name.group("mass")))
            acquisition._channel_labels.append(channel_label)
    return acquisition
205 |
def _find_elements(self, path: str) -> List[ET.Element]:
    """Find all elements below the schema XML element that match a path.

    :param path: path expression passed to ``findall``
    :return: the matching elements, as returned by ``findall``
    """
    xmlns = self._schema_xml_xmlns
    # Map the empty prefix to the schema namespace so that unprefixed tag
    # names in the path resolve against it.
    namespaces = {"": xmlns} if xmlns is not None else None
    return self._schema_xml_elem.findall(path, namespaces=namespaces)
211 |
def _get_text_or_none(self, parent_elem: ET.Element, tag: str) -> Optional[str]:
    """Return the text of the first child with the given tag, if present.

    :param parent_elem: element whose children are searched
    :param tag: tag name of the child to look for
    :return: ``None`` if no such child exists, otherwise the child's text,
        with an empty string standing in for missing text
    """
    xmlns = self._schema_xml_xmlns
    # Map the empty prefix to the schema namespace so that the unprefixed
    # tag name resolves against it.
    namespaces = None if xmlns is None else {"": xmlns}
    child = parent_elem.find(tag, namespaces=namespaces)
    if child is None:
        return None
    return child.text or ""
218 |
def _get_text(self, parent_elem: ET.Element, tag: str) -> str:
    """Return the text of a required child element.

    :param parent_elem: element whose children are searched
    :param tag: tag name of the required child
    :return: the text found by ``_get_text_or_none``
    :raises MCDParserError: if ``_get_text_or_none`` returns ``None``
    """
    text = self._get_text_or_none(parent_elem, tag)
    if text is not None:
        return text
    raise MCDParserError(
        f"XML tag '{tag}' not found for parent XML tag '{parent_elem.tag}'"
    )
226 |
def _get_text_as_int(self, parent_elem: ET.Element, tag: str) -> int:
    """Return the text of a required child element converted to ``int``.

    :param parent_elem: element whose children are searched
    :param tag: tag name of the required child
    :return: the child's text as an integer
    :raises MCDParserError: if the text cannot be converted to ``int``
        (the original ``ValueError`` is chained as the cause)
    """
    text = self._get_text(parent_elem, tag)
    try:
        value = int(text)
    except ValueError as err:
        raise MCDParserError(
            f"Text '{text}' of XML tag '{tag}' cannot be converted to int "
            f"for parent XML tag '{parent_elem.tag}'"
        ) from err
    return value
236 |
def _get_text_as_float(self, parent_elem: ET.Element, tag: str) -> float:
    """Return the text of a required child element converted to ``float``.

    :param parent_elem: element whose children are searched
    :param tag: tag name of the required child
    :return: the child's text as a floating-point number
    :raises MCDParserError: if the text cannot be converted to ``float``
        (the original ``ValueError`` is chained as the cause)
    """
    text = self._get_text(parent_elem, tag)
    try:
        value = float(text)
    except ValueError as err:
        raise MCDParserError(
            f"Text '{text}' of XML tag '{tag}' cannot be converted to "
            f"float for parent XML tag '{parent_elem.tag}'"
        ) from err
    return value
246 |
def _get_metadata_dict(self, parent_elem: ET.Element) -> Dict[str, str]:
    """Collect the direct children of an element as a tag-to-text mapping.

    :param parent_elem: element whose direct children are collected
    :return: dictionary mapping each child's tag (with the schema
        namespace qualifier removed, if a namespace is set) to its text,
        with an empty string standing in for missing text; if a tag occurs
        more than once, the last occurrence wins
    """
    xmlns = self._schema_xml_xmlns
    if xmlns is None:
        return {child.tag: child.text or "" for child in parent_elem}
    # ElementTree writes qualified tags as "{namespace}tag".
    qualifier = f"{{{xmlns}}}"
    return {
        child.tag.replace(qualifier, ""): child.text or ""
        for child in parent_elem
    }
255 |
--------------------------------------------------------------------------------
/readimc/txt_file.py:
--------------------------------------------------------------------------------
1 | import re
2 | from os import PathLike
3 | from typing import List, Optional, Sequence, TextIO, Tuple, Union
4 | from warnings import warn
5 |
6 | import numpy as np
7 | import pandas as pd
8 |
9 | from .data import Acquisition, AcquisitionBase
10 | from .imc_file import IMCFile
11 |
12 |
13 | class TXTFile(IMCFile, AcquisitionBase):
14 | _CHANNEL_REGEX = re.compile(
15 | r"^(?P