├── .github
│   └── workflows
│       └── test.yml
├── .gitignore
├── .readthedocs.yaml
├── Makefile
├── Manifest.in
├── README.md
├── docs
│   ├── Makefile
│   ├── make.bat
│   └── source
│       ├── api.md
│       ├── changelog.md
│       ├── concepts.md
│       ├── conf.py
│       ├── examples.md
│       ├── index.md
│       ├── install.md
│       ├── log_config.md
│       ├── testing.md
│       └── usage.md
├── imc
│   ├── __init__.py
│   ├── cli.py
│   ├── data_models
│   │   ├── __init__.py
│   │   ├── project.py
│   │   ├── roi.py
│   │   └── sample.py
│   ├── defaults.py
│   ├── demo
│   │   ├── __init__.py
│   │   ├── generate_data.py
│   │   └── get_demo_data.py
│   ├── exceptions.py
│   ├── graphics.py
│   ├── interactive_volume_viewer.py
│   ├── logo.png
│   ├── ops
│   │   ├── __init__.py
│   │   ├── adjacency.py
│   │   ├── clustering.py
│   │   ├── community.py
│   │   ├── compensation.py
│   │   ├── domain.py
│   │   ├── mixture.py
│   │   ├── quant.py
│   │   └── signal.py
│   ├── py.typed
│   ├── scripts
│   │   ├── __init__.py
│   │   ├── illustrate.py
│   │   ├── inspect_ilastik_model.py
│   │   ├── inspect_mcds.py
│   │   ├── phenotype.py
│   │   ├── predict.py
│   │   ├── prepare.py
│   │   ├── process.py
│   │   ├── quantify.py
│   │   ├── segment_stacks.py
│   │   └── view.py
│   ├── segmentation.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── _test_layers.py
│   │   ├── conftest.py
│   │   ├── test_full_analysis.py
│   │   ├── test_graphics.py
│   │   ├── test_obj_creation.py
│   │   └── test_serialization.py
│   ├── types.py
│   └── utils.py
├── noxfile.py
├── pyproject.toml
└── requirements
    ├── requirements.cellpose.txt
    ├── requirements.deepcell.txt
    ├── requirements.dev.txt
    ├── requirements.doc.txt
    ├── requirements.stardist.txt
    └── requirements.txt
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies and run tests with a single version of Python
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Test imc package
5 |
6 | on:
7 | push:
8 | branches: [ main ]
9 | pull_request:
10 | branches: [ main ]
11 |
12 | jobs:
13 | linux:
14 | runs-on: ubuntu-latest
15 | steps:
16 | - uses: actions/checkout@v2
17 | - name: Set up Python 3.8
18 | uses: actions/setup-python@v2
19 | with:
20 | python-version: 3.8
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install wheel pytest
25 | pip install .[deepcell,astir]
26 | - name: Test with pytest
27 | run: |
28 | # Test package
29 | python -m pytest imc/
30 |
31 | # Run pipeline
32 | mkdir -p imctest
33 | cd imctest
34 | imc process https://zenodo.org/record/5018260/files/COVID19_brain_Patient03_ROI3_COVID19_olfactorybulb.txt?download=1
35 |
36 | # List output files
37 | ls -l processed/
38 |           ls -l processed/*/
39 | ls -l results/phenotyping
40 | - name: Cache resources
41 | id: cache-resources
42 | uses: actions/cache@v2
43 | with:
44 |         path: ~/.imc
45 | key: imc-resources-linux
46 |
47 | osx:
48 | runs-on: macos-10.14
49 | steps:
50 | - uses: actions/checkout@v2
51 | - name: Set up Python 3.8
52 | uses: actions/setup-python@v2
53 | with:
54 | python-version: 3.8
55 | - name: Install dependencies
56 | run: |
57 | python -m pip install --upgrade pip
58 | pip install wheel pytest
59 | pip install .[deepcell,astir]
60 | - name: Test with pytest
61 | run: |
62 | # Test package
63 | python -m pytest imc/
64 |
65 | # Run example processing pipeline
66 | mkdir -p imctest
67 | cd imctest
68 | imc process https://zenodo.org/record/5018260/files/COVID19_brain_Patient03_ROI3_COVID19_olfactorybulb.txt?download=1
69 |
70 | # List output files
71 | ls -l processed/
72 |           ls -l processed/*/
73 | ls -l results/phenotyping
74 | - name: Cache resources
75 | id: cache-resources
76 | uses: actions/cache@v2
77 | with:
78 |         path: ~/.imc
79 | key: imc-resources-osx
80 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # project specific
2 | data
3 | submission
4 | processed
5 | _models
6 | results
7 |
8 | *.tiff
9 | *.csv
10 |
11 |
12 | # ignore test files
13 | .tox
14 | _version.py
15 | pytest.log
16 | .coverage*
17 |
18 | # Build-related stuff
19 | build/
20 | dist/
21 | *.egg-info
22 |
23 |
24 | # toy/experimental files
25 | *.txt
26 | # *.csv
27 | *.tsv
28 | *.pkl
29 | *.pickle
30 | *.svg
31 | *.png
32 | *.jpg
33 | *.jpeg
34 |
35 | # ignore mypy
36 | .mypy*
37 |
38 | # ignore eggs
39 | .eggs/
40 |
41 | # ignore built docs
42 | doc/build/*
43 |
44 | # generic ignore list:
45 | *.lst
46 |
47 | # Compiled source
48 | *.com
49 | *.class
50 | *.dll
51 | *.exe
52 | *.o
53 | *.so
54 | *.pyc
55 |
56 | # Packages
57 | # it's better to unpack these files and commit the raw source
58 | # git has its own built in compression methods
59 | *.7z
60 | *.dmg
61 | *.gz
62 | *.iso
63 | *.jar
64 | *.rar
65 | *.tar
66 | *.zip
67 |
68 | # Logs and databases
69 | *.log
70 | *.sql
71 | *.sqlite
72 |
73 | # OS generated files
74 | .DS_Store
75 | .DS_Store?
76 | ._*
77 | .Spotlight-V100
78 | .Trashes
79 | ehthumbs.db
80 | Thumbs.db
81 |
82 | # Sublime files
83 | *.sublime-*
84 |
85 | # Gedit temporary files
86 | *~
87 |
88 | # libreoffice lock files:
89 | .~lock*
90 |
91 | # IDE-specific items
92 | .idea/
93 |
94 | # pytest-related
95 | .cache/
96 | .coverage*
97 | coverage.xml
98 |
99 | # Reserved files for comparison
100 | *RESERVE*
101 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yaml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Set the version of Python and other tools you might need
9 | build:
10 | os: ubuntu-20.04
11 | tools:
12 | python: "3.9"
13 |
14 | # Build documentation in the docs/ directory with Sphinx
15 | sphinx:
16 | configuration: docs/source/conf.py
17 |
18 | # If using Sphinx, optionally build your docs in additional formats such as PDF
19 | # formats:
20 | # - pdf
21 |
22 | # Optionally declare the Python requirements required to build your docs
23 | python:
24 | system_packages: true
25 | install:
26 | - method: pip
27 | path: .
28 | extra_requirements:
29 | - doc
30 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .DEFAULT_GOAL := all
2 |
3 |
4 | NAME=$(shell basename `pwd`)
5 | DOCS_DIR="docs"
6 |
7 |
8 | help: ## Display help and quit
9 | @echo Makefile for the $(NAME) package.
10 | @echo Available commands:
11 | @grep -E '^[0-9a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
12 | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m\
13 | %s\n", $$1, $$2}'
14 |
15 | all: install test ## Install the package and run tests
16 |
17 | clean_build:
18 | rm -rf build/
19 |
20 | clean_dist:
21 | rm -rf dist/
22 |
23 | clean_eggs:
24 | rm -rf *.egg-info
25 |
26 | clean_mypy:
27 | rm -rf .mypy_cache/
28 |
29 | clean_docs:
30 | rm -rf docs/build/*
31 |
32 | clean_tests:
33 | rm -rf /tmp/pytest*
34 |
35 | clean: clean_dist clean_eggs clean_build clean_mypy clean_docs clean_tests ## Remove build, mypy cache, test and doc artifacts
36 |
37 | _install:
38 | # python setup.py sdist
39 | # python -m pip wheel --no-index --no-deps --wheel-dir dist dist/*.tar.gz
40 | # python -m pip install dist/*-py3-none-any.whl --user --upgrade
41 | python -m pip install .
42 |
43 | install: ## Install the package
44 | ${MAKE} clean
45 | ${MAKE} _install
46 | ${MAKE} clean
47 |
48 | docs: ## Build the documentation
49 | ${MAKE} -C $(DOCS_DIR) html
50 | xdg-open $(DOCS_DIR)/build/html/index.html
51 |
52 |
53 | lint:
54 | -flake8 --count --ignore E501,F401,F841,W503,E402,E203,E266,E722 --exclude tests/ imc/
55 |
56 | test: lint ## Run the tests
57 | python -m pytest -m "not slow" $(NAME)/
58 |
59 | backup_time:
60 | echo "Last backup: " `date` >> _backup_time
61 | chmod 700 _backup_time
62 |
63 | _sync:
64 | rsync --copy-links --progress -r \
65 | . afr4001@pascal.med.cornell.edu:projects/$(NAME)
66 |
67 | sync: _sync backup_time ## [dev] Sync data/code to SCU server
68 |
69 | build: test
70 | python setup.py sdist bdist_wheel
71 |
72 | pypitest: build
73 | twine \
74 | upload \
75 | -r pypitest dist/*
76 |
77 | pypi: build
78 | twine \
79 | upload \
80 | dist/*
81 |
82 | .PHONY : clean_build clean_dist clean_eggs clean_mypy clean_docs clean_tests \
83 | 	clean _install install docs test backup_time _sync sync \
84 | build pypitest pypi
85 |
--------------------------------------------------------------------------------
/Manifest.in:
--------------------------------------------------------------------------------
1 | include AUTHORS.md
2 | include CONTRIBUTING.md
3 | include CHANGELOG.md
4 | include LICENSE
5 | include README.md
6 |
7 | recursive-include requirements *
8 | recursive-include tests *
9 | recursive-include docs *.md *.rst conf.py Makefile make.bat
10 | recursive-exclude * __pycache__
11 | recursive-exclude * *.py[co]
12 |
13 | global-include *.typed
14 |
15 | include logo.png
16 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | # Imaging mass cytometry
6 |
7 | A package for processing and analysis of imaging mass cytometry (IMC) data.
8 |
9 | It implements image- and channel-wise quality control, quantification of cell
10 | intensity and morphology, cell type discovery through clustering, automated
11 | cell type labeling, community and super-community detection, and differential
12 | comparisons between sample groups, in addition to many handy visualization tools.
13 | Above all, it is a tool for using IMC data at scale.
14 |
15 | Development is still underway, so use at your own risk.
16 |
17 |
18 | ## Requirements and installation
19 |
20 | Requires `Python >= 3.9`. `imc` uses a `pyproject.toml`-only configuration, so you will need an up-to-date version of `pip` before installing. System packages such as `gcc` and `g++` also need to be installed, e.g. with `sudo apt install g++` or your system's equivalent. We also highly recommend installing the package in a `conda` environment to avoid dependency issues.
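A minimal environment setup could look like this (a sketch; the environment name is illustrative):

```bash
# create and activate a dedicated environment, then update pip
conda create -n imc python=3.9
conda activate imc
python -m pip install --upgrade pip
```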
21 |
22 | To install the latest development version:
23 | ```bash
24 | git clone https://github.com/ElementoLab/imc.git
25 | cd imc
26 | make install
27 | ```
28 |
29 | Install from [PyPI](https://pypi.org/project/imc/) with [`pip`](https://pip.pypa.io/) or with [poetry](https://python-poetry.org/):
30 | ```bash
31 | pip install imc
32 | # or
33 | poetry add imc
34 | ```
35 |
36 | ## Quick start
37 |
38 | Install the package from [PyPI](https://pypi.org/project/imc/) with extra packages required for all steps:
39 | ```bash
40 | pip install 'imc[extra]'
41 | # or
42 | poetry add 'imc[extra]'
43 | ```
44 |
45 | ### Use case 1 (pipeline processing)
46 |
47 | #### Example: Lung sample processing from MCD to single-cell h5ad
48 |
49 | One-line IMC data processing:
50 | ```bash
51 | # Run pipeline in one step with remote MCD file
52 | MCD_URL=https://zenodo.org/record/4110560/files/data/20200612_FLU_1923/20200612_FLU_1923.mcd
53 | imc process $MCD_URL
54 | ```
55 | `imc` also supports TXT or TIFF files as input, either local or remote:
56 | ```bash
57 | # Run pipeline in one step with remote TXT file
58 | TXT_URL=https://zenodo.org/record/5018260/files/COVID19_brain_Patient03_ROI3_COVID19_olfactorybulb.txt?download=1
59 | imc process $TXT_URL
60 | ```
61 | Input can be MCD, TIFF, or TXT files.
62 | Several files can be given to `imc process` at once. See more with the `--help` option.
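For example (the paths are illustrative):
```bash
# process several local and/or remote inputs in one invocation
imc process data/sample1.mcd data/sample2.txt data/sample3_full.tiff
```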
63 |
64 | `imc` is nonetheless very modular and allows the user to run any of the steps separately as well.
65 |
66 | The above is also equivalent to the following:
67 | ```bash
68 | MCD_URL=https://zenodo.org/record/4110560/files/data/20200612_FLU_1923/20200612_FLU_1923.mcd
69 | SAMPLE=20200612_FLU_1923
70 |
71 | wget -O data/${SAMPLE}.mcd $MCD_URL
72 |
73 | ## output description of acquired data
74 | imc inspect data/${SAMPLE}.mcd
75 |
76 | ## convert MCD to TIFFs and auxiliary files
77 | imc prepare \
78 | --ilastik \
79 | --n-crops 0 \
80 | --ilastik-compartment nuclear \
81 | data/${SAMPLE}.mcd
82 |
83 | ## For each TIFF file, predict pixel probabilities and segment cell instances
84 | TIFFS=processed/${SAMPLE}/tiffs/${SAMPLE}*_full.tiff
85 |
86 | ## Output pixel probabilities of nucleus, membrane and background using ilastik
87 | imc predict $TIFFS
88 |
89 | ## Segment cell instances with DeepCell
90 | imc segment \
91 | --from-probabilities \
92 | --model deepcell \
93 | --compartment both $TIFFS
94 |
95 | ## Quantify channel intensity and morphology for each single cell in every image
96 | imc quantify $TIFFS
97 | ```
98 |
99 | Once all MCD files have been processed, create a concatenated AnnData object containing all cells in the project.
100 |
101 | ```python
102 | from glob import glob
103 |
104 | import anndata
105 | files = glob('processed/*.h5ad')
106 | adatas = [anndata.read(f) for f in files]
107 | adata = anndata.concat(adatas)
108 | adata.write('processed/quant.h5ad')
109 | ```
110 |
111 | To perform batch correction and cell clustering:
112 | ```bash
113 | ## Phenotype cells into clusters
114 | imc phenotype processed/quant.h5ad
115 | ```
116 |
117 | There are many customization options for each step. Run `imc --help` or `imc <command> --help` to see them all.
118 |
119 | `imc` also includes a lightweight interactive image viewer:
120 | ```bash
121 | imc view $TIFFS
122 | ```
123 |
124 | There is also an interface to the more full-fledged `napari` image viewer:
125 | ```bash
126 | imc view --napari data/${SAMPLE}.mcd # view MCD file
127 | napari $TIFFS # view TIFF files directly with napari. Requires napari
128 | ```
129 |
130 | A quick example of downstream analysis of the single-cell data in an IPython/Jupyter notebook:
131 | ```python
132 | import scanpy as sc
133 | a = sc.read('processed/quant.h5ad')
134 | sc.pp.log1p(a)
135 | sc.pp.pca(a)
136 | sc.pp.neighbors(a)
137 | sc.tl.umap(a)
138 | sc.pl.umap(a, color=a.var.index)
139 | ```
140 |
141 | ### Use case 2 (API usage)
142 |
143 | #### Demo data (synthetic)
144 | ```python
145 | >>> from imc.demo import generate_project
146 | >>> prj = generate_project(n_samples=2, rois_per_sample=3, shape=(8, 8))
147 | >>> prj
148 | Project 'project' with 2 samples and 6 ROIs in total.
149 |
150 | >>> prj.samples # type: List[IMCSample]
151 | [Sample 'test_sample_01' with 3 ROIs,
152 | Sample 'test_sample_02' with 3 ROIs]
153 |
154 | >>> prj.rois # type: List[ROI]
155 | [Region 1 of sample 'test_sample_01',
156 | Region 2 of sample 'test_sample_01',
157 | Region 3 of sample 'test_sample_01',
158 | Region 1 of sample 'test_sample_02',
159 | Region 2 of sample 'test_sample_02',
160 | Region 3 of sample 'test_sample_02']
161 |
162 | >>> prj.samples[0].rois # type: List[ROI]
163 | [Region 1 of sample 'test_sample_01',
164 | Region 2 of sample 'test_sample_01',
165 | Region 3 of sample 'test_sample_01']
166 |
167 | >>> roi = prj.rois[0] # Let's assign one ROI to explore it
168 | >>> roi.channel_labels # type: pandas.Series; `channel_names`, `channel_metals` also available
169 | 0 Ch01(Ch01)
170 | 1 Ch02(Ch02)
171 | 2 Ch03(Ch03)
172 | Name: channel, dtype: object
173 |
174 | >>> roi.mask # type: numpy.ndarray
175 | array([[0, 0, 0, 0, 0, 0, 0, 0],
176 | [0, 0, 0, 0, 0, 0, 0, 0],
177 | [0, 0, 0, 0, 0, 0, 1, 0],
178 | [0, 0, 0, 0, 0, 0, 0, 0],
179 | [0, 2, 0, 0, 0, 3, 0, 0],
180 | [0, 0, 0, 0, 0, 0, 0, 0],
181 | [0, 0, 4, 0, 0, 0, 0, 0],
182 | [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)
183 |
184 | >>> roi.stack.shape # roi.stack -> type: numpy.ndarray
185 | (3, 8, 8)
186 |
187 | >>> # QC
188 | >>> prj.channel_correlation()
189 | >>> prj.channel_summary()
190 |
191 | >>> # Cell type discovery
192 | >>> prj.cluster_cells()
193 | >>> prj.find_communities()
194 |
195 | ```
196 | #### Demo data (real)
197 | ```python
198 | >>> import imc.demo
199 | >>> imc.demo.datasets
200 | ['jackson_2019_short', 'jackson_2019_short_joint']
201 |
202 | >>> prj = imc.demo.get_dataset('jackson_2019_short')
203 | >>> prj # type: Project
204 | Project 'jackson_2019_short' with 4 samples and 4 ROIs in total.
205 |
206 | >>> prj.samples # type: List[IMCSample]
207 | [Sample 'BaselTMA_SP41_15.475kx12.665ky_10000x8500_5_20170905_90_88_X11Y5_242_a0' with 1 ROI,
208 | Sample 'BaselTMA_SP41_25.475kx12.665ky_8000x8500_3_20170905_90_88_X11Y5_235_a0' with 1 ROI,
209 | Sample 'BaselTMA_SP41_33.475kx12.66ky_8500x8500_2_20170905_24_61_X3Y4_207_a0' with 1 ROI,
210 | Sample 'BaselTMA_SP41_33.475kx12.66ky_8500x8500_2_20170905_33_61_X4Y4_215_a0' with 1 ROI]
211 |
212 | >>> prj.samples[0].channel_labels # type: pandas.Series
213 | channel
214 | 0 Ar80(Ar80)
215 | 1 Ru96(Ru96)
216 | 2 Ru98(Ru98)
217 | 3 Ru99(Ru99)
218 | 4 Ru100(Ru100)
219 | 5 Ru101(Ru101)
220 | 6 Ru102(Ru102)
221 | 7 Ru104(Ru104)
222 | 8 HistoneH3(In113)
223 | 9 EMPTY(Xe126)
224 | 10 EMPTY(I127)
225 | 11 HistoneH3(La139)
226 | ...
227 | 42 vWF-CD31(Yb172)
228 | 43 mTOR(Yb173)
229 | 44 Cytokeratin7(Yb174)
230 | 45 PanCytokeratin-KeratinEpithelial(Lu175)
231 | 46 CleavedPARP-CleavedCaspase3(Yb176)
232 | 47 DNA1(Ir191)
233 | 48 DNA2(Ir193)
234 | 49 EMPTY(Pb206)
235 | 50 EMPTY(Pb207)
236 | 51 EMPTY(Pb208)
237 | Name: BaselTMA_SP41_15.475kx12.665ky_10000x8500_5_20170905_90_88_X11Y5_242_a0, dtype: object
238 | >>> prj.plot_channels(['DNA2', 'Ki67', 'Cytokeratin7'])
239 |
240 | ```
241 |
242 | #### Your own data
243 |
244 | The best way is to have a CSV file with one row per sample, or one row per ROI.
245 | That will ensure additional sample/ROI metadata is passed to the objects and used later in analysis.
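As a sketch, such a CSV could look like this (the `sample_name`, `roi_name`, and `roi_number` columns match the package defaults; any further columns are arbitrary metadata):

```
sample_name,roi_name,roi_number,disease_group
sample1,sample1-01,1,control
sample1,sample1-02,2,control
sample2,sample2-01,1,case
```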
246 | Pass the path to the CSV file to the `Project` object constructor:
247 |
248 | ```python
249 | from imc import Project
250 |
251 | prj = Project() # will search current directory for Samples/ROIs
252 |
253 | prj = Project(processed_dir="processed") # will search `processed` for Samples/ROIs
254 |
255 | prj = Project("path/to/sample/annotation.csv", processed_dir="processed")
256 | # ^^ will use metadata from CSV and use the files in `processed`.
257 | ```
258 |
259 | However, if no CSV is given, `Project` will search the current directory or the
260 | `processed_dir` argument for samples and ROIs.
261 |
262 | The `processed_dir` directory can be structured in two ways (see the sketch below):
263 | 1. One directory per sample.
264 | - Inside there is a directory `"tiffs"` which contains the stack `"*_full.tiff"`, channel labels
265 | `"*_full.csv"` and optionally a segmentation `"*_full_mask.tiff"`.
266 |
267 | 2. All samples in the same directory `processed_dir`.
268 | - Inside the one directory there are stack `"*_full.tiff"`, channel label `"*_full.csv"` and
269 | optionally segmentation `"*_full_mask.tiff"` files.
270 |
271 | The default is option 1. If your data is laid out as in option 2, pass `subfolder_per_sample=False`:
272 |
273 | ```python
274 | prj = Project(subfolder_per_sample=False)
275 | ```
276 |
277 | The expected files are produced by common preprocessing pipelines such as
278 | [imcpipeline](https://github.com/elementolab/imcpipeline) or [imcyto](https://nf-co.re/imcyto).
279 |
280 |
281 | ## Documentation
282 |
283 | Documentation is for now mostly a skeleton but will be expanded soon:
284 |
285 | ```bash
286 | make docs
287 | ```
288 |
289 | ## Testing
290 |
291 | Tests are still very limited, but you can run tests this way:
292 |
293 | ```bash
294 | pip install pytest # install testing package
295 | python -m pytest --pyargs imc
296 | ```
297 |
298 | For data processing, running the example lung sample should ensure everything works smoothly.
299 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/source/api.md:
--------------------------------------------------------------------------------
1 | # API
2 |
3 | The great flexibility of `imc` comes from the ability to compose workflows using the API.
4 |
5 | It provides a rich but abstract `Project` object (`imc.data_models.project.Project`, exposed as `imc.Project`) and implements various modules building on it depending on the data type.
6 |
7 | In addition, the `imc.operations` module contains several analysis-independent methods and the `imc.utils` module provides low-level functions of general use.
8 |
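A minimal sketch of composing a workflow through the API (assuming data laid out as described in the README; the method names are taken from the examples there):

```python
from imc import Project

# load a project from a processed directory and run a coarse analysis
prj = Project(processed_dir="processed")
prj.channel_summary()  # channel-wise quality control
prj.cluster_cells()    # cell type discovery through clustering
```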
9 | ## imc.data_models.project
10 | ```{eval-rst}
11 | .. automodule:: imc.data_models.project
12 | :members:
13 | ```
14 |
15 | ## imc.data_models.sample
16 | ```{eval-rst}
17 | .. automodule:: imc.data_models.sample
18 | :members:
19 | ```
20 |
21 | ## imc.data_models.roi
22 | ```{eval-rst}
23 | .. automodule:: imc.data_models.roi
24 | :members:
25 | ```
26 |
27 | ## imc.operations
28 | ### imc.ops.signal
29 | ```{eval-rst}
30 | .. automodule:: imc.ops.signal
31 | :members:
32 | ```
33 | ### imc.ops.compensation
34 | ```{eval-rst}
35 | .. automodule:: imc.ops.compensation
36 | :members:
37 | ```
38 | ### imc.ops.mixture
39 | ```{eval-rst}
40 | .. automodule:: imc.ops.mixture
41 | :members:
42 | ```
43 | ### imc.ops.domain
44 | ```{eval-rst}
45 | .. automodule:: imc.ops.domain
46 | :members:
47 | ```
48 | ### imc.ops.quant
49 | ```{eval-rst}
50 | .. automodule:: imc.ops.quant
51 | :members:
52 | ```
53 | ### imc.ops.clustering
54 | ```{eval-rst}
55 | .. automodule:: imc.ops.clustering
56 | :members:
57 | ```
58 | ### imc.ops.adjacency
59 | ```{eval-rst}
60 | .. automodule:: imc.ops.adjacency
61 | :members:
62 | ```
63 | ### imc.ops.community
64 | ```{eval-rst}
65 | .. automodule:: imc.ops.community
66 | :members:
67 | ```
68 | ## imc.graphics
69 | ```{eval-rst}
70 | .. automodule:: imc.graphics
71 | :members:
72 | ```
73 |
74 | ## imc.utils
75 | ```{eval-rst}
76 | .. automodule:: imc.utils
77 | :members:
78 | ```
79 |
80 | ## imc.types
81 | ```{eval-rst}
82 | .. automodule:: imc.types
83 | :members:
84 | ```
--------------------------------------------------------------------------------
/docs/source/changelog.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to this project will be documented in this file.
4 |
5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7 |
8 | ## [Unreleased]
9 | ### Added
10 | -
11 | ### Changed
12 | -
13 | ### Removed
14 | -
15 |
16 | ## [0.0.12] - 2021-07-19
17 | ### Added
18 | - functions to handle multi-cell masks (topological domains)
19 | - napari + napari_imc to view MCD files
20 | ### Changed
21 | - fix support of OSX in ilastik segmentation
22 | - centralized package data under `.imc`
23 |
24 | ## [0.0.11] - 2021-07-01
25 | ### Added
26 | - Command `imc process`.
27 |
28 | ## [0.0.10] - 2021-07-01
29 | ### Added
30 | - CI on Github actions
31 | - add more CLI commands
32 | ### Changed
33 | - centralized package data under `.imc`
34 | - fix packaging
35 |
36 | ## [0.0.8] - 2021-06-01
37 | ### Added
38 | - add `.pyproject.toml`
39 | - support subcellular mask quantification
40 | ### Changed
41 | - rasterized linecollection plots by default
42 |
43 | ## [0.0.7] - 2021-04-26
44 | ### Added
45 | - initial support subcellular mask quantification
46 | - DeepCell postprocessing to match nuclear and cellular masks
47 | - function to plot and extract panorama images matching ROIs
48 | - Cellpose as segmentation method
49 | - add CLI command for segmentation
50 | ### Changed
51 | - rasterized linecollection plots by default
52 |
53 | ## [0.0.6] - 2020-12-16
54 | ### Added
55 | - segmentation module
56 | - mask layers to support alternative segmentations
57 | ### Changed
58 | - rasterized linecollection plots by default
59 | ### Removed
60 | - graphics code that was abstracted to the `seaborn_extensions` module
62 |
63 | ## [0.0.5] - 2020-12-07
64 | ### Added
65 | - segmentation module
66 | - mask layers to support alternative segmentations
67 | ### Changed
68 | - export panoramas by default
69 | - support ome-tiff
70 | - upgrade to `imctools==2.1.0`
71 |
72 | ## [0.0.4] - 2020-10-07
73 |
74 |
75 | ## [0.0.3] - 2020-06-17
76 | ### Changed
77 | - Patch `pathlib.Path` to support path building with `+` (operator overload)
78 |
79 | ## [0.0.2] - 2020-06-15
80 | ### Added
81 | - Many features
82 |
83 |
84 | ## [0.0.1] - 2020-04-14
85 | ### Added
86 | - Project, Sample and ROI modules/objects
87 |
88 | [Unreleased]: https://github.com/ElementoLab/imc/compare/v0.0.12...HEAD
89 | [0.0.12]: https://github.com/ElementoLab/imc/compare/0.0.11...v0.0.12
90 | [0.0.11]: https://github.com/ElementoLab/imc/compare/0.0.10...v0.0.11
91 | [0.0.10]: https://github.com/ElementoLab/imc/compare/0.0.9...v0.0.10
92 | [0.0.9]: https://github.com/ElementoLab/imc/compare/0.0.8...v0.0.9
93 | [0.0.8]: https://github.com/ElementoLab/imc/compare/0.0.7...v0.0.8
94 | [0.0.7]: https://github.com/ElementoLab/imc/compare/0.0.6...v0.0.7
95 | [0.0.6]: https://github.com/ElementoLab/imc/compare/0.0.5...v0.0.6
96 | [0.0.5]: https://github.com/ElementoLab/imc/compare/0.0.4...v0.0.5
97 | [0.0.4]: https://github.com/ElementoLab/imc/compare/0.0.3...v0.0.4
98 | [0.0.3]: https://github.com/ElementoLab/imc/compare/0.0.2...v0.0.3
99 | [0.0.2]: https://github.com/ElementoLab/imc/compare/0.0.1...v0.0.2
100 | [0.0.1]: https://github.com/ElementoLab/imc/releases/tag/v0.0.1
101 |
--------------------------------------------------------------------------------
/docs/source/concepts.md:
--------------------------------------------------------------------------------
1 | # Concepts
2 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import sphinx_rtd_theme
5 |
6 | # If extensions (or modules to document with autodoc) are in another directory,
7 | # add these directories to sys.path here. If the directory is relative to the
8 | # documentation root, use os.path.abspath to make it absolute, like shown here.
9 | sys.path.insert(0, os.path.abspath("../../"))
10 |
11 |
12 | # Configuration file for the Sphinx documentation builder.
13 | #
14 | # This file only contains a selection of the most common options. For a full
15 | # list see the documentation:
16 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
17 |
18 | # -- Path setup --------------------------------------------------------------
19 |
20 | # If extensions (or modules to document with autodoc) are in another directory,
21 | # add these directories to sys.path here. If the directory is relative to the
22 | # documentation root, use os.path.abspath to make it absolute, like shown here.
23 | #
24 | # import os
25 | # import sys
26 | # sys.path.insert(0, os.path.abspath('.'))
27 |
28 |
29 | # -- Project information -----------------------------------------------------
30 |
31 | project = "imc"
32 | copyright = "2021, Andre Rendeiro"
33 | author = "Andre Rendeiro"
34 |
35 |
36 | # -- General configuration ---------------------------------------------------
37 |
38 | # Add any Sphinx extension module names here, as strings. They can be
39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
40 | # ones.
41 | extensions = [
42 | "sphinx.ext.autodoc",
43 | "sphinx.ext.autosummary",
44 | "sphinx.ext.intersphinx",
45 | "sphinx.ext.todo",
46 | "sphinx.ext.coverage",
47 | "sphinx.ext.viewcode",
48 | # "numpydoc", # numpy-style docs
49 | "sphinx.ext.napoleon", # numpy-style docs
50 | "sphinx_issues",
51 | "myst_parser", # to use markdown
52 | "sphinxarg.ext", # for CLI parsing of arguments
53 |     "sphinx_autodoc_typehints",  # <- handy once the whole codebase has type hints
54 | # "sphinxcontrib.jupyter", <- this could be useful to make jupyter NBs
55 | ]
56 | autodoc_typehints = "signature" # https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#confval-autodoc_typehints
57 |
58 | # Add any paths that contain templates here, relative to this directory.
59 | templates_path = ["_templates"]
60 |
61 | # List of patterns, relative to source directory, that match files and
62 | # directories to ignore when looking for source files.
63 | # This pattern also affects html_static_path and html_extra_path.
64 | exclude_patterns = []
65 |
66 |
67 | # -- Options for type of input -----------------------------------------------
68 | source_suffix = {
69 | ".rst": "restructuredtext",
70 | ".txt": "markdown",
71 | ".md": "markdown",
72 | }
73 |
74 | # -- Options for HTML output -------------------------------------------------
75 |
76 | # The theme to use for HTML and HTML Help pages. See the documentation for
77 | # a list of builtin themes.
78 |
79 | # html_theme = "alabaster"
80 | html_theme = "sphinx_rtd_theme"
81 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
82 | # html_theme = "sphinx_material"
83 | # html_theme_options = {
84 | # "color_primary": "#ff4500",
85 | # }
86 |
87 | # Add any paths that contain custom static files (such as style sheets) here,
88 | # relative to this directory. They are copied after the builtin static files,
89 | # so a file named "default.css" will overwrite the builtin "default.css".
90 | html_static_path = ["_static"]
91 |
92 | issues_github_path = "ElementoLab/imc"
93 |
94 | napoleon_numpy_docstring = True
95 | napoleon_google_docstring = False
96 | napoleon_use_param = False
97 | napoleon_use_ivar = True
98 |
99 | # Example configuration for intersphinx: refer to the Python standard library.
100 | intersphinx_mapping = {
101 | "python": ("http://docs.python.org/3", None),
102 | "urllib3": ("http://urllib3.readthedocs.org/en/latest", None),
103 | "numpy": ("http://docs.scipy.org/doc/numpy/", None),
104 | "scipy": ("https://docs.scipy.org/doc/scipy-1.3.0/reference/", None),
105 | "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None),
106 | }
107 |
--------------------------------------------------------------------------------
/docs/source/examples.md:
--------------------------------------------------------------------------------
1 | # Examples
2 |
--------------------------------------------------------------------------------
/docs/source/index.md:
--------------------------------------------------------------------------------
1 | # Welcome
2 |
3 | `imc` is a Python library for the analysis of imaging mass cytometry data.
4 |
5 | Head to [installation](/install) for installation instructions, to
6 | [usage](/usage) for quick use, or have a look at the catalogue of available
7 | functions in the [API](/api).
8 |
9 |
10 | ```{admonition} imc is still in development!
11 | This means things may change in the future, use at your own risk.
12 | ```
13 |
14 | ## Contents
15 |
16 | ```{toctree}
17 | ---
18 | maxdepth: 1
19 | ---
20 | install.md
21 | usage.md
22 | examples.md
23 | concepts.md
24 | log_config.md
25 | api.md
26 | testing.md
27 | changelog.md
28 | ```
29 |
30 | ## Links
31 |
32 | - Documentation: [http://imc.readthedocs.io/](http://imc.readthedocs.io/)
33 | - Issues and source code: [https://github.com/ElementoLab/imc](https://github.com/ElementoLab/imc)
34 |
--------------------------------------------------------------------------------
/docs/source/install.md:
--------------------------------------------------------------------------------
1 | # Install
2 |
--------------------------------------------------------------------------------
/docs/source/log_config.md:
--------------------------------------------------------------------------------
1 | # Logging and configuration
2 |
--------------------------------------------------------------------------------
/docs/source/testing.md:
--------------------------------------------------------------------------------
1 | # Testing
2 |
--------------------------------------------------------------------------------
/docs/source/usage.md:
--------------------------------------------------------------------------------
1 | # Usage
2 |
--------------------------------------------------------------------------------
/imc/__init__.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 |
3 | # allow type annotations referencing classes that are not yet defined
4 | from __future__ import annotations
5 | import os
6 | import sys
7 | import logging
8 | from functools import partialmethod
9 | from pathlib import Path as _Path
10 |
11 | from outdated import warn_if_outdated
12 | from joblib import Memory
13 | import matplotlib
14 | import matplotlib.pyplot as plt
15 | import seaborn as _sns
16 |
17 | try:
18 |     # The "imc/_version.py" file does not exist in the repository;
19 |     # it is generated by setuptools_scm
20 |     # when the package is built.
21 | from imc._version import version
22 |
23 | __version__ = version
24 | except ImportError:
25 | from setuptools_scm import get_version as _get_version
26 |
27 | version = __version__ = _get_version(root="..", relative_to=__file__)
28 |
29 |
30 | warn_if_outdated("imc", __version__)
31 |
32 | plt.rcParams["svg.fonttype"] = "none"
33 | plt.rcParams["font.family"] = "Arial"
34 | plt.rcParams["font.sans-serif"] = ["Arial"]
35 | plt.rcParams["text.usetex"] = False
36 |
37 | import scanpy as _sc
38 |
39 | _sc.settings.n_jobs = -1
40 |
41 |
42 | def setup_logger(name: str = "imc", level: int = logging.INFO) -> logging.Logger:
43 | """Setup the logger for the package."""
44 | logger = logging.getLogger(name)
45 | logger.setLevel(level)
46 |
47 | handler = logging.StreamHandler(sys.stdout)
48 | handler.setLevel(level)
49 | formatter = logging.Formatter("%(asctime)s - %(message)s")
50 | handler.setFormatter(formatter)
51 | logger.addHandler(handler)
52 | return logger
53 |
54 |
55 | LOGGER = setup_logger()
56 |
57 | # Setup joblib memory
58 | _Path.mkdir = partialmethod(_Path.mkdir, exist_ok=True, parents=True)
59 | JOBLIB_CACHE_DIR = _Path("~/.imc").expanduser()
60 | JOBLIB_CACHE_DIR.mkdir()
61 | MEMORY = Memory(location=JOBLIB_CACHE_DIR, verbose=0)
62 |
63 | # Decorate seaborn clustermap
64 | # _sns.clustermap = colorbar_decorator(_sns.clustermap)
65 |
66 |
67 | from imc.data_models.project import Project
68 | from imc.data_models.sample import IMCSample
69 | from imc.data_models.roi import ROI
70 |
--------------------------------------------------------------------------------
/imc/cli.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Inspect MCD files, reporting on their basic statistics, saving
5 | metadata as YAML files, and panel information as CSV files.
6 | """
7 |
8 | import sys
9 | import argparse
10 | from argparse import RawTextHelpFormatter
11 | import typing as tp
12 |
13 | from imc._version import version
14 | from imc.scripts.process import main as process
15 | from imc.scripts.inspect_mcds import main as inspect
16 | from imc.scripts.prepare import main as prepare
17 | from imc.scripts.predict import main as predict
18 | from imc.scripts.segment_stacks import main as segment
19 | from imc.scripts.quantify import main as quantify
20 | from imc.scripts.phenotype import main as phenotype
21 | from imc.scripts.illustrate import main as illustrate
22 | from imc.scripts.view import main as view
23 |
24 | cli_config: tp.Dict[str, tp.Any]
25 | from imc.scripts import cli_config
26 |
27 |
28 | def main(cli: tp.Optional[tp.Sequence[str]] = None) -> int:
29 | parser = get_args()
30 | parser.add_argument("-v", "--version", action="version", version=version)
31 | main_args, cmd_args = parser.parse_known_args(cli)
32 |
33 | if main_args.command not in cli_config["subcommands"]:
34 | raise ValueError(f"Command '{main_args.command}' not known!")
35 | return eval(main_args.command)(cmd_args)
36 |
37 |
38 | def get_args() -> argparse.ArgumentParser:
39 | parser = argparse.ArgumentParser(**cli_config["main"], formatter_class=RawTextHelpFormatter) # type: ignore[index]
40 |
41 | subparsers = parser.add_subparsers(dest="command", required=True)
42 |
43 | for cmd in cli_config["subcommands"]:
44 | subparsers.add_parser(cmd, add_help=False)
45 | return parser
46 |
47 |
48 | if __name__ == "__main__":
49 | try:
50 | sys.exit(main())
51 | except KeyboardInterrupt:
52 | sys.exit(1)
53 |
--------------------------------------------------------------------------------
/imc/data_models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ElementoLab/imc/9725b3ab72f2273cb4a702964fa8518c2f189e9c/imc/data_models/__init__.py
--------------------------------------------------------------------------------
/imc/defaults.py:
--------------------------------------------------------------------------------
1 | from imc.types import Path
2 |
3 | # project
4 | DEFAULT_PROJECT_NAME = "project"
5 | DEFAULT_SAMPLE_NAME_ATTRIBUTE = "sample_name"
6 | DEFAULT_SAMPLE_GROUPING_ATTRIBUTEs = [DEFAULT_SAMPLE_NAME_ATTRIBUTE]
7 | DEFAULT_TOGGLE_ATTRIBUTE = "toggle"
8 | DEFAULT_PROCESSED_DIR_NAME = Path("processed")
9 | DEFAULT_RESULTS_DIR_NAME = Path("results")
10 | DEFAULT_PRJ_SINGLE_CELL_DIR = Path("single_cell")
11 | DEFAULT_ROI_NAME_ATTRIBUTE = "roi_name"
12 | DEFAULT_ROI_NUMBER_ATTRIBUTE = "roi_number"
13 |
14 | # # processed directory structure
15 | SUBFOLDERS_PER_SAMPLE = True
16 | ROI_STACKS_DIR = Path("tiffs")
17 | ROI_MASKS_DIR = Path("tiffs")
18 | ROI_UNCERTAINTY_DIR = Path("uncertainty")
19 | ROI_SINGLE_CELL_DIR = Path("single_cell")
20 |
21 | # sample
22 | DEFAULT_SAMPLE_NAME = "sample"
23 | DEFAULT_ROI_NAME_ATTRIBUTE = "roi_name"
24 | DEFAULT_ROI_NUMBER_ATTRIBUTE = "roi_number"
25 | DEFAULT_TOGGLE_ATTRIBUTE = "toggle"
26 |
27 | # roi
28 | SUBFOLDERS_PER_SAMPLE = True
29 | DEFAULT_ROI_NAME = "roi"
30 | ROI_STACKS_DIR = Path("tiffs")
31 | ROI_MASKS_DIR = Path("tiffs")
32 | ROI_UNCERTAINTY_DIR = Path("uncertainty")
33 | ROI_SINGLE_CELL_DIR = Path("single_cell")
34 |
35 | # graphics
36 | FIG_KWS = dict(dpi=300, bbox_inches="tight")
37 |
--------------------------------------------------------------------------------
/imc/demo/__init__.py:
--------------------------------------------------------------------------------
1 | from .generate_data import generate_project
2 | from .get_demo_data import DATASETS as _DATASETS, get_dataset
3 |
4 | datasets = list(_DATASETS.keys())
5 |
--------------------------------------------------------------------------------
/imc/demo/generate_data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from typing import Tuple, List, Dict, Union
4 | import tempfile
5 |
6 | import numpy as np
7 | import scipy.ndimage as ndi
8 | import matplotlib.pyplot as plt
9 | import tifffile
10 | import pandas as pd
11 | import skimage
12 |
13 | from imc import Project
14 | from imc.types import Array, Figure, Path
15 | from imc.utils import filter_kwargs_by_callable as filter_kws
16 |
17 |
18 | def generate_mask(
19 | shape: Tuple[int, int] = (8, 8),
20 | seeding_density: float = 0.1,
21 | # widths: int = None,
22 | # connectivity: float = None
23 | ) -> Array:
24 | mask = np.zeros(shape, dtype=bool)
25 | # Cells are placed in an effective mask area which is not touching borders
26 | eff_mask = mask[1:-1, 1:-1]
27 | centroids = np.random.choice(
28 | np.arange(eff_mask.size),
29 | int(np.ceil(eff_mask.size * seeding_density)),
30 | replace=False,
31 | )
32 | eff_mask.flat[centroids] = True # type: ignore
33 | mask[1:-1, 1:-1] = eff_mask
34 | return ndi.label(mask, structure=np.zeros((3, 3)))[0]
35 |
36 |
37 | def generate_disk_masks(
38 | shape: Tuple[int, int] = (128, 128),
39 | seeding_density: float = 0.1,
40 | disk_diameter: int = 10,
41 | ):
42 | mask = np.zeros(shape, dtype=bool)
43 |
44 | area = np.multiply(*mask.shape)
45 | n = int(np.ceil(mask.size * seeding_density) * (disk_diameter**2 / area))
46 | centroids = np.random.choice(np.arange(mask.size), n, replace=False)
47 |
48 | r = disk_diameter // 2
49 | disk = skimage.morphology.disk(r)
50 |     x = centroids // shape[1]  # row index: flat index // number of columns
51 | y = centroids % shape[1]
52 | for i in range(n):
53 | s = mask[x[i] - r : x[i] + r + 1, y[i] - r : y[i] + r + 1].shape
54 | mask[x[i] - r : x[i] + r + 1, y[i] - r : y[i] + r + 1] = disk[: s[0], : s[1]]
55 | return ndi.label(mask)[0]
56 |
57 |
58 | def generate_stack(
59 | mask: Array,
60 | n_channels: int = 3,
61 | channel_coeffs: Array = None,
62 | channel_std: Array = None,
63 | n_cell_types: int = 2,
64 | cell_type_coeffs: Array = None,
65 | cell_type_std: Array = None,
66 | ) -> Array:
67 | # partition cells into cell types
68 | n_cells = (mask > 0).sum()
69 | cells = np.arange(mask.size)[mask.flat > 0]
70 | assigned_cells = np.array([], dtype=int)
71 | ct_cells = dict()
72 | for i in range(n_cell_types):
73 | available_cells = [c for c in cells if c not in assigned_cells]
74 | ct_cells[i] = np.random.choice(
75 | available_cells,
76 | int(np.floor(n_cells / n_cell_types)),
77 | replace=False,
78 | )
79 | assigned_cells = np.append(assigned_cells, ct_cells[i])
80 | ct_cells[i] = np.append(ct_cells[i], cells[~np.isin(cells, assigned_cells)])
81 | assert sum([len(x) for x in ct_cells.values()]) == n_cells
82 |
83 | # assign intensity values
84 | stack = np.zeros((n_channels,) + mask.shape, dtype=float)
85 | std_sd = 0.1
86 | if channel_coeffs is None:
87 | channel_coeffs = np.random.choice(np.linspace(-5, 5), n_channels)
88 | if channel_std is None:
89 | channel_std = np.abs(channel_coeffs) * std_sd
90 | if cell_type_coeffs is None:
91 | cell_type_coeffs = np.random.choice(np.linspace(-5, 5), n_cell_types)
92 | if cell_type_std is None:
93 | cell_type_std = np.abs(cell_type_coeffs) * std_sd
94 | # means = intercept + np.dot(
95 | means = np.dot(
96 | channel_coeffs.reshape((-1, n_channels)).T,
97 | cell_type_coeffs.reshape((-1, n_cell_types)),
98 | )
99 | intercept = np.abs(means.min()) * 2
100 | means += intercept
101 | stds = channel_std.reshape((-1, n_channels)).T + cell_type_std.reshape(
102 | (-1, n_cell_types)
103 | )
104 |
105 | for cell_type in range(n_cell_types):
106 |         n = ct_cells[cell_type].size
107 | for channel in range(n_channels):
108 | stack[channel].flat[ct_cells[cell_type]] = np.random.normal(
109 | means[channel, cell_type], stds[channel, cell_type], n
110 | )
111 |
112 | # make sure array is non-negative
113 | if stack.min() < 0:
114 | stack[stack == 0] = stack.min()
115 | stack += abs(stack.min())
116 | return stack
117 |
118 |
119 | def write_tiff(array: Array, output_file: Path) -> None:
120 |     # a context manager ensures the file handle is always closed
121 |     with tifffile.TiffWriter(output_file) as fr:
122 |         fr.write(array)
123 |
124 |
125 | def write_roi_to_disk(mask: Array, stack: Array, output_prefix: Path) -> None:
126 | # mask
127 | write_tiff(mask, output_prefix + "_full_mask.tiff")
128 | # stack
129 | write_tiff(stack, output_prefix + "_full.tiff")
130 | # channel_labels
131 | labels = [str(c).zfill(2) for c in range(1, stack.shape[0] + 1)]
132 | channel_labels = pd.Series([f"Ch{c}(Ch{c})" for c in labels], name="channel")
133 | channel_labels.to_csv(output_prefix + "_full.csv")
134 |
135 |
136 | def visualize_roi(mask: Array, stack: Array) -> Figure:
137 | fig, axes = plt.subplots(1, 5, figsize=(4 * 5, 4))
138 | axes[0].set_title("Mask")
139 | axes[0].imshow(mask, cmap="binary_r")
140 | axes[1].set_title("RGB signal")
141 | axes[1].imshow(np.moveaxis(stack, 0, -1) / stack.max())
142 | for i, (ax, cmap) in enumerate(zip(axes[2:], ["Reds", "Greens", "Blues"])):
143 | ax.set_title(f"Channel {i}")
144 | ax.imshow(stack[i] / stack.max(), cmap=cmap)
145 | return fig
146 |
147 |
148 | def generate_project(
149 | name: str = None,
150 | n_samples: int = 3,
151 | rois_per_sample: int = 3,
152 | root_dir: Path = None,
153 | sample_names: List[str] = None,
154 | return_object: bool = True,
155 | visualize: bool = False,
156 | **kwargs,
157 | ) -> Union[Project, Path]:
158 | if name is None:
159 | name = "test_project"
160 | if root_dir is None:
161 | root_dir = Path(tempfile.mkdtemp())
162 | else:
163 | root_dir = Path(root_dir)
164 | root_dir.mkdir(exist_ok=True)
165 | meta_dir = root_dir / "metadata"
166 | meta_dir.mkdir(exist_ok=True)
167 | processed_dir = root_dir / "processed"
168 | processed_dir.mkdir(exist_ok=True)
169 |
170 | if sample_names is None:
171 | sample_names = ["test_sample_" + str(i).zfill(2) for i in range(1, n_samples + 1)]
172 | _meta: Dict[str, Dict[str, Union[str, int]]] = dict()
173 | for sample in sample_names:
174 | tiffs_dir = processed_dir / sample / "tiffs"
175 | tiffs_dir.mkdir(exist_ok=True, parents=True)
176 | for roi in range(1, rois_per_sample + 1):
177 | roi_name = f"{sample}-{str(roi).zfill(2)}"
178 | output_prefix = tiffs_dir / roi_name
179 | mask = generate_mask(**filter_kws(kwargs, generate_mask))
180 | stack = generate_stack(mask, **filter_kws(kwargs, generate_stack))
181 | if visualize:
182 | visualize_roi(mask, stack)
183 | write_roi_to_disk(mask, stack, output_prefix)
184 | _meta[roi_name] = {"roi_number": roi, "sample_name": sample}
185 |
186 | # write metadata
187 | meta = pd.DataFrame(_meta).T
188 | meta.index.name = "roi_name"
189 | meta.to_csv(meta_dir / "samples.csv")
190 | return (
191 | Project(
192 | metadata=meta_dir / "samples.csv",
193 | processed_dir=processed_dir,
194 | results_dir=processed_dir.parent / "results",
195 | )
196 | if return_object
197 | else root_dir
198 | )
199 |
--------------------------------------------------------------------------------
/imc/demo/get_demo_data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import typing as tp
4 | import shutil
5 | import urllib.request as request
6 | from contextlib import closing
7 | import tarfile
8 | import tempfile
9 | import zipfile
10 | import re
11 |
12 | import requests
13 | from urlpath import URL
14 | import tifffile
15 | import numpy as np
16 | import pandas as pd
17 |
18 | from imc.types import Path
19 | from imc import Project
20 |
21 |
22 | DATASET_DB_PATH = Path("~").expanduser() / ".imc" / "demo_datasets"
23 | DATASETS = {
24 | "jackson_2019_short": "https://wcm.box.com/shared/static/eq1m5j972cf3b5jqoe2vdju3bg9e0r5n",
25 | "jackson_2019_short_joint": "https://wcm.box.com/shared/static/b8nxku3ywvenghxvvm4wki9znxwbenzb",
26 | "schwabenland_2021_full": "https://zenodo.org/record/5018260/files/COVID19_brain_all_patients_singletiffs_and_cellmasks.zip?download=1",
27 | }
28 |
29 |
30 | def _download_file(url: str, output_path: Path, chunk_size=1024) -> None:
31 | """
32 | Download a file and write to disk in chunks (not in memory).
33 |
34 | Parameters
35 | ----------
36 | url : :obj:`str`
37 | URL to download from.
38 | output_path : :obj:`str`
39 | Path to file as output.
40 | chunk_size : :obj:`int`
41 | Size in bytes of chunk to write to disk at a time.
42 | """
43 | if url.startswith("ftp://"):
44 | with closing(request.urlopen(url)) as r:
45 | with open(output_path, "wb") as f:
46 | shutil.copyfileobj(r, f)
47 | else:
48 | response = requests.get(url, stream=True)
49 | with open(output_path, "wb") as outfile:
50 | outfile.writelines(response.iter_content(chunk_size=chunk_size))
51 |
52 |
53 | def _decompress_tar_file(path: Path, output_root: Path = None) -> None:
54 | """Decompress a tar.xz file."""
55 | with tarfile.open(path) as f:
56 | f.extractall(path.parent if output_root is None else output_root)
57 |
58 |
59 | def get_dataset(dataset_name: str, output_dir: Path = None) -> Project:
60 | DATASET_DB_PATH.mkdir()
61 |
62 | if dataset_name == "schwabenland_2021":
63 | return get_schwabenland_2021_data(output_dir)
64 | dataset_file = DATASET_DB_PATH / dataset_name + ".tar.gz"
65 |
66 | if output_dir is None:
67 | output_dir = Path(tempfile.TemporaryDirectory().name)
68 |
69 | if not dataset_file.exists():
70 | _download_file(DATASETS[dataset_name], dataset_file)
71 | _decompress_tar_file(dataset_file, output_dir)
72 | return Project(
73 | name=dataset_name,
74 | processed_dir=output_dir / dataset_name / "processed",
75 | subfolder_per_sample="joint" not in dataset_name,
76 | )
77 |
78 |
79 | def get_schwabenland_2021_data(output_dir: Path = None) -> Project:
80 | dataset_name = "schwabenland_2021"
81 | zip_file_url = (
82 | "https://zenodo.org/record/5018260/files/"
83 | "COVID19_brain_all_patients_singletiffs_and_cellmasks.zip"
84 | "?download=1"
85 | )
86 |
87 | if output_dir is None:
88 | output_dir = Path(tempfile.TemporaryDirectory().name).mkdir()
89 |
90 | zip_file = output_dir / dataset_name + "_imc_data.zip"
91 |
92 | if not zip_file.exists():
93 | _download_file(zip_file_url, zip_file)
94 | with zipfile.ZipFile(zip_file) as zf:
95 | zf.extractall(output_dir)
96 | zip_file.unlink()
97 |
98 | for dir_ in filter(lambda x: x.is_dir(), output_dir.iterdir()):
99 | name = dir_.name
100 | _stack = list()
101 | _channel_names = list()
102 | for file in dir_.iterdir():
103 | if "_mask.tiff" in file.as_posix():
104 | mask = tifffile.imread(file)
105 | continue
106 | _stack.append(tifffile.imread(file))
107 | _channel_names.append(file.stem)
108 | stack = np.asarray(_stack)
109 | channel_names = pd.Series(_channel_names)
110 | annotation = (
111 | channel_names.str.split("_")
112 | .apply(pd.Series)
113 | .set_index(channel_names)
114 | .rename(columns={0: "marker", 1: "metal"})
115 | )
116 | annotation["mass"] = annotation["metal"].str.extract(r"(\d+)")[0].astype(int)
117 | stack = stack[annotation["mass"].rank().astype(int) - 1]
118 | annotation = annotation.sort_values("mass")
119 | annotation.index = annotation.index.str.replace("_", "(") + ")"
120 | labels = annotation.index.to_series().reset_index(drop=True).rename("channel")
121 |
122 | if "ROI" not in name:
123 | roi_number = "1"
124 | else:
125 | roi_number = re.findall(r"_ROI(\d)_", name)[0]
126 | name = re.sub(r"_ROI(\d)", "", name)
127 |
128 | od = (output_dir / "processed" / name / "tiffs").mkdir()
129 | output_prefix = od / name + f"-{roi_number}_full"
130 | tifffile.imwrite(output_prefix + ".tiff", stack)
131 | tifffile.imwrite(output_prefix + "_mask.tiff", mask)
132 | labels.to_csv(output_prefix + ".csv")
133 |
134 | shutil.rmtree(dir_)
135 |
136 | return Project(name=dataset_name, processed_dir=output_dir / "processed")
137 |
138 |
139 | def get_phillips_2021(output_dir: Path = None) -> Project:
140 | """
141 | doi:10.3389/fimmu.2021.687673
142 | """
143 | if output_dir is None:
144 | output_dir = Path(tempfile.TemporaryDirectory().name).mkdir()
145 |
146 | (output_dir / "processed").mkdir()
147 |
148 | dataset_name = "phillips_2021"
149 | base_url = URL("https://immunoatlas.org")
150 | group_id = "NOLN"
151 | project_id = "210614-2"
152 | cases = [f"NOLN2100{i}" for i in range(2, 10)]
153 | rois = ["A01"]
154 | markers = [
155 | "DNA (Hoechst)",
156 | "T-bet",
157 | "GATA3",
158 | "FoxP3",
159 | "CD56",
160 | "TCR-γ/δ",
161 | "Tim-3",
162 | "CD30",
163 | "CCR6",
164 | "PD-L1",
165 | "TCR-β",
166 | "CD4",
167 | "CD2",
168 | "CD5",
169 | "Ki-67",
170 | "CD25",
171 | "CD134",
172 | "α-SMA",
173 | "CD20",
174 | "LAG3",
175 | "MUC-1/EMA",
176 | "CD11c",
177 | "PD-1",
178 | "Vimentin",
179 | "CD16",
180 | "IDO-1",
181 | "CD15",
182 | "EGFR",
183 | "VISTA",
184 | "Granzyme B",
185 | "CD206",
186 | "ICOS",
187 | "CD69",
188 | "CD45RA",
189 | "CD57",
190 | "CD3",
191 | "HLA-DR",
192 | "CD8",
193 | "BCL-2",
194 | "β-catenin",
195 | "CD7",
196 | "CD1a",
197 | "CD45RO",
198 | "CCR4/CD194",
199 | "CD163",
200 | "CD11b",
201 | "CD34",
202 | "Cytokeratin",
203 | "CD38",
204 | "CD68",
205 | "CD31",
206 | "Collagen IV",
207 | "CD138",
208 | "Podoplanin",
209 | "CD45",
210 | "MMP-9",
211 | "MCT",
212 | "CLA/CD162",
213 | "DNA (DRAQ5)",
214 | ]
215 |
216 | for case in cases:
217 | for roi in rois:
218 | print(case, roi)
219 | url = base_url / group_id / project_id / case / roi / f"{case}_{roi}.tif"
220 | roi = roi.replace("A", "")
221 | od = (output_dir / "processed" / case / "tiffs").mkdir()
222 | f = od / f"{case}-{roi}_full.tiff"
223 | if f.exists():
224 | continue
225 | # Somehow the _download_file failed a few times
226 | _download_file(url.as_posix(), f)
227 | # resp = url.get()
228 | # with open(f, "wb") as handle:
229 | # handle.write(resp.content)
230 | pd.Series(markers, name="channel").to_csv(f.replace_(".tiff", ".csv"))
231 |
232 | return Project(name=dataset_name, processed_dir=output_dir / "processed")
233 |
234 |
235 | def get_allam_2021_data(output_dir: Path = None) -> Project:
236 | if output_dir is None:
237 | output_dir = Path(tempfile.TemporaryDirectory().name).mkdir()
238 |
239 | base_url = URL("https://raw.githubusercontent.com/coskunlab/SpatialViz/main/data")
240 | samples = [
241 | y[0] + str(y[1]) for code in ["DT", "NT"] for y in zip([code] * 6, range(1, 7))
242 | ]
243 | markers = [
244 | "CD20",
245 | "CD3",
246 | "CD4",
247 | "CD45RO",
248 | "CD68",
249 | "CD8a",
250 | "Col1",
251 | "DNA1",
252 | "DNA2",
253 | "Ecadherin",
254 | "FoxP3",
255 | "GranzymeB",
256 | "Histone3",
257 | "Ki67",
258 | "PD1",
259 | "PDL1",
260 | "Pankeratin",
261 | "SMA",
262 | "Vimentin",
263 | ]
264 |
265 | for sample in samples:
266 | mask_url = base_url / "cell_masks" / f"{sample}_cell_Mask.tiff"
267 | for marker in markers:
268 | channel_url = base_url / "raw" / sample / f"{sample}_{marker}.tiff"
269 |
--------------------------------------------------------------------------------
/imc/exceptions.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | from imc.types import GenericType
3 |
4 |
5 | class AttributeNotSetError(Exception):
6 | pass
7 |
8 |
9 | def cast(arg: Optional[GenericType]) -> GenericType:
10 | """Remove `Optional` from `T`."""
11 | if arg is None:
12 | raise AttributeNotSetError("Attribute cannot be None!")
13 | return arg
14 |
--------------------------------------------------------------------------------
/imc/interactive_volume_viewer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | An example program to display a volumetric image from the command line.
5 | """
6 |
7 | import sys
8 | import typing as tp
9 | from urlpath import URL
10 | from functools import partial
11 |
12 | import imageio
13 | import numpy as np
14 | import matplotlib.pyplot as plt
15 | from tqdm import tqdm
16 |
17 | from imc.types import Array, Axis, Figure, Path # https://github.com/ElementoLab/imc
18 |
19 |
20 | def multi_slice_viewer(
21 | volume: Array, up_key: str = "w", down_key: str = "s", **kwargs
22 | ) -> Figure:
23 | remove_keymap_conflicts({up_key, down_key})
24 | print(f"Press '{up_key}' and '{down_key}' for scrolling through image channels.")
25 |
26 | fig, ax = plt.subplots()
27 | ax.volume = volume
28 | ax.index = volume.shape[0] // 2
29 | ax.imshow(volume[ax.index], **kwargs)
30 | fig.canvas.mpl_connect(
31 | "key_press_event", partial(process_key, up_key=up_key, down_key=down_key)
32 | )
33 | return fig
34 |
35 |
36 | def remove_keymap_conflicts(new_keys_set: tp.Set) -> None:
37 | for prop in plt.rcParams:
38 | if prop.startswith("keymap."):
39 | keys = plt.rcParams[prop]
40 | remove_list = set(keys) & new_keys_set
41 | for key in remove_list:
42 | keys.remove(key)
43 |
44 |
45 | def process_key(event, up_key: str = "w", down_key: str = "s") -> None:
46 | fig = event.canvas.figure
47 | ax = fig.axes[0]
48 | if event.key == up_key:
49 | previous_slice(ax)
50 | elif event.key == down_key:
51 | next_slice(ax)
52 | fig.canvas.draw()
53 |
54 |
55 | def previous_slice(ax: Axis) -> None:
56 | """Go to the previous slice."""
57 | volume = ax.volume
58 | ax.index = (ax.index - 1) % volume.shape[0] # wrap around using %
59 | ax.images[0].set_array(volume[ax.index])
60 |
61 |
62 | def next_slice(ax: Axis) -> None:
63 | """Go to the next slice."""
64 | volume = ax.volume
65 | ax.index = (ax.index + 1) % volume.shape[0]
66 | ax.images[0].set_array(volume[ax.index])
67 |
68 |
69 | def get_volume() -> Array:
70 | base_url = URL("https://prod-images-static.radiopaedia.org/images/")
71 | start_n = 53734044
72 | length = 137
73 |
74 | imgs = list()
75 | for i in tqdm(range(length)):
76 | url = base_url / f"{start_n + i}/{i + 1}_gallery.jpeg"
77 | resp = url.get()
78 | c = resp.content
79 | imgs.append(imageio.read(c, format="jpeg").get_data(0))
80 | img = np.asarray(imgs)
81 | return img
82 |
83 |
84 | def main() -> int:
85 | """
86 |     Download the example volume (or load it from the local cache) and display it interactively.
87 | """
88 | img_file = Path("/tmp/volumetric_image.npz")
89 | if not img_file.exists():
90 | print("Downloading volumetric image.")
91 | img = get_volume()
92 | np.savez_compressed(img_file, img)
93 | else:
94 | img = np.load(img_file)["arr_0"]
95 |
96 | _ = multi_slice_viewer(img)
97 | print("Displaying volume.")
98 | print("Press 'w' for up and 's' for down.")
99 | plt.show(block=True)
100 | print("Done.")
101 | return 0
102 |
103 |
104 | if __name__ == "__main__" and "get_ipython" not in locals():
105 | try:
106 | sys.exit(main())
107 | except KeyboardInterrupt:
108 | sys.exit(1)
109 |
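110 | # Hypothetical usage sketch with a synthetic volume, avoiding the download:
111 | #
112 | #   import numpy as np
113 | #   volume = np.random.rand(20, 128, 128)   # (slices, height, width)
114 | #   fig = multi_slice_viewer(volume, cmap="gray")
115 | #   plt.show(block=True)                    # 'w'/'s' scroll through slices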
--------------------------------------------------------------------------------
/imc/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ElementoLab/imc/9725b3ab72f2273cb4a702964fa8518c2f189e9c/imc/logo.png
--------------------------------------------------------------------------------
/imc/ops/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ElementoLab/imc/9725b3ab72f2273cb4a702964fa8518c2f189e9c/imc/ops/__init__.py
--------------------------------------------------------------------------------
/imc/ops/adjacency.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for single-cell adjacency.
3 | """
4 |
5 | import typing as tp
6 |
7 | import numpy as np
8 | import pandas as pd
9 | import matplotlib.pyplot as plt
10 | import seaborn as sns
11 | from tqdm import tqdm
12 | import scipy.ndimage as ndi
13 | from skimage import exposure
14 | from skimage import graph
15 | import networkx as nx
16 |
17 | import imc.data_models.roi as _roi
18 | from imc.types import DataFrame, Series, Path
19 |
20 | FIG_KWS = dict(bbox_inches="tight", dpi=300)
21 | MAX_BETWEEN_CELL_DIST = 4
22 |
23 |
24 | def get_adjacency_graph(
25 | roi: _roi.ROI,
26 | output_prefix: Path = None,
27 | max_dist: int = MAX_BETWEEN_CELL_DIST,
28 | ) -> nx.Graph:
29 | """
30 | Derive a spatial representation of cells in image using a graph.
31 |
32 | Parameters
33 | ----------
34 | roi: imc.ROI
35 | ROI object to derive graph for.
36 |
37 |     output_prefix: imc.types.Path
38 | Prefix to output file with graph.
39 | Defaults to sample root dir / 'single_cell'.
40 |
41 | max_dist: int
42 | Maximum distance to consider physical interaction between cells (graph edges)
43 |
44 | Returns
45 | -------
46 | networkx.Graph
47 | Adjacency graph for cells in ROI.
48 | """
49 | import pickle
50 |
51 | clusters = roi.clusters
52 | if clusters is None:
53 | print("ROI does not have assigned clusters.")
54 |
55 | output_prefix = Path(output_prefix or (roi.single_cell_dir / roi.name + "."))
56 | if not output_prefix.endswith("."):
57 | output_prefix += "."
58 | output_prefix.parent.mkdir()
59 |
60 | mask = roi.cell_mask
61 |
62 | # align mask with cell type assignment (this is only to remove border cells)
63 | if clusters is not None:
64 | mask[~np.isin(mask, roi.clusters.index)] = 0
65 |
66 |     # Assign each background point to its closest cell, up to `max_dist` away
67 | # # first measure the distance of each background point to the closest cell
68 | background = mask == 0
69 | d = ndi.distance_transform_edt(
70 | background, return_distances=True, return_indices=False
71 | )
72 |
73 | background = background & (d <= max_dist)
74 | i, j = ndi.distance_transform_edt(
75 | background, return_distances=False, return_indices=True
76 | )
77 | mask = mask[i, j]
78 |
79 | # Simply use mean of channels as distance
80 | stack = roi.stack
81 | if hasattr(roi, "channel_exclude"):
82 | stack = stack[~roi.channel_exclude]
83 | image_mean = np.asarray([exposure.equalize_hist(x) for x in stack]).mean(0)
84 | image_mean = (image_mean - image_mean.min()) / (
85 | np.percentile(image_mean, 98) - image_mean.min()
86 | )
87 |
88 | # Construct adjacency graph based on cell distances
89 | g = graph.rag_mean_color(image_mean, mask, connectivity=2, mode="distance")
90 | # g = skimage.future.graph.RAG(mask, connectivity=2)
91 | # remove background node (unfortunately it can't be masked beforehand)
92 | if 0 in g.nodes:
93 | g.remove_node(0)
94 |
95 | fig, ax = plt.subplots(1, 1)
96 | i = (image_mean * 255).astype("uint8")
97 | i = np.moveaxis(np.asarray([i, i, i]), 0, -1)
98 | lc = graph.show_rag(
99 | mask.astype("uint32"),
100 | g,
101 | i,
102 | ax=ax,
103 | img_cmap="viridis",
104 | edge_cmap="Reds",
105 | edge_width=1,
106 | )
107 | ax.axis("off")
108 | fig.colorbar(lc, fraction=0.03, ax=ax)
109 | ax.get_children()[0].set_rasterized(True)
110 | ax.get_children()[-2].set_rasterized(True)
111 | fig.savefig(output_prefix + "neighbor_graph.svg", **FIG_KWS)
112 | plt.close(fig)
113 |
114 |     # add cluster label attribute
115 | if clusters is not None:
116 | nx.set_node_attributes(g, roi.clusters.to_dict(), name="cluster")
117 | nx.set_node_attributes(g, roi.clusters.index.to_series().to_dict(), name="obj_id")
118 |
119 | # save graph
120 | with open(output_prefix + "neighbor_graph.gpickle", "wb") as f:
121 | pickle.dump(g, f)
122 | return g
123 |
124 |
125 | def measure_cell_type_adjacency(
126 | roi: _roi.ROI,
127 | method: str = "random",
128 | adjacency_graph: nx.Graph = None,
129 | n_iterations: int = 100,
130 | inf_replace_method: str = "min",
131 | output_prefix: Path = None,
132 | plot: bool = True,
133 | save: bool = True,
134 | ) -> DataFrame:
135 | """
136 |     Derive an aggregated measure of adjacency between cell types for one ROI.
137 |
138 | Parameters
139 | ----------
140 | roi: imc.ROI
141 | ROI object to derive graph for.
142 |
143 | method: str
144 | Method to normalize interactions by.
145 | - 'random': generate empirical background of expected interactions based on cell type abundance by randomization (permutation of cell type identities).
146 | - 'pharmacoscopy': method with analytical solution from Vladimer et al (10.1038/nchembio.2360). Not recommended for small images.
147 | Default is 'random'.
148 |
149 | adjacency_graph: networkx.Graph
150 | Adjacency graph per cell for ROI.
151 |         Defaults to the `ROI.adjacency_graph` attribute if not given.
152 |
153 | n_iterations: int
154 | Number of permutations to run when `method` == 'random'.
155 | Defaults to 100.
156 |
157 | inf_replace_method: str
158 | If `method` == 'pharmacoscopy', how to handle cases where interactions are not observed.
159 |
160 |     output_prefix: imc.types.Path
161 | Prefix to output file with graph.
162 | Defaults to sample root dir / 'single_cell'.
163 |
164 | plot: bool
165 | Whether to plot visualizations.
166 | Default is `True`.
167 |
168 | save: bool
169 | Whether to save output to disk.
170 | Default is `True`.
171 |
172 | Returns
173 | -------
174 | pandas.DataFrame
175 | DataFrame of cell type interactions normalized by `method`.
176 | """
177 | output_prefix = output_prefix or (
178 | roi.sample.root_dir / "single_cell" / roi.name + "."
179 | )
180 | if not output_prefix.endswith("."):
181 | output_prefix += "."
182 |
183 | cluster_counts = roi.clusters.value_counts()
184 |
185 | if adjacency_graph is None:
186 | adjacency_graph = roi.adjacency_graph
187 |
188 | import warnings # Networkx warns that the output of nx.linalg.attrmatrix.attr_matrix will be an array instead of a matrix
189 |
190 | with warnings.catch_warnings():
191 | warnings.filterwarnings("ignore", category=FutureWarning)
192 | adj, order = nx.linalg.attrmatrix.attr_matrix(
193 | adjacency_graph, node_attr="cluster"
194 | )
195 | order = pd.Series(order).astype(
196 | roi.clusters.dtype
197 | ) # passing dtype at instantiation gives warning
198 | freqs = pd.DataFrame(adj, order, order).sort_index(axis=0).sort_index(axis=1)
199 | if save:
200 | freqs.to_csv(output_prefix + "cluster_adjacency_graph.frequencies.csv")
201 |
202 | if method == "random":
203 | norm_freqs = correct_interaction_background_random(
204 | roi, freqs, "cluster", n_iterations, save, output_prefix
205 | )
206 | elif method == "pharmacoscopy":
207 | norm_freqs = correct_interaction_background_pharmacoscopy(
208 | freqs, cluster_counts, roi.clusters.shape[0], inf_replace_method
209 | )
210 | if save:
211 | norm_freqs.to_csv(output_prefix + "cluster_adjacency_graph.norm_over_random.csv")
212 |
213 | if not plot:
214 | return norm_freqs
215 | v = norm_freqs.values.std() * 2
216 | fig, axes = plt.subplots(1, 2, sharey=True, figsize=(4 * 2, 4))
217 | kws = dict(cmap="RdBu_r", center=0, square=True, xticklabels=True, yticklabels=True)
218 | sns.heatmap(norm_freqs, robust=True, ax=axes[0], **kws)
219 | kws2 = dict(vmin=-v, vmax=v, cbar_kws=dict(label="Log odds interaction"))
220 | sns.heatmap(norm_freqs, ax=axes[1], **kws, **kws2)
221 | fig.savefig(
222 | output_prefix + "cluster_adjacency_graph.norm_over_random.heatmap.svg",
223 | **FIG_KWS,
224 | )
225 | plt.close(fig)
226 | del kws["square"]
227 | try:
228 | grid = sns.clustermap(norm_freqs, **kws, **kws2)
229 | grid.savefig(
230 | output_prefix + "cluster_adjacency_graph.norm_over_random.clustermap.svg",
231 | **FIG_KWS,
232 | )
233 | plt.close(grid.fig)
234 | except FloatingPointError:
235 | pass
236 | return norm_freqs
237 |
238 |
239 | def correct_interaction_background_random(
240 | roi: _roi.ROI,
241 | freqs: DataFrame,
242 | attribute,
243 | n_iterations: int,
244 | save: bool,
245 | output_prefix: tp.Union[str, Path],
246 | ):
247 | values = {
248 | x: roi.adjacency_graph.nodes[x][attribute] for x in roi.adjacency_graph.nodes
249 | }
250 | shuffled_freqs = list()
251 | for _ in tqdm(range(n_iterations)):
252 | g2 = roi.adjacency_graph.copy()
253 | shuffled_attr = pd.Series(values).sample(frac=1)
254 | shuffled_attr.index = values
255 | nx.set_node_attributes(g2, shuffled_attr.to_dict(), name=attribute)
256 | import warnings
257 |
258 | with warnings.catch_warnings():
259 | warnings.filterwarnings("ignore", category=FutureWarning)
260 | rf, rl = nx.linalg.attrmatrix.attr_matrix(g2, node_attr=attribute)
261 | rl = pd.Series(rl, dtype=roi.clusters.dtype)
262 | shuffled_freqs.append(
263 | pd.DataFrame(rf, index=rl, columns=rl).sort_index(axis=0).sort_index(axis=1)
264 | )
265 | shuffled_freq = pd.concat(shuffled_freqs)
266 | if save:
267 | shuffled_freq.to_csv(
268 | output_prefix
269 | + f"cluster_adjacency_graph.random_frequencies.all_iterations_{n_iterations}.csv"
270 | )
271 | shuffled_freq = shuffled_freq.groupby(level=0).sum().sort_index(axis=1)
272 | if save:
273 | shuffled_freq.to_csv(
274 | output_prefix + "cluster_adjacency_graph.random_frequencies.csv"
275 | )
276 |
277 | fl = np.log1p((freqs / freqs.values.sum()) * 1e6)
278 | sl = np.log1p((shuffled_freq / shuffled_freq.values.sum()) * 1e6)
279 | # make sure both contain all edges/nodes
280 | fl = fl.reindex(sl.index, axis=0).reindex(sl.index, axis=1).fillna(0)
281 | sl = sl.reindex(fl.index, axis=0).reindex(fl.index, axis=1).fillna(0)
282 | return fl - sl
283 |
284 |
285 | def correct_interaction_background_pharmacoscopy(
286 | frequency_matrix: DataFrame,
287 | cluster_counts: Series,
288 | total_cells: int,
289 | inf_replace_method: tp.Optional[str] = "min_symmetric",
290 | ):
291 | c = np.log(total_cells)
292 | fa = np.log(frequency_matrix.sum().sum()) - c
293 | norms = pd.DataFrame()
294 | for ct1 in frequency_matrix.index:
295 | for ct2 in frequency_matrix.columns:
296 | with np.errstate(divide="ignore", invalid="ignore"):
297 | o = np.log(frequency_matrix.loc[ct1, ct2]) - np.log(
298 | frequency_matrix.loc[ct1].sum()
299 | )
300 | if o == 0:
301 | norms.loc[ct1, ct2] = 0.0
302 | continue
303 | f1 = np.log(cluster_counts.loc[ct1]) - c
304 | f2 = np.log(cluster_counts.loc[ct2]) - c
305 |
306 | norms.loc[ct1, ct2] = o - (f1 + f2 + fa)
307 | if inf_replace_method is None:
308 | return norms
309 |
310 |     # three ways to replace -inf (cell type pairs with no touching events):
311 |     # # 1. replace with the lowest non-inf value (de-emphasize lack of touching)
312 | if inf_replace_method == "min":
313 | norm_freqs = norms.replace(-np.inf, norms[norms != (-np.inf)].min().min())
314 |     # # 2. replace with minus the highest value (treat lack of touching as strong avoidance)
315 | if inf_replace_method == "max":
316 | norm_freqs = norms.replace(-np.inf, -norms.max().max())
317 | # # 3. One of the above + make symmetric by X @ X.T + Z-score
318 | if inf_replace_method == "min_symmetric":
319 | norm_freqs = norms.replace(-np.inf, norms[norms != (-np.inf)].min().min())
320 | norm_freqs = norm_freqs @ norm_freqs.T
321 | norm_freqs = (norm_freqs - norm_freqs.values.mean()) / norm_freqs.values.std()
322 | if inf_replace_method == "max_symmetric":
323 | norm_freqs = norms.replace(-np.inf, norms[norms != (-np.inf)].max().max())
324 | norm_freqs = norm_freqs @ norm_freqs.T
325 | norm_freqs = (norm_freqs - norm_freqs.values.mean()) / norm_freqs.values.std()
326 | return norm_freqs
327 |
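328 | # Hypothetical usage sketch, assuming `roi` is a clustered `imc.ROI`
329 | # (`roi.clusters` set), as required by the functions above:
330 | #
331 | #   g = get_adjacency_graph(roi)  # builds, plots and pickles the cell graph
332 | #   norm = measure_cell_type_adjacency(roi, method="random", adjacency_graph=g)
333 | #   # `norm` holds the log difference of observed vs permuted contact frequencies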
--------------------------------------------------------------------------------
/imc/ops/clustering.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for single-cell clustering.
3 | """
4 |
5 | import typing as tp
6 |
7 | import numpy as np
8 | import pandas as pd
9 | import matplotlib.pyplot as plt
10 | from tqdm import tqdm
11 |
12 | from anndata import AnnData
13 | import scanpy as sc
14 |
15 | from imc.types import DataFrame, Path
16 | from imc.graphics import rasterize_scanpy
17 |
18 |
19 | FIG_KWS = dict(bbox_inches="tight", dpi=300)
20 | sc.settings.n_jobs = -1
21 |
22 |
23 | DEFAULT_CELL_TYPE_REFERENCE = (
24 | "https://gist.github.com/afrendeiro/4aa133c2fcb5eb0152957b11ec753b74/raw",
25 | Path(".imc.cell_type_reference.yaml"),
26 | )
27 |
28 |
29 | def anndata_to_cluster_means(
30 | ann: AnnData, cluster_label: str, raw: bool = False
31 | ) -> DataFrame:
32 | means = dict()
33 | obj = ann if not raw else ann.raw
34 | for cluster in ann.obs[cluster_label].unique():
35 | clust = ann.obs[cluster_label] == cluster
36 | means[cluster] = obj[clust, :].X.mean(0)
37 | mean_expr = pd.DataFrame(means, index=obj.var.index).sort_index(axis=1)
38 | mean_expr.columns.name = "cluster"
39 | return mean_expr
40 |
41 |
42 | def phenotyping(
43 | a: tp.Union[AnnData, Path],
44 | channels_include: tp.Sequence[str] = None,
45 | channels_exclude: tp.Sequence[str] = None,
46 | filter_cells: bool = True,
47 | z_score: bool = True,
48 | z_score_per: str = "roi",
49 | z_score_cap: float = 3.0,
50 | remove_batch: bool = True,
51 | batch_variable: str = "sample",
52 | dim_res_algos: tp.Sequence[str] = ("umap",),
53 | clustering_method: str = "leiden",
54 | clustering_resolutions: tp.Sequence[float] = (1.0,),
55 | ) -> AnnData:
56 | import anndata
57 |
58 | if "pymde" in dim_res_algos:
59 | import pymde
60 | if clustering_method == "parc":
61 | from parc import PARC
62 |
63 | # Checks
64 | reason = f"Can only Z-score values per 'roi' or 'sample'. '{z_score_per}' is not supported."
65 | assert z_score_per in ["sample", "roi"], reason
66 | reason = f"Clustering method '{clustering_method}' is not supported."
67 |     assert clustering_method in ["leiden", "parc"], reason
68 | reason = "Can only use 'pca', 'umap', 'diffmap', or 'pymde' in `dim_res_algos`."
69 | assert all(x in ["pca", "umap", "diffmap", "pymde"] for x in dim_res_algos), reason
70 |
71 | if isinstance(a, Path):
72 | print(f"Reading h5ad file: '{a}'.")
73 | a = sc.read(a)
74 |
75 | if remove_batch:
76 | if a.obs[batch_variable].nunique() <= 1:
77 | print(
78 | "Batch correction not possible as only one batch detected. "
79 |                 "Check `batch_variable` keyword argument."
80 | )
81 | remove_batch = False
82 |
83 | if "sample" not in a.obs.columns:
84 | a.obs["sample"] = a.obs["roi"].str.extract(r"(.*)-\d+")[0].fillna("")
85 | if a.raw is None:
86 | a.raw = a
87 |
88 | # Add morphological variables to obs
89 | sel = a.var.index.str.contains(r"\(")
90 | v = a.var.index[~sel]
91 | for col in v:
92 | a.obs[col] = a[:, col].X.tolist()
93 | a = a[:, sel]
94 |
95 | # Filter out channels
96 | if channels_exclude is not None:
97 | a = a[:, ~a.var.index.isin(channels_exclude)]
98 | if channels_include is not None:
99 | a = a[:, channels_include]
100 | a = a.copy()
101 |
102 |     # # reduce DNA channels to one, and move to obs
103 | dnas = a.var.index[a.var.index.str.contains(r"DNA\d")]
104 | a.obs["DNA"] = a[:, dnas].X.mean(1)
105 | a = a[:, ~a.var.index.isin(dnas)]
106 |
107 | # Filter out cells
108 | if filter_cells:
109 |         if "solidity" not in a.obs.columns:
110 |             print(
111 |                 "Could not filter cells based on solidity, likely because morphological quantification was not performed!"
112 |             )
113 |         else:
114 |             exclude = (a.obs["solidity"] == 1).values
115 |             p = exclude.sum() / a.shape[0] * 100
116 |             print(f"Filtering out {exclude.sum()} cells ({p:.2f} %)")
117 |             a = a[~exclude].copy()
118 | # Scaling/Normalization
119 | print("Performing data scaling/normalization.")
120 | sc.pp.log1p(a)
121 | if z_score:
122 | _ads = list()
123 | for roi_name in a.obs["roi"].unique():
124 | a2 = a[a.obs["roi"] == roi_name, :].copy()
125 | sc.pp.scale(a2, max_value=z_score_cap)
126 | a2.X[a2.X < -z_score_cap] = -z_score_cap
127 | # print(a2.X.min(), a2.X.max())
128 | _ads.append(a2)
129 | a = anndata.concat(_ads)
130 | sc.pp.scale(a)
131 | if remove_batch:
132 | sc.pp.combat(a, batch_variable)
133 | sc.pp.scale(a)
134 |
135 | # Dimensionality reduction
136 | print("Performing dimensionality reduction.")
137 | sc.pp.pca(a)
138 | if remove_batch:
139 | sc.external.pp.bbknn(a, batch_key=batch_variable)
140 | else:
141 | sc.pp.neighbors(a)
142 | if "umap" in dim_res_algos:
143 | sc.tl.umap(a, gamma=25)
144 | if "diffmap" in dim_res_algos:
145 | sc.tl.diffmap(a)
146 | if "pymde" in dim_res_algos:
147 | a.obsm["X_pymde"] = pymde.preserve_neighbors(a.X, embedding_dim=2).embed().numpy()
148 | a.obsm["X_pymde2"] = (
149 | pymde.preserve_neighbors(
150 | a.X,
151 | embedding_dim=2,
152 | attractive_penalty=pymde.penalties.Quadratic,
153 | repulsive_penalty=None,
154 | )
155 | .embed()
156 | .numpy()
157 | )
158 |
159 | # Clustering
160 | print("Performing clustering.")
161 | if clustering_method == "leiden":
162 | for res in clustering_resolutions:
163 | sc.tl.leiden(a, resolution=res, key_added=f"cluster_{res}")
164 | a.obs[f"cluster_{res}"] = pd.Categorical(
165 | a.obs[f"cluster_{res}"].astype(int) + 1
166 | )
167 | elif clustering_method == "parc":
168 | for res in clustering_resolutions:
169 | p = PARC(
170 | a.X,
171 | neighbor_graph=a.obsp["connectivities"],
172 | random_seed=42,
173 | resolution_parameter=res,
174 | )
175 | p.run_PARC()
176 | a.obs[f"cluster_{res}"] = pd.Categorical(pd.Series(p.labels) + 1)
177 |
178 | print("Finished phenotyping.")
179 | return a
180 |
181 |
182 | def plot_phenotyping(
183 | a: tp.Union[AnnData, Path],
184 | output_prefix: Path,
185 | tech_channels: tp.Sequence[str] = None,
186 | dim_res_algos: tp.Sequence[str] = ("umap",),
187 | clustering_resolutions: tp.Sequence[float] = None,
188 | ):
189 | from matplotlib.backends.backend_pdf import PdfPages
190 | from imc.graphics import add_centroids
191 | from seaborn_extensions import clustermap
192 |
193 | # Read in
194 | if isinstance(a, Path):
195 | print(f"Reading h5ad file: '{a}'.")
196 | a = sc.read(a)
197 | a = a[a.obs.sample(frac=1).index]
198 |
199 | # Checks
200 | if output_prefix.is_dir():
201 | output_prefix = output_prefix / "phenotypes."
202 | if not output_prefix.endswith("."):
203 | output_prefix += "."
204 | output_prefix.parent.mkdir()
205 |
206 | if "sample" not in a.obs.columns:
207 | a.obs["sample"] = a.obs["roi"].str.extract(r"(.*)-\d+")[0].fillna("")
208 |
209 | if tech_channels is None:
210 | tech_channels = [
211 | "DNA",
212 | "eccentricity",
213 | "solidity",
214 | "area",
215 | "perimeter",
216 | "major_axis_length",
217 | ]
218 | tech_channels = [c for c in tech_channels if c in a.obs.columns]
219 |
220 | if clustering_resolutions is None:
221 | clustering_resolutions = (
222 | a.obs.columns[a.obs.columns.str.contains("cluster_")]
223 | .str.extract(r"cluster_(.*)$")[0]
224 | .astype(float)
225 | )
226 |
227 | # Plot projections
228 | non_tech_channels = a.var.index[~a.var.index.isin(tech_channels)].tolist()
229 | vmax = (
230 | [None]
231 | + np.percentile(a.raw[:, non_tech_channels].X, 95, axis=0).tolist()
232 | + np.percentile(a.obs[tech_channels], 95, axis=0).tolist()
233 | # + [None]
234 | + ([None] * len(clustering_resolutions))
235 | )
236 | color = (
237 | ["sample"]
238 | + non_tech_channels
239 | + tech_channels
240 | # + ["topological_domain"]
241 | + [f"cluster_{res}" for res in clustering_resolutions]
242 | )
243 | for algo in tqdm(dim_res_algos):
244 | f = output_prefix + f"{algo}.pdf"
245 | with PdfPages(f) as pdf:
246 | for i, col in enumerate(color):
247 | fig = sc.pl.embedding(
248 | a,
249 | basis=algo,
250 | color=col,
251 | show=False,
252 | vmax=vmax[i],
253 | use_raw=True,
254 | ).figure
255 | rasterize_scanpy(fig)
256 | if i >= len(color) - len(clustering_resolutions):
257 | res = clustering_resolutions[i - len(color)]
258 | add_centroids(a, res=res, ax=fig.axes[0], algo=algo)
259 | plt.figure(fig)
260 | pdf.savefig(**FIG_KWS)
261 | plt.close(fig)
262 |
263 | # Plot ROIs separately
264 | f = output_prefix + f"{algo}.sample_roi.pdf"
265 | projf = getattr(sc.pl, algo)
266 | fig = projf(a, color=["sample", "roi"], show=False)[0].figure
267 | rasterize_scanpy(fig)
268 | fig.savefig(f, **FIG_KWS)
269 | plt.close(fig)
270 |
271 | # Plot average phenotypes
272 | for res in tqdm(clustering_resolutions):
273 | df = a.to_df()[non_tech_channels].join(a.obs[tech_channels])
274 |
275 | # Drop variables with no variance
276 | v = df.var()
277 | if (v == 0).any():
278 | df = df.drop(v.index[v == 0], axis=1)
279 |
280 | cluster_means = df.groupby(a.obs[f"cluster_{res}"].values).mean()
281 |
282 | cell_counts = a.obs[f"cluster_{res}"].value_counts().rename("Cells per cluster")
283 |
284 | cell_percs = ((cell_counts / cell_counts.sum()) * 100).rename("Cells (%)")
285 |
286 | op = output_prefix + f"cluster_means.{res}_res."
287 | kws = dict(
288 | row_colors=cell_percs.to_frame().join(cell_counts),
289 | figsize=(10, 6 * res),
290 | )
291 | grid = clustermap(cluster_means, **kws)
292 | grid.savefig(op + "abs.svg")
293 | plt.close(grid.fig)
294 |
295 | grid = clustermap(cluster_means, **kws, config="z")
296 | grid.savefig(op + "zscore.svg")
297 | plt.close(grid.fig)
298 |
299 | # To plot topological domains:
300 | # df = (a.obs[args.sc_topo.columns.drop(["domain", "topological_domain"])]).replace(
301 | # {"False": False, "True": True, "nan": np.nan}
302 | # )
303 | # topo_means = df.groupby(a.obs[f"cluster_{res}"].values).mean()
304 | # topo_means = topo_means.loc[:, topo_means.sum() > 0]
305 |
306 | # g = clustermap(
307 | # topo_means.loc[cluster_means.index[grid.dendrogram_row.reordered_ind]],
308 | # figsize=(3, 6 * res),
309 | # config="z",
310 | # row_cluster=False,
311 | # cmap="PuOr_r",
312 | # )
313 | # g.savefig(op + "abs.topologic.svg")
314 |
315 | # g = clustermap(
316 | # topo_means.loc[cluster_means.index[grid.dendrogram_row.reordered_ind]],
317 | # figsize=(3, 6 * res),
318 | # config="z",
319 | # row_cluster=False,
320 | # cmap="PuOr_r",
321 | # )
322 | # g.savefig(op + "zscore.topologic.svg")
323 |
324 | # grid = clustermap(cluster_means, **kws, config="z", row_cluster=False)
325 | # grid.savefig(op + "zscore.sorted.svg")
326 | # g = clustermap(
327 | # topo_means,
328 | # figsize=(3, 6 * res),
329 | # config="z",
330 | # row_cluster=False,
331 | # cmap="PuOr_r",
332 | # )
333 | # g.savefig(op + "zscore.sorted.topologic.svg")
334 | # plt.close("all")
335 |
336 |
337 | def predict_cell_types_from_reference(
338 | quant: tp.Union[AnnData, DataFrame, Path],
339 | output_prefix: Path,
340 | covariates: DataFrame,
341 | method: str = "astir",
342 | astir_reference: Path = None,
343 | astir_parameters: tp.Dict[str, tp.Any] = {},
344 | ):
345 | import anndata
346 | import yaml
347 | from imc.utils import download_file
348 |
349 | # Get dataframe with expression
350 | if isinstance(quant, Path):
351 | if quant.endswith("csv") or quant.endswith("csv.gz"):
352 | quant = pd.read_csv(quant, index_col=0)
353 | elif quant.endswith(".h5ad"):
354 | quant = anndata.read(quant)
355 |     if isinstance(quant, anndata.AnnData):
356 | quant = quant.to_df()
357 |
358 | # Remove metal label from column names
359 | quant.columns = quant.columns.str.extract(r"(.*)\(.*")[0].fillna(
360 | quant.columns.to_series().reset_index(drop=True)
361 | )
362 |
363 | if method != "astir":
364 | raise NotImplementedError("Only the `astir` method is currently supported.")
365 |
366 | # Prepare reference dictionary
367 | if astir_reference is not None:
368 | reference = yaml.safe_load(astir_reference.open())
369 | else:
370 | # if not DEFAULT_CELL_TYPE_REFERENCE[1].exists():
371 | download_file(DEFAULT_CELL_TYPE_REFERENCE[0], DEFAULT_CELL_TYPE_REFERENCE[1])
372 | ref = yaml.safe_load(DEFAULT_CELL_TYPE_REFERENCE[1].open())
373 | reference = dict()
374 | reference["cell_types"] = unroll_reference_dict(ref["cell_types"], False)
375 | reference["cell_states"] = unroll_reference_dict(ref["cell_states"], False)
376 | reference = filter_reference_based_on_available_markers(reference, quant.columns)
377 |
378 | res = astir(
379 | input_expr=quant,
380 | marker_dict=reference,
381 | design=covariates,
382 | output_prefix=output_prefix,
383 | **astir_parameters,
384 | )
385 | return res
386 |
387 |
388 | def astir(
389 | input_expr: DataFrame,
390 | marker_dict: tp.Dict[str, tp.List[str]],
391 | design: DataFrame,
392 | output_prefix: Path,
393 | batch_size: int = None,
394 | max_epochs: int = 200,
395 | learning_rate: float = 2e-3,
396 | initial_epochs: int = 3,
397 | device: str = "cpu",
398 | plot: bool = True,
399 | ):
400 | from astir import Astir
401 | import torch
402 |
403 | if output_prefix.is_dir():
404 | output_prefix = output_prefix / "astir."
405 | output_prefix.parent.mkdir()
406 |
407 | ast = Astir(input_expr, marker_dict, design)
408 |     ast._device = torch.device(device)
409 | if batch_size is None:
410 | batch_size = ast.get_type_dataset().get_exprs_df().shape[0] // 100
411 |
412 | params = dict(
413 | max_epochs=max_epochs,
414 | batch_size=batch_size,
415 | learning_rate=learning_rate,
416 | n_init_epochs=initial_epochs,
417 | )
418 | res = pd.DataFrame(index=input_expr.index)
419 | if "cell_types" in marker_dict:
420 | ast.fit_type(**params)
421 | _t = ast.get_celltypes()
422 | res = res.join(_t)
423 | _tp = ast.get_celltype_probabilities()
424 | _tp.columns = _tp.columns + "_probability"
425 | res = res.join(_tp)
426 | if plot:
427 | fig, ax = plt.subplots(1, 1, figsize=(4, 2))
428 | ax.plot(ast.get_type_losses(), label="loss")
429 | ax.legend()
430 | ax.set(xlabel="Epochs", ylabel="Loss")
431 | fig.savefig(output_prefix + "cell_type.loss.svg", **FIG_KWS)
432 | plt.close(fig)
433 | if "cell_states" in marker_dict:
434 | ast.fit_state(**params)
435 | _s = ast.get_cellstates()
436 | res = res.join(_s)
437 | if plot:
438 | fig, ax = plt.subplots(1, 1, figsize=(4, 2))
439 | ax.plot(ast.get_state_losses(), label="loss")
440 | ax.legend()
441 | ax.set(xlabel="Epochs", ylabel="Loss")
442 | fig.savefig(output_prefix + "cell_state.loss.svg", **FIG_KWS)
443 | plt.close(fig)
444 | ast.save_models(output_prefix + "fitted_model.hdf5")
445 | return res
446 |
447 |
448 | def unroll_reference_dict(
449 | x: tp.Dict,
450 | name_with_predecessors: bool = True,
451 | max_depth: int = -1,
452 | _cur_depth: int = 0,
453 | _predecessors: tp.List[str] = [],
454 | ) -> tp.Dict:
455 | from copy import deepcopy
456 |
457 | x = deepcopy(x)
458 | new = dict()
459 | for k, v in x.items():
460 | if "markers" in v:
461 | name = " - ".join(_predecessors + [k]) if name_with_predecessors else k
462 | if v["markers"] != [None]:
463 | new[name] = v["markers"]
464 | v.pop("markers")
465 | if (
466 | isinstance(v, dict)
467 | and (len(v) > 0)
468 | and ((_cur_depth < max_depth) or max_depth == -1)
469 | ):
470 | new.update(
471 | unroll_reference_dict(
472 | v,
473 | name_with_predecessors=name_with_predecessors,
474 | max_depth=max_depth,
475 | _cur_depth=_cur_depth + 1,
476 | _predecessors=_predecessors + [k],
477 | )
478 | )
479 | return new
480 |
481 |
482 | def filter_reference_based_on_available_markers(
483 | x: tp.Dict, markers: tp.Sequence[str]
484 | ) -> tp.Dict:
485 | def _filter(x2):
486 | inter = dict()
487 | for k, v in x2.items():
488 | n = list(filter(lambda i: i in markers, v))
489 | if n:
490 | inter[k] = n
491 | return inter
492 |
493 | new = dict()
494 | new["cell_types"] = _filter(x["cell_types"])
495 | new["cell_states"] = _filter(x["cell_states"])
496 | return new
497 |
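498 | # Hypothetical end-to-end sketch, assuming `a` is an AnnData of single-cell
499 | # quantifications with an `obs["roi"]` column (paths are illustrative):
500 | #
501 | #   a = phenotyping(a, clustering_resolutions=(0.5, 1.0))
502 | #   plot_phenotyping(a, output_prefix=Path("results/phenotyping/"))
503 | #   means = anndata_to_cluster_means(a, cluster_label="cluster_1.0")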
--------------------------------------------------------------------------------
/imc/ops/community.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for community detection.
3 | """
4 |
5 | import typing as tp
6 | from collections import Counter
7 |
8 | import numpy as np
9 | import pandas as pd
10 | import matplotlib.pyplot as plt
11 | import seaborn as sns
12 | from tqdm import tqdm
13 | import parmap
14 | from anndata import AnnData
15 | import scanpy as sc
16 | import community
17 |
18 | import imc.data_models.roi as _roi
19 | from imc.exceptions import cast
20 | from imc.types import Series, Path
21 | from imc.graphics import add_legend
22 |
23 |
24 | FIG_KWS = dict(bbox_inches="tight", dpi=300)
25 |
26 | DEFAULT_SINGLE_CELL_RESOLUTION = 1.0
27 | MAX_BETWEEN_CELL_DIST = 4
28 | DEFAULT_COMMUNITY_RESOLUTION = 0.005
29 | DEFAULT_SUPERCOMMUNITY_RESOLUTION = 0.5
30 | # DEFAULT_SUPER_COMMUNITY_NUMBER = 12
31 |
32 |
33 | def find_communities(
34 | roi: _roi.ROI,
35 | community_resolution: float = DEFAULT_COMMUNITY_RESOLUTION,
36 | plot: bool = True,
37 | ) -> tp.Tuple[Series, tp.Tuple]:
38 | # def networkx_to_igraph(graph):
39 | # import igraph as ig
40 | # g = ig.Graph(edges=list(graph.edges))
41 | # # If the original graph has non-consecutive integer labels,
42 | # # igraph will create a node for the non existing vertexes.
43 | # # These can simply be removed from the graph.
44 | # nodes = pd.Series(list(graph.nodes))
45 | # vertexes = pd.Series(range(len(g.vs)))
46 | # g.delete_vertices(vertexes[~vertexes.isin(nodes)].values)
47 | # return g
48 |
49 | def get_community_members(partition: tp.Dict) -> tp.Dict:
50 | counts = Counter(partition)
51 | # {com: members}
52 | comms: tp.Dict[int, set] = dict()
53 | for com in counts.keys():
54 | comms[com] = set()
55 | for n, com in partition.items():
56 | comms[com].add(n)
57 | return comms
58 |
59 | def get_community_cell_type_composition(roi: _roi.ROI, partition: Series):
60 | cts = dict()
61 | for com, members in get_community_members(partition).items():
62 | # cts[f"{roi.sample.name} - {roi.roi_number} - {com}"] = \
63 | cts[com] = roi.clusters.loc[members].value_counts()
64 | return (
65 | pd.DataFrame(cts)
66 | .fillna(0)
67 | .rename_axis(index="cell_type", columns="community")
68 | .astype(int)
69 | )
70 |
71 | # Community finding in graph (overclustering)
72 | roi_output_prefix = roi.sample.root_dir / "single_cell" / (roi.name + ".communities.")
73 |
74 | # TODO: use leiden instead of louvain
75 | # g = networkx_to_igraph(roi.adjacency_graph)
76 | # p = partitions[roi] = pd.Series(
77 | # la.find_partition(
78 | # g, la.RBConfigurationVertexPartition,
79 | # resolution_parameter=community_resolution).membership,
80 | # name="community", index=roi.adjacency_graph.nodes).sort_index()
81 | partition = pd.Series(
82 | community.best_partition(
83 | roi.adjacency_graph, resolution=community_resolution
84 | ), # , weight="expr_weight")
85 | name="community",
86 | ).sort_index()
87 | n = partition.value_counts().shape[0]
88 | tqdm.write(f"Found {n} communities for ROI {roi}.")
89 | partition += 1
90 | partition.to_csv(roi_output_prefix + "graph_partition.csv")
91 | comps = (
92 | get_community_cell_type_composition(roi, partition)
93 | .T.assign(sample=roi.sample.name, roi=roi.name)
94 | .set_index(["sample", "roi"], append=True)
95 | )
96 | comps.index = comps.index.reorder_levels(["sample", "roi", "community"])
97 |
98 | if plot:
99 | # get cell type counts per community
100 | comps_s = comps.reset_index(level=["sample", "roi"], drop=True)
101 | percent = (comps_s.T / comps_s.sum(1)) * 100
102 | grid = sns.clustermap(
103 | percent, metric="correlation", cbar_kws=dict(label="% of cell type")
104 | )
105 | grid.savefig(roi_output_prefix + "cell_type_composition.svg", **FIG_KWS)
106 | grid = sns.clustermap(
107 | percent,
108 | z_score=1,
109 | cmap="RdBu_r",
110 | center=0,
111 | metric="correlation",
112 | cbar_kws=dict(label="% of cell type (Z-score)"),
113 | )
114 | grid.savefig(roi_output_prefix + "cell_type_composition.zscore.svg", **FIG_KWS)
115 | return partition, comps
116 |
117 |
118 | def cluster_communities(
119 | rois: tp.Sequence[_roi.ROI],
120 | output_prefix: Path = None,
121 | supercommunity_resolution: float = DEFAULT_SUPERCOMMUNITY_RESOLUTION,
122 | ) -> Series:
123 | from scipy.cluster.hierarchy import fcluster
124 |
125 | output_prefix = output_prefix or (
126 | rois[0].prj.processed_dir / "single_cell" / (rois[0].prj.name + ".communities.")
127 | )
128 | output_prefix = cast(output_prefix)
129 |
130 | res = parmap.map(find_communities, rois)
131 | partitions = {k: v[0] for k, v in zip(rois, res)}
132 | composition = pd.concat([v[1] for v in res]).fillna(0).astype(int).sort_index()
133 |     composition.to_csv(output_prefix + "all_communities.cell_type_composition.csv")
134 |
135 | print(f"Found {composition.shape[0]} communities across all ROIs.")
136 |
137 | composition = pd.read_csv(
138 |         output_prefix + "all_communities.cell_type_composition.csv",
139 | index_col=[0, 1, 2],
140 | )
141 |
142 | # Cluster communities by leiden clustering based on cell type composition
143 | a = AnnData(composition)
144 | sc.pp.log1p(a)
145 | sc.pp.neighbors(a)
146 | sc.tl.leiden(a, resolution=supercommunity_resolution, key_added="supercommunity")
147 | n_scomms = len(a.obs["supercommunity"].unique())
148 | print(f"Found {n_scomms} supercommunities.")
149 | # Make supercommunities 1-based (to distinguish from masks where 0 == background)
150 | a.obs["supercommunity"] = pd.Categorical(a.obs["supercommunity"].astype(int) + 1)
151 | sc.tl.umap(a)
152 | sc.pp.pca(a)
153 |
154 | # DataFrame(cell vs [celltype, community, supercommunity])
155 | _assignments = list()
156 | for roi in rois:
157 | # {cell: cell type}
158 | if roi.clusters.dtype == "int" and roi.clusters.min() == 0:
159 | c1 = (
160 | roi.clusters + 1
161 |             )  # TODO: remove this +1 once clustering is re-run with the new implementation
162 | else:
163 | c1 = roi.clusters
164 | # {cell: community}
165 | c2 = pd.Series(partitions[roi], name="community").rename_axis(index="obj_id")
166 | scomm = a.obs.loc[(roi.sample.name, roi.name), "supercommunity"].astype(int)
167 | assert c2.value_counts().shape[0] == scomm.shape[0]
168 | c3 = c2.replace(scomm.to_dict()).rename("supercommunity")
169 | assert c3.max() <= n_scomms
170 | assert c1.shape == c2.shape == c3.shape
171 | assert (c1.index == c2.index).all()
172 | assert (c2.index == c3.index).all()
173 | c = c1.to_frame().join(c2).join(c3)
174 | assert roi.clusters.shape[0] == c.shape[0]
175 | c["sample"] = roi.sample.name
176 | c["roi"] = roi.roi_number
177 | _assignments.append(c)
178 | assignments = pd.concat(_assignments).set_index(["sample", "roi"], append=True)
179 | assignments.index = assignments.index.reorder_levels(["sample", "roi", "obj_id"])
180 |
181 | # Further merge supercommunities if distant by less than X% of composition
182 | # TODO: revise supercommunity merging
183 | max_supercommunity_difference = 10.0
184 | comp = assignments.assign(count=1).pivot_table(
185 | index="supercommunity",
186 | columns="cluster",
187 | values="count",
188 | aggfunc=sum,
189 | fill_value=0,
190 | )
191 |
192 | perc = (comp.T / comp.sum(1)).T * 100
193 | diffs = pd.DataFrame(
194 | np.sqrt(abs(perc.values - perc.values[:, None]).sum(axis=2)),
195 | index=perc.index,
196 | columns=perc.index,
197 | )
198 | grid = sns.clustermap(diffs)
199 | repl = pd.Series(
200 | dict(
201 | zip(
202 | grid.data.columns,
203 | fcluster(
204 | grid.dendrogram_col.linkage,
205 | t=max_supercommunity_difference,
206 | criterion="distance",
207 | ),
208 | )
209 | )
210 | ).sort_index()
211 |
212 | comp.index = comp.index.to_series().replace(repl)
213 | comp = comp.groupby(level=0).sum()
214 |
215 | assignments["supercommunity"] = assignments["supercommunity"].replace(repl)
216 |
217 |     # renumber supercommunities so they are sorted by the abundance of their cell types
218 | s = assignments["supercommunity"].value_counts().sort_values(ascending=False)
219 | assignments["supercommunity"] = assignments["supercommunity"].replace(
220 |         dict(zip(s.index, np.arange(1, len(s) + 1)))
221 | )
222 |
223 | # save final assignments
224 | assignments.to_csv(output_prefix + "cell_type.community.supercommunities.csv")
225 |
226 | # Visualize
227 | # # visualize initial communities in clustermap, PCA or UMAP
228 | perc = (composition.T / composition.sum(1)).T * 100
229 | grid = sns.clustermap(perc, metric="correlation", rasterized=True)
230 | grid.savefig(
231 | output_prefix
232 | + "communities.cell_type_composition.leiden_clustering.clustermap_viz.svg",
233 | **FIG_KWS,
234 | )
235 | grid = sns.clustermap(
236 | np.log1p(composition),
237 | row_linkage=grid.dendrogram_row.linkage,
238 | col_linkage=grid.dendrogram_col.linkage,
239 | metric="correlation",
240 | row_colors=plt.get_cmap("tab20")(a.obs["supercommunity"].astype(int)),
241 | rasterized=True,
242 | )
243 | grid.savefig(
244 | output_prefix
245 | + "communities.cell_type_composition.leiden_clustering.clustermap_viz.counts.svg",
246 | **FIG_KWS,
247 | )
248 | for method in ["pca", "umap"]:
249 | fig = getattr(sc.pl, method)(
250 | a,
251 | color=["supercommunity"] + a.var.index.tolist(),
252 | return_fig=True,
253 | show=False,
254 | )
255 | fig.savefig(
256 | output_prefix
257 | + f"communities.cell_type_composition.leiden_clustering.{method}_viz.svg",
258 | **FIG_KWS,
259 | )
260 |
261 |     # # visualize the reduction of supercommunities based on the difference threshold
262 | grid = sns.clustermap(
263 | diffs,
264 | col_colors=plt.get_cmap("tab20")(repl.values),
265 | row_colors=plt.get_cmap("tab20")(repl.values),
266 | cbar_kws=dict(label="Sqrt(Sum(diff))"),
267 | )
268 | grid.savefig(
269 | output_prefix + "supercommunities.reduction_by_diff.clustermap.svg",
270 | **FIG_KWS,
271 | )
272 |
273 | # assignments = pd.read_csv(output_prefix + "cell_type.community.supercommunities.csv", index_col=[0, 1, 2])
274 | # # cell type vs {community, supercommunity}
275 | for var_ in ["community", "supercommunity"]:
276 | supercts = assignments.assign(count=1).pivot_table(
277 | index="cluster",
278 | columns=var_,
279 | values="count",
280 | aggfunc=sum,
281 | fill_value=0,
282 | )
283 | perc_supercts = (supercts / supercts.sum()) * 100
284 |
285 | grid = sns.clustermap(
286 | perc_supercts,
287 | metric="correlation",
288 | rasterized=True,
289 | cbar_kws=dict(label="% of supercommunity"),
290 | )
291 | grid.savefig(output_prefix + f"{var_}.cell_type_composition.svg", **FIG_KWS)
292 | grid = sns.clustermap(
293 | perc_supercts,
294 | z_score=1,
295 | cmap="RdBu_r",
296 | center=0,
297 | metric="correlation",
298 | rasterized=True,
299 | cbar_kws=dict(label="% of supercommunity (Z-score)"),
300 | )
301 | grid.savefig(
302 | output_prefix + f"{var_}.cell_type_composition.zscore.svg",
303 | **FIG_KWS,
304 | )
305 |
306 | leg_kws = dict(bbox_to_anchor=(0, -0.05))
307 |
308 | vars_ = ["cluster", "community", "supercommunity"]
309 | n = len(rois)
310 | m = len(vars_)
311 | patches: tp.Dict[str, tp.List] = dict()
312 | fig, axes = plt.subplots(
313 | n, m, figsize=(4 * m, 4 * n), squeeze=False, sharex="row", sharey="row"
314 | )
315 | for i, roi in enumerate(rois):
316 | for j, var_ in enumerate(vars_):
317 | if i == 0:
318 | patches[var_] = list()
319 | p = roi.plot_cell_types(
320 | ax=axes[i, j, np.newaxis, np.newaxis],
321 | cell_type_assignments=assignments.loc[
322 | (roi.sample.name, roi.roi_number), var_
323 | ],
324 | palette="nipy_spectral",
325 | )
326 | patches[var_] += p
327 | for j, var_ in enumerate(vars_):
328 | if var_ == "community":
329 | continue
330 | add_legend(patches[var_], axes[-1, j], **leg_kws) # label="Super community",
331 | _z = zip(
332 | axes[0].squeeze(),
333 | ["Cell types", "Communities", "Super communities"],
334 | )
335 | for axs, lab in _z:
336 | axs.set_title(lab)
337 | # TODO: limit rasterization to main image
338 | for axs in axes.flat:
339 | axs.set_rasterized(True)
340 | fig.savefig(output_prefix + "communities_supercommunities.all_rois.svg", **FIG_KWS)
341 |
342 | return assignments["supercommunity"]
343 |
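344 | # Hypothetical usage sketch, assuming `prj` is an `imc.Project` whose ROIs
345 | # have clusters and adjacency graphs precomputed:
346 | #
347 | #   partition, composition = find_communities(prj.rois[0])  # one ROI
348 | #   supercommunities = cluster_communities(prj.rois)         # across ROIs
349 | #   # returns a Series indexed by (sample, roi, obj_id) with supercommunity labels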
--------------------------------------------------------------------------------
/imc/ops/compensation.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 |
3 | """
4 | Functions for compensation of imaging mass cytometry data.
5 | """
6 |
7 | from functools import partial
8 | import typing as tp
9 |
10 | import numpy as np
11 | import pandas as pd
12 | from scipy.optimize import nnls
13 | import parmap
14 |
15 | from imc import ROI
16 | from imc.types import Array, DataFrame
17 |
18 |
19 | def stack_to_flat_array(stack: Array) -> Array:
20 | return stack.reshape((stack.shape[0], -1)).T
21 |
22 |
23 | def _get_cytospill_spillover_matrix(
24 | array: DataFrame, subsample_frac: float = None, subsample_n: int = None
25 | ) -> Array:
26 | """
27 | The columns of array must be metal labels (e.g. Nd142Di)!
28 |
29 |     Requires the GitHub version of CytoSpill installed from a local clone,
30 |     not through devtools pointing to the GitHub repo (the reason is unclear).
31 |
32 | $ git clone https://github.com/KChen-lab/CytoSpill.git
33 | $ R CMD INSTALL CytoSpill/
34 | """
35 | from rpy2.robjects import numpy2ri, pandas2ri
36 | from rpy2.robjects.packages import importr
37 |
38 | numpy2ri.activate()
39 | pandas2ri.activate()
40 |
41 | cytospill = importr("CytoSpill")
42 |
43 | if subsample_frac is not None:
44 | subsample_n = int(array.shape[0] * subsample_frac)
45 |
46 | kwargs = dict()
47 | if subsample_n is not None:
48 | kwargs["n"] = subsample_n
49 |
50 | spillover_matrix, thresholds = cytospill.GetSpillMat(
51 | data=array,
52 | cols=np.arange(array.shape[1]),
53 | threshold=0.1,
54 | flexrep=5,
55 | neighbor=2,
56 | **kwargs,
57 | )
58 | # spillover_matrix = pd.DataFrame(spillover_matrix, index=df.columns, columns=df.columns)
59 | return spillover_matrix
60 |
61 |
62 | def _get_correlation_spillover_matrix(array: Array, k=60) -> Array:
63 | return k ** np.corrcoef(array.T) / k
64 |
65 |
66 | def get_spillover_matrix(array: Array, method: str = "cytospill", **kwargs) -> Array:
67 |     """Estimate a channel spillover matrix using either CytoSpill or channel correlation."""
68 | if method == "cytospill":
69 | return _get_cytospill_spillover_matrix(array, **kwargs)
70 | if method == "correlation":
71 | return _get_correlation_spillover_matrix(array)
72 | raise ValueError("`method` must be one of 'cytospill' or 'correlation'.")
73 |
74 |
75 | def compensate_array(
76 | flat_array: Array, spillover_matrix: Array, original_shape: tp.Tuple[int, int, int]
77 | ) -> Array:
78 | new_shape = original_shape[1:] + (original_shape[0],)
79 | _nnls = partial(nnls, spillover_matrix)
80 | res = parmap.map(_nnls, flat_array)
81 | comp = np.asarray([x[0] for x in res])
82 | return np.moveaxis(
83 | (comp).reshape(new_shape),
84 | -1,
85 | 0,
86 | )
87 |
88 |
89 | def compensate_image_stack(roi: ROI, normalize: bool = True) -> Array:
90 | from imc.segmentation import normalize as _normf
91 |
92 | stack = roi.stack
93 | if roi.channel_exclude is not None:
94 | if roi.channel_exclude.any():
95 | stack = stack[~roi.channel_exclude]
96 | if normalize:
97 | stack = _normf(stack)
98 | flat_array = stack_to_flat_array(stack)
99 |
100 | labels = roi.channel_labels[~roi.channel_exclude.values]
101 | metals = labels.str.extract(r".*\((.*)\)")[0] + "Di"
102 | df = pd.DataFrame(flat_array, columns=metals) # .iloc[:, 4:-4]
103 | spill = get_spillover_matrix(df, subsample_n=2000)
104 | comp_stack = compensate_array(flat_array, spill, roi.stack.shape)
105 | return comp_stack
106 |
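107 | # Hypothetical usage sketch of the compensation steps on a single ROI stack:
108 | #
109 | #   stack = roi.stack                                    # (channels, y, x)
110 | #   flat = stack_to_flat_array(stack)                    # (pixels, channels)
111 | #   spill = get_spillover_matrix(flat, method="correlation")
112 | #   comp = compensate_array(flat, spill, stack.shape)    # back to (channels, y, x)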
--------------------------------------------------------------------------------
/imc/ops/domain.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for image annotations.
3 |
4 | """
5 |
6 | import os
7 | import json
8 | import typing as tp
9 | from collections import Counter
10 |
11 | import numpy as np
12 | import pandas as pd
13 | import matplotlib
14 | import matplotlib.pyplot as plt
15 | import seaborn as sns
16 | from tqdm import tqdm
17 |
18 | import imc.data_models.roi as _roi
19 | from imc.types import DataFrame, Array, Path
20 |
21 |
22 | def label_domains(
23 | rois: tp.Sequence[_roi.ROI],
24 | output_dir: Path,
25 | export: bool = True,
26 | domains: tp.Sequence[str] = ["T", "S", "A", "L", "V", "E"],
27 | **kwargs,
28 | ) -> None:
29 | """
30 |     Draw shapes outlining topological domains in tissue.
31 | This step is done manually using the `labelme` program.
32 |
33 | $ labelme --autosave --labels metadata/labelme_labels.txt
34 | """
35 | if export:
36 | export_images_for_topological_labeling(rois, output_dir, **kwargs)
37 |
38 | labels_f = (output_dir).mkdir() / "labelme_labels.txt"
39 | with open(labels_f, "w") as handle:
40 | handle.write("\n".join(domains))
41 | os.system(f"labelme --autosave --labels {labels_f} {output_dir}")
42 |
43 |
44 | def export_images_for_topological_labeling(
45 | rois: tp.Sequence[_roi.ROI],
46 | output_dir: Path,
47 | channels: tp.Sequence[str] = ["mean"],
48 | overwrite: bool = False,
49 | ) -> None:
50 | """
51 | Export PNGs for labeling with `labelme`.
52 | """
53 | for roi in tqdm(rois):
54 | f = output_dir / roi.name + ".jpg"
55 | if not overwrite and f.exists():
56 | continue
57 | array = roi._get_channels(channels, minmax=True, equalize=True)[1].squeeze()
58 | if array.ndim > 2:
59 | array = np.moveaxis(array, 0, -1)
60 | matplotlib.image.imsave(f, array)
61 |
62 |
63 | def collect_domains(
64 | input_dir: Path, rois: tp.Sequence[_roi.ROI] = None, output_file: Path = None
65 | ) -> tp.Dict[str, tp.Dict]:
66 | if rois is not None:
67 | roi_names = [r.name for r in rois]
68 |
69 | filenames = list(input_dir.glob("*.json"))
70 | if rois is not None:
71 | filenames = [f for f in filenames if f.stem in roi_names]
72 |
73 | topo_annots = dict()
74 | for filename in tqdm(filenames):
75 | annot_f = filename.replace_(".jpg", ".json")
76 | if not annot_f.exists():
77 | continue
78 | with open(annot_f, "r") as handle:
79 | annot = json.load(handle)
80 | if annot["shapes"]:
81 | topo_annots[filename.stem] = annot["shapes"]
82 | if output_file is not None:
83 | with open(output_file, "w") as handle:
84 | json.dump(topo_annots, handle, indent=4)
85 | return topo_annots
86 |
87 |
88 | def illustrate_domains(
89 | topo_annots: tp.Dict[str, tp.Dict],
90 | rois: tp.Sequence[_roi.ROI],
91 | output_dir: Path,
92 | channels: tp.Sequence[str],
93 | domain_exclude: tp.Sequence[str] = None,
94 | cleanup: bool = False,
95 | cmap_str: str = "Set3",
96 | ) -> None:
97 | """
98 | Illustrate annotated topological domains of each ROI.
99 | """
100 | from imc.utils import polygon_to_mask
101 | from imc.graphics import legend_without_duplicate_labels
102 | from shapely.geometry import Polygon
103 |
104 | if domain_exclude is None:
105 | domain_exclude = []
106 |
107 | output_dir.mkdir()
108 |
109 |     labels = sorted(set(geom["label"] for n, j in topo_annots.items() for geom in j))
110 | label_color = dict(zip(labels, sns.color_palette(cmap_str)))
111 | label_order = dict(zip(labels, range(1, len(labels) + 1)))
112 | cmap = plt.get_cmap(cmap_str)(range(len(labels) + 1))
113 | cmap = np.vstack([[0, 0, 0, 1], cmap])
114 |
115 | for roi_name in tqdm(topo_annots):
116 | roi = [r for r in rois if r.name == roi_name][0]
117 | shapes = topo_annots[roi_name]
118 |
119 | # re-order shapes so that largest are first
120 | areas = [
121 | polygon_to_mask(shape["points"], roi.shape[1:][::-1]).sum()
122 | for shape in shapes
123 | ]
124 | shapes = np.asarray(shapes)[np.argsort(areas)[::-1]].tolist()
125 |
126 | annot_mask = np.zeros(roi.shape[1:])
127 | for shape in shapes:
128 | if shape["label"] in domain_exclude:
129 | continue
130 | region = polygon_to_mask(shape["points"], roi.shape[1:][::-1])
131 | annot_mask[region > 0] = label_order[shape["label"]]
132 |
133 | ar = roi.shape[1] / roi.shape[2]
134 |
135 | fig, axes = plt.subplots(
136 | 1, 2, figsize=(2 * 4, 4 * ar), gridspec_kw=dict(wspace=0, hspace=0)
137 | )
138 | extra_txt = (
139 | ""
140 | if getattr(roi, "attributes", None) is None
141 | else "; ".join([str(getattr(roi, attr)) for attr in roi.attributes])
142 | )
143 |
144 | axes[0].set(title=roi.name + "\n" + extra_txt)
145 | roi.plot_channels(channels, axes=[axes[0]], merged=True)
146 |
147 | shape_types: Counter[str] = Counter()
148 | for shape in shapes:
149 | label: str = shape["label"]
150 | if label in domain_exclude:
151 | continue
152 | shape_types[label] += 1
153 | c = Polygon(shape["points"]).centroid
154 | axes[1].text(
155 | c.x,
156 | c.y,
157 | s=f"{label}{shape_types[label]}",
158 | ha="center",
159 | va="center",
160 | )
161 | axes[0].plot(
162 | *np.asarray(shape["points"] + [shape["points"][0]]).T,
163 | label=label,
164 | color=cmap[label_order[label]],
165 | )
166 |
167 | axes[1].imshow(
168 | annot_mask,
169 | cmap=matplotlib.colors.ListedColormap(cmap),
170 | vmax=len(label_color) + 1,
171 | interpolation="none",
172 | )
173 | axes[1].set(title="Manual annotations")
174 | legend_without_duplicate_labels(
175 | axes[0], title="Domain:", bbox_to_anchor=(-0.1, 1), loc="upper right"
176 | )
177 | for ax in axes:
178 | ax.axis("off")
179 | fig.savefig(
180 | output_dir / roi.name + ".annotations.pdf",
181 | dpi=300,
182 | bbox_inches="tight",
183 | )
184 | plt.close(fig)
185 |
186 | cmd = f"""pdftk
187 | {output_dir}/*.annotations.pdf
188 | cat
189 | output
190 | {output_dir}/topological_domain_annotations.pdf"""
191 | os.system(cmd.replace("\n", " "))
192 |
193 | if cleanup:
194 | files = output_dir.glob("*.annotations.pdf")
195 | for file in files:
196 | file.unlink()
197 |
198 |
199 | def get_domains_per_cell(
200 | topo_annots: tp.Dict[str, tp.Dict],
201 | rois: tp.Sequence[_roi.ROI],
202 | exclude_domains: tp.Sequence[str] = None,
203 | remaining_domain: tp.Union[str, tp.Dict[str, str]] = "background",
204 | resolution: str = "largest",
205 | nest_domains: bool = True,
206 | ) -> DataFrame:
207 | """
208 |     Annotate each cell with the topological domain it is contained in,
209 |     based on manually annotated masks.
210 |
211 | Parameters
212 | ----------
213 | topo_annots: dict
214 | Dictionary of annotations for each ROI.
215 | rois: list
216 | List of ROI objects.
217 | exclude_domains: list[str]
218 |         Domains to ignore.
219 | remaining_domain: str | dict[str, str]
220 | Name of domain to fill in for cells that do not fall under any domain annotation.
221 | If given a string, it will simply use that.
222 | If given a dict, the filled domain will be the value of the key which exists in the image.
223 | E.g. Annotating tumor/stroma domains. If an image has only domains of type 'Tumor',
224 | given `remaining_domain` == {'Tumor': 'Stroma', 'Stroma': 'Tumor'}, the remaining cells
225 | will be annotated with 'Stroma'. In an image annotated only with 'Stroma' domains,
226 | remaining cells will be annotated with 'Tumor' domains.
227 | resolution: str
228 | If `remaining_domain` is a dict, there may be more than one domain present in the image.
229 | A resolution method is thus needed to select which domain will be filled for the remaining cells.
230 | - 'largest' will choose as key of `remaining_domain` the largest annotated domain class.
231 | - 'unique' will be strict and only fill in if there is a unique domain.
232 | """
233 | from imc.utils import polygon_to_mask
234 |
235 | if exclude_domains is None:
236 | exclude_domains = []
237 |
238 | _full_assigns = list()
239 | for roi_name, shapes in tqdm(topo_annots.items()):
240 | roi = [r for r in rois if r.name == roi_name][0]
241 | mask = roi.mask
242 | cells = np.unique(mask)[1:]
243 | td_count: tp.Counter[str] = Counter()
244 | regions = list()
245 | _assigns = list()
246 | for shape in shapes:
247 | label = shape["label"]
248 | points = shape["points"]
249 | if label in exclude_domains:
250 | continue
251 | td_count[label] += 1
252 | points += [points[0]]
253 | region = polygon_to_mask(points, roi.shape[1:][::-1])
254 | regions.append(region)
255 | assign = (
256 | pd.Series(np.unique(mask[(mask > 0) & region]), name="obj_id")
257 | .to_frame()
258 | .assign(
259 | roi=roi.name,
260 | sample=roi.sample.name,
261 | domain_id=f"{label}{td_count[label]}",
262 | )
263 | )
264 | _assigns.append(assign)
265 |
266 |         ## if remaining_domain explicitly annotated, skip
267 | if isinstance(remaining_domain, str):
268 | if remaining_domain in td_count:
269 | print(
270 | f"ROI '{roi.name}' has been manually annotated"
271 | " with remaining domains."
272 | )
273 | _full_assigns += _assigns
274 | continue
275 |
276 | ## add a domain for cells not annotated
277 | remain = ~np.asarray(regions).sum(0).astype(bool)
278 | existing = np.sort(pd.concat(_assigns)["obj_id"].unique())
279 | remain = remain & (~np.isin(mask, existing))
280 | if remain.sum() == 0:
281 | _full_assigns += _assigns
282 | continue
283 |
284 | if isinstance(remaining_domain, str):
285 |             ### if given a string, just make that the domain for unannotated cells
286 | domain = remaining_domain
287 | # print(f"ROI '{roi.name}' will be annotated with '{domain}' by default.")
288 |
289 | elif isinstance(remaining_domain, dict):
290 |             ### if given a dict, choose the fill-in label based on which domains exist
291 |             ### useful when labeling e.g. tumor/stroma, where an image may contain only one of them
292 | existing_domains = pd.concat(_assigns)["domain_id"].value_counts()
293 | existing_domains.index = existing_domains.index.str.replace(
294 | r"\d+", "", regex=True
295 | )
296 | repl = set(v for k, v in remaining_domain.items() if k in existing_domains)
297 | if resolution == "largest":
298 | domain = remaining_domain[existing_domains.idxmax()]
299 | elif resolution == "unique":
300 | if len(repl) == 1:
301 | domain = repl.pop()
302 | else:
303 | raise ValueError(
304 | "More than one domain was detected and it is"
305 | " unclear how to annotate the remaining cells "
306 | f"with the mapping: {remaining_domain}"
307 | )
308 |
309 | assign = (
310 | pd.Series(np.unique(mask[remain]), name="obj_id")
311 | .drop(0, errors="ignore")
312 | .to_frame()
313 | .assign(
314 | roi=roi.name,
315 | sample=roi.sample.name,
316 | domain_id=domain + "1",
317 | )
318 | )
319 | _assigns.append(assign)
320 | _full_assigns += _assigns
321 |
322 | assigns = pd.concat(_full_assigns)
323 | assigns["topological_domain"] = assigns["domain_id"].str.replace(
324 | r"\d", "", regex=True
325 | )
326 |
327 | # reduce duplicated annotations but for cells annotated with background, make that the primary annotation
328 | id_cols = ["sample", "roi", "obj_id"]
329 | assigns = (
330 | assigns.groupby(id_cols).apply(
331 | lambda x: x
332 | if (x.shape[0] == 1)
333 | else x.loc[x["topological_domain"] == remaining_domain, :]
334 | if (x["topological_domain"] == remaining_domain).any()
335 | else x
336 | )
337 | # .drop(id_cols, axis=1)
338 | .reset_index(level=-1, drop=True)
339 | ).set_index(id_cols)
340 |
341 | # If more than one domain per cell:
342 | if nest_domains:
343 | # Keep them all
344 | assigns = assigns.groupby(id_cols)["domain_id"].apply("-".join).to_frame()
345 | assigns["topological_domain"] = assigns["domain_id"].str.replace(
346 | r"\d", "", regex=True
347 | )
348 | else:
349 |         # make sure no cell assigned more than one domain is annotated with the background/remaining domain
350 | tpc = assigns.groupby(id_cols)["domain_id"].nunique()
351 | cells = tpc.index
352 | assert not assigns.loc[cells[tpc > 1]].isin([remaining_domain]).any().any()
353 |
354 | assigns = (
355 | assigns.reset_index()
356 | .drop_duplicates(subset=id_cols)
357 | .set_index(id_cols)
358 | .sort_index()
359 | )
360 |
361 | # expand domains
362 | for domain in assigns["topological_domain"].unique():
363 | assigns[domain] = assigns["topological_domain"] == domain
364 |
365 | return assigns
366 |
367 |
368 | @tp.overload
369 | def get_domain_areas(
370 |     topo_annots: tp.Dict[str, tp.List[tp.Dict]],
371 |     rois: tp.Sequence[_roi.ROI],
372 |     per_domain: tp.Literal[False],
373 | ) -> tp.Dict[str, float]:
374 | ...
375 |
376 |
377 | @tp.overload
378 | def get_domain_areas(
379 |     topo_annots: tp.Dict[str, tp.List[tp.Dict]],
380 |     rois: tp.Sequence[_roi.ROI],
381 |     per_domain: tp.Literal[True],
382 | ) -> DataFrame:
383 | ...
384 |
385 |
386 | def get_domain_areas(
387 |     topo_annots: tp.Dict[str, tp.List[tp.Dict]],
388 |     rois: tp.Sequence[_roi.ROI] = None,
389 |     per_domain: bool = False,
390 | ) -> tp.Union[tp.Dict[str, float], DataFrame]:
391 |     """
392 |     Get the area of annotated topological domains per image, in microns.
393 |     """
394 |     from shapely.geometry import Polygon
395 | 
396 |     mpp = 1  # microns per pixel (assumed 1; adjust for images with a different scale)
397 | if rois is not None:
398 | roi_names = [r.name for r in rois]
399 | topo_annots = {k: v for k, v in topo_annots.items() if k in roi_names}
400 |
401 | _areas = list()
402 | for roi_name, shapes in tqdm(topo_annots.items()):
403 | count: tp.Counter[str] = Counter()
404 | for shape in shapes:
405 | label = shape["label"]
406 | count[label] += 1
407 | a = Polygon(shape["points"]).area
408 | _areas.append([roi_name, label + str(count[label]), a * mpp])
409 |
410 | areas = (
411 | pd.DataFrame(_areas)
412 | .rename(columns={0: "roi", 1: "domain_obj", 2: "area"})
413 | .set_index("roi")
414 | )
415 | areas["topological_domain"] = areas["domain_obj"].str.replace(r"\d", "", regex=True)
416 | if not per_domain:
417 | areas = areas.groupby("roi")["area"].sum().to_dict()
418 |
419 | return areas
420 |
421 |
422 | def get_domain_masks(
423 | topo_annots: tp.Dict,
424 | rois: tp.Sequence[_roi.ROI],
425 | exclude_domains: tp.Sequence[str] = None,
426 | fill_remaining: str = None,
427 | per_domain: bool = False,
428 | ) -> Array:
429 | _x = list()
430 | for roi in rois:
431 | x = get_domain_mask(
432 | topo_annots[roi.name],
433 | roi,
434 | exclude_domains=exclude_domains,
435 | fill_remaining=fill_remaining,
436 | per_domain=per_domain,
437 | )
438 | _x.append(x)
439 | x = np.asarray(_x)
440 | return x
441 |
442 |
443 | def get_domain_mask(
444 | topo_annot: tp.Dict,
445 | roi: _roi.ROI,
446 | exclude_domains: tp.Sequence[str] = None,
447 | fill_remaining: str = None,
448 | per_domain: bool = False,
449 | ) -> Array:
450 | """ """
451 | import tifffile
452 | from imc.utils import polygon_to_mask
453 |
454 | if exclude_domains is None:
455 | exclude_domains = []
456 |
457 | _, h, w = roi.shape
458 | masks = list()
459 | region_types = list()
460 | region_names = list()
461 | count: tp.Counter[str] = Counter()
462 | for shape in topo_annot:
463 | shape["points"] += [shape["points"][0]]
464 | region = polygon_to_mask(shape["points"], (w, h))
465 | label = shape["label"]
466 | count[label] += 1
467 | masks.append(region)
468 | region_types.append(label)
469 | region_names.append(label + str(count[label]))
470 |
471 | for_mask = np.asarray(
472 | [m for ll, m in zip(region_types, masks) if ll not in exclude_domains]
473 | ).sum(0)
474 | if fill_remaining is not None:
475 | masks += [for_mask == 0]
476 | region_types += [fill_remaining]
477 | for_mask[for_mask == 0] = -1
478 | exc_mask = np.asarray(
479 | [m for ll, m in zip(region_types, masks) if ll in exclude_domains]
480 | ).sum(0)
481 | mask: Array = (
482 | ((for_mask != 0) & ~(exc_mask != 0))
483 | if isinstance(exc_mask, np.ndarray)
484 | else for_mask
485 | ).astype(bool)
486 |
487 | if per_domain:
488 | nmask = np.empty_like(mask, dtype="object")
489 | for r, ll in zip(masks, region_types):
490 | if ll not in exclude_domains:
491 | nmask[mask & r] = ll
492 | mask = np.ma.masked_array(nmask, mask=nmask == None)
493 |
494 | return mask
495 |
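496 | 
497 | # Usage sketch (editor's addition; `prj` and the JSON file below are hypothetical stand-ins).
498 | # Topological annotations are expected as {roi_name: [{"label": str, "points": [[x, y], ...]}, ...]}.
499 | #
500 | # import json
501 | # from imc import Project
502 | #
503 | # prj = Project()
504 | # topo_annots = json.load(open("topological_domains.json"))
505 | # areas = get_domain_areas(topo_annots, rois=prj.rois, per_domain=True)
506 | # mask = get_domain_mask(
507 | #     topo_annots[prj.rois[0].name], prj.rois[0], fill_remaining="background", per_domain=True
508 | # )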
--------------------------------------------------------------------------------
/imc/ops/mixture.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for mixtures of signal.
3 | """
4 |
5 | import typing as tp
6 |
7 | import numpy as np
8 | import pandas as pd
9 | import matplotlib.pyplot as plt
10 | import seaborn as sns
11 | from tqdm import tqdm
12 |
13 | from imc.types import DataFrame, Series, Array
14 |
15 |
16 | @tp.overload
17 | def get_best_mixture_number(
18 | x: Series,
19 | min_mix: int,
20 | max_mix: int,
21 | subsample_if_needed: bool,
22 | n_iters: int,
23 | metrics: tp.Sequence[str],
24 | red_func: str,
25 | return_prediction: tp.Literal[False],
26 | ) -> int:
27 | ...
28 |
29 |
30 | @tp.overload
31 | def get_best_mixture_number(
32 | x: Series,
33 | min_mix: int,
34 | max_mix: int,
35 | subsample_if_needed: bool,
36 | n_iters: int,
37 | metrics: tp.Sequence[str],
38 | red_func: str,
39 | return_prediction: tp.Literal[True],
40 | ) -> tp.Tuple[int, Array]:
41 | ...
42 |
43 |
44 | def get_best_mixture_number(
45 | x: Series,
46 | min_mix: int = 2,
47 | max_mix: int = 6,
48 | subsample_if_needed: bool = True,
49 | n_iters: int = 3,
50 | metrics: tp.Sequence[str] = [
51 | "silhouette_score",
52 | "calinski_harabasz_score",
53 | "davies_bouldin_score",
54 | ],
55 | red_func: str = "mean",
56 | return_prediction: bool = False,
57 | ) -> tp.Union[int, tp.Tuple[int, Array]]:
58 | from sklearn.mixture import GaussianMixture
59 | import sklearn.metrics
60 |
61 | def get_means(num: Series, pred: tp.Union[Series, Array]) -> Series:
62 | return num.groupby(pred).mean().sort_values()
63 |
64 | def replace_pred(x: Series, y: tp.Union[Series, Array]) -> Series:
65 | means = get_means(x, y)
66 | repl = dict(zip(means.index, range(len(means))))
67 | y2 = pd.Series(y, index=x.index).replace(repl)
68 | new_means = get_means(x, y2.values)
69 | assert all(new_means.index == range(len(new_means)))
70 | return y2
71 |
72 | xx = x.sample(n=10_000) if subsample_if_needed and x.shape[0] > 10_000 else x
73 |
74 | if isinstance(xx, pd.Series):
75 | xx = xx.values.reshape((-1, 1))
76 |
77 |     mi = range(min_mix, max_mix + 1)  # make `max_mix` inclusive
78 | mixes = pd.DataFrame(index=metrics, columns=mi)
79 | for i in tqdm(mi):
80 | mix = GaussianMixture(i)
81 | # mix.fit_predict(x)
82 | for f in metrics:
83 |             func = getattr(sklearn.metrics, f)
84 | mixes.loc[f, i] = np.mean(
85 | [func(xx, mix.fit_predict(xx)) for _ in range(n_iters)]
86 | )
87 | # mixes[i] = np.mean([silhouette_score(x, mix.fit_predict(x)) for _ in range(iters)])
88 | mixes.loc["davies_bouldin_score"] = 1 / mixes.loc["davies_bouldin_score"]
89 |
90 | # return best
91 | # return np.argmax(mixes.values()) + min_mix # type: ignore
92 | best = mixes.columns[int(getattr(np, red_func)(mixes.apply(np.argmax, 1)))]
93 | if not return_prediction:
94 | return best # type: ignore
95 |
96 | # now train with full data
97 | mix = GaussianMixture(best)
98 | return (best, replace_pred(x, mix.fit_predict(x.values.reshape((-1, 1)))))
99 |
100 |
101 | def get_threshold_from_gaussian_mixture(
102 | x: Series, y: Series = None, n_components: int = 2
103 | ) -> Array:
104 | def get_means(num: Series, pred: tp.Union[Series, Array]) -> Series:
105 | return num.groupby(pred).mean().sort_values()
106 |
107 | def replace_pred(x: Series, y: tp.Union[Series, Array]) -> Series:
108 | means = get_means(x, y)
109 | repl = dict(zip(means.index, range(len(means))))
110 | y2 = pd.Series(y, index=x.index).replace(repl)
111 | new_means = get_means(x, y2.values)
112 | assert all(new_means.index == range(len(new_means)))
113 | return y2
114 |
115 | x = x.sort_values()
116 |
117 | if y is None:
118 | from sklearn.mixture import GaussianMixture # type: ignore
119 |
120 | mix = GaussianMixture(n_components=n_components)
121 | xx = x.values.reshape((-1, 1))
122 | y = mix.fit_predict(xx)
123 | else:
124 | y = y.reindex(x.index).values
125 | y = replace_pred(x, y).values
126 |     thresh = x.loc[(y[:-1] < y[1:]).tolist() + [False]].reset_index(drop=True)
127 | assert len(thresh) == (n_components - 1)
128 | return thresh
129 |
130 |
131 | def get_probability_of_gaussian_mixture(
132 | x: Series, n_components: int = 2, population=-1
133 | ) -> Series:
134 | from sklearn.mixture import GaussianMixture # type: ignore
135 |
136 | x = x.sort_values()
137 | mix = GaussianMixture(n_components=n_components)
138 | xx = x.values.reshape((-1, 1))
139 | mix.fit(xx)
140 | means = pd.Series(mix.means_.squeeze()).sort_values()
141 | # assert (means.index == range(n_components)).all()
142 | # order components by mean
143 | p = mix.predict_proba(xx)[:, means.index]
144 | # take requested population
145 | p = p[:, population]
146 | return pd.Series(p, index=x.index).sort_index()
147 |
148 |
149 | def fit_gaussian_mixture(
150 | x: tp.Union[Series, DataFrame], n_mixtures: tp.Union[int, tp.List[int]] = None
151 | ) -> tp.Union[Series, DataFrame]:
152 |     # TODO: parallelize
153 | from sklearn.mixture import GaussianMixture
154 |
155 | if isinstance(x, pd.Series):
156 | x = x.to_frame()
157 | if isinstance(n_mixtures, int):
158 | n_mixtures = [n_mixtures] * x.shape[1]
159 | expr_thresh = x.astype(int)
160 |
161 | def get_means(num, pred):
162 | return num.groupby(pred).mean().sort_values()
163 |
164 | def replace_pred(x, y):
165 | means = get_means(x, y)
166 | repl = dict(zip(range(len(means)), means.index))
167 | y2 = y.replace(repl)
168 | new_means = get_means(x, y2)
169 | assert all(new_means.index == range(len(new_means)))
170 | return y2
171 |
172 |     for i, ch in enumerate(x.columns):
173 |         if n_mixtures is None:
174 |             n_best = get_best_mixture_number(x.loc[:, ch], return_prediction=False)  # type: ignore[call-overload]
175 | mix = GaussianMixture(n_best)
176 | else:
177 | mix = GaussianMixture(n_mixtures[i])
178 | _x = x.loc[:, ch]
179 | x2 = _x.values.reshape((-1, 1))
180 | mix.fit(x2)
181 | y = pd.Series(mix.predict(x2), index=x.index, name="class")
182 | expr_thresh[ch] = replace_pred(_x, y)
183 | return expr_thresh.squeeze()
184 |
185 |
186 | def get_population(
187 | ser: Series, population: int = -1, plot=False, ax=None, **kwargs
188 | ) -> pd.Index:
189 | if population == -1:
190 | operator = np.greater_equal
191 | elif population == 0:
192 | operator = np.less_equal
193 | else:
194 | raise ValueError("Chosen population must be '0' (lowest) or '-1' (highest).")
195 |
196 |     # Make sure the index is sorted (monotonic)
197 | if not ser.index.is_monotonic:
198 | ser = ser.reset_index(drop=True)
199 |
200 | # Work only in positive space
201 | xx = ser # + abs(ser.min())
202 | done = False
203 | while not done:
204 | try:
205 | n, y = get_best_mixture_number(xx, return_prediction=True, **kwargs)
206 | except ValueError: # "Number of labels is 1. Valid values are 2 to n_samples - 1 (inclusive)"
207 | continue
208 | done = True
209 | print(f"Chosen mixture of {n} distributions.")
210 | done = False
211 | while not done:
212 | try:
213 | thresh = get_threshold_from_gaussian_mixture(xx, n_components=n)
214 | except AssertionError:
215 | continue
216 | done = True
217 |
218 | sel = operator(xx, thresh.iloc[population]).values
219 |
220 | if plot:
221 | ax = plt.gca() if ax is None else ax
222 | sns.distplot(xx, kde=False, ax=ax)
223 | sns.distplot(xx.loc[sel], kde=False, ax=ax)
224 | [ax.axvline(q, linestyle="--", color="grey") for q in thresh]
225 | ax = None
226 | return sel
227 |
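228 | 
229 | # Usage sketch (editor's addition; synthetic bimodal data, not from the original docs):
230 | #
231 | # import numpy as np
232 | # import pandas as pd
233 | #
234 | # x = pd.Series(np.concatenate([np.random.normal(0, 1, 500), np.random.normal(5, 1, 500)]))
235 | # n = get_best_mixture_number(x, min_mix=2, max_mix=4, return_prediction=False)
236 | # thresholds = get_threshold_from_gaussian_mixture(x, n_components=n)  # n - 1 thresholds
237 | # positive = get_population(x)  # boolean selection of the highest-mean population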
--------------------------------------------------------------------------------
/imc/ops/quant.py:
--------------------------------------------------------------------------------
1 | """
2 | Operations of signal quantification.
3 | """
4 |
5 | from __future__ import annotations
6 | import typing as tp
7 |
8 | import numpy as np
9 | import pandas as pd
10 | import parmap
11 |
12 | import skimage.measure
13 | from skimage.segmentation import clear_border
14 |
15 | from imc.data_models import roi as _roi
16 | from imc.types import DataFrame, Array, Path
17 | from imc.utils import read_image_from_file, minmax_scale
18 |
19 |
20 | def quantify_cell_intensity(
21 | stack: tp.Union[Array, Path],
22 | mask: tp.Union[Array, Path],
23 | red_func: str = "mean",
24 | border_objs: bool = False,
25 | equalize: bool = True,
26 | scale: bool = False,
27 | channel_include: Array = None,
28 | channel_exclude: Array = None,
29 | ) -> DataFrame:
30 | """
31 | Measure the intensity of each channel in each cell
32 |
33 | Parameters
34 | ----------
35 | stack: tp.Union[Array, Path]
36 | Image to quantify.
37 | mask: tp.Union[Array, Path]
38 | Mask to quantify.
39 |     red_func: str
40 |         Function used to reduce the pixel values of each object (e.g. 'mean', 'sum'). Defaults to 'mean'.
41 |     border_objs: bool
42 |         Whether to quantify objects touching the image border. Defaults to False.
43 |     equalize: bool
44 |         Whether to clip intensities at the 98th percentile of each channel. Defaults to True.
45 |     scale: bool
46 |         Whether to min-max scale each channel. Defaults to False.
47 |     channel_include: :class:`~np.ndarray`
48 |         Boolean array for channels to include.
49 |     channel_exclude: :class:`~np.ndarray`
50 |         Boolean array for channels to exclude.
47 | """
48 | from skimage.exposure import equalize_hist as eq
49 |
50 | if isinstance(stack, Path):
51 | stack = read_image_from_file(stack)
52 | if isinstance(mask, Path):
53 | mask = read_image_from_file(mask)
54 | if not border_objs:
55 | mask = clear_border(mask)
56 |
57 |     if equalize:
58 |         # clip intensities at the 98th percentile per channel (alternative: histogram equalization via `eq`)
59 | _stack = list()
60 | for x in stack:
61 | p = np.percentile(x, 98)
62 | x[x > p] = p
63 | _stack.append(x)
64 | stack = np.asarray(_stack)
65 | if scale:
66 | stack = np.asarray([minmax_scale(x) for x in stack])
67 |
68 | cells = [c for c in np.unique(mask) if c != 0]
69 | n_channels = stack.shape[0]
70 |
71 | if channel_include is None:
72 | channel_include = np.asarray([True] * n_channels)
73 | if channel_exclude is None:
74 | channel_exclude = np.asarray([False] * n_channels)
75 |
76 | res = np.zeros((len(cells), n_channels), dtype=int if red_func == "sum" else float)
77 | for channel in np.arange(stack.shape[0])[channel_include & ~channel_exclude]:
78 | res[:, channel] = [
79 | getattr(x.intensity_image[x.image], red_func)()
80 | for x in skimage.measure.regionprops(mask, stack[channel])
81 | ]
82 | return pd.DataFrame(res, index=cells).rename_axis(index="obj_id")
83 |
84 |
85 | def quantify_cell_morphology(
86 | mask: tp.Union[Array, Path],
87 | attributes: tp.Sequence[str] = [
88 | "area",
89 | "perimeter",
90 | "minor_axis_length",
91 | "major_axis_length",
92 | # In some images I get ValueError for 'minor_axis_length'
93 | # just like https://github.com/scikit-image/scikit-image/issues/2625
94 | # 'orientation', # should be ~random for non-optical imaging, so I'm not including it
95 | "eccentricity",
96 | "solidity",
97 | "centroid",
98 | ],
99 | border_objs: bool = False,
100 | ) -> DataFrame:
101 | if isinstance(mask, Path):
102 | mask = read_image_from_file(mask)
103 | if not border_objs:
104 | mask = clear_border(mask)
105 |
106 | morph = (
107 | pd.DataFrame(
108 | skimage.measure.regionprops_table(mask, properties=attributes),
109 | index=[c for c in np.unique(mask) if c != 0],
110 | )
111 | .rename_axis(index="obj_id")
112 | .rename(columns={"centroid-0": "X_centroid", "centroid-1": "Y_centroid"})
113 | )
114 | if ("minor_axis_length" in attributes) and ("major_axis_length" in attributes):
115 | morph["ratio_axis_length"] = (
116 | morph["major_axis_length"] / morph["minor_axis_length"]
117 | )
118 | return morph
119 |
120 |
121 | def _quantify_cell_intensity__roi(roi: _roi.ROI, **kwargs) -> DataFrame:
122 | assignment = dict(roi=roi.name)
123 | if roi.sample is not None:
124 | assignment["sample"] = roi.sample.name
125 | return roi.quantify_cell_intensity(**kwargs).assign(**assignment)
126 |
127 |
128 | def _quantify_cell_morphology__roi(roi: _roi.ROI, **kwargs) -> DataFrame:
129 | assignment = dict(roi=roi.name)
130 | if roi.sample is not None:
131 | assignment["sample"] = roi.sample.name
132 | return roi.quantify_cell_morphology(**kwargs).assign(**assignment)
133 |
134 |
135 | def _correlate_channels__roi(roi: _roi.ROI, labels: str = "channel_names") -> DataFrame:
136 | xcorr = np.corrcoef(roi.stack.reshape((roi.channel_number, -1)))
137 | np.fill_diagonal(xcorr, 0)
138 | labs = getattr(roi, labels)
139 | return pd.DataFrame(xcorr, index=labs, columns=labs)
140 |
141 |
142 | # def _get_adjacency_graph__roi(roi: _roi.ROI, **kwargs) -> DataFrame:
143 | # output_prefix = roi.sample.root_dir / "single_cell" / roi.name
144 | # return get_adjacency_graph(roi.stack, roi.mask, roi.clusters, output_prefix, **kwargs)
145 |
146 |
147 | def quantify_cell_intensity_rois(
148 | rois: tp.Sequence[_roi.ROI],
149 | **kwargs,
150 | ) -> DataFrame:
151 | """
152 | Measure the intensity of each channel in each single cell.
153 | """
154 | return pd.concat(
155 | parmap.map(_quantify_cell_intensity__roi, rois, pm_pbar=True, **kwargs)
156 | ).rename_axis(index="obj_id")
157 |
158 |
159 | def quantify_cell_morphology_rois(
160 | rois: tp.Sequence[_roi.ROI],
161 | **kwargs,
162 | ) -> DataFrame:
163 | """
164 | Measure the shape parameters of each single cell.
165 | """
166 | return pd.concat(
167 | parmap.map(_quantify_cell_morphology__roi, rois, pm_pbar=True, **kwargs)
168 | ).rename_axis(index="obj_id")
169 |
170 |
171 | def quantify_cells_rois(
172 | rois: tp.Sequence[_roi.ROI],
173 | layers: tp.Sequence[str],
174 | intensity: bool = True,
175 | intensity_kwargs: tp.Dict[str, tp.Any] = {},
176 | morphology: bool = True,
177 | morphology_kwargs: tp.Dict[str, tp.Any] = {},
178 | ) -> DataFrame:
179 | """
180 |     Measure the intensity and/or morphology of each single cell.
181 | """
182 | quants = list()
183 | if intensity:
184 | quants.append(
185 | quantify_cell_intensity_rois(rois=rois, layers=layers, **intensity_kwargs)
186 | )
187 | if morphology:
188 | quants.append(
189 | quantify_cell_morphology_rois(rois=rois, layers=layers, **morphology_kwargs)
190 | )
191 |
192 | return (
193 |         # TODO: this will fail if there are different layers in intensity and morphology
194 | pd.concat(
195 | # ignore because a ROI is not obliged to have a Sample
196 | [quants[0].drop(["sample", "roi"], axis=1, errors="ignore"), quants[1]],
197 | axis=1,
198 | )
199 | if len(quants) > 1
200 | else quants[0]
201 | ).rename_axis(index="obj_id")
202 |
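203 | 
204 | # Usage sketch (editor's addition; random arrays stand in for a real stack and cell mask):
205 | #
206 | # import numpy as np
207 | #
208 | # stack = np.random.rand(3, 64, 64)  # (channels, y, x)
209 | # mask = np.zeros((64, 64), dtype=int)
210 | # mask[10:20, 10:20] = 1  # a single labeled cell away from the border
211 | # intensity = quantify_cell_intensity(stack, mask)  # DataFrame: cells x channels
212 | # morphology = quantify_cell_morphology(mask)  # DataFrame: cells x shape attributes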
--------------------------------------------------------------------------------
/imc/ops/signal.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for handling signal intensity in images.
3 | """
4 |
5 | import typing as tp
6 |
7 | import numpy as np
8 | import pandas as pd
9 | import matplotlib.patches as mpatches
10 | import matplotlib.pyplot as plt
11 | import seaborn as sns
12 | import parmap
13 | from skimage import exposure
14 |
15 | import imc.data_models.roi as _roi
16 | from imc.exceptions import cast
17 | from imc.types import DataFrame, Series, Array, Path
18 |
19 | FIG_KWS = dict(bbox_inches="tight", dpi=300)
20 |
21 |
22 | # def check_channel_axis_correlation(
23 | # arr: Array, channel_labels: tp.Sequence[str], output_prefix: Path
24 | # ) -> DataFrame:
25 | # # # Plot and regress
26 | # n, m = get_grid_dims(arr.shape[0])
27 | # fig, axis = plt.subplots(
28 | # m, n, figsize=(n * 4, m * 4), squeeze=False, sharex=True, sharey=True
29 | # )
30 |
31 | # res = list()
32 | # for channel in range(arr.shape[0]):
33 | # for axs in [0, 1]:
34 | # s = arr[channel].mean(axis=axs)
35 | # order = np.arange(s.shape[0])
36 | # model = LinearRegression()
37 | # model.fit(order[:, np.newaxis] / max(order), s)
38 | # res.append(
39 | # [
40 | # channel,
41 | # axs,
42 | # model.coef_[0],
43 | # model.intercept_,
44 | # pearsonr(order, s)[0],
45 | # ]
46 | # )
47 |
48 | # axis.flatten()[channel].plot(order, s)
49 | # axis.flatten()[channel].set_title(
50 | # f"{channel_labels[channel]}\nr[X] = {res[-2][-1]:.2f}; r[Y] = {res[-1][-1]:.2f}"
51 | # )
52 |
53 | # axis[int(m / 2), 0].set_ylabel("Mean signal along axis")
54 | # axis[-1, int(n / 2)].set_xlabel("Order along axis")
55 | # c = sns.color_palette("colorblind")
56 | # patches = [
57 | # mpatches.Patch(color=c[0], label="X"),
58 | # mpatches.Patch(color=c[1], label="Y"),
59 | # ]
60 | # axis[int(m / 2), -1].legend(
61 | # handles=patches,
62 | # bbox_to_anchor=(1.05, 1),
63 | # loc=2,
64 | # borderaxespad=0.0,
65 | # title="Axis",
66 | # )
67 | # fig.savefig(output_prefix + "channel-axis_correlation.svg", **FIG_KWS)
68 |
69 | # df = pd.DataFrame(res, columns=["channel", "axis", "coef", "intercept", "r"])
70 | # df["axis_label"] = df["axis"].replace(0, "X_centroid").replace(1, "Y_centroid")
71 | # df["channel_label"] = [x for x in channel_labels for _ in range(2)]
72 | # df["abs_r"] = df["r"].abs()
73 | # df.to_csv(output_prefix + "channel-axis_correlation.csv", index=False)
74 | # return df
75 |
76 |
77 | def fix_signal_axis_dependency(
78 | arr: Array, channel_labels: tp.Sequence[str], res: DataFrame, output_prefix: Path
79 | ) -> Array:
80 | # res = pd.read_csv(pjoin("processed", "case_b", "plots", "qc", roi + "_channel-axis_correlation.csv"))
81 | corr_d = np.empty_like(arr)
82 | for channel in range(arr.shape[0]):
83 | r = res.query(f"channel == {channel}")
84 | x = r.query("axis_label == 'X'")["coef"].squeeze()
85 | xinter = r.query("axis_label == 'X'")["intercept"].squeeze()
86 | y = r.query("axis_label == 'Y'")["coef"].squeeze()
87 | yinter = r.query("axis_label == 'Y'")["intercept"].squeeze()
88 | # to_reg = pd.DataFrame(arr[channel]).reset_index().melt(id_vars='index').rename(columns=dict(index="X", variable="Y"))
89 |
90 | order = np.arange(arr[channel].shape[0])
91 | dd = arr[channel]
92 | m = np.ones_like(dd)
93 | m = m * (order / max(order) * x) + (xinter)
94 | m = (m.T * (order / max(order) * y)).T + (yinter)
95 | ddfix = (dd - m) + dd.mean()
96 | corr_d[channel] = ddfix
97 |
98 | fig, axis = plt.subplots(1, 7, sharex=True, sharey=False, figsize=(7 * 3, 3 * 1))
99 | fig.suptitle(channel_labels[channel])
100 | axis[0].set_title("Original")
101 | axis[0].imshow(dd)
102 | axis[1].set_title("Original, equalized")
103 | axis[1].imshow(exposure.equalize_hist(dd))
104 | axis[2].set_title("Bias mask")
105 | axis[2].imshow(m)
106 | axis[3].set_title("Bias removed")
107 | axis[3].imshow(ddfix)
108 | axis[4].set_title("Bias removed, equalized")
109 | axis[4].imshow(exposure.equalize_hist(ddfix))
110 | axis[5].set_title("Channel bias")
111 | axis[5].plot(order, dd.mean(axis=0), label="Original", alpha=0.5)
112 | axis[5].plot(order, ddfix.mean(axis=0), label="Bias removed", alpha=0.5)
113 | axis[5].set_xlabel("Position along X axis")
114 | axis[5].set_ylabel("Signal along X axis")
115 | axis[5].legend()
116 | axis[6].set_title("Channel bias")
117 | axis[6].plot(order, dd.mean(axis=1), label="Original", alpha=0.5)
118 | axis[6].plot(order, ddfix.mean(axis=1), label="Bias removed", alpha=0.5)
119 | axis[6].set_xlabel("Position along Y axis")
120 | axis[6].set_ylabel("Signal along Y axis")
121 | axis[6].legend()
122 | for ax in axis[:-2]:
123 | ax.axis("off")
124 | fig.savefig(
125 | output_prefix
126 | + f"channel-axis_correlation_removal.{channel_labels[channel]}.demonstration.svg",
127 | **FIG_KWS,
128 | )
129 | plt.close("all")
130 | return corr_d
131 |
132 |
133 | def channel_stats(roi: _roi.ROI, channels: tp.Sequence[str] = None):
134 |     # the duplicate import of skimage.restoration.estimate_sigma was shadowed by this one
135 |     from imc.utils import estimate_sigma
136 |
137 | if channels is None:
138 | channels = roi.channel_labels.tolist()
139 | stack = roi._get_channels(channels)[1]
140 | mask = roi.cell_mask == 0
141 | res = dict()
142 | res["wmeans"] = pd.Series(stack.mean(axis=(1, 2)), index=channels)
143 | res["wstds"] = pd.Series(stack.std(axis=(1, 2)), index=channels)
144 | res["cmeans"] = pd.Series(
145 | [np.ma.masked_array(stack[i], mask=mask).mean() for i in range(len(channels))],
146 | index=channels,
147 | )
148 | res["cstds"] = pd.Series(
149 | [np.ma.masked_array(stack[i], mask=mask).std() for i in range(len(channels))],
150 | index=channels,
151 | )
152 | res["emeans"] = pd.Series(
153 | [np.ma.masked_array(stack[i], mask=~mask).mean() for i in range(len(channels))],
154 | index=channels,
155 | )
156 | res["estds"] = pd.Series(
157 | [np.ma.masked_array(stack[i], mask=~mask).std() for i in range(len(channels))],
158 | index=channels,
159 | )
160 | # res["noises"] = pd.Series([estimate_noise(ch) for ch in stack], index=channels)
161 | res["sigmas"] = pd.Series(
162 | estimate_sigma(np.moveaxis(stack, 0, -1), multichannel=True), index=channels
163 | )
164 | return res
165 |
166 |
167 | def measure_channel_background(
168 | rois: tp.Sequence[_roi.ROI], plot: bool = True, output_prefix: Path = None
169 | ) -> Series:
170 | from imc.utils import align_channels_by_name
171 | from mpl_toolkits.axes_grid1 import make_axes_locatable
172 |
173 | if plot:
174 | assert (
175 | output_prefix is not None
176 | ), "If `plot` is True, `output_prefix` must be given."
177 |
178 | _channels = pd.DataFrame(
179 | {r.name: r.channel_labels[~r.channel_exclude.values] for r in rois}
180 | )
181 | channels = align_channels_by_name(_channels).dropna().iloc[:, 0].tolist()
182 | roi_names = [r.name for r in rois]
183 |
184 | res = parmap.map(channel_stats, rois, channels=channels, pm_pbar=True)
185 |
186 | wmeans = pd.DataFrame((x["wmeans"] for x in res), index=roi_names).T
187 | wstds = pd.DataFrame((x["wstds"] for x in res), index=roi_names).T
188 | wqv2s = np.sqrt(wstds / wmeans)
189 | cmeans = pd.DataFrame((x["cmeans"] for x in res), index=roi_names).T
190 | cstds = pd.DataFrame((x["cstds"] for x in res), index=roi_names).T
191 | cqv2s = np.sqrt(cstds / cmeans)
192 | emeans = pd.DataFrame((x["emeans"] for x in res), index=roi_names).T
193 | estds = pd.DataFrame((x["estds"] for x in res), index=roi_names).T
194 | eqv2s = np.sqrt(estds / emeans)
195 | fore_backg: DataFrame = np.log(cmeans / emeans)
196 | # fore_backg_disp = np.log1p(((cmeans / emeans) / (cmeans + emeans))).mean(1)
197 |     # note: "noises" is not computed by `channel_stats` (estimate_noise is disabled there)
198 | sigmas = pd.DataFrame((x["sigmas"] for x in res), index=roi_names).T
199 |
200 | # Join all metrics
201 | metrics = (
202 | wmeans.mean(1)
203 | .to_frame(name="image_mean")
204 | .join(wstds.mean(1).rename("image_std"))
205 | .join(wqv2s.mean(1).rename("image_qv2"))
206 | .join(cmeans.mean(1).rename("cell_mean"))
207 | .join(cstds.mean(1).rename("cell_std"))
208 | .join(cqv2s.mean(1).rename("cell_qv2"))
209 | .join(emeans.mean(1).rename("extra_mean"))
210 | .join(estds.mean(1).rename("extra_std"))
211 | .join(eqv2s.mean(1).rename("extra_qv2"))
212 | .join(fore_backg.mean(1).rename("fore_backg"))
214 | .join(sigmas.mean(1).rename("sigma"))
215 | ).rename_axis(index="channel")
216 | metrics_std = (metrics - metrics.min()) / (metrics.max() - metrics.min())
217 |
218 | if not plot:
219 | # Invert QV2
220 | sel = metrics_std.columns.str.contains("_qv2")
221 | metrics_std.loc[:, sel] = 1 - metrics_std.loc[:, sel]
222 | # TODO: better decision on which metrics matter
223 | return metrics_std.mean(1)
224 |
225 | output_prefix = cast(output_prefix)
226 | if not output_prefix.endswith("."):
227 | output_prefix += "."
228 |
229 | metrics.to_csv(output_prefix + "channel_background_noise_measurements.csv")
230 | metrics = pd.read_csv(
231 | output_prefix + "channel_background_noise_measurements.csv", index_col=0
232 | )
233 |
234 | # Plot
235 | fig, axes = plt.subplots(2, 3, figsize=(3 * 4.1, 2 * 4), sharex="col")
236 | axes[0, 0].set_title("Whole image")
237 | axes[0, 1].set_title("Cells")
238 | axes[0, 2].set_title("Extracellular")
239 | for i, (means, stds, qv2s) in enumerate(
240 | [(wmeans, wstds, wqv2s), (cmeans, cstds, cqv2s), (emeans, estds, eqv2s)]
241 | ):
242 | # plot mean vs variance
243 | mean = means.mean(1)
244 | std = stds.mean(1) ** 2
245 | qv2 = qv2s.mean(1)
246 | fb = fore_backg.mean(1)
247 |
248 | axes[0, i].set_xlabel("Mean")
249 | axes[0, i].set_ylabel("Variance")
250 | pts = axes[0, i].scatter(mean, std, c=fb)
251 | if i == 2:
252 | div = make_axes_locatable(axes[0, i])
253 | cax = div.append_axes("right", size="5%", pad=0.05)
254 | fig.colorbar(pts, cax=cax)
255 |
256 | for channel in means.index:
257 | lab = "left" if np.random.rand() > 0.5 else "right"
258 | axes[0, i].text(
259 | mean.loc[channel], std.loc[channel], channel, ha=lab, fontsize=4
260 | )
261 | v = max(mean.max().max(), std.max().max())
262 | axes[0, i].plot((0, v), (0, v), linestyle="--", color="grey")
263 | axes[0, i].loglog()
264 |
265 | # plot mean vs qv2
266 | axes[1, i].set_xlabel("Mean")
267 | axes[1, i].set_ylabel("Squared coefficient of variation")
268 | axes[1, i].scatter(mean, qv2, c=fb)
269 | for channel in means.index:
270 | lab = "left" if np.random.rand() > 0.5 else "right"
271 | axes[1, i].text(
272 | mean.loc[channel], qv2.loc[channel], channel, ha=lab, fontsize=4
273 | )
274 | axes[1, i].axhline(1, linestyle="--", color="grey")
275 | axes[1, i].set_xscale("log")
276 | # if qv2.min() > 0.01:
277 | # axes[1, i].set_yscale("log")
278 | fig.savefig(output_prefix + "channel_mean_variation_noise.svg", **FIG_KWS)
279 |
280 | fig, axes = plt.subplots(1, 2, figsize=(2 * 6.2, 4))
281 | p = fore_backg.mean(1).sort_values()
282 | r1 = p.rank()
283 | r2 = p.abs().rank()
284 | axes[0].scatter(r1, p)
285 | axes[1].scatter(r2, p.abs())
286 | for i in p.index:
287 | axes[0].text(r1.loc[i], p.loc[i], s=i, rotation=90, ha="center", va="bottom")
288 | axes[1].text(
289 | r2.loc[i], p.abs().loc[i], s=i, rotation=90, ha="center", va="bottom"
290 | )
291 | axes[1].set_yscale("log")
292 | axes[0].set_xlabel("Channel rank")
293 | axes[1].set_xlabel("Channel rank")
294 | axes[0].set_ylabel("Cellular/extracellular difference")
295 | axes[1].set_ylabel("Cellular/extracellular difference (abs)")
296 | axes[0].axhline(0, linestyle="--", color="grey")
297 | axes[1].axhline(0, linestyle="--", color="grey")
298 | fig.savefig(
299 | output_prefix + "channel_foreground_background_diff.rankplot.svg",
300 | **FIG_KWS,
301 | )
302 |
303 | grid = sns.clustermap(
304 | metrics_std,
305 | xticklabels=True,
306 | yticklabels=True,
307 | metric="correlation",
308 | cbar_kws=dict(label="Variable (min-max)"),
309 | )
310 | grid.fig.savefig(
311 | output_prefix + "channel_mean_variation_noise.clustermap.svg", **FIG_KWS
312 | )
313 |
314 | # Invert QV2
315 | sel = metrics_std.columns.str.contains("_qv2")
316 | metrics_std.loc[:, sel] = 1 - metrics_std.loc[:, sel]
317 | # TODO: better decision on which metrics matter
318 | return metrics_std.mean(1)
319 |
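320 | 
321 | # Usage sketch (editor's addition; `prj` is a hypothetical Project with processed ROIs):
322 | #
323 | # from imc import Project
324 | # from imc.types import Path
325 | #
326 | # prj = Project()
327 | # score = measure_channel_background(prj.rois, plot=True, output_prefix=Path("qc/background"))
328 | # channels_to_keep = score[score > 0.5].index  # threshold chosen for illustration only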
--------------------------------------------------------------------------------
/imc/py.typed:
--------------------------------------------------------------------------------
1 | # Marker file for PEP 561. This package uses inline types.
2 |
--------------------------------------------------------------------------------
/imc/scripts/illustrate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Illustrate IMC data.
5 | """
6 |
7 | import sys
8 | import typing as tp
9 |
10 | from tqdm import tqdm
11 | import matplotlib.pyplot as plt
12 | import scanpy as sc
13 |
14 | from imc import Project
15 | from imc.scripts import build_cli, find_tiffs, find_h5ad
16 |
17 | figkws = dict(dpi=300, bbox_inches="tight")
18 |
19 |
20 | def main(cli: tp.Sequence[str] = None) -> int:
21 | parser = build_cli("illustrate")
22 | args = parser.parse_args(cli)
23 |
24 | if args.tiffs is None:
25 | args.tiffs = find_tiffs()
26 | if len(args.tiffs) == 0:
27 | raise ValueError("Input files were not provided and could not be found!")
28 |
29 | if args.h5ad is None:
30 | args.h5ad = find_h5ad()
31 | if args.h5ad is None:
32 | if args.clusters:
33 | print(
34 | "No h5ad file was provided and it could not be found. "
35 | "Not illustrating clusters."
36 | )
37 | args.clusters = False
38 | if args.cell_types:
39 | print(
40 | "No h5ad file was provided and it could not be found. "
41 | "Not illustrating cell types."
42 | )
43 | args.cell_types = False
44 |
45 | print("Starting illustration step!")
46 |
47 | args.channels_include = (
48 | args.channels_include.split(",") if args.channels_include is not None else None
49 | )
50 | args.channels_exclude = args.channels_exclude.split(",")
51 | args.output_dir.mkdir()
52 |
53 | prj = Project.from_stacks(args.tiffs)
54 | if args.stacks:
55 | dir_ = (args.output_dir / "stacks").mkdir()
56 | print(f"Plotting full image stacks in directory '{dir_}'.")
57 | for roi in tqdm(prj.rois):
58 | f = dir_ / roi.name + ".full_stack.pdf"
59 | if f.exists() and not args.overwrite:
60 | continue
61 | fig = roi.plot_channels()
62 | fig.savefig(f, **figkws)
63 | plt.close(fig)
64 |
65 | if args.channels:
66 | dir_ = (args.output_dir / "channels").mkdir()
67 | print(f"Plotting channels for all images jointly in directory '{dir_}'.")
68 | for ch in tqdm(prj.rois[0].channel_labels):
69 | f = dir_ / ch + ".rois.pdf"
70 | if f.exists() and not args.overwrite:
71 | continue
72 | fig = prj.plot_channels([ch])
73 | fig.savefig(f, **figkws)
74 | plt.close(fig)
75 |
76 | id_cols = ["sample", "roi", "obj_id"]
77 | if args.clusters:
78 | dir_ = (args.output_dir / "clusters").mkdir()
79 | print(f"Plotting cluster illustrations in directory '{dir_}'.")
80 |
81 | a = sc.read(args.h5ad)
82 | clusters = a.obs.columns[a.obs.columns.str.contains("cluster_")]
83 | for cluster in tqdm(clusters):
84 | f = dir_ / f"clustering_illustrations.{cluster}.pdf"
85 | if f.exists() and not args.overwrite:
86 | continue
87 | # TODO: plot markers next to clusters, or overlay
88 | prj.set_clusters(a.obs.set_index(id_cols)[cluster].rename("cluster"))
89 | fig = prj.plot_cell_types()
90 | for ax in fig.axes[1:]:
91 | ax.legend_.set_visible(False)
92 | fig.savefig(f, **figkws)
93 | plt.close(fig)
94 |
95 | if args.cell_types:
96 | dir_ = (args.output_dir / "cell_type").mkdir()
97 | print(f"Plotting cell_type illustrations in directory '{dir_}'.")
98 |
99 | a = sc.read(args.h5ad)
100 | cts = a.obs.columns[a.obs.columns.str.contains("cluster_")].intersection(
101 | a.obs.columns[a.obs.columns.str.contains("_label")]
102 | )
103 | for ct in tqdm(cts):
104 | f = dir_ / f"cell_type_illustrations.{ct}.pdf"
105 | if f.exists() and not args.overwrite:
106 | continue
107 | # TODO: plot markers next to cell types, or overlay
108 | prj.set_clusters(a.obs.set_index(id_cols)[ct].rename("cluster"))
109 | fig = prj.plot_cell_types()
110 | for ax in fig.axes[1:]:
111 | ax.legend_.set_visible(False)
112 | fig.savefig(f, **figkws)
113 | plt.close(fig)
114 |
115 | print("Finished illustration step.")
116 | return 0
117 |
118 |
119 | if __name__ == "__main__":
120 | try:
121 | sys.exit(main())
122 | except KeyboardInterrupt:
123 | sys.exit(1)
124 |
--------------------------------------------------------------------------------
/imc/scripts/inspect_ilastik_model.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 |
3 | import argparse
4 | import sys
5 | import typing as tp
6 |
7 | import h5py
8 | import numpy as np
9 | import pandas as pd
10 | import matplotlib
11 | import matplotlib.pyplot as plt
12 | import seaborn as sns
13 |
14 | from imc.types import Path, Array
15 | from imc.graphics import get_grid_dims
16 |
17 |
18 | matplotlib.rcParams["svg.fonttype"] = "none"
19 | FIG_KWS = dict(dpi=300, bbox_inches="tight")
20 |
21 |
22 | cli = ["_models/utuc-imc/utuc-imc.ilp"]
23 |
24 |
25 | def main(cli: tp.List[str] = None) -> int:
26 | args = parse_arguments().parse_args(cli)
27 |
28 | inspect_ilastik_model(args.model_path)
29 |
30 | if args.plot:
31 | plot_training_data(args.model_path, args.channels_to_plot)
32 |
33 | if args.extract:
34 | extract_training_data(args.model_path, args.labels_output_file)
35 |
36 | if args.convert:
37 | convert_model_data(
38 | args.model_path,
39 | args.converted_model_output,
40 | args.channels_to_retain,
41 | )
42 |
43 | return 0
44 |
45 |
46 | def parse_arguments() -> argparse.ArgumentParser:
47 | parser = argparse.ArgumentParser()
48 |
49 | # Extract
50 | parser.add_argument(
51 | "-e",
52 | "--extract",
53 | dest="extract",
54 | action="store_true",
55 | help="Whether to extract training labels from ilastik file into numpy array.",
56 | )
57 | parser.add_argument(
58 | "--labels-output",
59 | dest="labels_output_file",
60 | default=None,
61 | type=Path,
62 | help="Path to file storing numpy array with training labels."
63 | " If not given will be same as model with different suffix.",
64 | )
65 |
66 | # Plot
67 | parser.add_argument(
68 | "-p",
69 | "--plot",
70 | dest="plot",
71 | action="store_true",
72 | help="Whether training set examples should be plotted.",
73 | )
74 | parser.add_argument(
75 | "--channels-to-plot",
76 | dest="channels_to_plot",
77 | choices=["mean", "last"],
78 | default="mean",
79 | help="Which channels to plot. One of 'mean' or 'last'.",
80 | )
81 |
82 | # Convert
83 | parser.add_argument(
84 | "-c",
85 | "--convert",
86 | dest="convert",
87 | action="store_true",
88 | help="Whether to convert ilastik model to new file by changing the input channels.",
89 | )
90 | parser.add_argument(
91 | "--keep-channels",
92 | dest="channels_to_retain",
93 | nargs="+",
94 | type=int,
95 | help="Channel numbers to retain in new model.",
96 | )
97 | parser.add_argument(
98 | "--converted-model-output",
99 | dest="converted_model_output",
100 | type=Path,
101 | help="Path to new model output file.",
102 | )
103 | parser.add_argument(dest="model_path", type=Path)
104 |
105 | return parser
106 |
107 |
108 | def inspect_ilastik_model(model_path: Path) -> None:
109 | print(f"Ilastik model '{model_path}'.")
110 |
111 | f = h5py.File(model_path.as_posix(), mode="r")
112 |
113 | # Input files
114 | # f['Input Data']['infos']['lane0000']['Raw Data']['filePath'][()].decode()
115 | n_input = len(f["Input Data"]["infos"])
116 | training_files = [
117 | f["Input Data"]["infos"]["lane" + str(x).zfill(4)]["Raw Data"]["filePath"][
118 | ()
119 | ].decode()
120 | for x in range(n_input)
121 | ]
122 |
123 | print(f"Model was trained with {n_input} files.")
124 |
125 | # Feature matrix
126 | fv = f["FeatureSelections"]["SelectionMatrix"][()] # values
127 | fx = f["FeatureSelections"]["FeatureIds"][()] # x = derivative
128 | fy = f["FeatureSelections"]["Scales"][()] # y = sigma
129 | feature_matrix = pd.DataFrame(
130 | fv,
131 | index=pd.Series(fx, name="Feature").str.decode("utf8"),
132 | columns=pd.Series(fy, name="Sigma"),
133 | )
134 | used = feature_matrix.values.sum()
135 | total = np.multiply(*feature_matrix.shape)
136 | print(f"{used}/{total} of the possible feature combinations used.")
137 | print("Here is the feature matrix:")
138 | print(feature_matrix, "\n")
139 |
140 | # Pixel classification
141 | # labels = [x.decode() for x in f['PixelClassification']['LabelNames'][()]]
142 | # 3 labels (3 classes?)
143 | # 35 blocks (35 inputs)
144 | # values, shape=(x, y, 1)
145 | annots = [len(x) for x in f["PixelClassification"]["LabelSets"].values()]
146 | filled_annots = [x for x in annots if x != 0]
147 | print(f"{len(filled_annots)}/{n_input} of the input files were labeled.")
148 |
149 | f.close()
150 |
151 |
152 | def plot_training_data(
153 | model_path: Path,
154 |     channels_to_plot: tp.Literal["mean", "last"] = "mean",
155 | ) -> None:
156 | from imc.segmentation import normalize
157 |
158 | f = h5py.File(model_path.as_posix(), mode="r")
159 | n_input = len(f["Input Data"]["infos"])
160 | annots = [len(x) for x in f["PixelClassification"]["LabelSets"].values()]
161 | training_files = [
162 | f["Input Data"]["infos"]["lane" + str(x).zfill(4)]["Raw Data"]["filePath"][
163 | ()
164 | ].decode()
165 | for x in range(n_input)
166 | ]
167 |
168 | # Plot labels on top of sum of channels
169 | n, m = get_grid_dims(len(annots))
170 | fig, axes = plt.subplots(
171 | m, n, figsize=(n * 3, m * 3), gridspec_kw=dict(wspace=0, hspace=0.05)
172 | )
173 | axes = axes.ravel()
174 |
175 | # get colormap depending on what channels are being plotted
176 | if channels_to_plot == "mean":
177 | cmap = matplotlib.colors.ListedColormap(
178 | np.asarray(sns.color_palette("tab10"))[np.asarray([-1, 1, 3])]
179 | )
180 | else:
181 | cmap = matplotlib.colors.ListedColormap(
182 | np.asarray(sns.color_palette("tab10"))[np.asarray([-4, -6, 3])]
183 | )
184 |
185 | # plot
186 | for i in range(n_input):
187 | if training_files[i].startswith("Input Data"):
188 | train_arr = f[training_files[i]]
189 | else:
190 | train_file = model_path.parent / training_files[i].replace(
191 | "/stacked_channels", ""
192 | )
193 | train_arr = h5py.File(train_file, mode="r")["stacked_channels"]
194 |
195 | train_arr = train_arr[()]
196 | train_arr[pd.isnull(train_arr)] = 0
197 |
198 | if channels_to_plot == "mean":
199 | train_arr = normalize(train_arr).mean(-1)
200 | else:
201 | train_arr = normalize(train_arr[..., -1])
202 | training_file_shape = train_arr.shape
203 |
204 | axes[i].imshow(train_arr, rasterized=True) # , cmap='inferno')
205 | # axes[i].set_title(image)
206 | axes[i].axis("off")
207 |
208 | # Now for each block, get coordinates and plot
209 | label_arr = np.zeros(training_file_shape, dtype=float)
210 | # label_arr = scipy.sparse.lil_matrix(training_file_shape)
211 | b = f["PixelClassification"]["LabelSets"]["labels" + str(i).zfill((3))]
212 | for j, label in enumerate(b):
213 | # get start-end coordinates within training image
214 | d = b["block" + str(j).zfill(4)]
215 | pos = dict(d.attrs)["blockSlice"].replace("[", "").replace("]", "").split(",")
216 | xs, ys, zs = [(int(x.split(":")[0]), int(x.split(":")[1])) for x in pos]
217 | arr = d[()].squeeze()
218 | # now fill the image with the labeled pixels
219 | label_arr[slice(*xs), slice(*ys)] = arr
220 | label_arr = np.ma.masked_array(label_arr, label_arr == 0)
221 | axes[i].imshow(label_arr, cmap=cmap, vmin=1, vmax=3, rasterized=True)
222 | fig.savefig(
223 | model_path.replace_(".ilp", f".training_data.{channels_to_plot}.pdf"),
224 | bbox_inches="tight",
225 | dpi=300,
226 | )
227 |
228 | f.close()
229 |
230 |
231 | def extract_training_data(
232 | model_path: Path, output_path: Path = None
233 | ) -> tp.Tuple[Array, Array]:
234 | # Extract training labels for preservation independent of model
235 |
236 | if output_path is None:
237 | output_path = model_path.replace_(".ilp", ".training_data.npz")
238 |
239 | fi = h5py.File(model_path.as_posix(), mode="r")
240 |
241 | n_input = len(fi["Input Data"]["infos"])
242 | training_files = [
243 | fi["Input Data"]["infos"]["lane" + str(x).zfill(4)]["Raw Data"]["filePath"][
244 | ()
245 | ].decode()
246 | for x in range(n_input)
247 | ]
248 |
249 | # Extract arrays
250 | _signals = list()
251 | _labels = list()
252 | for i, file in enumerate(training_files):
253 | if file.startswith("Input Data"):
254 | train_arr = fi[file]
255 | else:
256 | train_file = model_path.parent / file.replace("/stacked_channels", "")
257 | train_arr = h5py.File(train_file, mode="r")["stacked_channels"]
258 | shape = train_arr.shape[:-1]
259 |
260 | # Now for each block, get coordinates and assemble
261 | label_arr = np.zeros(shape, dtype=float)
262 | b = fi["PixelClassification"]["LabelSets"]["labels" + str(i).zfill((3))]
263 | for j, _ in enumerate(b):
264 | # get start-end coordinates within training image
265 | d = b["block" + str(j).zfill(4)]
266 | pos = dict(d.attrs)["blockSlice"].replace("[", "").replace("]", "").split(",")
267 | xs, ys, _ = [(int(x.split(":")[0]), int(x.split(":")[1])) for x in pos]
268 | arr = d[()].squeeze()
269 | # now fill the image with the labeled pixels
270 | label_arr[slice(*xs), slice(*ys)] = arr
271 |
272 | _signals.append(train_arr[()])
273 | _labels.append(label_arr)
274 | fi.close()
275 |
276 | # Save as numpy array
277 | signals = np.asarray(_signals)
278 | labels = np.asarray(_labels)
279 | np.savez_compressed(output_path, x=signals, y=labels)
280 | return (signals, labels)
281 |
282 |
283 | def convert_model_data(
284 | input_model_path: Path,
285 | output_model_path: Path,
286 | channels_to_retain: tp.List[int] = [-1],
287 | ) -> None:
288 | # For now this will assume all files were copied into H5 model
289 | # TODO: implement copying of h5 files with suffix if referenced to disk paths
290 |
291 | # After this, model should be reloaded in ilastik,
292 | # change one pixel in the training data and re-train
293 |
294 | if output_model_path is None:
295 | output_model_path = input_model_path.replace_(".ilp", ".converted.ilp")
296 |
297 |     import shutil
298 | 
299 |     shutil.copyfile(input_model_path, output_model_path)
299 |
300 | f = h5py.File(output_model_path.as_posix(), mode="r+")
301 |
302 | shape = [v.shape for k, v in f["Input Data"]["local_data"].items()][0]
303 | print(f"Current shape of input data: {shape}")
304 |
305 | # Change shape of input data
306 | for k, v in f["Input Data"]["local_data"].items():
307 | del f["Input Data"]["local_data"][k]
308 | from imc.segmentation import normalize
309 |
310 | f["Input Data"]["local_data"][k] = normalize(v[()][..., channels_to_retain])
311 |
312 | shape = [v.shape for k, v in f["Input Data"]["local_data"].items()][0]
313 | print(f"Current shape of input data: {shape}")
314 |
315 | f.close()
316 |
317 |
318 | if __name__ == "__main__":
319 | try:
320 | sys.exit(main())
321 | except KeyboardInterrupt:
322 | sys.exit(1)
323 |
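324 | 
325 | # Example invocations (sketch based on the argument parser above; file paths are illustrative):
326 | #
327 | #   python -m imc.scripts.inspect_ilastik_model model.ilp
328 | #   python -m imc.scripts.inspect_ilastik_model model.ilp --plot --channels-to-plot mean
329 | #   python -m imc.scripts.inspect_ilastik_model model.ilp --extract --labels-output model.training_data.npz
330 | #   python -m imc.scripts.inspect_ilastik_model model.ilp --convert \
331 | #       --keep-channels 0 1 2 --converted-model-output model.converted.ilp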
--------------------------------------------------------------------------------
/imc/scripts/inspect_mcds.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Inspect MCD files, reporting on their basic statistics, saving
5 | metadata as YAML files, and panel information as CSV files.
6 | """
7 |
8 | import sys
9 | import yaml
10 | import argparse
11 | from collections import OrderedDict
12 | import typing as tp
13 |
14 | import pandas as pd
15 |
16 | from imctools.io.mcd.mcdparser import McdParser
17 |
18 | from imc.types import Path, DataFrame, Args
19 | from imc.utils import cleanup_channel_names, build_channel_name
20 | from imc.scripts import build_cli, find_mcds
21 |
22 |
23 | def main(cli: tp.Sequence[str] = None) -> int:
24 | parser = build_cli("inspect")
25 | args = parser.parse_args(cli)
26 | if len(args.mcd_files) == 0:
27 | args.mcd_files = find_mcds()
28 | if len(args.mcd_files) == 0:
29 | print("MCD files were not provided and could not be found!")
30 | return 1
31 |
32 | fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.mcd_files])
33 | print(f"Starting inspection step for {len(args.mcd_files)} MCD files:{fs}!")
34 |
35 | # Inspect each MCD
36 | metas = dict()
37 | _chs = list()
38 | for mcd_file in args.mcd_files:
39 | print(f"\tAnalyzing '{mcd_file}':")
40 | meta, ch = inspect_mcd(mcd_file, args)
41 | metas[mcd_file.as_posix()] = meta
42 | _chs.append(ch.assign(mcd_file=mcd_file))
43 | print(f"\tFinished with '{mcd_file}'!")
44 |
45 | # Dump joint metadata
46 | if not args.no_write:
47 | yaml.dump(
48 | encode(metas),
49 | open(args.output_prefix + ".all_mcds.yaml", "w"),
50 | indent=4,
51 | default_flow_style=False,
52 | sort_keys=False,
53 | )
54 |
55 | # Save joint panel info
56 | # join panels and reorder columns
57 | channels = pd.concat(_chs)
58 | channels = channels.reset_index().reindex(
59 | ["mcd_file", "channel"] + ch.columns.tolist(), axis=1
60 | )
61 | # check if more than one panel present
62 | n_panels = channels.groupby("mcd_file")["channel"].sum().nunique()
63 | if n_panels == 1:
64 | print("All MCD files use same panel.")
65 | else:
66 | print(f"MCD files use different panels, {n_panels} in total.")
67 |
68 | if not args.no_write:
69 | channels.to_csv(args.output_prefix + ".all_mcds.channel_labels.csv", index=False)
70 |
71 | print("Finished inspect step!")
72 | return 0
73 |
74 |
75 | def inspect_mcd(mcd_file: Path, args: Args) -> tp.Tuple[DataFrame, DataFrame]:
76 | cols = [
77 | "Target",
78 | "Metal_Tag",
79 | "Atom",
80 | "full",
81 | "ilastik",
82 | ]
83 | exclude_channels = ["EMPTY", "190BCKG", "80Ar", "89Y", "127I", "124Xe"]
84 |
85 | mcd = McdParser(mcd_file)
86 | session = mcd.session
87 |
88 | # get channel labels
89 | ac_ids = session.acquisition_ids
90 | labels = pd.DataFrame(
91 | {
92 | # ac_id: pd.Series(cleanup_channel_names(
93 | # session.acquisitions[ac_id].channel_labels
94 | # ).values, index=session.acquisitions[ac_id].channel_masses)
95 | ac_id: cleanup_channel_names(session.acquisitions[ac_id].channel_labels)
96 | for ac_id in ac_ids
97 | }
98 | )
99 | # the below fails if ROIs have different lengths of metals
100 | # metals = pd.DataFrame(
101 | # {ac_id: session.acquisitions[ac_id].channel_names for ac_id in ac_ids}
102 | # )
103 | metals = pd.DataFrame(
104 | [
105 | pd.Series(session.acquisitions[ac_id].channel_names, name=ac_id)
106 | for ac_id in ac_ids
107 | ]
108 | ).T
109 | if metals.isnull().any().any():
110 | print(
111 | "Some ROIs have less metals than the others. "
112 | "Keeping only ROIs with most metals."
113 | )
114 | metals = metals.loc[:, ~metals.isnull().any()]
115 |
116 | labels = labels.reindex(metals.columns, axis=1)
117 |
118 | channel_names = labels.replace({None: ""}) + "(" + metals + ")"
119 |
120 | same_channels = bool(
121 | channel_names.nunique(1).replace(0, 1).all()
122 | ) # np.bool is not serializable
123 |
124 | if same_channels:
125 | print("\t * All ROIs have the same markers/metals.")
126 | ch = channel_names.iloc[:, 0].rename("channel")
127 |         ids = ch.str.extract(r"(?P<Target>.*)\((?P<Metal_Tag>.*)\)")
128 | ids.index = ch
129 |
130 | annot = pd.DataFrame(ids, columns=cols)
131 | annot["Atom"] = annot["Metal_Tag"].str.extract(r"(\d+)")[0]
132 | annot["full"] = (~annot.index.str.contains("|".join(exclude_channels))).astype(
133 | int
134 | )
135 | annot["ilastik"] = (
136 | annot.index.str.contains("DNA") | annot.index.str.startswith("CD")
137 | ).astype(int)
138 | if not args.no_write:
139 | annot.to_csv(mcd_file.replace_(".mcd", ".channel_labels.csv"))
140 | else:
141 | annot = pd.DataFrame(columns=cols)
142 | print("\t * ROIs have different markers/metals.")
143 |
144 | # Save some metadata
145 | meta = session.get_csv_dict()
146 | meta["n_slides"] = len(session.slides)
147 | print(f"\t * Contains {meta['n_slides']} slides.")
148 | meta["n_panoramas"] = len(session.panoramas)
149 | print(f"\t * Contains {meta['n_panoramas']} panoramas.")
150 | meta["n_ROIs"] = len(session.acquisition_ids)
151 | print(f"\t * Contains {meta['n_ROIs']} ROIs.")
152 | meta["ROI_numbers"] = session.acquisition_ids
153 | meta["all_ROIs_same_channels"] = same_channels
154 | meta["consensus_channels"] = (
155 | channel_names.iloc[:, 0].to_dict() if same_channels else None
156 | )
157 | meta["panoramas"] = {p: v.get_csv_dict() for p, v in session.panoramas.items()}
158 | meta["acquisitions"] = {
159 | a: ac.get_csv_dict() for a, ac in session.acquisitions.items()
160 | }
161 | meta.update(session.metadata)
162 | if not args.no_write:
163 | yaml.dump(
164 | encode(meta),
165 | open(mcd_file.replace_(".mcd", ".session_metadata.yaml"), "w"),
166 | indent=4,
167 | default_flow_style=False,
168 | sort_keys=False,
169 | )
170 |
171 | mcd.close()
172 | return meta, annot
173 |
174 |
175 | def encode(obj: tp.Any) -> tp.Any:
176 | """
177 | For serializing to JSON or YAML with no special Python object references.
178 |
179 | Not fit for roundtrip!
180 | """
181 | if isinstance(obj, bool):
182 | return str(obj).lower()
183 | if isinstance(obj, (list, tuple)):
184 | return [encode(item) for item in obj]
185 | if isinstance(obj, (dict, OrderedDict)):
186 | return {encode(key): encode(value) for key, value in obj.items()}
187 | return obj
188 |
189 |
190 | if __name__ == "__main__":
191 | try:
192 | sys.exit(main())
193 | except KeyboardInterrupt:
194 | sys.exit(1)
195 |
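196 | 
197 | # `encode` behavior sketch (editor's addition):
198 | #
199 | # >>> encode(True)
200 | # 'true'
201 | # >>> encode({"all_ROIs_same_channels": False, "ROI_numbers": (1, 2)})
202 | # {'all_ROIs_same_channels': 'false', 'ROI_numbers': [1, 2]}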
--------------------------------------------------------------------------------
/imc/scripts/phenotype.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Phenotype cells.
5 | """
6 |
7 | import sys
8 | import typing as tp
9 |
10 | import pandas as pd
11 |
12 | from imc.ops.clustering import (
13 | phenotyping,
14 | # plot_phenotyping,
15 | predict_cell_types_from_reference,
16 | )
17 | from imc.scripts import build_cli
18 | from imc.utils import filter_kwargs_by_callable
19 |
20 |
21 | def main(cli: tp.Sequence[str] = None) -> int:
22 | parser = build_cli("phenotype")
23 | args = parser.parse_args(cli)
24 | print("Starting phenotyping step!")
25 |
26 | args.channels_include = (
27 | args.channels_include.split(",") if args.channels_include is not None else None
28 | )
29 | args.channels_exclude = args.channels_exclude.split(",")
30 | args.dim_res_algos = args.dim_res_algos.split(",")
31 | args.clustering_resolutions = list(map(float, args.clustering_resolutions.split(",")))
32 | args.output_dir.mkdir()
33 |
34 | if args.compute:
35 | print(f"Phenotyping quantified cells in '{args.a}'.")
36 | pkwargs = filter_kwargs_by_callable(args.__dict__, phenotyping)
37 | a = phenotyping(**pkwargs)
38 | a.write(args.output_dir / "processed.h5ad")
39 | # Save for project:
40 | # prj.get_input_filename("cell_cluster_assignments")
41 |
42 | # Cell type identity
43 | # TODO: connect options to CLI
44 | print("Matching expression to reference cell types.")
45 | df = a.raw.to_adata().to_df()[a.var.index[~a.var.index.str.contains("EMPTY")]]
46 | df = df.loc[:, df.var() > 0]
47 | cov = pd.get_dummies(a.obs[args.batch_variable])
48 | preds = predict_cell_types_from_reference(df, args.output_dir, covariates=cov)
49 | a.obs = a.obs.join(preds)
50 | a.write(args.output_dir / "processed.h5ad")
51 |
52 | # grid = clustermap(a.to_df().groupby(a.obs['cell_type']).mean())
53 | # grid = clustermap(a.obs.corr(), cmap='RdBu_r', center=0)
54 |
55 | # if args.plot:
56 | # print(f"Plotting phenotypes in directory '{args.output_dir}'.")
57 | # output_prefix = args.output_dir / "phenotypes."
58 | # if args.compute:
59 | # args.a = a
60 | # pkwargs = filter_kwargs_by_callable(args.__dict__, plot_phenotyping)
61 | # plot_phenotyping(output_prefix=output_prefix, **pkwargs)
62 |
63 | print("Finished phenotyping step.")
64 | return 0
65 |
66 |
67 | if __name__ == "__main__":
68 | try:
69 | sys.exit(main())
70 | except KeyboardInterrupt:
71 | sys.exit(1)
72 |
--------------------------------------------------------------------------------
/imc/scripts/predict.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Generate probability maps for each pixel in each image.
5 | """
6 |
7 | import sys
8 | import typing as tp
9 |
10 | from imc import ROI
11 | from imc.types import Path
12 | from imc.scripts import build_cli, find_tiffs
13 | from imc.utils import download_file, run_shell_command
14 |
15 |
16 | def main(cli: tp.Sequence[str] = None) -> int:
17 | """Generate probability maps for each ROI using ilastik."""
18 | parser = build_cli("predict")
19 | args = parser.parse_args(cli)
20 | if not args.tiffs:
21 | args.tiffs = find_tiffs()
22 | if not args.tiffs:
23 | print("Input files were not provided and cannot be found!")
24 | return 1
25 |
26 | fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.tiffs])
27 | print(f"Starting predict step for {len(args.tiffs)} TIFF files:{fs}!")
28 |
29 | # Prepare ROI objects
30 | rois = list()
31 | for tiff in args.tiffs:
32 | roi = ROI.from_stack(tiff)
33 | out = roi.get_input_filename("probabilities")
34 | if not args.overwrite and out.exists():
35 | continue
36 | rois.append(roi)
37 |
38 | if not rois:
39 | print("All output predictions exist. Skipping prediction step.")
40 | return 0
41 |
42 | # Get resources
43 | ilastik_sh = get_ilastik(args.lib_dir)
44 | if args.custom_model is None:
45 | model_ilp = get_model(args.models_dir, args.ilastik_model_version)
46 | else:
47 | model_ilp = args.custom_model
48 |
49 | # Predict
50 | print("Starting ilastik pixel classification.")
51 | tiff_files = [roi.get_input_filename("ilastik_input") for roi in rois]
52 | predict_with_ilastik(tiff_files, ilastik_sh, model_ilp, args.quiet)
53 |
54 | for roi in rois:
55 | _in = roi.root_dir / roi.name + "_ilastik_s2_Probabilities.tiff"
56 | if _in.exists():
57 | _in.rename(roi.get_input_filename("probabilities"))
58 |
59 | if args.cleanup:
60 | for roi in rois:
61 | roi.get_input_filename("ilastik_input").unlink()
62 |
63 | print("Finished predict step!")
64 | return 0
65 |
66 |
67 | def predict_with_ilastik(
68 | tiff_files: tp.Sequence[Path], ilastik_sh: Path, model_ilp: Path, quiet: bool = True
69 | ) -> int:
70 | """
71 | Use a trained ilastik model to classify pixels in an IMC image.
72 | """
73 | quiet_arg = "\n --redirect_output /dev/null \\" if quiet else ""
74 | cmd = f"""{ilastik_sh} \\
75 | --headless \\
76 | --readonly \\
77 | --export_source probabilities \\{quiet_arg}
78 | --project {model_ilp} \\
79 | """
80 | # Escape spaces in file names so they are not split into separate arguments
81 | cmd += " ".join([x.replace_(" ", r"\ ").as_posix() for x in tiff_files])
82 | return run_shell_command(cmd, quiet=True)
83 |
84 |
85 | def get_ilastik(lib_dir: Path, version: str = "1.3.3post2") -> Path:
86 | """Download ilastik software."""
87 | import tarfile
88 |
89 | base_url = "https://files.ilastik.org/"
90 |
91 | if sys.platform.startswith("linux"):
92 | _os = "Linux"
93 | file = f"ilastik-{version}-{_os}.tar.bz2"
94 | f = lib_dir / f"ilastik-{version}-{_os}" / "run_ilastik.sh"
95 | elif sys.platform.startswith("darwin"):
96 | _os = "OSX"
97 | file = f"ilastik-{version}-{_os}.tar.bz2"
98 | f = (
99 | lib_dir
100 | / f"ilastik-{version}-{_os}.app"
101 | / "Contents"
102 | / "ilastik-release"
103 | / "run_ilastik.sh"
104 | )
105 | else:
106 | raise NotImplementedError(
107 | "ilastik command line use is only available for Linux and MacOS!"
108 | )
109 |
110 | if not f.exists():
111 | lib_dir.mkdir()
112 | print("Downloading ilastik archive.")
113 | download_file(base_url + file, lib_dir / file)
114 | print("Extracting ilastik archive.")
115 | with tarfile.open(lib_dir / file, "r:bz2") as tar:
116 | tar.extractall(lib_dir)
117 | (lib_dir / file).unlink()
118 | return f
119 |
120 |
121 | def get_model(models_dir: Path, version: str = "20210302") -> Path:
122 | """Download pre-trained ilastik model."""
123 | import tarfile
124 |
125 | versions = {
126 | "20210302": "https://wcm.box.com/shared/static/1q41oshxe76b1uzt1b12etbq3l5dyov4.ilp"
127 | }
128 |
129 | url = versions[version]
130 | file = f"pan_dataset.{version}.ilp"
131 |
132 | f = models_dir / file
133 | if not f.exists():
134 | models_dir.mkdir()
135 | print("Downloading ilastik model.")
136 | download_file(url, f)
137 | return f
138 |
139 |
140 | if __name__ == "__main__":
141 | try:
142 | sys.exit(main())
143 | except KeyboardInterrupt:
144 | sys.exit(1)
145 |
--------------------------------------------------------------------------------
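For reference, the helpers above can also be driven outside the CLI, roughly as below; the input path is illustrative, and both `get_ilastik` and `get_model` download their resources on first call, as their bodies show:

    from imc.types import Path
    from imc.scripts.predict import get_ilastik, get_model, predict_with_ilastik

    ilastik_sh = get_ilastik(Path("lib"))    # downloads/extracts on first use
    model_ilp = get_model(Path("models"))    # pan-dataset model, version "20210302"

    # One or more "*_ilastik_s2.h5" inputs produced by the prepare step
    ret = predict_with_ilastik([Path("roi_ilastik_s2.h5")], ilastik_sh, model_ilp)
    assert ret == 0  # non-zero indicates the headless ilastik run failed
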
/imc/scripts/prepare.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Convert MCD files to TIFF and Sample/ROI structure.
5 | """
6 |
7 | import sys
8 | import typing as tp
9 |
10 | import numpy as np
11 | import tifffile
12 |
13 | from imc import ROI
14 | from imc.scripts import build_cli
15 | from imc.segmentation import prepare_stack
16 | from imc.utils import (
17 | mcd_to_dir,
18 | plot_panoramas_rois,
19 | stack_to_ilastik_h5,
20 | txt_to_tiff,
21 | filter_kwargs_by_callable,
22 | )
23 |
24 |
25 | MCD_FILE_ENDINGS = (".mcd", ".MCD")
26 | TIFF_FILE_ENDINGS = (".tiff", ".TIFF", ".tif", ".TIF")
27 | TXT_FILE_ENDINGS = (".txt", ".TXT")
28 |
29 |
30 | def main(cli: tp.Sequence[str] = None) -> int:
31 | parser = build_cli("prepare")
32 | args = parser.parse_args(cli)
33 |
34 | if not args.pannel_csvs:
35 | args.pannel_csvs = [None] * len(args.input_files)
36 | elif len(args.pannel_csvs) == 1:
37 | args.pannel_csvs = args.pannel_csvs * len(args.input_files)
38 | else:
39 | assert len(args.input_files) == len(args.pannel_csvs)
40 |
41 | if (args.sample_names is None) or (len(args.input_files) != len(args.sample_names)):
42 | args.sample_names = [None] * len(args.input_files)
43 |
44 | args.compression = getattr(tifffile.TIFF.COMPRESSION, args.compression)
45 |
46 | mcds = [file for file in args.input_files if file.endswith(MCD_FILE_ENDINGS)]
47 | tiffs = [file for file in args.input_files if file.endswith(TIFF_FILE_ENDINGS)]
48 | txts = [file for file in args.input_files if file.endswith(TXT_FILE_ENDINGS)]
49 | if mcds and (tiffs or txts):
50 | raise ValueError(
51 | "Mixture of MCD and TIFFs/TXTs were given. "
52 | "Not yet supported, please run prepare step for each file type separately."
53 | )
54 |
55 | if not args.quiet:
56 | ...  # placeholder: no verbose output implemented yet
57 |
58 | fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.input_files])
59 | print(f"Starting prepare step for {len(args.input_files)} files:{fs}!")
60 |
61 | for mcd_file, pannel_csv, sample_name in zip(
62 | mcds, args.pannel_csvs, args.sample_names
63 | ):
64 | sargs = args.__dict__.copy()
65 | sargs["mcd_file"] = mcd_file
66 | sargs["pannel_csv"] = pannel_csv
67 | sargs["sample_name"] = sample_name
68 | sargs["output_dir"] = args.root_output_dir / mcd_file.stem
69 | sargs = {k: v for k, v in sargs.items() if v is not None}
70 | sargs = filter_kwargs_by_callable(sargs, mcd_to_dir)
71 |
72 | print(f"Started analyzing '{mcd_file}'.")
73 | mcd_to_dir(**sargs)
74 |
75 | # Plot ROI positions on panoramas and slide
76 | plot_panoramas_rois(
77 | yaml_spec=mcd_file.replace_(".mcd", ".session_metadata.yaml"),
78 | output_prefix=args.root_output_dir / mcd_file.stem / mcd_file.stem + ".",
79 | panorama_image_prefix=args.root_output_dir / mcd_file.stem / "Panorama_",
80 | save_roi_arrays=False,
81 | overwrite=args.overwrite,
82 | )
83 | print(f"Finished with '{mcd_file}'.")
84 |
85 | for txt in txts:
86 | print(f"Preparing TXT file: '{txt}'.")
87 | name = txt.name.replace(".txt", "")
88 | tiff_f = args.root_output_dir / name / "tiffs" / name + "_full.tiff"
89 | tiff_f.parent.mkdir()
90 | txt_to_tiff(txt, tiff_f, write_channel_labels=True)
91 | tiffs.append(tiff_f)
92 |
93 | for tiff in tiffs:
94 | roi = ROI.from_stack(tiff)
95 | ilastik_input = tiff.replace_("_full.tiff", "_ilastik_s2.h5")
96 | if (not ilastik_input.exists()) or args.overwrite:
97 | print(f"Preparing TIFF file: '{tiff}'.")
98 | s = prepare_stack(roi.stack, roi.channel_labels)
99 | _ = stack_to_ilastik_h5(s[np.newaxis, ...], ilastik_input)
100 |
101 | print("Finished prepare step!")
102 | return 0
103 |
104 |
105 | if __name__ == "__main__":
106 | try:
107 | sys.exit(main())
108 | except KeyboardInterrupt:
109 | sys.exit(1)
110 |
--------------------------------------------------------------------------------
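This entry point is also called programmatically by `process.py` below; a minimal invocation mirroring the flags `imc process` forwards to this step by default (taken from DEFAULT_STEP_ARGS in process.py; the MCD path is illustrative):

    from imc.scripts.prepare import main as prepare

    prepare(["--ilastik", "--n-crops", "0", "--ilastik-compartment", "nuclear",
             "sample.mcd"])
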
/imc/scripts/process.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Process raw IMC files end-to-end.
5 | """
6 |
7 | import sys
8 | import typing as tp
9 | import json
10 | from collections import defaultdict
11 | import time
12 | import warnings
13 |
14 | from urlpath import URL
15 |
16 | from imc.types import Path
17 | from imc.scripts import build_cli, find_mcds, find_tiffs
18 | from imc.scripts.inspect_mcds import main as inspect
19 | from imc.scripts.prepare import main as prepare
20 | from imc.scripts.predict import main as predict
21 | from imc.scripts.segment_stacks import main as segment
22 | from imc.scripts.quantify import main as quantify
23 | from imc.scripts.phenotype import main as phenotype
24 | from imc.utils import download_file
25 |
26 |
27 | DATA_DIR = Path("data")
28 | PROCESSED_DIR = Path("processed")
29 | MCD_FILE_ENDINGS = (".mcd", ".MCD")
30 | TIFF_FILE_ENDINGS = (".tiff", ".TIFF", ".tif", ".TIF")
31 | TXT_FILE_ENDINGS = (".txt", ".TXT")
32 | DEFAULT_STEP_ARGS = {
33 | "prepare": ["--ilastik", "--n-crops", "0", "--ilastik-compartment", "nuclear"],
34 | "segment": ["--from-probabilities", "--model", "deepcell", "--compartment", "both"],
35 | }
36 | process_step_order = ["inspect", "prepare", "predict", "segment", "quantify", "phenotype"]
37 | opts = defaultdict(list)
38 | for k, v in DEFAULT_STEP_ARGS.items():
39 | opts[k] = v
40 |
41 |
42 | def main(cli: tp.Sequence[str] = None) -> int:
43 | parser = build_cli("process")
44 | args = parser.parse_args(cli)
45 |
46 | if args.quiet:
47 | warnings.filterwarnings("ignore")
48 |
49 | if not args.files:
50 | print(
51 | "No input files were given, "
52 | "searching for MCD files under current directory."
53 | )
54 | args.files = find_mcds()
55 | if not args.files:
56 | print("No MCD files found. Searching for TIFF files.")
57 | args.files = find_tiffs()
58 | if not args.files:
59 | print(
60 | "No input files could be found. Specify them manually: "
61 | "`imc process $FILE`."
62 | )
63 | return 1
64 |
65 | # If provided URLs, download files
66 | urls = list(map(URL, filter(is_url, args.files)))
67 | args.files = list(filter(lambda x: not is_url(x), args.files))
68 | args.files = [Path(x).absolute().resolve() for x in args.files]
69 |
70 | missing = [f for f in args.files if not f.exists()]
71 | if missing:
72 | fs = "\n\t- ".join(map(str, missing))
73 | print(f"Could not find the following input files:\n\t- {fs}")
74 | return 1
75 |
76 | for url in urls:
77 | print("Given URLs as input, will download.")
78 | if url.name.endswith(MCD_FILE_ENDINGS):
79 | f = DATA_DIR.mkdir() / url.name
80 | elif url.name.endswith(TIFF_FILE_ENDINGS):
81 | f = PROCESSED_DIR.mkdir() / url.name
82 | else:  # TXT or any other ending: keep under the data directory so `f` is always bound
83 | f = DATA_DIR.mkdir() / url.name
84 | if not f.exists():
85 | print(f"Downloading file '{url}' into '{f}'.")
86 | download_file(url.as_posix(), f)
87 | print("Completed.")
88 | args.files.append(f)
89 |
90 | # Figure out which steps are going to be done
91 | if args.steps is None:
92 | args.steps = process_step_order
93 | else:
94 | args.steps = args.steps.split(",")
95 | assert all(x in process_step_order for x in args.steps)
96 | if args.start_step is not None:
97 | args.steps = args.steps[args.steps.index(args.start_step) :]
98 | if args.stop_step is not None:
99 | args.steps = args.steps[: args.steps.index(args.stop_step) + 1]
100 |
101 | # Load config
102 | if args.config is not None:
103 | with open(args.config) as h:
104 | opts.update(json.load(h))
105 |
106 | fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.files])
107 | print(f"Starting processing of {len(args.files)} files:{fs}!")
108 | steps_s = "\n\t- ".join(args.steps)
109 | print(f"Will do following steps:\n\t- {steps_s}\n")
110 | time.sleep(1)
111 |
112 | mcds = [file for file in args.files if file.endswith(MCD_FILE_ENDINGS)]
113 | mcds_s = list(map(str, mcds))
114 | tiffs = [file for file in args.files if file.endswith(TIFF_FILE_ENDINGS)]
115 | tiffs_s = list(map(str, tiffs))
116 | txts = [file for file in args.files if file.endswith(TXT_FILE_ENDINGS)]
117 | txts_s = list(map(str, txts))
118 | if "inspect" in args.steps and mcds:
119 | inspect(opts["inspect"] + mcds_s)
120 | if "prepare" in args.steps:
121 | prepare(opts["prepare"] + mcds_s + tiffs_s + txts_s)
122 |
123 | # Now run remaining for all
124 | new_tiffs = list()
125 | for mcd in mcds:
126 | new_tiffs += list(
127 | (PROCESSED_DIR / mcd.stem / "tiffs").glob(f"{mcd.stem}*_full.tiff")
128 | )
129 | for txt in txts:
130 | name = txt.name.replace(".txt", "")
131 | tiff_f = PROCESSED_DIR / name / "tiffs" / name + "_full.tiff"
132 | new_tiffs += [tiff_f]
133 | tiffs = sorted(list(map(str, set(tiffs + new_tiffs))))
134 |
135 | s_parser = build_cli("segment")
136 | s_args = s_parser.parse_args(opts["segment"] + tiffs)
137 | reason = (
138 | f"Skipping predict step as segmentation model '{s_args.model}' does not need it."
139 | )
140 | if "predict" in args.steps:
141 | if s_args.model == "deepcell":
142 | out = predict(opts["predict"] + tiffs)
143 | if out:
144 | return out
145 | else:
146 | print(reason)
147 | if "segment" in args.steps:
148 | segment(opts["segment"] + tiffs)
149 | if "quantify" in args.steps:
150 | quantify(opts["quantify"] + tiffs)
151 | h5ad_f = "processed/quantification.h5ad"
152 | if "phenotype" in args.steps:
153 | phenotype(opts["phenotype"] + [h5ad_f])
154 |
155 | print("Finished processing!")
156 | return 0
157 |
158 |
159 | def is_url(x: str) -> bool:
160 | from urllib.parse import urlparse
161 |
162 | if isinstance(x, Path):
163 | x = x.as_posix()
164 |
165 | try:
166 | result = urlparse(x)
167 | return all([result.scheme, result.netloc])
168 | except ValueError:  # urlparse raises on malformed URLs
169 | return False
170 |
171 |
172 | if __name__ == "__main__":
173 | try:
174 | sys.exit(main())
175 | except KeyboardInterrupt:
176 | sys.exit(1)
177 |
--------------------------------------------------------------------------------
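The `--config` option above merges a JSON object into `opts`, keyed by step name. A hypothetical config overriding the segmentation step (the "stardist"/"cell" values are illustrative alternatives, not the defaults):

    import json

    config = {
        "prepare": ["--ilastik", "--n-crops", "0", "--ilastik-compartment", "nuclear"],
        "segment": ["--from-probabilities", "--model", "stardist", "--compartment", "cell"],
    }
    with open("imc.config.json", "w") as h:
        json.dump(config, h, indent=2)

    # Then: imc process --config imc.config.json data/*.mcd
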
/imc/scripts/quantify.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Quantify images in stacks.
5 | """
6 |
7 | import sys
8 | import typing as tp
9 |
10 | import numpy as np
11 | import anndata
12 |
13 | from imc import ROI
14 | from imc.types import Path
15 | from imc.ops.quant import quantify_cells_rois
16 | from imc.scripts import build_cli, find_tiffs
17 |
18 | def main(cli: tp.Sequence[str] = None) -> int:
19 | parser = build_cli("quantify")
20 | args = parser.parse_args(cli)
21 | if not args.tiffs:
22 | args.tiffs = sorted(find_tiffs())
23 | if not args.tiffs:
24 | print("Input files were not provided and cannot be found!")
25 | return 1
26 |
27 | fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.tiffs])
28 | print(f"Starting quantification step for {len(args.tiffs)} TIFF files:{fs}!")
29 |
30 | # Prepare ROI objects
31 | rois = list()
32 | for tiff in args.tiffs:
33 | roi = ROI.from_stack(tiff)
34 | roi.set_channel_exclude(args.channel_exclude.split(","))
35 | rois.append(roi)
36 |
37 | missing = [r.name for r in rois if not r.get_input_filename("stack").exists()]
38 | if missing:
39 | m = "\n\t- ".join(missing)
40 | error = f"Not all stacks exist! Missing:\n\t- {m}"
41 | raise ValueError(error)
42 | missing = [r.name for r in rois if not r.get_input_filename("cell_mask").exists()]
43 | if missing:
44 | m = "\n\t- ".join(missing)
45 | error = f"Not all cell masks exist! Missing:\n\t- {m}"
46 | raise ValueError(error)
47 |
48 | quant = quantify_cells_rois(
49 | rois, args.layers.split(","), morphology=args.morphology
50 | ).reset_index()
51 |
52 | # reorder columns for nice effect
53 | ext = ["roi", "obj_id"] + (["X_centroid", "Y_centroid"] if args.morphology else [])
54 | rem = [x for x in quant.columns if x not in ext]
55 | quant = quant[ext + rem]
56 |
57 | if args.output is None:
58 | f = Path("processed").mkdir() / "quantification.csv.gz"
59 | else:
60 | f = Path(args.output)
61 | quant.to_csv(f, index=False)
62 | print(f"Wrote CSV file to '{f.absolute()}'.")
63 |
64 | if args.output_h5ad:
65 | v = len(str(quant["obj_id"].max()))
66 | idx = quant["roi"] + "-" + quant["obj_id"].astype(str).str.zfill(v)
67 | quant.index = idx
68 |
69 | cols = ["sample", "roi", "obj_id", "X_centroid", "Y_centroid", "layer"]
70 | cols = [c for c in cols if c in quant.columns]
71 | ann = anndata.AnnData(
72 | quant.drop(cols, axis=1, errors="ignore").astype(float), obs=quant[cols]
73 | )
74 | if "X_centroid" in ann.obs.columns:
75 | ann.obsm["spatial"] = ann.obs[["Y_centroid", "X_centroid"]].values
76 | f = f.replace_(".csv.gz", ".h5ad")
77 | ann.write(f)
78 | print(f"Wrote h5ad file to '{f.absolute()}'.")
79 | ann2 = anndata.read(f)
80 | assert np.allclose(ann.X, ann2.X)
81 |
82 | print("Finished quantification step.")
83 | return 0
84 |
85 |
86 | if __name__ == "__main__":
87 | try:
88 | sys.exit(main())
89 | except KeyboardInterrupt:
90 | sys.exit(1)
91 |
--------------------------------------------------------------------------------
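A short sketch of consuming the outputs written above, using the default paths from `main()` (the `--morphology` and `--output-h5ad` flag names are inferred from the `args` attributes):

    import anndata
    import pandas as pd

    quant = pd.read_csv("processed/quantification.csv.gz")  # one row per cell
    a = anndata.read_h5ad("processed/quantification.h5ad")  # with --output-h5ad

    # X holds per-cell channel intensities; obs carries "roi"/"obj_id" (plus
    # centroids when --morphology was given); obsm["spatial"] stores (Y, X).
    print(a)
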
/imc/scripts/segment_stacks.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Segment image stacks.
5 | """
6 |
7 | import sys
8 | import argparse
9 | import typing as tp
10 | from dataclasses import dataclass
11 |
12 | import numpy as np
13 | import pandas as pd
14 | import tifffile
15 | import matplotlib.pyplot as plt
16 |
17 | from imc import ROI
18 | from imc.types import Path, Series, Array
19 | from imc.segmentation import segment_roi, plot_image_and_mask
20 | from imc.scripts import build_cli, find_tiffs
21 |
22 |
23 | def main(cli: tp.Sequence[str] = None) -> int:
24 | parser = build_cli("segment")
25 | args = parser.parse_args(cli)
26 | if len(args.tiffs) == 0:
27 | args.tiffs = find_tiffs()
28 | if len(args.tiffs) == 0:
29 | print("TIFF files were not provided and could not be found!")
30 | return 1
31 |
32 | fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.tiffs])
33 | print(f"Starting segmentation step for {len(args.tiffs)} TIFF files:{fs}!")
34 |
35 | # Prepare ROI objects
36 | rois = list()
37 | for tiff in args.tiffs:
38 | roi = ROI.from_stack(tiff)
39 | roi.set_channel_exclude(args.channel_exclude.split(","))
40 | rois.append(roi)
41 |
42 | # Run segmentation
43 | for roi in rois:
44 | if args.compartment == "both":
45 | mask_files = {
46 | "cell": roi.get_input_filename("cell_mask"),
47 | "nuclei": roi.get_input_filename("nuclei_mask"),
48 | }
49 | else:
50 | mask_files = {
51 | args.compartment: roi.get_input_filename(args.compartment + "_mask")
52 | }
53 | exists = all(f.exists() for f in mask_files.values())
54 | if exists and not args.overwrite:
55 | print(f"Mask for '{roi}' already exists, skipping...")
56 | continue
57 |
58 | print(f"Started segmentation of '{roi} with shape: '{roi.stack.shape}'")
59 | try:
60 | _ = segment_roi(
61 | roi,
62 | from_probabilities=args.from_probabilities,
63 | model=args.model,
64 | compartment=args.compartment,
65 | postprocessing=args.postprocessing,
66 | save=args.save,
67 | overwrite=args.overwrite,
68 | plot_segmentation=args.plot,
69 | verbose=not args.quiet,
70 | )
71 | except ValueError as e:
72 | print("Error segmenting stack. Perhaps XY shape is not compatible?")
73 | print(e)
74 | continue
75 | print(f"Finished segmentation of '{roi}'.")
76 |
77 | print("Finished segmentation step!")
78 | return 0
79 |
80 |
81 | if __name__ == "__main__":
82 | try:
83 | sys.exit(main())
84 | except KeyboardInterrupt:
85 | sys.exit(1)
86 |
--------------------------------------------------------------------------------
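Programmatic use mirrors the defaults `imc process` forwards to this step (see DEFAULT_STEP_ARGS in process.py above); the TIFF path is illustrative:

    from imc.scripts.segment_stacks import main as segment

    segment(["--from-probabilities", "--model", "deepcell", "--compartment", "both",
             "processed/sample/tiffs/roi_full.tiff"])
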
/imc/scripts/view.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | View multiplexed TIFF files interactively.
5 | """
6 |
7 | import sys
8 | import time
9 | import typing as tp
10 |
11 | import matplotlib.pyplot as plt
12 |
13 | from imc import ROI
14 | from imc.graphics import InteractiveViewer
15 | from imc.scripts import build_cli
16 |
17 |
18 | def main(cli: tp.Sequence[str] = None) -> int:
19 | parser = build_cli("view")
20 | args = parser.parse_args(cli)
21 | if len(args.input_files) == 0:
22 | print("Input files were not provided and could not be found!")
23 | return 1
24 |
25 | kwargs = {}
26 | if args.kwargs is not None:
27 | print(args.kwargs)
28 | params = [x.split("=") for x in args.kwargs.split(",")]
29 | kwargs = {y[0]: y[1] for y in params}
30 |
31 | fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.input_files])
32 | print(f"Starting viewers for {len(args.input_files)} files: {fs}!")
33 |
34 | if args.napari:
35 | assert all(
36 | f.endswith(".mcd") for f in args.input_files
37 | ), "If using napari input must be MCD files!"
38 | import napari
39 |
40 | viewer = napari.Viewer()
41 | viewer.open(args.input_files)
42 | napari.run()
43 | return 0
44 |
45 | assert all(
46 | f.endswith((".tiff", ".tif")) for f in args.input_files
47 | ), "Input must be TIFF files!"
48 |
49 | # Prepare ROI objects
50 | rois = [ROI.from_stack(tiff) for tiff in args.input_files]
51 |
52 | # Generate viewer instances
53 | viewers = list()
54 | for roi in rois:
55 | view = InteractiveViewer(
56 | roi,
57 | up_key=args.up_key,
58 | down_key=args.down_key,
59 | log_key=args.log_key,
60 | **kwargs,
61 | )
62 | viewers.append(view)
63 |
64 | print(
65 | f"Press '{args.up_key}' and '{args.down_key}' to scroll through image channels."
66 | + f" '{args.log_key}' to toggle logarithmic transformation."
67 | )
68 | time.sleep(2)
69 | for view in viewers:
70 | view.fig.show()
71 | plt.show(block=True)
72 |
73 | print("Terminating!")
74 | return 0
75 |
76 |
77 | if __name__ == "__main__":
78 | try:
79 | sys.exit(main())
80 | except KeyboardInterrupt:
81 | sys.exit(1)
82 |
--------------------------------------------------------------------------------
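Note that the key=value pairs parsed above arrive as strings and are forwarded verbatim to InteractiveViewer. A hypothetical invocation (the `--kwargs` flag name is inferred from `args.kwargs`):

    from imc.scripts.view import main as view

    # "cmap=viridis" arrives as the string "viridis"; values are not cast.
    view(["processed/sample/tiffs/roi_full.tiff", "--kwargs", "cmap=viridis"])
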
/imc/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ElementoLab/imc/9725b3ab72f2273cb4a702964fa8518c2f189e9c/imc/tests/__init__.py
--------------------------------------------------------------------------------
/imc/tests/_test_layers.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import matplotlib.pyplot as plt
4 | import seaborn as sns
5 | import tifffile
6 | import anndata
7 | import scanpy as sc
8 | import scipy.ndimage as ndi
9 |
10 | from imc import Project
11 | from imc.graphics import random_label_cmap
12 |
13 | layer_names = ["cell", "nuclei", "cytoplasm", "membrane", "extracellular"]
14 |
15 | prj = Project()
16 |
17 | roi = prj.rois[25]
18 | fig, axes = plt.subplots(1, 5, figsize=(5 * 4, 4), sharex=True, sharey=True)
19 | cmap = random_label_cmap()
20 | for i, layer in enumerate(layer_names):
21 | mask = getattr(roi, layer + "_mask")
22 | mask = np.ma.masked_array(mask, mask=mask == 0)
23 | axes[i].imshow(mask, cmap=cmap)
24 | axes[i].set(title=layer)
25 | axes[i].axis("off")
26 |
27 |
28 | prj.rois = prj.rois[25:27]
29 | quant = prj.quantify_cells(layers=layer_names, set_attribute=False)
30 |
31 |
32 | quant = quant.reset_index().melt(id_vars=["roi", "obj_id", "layer"], var_name="channel")
33 | quant = quant.pivot_table(
34 | index=["roi", "obj_id"], columns=["layer", "channel"], values="value"
35 | )
36 | quant = quant.reset_index()
37 |
38 | X = quant.loc[:, layer_names[0]]
39 | obs = quant[["roi", "obj_id"]]
40 | obs["in_tissue"] = 1
41 | obs["array_row"] = ...
42 | obs["array_col"] = ...
43 | obs.columns = ["roi", "obj_id"]
44 | layers = quant.loc[:, layer_names[1:]]
45 |
46 | a = anndata.AnnData(
47 | X=X.reset_index(drop=True),
48 | obs=obs,
49 | layers={l: layers[l] for l in layer_names[1:]},
50 | )
51 |
52 | a = anndata.AnnData(X=quant.drop(["roi", "obj_id"], axis=1), obs=obs)
53 | a.uns["spatial"] = {}  # must exist before the per-ROI assignments below
54 | for roi in prj.rois:
55 | a.uns["spatial"][roi.name] = {
56 | "images": {"hires": roi.stack},
57 | "metadata": {},
58 | "scalefactors": {
59 | "spot_diameter_fullres": 89.56665687930325,
60 | "tissue_hires_scalef": 0.150015,
61 | "fiducial_diameter_fullres": 144.6845995742591,
62 | "tissue_lowres_scalef": 0.045004502,
63 | },
64 | }
65 |
66 |
67 | sc.pp.log1p(a)
68 | sc.pp.scale(a)
69 | sc.pp.pca(a)
70 |
--------------------------------------------------------------------------------
/imc/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from imc.demo import generate_project
4 |
5 |
6 | # # To run manually:
7 | # import tempfile
8 | # tmp_path = tempfile.TemporaryDirectory().name
9 |
10 |
11 | @pytest.fixture
12 | def project(tmp_path):
13 | return generate_project(root_dir=tmp_path)
14 |
15 |
16 | @pytest.fixture
17 | def metadata(project):
18 | return project.sample_metadata
19 |
20 |
21 | @pytest.fixture
22 | def project_with_clusters(tmp_path):
23 | p = generate_project(root_dir=tmp_path)
24 | p.quantify_cells()
25 | c = (
26 | p.quantification.set_index(["sample", "roi"], append=True)
27 | .rename_axis(["obj_id", "sample", "roi"])
28 | .reorder_levels([1, 2, 0])
29 | .assign(cluster=(p.quantification.index % 2))["cluster"]
30 | )
31 | p.set_clusters(c, write_to_disk=True)
32 | return p
33 |
--------------------------------------------------------------------------------
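Tests request these fixtures by parameter name; for instance, a hypothetical test using the `project` fixture:

    def test_has_samples(project):
        # pytest builds `project` via generate_project(root_dir=tmp_path)
        assert len(project.samples) > 0
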
/imc/tests/test_full_analysis.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 |
4 | class TestHighOrderFunctions:
5 | # @pytest.mark.slow
6 | @pytest.mark.xfail
7 | def test_cluster_cells(self, project):
8 | project.cluster_cells()
9 |
10 | @pytest.mark.slow
11 | def test_measure_adjacency(self, project_with_clusters):
12 | files = [
13 | "cluster_adjacency_graph.frequencies.csv",
14 | "cluster_adjacency_graph.norm_over_random.clustermap.svg",
15 | "cluster_adjacency_graph.norm_over_random.csv",
16 | "cluster_adjacency_graph.norm_over_random.heatmap.svg",
17 | "cluster_adjacency_graph.random_frequencies.all_iterations_100.csv",
18 | "cluster_adjacency_graph.random_frequencies.csv",
19 | "neighbor_graph.gpickle",
20 | "neighbor_graph.svg",
21 | ]
22 |
23 | with project_with_clusters as prj:
24 | adj = prj.measure_adjacency()
25 | assert (
26 | prj.results_dir / "single_cell" / "project.adjacency_frequencies.csv"
27 | ).exists()
28 | assert adj.shape == (36, 5)
29 | assert ~adj.isnull().any().any()
30 |
31 | for roi in prj.rois:
32 | prefix = roi.sample.root_dir / "single_cell" / roi.name + "."
33 | for file in files:
34 | assert (prefix + file).exists()
35 |
--------------------------------------------------------------------------------
/imc/tests/test_graphics.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import numpy as np
4 |
5 | from matplotlib.image import AxesImage
6 | from matplotlib.legend import Legend
7 |
8 |
9 | class TestCellTypePlotting:
10 | def test_clusters_labeled_with_numbers(self, project_with_clusters):
11 | p = project_with_clusters
12 |
13 | # # make pattern: "int (1-based) - str"
14 | c = (p.clusters + 1).astype(str) + " - " + (p.clusters + 1).astype(str)
15 | p.set_clusters(c)
16 |
17 | # Plot both clusters
18 | roi = p.rois[0]
19 | fig1 = roi.plot_cell_types()
20 |
21 | # Collapse all cells into the last cluster, effectively removing the first
22 | c2 = roi.clusters.copy()
23 | for e in c2.index:
24 | c2[e] = roi.clusters.max()
25 | roi.set_clusters(c2)
26 | fig2 = roi.plot_cell_types()
27 |
28 | # Get arrays back from images
29 | a1 = [i for i in fig1.axes[0].get_children() if isinstance(i, AxesImage)]
30 | a1 = [a for a in a1 if len(a.get_array().shape) == 3][0].get_array()
31 | a2 = [i for i in fig2.axes[0].get_children() if isinstance(i, AxesImage)]
32 | a2 = [a for a in a2 if len(a.get_array().shape) == 3][0].get_array()
33 |
34 | # Get legend of second image
35 | l2 = [i for i in fig2.axes[0].get_children() if isinstance(i, Legend)][0]
36 |
37 | # Get color of legend patch (RGBA)
38 | lc = l2.get_patches()[0].get_facecolor()[:-1]
39 | # Get color from array (should be only one besides black)
40 | _t = a2.reshape((8 * 8, 3))
41 | ac = _t[_t.sum(1) > 0][0]
42 |
43 | assert np.equal(ac, lc).all()
44 |
--------------------------------------------------------------------------------
/imc/tests/test_obj_creation.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 |
4 | import pickle
5 | import tempfile
6 |
7 | import pytest
8 |
9 | from imc import Project, IMCSample, ROI
10 | from imc.demo import generate_project
11 | from imc.data_models.project import DEFAULT_PROJECT_NAME
12 | from imc.data_models.sample import DEFAULT_SAMPLE_NAME
13 | from imc.data_models.roi import DEFAULT_ROI_NAME
14 |
15 |
16 | class TestProjectInitialization:
17 | def test_empty_project(self):
18 | p = Project()
19 | assert p.name == DEFAULT_PROJECT_NAME
20 | assert isinstance(p.samples, list)
21 | assert isinstance(p.rois, list)
22 | assert not p.samples
23 | assert not p.rois
24 |
25 | def test_empty_sample(self):
26 | s = IMCSample()
27 | assert s.name == DEFAULT_SAMPLE_NAME
28 | assert isinstance(s.rois, list)
29 | assert not s.rois
30 |
31 | def test_empty_roi(self):
32 | r = ROI()
33 | assert r.name == DEFAULT_ROI_NAME
34 |
35 | def test_creation_without_rois(self, tmp_path):
36 | p = generate_project(root_dir=tmp_path)
37 | p2 = Project(p.metadata[["sample_name"]].drop_duplicates(), processed_dir=p.processed_dir)
38 | assert len(p2.samples) == 3
39 | assert len(p2.rois) == 9
40 |
--------------------------------------------------------------------------------
/imc/tests/test_serialization.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | from typing import Any
3 |
4 | import parmap
5 | import pandas as pd
6 |
7 | from imc import Project, IMCSample, ROI
8 | from imc.ops.quant import _quantify_cell_intensity__roi
9 | from imc.types import Path
10 |
11 |
12 | def roundtrip(obj: Any, _dir: Path) -> Any:
13 | pickle.dump(obj, open(_dir / "file.pkl", "wb"))
14 | return pickle.load(open(_dir / "file.pkl", "rb"))
15 |
16 |
17 | class TestSimpleSerialization:
18 | def test_empty_project(self, tmp_path):
19 | p = Project(name="test_empty_project")
20 | q = roundtrip(p, tmp_path)
21 | assert q.name == "test_empty_project"
22 | # assert p is q
23 |
24 | def test_empty_sample(self, tmp_path):
25 | s = IMCSample(sample_name="test_empty_sample", root_dir=".")
26 | r = roundtrip(s, tmp_path)
27 | assert r.name == "test_empty_sample"
28 | # assert s is r
29 |
30 | def test_empty_roi(self, tmp_path):
31 | r = ROI(name="test_empty_roi", roi_number=1)
32 | s = roundtrip(r, tmp_path)
33 | assert s.name == "test_empty_roi"
34 | # assert r is s
35 |
36 |
37 | def func(roi: ROI) -> int:
38 | return len(roi.shape)
39 |
40 |
41 | class TestParmapSerialization:
42 | def test_simple_parmap(self, project):
43 |
44 | res = parmap.map(func, project.rois)
45 | assert all(x == 3 for x in res)
46 |
47 | def test_quant_parmap_lowlevel(self, project):
48 |
49 | _res = parmap.map(_quantify_cell_intensity__roi, project.rois)
50 | res = pd.concat(_res)
51 | assert not res.empty
52 | assert all(
53 | res.columns == project.rois[0].channel_labels.tolist() + ["roi", "sample"]
54 | )
55 |
56 | def test_quant_parmap_highlevel(self, project):
57 | res = project.quantify_cell_intensity()
58 | assert not res.empty
59 | assert all(
60 | res.columns == project.rois[0].channel_labels.tolist() + ["roi", "sample"]
61 | )
62 |
--------------------------------------------------------------------------------
/imc/types.py:
--------------------------------------------------------------------------------
1 | """
2 | Specific types or type aliases used in the library.
3 | """
4 |
5 | from __future__ import annotations
6 | import os
7 | import typing as tp
8 | import pathlib
9 | import argparse
10 |
11 | import matplotlib
12 | import pandas
13 | import numpy
14 | from anndata import AnnData as _AnnData
15 |
16 |
17 | __all__ = [
18 | "Path",
19 | "GenericType",
20 | "Args",
21 | "Array",
22 | "MultiIndexSeries",
23 | "Series",
24 | "DataFrame",
25 | "AnnData",
26 | "Figure",
27 | "Axis",
28 | "Patch",
29 | "ColorMap",
30 | ]
31 |
32 |
33 | class Path(pathlib.Path):
34 | """
35 | A pathlib.Path child class that allows concatenation with strings
36 | by overloading the addition operator.
37 |
38 | In addition, it implements the ``startswith`` and ``endswith`` methods
39 | just like in the base :obj:`str` type.
40 |
41 | The ``replace_`` implementation is meant to be an implementation closer
42 | to the :obj:`str` type.
43 |
44 | Iterating over a directory with ``iterdir`` that does not exist
45 | will return an empty iterator instead of throwing an error.
46 |
47 | Creating a directory with ``mkdir`` allows existing directory and
48 | creates parents by default.
49 | """
50 |
51 | _flavour = (
52 | pathlib._windows_flavour # type: ignore[attr-defined] # pylint: disable=W0212
53 | if os.name == "nt"
54 | else pathlib._posix_flavour # type: ignore[attr-defined] # pylint: disable=W0212
55 | )
56 |
57 | def __add__(self, string: str) -> Path:
58 | return Path(str(self) + string)
59 |
60 | def startswith(self, string: str) -> bool:
61 | return str(self).startswith(string)
62 |
63 | def endswith(self, string: str) -> bool:
64 | return str(self).endswith(string)
65 |
66 | def replace_(self, patt: str, repl: str) -> Path:
67 | return Path(str(self).replace(patt, repl))
68 |
69 | def iterdir(self) -> tp.Generator:
70 | if self.exists():
71 | yield from [Path(x) for x in pathlib.Path(str(self)).iterdir()]
72 | yield from []
73 |
74 | def unlink(self, missing_ok: bool = True) -> Path:
75 | super().unlink(missing_ok=missing_ok)
76 | return self
77 |
78 | def mkdir(self, mode=0o777, parents: bool = True, exist_ok: bool = True) -> Path:
79 | super().mkdir(mode=mode, parents=parents, exist_ok=exist_ok)
80 | return self
81 |
82 | def glob(self, pattern: str) -> tp.Generator:
83 | # to support ** with symlinks: https://bugs.python.org/issue33428
84 | from glob import glob
85 |
86 | if "**" in pattern:
87 | sep = "/" if self.is_dir() else ""
88 | yield from map(
89 | Path,
90 | glob(self.as_posix() + sep + pattern, recursive=True),
91 | )
92 | else:
93 | yield from super().glob(pattern)
94 |
95 |
96 | GenericType = tp.TypeVar("GenericType")
97 |
98 | # type aliasing (done with Union to distinguish from other declared variables)
99 |
100 |
101 | # # Args = Union[argparse.Namespace]
102 | # class Args(argparse.Namespace, tp.Mapping[str, tp.Any]):
103 | # pass
104 |
105 |
106 | # # Series = Union[pandas.Series]
107 | # class Series(pandas.Series, tp.Mapping[tp.Any, tp.Any]):
108 | # pass
109 |
110 |
111 | Args = tp.Union[argparse.Namespace, tp.Mapping[str, tp.Any]]
112 |
113 | Array = numpy.ndarray
114 |
115 | MultiIndexSeries = pandas.Series
116 | Series = pandas.Series
117 | DataFrame = pandas.DataFrame
118 | AnnData = _AnnData
119 |
120 | Figure = matplotlib.figure.Figure
121 | Axis = matplotlib.axis.Axis
122 | Patch = matplotlib.patches.Patch
123 | ColorMap = matplotlib.colors.LinearSegmentedColormap
124 |
--------------------------------------------------------------------------------
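A quick sketch of the behaviors described in the Path docstring above:

    from imc.types import Path

    f = Path("processed") / "sample" + "_full.tiff"  # `+` concatenates like str
    assert f.endswith(".tiff")                       # str-like endswith
    mask = f.replace_("_full.tiff", "_mask.tiff")    # str-like replace
    out = Path("results/new/dir").mkdir()            # parents and exist_ok by default
    assert list(Path("does/not/exist").iterdir()) == []  # empty instead of raising
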
/noxfile.py:
--------------------------------------------------------------------------------
1 | import nox
2 |
3 | python_versions = [
4 | "3.8",
5 | "3.9",
6 | "3.10",
7 | ]
8 |
9 | # flake8 rules ignored project-wide, with reasons:
10 | ignore_rules = [
11 | "E501",  # line too long (black manages line length)
12 | "F401",  # module imported but unused
13 | "F841",  # local variable assigned but never used
14 | "W503",  # line break before binary operator (conflicts with black)
15 | "E402",  # module-level import not at top of file
16 | "E203",  # whitespace before ':' (conflicts with black slicing style)
17 | "E266",  # too many leading '#' for block comment
18 | "E722",  # bare except
19 | ]
20 |
21 | exclude_directories = [
22 | "tests",
23 | ]
24 |
25 |
26 | @nox.session(python=python_versions)
27 | def lint(session):
28 | session.install("flake8")
29 | session.run(
30 | "flake8",
31 | "--ignore",
32 | ",".join(ignore_rules),
33 | "--exclude",
34 | ",".join(exclude_directories),
35 | "imc/",
36 | )
37 |
38 |
39 | @nox.session(python=python_versions)
40 | def test(session):
41 | session.install(".[dev]")
42 | session.run("pytest")
43 |
--------------------------------------------------------------------------------
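With the sessions above, `nox -s lint` runs flake8 under each listed interpreter, `nox -s test` installs the package with its dev extra and runs pytest, and `nox -l` lists all available sessions.
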
/pyproject.toml:
--------------------------------------------------------------------------------
1 | # PIP, using PEP621
2 | [project]
3 | name = "imc"
4 | # version = "0.0.19.dev24+g43d6c06"
5 | description = "A framework for IMC data analysis."
6 | authors = [
7 | {name = "Andre Rendeiro", email = "afrendeiro@gmail.com"},
8 | ]
9 | # python = "^3.8"
10 | readme = "README.md"
11 | keywords = [
12 | "computational biology",
13 | "bioinformatics",
14 | "imaging mass cytometry",
15 | "imaging",
16 | "mass cytometry",
17 | "mass spectrometry",
18 | ]
19 | classifiers = [
20 | "Programming Language :: Python :: 3 :: Only",
21 | "Programming Language :: Python :: 3.8",
22 | "Programming Language :: Python :: 3.9",
23 | "Programming Language :: Python :: 3.10",
24 | "Development Status :: 3 - Alpha",
25 | "Typing :: Typed",
26 | "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
27 | "Topic :: Scientific/Engineering :: Bio-Informatics",
28 | ]
29 | requires-python = ">=3.8"
30 | dependencies = [
31 | "setuptools_scm",
32 | "outdated",
33 | "ordered-set",
34 | "urlpath",
35 | "PyYAML",
36 | "imctools>=2.1.0",
37 | "joblib",
38 | "leidenalg",
39 | "python-louvain",
40 | "networkx>=3.0.0",
41 | "pandas>=1.0.1",
42 | "matplotlib>=3.5.0",
43 | "scikit-image==0.19.0",
44 | "seaborn",
45 | "fastcluster",
46 | "parmap",
47 | "scanpy",
48 | "bbknn",
49 | "numpy_groupies",
50 | "tifffile==2022.4.8",
51 | "seaborn-extensions"
52 | ]
53 | dynamic = ['version']
54 |
55 | [project.optional-dependencies]
56 | # not yet supported by pip!
57 | extra = [
58 | "stardist>=0.7.1,<1.0.0",
59 | "DeepCell>=0.8.3,<1.0.0",
60 | "cellpose>=0.6.5,<1.0.0",
61 | "astir>=0.1.4,<1.0.0",
62 | ]
63 | stardist = [
64 | "stardist>=0.7.1,<1.0.0",
65 | ]
66 | deepcell = [
67 | "DeepCell>=0.8.3,<1.0.0",
68 | ]
69 | cellpose = [
70 | "cellpose>=0.6.5,<1.0.0",
71 | ]
72 | astir = [
73 | "astir>=0.1.4,<1.0.0",
74 | ]
75 | dev = [
76 | "wheel",
77 | "ipython",
78 | "black[d]",
79 | "mypy>=0.900", # pin to version supporting pyproject.toml
80 | "pandas-stubs",
81 | "pylint",
82 | "flake8",
83 | "git-lint",
84 | "pydocstyle",
85 | "rich",
86 | # data-science-types
87 | "PyQt5",
88 | ]
89 | test = [
90 | "pytest>=6",
91 | "pytest-cov",
92 | ]
93 | doc = [
94 | "Sphinx",
95 | "sphinx-issues",
96 | "sphinx-rtd-theme",
97 | "sphinx-argparse",
98 | ]
99 |
100 | [project.urls]
101 | homepage = "https://github.com/ElementoLab/imc"
102 | repository = "https://github.com/ElementoLab/imc"
103 | documentation = "https://github.com/ElementoLab/imc"
104 | changelog = "https://github.com/ElementoLab/imc/blob/master/docs/source/changelog.md"
105 |
106 | [project.scripts]
107 | imc = "imc.cli:main"
108 |
109 | [build-system]
110 | build-backend = "setuptools.build_meta"
111 | requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.0"]
112 |
113 | # build-backend = "poetry.masonry.api"
114 | # requires = ["poetry>=1.2.0b1", "setuptools>=45", "wheel", "poetry-dynamic-versioning-plugin"]
115 |
116 | # build-backend = "flit_core.buildapi"
117 | # requires = ["flit_core >=3.2,<4"]
118 |
119 | [tool.setuptools_scm]
120 | write_to = "imc/_version.py"
121 | write_to_template = 'version = __version__ = "{version}"'
122 |
123 | # Poetry
124 | [tool.poetry-dynamic-versioning]
125 | enable = true
126 | vcs = "git"
127 | style = "semver"
128 |
129 | [tool.poetry]
130 | name = "imc"
131 | version = "0.0.0" # waiting on next release of poetry to use dynamic-versioning extension
132 | description = "A package for the analysis of imaging mass cytometry (IMC) data"
133 | authors = ["Andre Rendeiro "]
134 | homepage = "https://github.com/ElementoLab/imc"
135 | repository = "https://github.com/ElementoLab/imc"
136 | documentation = "https://github.com/ElementoLab/imc"
137 |
138 | [tool.poetry.dependencies]
139 | python = "^3.8"
140 | ordered-set = "^4.0.2"
141 | PyYAML = "^5.4.1"
142 | pandas = ">=1.0.1"
143 | tifffile = ">=2022.5.4"
144 | imctools = "^2.1.0"
145 | scikit-image = "^0.20.0"
146 | imagecodecs = "^2020.5.30"
147 | colorama = "^0.4.3"
148 | h5py = "^2.10.0"
149 | anndata = "^0.7.3"
150 | scanpy = "^1.5.1"
151 | leidenalg = "^0.8.1"
152 | python-louvain = "^0.14"
153 | networkx = "^3.0"
154 | parmap = "^1.5.2"
155 | joblib = "^0.15.1"
156 |
157 | [tool.poetry.dev-dependencies]
158 | ipython = "^7.16.1"
159 | pylint = "^2.5.3"
160 | git-lint = "^0.1.2"
161 | black = {extras = ["d"], version = "^19.10b0"}
162 | mypy = "^0.900"
163 | pytest = "^5.4.3"
164 | Sphinx = "^3.1.1"
165 | sphinx-issues = "^1.2.0"
166 | sphinx-rtd-theme = "^0.5.0"
167 |
168 | [tool.poetry.extras]
169 | stardist = [
170 | "stardist",
171 | ]
172 | deepcell = [
173 | "DeepCell",
174 | ]
175 | cellpose = [
176 | "cellpose",
177 | ]
178 | astir = [
179 | "astir",
180 | ]
181 |
182 | [tool.black]
183 | line-length = 90
184 | target-version = ['py39']
185 | include = '\.pyi?$'
186 | exclude = '''
187 |
188 | (
189 | /(
190 | \.eggs # exclude a few common directories in the
191 | | \.git # root of the project
192 | | \.hg
193 | | \.mypy_cache
194 | | \.tox
195 | | \.venv
196 | | _build
197 | | buck-out
198 | | build
199 | | dist
200 | )/
201 | | foo.py # also separately exclude a file named foo.py in
202 | # the root of the project
203 | )
204 | '''
205 |
206 | [tool.mypy]
207 | python_version = '3.9'
208 | warn_return_any = true
209 | warn_unused_configs = true
210 |
211 | # Packages without type annotations in typeshed yet
212 | [[tool.mypy.overrides]]
213 | module = [
214 | 'numpy.*',
215 | 'pandas.*',
216 | 'scipy.*',
217 | 'skimage.*',
218 | 'matplotlib.*',
219 | 'seaborn.*',
220 | 'parmap.*',
221 | 'anndata.*',
222 | 'scanpy.*',
223 | 'pymde.*',
224 | 'umap.*',
225 | 'networkx.*',
226 | 'pingouin.*',
227 | 'tqdm.*',
228 | ]
229 | ignore_missing_imports = true
230 |
231 | [tool.pytest.ini_options]
232 | minversion = "6.0"
233 | addopts = "-ra -q --strict-markers"
234 | testpaths = [
235 | "imc/tests"
236 | ]
237 | markers = [
238 | 'slow', # 'marks tests as slow (deselect with "-m 'not slow'")',
239 | 'serial'
240 | ]
241 |
242 |
243 | [tool.tox]
244 | legacy_tox_ini = """
245 | [tox]
246 | envlist = py39
247 |
248 | [testenv]
249 | deps = pytest >= 6, <7
250 | commands = pytest
251 | """
252 |
--------------------------------------------------------------------------------
/requirements/requirements.cellpose.txt:
--------------------------------------------------------------------------------
1 | cellpose>=0.6.5,<1.0.0
2 |
--------------------------------------------------------------------------------
/requirements/requirements.deepcell.txt:
--------------------------------------------------------------------------------
1 | DeepCell>=0.8.3,<1.0.0
2 |
--------------------------------------------------------------------------------
/requirements/requirements.dev.txt:
--------------------------------------------------------------------------------
1 | wheel
2 | ipython
3 | black[d]
4 | mypy>=0.900
5 | pandas-stubs
6 | pylint
7 | flake8
8 | git-lint
9 | pydocstyle
10 | rich
11 | pytest>=6
12 | pytest-cov
13 |
--------------------------------------------------------------------------------
/requirements/requirements.doc.txt:
--------------------------------------------------------------------------------
1 | # data-science-types
2 | Sphinx
3 | sphinx-issues
4 | sphinx-rtd-theme
5 | sphinx-argparse
6 | myst_parser
7 | sphinx-autodoc-typehints
8 |
--------------------------------------------------------------------------------
/requirements/requirements.stardist.txt:
--------------------------------------------------------------------------------
1 | stardist>=0.7.1,<1.0.0
2 |
--------------------------------------------------------------------------------
/requirements/requirements.txt:
--------------------------------------------------------------------------------
1 | setuptools_scm
2 | outdated
3 | ordered-set
4 | PyYAML
5 | parmap
6 | tqdm
7 | joblib
8 | numpy
9 | numpy_groupies
10 | scipy>=1.7
11 | pandas>=1.0.1
12 | matplotlib>=3.5
13 | imctools>=2.1.0
14 | tifffile==2022.4.8
15 | scikit-image==0.19.0
16 | leidenalg
17 | python-louvain
18 | networkx
19 | scanpy
20 | bbknn
21 | seaborn-extensions
22 | harmonypy
23 |
--------------------------------------------------------------------------------