├── .github
    └── workflows
    │   └── test.yml
├── .gitignore
├── .readthedocs.yaml
├── Makefile
├── Manifest.in
├── README.md
├── docs
    ├── Makefile
    ├── make.bat
    └── source
    │   ├── api.md
    │   ├── changelog.md
    │   ├── concepts.md
    │   ├── conf.py
    │   ├── examples.md
    │   ├── index.md
    │   ├── install.md
    │   ├── log_config.md
    │   ├── testing.md
    │   └── usage.md
├── imc
    ├── __init__.py
    ├── cli.py
    ├── data_models
    │   ├── __init__.py
    │   ├── project.py
    │   ├── roi.py
    │   └── sample.py
    ├── defaults.py
    ├── demo
    │   ├── __init__.py
    │   ├── generate_data.py
    │   └── get_demo_data.py
    ├── exceptions.py
    ├── graphics.py
    ├── interactive_volume_viewer.py
    ├── logo.png
    ├── ops
    │   ├── __init__.py
    │   ├── adjacency.py
    │   ├── clustering.py
    │   ├── community.py
    │   ├── compensation.py
    │   ├── domain.py
    │   ├── mixture.py
    │   ├── quant.py
    │   └── signal.py
    ├── py.typed
    ├── scripts
    │   ├── __init__.py
    │   ├── illustrate.py
    │   ├── inspect_ilastik_model.py
    │   ├── inspect_mcds.py
    │   ├── phenotype.py
    │   ├── predict.py
    │   ├── prepare.py
    │   ├── process.py
    │   ├── quantify.py
    │   ├── segment_stacks.py
    │   └── view.py
    ├── segmentation.py
    ├── tests
    │   ├── __init__.py
    │   ├── _test_layers.py
    │   ├── conftest.py
    │   ├── test_full_analysis.py
    │   ├── test_graphics.py
    │   ├── test_obj_creation.py
    │   └── test_serialization.py
    ├── types.py
    └── utils.py
├── noxfile.py
├── pyproject.toml
└── requirements
    ├── requirements.cellpose.txt
    ├── requirements.deepcell.txt
    ├── requirements.dev.txt
    ├── requirements.doc.txt
    ├── requirements.stardist.txt
    └── requirements.txt


/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: Test imc package
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [ main ]
 9 |   pull_request:
10 |     branches: [ main ]
11 | 
12 | jobs:
13 |   linux:
14 |     runs-on: ubuntu-latest
15 |     steps:
16 |     - uses: actions/checkout@v2
17 |     - name: Set up Python 3.8
18 |       uses: actions/setup-python@v2
19 |       with:
20 |         python-version: 3.8
21 |     - name: Install dependencies
22 |       run: |
23 |         python -m pip install --upgrade pip
24 |         pip install wheel pytest
25 |         pip install .[deepcell,astir]
26 |     - name: Test with pytest
27 |       run: |
28 |         # Test package
29 |         python -m pytest imc/
30 | 
31 |         # Run pipeline
32 |         mkdir -p imctest
33 |         cd imctest
34 |         imc process https://zenodo.org/record/5018260/files/COVID19_brain_Patient03_ROI3_COVID19_olfactorybulb.txt?download=1
35 | 
36 |         # List output files
37 |         ls -l processed/
38 |         ls -l processed/${SAMPLE}
39 |         ls -l results/phenotyping
40 |     - name: Cache resources
41 |       id: cache-resources
42 |       uses: actions/cache@v2
43 |       with:
44 |         path: /home/$USER/.imc
45 |         key: imc-resources-linux
46 | 
47 |   osx:
48 |     runs-on: macos-10.14
49 |     steps:
50 |     - uses: actions/checkout@v2
51 |     - name: Set up Python 3.8
52 |       uses: actions/setup-python@v2
53 |       with:
54 |         python-version: 3.8
55 |     - name: Install dependencies
56 |       run: |
57 |         python -m pip install --upgrade pip
58 |         pip install wheel pytest
59 |         pip install .[deepcell,astir]
60 |     - name: Test with pytest
61 |       run: |
62 |         # Test package
63 |         python -m pytest imc/
64 | 
65 |         # Run example processing pipeline
66 |         mkdir -p imctest
67 |         cd imctest
68 |         imc process https://zenodo.org/record/5018260/files/COVID19_brain_Patient03_ROI3_COVID19_olfactorybulb.txt?download=1
69 | 
70 |         # List output files
71 |         ls -l processed/
72 |         ls -l processed/${SAMPLE}
73 |         ls -l results/phenotyping
74 |     - name: Cache resources
75 |       id: cache-resources
76 |       uses: actions/cache@v2
77 |       with:
78 |         path: /home/$USER/.imc
79 |         key: imc-resources-osx
80 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # project specific
  2 | data
  3 | submission
  4 | processed
  5 | _models
  6 | results
  7 | 
  8 | *.tiff
  9 | *.csv
 10 | 
 11 | 
 12 | # ignore test files
 13 | .tox
 14 | _version.py
 15 | pytest.log
 16 | .coverage*
 17 | 
 18 | # Build-related stuff
 19 | build/
 20 | dist/
 21 | *.egg-info
 22 | 
 23 | 
 24 | # toy/experimental files
 25 | *.txt
 26 | # *.csv
 27 | *.tsv
 28 | *.pkl
 29 | *.pickle
 30 | *.svg
 31 | *.png
 32 | *.jpg
 33 | *.jpeg
 34 | 
 35 | # ignore mypy
 36 | .mypy*
 37 | 
 38 | # ignore eggs
 39 | .eggs/
 40 | 
 41 | # ignore built docs
 42 | doc/build/*
 43 | 
 44 | # generic ignore list:
 45 | *.lst
 46 | 
 47 | # Compiled source
 48 | *.com
 49 | *.class
 50 | *.dll
 51 | *.exe
 52 | *.o
 53 | *.so
 54 | *.pyc
 55 | 
 56 | # Packages
 57 | # it's better to unpack these files and commit the raw source
 58 | # git has its own built in compression methods
 59 | *.7z
 60 | *.dmg
 61 | *.gz
 62 | *.iso
 63 | *.jar
 64 | *.rar
 65 | *.tar
 66 | *.zip
 67 | 
 68 | # Logs and databases
 69 | *.log
 70 | *.sql
 71 | *.sqlite
 72 | 
 73 | # OS generated files
 74 | .DS_Store
 75 | .DS_Store?
 76 | ._*
 77 | .Spotlight-V100
 78 | .Trashes
 79 | ehthumbs.db
 80 | Thumbs.db
 81 | 
 82 | # Sublime files
 83 | *.sublime-*
 84 | 
 85 | # Gedit temporary files
 86 | *~
 87 | 
 88 | # libreoffice lock files:
 89 | .~lock*
 90 | 
 91 | # IDE-specific items
 92 | .idea/
 93 | 
 94 | # pytest-related
 95 | .cache/
 96 | .coverage*
 97 | coverage.xml
 98 | 
 99 | # Reserved files for comparison
100 | *RESERVE*
101 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yaml
 2 | # Read the Docs configuration file
 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 4 | 
 5 | # Required
 6 | version: 2
 7 | 
 8 | # Set the version of Python and other tools you might need
 9 | build:
10 |   os: ubuntu-20.04
11 |   tools:
12 |     python: "3.9"
13 | 
14 | # Build documentation in the docs/ directory with Sphinx
15 | sphinx:
16 |   configuration: docs/source/conf.py
17 | 
18 | # If using Sphinx, optionally build your docs in additional formats such as PDF
19 | # formats:
20 | #    - pdf
21 | 
22 | # Optionally declare the Python requirements required to build your docs
23 | python:
24 |   system_packages: true
25 |   install:
26 |     - method: pip
27 |       path: .
28 |       extra_requirements:
29 |         - doc
30 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .DEFAULT_GOAL := all
 2 | 
 3 | 
 4 | NAME=$(shell basename `pwd`)
 5 | DOCS_DIR="docs"
 6 | 
 7 | 
 8 | help:  ## Display help and quit
 9 | 	@echo Makefile for the $(NAME) package.
10 | 	@echo Available commands:
11 | 	@grep -E '^[0-9a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
12 | 		awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m\
13 | 		%s\n", $$1, $$2}'
14 | 
15 | all: install test  ## Install the package and run tests
16 | 
17 | clean_build:
18 | 	rm -rf build/
19 | 
20 | clean_dist:
21 | 	rm -rf dist/
22 | 
23 | clean_eggs:
24 | 	rm -rf *.egg-info
25 | 
26 | clean_mypy:
27 | 	rm -rf .mypy_cache/
28 | 
29 | clean_docs:
30 | 	rm -rf docs/build/*
31 | 
32 | clean_tests:
33 | 	rm -rf /tmp/pytest*
34 | 
35 | clean: clean_dist clean_eggs clean_build clean_mypy clean_docs  ## Remove build, mypy cache, tests and docs
36 | 
37 | _install:
38 | 	# python setup.py sdist
39 | 	# python -m pip wheel --no-index --no-deps --wheel-dir dist dist/*.tar.gz
40 | 	# python -m pip install dist/*-py3-none-any.whl --user --upgrade
41 | 	python -m pip install .
42 | 
43 | install:  ## Install the package
44 | 	${MAKE} clean
45 | 	${MAKE} _install
46 | 	${MAKE} clean
47 | 
48 | docs:  ## Build the documentation
49 | 	${MAKE} -C $(DOCS_DIR) html
50 | 	xdg-open $(DOCS_DIR)/build/html/index.html
51 | 
52 | 
53 | lint:
54 | 	-flake8 --count --ignore E501,F401,F841,W503,E402,E203,E266,E722 --exclude tests/ imc/
55 | 
56 | test: lint ## Run the tests
57 | 	python -m pytest -m "not slow" $(NAME)/ 
58 | 
59 | backup_time:
60 | 	echo "Last backup: " `date` >> _backup_time
61 | 	chmod 700 _backup_time
62 | 
63 | _sync:
64 | 	rsync --copy-links --progress -r \
65 | 	. afr4001@pascal.med.cornell.edu:projects/$(NAME)
66 | 
67 | sync: _sync backup_time ## [dev] Sync data/code to SCU server
68 | 
69 | build: test
70 | 		python setup.py sdist bdist_wheel
71 | 
72 | pypitest: build
73 | 		twine \
74 | 				upload \
75 | 				-r pypitest dist/*
76 | 
77 | pypi: build
78 | 		twine \
79 | 				upload \
80 | 				dist/*
81 | 
82 | .PHONY : clean_build clean_dist clean_eggs clean_mypy clean_docs clean_tests \
83 | clean _install install clean_docs docs test backup_time _sync sync \
84 | build pypitest pypi
85 | 


--------------------------------------------------------------------------------
/Manifest.in:
--------------------------------------------------------------------------------
 1 | include AUTHORS.md
 2 | include CONTRIBUTING.md
 3 | include CHANGELOG.md
 4 | include LICENSE
 5 | include README.md
 6 | 
 7 | recursive-include requirements *
 8 | recursive-include tests *
 9 | recursive-include docs *.md *.rst conf.py Makefile make.bat
10 | recursive-exclude * __pycache__
11 | recursive-exclude * *.py[co]
12 | 
13 | global-include *.typed
14 | 
15 | include logo.png
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <p align="center">
  2 |   <img width="460" src="https://github.com/ElementoLab/imc/blob/main/imc/logo.png?raw=True">
  3 | </p>
  4 | 
  5 | # Imaging mass cytometry
  6 | 
  7 | A package for processing and analysis of imaging mass cytometry (IMC) data.
  8 | 
  9 | It implements image- and channel-wise quality control, quantification of cell
 10 | intenstity and morphology, cell type discovery through clustering, automated
 11 | cell type labeling, community and super-community finding and differential
 12 | comparisons between sample groups, in addition to many handy visualization tools.
 13 | Above all, it is a tool for the use of IMC data at scale.
 14 | 
 15 | Development is still underway, so use at your own risk.
 16 | 
 17 | 
 18 | ## Requirements and installation
 19 | 
 20 | Requires `Python >= 3.9`. `imc` uses a `pyproject.toml` configuration only, so you'll need a up-to-date version of `pip` before installing. Base packages as `gcc` and `g++` will also need to be installed on system using the command `sudo apt install g++` or likewise. We also highly recommend installing the package on a `conda` environment to avoid dependency issues.
 21 | 
 22 | To install the most updated version of the program:
 23 | ```bash
 24 | git clone https://github.com/ElementoLab/imc.git
 25 | cd imc
 26 | make install
 27 | ```
 28 | 
 29 | Install from [PyPI](https://pypi.org/project/imc/) with [`pip`](https://pip.pypa.io/) or with [poetry](https://python-poetry.org/):
 30 | ```bash
 31 | pip install imc
 32 | # or
 33 | poetry install imc
 34 | ```
 35 | 
 36 | ## Quick start
 37 | 
 38 | Install the package from [PyPI](https://pypi.org/project/imc/) with extra packages required for all steps:
 39 | ```bash
 40 | pip install imc[extra]
 41 | # or
 42 | poetry install imc[extra]
 43 | ```
 44 | 
 45 | ### Use case 1 (pipeline processing)
 46 | 
 47 | #### Example: Lung sample processing from MCD to single-cell h5ad
 48 | 
 49 | One-line IMC data processing:
 50 | ```bash
 51 | # Run pipeline in one step with remote MCD file
 52 | MCD_URL=https://zenodo.org/record/4110560/files/data/20200612_FLU_1923/20200612_FLU_1923.mcd
 53 | imc process $MCD_URL
 54 | ```
 55 | `imc` also supports TXT or TIFF files as input, local or remote files:
 56 | ```bash
 57 | # Run pipeline in one step with remote TXT file
 58 | TXT_URL=https://zenodo.org/record/5018260/files/COVID19_brain_Patient03_ROI3_COVID19_olfactorybulb.txt?download=1
 59 | imc process $TXT_URL
 60 | ```
 61 | Input can be MCD, TIFF, or TXT files.
 62 | Several files can be given to `imc process` at once. See more with the `--help` option.
 63 | 
 64 | `imc` is nonetheless very modular and allows the user to run any of the step seperately as well.
 65 | 
 66 | The above is also equivalent to the following:
 67 | ```bash
 68 | MCD_URL=https://zenodo.org/record/4110560/files/data/20200612_FLU_1923/20200612_FLU_1923.mcd
 69 | SAMPLE=20200612_FLU_1923
 70 | 
 71 | wget -O data/${SAMPLE}.mcd $MCD_URL
 72 | 
 73 | ## output description of acquired data
 74 | imc inspect data/${SAMPLE}.mcd
 75 | 
 76 | ## convert MCD to TIFFs and auxiliary files
 77 | imc prepare \
 78 |   --ilastik \
 79 |   --n-crops 0 \
 80 |   --ilastik-compartment nuclear \
 81 |   data/${SAMPLE}.mcd
 82 | 
 83 | ## For each TIFF file, output prediction of mask probabilities and segment them 
 84 | TIFFS=processed/${SAMPLE}/tiffs/${SAMPLE}*_full.tiff
 85 | 
 86 | ## Output pixel probabilities of nucleus, membrane and background using ilastik
 87 | imc predict $TIFFS
 88 | 
 89 | ## Segment cell instances with DeepCell
 90 | imc segment \
 91 |   --from-probabilities \
 92 |   --model deepcell \
 93 |   --compartment both $TIFFS
 94 | 
 95 | ## Quantify channel intensity and morphology for each single cell in every image
 96 | imc quantify $TIFFS
 97 | ```
 98 | 
 99 | Once all MCD files have been processed for the project, create a concatenated AnnData object containing all cells within a project.
100 | 
101 | ```python
102 | from glob import glob
103 | import os
104 | import anndata
105 | pattern = glob('processed/*.h5ad')
106 | adatas = [anndata.read(f) for f in pattern if os.path.exists(f)]
107 | adata = anndata.concat(adatas)
108 | adata.write('results/quant.h5ad')
109 | ```
110 | 
111 | To perform batch correction and cell clustering:
112 | ```bash
113 | ## Phenotype cells into clusters
114 | imc phenotype processed/quant.h5ad
115 | ```
116 | 
117 | There are many customization options for each step. Do `imc --help` or `imc <subcommand> --help` to see all.
118 | 
119 | `imc` also includes a lightweight interactive image viewer:
120 | ```bash
121 | imc view $TIFFS
122 | ```
123 | 
124 | There is also an interface to the more full fledged `napari` image viwer:
125 | ```bash
126 | imc view --napari data/${SAMPLE}.mcd  # view MCD file
127 | napari $TIFFS  # view TIFF files directly with napari. Requires napari
128 | ```
129 | 
130 | A quick example of further analysis steps of single cell data downstream in IPython/Jupyter notebook:
131 | ```python
132 | import scanpy as sc
133 | a = sc.read('processed/quantification.h5ad')
134 | sc.pp.log1p(a)
135 | sc.pp.pca(a)
136 | sc.pp.neighbors(a)
137 | sc.tl.umap(a)
138 | sc.pl.umap(a, color=a.var.index)
139 | ```
140 | 
141 | ### Use case 2 (API usage)
142 | 
143 | #### Demo data (synthetic)
144 | ```python
145 | >>> from imc.demo import generate_project
146 | >>> prj = generate_project(n_samples=2, n_rois_per_sample=3, shape=(8, 8))
147 | >>> prj
148 | Project 'project' with 2 samples and 6 ROIs in total.
149 | 
150 | >>> prj.samples  # type: List[IMCSample]
151 | [Sample 'test_sample_01' with 3 ROIs,
152 |  Sample 'test_sample_02' with 3 ROIs]
153 | 
154 | >>> prj.rois  # type: List[ROI]
155 | [Region 1 of sample 'test_sample_01',
156 |  Region 2 of sample 'test_sample_01',
157 |  Region 3 of sample 'test_sample_01',
158 |  Region 1 of sample 'test_sample_02',
159 |  Region 2 of sample 'test_sample_02',
160 |  Region 3 of sample 'test_sample_02']
161 | 
162 | >>> prj.samples[0].rois  # type: List[ROI]
163 | [Region 1 of sample 'test_sample_01',
164 |  Region 2 of sample 'test_sample_01',
165 |  Region 3 of sample 'test_sample_01']
166 | 
167 | >>> roi = prj.rois[0]  # Let's assign one ROI to explore it
168 | >>> roi.channel_labels  # type: pandas.Series; `channel_names`, `channel_metals` also available
169 | 0    Ch01(Ch01)
170 | 1    Ch02(Ch02)
171 | 2    Ch03(Ch03)
172 | Name: channel, dtype: object
173 | 
174 | >>> roi.mask  # type: numpy.ndarray
175 | array([[0, 0, 0, 0, 0, 0, 0, 0],
176 |        [0, 0, 0, 0, 0, 0, 0, 0],
177 |        [0, 0, 0, 0, 0, 0, 1, 0],
178 |        [0, 0, 0, 0, 0, 0, 0, 0],
179 |        [0, 2, 0, 0, 0, 3, 0, 0],
180 |        [0, 0, 0, 0, 0, 0, 0, 0],
181 |        [0, 0, 4, 0, 0, 0, 0, 0],
182 |        [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)
183 | 
184 | >>> roi.stack.shape  # roi.stack -> type: numpy.ndarray
185 | (3, 8, 8)
186 | 
187 | >>> # QC
188 | >>> prj.channel_correlation()
189 | >>> prj.channel_summary()
190 | 
191 | >>> # Cell type discovery
192 | >>> prj.cluster_cells()
193 | >>> prj.find_communities()
194 | 
195 | ```
196 | #### Demo data (real)
197 | ```python
198 | >>> import imc.demo
199 | >>> imc.demo.datasets
200 | ['jackson_2019_short', 'jackson_2019_short_joint']
201 | 
202 | >>> prj = imc.demo.get_dataset('jackson_2019_short')
203 | >>> prj  # type: Project
204 | Project 'jackson_2019_short' with 4 samples and 4 ROIs in total.
205 | 
206 | >>> prj.samples  # type: List[IMCSample]
207 | [Sample 'BaselTMA_SP41_15.475kx12.665ky_10000x8500_5_20170905_90_88_X11Y5_242_a0' with 1 ROI,
208 |  Sample 'BaselTMA_SP41_25.475kx12.665ky_8000x8500_3_20170905_90_88_X11Y5_235_a0' with 1 ROI,
209 |  Sample 'BaselTMA_SP41_33.475kx12.66ky_8500x8500_2_20170905_24_61_X3Y4_207_a0' with 1 ROI,
210 |  Sample 'BaselTMA_SP41_33.475kx12.66ky_8500x8500_2_20170905_33_61_X4Y4_215_a0' with 1 ROI]
211 | 
212 | >>> prj.samples[0].channel_labels  # type: pandas.Series
213 | chanel
214 | 0                                  Ar80(Ar80)
215 | 1                                  Ru96(Ru96)
216 | 2                                  Ru98(Ru98)
217 | 3                                  Ru99(Ru99)
218 | 4                                Ru100(Ru100)
219 | 5                                Ru101(Ru101)
220 | 6                                Ru102(Ru102)
221 | 7                                Ru104(Ru104)
222 | 8                            HistoneH3(In113)
223 | 9                                EMPTY(Xe126)
224 | 10                                EMPTY(I127)
225 | 11                           HistoneH3(La139)
226 | ...
227 | 42                            vWF-CD31(Yb172)
228 | 43                                mTOR(Yb173)
229 | 44                        Cytokeratin7(Yb174)
230 | 45    PanCytokeratin-KeratinEpithelial(Lu175)
231 | 46         CleavedPARP-CleavedCaspase3(Yb176)
232 | 47                                DNA1(Ir191)
233 | 48                                DNA2(Ir193)
234 | 49                               EMPTY(Pb206)
235 | 50                               EMPTY(Pb207)
236 | 51                               EMPTY(Pb208)
237 | Name: BaselTMA_SP41_15.475kx12.665ky_10000x8500_5_20170905_90_88_X11Y5_242_a0, dtype: object
238 | >>> prj.plot_channels(['DNA2', 'Ki67', "Cytokeratin7"])
239 | <Figure size 400x1200 with 12 Axes>
240 | ```
241 | 
242 | #### Your own data
243 | 
244 | The best way is to have a CSV file with one row per sample, or one row per ROI.
245 | That will ensure additional sample/ROI metadata is passed to the objects and used later in analysis.
246 | Pass the path to the CSV file to the `Project` object constructor:
247 | 
248 | ```python
249 | from imc import Project
250 | 
251 | prj = Project()  # will search current directory for Samples/ROIs
252 | 
253 | prj = Project(processed_dir="processed")  # will search `processed` for Samples/ROIs
254 | 
255 | prj = Project("path/to/sample/annotation.csv", processed_dir="processed")
256 | # ^^ will use metadata from CSV and use the files in `processed`.
257 | ```
258 | 
259 | However, if one is not given, `Project` will search the current directory or the
260 | argument of `processed_dir` for IMCSamples and ROIs.
261 | 
262 | The `processed_dir` directory can be structured in two ways:
263 | 1. One directory per sample.
264 |   - Inside there is a directory `"tiffs"` which contains the stack `"*_full.tiff"`, channel labels
265 |   `"*_full.csv"` and optionally a segmentation `"*_full_mask.tiff"`.
266 | 
267 | 2. All samples in the same directory `processed_dir`.
268 |   - Inside the one directory there are stack `"*_full.tiff"`, channel label `"*_full.csv"` and
269 |   optionally segmentation `"*_full_mask.tiff"` files.
270 | 
271 | The default is option one. If you choose `2`, simply pass `subfolder_per_sample`:
272 | 
273 | ``` python
274 | prj = Project(subfolder_per_sample=True)
275 | ```
276 | 
277 | The expected files are produced by common preprocessing pipelines such as
278 | [imcpipeline](https://github.com/elementolab/imcpipeline) or [imcyto](https://nf-co.re/imcyto).
279 | 
280 | 
281 | ## Documentation
282 | 
283 | Documentation is for now mostly a skeleton but will be expanded soon:
284 | 
285 | ```bash
286 | make docs
287 | ```
288 | 
289 | ## Testing
290 | 
291 | Tests are still very limited, but you can run tests this way:
292 | 
293 | ```bash
294 | pip install pytest  # install testing package
295 | python -m pytest --pyargs imc
296 | ```
297 | 
298 | For data processing, running the example lung data should make sure eveything is running smoothly.
299 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/source/api.md:
--------------------------------------------------------------------------------
 1 | # API
 2 | 
 3 | The great flexibility of `imc` comes from the ability to compose workflows using the API.
 4 | 
 5 | It provides a rich but abstract `imc.analysis.Project` object and implements various modules building on it depending on the data type.
 6 | 
 7 | In addition, the `imc.operations` module contains several analysis-independent methods and the `imc.utils` module provides low-level functions of general use.
 8 | 
 9 | ## imc.data_models.project
10 | ```{eval-rst}
11 | .. automodule:: imc.data_models.project
12 |    :members:
13 | ```
14 | 
15 | ## imc.data_models.sample
16 | ```{eval-rst}
17 | .. automodule:: imc.data_models.sample
18 |    :members:
19 | ```
20 | 
21 | ## imc.data_models.roi
22 | ```{eval-rst}
23 | .. automodule:: imc.data_models.roi
24 |    :members:
25 | ```
26 | 
27 | ## imc.operations
28 | ### imc.ops.signal
29 | ```{eval-rst}
30 | .. automodule:: imc.ops.signal
31 |    :members:
32 | ```
33 | ### imc.ops.compensation
34 | ```{eval-rst}
35 | .. automodule:: imc.ops.compensation
36 |    :members:
37 | ```
38 | ### imc.ops.mixture
39 | ```{eval-rst}
40 | .. automodule:: imc.ops.mixture
41 |    :members:
42 | ```
43 | ### imc.ops.domain
44 | ```{eval-rst}
45 | .. automodule:: imc.ops.domain
46 |    :members:
47 | ```
48 | ### imc.ops.quant
49 | ```{eval-rst}
50 | .. automodule:: imc.ops.quant
51 |    :members:
52 | ```
53 | ### imc.ops.clustering
54 | ```{eval-rst}
55 | .. automodule:: imc.ops.clustering
56 |    :members:
57 | ```
58 | ### imc.ops.adjacency
59 | ```{eval-rst}
60 | .. automodule:: imc.ops.adjacency
61 |    :members:
62 | ```
63 | ### imc.ops.community
64 | ```{eval-rst}
65 | .. automodule:: imc.ops.community
66 |    :members:
67 | ```
68 | ## imc.graphics
69 | ```{eval-rst}
70 | .. automodule:: imc.graphics
71 |    :members:
72 | ```
73 | 
74 | ## imc.utils
75 | ```{eval-rst}
76 | .. automodule:: imc.utils
77 |    :members:
78 | ```
79 | 
80 | ## imc.types
81 | ```{eval-rst}
82 | .. automodule:: imc.types
83 |    :members:
84 | ```


--------------------------------------------------------------------------------
/docs/source/changelog.md:
--------------------------------------------------------------------------------
  1 | # Changelog
  2 | 
  3 | All notable changes to this project will be documented in this file.
  4 | 
  5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
  6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
  7 | 
  8 | ## [Unreleased]
  9 | ### Added
 10 | - 
 11 | ### Changed
 12 | - 
 13 | ### Removed
 14 | -
 15 | 
 16 | ## [0.0.12] - 2021-07-19
 17 | ### Added
 18 | - functions to handle multi-cell masks (topological domains)
 19 | - napari + napari_imc to view MCD files
 20 | ### Changed
 21 | - fix support of OSX in ilastik segmentation
 22 | - centralized package data under `.imc`
 23 | 
 24 | ## [0.0.11] - 2021-07-01
 25 | ### Added
 26 | - Command `imc process`.
 27 | 
 28 | ## [0.0.10] - 2021-07-01
 29 | ### Added
 30 | - CI on Github actions
 31 | - add more CLI commands
 32 | ### Changed
 33 | - centralized package data under `.imc`
 34 | - fix packaging
 35 | 
 36 | ## [0.0.8] - 2021-06-01
 37 | ### Added
 38 | - add `.pyproject.toml`
 39 | - support subcellular mask quantification
 40 | ### Changed
 41 | - rasterized linecollection plots by default
 42 | 
 43 | ## [0.0.7] - 2021-04-26
 44 | ### Added
 45 | - initial support subcellular mask quantification
 46 | - DeepCell postprocessing to match nuclear and cellular masks
 47 | - function to plot and extract panorama images matching ROIs
 48 | - Cellpose as segmentation method
 49 | - add CLI command for segmentation
 50 | ### Changed
 51 | - rasterized linecollection plots by default
 52 | 
 53 | ## [0.0.6] - 2020-12-16
 54 | ### Added
 55 | - segmentation module
 56 | - mask layers to support alternative segmentations
 57 | ### Changed
 58 | - rasterized linecollection plots by default
 59 | ### Removed
 60 | -
 61 | - graphics code that was abstracted to `seaborn_extensions` module
 62 | 
 63 | ## [0.0.5] - 2020-12-07
 64 | ### Added
 65 | - segmentation module
 66 | - mask layers to support alternative segmentations
 67 | ### Changed
 68 | - export panoramas by default
 69 | - support ome-tiff
 70 | - upgrade to `imctools==2.1.0`
 71 | 
 72 | ## [0.0.4] - 2020-10-07
 73 | 
 74 | 
 75 | ## [0.0.3] - 2020-06-17
 76 | ### Changed
 77 | - Patch `pathlib.Path` to support path building with `+` (operator overload)
 78 | 
 79 | ## [0.0.2] - 2020-06-15
 80 | ### Added
 81 | - Many features
 82 | 
 83 | 
 84 | ## [0.0.1] - 2020-04-14
 85 | ### Added
 86 | - Project, Sample and ROI modules/objects
 87 | 
 88 | [Unreleased]: https://github.com/ElementoLab/imc/compare/0.0.2...HEAD
 89 | [0.0.11]: https://github.com/ElementoLab/imc/compare/0.0.10...v0.0.11
 90 | [0.0.10]: https://github.com/ElementoLab/imc/compare/0.0.9...v0.0.10
 91 | [0.0.9]: https://github.com/ElementoLab/imc/compare/0.0.8...v0.0.9
 92 | [0.0.8]: https://github.com/ElementoLab/imc/compare/0.0.7...v0.0.8
 93 | [0.0.7]: https://github.com/ElementoLab/imc/compare/0.0.6...v0.0.7
 94 | [0.0.6]: https://github.com/ElementoLab/imc/compare/0.0.5...v0.0.6
 95 | [0.0.5]: https://github.com/ElementoLab/imc/compare/0.0.4...v0.0.5
 96 | [0.0.4]: https://github.com/ElementoLab/imc/compare/0.0.3...v0.0.4
 97 | [0.0.3]: https://github.com/ElementoLab/imc/compare/0.0.2...v0.0.3
 98 | [0.0.2]: https://github.com/ElementoLab/imc/compare/0.0.1...v0.0.2
 99 | [0.0.1]: https://github.com/ElementoLab/imc/releases/tag/v0.0.1
100 | 


--------------------------------------------------------------------------------
/docs/source/concepts.md:
--------------------------------------------------------------------------------
1 | # Concepts
2 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | 
  4 | import sphinx_rtd_theme
  5 | 
  6 | # If extensions (or modules to document with autodoc) are in another directory,
  7 | # add these directories to sys.path here. If the directory is relative to the
  8 | # documentation root, use os.path.abspath to make it absolute, like shown here.
  9 | sys.path.insert(0, os.path.abspath("../../"))
 10 | 
 11 | 
 12 | # Configuration file for the Sphinx documentation builder.
 13 | #
 14 | # This file only contains a selection of the most common options. For a full
 15 | # list see the documentation:
 16 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 17 | 
 18 | # -- Path setup --------------------------------------------------------------
 19 | 
 20 | # If extensions (or modules to document with autodoc) are in another directory,
 21 | # add these directories to sys.path here. If the directory is relative to the
 22 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 23 | #
 24 | # import os
 25 | # import sys
 26 | # sys.path.insert(0, os.path.abspath('.'))
 27 | 
 28 | 
 29 | # -- Project information -----------------------------------------------------
 30 | 
 31 | project = "imc"
 32 | copyright = "2021, Andre Rendeiro"
 33 | author = "Andre Rendeiro"
 34 | 
 35 | 
 36 | # -- General configuration ---------------------------------------------------
 37 | 
 38 | # Add any Sphinx extension module names here, as strings. They can be
 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 40 | # ones.
 41 | extensions = [
 42 |     "sphinx.ext.autodoc",
 43 |     "sphinx.ext.autosummary",
 44 |     "sphinx.ext.intersphinx",
 45 |     "sphinx.ext.todo",
 46 |     "sphinx.ext.coverage",
 47 |     "sphinx.ext.viewcode",
 48 |     # "numpydoc",  # numpy-style docs
 49 |     "sphinx.ext.napoleon",  # numpy-style docs
 50 |     "sphinx_issues",
 51 |     "myst_parser",  # to use markdown
 52 |     "sphinxarg.ext",  # for CLI parsing of arguments
 53 |     "sphinx_autodoc_typehints"  #  <- this would be handy when whole codebase has typehinting
 54 |     # "sphinxcontrib.jupyter", <- this could be useful to make jupyter NBs
 55 | ]
 56 | autodoc_typehints = "signature"  # https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#confval-autodoc_typehints
 57 | 
 58 | # Add any paths that contain templates here, relative to this directory.
 59 | templates_path = ["_templates"]
 60 | 
 61 | # List of patterns, relative to source directory, that match files and
 62 | # directories to ignore when looking for source files.
 63 | # This pattern also affects html_static_path and html_extra_path.
 64 | exclude_patterns = []
 65 | 
 66 | 
 67 | # -- Options for type of input -----------------------------------------------
 68 | source_suffix = {
 69 |     ".rst": "restructuredtext",
 70 |     ".txt": "markdown",
 71 |     ".md": "markdown",
 72 | }
 73 | 
 74 | # -- Options for HTML output -------------------------------------------------
 75 | 
 76 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 77 | # a list of builtin themes.
 78 | 
 79 | # html_theme = "alabaster"
 80 | html_theme = "sphinx_rtd_theme"
 81 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
 82 | # html_theme = "sphinx_material"
 83 | # html_theme_options = {
 84 | #     "color_primary": "#ff4500",
 85 | # }
 86 | 
 87 | # Add any paths that contain custom static files (such as style sheets) here,
 88 | # relative to this directory. They are copied after the builtin static files,
 89 | # so a file named "default.css" will overwrite the builtin "default.css".
 90 | html_static_path = ["_static"]
 91 | 
 92 | issues_github_path = "ElementoLab/imc"
 93 | 
 94 | napoleon_numpy_docstring = True
 95 | napoleon_google_docstring = False
 96 | napoleon_use_param = False
 97 | napoleon_use_ivar = True
 98 | 
 99 | # Example configuration for intersphinx: refer to the Python standard library.
100 | intersphinx_mapping = {
101 |     "python": ("http://docs.python.org/3", None),
102 |     "urllib3": ("http://urllib3.readthedocs.org/en/latest", None),
103 |     "numpy": ("http://docs.scipy.org/doc/numpy/", None),
104 |     "scipy": ("https://docs.scipy.org/doc/scipy-1.3.0/reference/", None),
105 |     "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None),
106 | }
107 | 


--------------------------------------------------------------------------------
/docs/source/examples.md:
--------------------------------------------------------------------------------
1 | # Examples
2 | 


--------------------------------------------------------------------------------
/docs/source/index.md:
--------------------------------------------------------------------------------
 1 | # Welcome
 2 | 
 3 | `imc` is a Python library for the analysis of imaging mass cytometry data.
 4 | 
 5 | Head to the [installation](/install) to see installation instructions, to
 6 | [usage](/usage) for quick use, or have a look at the catalogue of available
 7 | functions in the [API](/api).
 8 | 
 9 | 
10 | ```{admonition} imc is still in development!
11 | This means things may change in the future, use at your own risk.
12 | ```
13 | 
14 | ## Contents
15 | 
16 | ```{toctree}
17 | ---
18 | maxdepth: 1
19 | ---
20 | install.md
21 | usage.md
22 | examples.md
23 | concepts.md
24 | log_config.md
25 | api.md
26 | testing.md
27 | changelog.md
28 | ```
29 | 
30 | ## Links
31 | 
32 | - Documentation: [http://imc.readthedocs.io/](http://imc.readthedocs.io/)
33 | - Issues and source code: [https://github.com/ElementoLab/imc](https://github.com/ElementoLab/imc)
34 | 


--------------------------------------------------------------------------------
/docs/source/install.md:
--------------------------------------------------------------------------------
1 | # Install
2 | 


--------------------------------------------------------------------------------
/docs/source/log_config.md:
--------------------------------------------------------------------------------
1 | # Logging and configuration
2 | 


--------------------------------------------------------------------------------
/docs/source/testing.md:
--------------------------------------------------------------------------------
1 | # Testing
2 | 


--------------------------------------------------------------------------------
/docs/source/usage.md:
--------------------------------------------------------------------------------
1 | # Usage
2 | 


--------------------------------------------------------------------------------
/imc/__init__.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | # fix the type annotatiton of not yet undefined classes
 4 | from __future__ import annotations
 5 | import os
 6 | import sys
 7 | import logging
 8 | from functools import partialmethod
 9 | from pathlib import Path as _Path
10 | 
11 | from outdated import warn_if_outdated
12 | from joblib import Memory
13 | import matplotlib
14 | import matplotlib.pyplot as plt
15 | import seaborn as _sns
16 | 
17 | try:
18 |     # Even though there is no "imc/_version" file,
19 |     # it should be generated by
20 |     # setuptools_scm when building the package
21 |     from imc._version import version
22 | 
23 |     __version__ = version
24 | except ImportError:
25 |     from setuptools_scm import get_version as _get_version
26 | 
27 |     version = __version__ = _get_version(root="..", relative_to=__file__)
28 | 
29 | 
30 | warn_if_outdated("imc", __version__)
31 | 
32 | plt.rcParams["svg.fonttype"] = "none"
33 | plt.rcParams["font.family"] = "Arial"
34 | plt.rcParams["font.sans-serif"] = ["Arial"]
35 | plt.rcParams["text.usetex"] = False
36 | 
37 | import scanpy as _sc
38 | 
39 | _sc.settings.n_jobs = -1
40 | 
41 | 
42 | def setup_logger(name: str = "imc", level: int = logging.INFO) -> logging.Logger:
43 |     """Setup the logger for the package."""
44 |     logger = logging.getLogger(name)
45 |     logger.setLevel(level)
46 | 
47 |     handler = logging.StreamHandler(sys.stdout)
48 |     handler.setLevel(level)
49 |     formatter = logging.Formatter("%(asctime)s - %(message)s")
50 |     handler.setFormatter(formatter)
51 |     logger.addHandler(handler)
52 |     return logger
53 | 
54 | 
55 | LOGGER = setup_logger()
56 | 
57 | # Setup joblib memory
58 | _Path.mkdir = partialmethod(_Path.mkdir, exist_ok=True, parents=True)
59 | JOBLIB_CACHE_DIR = _Path("~/.imc").expanduser()
60 | JOBLIB_CACHE_DIR.mkdir()
61 | MEMORY = Memory(location=JOBLIB_CACHE_DIR, verbose=0)
62 | 
63 | # Decorate seaborn clustermap
64 | # _sns.clustermap = colorbar_decorator(_sns.clustermap)
65 | 
66 | 
67 | from imc.data_models.project import Project
68 | from imc.data_models.sample import IMCSample
69 | from imc.data_models.roi import ROI
70 | 


--------------------------------------------------------------------------------
/imc/cli.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | """
 4 | Inspect MCD files, reporting on their basic statistics, saving
 5 | metadata as YAML files, and panel information as CSV files.
 6 | """
 7 | 
 8 | import sys
 9 | import argparse
10 | from argparse import RawTextHelpFormatter
11 | import typing as tp
12 | 
13 | from imc._version import version
14 | from imc.scripts.process import main as process
15 | from imc.scripts.inspect_mcds import main as inspect
16 | from imc.scripts.prepare import main as prepare
17 | from imc.scripts.predict import main as predict
18 | from imc.scripts.segment_stacks import main as segment
19 | from imc.scripts.quantify import main as quantify
20 | from imc.scripts.phenotype import main as phenotype
21 | from imc.scripts.illustrate import main as illustrate
22 | from imc.scripts.view import main as view
23 | 
24 | cli_config: tp.Dict[str, tp.Any]
25 | from imc.scripts import cli_config
26 | 
27 | 
28 | def main(cli: tp.Sequence[str] = None) -> int:
29 |     parser = get_args()
30 |     parser.add_argument("-v", "--version", action="version", version=version)
31 |     main_args, cmd_args = parser.parse_known_args(cli)
32 | 
33 |     if main_args.command not in cli_config["subcommands"]:
34 |         raise ValueError(f"Command '{main_args.command}' not known!")
35 |     return eval(main_args.command)(cmd_args)
36 | 
37 | 
38 | def get_args() -> argparse.ArgumentParser:
39 |     parser = argparse.ArgumentParser(**cli_config["main"], formatter_class=RawTextHelpFormatter)  # type: ignore[index]
40 | 
41 |     subparsers = parser.add_subparsers(dest="command", required=True)
42 | 
43 |     for cmd in cli_config["subcommands"]:
44 |         subparsers.add_parser(cmd, add_help=False)
45 |     return parser
46 | 
47 | 
48 | if __name__ == "__main__":
49 |     try:
50 |         sys.exit(main())
51 |     except KeyboardInterrupt:
52 |         sys.exit(1)
53 | 


--------------------------------------------------------------------------------
/imc/data_models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ElementoLab/imc/9725b3ab72f2273cb4a702964fa8518c2f189e9c/imc/data_models/__init__.py


--------------------------------------------------------------------------------
/imc/defaults.py:
--------------------------------------------------------------------------------
 1 | from imc.types import Path
 2 | 
 3 | # project
 4 | DEFAULT_PROJECT_NAME = "project"
 5 | DEFAULT_SAMPLE_NAME_ATTRIBUTE = "sample_name"
 6 | DEFAULT_SAMPLE_GROUPING_ATTRIBUTEs = [DEFAULT_SAMPLE_NAME_ATTRIBUTE]
 7 | DEFAULT_TOGGLE_ATTRIBUTE = "toggle"
 8 | DEFAULT_PROCESSED_DIR_NAME = Path("processed")
 9 | DEFAULT_RESULTS_DIR_NAME = Path("results")
10 | DEFAULT_PRJ_SINGLE_CELL_DIR = Path("single_cell")
11 | DEFAULT_ROI_NAME_ATTRIBUTE = "roi_name"
12 | DEFAULT_ROI_NUMBER_ATTRIBUTE = "roi_number"
13 | 
14 | # # processed directory structure
15 | SUBFOLDERS_PER_SAMPLE = True
16 | ROI_STACKS_DIR = Path("tiffs")
17 | ROI_MASKS_DIR = Path("tiffs")
18 | ROI_UNCERTAINTY_DIR = Path("uncertainty")
19 | ROI_SINGLE_CELL_DIR = Path("single_cell")
20 | 
21 | # sample
22 | DEFAULT_SAMPLE_NAME = "sample"
23 | DEFAULT_ROI_NAME_ATTRIBUTE = "roi_name"
24 | DEFAULT_ROI_NUMBER_ATTRIBUTE = "roi_number"
25 | DEFAULT_TOGGLE_ATTRIBUTE = "toggle"
26 | 
27 | # roi
28 | SUBFOLDERS_PER_SAMPLE = True
29 | DEFAULT_ROI_NAME = "roi"
30 | ROI_STACKS_DIR = Path("tiffs")
31 | ROI_MASKS_DIR = Path("tiffs")
32 | ROI_UNCERTAINTY_DIR = Path("uncertainty")
33 | ROI_SINGLE_CELL_DIR = Path("single_cell")
34 | 
35 | # graphics
36 | FIG_KWS = dict(dpi=300, bbox_inches="tight")
37 | 


--------------------------------------------------------------------------------
/imc/demo/__init__.py:
--------------------------------------------------------------------------------
1 | from .generate_data import generate_project
2 | from .get_demo_data import DATASETS as _DATASETS, get_dataset
3 | 
4 | datasets = list(_DATASETS.keys())
5 | 


--------------------------------------------------------------------------------
/imc/demo/generate_data.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | from typing import Tuple, List, Dict, Union
  4 | import tempfile
  5 | 
  6 | import numpy as np
  7 | import scipy.ndimage as ndi
  8 | import matplotlib.pyplot as plt
  9 | import tifffile
 10 | import pandas as pd
 11 | import skimage
 12 | 
 13 | from imc import Project
 14 | from imc.types import Array, Figure, Path
 15 | from imc.utils import filter_kwargs_by_callable as filter_kws
 16 | 
 17 | 
 18 | def generate_mask(
 19 |     shape: Tuple[int, int] = (8, 8),
 20 |     seeding_density: float = 0.1,
 21 |     # widths: int = None,
 22 |     # connectivity: float = None
 23 | ) -> Array:
 24 |     mask = np.zeros(shape, dtype=bool)
 25 |     # Cells are placed in an effective mask area which is not touching borders
 26 |     eff_mask = mask[1:-1, 1:-1]
 27 |     centroids = np.random.choice(
 28 |         np.arange(eff_mask.size),
 29 |         int(np.ceil(eff_mask.size * seeding_density)),
 30 |         replace=False,
 31 |     )
 32 |     eff_mask.flat[centroids] = True  # type: ignore
 33 |     mask[1:-1, 1:-1] = eff_mask
 34 |     return ndi.label(mask, structure=np.zeros((3, 3)))[0]
 35 | 
 36 | 
 37 | def generate_disk_masks(
 38 |     shape: Tuple[int, int] = (128, 128),
 39 |     seeding_density: float = 0.1,
 40 |     disk_diameter: int = 10,
 41 | ):
 42 |     mask = np.zeros(shape, dtype=bool)
 43 | 
 44 |     area = np.multiply(*mask.shape)
 45 |     n = int(np.ceil(mask.size * seeding_density) * (disk_diameter**2 / area))
 46 |     centroids = np.random.choice(np.arange(mask.size), n, replace=False)
 47 | 
 48 |     r = disk_diameter // 2
 49 |     disk = skimage.morphology.disk(r)
 50 |     x = centroids // shape[0]
 51 |     y = centroids % shape[1]
 52 |     for i in range(n):
 53 |         s = mask[x[i] - r : x[i] + r + 1, y[i] - r : y[i] + r + 1].shape
 54 |         mask[x[i] - r : x[i] + r + 1, y[i] - r : y[i] + r + 1] = disk[: s[0], : s[1]]
 55 |     return ndi.label(mask)[0]
 56 | 
 57 | 
 58 | def generate_stack(
 59 |     mask: Array,
 60 |     n_channels: int = 3,
 61 |     channel_coeffs: Array = None,
 62 |     channel_std: Array = None,
 63 |     n_cell_types: int = 2,
 64 |     cell_type_coeffs: Array = None,
 65 |     cell_type_std: Array = None,
 66 | ) -> Array:
 67 |     # partition cells into cell types
 68 |     n_cells = (mask > 0).sum()
 69 |     cells = np.arange(mask.size)[mask.flat > 0]
 70 |     assigned_cells = np.array([], dtype=int)
 71 |     ct_cells = dict()
 72 |     for i in range(n_cell_types):
 73 |         available_cells = [c for c in cells if c not in assigned_cells]
 74 |         ct_cells[i] = np.random.choice(
 75 |             available_cells,
 76 |             int(np.floor(n_cells / n_cell_types)),
 77 |             replace=False,
 78 |         )
 79 |         assigned_cells = np.append(assigned_cells, ct_cells[i])
 80 |     ct_cells[i] = np.append(ct_cells[i], cells[~np.isin(cells, assigned_cells)])
 81 |     assert sum([len(x) for x in ct_cells.values()]) == n_cells
 82 | 
 83 |     # assign intensity values
 84 |     stack = np.zeros((n_channels,) + mask.shape, dtype=float)
 85 |     std_sd = 0.1
 86 |     if channel_coeffs is None:
 87 |         channel_coeffs = np.random.choice(np.linspace(-5, 5), n_channels)
 88 |     if channel_std is None:
 89 |         channel_std = np.abs(channel_coeffs) * std_sd
 90 |     if cell_type_coeffs is None:
 91 |         cell_type_coeffs = np.random.choice(np.linspace(-5, 5), n_cell_types)
 92 |     if cell_type_std is None:
 93 |         cell_type_std = np.abs(cell_type_coeffs) * std_sd
 94 |     # means = intercept + np.dot(
 95 |     means = np.dot(
 96 |         channel_coeffs.reshape((-1, n_channels)).T,
 97 |         cell_type_coeffs.reshape((-1, n_cell_types)),
 98 |     )
 99 |     intercept = np.abs(means.min()) * 2
100 |     means += intercept
101 |     stds = channel_std.reshape((-1, n_channels)).T + cell_type_std.reshape(
102 |         (-1, n_cell_types)
103 |     )
104 | 
105 |     for cell_type in range(n_cell_types):
106 |         n = ct_cells[i].size
107 |         for channel in range(n_channels):
108 |             stack[channel].flat[ct_cells[cell_type]] = np.random.normal(
109 |                 means[channel, cell_type], stds[channel, cell_type], n
110 |             )
111 | 
112 |     # make sure array is non-negative
113 |     if stack.min() < 0:
114 |         stack[stack == 0] = stack.min()
115 |         stack += abs(stack.min())
116 |     return stack
117 | 
118 | 
119 | def write_tiff(array: Array, output_file: Path) -> None:
120 |     fr = tifffile.TiffWriter(output_file)
121 |     fr.write(array)
122 |     fr.close()
123 | 
124 | 
125 | def write_roi_to_disk(mask: Array, stack: Array, output_prefix: Path) -> None:
126 |     # mask
127 |     write_tiff(mask, output_prefix + "_full_mask.tiff")
128 |     # stack
129 |     write_tiff(stack, output_prefix + "_full.tiff")
130 |     # channel_labels
131 |     labels = [str(c).zfill(2) for c in range(1, stack.shape[0] + 1)]
132 |     channel_labels = pd.Series([f"Ch{c}(Ch{c})" for c in labels], name="channel")
133 |     channel_labels.to_csv(output_prefix + "_full.csv")
134 | 
135 | 
136 | def visualize_roi(mask: Array, stack: Array) -> Figure:
137 |     fig, axes = plt.subplots(1, 5, figsize=(4 * 5, 4))
138 |     axes[0].set_title("Mask")
139 |     axes[0].imshow(mask, cmap="binary_r")
140 |     axes[1].set_title("RGB signal")
141 |     axes[1].imshow(np.moveaxis(stack, 0, -1) / stack.max())
142 |     for i, (ax, cmap) in enumerate(zip(axes[2:], ["Reds", "Greens", "Blues"])):
143 |         ax.set_title(f"Channel {i}")
144 |         ax.imshow(stack[i] / stack.max(), cmap=cmap)
145 |     return fig
146 | 
147 | 
148 | def generate_project(
149 |     name: str = None,
150 |     n_samples: int = 3,
151 |     rois_per_sample: int = 3,
152 |     root_dir: Path = None,
153 |     sample_names: List[str] = None,
154 |     return_object: bool = True,
155 |     visualize: bool = False,
156 |     **kwargs,
157 | ) -> Union[Project, Path]:
158 |     if name is None:
159 |         name = "test_project"
160 |     if root_dir is None:
161 |         root_dir = Path(tempfile.mkdtemp())
162 |     else:
163 |         root_dir = Path(root_dir)
164 |     root_dir.mkdir(exist_ok=True)
165 |     meta_dir = root_dir / "metadata"
166 |     meta_dir.mkdir(exist_ok=True)
167 |     processed_dir = root_dir / "processed"
168 |     processed_dir.mkdir(exist_ok=True)
169 | 
170 |     if sample_names is None:
171 |         sample_names = ["test_sample_" + str(i).zfill(2) for i in range(1, n_samples + 1)]
172 |     _meta: Dict[str, Dict[str, Union[str, int]]] = dict()
173 |     for sample in sample_names:
174 |         tiffs_dir = processed_dir / sample / "tiffs"
175 |         tiffs_dir.mkdir(exist_ok=True, parents=True)
176 |         for roi in range(1, rois_per_sample + 1):
177 |             roi_name = f"{sample}-{str(roi).zfill(2)}"
178 |             output_prefix = tiffs_dir / roi_name
179 |             mask = generate_mask(**filter_kws(kwargs, generate_mask))
180 |             stack = generate_stack(mask, **filter_kws(kwargs, generate_stack))
181 |             if visualize:
182 |                 visualize_roi(mask, stack)
183 |             write_roi_to_disk(mask, stack, output_prefix)
184 |             _meta[roi_name] = {"roi_number": roi, "sample_name": sample}
185 | 
186 |     # write metadata
187 |     meta = pd.DataFrame(_meta).T
188 |     meta.index.name = "roi_name"
189 |     meta.to_csv(meta_dir / "samples.csv")
190 |     return (
191 |         Project(
192 |             metadata=meta_dir / "samples.csv",
193 |             processed_dir=processed_dir,
194 |             results_dir=processed_dir.parent / "results",
195 |         )
196 |         if return_object
197 |         else root_dir
198 |     )
199 | 


--------------------------------------------------------------------------------
/imc/demo/get_demo_data.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import typing as tp
  4 | import shutil
  5 | import urllib.request as request
  6 | from contextlib import closing
  7 | import tarfile
  8 | import tempfile
  9 | import zipfile
 10 | import re
 11 | 
 12 | import requests
 13 | from urlpath import URL
 14 | import tifffile
 15 | import numpy as np
 16 | import pandas as pd
 17 | 
 18 | from imc.types import Path
 19 | from imc import Project
 20 | 
 21 | 
 22 | DATASET_DB_PATH = Path("~").expanduser() / ".imc" / "demo_datasets"
 23 | DATASETS = {
 24 |     "jackson_2019_short": "https://wcm.box.com/shared/static/eq1m5j972cf3b5jqoe2vdju3bg9e0r5n",
 25 |     "jackson_2019_short_joint": "https://wcm.box.com/shared/static/b8nxku3ywvenghxvvm4wki9znxwbenzb",
 26 |     "schwabenland_2021_full": "https://zenodo.org/record/5018260/files/COVID19_brain_all_patients_singletiffs_and_cellmasks.zip?download=1",
 27 | }
 28 | 
 29 | 
 30 | def _download_file(url: str, output_path: Path, chunk_size=1024) -> None:
 31 |     """
 32 |     Download a file and write to disk in chunks (not in memory).
 33 | 
 34 |     Parameters
 35 |     ----------
 36 |     url : :obj:`str`
 37 |         URL to download from.
 38 |     output_path : :obj:`str`
 39 |         Path to file as output.
 40 |     chunk_size : :obj:`int`
 41 |         Size in bytes of chunk to write to disk at a time.
 42 |     """
 43 |     if url.startswith("ftp://"):
 44 |         with closing(request.urlopen(url)) as r:
 45 |             with open(output_path, "wb") as f:
 46 |                 shutil.copyfileobj(r, f)
 47 |     else:
 48 |         response = requests.get(url, stream=True)
 49 |         with open(output_path, "wb") as outfile:
 50 |             outfile.writelines(response.iter_content(chunk_size=chunk_size))
 51 | 
 52 | 
 53 | def _decompress_tar_file(path: Path, output_root: Path = None) -> None:
 54 |     """Decompress a tar.xz file."""
 55 |     with tarfile.open(path) as f:
 56 |         f.extractall(path.parent if output_root is None else output_root)
 57 | 
 58 | 
 59 | def get_dataset(dataset_name: str, output_dir: Path = None) -> Project:
 60 |     DATASET_DB_PATH.mkdir()
 61 | 
 62 |     if dataset_name == "schwabenland_2021":
 63 |         return get_schwabenland_2021_data(output_dir)
 64 |     dataset_file = DATASET_DB_PATH / dataset_name + ".tar.gz"
 65 | 
 66 |     if output_dir is None:
 67 |         output_dir = Path(tempfile.TemporaryDirectory().name)
 68 | 
 69 |     if not dataset_file.exists():
 70 |         _download_file(DATASETS[dataset_name], dataset_file)
 71 |     _decompress_tar_file(dataset_file, output_dir)
 72 |     return Project(
 73 |         name=dataset_name,
 74 |         processed_dir=output_dir / dataset_name / "processed",
 75 |         subfolder_per_sample="joint" not in dataset_name,
 76 |     )
 77 | 
 78 | 
 79 | def get_schwabenland_2021_data(output_dir: Path = None) -> Project:
 80 |     dataset_name = "schwabenland_2021"
 81 |     zip_file_url = (
 82 |         "https://zenodo.org/record/5018260/files/"
 83 |         "COVID19_brain_all_patients_singletiffs_and_cellmasks.zip"
 84 |         "?download=1"
 85 |     )
 86 | 
 87 |     if output_dir is None:
 88 |         output_dir = Path(tempfile.TemporaryDirectory().name).mkdir()
 89 | 
 90 |     zip_file = output_dir / dataset_name + "_imc_data.zip"
 91 | 
 92 |     if not zip_file.exists():
 93 |         _download_file(zip_file_url, zip_file)
 94 |     with zipfile.ZipFile(zip_file) as zf:
 95 |         zf.extractall(output_dir)
 96 |     zip_file.unlink()
 97 | 
 98 |     for dir_ in filter(lambda x: x.is_dir(), output_dir.iterdir()):
 99 |         name = dir_.name
100 |         _stack = list()
101 |         _channel_names = list()
102 |         for file in dir_.iterdir():
103 |             if "_mask.tiff" in file.as_posix():
104 |                 mask = tifffile.imread(file)
105 |                 continue
106 |             _stack.append(tifffile.imread(file))
107 |             _channel_names.append(file.stem)
108 |         stack = np.asarray(_stack)
109 |         channel_names = pd.Series(_channel_names)
110 |         annotation = (
111 |             channel_names.str.split("_")
112 |             .apply(pd.Series)
113 |             .set_index(channel_names)
114 |             .rename(columns={0: "marker", 1: "metal"})
115 |         )
116 |         annotation["mass"] = annotation["metal"].str.extract(r"(\d+)")[0].astype(int)
117 |         stack = stack[annotation["mass"].rank().astype(int) - 1]
118 |         annotation = annotation.sort_values("mass")
119 |         annotation.index = annotation.index.str.replace("_", "(") + ")"
120 |         labels = annotation.index.to_series().reset_index(drop=True).rename("channel")
121 | 
122 |         if "ROI" not in name:
123 |             roi_number = "1"
124 |         else:
125 |             roi_number = re.findall(r"_ROI(\d)_", name)[0]
126 |             name = re.sub(r"_ROI(\d)", "", name)
127 | 
128 |         od = (output_dir / "processed" / name / "tiffs").mkdir()
129 |         output_prefix = od / name + f"-{roi_number}_full"
130 |         tifffile.imwrite(output_prefix + ".tiff", stack)
131 |         tifffile.imwrite(output_prefix + "_mask.tiff", mask)
132 |         labels.to_csv(output_prefix + ".csv")
133 | 
134 |         shutil.rmtree(dir_)
135 | 
136 |     return Project(name=dataset_name, processed_dir=output_dir / "processed")
137 | 
138 | 
139 | def get_phillips_2021(output_dir: Path = None) -> Project:
140 |     """
141 |     doi:10.3389/fimmu.2021.687673
142 |     """
143 |     if output_dir is None:
144 |         output_dir = Path(tempfile.TemporaryDirectory().name).mkdir()
145 | 
146 |     (output_dir / "processed").mkdir()
147 | 
148 |     dataset_name = "phillips_2021"
149 |     base_url = URL("https://immunoatlas.org")
150 |     group_id = "NOLN"
151 |     project_id = "210614-2"
152 |     cases = [f"NOLN2100{i}" for i in range(2, 10)]
153 |     rois = ["A01"]
154 |     markers = [
155 |         "DNA (Hoechst)",
156 |         "T-bet",
157 |         "GATA3",
158 |         "FoxP3",
159 |         "CD56",
160 |         "TCR-γ/δ",
161 |         "Tim-3",
162 |         "CD30",
163 |         "CCR6",
164 |         "PD-L1",
165 |         "TCR-β",
166 |         "CD4",
167 |         "CD2",
168 |         "CD5",
169 |         "Ki-67",
170 |         "CD25",
171 |         "CD134",
172 |         "α-SMA",
173 |         "CD20",
174 |         "LAG3",
175 |         "MUC-1/EMA",
176 |         "CD11c",
177 |         "PD-1",
178 |         "Vimentin",
179 |         "CD16",
180 |         "IDO-1",
181 |         "CD15",
182 |         "EGFR",
183 |         "VISTA",
184 |         "Granzyme B",
185 |         "CD206",
186 |         "ICOS",
187 |         "CD69",
188 |         "CD45RA",
189 |         "CD57",
190 |         "CD3",
191 |         "HLA-DR",
192 |         "CD8",
193 |         "BCL-2",
194 |         "β-catenin",
195 |         "CD7",
196 |         "CD1a",
197 |         "CD45RO",
198 |         "CCR4/CD194",
199 |         "CD163",
200 |         "CD11b",
201 |         "CD34",
202 |         "Cytokeratin",
203 |         "CD38",
204 |         "CD68",
205 |         "CD31",
206 |         "Collagen IV",
207 |         "CD138",
208 |         "Podoplanin",
209 |         "CD45",
210 |         "MMP-9",
211 |         "MCT",
212 |         "CLA/CD162",
213 |         "DNA (DRAQ5)",
214 |     ]
215 | 
216 |     for case in cases:
217 |         for roi in rois:
218 |             print(case, roi)
219 |             url = base_url / group_id / project_id / case / roi / f"{case}_{roi}.tif"
220 |             roi = roi.replace("A", "")
221 |             od = (output_dir / "processed" / case / "tiffs").mkdir()
222 |             f = od / f"{case}-{roi}_full.tiff"
223 |             if f.exists():
224 |                 continue
225 |             # Somehow the _download_file failed a few times
226 |             _download_file(url.as_posix(), f)
227 |             # resp = url.get()
228 |             # with open(f, "wb") as handle:
229 |             #     handle.write(resp.content)
230 |             pd.Series(markers, name="channel").to_csv(f.replace_(".tiff", ".csv"))
231 | 
232 |     return Project(name=dataset_name, processed_dir=output_dir / "processed")
233 | 
234 | 
235 | def get_allam_2021_data(output_dir: Path = None) -> Project:
236 |     if output_dir is None:
237 |         output_dir = Path(tempfile.TemporaryDirectory().name).mkdir()
238 | 
239 |     base_url = URL("https://raw.githubusercontent.com/coskunlab/SpatialViz/main/data")
240 |     samples = [
241 |         y[0] + str(y[1]) for code in ["DT", "NT"] for y in zip([code] * 6, range(1, 7))
242 |     ]
243 |     markers = [
244 |         "CD20",
245 |         "CD3",
246 |         "CD4",
247 |         "CD45RO",
248 |         "CD68",
249 |         "CD8a",
250 |         "Col1",
251 |         "DNA1",
252 |         "DNA2",
253 |         "Ecadherin",
254 |         "FoxP3",
255 |         "GranzymeB",
256 |         "Histone3",
257 |         "Ki67",
258 |         "PD1",
259 |         "PDL1",
260 |         "Pankeratin",
261 |         "SMA",
262 |         "Vimentin",
263 |     ]
264 | 
265 |     for sample in samples:
266 |         mask_url = base_url / "cell_masks" / f"{sample}_cell_Mask.tiff"
267 |         for marker in markers:
268 |             channel_url = base_url / "raw" / sample / f"{sample}_{marker}.tiff"
269 | 


--------------------------------------------------------------------------------
/imc/exceptions.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | from imc.types import GenericType
 3 | 
 4 | 
 5 | class AttributeNotSetError(Exception):
 6 |     pass
 7 | 
 8 | 
 9 | def cast(arg: Optional[GenericType]) -> GenericType:
10 |     """Remove `Optional` from `T`."""
11 |     if arg is None:
12 |         raise AttributeNotSetError("Attribute cannot be None!")
13 |     return arg
14 | 


--------------------------------------------------------------------------------
/imc/interactive_volume_viewer.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | """
  4 | An example program to display a volumetric image from the command line.
  5 | """
  6 | 
  7 | import sys
  8 | import typing as tp
  9 | from urlpath import URL
 10 | from functools import partial
 11 | 
 12 | import imageio
 13 | import numpy as np
 14 | import matplotlib.pyplot as plt
 15 | from tqdm import tqdm
 16 | 
 17 | from imc.types import Array, Axis, Figure, Path  # https://github.com/ElementoLab/imc
 18 | 
 19 | 
 20 | def multi_slice_viewer(
 21 |     volume: Array, up_key: str = "w", down_key: str = "s", **kwargs
 22 | ) -> Figure:
 23 |     remove_keymap_conflicts({up_key, down_key})
 24 |     print(f"Press '{up_key}' and '{down_key}' for scrolling through image channels.")
 25 | 
 26 |     fig, ax = plt.subplots()
 27 |     ax.volume = volume
 28 |     ax.index = volume.shape[0] // 2
 29 |     ax.imshow(volume[ax.index], **kwargs)
 30 |     fig.canvas.mpl_connect(
 31 |         "key_press_event", partial(process_key, up_key=up_key, down_key=down_key)
 32 |     )
 33 |     return fig
 34 | 
 35 | 
 36 | def remove_keymap_conflicts(new_keys_set: tp.Set) -> None:
 37 |     for prop in plt.rcParams:
 38 |         if prop.startswith("keymap."):
 39 |             keys = plt.rcParams[prop]
 40 |             remove_list = set(keys) & new_keys_set
 41 |             for key in remove_list:
 42 |                 keys.remove(key)
 43 | 
 44 | 
 45 | def process_key(event, up_key: str = "w", down_key: str = "s") -> None:
 46 |     fig = event.canvas.figure
 47 |     ax = fig.axes[0]
 48 |     if event.key == up_key:
 49 |         previous_slice(ax)
 50 |     elif event.key == down_key:
 51 |         next_slice(ax)
 52 |     fig.canvas.draw()
 53 | 
 54 | 
 55 | def previous_slice(ax: Axis) -> None:
 56 |     """Go to the previous slice."""
 57 |     volume = ax.volume
 58 |     ax.index = (ax.index - 1) % volume.shape[0]  # wrap around using %
 59 |     ax.images[0].set_array(volume[ax.index])
 60 | 
 61 | 
 62 | def next_slice(ax: Axis) -> None:
 63 |     """Go to the next slice."""
 64 |     volume = ax.volume
 65 |     ax.index = (ax.index + 1) % volume.shape[0]
 66 |     ax.images[0].set_array(volume[ax.index])
 67 | 
 68 | 
 69 | def get_volume() -> Array:
 70 |     base_url = URL("https://prod-images-static.radiopaedia.org/images/")
 71 |     start_n = 53734044
 72 |     length = 137
 73 | 
 74 |     imgs = list()
 75 |     for i in tqdm(range(length)):
 76 |         url = base_url / f"{start_n + i}/{i + 1}_gallery.jpeg"
 77 |         resp = url.get()
 78 |         c = resp.content
 79 |         imgs.append(imageio.read(c, format="jpeg").get_data(0))
 80 |     img = np.asarray(imgs)
 81 |     return img
 82 | 
 83 | 
 84 | def main() -> int:
 85 |     """
 86 |     Run
 87 |     """
 88 |     img_file = Path("/tmp/volumetric_image.npz")
 89 |     if not img_file.exists():
 90 |         print("Downloading volumetric image.")
 91 |         img = get_volume()
 92 |         np.savez_compressed(img_file, img)
 93 |     else:
 94 |         img = np.load(img_file)["arr_0"]
 95 | 
 96 |     _ = multi_slice_viewer(img)
 97 |     print("Displaying volume.")
 98 |     print("Press 'w' for up and 's' for down.")
 99 |     plt.show(block=True)
100 |     print("Done.")
101 |     return 0
102 | 
103 | 
104 | if __name__ == "__main__" and "get_ipython" not in locals():
105 |     try:
106 |         sys.exit(main())
107 |     except KeyboardInterrupt:
108 |         sys.exit(1)
109 | 


--------------------------------------------------------------------------------
/imc/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ElementoLab/imc/9725b3ab72f2273cb4a702964fa8518c2f189e9c/imc/logo.png


--------------------------------------------------------------------------------
/imc/ops/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ElementoLab/imc/9725b3ab72f2273cb4a702964fa8518c2f189e9c/imc/ops/__init__.py


--------------------------------------------------------------------------------
/imc/ops/adjacency.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Functions for single-cell adjacency.
  3 | """
  4 | 
  5 | import typing as tp
  6 | 
  7 | import numpy as np
  8 | import pandas as pd
  9 | import matplotlib.pyplot as plt
 10 | import seaborn as sns
 11 | from tqdm import tqdm
 12 | import scipy.ndimage as ndi
 13 | from skimage import exposure
 14 | from skimage import graph
 15 | import networkx as nx
 16 | 
 17 | import imc.data_models.roi as _roi
 18 | from imc.types import DataFrame, Series, Path
 19 | 
 20 | FIG_KWS = dict(bbox_inches="tight", dpi=300)
 21 | MAX_BETWEEN_CELL_DIST = 4
 22 | 
 23 | 
 24 | def get_adjacency_graph(
 25 |     roi: _roi.ROI,
 26 |     output_prefix: Path = None,
 27 |     max_dist: int = MAX_BETWEEN_CELL_DIST,
 28 | ) -> graph:
 29 |     """
 30 |     Derive a spatial representation of cells in image using a graph.
 31 | 
 32 |     Parameters
 33 |     ----------
 34 |     roi: imc.ROI
 35 |         ROI object to derive graph for.
 36 | 
 37 |     output_prefix: typing.Path
 38 |         Prefix to output file with graph.
 39 |         Defaults to sample root dir / 'single_cell'.
 40 | 
 41 |     max_dist: int
 42 |         Maximum distance to consider physical interaction between cells (graph edges)
 43 | 
 44 |     Returns
 45 |     -------
 46 |     networkx.Graph
 47 |         Adjacency graph for cells in ROI.
 48 |     """
 49 |     import pickle
 50 | 
 51 |     clusters = roi.clusters
 52 |     if clusters is None:
 53 |         print("ROI does not have assigned clusters.")
 54 | 
 55 |     output_prefix = Path(output_prefix or (roi.single_cell_dir / roi.name + "."))
 56 |     if not output_prefix.endswith("."):
 57 |         output_prefix += "."
 58 |     output_prefix.parent.mkdir()
 59 | 
 60 |     mask = roi.cell_mask
 61 | 
 62 |     # align mask with cell type assignment (this is only to remove border cells)
 63 |     if clusters is not None:
 64 |         mask[~np.isin(mask, roi.clusters.index)] = 0
 65 | 
 66 |     # Get the closest cell of each background point dependent on `max_dist`
 67 |     # # first measure the distance of each background point to the closest cell
 68 |     background = mask == 0
 69 |     d = ndi.distance_transform_edt(
 70 |         background, return_distances=True, return_indices=False
 71 |     )
 72 | 
 73 |     background = background & (d <= max_dist)
 74 |     i, j = ndi.distance_transform_edt(
 75 |         background, return_distances=False, return_indices=True
 76 |     )
 77 |     mask = mask[i, j]
 78 | 
 79 |     # Simply use mean of channels as distance
 80 |     stack = roi.stack
 81 |     if hasattr(roi, "channel_exclude"):
 82 |         stack = stack[~roi.channel_exclude]
 83 |     image_mean = np.asarray([exposure.equalize_hist(x) for x in stack]).mean(0)
 84 |     image_mean = (image_mean - image_mean.min()) / (
 85 |         np.percentile(image_mean, 98) - image_mean.min()
 86 |     )
 87 | 
 88 |     # Construct adjacency graph based on cell distances
 89 |     g = graph.rag_mean_color(image_mean, mask, connectivity=2, mode="distance")
 90 |     # g = skimage.future.graph.RAG(mask, connectivity=2)
 91 |     # remove background node (unfortunately it can't be masked beforehand)
 92 |     if 0 in g.nodes:
 93 |         g.remove_node(0)
 94 | 
 95 |     fig, ax = plt.subplots(1, 1)
 96 |     i = (image_mean * 255).astype("uint8")
 97 |     i = np.moveaxis(np.asarray([i, i, i]), 0, -1)
 98 |     lc = graph.show_rag(
 99 |         mask.astype("uint32"),
100 |         g,
101 |         i,
102 |         ax=ax,
103 |         img_cmap="viridis",
104 |         edge_cmap="Reds",
105 |         edge_width=1,
106 |     )
107 |     ax.axis("off")
108 |     fig.colorbar(lc, fraction=0.03, ax=ax)
109 |     ax.get_children()[0].set_rasterized(True)
110 |     ax.get_children()[-2].set_rasterized(True)
111 |     fig.savefig(output_prefix + "neighbor_graph.svg", **FIG_KWS)
112 |     plt.close(fig)
113 | 
114 |     # add cluster label atrtribute
115 |     if clusters is not None:
116 |         nx.set_node_attributes(g, roi.clusters.to_dict(), name="cluster")
117 |         nx.set_node_attributes(g, roi.clusters.index.to_series().to_dict(), name="obj_id")
118 | 
119 |     # save graph
120 |     with open(output_prefix + "neighbor_graph.gpickle", "wb") as f:
121 |         pickle.dump(g, f)
122 |     return g
123 | 
124 | 
125 | def measure_cell_type_adjacency(
126 |     roi: _roi.ROI,
127 |     method: str = "random",
128 |     adjacency_graph: nx.Graph = None,
129 |     n_iterations: int = 100,
130 |     inf_replace_method: str = "min",
131 |     output_prefix: Path = None,
132 |     plot: bool = True,
133 |     save: bool = True,
134 | ) -> DataFrame:
135 |     """
136 |     Derive an aggregated measure of adjacency betwen cell types for one ROI.
137 | 
138 |     Parameters
139 |     ----------
140 |     roi: imc.ROI
141 |         ROI object to derive graph for.
142 | 
143 |     method: str
144 |         Method to normalize interactions by.
145 |         - 'random': generate empirical background of expected interactions based on cell type abundance by randomization (permutation of cell type identities).
146 |         - 'pharmacoscopy': method with analytical solution from Vladimer et al (10.1038/nchembio.2360). Not recommended for small images.
147 |         Default is 'random'.
148 | 
149 |     adjacency_graph: networkx.Graph
150 |         Adjacency graph per cell for ROI.
151 |         By default, and if not given will be the `ROI.adjacency_graph` attribute.
152 | 
153 |     n_iterations: int
154 |         Number of permutations to run when `method` == 'random'.
155 |         Defaults to 100.
156 | 
157 |     inf_replace_method: str
158 |         If `method` == 'pharmacoscopy', how to handle cases where interactions are not observed.
159 | 
160 |     output_prefix: typing.Path
161 |         Prefix to output file with graph.
162 |         Defaults to sample root dir / 'single_cell'.
163 | 
164 |     plot: bool
165 |         Whether to plot visualizations.
166 |         Default is `True`.
167 | 
168 |     save: bool
169 |         Whether to save output to disk.
170 |         Default is `True`.
171 | 
172 |     Returns
173 |     -------
174 |     pandas.DataFrame
175 |         DataFrame of cell type interactions normalized by `method`.
176 |     """
177 |     output_prefix = output_prefix or (
178 |         roi.sample.root_dir / "single_cell" / roi.name + "."
179 |     )
180 |     if not output_prefix.endswith("."):
181 |         output_prefix += "."
182 | 
183 |     cluster_counts = roi.clusters.value_counts()
184 | 
185 |     if adjacency_graph is None:
186 |         adjacency_graph = roi.adjacency_graph
187 | 
188 |     import warnings  # Networkx warns that the output of nx.linalg.attrmatrix.attr_matrix will be an array instead of a matrix
189 | 
190 |     with warnings.catch_warnings():
191 |         warnings.filterwarnings("ignore", category=FutureWarning)
192 |         adj, order = nx.linalg.attrmatrix.attr_matrix(
193 |             adjacency_graph, node_attr="cluster"
194 |         )
195 |     order = pd.Series(order).astype(
196 |         roi.clusters.dtype
197 |     )  #  passing dtype at instantiation gives warning
198 |     freqs = pd.DataFrame(adj, order, order).sort_index(axis=0).sort_index(axis=1)
199 |     if save:
200 |         freqs.to_csv(output_prefix + "cluster_adjacency_graph.frequencies.csv")
201 | 
202 |     if method == "random":
203 |         norm_freqs = correct_interaction_background_random(
204 |             roi, freqs, "cluster", n_iterations, save, output_prefix
205 |         )
206 |     elif method == "pharmacoscopy":
207 |         norm_freqs = correct_interaction_background_pharmacoscopy(
208 |             freqs, cluster_counts, roi.clusters.shape[0], inf_replace_method
209 |         )
210 |     if save:
211 |         norm_freqs.to_csv(output_prefix + "cluster_adjacency_graph.norm_over_random.csv")
212 | 
213 |     if not plot:
214 |         return norm_freqs
215 |     v = norm_freqs.values.std() * 2
216 |     fig, axes = plt.subplots(1, 2, sharey=True, figsize=(4 * 2, 4))
217 |     kws = dict(cmap="RdBu_r", center=0, square=True, xticklabels=True, yticklabels=True)
218 |     sns.heatmap(norm_freqs, robust=True, ax=axes[0], **kws)
219 |     kws2 = dict(vmin=-v, vmax=v, cbar_kws=dict(label="Log odds interaction"))
220 |     sns.heatmap(norm_freqs, ax=axes[1], **kws, **kws2)
221 |     fig.savefig(
222 |         output_prefix + "cluster_adjacency_graph.norm_over_random.heatmap.svg",
223 |         **FIG_KWS,
224 |     )
225 |     plt.close(fig)
226 |     del kws["square"]
227 |     try:
228 |         grid = sns.clustermap(norm_freqs, **kws, **kws2)
229 |         grid.savefig(
230 |             output_prefix + "cluster_adjacency_graph.norm_over_random.clustermap.svg",
231 |             **FIG_KWS,
232 |         )
233 |         plt.close(grid.fig)
234 |     except FloatingPointError:
235 |         pass
236 |     return norm_freqs
237 | 
238 | 
239 | def correct_interaction_background_random(
240 |     roi: _roi.ROI,
241 |     freqs: DataFrame,
242 |     attribute,
243 |     n_iterations: int,
244 |     save: bool,
245 |     output_prefix: tp.Union[str, Path],
246 | ):
247 |     values = {
248 |         x: roi.adjacency_graph.nodes[x][attribute] for x in roi.adjacency_graph.nodes
249 |     }
250 |     shuffled_freqs = list()
251 |     for _ in tqdm(range(n_iterations)):
252 |         g2 = roi.adjacency_graph.copy()
253 |         shuffled_attr = pd.Series(values).sample(frac=1)
254 |         shuffled_attr.index = values
255 |         nx.set_node_attributes(g2, shuffled_attr.to_dict(), name=attribute)
256 |         import warnings
257 | 
258 |         with warnings.catch_warnings():
259 |             warnings.filterwarnings("ignore", category=FutureWarning)
260 |             rf, rl = nx.linalg.attrmatrix.attr_matrix(g2, node_attr=attribute)
261 |         rl = pd.Series(rl, dtype=roi.clusters.dtype)
262 |         shuffled_freqs.append(
263 |             pd.DataFrame(rf, index=rl, columns=rl).sort_index(axis=0).sort_index(axis=1)
264 |         )
265 |     shuffled_freq = pd.concat(shuffled_freqs)
266 |     if save:
267 |         shuffled_freq.to_csv(
268 |             output_prefix
269 |             + f"cluster_adjacency_graph.random_frequencies.all_iterations_{n_iterations}.csv"
270 |         )
271 |     shuffled_freq = shuffled_freq.groupby(level=0).sum().sort_index(axis=1)
272 |     if save:
273 |         shuffled_freq.to_csv(
274 |             output_prefix + "cluster_adjacency_graph.random_frequencies.csv"
275 |         )
276 | 
277 |     fl = np.log1p((freqs / freqs.values.sum()) * 1e6)
278 |     sl = np.log1p((shuffled_freq / shuffled_freq.values.sum()) * 1e6)
279 |     # make sure both contain all edges/nodes
280 |     fl = fl.reindex(sl.index, axis=0).reindex(sl.index, axis=1).fillna(0)
281 |     sl = sl.reindex(fl.index, axis=0).reindex(fl.index, axis=1).fillna(0)
282 |     return fl - sl
283 | 
284 | 
285 | def correct_interaction_background_pharmacoscopy(
286 |     frequency_matrix: DataFrame,
287 |     cluster_counts: Series,
288 |     total_cells: int,
289 |     inf_replace_method: tp.Optional[str] = "min_symmetric",
290 | ):
291 |     c = np.log(total_cells)
292 |     fa = np.log(frequency_matrix.sum().sum()) - c
293 |     norms = pd.DataFrame()
294 |     for ct1 in frequency_matrix.index:
295 |         for ct2 in frequency_matrix.columns:
296 |             with np.errstate(divide="ignore", invalid="ignore"):
297 |                 o = np.log(frequency_matrix.loc[ct1, ct2]) - np.log(
298 |                     frequency_matrix.loc[ct1].sum()
299 |                 )
300 |                 if o == 0:
301 |                     norms.loc[ct1, ct2] = 0.0
302 |                     continue
303 |                 f1 = np.log(cluster_counts.loc[ct1]) - c
304 |                 f2 = np.log(cluster_counts.loc[ct2]) - c
305 | 
306 |             norms.loc[ct1, ct2] = o - (f1 + f2 + fa)
307 |     if inf_replace_method is None:
308 |         return norms
309 | 
310 |     # three ways to replace -inf (cell types with no event touching):
311 |     # # 1. replace with lowest non-inf value (dehemphasize the lower bottom - lack of touching)
312 |     if inf_replace_method == "min":
313 |         norm_freqs = norms.replace(-np.inf, norms[norms != (-np.inf)].min().min())
314 |     # # 2. replace with minus highest (try to )
315 |     if inf_replace_method == "max":
316 |         norm_freqs = norms.replace(-np.inf, -norms.max().max())
317 |     # # 3. One of the above + make symmetric by  X @ X.T + Z-score
318 |     if inf_replace_method == "min_symmetric":
319 |         norm_freqs = norms.replace(-np.inf, norms[norms != (-np.inf)].min().min())
320 |         norm_freqs = norm_freqs @ norm_freqs.T
321 |         norm_freqs = (norm_freqs - norm_freqs.values.mean()) / norm_freqs.values.std()
322 |     if inf_replace_method == "max_symmetric":
323 |         norm_freqs = norms.replace(-np.inf, norms[norms != (-np.inf)].max().max())
324 |         norm_freqs = norm_freqs @ norm_freqs.T
325 |         norm_freqs = (norm_freqs - norm_freqs.values.mean()) / norm_freqs.values.std()
326 |     return norm_freqs
327 | 


--------------------------------------------------------------------------------
/imc/ops/clustering.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Functions for single-cell clustering.
  3 | """
  4 | 
  5 | import typing as tp
  6 | 
  7 | import numpy as np
  8 | import pandas as pd
  9 | import matplotlib.pyplot as plt
 10 | from tqdm import tqdm
 11 | 
 12 | from anndata import AnnData
 13 | import scanpy as sc
 14 | 
 15 | from imc.types import DataFrame, Path
 16 | from imc.graphics import rasterize_scanpy
 17 | 
 18 | 
 19 | FIG_KWS = dict(bbox_inches="tight", dpi=300)
 20 | sc.settings.n_jobs = -1
 21 | 
 22 | 
 23 | DEFAULT_CELL_TYPE_REFERENCE = (
 24 |     "https://gist.github.com/afrendeiro/4aa133c2fcb5eb0152957b11ec753b74/raw",
 25 |     Path(".imc.cell_type_reference.yaml"),
 26 | )
 27 | 
 28 | 
 29 | def anndata_to_cluster_means(
 30 |     ann: AnnData, cluster_label: str, raw: bool = False
 31 | ) -> DataFrame:
 32 |     means = dict()
 33 |     obj = ann if not raw else ann.raw
 34 |     for cluster in ann.obs[cluster_label].unique():
 35 |         clust = ann.obs[cluster_label] == cluster
 36 |         means[cluster] = obj[clust, :].X.mean(0)
 37 |     mean_expr = pd.DataFrame(means, index=obj.var.index).sort_index(axis=1)
 38 |     mean_expr.columns.name = "cluster"
 39 |     return mean_expr
 40 | 
 41 | 
 42 | def phenotyping(
 43 |     a: tp.Union[AnnData, Path],
 44 |     channels_include: tp.Sequence[str] = None,
 45 |     channels_exclude: tp.Sequence[str] = None,
 46 |     filter_cells: bool = True,
 47 |     z_score: bool = True,
 48 |     z_score_per: str = "roi",
 49 |     z_score_cap: float = 3.0,
 50 |     remove_batch: bool = True,
 51 |     batch_variable: str = "sample",
 52 |     dim_res_algos: tp.Sequence[str] = ("umap",),
 53 |     clustering_method: str = "leiden",
 54 |     clustering_resolutions: tp.Sequence[float] = (1.0,),
 55 | ) -> AnnData:
 56 |     import anndata
 57 | 
 58 |     if "pymde" in dim_res_algos:
 59 |         import pymde
 60 |     if clustering_method == "parc":
 61 |         from parc import PARC
 62 | 
 63 |     # Checks
 64 |     reason = f"Can only Z-score values per 'roi' or 'sample'. '{z_score_per}' is not supported."
 65 |     assert z_score_per in ["sample", "roi"], reason
 66 |     reason = f"Clustering method '{clustering_method}' is not supported."
 67 |     assert clustering_method in ["leiden", "parc"]
 68 |     reason = "Can only use 'pca', 'umap', 'diffmap', or 'pymde' in `dim_res_algos`."
 69 |     assert all(x in ["pca", "umap", "diffmap", "pymde"] for x in dim_res_algos), reason
 70 | 
 71 |     if isinstance(a, Path):
 72 |         print(f"Reading h5ad file: '{a}'.")
 73 |         a = sc.read(a)
 74 | 
 75 |     if remove_batch:
 76 |         if a.obs[batch_variable].nunique() <= 1:
 77 |             print(
 78 |                 "Batch correction not possible as only one batch detected. "
 79 |                 "Check `batch_variable` keyord argument."
 80 |             )
 81 |             remove_batch = False
 82 | 
 83 |     if "sample" not in a.obs.columns:
 84 |         a.obs["sample"] = a.obs["roi"].str.extract(r"(.*)-\d+")[0].fillna("")
 85 |     if a.raw is None:
 86 |         a.raw = a
 87 | 
 88 |     # Add morphological variables to obs
 89 |     sel = a.var.index.str.contains(r"\(")
 90 |     v = a.var.index[~sel]
 91 |     for col in v:
 92 |         a.obs[col] = a[:, col].X.tolist()
 93 |     a = a[:, sel]
 94 | 
 95 |     # Filter out channels
 96 |     if channels_exclude is not None:
 97 |         a = a[:, ~a.var.index.isin(channels_exclude)]
 98 |     if channels_include is not None:
 99 |         a = a[:, channels_include]
100 |     a = a.copy()
101 | 
102 |     # # reduce DNA chanels to one, and move to obs
103 |     dnas = a.var.index[a.var.index.str.contains(r"DNA\d")]
104 |     a.obs["DNA"] = a[:, dnas].X.mean(1)
105 |     a = a[:, ~a.var.index.isin(dnas)]
106 | 
107 |     # Filter out cells
108 |     if filter_cells:
109 |         if "solidity" not in a.obs.columns:
110 |             print(
111 |                 "Could not filter cells based on solidity likely because morphological quantification was not performed!"
112 |             )
113 |         else:
114 |             exclude = a.obs["solidity"] == 1
115 |             p = (exclude).sum() / a.shape[0] * 100
116 |             print(f"Filtered out {exclude.sum()} cells ({p:.2f} %)")
117 | 
118 |     # Scaling/Normalization
119 |     print("Performing data scaling/normalization.")
120 |     sc.pp.log1p(a)
121 |     if z_score:
122 |         _ads = list()
123 |         for roi_name in a.obs["roi"].unique():
124 |             a2 = a[a.obs["roi"] == roi_name, :].copy()
125 |             sc.pp.scale(a2, max_value=z_score_cap)
126 |             a2.X[a2.X < -z_score_cap] = -z_score_cap
127 |             # print(a2.X.min(), a2.X.max())
128 |             _ads.append(a2)
129 |         a = anndata.concat(_ads)
130 |         sc.pp.scale(a)
131 |     if remove_batch:
132 |         sc.pp.combat(a, batch_variable)
133 |         sc.pp.scale(a)
134 | 
135 |     # Dimensionality reduction
136 |     print("Performing dimensionality reduction.")
137 |     sc.pp.pca(a)
138 |     if remove_batch:
139 |         sc.external.pp.bbknn(a, batch_key=batch_variable)
140 |     else:
141 |         sc.pp.neighbors(a)
142 |     if "umap" in dim_res_algos:
143 |         sc.tl.umap(a, gamma=25)
144 |     if "diffmap" in dim_res_algos:
145 |         sc.tl.diffmap(a)
146 |     if "pymde" in dim_res_algos:
147 |         a.obsm["X_pymde"] = pymde.preserve_neighbors(a.X, embedding_dim=2).embed().numpy()
148 |         a.obsm["X_pymde2"] = (
149 |             pymde.preserve_neighbors(
150 |                 a.X,
151 |                 embedding_dim=2,
152 |                 attractive_penalty=pymde.penalties.Quadratic,
153 |                 repulsive_penalty=None,
154 |             )
155 |             .embed()
156 |             .numpy()
157 |         )
158 | 
159 |     # Clustering
160 |     print("Performing clustering.")
161 |     if clustering_method == "leiden":
162 |         for res in clustering_resolutions:
163 |             sc.tl.leiden(a, resolution=res, key_added=f"cluster_{res}")
164 |             a.obs[f"cluster_{res}"] = pd.Categorical(
165 |                 a.obs[f"cluster_{res}"].astype(int) + 1
166 |             )
167 |     elif clustering_method == "parc":
168 |         for res in clustering_resolutions:
169 |             p = PARC(
170 |                 a.X,
171 |                 neighbor_graph=a.obsp["connectivities"],
172 |                 random_seed=42,
173 |                 resolution_parameter=res,
174 |             )
175 |             p.run_PARC()
176 |             a.obs[f"cluster_{res}"] = pd.Categorical(pd.Series(p.labels) + 1)
177 | 
178 |     print("Finished phenotyping.")
179 |     return a
180 | 
181 | 
182 | def plot_phenotyping(
183 |     a: tp.Union[AnnData, Path],
184 |     output_prefix: Path,
185 |     tech_channels: tp.Sequence[str] = None,
186 |     dim_res_algos: tp.Sequence[str] = ("umap",),
187 |     clustering_resolutions: tp.Sequence[float] = None,
188 | ):
189 |     from matplotlib.backends.backend_pdf import PdfPages
190 |     from imc.graphics import add_centroids
191 |     from seaborn_extensions import clustermap
192 | 
193 |     # Read in
194 |     if isinstance(a, Path):
195 |         print(f"Reading h5ad file: '{a}'.")
196 |         a = sc.read(a)
197 |     a = a[a.obs.sample(frac=1).index]
198 | 
199 |     # Checks
200 |     if output_prefix.is_dir():
201 |         output_prefix = output_prefix / "phenotypes."
202 |     if not output_prefix.endswith("."):
203 |         output_prefix += "."
204 |     output_prefix.parent.mkdir()
205 | 
206 |     if "sample" not in a.obs.columns:
207 |         a.obs["sample"] = a.obs["roi"].str.extract(r"(.*)-\d+")[0].fillna("")
208 | 
209 |     if tech_channels is None:
210 |         tech_channels = [
211 |             "DNA",
212 |             "eccentricity",
213 |             "solidity",
214 |             "area",
215 |             "perimeter",
216 |             "major_axis_length",
217 |         ]
218 |         tech_channels = [c for c in tech_channels if c in a.obs.columns]
219 | 
220 |     if clustering_resolutions is None:
221 |         clustering_resolutions = (
222 |             a.obs.columns[a.obs.columns.str.contains("cluster_")]
223 |             .str.extract(r"cluster_(.*)$")[0]
224 |             .astype(float)
225 |         )
226 | 
227 |     # Plot projections
228 |     non_tech_channels = a.var.index[~a.var.index.isin(tech_channels)].tolist()
229 |     vmax = (
230 |         [None]
231 |         + np.percentile(a.raw[:, non_tech_channels].X, 95, axis=0).tolist()
232 |         + np.percentile(a.obs[tech_channels], 95, axis=0).tolist()
233 |         # + [None]
234 |         + ([None] * len(clustering_resolutions))
235 |     )
236 |     color = (
237 |         ["sample"]
238 |         + non_tech_channels
239 |         + tech_channels
240 |         # + ["topological_domain"]
241 |         + [f"cluster_{res}" for res in clustering_resolutions]
242 |     )
243 |     for algo in tqdm(dim_res_algos):
244 |         f = output_prefix + f"{algo}.pdf"
245 |         with PdfPages(f) as pdf:
246 |             for i, col in enumerate(color):
247 |                 fig = sc.pl.embedding(
248 |                     a,
249 |                     basis=algo,
250 |                     color=col,
251 |                     show=False,
252 |                     vmax=vmax[i],
253 |                     use_raw=True,
254 |                 ).figure
255 |                 rasterize_scanpy(fig)
256 |                 if i >= len(color) - len(clustering_resolutions):
257 |                     res = clustering_resolutions[i - len(color)]
258 |                     add_centroids(a, res=res, ax=fig.axes[0], algo=algo)
259 |                 plt.figure(fig)
260 |                 pdf.savefig(**FIG_KWS)
261 |                 plt.close(fig)
262 | 
263 |         # Plot ROIs separately
264 |         f = output_prefix + f"{algo}.sample_roi.pdf"
265 |         projf = getattr(sc.pl, algo)
266 |         fig = projf(a, color=["sample", "roi"], show=False)[0].figure
267 |         rasterize_scanpy(fig)
268 |         fig.savefig(f, **FIG_KWS)
269 |         plt.close(fig)
270 | 
271 |     # Plot average phenotypes
272 |     for res in tqdm(clustering_resolutions):
273 |         df = a.to_df()[non_tech_channels].join(a.obs[tech_channels])
274 | 
275 |         # Drop variables with no variance
276 |         v = df.var()
277 |         if (v == 0).any():
278 |             df = df.drop(v.index[v == 0], axis=1)
279 | 
280 |         cluster_means = df.groupby(a.obs[f"cluster_{res}"].values).mean()
281 | 
282 |         cell_counts = a.obs[f"cluster_{res}"].value_counts().rename("Cells per cluster")
283 | 
284 |         cell_percs = ((cell_counts / cell_counts.sum()) * 100).rename("Cells (%)")
285 | 
286 |         op = output_prefix + f"cluster_means.{res}_res."
287 |         kws = dict(
288 |             row_colors=cell_percs.to_frame().join(cell_counts),
289 |             figsize=(10, 6 * res),
290 |         )
291 |         grid = clustermap(cluster_means, **kws)
292 |         grid.savefig(op + "abs.svg")
293 |         plt.close(grid.fig)
294 | 
295 |         grid = clustermap(cluster_means, **kws, config="z")
296 |         grid.savefig(op + "zscore.svg")
297 |         plt.close(grid.fig)
298 | 
299 |         # To plot topological domains:
300 |         # df = (a.obs[args.sc_topo.columns.drop(["domain", "topological_domain"])]).replace(
301 |         #     {"False": False, "True": True, "nan": np.nan}
302 |         # )
303 |         # topo_means = df.groupby(a.obs[f"cluster_{res}"].values).mean()
304 |         # topo_means = topo_means.loc[:, topo_means.sum() > 0]
305 | 
306 |         # g = clustermap(
307 |         #     topo_means.loc[cluster_means.index[grid.dendrogram_row.reordered_ind]],
308 |         #     figsize=(3, 6 * res),
309 |         #     config="z",
310 |         #     row_cluster=False,
311 |         #     cmap="PuOr_r",
312 |         # )
313 |         # g.savefig(op + "abs.topologic.svg")
314 | 
315 |         # g = clustermap(
316 |         #     topo_means.loc[cluster_means.index[grid.dendrogram_row.reordered_ind]],
317 |         #     figsize=(3, 6 * res),
318 |         #     config="z",
319 |         #     row_cluster=False,
320 |         #     cmap="PuOr_r",
321 |         # )
322 |         # g.savefig(op + "zscore.topologic.svg")
323 | 
324 |         # grid = clustermap(cluster_means, **kws, config="z", row_cluster=False)
325 |         # grid.savefig(op + "zscore.sorted.svg")
326 |         # g = clustermap(
327 |         #     topo_means,
328 |         #     figsize=(3, 6 * res),
329 |         #     config="z",
330 |         #     row_cluster=False,
331 |         #     cmap="PuOr_r",
332 |         # )
333 |         # g.savefig(op + "zscore.sorted.topologic.svg")
334 |         # plt.close("all")
335 | 
336 | 
337 | def predict_cell_types_from_reference(
338 |     quant: tp.Union[AnnData, DataFrame, Path],
339 |     output_prefix: Path,
340 |     covariates: DataFrame,
341 |     method: str = "astir",
342 |     astir_reference: Path = None,
343 |     astir_parameters: tp.Dict[str, tp.Any] = {},
344 | ):
345 |     import anndata
346 |     import yaml
347 |     from imc.utils import download_file
348 | 
349 |     # Get dataframe with expression
350 |     if isinstance(quant, Path):
351 |         if quant.endswith("csv") or quant.endswith("csv.gz"):
352 |             quant = pd.read_csv(quant, index_col=0)
353 |         elif quant.endswith(".h5ad"):
354 |             quant = anndata.read(quant)
355 |     elif isinstance(quant, anndata.AnnData):
356 |         quant = quant.to_df()
357 | 
358 |     # Remove metal label from column names
359 |     quant.columns = quant.columns.str.extract(r"(.*)\(.*")[0].fillna(
360 |         quant.columns.to_series().reset_index(drop=True)
361 |     )
362 | 
363 |     if method != "astir":
364 |         raise NotImplementedError("Only the `astir` method is currently supported.")
365 | 
366 |     # Prepare reference dictionary
367 |     if astir_reference is not None:
368 |         reference = yaml.safe_load(astir_reference.open())
369 |     else:
370 |         # if not DEFAULT_CELL_TYPE_REFERENCE[1].exists():
371 |         download_file(DEFAULT_CELL_TYPE_REFERENCE[0], DEFAULT_CELL_TYPE_REFERENCE[1])
372 |         ref = yaml.safe_load(DEFAULT_CELL_TYPE_REFERENCE[1].open())
373 |         reference = dict()
374 |         reference["cell_types"] = unroll_reference_dict(ref["cell_types"], False)
375 |         reference["cell_states"] = unroll_reference_dict(ref["cell_states"], False)
376 |         reference = filter_reference_based_on_available_markers(reference, quant.columns)
377 | 
378 |     res = astir(
379 |         input_expr=quant,
380 |         marker_dict=reference,
381 |         design=covariates,
382 |         output_prefix=output_prefix,
383 |         **astir_parameters,
384 |     )
385 |     return res
386 | 
387 | 
388 | def astir(
389 |     input_expr: DataFrame,
390 |     marker_dict: tp.Dict[str, tp.List[str]],
391 |     design: DataFrame,
392 |     output_prefix: Path,
393 |     batch_size: int = None,
394 |     max_epochs: int = 200,
395 |     learning_rate: float = 2e-3,
396 |     initial_epochs: int = 3,
397 |     device: str = "cpu",
398 |     plot: bool = True,
399 | ):
400 |     from astir import Astir
401 |     import torch
402 | 
403 |     if output_prefix.is_dir():
404 |         output_prefix = output_prefix / "astir."
405 |         output_prefix.parent.mkdir()
406 | 
407 |     ast = Astir(input_expr, marker_dict, design)
408 |     ast._device = torch.device("cpu")
409 |     if batch_size is None:
410 |         batch_size = ast.get_type_dataset().get_exprs_df().shape[0] // 100
411 | 
412 |     params = dict(
413 |         max_epochs=max_epochs,
414 |         batch_size=batch_size,
415 |         learning_rate=learning_rate,
416 |         n_init_epochs=initial_epochs,
417 |     )
418 |     res = pd.DataFrame(index=input_expr.index)
419 |     if "cell_types" in marker_dict:
420 |         ast.fit_type(**params)
421 |         _t = ast.get_celltypes()
422 |         res = res.join(_t)
423 |         _tp = ast.get_celltype_probabilities()
424 |         _tp.columns = _tp.columns + "_probability"
425 |         res = res.join(_tp)
426 |         if plot:
427 |             fig, ax = plt.subplots(1, 1, figsize=(4, 2))
428 |             ax.plot(ast.get_type_losses(), label="loss")
429 |             ax.legend()
430 |             ax.set(xlabel="Epochs", ylabel="Loss")
431 |             fig.savefig(output_prefix + "cell_type.loss.svg", **FIG_KWS)
432 |             plt.close(fig)
433 |     if "cell_states" in marker_dict:
434 |         ast.fit_state(**params)
435 |         _s = ast.get_cellstates()
436 |         res = res.join(_s)
437 |         if plot:
438 |             fig, ax = plt.subplots(1, 1, figsize=(4, 2))
439 |             ax.plot(ast.get_state_losses(), label="loss")
440 |             ax.legend()
441 |             ax.set(xlabel="Epochs", ylabel="Loss")
442 |             fig.savefig(output_prefix + "cell_state.loss.svg", **FIG_KWS)
443 |             plt.close(fig)
444 |     ast.save_models(output_prefix + "fitted_model.hdf5")
445 |     return res
446 | 
447 | 
448 | def unroll_reference_dict(
449 |     x: tp.Dict,
450 |     name_with_predecessors: bool = True,
451 |     max_depth: int = -1,
452 |     _cur_depth: int = 0,
453 |     _predecessors: tp.List[str] = [],
454 | ) -> tp.Dict:
455 |     from copy import deepcopy
456 | 
457 |     x = deepcopy(x)
458 |     new = dict()
459 |     for k, v in x.items():
460 |         if "markers" in v:
461 |             name = " - ".join(_predecessors + [k]) if name_with_predecessors else k
462 |             if v["markers"] != [None]:
463 |                 new[name] = v["markers"]
464 |                 v.pop("markers")
465 |         if (
466 |             isinstance(v, dict)
467 |             and (len(v) > 0)
468 |             and ((_cur_depth < max_depth) or max_depth == -1)
469 |         ):
470 |             new.update(
471 |                 unroll_reference_dict(
472 |                     v,
473 |                     name_with_predecessors=name_with_predecessors,
474 |                     max_depth=max_depth,
475 |                     _cur_depth=_cur_depth + 1,
476 |                     _predecessors=_predecessors + [k],
477 |                 )
478 |             )
479 |     return new
480 | 
481 | 
482 | def filter_reference_based_on_available_markers(
483 |     x: tp.Dict, markers: tp.Sequence[str]
484 | ) -> tp.Dict:
485 |     def _filter(x2):
486 |         inter = dict()
487 |         for k, v in x2.items():
488 |             n = list(filter(lambda i: i in markers, v))
489 |             if n:
490 |                 inter[k] = n
491 |         return inter
492 | 
493 |     new = dict()
494 |     new["cell_types"] = _filter(x["cell_types"])
495 |     new["cell_states"] = _filter(x["cell_states"])
496 |     return new
497 | 


--------------------------------------------------------------------------------
/imc/ops/community.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Functions for community detection.
  3 | """
  4 | 
  5 | import typing as tp
  6 | from collections import Counter
  7 | 
  8 | import numpy as np
  9 | import pandas as pd
 10 | import matplotlib.pyplot as plt
 11 | import seaborn as sns
 12 | from tqdm import tqdm
 13 | import parmap
 14 | from anndata import AnnData
 15 | import scanpy as sc
 16 | import community
 17 | 
 18 | import imc.data_models.roi as _roi
 19 | from imc.exceptions import cast
 20 | from imc.types import Series, Path
 21 | from imc.graphics import add_legend
 22 | 
 23 | 
 24 | FIG_KWS = dict(bbox_inches="tight", dpi=300)
 25 | 
 26 | DEFAULT_SINGLE_CELL_RESOLUTION = 1.0
 27 | MAX_BETWEEN_CELL_DIST = 4
 28 | DEFAULT_COMMUNITY_RESOLUTION = 0.005
 29 | DEFAULT_SUPERCOMMUNITY_RESOLUTION = 0.5
 30 | # DEFAULT_SUPER_COMMUNITY_NUMBER = 12
 31 | 
 32 | 
 33 | def find_communities(
 34 |     roi: _roi.ROI,
 35 |     community_resolution: float = DEFAULT_COMMUNITY_RESOLUTION,
 36 |     plot: bool = True,
 37 | ) -> tp.Tuple[Series, tp.Tuple]:
 38 |     # def networkx_to_igraph(graph):
 39 |     #     import igraph as ig
 40 |     #     g = ig.Graph(edges=list(graph.edges))
 41 |     #     # If the original graph has non-consecutive integer labels,
 42 |     #     # igraph will create a node for the non existing vertexes.
 43 |     #     # These can simply be removed from the graph.
 44 |     #     nodes = pd.Series(list(graph.nodes))
 45 |     #     vertexes = pd.Series(range(len(g.vs)))
 46 |     #     g.delete_vertices(vertexes[~vertexes.isin(nodes)].values)
 47 |     #     return g
 48 | 
 49 |     def get_community_members(partition: tp.Dict) -> tp.Dict:
 50 |         counts = Counter(partition)
 51 |         # {com: members}
 52 |         comms: tp.Dict[int, set] = dict()
 53 |         for com in counts.keys():
 54 |             comms[com] = set()
 55 |         for n, com in partition.items():
 56 |             comms[com].add(n)
 57 |         return comms
 58 | 
 59 |     def get_community_cell_type_composition(roi: _roi.ROI, partition: Series):
 60 |         cts = dict()
 61 |         for com, members in get_community_members(partition).items():
 62 |             # cts[f"{roi.sample.name} - {roi.roi_number} - {com}"] = \
 63 |             cts[com] = roi.clusters.loc[members].value_counts()
 64 |         return (
 65 |             pd.DataFrame(cts)
 66 |             .fillna(0)
 67 |             .rename_axis(index="cell_type", columns="community")
 68 |             .astype(int)
 69 |         )
 70 | 
 71 |     # Community finding in graph (overclustering)
 72 |     roi_output_prefix = roi.sample.root_dir / "single_cell" / (roi.name + ".communities.")
 73 | 
 74 |     # TODO: use leiden instead of louvain
 75 |     # g = networkx_to_igraph(roi.adjacency_graph)
 76 |     # p = partitions[roi] = pd.Series(
 77 |     #     la.find_partition(
 78 |     #         g, la.RBConfigurationVertexPartition,
 79 |     #         resolution_parameter=community_resolution).membership,
 80 |     #     name="community", index=roi.adjacency_graph.nodes).sort_index()
 81 |     partition = pd.Series(
 82 |         community.best_partition(
 83 |             roi.adjacency_graph, resolution=community_resolution
 84 |         ),  # , weight="expr_weight")
 85 |         name="community",
 86 |     ).sort_index()
 87 |     n = partition.value_counts().shape[0]
 88 |     tqdm.write(f"Found {n} communities for ROI {roi}.")
 89 |     partition += 1
 90 |     partition.to_csv(roi_output_prefix + "graph_partition.csv")
 91 |     comps = (
 92 |         get_community_cell_type_composition(roi, partition)
 93 |         .T.assign(sample=roi.sample.name, roi=roi.name)
 94 |         .set_index(["sample", "roi"], append=True)
 95 |     )
 96 |     comps.index = comps.index.reorder_levels(["sample", "roi", "community"])
 97 | 
 98 |     if plot:
 99 |         # get cell type counts per community
100 |         comps_s = comps.reset_index(level=["sample", "roi"], drop=True)
101 |         percent = (comps_s.T / comps_s.sum(1)) * 100
102 |         grid = sns.clustermap(
103 |             percent, metric="correlation", cbar_kws=dict(label="% of cell type")
104 |         )
105 |         grid.savefig(roi_output_prefix + "cell_type_composition.svg", **FIG_KWS)
106 |         grid = sns.clustermap(
107 |             percent,
108 |             z_score=1,
109 |             cmap="RdBu_r",
110 |             center=0,
111 |             metric="correlation",
112 |             cbar_kws=dict(label="% of cell type (Z-score)"),
113 |         )
114 |         grid.savefig(roi_output_prefix + "cell_type_composition.zscore.svg", **FIG_KWS)
115 |     return partition, comps
116 | 
117 | 
118 | def cluster_communities(
119 |     rois: tp.Sequence[_roi.ROI],
120 |     output_prefix: Path = None,
121 |     supercommunity_resolution: float = DEFAULT_SUPERCOMMUNITY_RESOLUTION,
122 | ) -> Series:
123 |     from scipy.cluster.hierarchy import fcluster
124 | 
125 |     output_prefix = output_prefix or (
126 |         rois[0].prj.processed_dir / "single_cell" / (rois[0].prj.name + ".communities.")
127 |     )
128 |     output_prefix = cast(output_prefix)
129 | 
130 |     res = parmap.map(find_communities, rois)
131 |     partitions = {k: v[0] for k, v in zip(rois, res)}
132 |     composition = pd.concat([v[1] for v in res]).fillna(0).astype(int).sort_index()
133 |     composition.to_csv(output_prefix + ".all_communities.cell_type_composition.csv")
134 | 
135 |     print(f"Found {composition.shape[0]} communities across all ROIs.")
136 | 
137 |     composition = pd.read_csv(
138 |         output_prefix + ".all_communities.cell_type_composition.csv",
139 |         index_col=[0, 1, 2],
140 |     )
141 | 
142 |     # Cluster communities by leiden clustering based on cell type composition
143 |     a = AnnData(composition)
144 |     sc.pp.log1p(a)
145 |     sc.pp.neighbors(a)
146 |     sc.tl.leiden(a, resolution=supercommunity_resolution, key_added="supercommunity")
147 |     n_scomms = len(a.obs["supercommunity"].unique())
148 |     print(f"Found {n_scomms} supercommunities.")
149 |     # Make supercommunities 1-based (to distinguish from masks where 0 == background)
150 |     a.obs["supercommunity"] = pd.Categorical(a.obs["supercommunity"].astype(int) + 1)
151 |     sc.tl.umap(a)
152 |     sc.pp.pca(a)
153 | 
154 |     # DataFrame(cell vs [celltype, community, supercommunity])
155 |     _assignments = list()
156 |     for roi in rois:
157 |         # {cell: cell type}
158 |         if roi.clusters.dtype == "int" and roi.clusters.min() == 0:
159 |             c1 = (
160 |                 roi.clusters + 1
161 |             )  # TODO: this +1 should be removed when clustering is re-run since the new implm
162 |         else:
163 |             c1 = roi.clusters
164 |         # {cell: community}
165 |         c2 = pd.Series(partitions[roi], name="community").rename_axis(index="obj_id")
166 |         scomm = a.obs.loc[(roi.sample.name, roi.name), "supercommunity"].astype(int)
167 |         assert c2.value_counts().shape[0] == scomm.shape[0]
168 |         c3 = c2.replace(scomm.to_dict()).rename("supercommunity")
169 |         assert c3.max() <= n_scomms
170 |         assert c1.shape == c2.shape == c3.shape
171 |         assert (c1.index == c2.index).all()
172 |         assert (c2.index == c3.index).all()
173 |         c = c1.to_frame().join(c2).join(c3)
174 |         assert roi.clusters.shape[0] == c.shape[0]
175 |         c["sample"] = roi.sample.name
176 |         c["roi"] = roi.roi_number
177 |         _assignments.append(c)
178 |     assignments = pd.concat(_assignments).set_index(["sample", "roi"], append=True)
179 |     assignments.index = assignments.index.reorder_levels(["sample", "roi", "obj_id"])
180 | 
181 |     # Further merge supercommunities if distant by less than X% of composition
182 |     # TODO: revise supercommunity merging
183 |     max_supercommunity_difference = 10.0
184 |     comp = assignments.assign(count=1).pivot_table(
185 |         index="supercommunity",
186 |         columns="cluster",
187 |         values="count",
188 |         aggfunc=sum,
189 |         fill_value=0,
190 |     )
191 | 
192 |     perc = (comp.T / comp.sum(1)).T * 100
193 |     diffs = pd.DataFrame(
194 |         np.sqrt(abs(perc.values - perc.values[:, None]).sum(axis=2)),
195 |         index=perc.index,
196 |         columns=perc.index,
197 |     )
198 |     grid = sns.clustermap(diffs)
199 |     repl = pd.Series(
200 |         dict(
201 |             zip(
202 |                 grid.data.columns,
203 |                 fcluster(
204 |                     grid.dendrogram_col.linkage,
205 |                     t=max_supercommunity_difference,
206 |                     criterion="distance",
207 |                 ),
208 |             )
209 |         )
210 |     ).sort_index()
211 | 
212 |     comp.index = comp.index.to_series().replace(repl)
213 |     comp = comp.groupby(level=0).sum()
214 | 
215 |     assignments["supercommunity"] = assignments["supercommunity"].replace(repl)
216 | 
217 |     # check name/number supercommunities is sorted on the abundance of their cell types
218 |     s = assignments["supercommunity"].value_counts().sort_values(ascending=False)
219 |     assignments["supercommunity"] = assignments["supercommunity"].replace(
220 |         dict(zip(s.index, np.arange(1, len(s))))
221 |     )
222 | 
223 |     # save final assignments
224 |     assignments.to_csv(output_prefix + "cell_type.community.supercommunities.csv")
225 | 
226 |     # Visualize
227 |     # # visualize initial communities in clustermap, PCA or UMAP
228 |     perc = (composition.T / composition.sum(1)).T * 100
229 |     grid = sns.clustermap(perc, metric="correlation", rasterized=True)
230 |     grid.savefig(
231 |         output_prefix
232 |         + "communities.cell_type_composition.leiden_clustering.clustermap_viz.svg",
233 |         **FIG_KWS,
234 |     )
235 |     grid = sns.clustermap(
236 |         np.log1p(composition),
237 |         row_linkage=grid.dendrogram_row.linkage,
238 |         col_linkage=grid.dendrogram_col.linkage,
239 |         metric="correlation",
240 |         row_colors=plt.get_cmap("tab20")(a.obs["supercommunity"].astype(int)),
241 |         rasterized=True,
242 |     )
243 |     grid.savefig(
244 |         output_prefix
245 |         + "communities.cell_type_composition.leiden_clustering.clustermap_viz.counts.svg",
246 |         **FIG_KWS,
247 |     )
248 |     for method in ["pca", "umap"]:
249 |         fig = getattr(sc.pl, method)(
250 |             a,
251 |             color=["supercommunity"] + a.var.index.tolist(),
252 |             return_fig=True,
253 |             show=False,
254 |         )
255 |         fig.savefig(
256 |             output_prefix
257 |             + f"communities.cell_type_composition.leiden_clustering.{method}_viz.svg",
258 |             **FIG_KWS,
259 |         )
260 | 
261 |     # # visualize the rediction of supercommunities based on difference thresh
262 |     grid = sns.clustermap(
263 |         diffs,
264 |         col_colors=plt.get_cmap("tab20")(repl.values),
265 |         row_colors=plt.get_cmap("tab20")(repl.values),
266 |         cbar_kws=dict(label="Sqrt(Sum(diff))"),
267 |     )
268 |     grid.savefig(
269 |         output_prefix + "supercommunities.reduction_by_diff.clustermap.svg",
270 |         **FIG_KWS,
271 |     )
272 | 
273 |     # assignments = pd.read_csv(output_prefix + "cell_type.community.supercommunities.csv", index_col=[0, 1, 2])
274 |     # # cell type vs {community, supercommunity}
275 |     for var_ in ["community", "supercommunity"]:
276 |         supercts = assignments.assign(count=1).pivot_table(
277 |             index="cluster",
278 |             columns=var_,
279 |             values="count",
280 |             aggfunc=sum,
281 |             fill_value=0,
282 |         )
283 |         perc_supercts = (supercts / supercts.sum()) * 100
284 | 
285 |         grid = sns.clustermap(
286 |             perc_supercts,
287 |             metric="correlation",
288 |             rasterized=True,
289 |             cbar_kws=dict(label="% of supercommunity"),
290 |         )
291 |         grid.savefig(output_prefix + f"{var_}.cell_type_composition.svg", **FIG_KWS)
292 |         grid = sns.clustermap(
293 |             perc_supercts,
294 |             z_score=1,
295 |             cmap="RdBu_r",
296 |             center=0,
297 |             metric="correlation",
298 |             rasterized=True,
299 |             cbar_kws=dict(label="% of supercommunity (Z-score)"),
300 |         )
301 |         grid.savefig(
302 |             output_prefix + f"{var_}.cell_type_composition.zscore.svg",
303 |             **FIG_KWS,
304 |         )
305 | 
306 |     leg_kws = dict(bbox_to_anchor=(0, -0.05))
307 | 
308 |     vars_ = ["cluster", "community", "supercommunity"]
309 |     n = len(rois)
310 |     m = len(vars_)
311 |     patches: tp.Dict[str, tp.List] = dict()
312 |     fig, axes = plt.subplots(
313 |         n, m, figsize=(4 * m, 4 * n), squeeze=False, sharex="row", sharey="row"
314 |     )
315 |     for i, roi in enumerate(rois):
316 |         for j, var_ in enumerate(vars_):
317 |             if i == 0:
318 |                 patches[var_] = list()
319 |             p = roi.plot_cell_types(
320 |                 ax=axes[i, j, np.newaxis, np.newaxis],
321 |                 cell_type_assignments=assignments.loc[
322 |                     (roi.sample.name, roi.roi_number), var_
323 |                 ],
324 |                 palette="nipy_spectral",
325 |             )
326 |             patches[var_] += p
327 |     for j, var_ in enumerate(vars_):
328 |         if var_ == "community":
329 |             continue
330 |         add_legend(patches[var_], axes[-1, j], **leg_kws)  # label="Super community",
331 |         _z = zip(
332 |             axes[0].squeeze(),
333 |             ["Cell types", "Communities", "Super communities"],
334 |         )
335 |     for axs, lab in _z:
336 |         axs.set_title(lab)
337 |     # TODO: limit rasterization to main image
338 |     for axs in axes.flat:
339 |         axs.set_rasterized(True)
340 |     fig.savefig(output_prefix + "communities_supercommunities.all_rois.svg", **FIG_KWS)
341 | 
342 |     return assignments["supercommunity"]
343 | 


--------------------------------------------------------------------------------
/imc/ops/compensation.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | 
  3 | """
  4 | Functions for compensation of imaging mass cytometry data.
  5 | """
  6 | 
  7 | from functools import partial
  8 | import typing as tp
  9 | 
 10 | import numpy as np
 11 | import pandas as pd
 12 | from scipy.optimize import nnls
 13 | import parmap
 14 | 
 15 | from imc import ROI
 16 | from imc.types import Array, DataFrame
 17 | 
 18 | 
 19 | def stack_to_flat_array(stack: Array) -> Array:
 20 |     return stack.reshape((stack.shape[0], -1)).T
 21 | 
 22 | 
 23 | def _get_cytospill_spillover_matrix(
 24 |     array: DataFrame, subsample_frac: float = None, subsample_n: int = None
 25 | ) -> Array:
 26 |     """
 27 |     The columns of array must be metal labels (e.g. Nd142Di)!
 28 | 
 29 |     Requires the Github version of CytoSpill installed from a local clone,
 30 |     not through devtools pointing to the Github repo - not sure why.
 31 | 
 32 |     $ git clone https://github.com/KChen-lab/CytoSpill.git
 33 |     $ R CMD INSTALL CytoSpill/
 34 |     """
 35 |     from rpy2.robjects import numpy2ri, pandas2ri
 36 |     from rpy2.robjects.packages import importr
 37 | 
 38 |     numpy2ri.activate()
 39 |     pandas2ri.activate()
 40 | 
 41 |     cytospill = importr("CytoSpill")
 42 | 
 43 |     if subsample_frac is not None:
 44 |         subsample_n = int(array.shape[0] * subsample_frac)
 45 | 
 46 |     kwargs = dict()
 47 |     if subsample_n is not None:
 48 |         kwargs["n"] = subsample_n
 49 | 
 50 |     spillover_matrix, thresholds = cytospill.GetSpillMat(
 51 |         data=array,
 52 |         cols=np.arange(array.shape[1]),
 53 |         threshold=0.1,
 54 |         flexrep=5,
 55 |         neighbor=2,
 56 |         **kwargs,
 57 |     )
 58 |     # spillover_matrix = pd.DataFrame(spillover_matrix, index=df.columns, columns=df.columns)
 59 |     return spillover_matrix
 60 | 
 61 | 
 62 | def _get_correlation_spillover_matrix(array: Array, k=60) -> Array:
 63 |     return k ** np.corrcoef(array.T) / k
 64 | 
 65 | 
 66 | def get_spillover_matrix(array: Array, method: str = "cytospill", **kwargs) -> Array:
 67 |     """"""
 68 |     if method == "cytospill":
 69 |         return _get_cytospill_spillover_matrix(array, **kwargs)
 70 |     if method == "correlation":
 71 |         return _get_correlation_spillover_matrix(array)
 72 |     raise ValueError("`method` must be one of 'cytospill' or 'correlation'.")
 73 | 
 74 | 
 75 | def compensate_array(
 76 |     flat_array: Array, spillover_matrix: Array, original_shape: tp.Tuple[int, int, int]
 77 | ) -> Array:
 78 |     new_shape = original_shape[1:] + (original_shape[0],)
 79 |     _nnls = partial(nnls, spillover_matrix)
 80 |     res = parmap.map(_nnls, flat_array)
 81 |     comp = np.asarray([x[0] for x in res])
 82 |     return np.moveaxis(
 83 |         (comp).reshape(new_shape),
 84 |         -1,
 85 |         0,
 86 |     )
 87 | 
 88 | 
 89 | def compensate_image_stack(roi: ROI, normalize: bool = True) -> Array:
 90 |     from imc.segmentation import normalize as _normf
 91 | 
 92 |     stack = roi.stack
 93 |     if roi.channel_exclude is not None:
 94 |         if roi.channel_exclude.any():
 95 |             stack = stack[~roi.channel_exclude]
 96 |     if normalize:
 97 |         stack = _normf(stack)
 98 |     flat_array = stack_to_flat_array(stack)
 99 | 
100 |     labels = roi.channel_labels[~roi.channel_exclude.values]
101 |     metals = labels.str.extract(r".*\((.*)\)")[0] + "Di"
102 |     df = pd.DataFrame(flat_array, columns=metals)  # .iloc[:, 4:-4]
103 |     spill = get_spillover_matrix(df, subsample_n=2000)
104 |     comp_stack = compensate_array(flat_array, spill, roi.stack.shape)
105 |     return comp_stack
106 | 


--------------------------------------------------------------------------------
/imc/ops/domain.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Functions for image annotations.
  3 | 
  4 | """
  5 | 
  6 | import os
  7 | import json
  8 | import typing as tp
  9 | from collections import Counter
 10 | 
 11 | import numpy as np
 12 | import pandas as pd
 13 | import matplotlib
 14 | import matplotlib.pyplot as plt
 15 | import seaborn as sns
 16 | from tqdm import tqdm
 17 | 
 18 | import imc.data_models.roi as _roi
 19 | from imc.types import DataFrame, Array, Path
 20 | 
 21 | 
 22 | def label_domains(
 23 |     rois: tp.Sequence[_roi.ROI],
 24 |     output_dir: Path,
 25 |     export: bool = True,
 26 |     domains: tp.Sequence[str] = ["T", "S", "A", "L", "V", "E"],
 27 |     **kwargs,
 28 | ) -> None:
 29 |     """
 30 |     Draw shapes outying topological domains in tissue.
 31 |     This step is done manually using the `labelme` program.
 32 | 
 33 |     $ labelme --autosave --labels metadata/labelme_labels.txt
 34 |     """
 35 |     if export:
 36 |         export_images_for_topological_labeling(rois, output_dir, **kwargs)
 37 | 
 38 |     labels_f = (output_dir).mkdir() / "labelme_labels.txt"
 39 |     with open(labels_f, "w") as handle:
 40 |         handle.write("\n".join(domains))
 41 |     os.system(f"labelme --autosave --labels {labels_f} {output_dir}")
 42 | 
 43 | 
 44 | def export_images_for_topological_labeling(
 45 |     rois: tp.Sequence[_roi.ROI],
 46 |     output_dir: Path,
 47 |     channels: tp.Sequence[str] = ["mean"],
 48 |     overwrite: bool = False,
 49 | ) -> None:
 50 |     """
 51 |     Export PNGs for labeling with `labelme`.
 52 |     """
 53 |     for roi in tqdm(rois):
 54 |         f = output_dir / roi.name + ".jpg"
 55 |         if not overwrite and f.exists():
 56 |             continue
 57 |         array = roi._get_channels(channels, minmax=True, equalize=True)[1].squeeze()
 58 |         if array.ndim > 2:
 59 |             array = np.moveaxis(array, 0, -1)
 60 |         matplotlib.image.imsave(f, array)
 61 | 
 62 | 
 63 | def collect_domains(
 64 |     input_dir: Path, rois: tp.Sequence[_roi.ROI] = None, output_file: Path = None
 65 | ) -> tp.Dict[str, tp.Dict]:
 66 |     if rois is not None:
 67 |         roi_names = [r.name for r in rois]
 68 | 
 69 |     filenames = list(input_dir.glob("*.json"))
 70 |     if rois is not None:
 71 |         filenames = [f for f in filenames if f.stem in roi_names]
 72 | 
 73 |     topo_annots = dict()
 74 |     for filename in tqdm(filenames):
 75 |         annot_f = filename.replace_(".jpg", ".json")
 76 |         if not annot_f.exists():
 77 |             continue
 78 |         with open(annot_f, "r") as handle:
 79 |             annot = json.load(handle)
 80 |         if annot["shapes"]:
 81 |             topo_annots[filename.stem] = annot["shapes"]
 82 |     if output_file is not None:
 83 |         with open(output_file, "w") as handle:
 84 |             json.dump(topo_annots, handle, indent=4)
 85 |     return topo_annots
 86 | 
 87 | 
 88 | def illustrate_domains(
 89 |     topo_annots: tp.Dict[str, tp.Dict],
 90 |     rois: tp.Sequence[_roi.ROI],
 91 |     output_dir: Path,
 92 |     channels: tp.Sequence[str],
 93 |     domain_exclude: tp.Sequence[str] = None,
 94 |     cleanup: bool = False,
 95 |     cmap_str: str = "Set3",
 96 | ) -> None:
 97 |     """
 98 |     Illustrate annotated topological domains of each ROI.
 99 |     """
100 |     from imc.utils import polygon_to_mask
101 |     from imc.graphics import legend_without_duplicate_labels
102 |     from shapely.geometry import Polygon
103 | 
104 |     if domain_exclude is None:
105 |         domain_exclude = []
106 | 
107 |     output_dir.mkdir()
108 | 
109 |     labels = list(set(geom["label"] for n, j in topo_annots.items() for geom in j))
110 |     label_color = dict(zip(labels, sns.color_palette(cmap_str)))
111 |     label_order = dict(zip(labels, range(1, len(labels) + 1)))
112 |     cmap = plt.get_cmap(cmap_str)(range(len(labels) + 1))
113 |     cmap = np.vstack([[0, 0, 0, 1], cmap])
114 | 
115 |     for roi_name in tqdm(topo_annots):
116 |         roi = [r for r in rois if r.name == roi_name][0]
117 |         shapes = topo_annots[roi_name]
118 | 
119 |         # re-order shapes so that largest are first
120 |         areas = [
121 |             polygon_to_mask(shape["points"], roi.shape[1:][::-1]).sum()
122 |             for shape in shapes
123 |         ]
124 |         shapes = np.asarray(shapes)[np.argsort(areas)[::-1]].tolist()
125 | 
126 |         annot_mask = np.zeros(roi.shape[1:])
127 |         for shape in shapes:
128 |             if shape["label"] in domain_exclude:
129 |                 continue
130 |             region = polygon_to_mask(shape["points"], roi.shape[1:][::-1])
131 |             annot_mask[region > 0] = label_order[shape["label"]]
132 | 
133 |         ar = roi.shape[1] / roi.shape[2]
134 | 
135 |         fig, axes = plt.subplots(
136 |             1, 2, figsize=(2 * 4, 4 * ar), gridspec_kw=dict(wspace=0, hspace=0)
137 |         )
138 |         extra_txt = (
139 |             ""
140 |             if getattr(roi, "attributes", None) is None
141 |             else "; ".join([str(getattr(roi, attr)) for attr in roi.attributes])
142 |         )
143 | 
144 |         axes[0].set(title=roi.name + "\n" + extra_txt)
145 |         roi.plot_channels(channels, axes=[axes[0]], merged=True)
146 | 
147 |         shape_types: Counter[str] = Counter()
148 |         for shape in shapes:
149 |             label: str = shape["label"]
150 |             if label in domain_exclude:
151 |                 continue
152 |             shape_types[label] += 1
153 |             c = Polygon(shape["points"]).centroid
154 |             axes[1].text(
155 |                 c.x,
156 |                 c.y,
157 |                 s=f"{label}{shape_types[label]}",
158 |                 ha="center",
159 |                 va="center",
160 |             )
161 |             axes[0].plot(
162 |                 *np.asarray(shape["points"] + [shape["points"][0]]).T,
163 |                 label=label,
164 |                 color=cmap[label_order[label]],
165 |             )
166 | 
167 |         axes[1].imshow(
168 |             annot_mask,
169 |             cmap=matplotlib.colors.ListedColormap(cmap),
170 |             vmax=len(label_color) + 1,
171 |             interpolation="none",
172 |         )
173 |         axes[1].set(title="Manual annotations")
174 |         legend_without_duplicate_labels(
175 |             axes[0], title="Domain:", bbox_to_anchor=(-0.1, 1), loc="upper right"
176 |         )
177 |         for ax in axes:
178 |             ax.axis("off")
179 |         fig.savefig(
180 |             output_dir / roi.name + ".annotations.pdf",
181 |             dpi=300,
182 |             bbox_inches="tight",
183 |         )
184 |         plt.close(fig)
185 | 
186 |     cmd = f"""pdftk
187 |     {output_dir}/*.annotations.pdf
188 |     cat
189 |     output
190 |     {output_dir}/topological_domain_annotations.pdf"""
191 |     os.system(cmd.replace("\n", " "))
192 | 
193 |     if cleanup:
194 |         files = output_dir.glob("*.annotations.pdf")
195 |         for file in files:
196 |             file.unlink()
197 | 
198 | 
199 | def get_domains_per_cell(
200 |     topo_annots: tp.Dict[str, tp.Dict],
201 |     rois: tp.Sequence[_roi.ROI],
202 |     exclude_domains: tp.Sequence[str] = None,
203 |     remaining_domain: tp.Union[str, tp.Dict[str, str]] = "background",
204 |     resolution: str = "largest",
205 |     nest_domains: bool = True,
206 | ) -> DataFrame:
207 |     """
208 |     Generate annotation of topological domain each cell is contained in
209 |     based on manual annotated masks.
210 | 
211 |     Parameters
212 |     ----------
213 |     topo_annots: dict
214 |         Dictionary of annotations for each ROI.
215 |     rois: list
216 |         List of ROI objects.
217 |     exclude_domains: list[str]
218 |         Domains to ignore
219 |     remaining_domain: str | dict[str, str]
220 |         Name of domain to fill in for cells that do not fall under any domain annotation.
221 |         If given a string, it will simply use that.
222 |         If given a dict, the filled domain will be the value of the key which exists in the image.
223 |         E.g. Annotating tumor/stroma domains. If an image has only domains of type 'Tumor',
224 |         given `remaining_domain` == {'Tumor': 'Stroma', 'Stroma': 'Tumor'}, the remaining cells
225 |         will be annotated with 'Stroma'. In an image annotated only with 'Stroma' domains,
226 |         remaining cells will be annotated with 'Tumor' domains.
227 |     resolution: str
228 |         If `remaining_domain` is a dict, there may be more than one domain present in the image.
229 |         A resolution method is thus needed to select which domain will be filled for the remaining cells.
230 |          - 'largest' will choose as key of `remaining_domain` the largest annotated domain class.
231 |          - 'unique' will be strict and only fill in if there is a unique domain.
232 |     """
233 |     from imc.utils import polygon_to_mask
234 | 
235 |     if exclude_domains is None:
236 |         exclude_domains = []
237 | 
238 |     _full_assigns = list()
239 |     for roi_name, shapes in tqdm(topo_annots.items()):
240 |         roi = [r for r in rois if r.name == roi_name][0]
241 |         mask = roi.mask
242 |         cells = np.unique(mask)[1:]
243 |         td_count: tp.Counter[str] = Counter()
244 |         regions = list()
245 |         _assigns = list()
246 |         for shape in shapes:
247 |             label = shape["label"]
248 |             points = shape["points"]
249 |             if label in exclude_domains:
250 |                 continue
251 |             td_count[label] += 1
252 |             points += [points[0]]
253 |             region = polygon_to_mask(points, roi.shape[1:][::-1])
254 |             regions.append(region)
255 |             assign = (
256 |                 pd.Series(np.unique(mask[(mask > 0) & region]), name="obj_id")
257 |                 .to_frame()
258 |                 .assign(
259 |                     roi=roi.name,
260 |                     sample=roi.sample.name,
261 |                     domain_id=f"{label}{td_count[label]}",
262 |                 )
263 |             )
264 |             _assigns.append(assign)
265 | 
266 |         ## if remaining_domain explicitely annotated, skip
267 |         if isinstance(remaining_domain, str):
268 |             if remaining_domain in td_count:
269 |                 print(
270 |                     f"ROI '{roi.name}' has been manually annotated"
271 |                     " with remaining domains."
272 |                 )
273 |                 _full_assigns += _assigns
274 |                 continue
275 | 
276 |         ## add a domain for cells not annotated
277 |         remain = ~np.asarray(regions).sum(0).astype(bool)
278 |         existing = np.sort(pd.concat(_assigns)["obj_id"].unique())
279 |         remain = remain & (~np.isin(mask, existing))
280 |         if remain.sum() == 0:
281 |             _full_assigns += _assigns
282 |             continue
283 | 
284 |         if isinstance(remaining_domain, str):
285 |             ### if given a string just make that the domain for unnanotated cells
286 |             domain = remaining_domain
287 |             # print(f"ROI '{roi.name}' will be annotated with '{domain}' by default.")
288 | 
289 |         elif isinstance(remaining_domain, dict):
290 |             ### if given a dict, dependent on the existing domains choose what to label the remaining
291 |             ### useful for when labeling e.g. tumor/stroma and different images may be labeled with only one of them
292 |             existing_domains = pd.concat(_assigns)["domain_id"].value_counts()
293 |             existing_domains.index = existing_domains.index.str.replace(
294 |                 r"\d+", "", regex=True
295 |             )
296 |             repl = set(v for k, v in remaining_domain.items() if k in existing_domains)
297 |             if resolution == "largest":
298 |                 domain = remaining_domain[existing_domains.idxmax()]
299 |             elif resolution == "unique":
300 |                 if len(repl) == 1:
301 |                     domain = repl.pop()
302 |                 else:
303 |                     raise ValueError(
304 |                         "More than one domain was detected and it is"
305 |                         " unclear how to annotate the remaining cells "
306 |                         f"with the mapping: {remaining_domain}"
307 |                     )
308 | 
309 |         assign = (
310 |             pd.Series(np.unique(mask[remain]), name="obj_id")
311 |             .drop(0, errors="ignore")
312 |             .to_frame()
313 |             .assign(
314 |                 roi=roi.name,
315 |                 sample=roi.sample.name,
316 |                 domain_id=domain + "1",
317 |             )
318 |         )
319 |         _assigns.append(assign)
320 |         _full_assigns += _assigns
321 | 
322 |     assigns = pd.concat(_full_assigns)
323 |     assigns["topological_domain"] = assigns["domain_id"].str.replace(
324 |         r"\d", "", regex=True
325 |     )
326 | 
327 |     # reduce duplicated annotations but for cells annotated with background, make that the primary annotation
328 |     id_cols = ["sample", "roi", "obj_id"]
329 |     assigns = (
330 |         assigns.groupby(id_cols).apply(
331 |             lambda x: x
332 |             if (x.shape[0] == 1)
333 |             else x.loc[x["topological_domain"] == remaining_domain, :]
334 |             if (x["topological_domain"] == remaining_domain).any()
335 |             else x
336 |         )
337 |         # .drop(id_cols, axis=1)
338 |         .reset_index(level=-1, drop=True)
339 |     ).set_index(id_cols)
340 | 
341 |     # If more than one domain per cell:
342 |     if nest_domains:
343 |         # Keep them all
344 |         assigns = assigns.groupby(id_cols)["domain_id"].apply("-".join).to_frame()
345 |         assigns["topological_domain"] = assigns["domain_id"].str.replace(
346 |             r"\d", "", regex=True
347 |         )
348 |     else:
349 |         # make sure there are no cells with more than one domain that is background
350 |         tpc = assigns.groupby(id_cols)["domain_id"].nunique()
351 |         cells = tpc.index
352 |         assert not assigns.loc[cells[tpc > 1]].isin([remaining_domain]).any().any()
353 | 
354 |     assigns = (
355 |         assigns.reset_index()
356 |         .drop_duplicates(subset=id_cols)
357 |         .set_index(id_cols)
358 |         .sort_index()
359 |     )
360 | 
361 |     # expand domains
362 |     for domain in assigns["topological_domain"].unique():
363 |         assigns[domain] = assigns["topological_domain"] == domain
364 | 
365 |     return assigns
366 | 
367 | 
368 | @tp.overload
369 | def get_domain_areas(
370 |     topo_annots: tp.Dict[str, tp.Dict],
371 |     rois: tp.Sequence[_roi.ROI],
372 |     per_domain: tp.Literal[False],
373 | ) -> tp.Dict[Path, float]:
374 |     ...
375 | 
376 | 
377 | @tp.overload
378 | def get_domain_areas(
379 |     topo_annots: tp.Dict[str, tp.Dict],
380 |     rois: tp.Sequence[_roi.ROI],
381 |     per_domain: tp.Literal[True],
382 | ) -> DataFrame:
383 |     ...
384 | 
385 | 
386 | def get_domain_areas(
387 |     topo_annots: tp.Dict[str, tp.Dict],
388 |     rois: tp.Sequence[_roi.ROI] = None,
389 |     per_domain: bool = False,
390 | ) -> tp.Union[tp.Dict[Path, float], DataFrame]:
391 |     """
392 |     Get area of airways per image in microns.
393 |     """
394 |     from shapely.geometry import Polygon
395 | 
396 |     mpp = 1  # scale
397 |     if rois is not None:
398 |         roi_names = [r.name for r in rois]
399 |         topo_annots = {k: v for k, v in topo_annots.items() if k in roi_names}
400 | 
401 |     _areas = list()
402 |     for roi_name, shapes in tqdm(topo_annots.items()):
403 |         count: tp.Counter[str] = Counter()
404 |         for shape in shapes:
405 |             label = shape["label"]
406 |             count[label] += 1
407 |             a = Polygon(shape["points"]).area
408 |             _areas.append([roi_name, label + str(count[label]), a * mpp])
409 | 
410 |     areas = (
411 |         pd.DataFrame(_areas)
412 |         .rename(columns={0: "roi", 1: "domain_obj", 2: "area"})
413 |         .set_index("roi")
414 |     )
415 |     areas["topological_domain"] = areas["domain_obj"].str.replace(r"\d", "", regex=True)
416 |     if not per_domain:
417 |         areas = areas.groupby("roi")["area"].sum().to_dict()
418 | 
419 |     return areas
420 | 
421 | 
422 | def get_domain_masks(
423 |     topo_annots: tp.Dict,
424 |     rois: tp.Sequence[_roi.ROI],
425 |     exclude_domains: tp.Sequence[str] = None,
426 |     fill_remaining: str = None,
427 |     per_domain: bool = False,
428 | ) -> Array:
429 |     _x = list()
430 |     for roi in rois:
431 |         x = get_domain_mask(
432 |             topo_annots[roi.name],
433 |             roi,
434 |             exclude_domains=exclude_domains,
435 |             fill_remaining=fill_remaining,
436 |             per_domain=per_domain,
437 |         )
438 |         _x.append(x)
439 |     x = np.asarray(_x)
440 |     return x
441 | 
442 | 
443 | def get_domain_mask(
444 |     topo_annot: tp.Dict,
445 |     roi: _roi.ROI,
446 |     exclude_domains: tp.Sequence[str] = None,
447 |     fill_remaining: str = None,
448 |     per_domain: bool = False,
449 | ) -> Array:
450 |     """ """
451 |     import tifffile
452 |     from imc.utils import polygon_to_mask
453 | 
454 |     if exclude_domains is None:
455 |         exclude_domains = []
456 | 
457 |     _, h, w = roi.shape
458 |     masks = list()
459 |     region_types = list()
460 |     region_names = list()
461 |     count: tp.Counter[str] = Counter()
462 |     for shape in topo_annot:
463 |         shape["points"] += [shape["points"][0]]
464 |         region = polygon_to_mask(shape["points"], (w, h))
465 |         label = shape["label"]
466 |         count[label] += 1
467 |         masks.append(region)
468 |         region_types.append(label)
469 |         region_names.append(label + str(count[label]))
470 | 
471 |     for_mask = np.asarray(
472 |         [m for ll, m in zip(region_types, masks) if ll not in exclude_domains]
473 |     ).sum(0)
474 |     if fill_remaining is not None:
475 |         masks += [for_mask == 0]
476 |         region_types += [fill_remaining]
477 |         for_mask[for_mask == 0] = -1
478 |     exc_mask = np.asarray(
479 |         [m for ll, m in zip(region_types, masks) if ll in exclude_domains]
480 |     ).sum(0)
481 |     mask: Array = (
482 |         ((for_mask != 0) & ~(exc_mask != 0))
483 |         if isinstance(exc_mask, np.ndarray)
484 |         else for_mask
485 |     ).astype(bool)
486 | 
487 |     if per_domain:
488 |         nmask = np.empty_like(mask, dtype="object")
489 |         for r, ll in zip(masks, region_types):
490 |             if ll not in exclude_domains:
491 |                 nmask[mask & r] = ll
492 |         mask = np.ma.masked_array(nmask, mask=nmask == None)
493 | 
494 |     return mask
495 | 


--------------------------------------------------------------------------------
/imc/ops/mixture.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Functions for mixtures of signal.
  3 | """
  4 | 
  5 | import typing as tp
  6 | 
  7 | import numpy as np
  8 | import pandas as pd
  9 | import matplotlib.pyplot as plt
 10 | import seaborn as sns
 11 | from tqdm import tqdm
 12 | 
 13 | from imc.types import DataFrame, Series, Array
 14 | 
 15 | 
 16 | @tp.overload
 17 | def get_best_mixture_number(
 18 |     x: Series,
 19 |     min_mix: int,
 20 |     max_mix: int,
 21 |     subsample_if_needed: bool,
 22 |     n_iters: int,
 23 |     metrics: tp.Sequence[str],
 24 |     red_func: str,
 25 |     return_prediction: tp.Literal[False],
 26 | ) -> int:
 27 |     ...
 28 | 
 29 | 
 30 | @tp.overload
 31 | def get_best_mixture_number(
 32 |     x: Series,
 33 |     min_mix: int,
 34 |     max_mix: int,
 35 |     subsample_if_needed: bool,
 36 |     n_iters: int,
 37 |     metrics: tp.Sequence[str],
 38 |     red_func: str,
 39 |     return_prediction: tp.Literal[True],
 40 | ) -> tp.Tuple[int, Array]:
 41 |     ...
 42 | 
 43 | 
 44 | def get_best_mixture_number(
 45 |     x: Series,
 46 |     min_mix: int = 2,
 47 |     max_mix: int = 6,
 48 |     subsample_if_needed: bool = True,
 49 |     n_iters: int = 3,
 50 |     metrics: tp.Sequence[str] = [
 51 |         "silhouette_score",
 52 |         "calinski_harabasz_score",
 53 |         "davies_bouldin_score",
 54 |     ],
 55 |     red_func: str = "mean",
 56 |     return_prediction: bool = False,
 57 | ) -> tp.Union[int, tp.Tuple[int, Array]]:
 58 |     from sklearn.mixture import GaussianMixture
 59 |     import sklearn.metrics
 60 | 
 61 |     def get_means(num: Series, pred: tp.Union[Series, Array]) -> Series:
 62 |         return num.groupby(pred).mean().sort_values()
 63 | 
 64 |     def replace_pred(x: Series, y: tp.Union[Series, Array]) -> Series:
 65 |         means = get_means(x, y)
 66 |         repl = dict(zip(means.index, range(len(means))))
 67 |         y2 = pd.Series(y, index=x.index).replace(repl)
 68 |         new_means = get_means(x, y2.values)
 69 |         assert all(new_means.index == range(len(new_means)))
 70 |         return y2
 71 | 
 72 |     xx = x.sample(n=10_000) if subsample_if_needed and x.shape[0] > 10_000 else x
 73 | 
 74 |     if isinstance(xx, pd.Series):
 75 |         xx = xx.values.reshape((-1, 1))
 76 | 
 77 |     mi = range(min_mix, max_mix)
 78 |     mixes = pd.DataFrame(index=metrics, columns=mi)
 79 |     for i in tqdm(mi):
 80 |         mix = GaussianMixture(i)
 81 |         # mix.fit_predict(x)
 82 |         for f in metrics:
 83 |             func = getattr(sklearn.metrics, "davies_bouldin_score")
 84 |             mixes.loc[f, i] = np.mean(
 85 |                 [func(xx, mix.fit_predict(xx)) for _ in range(n_iters)]
 86 |             )
 87 |         # mixes[i] = np.mean([silhouette_score(x, mix.fit_predict(x)) for _ in range(iters)])
 88 |     mixes.loc["davies_bouldin_score"] = 1 / mixes.loc["davies_bouldin_score"]
 89 | 
 90 |     # return best
 91 |     # return np.argmax(mixes.values()) + min_mix  # type: ignore
 92 |     best = mixes.columns[int(getattr(np, red_func)(mixes.apply(np.argmax, 1)))]
 93 |     if not return_prediction:
 94 |         return best  # type: ignore
 95 | 
 96 |     # now train with full data
 97 |     mix = GaussianMixture(best)
 98 |     return (best, replace_pred(x, mix.fit_predict(x.values.reshape((-1, 1)))))
 99 | 
100 | 
101 | def get_threshold_from_gaussian_mixture(
102 |     x: Series, y: Series = None, n_components: int = 2
103 | ) -> Array:
104 |     def get_means(num: Series, pred: tp.Union[Series, Array]) -> Series:
105 |         return num.groupby(pred).mean().sort_values()
106 | 
107 |     def replace_pred(x: Series, y: tp.Union[Series, Array]) -> Series:
108 |         means = get_means(x, y)
109 |         repl = dict(zip(means.index, range(len(means))))
110 |         y2 = pd.Series(y, index=x.index).replace(repl)
111 |         new_means = get_means(x, y2.values)
112 |         assert all(new_means.index == range(len(new_means)))
113 |         return y2
114 | 
115 |     x = x.sort_values()
116 | 
117 |     if y is None:
118 |         from sklearn.mixture import GaussianMixture  # type: ignore
119 | 
120 |         mix = GaussianMixture(n_components=n_components)
121 |         xx = x.values.reshape((-1, 1))
122 |         y = mix.fit_predict(xx)
123 |     else:
124 |         y = y.reindex(x.index).values
125 |     y = replace_pred(x, y).values
126 |     thresh = x.loc[((y[:-1] < y[1::])).tolist() + [False]].reset_index(drop=True)
127 |     assert len(thresh) == (n_components - 1)
128 |     return thresh
129 | 
130 | 
131 | def get_probability_of_gaussian_mixture(
132 |     x: Series, n_components: int = 2, population=-1
133 | ) -> Series:
134 |     from sklearn.mixture import GaussianMixture  # type: ignore
135 | 
136 |     x = x.sort_values()
137 |     mix = GaussianMixture(n_components=n_components)
138 |     xx = x.values.reshape((-1, 1))
139 |     mix.fit(xx)
140 |     means = pd.Series(mix.means_.squeeze()).sort_values()
141 |     # assert (means.index == range(n_components)).all()
142 |     # order components by mean
143 |     p = mix.predict_proba(xx)[:, means.index]
144 |     # take requested population
145 |     p = p[:, population]
146 |     return pd.Series(p, index=x.index).sort_index()
147 | 
148 | 
149 | def fit_gaussian_mixture(
150 |     x: tp.Union[Series, DataFrame], n_mixtures: tp.Union[int, tp.List[int]] = None
151 | ) -> tp.Union[Series, DataFrame]:
152 |     # TODO: paralelize
153 |     from sklearn.mixture import GaussianMixture
154 | 
155 |     if isinstance(x, pd.Series):
156 |         x = x.to_frame()
157 |     if isinstance(n_mixtures, int):
158 |         n_mixtures = [n_mixtures] * x.shape[1]
159 |     expr_thresh = x.astype(int)
160 | 
161 |     def get_means(num, pred):
162 |         return num.groupby(pred).mean().sort_values()
163 | 
164 |     def replace_pred(x, y):
165 |         means = get_means(x, y)
166 |         repl = dict(zip(range(len(means)), means.index))
167 |         y2 = y.replace(repl)
168 |         new_means = get_means(x, y2)
169 |         assert all(new_means.index == range(len(new_means)))
170 |         return y2
171 | 
172 |     for i, ch in enumerate(x.columns):
173 |         if n_mixtures is None:
174 |             n_best = get_best_mixture_number(x, return_prediction=False)  # type: ignore[call-tp.overload]
175 |             mix = GaussianMixture(n_best)
176 |         else:
177 |             mix = GaussianMixture(n_mixtures[i])
178 |         _x = x.loc[:, ch]
179 |         x2 = _x.values.reshape((-1, 1))
180 |         mix.fit(x2)
181 |         y = pd.Series(mix.predict(x2), index=x.index, name="class")
182 |         expr_thresh[ch] = replace_pred(_x, y)
183 |     return expr_thresh.squeeze()
184 | 
185 | 
186 | def get_population(
187 |     ser: Series, population: int = -1, plot=False, ax=None, **kwargs
188 | ) -> pd.Index:
189 |     if population == -1:
190 |         operator = np.greater_equal
191 |     elif population == 0:
192 |         operator = np.less_equal
193 |     else:
194 |         raise ValueError("Chosen population must be '0' (lowest) or '-1' (highest).")
195 | 
196 |     # Make sure index is unique
197 |     if not ser.index.is_monotonic:
198 |         ser = ser.reset_index(drop=True)
199 | 
200 |     # Work only in positive space
201 |     xx = ser  # + abs(ser.min())
202 |     done = False
203 |     while not done:
204 |         try:
205 |             n, y = get_best_mixture_number(xx, return_prediction=True, **kwargs)
206 |         except ValueError:  # "Number of labels is 1. Valid values are 2 to n_samples - 1 (inclusive)"
207 |             continue
208 |         done = True
209 |     print(f"Chosen mixture of {n} distributions.")
210 |     done = False
211 |     while not done:
212 |         try:
213 |             thresh = get_threshold_from_gaussian_mixture(xx, n_components=n)
214 |         except AssertionError:
215 |             continue
216 |         done = True
217 | 
218 |     sel = operator(xx, thresh.iloc[population]).values
219 | 
220 |     if plot:
221 |         ax = plt.gca() if ax is None else ax
222 |         sns.distplot(xx, kde=False, ax=ax)
223 |         sns.distplot(xx.loc[sel], kde=False, ax=ax)
224 |         [ax.axvline(q, linestyle="--", color="grey") for q in thresh]
225 |         ax = None
226 |     return sel
227 | 


--------------------------------------------------------------------------------
/imc/ops/quant.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Operations of signal quantification.
  3 | """
  4 | 
  5 | from __future__ import annotations
  6 | import typing as tp
  7 | 
  8 | import numpy as np
  9 | import pandas as pd
 10 | import parmap
 11 | 
 12 | import skimage.measure
 13 | from skimage.segmentation import clear_border
 14 | 
 15 | from imc.data_models import roi as _roi
 16 | from imc.types import DataFrame, Array, Path
 17 | from imc.utils import read_image_from_file, minmax_scale
 18 | 
 19 | 
 20 | def quantify_cell_intensity(
 21 |     stack: tp.Union[Array, Path],
 22 |     mask: tp.Union[Array, Path],
 23 |     red_func: str = "mean",
 24 |     border_objs: bool = False,
 25 |     equalize: bool = True,
 26 |     scale: bool = False,
 27 |     channel_include: Array = None,
 28 |     channel_exclude: Array = None,
 29 | ) -> DataFrame:
 30 |     """
 31 |     Measure the intensity of each channel in each cell
 32 | 
 33 |     Parameters
 34 |     ----------
 35 |     stack: tp.Union[Array, Path]
 36 |         Image to quantify.
 37 |     mask: tp.Union[Array, Path]
 38 |         Mask to quantify.
 39 |     red_func: str
 40 |         Function to reduce pixels to object borders. Defaults to 'mean'.
 41 |     border_objs: bool
 42 |         Whether to quantify objects touching image border. Defaults to False.
 43 |     channel_include: :class:`~np.ndarray`
 44 |         Boolean array for channels to include.
 45 |     channel_exclude: :class:`~np.ndarray`
 46 |         Boolean array for channels to exclude.
 47 |     """
 48 |     from skimage.exposure import equalize_hist as eq
 49 | 
 50 |     if isinstance(stack, Path):
 51 |         stack = read_image_from_file(stack)
 52 |     if isinstance(mask, Path):
 53 |         mask = read_image_from_file(mask)
 54 |     if not border_objs:
 55 |         mask = clear_border(mask)
 56 | 
 57 |     if equalize:
 58 |         # stack = np.asarray([eq(x) for x in stack])
 59 |         _stack = list()
 60 |         for x in stack:
 61 |             p = np.percentile(x, 98)
 62 |             x[x > p] = p
 63 |             _stack.append(x)
 64 |         stack = np.asarray(_stack)
 65 |     if scale:
 66 |         stack = np.asarray([minmax_scale(x) for x in stack])
 67 | 
 68 |     cells = [c for c in np.unique(mask) if c != 0]
 69 |     n_channels = stack.shape[0]
 70 | 
 71 |     if channel_include is None:
 72 |         channel_include = np.asarray([True] * n_channels)
 73 |     if channel_exclude is None:
 74 |         channel_exclude = np.asarray([False] * n_channels)
 75 | 
 76 |     res = np.zeros((len(cells), n_channels), dtype=int if red_func == "sum" else float)
 77 |     for channel in np.arange(stack.shape[0])[channel_include & ~channel_exclude]:
 78 |         res[:, channel] = [
 79 |             getattr(x.intensity_image[x.image], red_func)()
 80 |             for x in skimage.measure.regionprops(mask, stack[channel])
 81 |         ]
 82 |     return pd.DataFrame(res, index=cells).rename_axis(index="obj_id")
 83 | 
 84 | 
 85 | def quantify_cell_morphology(
 86 |     mask: tp.Union[Array, Path],
 87 |     attributes: tp.Sequence[str] = [
 88 |         "area",
 89 |         "perimeter",
 90 |         "minor_axis_length",
 91 |         "major_axis_length",
 92 |         # In some images I get ValueError for 'minor_axis_length'
 93 |         # just like https://github.com/scikit-image/scikit-image/issues/2625
 94 |         # 'orientation', # should be ~random for non-optical imaging, so I'm not including it
 95 |         "eccentricity",
 96 |         "solidity",
 97 |         "centroid",
 98 |     ],
 99 |     border_objs: bool = False,
100 | ) -> DataFrame:
101 |     if isinstance(mask, Path):
102 |         mask = read_image_from_file(mask)
103 |     if not border_objs:
104 |         mask = clear_border(mask)
105 | 
106 |     morph = (
107 |         pd.DataFrame(
108 |             skimage.measure.regionprops_table(mask, properties=attributes),
109 |             index=[c for c in np.unique(mask) if c != 0],
110 |         )
111 |         .rename_axis(index="obj_id")
112 |         .rename(columns={"centroid-0": "X_centroid", "centroid-1": "Y_centroid"})
113 |     )
114 |     if ("minor_axis_length" in attributes) and ("major_axis_length" in attributes):
115 |         morph["ratio_axis_length"] = (
116 |             morph["major_axis_length"] / morph["minor_axis_length"]
117 |         )
118 |     return morph
119 | 
120 | 
121 | def _quantify_cell_intensity__roi(roi: _roi.ROI, **kwargs) -> DataFrame:
122 |     assignment = dict(roi=roi.name)
123 |     if roi.sample is not None:
124 |         assignment["sample"] = roi.sample.name
125 |     return roi.quantify_cell_intensity(**kwargs).assign(**assignment)
126 | 
127 | 
128 | def _quantify_cell_morphology__roi(roi: _roi.ROI, **kwargs) -> DataFrame:
129 |     assignment = dict(roi=roi.name)
130 |     if roi.sample is not None:
131 |         assignment["sample"] = roi.sample.name
132 |     return roi.quantify_cell_morphology(**kwargs).assign(**assignment)
133 | 
134 | 
135 | def _correlate_channels__roi(roi: _roi.ROI, labels: str = "channel_names") -> DataFrame:
136 |     xcorr = np.corrcoef(roi.stack.reshape((roi.channel_number, -1)))
137 |     np.fill_diagonal(xcorr, 0)
138 |     labs = getattr(roi, labels)
139 |     return pd.DataFrame(xcorr, index=labs, columns=labs)
140 | 
141 | 
142 | # def _get_adjacency_graph__roi(roi: _roi.ROI, **kwargs) -> DataFrame:
143 | #     output_prefix = roi.sample.root_dir / "single_cell" / roi.name
144 | #     return get_adjacency_graph(roi.stack, roi.mask, roi.clusters, output_prefix, **kwargs)
145 | 
146 | 
147 | def quantify_cell_intensity_rois(
148 |     rois: tp.Sequence[_roi.ROI],
149 |     **kwargs,
150 | ) -> DataFrame:
151 |     """
152 |     Measure the intensity of each channel in each single cell.
153 |     """
154 |     return pd.concat(
155 |         parmap.map(_quantify_cell_intensity__roi, rois, pm_pbar=True, **kwargs)
156 |     ).rename_axis(index="obj_id")
157 | 
158 | 
159 | def quantify_cell_morphology_rois(
160 |     rois: tp.Sequence[_roi.ROI],
161 |     **kwargs,
162 | ) -> DataFrame:
163 |     """
164 |     Measure the shape parameters of each single cell.
165 |     """
166 |     return pd.concat(
167 |         parmap.map(_quantify_cell_morphology__roi, rois, pm_pbar=True, **kwargs)
168 |     ).rename_axis(index="obj_id")
169 | 
170 | 
171 | def quantify_cells_rois(
172 |     rois: tp.Sequence[_roi.ROI],
173 |     layers: tp.Sequence[str],
174 |     intensity: bool = True,
175 |     intensity_kwargs: tp.Dict[str, tp.Any] = {},
176 |     morphology: bool = True,
177 |     morphology_kwargs: tp.Dict[str, tp.Any] = {},
178 | ) -> DataFrame:
179 |     """
180 |     Measure the intensity of each channel in each single cell.
181 |     """
182 |     quants = list()
183 |     if intensity:
184 |         quants.append(
185 |             quantify_cell_intensity_rois(rois=rois, layers=layers, **intensity_kwargs)
186 |         )
187 |     if morphology:
188 |         quants.append(
189 |             quantify_cell_morphology_rois(rois=rois, layers=layers, **morphology_kwargs)
190 |         )
191 | 
192 |     return (
193 |         # todo: this will fail if there's different layers in intensity and morphology
194 |         pd.concat(
195 |             # ignore because a ROI is not obliged to have a Sample
196 |             [quants[0].drop(["sample", "roi"], axis=1, errors="ignore"), quants[1]],
197 |             axis=1,
198 |         )
199 |         if len(quants) > 1
200 |         else quants[0]
201 |     ).rename_axis(index="obj_id")
202 | 


--------------------------------------------------------------------------------
/imc/ops/signal.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Functions for handling signal intensity in images.
  3 | """
  4 | 
  5 | import typing as tp
  6 | 
  7 | import numpy as np
  8 | import pandas as pd
  9 | import matplotlib.patches as mpatches
 10 | import matplotlib.pyplot as plt
 11 | import seaborn as sns
 12 | import parmap
 13 | from skimage import exposure
 14 | 
 15 | import imc.data_models.roi as _roi
 16 | from imc.exceptions import cast
 17 | from imc.types import DataFrame, Series, Array, Path
 18 | 
 19 | FIG_KWS = dict(bbox_inches="tight", dpi=300)
 20 | 
 21 | 
 22 | # def check_channel_axis_correlation(
 23 | #     arr: Array, channel_labels: tp.Sequence[str], output_prefix: Path
 24 | # ) -> DataFrame:
 25 | #     # # Plot and regress
 26 | #     n, m = get_grid_dims(arr.shape[0])
 27 | #     fig, axis = plt.subplots(
 28 | #         m, n, figsize=(n * 4, m * 4), squeeze=False, sharex=True, sharey=True
 29 | #     )
 30 | 
 31 | #     res = list()
 32 | #     for channel in range(arr.shape[0]):
 33 | #         for axs in [0, 1]:
 34 | #             s = arr[channel].mean(axis=axs)
 35 | #             order = np.arange(s.shape[0])
 36 | #             model = LinearRegression()
 37 | #             model.fit(order[:, np.newaxis] / max(order), s)
 38 | #             res.append(
 39 | #                 [
 40 | #                     channel,
 41 | #                     axs,
 42 | #                     model.coef_[0],
 43 | #                     model.intercept_,
 44 | #                     pearsonr(order, s)[0],
 45 | #                 ]
 46 | #             )
 47 | 
 48 | #             axis.flatten()[channel].plot(order, s)
 49 | #         axis.flatten()[channel].set_title(
 50 | #             f"{channel_labels[channel]}\nr[X] = {res[-2][-1]:.2f}; r[Y] = {res[-1][-1]:.2f}"
 51 | #         )
 52 | 
 53 | #     axis[int(m / 2), 0].set_ylabel("Mean signal along axis")
 54 | #     axis[-1, int(n / 2)].set_xlabel("Order along axis")
 55 | #     c = sns.color_palette("colorblind")
 56 | #     patches = [
 57 | #         mpatches.Patch(color=c[0], label="X"),
 58 | #         mpatches.Patch(color=c[1], label="Y"),
 59 | #     ]
 60 | #     axis[int(m / 2), -1].legend(
 61 | #         handles=patches,
 62 | #         bbox_to_anchor=(1.05, 1),
 63 | #         loc=2,
 64 | #         borderaxespad=0.0,
 65 | #         title="Axis",
 66 | #     )
 67 | #     fig.savefig(output_prefix + "channel-axis_correlation.svg", **FIG_KWS)
 68 | 
 69 | #     df = pd.DataFrame(res, columns=["channel", "axis", "coef", "intercept", "r"])
 70 | #     df["axis_label"] = df["axis"].replace(0, "X_centroid").replace(1, "Y_centroid")
 71 | #     df["channel_label"] = [x for x in channel_labels for _ in range(2)]
 72 | #     df["abs_r"] = df["r"].abs()
 73 | #     df.to_csv(output_prefix + "channel-axis_correlation.csv", index=False)
 74 | #     return df
 75 | 
 76 | 
 77 | def fix_signal_axis_dependency(
 78 |     arr: Array, channel_labels: tp.Sequence[str], res: DataFrame, output_prefix: Path
 79 | ) -> Array:
 80 |     # res = pd.read_csv(pjoin("processed", "case_b", "plots", "qc", roi + "_channel-axis_correlation.csv"))
 81 |     corr_d = np.empty_like(arr)
 82 |     for channel in range(arr.shape[0]):
 83 |         r = res.query(f"channel == {channel}")
 84 |         x = r.query("axis_label == 'X'")["coef"].squeeze()
 85 |         xinter = r.query("axis_label == 'X'")["intercept"].squeeze()
 86 |         y = r.query("axis_label == 'Y'")["coef"].squeeze()
 87 |         yinter = r.query("axis_label == 'Y'")["intercept"].squeeze()
 88 |         # to_reg = pd.DataFrame(arr[channel]).reset_index().melt(id_vars='index').rename(columns=dict(index="X", variable="Y"))
 89 | 
 90 |         order = np.arange(arr[channel].shape[0])
 91 |         dd = arr[channel]
 92 |         m = np.ones_like(dd)
 93 |         m = m * (order / max(order) * x) + (xinter)
 94 |         m = (m.T * (order / max(order) * y)).T + (yinter)
 95 |         ddfix = (dd - m) + dd.mean()
 96 |         corr_d[channel] = ddfix
 97 | 
 98 |         fig, axis = plt.subplots(1, 7, sharex=True, sharey=False, figsize=(7 * 3, 3 * 1))
 99 |         fig.suptitle(channel_labels[channel])
100 |         axis[0].set_title("Original")
101 |         axis[0].imshow(dd)
102 |         axis[1].set_title("Original, equalized")
103 |         axis[1].imshow(exposure.equalize_hist(dd))
104 |         axis[2].set_title("Bias mask")
105 |         axis[2].imshow(m)
106 |         axis[3].set_title("Bias removed")
107 |         axis[3].imshow(ddfix)
108 |         axis[4].set_title("Bias removed, equalized")
109 |         axis[4].imshow(exposure.equalize_hist(ddfix))
110 |         axis[5].set_title("Channel bias")
111 |         axis[5].plot(order, dd.mean(axis=0), label="Original", alpha=0.5)
112 |         axis[5].plot(order, ddfix.mean(axis=0), label="Bias removed", alpha=0.5)
113 |         axis[5].set_xlabel("Position along X axis")
114 |         axis[5].set_ylabel("Signal along X axis")
115 |         axis[5].legend()
116 |         axis[6].set_title("Channel bias")
117 |         axis[6].plot(order, dd.mean(axis=1), label="Original", alpha=0.5)
118 |         axis[6].plot(order, ddfix.mean(axis=1), label="Bias removed", alpha=0.5)
119 |         axis[6].set_xlabel("Position along Y axis")
120 |         axis[6].set_ylabel("Signal along Y axis")
121 |         axis[6].legend()
122 |         for ax in axis[:-2]:
123 |             ax.axis("off")
124 |         fig.savefig(
125 |             output_prefix
126 |             + f"channel-axis_correlation_removal.{channel_labels[channel]}.demonstration.svg",
127 |             **FIG_KWS,
128 |         )
129 |         plt.close("all")
130 |     return corr_d
131 | 
132 | 
133 | def channel_stats(roi: _roi.ROI, channels: tp.Sequence[str] = None):
134 |     from skimage.restoration import estimate_sigma
135 |     from imc.utils import estimate_sigma
136 | 
137 |     if channels is None:
138 |         channels = roi.channel_labels.tolist()
139 |     stack = roi._get_channels(channels)[1]
140 |     mask = roi.cell_mask == 0
141 |     res = dict()
142 |     res["wmeans"] = pd.Series(stack.mean(axis=(1, 2)), index=channels)
143 |     res["wstds"] = pd.Series(stack.std(axis=(1, 2)), index=channels)
144 |     res["cmeans"] = pd.Series(
145 |         [np.ma.masked_array(stack[i], mask=mask).mean() for i in range(len(channels))],
146 |         index=channels,
147 |     )
148 |     res["cstds"] = pd.Series(
149 |         [np.ma.masked_array(stack[i], mask=mask).std() for i in range(len(channels))],
150 |         index=channels,
151 |     )
152 |     res["emeans"] = pd.Series(
153 |         [np.ma.masked_array(stack[i], mask=~mask).mean() for i in range(len(channels))],
154 |         index=channels,
155 |     )
156 |     res["estds"] = pd.Series(
157 |         [np.ma.masked_array(stack[i], mask=~mask).std() for i in range(len(channels))],
158 |         index=channels,
159 |     )
160 |     # res["noises"] = pd.Series([estimate_noise(ch) for ch in stack], index=channels)
161 |     res["sigmas"] = pd.Series(
162 |         estimate_sigma(np.moveaxis(stack, 0, -1), multichannel=True), index=channels
163 |     )
164 |     return res
165 | 
166 | 
167 | def measure_channel_background(
168 |     rois: tp.Sequence[_roi.ROI], plot: bool = True, output_prefix: Path = None
169 | ) -> Series:
170 |     from imc.utils import align_channels_by_name
171 |     from mpl_toolkits.axes_grid1 import make_axes_locatable
172 | 
173 |     if plot:
174 |         assert (
175 |             output_prefix is not None
176 |         ), "If `plot` is True, `output_prefix` must be given."
177 | 
178 |     _channels = pd.DataFrame(
179 |         {r.name: r.channel_labels[~r.channel_exclude.values] for r in rois}
180 |     )
181 |     channels = align_channels_by_name(_channels).dropna().iloc[:, 0].tolist()
182 |     roi_names = [r.name for r in rois]
183 | 
184 |     res = parmap.map(channel_stats, rois, channels=channels, pm_pbar=True)
185 | 
186 |     wmeans = pd.DataFrame((x["wmeans"] for x in res), index=roi_names).T
187 |     wstds = pd.DataFrame((x["wstds"] for x in res), index=roi_names).T
188 |     wqv2s = np.sqrt(wstds / wmeans)
189 |     cmeans = pd.DataFrame((x["cmeans"] for x in res), index=roi_names).T
190 |     cstds = pd.DataFrame((x["cstds"] for x in res), index=roi_names).T
191 |     cqv2s = np.sqrt(cstds / cmeans)
192 |     emeans = pd.DataFrame((x["emeans"] for x in res), index=roi_names).T
193 |     estds = pd.DataFrame((x["estds"] for x in res), index=roi_names).T
194 |     eqv2s = np.sqrt(estds / emeans)
195 |     fore_backg: DataFrame = np.log(cmeans / emeans)
196 |     # fore_backg_disp = np.log1p(((cmeans / emeans) / (cmeans + emeans))).mean(1)
197 |     noises = pd.DataFrame((x["noises"] for x in res), index=roi_names).T
198 |     sigmas = pd.DataFrame((x["sigmas"] for x in res), index=roi_names).T
199 | 
200 |     # Join all metrics
201 |     metrics = (
202 |         wmeans.mean(1)
203 |         .to_frame(name="image_mean")
204 |         .join(wstds.mean(1).rename("image_std"))
205 |         .join(wqv2s.mean(1).rename("image_qv2"))
206 |         .join(cmeans.mean(1).rename("cell_mean"))
207 |         .join(cstds.mean(1).rename("cell_std"))
208 |         .join(cqv2s.mean(1).rename("cell_qv2"))
209 |         .join(emeans.mean(1).rename("extra_mean"))
210 |         .join(estds.mean(1).rename("extra_std"))
211 |         .join(eqv2s.mean(1).rename("extra_qv2"))
212 |         .join(fore_backg.mean(1).rename("fore_backg"))
213 |         .join(noises.mean(1).rename("noise"))
214 |         .join(sigmas.mean(1).rename("sigma"))
215 |     ).rename_axis(index="channel")
216 |     metrics_std = (metrics - metrics.min()) / (metrics.max() - metrics.min())
217 | 
218 |     if not plot:
219 |         # Invert QV2
220 |         sel = metrics_std.columns.str.contains("_qv2")
221 |         metrics_std.loc[:, sel] = 1 - metrics_std.loc[:, sel]
222 |         # TODO: better decision on which metrics matter
223 |         return metrics_std.mean(1)
224 | 
225 |     output_prefix = cast(output_prefix)
226 |     if not output_prefix.endswith("."):
227 |         output_prefix += "."
228 | 
229 |     metrics.to_csv(output_prefix + "channel_background_noise_measurements.csv")
230 |     metrics = pd.read_csv(
231 |         output_prefix + "channel_background_noise_measurements.csv", index_col=0
232 |     )
233 | 
234 |     # Plot
235 |     fig, axes = plt.subplots(2, 3, figsize=(3 * 4.1, 2 * 4), sharex="col")
236 |     axes[0, 0].set_title("Whole image")
237 |     axes[0, 1].set_title("Cells")
238 |     axes[0, 2].set_title("Extracellular")
239 |     for i, (means, stds, qv2s) in enumerate(
240 |         [(wmeans, wstds, wqv2s), (cmeans, cstds, cqv2s), (emeans, estds, eqv2s)]
241 |     ):
242 |         # plot mean vs variance
243 |         mean = means.mean(1)
244 |         std = stds.mean(1) ** 2
245 |         qv2 = qv2s.mean(1)
246 |         fb = fore_backg.mean(1)
247 | 
248 |         axes[0, i].set_xlabel("Mean")
249 |         axes[0, i].set_ylabel("Variance")
250 |         pts = axes[0, i].scatter(mean, std, c=fb)
251 |         if i == 2:
252 |             div = make_axes_locatable(axes[0, i])
253 |             cax = div.append_axes("right", size="5%", pad=0.05)
254 |             fig.colorbar(pts, cax=cax)
255 | 
256 |         for channel in means.index:
257 |             lab = "left" if np.random.rand() > 0.5 else "right"
258 |             axes[0, i].text(
259 |                 mean.loc[channel], std.loc[channel], channel, ha=lab, fontsize=4
260 |             )
261 |         v = max(mean.max().max(), std.max().max())
262 |         axes[0, i].plot((0, v), (0, v), linestyle="--", color="grey")
263 |         axes[0, i].loglog()
264 | 
265 |         # plot mean vs qv2
266 |         axes[1, i].set_xlabel("Mean")
267 |         axes[1, i].set_ylabel("Squared coefficient of variation")
268 |         axes[1, i].scatter(mean, qv2, c=fb)
269 |         for channel in means.index:
270 |             lab = "left" if np.random.rand() > 0.5 else "right"
271 |             axes[1, i].text(
272 |                 mean.loc[channel], qv2.loc[channel], channel, ha=lab, fontsize=4
273 |             )
274 |         axes[1, i].axhline(1, linestyle="--", color="grey")
275 |         axes[1, i].set_xscale("log")
276 |         # if qv2.min() > 0.01:
277 |         #     axes[1, i].set_yscale("log")
278 |     fig.savefig(output_prefix + "channel_mean_variation_noise.svg", **FIG_KWS)
279 | 
280 |     fig, axes = plt.subplots(1, 2, figsize=(2 * 6.2, 4))
281 |     p = fore_backg.mean(1).sort_values()
282 |     r1 = p.rank()
283 |     r2 = p.abs().rank()
284 |     axes[0].scatter(r1, p)
285 |     axes[1].scatter(r2, p.abs())
286 |     for i in p.index:
287 |         axes[0].text(r1.loc[i], p.loc[i], s=i, rotation=90, ha="center", va="bottom")
288 |         axes[1].text(
289 |             r2.loc[i], p.abs().loc[i], s=i, rotation=90, ha="center", va="bottom"
290 |         )
291 |     axes[1].set_yscale("log")
292 |     axes[0].set_xlabel("Channel rank")
293 |     axes[1].set_xlabel("Channel rank")
294 |     axes[0].set_ylabel("Cellular/extracellular difference")
295 |     axes[1].set_ylabel("Cellular/extracellular difference (abs)")
296 |     axes[0].axhline(0, linestyle="--", color="grey")
297 |     axes[1].axhline(0, linestyle="--", color="grey")
298 |     fig.savefig(
299 |         output_prefix + "channel_foreground_background_diff.rankplot.svg",
300 |         **FIG_KWS,
301 |     )
302 | 
303 |     grid = sns.clustermap(
304 |         metrics_std,
305 |         xticklabels=True,
306 |         yticklabels=True,
307 |         metric="correlation",
308 |         cbar_kws=dict(label="Variable (min-max)"),
309 |     )
310 |     grid.fig.savefig(
311 |         output_prefix + "channel_mean_variation_noise.clustermap.svg", **FIG_KWS
312 |     )
313 | 
314 |     # Invert QV2
315 |     sel = metrics_std.columns.str.contains("_qv2")
316 |     metrics_std.loc[:, sel] = 1 - metrics_std.loc[:, sel]
317 |     # TODO: better decision on which metrics matter
318 |     return metrics_std.mean(1)
319 | 


--------------------------------------------------------------------------------
/imc/py.typed:
--------------------------------------------------------------------------------
1 | # Marker file for PEP 561.  This package uses inline types.
2 | 


--------------------------------------------------------------------------------
/imc/scripts/illustrate.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | """
  4 | Illustrate IMC data.
  5 | """
  6 | 
  7 | import sys
  8 | import typing as tp
  9 | 
 10 | from tqdm import tqdm
 11 | import matplotlib.pyplot as plt
 12 | import scanpy as sc
 13 | 
 14 | from imc import Project
 15 | from imc.scripts import build_cli, find_tiffs, find_h5ad
 16 | 
 17 | figkws = dict(dpi=300, bbox_inches="tight")
 18 | 
 19 | 
 20 | def main(cli: tp.Sequence[str] = None) -> int:
 21 |     parser = build_cli("illustrate")
 22 |     args = parser.parse_args(cli)
 23 | 
 24 |     if args.tiffs is None:
 25 |         args.tiffs = find_tiffs()
 26 |     if len(args.tiffs) == 0:
 27 |         raise ValueError("Input files were not provided and could not be found!")
 28 | 
 29 |     if args.h5ad is None:
 30 |         args.h5ad = find_h5ad()
 31 |     if args.h5ad is None:
 32 |         if args.clusters:
 33 |             print(
 34 |                 "No h5ad file was provided and it could not be found. "
 35 |                 "Not illustrating clusters."
 36 |             )
 37 |             args.clusters = False
 38 |         if args.cell_types:
 39 |             print(
 40 |                 "No h5ad file was provided and it could not be found. "
 41 |                 "Not illustrating cell types."
 42 |             )
 43 |             args.cell_types = False
 44 | 
 45 |     print("Starting illustration step!")
 46 | 
 47 |     args.channels_include = (
 48 |         args.channels_include.split(",") if args.channels_include is not None else None
 49 |     )
 50 |     args.channels_exclude = args.channels_exclude.split(",")
 51 |     args.output_dir.mkdir()
 52 | 
 53 |     prj = Project.from_stacks(args.tiffs)
 54 |     if args.stacks:
 55 |         dir_ = (args.output_dir / "stacks").mkdir()
 56 |         print(f"Plotting full image stacks in directory '{dir_}'.")
 57 |         for roi in tqdm(prj.rois):
 58 |             f = dir_ / roi.name + ".full_stack.pdf"
 59 |             if f.exists() and not args.overwrite:
 60 |                 continue
 61 |             fig = roi.plot_channels()
 62 |             fig.savefig(f, **figkws)
 63 |             plt.close(fig)
 64 | 
 65 |     if args.channels:
 66 |         dir_ = (args.output_dir / "channels").mkdir()
 67 |         print(f"Plotting channels for all images jointly in directory '{dir_}'.")
 68 |         for ch in tqdm(prj.rois[0].channel_labels):
 69 |             f = dir_ / ch + ".rois.pdf"
 70 |             if f.exists() and not args.overwrite:
 71 |                 continue
 72 |             fig = prj.plot_channels([ch])
 73 |             fig.savefig(f, **figkws)
 74 |             plt.close(fig)
 75 | 
 76 |     id_cols = ["sample", "roi", "obj_id"]
 77 |     if args.clusters:
 78 |         dir_ = (args.output_dir / "clusters").mkdir()
 79 |         print(f"Plotting cluster illustrations in directory '{dir_}'.")
 80 | 
 81 |         a = sc.read(args.h5ad)
 82 |         clusters = a.obs.columns[a.obs.columns.str.contains("cluster_")]
 83 |         for cluster in tqdm(clusters):
 84 |             f = dir_ / f"clustering_illustrations.{cluster}.pdf"
 85 |             if f.exists() and not args.overwrite:
 86 |                 continue
 87 |             # TODO: plot markers next to clusters, or overlay
 88 |             prj.set_clusters(a.obs.set_index(id_cols)[cluster].rename("cluster"))
 89 |             fig = prj.plot_cell_types()
 90 |             for ax in fig.axes[1:]:
 91 |                 ax.legend_.set_visible(False)
 92 |             fig.savefig(f, **figkws)
 93 |             plt.close(fig)
 94 | 
 95 |     if args.cell_types:
 96 |         dir_ = (args.output_dir / "cell_type").mkdir()
 97 |         print(f"Plotting cell_type illustrations in directory '{dir_}'.")
 98 | 
 99 |         a = sc.read(args.h5ad)
100 |         cts = a.obs.columns[a.obs.columns.str.contains("cluster_")].intersection(
101 |             a.obs.columns[a.obs.columns.str.contains("_label")]
102 |         )
103 |         for ct in tqdm(cts):
104 |             f = dir_ / f"cell_type_illustrations.{ct}.pdf"
105 |             if f.exists() and not args.overwrite:
106 |                 continue
107 |             # TODO: plot markers next to cell types, or overlay
108 |             prj.set_clusters(a.obs.set_index(id_cols)[ct].rename("cluster"))
109 |             fig = prj.plot_cell_types()
110 |             for ax in fig.axes[1:]:
111 |                 ax.legend_.set_visible(False)
112 |             fig.savefig(f, **figkws)
113 |             plt.close(fig)
114 | 
115 |     print("Finished illustration step.")
116 |     return 0
117 | 
118 | 
119 | if __name__ == "__main__":
120 |     try:
121 |         sys.exit(main())
122 |     except KeyboardInterrupt:
123 |         sys.exit(1)
124 | 


--------------------------------------------------------------------------------
/imc/scripts/inspect_ilastik_model.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | 
  3 | import argparse
  4 | import sys
  5 | import typing as tp
  6 | 
  7 | import h5py
  8 | import numpy as np
  9 | import pandas as pd
 10 | import matplotlib
 11 | import matplotlib.pyplot as plt
 12 | import seaborn as sns
 13 | 
 14 | from imc.types import Path, Array
 15 | from imc.graphics import get_grid_dims
 16 | 
 17 | 
 18 | matplotlib.rcParams["svg.fonttype"] = "none"
 19 | FIG_KWS = dict(dpi=300, bbox_inches="tight")
 20 | 
 21 | 
 22 | cli = ["_models/utuc-imc/utuc-imc.ilp"]
 23 | 
 24 | 
 25 | def main(cli: tp.List[str] = None) -> int:
 26 |     args = parse_arguments().parse_args(cli)
 27 | 
 28 |     inspect_ilastik_model(args.model_path)
 29 | 
 30 |     if args.plot:
 31 |         plot_training_data(args.model_path, args.channels_to_plot)
 32 | 
 33 |     if args.extract:
 34 |         extract_training_data(args.model_path, args.labels_output_file)
 35 | 
 36 |     if args.convert:
 37 |         convert_model_data(
 38 |             args.model_path,
 39 |             args.converted_model_output,
 40 |             args.channels_to_retain,
 41 |         )
 42 | 
 43 |     return 0
 44 | 
 45 | 
 46 | def parse_arguments() -> argparse.ArgumentParser:
 47 |     parser = argparse.ArgumentParser()
 48 | 
 49 |     # Extract
 50 |     parser.add_argument(
 51 |         "-e",
 52 |         "--extract",
 53 |         dest="extract",
 54 |         action="store_true",
 55 |         help="Whether to extract training labels from ilastik file into numpy array.",
 56 |     )
 57 |     parser.add_argument(
 58 |         "--labels-output",
 59 |         dest="labels_output_file",
 60 |         default=None,
 61 |         type=Path,
 62 |         help="Path to file storing numpy array with training labels."
 63 |         " If not given will be same as model with different suffix.",
 64 |     )
 65 | 
 66 |     # Plot
 67 |     parser.add_argument(
 68 |         "-p",
 69 |         "--plot",
 70 |         dest="plot",
 71 |         action="store_true",
 72 |         help="Whether training set examples should be plotted.",
 73 |     )
 74 |     parser.add_argument(
 75 |         "--channels-to-plot",
 76 |         dest="channels_to_plot",
 77 |         choices=["mean", "last"],
 78 |         default="mean",
 79 |         help="Which channels to plot. One of 'mean' or 'last'.",
 80 |     )
 81 | 
 82 |     # Convert
 83 |     parser.add_argument(
 84 |         "-c",
 85 |         "--convert",
 86 |         dest="convert",
 87 |         action="store_true",
 88 |         help="Whether to convert ilastik model to new file by changing the input channels.",
 89 |     )
 90 |     parser.add_argument(
 91 |         "--keep-channels",
 92 |         dest="channels_to_retain",
 93 |         nargs="+",
 94 |         type=int,
 95 |         help="Channel numbers to retain in new model.",
 96 |     )
 97 |     parser.add_argument(
 98 |         "--converted-model-output",
 99 |         dest="converted_model_output",
100 |         type=Path,
101 |         help="Path to new model output file.",
102 |     )
103 |     parser.add_argument(dest="model_path", type=Path)
104 | 
105 |     return parser
106 | 
107 | 
108 | def inspect_ilastik_model(model_path: Path) -> None:
109 |     print(f"Ilastik model '{model_path}'.")
110 | 
111 |     f = h5py.File(model_path.as_posix(), mode="r")
112 | 
113 |     # Input files
114 |     # f['Input Data']['infos']['lane0000']['Raw Data']['filePath'][()].decode()
115 |     n_input = len(f["Input Data"]["infos"])
116 |     training_files = [
117 |         f["Input Data"]["infos"]["lane" + str(x).zfill(4)]["Raw Data"]["filePath"][
118 |             ()
119 |         ].decode()
120 |         for x in range(n_input)
121 |     ]
122 | 
123 |     print(f"Model was trained with {n_input} files.")
124 | 
125 |     # Feature matrix
126 |     fv = f["FeatureSelections"]["SelectionMatrix"][()]  # values
127 |     fx = f["FeatureSelections"]["FeatureIds"][()]  # x = derivative
128 |     fy = f["FeatureSelections"]["Scales"][()]  # y = sigma
129 |     feature_matrix = pd.DataFrame(
130 |         fv,
131 |         index=pd.Series(fx, name="Feature").str.decode("utf8"),
132 |         columns=pd.Series(fy, name="Sigma"),
133 |     )
134 |     used = feature_matrix.values.sum()
135 |     total = np.multiply(*feature_matrix.shape)
136 |     print(f"{used}/{total} of the possible feature combinations used.")
137 |     print("Here is the feature matrix:")
138 |     print(feature_matrix, "\n")
139 | 
140 |     # Pixel classification
141 |     # labels = [x.decode() for x in f['PixelClassification']['LabelNames'][()]]
142 |     # 3 labels (3 classes?)
143 |     # 35 blocks (35 inputs)
144 |     # values, shape=(x, y, 1)
145 |     annots = [len(x) for x in f["PixelClassification"]["LabelSets"].values()]
146 |     filled_annots = [x for x in annots if x != 0]
147 |     print(f"{len(filled_annots)}/{n_input} of the input files were labeled.")
148 | 
149 |     f.close()
150 | 
151 | 
152 | def plot_training_data(
153 |     model_path: Path,
154 |     channels_to_plot: tp.Union[tp.Literal["mean"], tp.Literal["last"]] = "mean",
155 | ) -> None:
156 |     from imc.segmentation import normalize
157 | 
158 |     f = h5py.File(model_path.as_posix(), mode="r")
159 |     n_input = len(f["Input Data"]["infos"])
160 |     annots = [len(x) for x in f["PixelClassification"]["LabelSets"].values()]
161 |     training_files = [
162 |         f["Input Data"]["infos"]["lane" + str(x).zfill(4)]["Raw Data"]["filePath"][
163 |             ()
164 |         ].decode()
165 |         for x in range(n_input)
166 |     ]
167 | 
168 |     # Plot labels on top of sum of channels
169 |     n, m = get_grid_dims(len(annots))
170 |     fig, axes = plt.subplots(
171 |         m, n, figsize=(n * 3, m * 3), gridspec_kw=dict(wspace=0, hspace=0.05)
172 |     )
173 |     axes = axes.ravel()
174 | 
175 |     # get colormap depending on what channels are being plotted
176 |     if channels_to_plot == "mean":
177 |         cmap = matplotlib.colors.ListedColormap(
178 |             np.asarray(sns.color_palette("tab10"))[np.asarray([-1, 1, 3])]
179 |         )
180 |     else:
181 |         cmap = matplotlib.colors.ListedColormap(
182 |             np.asarray(sns.color_palette("tab10"))[np.asarray([-4, -6, 3])]
183 |         )
184 | 
185 |     # plot
186 |     for i in range(n_input):
187 |         if training_files[i].startswith("Input Data"):
188 |             train_arr = f[training_files[i]]
189 |         else:
190 |             train_file = model_path.parent / training_files[i].replace(
191 |                 "/stacked_channels", ""
192 |             )
193 |             train_arr = h5py.File(train_file, mode="r")["stacked_channels"]
194 | 
195 |         train_arr = train_arr[()]
196 |         train_arr[pd.isnull(train_arr)] = 0
197 | 
198 |         if channels_to_plot == "mean":
199 |             train_arr = normalize(train_arr).mean(-1)
200 |         else:
201 |             train_arr = normalize(train_arr[..., -1])
202 |         training_file_shape = train_arr.shape
203 | 
204 |         axes[i].imshow(train_arr, rasterized=True)  # , cmap='inferno')
205 |         # axes[i].set_title(image)
206 |         axes[i].axis("off")
207 | 
208 |         # Now for each block, get coordinates and plot
209 |         label_arr = np.zeros(training_file_shape, dtype=float)
210 |         # label_arr = scipy.sparse.lil_matrix(training_file_shape)
211 |         b = f["PixelClassification"]["LabelSets"]["labels" + str(i).zfill((3))]
212 |         for j, label in enumerate(b):
213 |             # get start-end coordinates within training image
214 |             d = b["block" + str(j).zfill(4)]
215 |             pos = dict(d.attrs)["blockSlice"].replace("[", "").replace("]", "").split(",")
216 |             xs, ys, zs = [(int(x.split(":")[0]), int(x.split(":")[1])) for x in pos]
217 |             arr = d[()].squeeze()
218 |             # now fill the image with the labeled pixels
219 |             label_arr[slice(*xs), slice(*ys)] = arr
220 |         label_arr = np.ma.masked_array(label_arr, label_arr == 0)
221 |         axes[i].imshow(label_arr, cmap=cmap, vmin=1, vmax=3, rasterized=True)
222 |     fig.savefig(
223 |         model_path.replace_(".ilp", f".training_data.{channels_to_plot}.pdf"),
224 |         bbox_inches="tight",
225 |         dpi=300,
226 |     )
227 | 
228 |     f.close()
229 | 
230 | 
231 | def extract_training_data(
232 |     model_path: Path, output_path: Path = None
233 | ) -> tp.Tuple[Array, Array]:
234 |     # Extract training labels for preservation independent of model
235 | 
236 |     if output_path is None:
237 |         output_path = model_path.replace_(".ilp", ".training_data.npz")
238 | 
239 |     fi = h5py.File(model_path.as_posix(), mode="r")
240 | 
241 |     n_input = len(fi["Input Data"]["infos"])
242 |     training_files = [
243 |         fi["Input Data"]["infos"]["lane" + str(x).zfill(4)]["Raw Data"]["filePath"][
244 |             ()
245 |         ].decode()
246 |         for x in range(n_input)
247 |     ]
248 | 
249 |     # Extract arrays
250 |     _signals = list()
251 |     _labels = list()
252 |     for i, file in enumerate(training_files):
253 |         if file.startswith("Input Data"):
254 |             train_arr = fi[file]
255 |         else:
256 |             train_file = model_path.parent / file.replace("/stacked_channels", "")
257 |             train_arr = h5py.File(train_file, mode="r")["stacked_channels"]
258 |         shape = train_arr.shape[:-1]
259 | 
260 |         # Now for each block, get coordinates and assemble
261 |         label_arr = np.zeros(shape, dtype=float)
262 |         b = fi["PixelClassification"]["LabelSets"]["labels" + str(i).zfill((3))]
263 |         for j, _ in enumerate(b):
264 |             # get start-end coordinates within training image
265 |             d = b["block" + str(j).zfill(4)]
266 |             pos = dict(d.attrs)["blockSlice"].replace("[", "").replace("]", "").split(",")
267 |             xs, ys, _ = [(int(x.split(":")[0]), int(x.split(":")[1])) for x in pos]
268 |             arr = d[()].squeeze()
269 |             # now fill the image with the labeled pixels
270 |             label_arr[slice(*xs), slice(*ys)] = arr
271 | 
272 |         _signals.append(train_arr[()])
273 |         _labels.append(label_arr)
274 |     fi.close()
275 | 
276 |     # Save as numpy array
277 |     signals = np.asarray(_signals)
278 |     labels = np.asarray(_labels)
279 |     np.savez_compressed(output_path, x=signals, y=labels)
280 |     return (signals, labels)
281 | 
282 | 
283 | def convert_model_data(
284 |     input_model_path: Path,
285 |     output_model_path: Path,
286 |     channels_to_retain: tp.List[int] = [-1],
287 | ) -> None:
288 |     # For now this will assume all files were copied into H5 model
289 |     # TODO: implement copying of h5 files with suffix if referenced to disk paths
290 | 
291 |     # After this, model should be reloaded in ilastik,
292 |     # change one pixel in the training data and re-train
293 | 
294 |     if output_model_path is None:
295 |         output_model_path = input_model_path.replace_(".ilp", ".converted.ilp")
296 | 
297 |     with open(output_model_path, "wb") as handle:
298 |         handle.write(open(input_model_path, "rb").read())
299 | 
300 |     f = h5py.File(output_model_path.as_posix(), mode="r+")
301 | 
302 |     shape = [v.shape for k, v in f["Input Data"]["local_data"].items()][0]
303 |     print(f"Current shape of input data: {shape}")
304 | 
305 |     # Change shape of input data
306 |     for k, v in f["Input Data"]["local_data"].items():
307 |         del f["Input Data"]["local_data"][k]
308 |         from imc.segmentation import normalize
309 | 
310 |         f["Input Data"]["local_data"][k] = normalize(v[()][..., channels_to_retain])
311 | 
312 |     shape = [v.shape for k, v in f["Input Data"]["local_data"].items()][0]
313 |     print(f"Current shape of input data: {shape}")
314 | 
315 |     f.close()
316 | 
317 | 
318 | if __name__ == "__main__":
319 |     try:
320 |         sys.exit(main())
321 |     except KeyboardInterrupt:
322 |         sys.exit(1)
323 | 


--------------------------------------------------------------------------------
/imc/scripts/inspect_mcds.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | """
  4 | Inspect MCD files, reporting on their basic statistics, saving
  5 | metadata as YAML files, and panel information as CSV files.
  6 | """
  7 | 
  8 | import sys
  9 | import yaml
 10 | import argparse
 11 | from collections import OrderedDict
 12 | import typing as tp
 13 | 
 14 | import pandas as pd
 15 | 
 16 | from imctools.io.mcd.mcdparser import McdParser
 17 | 
 18 | from imc.types import Path, DataFrame, Args
 19 | from imc.utils import cleanup_channel_names, build_channel_name
 20 | from imc.scripts import build_cli, find_mcds
 21 | 
 22 | 
 23 | def main(cli: tp.Sequence[str] = None) -> int:
 24 |     parser = build_cli("inspect")
 25 |     args = parser.parse_args(cli)
 26 |     if len(args.mcd_files) == 0:
 27 |         args.mcd_files = find_mcds()
 28 |         if len(args.mcd_files) == 0:
 29 |             print("MCD files were not provided and could not be found!")
 30 |             return 1
 31 | 
 32 |     fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.mcd_files])
 33 |     print(f"Starting inspection step for {len(args.mcd_files)} MCD files:{fs}!")
 34 | 
 35 |     # Inspect each MCD
 36 |     metas = dict()
 37 |     _chs = list()
 38 |     for mcd_file in args.mcd_files:
 39 |         print(f"\tAnalyzing '{mcd_file}':")
 40 |         meta, ch = inspect_mcd(mcd_file, args)
 41 |         metas[mcd_file.as_posix()] = meta
 42 |         _chs.append(ch.assign(mcd_file=mcd_file))
 43 |         print(f"\tFinished with '{mcd_file}'!")
 44 | 
 45 |     # Dump joint metadata
 46 |     if not args.no_write:
 47 |         yaml.dump(
 48 |             encode(metas),
 49 |             open(args.output_prefix + ".all_mcds.yaml", "w"),
 50 |             indent=4,
 51 |             default_flow_style=False,
 52 |             sort_keys=False,
 53 |         )
 54 | 
 55 |     # Save joint panel info
 56 |     # join panels and reorder columns
 57 |     channels = pd.concat(_chs)
 58 |     channels = channels.reset_index().reindex(
 59 |         ["mcd_file", "channel"] + ch.columns.tolist(), axis=1
 60 |     )
 61 |     # check if more than one panel present
 62 |     n_panels = channels.groupby("mcd_file")["channel"].sum().nunique()
 63 |     if n_panels == 1:
 64 |         print("All MCD files use same panel.")
 65 |     else:
 66 |         print(f"MCD files use different panels, {n_panels} in total.")
 67 | 
 68 |     if not args.no_write:
 69 |         channels.to_csv(args.output_prefix + ".all_mcds.channel_labels.csv", index=False)
 70 | 
 71 |     print("Finished inspect step!")
 72 |     return 0
 73 | 
 74 | 
 75 | def inspect_mcd(mcd_file: Path, args: Args) -> tp.Tuple[DataFrame, DataFrame]:
 76 |     cols = [
 77 |         "Target",
 78 |         "Metal_Tag",
 79 |         "Atom",
 80 |         "full",
 81 |         "ilastik",
 82 |     ]
 83 |     exclude_channels = ["EMPTY", "190BCKG", "80Ar", "89Y", "127I", "124Xe"]
 84 | 
 85 |     mcd = McdParser(mcd_file)
 86 |     session = mcd.session
 87 | 
 88 |     # get channel labels
 89 |     ac_ids = session.acquisition_ids
 90 |     labels = pd.DataFrame(
 91 |         {
 92 |             # ac_id: pd.Series(cleanup_channel_names(
 93 |             #     session.acquisitions[ac_id].channel_labels
 94 |             # ).values, index=session.acquisitions[ac_id].channel_masses)
 95 |             ac_id: cleanup_channel_names(session.acquisitions[ac_id].channel_labels)
 96 |             for ac_id in ac_ids
 97 |         }
 98 |     )
 99 |     # the below fails if ROIs have different lengths of metals
100 |     # metals = pd.DataFrame(
101 |     #     {ac_id: session.acquisitions[ac_id].channel_names for ac_id in ac_ids}
102 |     # )
103 |     metals = pd.DataFrame(
104 |         [
105 |             pd.Series(session.acquisitions[ac_id].channel_names, name=ac_id)
106 |             for ac_id in ac_ids
107 |         ]
108 |     ).T
109 |     if metals.isnull().any().any():
110 |         print(
111 |             "Some ROIs have less metals than the others. "
112 |             "Keeping only ROIs with most metals."
113 |         )
114 |         metals = metals.loc[:, ~metals.isnull().any()]
115 | 
116 |     labels = labels.reindex(metals.columns, axis=1)
117 | 
118 |     channel_names = labels.replace({None: "<EMPTY>"}) + "(" + metals + ")"
119 | 
120 |     same_channels = bool(
121 |         channel_names.nunique(1).replace(0, 1).all()
122 |     )  # np.bool is not serializable
123 | 
124 |     if same_channels:
125 |         print("\t * All ROIs have the same markers/metals.")
126 |         ch = channel_names.iloc[:, 0].rename("channel")
127 |         ids = ch.str.extract(r"(?P<Target>.*)\((?P<Metal_Tag>.*)\)")
128 |         ids.index = ch
129 | 
130 |         annot = pd.DataFrame(ids, columns=cols)
131 |         annot["Atom"] = annot["Metal_Tag"].str.extract(r"(\d+)")[0]
132 |         annot["full"] = (~annot.index.str.contains("|".join(exclude_channels))).astype(
133 |             int
134 |         )
135 |         annot["ilastik"] = (
136 |             annot.index.str.contains("DNA") | annot.index.str.startswith("CD")
137 |         ).astype(int)
138 |         if not args.no_write:
139 |             annot.to_csv(mcd_file.replace_(".mcd", ".channel_labels.csv"))
140 |     else:
141 |         annot = pd.DataFrame(columns=cols)
142 |         print("\t * ROIs have different markers/metals.")
143 | 
144 |     # Save some metadata
145 |     meta = session.get_csv_dict()
146 |     meta["n_slides"] = len(session.slides)
147 |     print(f"\t * Contains {meta['n_slides']} slides.")
148 |     meta["n_panoramas"] = len(session.panoramas)
149 |     print(f"\t * Contains {meta['n_panoramas']} panoramas.")
150 |     meta["n_ROIs"] = len(session.acquisition_ids)
151 |     print(f"\t * Contains {meta['n_ROIs']} ROIs.")
152 |     meta["ROI_numbers"] = session.acquisition_ids
153 |     meta["all_ROIs_same_channels"] = same_channels
154 |     meta["consensus_channels"] = (
155 |         channel_names.iloc[:, 0].to_dict() if same_channels else None
156 |     )
157 |     meta["panoramas"] = {p: v.get_csv_dict() for p, v in session.panoramas.items()}
158 |     meta["acquisitions"] = {
159 |         a: ac.get_csv_dict() for a, ac in session.acquisitions.items()
160 |     }
161 |     meta.update(session.metadata)
162 |     if not args.no_write:
163 |         yaml.dump(
164 |             encode(meta),
165 |             open(mcd_file.replace_(".mcd", ".session_metadata.yaml"), "w"),
166 |             indent=4,
167 |             default_flow_style=False,
168 |             sort_keys=False,
169 |         )
170 | 
171 |     mcd.close()
172 |     return meta, annot
173 | 
174 | 
175 | def encode(obj: tp.Any) -> tp.Any:
176 |     """
177 |     For serializing to JSON or YAML with no special Python object references.
178 | 
179 |     Not fit for roundtrip!
180 |     """
181 |     if isinstance(obj, bool):
182 |         return str(obj).lower()
183 |     if isinstance(obj, (list, tuple)):
184 |         return [encode(item) for item in obj]
185 |     if isinstance(obj, (dict, OrderedDict)):
186 |         return {encode(key): encode(value) for key, value in obj.items()}
187 |     return obj
188 | 
189 | 
190 | if __name__ == "__main__":
191 |     try:
192 |         sys.exit(main())
193 |     except KeyboardInterrupt:
194 |         sys.exit(1)
195 | 


--------------------------------------------------------------------------------
/imc/scripts/phenotype.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | """
 4 | Phenotype cells.
 5 | """
 6 | 
 7 | import sys
 8 | import typing as tp
 9 | 
10 | import pandas as pd
11 | 
12 | from imc.ops.clustering import (
13 |     phenotyping,
14 |     # plot_phenotyping,
15 |     predict_cell_types_from_reference,
16 | )
17 | from imc.scripts import build_cli
18 | from imc.utils import filter_kwargs_by_callable
19 | 
20 | 
21 | def main(cli: tp.Sequence[str] = None) -> int:
22 |     parser = build_cli("phenotype")
23 |     args = parser.parse_args(cli)
24 |     print("Starting phenotyping step!")
25 | 
26 |     args.channels_include = (
27 |         args.channels_include.split(",") if args.channels_include is not None else None
28 |     )
29 |     args.channels_exclude = args.channels_exclude.split(",")
30 |     args.dim_res_algos = args.dim_res_algos.split(",")
31 |     args.clustering_resolutions = list(map(float, args.clustering_resolutions.split(",")))
32 |     args.output_dir.mkdir()
33 | 
34 |     if args.compute:
35 |         print(f"Phenotyping quantified cells in '{args.a}'.")
36 |         pkwargs = filter_kwargs_by_callable(args.__dict__, phenotyping)
37 |         a = phenotyping(**pkwargs)
38 |         a.write(args.output_dir / "processed.h5ad")
39 |         # Save for project:
40 |         # prj.get_input_filename("cell_cluster_assignments")
41 | 
42 |         # Cell type identity
43 |         # TODO: connect options to CLI
44 |         print("Matching expression to reference cell types.")
45 |         df = a.raw.to_adata().to_df()[a.var.index[~a.var.index.str.contains("EMPTY")]]
46 |         df = df.loc[:, df.var() > 0]
47 |         cov = pd.get_dummies(a.obs[args.batch_variable])
48 |         preds = predict_cell_types_from_reference(df, args.output_dir, covariates=cov)
49 |         a.obs = a.obs.join(preds)
50 |         a.write(args.output_dir / "processed.h5ad")
51 | 
52 |         # grid = clustermap(a.to_df().groupby(a.obs['cell_type']).mean())
53 |         # grid = clustermap(a.obs.corr(), cmap='RdBu_r', center=0)
54 | 
55 |     # if args.plot:
56 |     #     print(f"Plotting phenotypes in directory '{args.output_dir}'.")
57 |     #     output_prefix = args.output_dir / "phenotypes."
58 |     #     if args.compute:
59 |     #         args.a = a
60 |     #     pkwargs = filter_kwargs_by_callable(args.__dict__, plot_phenotyping)
61 |     #     plot_phenotyping(output_prefix=output_prefix, **pkwargs)
62 | 
63 |     print("Finished phenotyping step.")
64 |     return 0
65 | 
66 | 
67 | if __name__ == "__main__":
68 |     try:
69 |         sys.exit(main())
70 |     except KeyboardInterrupt:
71 |         sys.exit(1)
72 | 


--------------------------------------------------------------------------------
/imc/scripts/predict.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | """
  4 | Generate probablity maps for each pixel in each image.
  5 | """
  6 | 
  7 | import sys
  8 | import typing as tp
  9 | 
 10 | from imc import ROI
 11 | from imc.types import Path
 12 | from imc.scripts import build_cli, find_tiffs
 13 | from imc.utils import download_file, run_shell_command
 14 | 
 15 | 
 16 | def main(cli: tp.Sequence[str] = None) -> int:
 17 |     """Generate probability maps for each ROI using ilastik."""
 18 |     parser = build_cli("predict")
 19 |     args = parser.parse_args(cli)
 20 |     if not args.tiffs:
 21 |         args.tiffs = find_tiffs()
 22 |         if not args.tiffs:
 23 |             print("Input files were not provided and cannot be found!")
 24 |             return 1
 25 | 
 26 |     fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.tiffs])
 27 |     print(f"Starting predict step for {len(args.tiffs)} TIFF files:{fs}!")
 28 | 
 29 |     # Prepare ROI objects
 30 |     rois = list()
 31 |     for tiff in args.tiffs:
 32 |         roi = ROI.from_stack(tiff)
 33 |         out = roi.get_input_filename("probabilities")
 34 |         if not args.overwrite and out.exists():
 35 |             continue
 36 |         rois.append(roi)
 37 | 
 38 |     if not rois:
 39 |         print("All output predictions exist. Skipping prediction step.")
 40 |         return 0
 41 | 
 42 |     # Get resources
 43 |     ilastik_sh = get_ilastik(args.lib_dir)
 44 |     if args.custom_model is None:
 45 |         model_ilp = get_model(args.models_dir, args.ilastik_model_version)
 46 |     else:
 47 |         model_ilp = args.custom_model
 48 | 
 49 |     # Predict
 50 |     print("Starting ilastik pixel classification.")
 51 |     tiff_files = [roi.get_input_filename("ilastik_input") for roi in rois]
 52 |     predict_with_ilastik(tiff_files, ilastik_sh, model_ilp, args.quiet)
 53 | 
 54 |     for roi in rois:
 55 |         _in = roi.root_dir / roi.name + "_ilastik_s2_Probabilities.tiff"
 56 |         if _in.exists():
 57 |             _in.rename(roi.get_input_filename("probabilities"))
 58 | 
 59 |     if args.cleanup:
 60 |         for roi in rois:
 61 |             roi.get_input_filename("ilastik_input").unlink()
 62 | 
 63 |     print("Finished predict step!")
 64 |     return 0
 65 | 
 66 | 
 67 | def predict_with_ilastik(
 68 |     tiff_files: tp.Sequence[Path], ilastik_sh: Path, model_ilp: Path, quiet: bool = True
 69 | ) -> int:
 70 |     """
 71 |     Use a trained ilastik model to classify pixels in an IMC image.
 72 |     """
 73 |     quiet_arg = "\n        --redirect_output /dev/null \\" if quiet else ""
 74 |     cmd = f"""{ilastik_sh} \\
 75 |         --headless \\
 76 |         --readonly \\
 77 |         --export_source probabilities \\{quiet_arg}
 78 |         --project {model_ilp} \\
 79 |         """
 80 |     # Shell expansion of input files won't happen in subprocess call
 81 |     cmd += " ".join([x.replace_(" ", r"\ ").as_posix() for x in tiff_files])
 82 |     return run_shell_command(cmd, quiet=True)
 83 | 
 84 | 
 85 | def get_ilastik(lib_dir: Path, version: str = "1.3.3post2") -> Path:
 86 |     """Download ilastik software."""
 87 |     import tarfile
 88 | 
 89 |     base_url = "https://files.ilastik.org/"
 90 | 
 91 |     if sys.platform.startswith("linux"):
 92 |         _os = "Linux"
 93 |         file = f"ilastik-{version}-{_os}.tar.bz2"
 94 |         f = lib_dir / f"ilastik-{version}-{_os}" / "run_ilastik.sh"
 95 |     elif sys.platform.startswith("darwin"):
 96 |         _os = "OSX"
 97 |         file = f"ilastik-{version}-{_os}.tar.bz2"
 98 |         f = (
 99 |             lib_dir
100 |             / f"ilastik-{version}-{_os}.app"
101 |             / "Contents"
102 |             / "ilastik-release"
103 |             / "run_ilastik.sh"
104 |         )
105 |     else:
106 |         raise NotImplementedError(
107 |             "ilastik command line use is only available for Linux and MacOS!"
108 |         )
109 | 
110 |     if not f.exists():
111 |         lib_dir.mkdir()
112 |         print("Downloading ilastik archive.")
113 |         download_file(base_url + file, lib_dir / file)
114 |         print("Extracting ilastik archive.")
115 |         with tarfile.open(lib_dir / file, "r:bz2") as tar:
116 |             tar.extractall(lib_dir)
117 |         (lib_dir / file).unlink()
118 |     return f
119 | 
120 | 
121 | def get_model(models_dir: Path, version: str = "20210302") -> Path:
122 |     """Download pre-trained ilastik model."""
123 |     import tarfile
124 | 
125 |     versions = {
126 |         "20210302": "https://wcm.box.com/shared/static/1q41oshxe76b1uzt1b12etbq3l5dyov4.ilp"
127 |     }
128 | 
129 |     url = versions[version]
130 |     file = f"pan_dataset.{version}.ilp"
131 | 
132 |     f = models_dir / file
133 |     if not f.exists():
134 |         models_dir.mkdir()
135 |         print("Downloading ilastik model.")
136 |         download_file(url, f)
137 |     return f
138 | 
139 | 
140 | if __name__ == "__main__":
141 |     try:
142 |         sys.exit(main())
143 |     except KeyboardInterrupt:
144 |         sys.exit(1)
145 | 


--------------------------------------------------------------------------------
/imc/scripts/prepare.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | """
  4 | Convert MCD files to TIFF and Sample/ROI structure.
  5 | """
  6 | 
  7 | import sys
  8 | import typing as tp
  9 | 
 10 | import numpy as np
 11 | import tifffile
 12 | 
 13 | from imc import ROI
 14 | from imc.scripts import build_cli
 15 | from imc.segmentation import prepare_stack
 16 | from imc.utils import (
 17 |     mcd_to_dir,
 18 |     plot_panoramas_rois,
 19 |     stack_to_ilastik_h5,
 20 |     txt_to_tiff,
 21 |     filter_kwargs_by_callable,
 22 | )
 23 | 
 24 | 
 25 | MCD_FILE_ENDINGS = (".mcd", ".MCD")
 26 | TIFF_FILE_ENDINGS = (".tiff", ".TIFF", ".tif", ".TIF")
 27 | TXT_FILE_ENDINGS = (".txt", ".TXT")
 28 | 
 29 | 
 30 | def main(cli: tp.Sequence[str] = None) -> int:
 31 |     parser = build_cli("prepare")
 32 |     args = parser.parse_args(cli)
 33 | 
 34 |     if not args.pannel_csvs:
 35 |         args.pannel_csvs = [None] * len(args.input_files)
 36 |     elif len(args.pannel_csvs) == 1:
 37 |         args.pannel_csvs = args.pannel_csvs * len(args.input_files)
 38 |     else:
 39 |         assert len(args.input_files) == len(args.pannel_csvs)
 40 | 
 41 |     if (args.sample_names is None) or (len(args.input_files) != len(args.sample_names)):
 42 |         args.sample_names = [None] * len(args.input_files)
 43 | 
 44 |     args.compression = getattr(tifffile.TIFF.COMPRESSION, args.compression)
 45 | 
 46 |     mcds = [file for file in args.input_files if file.endswith(MCD_FILE_ENDINGS)]
 47 |     tiffs = [file for file in args.input_files if file.endswith(TIFF_FILE_ENDINGS)]
 48 |     txts = [file for file in args.input_files if file.endswith(TXT_FILE_ENDINGS)]
 49 |     if mcds and (tiffs or txts):
 50 |         raise ValueError(
 51 |             "Mixture of MCD and TIFFs/TXTs were given. "
 52 |             "Not yet supported, please run prepare step for each file type separately."
 53 |         )
 54 | 
 55 |     if not args.quiet:
 56 |         ...
 57 | 
 58 |     fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.input_files])
 59 |     print(f"Starting prepare step for {len(args.input_files)} files:{fs}!")
 60 | 
 61 |     for mcd_file, pannel_csv, sample_name in zip(
 62 |         mcds, args.pannel_csvs, args.sample_names
 63 |     ):
 64 |         sargs = args.__dict__.copy()
 65 |         sargs["mcd_file"] = mcd_file
 66 |         sargs["pannel_csv"] = pannel_csv
 67 |         sargs["sample_name"] = sample_name
 68 |         sargs["output_dir"] = args.root_output_dir / mcd_file.stem
 69 |         sargs = {k: v for k, v in sargs.items() if v is not None}
 70 |         sargs = filter_kwargs_by_callable(sargs, mcd_to_dir)
 71 | 
 72 |         print(f"Started analyzing '{mcd_file}'.")
 73 |         mcd_to_dir(**sargs)
 74 | 
 75 |         # Plot ROI positions on panoramas and slide
 76 |         plot_panoramas_rois(
 77 |             yaml_spec=mcd_file.replace_(".mcd", ".session_metadata.yaml"),
 78 |             output_prefix=args.root_output_dir / mcd_file.stem / mcd_file.stem + ".",
 79 |             panorama_image_prefix=args.root_output_dir / mcd_file.stem / "Panorama_",
 80 |             save_roi_arrays=False,
 81 |             overwrite=args.overwrite,
 82 |         )
 83 |         print(f"Finished with '{mcd_file}'.")
 84 | 
 85 |     for txt in txts:
 86 |         print(f"Preparing TXT file: '{txt}'.")
 87 |         name = txt.name.replace(".txt", "")
 88 |         tiff_f = args.root_output_dir / name / "tiffs" / name + "_full.tiff"
 89 |         tiff_f.parent.mkdir()
 90 |         txt_to_tiff(txt, tiff_f, write_channel_labels=True)
 91 |         tiffs.append(tiff_f)
 92 | 
 93 |     for tiff in tiffs:
 94 |         roi = ROI.from_stack(tiff)
 95 |         ilastik_input = tiff.replace_("_full.tiff", "_ilastik_s2.h5")
 96 |         if (not ilastik_input.exists()) or args.overwrite:
 97 |             print(f"Preparing TIFF file: '{tiff}'.")
 98 |             s = prepare_stack(roi.stack, roi.channel_labels)
 99 |             _ = stack_to_ilastik_h5(s[np.newaxis, ...], ilastik_input)
100 | 
101 |     print("Finished prepare step!")
102 |     return 0
103 | 
104 | 
105 | if __name__ == "__main__":
106 |     try:
107 |         sys.exit(main())
108 |     except KeyboardInterrupt:
109 |         sys.exit(1)
110 | 


--------------------------------------------------------------------------------
/imc/scripts/process.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | """
  4 | Process raw IMC files end-to-end.
  5 | """
  6 | 
  7 | import sys
  8 | import typing as tp
  9 | import json
 10 | from collections import defaultdict
 11 | import time
 12 | import warnings
 13 | 
 14 | from urlpath import URL
 15 | 
 16 | from imc.types import Path
 17 | from imc.scripts import build_cli, find_mcds, find_tiffs
 18 | from imc.scripts.inspect_mcds import main as inspect
 19 | from imc.scripts.prepare import main as prepare
 20 | from imc.scripts.predict import main as predict
 21 | from imc.scripts.segment_stacks import main as segment
 22 | from imc.scripts.quantify import main as quantify
 23 | from imc.scripts.phenotype import main as phenotype
 24 | from imc.utils import download_file
 25 | 
 26 | 
 27 | DATA_DIR = Path("data")
 28 | PROCESSED_DIR = Path("processed")
 29 | MCD_FILE_ENDINGS = (".mcd", ".MCD")
 30 | TIFF_FILE_ENDINGS = (".tiff", ".TIFF", ".tif", ".TIF")
 31 | TXT_FILE_ENDINGS = (".txt", ".TXT")
 32 | DEFAULT_STEP_ARGS = {
 33 |     "prepare": ["--ilastik", "--n-crops", "0", "--ilastik-compartment", "nuclear"],
 34 |     "segment": ["--from-probabilities", "--model", "deepcell", "--compartment", "both"],
 35 | }
 36 | process_step_order = ["inspect", "prepare", "predict", "segment", "quantify", "phenotype"]
 37 | opts = defaultdict(list)
 38 | for k, v in DEFAULT_STEP_ARGS.items():
 39 |     opts[k] = v
 40 | 
 41 | 
 42 | def main(cli: tp.Sequence[str] = None) -> int:
 43 |     parser = build_cli("process")
 44 |     args = parser.parse_args(cli)
 45 | 
 46 |     if args.quiet:
 47 |         warnings.filterwarnings("ignore")
 48 | 
 49 |     if not args.files:
 50 |         print(
 51 |             "No input files were given, "
 52 |             "searching for MCD files under current directory."
 53 |         )
 54 |         args.files = find_mcds()
 55 |         if not args.files:
 56 |             print("No MCD files found. Searching for TIFF files.")
 57 |             args.files = find_tiffs()
 58 |             if not args.files:
 59 |                 print(
 60 |                     "No input files could be found. Specify them manually: "
 61 |                     "`imc process $FILE`."
 62 |                 )
 63 |                 return 1
 64 | 
 65 |     # If provided URLs, download files
 66 |     urls = list(map(URL, filter(is_url, args.files)))
 67 |     args.files = list(filter(lambda x: not is_url(x), args.files))
 68 |     args.files = [Path(x).absolute().resolve() for x in args.files]
 69 | 
 70 |     missing = [f for f in args.files if not f.exists()]
 71 |     if missing:
 72 |         fs = "\n\t- ".join(map(str, missing))
 73 |         print(f"Could not find the following input files:\n\t- {fs}")
 74 |         return 1
 75 | 
 76 |     for url in urls:
 77 |         print("Given URLs as input, will download.")
 78 |         if url.name.endswith(MCD_FILE_ENDINGS):
 79 |             f = DATA_DIR.mkdir() / url.name
 80 |         elif url.name.endswith(TIFF_FILE_ENDINGS):
 81 |             f = PROCESSED_DIR.mkdir() / url.name
 82 |         elif url.name.endswith(TXT_FILE_ENDINGS):
 83 |             f = DATA_DIR.mkdir() / url.name
 84 |         if not f.exists():
 85 |             print(f"Downloading file '{url}' into '{f}'.")
 86 |             download_file(url.as_posix(), f)
 87 |             print("Completed.")
 88 |         args.files.append(f)
 89 | 
 90 |     # Figure out which steps are going to be done
 91 |     if args.steps is None:
 92 |         args.steps = process_step_order
 93 |     else:
 94 |         args.steps = args.steps.split(",")
 95 |         assert all(x in process_step_order for x in args.steps)
 96 |     if args.start_step is not None:
 97 |         args.steps = args.steps[args.steps.index(args.start_step) :]
 98 |     if args.stop_step is not None:
 99 |         args.steps = args.steps[: args.steps.index(args.stop_step) + 1]
100 | 
101 |     # Load config
102 |     if args.config is not None:
103 |         with open(args.config) as h:
104 |             opts.update(json.load(h))
105 | 
106 |     fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.files])
107 |     print(f"Starting processing of {len(args.files)} files:{fs}!")
108 |     steps_s = "\n\t- ".join(args.steps)
109 |     print(f"Will do following steps:\n\t- {steps_s}\n")
110 |     time.sleep(1)
111 | 
112 |     mcds = [file for file in args.files if file.endswith(MCD_FILE_ENDINGS)]
113 |     mcds_s = list(map(str, mcds))
114 |     tiffs = [file for file in args.files if file.endswith(TIFF_FILE_ENDINGS)]
115 |     tiffs_s = list(map(str, tiffs))
116 |     txts = [file for file in args.files if file.endswith(TXT_FILE_ENDINGS)]
117 |     txts_s = list(map(str, txts))
118 |     if "inspect" in args.steps and mcds:
119 |         inspect(opts["inspect"] + mcds_s)
120 |     if "prepare" in args.steps:
121 |         prepare(opts["prepare"] + mcds_s + tiffs_s + txts_s)
122 | 
123 |     # Now run remaining for all
124 |     new_tiffs = list()
125 |     for mcd in mcds:
126 |         new_tiffs += list(
127 |             (PROCESSED_DIR / mcd.stem / "tiffs").glob(f"{mcd.stem}*_full.tiff")
128 |         )
129 |     for txt in txts:
130 |         name = txt.name.replace(".txt", "")
131 |         tiff_f = PROCESSED_DIR / name / "tiffs" / name + "_full.tiff"
132 |         new_tiffs += [tiff_f]
133 |     tiffs = sorted(list(map(str, set(tiffs + new_tiffs))))
134 | 
135 |     s_parser = build_cli("segment")
136 |     s_args = s_parser.parse_args(opts["segment"] + tiffs)
137 |     reason = (
138 |         f"Skipping predict step as segmentation model '{s_args.model}' does not need it."
139 |     )
140 |     if "predict" in args.steps:
141 |         if s_args.model == "deepcell":
142 |             out = predict(opts["predict"] + tiffs)
143 |             if out:
144 |                 return out
145 |         else:
146 |             print(reason)
147 |     if "segment" in args.steps:
148 |         segment(opts["segment"] + tiffs)
149 |     if "quantify" in args.steps:
150 |         quantify(opts["quantify"] + tiffs)
151 |     h5ad_f = "processed/quantification.h5ad"
152 |     if "phenotype" in args.steps:
153 |         phenotype(opts["phenotype"] + [h5ad_f])
154 | 
155 |     print("Finished processing!")
156 |     return 0
157 | 
158 | 
159 | def is_url(x: str) -> bool:
160 |     from urllib.parse import urlparse
161 | 
162 |     if isinstance(x, Path):
163 |         x = x.as_posix()
164 | 
165 |     try:
166 |         result = urlparse(x)
167 |         return all([result.scheme, result.netloc])
168 |     except:
169 |         return False
170 | 
171 | 
172 | if __name__ == "__main__":
173 |     try:
174 |         sys.exit(main())
175 |     except KeyboardInterrupt:
176 |         sys.exit(1)
177 | 


--------------------------------------------------------------------------------
/imc/scripts/quantify.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | """
 4 | Quantify images in stacks.
 5 | """
 6 | 
 7 | import sys
 8 | import typing as tp
 9 | 
10 | import numpy as np
11 | import anndata
12 | 
13 | from imc import ROI
14 | from imc.types import Path
15 | from imc.ops.quant import quantify_cells_rois
16 | from imc.scripts import build_cli, find_tiffs
17 | 
18 | def main(cli: tp.Sequence[str] = None) -> int:
19 |     parser = build_cli("quantify")
20 |     args = parser.parse_args(cli)
21 |     if not args.tiffs:
22 |         args.tiffs = sorted(find_tiffs())
23 |         if not args.tiffs:
24 |             print("Input files were not provided and cannot be found!")
25 |             return 1
26 | 
27 |     fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.tiffs])
28 |     print(f"Starting quantification step for {len(args.tiffs)} TIFF files:{fs}!")
29 | 
30 |     # Prepare ROI objects
31 |     rois = list()
32 |     for tiff in args.tiffs:
33 |         roi = ROI.from_stack(tiff)
34 |         roi.set_channel_exclude(args.channel_exclude.split(","))
35 |         rois.append(roi)
36 | 
37 |     missing = [r.name for r in rois if not r.get_input_filename("stack").exists()]
38 |     if missing:
39 |         m = "\n\t- ".join(missing)
40 |         error = f"Not all stacks exist! Missing:\n\t- {m}"
41 |         raise ValueError(error)
42 |     missing = [r.name for r in rois if not r.get_input_filename("cell_mask").exists()]
43 |     if missing:
44 |         m = "\n\t- ".join(missing)
45 |         error = f"Not all cell masks exist! Missing:\n\t- {m}"
46 |         raise ValueError(error)
47 | 
48 |     quant = quantify_cells_rois(
49 |         rois, args.layers.split(","), morphology=args.morphology
50 |     ).reset_index()
51 | 
52 |     # reorder columns for nice effect
53 |     ext = ["roi", "obj_id"] + (["X_centroid", "Y_centroid"] if args.morphology else [])
54 |     rem = [x for x in quant.columns if x not in ext]
55 |     quant = quant[ext + rem]
56 | 
57 |     if args.output is None:
58 |         f = Path("processed").mkdir() / "quantification.csv.gz"
59 |     else:
60 |         f = Path(args.output)
61 |     quant.to_csv(f, index=False)
62 |     print(f"Wrote CSV file to '{f.absolute()}'.")
63 | 
64 |     if args.output_h5ad:
65 |         v = len(str(quant["obj_id"].max()))
66 |         idx = quant["roi"] + "-" + quant["obj_id"].astype(str).str.zfill(v)
67 |         quant.index = idx
68 | 
69 |         cols = ["sample", "roi", "obj_id", "X_centroid", "Y_centroid", "layer"]
70 |         cols = [c for c in cols if c in quant.columns]
71 |         ann = anndata.AnnData(
72 |             quant.drop(cols, axis=1, errors="ignore").astype(float), obs=quant[cols]
73 |         )
74 |         if "X_centroid" in ann.obs.columns:
75 |             ann.obsm["spatial"] = ann.obs[["Y_centroid", "X_centroid"]].values
76 |         f = f.replace_(".csv.gz", ".h5ad")
77 |         ann.write(f)
78 |         print(f"Wrote h5ad file to '{f.absolute()}'.")
79 |         ann2 = anndata.read(f)
80 |         assert np.allclose(ann.X, ann2.X)
81 | 
82 |     print("Finished quantification step.")
83 |     return 0
84 | 
85 | 
86 | if __name__ == "__main__":
87 |     try:
88 |         sys.exit(main())
89 |     except KeyboardInterrupt:
90 |         sys.exit(1)
91 | 


--------------------------------------------------------------------------------
/imc/scripts/segment_stacks.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | """
 4 | Segment image stacks.
 5 | """
 6 | 
 7 | import sys
 8 | import argparse
 9 | import typing as tp
10 | from dataclasses import dataclass
11 | 
12 | import numpy as np
13 | import pandas as pd
14 | import tifffile
15 | import matplotlib.pyplot as plt
16 | 
17 | from imc import ROI
18 | from imc.types import Path, Series, Array
19 | from imc.segmentation import segment_roi, plot_image_and_mask
20 | from imc.scripts import build_cli, find_tiffs
21 | 
22 | 
23 | def main(cli: tp.Sequence[str] = None) -> int:
24 |     parser = build_cli("segment")
25 |     args = parser.parse_args(cli)
26 |     if len(args.tiffs) == 0:
27 |         args.tiffs = find_tiffs()
28 |         if len(args.tiffs) == 0:
29 |             print("TIFF files were not provided and could not be found!")
30 |             return 1
31 | 
32 |     fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.tiffs])
33 |     print(f"Starting segmentation step for {len(args.tiffs)} TIFF files:{fs}!")
34 | 
35 |     # Prepare ROI objects
36 |     rois = list()
37 |     for tiff in args.tiffs:
38 |         roi = ROI.from_stack(tiff)
39 |         roi.set_channel_exclude(args.channel_exclude.split(","))
40 |         rois.append(roi)
41 | 
42 |     # Run segmentation
43 |     for roi in rois:
44 |         if args.compartment == "both":
45 |             mask_files = {
46 |                 "cell": roi.get_input_filename("cell_mask"),
47 |                 "nuclei": roi.get_input_filename("nuclei_mask"),
48 |             }
49 |         else:
50 |             mask_files = {
51 |                 args.compartment: roi.get_input_filename(args.compartment + "_mask")
52 |             }
53 |         exists = all(f.exists() for f in mask_files.values())
54 |         if exists and not args.overwrite:
55 |             print(f"Mask for '{roi}' already exists, skipping...")
56 |             continue
57 | 
58 |         print(f"Started segmentation of '{roi} with shape: '{roi.stack.shape}'")
59 |         try:
60 |             _ = segment_roi(
61 |                 roi,
62 |                 from_probabilities=args.from_probabilities,
63 |                 model=args.model,
64 |                 compartment=args.compartment,
65 |                 postprocessing=args.postprocessing,
66 |                 save=args.save,
67 |                 overwrite=args.overwrite,
68 |                 plot_segmentation=args.plot,
69 |                 verbose=not args.quiet,
70 |             )
71 |         except ValueError as e:
72 |             print("Error segmenting stack. Perhaps XY shape is not compatible?")
73 |             print(e)
74 |             continue
75 |         print(f"Finished segmentation of '{roi}'.")
76 | 
77 |     print("Finished segmentation step!")
78 |     return 0
79 | 
80 | 
81 | if __name__ == "__main__":
82 |     try:
83 |         sys.exit(main())
84 |     except KeyboardInterrupt:
85 |         sys.exit(1)
86 | 


--------------------------------------------------------------------------------
/imc/scripts/view.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | """
 4 | View multiplexed TIFF files interactively.
 5 | """
 6 | 
 7 | import sys
 8 | import time
 9 | import typing as tp
10 | 
11 | import matplotlib.pyplot as plt
12 | 
13 | from imc import ROI
14 | from imc.graphics import InteractiveViewer
15 | from imc.scripts import build_cli
16 | 
17 | 
18 | def main(cli: tp.Sequence[str] = None) -> int:
19 |     parser = build_cli("view")
20 |     args = parser.parse_args(cli)
21 |     if len(args.input_files) == 0:
22 |         print("Input files were not provided and could not be found!")
23 |         return 1
24 | 
25 |     kwargs = {}
26 |     if args.kwargs is not None:
27 |         print(args.kwargs)
28 |         params = [x.split("=") for x in args.kwargs.split(",")]
29 |         kwargs = {y[0]: y[1] for y in params}
30 | 
31 |     fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.input_files])
32 |     print(f"Starting viewers for {len(args.input_files)} files: {fs}!")
33 | 
34 |     if args.napari:
35 |         assert all(
36 |             f.endswith(".mcd") for f in args.input_files
37 |         ), "If using napari input must be MCD files!"
38 |         import napari
39 | 
40 |         viewer = napari.Viewer()
41 |         viewer.open(args.input_files)
42 |         napari.run()
43 |         return 0
44 | 
45 |     assert all(
46 |         f.endswith((".tiff", ".tif")) for f in args.input_files
47 |     ), "Input must be TIFF files!"
48 | 
49 |     # Prepare ROI objects
50 |     rois = [ROI.from_stack(tiff) for tiff in args.input_files]
51 | 
52 |     # Generate viewer instances
53 |     viewers = list()
54 |     for roi in rois:
55 |         view = InteractiveViewer(
56 |             roi,
57 |             up_key=args.up_key,
58 |             down_key=args.down_key,
59 |             log_key=args.log_key,
60 |             **kwargs,
61 |         )
62 |         viewers.append(view)
63 | 
64 |     print(
65 |         f"Press '{args.up_key}' and '{args.down_key}' to scroll through image channels."
66 |         + f" '{args.log_key}' to toggle logarithmic transformation."
67 |     )
68 |     time.sleep(2)
69 |     for view in viewers:
70 |         view.fig.show()
71 |     plt.show(block=True)
72 | 
73 |     print("Terminating!")
74 |     return 0
75 | 
76 | 
77 | if __name__ == "__main__":
78 |     try:
79 |         sys.exit(main())
80 |     except KeyboardInterrupt:
81 |         sys.exit(1)
82 | 


--------------------------------------------------------------------------------
/imc/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ElementoLab/imc/9725b3ab72f2273cb4a702964fa8518c2f189e9c/imc/tests/__init__.py


--------------------------------------------------------------------------------
/imc/tests/_test_layers.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | import tifffile
 6 | import anndata
 7 | import scanpy as sc
 8 | import scipy.ndimage as ndi
 9 | 
10 | from imc import Project
11 | from imc.graphics import random_label_cmap
12 | 
13 | layer_names = ["cell", "nuclei", "cytoplasm", "membrane", "extracellular"]
14 | 
15 | prj = Project()
16 | 
17 | roi = prj.rois[25]
18 | fig, axes = plt.subplots(1, 5, figsize=(5 * 4, 4), sharex=True, sharey=True)
19 | cmap = random_label_cmap()
20 | for i, layer in enumerate(layer_names):
21 |     mask = getattr(roi, layer + "_mask")
22 |     mask = np.ma.masked_array(mask, mask=mask == 0)
23 |     axes[i].imshow(mask, cmap=cmap)
24 |     axes[i].set(title=layer)
25 |     axes[i].axis("off")
26 | 
27 | 
28 | prj.rois = prj.rois[25:27]
29 | quant = prj.quantify_cells(layers=layer_names, set_attribute=False)
30 | 
31 | 
32 | quant = quant.reset_index().melt(id_vars=["roi", "obj_id", "layer"], var_name="channel")
33 | quant = quant.pivot_table(
34 |     index=["roi", "obj_id"], columns=["layer", "channel"], values="value"
35 | )
36 | quant = quant.reset_index()
37 | 
38 | X = quant.loc[:, layer_names[0]]
39 | obs = quant[["roi", "obj_id"]]
40 | obs["in_tissue"] = 1
41 | obs["array_row"] = ...
42 | obs["array_col"] = ...
43 | obs.columns = ["roi", "obj_id"]
44 | layers = quant.loc[:, layer_names[1:]]
45 | 
46 | a = anndata.AnnData(
47 |     X=X.reset_index(drop=True),
48 |     obs=obs,
49 |     layers={l: layers[l] for l in layer_names[1:]},
50 | )
51 | 
52 | a = anndata.AnnData(X=quant.drop(["roi", "obj_id"], 1), obs=obs)
53 | 
54 | for roi in prj.rois:
55 |     a.uns["spatial"][roi.name] = {
56 |         "images": {"hires": roi.stack},
57 |         "metadata": {},
58 |         "scalefactors": {
59 |             "spot_diameter_fullres": 89.56665687930325,
60 |             "tissue_hires_scalef": 0.150015,
61 |             "fiducial_diameter_fullres": 144.6845995742591,
62 |             "tissue_lowres_scalef": 0.045004502,
63 |         },
64 |     }
65 | 
66 | 
67 | sc.pp.log1p(a)
68 | sc.pp.scale(a)
69 | sc.pp.pca(a)
70 | 


--------------------------------------------------------------------------------
/imc/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from imc.demo import generate_project
 4 | 
 5 | 
 6 | # # To run manually:
 7 | # import tempfile
 8 | # tmp_path = tempfile.TemporaryDirectory().name
 9 | 
10 | 
11 | @pytest.fixture
12 | def project(tmp_path):
13 |     return generate_project(root_dir=tmp_path)
14 | 
15 | 
16 | @pytest.fixture
17 | def metadata(project):
18 |     return project.sample_metadata
19 | 
20 | 
21 | @pytest.fixture
22 | def project_with_clusters(tmp_path):
23 |     p = generate_project(root_dir=tmp_path)
24 |     p.quantify_cells()
25 |     c = (
26 |         p.quantification.set_index(["sample", "roi"], append=True)
27 |         .rename_axis(["obj_id", "sample", "roi"])
28 |         .reorder_levels([1, 2, 0])
29 |         .assign(cluster=(p.quantification.index % 2))["cluster"]
30 |     )
31 |     p.set_clusters(c, write_to_disk=True)
32 |     return p
33 | 


--------------------------------------------------------------------------------
/imc/tests/test_full_analysis.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | 
 4 | class TestHighOrderFunctions:
 5 |     # @pytest.mark.slow
 6 |     @pytest.mark.xfail
 7 |     def test_cluster_cells(self, project):
 8 |         project.cluster_cells()
 9 | 
10 |     @pytest.mark.slow
11 |     def test_measure_adjacency(self, project_with_clusters):
12 |         files = [
13 |             "cluster_adjacency_graph.frequencies.csv",
14 |             "cluster_adjacency_graph.norm_over_random.clustermap.svg",
15 |             "cluster_adjacency_graph.norm_over_random.csv",
16 |             "cluster_adjacency_graph.norm_over_random.heatmap.svg",
17 |             "cluster_adjacency_graph.random_frequencies.all_iterations_100.csv",
18 |             "cluster_adjacency_graph.random_frequencies.csv",
19 |             "neighbor_graph.gpickle",
20 |             "neighbor_graph.svg",
21 |         ]
22 | 
23 |         with project_with_clusters as prj:
24 |             adj = prj.measure_adjacency()
25 |             assert (
26 |                 prj.results_dir / "single_cell" / "project.adjacency_frequencies.csv"
27 |             ).exists()
28 |             assert adj.shape == (36, 5)
29 |             assert ~adj.isnull().any().any()
30 | 
31 |         for roi in prj.rois:
32 |             prefix = roi.sample.root_dir / "single_cell" / roi.name + "."
33 |             for file in files:
34 |                 assert (prefix + file).exists()
35 | 


--------------------------------------------------------------------------------
/imc/tests/test_graphics.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import numpy as np
 4 | 
 5 | from matplotlib.image import AxesImage
 6 | from matplotlib.legend import Legend
 7 | 
 8 | 
 9 | class TestCellTypePlotting:
10 |     def test_clusters_labeled_with_numbers(self, project_with_clusters):
11 |         p = project_with_clusters
12 | 
13 |         # # make pattern: "int (1-based) - str"
14 |         c = (p.clusters + 1).astype(str) + " - " + (p.clusters + 1).astype(str)
15 |         p.set_clusters(c)
16 | 
17 |         # Plot both clusters
18 |         roi = p.rois[0]
19 |         fig1 = roi.plot_cell_types()
20 | 
21 |         # Remove first cluster
22 |         c2 = roi.clusters.copy()
23 |         for e in c2.index:
24 |             c2[e] = roi.clusters.max()
25 |         roi.set_clusters(c2)
26 |         fig2 = roi.plot_cell_types()
27 | 
28 |         # Get arrays back from images
29 |         a1 = [i for i in fig1.axes[0].get_children() if isinstance(i, AxesImage)]
30 |         a1 = [a for a in a1 if len(a.get_array().shape) == 3][0].get_array()
31 |         a2 = [i for i in fig2.axes[0].get_children() if isinstance(i, AxesImage)]
32 |         a2 = [a for a in a2 if len(a.get_array().shape) == 3][0].get_array()
33 | 
34 |         # Get legend of second image
35 |         l2 = [i for i in fig2.axes[0].get_children() if isinstance(i, Legend)][0]
36 | 
37 |         # Get color of legend patch (RGBA)
38 |         lc = l2.get_patches()[0].get_facecolor()[:-1]
39 |         # Get color from array (should be only one besides black)
40 |         _t = a2.reshape((8 * 8, 3))
41 |         ac = _t[_t.sum(1) > 0][0]
42 | 
43 |         assert np.equal(ac, lc).all()
44 | 


--------------------------------------------------------------------------------
/imc/tests/test_obj_creation.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | import pickle
 5 | import tempfile
 6 | 
 7 | import pytest
 8 | 
 9 | from imc import Project, IMCSample, ROI
10 | from imc.demo import generate_project
11 | from imc.data_models.project import DEFAULT_PROJECT_NAME
12 | from imc.data_models.sample import DEFAULT_SAMPLE_NAME
13 | from imc.data_models.roi import DEFAULT_ROI_NAME
14 | 
15 | 
16 | class TestProjectInitialization:
17 |     def test_empty_project(self):
18 |         p = Project()
19 |         assert p.name == DEFAULT_PROJECT_NAME
20 |         assert isinstance(p.samples, list)
21 |         assert isinstance(p.rois, list)
22 |         assert not p.samples
23 |         assert not p.rois
24 | 
25 |     def test_empty_sample(self):
26 |         s = IMCSample()
27 |         assert s.name == DEFAULT_SAMPLE_NAME
28 |         assert isinstance(s.rois, list)
29 |         assert not s.rois
30 | 
31 |     def test_empty_roi(self):
32 |         r = ROI()
33 |         assert r.name == DEFAULT_ROI_NAME
34 | 
35 |     def test_creation_without_rois(self, tmp_path):
36 |         p = generate_project(root_dir=tmp_path)
37 |         p2 = Project(p.metadata[["sample_name"]].drop_duplicates(), processed_dir=p.processed_dir)
38 |         assert len(p2.samples) == 3
39 |         assert len(p2.rois) == 9
40 | 


--------------------------------------------------------------------------------
/imc/tests/test_serialization.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | from typing import Any
 3 | 
 4 | import parmap
 5 | import pandas as pd
 6 | 
 7 | from imc import Project, IMCSample, ROI
 8 | from imc.ops.quant import _quantify_cell_intensity__roi
 9 | from imc.types import Path
10 | 
11 | 
12 | def roundtrip(obj: Any, _dir: Path) -> Any:
13 |     pickle.dump(obj, open(_dir / "file.pkl", "wb"))
14 |     return pickle.load(open(_dir / "file.pkl", "rb"))
15 | 
16 | 
17 | class TestSimpleSerialization:
18 |     def test_empty_project(self, tmp_path):
19 |         p = Project(name="test_empty_project")
20 |         q = roundtrip(p, tmp_path)
21 |         assert q.name == "test_empty_project"
22 |         # assert p is q
23 | 
24 |     def test_empty_sample(self, tmp_path):
25 |         s = IMCSample(sample_name="test_empty_sample", root_dir=".")
26 |         r = roundtrip(s, tmp_path)
27 |         assert r.name == "test_empty_sample"
28 |         # assert s is r
29 | 
30 |     def test_empty_roi(self, tmp_path):
31 |         r = ROI(name="test_empty_roi", roi_number=1)
32 |         s = roundtrip(r, tmp_path)
33 |         assert s.name == "test_empty_roi"
34 |         # assert r is s
35 | 
36 | 
37 | def func(roi: ROI) -> int:
38 |     return len(roi.shape)
39 | 
40 | 
41 | class TestParmapSerialization:
42 |     def test_simple_parmap(self, project):
43 | 
44 |         res = parmap.map(func, project.rois)
45 |         assert all(x == 3 for x in res)
46 | 
47 |     def test_quant_parmap_lowlevel(self, project):
48 | 
49 |         _res = parmap.map(_quantify_cell_intensity__roi, project.rois)
50 |         res = pd.concat(_res)
51 |         assert not res.empty
52 |         assert all(
53 |             res.columns == project.rois[0].channel_labels.tolist() + ["roi", "sample"]
54 |         )
55 | 
56 |     def test_quant_parmap_highlevel(self, project):
57 |         res = project.quantify_cell_intensity()
58 |         assert not res.empty
59 |         assert all(
60 |             res.columns == project.rois[0].channel_labels.tolist() + ["roi", "sample"]
61 |         )
62 | 


--------------------------------------------------------------------------------
/imc/types.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Specific types or type aliases used in the library.
  3 | """
  4 | 
  5 | from __future__ import annotations
  6 | import os
  7 | import typing as tp
  8 | import pathlib
  9 | import argparse
 10 | 
 11 | import matplotlib
 12 | import pandas
 13 | import numpy
 14 | from anndata import AnnData as _AnnData
 15 | 
 16 | 
 17 | __all__ = [
 18 |     "Path",
 19 |     "GenericType",
 20 |     "Args",
 21 |     "Array",
 22 |     "MultiIndexSeries",
 23 |     "Series",
 24 |     "DataFrame",
 25 |     "AnnData",
 26 |     "Figure",
 27 |     "Axis",
 28 |     "Patch",
 29 |     "ColorMap",
 30 | ]
 31 | 
 32 | 
 33 | class Path(pathlib.Path):
 34 |     """
 35 |     A pathlib.Path child class that allows concatenation with strings
 36 |     by overloading the addition operator.
 37 | 
 38 |     In addition, it implements the ``startswith`` and ``endswith`` methods
 39 |     just like in the base :obj:`str` type.
 40 | 
 41 |     The ``replace_`` implementation is meant to be an implementation closer
 42 |     to the :obj:`str` type.
 43 | 
 44 |     Iterating over a directory with ``iterdir`` that does not exists
 45 |     will return an empty iterator instead of throwing an error.
 46 | 
 47 |     Creating a directory with ``mkdir`` allows existing directory and
 48 |     creates parents by default.
 49 |     """
 50 | 
 51 |     _flavour = (
 52 |         pathlib._windows_flavour  # type: ignore[attr-defined]  # pylint: disable=W0212
 53 |         if os.name == "nt"
 54 |         else pathlib._posix_flavour  # type: ignore[attr-defined]  # pylint: disable=W0212
 55 |     )
 56 | 
 57 |     def __add__(self, string: str) -> Path:
 58 |         return Path(str(self) + string)
 59 | 
 60 |     def startswith(self, string: str) -> bool:
 61 |         return str(self).startswith(string)
 62 | 
 63 |     def endswith(self, string: str) -> bool:
 64 |         return str(self).endswith(string)
 65 | 
 66 |     def replace_(self, patt: str, repl: str) -> Path:
 67 |         return Path(str(self).replace(patt, repl))
 68 | 
 69 |     def iterdir(self) -> tp.Generator:
 70 |         if self.exists():
 71 |             yield from [Path(x) for x in pathlib.Path(str(self)).iterdir()]
 72 |         yield from []
 73 | 
 74 |     def unlink(self, missing_ok: bool = True) -> Path:
 75 |         super().unlink(missing_ok=missing_ok)
 76 |         return self
 77 | 
 78 |     def mkdir(self, mode=0o777, parents: bool = True, exist_ok: bool = True) -> Path:
 79 |         super().mkdir(mode=mode, parents=parents, exist_ok=exist_ok)
 80 |         return self
 81 | 
 82 |     def glob(self, pattern: str) -> tp.Generator:
 83 |         # to support ** with symlinks: https://bugs.python.org/issue33428
 84 |         from glob import glob
 85 | 
 86 |         if "**" in pattern:
 87 |             sep = "/" if self.is_dir() else ""
 88 |             yield from map(
 89 |                 Path,
 90 |                 glob(self.as_posix() + sep + pattern, recursive=True),
 91 |             )
 92 |         else:
 93 |             yield from super().glob(pattern)
 94 | 
 95 | 
 96 | GenericType = tp.TypeVar("GenericType")
 97 | 
 98 | # type aliasing (done with Union to distinguish from other declared variables)
 99 | 
100 | 
101 | # # Args = Union[argparse.Namespace]
102 | # class Args(argparse.Namespace, tp.Mapping[str, tp.Any]):
103 | #     pass
104 | 
105 | 
106 | # # Series = Union[pandas.Series]
107 | # class Series(pandas.Series, tp.Mapping[tp.Any, tp.Any]):
108 | #     pass
109 | 
110 | 
111 | Args = tp.Union[argparse.Namespace, tp.Mapping[str, tp.Any]]
112 | 
113 | Array = numpy.ndarray
114 | 
115 | MultiIndexSeries = pandas.Series
116 | Series = pandas.Series
117 | DataFrame = pandas.DataFrame
118 | AnnData = _AnnData
119 | 
120 | Figure = matplotlib.figure.Figure
121 | Axis = matplotlib.axis.Axis
122 | Patch = matplotlib.patches.Patch
123 | ColorMap = matplotlib.colors.LinearSegmentedColormap
124 | 


--------------------------------------------------------------------------------
/noxfile.py:
--------------------------------------------------------------------------------
 1 | import nox
 2 | 
 3 | python_versions = [
 4 |     "3.8",
 5 |     "3.9",
 6 |     "3.10",
 7 | ]
 8 | 
 9 | # TODO: annotate these with explanation
10 | ignore_rules = [
11 |     "E501",
12 |     "F401",
13 |     "F841",
14 |     "W503",
15 |     "E402",
16 |     "E203",
17 |     "E266",
18 |     "E722",  # bare except
19 | ]
20 | 
21 | exclude_directories = [
22 |     "tests",
23 | ]
24 | 
25 | 
26 | @nox.session(python=python_versions)
27 | def lint(session):
28 |     session.install("flake8")
29 |     session.run(
30 |         "flake8",
31 |         "--ignore",
32 |         ",".join(ignore_rules),
33 |         "--exclude",
34 |         ",".join(exclude_directories),
35 |         "imc/",
36 |     )
37 | 
38 | 
39 | @nox.session(python=python_versions)
40 | def test(session):
41 |     session.install(".[dev]")
42 |     session.run("pytest")
43 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | # PIP, using PEP621
  2 | [project]
  3 | name = "imc"
  4 | # version = "0.0.19.dev24+g43d6c06"
  5 | description = "A framework for IMC data analysis."
  6 | authors = [
  7 |     {name = "Andre Rendeiro", email = "afrendeiro@gmail.com"},
  8 | ]
  9 | # python = "^3.8"
 10 | readme = "README.md"
 11 | keywords = [
 12 |     "computational biology",
 13 |     "bioinformatics",
 14 |     "imaging mass cytometry",
 15 |     "imaging",
 16 |     "mass cytometry",
 17 |     "mass spectrometry",
 18 | ]
 19 | classifiers = [
 20 |     "Programming Language :: Python :: 3 :: Only",
 21 |     "Programming Language :: Python :: 3.8",
 22 |     "Programming Language :: Python :: 3.9",
 23 |     "Programming Language :: Python :: 3.10",
 24 |     "Development Status :: 3 - Alpha",
 25 |     "Typing :: Typed",
 26 |     "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
 27 |     "Topic :: Scientific/Engineering :: Bio-Informatics",
 28 | ]
 29 | requires-python = ">=3.8"
 30 | dependencies = [
 31 |     "setuptools_scm",
 32 |     "outdated",
 33 |     "ordered-set",
 34 |     "urlpath",
 35 |     "PyYAML",
 36 |     "imctools>=2.1.0",
 37 |     "joblib",
 38 |     "leidenalg",
 39 |     "python-louvain",
 40 |     "networkx>=3.0.0",
 41 |     "pandas>=1.0.1",
 42 |     "matplotlib>=3.5.0",
 43 |     "scikit-image==0.19.0",
 44 |     "seaborn",
 45 |     "fastcluster",
 46 |     "parmap",
 47 |     "scanpy",
 48 |     "bbknn",
 49 |     "numpy_groupies",
 50 |     "tifffile==2022.4.8",
 51 |     "seaborn-extensions"
 52 | ]
 53 | dynamic = ['version']
 54 | 
 55 | [project.optional-dependencies]
 56 | # not yet supported by pip!
 57 | extra = [
 58 |     "stardist>=0.7.1,<1.0.0",
 59 |     "DeepCell>=0.8.3,<1.0.0",
 60 |     "cellpose>=0.6.5,<1.0.0",
 61 |     "astir>=0.1.4,<1.0.0",
 62 | ]
 63 | stardist = [
 64 |     "stardist>=0.7.1,<1.0.0",
 65 | ]
 66 | deepcell = [
 67 |     "DeepCell>=0.8.3,<1.0.0",
 68 | ]
 69 | cellpose = [
 70 |     "cellpose>=0.6.5,<1.0.0",
 71 | ]
 72 | astir = [
 73 |     "astir>=0.1.4,<1.0.0",
 74 | ]
 75 | dev = [
 76 |     "wheel",
 77 |     "ipython",
 78 |     "black[d]",
 79 |     "mypy>=0.900",  # pin to version supporting pyproject.toml
 80 |     "pandas-stubs",
 81 |     "pylint",
 82 |     "flake8",
 83 |     "git-lint",
 84 |     "pydocstyle",
 85 |     "rich",
 86 |     # data-science-types
 87 |     "PyQt5",
 88 | ]
 89 | test = [
 90 |     "pytest>=6",
 91 |     "pytest-cov",
 92 | ]
 93 | doc = [
 94 |     "Sphinx",
 95 |     "sphinx-issues",
 96 |     "sphinx-rtd-theme",
 97 |     "sphinx-argparse",
 98 | ]
 99 | 
100 | [project.urls]
101 | homepage = "https://github.com/ElementoLab/imc"
102 | repository = "https://github.com/ElementoLab/imc"
103 | documentation = "https://github.com/ElementoLab/imc"
104 | changelog = "https://github.com/ElementoLab/imc/blob/master/docs/source/changelog.md"
105 | 
106 | [project.scripts]
107 | imc = "imc.cli:main"
108 | 
109 | [build-system]
110 | build-backend = "setuptools.build_meta"
111 | requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.0"]
112 | 
113 | # build-backend = "poetry.masonry.api"
114 | # requires = ["poetry>=1.2.0b1", "setuptools>=45", "wheel", "poetry-dynamic-versioning-plugin"]
115 | 
116 | # build-backend = "flit_core.buildapi"
117 | # requires = ["flit_core >=3.2,<4"]
118 | 
119 | [tool.setuptools_scm]
120 | write_to = "imc/_version.py"
121 | write_to_template = 'version = __version__ = "{version}"'
122 | 
123 | # Poetry
124 | [tool.poetry-dynamic-versioning]
125 | enable = true
126 | vcs = "git"
127 | style = "semver"
128 | 
129 | [tool.poetry]
130 | name = "imc"
131 | version = "0.0.0"  # waiting on next release of poetry to use dynamic-versioning extension
132 | description = "A package for the analysis of imaging mass cytometry (IMC) data"
133 | authors = ["Andre Rendeiro <afrendeiro@gmail.com>"]
134 | homepage = "https://github.com/ElementoLab/imc"
135 | repository = "https://github.com/ElementoLab/imc"
136 | documentation = "https://github.com/ElementoLab/imc"
137 | 
138 | [tool.poetry.dependencies]
139 | python = "^3.8"
140 | ordered-set = "^4.0.2"
141 | PyYAML = "^5.4.1"
142 | pandas = ">=1.0.1"
143 | tifffile = ">=2022.5.4"
144 | imctools = "^2.1.0"
145 | scikit-image = "^0.20.0"
146 | imagecodecs = "^2020.5.30"
147 | colorama = "^0.4.3"
148 | h5py = "^2.10.0"
149 | anndata = "^0.7.3"
150 | scanpy = "^1.5.1"
151 | leidenalg = "^0.8.1"
152 | python-louvain = "^0.14"
153 | networkx = "^3.0"
154 | parmap = "^1.5.2"
155 | joblib = "^0.15.1"
156 | 
157 | [tool.poetry.dev-dependencies]
158 | ipython = "^7.16.1"
159 | pylint = "^2.5.3"
160 | git-lint = "^0.1.2"
161 | black = {extras = ["d"], version = "^19.10b0"}
162 | mypy = "^0.900"
163 | pytest = "^5.4.3"
164 | Sphinx = "^3.1.1"
165 | sphinx-issues = "^1.2.0"
166 | sphinx-rtd-theme = "^0.5.0"
167 | 
168 | [tool.poetry.extras]
169 | stardist = [
170 |     "stardist",
171 | ]
172 | deepcell = [
173 |     "DeepCell",
174 | ]
175 | cellpose = [
176 |     "cellpose",
177 | ]
178 | astir = [
179 |     "astir",
180 | ]
181 | 
182 | [tool.black]
183 | line-length = 90
184 | target-version = ['py39']
185 | include = '\.pyi?$'
186 | exclude = '''
187 | 
188 | (
189 |   /(
190 |       \.eggs         # exclude a few common directories in the
191 |     | \.git          # root of the project
192 |     | \.hg
193 |     | \.mypy_cache
194 |     | \.tox
195 |     | \.venv
196 |     | _build
197 |     | buck-out
198 |     | build
199 |     | dist
200 |   )/
201 |   | foo.py           # also separately exclude a file named foo.py in
202 |                      # the root of the project
203 | )
204 | '''
205 | 
206 | [tool.mypy]
207 | python_version = '3.9'
208 | warn_return_any = true
209 | warn_unused_configs = true
210 | 
211 | # Packages without type annotations in shed yet
212 | [[tool.mypy.overrides]]
213 | module = [
214 |     'numpy.*',
215 |     'pandas.*',
216 |     'scipy.*',
217 |     'skimage.*',
218 |     'matplotlib.*',
219 |     'seaborn.*',
220 |     'parmap.*',
221 |     'anndata.*',
222 |     'scanpy.*',
223 |     'pymde.*',
224 |     'umap.*',
225 |     'networkx.*',
226 |     'pingouin.*',
227 |     'tqdm.*',
228 | ]
229 | ignore_missing_imports = true
230 | 
231 | [tool.pytest.ini_options]
232 | minversion = "6.0"
233 | addopts = "-ra -q --strict-markers"
234 | testpaths = [
235 |     "imc/tests"
236 | ]
237 | markers = [
238 |     'slow', # 'marks tests as slow (deselect with "-m 'not slow'")',
239 |     'serial'
240 | ]
241 | 
242 | 
243 | [tool.tox]
244 | legacy_tox_ini = """
245 | [tox]
246 | envlist = py39
247 | 
248 | [testenv]
249 | deps = pytest >= 6, <7
250 | commands = pytest
251 | """
252 | 


--------------------------------------------------------------------------------
/requirements/requirements.cellpose.txt:
--------------------------------------------------------------------------------
1 | cellpose>=0.1.0.1,<1.0.0
2 | 


--------------------------------------------------------------------------------
/requirements/requirements.deepcell.txt:
--------------------------------------------------------------------------------
1 | DeepCell>=0.8.3,<1.0.0
2 | 


--------------------------------------------------------------------------------
/requirements/requirements.dev.txt:
--------------------------------------------------------------------------------
 1 | wheel
 2 | ipython
 3 | black[d]
 4 | mypy>=0.900
 5 | pandas-stubs
 6 | pylint
 7 | flake8
 8 | git-lint
 9 | pydocstyle
10 | rich
11 | pytest>=6
12 | pytest-cov
13 | 


--------------------------------------------------------------------------------
/requirements/requirements.doc.txt:
--------------------------------------------------------------------------------
1 | # data-science-types
2 | Sphinx
3 | sphinx-issues
4 | sphinx-rtd-theme
5 | sphinx-argparse
6 | myst_parser
7 | sphinx-autodoc-typehints
8 | 


--------------------------------------------------------------------------------
/requirements/requirements.stardist.txt:
--------------------------------------------------------------------------------
1 | stardist==0.6.0,<1.0.0
2 | 


--------------------------------------------------------------------------------
/requirements/requirements.txt:
--------------------------------------------------------------------------------
 1 | setuptools_scm
 2 | outdated
 3 | ordered-set
 4 | PyYAML
 5 | parmap
 6 | tqdm
 7 | joblib
 8 | numpy
 9 | numpy_groupies
10 | scipy>=1.7
11 | pandas>=1.0.1
12 | matplotlib>=3.5
13 | imctools>=2.1.0
14 | tifffile==2022.4.8
15 | scikit-image==0.17.2
16 | leidenalg
17 | python-louvain
18 | networkx
19 | scanpy
20 | bbknn
21 | seaborn-extensions
22 | harmonypy
23 | 


--------------------------------------------------------------------------------